好了,不多废话了,代码很少,直接贴出来!附件是完整代码。
tiebasearch.zip
0.70KB
ZIP
33次下载
#!/usr/bin/env python
import re
import os
import sys
import time
import random
import urllib
def get_urls_in_apage(page):
    """Print and return the absolute URL of every post link found in *page*.

    A post link is any ``href="/p/..."`` attribute. Each match runs up to the
    next whitespace character; the leading ``href="`` and the final matched
    character (expected to be the closing quote) are stripped, and the site
    root is prepended.

    Args:
        page: HTML text of one search-result page.

    Returns:
        The list of post URLs in document order (empty when none are found).
        Each URL is also printed on its own line.
    """
    # NOTE(review): the host part of this literal is masked in this copy of
    # the script; restore the real site root before running it.
    site_root = "XXXXXXXXXXXXXXXXXXXXXX"
    prefix_len = len('href="')

    links = []
    for match in re.findall(r'href\=\"\/p\/[^\s]*', page):
        link = site_root + match[prefix_len:-1]
        print(link)
        links.append(link)
    return links
def get_record_size(tname):
    """Return the total number of search records reported for *tname*.

    Fetches the search-result page for the name, finds the text that starts
    at the ``s_nav_right hasPage`` marker and runs up to the next whitespace,
    and converts the first run of digits in it to an int.

    Args:
        tname: Name to search for. It is URL-quoted and appended to the
            ``un=`` query parameter.

    Returns:
        The record count, or 0 when the page has no such marker or the
        marker text contains no digits.
    """
    # Function-scope import so this works on both Python 2 and Python 3
    # without depending on the module's import block.
    try:
        from urllib.request import urlopen
        from urllib.parse import quote
    except ImportError:  # Python 2
        from urllib import urlopen, quote

    # NOTE(review): the host part of this literal is masked in this copy of
    # the script; restore the real site root before running it.
    search_url = "XXXXXXXXXXXXXXXXXXXXXX/f/search/ures?ie=utf-8&kw=&qw=&rn=30&un="

    req = urlopen(search_url + quote(tname))
    try:
        web = req.read()
    finally:
        req.close()
    if not isinstance(web, str):
        # Python 3 returns bytes. Only ASCII markup is inspected below, so
        # bytes that are not valid UTF-8 can safely be replaced.
        web = web.decode("utf-8", "replace")

    marker_hits = re.findall(r's\_nav\_right hasPage[^\s]*', web)
    if not marker_hits:
        return 0
    digit_runs = re.findall(r'[0-9][0-9]{0,}', marker_hits[0])
    if not digit_runs:
        return 0
    return int(digit_runs[0])
if __name__ == '__main__':
    # Command line: <script> -k NAME
    # Looks up how many search records exist for NAME, then walks the result
    # pages one by one and prints every post link found on each.

    # Imported here so the script runs on both Python 2 and Python 3.
    try:
        from urllib.request import urlopen
        from urllib.parse import quote
    except ImportError:  # Python 2
        from urllib import urlopen, quote

    # NOTE(review): the host part of this literal is masked in this copy of
    # the script; restore the real site root before running it.
    urlstr = "XXXXXXXXXXXXXXXXXXXXXX/f/search/ures?ie=utf-8&kw=&qw=&rn=30&un="
    if len(sys.argv) == 3 and sys.argv[1] == '-k':
        namex = sys.argv[2]
        sizex = get_record_size(namex)
        print("find all record size = " + str(sizex))
        rn = 30  # records per page; mirrors rn=30 in urlstr
        # Round up so a final, partially filled page is still visited.
        pn = (sizex + rn - 1) // rn
        print(pn)
        # Hard cap on pages fetched per run.
        # NOTE(review): presumably the site's last reachable page - confirm.
        max_pages = 76
        # Page numbers are assumed to be 1-based - TODO confirm.
        for i in range(1, min(pn, max_pages) + 1):
            newurl = urlstr + quote(namex) + "&" + "pn=" + str(i)
            print(newurl)
            reqx = urlopen(newurl)
            try:
                web2 = reqx.read()
            finally:
                reqx.close()
            if not isinstance(web2, str):
                # Python 3 returns bytes; the link pattern is ASCII-only.
                web2 = web2.decode("utf-8", "replace")
            get_urls_in_apage(web2)
            print(str(i))
            # Pause between requests to avoid hammering the server.
            time.sleep(1)
    else:
        sys.stderr.write("usage: %s -k NAME\n" % sys.argv[0])