Thank you, Xakep. Using http://ajax.googleapis.com/ is a good idea!
Here's the code:
# coding: utf-8
# author: Rushter
# site: http://rushter.com
import urllib2, urllib, random, json

ff = open("ua.txt").readlines()          # User-Agent strings, one per line
ques = open("query.txt").readlines()     # search queries, one per line
out = open("parsed_query.txt", "a")      # collected result URLs
err = 0

for query in ques:
    try:
        # three pages of results per query (start = 0, 4, 8), 4 results per page
        for j in range(0, 12, 4):
            header = {"User-Agent": random.choice(ff).strip()}
            lnk = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&start={0}&q={1}".format(j, urllib.quote(query.strip("\r\n")))
            page_request = urllib2.Request(url=lnk, headers=header)
            page = urllib2.urlopen(page_request)
            pp = json.load(page)["responseData"]["results"]
            try:
                for i in xrange(4):
                    out.write(pp[i]["unescapedUrl"] + "\n")
                    out.flush()
            except (IndexError, KeyError, TypeError):
                pass  # fewer than 4 results on this page
    except Exception:
        err += 1
        if err > 100:
            break  # too many failed queries, give up
This lets you search without a proxy. You can perform thousands of queries without being banned.
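For a quick sanity check before running the full batch, here is a minimal sketch that hits the same endpoint for a single query (the query string is just a placeholder; the JSON keys are the ones the script above already reads):

# Minimal sketch: fetch one page of results for one query and print the URLs.
# Assumes the same endpoint and response layout used by the script above.
import urllib2, urllib, json

query = "example search"  # placeholder query
url = ("http://ajax.googleapis.com/ajax/services/search/web"
       "?v=1.0&start=0&q=" + urllib.quote(query))
data = json.load(urllib2.urlopen(url))
for result in data["responseData"]["results"]:
    print result["unescapedUrl"]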