# Author (作者): SeanCheney
# Source (来源): Jianshu (简书)
import urllib.request
import urllib.parse
import time
import http.cookiejar
# Endpoint that receives the e-mail login form.
webUrl = "https://www.zhihu.com/login/email"

# Headers sent with every request; the User-Agent is a mobile Chrome string.
webheader = {
    'User-Agent': (
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/56.0.2924.87 Mobile Safari/537.36'
    ),
}

# Login form fields. The 'email' and 'password' values are placeholders
# ("write your account here" / "write your password here") to be replaced by
# the user; '_xsrf' and 'captcha' start empty and are filled in at run time.
postData = dict(
    email='在这里写你的账号',
    captcha_type='cn',
    password='在这里写你的密码',
    _xsrf='',
    captcha='',
)

# Placeholder ("write the path where you want to save the captcha image").
localStorePath = "写你想保存的验证码图片的地址"
def main():
    """Download a Zhihu captcha, prompt for its text, and submit the login form.

    Steps:
      1. Install a global opener backed by a cookie jar, so the captcha
         request and the login request share cookies.
      2. Fetch the captcha image and save it to ``localStorePath`` so the
         user can open it and read the characters.
      3. Read the captcha text from stdin, fill in the form, and POST it to
         ``webUrl``.
      4. Print the server response and write it to a text file.

    Raises:
        urllib.error.URLError: if either HTTP request fails.
        OSError: if the captcha image or the response file cannot be written.
    """
    cookie_jar = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cookie_jar)
    )
    # Installing the opener makes every later urlopen() call reuse the jar.
    urllib.request.install_opener(opener)

    # The 'r' query parameter is the current time in milliseconds.
    captcha_url = (
        'https://www.zhihu.com/captcha.gif?r=%d&type=login&lang=cn'
        % (time.time() * 1000)
    )
    captcha_request = urllib.request.Request(url=captcha_url, headers=webheader)
    # Use the response as a context manager so the connection is closed.
    with urllib.request.urlopen(captcha_request) as response:
        captcha_image = response.read()

    # Save to the configured path rather than a machine-specific location.
    with open(localStorePath, 'wb') as image_file:
        image_file.write(captcha_image)

    postData['captcha'] = input('请输入验证码')
    # NOTE(review): this _xsrf token is hard-coded; presumably it has to match
    # a token issued by the server for the current session -- confirm.
    postData['_xsrf'] = 'fa5ae712244bd4287e371801052003fc'
    print(postData['_xsrf'])

    encoded_form = urllib.parse.urlencode(postData).encode('utf-8')
    login_request = urllib.request.Request(
        url=webUrl, data=encoded_form, headers=webheader
    )
    with urllib.request.urlopen(login_request) as response:
        data = response.read().decode('utf-8')
    print(data)

    # Keep a copy of the server's reply for later inspection.
    with open("D:/知乎服务器反馈的内容.txt", mode='w', encoding='utf-8') as data_file:
        data_file.write(data)


if __name__ == '__main__':
    main()
# References (参考):
# http://blog.csdn.net/hudeyu777/article/details/76706007
# http://www.jianshu.com/p/50c5815bb60b#