各位大神,请教一个问题,python爬虫爬取淘宝网页商品评论,运行出现了一个编码问题:'UCS-2' codec can't encode characters in position 8-8: Non-BMP character not supported in Tk
0
代码为:
#淘宝评论
#淘宝评论
import urllib.request
import re
import urllib.error
import sys
headers=("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.90 Safari/537.36 2345Explorer/9.2.1.17116")
#模拟浏览器
opener=urllib.request.build_opener()
opener.addheaders=[headers]
urllib.request.install_opener(opener)
for i in range(1,2):
try:
url="https://rate.taobao.com/feedRa ... 2Bstr(i)+"&pageSize=20&rateType=&orderType=sort_weight&attribute=&sku=&hasSku=false&folded=0&ua=098%23E1hvLQvWvRhvUpCkvvvvvjiPPF5h6jtVP2LZtjrCPmPv6jlhRsdO0jnmPsLyzjl8PIhCvvswPEaeprMwznsDxDurvpvEvv9E9fuTvHFXiQhvCvvv9UUtvpvhvvvvv8yCvv9vvUm17X%2FgpgyCvvOUvvVvaZRtvpvIvvvvvhCvHHyvvU87phvUv9vv9krvpvQvvvmm86CvmVWvvUUdphvUOQyCvhQWVKvvCANv%2BExrVTtYVVzpafmAdcpiYWFUAC4AVAElYVllHdUf8%2BClYE7rV16AAnLO%2Bb8rwZClYWFZe3WDN%2BLvafmAdcHVaNLtD40OvphvC9vhvvCvpvGCvvpvvvvv3QhvCvvhvvvtvpvhvvvvvv%3D%3D&_ksTS=1525145827632_1410&callback=jsonp_tbcrate_reviews_list"
data=urllib.request.urlopen(url).read().decode("utf-8","ignore")
patcom='"content":"(.*?)"'
comdata=re.compile(patcom).findall(data)
for j in range(0,len(comdata)):
print("第"+str(i)+"页第"+str(j)+"条评论的内容是:")
print(comdata[j]) #eval是一个函数
except urllib.error.URLError as e:
if hasattr(e,"code"):
print(e.code)
if hasattr(e,"reason"):
print(e.reason)
except Exception as e:
print(e)
#淘宝评论
#淘宝评论
import urllib.request
import re
import urllib.error
import sys
headers=("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.90 Safari/537.36 2345Explorer/9.2.1.17116")
#模拟浏览器
opener=urllib.request.build_opener()
opener.addheaders=[headers]
urllib.request.install_opener(opener)
for i in range(1,2):
try:
url="https://rate.taobao.com/feedRa ... 2Bstr(i)+"&pageSize=20&rateType=&orderType=sort_weight&attribute=&sku=&hasSku=false&folded=0&ua=098%23E1hvLQvWvRhvUpCkvvvvvjiPPF5h6jtVP2LZtjrCPmPv6jlhRsdO0jnmPsLyzjl8PIhCvvswPEaeprMwznsDxDurvpvEvv9E9fuTvHFXiQhvCvvv9UUtvpvhvvvvv8yCvv9vvUm17X%2FgpgyCvvOUvvVvaZRtvpvIvvvvvhCvHHyvvU87phvUv9vv9krvpvQvvvmm86CvmVWvvUUdphvUOQyCvhQWVKvvCANv%2BExrVTtYVVzpafmAdcpiYWFUAC4AVAElYVllHdUf8%2BClYE7rV16AAnLO%2Bb8rwZClYWFZe3WDN%2BLvafmAdcHVaNLtD40OvphvC9vhvvCvpvGCvvpvvvvv3QhvCvvhvvvtvpvhvvvvvv%3D%3D&_ksTS=1525145827632_1410&callback=jsonp_tbcrate_reviews_list"
data=urllib.request.urlopen(url).read().decode("utf-8","ignore")
patcom='"content":"(.*?)"'
comdata=re.compile(patcom).findall(data)
for j in range(0,len(comdata)):
print("第"+str(i)+"页第"+str(j)+"条评论的内容是:")
print(comdata[j]) #eval是一个函数
except urllib.error.URLError as e:
if hasattr(e,"code"):
print(e.code)
if hasattr(e,"reason"):
print(e.reason)
except Exception as e:
print(e)
没有找到相关结果
重要提示:提问者不能发表回复,可以通过评论与回答者沟通,沟通后可以通过编辑功能完善问题描述,以便后续其他人能够更容易理解问题.
0 个回复