本文代码基于 Python 2.x 版本(使用了 urllib2,不能直接在 Python 3 下运行)。
先看看结果吧
好了不废话了,直接发车请注意
# -*- coding: utf-8 -*-
import os
import re
import urllib
import urllib2
def download(url, name, save_dir=r'E:\pychar/image'):
    """Download the video at ``url`` and save it as ``<save_dir>/<name>.mp4``.

    Args:
        url: Direct URL of the MP4 file.
        name: Video title used as the file name. A byte string is decoded as
            UTF-8; a unicode string is used as is.
        save_dir: Directory the file is written to; it is created when
            missing. The default is the originally hard-coded location --
            pass your own directory instead of editing the code.

    Raises:
        IOError: Propagated from ``urllib.urlretrieve`` when the transfer or
            the file write fails.
    """
    # ``bytes`` is an alias of ``str`` on Python 2; calling .decode() on a
    # unicode object would trigger an implicit ASCII encode and fail.
    if isinstance(name, bytes):
        name = name.decode('utf-8')

    # Titles are scraped from HTML, so they may contain characters that are
    # not allowed in Windows file names (\ / : * ? " < > | and control chars).
    name = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', name).strip()

    # GBK is kept from the original code (presumably for a Chinese-locale
    # Windows file system -- confirm for other setups). 'ignore' drops
    # characters GBK cannot represent instead of raising UnicodeEncodeError.
    filename = name.encode('gbk', 'ignore') or 'video'

    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    path = os.path.join(save_dir, filename + '.mp4')
    urllib.urlretrieve(url, path)  # fetch the MP4 and write it to ``path``
def getUrllist():
    """Scrape http://www.budejie.com/video/ and download every video found.

    The listing page is fetched with a browser User-Agent header, split into
    per-post ``j-r-list-c`` blocks, and within each block the MP4 URLs are
    paired with the post titles and handed to ``download``. Each successfully
    downloaded ``url title`` pair is printed; a failed download is reported
    and skipped so the remaining videos are still processed.

    Raises:
        urllib2.URLError: If the listing page itself cannot be fetched.
    """
    req = urllib2.Request('http://www.budejie.com/video/')
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36')

    # A timeout keeps the script from hanging forever on a stalled server;
    # the response is closed explicitly so the connection is not leaked.
    response = urllib2.urlopen(req, timeout=30)
    try:
        html = response.read()
    finally:
        response.close()

    # The title text appears several times in the page, so first cut the HTML
    # into one chunk per post and only then look for URL/title inside a chunk.
    block_re = re.compile(r'(<div class="j-r-list-c">.*?</div>.*?</div>)', re.S)
    mp4_re = re.compile(r'data-mp4="(.*?)">')  # direct MP4 URL
    title_re = re.compile(r'<a href="/detail.*?.html">(.*?)</a>')  # post title

    for block in block_re.findall(html):
        mp4_urls = mp4_re.findall(block)
        titles = title_re.findall(block)
        # zip() stops at the shorter list, so an URL without a title (or the
        # reverse) is silently ignored, exactly as before.
        for mp4_url, title in zip(mp4_urls, titles):
            try:
                download(mp4_url, title)
            except (IOError, OSError, UnicodeError) as err:
                # One bad file must not abort the whole batch.
                print('download failed: %s %s (%s)' % (mp4_url, title, err))
                continue
            # Single parenthesised string: same output as ``print url, title``
            # on Python 2.
            print('%s %s' % (mp4_url, title))
# Script entry point: only start scraping when the file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    getUrllist()