"""Scrape movie details from dytt8.net list pages and append them to a file.

For each list page, the first detail-page link is followed, the movie
fields are extracted with a regular expression, and each record is printed
and appended (as the ``str`` of a dict) to the ``move_list`` file.
"""
import re
from urllib.request import urlopen

_BASE_URL = "http://www.dytt8.net"
_LIST_URL_TEMPLATE = "http://www.dytt8.net/html/gndy/dyzz/list_23_%s.html"
_OUTPUT_PATH = "move_list"

# Compiled once at import time instead of on every call.
_DETAIL_LINK_RE = re.compile(
    r'<td height="26">.*?<b>.*?<a href="(?P<url_name>.*?)" class="ulink">.*?',
    re.S,
)
_MOVIE_INFO_RE = re.compile(
    r'<div id="Zoom">.*?译.*?名(?P<name>.*?)<br />◎片.*?名(?P<pianname>.*?)<br />.*?◎导.*?演(?P<daoyan>.*?)<br />'
    + '◎主.*?演(?P<zhuyan>.*?)<br /><br />◎简.*?介.*?<td.*?><a href="(?P<xiazaidizhi>.*?)">',
    re.S,
)

# Output dict key -> named group in _MOVIE_INFO_RE.
_FIELD_GROUPS = (
    ("yiming", "name"),
    ("pianming", "pianname"),
    ("daoyan", "daoyan"),
    ("zhuyan", "zhuyan"),
    ("xiazaidizhi", "xiazaidizhi"),
)


def _strip_ideographic_space(text):
    """Return *text* with every U+3000 (ideographic space) removed."""
    return text.replace("\u3000", "")


def getPage(url):
    """Fetch *url* and return its body decoded as GBK.

    Undecodable bytes are dropped (``errors='ignore'``). The HTTP response
    is closed before returning.
    """
    with urlopen(url) as response:
        return response.read().decode('gbk', errors='ignore')


def parsePage(s):
    """Return the absolute URL of the first detail link found in *s*.

    Returns ``None`` when the list-page HTML contains no matching link.
    """
    match = _DETAIL_LINK_RE.search(s)
    if match is None:
        return None
    return _BASE_URL + match.group("url_name")


def parsePage1(s):
    """Yield one dict of movie fields for every match in detail-page HTML *s*.

    Each dict has the keys ``yiming``, ``pianming``, ``daoyan``, ``zhuyan``
    and ``xiazaidizhi``; U+3000 characters are stripped from every value.
    """
    for match in _MOVIE_INFO_RE.finditer(s):
        yield {
            key: _strip_ideographic_space(match.group(group))
            for key, group in _FIELD_GROUPS
        }


def main(num):
    """Scrape list page number *num* and append its records to ``move_list``.

    Only the first detail link on the list page is followed. If the list
    page has no detail link, a notice is printed and the page is skipped
    instead of failing inside ``urlopen``.
    """
    list_html = getPage(_LIST_URL_TEMPLATE % num)
    detail_url = parsePage(list_html)
    if detail_url is None:
        print("no detail link found on list page %s, skipping" % num)
        return
    detail_html = getPage(detail_url)
    # The context manager guarantees the file is flushed and closed even if
    # parsing or writing raises part-way through.
    with open(_OUTPUT_PATH, "a", encoding="utf8") as out:
        for record in parsePage1(detail_html):
            print(record)
            out.write(str(record) + "\n")


if __name__ == "__main__":
    # Guarded so that importing this module does not start a network crawl.
    for i in range(1, 181):
        main(i)