本文主要是介绍python 360 社区 监控 爬虫 in not in 问题,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
遇到一个特别奇怪的情况:最近老是收到重复邮件,检查后发现爬虫里有个地方竟然走了两个分支。
如果用 `in` 判断,在元素已经存在的情况下,竟然还会走到 `else` 分支;改用 `not in` 就不会。实在是太奇怪了——写简单的 demo 时不会出现这个情况,不知道是 Python 的问题还是我自己代码的问题。
#*-coding:utf-8-*-
import urllib2
import re
import smtplib
import time
from email.mime.text import MIMEText

# Retry budget handed to the page fetcher (passed as ``retries`` to
# ``tryopenurlrequest`` by the monitoring script).
retries1 = 30
# Recipient of every notification mail.
_to = "XXXXXXXXXXXXX@360.cn"


def _send_mail(host, user, password, retries,
               success_msg, retry_fmt, failed_fmt):
    """Send the current forum item by mail over SMTP-over-SSL, with retries.

    The message is built from the module-level ``listitem1`` (the regex match
    the monitoring loop is currently processing): characters ``[0:42]`` become
    the subject and ``[74:-4]`` the body.

    Args:
        host: SMTP server host name; the connection is made on port 465.
        user: Login name, also used as the ``From`` address.
        password: Login password.
        retries: Number of additional attempts allowed after a failure.
        success_msg: Text printed after a successful send.
        retry_fmt: ``%``-format printed on every ``SMTPException``.
        failed_fmt: ``%``-format printed once the retry budget is exhausted.

    Returns:
        None. ``SMTPException`` is reported on stdout and swallowed; any other
        exception (e.g. a socket error while connecting) propagates.
    """
    msg = MIMEText(listitem1[74:-4])
    msg["Subject"] = listitem1[0:42]
    msg["From"] = user
    msg["To"] = _to

    while True:
        try:
            # Wait before every attempt, including the first one.
            time.sleep(30)
            server = smtplib.SMTP_SSL(host, 465)
            try:
                server.login(user, password)
                server.sendmail(user, _to, msg.as_string())
                try:
                    server.quit()
                except smtplib.SMTPException:
                    # The message has already been handed over; a failing
                    # QUIT must not cause the same mail to be sent again.
                    pass
            finally:
                # Always release the socket, even when login/sendmail raised.
                server.close()
            print(success_msg)
            return
        except smtplib.SMTPException as e:
            print(retry_fmt % e)
            if retries <= 0:
                print(failed_fmt % e)
                return
            retries -= 1


class SendQqMail:
    """Sends the notification mail through the QQ SMTP server."""

    def getqqmail(self, retries):
        """Send the current item via smtp.qq.com.

        Args:
            retries: Number of additional attempts after an SMTP failure.
        """
        # NOTE(review): credentials are hard-coded in source; consider loading
        # them from the environment or a config file instead.
        _user = "XXXXXXXXXXXXXX@qq.com"
        _pwd = "XXXXXXXXXXXXXXXXXXX"
        return _send_mail(
            "smtp.qq.com", _user, _pwd, retries,
            "Send QQ Email Success!",
            "retry.QQ mail.........,%s",
            "Send QQ Email Falied,%s",
        )


class Send163Mail:
    """Sends the notification mail through the NetEase 163 SMTP server."""

    def get163mail(self, retries):
        """Send the current item via smtp.163.com.

        Args:
            retries: Number of additional attempts after an SMTP failure.
        """
        # NOTE(review): credentials are hard-coded in source; consider loading
        # them from the environment or a config file instead.
        _user = "python_smtp_test@163.com"
        _pwd = "zk199245qqq"
        return _send_mail(
            "smtp.163.com", _user, _pwd, retries,
            "Send 163 Email Success!",
            "retry.163mail..........,%s",
            "Send 163 Email Falied,%s",
        )


# Safeguard ("卫士") board
# Request for the first page of forum 140 on bbs.360.cn.
req1 = urllib2.Request("http://bbs.360.cn/forum-140-1.html")


class openurlrequest:
    """Fetches a URL with a fixed delay before each attempt and retries."""

    def tryopenurlrequest(self, req, retries):
        """Open ``req`` and return the response body.

        Sleeps 30 seconds before every attempt. On any ``Exception`` the
        request is retried until ``retries`` additional attempts are used up.

        Args:
            req: A ``urllib2.Request`` (or URL string) to open.
            retries: Number of additional attempts allowed after a failure.

        Returns:
            The raw response body as returned by ``response.read()``.

        Raises:
            Exception: The error from the last attempt, once the retry budget
                is exhausted. (Previously this path failed with an unrelated
                ``UnboundLocalError`` because no body had been read.)
        """
        while True:
            try:
                time.sleep(30)
                response = urllib2.urlopen(req)
                try:
                    return response.read()
                finally:
                    # Release the connection whether or not read() succeeded.
                    response.close()
            except Exception:
                if retries <= 0:
                    print("%s %s" % ('open url request Failed', req))
                    raise
                retries -= 1


buff = openurlrequest()
# --- Initial snapshot: remember every thread URL currently on the page. ---
buffer = buff.tryopenurlrequest(req1, retries1)
# Matches a thread link plus its title up to the closing </a>. Compiled once
# and reused for every poll.
getarticlelist = re.compile(r'http://bbs.360.cn/thread-.+-1-1.html" \s?target="_blank" class="s xst".*\s*</a>')
pagemsg = re.findall(getarticlelist, buffer)
print(("卫士板块监控系统已启动,如果监控到新的信息将会自动发送到您的邮箱").decode("utf-8"))
print('ready variables of num: %s' % len(pagemsg))
# The first 42 characters of a match are used as the thread's identity.
allurllist = [eveurllist[0:42] for eveurllist in pagemsg]
print(len(allurllist))

# --- Poll forever and mail every thread that has not been seen before. ---
while True:
    time.sleep(30)
    buffer1 = buff.tryopenurlrequest(req1, retries1)
    pagemsg1 = getarticlelist.findall(buffer1)
    # The loop variable must stay named ``listitem1``: the mail senders read
    # this module-level name to build the subject and body.
    for listitem1 in pagemsg1:
        # Author's note (translated): the odd part is here. With `in`, the
        # last listitem1[0:42] fetched is already in allurllist, so the else
        # branch should not run, yet in testing both cases were printed;
        # with `not in` that did not happen.
        # NOTE(review): `x in lst` / `not x in lst` are exact complements for
        # a single item; different items within one pass can legitimately
        # take different branches. Cause of the observed duplicates is not
        # established from this code alone -- confirm.
        if listitem1[0:42] in allurllist:
            continue
        allurllist.append(listitem1[0:42])
        try:
            qqsendmailer = SendQqMail()
            time.sleep(30)
            qqsendmailer.getqqmail(30)
        except Exception:
            # Narrowed from a bare ``except`` so that Ctrl-C / SystemExit
            # stop the monitor instead of triggering the fallback mailer.
            print("QQ mail try five times fail,change 163mail")
            neteasysendmailer = Send163Mail()
            time.sleep(30)
            neteasysendmailer.get163mail(30)
这篇关于python 360 社区 监控 爬虫 in not in 问题的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!