本文主要是介绍selenium入门级项目 - 豆豆玩竞猜,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
在初步学习selenium的定位方法之后,我们就可以找些网站来测试了,这次我选择的网站是豆豆玩
测试目的
- 点掉首页弹窗
- 模拟登录,含简单的验证码识别
- 表格提交
- 获取15期结果,存进MySQL数据库
思路与主要代码
去JS弹窗
这个容易,通过 class 名(layui-layer-btn0)定位弹窗上的按钮,点掉即可:
# Start Chrome through the local chromedriver binary. The path is a raw
# string so the Windows backslashes are never parsed as escape sequences.
s = Service(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe")
driver = webdriver.Chrome(options=chrome_options, service=s)
ddwURL = "http://www.doudouwan.net/"
driver.get(url=ddwURL)
# Fixed pause before looking for the home-page pop-up button.
time.sleep(6)
# find_elements returns an empty list when nothing matches, so a missing
# pop-up is simply skipped instead of raising IndexError on [0].
popup_buttons = driver.find_elements(by=By.CLASS_NAME, value='layui-layer-btn0')
if popup_buttons:
    popup_buttons[0].click()
模拟登陆
这里登陆填账号密码不再赘述,重点讨论验证码识别。
网站的验证码文件是html格式,并且是随机的,网页打开就会刷新一次:2578
我的思路是:网页截取验证码所在区域的图片,保存到本地,然后进行文字OCR识别,
我们不研究识别的原理,现在有很多开源的库可以供我们使用,选择了ddddocr,除了有广告,基本可以使用:
def validate(url):
    """Run OCR on a saved captcha image and return the recognised result.

    Args:
        url: Filesystem path of the image file. Despite the name, it is
            opened with ``open`` and not fetched over the network.

    Returns:
        The value returned by ``ddddocr.DdddOcr().classification`` for the
        image bytes.
    """
    ocr = ddddocr.DdddOcr()
    with open(url, 'rb') as f:
        image = f.read()
    return ocr.classification(image)


def snipScreent(url, save_dir=r'D:\image'):
    """Load ``url`` and save a screenshot of the captcha ``<img>`` element.

    Args:
        url: Page to open in the shared module-level ``driver``.
        save_dir: Directory that receives the PNG. Defaults to the
            previously hard-coded ``D:\\image`` and is created if missing.

    Returns:
        Path of the PNG file, named after the current Unix timestamp.
    """
    import os

    driver.get(url)
    time.sleep(2)
    # Resize the window to the full document size before taking the
    # element screenshot.
    width = driver.execute_script("return document.documentElement.scrollWidth")
    height = driver.execute_script("return document.documentElement.scrollHeight")
    driver.set_window_size(width, height)
    r_node = driver.find_element(by=By.XPATH, value='/html/body/div[3]/div/div[2]/dl/dd[3]/img')
    print('网页模块尺寸:height={},width={}'.format(r_node.size['height'], r_node.size['width']))
    # Make sure the target directory exists; otherwise the screenshot cannot
    # be written and validate() would later fail to open the file.
    os.makedirs(save_dir, exist_ok=True)
    pngPath = os.path.join(save_dir, '%s.png' % int(time.time()))
    r_node.screenshot(pngPath)
    return pngPath


picPath = snipScreent(url=ddwURL)
# Fill in the login form: account name, password, then the captcha text
# obtained by OCR on the screenshot saved in picPath.
username_box = driver.find_element(By.CSS_SELECTOR, "body > div.top > div > div.index_member.no_login > dl > dd.i.i_username > input")
username_box.send_keys("bgone")
password_box = driver.find_element(By.CSS_SELECTOR, "body > div.top > div > div.index_member.no_login > dl > dd.i.i_password > input")
password_box.send_keys("123456")
# Disabled alternative kept from the original script:
# downURL = "http://doudouwan.net/register/register_ver_code.html"
# picPath = download(url=downURL)
num = validate(url=picPath)
captcha_box = driver.find_element(By.CSS_SELECTOR, "body > div.top > div > div.index_member.no_login > dl > dd.i.i_code > input")
captcha_box.send_keys(num)
# Short fixed pause before submitting the form.
time.sleep(3)
submit_link = driver.find_element(By.CSS_SELECTOR, "body > div.top > div > div.index_member.no_login > dl > dd.a > a.submit")
submit_link.click()
randomIdle()
表格提交
投注本身也不难,但我们需要写一个算法以尽量维持程序运行:
def throw():
    """Click the ``#revoke_<id> > a`` link for table row 6, retrying until it succeeds.

    The id is the text content of the first cell of the sixth row of the
    left-hand table. On any exception the error is printed, ``randomIdle()``
    is called, the page is refreshed, and the attempt is repeated.
    """
    id_cell = r"body > div.fun_main > div.fun_left > div.left_table > table > tbody > tr:nth-child(6) > td:nth-child(1)"
    while True:
        try:
            round_id = driver.find_element(by=By.CSS_SELECTOR, value=id_cell).get_attribute("textContent")
            driver.find_element(by=By.CSS_SELECTOR, value="#revoke_%s > a" % round_id).click()
        except Exception as err:
            print(err)
            randomIdle()
            driver.refresh()
        else:
            return


def bet(input=0):
    """Type ``input`` into five stake fields and click the submit link.

    Args:
        input: Amount typed into each of the five fields. The name shadows
            the builtin but is kept because callers pass it by keyword.

    Returns:
        ``int(getCoins()) - 5 * input``, with ``getCoins()`` read before
        the fields are filled.
    """
    balance = getCoins()
    stake_fields = (
        "#tbLuck28Value1",
        "#tbLuck28Value3",
        "#tbLuck28Value5",
        "#tbLuck28Value7",
        "#tbLuck28Value9",
    )
    for selector in stake_fields:
        driver.find_element(by=By.CSS_SELECTOR, value=selector).send_keys(input)
    driver.find_element(by=By.XPATH, value="/html/body/div[3]/div[1]/div[5]/div/div[3]/div[2]/div/div[3]/a").click()
    return int(balance) - 5 * input


def getTimer():
    """Report whether the page shows both the "stopped" and "in progress" texts.

    Returns:
        True only when ``#bettingOverTime`` contains "已停止参与" and
        ``#bettingLottTime`` contains "解谜中,请稍后"; otherwise False.
    """
    lottery_text = driver.find_element(by=By.CSS_SELECTOR, value="#bettingLottTime").get_attribute("textContent")
    betting_text = driver.find_element(by=By.CSS_SELECTOR, value="#bettingOverTime").get_attribute("textContent")
    return "已停止参与" in betting_text and "解谜中,请稍后" in lottery_text


p = False
# Main loop: every pass of the outer loop starts a new series at the base
# stake of 20.
# NOTE(review): nothing in this snippet sets `p` to True or reads `mp`;
# confirm whether the loop is meant to run until interrupted.
while not p:
    initialC = 20
    mp = False
    for i in range(1, maxloop):
        # time.sleep(50)
        driver.get(url=guessURL90)
        throw()
        # Place the stake; `a` is the value bet() returns.
        a = bet(input=initialC)
        # Poll every 2 seconds until getTimer() reports True.
        while True:
            timerAssert = getTimer()
            if timerAssert:
                break
            time.sleep(2)
        time.sleep(5)
        # Reload the page and read the outcome.
        driver.refresh()
        c = collection()
        print(c)
        if c > 0:
            # Positive result: flag it and end this series.
            mp = True
            break
        if c < 0:
            # Negative result: double the stake, and end the series when
            # five stakes would exceed a + c.
            initialC *= 2
            if initialC * 5 > (a + c):
                break
            time.sleep(5)
数据库操作
先在本地搭建 MySQL 服务器,用 Navicat Premium 15 建一个表 ddw,字段为:Number、DateTime、Result、Coins、Hits、In、Out。
主键在Number上;然后脚本获取数据并存入:
class Sql():
    """Small pymysql helper for the ``ddw`` results table.

    Args:
        host: MySQL server host.
        username: MySQL account name.
        password: MySQL account password.
        db: Database selected on connect.
    """

    def __init__(self, host="localhost", username="root", password="", db="ddw"):
        # Must be ``__init__`` (not ``__int__``) so the attributes exist on
        # every instance. The defaults are the values connectMysql used as
        # hard-coded literals, so ``Sql()`` connects exactly as before.
        self.host = host
        self.username = username
        self.password = password
        self.db = db

    def connectMysql(self):
        """Open and return a new pymysql connection that yields dict rows."""
        return pymysql.connect(
            host=self.host,
            user=self.username,
            password=self.password,
            db=self.db,
            charset='utf8mb4',
            cursorclass=pymysql.cursors.DictCursor,
        )

    def instertMysql(self, num, date, result, coin, hits, inn, out):
        """Insert one row into ``ddw``.``ddw`` and print the stored Number.

        Args:
            num: Value for the ``Number`` column.
            date: Value for the ``DateTime`` column.
            result: Value for the ``Result`` column.
            coin: Value for the ``Coins`` column.
            hits: Value for the ``Hits`` column.
            inn: Value for the ``In`` column.
            out: Value for the ``Out`` column.
        """
        connection = self.connectMysql()
        try:
            with connection.cursor() as cursor:
                # Create the new record.
                sql = "INSERT INTO `ddw`.`ddw` (`Number`, `DateTime`, `Result`, `Coins`, `Hits`, `In`, `Out`) VALUES (%s, %s, %s, %s, %s, %s, %s)"
                cursor.execute(sql, (num, date, result, coin, hits, inn, out))
            # Changes are not committed automatically, so commit explicitly.
            connection.commit()
            with connection.cursor() as cursor:
                # Read the record back and print it as confirmation.
                sql = "SELECT Number FROM ddw.ddw WHERE Number=%s"
                cursor.execute(sql, (num,))
                stored = cursor.fetchone()
                print(stored)
        finally:
            connection.close()

    def selectMysql(self, index=0):
        """Fetch, print and return the row with the highest ``Number``.

        Args:
            index: When non-zero it is used as the ``LIMIT`` of the query.
                NOTE(review): only the first row is fetched either way, as
                in the original; confirm whether several rows were wanted.

        Returns:
            The first row as a dict, or None when the table is empty.
        """
        # Open a dedicated connection; no shared one exists in this scope.
        connection = self.connectMysql()
        try:
            with connection.cursor() as cursor:
                # Backticks quote the column name. Single quotes would sort
                # by a constant string and leave the row order undefined.
                if not index:
                    cursor.execute("select * from ddw ORDER BY `Number` DESC LIMIT 1")
                else:
                    # pymysql only understands %s placeholders.
                    cursor.execute(
                        "select * from ddw ORDER BY `Number` DESC LIMIT %s",
                        (int(index),),
                    )
                result = cursor.fetchone()
                print(result)
                return result
        finally:
            connection.close()


def collection():
    """Scrape the results table, store unseen rows, and return In - Out.

    Reads every ``<td>`` of the results table through the shared ``driver``,
    groups the cells from position 36 up to (not including) the last one
    into rows of seven, and inserts each row whose ``Number`` is not yet in
    the database.

    Returns:
        ``In - Out`` of the last parsed row, or 0 when no row was parsed.
    """
    year = datetime.datetime.now().year
    sql = Sql()
    connection = sql.connectMysql()
    # Defaults keep the return value defined when the table yields no rows.
    In = 0
    Out = 0
    try:
        content = driver.find_elements(by=By.XPATH, value="/html/body/div[3]/div[1]/div[5]/table/tbody/tr/td")
        alist = []
        for cell in content:
            inner = cell.get_attribute("innerHTML")
            try:
                # Parsing sits inside the try so an unparsable cell falls
                # back to its raw innerHTML instead of aborting the scrape.
                html = etree.fromstring(inner, parser=etree.HTMLParser())
                alist.append(html.xpath("//text()"))
            except Exception:
                alist.append([inner])
        usefulContent = alist[36:-1]
        # Cell offsets used within each row of seven:
        # 0 Number, 1 DateTime, 2 Result, 3 Coins, 4 Hits, 5 In and Out.
        for index in range(0, len(usefulContent), 7):
            Number = int(usefulContent[index][0])
            # The page shows the date without a year; prefix the current one.
            DateTime = str(year) + "-" + usefulContent[index + 1][0]
            Result = int(usefulContent[index + 2][0])
            Coins = usefulContent[index + 3][0].replace(",", "")
            Hits = int(usefulContent[index + 4][0].replace(",", ""))
            # Cell 5 holds two text nodes; each number follows the last ":".
            In = int(usefulContent[index + 5][0].split(":")[-1].replace(",", ""))
            Out = int(usefulContent[index + 5][1].split(":")[-1].replace(",", ""))
            with connection.cursor() as cursor:
                try:
                    existOne = "SELECT Number FROM ddw.ddw WHERE Number=%s"
                    cursor.execute(existOne, (Number,))
                    result = cursor.fetchone()
                    print(result)
                    if not result:
                        sql.instertMysql(Number, DateTime, Result, Coins, Hits, In, Out)
                except Exception as ep:
                    # Best effort: report the database error and move on.
                    print(ep)
    finally:
        # Close the connection even when scraping or parsing raises.
        connection.close()
    return In - Out
结果展示:
总结
难度系数低,可以获取数据用于日后数据分析
这篇关于selenium入门级项目 - 豆豆玩竞猜的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!