本文主要介绍 Python 中 selenium + PhantomJS 的详细用法及简单案例,希望能为大家解决编程问题提供一定的参考,需要的开发者们跟着小编一起来学习吧!
运行环境python2.7
相关模块需要自行下载安装
将 PhantomJS 可执行文件放入环境变量 PATH 所包含的目录中
(提示:1. 因为 PhantomJS 是无界面浏览器,所以可以通过截图来直观展示运行结果;2. selenium 类似于按键精灵,可以代替手动点击网页)
模拟访问百度并截图
# coding: utf8
# Open Baidu in the headless PhantomJS browser and save a screenshot of it.
from selenium import webdriver

# Start the PhantomJS driver.
driver = webdriver.PhantomJS()
try:
    # Visit Baidu.
    driver.get("http://www.baidu.com/")
    # Save a screenshot of the loaded page.
    driver.save_screenshot("baidu.png")
finally:
    # Always shut the PhantomJS process down, even if the page load fails.
    driver.quit()
模拟浏览器的前进和后退
# coding: utf8
# Demonstrate history navigation: visit two sites, go back, go forward,
# and take a screenshot after every step.
from selenium import webdriver

obj = webdriver.PhantomJS()
try:
    obj.get('http://www.baidu.com')      # visit Baidu
    obj.save_screenshot('13.png')
    obj.get('http://www.sina.com.cn')    # visit Sina
    obj.save_screenshot('14.png')
    obj.back()                           # go back to Baidu
    obj.save_screenshot('15.png')
    obj.forward()                        # go forward to Sina
    obj.save_screenshot('16.png')
except Exception as e:
    # Best-effort demo: report the failure instead of crashing.
    print(e)
finally:
    # Release the PhantomJS process whether or not the steps succeeded.
    obj.quit()
设置浏览器的宽高
# coding: utf8
# Resize the browser window, load Baidu and save a screenshot.
from selenium import webdriver

obj = webdriver.PhantomJS()
try:
    obj.set_page_load_timeout(5)
    # Set the browser window to 360x360 pixels (passed as integers).
    obj.set_window_size(360, 360)
    obj.get("http://www.baidu.com")
    obj.save_screenshot('12.png')  # take the screenshot
except Exception as e:
    # Best-effort demo: report the failure instead of crashing.
    print(e)
finally:
    # Release the PhantomJS process whether or not the steps succeeded.
    obj.quit()
对页面对话框等做一些操作
# coding: utf-8
# Read an element's text, fill in the search box, click the search button
# and save a screenshot of the result.
from selenium import webdriver

obj = webdriver.PhantomJS()
try:
    obj.set_page_load_timeout(50)
    obj.get('http://www.baidu.com')
    print(obj.find_element_by_id("cp").text)           # text of the element
    obj.find_element_by_id('kw').clear()               # clear the input box
    obj.find_element_by_id('kw').send_keys('hello')    # type "hello" into it
    obj.find_element_by_id('su').click()               # click the button
    # obj.find_element_by_id('su').submit()            # alternative: submit the form
    obj.save_screenshot('17.png')
except Exception as e:
    # Best-effort demo: report the failure instead of crashing.
    print(e)
finally:
    # Release the PhantomJS process whether or not the steps succeeded.
    obj.quit()
截取全屏
# coding: utf8
# Maximize the browser window, load Baidu and save a screenshot.
from selenium import webdriver

obj = webdriver.PhantomJS()
try:
    obj.set_page_load_timeout(5)
    # Switch the window to its maximized size before loading the page.
    obj.maximize_window()
    obj.get("http://www.baidu.com")
    obj.save_screenshot('11.png')
except Exception as e:
    # Best-effort demo: report the failure instead of crashing.
    print(e)
finally:
    # Release the PhantomJS process whether or not the steps succeeded.
    obj.quit()
定位节点和标签
# coding: utf8
# Show the different element-locating strategies offered by the driver.
from selenium import webdriver

obj = webdriver.PhantomJS()
try:
    obj.set_page_load_timeout(5)
    obj.get('http://www.baidu.com')
    obj.find_element_by_id('kw')                      # locate by id
    obj.find_element_by_class_name('s_ipt')           # locate by class attribute
    obj.find_element_by_name('wd')                    # locate by name attribute
    obj.find_element_by_tag_name('input')             # locate by tag name
    obj.find_element_by_css_selector('#kw')           # locate by CSS selector
    obj.find_element_by_xpath("//input[@id='kw']")    # locate by XPath
    obj.find_element_by_link_text("贴吧")              # locate by link text
    print(obj.find_element_by_id('kw').tag_name)      # print the element's tag name
except Exception as e:
    # Best-effort demo: report the failure instead of crashing.
    print(e)
finally:
    # Release the PhantomJS process whether or not the steps succeeded.
    obj.quit()
添加代理和报头
# coding: utf8
# Start PhantomJS with a random User-Agent, image loading disabled and a proxy,
# then load a page and save a screenshot.
from random import choice

from selenium import webdriver
# DesiredCapabilities carries the per-browser configuration.
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Each entry must be separated by a comma; without commas Python joins adjacent
# string literals into one single (invalid) User-Agent string.
user_agent = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
]

# Work on a copy so the shared PHANTOMJS defaults are not modified.
dcap = dict(DesiredCapabilities.PHANTOMJS)
# Pick a random User-Agent from the list to disguise the browser.
dcap["phantomjs.page.settings.userAgent"] = choice(user_agent)
# Do not load images; crawling pages is much faster this way.
dcap["phantomjs.page.settings.loadImages"] = False
# Proxy settings passed to the PhantomJS command line.
service_args = ['--proxy=61.135.217.7:80', '--proxy-type=socks5']

# Open PhantomJS with the configuration above.
driver = webdriver.PhantomJS(desired_capabilities=dcap, service_args=service_args)
try:
    # Implicit wait of 5 seconds; adjust as needed.
    driver.implicitly_wait(5)
    # Page-load timeout of 10 seconds, similar to the timeout option of
    # requests.get(); driver.get() itself has no timeout option. Without it,
    # driver.get(url) has been seen to never return and never raise, which
    # hangs the program.
    driver.set_page_load_timeout(10)
    # Script timeout of 10 seconds.
    driver.set_script_timeout(10)

    driver.get('http://ip38.com/')
    driver.save_screenshot("5.png")
finally:
    # Always shut the PhantomJS process down, even if loading fails.
    driver.quit()
键盘事件
# coding: utf8
# Send keyboard events (TAB, Ctrl+A, Ctrl+X, ENTER) to page elements and save
# a screenshot afterwards.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

obj = webdriver.PhantomJS()
try:
    obj.set_page_load_timeout(10)
    obj.get('http://www.baidu.com')
    obj.find_element_by_id('kw').send_keys(Keys.TAB)
    obj.find_element_by_id('kw').send_keys('hello')
    obj.find_element_by_id('kw').send_keys(Keys.CONTROL, 'a')   # Ctrl+A: select all text in the input box
    obj.find_element_by_id('kw').send_keys(Keys.CONTROL, 'x')   # Ctrl+X: cut the text in the input box
    obj.find_element_by_id('kw').send_keys(u'美女')
    obj.find_element_by_id('su').send_keys(Keys.ENTER)
    obj.save_screenshot('18.png')
except Exception as e:
    # Best-effort demo: report the failure instead of crashing.
    print(e)
finally:
    # Release the PhantomJS process whether or not the steps succeeded.
    obj.quit()
添加代理ip
# coding: utf8
# Attach an HTTP proxy to a running PhantomJS driver by starting a new session
# with proxy-enabled capabilities, then load a page and save a screenshot.
from selenium import webdriver

browser = webdriver.PhantomJS()
try:
    # browser.get('http://ip38.com/')
    # browser.save_screenshot("3.png")  # save a screenshot

    # Start a new session whose capabilities carry the proxy settings. As the
    # author understands it, this is roughly like clearing the browser cache
    # and visiting the URL again through the proxy.
    proxy = webdriver.Proxy()
    proxy.http_proxy = '112.74.32.237:6666'
    # Add the proxy to a copy of the PHANTOMJS capabilities so the shared
    # defaults are not modified for drivers created later.
    capabilities = webdriver.DesiredCapabilities.PHANTOMJS.copy()
    proxy.add_to_capabilities(capabilities)
    browser.start_session(capabilities)
    browser.get('http://ip38.com/')
    browser.save_screenshot("2.png")

    # To switch back to the system proxy:
    # proxy = webdriver.Proxy()
    # # proxy.proxy_type = ProxyType.DIRECT
    # capabilities = webdriver.DesiredCapabilities.PHANTOMJS.copy()
    # proxy.add_to_capabilities(capabilities)
    # browser.start_session(capabilities)
    # browser.get('http://ip38.com/')
finally:
    # Always shut the PhantomJS process down, even if loading fails.
    browser.quit()
这篇关于 Python selenium + PhantomJS 的详细用法及简单案例的文章就介绍到这儿,希望我们推荐的文章对程序员们有所帮助!