本文主要介绍如何爬取我爱我家(5i5j)的房屋出租信息,希望能为大家解决编程问题提供一些参考,感兴趣的开发者可以跟着小编一起学习!
# 导入模块
import requests
from lxml import etree
import time
# 导入mysql封装的class类
from mysql import MysqlHelper
# Create the shared MysqlHelper instance at module load; wujia() calls its
# execute_modify_sql() method to run the INSERT statements.
# NOTE(review): whether the constructor opens a DB connection immediately is
# not visible from this file — confirm against the mysql module.
mc = MysqlHelper()
def _first_text(node, path):
    """Return the first text result of XPath *path* under *node*, or None."""
    found = node.xpath(path)
    return found[0] if found else None


def wujia(url, headers, start_page=1, end_page=20, timeout=10):
    """Scrape 5i5j rental listings page by page and store them via ``mc``.

    For every ``<li>`` under ``ul.pList`` the address, the room description
    and the monthly price are printed and then inserted into the ``5i5j``
    table with ``mc.execute_modify_sql``.

    Args:
        url: Listing URL template containing one ``{}`` placeholder that is
            filled with the page number.
        headers: HTTP headers sent with every page request.
        start_page: First page number to fetch (inclusive). Defaults to 1.
        end_page: Last page number to fetch (inclusive). Defaults to 20.
        timeout: Seconds to wait for each HTTP response before
            ``requests`` raises a timeout error. Defaults to 10.

    Listings with no description, or with a missing or non-numeric price, are
    reported and skipped instead of aborting the whole crawl. A listing
    without an address is stored with an empty address string.
    """
    # NOTE(review): the placeholders are wrapped in double quotes. If
    # MysqlHelper.execute_modify_sql forwards (sql, data) to a DB-API
    # cursor.execute, the driver quotes values itself and these extra quotes
    # would end up stored literally — confirm against the helper before
    # changing the statement.
    sql = 'insert into 5i5j(dizhi,room_data,money) values ("%s","%s","%s")'

    for page in range(start_page, end_page + 1):
        fullurl = url.format(page)
        # A timeout keeps one stalled connection from hanging the crawl.
        response = requests.get(fullurl, headers=headers, timeout=timeout)
        tree = etree.HTML(response.text)
        if tree is None:
            # etree.HTML yields None for an empty document; nothing to parse.
            continue

        for item in tree.xpath('//ul[@class="pList"]/li'):
            # Address: strip the middle-dot separators and outer whitespace.
            dizhi = _first_text(item, './div[2]/div/p[2]/text()')
            if dizhi:
                dizhi = dizhi.replace('·', '').strip()
                print(dizhi)
            else:
                # Store an empty string rather than the empty XPath result.
                dizhi = ''
                print('无地址')

            # Room description and monthly price.
            room_data = _first_text(item, './div[2]/div[1]/p[1]/text()')
            money = _first_text(item, './div[2]/div[1]/div//strong/text()')
            if room_data is None or money is None:
                print('房源信息不完整,跳过')
                continue
            room_data = room_data.replace(' ', '').replace('·', '')
            print(room_data)

            try:
                money = int(money)
            except ValueError:
                print('价格无法解析,跳过')
                continue
            print(money)

            # Persist the listing through the shared MySQL helper.
            data = (dizhi, room_data, money)
            mc.execute_modify_sql(sql, data)

        # NOTE: the original left a 3-second pause between requests disabled
        # here; re-enable a delay if the site starts throttling the crawler.
# Script entry point: assemble the request settings and start the crawl.
if __name__ == '__main__':
    # Browser-like request headers. The Accept-Encoding and Accept-Language
    # entries were disabled in the original and are therefore not sent.
    # NOTE(review): the Cookie value is hard-coded and looks like a captured
    # browser session — presumably expired by now; confirm before relying on it.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie': '_Jo0OQK=1349C4397FAF634DC28494B7372F7EE6A6BAFA7626819FB7027758023F7044ABBF04A8994B1C24AC6546C9F8AD8FBB6099D9577A4343988ABD03B1C034CCCF5A512DE8682CA7D10E3B498FB9E3C853EFEE298FB9E3C853EFEE215D8BEE34E43E5C0GJ1Z1IQ==; _ga=GA1.2.1741003594.1534567087; _gid=GA1.2.429105984.1534567087; yfx_c_g_u_id_10000001=_ck18081812380714557549321883736; PHPSESSID=a1eros861f9teh5n4r6lv6f30j; Hm_lvt_94ed3d23572054a86ed341d64b267ec6=1534567088,1534572065; zufang_BROWSES=41290006; domain=bj; yfx_f_l_v_t_10000001=f_t_1534567087448__r_t_1534567087448__v_t_1534591789846__r_c_0; Hm_lpvt_94ed3d23572054a86ed341d64b267ec6=1534591798',
        'Host': 'bj.5i5j.com',
        'Referer': 'https://bj.5i5j.com/zufang/huilongguan/n2/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    }

    # Listing URL template; wujia() fills the {} with each page number.
    url = 'https://bj.5i5j.com/zufang/huilongguan/n{}/'

    wujia(url=url, headers=headers)
关于爬取5i5j房屋出租信息的介绍就到这里,希望本文对各位程序员有所帮助!