本文主要是介绍网易云歌单爬取并保存,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
爬取网易云 2011-2017 年 Melon 年榜歌单并保存
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 25 19:57:09 2018@author: marson
"""import requests
from bs4 import BeautifulSoup
import json
import create_songlist

# Request headers sent with every page fetch. Defined at module level so
# get_ist() also works when this module is imported rather than run as a script.
headers = {
    'Referer': 'http://music.163.com/',
    'Host': 'music.163.com',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0 Ic',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
}

# Rows collected by get_ist(): (rank, title, singer, album, year).
data = []

# Directory (with trailing separator) where the per-year text dumps are appended.
OUTPUT_DIR = 'E:\\python\\melon\\榜单\\'

# Seconds to wait for a response before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def _parse_description(description):
    """Return ``(singers, album)`` extracted from a song page description.

    The description is split on '。'; the first sentence yields the singer
    field and the second the album, each taken as the text after the first
    ':'. The singer field is further split on ',' into individual names.
    """
    sentences = description.split('。')
    singer = sentences[0].split(':')[1]
    album = sentences[1].split(':')[1]
    return singer.split(','), album


def _fetch_soup(session, url):
    """Download *url* with the shared headers and parse it with lxml."""
    response = session.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(response.content, 'lxml')


def get_ist(i, id):
    """Scrape one playlist and record every song it lists.

    For each song link found in the playlist page, the song page is fetched,
    its ``application/ld+json`` metadata is decoded, and one row per singer is
    both appended to ``<OUTPUT_DIR><i>.txt`` (pipe-separated) and to the
    module-level ``data`` list as ``(rank, title, singer, album, i)``.

    Args:
        i: Label for the playlist (the chart year); used as the output file
            name and as the last field of every row.
        id: Playlist id appended to the playlist URL.

    Raises:
        ValueError: If the playlist page does not contain the hidden song list.
    """
    play_url = 'http://music.163.com/playlist?id=' + str(id)

    # One session is reused for the playlist page and every song page.
    with requests.Session() as session:
        playlist_page = _fetch_soup(session, play_url)
        song_list = playlist_page.find('ul', {'class': 'f-hide'})
        if song_list is None:
            # Fail before touching the output file, with a readable message.
            raise ValueError('no song list found in playlist page ' + play_url)

        # The context manager guarantees the file is closed even when a
        # request or a parse step raises part-way through the playlist.
        with open(OUTPUT_DIR + str(i) + '.txt', 'a', encoding='utf-8') as f:
            # NOTE(review): the rank is assumed to advance once per song
            # (not once per singer) — the original nesting was lost.
            for m, music in enumerate(song_list.find_all('a'), start=1):
                song_page = _fetch_soup(session, 'http://music.163.com' + music['href'])
                meta = json.loads(
                    song_page.find('script', type="application/ld+json").get_text()
                )
                singers, album = _parse_description(meta['description'])
                for ss in singers:
                    f.write(str(m) + '|' + meta['title'] + '|' + ss + '|' + album + '|' + str(i) + '\n')
                    data.append((m, meta['title'], ss, album, i))


if __name__ == '__main__':
    # Playlist ids in chart-year order, 2011 through 2017.
    bd = [560117127, 560095569, 19020312, 559454416, 159677693, 560080737, 2074743371]

    create_songlist.create_table()
    for year, playlist_id in enumerate(bd, start=2011):
        get_ist(year, playlist_id)
    create_songlist.insert_table(data)
    df = create_songlist.get_data()
create_songlist 文件
import sqlite3
import pandas as pd
# Default SQLite database file used by every helper in this module.
DB_PATH = 'melon.db'


def create_table(db_path=DB_PATH):
    """Create the MUSIC table if it does not exist yet.

    Safe to call on every run: an already existing table is left untouched.

    Args:
        db_path: SQLite database file to open; defaults to ``melon.db``.
    """
    conn = sqlite3.connect(db_path)
    try:
        print("Opened database successfully")
        # singer is TEXT: with an INT column, SQLite's type affinity would
        # silently turn numeric-looking names into integers.
        conn.execute('''CREATE TABLE IF NOT EXISTS MUSIC
                        (ID INT NOT NULL,
                         song TEXT NOT NULL,
                         singer TEXT NOT NULL,
                         album TEXT,
                         year INT);''')
        print("Table created successfully")
        conn.commit()
    finally:
        conn.close()


def insert_table(data, db_path=DB_PATH):
    """Insert rows into MUSIC.

    Args:
        data: Iterable of ``(ID, song, singer, album, year)`` tuples.
        db_path: SQLite database file to open; defaults to ``melon.db``.
    """
    conn = sqlite3.connect(db_path)
    try:
        print("Opened database successfully")
        sql_word = "INSERT INTO MUSIC (ID,song,singer,album,year) VALUES (?,?,?,?,?);"
        conn.executemany(sql_word, data)
        conn.commit()
        print("Records created successfully")
    finally:
        conn.close()


def get_data(db_path=DB_PATH):
    """Return the whole MUSIC table as a pandas DataFrame.

    Args:
        db_path: SQLite database file to open; defaults to ``melon.db``.
    """
    conn = sqlite3.connect(db_path)
    try:
        print("Opened database successfully")
        sql_word = "SELECT * from MUSIC"
        return pd.read_sql(sql_word, conn)
    finally:
        conn.close()


def test(db_path=DB_PATH):
    """Print the singers that appear more than five times, most frequent first.

    Args:
        db_path: SQLite database file to open; defaults to ``melon.db``.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.execute(
            "select singer,count(*) from music group by 1 having count(*)>5 order by 2 desc"
        )
        AN = cursor.fetchall()
    finally:
        conn.close()
    print(AN)


# NOTE: this is an ad-hoc query helper, not a unit test; the flag keeps pytest
# from collecting it when the module is star-imported into a test file.
test.__test__ = False
这篇关于网易云歌单爬取并保存的文章就介绍到这儿,希望我们推荐的文章对程序员们有所帮助!