本文主要介绍如何从bilibili上爬取视频,希望能为大家解决编程问题提供一定的参考,需要的开发者们跟随小编一起来学习吧!
一、bilibili视频的URL结构
人员素质测评理论与方法-06090-人力资源管理本科-江苏省高等教育自学考试_哔哩哔哩_bilibili
URL结构:视频页面URL + 分P参数(?p=序号),例如 https://www.bilibili.com/video/BV15v411k75j?p=58
二、编写程序输入URL和起始终止P
# BV id of the bilibili video to download.
# To prompt for it instead, use: bv = input('视频BV号:')
bv = 'BV15v411k75j'
url = 'https://www.bilibili.com/video/' + bv

# First and last part (P) of the video to download, both inclusive.
# To prompt for them instead, use:
#   startPart = input('起始P:')
#   endPart = input('终止P;')
startPart = 58
endPart = 100
三、解析网页:在Chrome中按F12打开开发者工具,在<head>标签里找到包含‘baseUrl’的<script>标签,确认它是第几个<script>,并相应修改XPath中的下标://head/script[5]/text()
# Text of the 5th <script> in <head>; [20:] drops the 20-character
# 'window.__playinfo__=' prefix so that only the JSON string remains.
videoPlayInfo = str(_element.xpath('//head/script[5]/text()')[0])[20:]
找到自己浏览器的User-Agent,替换headers中的对应值:
# HTTP headers sent with each request: a browser User-Agent plus a Referer
# pointing at the bilibili home page.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/95.0.4638.54 Safari/537.36'
    ),
    'Referer': 'https://www.bilibili.com/',
}
四、下载视频和音频
区分GET和OPTIONS请求:先对资源地址发送一次OPTIONS请求,再用带Range请求头的GET请求分片下载
五、合并视频和音频
使用ffmpeg实现视频和音频的合并
def combineVideoAudio(videopath, audiopath, outpath, ffmpeg="E:/ffmpeg/bin/ffmpeg"):
    """Merge a video-only file and an audio-only file into one file with ffmpeg.

    Both streams are copied without re-encoding. The two source files are
    deleted only after ffmpeg exits with status 0; if ffmpeg cannot be started
    or reports an error, a message is printed and the sources are kept.

    Args:
        videopath: Path of the downloaded video stream.
        audiopath: Path of the downloaded audio stream.
        outpath: Path of the merged file to create.
        ffmpeg: Path of the ffmpeg executable.
    """
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    command = [
        ffmpeg,
        "-i", videopath,
        "-i", audiopath,
        "-vcodec", "copy",
        "-acodec", "copy",
        outpath,
    ]
    try:
        returncode = subprocess.call(command)
    except OSError as err:
        print('无法运行 ffmpeg (' + ffmpeg + '): ' + str(err))
        return
    if returncode != 0:
        print('合并失败(ffmpeg 返回码 ' + str(returncode) + '),已保留: '
              + videopath + ' 和 ' + audiopath)
        return
    os.remove(videopath)
    os.remove(audiopath)
整体下载源代码:
import json
import os
import shutil
import subprocess

import requests
from lxml import etree

# Every request below uses verify=False (kept from the original script to
# avoid HTTPS certificate errors); silence the warning that would otherwise
# be emitted on each call.
requests.packages.urllib3.disable_warnings()

# Base headers sent with every request. Treated as read-only: functions that
# need extra headers work on a copy.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/80.0.3970.5 Safari/537.36',
    'Referer': 'https://www.bilibili.com/',
}

# Directory that receives the downloaded and merged files.
OUTPUT_DIR = 'E:/result'
# Number of bytes requested per ranged GET (512 KiB).
CHUNK_SIZE = 1024 * 512
# Text that precedes the play-info JSON inside its <script> tag.
PLAYINFO_PREFIX = 'window.__playinfo__='


def _extractPlayInfo(html):
    """Return the parsed window.__playinfo__ JSON object found in *html*.

    Every <script> is inspected, so the result does not depend on the position
    of the tag inside the page.

    Raises:
        ValueError: If no script starting with the play-info prefix exists or
            its payload is not valid JSON.
    """
    tree = etree.HTML(html)
    scripts = tree.xpath('//script/text()') if tree is not None else []
    for text in scripts:
        text = text.strip()
        if text.startswith(PLAYINFO_PREFIX):
            return json.loads(text[len(PLAYINFO_PREFIX):])
    raise ValueError('window.__playinfo__ not found in page')


def getBiliBiliVideo(url, p, bv):
    """Download one part of a bilibili video into OUTPUT_DIR.

    When the page offers separate video and audio streams ('dash'), both are
    downloaded and merged with ffmpeg; otherwise the single combined stream
    ('durl') is downloaded on its own.

    Args:
        url: Page URL of the part, in the form ``<video url>?p=<n>``.
        p: Part number, used in the output file name.
        bv: BV id of the video, used in the output file name.
    """
    session = requests.session()
    res = session.get(url=url, headers=headers, verify=False)
    data = _extractPlayInfo(res.content)['data']

    try:
        # Separate video and audio streams.
        videoURL = data['dash']['video'][0]['baseUrl']
        audioURL = data['dash']['audio'][0]['baseUrl']
    except (KeyError, IndexError, TypeError):
        # Single stream that already contains both video and audio.
        videoURL = data['durl'][0]['url']
        audioURL = None

    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)
        print('文件夹创建成功!')

    name = bv + "-" + str(p)
    videoPath = OUTPUT_DIR + '/' + name + '_Video.mp4'
    audioPath = OUTPUT_DIR + '/' + name + '_Audio.mp3'
    outputPath = OUTPUT_DIR + '/Bilibili' + name + '_output.mp4'

    print('正在下载 "' + name + '" 的视频····')
    fileDownload(homeurl=url, url=videoURL, name=videoPath, session=session)
    if audioURL is not None:
        print('正在下载 "' + name + '" 的音频····')
        fileDownload(homeurl=url, url=audioURL, name=audioPath, session=session)
        print('正在组合 "' + name + '" 的视频和音频····')
        combineVideoAudio(videoPath, audioPath, outputPath)
    print(' "' + name + '" 下载完成!')


def fileDownload(homeurl, url, name, session=None):
    """Download *url* to the file *name* using ranged GET requests.

    The file is created (or truncated) and filled CHUNK_SIZE bytes at a time
    until the server answers 416, returns the whole body at once, or returns
    an empty chunk.

    Args:
        homeurl: Page the download originates from; sent as the Referer.
        url: Address of the audio or video resource.
        name: Path of the file to write.
        session: requests session to reuse; a new one is created when omitted.

    Raises:
        requests.HTTPError: If the server answers with an error status other
            than 416.
    """
    if session is None:
        session = requests.session()
    # Work on a copy so Referer/Range never leak into the shared headers.
    requestHeaders = dict(headers, Referer=homeurl)
    # The original script sends an OPTIONS request before the ranged GETs.
    session.options(url=url, headers=requestHeaders, verify=False)

    begin = 0
    with open(name, 'wb') as fp:
        while True:
            end = begin + CHUNK_SIZE - 1
            requestHeaders['Range'] = 'bytes=' + str(begin) + '-' + str(end)
            res = session.get(url=url, headers=requestHeaders, verify=False)
            if res.status_code == 416:
                # Requested range starts past the end of the resource: done.
                break
            res.raise_for_status()
            fp.write(res.content)
            if res.status_code != 206 or not res.content:
                # A non-partial answer carries the full body; an empty chunk
                # means there is nothing further to read.
                break
            begin += len(res.content)


def combineVideoAudio(videopath, audiopath, outpath, ffmpeg="E:/ffmpeg/bin/ffmpeg"):
    """Merge a video-only file and an audio-only file into one file with ffmpeg.

    Both streams are copied without re-encoding. The two source files are
    deleted only after ffmpeg exits with status 0; if ffmpeg cannot be started
    or reports an error, a message is printed and the sources are kept.

    Args:
        videopath: Path of the downloaded video stream.
        audiopath: Path of the downloaded audio stream.
        outpath: Path of the merged file to create.
        ffmpeg: Path of the ffmpeg executable.
    """
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    command = [
        ffmpeg,
        "-i", videopath,
        "-i", audiopath,
        "-vcodec", "copy",
        "-acodec", "copy",
        outpath,
    ]
    try:
        returncode = subprocess.call(command)
    except OSError as err:
        print('无法运行 ffmpeg (' + ffmpeg + '): ' + str(err))
        return
    if returncode != 0:
        print('合并失败(ffmpeg 返回码 ' + str(returncode) + '),已保留: '
              + videopath + ' 和 ' + audiopath)
        return
    os.remove(videopath)
    os.remove(audiopath)


if __name__ == '__main__':
    # BV id of the video; use input('视频BV号:') to prompt for it instead.
    bv = 'BV15v411k75j'
    url = 'https://www.bilibili.com/video/' + bv
    # First and last part to download (inclusive); input('起始P:') and
    # input('终止P;') may be used instead, hence the int() conversions below.
    startPart = 58
    endPart = 100
    for p in range(int(startPart), int(endPart) + 1):
        getBiliBiliVideo(url + '?p=' + str(p), p, bv)
这篇关于从bilibili上爬取视频的文章就介绍到这儿,希望我们推荐的文章对程序员们有所帮助!