当前位置:网站首页>How do programmers without objects spend the Chinese Valentine's Day
How do programmers without objects spend the Chinese Valentine's Day
2022-08-05 01:52:00 【Z_Xshan】

Run the code and enter the video link (URL) when prompted to crawl the video. 直接上代码:
# encoding: utf-8
'''
爬取b站视频
'''
import requests
import json
import re
import os
class BilibiliVideoSpider:
    """Download one Bilibili video page as a single merged MP4 file.

    The page at ``url`` is fetched, the JSON blobs embedded in its HTML are
    parsed to find the first DASH video and audio stream, both streams are
    downloaded into ``output_root`` and finally muxed with the ``ffmpeg``
    command-line tool (which must be installed and on ``PATH``).
    """

    # Seconds to wait for a connection / for the next bytes of a response.
    REQUEST_TIMEOUT = 30
    # Number of bytes written to disk per streamed chunk.
    CHUNK_SIZE = 1024 * 1024

    def __init__(self, url, output_root=''):
        """Store the page URL, the output directory and the request headers.

        Args:
            url: Address of the video page to crawl.
            output_root: Directory for the downloaded files. When it is not
                an existing directory (including the default ``''``), the
                directory containing this script is used instead.
        """
        self.url = url
        if not os.path.isdir(output_root):
            output_root = os.path.abspath(os.path.dirname(__file__))
        self.output_root = output_root
        # Request headers sent with every HTTP call.
        self.headers = {
            'Accept': '*/*',
            'Accept-Language': 'en-US,en;q=0.5',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
        }

    def _match(self, text, pattern):
        """Return the JSON value captured by group 1 of ``pattern`` in ``text``.

        Args:
            text: Text to search (the page HTML).
            pattern: Regular expression whose first group captures JSON text.

        Returns:
            The decoded JSON value.

        Raises:
            ValueError: If ``pattern`` does not match, or if the captured
                text is not valid JSON (``json.JSONDecodeError`` is a
                ``ValueError`` subclass).
        """
        match = re.search(pattern, text)
        if match is None:
            print('this pattern was not matched !')
            # Fail with a clear error instead of crashing on ``None.group``.
            raise ValueError(f'pattern not found in page: {pattern!r}')
        return json.loads(match.group(1))

    def getHtml(self):
        """Fetch the video page.

        Returns:
            The ``requests`` response when the server answers with HTTP 200,
            otherwise ``None`` (non-200 status or a failed request).
        """
        try:
            # Send the request and obtain the response object.
            response = requests.get(
                url=self.url,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            print('html reques error !', exc)
            return None
        print(f'status_code: {response.status_code}')
        if response.status_code == 200:
            return response
        return None

    def parseHtml(self, response):
        """Extract the stream URLs and the title from the page HTML.

        Args:
            response: Object whose ``text`` attribute holds the page HTML.

        Returns:
            A ``(video_url, audio_url, video_name)`` tuple.

        Raises:
            ValueError: If one of the embedded JSON blobs cannot be found.
            KeyError, IndexError: If the JSON lacks the expected DASH entries.
        """
        # Playback details JSON.
        playinfo = self._match(response.text, '__playinfo__=(.*?)</script><script>')
        # Video metadata JSON.
        initial_state = self._match(response.text, r'__INITIAL_STATE__=(.*?);\(function\(\)')
        dash = playinfo['data']['dash']
        # Several formats are listed; the first entry is taken (presumably
        # the highest resolution - verify against the playinfo ordering).
        video_url = dash['video'][0]['baseUrl']
        audio_url = dash['audio'][0]['baseUrl']
        video_name = initial_state['videoData']['title']
        return video_url, audio_url, video_name

    def _download_stream(self, url, destination, size_label):
        """Download ``url`` into ``destination``, resuming until complete.

        Args:
            url: Address of the media stream.
            destination: Path of the file to (over)write.
            size_label: Text printed in front of the reported stream size.

        Raises:
            requests.RequestException: On a failed request or HTTP error.
            OSError: If the server returns no data before the file is complete.
        """
        # Probe the size with a streamed request so the body is not pulled
        # into memory just to read one header.
        with requests.get(url, headers=self.headers, stream=True,
                          timeout=self.REQUEST_TIMEOUT) as probe:
            probe.raise_for_status()
            total = int(probe.headers['content-length'])
        print(size_label, total)

        received = 0
        # 'wb' rather than 'ab': leftovers from an aborted run must not be
        # prepended to the new download.
        with open(destination, 'wb') as output:
            while received < total:
                # Per-request copy so the Range header never leaks into
                # self.headers and later requests.
                range_headers = dict(self.headers, Range=f'bytes={received}-')
                written = 0
                with requests.get(url, headers=range_headers, stream=True,
                                  timeout=self.REQUEST_TIMEOUT) as response:
                    response.raise_for_status()
                    for chunk in response.iter_content(chunk_size=self.CHUNK_SIZE):
                        output.write(chunk)
                        written += len(chunk)
                if not written:
                    # Without this guard an empty body would loop forever.
                    raise OSError(
                        f'no data received for {url} at offset {received}')
                received += written

    def downloadVideo(self, video_url, audio_url, video_name):
        """Download both streams, merge them and delete the temporary files.

        The result is written to ``download.mp4`` inside ``output_root``.
        The temporary ``video.mp4`` / ``audio.mp4`` files are removed only
        after the merge succeeded, so a failed merge can be retried by hand.

        Args:
            video_url: Address of the video-only stream.
            audio_url: Address of the audio-only stream.
            video_name: Title of the video; used in progress messages only.
        """
        self.headers.update({"Referer": self.url})
        print('开始下载视频: ')
        video = os.path.join(self.output_root, 'video.mp4')
        audio = os.path.join(self.output_root, 'audio.mp4')
        self._download_stream(video_url, video, '%s视频大小:' % video_name)
        self._download_stream(audio_url, audio, '%s音频大小:' % video_name)
        print('视频下载完成')
        video_dst = os.path.join(self.output_root, 'download.mp4')
        self.video_audio_merge(video, audio, video_dst)
        print(f'下载的视频: {video_dst}')
        os.remove(video)
        os.remove(audio)

    def video_audio_merge(self, video_src, audio_src, video_dst):
        """Mux one video file and one audio file into ``video_dst`` with ffmpeg.

        The video stream is copied as is and the audio is encoded as AAC;
        an existing ``video_dst`` is overwritten (``-y``).

        Args:
            video_src: Path of the video-only input file.
            audio_src: Path of the audio-only input file.
            video_dst: Path of the merged output file.

        Raises:
            FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
            subprocess.CalledProcessError: If ffmpeg exits with an error.
        """
        import subprocess  # local import: only this step needs it

        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact.
        cmd = [
            'ffmpeg', '-y',
            '-i', audio_src,
            '-i', video_src,
            '-vcodec', 'copy',
            '-acodec', 'aac',
            '-strict', '-2',
            '-q:v', '1',
            video_dst,
        ]
        print('execute cmd:', ' '.join(cmd))
        # check=True: a failed merge must not be reported as a success.
        subprocess.run(cmd, check=True)

    def run(self):
        """Fetch the page, locate the streams and download the video."""
        response = self.getHtml()
        if response is None:
            # getHtml already reported the status code or the request error.
            print('page could not be fetched, nothing to download')
            return
        video_url, audio_url, video_name = self.parseHtml(response)
        self.downloadVideo(video_url, audio_url, video_name)
def demo():
    """Prompt for a video page URL and download that video.

    Example input:
    https://www.bilibili.com/video/BV1Q5411p7bz?from=search&seid=14643382716113842219
    """
    # Pasted URLs often carry stray whitespace; strip it before use.
    url = input('请输入视频地址:').strip()
    if not url:
        print('未输入视频地址')
        return
    spider = BilibiliVideoSpider(url)
    spider.run()


# Script entry point: start the interactive demo only when run directly.
if __name__ == '__main__':
    demo()
The merged video is then saved as download.mp4 in the directory that contains the script.
边栏推荐
- 第09章 性能分析工具的使用【2.索引及调优篇】【MySQL高级】
- AI+小核酸药物|Eleven完成2200万美元种子轮融资
- Object.defineProperty实时监听数据变化并更新页面
- day14--postman interface test
- 多线程涉及的其它知识(死锁(等待唤醒机制),内存可见性问题以及定时器)
- Use of pytorch: Convolutional Neural Network Module
- 蓝牙Mesh系统开发五 ble mesh设备增加与移除
- 新唐NUC980使用记录:在用户应用中使用GPIO
- Bit rate vs. resolution, which one is more important?
猜你喜欢
随机推荐
.Net C# 控制台 使用 Win32 API 创建一个窗口
执掌图表
dotnet 6 为什么网络请求不跟随系统网络代理变化而动态切换代理
The use of pytorch: temperature prediction using neural networks
[Word] #() error occurs after Word formula is exported to PDF
从一次数据库误操作开始了解MySQL日志【bin log、redo log、undo log】
【Endnote】Word插入自定义形式的Endnote文献格式
【Redis】Linux下Redis安装
多线程涉及的其它知识(死锁(等待唤醒机制),内存可见性问题以及定时器)
基于OpenVINO工具套件简单实现YOLOv7预训练模型的部署
MySQL学习
Knowledge Points for Network Planning Designers' Morning Questions in November 2021 (Part 2)
[parameters of PyQT5 binding functions]
Chapter 09 Use of Performance Analysis Tools [2. Index and Tuning] [MySQL Advanced]
Day Fourteen & Postman
新唐NUC980使用记录:在用户应用中使用GPIO
PHP Skills Assessment
记录谷歌gn编译时碰到的一个错误“I could not find a “.gn“ file ...”
GCC: Shield dependencies between dynamic libraries
张驰咨询:揭晓六西格玛管理(6 Sigma)长盛不衰的秘密



![[Word] #() error occurs after Word formula is exported to PDF](/img/6a/168ded19d22826b6af974907f480fb.png)





