python批量爬取下载抖音视频
Python  /  管理员 发布于 7年前   244
本文实例为大家分享了 Python 批量爬取下载抖音视频的具体代码，供大家参考，具体内容如下：
import asyncio
import contextlib
import os
import re
import sys

import aiohttp
import requests

headers = {
    'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) '
                  'Version/11.0 Mobile/15A372 Safari/604.1'
}
VIDEO_URLS, PAGE = [], 1

# Seconds to wait for the share page / post-list API before giving up.
REQUEST_TIMEOUT = 10
# Upper bound on simultaneous video downloads.
MAX_CONCURRENT_DOWNLOADS = 8
# Bytes requested from the response stream per read.
CHUNK_SIZE = 64 * 1024

# Path separators, characters Windows rejects in file names, and control characters.
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _safe_filename(video_name, index):
    """Turn a video description into a string usable as a single file name."""
    cleaned = _ILLEGAL_FILENAME_CHARS.sub('_', str(video_name or '')).strip()
    # A missing or blank description still needs a distinct, valid name.
    return cleaned or f'video_{index}'


def get_info(url):
    """Fetch a user's share page and extract the values the post-list API needs.

    :param url: share link of the user
    :return: tuple ``(name, user_id, dytk)``; each element that could not be
        extracted is ``None``
    """
    name = None
    dytk = None
    user_id = None
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        # The user id is read from the sixth '/'-separated part of the final URL.
        user_id = response.url.split('/')[5].split('?')[0]
        # re.search returns None on no match; subscripting None raises TypeError.
        name = re.search(r'class="nickname">(.*?)<', response.text)[1]
        dytk = re.search(r"dytk: '(.*?)'", response.text)[1]
    except (TypeError, IndexError):
        sys.stdout.write('Waring:输入的链接错误')
    except requests.exceptions.RequestException:
        sys.stdout.write('Waring:链接错误')
    return name, user_id, dytk


def make_dir(name):
    """Create the directory ``name`` unless it already exists.

    :param name: directory to create (the user's nickname)
    :return: None
    """
    os.makedirs(name, exist_ok=True)


def get_all_video(user_id, max_cursor, dytk):
    """Collect ``[description, play_url]`` pairs for a user into ``VIDEO_URLS``.

    Pages through the post-list API, starting at ``max_cursor``, until the
    response reports no further page.

    :param user_id: user id extracted by :func:`get_info`
    :param max_cursor: paging cursor to start from (``0`` for the first page)
    :param dytk: ``dytk`` token extracted by :func:`get_info`
    :return: ``VIDEO_URLS`` on success, ``None`` on a non-200 response or on
        any request/parsing error (entries gathered so far stay in ``VIDEO_URLS``)
    """
    global PAGE
    api_url = "https://www.amemv.com/aweme/v1/aweme/post/?"
    try:
        while True:
            params = {'user_id': user_id, 'count': 21, 'max_cursor': max_cursor, 'dytk': dytk}
            response = requests.get(url=api_url, params=params, headers=headers, timeout=REQUEST_TIMEOUT)
            if response.status_code != 200:
                print('状态码:', response.status_code)
                return None
            datas = response.json()
            for data in datas['aweme_list']:
                name = data.get('share_info').get('share_desc')
                play_url = data.get('video').get('play_addr').get('url_list')[0].replace('playwm', 'play')
                VIDEO_URLS.append([name, play_url])
            next_cursor = datas.get('max_cursor')
            # Stop when there is no next page, or when the cursor did not
            # advance (which would otherwise re-request the same page forever).
            if datas['has_more'] != 1 or next_cursor == 0 or next_cursor == max_cursor:
                print('收集完成')
                return VIDEO_URLS
            print(f'收集第{PAGE}页视频')
            PAGE += 1
            max_cursor = next_cursor
    except (requests.exceptions.RequestException, ValueError, KeyError,
            IndexError, TypeError, AttributeError) as e:
        # Network failure, a non-JSON body, or a payload without the expected keys.
        print('Waring:', e)
        return None


async def download_video(index, name, video_name, url):
    """Download one video into the directory ``name``.

    The data is written to a temporary ``.part`` file and renamed only after
    the whole body has been received, so an interrupted download never leaves
    a truncated ``.mp4`` that a later run would skip as already present.

    :param index: 1-based position of the video, used in progress messages
    :param name: directory to save into (the user's nickname)
    :param video_name: video description, used (sanitised) as the file name
    :param url: download URL
    :return: None
    """
    print(f'正在下载第{index}个视频:{video_name}')
    video_path = os.path.join(name, _safe_filename(video_name, index) + '.mp4')
    if os.path.isfile(video_path):
        print('文件已存在')
        return
    # The index keeps temp files apart when two videos share a description.
    part_path = f'{video_path}.{index}.part'
    try:
        async with aiohttp.ClientSession() as session:
            # NOTE(review): ssl=False disables TLS certificate verification;
            # kept from the original, confirm it is really required.
            async with session.get(url=url, headers=headers, ssl=False) as response:
                # Do not save an HTTP error page as a video.
                response.raise_for_status()
                with open(part_path, 'wb') as f:
                    async for chunk in response.content.iter_chunked(CHUNK_SIZE):
                        f.write(chunk)
        os.replace(part_path, video_path)
        print(f'下载完成第{index}个视频:{video_name}')
    except (aiohttp.ClientError, asyncio.TimeoutError, OSError) as e:
        print('waring:download faild', video_name, e)
    finally:
        # After a successful rename the temp file is gone and this is a no-op.
        with contextlib.suppress(OSError):
            os.remove(part_path)


async def _download_all(name, videos):
    """Download every ``[description, url]`` pair with bounded concurrency."""
    limiter = asyncio.Semaphore(MAX_CONCURRENT_DOWNLOADS)

    async def _limited(index, video_name, url):
        async with limiter:
            await download_video(index, name, video_name, url)

    jobs = [_limited(index, item[0], item[1]) for index, item in enumerate(videos, 1)]
    # return_exceptions=True: one unexpected failure must not abort the batch.
    results = await asyncio.gather(*jobs, return_exceptions=True)
    for result in results:
        if isinstance(result, Exception):
            print('waring:download faild', result)


def main():
    """Collect and download every video of the hard-coded user share link."""
    url = 'http://v.douyin.com/dEorkn/'
    name, user_id, dytk = get_info(url)
    # Every one of the three values is required by the steps below.
    if not all((name, user_id, dytk)):
        return
    make_dir(name)
    get_all_video(user_id, 0, dytk)
    print(f'{name}:总共有{len(VIDEO_URLS)}个视频')
    if not VIDEO_URLS:
        return
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(_download_all(name, VIDEO_URLS))
        # One extra loop iteration so closed connections can finish cleanup.
        loop.run_until_complete(asyncio.sleep(0))
    finally:
        loop.close()
    print(f'{name}视频下载完成!')


if __name__ == '__main__':
    main()
以上就是本文的全部内容，希望对大家的学习有所帮助，也希望大家多多支持。
122 在
学历:一种延缓就业设计,生活需求下的权衡之选中评论 工作几年后,报名考研了,到现在还没认真学习备考,迷茫中。作为一名北漂互联网打工人..123 在
Clash for Windows作者删库跑路了,github已404中评论 按理说只要你在国内,所有的流量进出都在监控范围内,不管你怎么隐藏也没用,想搞你分..原梓番博客 在
在Laravel框架中使用模型Model分表最简单的方法中评论 好久好久都没看友情链接申请了,今天刚看,已经添加。..博主 在
佛跳墙vpn软件不会用?上不了网?佛跳墙vpn常见问题以及解决办法中评论 @1111老铁这个不行了,可以看看近期评论的其他文章..1111 在
佛跳墙vpn软件不会用?上不了网?佛跳墙vpn常见问题以及解决办法中评论 网站不能打开,博主百忙中能否发个APP下载链接,佛跳墙或极光..
Copyright·© 2019 侯体宗版权所有·
粤ICP备20027696号