【教程分享】[Python] 百度小说全文爬取

请勿商业运营,违法使用和传播!资源仅供研究学习使用!解压密码为:www.nnymk.com
注:在使用本系统时,使用方必须在国家相关法律法规范围内并经过国家相关部门的授权许可,禁止用于一切非法行为。使用用途仅限于测试、实验、研究,禁止用于一切商业运营,本团队不对使用者在使用过程中的任何违法行为负责。所有源码请自测!不保证源码的完整性和有效性,所有源码均为全网搜集。
免责声明:本资源并未取得原始权利人的授权,不可商用,仅可用于学习分析底层代码,CSS等,禁止用于商业行为。如因擅自商用引起的相关纠纷及法律责任,由使用人全部承担。支持正版,人人有责,请于下载后24小时内删除,谢谢支持!
如果蓝奏云下载失效请更换最新链接:nnym.lanzouh.com//xxxx 【x改成h】


输入小说的ID号,该ID号可在

【教程分享】[Python] 百度小说全文爬取-牛牛源码库

中查看

默认存放路径:D:\小说

import time,os
import requests
import asyncio
import aiohttp
import aiofiles
async def get_chapters_ids(n_id):
    """Download every chapter of one novel concurrently.

    Fetches the catalogue for ``n_id`` from the getCatalog endpoint, starts
    one ``get_chapters`` task per catalogue entry, waits for all of them and
    prints the elapsed wall-clock time in whole seconds.

    Args:
        n_id: Book ID; interpolated unquoted into the catalogue query.
    """
    book_url = f'https://dushu.baidu.com/api/pc/getCatalog?data=%7B"book_id":{n_id}%7D'
    t_start = int(time.time())
    # Blocking call, but no chapter task has been created yet, so nothing on
    # the event loop is starved by it. The response is released before the
    # downloads start instead of being held open for their whole duration.
    with requests.get(book_url) as respon:
        dic = respon.json()
    items = dic['data']['novel']['items']
    tasks = [
        asyncio.create_task(get_chapters(n_id, item['title'], item['cid']))
        for item in items
    ]
    # gather() accepts an empty task list (asyncio.wait raises ValueError on
    # one) and, with return_exceptions=True, hands back each failure so it
    # can be reported instead of being silently dropped.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    for item, outcome in zip(items, results):
        if isinstance(outcome, BaseException):
            print(item['title'], '下载失败:', repr(outcome))
    t_over = int(time.time())
    print('下载完毕!')
    print('共用时:', t_over - t_start, '秒')
async def get_chapters(n_id, title, chapter_id):
    """Fetch one chapter and write its text to a ``.txt`` file under D:\\小说.

    Args:
        n_id: Book ID the chapter belongs to.
        title: Chapter title; used for the file name and the progress line.
        chapter_id: The chapter's ``cid`` value taken from the catalogue.
    """
    chapter_url = f'https://dushu.baidu.com/api/pc/getChapterContent?data=%7B"book_id":"{n_id}","cid":"{n_id}|{chapter_id}","need_bookinfo":1%7D'
    # Windows rejects these characters in file names; a title containing one
    # (e.g. "?" or ":") would otherwise make the file open fail.
    safe_title = str(title).translate(str.maketrans(dict.fromkeys('\\/:*?"<>|', '_')))
    # Backslashes are escaped explicitly: a bare backslash before "小" or "{"
    # is an invalid escape sequence and warns on Python 3.12+.
    out_path = f'D:\\小说\\{safe_title}.txt'
    async with aiohttp.ClientSession() as req:
        async with req.get(chapter_url) as respon:
            dic = await respon.json()
    # The payload is fully parsed, so the connection is released before writing.
    async with aiofiles.open(out_path, mode='w', encoding='utf-8') as f:
        await f.write(dic['data']['novel']['content'])
    print(title, '下载完成')
if __name__ == '__main__':
    # exist_ok=True replaces the separate exists() check and removes the
    # check-then-create race.
    os.makedirs(r'd:\小说', exist_ok=True)
    # Surrounding whitespace would otherwise end up inside the request URL.
    novel_id = input('输入小说编号:').strip()
    # NOTE(review): the loop is created by hand and never closed; kept as-is
    # in case that is intentional -- confirm before switching to asyncio.run().
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loop.run_until_complete(get_chapters_ids(novel_id))
import time,os
import requests
import asyncio
import aiohttp
import aiofiles
 
async def get_chapters_ids(n_id):
    """Download every chapter of one novel concurrently.

    Fetches the catalogue for ``n_id`` from the getCatalog endpoint, starts
    one ``get_chapters`` task per catalogue entry, waits for all of them and
    prints the elapsed wall-clock time in whole seconds.

    Args:
        n_id: Book ID; interpolated unquoted into the catalogue query.
    """
    book_url = f'https://dushu.baidu.com/api/pc/getCatalog?data=%7B"book_id":{n_id}%7D'
    t_start = int(time.time())
    # Blocking call, but no chapter task has been created yet, so nothing on
    # the event loop is starved by it. The response is released before the
    # downloads start instead of being held open for their whole duration.
    with requests.get(book_url) as respon:
        dic = respon.json()
    items = dic['data']['novel']['items']
    tasks = [
        asyncio.create_task(get_chapters(n_id, item['title'], item['cid']))
        for item in items
    ]
    # gather() accepts an empty task list (asyncio.wait raises ValueError on
    # one) and, with return_exceptions=True, hands back each failure so it
    # can be reported instead of being silently dropped.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    for item, outcome in zip(items, results):
        if isinstance(outcome, BaseException):
            print(item['title'], '下载失败:', repr(outcome))
    t_over = int(time.time())
    print('下载完毕!')
    print('共用时:', t_over - t_start, '秒')
 
async def get_chapters(n_id, title, chapter_id):
    """Fetch one chapter and write its text to a ``.txt`` file under D:\\小说.

    Args:
        n_id: Book ID the chapter belongs to.
        title: Chapter title; used for the file name and the progress line.
        chapter_id: The chapter's ``cid`` value taken from the catalogue.
    """
    chapter_url = f'https://dushu.baidu.com/api/pc/getChapterContent?data=%7B"book_id":"{n_id}","cid":"{n_id}|{chapter_id}","need_bookinfo":1%7D'
    # Windows rejects these characters in file names; a title containing one
    # (e.g. "?" or ":") would otherwise make the file open fail.
    safe_title = str(title).translate(str.maketrans(dict.fromkeys('\\/:*?"<>|', '_')))
    # Backslashes are escaped explicitly: a bare backslash before "小" or "{"
    # is an invalid escape sequence and warns on Python 3.12+.
    out_path = f'D:\\小说\\{safe_title}.txt'
    async with aiohttp.ClientSession() as req:
        async with req.get(chapter_url) as respon:
            dic = await respon.json()
    # The payload is fully parsed, so the connection is released before writing.
    async with aiofiles.open(out_path, mode='w', encoding='utf-8') as f:
        await f.write(dic['data']['novel']['content'])
    print(title, '下载完成')
 
if __name__ == '__main__':
    # exist_ok=True replaces the separate exists() check and removes the
    # check-then-create race.
    os.makedirs(r'd:\小说', exist_ok=True)
    # Surrounding whitespace would otherwise end up inside the request URL.
    novel_id = input('输入小说编号:').strip()
    # NOTE(review): the loop is created by hand and never closed; kept as-is
    # in case that is intentional -- confirm before switching to asyncio.run().
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loop.run_until_complete(get_chapters_ids(novel_id))
import asyncio
import os
import time

import aiofiles
import aiohttp
import requests


async def get_chapters_ids(n_id):
    """Download every chapter of one novel concurrently.

    Fetches the catalogue for ``n_id`` from the getCatalog endpoint, starts
    one ``get_chapters`` task per catalogue entry, waits for all of them and
    prints the elapsed wall-clock time in whole seconds.

    Args:
        n_id: Book ID; interpolated unquoted into the catalogue query.
    """
    book_url = f'https://dushu.baidu.com/api/pc/getCatalog?data=%7B"book_id":{n_id}%7D'
    t_start = int(time.time())
    # Blocking call, but no chapter task has been created yet, so nothing on
    # the event loop is starved by it.
    with requests.get(book_url) as respon:
        dic = respon.json()
    items = dic['data']['novel']['items']
    tasks = [
        asyncio.create_task(get_chapters(n_id, item['title'], item['cid']))
        for item in items
    ]
    # gather() accepts an empty task list (asyncio.wait raises ValueError on
    # one) and, with return_exceptions=True, hands back each failure so it
    # can be reported instead of being silently dropped.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    for item, outcome in zip(items, results):
        if isinstance(outcome, BaseException):
            print(item['title'], '下载失败:', repr(outcome))
    t_over = int(time.time())
    print('下载完毕!')
    print('共用时:', t_over - t_start, '秒')


async def get_chapters(n_id, title, chapter_id):
    """Fetch one chapter and write its text to a ``.txt`` file under D:\\小说.

    Args:
        n_id: Book ID the chapter belongs to.
        title: Chapter title; used for the file name and the progress line.
        chapter_id: The chapter's ``cid`` value taken from the catalogue.
    """
    chapter_url = f'https://dushu.baidu.com/api/pc/getChapterContent?data=%7B"book_id":"{n_id}","cid":"{n_id}|{chapter_id}","need_bookinfo":1%7D'
    # Windows rejects these characters in file names; a title containing one
    # (e.g. "?" or ":") would otherwise make the file open fail.
    safe_title = str(title).translate(str.maketrans(dict.fromkeys('\\/:*?"<>|', '_')))
    # Backslashes are escaped explicitly: a bare backslash before "小" or "{"
    # is an invalid escape sequence and warns on Python 3.12+.
    out_path = f'D:\\小说\\{safe_title}.txt'
    async with aiohttp.ClientSession() as req:
        async with req.get(chapter_url) as respon:
            dic = await respon.json()
    # The payload is fully parsed, so the connection is released before writing.
    async with aiofiles.open(out_path, mode='w', encoding='utf-8') as f:
        await f.write(dic['data']['novel']['content'])
    print(title, '下载完成')


if __name__ == '__main__':
    # exist_ok=True replaces the separate exists() check and removes the
    # check-then-create race.
    os.makedirs(r'd:\小说', exist_ok=True)
    # Surrounding whitespace would otherwise end up inside the request URL.
    novel_id = input('输入小说编号:').strip()
    # NOTE(review): the loop is created by hand and never closed; kept as-is
    # in case that is intentional -- confirm before switching to asyncio.run().
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loop.run_until_complete(get_chapters_ids(novel_id))
------本页内容已结束,喜欢请分享------

感谢您的来访,获取更多精彩文章请收藏本站。

© 版权声明
THE END
喜欢就支持一下吧
点赞9 分享
As long as there's tomorrow, today is always the starting line.
只要还有明天,今天就永远是起跑线
评论 抢沙发
头像
欢迎您留下宝贵的见解!
提交
头像

昵称

取消
昵称表情代码图片

    暂无评论内容