python nhentai图片下载

2018 年 10 月 08 日

6315 次浏览

3735字数

python nhentai图片下载
使用方法
在 shell 下输入：python 1.py "要下载的url"（可以同时传入多个 url，以空格分隔）


# -*- coding=utf-8
import os
import requests
import _thread
from time import sleep
import re
from bs4 import BeautifulSoup
import sys


# Sample gallery URLs. Nothing in the code shown in this file reads this list:
# the URLs to download come from the command line, and main() binds its own
# local variable of the same name.
urllist = [
    'https://nhentai.net/g/238135/',
    'https://nhentai.net/g/235211/',
]
########################################################################
# Number of tasks (gallery workers plus single-image downloads) that are
# still running. The top-level loop polls it to know when it may exit.
listLen = 0
# Serialises every read-modify-write of the shared ``listLen`` counter, which
# is updated concurrently by the gallery workers and the image downloaders.
_LISTLEN_LOCK = _thread.allocate_lock()

_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}
# Characters removed from a gallery title before it is used as a directory name.
_FORBIDDEN_CHARS = str.maketrans('', '', '|?*<>/\\":')
# Extensions tried, in order, for every page image.
_IMAGE_EXTENSIONS = ('.png', '.jpg')
# Upper bound on attempts per image, so a dead URL cannot retry forever.
_MAX_ATTEMPTS = 5
# Captures the canonical gallery address, ignoring surrounding blanks and any
# trailing page number or query string.
_GALLERY_URL = re.compile(r'\s*(https?://nhentai\.net/g/\d+)')


def _adjust_pending(delta):
    """Add *delta* to the global ``listLen`` task counter under the lock."""
    global listLen
    with _LISTLEN_LOCK:
        listLen = listLen + delta


def _fetch_image(url):
    """Request ``url`` with each candidate extension until one answers 200.

    Returns an ``(extension, response)`` tuple, or ``None`` when no candidate
    extension produced a 200 response. Network errors raised by
    ``requests.get`` propagate to the caller.
    """
    for extension in _IMAGE_EXTENSIONS:
        response = requests.get(url + extension, timeout=4, headers=_HEADERS)
        # Decide on the HTTP status; the body of an image is binary data and
        # may contain the bytes "404" by coincidence.
        if response.status_code == 200:
            return extension, response
    return None


def _download_image(url, dirname):
    """Download one page image; thread entry point.

    ``url`` is the image address without extension and ``dirname`` the target
    file path without extension. The caller must have incremented ``listLen``
    for this task; it is decremented here exactly once, whatever happens.
    """
    try:
        print('开始下载 ', url)
        for attempt in range(1, _MAX_ATTEMPTS + 1):
            try:
                found = _fetch_image(url)
            except requests.RequestException:
                print('下载失败', url)
                if attempt < _MAX_ATTEMPTS:
                    print('正在重新下载', url)
                    sleep(1)  # short pause before retrying in the same thread
                continue
            if found is None:
                # The server answered, but not with an image: retrying is useless.
                print('下载失败', url)
                return
            extension, response = found
            with open(dirname + extension, "wb") as img:
                img.write(response.content)
            print('完成', url + extension)
            return
    finally:
        _adjust_pending(-1)


def _download_all(total, url, dirname, basedir):
    """Start one download thread for each of the ``total`` pages of a gallery.

    ``url`` is the common image URL prefix (ending in ``/``); images are stored
    as ``<basedir>/<dirname>/001.<ext>``, ``002.<ext>``, ...
    """
    imgdir = os.path.join(basedir, dirname)
    if not os.path.exists(imgdir):
        print('正在创建目录：', imgdir)
        # exist_ok: another gallery worker may create the same path meanwhile.
        os.makedirs(imgdir, exist_ok=True)
        print('创建完成')
    # Pages are numbered 1..total inclusive.
    for page in range(1, total + 1):
        # Count the task before its thread exists, so the counter can never
        # be observed at zero while downloads are still about to start.
        _adjust_pending(1)
        target = os.path.join(imgdir, str(page).zfill(3))
        try:
            _thread.start_new_thread(_download_image, (url + str(page), target))
        except RuntimeError:
            _adjust_pending(-1)
            raise
        sleep(0.2)  # delay between thread starts; lower it on a fast connection


def main(url, basedir='D:\\Hentai\\'):
    """Download every page image of the gallery at ``url``.

    Args:
        url: Gallery address such as ``https://nhentai.net/g/238135/``.
            Surrounding blanks and anything after the gallery id are ignored.
        basedir: Directory under which a folder named after the gallery title
            is created.

    The function registers itself in the global ``listLen`` counter while it
    runs (this keeps the main thread waiting) and always unregisters on exit,
    including when the page cannot be fetched or parsed.
    """
    _adjust_pending(1)
    try:
        matched = _GALLERY_URL.match(url)
        if matched:
            url = matched.group(1) + '/'
        elif len(url) != 29:
            # Unrecognised address: keep the historical truncation.
            url = url[:28]

        res = requests.get(url, headers=_HEADERS, timeout=10)
        res.encoding = 'utf-8'
        text = BeautifulSoup(res.text, 'html.parser')

        titles = text.select('#info h2')
        if not titles:
            titles = text.select('#info h1')
            if not titles:
                print('未找到漫画标题', url)
                return
            print("未找到中文标题 使用英文标题")
        dirname = titles[0].text
        print('开始处理', url)
        print("漫画名  ", dirname)
        # Drop characters that are not allowed in file names, plus the
        # trailing blanks/dots that Windows does not accept in a directory name.
        dirname = dirname.translate(_FORBIDDEN_CHARS).strip().rstrip('. ')

        thumbs = text.select('.gallerythumb')
        if not thumbs:
            print('未找到图片', url)
            return

        # Open the first page to learn the image URL prefix of the gallery.
        res = requests.get('https://nhentai.net' + thumbs[0]['href'],
                           headers=_HEADERS, timeout=10)
        res.encoding = 'utf-8'
        text = BeautifulSoup(res.text, 'html.parser')
        images = text.select('#image-container a img')
        if not images:
            print('未找到图片', url)
            return
        imgurl = images[0]['src']
        # Keep everything up to and including the last '/', whatever the
        # length of the first page's file name.
        prefix = imgurl.rsplit('/', 1)[0] + '/'
        _download_all(len(thumbs), prefix, dirname, basedir)  # download the whole gallery
    except requests.RequestException as err:
        print('下载失败', url, err)
    finally:
        _adjust_pending(-1)
########################################################################

# Start one worker thread per gallery URL given on the command line
# (sys.argv[0] is the script name, so the URLs start at index 1).
for url in sys.argv[1:]:
    if url:
        _thread.start_new_thread(main, (url,))
# Give the workers time to start and register themselves in listLen before
# polling it; otherwise the loop below could read 0 and finish immediately.
sleep(3)
# Keep the main thread alive until every task has finished. Compare by value:
# identity comparison with an int literal only works by accident of CPython's
# small-integer cache and raises a SyntaxWarning on Python 3.8+.
while listLen != 0:
    print('剩余下载数量'+str(listLen))
    sleep(2)
print('全部处理完成')

python nhentai图片下载