请注意,本文编写于 2236 天前,最后修改于 2236 天前,其中某些信息可能已经过时。
python nhentai图片下载
使用方法
在 shell 中输入:python 1.py "要下载的url"(可以一次传入多个 url,用空格分隔)
# -*- coding=utf-8
import os
import requests
import _thread
from time import sleep
import re
from bs4 import BeautifulSoup
import sys
# Sample gallery URLs. The original literal had no comma between the two
# strings, so Python silently concatenated them into a single element; the
# padding spaces inside the quotes were also removed so each entry is a
# usable URL. The code visible in this file does not read this list (main()
# binds a local of the same name); URLs come from the command line.
urllist = [
    'https://nhentai.net/g/238135/',
    'https://nhentai.net/g/235211/',
]
########################################################################
# Number of tasks (gallery jobs plus single-image downloads) still running.
# Worker threads bump it up/down; the script's final loop polls it to decide
# when the main thread may exit.
listLen = 0
# Browser-like User-Agent sent with every request (shared by all helpers).
_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}

# Characters removed from gallery titles before they are used as a directory
# name: | ? * < > / \ " :
_FORBIDDEN_CHARS = str.maketrans('', '', '|?*<>/\\":')

# Matches the canonical gallery prefix, whatever the number of id digits.
_GALLERY_RE = re.compile(r'https?://nhentai\.net/g/\d+')

# Guards updates of the module-level ``listLen`` counter, which is modified
# from several threads at once.
_pending_lock = _thread.allocate_lock()


def _adjust_pending(delta):
    """Add *delta* to the global ``listLen`` task counter under a lock.

    ``listLen = listLen + 1`` is a read-modify-write sequence; without the
    lock two threads can interleave and lose an update, which would leave
    the script's final wait loop either stuck or exiting too early.
    """
    global listLen
    with _pending_lock:
        listLen = listLen + delta


def _gallery_url(url):
    """Reduce *url* to ``https://nhentai.net/g/<id>/``.

    Anything after the gallery id (for example a page number) is dropped.
    A URL that does not look like a gallery link is returned unchanged.
    """
    found = _GALLERY_RE.match(url.strip())
    if found is None:
        return url
    return found.group(0) + '/'


def mkdir(mkpath):
    """Create directory *mkpath* (including parents) if it is missing.

    Returns:
        True if the directory was created by this call, False if the path
        already existed.
    """
    if os.path.exists(mkpath):
        return False
    print('正在创建目录:', mkpath)
    # exist_ok: another thread may create the same path between the check
    # above and this call; that must not crash the worker.
    os.makedirs(mkpath, exist_ok=True)
    print('创建完成')
    return True


def downimage(url, dirname, max_attempts=5):
    """Download a single page image and write it to disk.

    Args:
        url: Image URL without its file extension.
        dirname: Destination file path without its file extension.
        max_attempts: How many times the download is tried when the request
            itself fails (timeout, connection error) before giving up.

    The ``.png`` variant is requested first; if the server answers 404 the
    ``.jpg`` variant is requested instead. The global task counter is
    incremented on entry and always decremented on exit, including every
    failure path, so the script's wait loop can terminate.
    """
    _adjust_pending(1)
    try:
        print('开始下载 ', url)
        for attempt in range(1, max_attempts + 1):
            extensions = '.png'
            try:
                req = requests.get(url + extensions, timeout=4, headers=_HEADERS)
                # Decide on the HTTP status, not on the decoded body: binary
                # image data may contain the characters "404" by chance.
                if req.status_code == 404:
                    extensions = '.jpg'
                    req = requests.get(url + extensions, timeout=4, headers=_HEADERS)
            except requests.RequestException:
                print('下载失败', url + extensions)
                if attempt < max_attempts:
                    print('正在重新下载', url)
                # Retry in this thread instead of spawning a fresh thread
                # per failure, which was unbounded.
                continue
            if req.status_code != 200:
                # Do not save an error page under an image file name.
                print('下载失败', url + extensions)
                return
            print('完成', url + extensions)
            with open(dirname + extensions, "wb") as img:
                img.write(req.content)
            return
    finally:
        _adjust_pending(-1)


def downAllImage(totalNum, url, dirname, basedir='D:\\Hentai\\'):
    """Download every page of one gallery, one thread per image.

    Args:
        totalNum: Number of pages in the gallery.
        url: Common image URL prefix; the page number is appended to it.
        dirname: Sanitised gallery title, used as the sub-directory name.
        basedir: Directory (with trailing separator) under which the
            gallery directory is created.
    """
    imgdir = basedir + dirname + '\\'
    mkdir(imgdir)
    # Pages are numbered 1..totalNum inclusive; range(1, totalNum) skipped
    # the last page.
    for page in range(1, totalNum + 1):
        # One thread per image.
        _thread.start_new_thread(downimage, (url + str(page), imgdir + str(page).zfill(3)))
        # Delay between thread starts; can be lowered on a fast connection.
        sleep(0.2)


def trans(p):
    """Return *p* with the characters ``| ? * < > / \\ " :`` removed.

    Used to turn a gallery title into a name that is valid as a directory.
    """
    return p.translate(_FORBIDDEN_CHARS)


def main(url):
    """Process one gallery: read its page, then download all its images.

    Args:
        url: Gallery URL; anything after the gallery id is ignored.

    The gallery page is fetched to obtain the title (``#info h2``, falling
    back to ``#info h1``) and the list of thumbnails; the first reader page
    is fetched to learn the image URL prefix; then one download thread per
    page is started via downAllImage().
    """
    # Count this job as pending so the main thread does not exit while it
    # runs; the finally clause keeps the counter balanced even when a
    # request or a selector lookup raises.
    _adjust_pending(1)
    try:
        url = _gallery_url(url)
        res = requests.get(url, headers=_HEADERS)
        res.encoding = 'utf-8'
        text = BeautifulSoup(res.text, 'html.parser')
        try:
            dirname = text.select('#info h2')[0].text
        except IndexError:
            dirname = text.select('#info h1')[0].text
            print("未找到中文标题 使用英文标题")
        print('开始处理', url)
        print("漫画名 ", dirname)
        dirname = trans(dirname)
        thumbs = text.select('.gallerythumb')
        if not thumbs:
            # Nothing to download (unexpected page layout or empty gallery).
            print('下载失败', url)
            return
        # Fetch the first reader page to obtain the URL of the first image.
        res = requests.get('https://nhentai.net' + thumbs[0]['href'], headers=_HEADERS)
        res.encoding = 'utf-8'
        text = BeautifulSoup(res.text, 'html.parser')
        imgurl = text.select('#image-container a img')[0]['src']
        # Keep everything up to and including the last '/', so the prefix is
        # right regardless of the length of the file name and extension.
        prefix = imgurl[:imgurl.rfind('/') + 1]
        # Download the whole gallery (assumes one thumbnail per page).
        downAllImage(len(thumbs), prefix, dirname)
    finally:
        _adjust_pending(-1)
########################################################################
# Start one worker thread per URL given on the command line
# (sys.argv[0] is the script name, so the URLs start at index 1).
for url in sys.argv[1:]:
    if url:
        _thread.start_new_thread(main, (url,))
        # Give the worker time to register itself in listLen before the
        # wait loop below looks at the counter.
        sleep(3)
# Keep the main thread alive until every pending task has finished.
# Compare by value: `is not 0` tests object identity and raises a
# SyntaxWarning on Python 3.8+.
while listLen != 0:
    print('剩余下载数量' + str(listLen))
    sleep(2)
print('全部处理完成')