python爬虫

博主：憶の年
发布时间：2018 年 10 月 08 日
2882 次浏览
暂无评论
727字数
分类：技术

用 Python 爬取链接并保存为 txt 文件


# -*- coding=utf-8
import requests
import os
from bs4 import BeautifulSoup

# Script body: download one search-results page, extract the link of every
# gallery entry on it, print each link, and append them to a text file.

# Request headers: send a browser-like User-Agent string with the request.
headers = {'user-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}

# Search-results page whose gallery links are collected.
url = "https://nhentai.net/search/?q=doujinshi+full+color+chinese+"

# Output file. It is opened in append mode below, so links written by
# earlier runs are kept.
filename = 'write_data.txt'

# Without a timeout requests waits indefinitely on an unresponsive server.
resp = requests.get(url=url, headers=headers, timeout=30)
# Fail loudly on an HTTP error status instead of silently parsing an error
# page and producing no output.
resp.raise_for_status()
resp.encoding = 'UTF-8'
soup = BeautifulSoup(resp.text, 'html.parser')

# Collect the link of each gallery's landing page.
links = []
for news in soup.select('.gallery'):
    # Take the href of the first <a> under the class="gallery" element;
    # skip entries that have no anchor or no href rather than crashing.
    anchor = news.select_one('a')
    href = anchor.get('href') if anchor is not None else None
    if href is None:
        continue
    url1 = "https://nhentai.net" + href
    print(url1)
    links.append(url1)

if links:
    # Mode 'a' appends and creates the file if it does not exist ('w' would
    # erase the existing contents first). newline='' turns off newline
    # translation so each '\r\n' is written exactly as given; with the
    # default setting Windows would expand it to '\r\r\n'.
    # The file is opened once for all links; the with-statement closes it.
    with open(filename, 'a', encoding='utf-8', newline='') as f:
        f.writelines(link + '\r\n' for link in links)