1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
| ''' 功能介绍: 利用爬虫实现图片爬取,输入你想检索的图片调用百度图片api实现检索,并下载文件 ''' import requests import re from urllib import parse import os
class BaiduImageSpider(object):
    """Download the images listed on a Baidu image-search result page.

    The spider requests the "flip" result page for a keyword, pulls every
    non-empty ``"hoverURL"`` value out of the returned page text and saves
    each one as ``<save_dir>/<keyword>/<n>.jpg``.

    Args:
        save_dir: Root directory under which one sub-directory per keyword
            is created. Defaults to the location the script always used.
        timeout: Seconds to wait for each HTTP request before giving up.
    """

    # Captures the quoted value that follows each "hoverURL" key in the page.
    _HOVER_URL_PATTERN = re.compile(r'"hoverURL":"(.*?)"', re.S)

    def __init__(self, save_dir='D:/study_folder/download', timeout=10):
        self.url = 'https://image.baidu.com/search/flip?tn=baiduimage&word={}'
        self.headers = {'User-Agent': 'Mozilla/4.0'}
        self.save_dir = save_dir
        self.timeout = timeout

    @staticmethod
    def _extract_links(html):
        """Return every non-empty ``hoverURL`` value found in *html*, in order."""
        # Empty matches are dropped: requesting '' raises and would abort the run.
        return [
            link
            for link in BaiduImageSpider._HOVER_URL_PATTERN.findall(html)
            if link
        ]

    def get_image(self, url, word):
        """Fetch the result page at *url* and download every image it lists.

        Args:
            url: Fully formatted search URL (keyword already percent-encoded).
            word: The raw keyword; used as the name of the output
                sub-directory.

        A failure on one image is reported and skipped so that the remaining
        images are still downloaded. Failures fetching the result page itself
        propagate to the caller.
        """
        res = requests.get(url, headers=self.headers, timeout=self.timeout)
        res.encoding = "utf-8"
        img_link_list = self._extract_links(res.text)
        print(img_link_list)

        directory = os.path.join(self.save_dir, word)
        os.makedirs(directory, exist_ok=True)

        for i, img_link in enumerate(img_link_list, start=1):
            filename = os.path.join(directory, '{}.jpg'.format(i))
            try:
                self.save_image(img_link, filename)
            except requests.RequestException as err:
                # Keep going: one dead link should not cost the whole batch.
                print(filename, '下载失败', err)

    def save_image(self, img_link, filename):
        """Download *img_link* and write the response body to *filename*.

        Raises:
            requests.RequestException: If the request fails, times out, or
                the server answers with an HTTP error status.
        """
        res = requests.get(url=img_link, headers=self.headers,
                           timeout=self.timeout)
        # Do not store an HTML error page under a .jpg name.
        res.raise_for_status()
        with open(filename, 'wb') as f:
            f.write(res.content)
        print(filename, '下载成功')

    def run(self):
        """Prompt for a keyword on stdin, then download its search results."""
        word = input("您想要谁的照片:").strip()
        if not word:
            # Nothing to search for; avoid creating an unnamed directory.
            print('关键词不能为空')
            return
        url = self.url.format(parse.quote(word))
        self.get_image(url, word)
if __name__ == '__main__':
    # Script entry point: build a spider and start the interactive download.
    BaiduImageSpider().run()
|