import os
from hashlib import md5
from multiprocessing.pool import Pool
from urllib.parse import urlencode

import requests

# Inclusive range of result pages to fetch; each page index is multiplied by
# 20 to form the ``offset`` query parameter (20 results are requested per page).
GROUP_START = 1
GROUP_END = 5

# Seconds to wait for the server before giving up on a request. Without an
# explicit timeout a stalled connection would block a pool worker forever.
REQUEST_TIMEOUT = 10


def get_page(offset):
    """Fetch one page of search results and return the decoded JSON.

    Args:
        offset: Value sent as the ``offset`` query parameter.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status, failed request, or a body that
        is not valid JSON).
    """
    params = {
        'offset': offset,
        'format': 'json',
        'keyword': '街拍',
        'autoload': 'true',
        'count': '20',
        'cur_tab': '3',
        'from': 'gallery',
    }
    url = 'https://www.toutiao.com/search_content/?' + urlencode(params)
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        if response.status_code == 200:
            return response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a body that cannot be decoded as JSON. Either
        # failure is reported as "no page" so one bad request does not
        # abort the whole pool.
        pass
    return None


def get_images(json):
    """Yield one ``{'image': url, 'title': title}`` dict per image in a page.

    Args:
        json: Decoded page as returned by ``get_page``; may be ``None``.

    Yields:
        A dict holding the image URL and the title of the item it belongs
        to. Items without an ``image_list`` and images without a ``url``
        are skipped.
    """
    if not json:
        # get_page returns None on failure; treat that as an empty page.
        return
    for item in json.get('data') or []:
        title = item.get('title')
        # Not every item carries an image list, so fall back to an empty one.
        for image in item.get('image_list') or []:
            image_url = image.get('url')
            if image_url:
                yield {
                    'image': image_url,
                    'title': title,
                }


def save_image(item):
    """Download one image into a directory named after its title.

    The file is stored as ``<title>/<md5 of content>.jpg``, so an image
    whose content was already saved is detected and not written again.

    Args:
        item: Dict with ``'image'`` (image URL) and ``'title'`` keys, as
            yielded by ``get_images``. Items missing either value are
            ignored.
    """
    title = item.get('title')
    image_url = item.get('image')
    if not title or not image_url:
        return

    # exist_ok avoids the check-then-create race between pool workers that
    # handle images sharing the same title.
    os.makedirs(title, exist_ok=True)

    large_url = image_url.replace('list', 'large')
    if not large_url.startswith(('http://', 'https://')):
        # URLs without a scheme get one prepended, as the original code did
        # unconditionally.
        large_url = 'http:' + large_url

    try:
        response = requests.get(large_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        print('Failed to save image')
        return
    if response.status_code != 200:
        return

    file_path = '{0}/{1}.{2}'.format(
        title, md5(response.content).hexdigest(), 'jpg')
    if os.path.exists(file_path):
        print('Already Downloaded', file_path)
        return
    with open(file_path, 'wb') as f:
        f.write(response.content)


def main(offset):
    """Fetch the page at ``offset`` and save every image found on it."""
    json = get_page(offset)
    for item in get_images(json):
        print(item)
        save_image(item)


if __name__ == '__main__':
    pool = Pool()
    groups = [x * 20 for x in range(GROUP_START, GROUP_END + 1)]
    pool.map(main, groups)
    pool.close()
    pool.join()