1、安装依赖库
pip install requests urllib3
pip install pillow
pip install beautifulsoup4
2、爬取图片的代码
import requests
import urllib3
import urllib.parse
import os
from io import BytesIO
from PIL import Image
from bs4 import BeautifulSoup
# Address of the page whose <img> tags are scraped; "URL" is a placeholder
# that must be replaced with a real address before running.
url = "URL"
http = urllib3.PoolManager()
# Browser-like User-Agent sent with the page request.
header = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0"}
# NOTE(review): the page is requested with POST, as in the original script;
# GET is the usual verb for fetching a page -- confirm which one the target
# site expects.
# The timeout keeps the script from hanging forever on an unresponsive host.
request = http.request('POST', url, headers=header, timeout=10.0)
# Debugging aids (uncomment as needed):
#   print(request.data.decode())   # response body
#   print(request.headers)         # response headers
#   print(request.status)          # status code
# Bytes that are not valid UTF-8 are replaced instead of raising
# UnicodeDecodeError, so pages served in another encoding can still be parsed.
data = request.data.decode(errors="replace")
soup = BeautifulSoup(data, "html.parser")
# Destination directory for the downloaded images. It does not change between
# iterations, so it is created once here instead of inside the loop.
output_folder = r"C:\Users\zzx\Desktop\图片"+"/"
os.makedirs(output_folder, exist_ok=True)


def _save_image(index, image_src):
    """Download one image and store it in ``output_folder``.

    The image is written as ``image_<index>.gif`` when the last path segment
    of its URL contains ``.gif`` and as ``image_<index>.png`` otherwise. If
    that save fails, a second attempt converts RGBA/LA images to RGB and
    writes ``image_<index>.jpg``.

    Args:
        index: 1-based position of the <img> tag, used in the file name.
        image_src: Value of the tag's ``src`` attribute; resolved against
            the page ``url``.

    Download, decode and save errors are reported on stdout and never
    propagate, so one bad image cannot abort the whole run.
    """
    image_url_http = urllib.parse.urljoin(url, image_src)

    # Fetch and decode first. A failure here means there is no image object
    # to fall back on, so the image is skipped rather than reusing whatever
    # a previous iteration left behind.
    try:
        resp = requests.get(image_url_http, headers=header, timeout=10)
        resp.raise_for_status()
        image = Image.open(BytesIO(resp.content))
    except (requests.RequestException, OSError, ValueError) as e:
        print(f"下载或解析图像时出错: {e}")
        return

    extension = "gif" if ".gif" in image_url_http.split("/")[-1] else "png"
    target = os.path.join(output_folder, f"image_{index}.{extension}")
    try:
        image.save(target)
    except (OSError, ValueError, KeyError) as e:
        print(f"保存图像时出错:{e}")
    else:
        print(f"图像已成功保存到 {target}")
        return

    # Fallback after a failed save: write a JPEG instead. The alpha-carrying
    # modes are converted to RGB before saving.
    target = os.path.join(output_folder, f"image_{index}.jpg")
    try:
        if image.mode in ('RGBA', 'LA'):
            image = image.convert('RGB')
        image.save(target)
        print(f"图像已成功保存到 {target}")
    except (OSError, ValueError, KeyError) as e:
        print(f"转换并保存图像时出错: {e}")


# Walk every <img> tag on the page; tags without a src are only listed.
for i, img_tag in enumerate(soup.find_all("img"), start=1):
    image_url = img_tag.get("src")
    if image_url:
        _save_image(i, image_url)
    print(i,image_url)
3、效果图