360 Image Search Crawler | Batch-Downloading Search Result Images
A crawler for 360's image search: given a search term, it fetches the matching images in bulk. It is simple to implement.
Full implementation code at the end.
1. Packet Capture and Analysis
Press Enter in the 360 search box and the request shows up right away in the capture:
GET request: https://image.so.com/i?q=天空&inact=0
q — the text entered in the search box
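The query term is just URL-encoded into the q parameter. A minimal sketch of building the request URL (the helper name build_search_url is illustrative, not part of the original code):

```python
from urllib.parse import urlencode

# Hypothetical helper: assemble the 360 image search URL for a query term.
def build_search_url(query):
    # q is the search term; inact=0 mirrors the captured request
    return "https://image.so.com/i?" + urlencode({"q": query, "inact": "0"})

print(build_search_url("天空"))
# https://image.so.com/i?q=%E5%A4%A9%E7%A9%BA&inact=0
```

urlencode percent-encodes the UTF-8 bytes of the Chinese query, which is exactly what the browser does when the search is submitted.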
[Screenshots: search response payload, parts 1 and 2]
This part of the response holds the image download URLs and resolutions. The relevant fields of each result are:
title — the title, unicode-escaped
img — the image URL
width, height — the resolution
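Since the title comes back unicode-escaped, it can be decoded with json.loads. A small sketch; the escaped string below is an illustrative sample, not copied from a real response:

```python
import json

# Illustrative unicode-escaped title, as it would appear in the raw response
escaped_title = '"\\u5929\\u7a7a"'

# json.loads resolves the \uXXXX escapes into readable characters
title = json.loads(escaped_title)
print(title)  # 天空 ("sky")
```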
2. Implementation Code
[Screenshots: sample run output]
import os
import json

import requests
from bs4 import BeautifulSoup


def fetch_image_urls(url, headers):
    """Fetch the search page and extract image URLs from the embedded JSON."""
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print(f"Request failed, status code: {response.status_code}")
        return []
    soup = BeautifulSoup(response.text, "html.parser")
    # The page embeds image data as JSON inside a <script type="text/data"> tag
    script_tag = soup.find("script", {"type": "text/data", "id": "commercialImages"})
    if not script_tag:
        print("Script tag not found")
        return []
    try:
        image_data = json.loads(script_tag.string)
        return [img["qhimg_url"] for img in image_data]
    except json.JSONDecodeError:
        print("Failed to parse JSON data")
        return []


def download_images(image_urls, headers, folder="data"):
    """Download each image into the target folder."""
    os.makedirs(folder, exist_ok=True)
    for i, img_url in enumerate(image_urls):
        try:
            img_response = requests.get(img_url, headers=headers)
            if img_response.status_code == 200:
                with open(f"{folder}/image_{i}.jpg", "wb") as f:
                    f.write(img_response.content)
                print(f"Downloaded: image_{i}.jpg")
            else:
                print(f"Download failed: {img_url}")
        except Exception as e:
            print(f"Download error: {e}")


if __name__ == "__main__":
    url = "https://image.so.com/i?q=海洋&inact=0"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
    }
    image_urls = fetch_image_urls(url, headers)
    if image_urls:
        print("Image download links:")
        for img_url in image_urls:
            print(img_url)
        download_images(image_urls, headers)