当前位置: 首页 > article >正文

爬虫 APP 逆向 ---> shopee(虾皮) 电商

shopee 泰国站点:https://shopee.co.th/

shopee 网页访问时,直接弹出使用 app 登录查看,那就登录 shopee 泰国站点 app。

手机抓包:分类接口

接口:https://mall.shopee.co.th/api/v4/pages/get_category_tree

请求参数:

GET /api/v4/pages/get_category_tree HTTP/2
Host: mall.shopee.co.th
Cookie: 请求的cookie
X-Api-Source: rn
X-Shopee-Language: en
If-None-Match-: 55b03-158a0f98f80a09649f8dcfd2f8d263d4
X-Shopee-Client-Timezone: Asia/Shanghai
Cache-Control: no-cache, no-store
Client-Request-Id: 866aa1ae-8ad1-4fdb-ab12-5c1cd5326645.379
User-Agent: Android app Shopee appver=33835 app_type=1 platform=native_android os_ver=31 Cronet/102.0.5005.61
Af-Ac-Enc-Sz-Token:  一大串字符
X-Sap-Ri: 616c6b67968b98bfd88b87180110349e24abfda49173147d6085
D42920b3: 一大串字符
D609da9f: CTEoMyxcrcDaO0/OkIoS0bVWFEb=
651bb94a: 0+eWOk+Gy1GgoMssBoNnarzOM8E=
Referer: https://mall.shopee.co.th/
Accept-Encoding: gzip, deflate, br

Cookie 不用说,是登录后的授权凭证;其他参数经分析都是在 app 中生成的。

对于 app 中生成的参数,解决方法有三个:

  • 1. 直接逆向参数,然后使用 python 还原
  • 2. 把 java 代码抠出来,补全缺失的 java 代码,然后编译成 jar 包,python 调用 jar 包生成
  • 3. 直接通过 frida 的 rpc ,hook 住关键函数,python 通过 http 服务去调用

这里使用 第三种方法:通过 frida 的 rpc

import time
import json
import frida
import uvicorn
import subprocess
from pathlib import Path
from fastapi import FastAPI, Request


# Frida agent (JavaScript source) that run_frida_script() loads into the app.
# - call_request_defense(req_url) runs inside Java.perform and uses Java.choose
#   to visit live instances of com.shopee.shpssdk.SHPSSDK, calling
#   instance.requestDefense(req_url, null) on every match (the last result wins).
# - The returned object's keySet() is iterated and each key/value is copied,
#   via toString(), into a plain object that is returned as a JSON string.
# - The function is exposed to Python as the RPC export `callfunc1`.
# NOTE(review): if Java.choose matches no instance, ret_hashtable stays
# undefined and the keySet() call throws inside the agent — confirm the app has
# created an SHPSSDK instance before the first RPC call.
js_code = """
console.log("脚本载入成功");
function call_request_defense(req_url) { //定义导出函数
    let ret_result;
    Java.perform(function x() {
        console.log("注入成功");
        console.log(`req_url ---> ${req_url}`);
        let ret_hashtable;
        let TargetClass = "com.shopee.shpssdk.SHPSSDK";
        // 遍历并操作目标类的实例
        Java.choose(TargetClass, {
            onMatch: function (instance) {
                console.log("Instance found: " + instance);
                // 调用实例方法
                ret_hashtable = instance.requestDefense(req_url, null)
                console.log(`ret_hashtable ---> ${ret_hashtable}`);
            },
            onComplete: function () {
                console.log("Search complete");
            }
        });
        let obj = {}
        let key_set = ret_hashtable.keySet();
        let key_set_it = key_set.iterator();
        while (key_set_it.hasNext()) {
            let key_str = key_set_it.next().toString();
            obj[key_str] = ret_hashtable.get(key_str).toString();
        }
        ret_result = JSON.stringify(obj);
        return ret_result;
    });
    return ret_result;
}
rpc.exports = {
    // 导出名callfunc1中不能有大写字母或者下划线
    callfunc1: call_request_defense
};
"""


def my_message_handler(message, payload):
    """Print ``message`` and then ``payload``, each on its own line."""
    for part in (message, payload):
        print(part)


# Attach Frida to the running app and load the RPC agent.
def run_frida_script(target=31785):
    """Attach to the app process and load ``js_code`` into it.

    Args:
        target: What to pass to ``device.attach``: the pid of the running app
            as reported by ``frida-ps`` (the default is the pid used when this
            script was written), or a process name such as '蝦皮購物'.

    Returns:
        The loaded Frida script, whose ``exports_sync`` exposes ``callfunc1``,
        or ``None`` when any step fails (the error is printed).
    """
    try:
        # Forward local tcp:27042 to the device so get_remote_device() can
        # reach the frida-server running there. The command is a fixed string.
        subprocess.run("adb forward tcp:27042 tcp:27042", shell=True)
        device = frida.get_remote_device()
        time.sleep(3)  # pause so we do not attach too early and fail
        session = device.attach(target)
        time.sleep(3)
        script = session.create_script(js_code)
        # Register the handler BEFORE load(): messages the agent sends while it
        # is loading would otherwise have no listener.
        script.on('message', my_message_handler)
        script.load()
        return script
    except Exception as e:
        # Best effort: report the failure and let the caller handle None.
        print(f'frida 失败 ---> {e}')
        return None


# FastAPI application; main() serves it with uvicorn.
app = FastAPI()
# Loaded Frida script handle; stays None until the first request creates it.
global_script = None


@app.post("/get_req_args")
async def root(request: Request):
    """Return the app-generated request arguments for a URL.

    Expects a JSON body of the form ``{"req_url": "<full request URL>"}``,
    passes the URL to the Frida RPC export ``callfunc1`` and returns the
    decoded JSON object it produces.

    Raises:
        KeyError: if the body has no ``req_url`` key.
        RuntimeError: if the Frida script could not be attached/loaded.
    """
    global global_script
    data_dict = await request.json()
    req_url = data_dict['req_url']
    # Attach lazily on first use and reuse the script for later requests.
    # NOTE: the Frida calls below are blocking and run on the event loop.
    if not global_script:
        global_script = run_frida_script()
    if not global_script:
        # run_frida_script() reports failure by returning None; fail with a
        # clear error instead of an AttributeError on None.
        raise RuntimeError('frida script is not available (attach/load failed)')
    json_string = global_script.exports_sync.callfunc1(req_url)
    json_data = json.loads(json_string)
    print(f'req_args ---> {json_data}')
    return json_data


def main():
    """Serve this module's ``app`` with uvicorn on 0.0.0.0:9500."""
    module_name = Path(__file__).stem
    app_path = f'{module_name}:app'
    uvicorn.run(app_path, host="0.0.0.0", port=9500)


if __name__ == '__main__':
    main()

通过 frida-ps 查看要 hook 的进程

这里要 hook 泰国站点的 app,所以 pid 是 31785(pid 在 app 每次重启后都会变化,需要同步修改脚本中 attach 的参数)

import re
import time
import copy
import json
import requests
from datetime import datetime
from loguru import logger
import traceback

# Silence urllib3 warnings; the spider below sends requests with verify=False.
requests.packages.urllib3.disable_warnings()


class SpiderDemo:
    """Fetch Shopee app API URLs with headers supplied by the local RPC service.

    For every URL the spider first asks the HTTP service on 127.0.0.1:9500 for
    the per-request arguments, merges them over the static headers and then
    performs the GET through the configured proxy.
    """

    # NOTE(review): this is a logged-in session cookie hard-coded in source.
    # It expires and should not be committed; pass ``cookie=`` to override it.
    # The parts are joined with "; " to form the Cookie header value.
    DEFAULT_COOKIE = "; ".join((
        "SPC_DID=O142PAj/32dZfKX4N53blQ1t5reVeGhPJmQF9ikDMew=",
        "SPC_F=5858e12b8c30ccb3_unknown",
        "REC_T_ID=8cbc4880-beab-11ef-b727-36f242b5381f",
        "SPC_AFTID=b25c3453-e2ad-42f0-8ee4-03e98d76751c",
        "SPC_CLIENTID=O142PAj/32dZfKX4hhcnmybxabffjfeq",
        "shopee_app_version=33835",
        "SPC_F=5858e12b8c30ccb3_unknown",
        "userid=1421474215",
        "shopid=1420696038",
        "username=kingking482",
        "shopee_token=EAwLsszK+CeDVWxZpYaNQu3F/7LcnX3Hvzgm39E4x4uNynjHMp+p5Fo2HFwWNrGoVritm4eCtfqAHFuZE2JVq7E=",
        "SPC_U=1421474215",
        "SPC_R_T_ID=0x7A96XMg1ulYu5diWw8k3EB9+DygwRdOLyKroXqWTl5jt4Hxl2s2eQABjuIU5SzmXrv2g8mqLxjHa++VYUPZjRR5DglKQrWFLzcdiuA2tK91Q/OFS8UIghYYhDIBirzL8MsCqyv2OIWi51RAee1O1fplXE8UPmFiWhWrIyTmjM=",
        "SPC_R_T_IV=dEUxNHRGVmhCTmNISkx4Tw==",
        "SPC_T_ID=0x7A96XMg1ulYu5diWw8k3EB9+DygwRdOLyKroXqWTl5jt4Hxl2s2eQABjuIU5SzmXrv2g8mqLxjHa++VYUPZjRR5DglKQrWFLzcdiuA2tK91Q/OFS8UIghYYhDIBirzL8MsCqyv2OIWi51RAee1O1fplXE8UPmFiWhWrIyTmjM=",
        "SPC_T_IV=dEUxNHRGVmhCTmNISkx4Tw==",
        "SPC_RNBV=6039001",
        "shopee_rn_bundle_version=6039001",
        "language=en",
        "language=en",
        "shopee_rn_version=1734688365",
        "SPC_SI=/jxhZwAAAABPMm5JTDExMuRcNQAAAAAAMVVMWEN4SXY=",
        "_gcl_au=1.1.1157564451.1734922577",
        "_ga=GA1.1.996212100.1734922581",
        "_fbp=fb.2.1734922583807.906828666777832583",
        "SPC_SEC_SI=v1-U0lQeUw1RFRaZVI1M1ZEMnPE1qfJZamJ5rlZoWJfgsLUWRq7yoGqGopTa046jgsbMno5rIYByLdrrIiDJ11j1PrKbmaweCR+EBI1bRqOmDE=",
        "csrftoken=zRd8NnrevEQPR4l6GT6icpnunDZMfFFd",
        "SPC_DH=EL9XCHTVSjl3dO3dGNrD+wqhL5Ujg2vyEvgh7wfBpLmZWKGTuJ8DAb7OGSUfWLxoh6iMhfvqHbAI",
        "SPC_B_SI=mTFpZwAAAABlZ2VqbnFLcu2SHwAAAAAAM0hqcEtIRHY=",
        "SPC_ST=.OFhaUE41czZpZWhpYkpSQkvxLptruT9XgiqP0HDvHVw/OWv7vs+qAYlh8dDCv0PGl7kKYFNfxrNc4jQBEQXgaaWz+73TNS4+Mb5t2sLa/yRtdAkyGerds66F+RLZQk+d4akPpvEqK3B5AV8g+xjaQ8qiux9pbvm/KTcGTcg+lGQS/K2895+Ji2jEG1CVk5cC5sLIsiwmUDfnd8h49NG6+5dNrNDPpfOveLCW3Fe0Mw/J0VptgH2f9CqvfBT0XiYs",
        "SPC_EC=.OFhaUE41czZpZWhpYkpSQkvxLptruT9XgiqP0HDvHVw/OWv7vs+qAYlh8dDCv0PGl7kKYFNfxrNc4jQBEQXgaaWz+73TNS4+Mb5t2sLa/yRtdAkyGerds66F+RLZQk+d4akPpvEqK3B5AV8g+xjaQ8qiux9pbvm/KTcGTcg+lGQS/K2895+Ji2jEG1CVk5cC5sLIsiwmUDfnd8h49NG6+5dNrNDPpfOveLCW3Fe0Mw/J0VptgH2f9CqvfBT0XiYs",
        "csrftoken=VN1Y7f5G1Uli1XhnGmZZABx8wcLySWIe",
        "UA=Mozilla%2F5.0%20%28Linux%3B%20Android%2012%3B%20MI%206X%20Build%2FSQ1D.211205.016.A1%3B%20wv%29%20AppleWebKit%2F537.36%20%28KHTML%2C%20like%20Gecko%29%20Version%2F4.0%20Chrome%2F131.0.6778.135%20Mobile%20Safari%2F537.36%20Shopee%20Beeshop%20locale%2Fen%20version%3D33835%20appver%3D33835%20rnver%3D1734688365%20app_type%3D1%20platform%3Dweb_android%20os_ver%3D31",
        "AC_CERT_D=U2FsdGVkX1+sIY/+KPEUtaYpWc7VR/NHYFrS+/4TFuN6IeJPvQ/CO3pncRl8OxseigIzojlafZc3nIjRahcpZ3S6C9bYNJS9jnO/c1VbmMwCLvSdCJINouczlCotsUMndNEl6udq3/Pq4s4wVkUStx7+oV6OHKURfZsk3Ng2AhFEF+5r/I9N3UKk8Dti46Vj+dwXR3ajHgpUHG0PrvugxDhmRuSvnxFnrFjnPxoc0CggMh8I/ylPYFQV+eSkndUKEzs1QkdR4/qhpZeklRvYyZYczmc6tHtXJGMa/V6NANPJtqMTMllsqzNLC+xpYkY0rowGDeKAUFbxXv7mvfnBW/vtA94HXLDaVW4aBgxfloEpcrXkPckHUrtGVVdrc1SFxquAgOPbs9BzXR5THJQbIiTnPsiE84T8kfpFb0yb50QgEnw6dGmWAd4oEDrwCIaRIQmkeS0CNrzSh9oP61uFnDxhUG1xh058ft8+SwSEBlHAmuNu0nym013feKCjUsnRGcIfHqEyXnk7lHoVBHaQsWRbBAycH+PKsqJ9+1E6DrstY+zhqWs+UJS1vTjXeTHguFW2WOuZRrsP3WHtJTNs50c87W8HwVwEGKZdCGCznmwDOhULMe2cq2tae2yRWyEQYWOIY1DPx0Xc0VovZ4PsKqYBrwyIHfirimPcTAlv0dfjvkQmZ0fUwOPiNa5y8iQL19vubKXhHKlG21spU8Y82OubnfT0x146bX7tPvFtcQAJuVitkQUnL2tOYhJeh2H0HMbc2QPTdNU2PQ7W2mwabV9ef9PLDthUlRaaI8UpjNid+SDIuCs0fwWudD+DnYy4B7t10ciImZBYOVBgy8mBU4gYmbpYrr2rOkj+R+eAcxgAl5sl6ZvS6KHUqsR9GQcSKGC46J46VOSbE+3YGGwcHIbKvkYz7zsCZu3El7OTwN3RJPZyXYSb/EWsBmzN2DiOY82iLhSk6qlLxrgYTpYCdLXL0t5y8uX1OFBfBtEmU8PkE8VJr0IoYqDjL7ZN+cEs/l+9zvlhxl1jOebTfuvUR8j00kfsWy4X2+XJxC2lFGZ9VzBdHy0pQYp4z6axCP05QAvWV8kcKAqGbn8F6sBq7g==",
        "_ga_LB1RXY1EGG=GS1.1.1735095758.3.0.1735095759.59.0.0",
    ))

    def __init__(self, cookie=None, proxies=None):
        """Set up the proxy mapping and the static request headers.

        Args:
            cookie: Cookie header value to send; defaults to DEFAULT_COOKIE.
            proxies: ``requests``-style proxy mapping; defaults to a local
                proxy on 127.0.0.1:7890 for both http and https.
        """
        if proxies is None:
            proxies = {
                'http': 'http://127.0.0.1:7890',
                'https': 'http://127.0.0.1:7890',
            }
        self.proxies = proxies
        # Only these headers are sent statically. Other captured headers (Host,
        # X-Api-Source, Af-Ac-Cli-Id, If-None-Match-, X-Shopee-Client-Timezone,
        # Cache-Control, Client-Request-Id, Af-Ac-Enc-Sz-Token, Referer,
        # Accept-Encoding) were left disabled by the author.
        # NOTE(review): appver in the User-Agent (33731) differs from
        # shopee_app_version=33835 in the cookie — confirm this is intended.
        self.headers = {
            "X-Shopee-Language": "en",
            "Shopee_http_dns_mode": "1",
            "Cookie": self.DEFAULT_COOKIE if cookie is None else cookie,
            "User-Agent": "Android app Shopee appver=33731 app_type=1 platform=native_android os_ver=31 Cronet/102.0.5005.61",
        }

    @staticmethod
    def get_req_args(req_url, timeout=30):
        """Ask the local RPC HTTP service for the arguments of ``req_url``.

        Args:
            req_url: Full URL that is about to be requested.
            timeout: Seconds to wait for the local service before giving up.

        Returns:
            The decoded JSON body of the service response.

        Raises:
            Exception: on any transport, HTTP-status or JSON decoding failure;
                the original error is chained as ``__cause__``.
        """
        url = 'http://127.0.0.1:9500/get_req_args'
        try:
            resp = requests.post(url, json={'req_url': req_url}, timeout=timeout)
            # A non-2xx answer means the service failed; do not treat its body
            # as request arguments.
            resp.raise_for_status()
            return resp.json()
        except Exception as e:
            logger.warning(f'http api 失败 ---> {e}')
            raise Exception(f'http api 失败 ---> {e}') from e

    def get_response(self, crawl_url):
        """GET ``crawl_url`` with signed headers, print and return the JSON body.

        Args:
            crawl_url: Full API URL to fetch.

        Returns:
            The decoded JSON response.

        Raises:
            Exception: if the arguments cannot be obtained, or the response's
                ``error`` field is not 0 (reported as an expired cookie).
        """
        req_args = self.get_req_args(crawl_url)
        # Per-request arguments take precedence over the static headers.
        headers = {**self.headers, **req_args}
        # verify=False: certificate checks are disabled for this request.
        response = requests.get(
            crawl_url,
            headers=headers,
            verify=False,
            proxies=self.proxies,
            timeout=30,
        )
        # Log the status before decoding so it is visible even when the body
        # turns out not to be JSON.
        logger.info(f'status_code ---> {response.status_code}')
        resp_json = response.json()
        if resp_json['error'] != 0:
            raise Exception('cookie 失效')
        print(json.dumps(resp_json, ensure_ascii=False, indent=4))
        return resp_json

def main():
    """Fetch the category tree, a category's product list and a product detail, in that order."""
    now_ms = int(datetime.now().timestamp() * 1000)
    targets = (
        # Category tree
        "https://mall.shopee.co.th/api/v4/pages/get_category_tree",
        # Product list of one category
        "https://mall.shopee.co.th/api/v4/recommend/recommend?bundle=category_landing_page&cat_level=2&catid=11045117&limit=20&offset=0",
        # Product detail; apm_ts carries the current time in milliseconds
        f"https://mall.shopee.co.th/api/v4/pdp/get?_pft=255&apm_fs=true&apm_p=7&apm_pid=shopee%2F%40shopee-rn%2Fproduct-page%2FPRODUCT_PAGE&apm_ts={now_ms}&item_id=23454437584&pdp_type=0&shop_id=989284521&tz_offset_minutes=480",
    )

    xp_spider = SpiderDemo()
    for target in targets:
        xp_spider.get_response(target)


if __name__ == '__main__':
    main()

rpc 服务 运行截图:

获取 shopee 结果截图:

后续如果要大规模抓取,就需要大量账号和国外的代理 IP。


http://www.kler.cn/a/458092.html

相关文章:

  • 音频进阶学习九——离散时间傅里叶变换DTFT
  • ubuntu22.04录屏黑屏,飞书共享屏幕黑屏问题
  • DepthLab: From Partial to Complete 论文解读
  • 吐卡机开发——指令合集—未来之窗行业应用跨平台架构
  • 如何使用Python自动化发送消息:用pynput库批量输入并发送文本
  • Git的.gitignore文件详解与常见用法
  • 表单元素(标签)有哪些?
  • 游戏引擎学习第66天
  • 信息差的商业渠道管理:大数据如何优化渠道管理
  • HTML5新特性|01 音频视频
  • 【每日学点鸿蒙知识】属性变量key、waterflow卡顿问题、包无法上传、Video控件播放视频、Vue类似语法
  • “乡村探索者”:村旅游网站的移动应用开发
  • #端云一体化开发# #HarmonyOS Next#《说书人》鸿蒙原生基于角色的对话式文本编辑开发方案
  • 数据结构部分题目(c语言版本)
  • 英伟达(NVIDIA)
  • Vue.js组件开发-使用KeepAlive缓存组件时,组件的状态如何保存?
  • 从0到100:基于Java的大学选修课选课小程序开发笔记(上)
  • CAT3D: Create Anything in 3D with Multi-View Diffusion Models 论文解读
  • Retrofit和rxjava 实现窜行请求,并行请求,循环多次请求,递归请求,错误重试
  • leetcode 14. 最长公共前缀
  • 【Trick】获取kaggle账号的token和api(用于数据集下载)
  • 【项目实战】格式化JSON数据
  • Spring Boot教程之四十:使用 Jasypt 加密 Spring Boot 项目中的密码
  • C++ 设计模式:观察者模式(Observer Pattern)
  • 【Spring Boot 】详解
  • 深度学习J6周 ResNeXt-50实战解析