爬虫 APP 逆向 ---> shopee(虾皮) 电商
shopee 泰国站点:https://shopee.co.th/
shopee 网页访问时,直接弹出使用 app 登录查看,那就登录 shopee 泰国站点 app。
手机抓包:分类接口
接口:https://mall.shopee.co.th/api/v4/pages/get_category_tree
请求参数:
GET /api/v4/pages/get_category_tree HTTP/2
Host: mall.shopee.co.th
Cookie: 请求的cookie
X-Api-Source: rn
X-Shopee-Language: en
If-None-Match-: 55b03-158a0f98f80a09649f8dcfd2f8d263d4
X-Shopee-Client-Timezone: Asia/Shanghai
Cache-Control: no-cache, no-store
Client-Request-Id: 866aa1ae-8ad1-4fdb-ab12-5c1cd5326645.379
User-Agent: Android app Shopee appver=33835 app_type=1 platform=native_android os_ver=31 Cronet/102.0.5005.61
Af-Ac-Enc-Sz-Token: 一大串字符
X-Sap-Ri: 616c6b67968b98bfd88b87180110349e24abfda49173147d6085
D42920b3: 一大串字符
D609da9f: CTEoMyxcrcDaO0/OkIoS0bVWFEb=
651bb94a: 0+eWOk+Gy1GgoMssBoNnarzOM8E=
Referer: https://mall.shopee.co.th/
Accept-Encoding: gzip, deflate, br
Cookie 不用说,是登录后的授权信息;其他参数经分析都是在 app 中生成的。
对于 app 中生成的参数,解决方法有三个:
- 1. 直接逆向参数,然后使用 python 还原
- 2. 把 java 代码扣出来,补 java 代码,然后编译成 jar 包,python 调用 jar 包生成
- 3. 直接通过 frida 的 rpc ,hook 住关键函数,python 通过 http 服务去调用
这里使用 第三种方法:通过 frida 的 rpc
import json
import subprocess
import time
from pathlib import Path

import frida
import uvicorn
from fastapi import FastAPI, HTTPException, Request
# Frida agent source (JavaScript) that gets injected into the target process.
# It exposes one RPC export, `callfunc1(req_url)`, which calls
# `requestDefense(req_url, null)` on the live `com.shopee.shpssdk.SHPSSDK`
# instances found on the Java heap and returns the resulting Hashtable as a
# JSON string of key/value pairs.
js_code = """
console.log("脚本载入成功");

function call_request_defense(req_url) {  // function exposed through rpc.exports
    let ret_result;
    Java.perform(function x() {
        console.log("注入成功");
        console.log(`req_url ---> ${req_url}`);
        let ret_hashtable;
        let TargetClass = "com.shopee.shpssdk.SHPSSDK";
        // Walk every live instance of the target class on the Java heap.
        Java.choose(TargetClass, {
            onMatch: function (instance) {
                console.log("Instance found: " + instance);
                // Call the instance method; the last matched instance wins.
                ret_hashtable = instance.requestDefense(req_url, null)
                console.log(`ret_hashtable ---> ${ret_hashtable}`);
            },
            onComplete: function () {
                console.log("Search complete");
            }
        });
        // No instance matched (or the call returned null): leave ret_result
        // unset so the explicit error below is raised, instead of failing on
        // `undefined.keySet()` in here.
        if (ret_hashtable === undefined || ret_hashtable === null) {
            return;
        }
        let obj = {}
        let key_set_it = ret_hashtable.keySet().iterator();
        while (key_set_it.hasNext()) {
            let key_str = key_set_it.next().toString();
            obj[key_str] = ret_hashtable.get(key_str).toString();
        }
        ret_result = JSON.stringify(obj);
    });
    // Thrown from the export itself (not from inside the Java.perform
    // callback) so that the RPC caller receives the failure.
    if (ret_result === undefined) {
        throw new Error("requestDefense produced no result: no live com.shopee.shpssdk.SHPSSDK instance was found");
    }
    return ret_result;
}

rpc.exports = {
    // The export name callfunc1 must not contain uppercase letters or underscores.
    callfunc1: call_request_defense
};
"""
def my_message_handler(message, payload):
    """Print a Frida script message followed by its payload, one per line."""
    for part in (message, payload):
        print(part)
def run_frida_script(target=31785):
    """Attach to the target app through frida-server and load ``js_code``.

    Forwards local TCP port 27042 to the device with adb, attaches to
    ``target`` on the remote Frida device, creates a script from ``js_code``,
    registers ``my_message_handler`` for its messages and loads it.

    Args:
        target: Process to attach to; it is passed straight to
            ``device.attach``. Defaults to the PID used in this write-up
            (look it up with ``frida-ps``). A process name such as
            ``'蝦皮購物'`` should also work — TODO confirm against the
            installed frida version.

    Returns:
        The loaded script object, or ``None`` when any step fails (the error
        is printed, not raised).
    """
    try:
        try:
            # Argument list instead of a shell string: no shell parsing involved.
            subprocess.run(["adb", "forward", "tcp:27042", "tcp:27042"])
        except OSError as e:
            # adb is not runnable; keep going in case the port is already forwarded.
            print(f'adb forward 失败 ---> {e}')
        device = frida.get_remote_device()
        time.sleep(3)  # wait 3 seconds so the attach is not attempted too early
        session = device.attach(target)
        time.sleep(3)
        script = session.create_script(js_code)
        # Register the handler before load() so messages emitted while the
        # script is loading are not missed.
        script.on('message', my_message_handler)
        script.load()
        return script
    except Exception as e:
        print(f'frida 失败 ---> {e}')
        return None
app = FastAPI()
# Frida script handle shared by all requests; created on first use.
global_script = None


@app.post("/get_req_args")
async def root(request: Request):
    """Return the app-generated request arguments for a given URL.

    Expects a JSON body of the form ``{"req_url": "<url>"}``. The URL is
    passed to the ``callfunc1`` RPC export of the Frida script and the JSON
    string it returns is decoded and sent back as the response.

    Raises:
        HTTPException: 400 when ``req_url`` is missing from the body, 503
            when the Frida script could not be created, 502 when the RPC
            call or the decoding of its result fails.
    """
    global global_script
    data_dict = await request.json()
    req_url = data_dict.get('req_url') if isinstance(data_dict, dict) else None
    if not req_url:
        raise HTTPException(status_code=400, detail="missing 'req_url' in JSON body")
    if not global_script:
        global_script = run_frida_script()
    if not global_script:
        # run_frida_script() returns None on failure; the next request retries.
        raise HTTPException(status_code=503, detail="frida script is not available")
    try:
        json_string = global_script.exports_sync.callfunc1(req_url)
        json_data = json.loads(json_string)
    except frida.InvalidOperationError as e:
        # The script/session is gone (e.g. the app was restarted): drop the
        # stale handle so the next request attaches again.
        global_script = None
        raise HTTPException(status_code=502, detail=f"frida script is no longer usable: {e}") from e
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"requestDefense call failed: {e}") from e
    print(f'req_args ---> {json_data}')
    return json_data
def main():
    """Serve this module's ``app`` with uvicorn on 0.0.0.0:9500."""
    # uvicorn receives an import string "<module>:app" built from this file's name.
    module_name = Path(__file__).stem
    uvicorn.run(f'{module_name}:app', host="0.0.0.0", port=9500)


if __name__ == '__main__':
    main()
通过 frida-ps 查看要 hook 的进程
这里要 hook 泰国的,所以 pid 是 31785
import re
import time
import copy
import json
import requests
from datetime import datetime
from loguru import logger
import traceback
# Silence urllib3 warnings for the whole process. The spider below sends its
# requests with verify=False, which presumably is what would otherwise
# trigger a warning on each call.
requests.packages.urllib3.disable_warnings()
class SpiderDemo(object):
    """Minimal Shopee app-API client that signs requests via the local RPC service.

    For each URL, the app-generated request arguments are fetched from the
    local HTTP service (``/get_req_args`` on port 9500), merged over the
    static headers, and the URL is then requested through the configured
    proxy.
    """

    # NOTE(review): this is a logged-in session cookie committed in source.
    # Prefer passing ``cookie=`` to the constructor (e.g. loaded from a file
    # or environment variable) and rotate this value.
    # The literal is kept as two adjacent pieces that Python concatenates into
    # a single line; a header value must not contain a line break.
    DEFAULT_COOKIE = (
        "SPC_DID=O142PAj/32dZfKX4N53blQ1t5reVeGhPJmQF9ikDMew=; SPC_F=5858e12b8c30ccb3_unknown; REC_T_ID=8cbc4880-beab-11ef-b727-36f242b5381f; SPC_AFTID=b25c3453-e2ad-42f0-8ee4-03e98d76751c; SPC_CLIENTID=O142PAj/32dZfKX4hhcnmybxabffjfeq; shopee_app_version=33835; SPC_F=5858e12b8c30ccb3_unknown; userid=1421474215; shopid=1420696038; username=kingking482; shopee_token=EAwLsszK+CeDVWxZpYaNQu3F/7LcnX3Hvzgm39E4x4uNynjHMp+p5Fo2HFwWNrGoVritm4eCtfqAHFuZE2JVq7E=; SPC_U=1421474215; SPC_R_T_ID=0x7A96XMg1ulYu5diWw8k3EB9+DygwRdOLyKroXqWTl5jt4Hxl2s2eQABjuIU5SzmXrv2g8mqLxjHa++VYUPZjRR5DglKQrWFLzcdiuA2tK91Q/OFS8UIghYYhDIBirzL8MsCqyv2OIWi51RAee1O1fplXE8UPmFiWhWrIyTmjM=; SPC_R_T_IV=dEUxNHRGVmhCTmNISkx4Tw==; SPC_T_ID=0x7A96XMg1ulYu5diWw8k3EB9+DygwRdOLyKroXqWTl5jt4Hxl2s2eQABjuIU5SzmXrv2g8mqLxjHa++VYUPZjRR5DglKQrWFLzcdiuA2tK91Q/OFS8UIghYYhDIBirzL8MsCqyv2OIWi51RAee1O1fplXE8UPmFiWhWrIyTmjM=; SPC_T_IV=dEUxNHRGVmhCTmNISkx4Tw==; SPC_RNBV=6039001; shopee_rn_bundle_version=6039001; language=en; language=en; shopee_rn_version=1734688365; SPC_SI=/jxhZwAAAABPMm5JTDExMuRcNQAAAAAAMVVMWEN4SXY=; _gcl_au=1.1.1157564451.1734922577; _ga=GA1.1.996212100.1734922581; _fbp=fb.2.1734922583807.906828666777832583; SPC_SEC_SI=v1-U0lQeUw1RFRaZVI1M1ZEMnPE1qfJZamJ5rlZoWJfgsLUWRq7yoGqGopTa046jgsbMno5rIYByLdrrIiDJ11j1PrKbmaweCR+EBI1bRqOmDE=; csrftoken=zRd8NnrevEQPR4l6GT6icpnunDZMfFFd; SPC_DH=EL9XCHTVSjl3dO3dGNrD+wqhL5Ujg2vyEvgh7wfBpLmZWKGTuJ8DAb7OGSUfWLxoh6iMhfvqHbAI; SPC_B_SI=mTFpZwAAAABlZ2VqbnFLcu2SHwAAAAAAM0hqcEtIRHY=; SPC_ST=.OFhaUE41czZpZWhpYkpSQkvxLptruT9XgiqP0HDvHVw/OWv7vs+qAYlh8dDCv0PGl7kKYFNfxrNc4jQBEQXgaaWz+73TNS4+Mb5t2sLa/yRtdAkyGerds66F+RLZQk+d4akPpvEqK3B5AV8g+xjaQ8qiux9pbvm/KTcGTcg+lGQS/K2895+Ji2jEG1CVk5cC5sLIsiwmUDfnd8h49NG6+5dNrNDPpfOveLCW3Fe0Mw/J0VptgH2f9CqvfBT0XiYs; "
        "SPC_EC=.OFhaUE41czZpZWhpYkpSQkvxLptruT9XgiqP0HDvHVw/OWv7vs+qAYlh8dDCv0PGl7kKYFNfxrNc4jQBEQXgaaWz+73TNS4+Mb5t2sLa/yRtdAkyGerds66F+RLZQk+d4akPpvEqK3B5AV8g+xjaQ8qiux9pbvm/KTcGTcg+lGQS/K2895+Ji2jEG1CVk5cC5sLIsiwmUDfnd8h49NG6+5dNrNDPpfOveLCW3Fe0Mw/J0VptgH2f9CqvfBT0XiYs; csrftoken=VN1Y7f5G1Uli1XhnGmZZABx8wcLySWIe; UA=Mozilla%2F5.0%20%28Linux%3B%20Android%2012%3B%20MI%206X%20Build%2FSQ1D.211205.016.A1%3B%20wv%29%20AppleWebKit%2F537.36%20%28KHTML%2C%20like%20Gecko%29%20Version%2F4.0%20Chrome%2F131.0.6778.135%20Mobile%20Safari%2F537.36%20Shopee%20Beeshop%20locale%2Fen%20version%3D33835%20appver%3D33835%20rnver%3D1734688365%20app_type%3D1%20platform%3Dweb_android%20os_ver%3D31; AC_CERT_D=U2FsdGVkX1+sIY/+KPEUtaYpWc7VR/NHYFrS+/4TFuN6IeJPvQ/CO3pncRl8OxseigIzojlafZc3nIjRahcpZ3S6C9bYNJS9jnO/c1VbmMwCLvSdCJINouczlCotsUMndNEl6udq3/Pq4s4wVkUStx7+oV6OHKURfZsk3Ng2AhFEF+5r/I9N3UKk8Dti46Vj+dwXR3ajHgpUHG0PrvugxDhmRuSvnxFnrFjnPxoc0CggMh8I/ylPYFQV+eSkndUKEzs1QkdR4/qhpZeklRvYyZYczmc6tHtXJGMa/V6NANPJtqMTMllsqzNLC+xpYkY0rowGDeKAUFbxXv7mvfnBW/vtA94HXLDaVW4aBgxfloEpcrXkPckHUrtGVVdrc1SFxquAgOPbs9BzXR5THJQbIiTnPsiE84T8kfpFb0yb50QgEnw6dGmWAd4oEDrwCIaRIQmkeS0CNrzSh9oP61uFnDxhUG1xh058ft8+SwSEBlHAmuNu0nym013feKCjUsnRGcIfHqEyXnk7lHoVBHaQsWRbBAycH+PKsqJ9+1E6DrstY+zhqWs+UJS1vTjXeTHguFW2WOuZRrsP3WHtJTNs50c87W8HwVwEGKZdCGCznmwDOhULMe2cq2tae2yRWyEQYWOIY1DPx0Xc0VovZ4PsKqYBrwyIHfirimPcTAlv0dfjvkQmZ0fUwOPiNa5y8iQL19vubKXhHKlG21spU8Y82OubnfT0x146bX7tPvFtcQAJuVitkQUnL2tOYhJeh2H0HMbc2QPTdNU2PQ7W2mwabV9ef9PLDthUlRaaI8UpjNid+SDIuCs0fwWudD+DnYy4B7t10ciImZBYOVBgy8mBU4gYmbpYrr2rOkj+R+eAcxgAl5sl6ZvS6KHUqsR9GQcSKGC46J46VOSbE+3YGGwcHIbKvkYz7zsCZu3El7OTwN3RJPZyXYSb/EWsBmzN2DiOY82iLhSk6qlLxrgYTpYCdLXL0t5y8uX1OFBfBtEmU8PkE8VJr0IoYqDjL7ZN+cEs/l+9zvlhxl1jOebTfuvUR8j00kfsWy4X2+XJxC2lFGZ9VzBdHy0pQYp4z6axCP05QAvWV8kcKAqGbn8F6sBq7g==; _ga_LB1RXY1EGG=GS1.1.1735095758.3.0.1735095759.59.0.0"
    )

    # Local proxy used for the Shopee requests when none is supplied.
    DEFAULT_PROXIES = {
        'http': 'http://127.0.0.1:7890',
        'https': 'http://127.0.0.1:7890',
    }

    # Seconds to wait on each HTTP call before giving up instead of hanging.
    REQUEST_TIMEOUT = 30

    def __init__(self, cookie=None, proxies=None):
        """Set up the proxy mapping and the static request headers.

        Args:
            cookie: Cookie header value to send; defaults to ``DEFAULT_COOKIE``.
            proxies: ``requests``-style proxy mapping; defaults to a copy of
                ``DEFAULT_PROXIES``. Pass ``{}`` to configure no proxy here.
        """
        self.proxies = dict(self.DEFAULT_PROXIES) if proxies is None else proxies
        # Only the static headers live here; the per-request arguments
        # returned by get_req_args() are merged on top in get_response().
        self.headers = {
            "X-Shopee-Language": "en",
            "Shopee_http_dns_mode": "1",
            "Cookie": self.DEFAULT_COOKIE if cookie is None else cookie,
            "User-Agent": "Android app Shopee appver=33731 app_type=1 platform=native_android os_ver=31 Cronet/102.0.5005.61",
        }

    @staticmethod
    def get_req_args(req_url):
        """Ask the local RPC service for the request arguments of ``req_url``.

        Args:
            req_url: Full URL that is about to be requested.

        Returns:
            The decoded JSON body returned by the service.

        Raises:
            Exception: When the call fails, returns an HTTP error status, or
                the body is not valid JSON (the original error is chained).
        """
        try:
            resp = requests.post(
                'http://127.0.0.1:9500/get_req_args',
                json={'req_url': req_url},
                timeout=SpiderDemo.REQUEST_TIMEOUT,
            )
            # Do not treat an error payload from the service as request arguments.
            resp.raise_for_status()
            return resp.json()
        except Exception as e:
            logger.warning(f'http api 失败 ---> {e}')
            raise Exception(f'http api 失败 ---> {e}') from e

    def get_response(self, crawl_url):
        """Request ``crawl_url`` with signed headers and print the JSON result.

        Args:
            crawl_url: Full API URL to fetch.

        Returns:
            The decoded JSON response.

        Raises:
            Exception: When the response's ``error`` field is not ``0``.
        """
        req_args = self.get_req_args(crawl_url)
        headers = {**self.headers, **req_args}
        response = requests.get(
            crawl_url,
            headers=headers,
            verify=False,
            proxies=self.proxies,
            timeout=self.REQUEST_TIMEOUT,
        )
        # Log the status first so it is visible even if the body is not JSON.
        logger.info(f'status_code ---> {response.status_code}')
        resp_json = response.json()
        error_int = resp_json['error']
        if 0 != error_int:
            logger.warning(f'error ---> {error_int}')
            raise Exception('cookie 失效')
        print(json.dumps(resp_json, ensure_ascii=False, indent=4))
        return resp_json
def main():
    """Fetch the category tree, a category's product list and one product detail."""
    # 13-digit millisecond timestamp embedded in the product-detail URL.
    timestamp13 = int(datetime.now().timestamp() * 1000)
    # Requested in this order: category tree, product list, product detail.
    crawl_urls = (
        "https://mall.shopee.co.th/api/v4/pages/get_category_tree",
        "https://mall.shopee.co.th/api/v4/recommend/recommend?bundle=category_landing_page&cat_level=2&catid=11045117&limit=20&offset=0",
        f"https://mall.shopee.co.th/api/v4/pdp/get?_pft=255&apm_fs=true&apm_p=7&apm_pid=shopee%2F%40shopee-rn%2Fproduct-page%2FPRODUCT_PAGE&apm_ts={timestamp13}&item_id=23454437584&pdp_type=0&shop_id=989284521&tz_offset_minutes=480",
    )
    xp_spider = SpiderDemo()
    for crawl_url in crawl_urls:
        xp_spider.get_response(crawl_url)


if __name__ == '__main__':
    main()
rpc 服务 运行截图:
获取 shopee 结果截图:
后续如果大规模抓取,就需要大量账号,和国外的代理 ip