1. Scraping code
import requests
import time
import random
import hashlib
import pandas as pd
url = "https://piaofang.maoyan.com/dashboard-ajax?"
header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"
}

# 13-digit millisecond timestamp; use a name other than "time" so the time module is not shadowed
timestamp = str(int(time.time() * 1000))
num = random.randint(1, 1000)

# signKey is the MD5 of this fixed-order query string; the User-Agent value is the Base64 of the UA above
data = f"method=GET&timeStamp={timestamp}&User-Agent=TW96aWxsYS81LjAgKFdpbmRvd3MgTlQgMTAuMDsgV2luNjQ7IHg2NCkgQXBwbGVXZWJLaXQvNTM3LjM2IChLSFRNTCwgbGlrZSBHZWNrbykgQ2hyb21lLzEzMy4wLjAuMCBTYWZhcmkvNTM3LjM2IEVkZy8xMzMuMC4wLjA=&index={num}&channelId=40009&sVersion=2&key=A013F70DB97834C0A5492378BD76C53A"
md5_hash = hashlib.md5(data.encode()).hexdigest()
param = {
    "orderType": 0,
    "uuid": "194fa637589c8-0c3f722a89eccf-4c657b58-1fa400-194fa637589c8",
    "timeStamp": timestamp,
    "User-Agent": "TW96aWxsYS81LjAgKFdpbmRvd3MgTlQgMTAuMDsgV2luNjQ7IHg2NCkgQXBwbGVXZWJLaXQvNTM3LjM2IChLSFRNTCwgbGlrZSBHZWNrbykgQ2hyb21lLzEzMy4wLjAuMCBTYWZhcmkvNTM3LjM2IEVkZy8xMzMuMC4wLjA=",
    "index": num,
    "channelId": 40009,
    "sVersion": 2,
    "signKey": md5_hash,
    "WuKongReady": "h5"
}
response = requests.get(url, headers=header, params=param)
result = response.json()
print(result)

movie_data = []
for item in result["movieList"]["data"]["list"]:
    movie_info = {
        "电影名称": item["movieInfo"]["movieName"],    # movie name
        "总票房": item["sumBoxDesc"],                  # total box office
        "上映信息": item["movieInfo"]["releaseInfo"],  # release info
        "票房占比": item["splitBoxRate"],              # box-office share
        "场次": item["showCount"]                      # number of screenings
    }
    movie_data.append(movie_info)

print(movie_data)
df = pd.DataFrame(movie_data)
df.to_excel(r"C:\Users\zzx\Desktop\movie_data2.xlsx", index=False)
print("数据已保存到 movie_data2.xlsx")  # data saved to movie_data2.xlsx
2. Output screenshot
