《王者荣耀》皮肤爬虫源码
1.爬取网页
https://pvp.qq.com/web201605/herolist.shtml
2. Python 代码
import requests
from bs4 import BeautifulSoup
import os
import threading
from queue import Queue
def mul(x):
    """Create the download directory *x* if it does not already exist.

    Prints a success message only when the directory was actually created;
    an existing directory is silently accepted.
    """
    if not os.path.exists(x):
        os.mkdir(x)
        print("目录创建成功")
# Mobile user-agent so the site serves the expected markup.
header = {
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Mobile Safari/537.36 Edg/113.0.1774.57'
}
url1 = 'https://pvp.qq.com/web201605/herolist.shtml'  # hero list page
url2 = 'https://pvp.qq.com/web201605/'                # base for the relative hero links

res = requests.get(url=url1, headers=header)
# The page is not UTF-8; let requests detect the real encoding before parsing.
res.encoding = res.apparent_encoding
soup = BeautifulSoup(res.text, 'html.parser')
# Slice of the <title> text that holds the game name — used later as the
# download directory name. NOTE(review): depends on the exact title layout;
# presumably yields "王者荣耀" — confirm against the live page.
title = soup.find('title').text[13:17]

wy_list = []    # relative hero-detail URLs
name_list = []  # hero display names, parallel to wy_list
# The first 50 anchors matched by this selector appear to be navigation
# links rather than heroes, so they are skipped — TODO confirm the cutoff.
for s, a in enumerate(soup.select("div>div>ul>li>a")):
    if s >= 50:
        wy_list.append(a.attrs['href'])
        name_list.append(a.text)
def wy(x, y):
    """Download one skin image per hero.

    x: list of relative hero-page URLs (joined onto the module-level url2).
    y: parallel list of hero names, used as the output filenames.

    For each hero page, the background-image URL is sliced out of the inline
    style attribute of the "zk-con1 zk-con" element and the image bytes are
    written to 王者荣耀/<name>.png.
    """
    for href, name in zip(x, y):
        page = requests.get(url=url2 + href, headers=header)
        hero_soup = BeautifulSoup(page.text, 'html.parser')
        banner = hero_soup.find(class_="zk-con1 zk-con")
        # [16:90] slices the raw image URL out of the inline style string
        # (style presumably looks like "background:url('//game.gtimg.cn/…')")
        # — fragile: breaks if the style text or URL length changes.
        img_url = "https:" + banner.attrs['style'][16:90]
        img = requests.get(url=img_url, headers=header)
        # NOTE(review): the directory name is hardcoded here while main()
        # creates a directory named `title` — these must match at runtime.
        with open("王者荣耀/" + str(name) + ".png", mode='wb') as file:
            file.write(img.content)
        print("{}图片爬取成功".format(name))
if __name__ == "__main__":
    mul(title)              # create the download directory
    wy(wy_list, name_list)  # fetch every collected skin image