当前位置: 首页 > article >正文

爬取鲜花网站数据

待爬取网页:https://www.haohua.com/xianhua/
(原文此处为该网页的截图)
代码:

import requests

from lxml import  etree
import pandas as pd

from lxml import html
import xlwt

# Listing page that this script scrapes.
url = "https://www.haohua.com/xianhua/"

# Request headers for fetching the listing as an HTML document.
#
# Kept deliberately small:
# - "accept" asks for HTML, since the response is parsed as a document.
# - No "cookie" header: a cookie string copied out of a browser would hand
#   unrelated session/location data to this site.
# - No "accept-encoding" header: requests then advertises only the encodings
#   it can actually decode in this environment (br/zstd need optional
#   packages), so response.text is never left as undecoded bytes.
header = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "accept-language": "zh-CN,zh;q=0.9",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
}

# Without a timeout requests waits forever on a stalled connection.
response = requests.get(url=url, headers=header, timeout=10)

# Stop on 4xx/5xx instead of parsing an error page as if it were data.
response.raise_for_status()

# Decode the body as UTF-8 regardless of the charset the server declared.
response.encoding = "utf-8"

# Named `tree` (not `html`) so the `html` module imported from lxml at the
# top of the file is not shadowed.
tree = etree.HTML(response.text)

# One entry per product card; the three lists are filled in lockstep so they
# always have equal length and can be used directly as DataFrame columns.
fresh_flowers = []
popularity = []
original_price = []

# Cards that lack the expected markup are counted rather than silently lost.
skipped = 0

# Process the page card by card. Querying titles and <p> nodes separately
# would misalign rows as soon as a single card is irregular.
for card in tree.xpath('//a[@class = "info imghover"]'):
    title = card.find("h5")

    # First direct <p> child that has at least three child nodes, because
    # children [1] and [2] are read below.
    # NOTE(review): which child holds the price and which the popularity is
    # taken from the original column names -- confirm against the live page.
    details = next((p for p in card.findall("p") if len(p) >= 3), None)

    if title is None or details is None:
        skipped += 1
        continue

    fresh_flowers.append(title.text)
    original_price.append(details[1].text)
    popularity.append(details[2].text)

if skipped:
    print(f"跳过 {skipped} 个结构不完整的条目")

# Organise the scraped columns as a dict: key = column header in the sheet.
data = {
    "fresh_flowers": fresh_flowers,
    "original_price": original_price,
    "popularity": popularity
}

# Build the DataFrame from the aligned columns.
df = pd.DataFrame(data)

# Write the DataFrame to an Excel file, omitting the numeric row index.
df.to_excel("xianhua_data.xlsx", index=False)

print("数据已成功写入Excel文件")



# print(len(xianhua_name))
# print(len(original_price))
# print(len(popularity))





结果文件:xianhua_data.xlsx
(原文此处为结果文件的截图)


http://www.kler.cn/a/528640.html

相关文章:

  • 想学习JAVA编程,请问应该如何去学习?
  • arkts bridge使用示例
  • 什么是Pytest Fixtures作用域及如何为Pytest Fixtures设置合适的作用域
  • 算法随笔_33: 132模式
  • Node.js MySQL:深度解析与最佳实践
  • 【PLL】杂散生成和调制
  • 使用 Docker(Podman) 部署 MongoDB 数据库及使用详解
  • 白话DeepSeek-R1论文(三)| DeepSeek-R1蒸馏技术:让小模型“继承”大模型的推理超能力
  • 为AI聊天工具添加一个知识系统 之82 详细设计之23 符号逻辑 正则表达式规则 之1
  • 如何实现滑动列表功能
  • 智慧园区综合管理系统如何实现多个维度的高效管理与安全风险控制
  • c++ list的front和pop_front的概念和使用案例
  • 【3】阿里面试题整理
  • http 请求类型及其使用场景
  • python学习——函数的返回值
  • 【python】tkinter实现音乐播放器(源码+音频文件)【独一无二】
  • error: RPC failed; curl 56 OpenSSL SSL_read: SSL_ERROR_SYSCALL, errno 10054
  • C#面向对象(封装)
  • C语言:整型提升
  • 前端知识速记:节流与防抖
  • Vue2.x简介
  • MongoDB快速上手(包会用)
  • 浅析DDOS攻击及防御策略
  • Linux系统部署Python项目持续运行配置
  • 数据结构【单链表操作大全详解】【c语言版】(只有输入输出为了方便用的c++)
  • LightM-UNet(2024 CVPR)