当前位置：首页 > article >正文

Python爬虫：国家代码（ISO 3166-1）与国家货币代码（ISO 4217）

article 2025/2/23 1:11:55

使用场景：

国际贸易需要国家代码以及国家货币代码

解决方案：

网上搜到的资料大多不完整，因此对三个网页进行爬取并汇总，代码如下，把导出路径改成自己的即可。

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Target pages to scrape.
# Country list page; its first <tbody> is read as the country table.
country_url = 'https://www.guojiadaima.com/'

# First currency source.
currency_url = 'http://www.cnhuilv.com/currency/'

# Second currency source.
currency_url2 = 'https://www.iban.hk/currency-codes'

# Seconds to wait for each response. Without a timeout requests.get() can
# block indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30


def _fetch_page(url):
    """Download *url* and return ``(response, soup)``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection failures or timeouts.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Stop on an error status instead of parsing an error page as data.
    response.raise_for_status()
    response.encoding = 'utf-8'  # adjust to the page's actual encoding
    return response, BeautifulSoup(response.text, 'html.parser')


def _require_element(element, description, url):
    """Return *element*, or raise RuntimeError when the lookup found nothing.

    ``find()`` returns None on a miss; failing here names the page and the
    element instead of surfacing later as an AttributeError on None.
    """
    if element is None:
        raise RuntimeError(
            '{} not found at {}; the page layout may have changed'.format(
                description, url
            )
        )
    return element


# Country page: the table body that holds the country rows.
country_response, country_soup = _fetch_page(country_url)
country_tbodyData = _require_element(
    country_soup.find('tbody'), '<tbody> element', country_url
)

# First currency page: the container that wraps the currency table.
currency_response, currency_soup = _fetch_page(currency_url)
currency_tbodyData = _require_element(
    currency_soup.find('div', class_='table-responsive rb1'),
    'currency table container',
    currency_url,
)

# Second currency page: the currency-code table itself.
currency_response2, currency_soup2 = _fetch_page(currency_url2)
currency_tbodyData2 = _require_element(
    currency_soup2.find(
        'table', class_='table table-bordered downloads tablesorter'
    ),
    'currency table',
    currency_url2,
)

# Country rows collected as dicts with the keys
# '国家名称', '英文名称' and '国家编码3'.
data = []

# Walk the country table, skipping the first (header) row.
for row in country_tbodyData.find_all('tr')[1:]:
    columns = row.find_all('td')
    # The highest cell index read below is 6, so a row needs at least seven
    # cells. Shorter rows are skipped instead of raising IndexError.
    if len(columns) < 7:
        continue

    country_name = columns[3].text.strip()  # Chinese short name
    # Rows without a Chinese name carry no usable lookup key.
    if not country_name:
        continue

    english_name = columns[4].text.strip()  # English short name
    iso3_code = columns[6].text.strip()  # "ISO 3" column; presumably the alpha-3 code

    data.append({
        '国家名称': country_name,
        '英文名称': english_name,
        '国家编码3': iso3_code
    })

# Currency rows from the first source, as dicts with the keys
# '国家名称', '货币代码' and '货币名称'.
currency_data = []

# Walk the first currency table, skipping the first (header) row.
for row in currency_tbodyData.find_all('tr')[1:]:
    columns = row.find_all('td')
    # The highest cell index read below is 3, so a row needs at least four
    # cells. Shorter rows are skipped instead of raising IndexError.
    if len(columns) < 4:
        continue

    country_name = columns[3].text.strip()
    # Rows without a country name cannot be matched to the country list.
    if not country_name:
        continue

    currency_code = columns[0].text.strip()
    # The cell looks like "人民币 - Chinese Yuan"; keep only the part before
    # the first hyphen.
    currency_name = columns[2].text.strip().split('-')[0].strip()

    currency_data.append({
        '国家名称': country_name,
        '货币代码': currency_code,
        '货币名称': currency_name
    })

# Currency rows from the second source, as dicts with the keys
# '国家名称', '货币代码' and '货币名称'.
currency_data2 = []

# Walk the second currency table, skipping the first (header) row.
for row in currency_tbodyData2.find_all('tr')[1:]:
    columns = row.find_all('td')
    # The highest cell index read below is 2, so a row needs at least three
    # cells. Shorter rows are skipped instead of raising IndexError.
    if len(columns) < 3:
        continue

    country_name = columns[0].text.strip()
    # Rows without a country name cannot be matched to the country list.
    if not country_name:
        continue

    currency_code = columns[2].text.strip()
    currency_name = columns[1].text.strip()

    currency_data2.append({
        '国家名称': country_name,
        '货币代码': currency_code,
        '货币名称': currency_name
    })

# Index both currency lists by country name so each country needs only a
# dictionary lookup. When a name occurs more than once in a list, the last
# entry replaces the earlier ones.
currency_dict = dict((entry['国家名称'], entry) for entry in currency_data)
currency_dict2 = dict((entry['国家名称'], entry) for entry in currency_data2)

# One output row per country, with currency fields added when a source has them.
data_collect = []
for item in data:
    name = item.get('国家名称')
    record = {
        '国家名称': name,
        '英文名称': item.get('英文名称'),
        '国家编码3': item.get('国家编码3'),
    }

    # The first source takes precedence; the second is used only when the
    # first has no entry for this country.
    match = currency_dict.get(name) or currency_dict2.get(name)
    if match:
        record['货币代码'] = match.get('货币代码')
        record['货币名称'] = match.get('货币名称')
    # With no match the currency keys are left out of the row entirely.

    data_collect.append(record)

# Full path of the Excel file to write; change it to a location on your own
# machine. Backslashes in a Windows path are doubled inside a normal string
# literal, for example:
#   output_path = 'C:\\Users\\YourUsername\\Documents\\output.xlsx'
# A Linux/macOS path uses forward slashes, for example:
#   output_path = '/home/yourusername/Documents/output.xlsx'
output_path = 'C:\\Users\\Administrator\\Desktop\\国家和货币8.xlsx'

# Turn the merged rows into a DataFrame and write it without the index column.
df = pd.DataFrame(data_collect)
df.to_excel(output_path, index=False)

print("完成输出")

查看全文

http://www.kler.cn/a/391436.html