当前位置：首页 > article >正文

cs*n 网页内容转为html 加入 onenote

article 2025/2/22 9:14:52

CSDN 上有不少好用的内容，我们怎么把它们加到 OneNote 里“吃灰”呢？

一、创建新html

create_html.py

import sys

def create_html_file(filename):
    """Write a minimal UTF-8 HTML5 skeleton to ``filename``.

    The generated page has an empty ``<body>`` and uses ``filename`` as the
    text of its ``<title>``. An existing file at that path is overwritten.

    Args:
        filename: Path of the HTML file to create. When it is falsy
            (e.g. an empty string) a hint is printed and nothing is written.

    Returns:
        None.
    """
    import html

    # Nothing to do without a target file name.
    if not filename:
        print("请提供HTML文件名")
        return

    # Escape the name before embedding it in markup: a file name containing
    # "&" or "<" would otherwise yield a malformed <title> element.
    title = html.escape(str(filename), quote=False)

    # Build the HTML skeleton.
    html_content = f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{title}</title>
</head>
<body>
</body>
</html>
"""

    # Write the skeleton to disk as UTF-8.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(html_content)

    print(f"文件 {filename} 已创建")

if __name__ == "__main__":
    # Exactly one command-line argument is expected: the HTML file to create.
    cli_args = sys.argv[1:]
    if len(cli_args) == 1:
        create_html_file(cli_args[0])
    else:
        print("用法: python create_html.py <html文件名>")

使用如下命令创建 html 文件：
python create_html.py <html文件名>
（原文此处为命令运行截图）

二、网页找内容

（原文此处为浏览器开发者工具截图）
在开发者工具中找到 content_views 元素，它里面就是文章的真正内容。
右键该元素，选择 Copy -> Copy outerHTML。
然后把复制的内容粘贴到刚创建的 html 文件的 body 中。

三、替换内容

modify_html.py

import sys
import os
from bs4 import BeautifulSoup

def modify_html_file(filename):
    """Rewrite the HTML file ``filename`` in place with clutter stripped.

    The file is parsed with BeautifulSoup's ``html.parser``; then:

    * every ``<div>`` returned by ``find_all('div', class_=...)`` for the
      classes ``'hljs-button {2}'`` and ``'hide-preCode-box'`` is replaced
      by a ``<p>`` containing a line of dashes;
    * every ``<ul>`` found with class ``'pre-numbering'`` is removed.

    The resulting document is written back to the same path as UTF-8.

    Args:
        filename: Path of the HTML file to modify. If it is falsy, or the
            path does not exist, a message is printed and nothing changes.

    Returns:
        None.
    """
    # Guard clauses: a usable, existing path is required.
    if not filename:
        print("请提供HTML文件名")
        return
    if not os.path.exists(filename):
        print(f"文件 {filename} 不存在")
        return

    # Load the markup and parse it.
    with open(filename, 'r', encoding='utf-8') as source:
        markup = source.read()
    soup = BeautifulSoup(markup, 'html.parser')

    # Swap each targeted <div> for a <p> that acts as a visual separator.
    separator = "----------------------------------------------------------------------------------------------------------------"
    for css_class in ('hljs-button {2}', 'hide-preCode-box'):
        for target in soup.find_all('div', class_=css_class):
            placeholder = soup.new_tag('p')
            placeholder.string = separator
            target.replace_with(placeholder)

    # Drop the <ul class="pre-numbering"> lists entirely.
    for numbering in soup.find_all('ul', class_='pre-numbering'):
        numbering.decompose()

    # Persist the modified document over the original file.
    with open(filename, 'w', encoding='utf-8') as target_file:
        target_file.write(str(soup))

    print(f"文件 {filename} 已修改")

if __name__ == "__main__":
    # Exactly one command-line argument is expected: the HTML file to modify.
    cli_args = sys.argv[1:]
    if len(cli_args) == 1:
        modify_html_file(cli_args[0])
    else:
        print("用法: python modify_html.py <html文件名>")