browser-use 库 DOM 树序列化工具
目录
- 代码
- 代码解释
- 代码示例
代码
from browser_use.dom.views import DOMBaseNode, DOMElementNode, DOMTextNode
class ElementTreeSerializer:
    """Turn a DOM element tree into nested plain dictionaries."""

    @staticmethod
    def dom_element_node_to_json(element_tree: DOMElementNode) -> dict:
        """Return a nested ``dict`` that mirrors ``element_tree``.

        Text nodes become ``{'type': 'text', 'text': ...}``. Element nodes
        become a dict with the keys ``type``, ``tag_name``, ``attributes``,
        ``highlight_index`` and ``children``, where ``children`` holds the
        recursively converted child nodes. Any other node type is converted
        to an empty dict.
        """

        def convert(current: DOMBaseNode) -> dict:
            # Leaf case: a text node only carries its text.
            if isinstance(current, DOMTextNode):
                return {'type': 'text', 'text': current.text}

            # Anything that is neither text nor element maps to an empty dict.
            if not isinstance(current, DOMElementNode):
                return {}

            payload = {
                'type': 'element',
                'tag_name': current.tag_name,
                # The node's own attributes object is reused, not copied.
                'attributes': current.attributes,
                'highlight_index': current.highlight_index,
            }
            # Children are converted depth-first, in their original order.
            payload['children'] = list(map(convert, current.children))
            return payload

        return convert(element_tree)
INFO [browser_use] BrowserUse logging setup complete with level info
INFO [root] Anonymized telemetry enabled. See https://docs.browser-use.com/development/telemetry for more information.
代码解释
- 导入部分:
from browser_use.dom.views import DOMBaseNode, DOMElementNode, DOMTextNode
导入了三个核心类,用于表示 DOM 树的不同节点类型。
- `ElementTreeSerializer` 类:
这是一个序列化器类,用于将 DOM 树结构转换为 JSON 格式。主要包含:
- `dom_element_node_to_json` 静态方法:
  - 输入:DOM 元素树的根节点
  - 输出:JSON 格式的字典
  - 内部使用 `node_to_dict` 辅助函数进行递归转换
- `node_to_dict` 辅助函数的处理逻辑:
  - 对于文本节点(`DOMTextNode`):{'type': 'text', 'text': node.text}
  - 对于元素节点(`DOMElementNode`):{ 'type': 'element', 'tag_name': node.tag_name, 'attributes': node.attributes, 'highlight_index': node.highlight_index, 'children': [递归处理子节点] }
  - 对于其他类型的节点:返回空字典 {}
这个序列化器的主要用途是将内存中的 DOM 树结构转换为可读的 JSON 格式,便于:
- 数据存储和传输
- 调试和可视化
- DOM 树结构的分析和处理
代码示例
import json
# Example 1: a simple button element that wraps a single text node.
button = DOMElementNode(
    tag_name='button',
    xpath='//button[1]',
    attributes={'class': 'btn-primary', 'id': 'submit'},
    children=[],
    is_visible=True,
    is_interactive=True,
    highlight_index=1,
    parent=None,
)
# The label references the button as its parent, so the button is built
# first with an empty child list and the label is attached afterwards.
button.children.append(
    DOMTextNode(text='提交', is_visible=True, parent=button)
)

print("\n示例1 - 按钮元素:")
button_json = ElementTreeSerializer.dom_element_node_to_json(button)
print(json.dumps(button_json, indent=2))
示例1 - 按钮元素:
{
"type": "element",
"tag_name": "button",
"attributes": {
"class": "btn-primary",
"id": "submit"
},
"highlight_index": 1,
"children": [
{
"type": "text",
"text": "\u63d0\u4ea4"
}
]
}
# Example 2: a login form containing an e-mail input and a password input.
form = DOMElementNode(
    tag_name='form',
    xpath='//form[1]',
    attributes={'class': 'login-form'},
    children=[],
    is_visible=True,
    highlight_index=2,
    parent=None,
)
email_input = DOMElementNode(
    tag_name='input',
    xpath='//form[1]/input[1]',
    attributes={'type': 'email', 'placeholder': '请输入邮箱'},
    children=[],
    is_visible=True,
    highlight_index=3,
    parent=form,
)
password_input = DOMElementNode(
    tag_name='input',
    xpath='//form[1]/input[2]',
    attributes={'type': 'password', 'placeholder': '请输入密码'},
    children=[],
    is_visible=True,
    highlight_index=4,
    parent=form,
)
# Attach the inputs in document order: e-mail first, then password.
for field in (email_input, password_input):
    form.children.append(field)

print("\n示例2 - 表单结构:")
form_json = ElementTreeSerializer.dom_element_node_to_json(form)
print(json.dumps(form_json, indent=2))
示例2 - 表单结构:
{
"type": "element",
"tag_name": "form",
"attributes": {
"class": "login-form"
},
"highlight_index": 2,
"children": [
{
"type": "element",
"tag_name": "input",
"attributes": {
"type": "email",
"placeholder": "\u8bf7\u8f93\u5165\u90ae\u7bb1"
},
"highlight_index": 3,
"children": []
},
{
"type": "element",
"tag_name": "input",
"attributes": {
"type": "password",
"placeholder": "\u8bf7\u8f93\u5165\u5bc6\u7801"
},
"highlight_index": 4,
"children": []
}
]
}
# Example 3: a navigation menu whose links are generated from a table.
nav = DOMElementNode(
    tag_name='nav',
    xpath='//nav',
    attributes={'class': 'navigation'},
    children=[],
    is_visible=True,
    highlight_index=5,
    parent=None,
)
# (link label, target href) pairs, in display order.
menu_items = [
    ('首页', '/home'),
    ('产品', '/products'),
    ('关于', '/about'),
]
# Positions start at 1 so they match the 1-based XPath index; each link's
# highlight index continues after the nav element's own index of 5.
for position, (label, target) in enumerate(menu_items, start=1):
    anchor = DOMElementNode(
        tag_name='a',
        xpath=f'//nav/a[{position}]',
        attributes={'href': target, 'class': 'nav-link'},
        children=[],
        is_visible=True,
        highlight_index=5 + position,
        parent=nav,
    )
    anchor.children.append(
        DOMTextNode(text=label, is_visible=True, parent=anchor)
    )
    nav.children.append(anchor)

print("\n示例3 - 导航菜单:")
nav_json = ElementTreeSerializer.dom_element_node_to_json(nav)
print(json.dumps(nav_json, indent=2))
示例3 - 导航菜单:
{
"type": "element",
"tag_name": "nav",
"attributes": {
"class": "navigation"
},
"highlight_index": 5,
"children": [
{
"type": "element",
"tag_name": "a",
"attributes": {
"href": "/home",
"class": "nav-link"
},
"highlight_index": 6,
"children": [
{
"type": "text",
"text": "\u9996\u9875"
}
]
},
{
"type": "element",
"tag_name": "a",
"attributes": {
"href": "/products",
"class": "nav-link"
},
"highlight_index": 7,
"children": [
{
"type": "text",
"text": "\u4ea7\u54c1"
}
]
},
{
"type": "element",
"tag_name": "a",
"attributes": {
"href": "/about",
"class": "nav-link"
},
"highlight_index": 8,
"children": [
{
"type": "text",
"text": "\u5173\u4e8e"
}
]
}
]
}