browser-use 的简单使用
1、browser-use是什么
Browser Use 是一个基于 Python 开发的开源库,它将先进的 AI 技术与浏览器自动化功能深度融合。通过集成Playwright等浏览器自动化工具,Browser Use允许开发者使用任何支持LangChain的大型语言模型(如GPT-4、Claude、DeepSeek等)来自动化浏览网页、提取信息、模拟用户操作等。
官网:Browser Use - Make websites accessible to AI agents(https://browser-use.com)
项目网址 :https://github.com/browser-use/browser-use
2、使用教程
1、把项目clone下来
git clone https://github.com/browser-use/browser-use.git
2、新建虚拟环境(要求 Python >= 3.11,下面以 3.12 为例)
conda create -n browser-use python=3.12
conda activate browser-use
3、进入项目,查看readme文件
安装第三方库
pip install browser-use
安装 playwright
playwright install
然后创建Agent(官方以openai为例)
from langchain_openai import ChatOpenAI
from browser_use import Agent
import asyncio
async def main():
    """Build an Agent for the Reddit demo task, run it, and print what it returns."""
    agent = Agent(
        task="Go to Reddit, search for 'browser-use' in the search bar, click on the first post and return the first comment.",
        llm=ChatOpenAI(model="gpt-4o"),
    )
    # agent.run() is awaited, so main() itself has to be driven by an event loop.
    result = await agent.run()
    print(result)
# Script entry point: run the main() coroutine to completion.
asyncio.run(main())
如果没有 OpenAI 的 API key,也可以使用其他模型,下面以 DeepSeek 为例:
该文件在 browser-use/examples/deepseek.py
import asyncio
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from pydantic import SecretStr
from browser_use import Agent
# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# Prefer the DEEPSEEK_API_KEY environment variable; the literal is only a
# placeholder that can be overwritten with a real key.
api_key = os.getenv('DEEPSEEK_API_KEY', 'sk-xxxxxx')
# The placeholder is a non-empty string, so a plain truthiness test can never
# detect an unset variable. Treat "sk-" followed only by x's as "not set" too,
# so the script fails here instead of at the first API request.
if not api_key or api_key.rstrip('x') == 'sk-':
    raise ValueError('DEEPSEEK_API_KEY is not set')
async def run_search():
    """Run a four-step Douyin search task with an agent backed by DeepSeek.

    Uses the module-level ``api_key`` for the DeepSeek endpoint. The value
    returned by ``agent.run()`` is discarded.
    """
    agent = Agent(
        # Adjacent string literals are concatenated as-is, so every step ends
        # with an explicit newline; without it the steps run into each other.
        task=(
            '1. 在搜索框中输入抖音并搜索\n'
            '2. 点击搜索结果中的第一个链接\n'
            '3. 关闭扫码登录\n'
            '4. 返回第一个视频的内容'
        ),
        llm=ChatOpenAI(
            base_url='https://api.deepseek.com/v1',
            model='deepseek-chat',
            api_key=SecretStr(api_key),
        ),
        # NOTE(review): presumably stops screenshots being sent to the model —
        # confirm against the browser-use Agent documentation.
        use_vision=False,
    )
    await agent.run()
if __name__ == '__main__':
    # Start the task only when this file is executed as a script.
    asyncio.run(run_search())
运行结果如下:
DeepSeek 的API获取方式如下:
网址:DeepSeek 开放平台(https://platform.deepseek.com)
还可以体验界面版的:
该文件在 browser-use/examples/gradio.py
注:1、运行前请先把该文件改名(例如改成 gradio_demo.py):文件名为 gradio.py 时,`import gradio` 会导入到这个文件本身而不是 gradio 库,从而报错
2、原文件需要 OpenAI 的 API key,下面的代码是以 DeepSeek 为例的修改版
3、将 'sk-xxxxxxxxx' 替换为自己的 DeepSeek API key 即可运行
import asyncio
import os
from dataclasses import dataclass
from typing import List, Optional
from pydantic import SecretStr
import gradio as gr
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from rich.console import Console
from rich.panel import Panel
from rich.text import Text
from browser_use import Agent
# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()
@dataclass
class ActionResult:
    """Record describing the outcome of one agent action.

    NOTE(review): field meanings below are inferred from their names only —
    confirm against the browser_use types this mirrors.
    """

    # Presumably True once the action finished the overall task.
    is_done: bool
    # Text produced by the action, or None when there is none.
    extracted_content: Optional[str]
    # Error description, or None when the action succeeded.
    error: Optional[str]
    # Presumably whether the result is kept in the agent's memory.
    include_in_memory: bool
@dataclass
class AgentHistoryList:
    """Container holding a list of action results and a list of model outputs."""

    # One ActionResult per recorded action.
    all_results: List[ActionResult]
    # Model outputs as plain dicts; their schema is not defined in this file.
    all_model_outputs: List[dict]
# DeepSeek API key: read from the DEEPSEEK_API_KEY environment variable, falling
# back to the placeholder literal when the variable is not set.
api_key = os.getenv('DEEPSEEK_API_KEY', 'sk-xxxxxxxxx')
def parse_agent_history(history_str: str) -> None:
    """Print the extracted content of each action found in *history_str*.

    The string is cut at every ``'ActionResult('`` marker. For each piece,
    the text after ``extracted_content=`` up to the next comma is taken,
    stripped of surrounding single quotes, and printed in a blue panel
    titled ``Step N``.

    Limitations of this textual parsing:
      * content that itself contains a comma is truncated at that comma;
      * an unquoted value (e.g. ``None``) is printed verbatim.

    Args:
        history_str: Text form of an agent history.
    """
    console = Console()

    # Everything before the first marker is not an action, hence sections[1:].
    sections = history_str.split('ActionResult(')

    for i, section in enumerate(sections[1:], 1):
        content = ''
        if 'extracted_content=' in section:
            content = section.split('extracted_content=')[1].split(',')[0].strip("'")

        # Sections without any content are skipped entirely.
        if content:
            header = Text(f'Step {i}', style='bold blue')
            panel = Panel(content, title=header, border_style='blue')
            console.print(panel)
            console.print()
async def run_browser_task(
    task: str,
    model: str = 'deepseek-chat',
    headless: bool = True,
) -> str:
    """Run *task* with a browser agent and return the agent's result.

    Args:
        task: Natural-language description of what the agent should do.
        model: Chat model name. Names starting with ``'deepseek'`` are sent
            to the DeepSeek endpoint using the module-level ``api_key``; any
            other name is passed to ``ChatOpenAI`` with its default endpoint
            and credentials. An empty value falls back to ``'deepseek-chat'``.
        headless: Accepted for interface compatibility; it is not applied to
            the browser in this function.

    Returns:
        Whatever ``agent.run()`` returns.
        NOTE(review): the annotation says ``str`` but the value is returned
        unconverted — confirm what callers expect.
    """
    # Honour the caller's choice instead of always using 'deepseek-chat'.
    model = model or 'deepseek-chat'
    if model.startswith('deepseek'):
        llm = ChatOpenAI(
            base_url='https://api.deepseek.com/v1',
            model=model,
            api_key=SecretStr(api_key),
        )
    else:
        # Non-DeepSeek models must not be sent to the DeepSeek endpoint.
        llm = ChatOpenAI(model=model)

    agent = Agent(
        task=task,
        llm=llm,
        use_vision=False,
    )
    result = await agent.run()
    # TODO: The result could be parsed better
    return result
def create_ui():
    """Build the Gradio interface for running a browser task.

    Layout: a left column with the task textbox, model dropdown, headless
    checkbox and run button, and a right column with a read-only output box.

    Returns:
        The ``gr.Blocks`` object; the caller is responsible for launching it.
    """
    with gr.Blocks(title='Browser Use GUI') as interface:
        gr.Markdown('# Browser Use Task Automation')

        with gr.Row():
            with gr.Column():
                task = gr.Textbox(
                    label='Task Description',
                    placeholder='E.g., Find flights from New York to London for next week',
                    lines=3,
                )
                model = gr.Dropdown(
                    choices=['gpt-4', 'gpt-3.5-turbo', 'deepseek-chat'],
                    label='Model',
                    value='deepseek-chat',
                )
                headless = gr.Checkbox(label='Run Headless', value=True)
                submit_btn = gr.Button('Run Task')

            with gr.Column():
                output = gr.Textbox(label='Output', lines=10, interactive=False)

        submit_btn.click(
            # The three input values arrive positionally, in the order of
            # `inputs`; asyncio.run drives the coroutine to completion.
            fn=lambda *args: asyncio.run(run_browser_task(*args)),
            inputs=[task, model, headless],
            outputs=output,
        )

    return interface
if __name__ == '__main__':
    # Build and serve the UI only when this file is executed as a script.
    demo = create_ui()
    demo.launch()
结果如下:
官网示例如下: