【LLM-agent】(task3)数据库对话Agent和RAG接入Agent
note
- 数据库对话Agent
- RAG接入Agent
文章目录
- note
- 一、数据库对话Agent
- 二、RAG接入Agent
- Reference
一、数据库对话Agent
import os
from dotenv import load_dotenv
# Load variables from a standard `.env` file (if any) into the process environment
load_dotenv()


def _parse_env_file(path):
    """Return the ``KEY=VALUE`` entries of *path* as a list of ``(key, value)`` pairs.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped, so a
    malformed line can no longer raise ``IndexError``. Surrounding whitespace
    and double quotes are stripped from each value. File order is preserved so
    that a later entry overrides an earlier one, as before.
    """
    pairs = []
    with open(path, "r", encoding="utf-8") as env_file:
        for raw_line in env_file:
            entry = raw_line.strip()
            if not entry or entry.startswith("#") or "=" not in entry:
                continue
            key, _, value = entry.partition("=")
            pairs.append((key.strip(), value.strip().strip('"')))
    return pairs


def _mask_secret(secret, visible=4):
    """Return *secret* with all but its last *visible* characters replaced by ``*``."""
    if not secret:
        return secret
    if len(secret) <= visible:
        return "*" * len(secret)
    return "*" * (len(secret) - visible) + secret[-visible:]


# Settings stay None when the corresponding key is absent from the file
base_url = None
chat_model = None
api_key = None

env_path = ".env.txt"
for _key, _value in _parse_env_file(env_path):
    # Match on the key only: matching on the whole line would also fire when
    # a *value* happens to contain one of these names.
    if "base_url" in _key:
        base_url = _value
    elif "chat_model" in _key:
        chat_model = _value
    elif "ZHIPU_API_KEY" in _key:
        api_key = _value

# Print the settings for verification; the key is masked so it does not leak
# into notebooks, logs or screenshots.
print(f"base_url: {base_url}")
print(f"chat_model: {chat_model}")
print(f"ZHIPU_API_KEY: {_mask_secret(api_key)}")
from openai import OpenAI
# OpenAI-compatible client built from the key and endpoint parsed above
client = OpenAI(api_key=api_key, base_url=base_url)
print(client)
def get_completion(prompt, model="glm-4-flash"):
    """Send *prompt* as a single user message and return the reply text.

    Args:
        prompt: Text used as the content of the user message.
        model: Name of the chat model to call. Defaults to ``"glm-4-flash"``,
            the value that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content
# 一、定义上个task的llm
from openai import OpenAI
from pydantic import Field # 导入Field,用于Pydantic模型中定义字段的元数据
from llama_index.core.llms import (
CustomLLM,
CompletionResponse,
LLMMetadata,
)
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.llms.callbacks import llm_completion_callback
from typing import List, Any, Generator
# OurLLM: a CustomLLM subclass that talks to an OpenAI-compatible chat endpoint
class OurLLM(CustomLLM):
    """``CustomLLM`` implementation backed by an ``OpenAI`` client.

    Every prompt is sent as a single ``user`` message to ``model_name`` through
    a client created from ``api_key`` and ``base_url``.
    """

    # Field defaults come from the module-level values read from the env file
    api_key: str = Field(default=api_key)
    base_url: str = Field(default=base_url)
    model_name: str = Field(default=chat_model)
    # Explicitly declared client field (created in __init__); declared with exclude=True
    client: OpenAI = Field(default=None, exclude=True)

    def __init__(self, api_key: str, base_url: str, model_name: str = chat_model, **data: Any):
        """Store the connection settings and create the ``OpenAI`` client.

        Args:
            api_key: API key passed to the ``OpenAI`` client.
            base_url: Endpoint URL passed to the ``OpenAI`` client.
            model_name: Model used for every completion request.
            **data: Extra keyword arguments forwarded to the parent initialiser.
        """
        super().__init__(**data)
        self.api_key = api_key
        self.base_url = base_url
        self.model_name = model_name
        # Build the client from the values that were passed in
        self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            model_name=self.model_name,
        )

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        """Return the model's whole reply to *prompt* as one response.

        Extra ``kwargs`` are accepted but not forwarded to the API call.

        Raises:
            Exception: If the response has no (or empty) ``choices``.
        """
        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
        )
        # getattr + truthiness also covers `choices` being None, where len() would fail
        if not getattr(response, "choices", None):
            raise Exception(f"Unexpected response format: {response}")
        return CompletionResponse(text=response.choices[0].message.content)

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, **kwargs: Any
    ) -> Generator[CompletionResponse, None, None]:
        """Yield the model's reply to *prompt* piece by piece.

        Each yielded response carries only the newly received piece, in both
        ``text`` and ``delta``. Extra ``kwargs`` are accepted but not forwarded.

        Raises:
            Exception: If iterating the stream fails; the original error is
                attached as the cause.
        """
        stream = self.client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
            stream=True,
        )
        try:
            for chunk in stream:
                # A chunk without choices would make `choices[0]` raise
                # IndexError; skip it instead of aborting the stream.
                if not chunk.choices:
                    continue
                content = chunk.choices[0].delta.content
                if not content:
                    continue
                yield CompletionResponse(text=content, delta=content)
        except Exception as e:
            # Chain the cause so the real failure stays visible in the traceback
            raise Exception(f"Unexpected response format: {e}") from e
# Instantiate the custom LLM with the settings read from the env file
llm = OurLLM(api_key=api_key, base_url=base_url, model_name=chat_model)

# Smoke test: stream a reply and echo every piece as soon as it arrives
response = llm.stream_complete("你是谁?")
for piece in response:
    print(piece, end="", flush=True)
# 2. Define the embedding model
def get_emb(api_key, text):
    """Return the ``embedding-3`` vector for *text* from the ZhipuAI embeddings API.

    Args:
        api_key: ZhipuAI API key used to create the client.
        text: The single string to embed.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    # Imported here so `zhipuai` is only required when this helper is called
    from zhipuai import ZhipuAI

    zhipu_client = ZhipuAI(api_key=api_key)
    result = zhipu_client.embeddings.create(model="embedding-3", input=[text])
    return result.data[0].embedding
from langchain_community.embeddings import ZhipuAIEmbeddings
# Embedding object (LangChain ZhipuAI wrapper) using the `embedding-3` model
embedding = ZhipuAIEmbeddings(model="embedding-3", api_key=api_key)
# 三、 导入llama-index相关库
from llama_index.core.agent import ReActAgent
from llama_index.core.tools import FunctionTool
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
from llama_index.core.tools import QueryEngineTool
from llama_index.core import SQLDatabase
from llama_index.core.query_engine import NLSQLTableQueryEngine
from sqlalchemy import create_engine, select
# Register the LLM and the embedding model on the global Settings object
Settings.llm = llm
Settings.embed_model = embedding
# 3. Create the SQLite database and seed it with demo data
import sqlite3

# Path of the SQLite database file
sqllite_path = 'llmdb.db'

# Demo rows: [department name, head count]
data = [
    ["专利部", 22],
    ["商标部", 25],
]

con = sqlite3.connect(sqllite_path)
try:
    c = con.cursor()
    # IF NOT EXISTS keeps the script re-runnable: a plain CREATE TABLE raises
    # sqlite3.OperationalError as soon as the table already exists.
    c.execute(
        """
        CREATE TABLE IF NOT EXISTS `section_stats` (
        `部门` varchar(100) DEFAULT NULL,
        `人数` int(11) DEFAULT NULL
        );
        """
    )
    # Seed only an empty table so repeated runs do not duplicate the rows
    c.execute("SELECT COUNT(*) FROM section_stats")
    if c.fetchone()[0] == 0:
        # `?` placeholders let sqlite3 bind the values instead of formatting
        # them into the SQL text.
        c.executemany(
            "INSERT INTO section_stats (部门,人数) VALUES (?, ?)",
            data,
        )
    con.commit()
    c.close()
finally:
    # Release the connection even when one of the statements above fails
    con.close()
# 4. Build the database query engine
# SQLAlchemy engine pointing at the SQLite file
engine = create_engine("sqlite:///llmdb.db")
# Expose only the `section_stats` table through the SQLDatabase wrapper
sql_database = SQLDatabase(engine, include_tables=["section_stats"])
# Natural-language-to-SQL engine over that table, using the configured LLM
query_engine = NLSQLTableQueryEngine(sql_database=sql_database, tables=["section_stats"], llm=Settings.llm)
# Tool functions
def multiply(a: float, b: float) -> float:
    """将两个数字相乘并返回乘积。"""
    # NOTE: the docstring is deliberately kept in its original wording. This
    # function is handed to FunctionTool.from_defaults, which (per the
    # LlamaIndex docs) uses the docstring as the tool description shown to the
    # LLM, so it is runtime text. It reads: "Multiply two numbers and return
    # the product."
    product = a * b
    return product
# Wrap `multiply` in a FunctionTool so it can be passed to the agent
multiply_tool = FunctionTool.from_defaults(fn=multiply)
def add(a: float, b: float) -> float:
    """将两个数字相加并返回它们的和。"""
    # NOTE: the docstring is deliberately kept in its original wording. This
    # function is handed to FunctionTool.from_defaults, which (per the
    # LlamaIndex docs) uses the docstring as the tool description shown to the
    # LLM, so it is runtime text. It reads: "Add two numbers and return their
    # sum."
    total = a + b
    return total
# Wrap `add` in a FunctionTool
add_tool = FunctionTool.from_defaults(fn=add)

# Wrap the database query engine in a tool object as well
staff_tool = QueryEngineTool.from_defaults(query_engine, name="section_staff", description="查询部门的人数。")

# Build the ReAct agent from the three tools
agent = ReActAgent.from_tools(
    [multiply_tool, add_tool, staff_tool],
    verbose=True,
)

# Give the agent its instruction and print the final answer
response = agent.chat("请从数据库表中获取`专利部`和`商标部`的人数,并将这两个部门的人数相加!")
print(response)
- 分别使用 `section_staff` 工具从数据库中获取两个部门的人数
- 使用 `add` 工具计算两个部门人数之和:
二、RAG接入Agent
如果在 Mac 本地运行,需要先安装 faiss 库:`pip install faiss-cpu`
import os
from dotenv import load_dotenv
# Load variables from a standard `.env` file (if any) into the process environment
load_dotenv()


def _parse_env_file(path):
    """Return the ``KEY=VALUE`` entries of *path* as a list of ``(key, value)`` pairs.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped, so a
    malformed line can no longer raise ``IndexError``. Surrounding whitespace
    and double quotes are stripped from each value. File order is preserved so
    that a later entry overrides an earlier one, as before.
    """
    pairs = []
    with open(path, "r", encoding="utf-8") as env_file:
        for raw_line in env_file:
            entry = raw_line.strip()
            if not entry or entry.startswith("#") or "=" not in entry:
                continue
            key, _, value = entry.partition("=")
            pairs.append((key.strip(), value.strip().strip('"')))
    return pairs


def _mask_secret(secret, visible=4):
    """Return *secret* with all but its last *visible* characters replaced by ``*``."""
    if not secret:
        return secret
    if len(secret) <= visible:
        return "*" * len(secret)
    return "*" * (len(secret) - visible) + secret[-visible:]


# Settings stay None when the corresponding key is absent from the file
base_url = None
chat_model = None
api_key = None

env_path = ".env.txt"
for _key, _value in _parse_env_file(env_path):
    # Match on the key only: matching on the whole line would also fire when
    # a *value* happens to contain one of these names.
    if "base_url" in _key:
        base_url = _value
    elif "chat_model" in _key:
        chat_model = _value
    elif "ZHIPU_API_KEY" in _key:
        api_key = _value

# Print the settings for verification; the key is masked so it does not leak
# into notebooks, logs or screenshots.
print(f"base_url: {base_url}")
print(f"chat_model: {chat_model}")
print(f"ZHIPU_API_KEY: {_mask_secret(api_key)}")
from openai import OpenAI
# OpenAI-compatible client built from the key and endpoint parsed above
client = OpenAI(api_key=api_key, base_url=base_url)
print(client)
def get_completion(prompt, model="glm-4-flash"):
    """Send *prompt* as a single user message and return the reply text.

    Args:
        prompt: Text used as the content of the user message.
        model: Name of the chat model to call. Defaults to ``"glm-4-flash"``,
            the value that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content
# 一、定义上个task的llm
from openai import OpenAI
from pydantic import Field # 导入Field,用于Pydantic模型中定义字段的元数据
from llama_index.core.llms import (
CustomLLM,
CompletionResponse,
LLMMetadata,
)
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.llms.callbacks import llm_completion_callback
from typing import List, Any, Generator
# OurLLM: a CustomLLM subclass that talks to an OpenAI-compatible chat endpoint
class OurLLM(CustomLLM):
    """``CustomLLM`` implementation backed by an ``OpenAI`` client.

    Every prompt is sent as a single ``user`` message to ``model_name`` through
    a client created from ``api_key`` and ``base_url``.
    """

    # Field defaults come from the module-level values read from the env file
    api_key: str = Field(default=api_key)
    base_url: str = Field(default=base_url)
    model_name: str = Field(default=chat_model)
    # Explicitly declared client field (created in __init__); declared with exclude=True
    client: OpenAI = Field(default=None, exclude=True)

    def __init__(self, api_key: str, base_url: str, model_name: str = chat_model, **data: Any):
        """Store the connection settings and create the ``OpenAI`` client.

        Args:
            api_key: API key passed to the ``OpenAI`` client.
            base_url: Endpoint URL passed to the ``OpenAI`` client.
            model_name: Model used for every completion request.
            **data: Extra keyword arguments forwarded to the parent initialiser.
        """
        super().__init__(**data)
        self.api_key = api_key
        self.base_url = base_url
        self.model_name = model_name
        # Build the client from the values that were passed in
        self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            model_name=self.model_name,
        )

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        """Return the model's whole reply to *prompt* as one response.

        Extra ``kwargs`` are accepted but not forwarded to the API call.

        Raises:
            Exception: If the response has no (or empty) ``choices``.
        """
        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
        )
        # getattr + truthiness also covers `choices` being None, where len() would fail
        if not getattr(response, "choices", None):
            raise Exception(f"Unexpected response format: {response}")
        return CompletionResponse(text=response.choices[0].message.content)

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, **kwargs: Any
    ) -> Generator[CompletionResponse, None, None]:
        """Yield the model's reply to *prompt* piece by piece.

        Each yielded response carries only the newly received piece, in both
        ``text`` and ``delta``. Extra ``kwargs`` are accepted but not forwarded.

        Raises:
            Exception: If iterating the stream fails; the original error is
                attached as the cause.
        """
        stream = self.client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
            stream=True,
        )
        try:
            for chunk in stream:
                # A chunk without choices would make `choices[0]` raise
                # IndexError; skip it instead of aborting the stream.
                if not chunk.choices:
                    continue
                content = chunk.choices[0].delta.content
                if not content:
                    continue
                yield CompletionResponse(text=content, delta=content)
        except Exception as e:
            # Chain the cause so the real failure stays visible in the traceback
            raise Exception(f"Unexpected response format: {e}") from e
# Instantiate the custom LLM with the settings read from the env file
llm = OurLLM(api_key=api_key, base_url=base_url, model_name=chat_model)

# Smoke test: stream a reply and echo every piece as soon as it arrives
response = llm.stream_complete("你是谁?")
for piece in response:
    print(piece, end="", flush=True)
# 2. Embedding model
def get_emb(api_key, text):
    """Return the ``embedding-3`` vector for *text* from the ZhipuAI embeddings API.

    Args:
        api_key: ZhipuAI API key used to create the client.
        text: The single string to embed.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    # Imported here so `zhipuai` is only required when this helper is called
    from zhipuai import ZhipuAI

    zhipu_client = ZhipuAI(api_key=api_key)
    result = zhipu_client.embeddings.create(model="embedding-3", input=[text])
    return result.data[0].embedding
from langchain_community.embeddings import ZhipuAIEmbeddings
# Embedding object (LangChain ZhipuAI wrapper) using the `embedding-3` model
embedding = ZhipuAIEmbeddings(model="embedding-3", api_key=api_key)
# 3. Build the index
# Read the source file; `input_files` takes a list of paths
from llama_index.core import SimpleDirectoryReader,Document
# NOTE(review): machine-specific absolute path -- adjust it to where the document lives locally
text_file_path = "/Users/guomiansheng/Desktop/LLM/llm_app/wow-agent/docs/问答手册.txt"
documents = SimpleDirectoryReader(input_files=[text_file_path]).load_data()
# Split the documents into nodes
from llama_index.core.node_parser import SentenceSplitter
transformations = [SentenceSplitter(chunk_size = 512)]
from llama_index.core.ingestion.pipeline import run_transformations
nodes = run_transformations(documents, transformations=transformations)
# Build the FAISS-backed vector index
from llama_index.vector_stores.faiss import FaissVectorStore
import faiss
from llama_index.core import StorageContext, VectorStoreIndex
# Embed a throw-away probe string only to learn the embedding dimension,
# which sizes the FAISS index below.
probe_text = "你好呀呀"
if hasattr(embedding, "get_text_embedding"):
    # LlamaIndex-style embedding object
    emb = embedding.get_text_embedding(probe_text)
else:
    # `embedding` is created from langchain_community's ZhipuAIEmbeddings,
    # whose single-text method is `embed_query`; calling `get_text_embedding`
    # on it would raise AttributeError.
    emb = embedding.embed_query(probe_text)
vector_store = FaissVectorStore(faiss_index=faiss.IndexFlatL2(len(emb)))
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(
    nodes=nodes,
    storage_context=storage_context,
    embed_model=embedding,
)
# 4. Build the question-answering engine
# Retriever
from llama_index.core.retrievers import VectorIndexRetriever
# Retriever options are collected in a dict so they are easy to customise
kwargs = dict(similarity_top_k=5, index=index, dimensions=len(emb))
retriever = VectorIndexRetriever(**kwargs)

# Response synthesizer (streaming enabled)
from llama_index.core.response_synthesizers import get_response_synthesizer
response_synthesizer = get_response_synthesizer(llm=llm, streaming=True)

# Query engine combining the retriever and the synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine
engine = RetrieverQueryEngine(retriever=retriever, response_synthesizer=response_synthesizer)
# 5. Ask a question and print the streamed answer piece by piece
question = "What are the applications of Agent AI systems ?"
response = engine.query(question)
for token in response.response_gen:
    print(token, end="")
# 6. Expose the RAG query engine to an agent as a tool
from llama_index.core.tools import QueryEngineTool
from llama_index.core.tools import ToolMetadata

# Tool metadata: the name and description are runtime text given to the agent
rag_tool_metadata = ToolMetadata(name="RAG工具", description=("用于在原文中检索相关信息"))
query_engine_tools = [QueryEngineTool(query_engine=engine, metadata=rag_tool_metadata)]

# Create the ReAct agent
from llama_index.core.agent import ReActAgent
agent = ReActAgent.from_tools(query_engine_tools, llm=llm, verbose=True)

# Let the agent carry out the task and print its final answer
response = agent.chat("What are the applications of Agent AI systems ?")
print(response)
在action时会使用RAG工具:
Reference
[1] 官方文档:https://docs.cloud.llamaindex.ai/
[2] https://github.com/datawhalechina/wow-agent
[3] https://www.datawhale.cn/learn/summary/86
[4] https://github.com/datawhalechina/wow-rag
[5] LangChain——Embedding 智谱AI