当前位置：首页 > article >正文

AI Agent的多轮对话：提升用户体验的关键技巧

article 2025/3/4 8:51:59

在前面的文章中，我们讨论了 AI Agent 的各个核心系统。今天，我想聊聊如何实现一个好用的多轮对话系统。说实话，这个话题我琢磨了很久，因为它直接影响到用户体验。

从一个槽点说起

还记得我最开始做对话系统时的一个典型场景：

用户：帮我查一下订单状态
助手：好的，请提供订单号。
用户：A12345
助手：抱歉，我需要您提供订单号才能查询。
用户：...我刚才不是说了吗？

这种对话体验显然很糟糕。问题出在哪里？主要是 AI Agent 没有正确理解和维护对话上下文。后来我总结了几个关键点：

要理解用户意图
要记住上下文信息
要有状态管理机制
要能处理各种异常情况

对话系统的设计

经过多次迭代，我设计了一个相对完善的对话系统：

import asyncio
import json
from datetime import datetime
from enum import Enum
from typing import List, Dict, Any, Optional

from pydantic import BaseModel

class DialogueState(Enum):
    """Lifecycle states of a dialogue session, stored on ``DialogueContext.current_state``."""
    # Initial state given to a newly created session.
    IDLE = "idle"
    # Set when a required slot is missing and the user has been asked for it.
    WAITING_INPUT = "waiting_input"
    # Set at the start of handling an incoming message.
    PROCESSING = "processing"
    # Set when handling a message raised an exception.
    ERROR = "error"
    # Set after the intent has been executed successfully.
    COMPLETED = "completed"

class DialogueContext(BaseModel):
    """Per-session conversation state carried from one dialogue turn to the next."""

    # Identifier of the conversation; used as the key of the session store.
    session_id: str
    # Identifier of the user who owns the session.
    user_id: str
    # Where the session currently is in its lifecycle.
    current_state: DialogueState
    # Name of the most recently recognised intent, or None before the first turn.
    current_intent: Optional[str]
    # Slot values collected so far, keyed by slot name.
    slots: Dict[str, Any]
    # Past messages of the session, one dict per entry.
    history: List[Dict[str, Any]]
    # Creation time of the session.
    created_at: datetime
    # Time of the last update to the session.
    updated_at: datetime
    # Name of the slot the user was last asked to provide, if any.
    # StateManager._handle_waiting reads this attribute; it must be declared
    # here because a pydantic model rejects access to undeclared fields.
    # Defaults to None so existing constructor calls keep working.
    waiting_for_slot: Optional[str] = None

class DialogueSystem:
    """Multi-turn dialogue orchestrator that keeps one context per session.

    Every incoming message is interpreted against the session's stored
    context; recognised slots are merged into that context, and the reply is
    either a question for a missing slot or the result of executing the
    intent.
    """

    def __init__(
        self,
        llm,
        tool_registry,
        memory_system
    ):
        """Store the collaborators and start with an empty session table.

        Args:
            llm: Object exposing an awaitable ``understand_intent`` method.
            tool_registry: Stored on the instance; not referenced by the
                methods defined in this class body.
            memory_system: Stored on the instance; not referenced by the
                methods defined in this class body.
        """
        self.llm = llm
        self.tool_registry = tool_registry
        self.memory_system = memory_system
        # In-process session store keyed by session_id.
        self.sessions: Dict[str, DialogueContext] = {}

    async def process_message(
        self,
        session_id: str,
        user_id: str,
        message: str
    ) -> str:
        """Handle one user message and return the assistant's reply.

        Args:
            session_id: Conversation identifier; selects the stored context.
            user_id: Owner of the conversation, used when a context is created.
            message: Raw user text for this turn.

        Returns:
            The reply text. If anything raises while the turn is handled, the
            session state is set to ``ERROR`` and an apology string embedding
            the exception message is returned instead of propagating.

        Note:
            The turn is written to history only on the success path; the
            context is saved via ``_save_context`` in every case.
        """
        # 1. Fetch or create the session context.
        context = self._get_or_create_context(
            session_id,
            user_id
        )

        try:
            # 2. Mark the session as busy.
            context.current_state = DialogueState.PROCESSING

            # 3. Work out what the user wants.
            intent = await self._understand_intent(
                message,
                context
            )

            # 4. Fold the result into the context. A recogniser that found no
            #    slots may hand back None; treat that as "nothing to merge"
            #    rather than failing the whole turn with a TypeError.
            context.current_intent = intent.name
            context.slots.update(intent.slots or {})

            # 5. Ask for a missing slot or execute the intent.
            response = await self._handle_intent(
                intent,
                context
            )

            # 6. Record the completed turn.
            self._update_history(
                context,
                message,
                response
            )

            return response

        except Exception as e:
            context.current_state = DialogueState.ERROR
            return f"抱歉，处理您的请求时出现错误：{str(e)}"

        finally:
            # Persist the context whether the turn succeeded or failed.
            self._save_context(context)

    async def _understand_intent(
        self,
        message: str,
        context: DialogueContext
    ) -> "Intent":
        """Ask the LLM to classify ``message`` using the session context.

        Args:
            message: Raw user text for this turn.
            context: Session context supplying history, current intent and slots.

        Returns:
            An ``Intent`` built from the LLM response's ``intent``,
            ``confidence`` and ``slots`` attributes.
        """
        response = await self.llm.understand_intent(
            message=message,
            # Last 5 history entries.
            # NOTE(review): if history holds one entry per message (as
            # ContextManager.update_context appends them), this is 2.5 turns,
            # not 5 turns - confirm the intended window.
            history=context.history[-5:],
            current_intent=context.current_intent,
            slots=context.slots
        )

        return Intent(
            name=response.intent,
            confidence=response.confidence,
            slots=response.slots
        )

    async def _handle_intent(
        self,
        intent: "Intent",
        context: DialogueContext
    ) -> str:
        """Either ask for the first missing slot or execute the intent.

        Args:
            intent: The intent recognised for this turn.
            context: Session context; its ``current_state`` is updated here.

        Returns:
            A slot question (state becomes ``WAITING_INPUT``) or the result of
            ``_execute_intent`` (state becomes ``COMPLETED``).
        """
        # NOTE(review): only the current intent is passed in, so slots that
        # exist solely in context.slots from earlier turns are presumably not
        # visible to this check - verify against _get_missing_slots.
        missing_slots = self._get_missing_slots(
            intent
        )

        if missing_slots:
            # Ask for one slot at a time, starting with the first missing one.
            context.current_state = DialogueState.WAITING_INPUT
            return self._generate_slot_question(
                missing_slots[0]
            )

        # Every required slot is filled: run the operation.
        result = await self._execute_intent(
            intent,
            context
        )

        context.current_state = DialogueState.COMPLETED
        return result

    def _get_or_create_context(
        self,
        session_id: str,
        user_id: str
    ) -> DialogueContext:
        """Return the stored context for ``session_id``, creating it if absent.

        Args:
            session_id: Key into the session table.
            user_id: Recorded on the context only when a new one is created.

        Returns:
            The existing or newly created ``DialogueContext``.
        """
        # NOTE(review): an existing session is returned without comparing its
        # user_id to the caller's - confirm session ids cannot be shared or
        # guessed across users.
        if session_id in self.sessions:
            return self.sessions[session_id]

        # Take one timestamp so created_at and updated_at are exactly equal
        # on a brand-new session.
        now = datetime.now()
        context = DialogueContext(
            session_id=session_id,
            user_id=user_id,
            current_state=DialogueState.IDLE,
            current_intent=None,
            slots={},
            history=[],
            created_at=now,
            updated_at=now
        )

        self.sessions[session_id] = context
        return context

使用示例：

# Initialise the dialogue system.
# ChatGPT, tool_registry and memory_system are not defined in this snippet;
# they must be provided by the surrounding application.
dialogue = DialogueSystem(
    llm=ChatGPT(),
    tool_registry=tool_registry,
    memory_system=memory_system
)

# Handle user messages
async def chat():
    """Send three consecutive user messages on one session and collect the replies.

    All turns use session "123" and user "user_1", and are awaited strictly in
    order so that each turn sees the context left by the previous one.

    Returns:
        The list of replies, one per turn, in the order the turns were sent.
    """
    # Illustrative replies expected by the example for each turn:
    #   turn 1 -> "OK, please provide the order number."
    #   turn 2 -> "Your order A12345 is out for delivery and should arrive tomorrow."
    #   turn 3 -> "According to the logistics info, delivery is expected
    #              tomorrow between 10:00 and 12:00."
    user_turns = (
        "帮我查一下订单状态",  # turn 1: ask for the order status
        "A12345",  # turn 2: supply the order number
        "具体什么时候到？",  # turn 3: follow-up question about the arrival time
    )

    replies = []
    for text in user_turns:
        reply = await dialogue.process_message(
            session_id="123",
            user_id="user_1",
            message=text
        )
        replies.append(reply)

    return replies

# Run the conversation.
# `await` is only valid inside a coroutine, so a plain script has to start the
# event loop explicitly; asyncio.run() drives chat() to completion and returns
# its result. (Inside an already-running loop, e.g. a notebook, use
# `await chat()` instead.)
responses = asyncio.run(chat())
for r in responses:
    print(r)

关键实现细节

1. 意图理解

class IntentRecognizer:
    """Recognises the user's intent by prompting an LLM with the dialogue context."""

    def __init__(self, llm):
        """Keep a reference to the LLM client.

        Args:
            llm: Object exposing an awaitable ``generate(prompt)`` method.
        """
        self.llm = llm

    async def recognize(
        self,
        message: str,
        context: Dict[str, Any]
    ) -> "Intent":
        """Run the prompt -> generate -> parse -> validate pipeline.

        Args:
            message: Raw user text for this turn.
            context: Mapping that may contain ``history``, ``current_intent``
                and ``slots``.

        Returns:
            The intent produced by ``_parse_response``, after it has been
            passed to ``_validate_intent``.
        """
        # 1. Build the prompt.
        prompt = self._prepare_prompt(
            message,
            context
        )

        # 2. Call the LLM.
        response = await self.llm.generate(prompt)

        # 3. Parse the raw response.
        intent = self._parse_response(response)

        # 4. Validate; the return value is not used, so validation presumably
        #    signals problems by raising - confirm against _validate_intent.
        self._validate_intent(intent)

        return intent

    def _prepare_prompt(
        self,
        message: str,
        context: Dict[str, Any]
    ) -> str:
        """Render the intent-recognition prompt.

        Args:
            message: Raw user text for this turn.
            context: Mapping read with ``.get`` for ``history`` (default
                ``[]``), ``current_intent`` and ``slots`` (default ``{}``).

        Returns:
            The prompt text, including formatted history, the current intent,
            the known slots as indented JSON, and the user message.
        """
        history_text = self._format_history(context.get('history', []))
        # ensure_ascii=False keeps non-ASCII slot names/values (e.g. Chinese)
        # readable in the prompt instead of emitting \uXXXX escapes.
        slots_json = json.dumps(
            context.get('slots', {}),
            indent=2,
            ensure_ascii=False
        )

        return f"""
        请分析以下对话内容，识别用户意图：

        历史对话：
        {history_text}

        当前状态：
        - 意图：{context.get('current_intent')}
        - 已知信息：{slots_json}

        用户消息：{message}

        请返回：
        1. 意图名称
        2. 置信度
        3. 识别出的槽位信息
        """

2. 状态管理

class StateManager:
    """Routes an incoming message to the handler registered for the session's state."""

    def __init__(self):
        """Build the state -> handler dispatch table."""
        self.state_handlers = {
            DialogueState.IDLE: self._handle_idle,
            DialogueState.WAITING_INPUT: self._handle_waiting,
            DialogueState.PROCESSING: self._handle_processing,
            DialogueState.ERROR: self._handle_error,
            DialogueState.COMPLETED: self._handle_completed
        }

    async def handle_state(
        self,
        context: DialogueContext,
        message: str
    ) -> str:
        """Dispatch ``message`` according to ``context.current_state``.

        Args:
            context: Session context whose ``current_state`` selects the handler.
            message: Raw user text for this turn.

        Returns:
            Whatever the selected handler returns.

        Raises:
            ValueError: If no handler is registered for the current state.
        """
        dispatch = self.state_handlers.get(context.current_state)
        if dispatch:
            # Hand the turn over to the state-specific handler.
            return await dispatch(context, message)

        raise ValueError(f"未知状态：{context.current_state}")

    async def _handle_waiting(
        self,
        context: DialogueContext,
        message: str
    ) -> str:
        """Treat ``message`` as the value for the slot the session is waiting on.

        Args:
            context: Session context; ``waiting_for_slot`` names the slot and
                ``slots`` receives the value when it is valid.
            message: Raw user text, used verbatim as the slot value.

        Returns:
            The result of ``_continue_processing`` when the value is accepted,
            otherwise a message asking the user to enter the slot again.
        """
        awaited = context.waiting_for_slot

        # Invalid value: leave the context untouched and ask again.
        if not self._is_valid_slot_value(awaited, message):
            return f"抱歉，这似乎不是有效的{awaited}，请重新输入。"

        # Valid value: store it and resume the interrupted flow.
        context.slots[awaited] = message
        return await self._continue_processing(context)

3. 上下文管理

class ContextManager:
    """Appends turns to a session's history and spills old entries to long-term memory."""

    def __init__(self, memory_system, max_history: int = 10):
        """Store the memory backend and the history window size.

        Args:
            memory_system: Object exposing an awaitable
                ``remember(content=..., metadata=...)`` method.
            max_history: Number of turns to keep in ``context.history``; each
                turn is two entries (user + assistant). Defaults to 10.

        Raises:
            ValueError: If ``max_history`` is negative.
        """
        if max_history < 0:
            raise ValueError("max_history must be non-negative")
        self.memory = memory_system
        self.max_history = max_history

    async def update_context(
        self,
        context: "DialogueContext",
        message: str,
        response: str
    ):
        """Record one completed turn on ``context``.

        Args:
            context: Session context whose ``history`` and ``updated_at`` are
                modified in place.
            message: The user's text for this turn.
            response: The assistant's reply for this turn.
        """
        # 1. Append the user message and the assistant reply.
        context.history.append({
            "role": "user",
            "content": message,
            "timestamp": datetime.now()
        })
        context.history.append({
            "role": "assistant",
            "content": response,
            "timestamp": datetime.now()
        })

        # 2. Cap the history at max_history turns (two entries per turn).
        #    An explicit overflow index is used instead of negative slices so
        #    that a limit of 0 works: history[:-0] would be empty and
        #    history[-0:] would be the whole list.
        limit = self.max_history * 2
        overflow = len(context.history) - limit
        if overflow > 0:
            # Move the oldest entries to long-term memory first, then drop
            # them from the live history.
            await self._save_to_memory(
                context.session_id,
                context.history[:overflow]
            )
            context.history = context.history[overflow:]

        # 3. Refresh the last-updated timestamp.
        context.updated_at = datetime.now()

    async def _save_to_memory(
        self,
        session_id: str,
        messages: List[Dict]
    ):
        """Store ``messages`` in long-term memory, tagged with the session id.

        Args:
            session_id: Session the messages belong to; stored in the metadata.
            messages: History entries to persist; formatted with
                ``_format_messages`` before being stored.
        """
        await self.memory.remember(
            content=self._format_messages(messages),
            metadata={
                "type": "dialogue",
                "session_id": session_id,
                "timestamp": datetime.now()
            }
        )

优化技巧

在实践中，我总结了一些提升用户体验的技巧：

1. 主动确认

class ConfirmationManager:
    """Decides when an intent needs explicit user confirmation and words the question."""

    def __init__(self, threshold: float = 0.8):
        """Set the confidence threshold.

        Args:
            threshold: Intents whose confidence is below this value always
                require confirmation. Defaults to 0.8.
        """
        self.threshold = threshold

    def need_confirm(
        self,
        intent: "Intent",
        context: "DialogueContext"
    ) -> bool:
        """Tell whether the user should confirm ``intent`` before it runs.

        Args:
            intent: Recognised intent; its ``confidence`` is compared with the
                threshold.
            context: Session context (not consulted by this method).

        Returns:
            True if the confidence is below the threshold or
            ``_is_critical_operation`` reports the intent as critical,
            otherwise False.
        """
        # Low-confidence recognition: always double-check with the user.
        if intent.confidence < self.threshold:
            return True

        # Confident, but the operation itself is flagged as critical.
        if self._is_critical_operation(intent):
            return True

        return False

    def generate_confirmation(
        self,
        intent: "Intent",
        context: "DialogueContext"
    ) -> str:
        """Build the confirmation question shown to the user.

        Args:
            intent: Intent supplying ``description``, ``name`` and ``slots``.
            context: Session context (not consulted by this method).

        Returns:
            A multi-line message with no leading or trailing whitespace.
        """
        # ensure_ascii=False keeps non-ASCII slot values readable for the user.
        slots_json = json.dumps(intent.slots, indent=2, ensure_ascii=False)

        # Assemble line by line so the message does not inherit the source
        # file's indentation or surrounding blank lines, as a triple-quoted
        # literal would.
        return "\n".join([
            f"请确认您是否要{intent.description}？",
            f"- 操作：{intent.name}",
            f"- 参数：{slots_json}",
            "",
            '回复"是"或"否"。',
        ])

2. 错误恢复

class ErrorRecovery:
    """Chooses a recovery strategy for an error raised during a dialogue turn."""

    async def recover(
        self,
        error: Exception,
        context: DialogueContext
    ) -> str:
        """Turn ``error`` into a reply for the user.

        Args:
            error: The exception to recover from.
            context: Session context, passed on when the operation is retried.

        Returns:
            The retry result when the analysis allows a retry, a clarification
            question when the analysis asks for one, otherwise an error message.
        """
        diagnosis = await self._analyze_error(error)

        # Retry takes precedence over every other strategy.
        if diagnosis.can_retry:
            return await self._retry_operation(context)

        # Next best: ask the user to clarify.
        if diagnosis.need_clarification:
            return self._generate_clarification_question(diagnosis)

        # Nothing else applies: fall back to a friendly error message.
        return self._generate_error_message(diagnosis)

3. 上下文压缩

class ContextCompressor:
    """Shrinks a dialogue history so that it fits a token budget."""

    def compress_history(
        self,
        history: List[Dict],
        max_tokens: int
    ) -> List[Dict]:
        """Return ``history`` reduced to at most ``max_tokens`` tokens.

        Args:
            history: Dialogue entries, oldest first.
            max_tokens: Token budget the result should fit into.

        Returns:
            The same ``history`` object when it is already within budget;
            otherwise the result of compressing its key messages.
        """
        # Already within budget: hand the input back untouched.
        if self._count_tokens(history) <= max_tokens:
            return history

        # Over budget: keep only the key messages, then compress those.
        return self._compress_messages(
            self._extract_key_messages(history),
            max_tokens
        )

    def _extract_key_messages(
        self,
        history: List[Dict]
    ) -> List[Dict]:
        """Select the entries of ``history`` that ``_is_key_message`` accepts.

        Args:
            history: Dialogue entries, oldest first.

        Returns:
            The accepted entries in their original order.
        """
        return [
            entry
            for position, entry in enumerate(history)
            if self._is_key_message(entry, history, position)
        ]