AI: Dynamically Adjusting a Path Planned by the A* Algorithm with Reinforcement Learning
The following is a simplified code example showing how, in an autonomous-driving scenario, reinforcement learning can be used to dynamically adjust a path planned by the A* algorithm. An OpenAI Gym-style environment simulates the traffic scene, Q-learning serves as a simple reinforcement-learning algorithm, and the A* algorithm provides the initial path plan.
Overall Approach
- A* algorithm: generates the initial path.
- Reinforcement-learning environment: simulates the traffic scene, including the vehicle's state and the traffic conditions; the vehicle chooses an action based on its current state and receives a reward.
- Q-learning: learns the value of each state-action pair in order to make good decisions and dynamically adjust the path (the update rule is shown below).
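For reference, the tabular Q-learning update that the code below implements, with learning rate $\alpha$ (learning_rate) and discount factor $\gamma$ (discount_factor), is

$$Q(s, a) \leftarrow (1 - \alpha)\,Q(s, a) + \alpha\bigl(r + \gamma \max_{a'} Q(s', a')\bigr)$$

where $s$ is the current state, $a$ the chosen action, $r$ the reward, and $s'$ the next state returned by the environment.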
Code Implementation
import numpy as np
import heapq
import random

# A* algorithm implementation
class Node:
    def __init__(self, x, y, g=float('inf'), h=float('inf'), parent=None):
        self.x = x
        self.y = y
        self.g = g          # actual cost from the start node
        self.h = h          # heuristic estimate to the goal
        self.f = g + h      # total estimated cost
        self.parent = parent

    def __lt__(self, other):
        return self.f < other.f

def heuristic(current, goal):
    # Manhattan distance
    return abs(current[0] - goal[0]) + abs(current[1] - goal[1])

def astar(grid, start, goal):
    rows, cols = grid.shape
    open_list = []
    closed_set = set()
    start_node = Node(start[0], start[1], g=0, h=heuristic(start, goal))
    heapq.heappush(open_list, start_node)
    while open_list:
        current_node = heapq.heappop(open_list)
        if (current_node.x, current_node.y) == goal:
            # Reconstruct the path by walking back through the parents
            path = []
            while current_node:
                path.append((current_node.x, current_node.y))
                current_node = current_node.parent
            return path[::-1]
        closed_set.add((current_node.x, current_node.y))
        neighbors = [(0, 1), (0, -1), (1, 0), (-1, 0)]
        for dx, dy in neighbors:
            new_x, new_y = current_node.x + dx, current_node.y + dy
            if 0 <= new_x < rows and 0 <= new_y < cols and grid[new_x][new_y] == 0 and (new_x, new_y) not in closed_set:
                new_g = current_node.g + 1
                new_h = heuristic((new_x, new_y), goal)
                new_node = Node(new_x, new_y, g=new_g, h=new_h, parent=current_node)
                # If the neighbor is already on the open list, keep the cheaper entry
                found = False
                for i, node in enumerate(open_list):
                    if node.x == new_x and node.y == new_y:
                        if new_g < node.g:
                            open_list[i] = new_node
                            heapq.heapify(open_list)
                        found = True
                        break
                if not found:
                    heapq.heappush(open_list, new_node)
    return None

# Reinforcement-learning environment
class DrivingEnv:
    def __init__(self, grid, start, goal):
        self.grid = grid
        self.start = start
        self.goal = goal
        self.current_pos = start
        self.path = astar(grid, start, goal)   # initial A* path
        self.step_count = 0
        self.max_steps = 100

    def reset(self):
        self.current_pos = self.start
        self.path = astar(self.grid, self.start, self.goal)
        self.step_count = 0
        return self.current_pos

    def step(self, action):
        x, y = self.current_pos
        if action == 0:    # up
            new_x, new_y = x - 1, y
        elif action == 1:  # down
            new_x, new_y = x + 1, y
        elif action == 2:  # left
            new_x, new_y = x, y - 1
        else:              # right (action == 3)
            new_x, new_y = x, y + 1
        dist_before = heuristic(self.current_pos, self.goal)
        valid_move = (0 <= new_x < self.grid.shape[0]
                      and 0 <= new_y < self.grid.shape[1]
                      and self.grid[new_x][new_y] == 0)
        if valid_move:
            self.current_pos = (new_x, new_y)
        self.step_count += 1
        if self.current_pos == self.goal:
            reward = 100      # reached the goal
            done = True
        elif self.step_count >= self.max_steps:
            reward = -50      # ran out of steps
            done = True
        elif not valid_move:
            reward = -10      # penalty for hitting an obstacle or leaving the grid
            done = False
        else:
            # Reward for moving closer to the goal, penalty for moving away
            dist_after = heuristic(self.current_pos, self.goal)
            reward = dist_before - dist_after
            done = False
        info = {}
        return self.current_pos, reward, done, info

# Q-learning algorithm
def q_learning(env, num_episodes=1000, learning_rate=0.1, discount_factor=0.9, epsilon=0.1):
    q_table = {}
    for episode in range(num_episodes):
        state = env.reset()
        done = False
        while not done:
            if state not in q_table:
                q_table[state] = [0] * 4   # 4 actions
            # Epsilon-greedy action selection
            if random.uniform(0, 1) < epsilon:
                action = random.randint(0, 3)
            else:
                action = np.argmax(q_table[state])
            next_state, reward, done, _ = env.step(action)
            if next_state not in q_table:
                q_table[next_state] = [0] * 4
            q_table[state][action] = (1 - learning_rate) * q_table[state][action] + learning_rate * (
                reward + discount_factor * max(q_table[next_state]))
            state = next_state
    return q_table

# Main program
if __name__ == "__main__":
    # Example map: 0 = free cell, 1 = obstacle
    grid = np.array([
        [0, 0, 0, 0, 0],
        [0, 1, 1, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 1, 1, 0],
        [0, 0, 0, 0, 0]
    ])
    start = (0, 0)
    goal = (4, 4)
    env = DrivingEnv(grid, start, goal)
    q_table = q_learning(env)
    # Roll out the learned (greedy) policy to obtain the adjusted path
    state = env.reset()
    path = [state]
    done = False
    while not done:
        # Fall back to zero values for states never visited during training
        action = np.argmax(q_table.get(state, [0] * 4))
        state, _, done, _ = env.step(action)
        path.append(state)
    print("Dynamically adjusted path:", path)
Code Explanation
1. The A* algorithm
- Node class: represents a node on the map, storing the node's coordinates, its g value (the actual cost from the start to this node), its h value (the estimated cost from this node to the goal), its f value (the total cost), and its parent node.
- heuristic function: uses the Manhattan distance as the heuristic, estimating the cost from the current node to the goal.
- astar function: implements the core A* logic, maintaining an open list and a closed set to find the shortest path from the start to the goal (a standalone usage sketch follows this list).
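As a quick standalone check, the astar function can be run on its own before any reinforcement learning is involved. The snippet below is a minimal sketch that assumes the Node, heuristic, and astar definitions above and reuses the same 5x5 map as the main program; the exact route returned depends on how ties between equal-cost nodes are broken, but its length is optimal for this grid.

import numpy as np

# Example map: 0 = free cell, 1 = obstacle (same grid as in the main program)
grid = np.array([
    [0, 0, 0, 0, 0],
    [0, 1, 1, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 1, 1, 0],
    [0, 0, 0, 0, 0]
])

initial_path = astar(grid, (0, 0), (4, 4))   # list of (row, col) cells from start to goal
print("Initial A* path:", initial_path)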
2. The reinforcement-learning environment
- DrivingEnv class: simulates the autonomous-driving environment, holding the map, the start and goal points, the current position, and the initial path.
- reset method: resets the environment, returning the vehicle to the start and regenerating the initial path.
- step method: updates the vehicle's position according to the chosen action, computes the reward, and decides whether the episode is finished (a quick sanity-check loop is sketched below).
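To sanity-check the environment independently of any learning, it can be driven with purely random actions; the sketch below assumes the DrivingEnv class and the example grid defined above, and simply accumulates the rewards the environment hands back.

import random

env = DrivingEnv(grid, (0, 0), (4, 4))
state = env.reset()
done = False
total_reward = 0
while not done:
    action = random.randint(0, 3)        # 0 = up, 1 = down, 2 = left, 3 = right
    state, reward, done, _ = env.step(action)
    total_reward += reward
print("Random policy stopped at", state, "with total reward", total_reward)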
3. The Q-learning algorithm
- q_learning function: implements the Q-learning algorithm, learning the value of each state-action pair and updating the Q-table; a worked numerical example of the update follows.
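As a concrete illustration of that update: with learning_rate = 0.1 and discount_factor = 0.9, if q_table[state][action] is currently 0, the step returns a reward of 1, and max(q_table[next_state]) is 2, the entry becomes (1 - 0.1) · 0 + 0.1 · (1 + 0.9 · 2) = 0.28. Repeated updates of this kind gradually propagate the large goal reward backwards along good routes.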
4. The main program
- Defines the example map, start, and goal, creates the environment, and runs Q-learning training.
- Uses the learned Q-table to plan the route and prints the dynamically adjusted path (a small rendering helper is sketched below).
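For a quick visual check of the result, a small helper can print the learned path on top of the grid. The render_path function below is a hypothetical addition, not part of the original code, and assumes the grid and path variables produced by the main program.

def render_path(grid, path):
    # Print the grid with '#' for obstacles, '*' for cells on the path, '.' otherwise
    cells = set(path)
    for i in range(grid.shape[0]):
        row = []
        for j in range(grid.shape[1]):
            if grid[i][j] == 1:
                row.append('#')
            elif (i, j) in cells:
                row.append('*')
            else:
                row.append('.')
        print(' '.join(row))

render_path(grid, path)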
Notes
- This code is a simplified example; real autonomous-driving scenarios are far more complex and must account for many additional factors such as vehicle dynamics and real-time traffic data.
- The Q-learning hyperparameters (learning rate, discount factor, exploration rate) and the reward function can be tuned to the actual situation to obtain better performance; one such variant is sketched after this list.
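As one possible refinement (an assumption, not something the original code does), the exploration rate can be decayed over the course of training so the agent explores heavily at first and exploits what it has learned later. The sketch below reuses the imports and the DrivingEnv environment from the listing above.

def q_learning_with_decay(env, num_episodes=1000, learning_rate=0.1,
                          discount_factor=0.9, epsilon_start=1.0,
                          epsilon_end=0.05, epsilon_decay=0.995):
    # Variant of the q_learning function above with a decaying exploration rate
    q_table = {}
    epsilon = epsilon_start
    for episode in range(num_episodes):
        state = env.reset()
        done = False
        while not done:
            if state not in q_table:
                q_table[state] = [0] * 4
            # Epsilon-greedy action selection with the current epsilon
            if random.uniform(0, 1) < epsilon:
                action = random.randint(0, 3)
            else:
                action = np.argmax(q_table[state])
            next_state, reward, done, _ = env.step(action)
            if next_state not in q_table:
                q_table[next_state] = [0] * 4
            q_table[state][action] = (1 - learning_rate) * q_table[state][action] + \
                learning_rate * (reward + discount_factor * max(q_table[next_state]))
            state = next_state
        # Decay epsilon after each episode, keeping a small exploration floor
        epsilon = max(epsilon_end, epsilon * epsilon_decay)
    return q_table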