Linux dma的使用与理解
1 概述
本文描述Linux下的dma子系统,包括上层驱动调用方式(consumer),硬件驱动接入(provider)和dma子系统框架。
2 Linux下dma基本软件框架
如下框架大致描述了Linux下的dma子系统软件层次关系,Linux内核提供了dma子系统,向上给驱动层提供DMA consumer接口,驱动开发者只需调用consumer的接口传入对应的dma通道需求信息;向下给硬件层提供DMA Provider接口,该层主要由芯片厂商实现,向dma子系统注册一个dma_device;DMA核心层包括dmaengine、virt-dma和of-dma。
3 dma寄存器配置回顾
(1)配置前准备
1)时钟使能
启用DMA控制器的时钟(如STM32中通过RCC_AHBPeriphClockCmd开启DMA1/DMA2时钟)。
2)参数确定
传输方向:外设↔存储器或存储器↔存储器。
数据量:设置单次传输的数据单元数量(如DMA_CNDTRx寄存器)。
数据宽度:选择字节(8位)、半字(16位)或字(32位)。
(2)寄存器配置关键步骤
1)地址设置
外设地址:指向外设数据寄存器(如&USART1->DR)。
存储器地址:指向内存缓冲区(如数组SendBuff[100])。
地址递增模式:外设地址通常固定,存储器地址按数据宽度递增。
2)传输模式与优先级
选择循环模式(自动重载数据量)或单次模式(需手动重启)。
设置通道优先级(高/中/低),冲突时按通道编号仲裁。
3)中断配置(可选)
使能传输完成/半传输/错误中断,并绑定中断服务函数。
(3)启动与调试
1)初始化DMA通道
调用初始化函数(如DMA_Init()),加载配置到工作寄存器。
2)启动传输
使能DMA通道(如DMA_Cmd(DMA1_Channel4, ENABLE))。
3)数据监控
通过中断标志位或查询寄存器状态(如DMA_ISR)确认传输完成。
4 驱动层调用DMA子系统
上层驱动使用DMA子系统,如下是deepseek生成的一个内存到内存的过程示例
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/platform_device.h>
#define DEVICE_NAME "dma_example"
#define BUFFER_SIZE 1024
// Char-device major number assigned by register_chrdev() at init.
static int major;
// memcpy-capable DMA channel acquired at module init.
static struct dma_chan *dma_chan;
// Completion signalled from the DMA done callback.
static struct completion dma_complete;
// DMA (bus) addresses of the coherent source/destination buffers.
static dma_addr_t src_dma_addr, dst_dma_addr;
// CPU virtual addresses of the same buffers.
static void *src_buffer, *dst_buffer;
// DMA transfer-complete callback.
// Invoked by the dmaengine core (tasklet context) once the copy finishes;
// wakes any writer blocked on dma_complete. @data is unused here.
static void dma_callback(void *data)
{
complete(&dma_complete);
}
// Char device open(): nothing per-file to set up, all state is module-global.
static int dma_example_open(struct inode *inode, struct file *filp)
{
return 0;
}
// Char device release(): nothing to tear down, mirrors open().
static int dma_example_release(struct inode *inode, struct file *filp)
{
return 0;
}
// Char device write(): triggers one BUFFER_SIZE mem-to-mem DMA copy and
// blocks until it completes. The user data in @buf is ignored; the engine
// always copies the pre-filled src_buffer into dst_buffer.
// Returns @count on success, negative errno on failure.
static ssize_t dma_example_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos)
{
	struct dma_async_tx_descriptor *desc;
	dma_cookie_t cookie;

	// Re-arm the completion for this transfer.
	init_completion(&dma_complete);

	// Build the descriptor; DMA_PREP_INTERRUPT requests a completion
	// interrupt so our callback gets invoked.
	desc = dmaengine_prep_dma_memcpy(dma_chan, dst_dma_addr, src_dma_addr, BUFFER_SIZE, DMA_PREP_INTERRUPT);
	if (!desc) {
		printk(KERN_ERR "Failed to prepare DMA descriptor\n");
		return -ENOMEM;
	}

	// The callback must be set before submit, or a fast completion
	// could be missed.
	desc->callback = dma_callback;
	desc->callback_param = NULL;

	// Queue the descriptor on the channel.
	cookie = dmaengine_submit(desc);
	if (dma_submit_error(cookie)) {
		printk(KERN_ERR "Failed to submit DMA transfer\n");
		// was -EFAULT: EFAULT means "bad user address", not an
		// engine/submit failure — report a generic I/O error instead
		return -EIO;
	}

	// Flush the channel's pending queue to hardware.
	dma_async_issue_pending(dma_chan);

	// Bounded wait: never leave the writer hung forever if the
	// hardware stalls (the original waited unconditionally).
	if (!wait_for_completion_timeout(&dma_complete, msecs_to_jiffies(1000))) {
		dmaengine_terminate_sync(dma_chan);
		printk(KERN_ERR "DMA transfer timed out\n");
		return -ETIMEDOUT;
	}
	return count;
}
// File operations of the example char device; write() triggers one DMA copy.
static struct file_operations fops = {
.owner = THIS_MODULE,
.open = dma_example_open,
.release = dma_example_release,
.write = dma_example_write,
};
// Module init: register the char device, grab a MEMCPY-capable DMA channel,
// and allocate two coherent (CPU/DMA cache-consistent) buffers.
// Returns 0 on success, negative errno on failure (everything acquired so
// far is released on the error path).
static int __init dma_example_init(void)
{
	dma_cap_mask_t mask;
	struct device *dma_dev;
	int ret;

	// Register the char device.
	major = register_chrdev(0, DEVICE_NAME, &fops);
	if (major < 0) {
		printk(KERN_ALERT "Failed to register character device\n");
		return major;
	}
	printk(KERN_INFO "DMA example driver registered with major number %d\n", major);

	// dma_request_chan() needs a client device with DT "dmas" bindings
	// (a NULL dev would oops); for a plain memcpy channel, request by
	// capability mask instead.
	dma_cap_zero(mask);
	dma_cap_set(DMA_MEMCPY, mask);
	dma_chan = dma_request_chan_by_mask(&mask);
	if (IS_ERR(dma_chan)) {		// returns ERR_PTR(), never NULL
		ret = PTR_ERR(dma_chan);
		printk(KERN_ERR "Failed to request DMA channel\n");
		dma_chan = NULL;
		goto err_chrdev;
	}

	// Coherent buffers must be mapped against the DMA engine's device
	// (so its dma_mask/IOMMU settings apply) — never a NULL device.
	dma_dev = dma_chan->device->dev;
	src_buffer = dma_alloc_coherent(dma_dev, BUFFER_SIZE, &src_dma_addr, GFP_KERNEL);
	if (!src_buffer) {
		printk(KERN_ERR "Failed to allocate DMA source buffer\n");
		ret = -ENOMEM;
		goto err_chan;
	}
	dst_buffer = dma_alloc_coherent(dma_dev, BUFFER_SIZE, &dst_dma_addr, GFP_KERNEL);
	if (!dst_buffer) {
		printk(KERN_ERR "Failed to allocate DMA destination buffer\n");
		ret = -ENOMEM;
		goto err_src;		// don't leak the source buffer
	}

	// Known pattern so a completed copy is verifiable in dst_buffer.
	memset(src_buffer, 0xAA, BUFFER_SIZE);
	return 0;

err_src:
	dma_free_coherent(dma_dev, BUFFER_SIZE, src_buffer, src_dma_addr);
err_chan:
	dma_release_channel(dma_chan);
	dma_chan = NULL;
err_chrdev:
	unregister_chrdev(major, DEVICE_NAME);
	return ret;
}
// Module exit: undo init in reverse order.
static void __exit dma_example_exit(void)
{
	// Free the buffers against the same device they were mapped with.
	struct device *dma_dev = dma_chan->device->dev;

	dma_free_coherent(dma_dev, BUFFER_SIZE, src_buffer, src_dma_addr);
	dma_free_coherent(dma_dev, BUFFER_SIZE, dst_buffer, dst_dma_addr);
	// Release the DMA channel.
	dma_release_channel(dma_chan);
	// Unregister the char device.
	unregister_chrdev(major, DEVICE_NAME);
	printk(KERN_INFO "DMA example driver unregistered\n");
}
// Module entry/exit hooks and metadata.
module_init(dma_example_init);
module_exit(dma_example_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Your Name");
MODULE_DESCRIPTION("A simple DMA example driver");
先请求一个dma通道(示例请求了名为memcpy的通道),使用dma_alloc_coherent申请了两块拷贝内存,之后由用户调用write开始dma配置并启动。dmaengine_prep_dma_memcpy实际上调用到了硬件IP实现的接口,主要配置了寄存器内存地址与模式(后续章节分析);之后调用dmaengine_submit,该接口最终调用到desc中的tx_submit回调,该回调会将当前desc的提交请求加入到virt-dma维护的node链表中,desc中维护一个callback。最后调用dma_async_issue_pending启动传输,该接口配置dma寄存器启动传输;硬件中断产生时,硬件层将在中断中调度一个事先准备好的tasklet,用来回调上层传入的callback(从virt-dma维护的node链表中取出callback)。
上述示例用了dma_alloc_coherent分配内存,该方式主要用来解决缓存一致性问题,用于分配一块内存,并同时将其映射为DMA可以访问的形式。 这种内存区域具有一致性,意味着无论是在CPU还是DMA控制器看来,该内存的内容都是同步的。 这对于需要频繁读写且要求一致性的场景非常有用。
对于已有内存,使用另外一个接口:
struct device *dev;
void *cpu_addr = kmalloc(size, GFP_KERNEL); // allocate a buffer
dma_addr_t dma_handle;
// Map the buffer for DMA.
/*
dma_map_single() maps an already-allocated buffer so a device can access it
by DMA (streaming mapping). Typically used for temporary / one-shot buffers.
*/
dma_handle = dma_map_single(dev, cpu_addr, size, DMA_TO_DEVICE);
if (dma_mapping_error(dev, dma_handle)) {
printk(KERN_ERR "DMA mapping error\n");
kfree(cpu_addr);
return -EINVAL;
}
// ... perform the DMA transfer using dma_handle ...
// Unmap once the transfer is done.
dma_unmap_single(dev, dma_handle, size, DMA_TO_DEVICE);
// Free the buffer.
kfree(cpu_addr);
除了内存到内存,还有内存到设备的dma传输;上述示例使用dmaengine_prep_dma_memcpy,用于内存到内存传输,该函数用于配置内存到内存的数据传输;它适用于需要在两个内存地址之间直接传输数据的情况,而不需要通过任何外部设备。对于内存到设备,一般用dmaengine_prep_slave_sg,如下示例:
int sample_request_dma(void)
{
struct dma_chan *chan;
struct dma_async_tx_descriptor *tx;
dma_cookie_t cookie;
struct scatterlist sg;
// 请求DMA通道
chan = dma_request_chan(dev, "my_dma_channel");
if (IS_ERR(chan)) {
// 处理错误
}
// 准备DMA传输
tx = dmaengine_prep_slave_sg(chan, &sg, 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!tx) {
// 处理错误
}
// 设置传输完成回调
tx->callback = my_dma_callback;
tx->callback_param = my_param;
// 提交传输
cookie = tx->tx_submit(tx);
if (dma_submit_error(cookie)) {
// 处理错误
}
// 启动传输
dma_async_issue_pending(chan);
// 等待传输完成
dma_sync_wait(chan, cookie);
// 释放DMA通道
dma_release_channel(chan);
return 0;
}
5 硬件IP注册一个DMA provider
以stm32的驱动为例子drivers/dma/stm32-dma.c
Stm32设备内容如下:
Stm32有两个dma IP,每个dma有8个通道,每个通道各自有一个中断,如下是设备树描述的硬件信息:
dma1: dma-controller@40026000 {
compatible = "st,stm32-dma";
reg = <0x40026000 0x400>;
// one interrupt line per DMA stream/channel
interrupts = <11>,
<12>,
<13>,
<14>,
<15>,
<16>,
<17>,
<47>;
clocks = <&rcc 0 STM32F7_AHB1_CLOCK(DMA1)>;
#dma-cells = <4>;
status = "disabled";
};
dma2: dma-controller@40026400 {
compatible = "st,stm32-dma";
reg = <0x40026400 0x400>;
interrupts = <56>,
<57>,
<58>,
<59>,
<60>,
<68>,
<69>,
<70>;
clocks = <&rcc 0 STM32F7_AHB1_CLOCK(DMA2)>;
#dma-cells = <4>;
st,mem2mem;
status = "disabled";
};
驱动实现在drivers/dma/stm32-dma.c
驱动probe函数主要实现了向DMA子系统注册一个provider:
static const struct of_device_id stm32_dma_of_match[] = {
{ .compatible = "st,stm32-dma", },
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, stm32_dma_of_match);
// Probe (excerpt): fill in a struct dma_device describing this controller's
// capabilities and callbacks, then register it as a DMA provider.
static int stm32_dma_probe(struct platform_device *pdev)
{
struct stm32_dma_chan *chan;
struct stm32_dma_device *dmadev;
struct dma_device *dd;
const struct of_device_id *match;
struct resource *res;
struct reset_control *rst;
int i, ret;
...
dmadev = devm_kzalloc(&pdev->dev, sizeof(*dmadev), GFP_KERNEL);
...
dmadev->mem2mem = of_property_read_bool(pdev->dev.of_node,"st,mem2mem");
...
// Advertise this DMA device's capabilities
dma_cap_set(DMA_SLAVE, dd->cap_mask);
dma_cap_set(DMA_PRIVATE, dd->cap_mask);
dma_cap_set(DMA_CYCLIC, dd->cap_mask);
// Callbacks backing the capabilities above; the dmaengine core (and thus
// consumers) invokes them directly through these pointers.
// Called when a consumer obtains a channel: hardware-specific channel init
dd->device_alloc_chan_resources = stm32_dma_alloc_chan_resources;
// Counterpart: release the channel's resources
dd->device_free_chan_resources = stm32_dma_free_chan_resources;
// Report a transfer's status/result
dd->device_tx_status = stm32_dma_tx_status;
// Flush the submitted descriptor queue to hardware
dd->device_issue_pending = stm32_dma_issue_pending;
dd->device_prep_slave_sg = stm32_dma_prep_slave_sg;
dd->device_prep_dma_cyclic = stm32_dma_prep_dma_cyclic;
dd->device_config = stm32_dma_slave_config;
dd->device_terminate_all = stm32_dma_terminate_all;
dd->device_synchronize = stm32_dma_synchronize;
dd->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
dd->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
dd->copy_align = DMAENGINE_ALIGN_32_BYTES;
dd->max_burst = STM32_DMA_MAX_BURST;
dd->descriptor_reuse = true;
dd->dev = &pdev->dev;
INIT_LIST_HEAD(&dd->channels);
if (dmadev->mem2mem) {// optional memory-to-memory copy support
dma_cap_set(DMA_MEMCPY, dd->cap_mask);
dd->device_prep_dma_memcpy = stm32_dma_prep_dma_memcpy;
dd->directions |= BIT(DMA_MEM_TO_MEM);
}
for (i = 0; i < STM32_DMA_MAX_CHANNELS; i++) {// init each channel and bind it to a virtual channel
chan = &dmadev->chan[i];
chan->id = i;
chan->vchan.desc_free = stm32_dma_desc_free;
vchan_init(&chan->vchan, dd);
}
ret = dma_async_device_register(dd);// register with the dmaengine core
if (ret)
goto clk_free;
for (i = 0; i < STM32_DMA_MAX_CHANNELS; i++) {
chan = &dmadev->chan[i];
ret = platform_get_irq(pdev, i);
...
chan->irq = ret;
// Install the per-channel IRQ handler; on completion it schedules the
// tasklet that invokes the consumer's callback.
ret = devm_request_irq(&pdev->dev, chan->irq,
stm32_dma_chan_irq, 0,
dev_name(chan2dev(chan)), chan);
...
}
// Register as an OF (device-tree) DMA provider so consumers can look up
// channels through "dmas"/"dma-names" properties.
ret = of_dma_controller_register(pdev->dev.of_node,
stm32_dma_of_xlate, dmadev);
...
return 0;
dma_async_device_unregister(dd);
... return ret;
}
以上是一个DMA注册provider的一般过程,回调接口流程后续章节介绍。
6 追溯驱动调用接口(DMA子系统consumer接口)
6.1 dma_request_chan/dma_request_channel
该接口将调用硬件层回调,stm的dma ip驱动,device_alloc_chan_resources做了channel的初始化:
dma_request_chan
=>find_candidate
==>dma_chan_get
===>device_alloc_chan_resources
// Core lookup (excerpt): resolve a named slave channel for @dev, first via
// the device tree, then via the DMA slave-map fallback.
struct dma_chan *dma_request_chan(struct device *dev, const char *name)
{
struct dma_device *d, *_d;
struct dma_chan *chan = NULL;
/* If device-tree is present get slave info from here */
/* Look up the channel named @name for this device in the device tree.
E.g. with name "tx" and a node such as:
i2s0: i2s@1011a000 {
...
dmas = <&dmac1_s 6>, <&dmac1_s 7>;
dma-names = "tx", "rx";
...
};
"tx" resolves through the dmas property to channel 6 of dmac1_s.
*/
if (dev->of_node)
chan = of_dma_request_slave_channel(dev->of_node, name);
....
/* Try to find the channel via the DMA filter map(s) */
/*
If the DT lookup failed, fall back to the DMA slave-map mechanism: walk
dma_device_list (providers add themselves to it at registration time) and
match on device/channel name. The provider must have set up a filter in
its dma_device for this name-based lookup to succeed.
*/
mutex_lock(&dma_list_mutex);
list_for_each_entry_safe(d, _d, &dma_device_list, global_node) {
dma_cap_mask_t mask;
// match on device name and channel name
const struct dma_slave_map *map = dma_filter_match(d, name, dev);
if (!map)
continue;
// look for a channel with SLAVE capability
dma_cap_zero(mask);
dma_cap_set(DMA_SLAVE, mask);
chan = find_candidate(d, &mask, d->filter.fn, map->param);
if (!IS_ERR(chan))
break;
}
mutex_unlock(&dma_list_mutex);
...
found:
...
chan->name = kasprintf(GFP_KERNEL, "dma:%s", name);
if (!chan->name)
return chan;
....
return chan;
}
dma_request_channel不通过设备树名称获取通道,而是根据传入的设备树节点与已注册的dma设备节点是否一致,再配合filter函数来获取当前slave通道。
#define dma_request_channel(mask, x, y) \
__dma_request_channel(&(mask), x, y, NULL)
struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask,
dma_filter_fn fn, void *fn_param,
struct device_node *np)
{
struct dma_device *device, *_d;
struct dma_chan *chan = NULL;
...
list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
/* Finds a DMA controller with matching device node */
if (np && device->dev->of_node && np != device->dev->of_node)
continue;
chan = find_candidate(device, mask, fn, fn_param);
...
}
...
return chan;
}
6.2 dmaengine_prep_dma_memcpy/dmaengine_prep_slave_sg
dmaengine_prep_dma_memcpy最终回调到DMA硬件驱动接口
static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy(
struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
size_t len, unsigned long flags)
{....
return chan->device->device_prep_dma_memcpy(chan, dest, src,
len, flags);
}
Stm32硬件驱动中
static int stm32_dma_probe(struct platform_device *pdev)
{
...
if (dmadev->mem2mem) {
dma_cap_set(DMA_MEMCPY, dd->cap_mask);
dd->device_prep_dma_memcpy = stm32_dma_prep_dma_memcpy;
dd->directions |= BIT(DMA_MEM_TO_MEM);
}
...
}
该硬件接口主要实现对dma搬运地址的配置,模式的配置,准备好启动dma搬运
// Provider hook behind dmaengine_prep_dma_memcpy() (excerpt): build a
// descriptor holding the shadow register values for a mem-to-mem copy.
static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy(
struct dma_chan *c, dma_addr_t dest,
dma_addr_t src, size_t len, unsigned long flags)
{
struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
...
desc = kzalloc(struct_size(desc, sg_req, num_sgs), GFP_NOWAIT);
if (!desc)
return NULL;
threshold = chan->threshold;
// split the copy into hardware-sized chunks
for (offset = 0, i = 0; offset < len; offset += xfer_count, i++) {
xfer_count = min_t(size_t, len - offset,
STM32_DMA_ALIGNED_MAX_DATA_ITEMS);
/* Compute best burst size */
max_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
best_burst = stm32_dma_get_best_burst(len, STM32_DMA_MAX_BURST,
threshold, max_width);
dma_burst = stm32_dma_get_burst(chan, best_burst);
stm32_dma_clear_reg(&desc->sg_req[i].chan_reg);// reset the shadow register set (written to HW later, when the transfer starts)
desc->sg_req[i].chan_reg.dma_scr =
STM32_DMA_SCR_DIR(STM32_DMA_MEM_TO_MEM) |
STM32_DMA_SCR_PBURST(dma_burst) |
STM32_DMA_SCR_MBURST(dma_burst) |
STM32_DMA_SCR_MINC |
STM32_DMA_SCR_PINC |
STM32_DMA_SCR_TCIE |
STM32_DMA_SCR_TEIE;
desc->sg_req[i].chan_reg.dma_sfcr |= STM32_DMA_SFCR_MASK;
desc->sg_req[i].chan_reg.dma_sfcr |=
STM32_DMA_SFCR_FTH(threshold);
desc->sg_req[i].chan_reg.dma_spar = src + offset;
desc->sg_req[i].chan_reg.dma_sm0ar = dest + offset;
desc->sg_req[i].chan_reg.dma_sndtr = xfer_count;
desc->sg_req[i].len = xfer_count;
}
desc->num_sgs = num_sgs;
desc->cyclic = false;
// hand the descriptor to virt-dma (which sets tx_submit etc.)
return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
}
// virt-dma helper: wire the virtual descriptor into the generic
// dma_async_tx_descriptor before returning it to the consumer.
static inline struct dma_async_tx_descriptor *vchan_tx_prep(struct virt_dma_chan *vc,
struct virt_dma_desc *vd, unsigned long tx_flags)
{
...
vd->tx.tx_submit = vchan_tx_submit;// callback the consumer uses (via dmaengine_submit) to queue the transfer
vd->tx.desc_free = vchan_tx_desc_free;// callback for the consumer to free the descriptor
...
return &vd->tx;
}
同样,dmaengine_prep_slave_sg最终调用到硬件驱动接口。
static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_sg(
struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
enum dma_transfer_direction dir, unsigned long flags)
{
...
return chan->device->device_prep_slave_sg(chan, sgl, sg_len,
dir, flags, NULL);
}
// Provider hook behind dmaengine_prep_slave_sg() (excerpt): build a
// descriptor for a scatter-gather memory<->device transfer.
static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg(
struct dma_chan *c, struct scatterlist *sgl,
u32 sg_len, enum dma_transfer_direction direction,
unsigned long flags, void *context)
{
struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
struct stm32_dma_desc *desc;
struct scatterlist *sg;
enum dma_slave_buswidth buswidth;
u32 nb_data_items;
int i, ret;
....
desc = kzalloc(struct_size(desc, sg_req, sg_len), GFP_NOWAIT);
if (!desc)
return NULL;
...
// fill per-segment transfer parameters
for_each_sg(sgl, sg, sg_len, i) {
ret = stm32_dma_set_xfer_param(chan, direction, &buswidth,
sg_dma_len(sg),
sg_dma_address(sg));
if (ret < 0)
goto err;
desc->sg_req[i].len = sg_dma_len(sg);
nb_data_items = desc->sg_req[i].len / buswidth;
if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) {
dev_err(chan2dev(chan), "nb items not supported\n");
goto err;
}
stm32_dma_clear_reg(&desc->sg_req[i].chan_reg);
desc->sg_req[i].chan_reg.dma_scr = chan->chan_reg.dma_scr;
desc->sg_req[i].chan_reg.dma_sfcr = chan->chan_reg.dma_sfcr;
desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar;
desc->sg_req[i].chan_reg.dma_sm0ar = sg_dma_address(sg);
desc->sg_req[i].chan_reg.dma_sm1ar = sg_dma_address(sg);
desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items;
}
desc->num_sgs = sg_len;
desc->cyclic = false;
// hand the descriptor to virt-dma (which sets tx_submit etc.)
return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
...}
6.3 dmaengine_submit/tx->tx_submit
这两个接口,最终调用了由dmaengine_prep_dma_memcpy/dmaengine_prep_slave_sg返回的desc中的tx_submit,该回调在这两个prep接口中被赋值(前文vchan_tx_prep中介绍过)。
tx->tx_submit(tx);
static inline dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc)
{
return desc->tx_submit(desc);
}
该回调实现在virt-dma(drivers/dma/virt-dma.c)中,如下,最终将待发送的节点放到desc_submitted链表中。
// virt-dma implementation of tx_submit (excerpt): assign a cookie and move
// the descriptor onto the channel's desc_submitted list.
dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *tx)
{
struct virt_dma_chan *vc = to_virt_chan(tx->chan);
struct virt_dma_desc *vd = to_virt_desc(tx);
。。。
cookie = dma_cookie_assign(tx);
list_move_tail(&vd->node, &vc->desc_submitted);
...
return cookie;
}
什么时候这个链表被拿出来,启动dma搬运的?这个过程由用户调用接口dma_async_issue_pending来触发。
6.4 dma_async_issue_pending
static inline void dma_async_issue_pending(struct dma_chan *chan)
{
chan->device->device_issue_pending(chan);
}
该接口同样回调到dma硬件驱动的接口,在stm32硬件驱动文件中:
dd->device_issue_pending = stm32_dma_issue_pending;
// Provider hook behind dma_async_issue_pending() (excerpt).
static void stm32_dma_issue_pending(struct dma_chan *c)
{
...
// move submitted descriptors onto the desc_issued list, and start the
// hardware only if the channel is currently idle
if (vchan_issue_pending(&chan->vchan) && !chan->desc && !chan->busy) {
dev_dbg(chan2dev(chan), "vchan %pK: issued\n", &chan->vchan);
stm32_dma_start_transfer(chan);// kick off the transfer
}
...
}
// Program the hardware from the next issued descriptor and start the
// stream (excerpt).
static void stm32_dma_start_transfer(struct stm32_dma_chan *chan)
{
struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
struct virt_dma_desc *vdesc;
struct stm32_dma_sg_req *sg_req;
struct stm32_dma_chan_reg *reg;
u32 status;
int ret;
// disable the channel before reprogramming it
ret = stm32_dma_disable_chan(chan);
if (ret < 0)
return;
if (!chan->desc) {
// pull the next descriptor (queued earlier via tx_submit /
// issue_pending) off the virtual channel's list
vdesc = vchan_next_desc(&chan->vchan);
if (!vdesc)
return;
list_del(&vdesc->node);
chan->desc = to_stm32_dma_desc(vdesc);// recover the driver-specific descriptor
chan->next_sg = 0;
}
...
// write the shadow register values to the hardware
stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg->dma_scr);
stm32_dma_write(dmadev, STM32_DMA_SPAR(chan->id), reg->dma_spar);
stm32_dma_write(dmadev, STM32_DMA_SM0AR(chan->id), reg->dma_sm0ar);
stm32_dma_write(dmadev, STM32_DMA_SFCR(chan->id), reg->dma_sfcr);
stm32_dma_write(dmadev, STM32_DMA_SM1AR(chan->id), reg->dma_sm1ar);
stm32_dma_write(dmadev, STM32_DMA_SNDTR(chan->id), reg->dma_sndtr);
...
// start: SCR written again — presumably with the enable bit set in the
// elided code above; TODO confirm against the full driver source
stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg->dma_scr);
...}
6.5 dma_sync_wait
Dma完成同步接口,调用该接口等待dma传输完成,也可以通过回调获取dma传输结果(后续章节分析)。
// Synchronous wait: poll the channel until @cookie completes or the 5 s
// timeout expires. Returns the final dma_status (DMA_ERROR on timeout).
enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
{
enum dma_status status;
unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
// (re)issue pending descriptors, so the caller does not strictly need
// to have called dma_async_issue_pending() beforehand
dma_async_issue_pending(chan);
do {
status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
dev_err(chan->device->dev, "%s: timeout!\n", __func__);
return DMA_ERROR;
}
if (status != DMA_IN_PROGRESS)
break;
// cpu_relax() is only a busy-wait hint to the CPU;
// this loop polls, it does not sleep
cpu_relax();
} while (1);
return status;
}
// Query a cookie's completion state (excerpt).
static inline enum dma_status dma_async_is_tx_complete(struct dma_chan *chan,
dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used)
{
...// delegate to the provider's device_tx_status hook
status = chan->device->device_tx_status(chan, cookie, &state);
...
return status;
}
// Provider's device_tx_status implementation (excerpt).
static enum dma_status stm32_dma_tx_status(struct dma_chan *c,
dma_cookie_t cookie,
struct dma_tx_state *state)
{
... // report the status based on hardware/cookie state
if (status == DMA_COMPLETE || !state)
return status;
...
return status;
}
6.6 dma子系统如何通过回调通知用户dma状态
如下一个dma驱动应用大致调用接口流程,中间配置了callback。
static ssize_t dma_example_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos)
{
struct dma_async_tx_descriptor *desc;
...
desc = dmaengine_prep_dma_memcpy(dma_chan, dst_dma_addr, src_dma_addr, BUFFER_SIZE, DMA_PREP_INTERRUPT);
...
// 设置回调函数
desc->callback = dma_callback;
desc->callback_param = NULL;
cookie = dmaengine_submit(desc);
...
dma_async_issue_pending(dma_chan);
...}
这里配置的 dma_callback,将在dma传输完成后,由dma中断中调度的tasklet调用。
在stm32 dma硬件驱动(drivers/dma/stm32-dma.c)probe初始化过程中,为每个dma通道各自创建了一个tasklet:
static int stm32_dma_probe(struct platform_device *pdev)
{
...
for (i = 0; i < STM32_DMA_MAX_CHANNELS; i++) {
chan = &dmadev->chan[i];
chan->id = i;
chan->vchan.desc_free = stm32_dma_desc_free;
vchan_init(&chan->vchan, dd);
}
...
}
vchan_init实现在drivers/dma/virt-dma.c:
// Initialise a virtual channel and attach it to the dma_device (excerpt).
void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev)
{
dma_cookie_init(&vc->chan);
...
tasklet_setup(&vc->task, vchan_complete);// set up the completion tasklet (softirq context, not a thread) with its handler
...
list_add_tail(&vc->chan.device_node, &dmadev->channels);// add this channel to the device's channel list
}
vchan_complete主要实现了对用户传入的callback进行回调
// Tasklet handler (excerpt): invokes the callbacks that consumers attached
// to the now-completed descriptors.
static void vchan_complete(struct tasklet_struct *t)
{
struct virt_dma_chan *vc = from_tasklet(vc, t, task);
struct virt_dma_desc *vd, *_vd;
struct dmaengine_desc_callback cb;
LIST_HEAD(head);
...
vd = vc->cyclic;
if (vd) {
vc->cyclic = NULL;
// tx carries the callback the consumer set before submit;
// copy that callback info into cb
dmaengine_desc_get_callback(&vd->tx, &cb);
} else {
memset(&cb, 0, sizeof(cb));
}
...
// invoke the consumer's callback
dmaengine_desc_callback_invoke(&cb, &vd->tx_result);
// same logic for every completed descriptor on the list: notify each
// consumer that registered a callback
list_for_each_entry_safe(vd, _vd, &head, node) {
dmaengine_desc_get_callback(&vd->tx, &cb);
list_del(&vd->node);
dmaengine_desc_callback_invoke(&cb, &vd->tx_result);
vchan_vdesc_fini(vd);
}
}
重新看下callback的下发,前面应用调用流程知道callback配置到desc中:
static ssize_t dma_example_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos)
{
struct dma_async_tx_descriptor *desc;
...
// 设置回调函数
desc->callback = dma_callback;
desc->callback_param = NULL;
cookie = dmaengine_submit(desc);
...}
调用到的下一层回调
dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *tx)
{
struct virt_dma_chan *vc = to_virt_chan(tx->chan);
struct virt_dma_desc *vd = to_virt_desc(tx);//vd和tx绑定,后续能根据vd找到tx回调
...
list_move_tail(&vd->node, &vc->desc_submitted);//加入node链表
...
}
上述分析了回调被调用的流程:需要调度tasklet才能执行vchan_complete来完成回调,而tasklet的调度在中断中完成。硬件驱动初始化时,会为每个channel的中断注册绑定一个服务函数:
static int stm32_dma_probe(struct platform_device *pdev)
{
...
for (i = 0; i < STM32_DMA_MAX_CHANNELS; i++) {
chan = &dmadev->chan[i];
ret = platform_get_irq(pdev, i);
...
chan->irq = ret;
ret = devm_request_irq(&pdev->dev, chan->irq,stm32_dma_chan_irq, 0,dev_name(chan2dev(chan)), chan);
...}
...}
中断服务函数如下:
// Per-channel interrupt handler (excerpt).
static irqreturn_t stm32_dma_chan_irq(int irq, void *devid)
{
...
if (status & STM32_DMA_TCI) {// transfer-complete flag for this channel
stm32_dma_irq_clear(chan, STM32_DMA_TCI);
if (scr & STM32_DMA_SCR_TCIE)
stm32_dma_handle_chan_done(chan);
status &= ~STM32_DMA_TCI;
}
....
return IRQ_HANDLED;
}
完成函数如下,如果支持循环搬运,持续配置dma寄存器工作,完成情况下,调用vchan_cookie_complete
// Completion handling: for cyclic transfers keep the hardware running and
// fire the cyclic callback; otherwise, once all sg segments are done, mark
// the cookie complete and start the next queued transfer.
static void stm32_dma_handle_chan_done(struct stm32_dma_chan *chan)
{
if (chan->desc) {
if (chan->desc->cyclic) {
vchan_cyclic_callback(&chan->desc->vdesc);
chan->next_sg++;
stm32_dma_configure_next_sg(chan);
} else {
chan->busy = false;
if (chan->next_sg == chan->desc->num_sgs) {
vchan_cookie_complete(&chan->desc->vdesc);
chan->desc = NULL;
}
stm32_dma_start_transfer(chan);
}
}
}
vchan_cookie_complete实现在drivers/dma/virt-dma.h:
// Mark a descriptor's cookie complete (excerpt).
static inline void vchan_cookie_complete(struct virt_dma_desc *vd)
{
...
tasklet_schedule(&vc->task);// schedule the completion tasklet (which runs vchan_complete)
}
7 几个关键数据结构关系
如下关系图大致描述了几个关键的数据结构关系:dma_device_list维护了多个dma_device,每个dma IP被抽象成一个dma_device,同时被struct dma_chan引用;子系统中用virt_dma_chan来表达一个物理channel,每个channel对应一个vchan,同时为用户生成了该dma属性结构体struct virt_dma_desc。