[PATCH] nvme: Fix missing inbound DMA offset calculation
Neil Armstrong
neil.armstrong at linaro.org
Wed May 6 09:52:50 CEST 2026
On 4/30/26 16:54, Torsten Duwe wrote:
> From: Torsten Duwe <duwe at suse.de>
>
> U-Boot currently does not account for PCIe bridges with a non-zero
> inbound access offset when talking NVMe, it only works on platforms
> where this offset happens to be zero.
>
> This patch enhances the NVMe driver with the ability to also handle
> these cases. The fix is required to boot e.g. the Raspberry Pi5 from
> NVMe.
>
> Signed-off-by: Torsten Duwe <duwe at suse.de>
> ---
>
> Kudos for the idea of case discrimination go to Andrea; this eases
> things a lot.
>
> I do consider this a bug fix. The problem has popped up during RPi5
> development but really is a generic major defect IMHO. Please apply
> wherever appropriate.
>
> Torsten
>
> ---
> drivers/nvme/nvme.c | 39 ++++++++++++++++++++++++++-------------
> 1 file changed, 26 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/nvme/nvme.c b/drivers/nvme/nvme.c
> index 2b14437f69c..8056f52fb50 100644
> --- a/drivers/nvme/nvme.c
> +++ b/drivers/nvme/nvme.c
> @@ -12,6 +12,7 @@
> #include <log.h>
> #include <malloc.h>
> #include <memalign.h>
> +#include <phys2bus.h>
> #include <time.h>
> #include <dm/device-internal.h>
> #include <linux/compat.h>
> @@ -27,6 +28,18 @@
> #define IO_TIMEOUT 30
> #define MAX_PRP_POOL 512
>
> +/*
> + * This macro detects the correct inbound DMA offset in both cases:
> + * 1) the NVMe is mentioned in the device tree, along with its parent,
> + * and no intermediate PCIe bus node in between.
> + * 2) the NVMe was found dynamically by PCI enumeration which has created
> + * an intermediate PCIe bus node. That node will be skipped and the offset
> + * looked up in the grandparent.
> + */
> +#define DEV_ADDR(a) (dev_has_ofnode(dev->udev) ? \
> + dev_phys_to_bus(dev->udev, (a)) : \
> + dev_phys_to_bus(dev->udev->parent, (a)))
This looks subspicious, what is the state of the rpi5 here ? with a DT node or without ?
AFAIK NVMes are always dynamically detected, it seems the dma_offset is not correctly
copied to the nvme udev when created.
Could you look into this direction instead ?
> +
> static int nvme_wait_csts(struct nvme_dev *dev, u32 mask, u32 val)
> {
> int timeout;
> @@ -91,12 +104,12 @@ static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
> i = 0;
> while (nprps) {
> if ((i == (prps_per_page - 1)) && nprps > 1) {
> - *(prp_pool + i) = cpu_to_le64((ulong)prp_pool +
> - page_size);
> + *(prp_pool + i) = cpu_to_le64(DEV_ADDR((ulong)prp_pool +
> + page_size));
> i = 0;
> prp_pool += page_size;
> }
> - *(prp_pool + i++) = cpu_to_le64(dma_addr);
> + *(prp_pool + i++) = cpu_to_le64(DEV_ADDR(dma_addr));
> dma_addr += page_size;
> nprps--;
> }
> @@ -393,8 +406,8 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
> dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
>
> writel(aqa, &dev->bar->aqa);
> - nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
> - nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);
> + nvme_writeq(DEV_ADDR((ulong)nvmeq->sq_cmds), &dev->bar->asq);
> + nvme_writeq(DEV_ADDR((ulong)nvmeq->cqes), &dev->bar->acq);
>
> result = nvme_enable_ctrl(dev);
> if (result)
> @@ -420,7 +433,7 @@ static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
>
> memset(&c, 0, sizeof(c));
> c.create_cq.opcode = nvme_admin_create_cq;
> - c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
> + c.create_cq.prp1 = cpu_to_le64(DEV_ADDR((ulong)nvmeq->cqes));
> c.create_cq.cqid = cpu_to_le16(qid);
> c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
> c.create_cq.cq_flags = cpu_to_le16(flags);
> @@ -437,7 +450,7 @@ static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
>
> memset(&c, 0, sizeof(c));
> c.create_sq.opcode = nvme_admin_create_sq;
> - c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
> + c.create_sq.prp1 = cpu_to_le64(DEV_ADDR((ulong)nvmeq->sq_cmds));
> c.create_sq.sqid = cpu_to_le16(qid);
> c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
> c.create_sq.sq_flags = cpu_to_le16(flags);
> @@ -458,14 +471,14 @@ int nvme_identify(struct nvme_dev *dev, unsigned nsid,
> memset(&c, 0, sizeof(c));
> c.identify.opcode = nvme_admin_identify;
> c.identify.nsid = cpu_to_le32(nsid);
> - c.identify.prp1 = cpu_to_le64(dma_addr);
> + c.identify.prp1 = cpu_to_le64(DEV_ADDR(dma_addr));
>
> length -= (page_size - offset);
> if (length <= 0) {
> c.identify.prp2 = 0;
> } else {
> dma_addr += (page_size - offset);
> - c.identify.prp2 = cpu_to_le64(dma_addr);
> + c.identify.prp2 = cpu_to_le64(DEV_ADDR(dma_addr));
> }
>
> c.identify.cns = cpu_to_le32(cns);
> @@ -490,7 +503,7 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
> memset(&c, 0, sizeof(c));
> c.features.opcode = nvme_admin_get_features;
> c.features.nsid = cpu_to_le32(nsid);
> - c.features.prp1 = cpu_to_le64(dma_addr);
> + c.features.prp1 = cpu_to_le64(DEV_ADDR(dma_addr));
> c.features.fid = cpu_to_le32(fid);
>
> ret = nvme_submit_admin_cmd(dev, &c, result);
> @@ -516,7 +529,7 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
>
> memset(&c, 0, sizeof(c));
> c.features.opcode = nvme_admin_set_features;
> - c.features.prp1 = cpu_to_le64(dma_addr);
> + c.features.prp1 = cpu_to_le64(DEV_ADDR(dma_addr));
> c.features.fid = cpu_to_le32(fid);
> c.features.dword11 = cpu_to_le32(dword11);
>
> @@ -785,8 +798,8 @@ static ulong nvme_blk_rw(struct udevice *udev, lbaint_t blknr,
> c.rw.slba = cpu_to_le64(slba);
> slba += lbas;
> c.rw.length = cpu_to_le16(lbas - 1);
> - c.rw.prp1 = cpu_to_le64(temp_buffer);
> - c.rw.prp2 = cpu_to_le64(prp2);
> + c.rw.prp1 = cpu_to_le64(DEV_ADDR(temp_buffer));
> + c.rw.prp2 = cpu_to_le64(DEV_ADDR(prp2));
> status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
> &c, NULL, IO_TIMEOUT);
> if (status)
More information about the U-Boot
mailing list