[PATCH] nvme: Fix missing inbound DMA offset calculation

Neil Armstrong neil.armstrong at linaro.org
Wed May 6 10:59:55 CEST 2026


On 5/6/26 09:52, Neil Armstrong wrote:
> On 4/30/26 16:54, Torsten Duwe wrote:
>> From: Torsten Duwe <duwe at suse.de>
>>
>> U-Boot currently does not account for PCIe bridges with a non-zero
>> inbound access offset when talking NVMe, it only works on platforms
>> where this offset happens to be zero.
>>
>> This patch enhances the NVMe driver with the ability to also handle
>> these cases. The fix is required to boot e.g. the Raspberry Pi5 from
>> NVMe.
>>
>> Signed-off-by: Torsten Duwe <duwe at suse.de>
>> ---
>>
>> Kudos for the idea of case discrimination go to Andrea; this eases
>> things a lot.
>>
>> I do consider this a bug fix. The problem has popped up during RPi5
>> development but really is a generic major defect IMHO. Please apply
>> wherever appropriate.
>>
>>     Torsten
>>
>> ---
>>   drivers/nvme/nvme.c | 39 ++++++++++++++++++++++++++-------------
>>   1 file changed, 26 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/nvme/nvme.c b/drivers/nvme/nvme.c
>> index 2b14437f69c..8056f52fb50 100644
>> --- a/drivers/nvme/nvme.c
>> +++ b/drivers/nvme/nvme.c
>> @@ -12,6 +12,7 @@
>>   #include <log.h>
>>   #include <malloc.h>
>>   #include <memalign.h>
>> +#include <phys2bus.h>
>>   #include <time.h>
>>   #include <dm/device-internal.h>
>>   #include <linux/compat.h>
>> @@ -27,6 +28,18 @@
>>   #define IO_TIMEOUT        30
>>   #define MAX_PRP_POOL        512
>> +/*
>> + * This macro detects the correct inbound DMA offset in both cases:
>> + *   1) the NVMe is mentioned in the device tree, along with its parent,
>> + *    and no intermediate PCIe bus node in between.
>> + *   2) the NVMe was found dynamically by PCI enumeration which has created
>> + *    an intermediate PCIe bus node. That node will be skipped and the offset
>> + *    looked up in the grandparent.
>> + */
>> +#define DEV_ADDR(a)        (dev_has_ofnode(dev->udev) ? \
>> +                    dev_phys_to_bus(dev->udev, (a)) : \
>> +                    dev_phys_to_bus(dev->udev->parent, (a)))
> 
> This looks suspicious — what is the state of the RPi5 here? With a DT node or without?
> 
> AFAIK NVMe devices are always detected dynamically; it seems the dma_offset is not
> correctly copied to the NVMe udev when it is created.
> 
> Could you look into this direction instead ?

Ok so I think the problem is from this test:

static int device_get_dma_constraints(struct udevice *dev)
{
...
	if (!CONFIG_IS_ENABLED(DM_DMA) || !parent || !dev_has_ofnode(parent))
                                                             /\
		return 0;

The NVMe is a child of the root bus, but it doesn't seem the RPi5 has a node
for the root bus of PCI1, so the DMA constraints are applied to the root port
but not to the devices below it.

So to check could you try adding something like:

```
diff --git a/dts/upstream/src/arm64/broadcom/bcm2712-rpi-5-b-base.dtsi b/dts/upstream/src/arm64/broadcom/bcm2712-rpi-5-b-base.dtsi
index 04738bf281e..640c5987077 100644
--- a/dts/upstream/src/arm64/broadcom/bcm2712-rpi-5-b-base.dtsi
+++ b/dts/upstream/src/arm64/broadcom/bcm2712-rpi-5-b-base.dtsi
@@ -247,6 +247,15 @@

  &pcie1 {
         status = "okay";
+
+       pci at 0,0 {
+               reg = <0x0 0x0 0x0 0x0 0x0>;
+               ranges;
+               bus-range = <0 1>;
+               device_type = "pci";
+               #address-cells = <3>;
+               #size-cells = <2>;
+       };
  };

  &pcie2 {
```

or:

```
--- a/drivers/core/device.c
+++ b/drivers/core/device.c
@@ -459,7 +459,19 @@ static int device_get_dma_constraints(struct udevice *dev)
         u64 size = 0;
         int ret;

-       if (!CONFIG_IS_ENABLED(DM_DMA) || !parent || !dev_has_ofnode(parent))
+       if (!CONFIG_IS_ENABLED(DM_DMA) || !parent)
+               return 0;
+
+       /* Look for the first node in the parent chain */
+       while (parent) {
+               if (dev_has_ofnode(parent))
+                       break;
+
+               parent = dev_get_parent(parent);
+       }
+
+       /* No parents have a node, bail out */
+       if (!parent)
                 return 0;

         /*
```
Which will try to find the node higher in the parent chain.

Neil

> 
>> +
>>   static int nvme_wait_csts(struct nvme_dev *dev, u32 mask, u32 val)
>>   {
>>       int timeout;
>> @@ -91,12 +104,12 @@ static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
>>       i = 0;
>>       while (nprps) {
>>           if ((i == (prps_per_page - 1)) && nprps > 1) {
>> -            *(prp_pool + i) = cpu_to_le64((ulong)prp_pool +
>> -                    page_size);
>> +            *(prp_pool + i) = cpu_to_le64(DEV_ADDR((ulong)prp_pool +
>> +                                page_size));
>>               i = 0;
>>               prp_pool += page_size;
>>           }
>> -        *(prp_pool + i++) = cpu_to_le64(dma_addr);
>> +        *(prp_pool + i++) = cpu_to_le64(DEV_ADDR(dma_addr));
>>           dma_addr += page_size;
>>           nprps--;
>>       }
>> @@ -393,8 +406,8 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
>>       dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
>>       writel(aqa, &dev->bar->aqa);
>> -    nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
>> -    nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);
>> +    nvme_writeq(DEV_ADDR((ulong)nvmeq->sq_cmds), &dev->bar->asq);
>> +    nvme_writeq(DEV_ADDR((ulong)nvmeq->cqes), &dev->bar->acq);
>>       result = nvme_enable_ctrl(dev);
>>       if (result)
>> @@ -420,7 +433,7 @@ static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
>>       memset(&c, 0, sizeof(c));
>>       c.create_cq.opcode = nvme_admin_create_cq;
>> -    c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
>> +    c.create_cq.prp1 = cpu_to_le64(DEV_ADDR((ulong)nvmeq->cqes));
>>       c.create_cq.cqid = cpu_to_le16(qid);
>>       c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
>>       c.create_cq.cq_flags = cpu_to_le16(flags);
>> @@ -437,7 +450,7 @@ static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
>>       memset(&c, 0, sizeof(c));
>>       c.create_sq.opcode = nvme_admin_create_sq;
>> -    c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
>> +    c.create_sq.prp1 = cpu_to_le64(DEV_ADDR((ulong)nvmeq->sq_cmds));
>>       c.create_sq.sqid = cpu_to_le16(qid);
>>       c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
>>       c.create_sq.sq_flags = cpu_to_le16(flags);
>> @@ -458,14 +471,14 @@ int nvme_identify(struct nvme_dev *dev, unsigned nsid,
>>       memset(&c, 0, sizeof(c));
>>       c.identify.opcode = nvme_admin_identify;
>>       c.identify.nsid = cpu_to_le32(nsid);
>> -    c.identify.prp1 = cpu_to_le64(dma_addr);
>> +    c.identify.prp1 = cpu_to_le64(DEV_ADDR(dma_addr));
>>       length -= (page_size - offset);
>>       if (length <= 0) {
>>           c.identify.prp2 = 0;
>>       } else {
>>           dma_addr += (page_size - offset);
>> -        c.identify.prp2 = cpu_to_le64(dma_addr);
>> +        c.identify.prp2 = cpu_to_le64(DEV_ADDR(dma_addr));
>>       }
>>       c.identify.cns = cpu_to_le32(cns);
>> @@ -490,7 +503,7 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
>>       memset(&c, 0, sizeof(c));
>>       c.features.opcode = nvme_admin_get_features;
>>       c.features.nsid = cpu_to_le32(nsid);
>> -    c.features.prp1 = cpu_to_le64(dma_addr);
>> +    c.features.prp1 = cpu_to_le64(DEV_ADDR(dma_addr));
>>       c.features.fid = cpu_to_le32(fid);
>>       ret = nvme_submit_admin_cmd(dev, &c, result);
>> @@ -516,7 +529,7 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
>>       memset(&c, 0, sizeof(c));
>>       c.features.opcode = nvme_admin_set_features;
>> -    c.features.prp1 = cpu_to_le64(dma_addr);
>> +    c.features.prp1 = cpu_to_le64(DEV_ADDR(dma_addr));
>>       c.features.fid = cpu_to_le32(fid);
>>       c.features.dword11 = cpu_to_le32(dword11);
>> @@ -785,8 +798,8 @@ static ulong nvme_blk_rw(struct udevice *udev, lbaint_t blknr,
>>           c.rw.slba = cpu_to_le64(slba);
>>           slba += lbas;
>>           c.rw.length = cpu_to_le16(lbas - 1);
>> -        c.rw.prp1 = cpu_to_le64(temp_buffer);
>> -        c.rw.prp2 = cpu_to_le64(prp2);
>> +        c.rw.prp1 = cpu_to_le64(DEV_ADDR(temp_buffer));
>> +        c.rw.prp2 = cpu_to_le64(DEV_ADDR(prp2));
>>           status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
>>                   &c, NULL, IO_TIMEOUT);
>>           if (status)
> 



More information about the U-Boot mailing list