Skip to content

Commit 08cd329

Browse files
committed
powerpc/pseries/iommu: memory notifier incorrectly adds TCEs for pmemory
jira LE-4159 Rebuild_History Non-Buildable kernel-5.14.0-570.42.2.el9_6 commit-author Gaurav Batra <gbatra@linux.ibm.com> commit 6aa989a iommu_mem_notifier() is invoked when RAM is dynamically added/removed. This notifier is responsible for adding/removing TCEs from the Dynamic DMA Window (DDW) when TCEs are pre-mapped. TCEs are pre-mapped only for RAM and not for persistent memory (pmemory). For DMA buffers in pmemory, TCEs are dynamically mapped when the device driver instructs to do so. The issue is that the 'daxctl' command is capable of adding pmemory as "System RAM" after LPAR boot. The command to do so is: daxctl reconfigure-device --mode=system-ram dax0.0 --force This dynamically adds the pmemory range to LPAR RAM, eventually invoking iommu_mem_notifier(). The address range of pmemory is far beyond the maximum RAM that the LPAR can have, which means this range is beyond the DDW created for the device at device initialization time. As a result, when iommu_mem_notifier() pre-maps TCEs for the pmemory range, the PHYP HCALL returns H_PARAMETER. This caused the daxctl command to fail to add pmemory as RAM. The solution is to not pre-map TCEs for pmemory. Signed-off-by: Gaurav Batra <gbatra@linux.ibm.com> Tested-by: Donet Tom <donettom@linux.ibm.com> Reviewed-by: Donet Tom <donettom@linux.ibm.com> Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> Link: https://patch.msgid.link/20250130183854.92258-1-gbatra@linux.ibm.com (cherry picked from commit 6aa989a) Signed-off-by: Jonathan Maple <jmaple@ciq.com>
1 parent 304da2d commit 08cd329

File tree

3 files changed

+18
-14
lines changed

3 files changed

+18
-14
lines changed

arch/powerpc/include/asm/mmzone.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ extern cpumask_var_t node_to_cpumask_map[];
3535
#ifdef CONFIG_MEMORY_HOTPLUG
3636
extern unsigned long max_pfn;
3737
u64 memory_hotplug_max(void);
38+
u64 hot_add_drconf_memory_max(void);
3839
#else
3940
#define memory_hotplug_max() memblock_end_of_DRAM()
4041
#endif

arch/powerpc/mm/numa.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1354,7 +1354,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
13541354
return nid;
13551355
}
13561356

1357-
static u64 hot_add_drconf_memory_max(void)
1357+
u64 hot_add_drconf_memory_max(void)
13581358
{
13591359
struct device_node *memory = NULL;
13601360
struct device_node *dn = NULL;

arch/powerpc/platforms/pseries/iommu.c

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,17 +1287,13 @@ static LIST_HEAD(failed_ddw_pdn_list);
12871287

12881288
static phys_addr_t ddw_memory_hotplug_max(void)
12891289
{
1290-
resource_size_t max_addr = memory_hotplug_max();
1291-
struct device_node *memory;
1290+
resource_size_t max_addr;
12921291

1293-
for_each_node_by_type(memory, "memory") {
1294-
struct resource res;
1295-
1296-
if (of_address_to_resource(memory, 0, &res))
1297-
continue;
1298-
1299-
max_addr = max_t(resource_size_t, max_addr, res.end + 1);
1300-
}
1292+
#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
1293+
max_addr = hot_add_drconf_memory_max();
1294+
#else
1295+
max_addr = memblock_end_of_DRAM();
1296+
#endif
13011297

13021298
return max_addr;
13031299
}
@@ -1603,7 +1599,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
16031599

16041600
if (direct_mapping) {
16051601
/* DDW maps the whole partition, so enable direct DMA mapping */
1606-
ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
1602+
ret = walk_system_ram_range(0, ddw_memory_hotplug_max() >> PAGE_SHIFT,
16071603
win64->value, tce_setrange_multi_pSeriesLP_walk);
16081604
if (ret) {
16091605
dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n",
@@ -2349,11 +2345,17 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
23492345
struct memory_notify *arg = data;
23502346
int ret = 0;
23512347

2348+
/* This notifier can get called when onlining persistent memory as well.
2349+
* TCEs are not pre-mapped for persistent memory. Persistent memory will
2350+
* always be above ddw_memory_hotplug_max()
2351+
*/
2352+
23522353
switch (action) {
23532354
case MEM_GOING_ONLINE:
23542355
spin_lock(&dma_win_list_lock);
23552356
list_for_each_entry(window, &dma_win_list, list) {
2356-
if (window->direct) {
2357+
if (window->direct && (arg->start_pfn << PAGE_SHIFT) <
2358+
ddw_memory_hotplug_max()) {
23572359
ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
23582360
arg->nr_pages, window->prop);
23592361
}
@@ -2365,7 +2367,8 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
23652367
case MEM_OFFLINE:
23662368
spin_lock(&dma_win_list_lock);
23672369
list_for_each_entry(window, &dma_win_list, list) {
2368-
if (window->direct) {
2370+
if (window->direct && (arg->start_pfn << PAGE_SHIFT) <
2371+
ddw_memory_hotplug_max()) {
23692372
ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
23702373
arg->nr_pages, window->prop);
23712374
}

0 commit comments

Comments
 (0)