Web lists-archives.com

[PATCH RFC 7/8] mm: allow to control onlining/offlining of memory by a driver




Some devices (esp. paravirtualized) might want to control
- when to online/offline a memory block
- how to online memory (MOVABLE/NORMAL)
- in which granularity to online/offline memory

So let's add a new flag "driver_managed" and disallow to change the
state by user space. Device onlining/offlining will still work, however
the memory will not be actually onlined/offlined. That has to be handled
by the device driver that owns the memory.

Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
---
 drivers/base/memory.c          | 22 ++++++++++++++--------
 drivers/xen/balloon.c          |  2 +-
 include/linux/memory.h         |  1 +
 include/linux/memory_hotplug.h |  4 +++-
 mm/memory_hotplug.c            | 34 ++++++++++++++++++++++++++++++++--
 5 files changed, 51 insertions(+), 12 deletions(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index bffe8616bd55..3b8616551561 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -231,27 +231,28 @@ static bool pages_correctly_probed(unsigned long start_pfn)
  * Must already be protected by mem_hotplug_begin().
  */
 static int
-memory_block_action(unsigned long phys_index, unsigned long action, int online_type)
+memory_block_action(struct memory_block *mem, unsigned long action)
 {
-	unsigned long start_pfn;
+	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
-	int ret;
+	int ret = 0;
 
-	start_pfn = section_nr_to_pfn(phys_index);
+	if (mem->driver_managed)
+		return 0;
 
 	switch (action) {
 	case MEM_ONLINE:
 		if (!pages_correctly_probed(start_pfn))
 			return -EBUSY;
 
-		ret = online_pages(start_pfn, nr_pages, online_type);
+		ret = online_pages(start_pfn, nr_pages, mem->online_type);
 		break;
 	case MEM_OFFLINE:
 		ret = offline_pages(start_pfn, nr_pages);
 		break;
 	default:
 		WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
-		     "%ld\n", __func__, phys_index, action, action);
+		     "%ld\n", __func__, mem->start_section_nr, action, action);
 		ret = -EINVAL;
 	}
 
@@ -269,8 +270,7 @@ static int memory_block_change_state(struct memory_block *mem,
 	if (to_state == MEM_OFFLINE)
 		mem->state = MEM_GOING_OFFLINE;
 
-	ret = memory_block_action(mem->start_section_nr, to_state,
-				mem->online_type);
+	ret = memory_block_action(mem, to_state);
 
 	mem->state = ret ? from_state_req : to_state;
 
@@ -350,6 +350,11 @@ store_mem_state(struct device *dev,
 	 */
 	mem_hotplug_begin();
 
+	if (mem->driver_managed) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	switch (online_type) {
 	case MMOP_ONLINE_KERNEL:
 	case MMOP_ONLINE_MOVABLE:
@@ -364,6 +369,7 @@ store_mem_state(struct device *dev,
 		ret = -EINVAL; /* should never happen */
 	}
 
+out:
 	mem_hotplug_done();
 err:
 	unlock_device_hotplug();
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 065f0b607373..89981d573c06 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -401,7 +401,7 @@ static enum bp_state reserve_additional_memory(void)
 	 * callers drop the mutex before trying again.
 	 */
 	mutex_unlock(&balloon_mutex);
-	rc = add_memory_resource(nid, resource, memhp_auto_online);
+	rc = add_memory_resource(nid, resource, memhp_auto_online, false);
 	mutex_lock(&balloon_mutex);
 
 	if (rc) {
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 9f8cd856ca1e..018c5e5ecde1 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -29,6 +29,7 @@ struct memory_block {
 	unsigned long state;		/* serialized by the dev->lock */
 	int section_count;		/* serialized by mem_sysfs_mutex */
 	int online_type;		/* for passing data to online routine */
+	bool driver_managed;		/* driver handles online/offline */
 	int phys_device;		/* to which fru does this belong? */
 	void *hw;			/* optional pointer to fw/hw data */
 	int (*phys_callback)(struct memory_block *);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index e0e49b5b1ee1..46c6ceb1110d 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -320,7 +320,9 @@ static inline void remove_memory(int nid, u64 start, u64 size) {}
 extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
 		void *arg, int (*func)(struct memory_block *, void *));
 extern int add_memory(int nid, u64 start, u64 size);
-extern int add_memory_resource(int nid, struct resource *resource, bool online);
+extern int add_memory_driver_managed(int nid, u64 start, u64 size);
+extern int add_memory_resource(int nid, struct resource *resource, bool online,
+			       bool driver_managed);
 extern int arch_add_memory(int nid, u64 start, u64 size,
 		struct vmem_altmap *altmap, bool want_memblock);
 extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1d6054edc241..ac14ea772792 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1108,8 +1108,15 @@ static int online_memory_block(struct memory_block *mem, void *arg)
 	return device_online(&mem->dev);
 }
 
+static int mark_memory_block_driver_managed(struct memory_block *mem, void *arg)
+{
+	mem->driver_managed = true;
+	return 0;
+}
+
 /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
-int __ref add_memory_resource(int nid, struct resource *res, bool online)
+int __ref add_memory_resource(int nid, struct resource *res, bool online,
+			      bool driver_managed)
 {
 	u64 start, size;
 	pg_data_t *pgdat = NULL;
@@ -1117,6 +1124,9 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
 	bool new_node;
 	int ret;
 
+	if (online && driver_managed)
+		return -EINVAL;
+
 	start = res->start;
 	size = resource_size(res);
 
@@ -1188,6 +1198,9 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
 	if (online)
 		walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1),
 				  NULL, online_memory_block);
+	else if (driver_managed)
+		walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1),
+				  NULL, mark_memory_block_driver_managed);
 
 	goto out;
 
@@ -1212,13 +1225,30 @@ int __ref add_memory(int nid, u64 start, u64 size)
 	if (IS_ERR(res))
 		return PTR_ERR(res);
 
-	ret = add_memory_resource(nid, res, memhp_auto_online);
+	ret = add_memory_resource(nid, res, memhp_auto_online, false);
 	if (ret < 0)
 		release_memory_resource(res);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(add_memory);
 
+int __ref add_memory_driver_managed(int nid, u64 start, u64 size)
+{
+	struct resource *res;
+	int ret;
+
+	res = register_memory_resource(start, size);
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	ret = add_memory_resource(nid, res, false, true);
+	if (ret < 0)
+		release_memory_resource(res);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(add_memory_driver_managed);
+
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 /*
  * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
-- 
2.14.3