Web lists-archives.com

[PATCH bpf-next v5 5/5] error-injection: Support fault injection framework




Support in-kernel fault-injection framework via debugfs.
This allows you to inject a conditional error to specified
function using debugfs interfaces.

Here is the result of test script described in
Documentation/fault-injection/fault-injection.txt

  ===========
  # ./test_fail_function.sh
  1+0 records in
  1+0 records out
  1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.0227404 s, 46.1 MB/s
  btrfs-progs v4.4
  See http://btrfs.wiki.kernel.org for more information.

  Label:              (null)
  UUID:               bfa96010-12e9-4360-aed0-42eec7af5798
  Node size:          16384
  Sector size:        4096
  Filesystem size:    1001.00MiB
  Block group profiles:
    Data:             single            8.00MiB
    Metadata:         DUP              58.00MiB
    System:           DUP              12.00MiB
  SSD detected:       no
  Incompat features:  extref, skinny-metadata
  Number of devices:  1
  Devices:
     ID        SIZE  PATH
      1  1001.00MiB  /dev/loop2

  mount: mount /dev/loop2 on /opt/tmpmnt failed: Cannot allocate memory
  SUCCESS!
  ===========


Signed-off-by: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
Reviewed-by: Josef Bacik <jbacik@xxxxxx>
---
  Changes in v3:
   - Check and adjust error value for each target function
   - Clear kporbe flag for reuse
   - Add more documents and example
  Changes in v5:
   - Support multi-function error injection
---
 Documentation/fault-injection/fault-injection.txt |   68 ++++
 kernel/Makefile                                   |    1 
 kernel/fail_function.c                            |  349 +++++++++++++++++++++
 lib/Kconfig.debug                                 |   10 +
 4 files changed, 428 insertions(+)
 create mode 100644 kernel/fail_function.c

diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 918972babcd8..f4a32463ca48 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -30,6 +30,12 @@ o fail_mmc_request
   injects MMC data errors on devices permitted by setting
   debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request
 
+o fail_function
+
+  injects error return on specific functions, which are marked by
+  ALLOW_ERROR_INJECTION() macro, by setting debugfs entries
+  under /sys/kernel/debug/fail_function. No boot option supported.
+
 Configure fault-injection capabilities behavior
 -----------------------------------------------
 
@@ -123,6 +129,29 @@ configuration of fault-injection capabilities.
 	default is 'N', setting it to 'Y' will disable failure injections
 	when dealing with private (address space) futexes.
 
+- /sys/kernel/debug/fail_function/inject:
+
+	Format: { 'function-name' | '!function-name' | '' }
+	specifies the target function of error injection by name.
+	If the function name leads '!' prefix, given function is
+	removed from injection list. If nothing specified ('')
+	injection list is cleared.
+
+- /sys/kernel/debug/fail_function/injectable:
+
+	(read only) shows error injectable functions and what type of
+	error values can be specified. The error type will be one of
+	below;
+	- NULL:	retval must be 0.
+	- ERRNO: retval must be -1 to -MAX_ERRNO (-4096).
+	- ERR_NULL: retval must be 0 or -1 to -MAX_ERRNO (-4096).
+
+- /sys/kernel/debug/fail_function/<functiuon-name>/retval:
+
+	specifies the "error" return value to inject to the given
+	function for given function. This will be created when
+	user specifies new injection entry.
+
 o Boot option
 
 In order to inject faults while debugfs is not available (early boot time),
@@ -268,6 +297,45 @@ trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT
 echo "Injecting errors into the module $module... (interrupt to stop)"
 sleep 1000000
 
+------------------------------------------------------------------------------
+
+o Inject open_ctree error while btrfs mount
+
+#!/bin/bash
+
+rm -f testfile.img
+dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
+DEVICE=$(losetup --show -f testfile.img)
+mkfs.btrfs -f $DEVICE
+mkdir -p tmpmnt
+
+FAILTYPE=fail_function
+FAILFUNC=open_ctree
+echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject
+echo -12 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval
+echo N > /sys/kernel/debug/$FAILTYPE/task-filter
+echo 100 > /sys/kernel/debug/$FAILTYPE/probability
+echo 0 > /sys/kernel/debug/$FAILTYPE/interval
+echo -1 > /sys/kernel/debug/$FAILTYPE/times
+echo 0 > /sys/kernel/debug/$FAILTYPE/space
+echo 1 > /sys/kernel/debug/$FAILTYPE/verbose
+
+mount -t btrfs $DEVICE tmpmnt
+if [ $? -ne 0 ]
+then
+	echo "SUCCESS!"
+else
+	echo "FAILED!"
+	umount tmpmnt
+fi
+
+echo > /sys/kernel/debug/$FAILTYPE/inject
+
+rmdir tmpmnt
+losetup -d $DEVICE
+rm testfile.img
+
+
 Tool to run command with failslab or fail_page_alloc
 ----------------------------------------------------
 In order to make it easier to accomplish the tasks mentioned above, we can use
diff --git a/kernel/Makefile b/kernel/Makefile
index 172d151d429c..f85ae5dfa474 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -81,6 +81,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_GCOV_KERNEL) += gcov/
 obj-$(CONFIG_KCOV) += kcov.o
 obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o
 obj-$(CONFIG_KGDB) += debug/
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
diff --git a/kernel/fail_function.c b/kernel/fail_function.c
new file mode 100644
index 000000000000..21b0122cb39c
--- /dev/null
+++ b/kernel/fail_function.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fail_function.c: Function-based error injection
+ */
+#include <linux/error-injection.h>
+#include <linux/debugfs.h>
+#include <linux/fault-inject.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs);
+
+struct fei_attr {
+	struct list_head list;
+	struct kprobe kp;
+	unsigned long retval;
+};
+static DEFINE_MUTEX(fei_lock);
+static LIST_HEAD(fei_attr_list);
+static DECLARE_FAULT_ATTR(fei_fault_attr);
+static struct dentry *fei_debugfs_dir;
+
+static unsigned long adjust_error_retval(unsigned long addr, unsigned long retv)
+{
+	switch (get_injectable_error_type(addr)) {
+	case EI_ETYPE_NULL:
+		if (retv != 0)
+			return 0;
+		break;
+	case EI_ETYPE_ERRNO:
+		if (retv < (unsigned long)-MAX_ERRNO)
+			return (unsigned long)-EINVAL;
+		break;
+	case EI_ETYPE_ERRNO_NULL:
+		if (retv != 0 && retv < (unsigned long)-MAX_ERRNO)
+			return (unsigned long)-EINVAL;
+		break;
+	}
+
+	return retv;
+}
+
+static struct fei_attr *fei_attr_new(const char *sym, unsigned long addr)
+{
+	struct fei_attr *attr;
+
+	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+	if (attr) {
+		attr->kp.symbol_name = kstrdup(sym, GFP_KERNEL);
+		if (!attr->kp.symbol_name) {
+			kfree(attr);
+			return NULL;
+		}
+		attr->kp.pre_handler = fei_kprobe_handler;
+		attr->retval = adjust_error_retval(addr, 0);
+		INIT_LIST_HEAD(&attr->list);
+	}
+	return attr;
+}
+
+static void fei_attr_free(struct fei_attr *attr)
+{
+	if (attr) {
+		kfree(attr->kp.symbol_name);
+		kfree(attr);
+	}
+}
+
+static struct fei_attr *fei_attr_lookup(const char *sym)
+{
+	struct fei_attr *attr;
+
+	list_for_each_entry(attr, &fei_attr_list, list) {
+		if (!strcmp(attr->kp.symbol_name, sym))
+			return attr;
+	}
+
+	return NULL;
+}
+
+static bool fei_attr_is_valid(struct fei_attr *_attr)
+{
+	struct fei_attr *attr;
+
+	list_for_each_entry(attr, &fei_attr_list, list) {
+		if (attr == _attr)
+			return true;
+	}
+
+	return false;
+}
+
+static int fei_retval_set(void *data, u64 val)
+{
+	struct fei_attr *attr = data;
+	unsigned long retv = (unsigned long)val;
+	int err = 0;
+
+	mutex_lock(&fei_lock);
+	/*
+	 * Since this operation can be done after retval file is removed,
+	 * It is safer to check the attr is still valid before accessing
+	 * its member.
+	 */
+	if (!fei_attr_is_valid(attr)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (attr->kp.addr) {
+		if (adjust_error_retval((unsigned long)attr->kp.addr,
+					val) != retv)
+			err = -EINVAL;
+	}
+	if (!err)
+		attr->retval = val;
+out:
+	mutex_unlock(&fei_lock);
+
+	return err;
+}
+
+static int fei_retval_get(void *data, u64 *val)
+{
+	struct fei_attr *attr = data;
+	int err = 0;
+
+	mutex_lock(&fei_lock);
+	/* Here we also validate @attr to ensure it still exists. */
+	if (!fei_attr_is_valid(attr))
+		err = -ENOENT;
+	else
+		*val = attr->retval;
+	mutex_unlock(&fei_lock);
+
+	return err;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fei_retval_ops, fei_retval_get, fei_retval_set,
+			 "%llx\n");
+
+static int fei_debugfs_add_attr(struct fei_attr *attr)
+{
+	struct dentry *dir;
+
+	dir = debugfs_create_dir(attr->kp.symbol_name, fei_debugfs_dir);
+	if (!dir)
+		return -ENOMEM;
+
+	if (!debugfs_create_file("retval", 0600, dir, attr, &fei_retval_ops)) {
+		debugfs_remove_recursive(dir);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void fei_debugfs_remove_attr(struct fei_attr *attr)
+{
+	struct dentry *dir;
+
+	dir = debugfs_lookup(attr->kp.symbol_name, fei_debugfs_dir);
+	if (dir)
+		debugfs_remove_recursive(dir);
+}
+
+static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs)
+{
+	struct fei_attr *attr = container_of(kp, struct fei_attr, kp);
+
+	if (should_fail(&fei_fault_attr, 1)) {
+		regs_set_return_value(regs, attr->retval);
+		override_function_with_return(regs);
+		/* Kprobe specific fixup */
+		reset_current_kprobe();
+		preempt_enable_no_resched();
+		return 1;
+	}
+
+	return 0;
+}
+NOKPROBE_SYMBOL(fei_kprobe_handler)
+
+static void *fei_seq_start(struct seq_file *m, loff_t *pos)
+{
+	mutex_lock(&fei_lock);
+	return seq_list_start(&fei_attr_list, *pos);
+}
+
+static void fei_seq_stop(struct seq_file *m, void *v)
+{
+	mutex_unlock(&fei_lock);
+}
+
+static void *fei_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return seq_list_next(v, &fei_attr_list, pos);
+}
+
+static int fei_seq_show(struct seq_file *m, void *v)
+{
+	struct fei_attr *attr = list_entry(v, struct fei_attr, list);
+
+	seq_printf(m, "%pf\n", attr->kp.addr);
+	return 0;
+}
+
+static const struct seq_operations fei_seq_ops = {
+	.start	= fei_seq_start,
+	.next	= fei_seq_next,
+	.stop	= fei_seq_stop,
+	.show	= fei_seq_show,
+};
+
+static int fei_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &fei_seq_ops);
+}
+
+static void fei_attr_remove(struct fei_attr *attr)
+{
+	fei_debugfs_remove_attr(attr);
+	unregister_kprobe(&attr->kp);
+	list_del(&attr->list);
+	fei_attr_free(attr);
+}
+
+static void fei_attr_remove_all(void)
+{
+	struct fei_attr *attr, *n;
+
+	list_for_each_entry_safe(attr, n, &fei_attr_list, list) {
+		fei_attr_remove(attr);
+	}
+}
+
+static ssize_t fei_write(struct file *file, const char __user *buffer,
+			 size_t count, loff_t *ppos)
+{
+	struct fei_attr *attr;
+	unsigned long addr;
+	char *buf, *sym;
+	int ret;
+
+	/* cut off if it is too long */
+	if (count > KSYM_NAME_LEN)
+		count = KSYM_NAME_LEN;
+	buf = kmalloc(sizeof(char) * (count + 1), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, buffer, count)) {
+		ret = -EFAULT;
+		goto out;
+	}
+	buf[count] = '\0';
+	sym = strstrip(buf);
+
+	mutex_lock(&fei_lock);
+
+	/* Writing just spaces will remove all injection points */
+	if (sym[0] == '\0') {
+		fei_attr_remove_all();
+		ret = count;
+		goto out;
+	}
+	/* Writing !function will remove one injection point */
+	if (sym[0] == '!') {
+		attr = fei_attr_lookup(sym + 1);
+		if (!attr) {
+			ret = -ENOENT;
+			goto out;
+		}
+		fei_attr_remove(attr);
+		ret = count;
+		goto out;
+	}
+
+	addr = kallsyms_lookup_name(sym);
+	if (!addr) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (!within_error_injection_list(addr)) {
+		ret = -ERANGE;
+		goto out;
+	}
+	if (fei_attr_lookup(sym)) {
+		ret = -EBUSY;
+		goto out;
+	}
+	attr = fei_attr_new(sym, addr);
+	if (!attr) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = register_kprobe(&attr->kp);
+	if (!ret)
+		ret = fei_debugfs_add_attr(attr);
+	if (ret < 0)
+		fei_attr_remove(attr);
+	else {
+		list_add_tail(&attr->list, &fei_attr_list);
+		ret = count;
+	}
+out:
+	kfree(buf);
+	mutex_unlock(&fei_lock);
+	return ret;
+}
+
+static const struct file_operations fei_ops = {
+	.open =		fei_open,
+	.read =		seq_read,
+	.write =	fei_write,
+	.llseek =	seq_lseek,
+	.release =	seq_release,
+};
+
+static int __init fei_debugfs_init(void)
+{
+	struct dentry *dir;
+
+	dir = fault_create_debugfs_attr("fail_function", NULL,
+					&fei_fault_attr);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	/* injectable attribute is just a symlink of error_inject/list */
+	if (!debugfs_create_symlink("injectable", dir,
+				    "../error_injection/list"))
+		goto error;
+
+	if (!debugfs_create_file("inject", 0600, dir, NULL, &fei_ops))
+		goto error;
+
+	fei_debugfs_dir = dir;
+
+	return 0;
+error:
+	debugfs_remove_recursive(dir);
+	return -ENOMEM;
+}
+
+late_initcall(fei_debugfs_init);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2a33efdd1fea..890d4766cef3 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1551,6 +1551,16 @@ config FAIL_FUTEX
 	help
 	  Provide fault-injection capability for futexes.
 
+config FAIL_FUNCTION
+	bool "Fault-injection capability for functions"
+	depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION
+	help
+	  Provide function-based fault-injection capability.
+	  This will allow you to override a specific function with a return
+	  with given return value. As a result, function caller will see
+	  an error value and have to handle it. This is useful to test the
+	  error handling in various subsystems.
+
 config FAULT_INJECTION_DEBUG_FS
 	bool "Debugfs entries for fault-injection capabilities"
 	depends on FAULT_INJECTION && SYSFS && DEBUG_FS