Linux Kernel WARN()&BUG(), Oops&Panic, Tainted分析

稳定性范畴, 参考5.x kernel。

kernel Oops

Oops指的就是内核的不正确行为，比如对驱动来说：

static int i82092aa_pci_probe(struct pci_dev *dev,
                              const struct pci_device_id *id)
{
        unsigned char configbyte;
        int i, ret;

        ret = pci_enable_device(dev);
        if (ret)
                return ret;

        /* PCI Configuration Control */
        pci_read_config_byte(dev, 0x40, &configbyte);

        switch (configbyte&6) {
        case 0:
                socket_count = 2;
                break;
        case 2:
                socket_count = 1;
                break;
        case 4:
        case 6:
                socket_count = 4;
                break;

        default:
                dev_err(&dev->dev,
                        "Oops, you did something we didn't think of.\n");
                ret = -EIO;
                goto err_out_disable;
        }

这里的PCI配置读出来有异常，我们就认为它是一个Oops，打印一个错误，探测失败。

分配内存失败也算一种Oops，只不过不需要打出错误信息。

td = kmalloc (sizeof (struct FS_BPENTRY), GFP_ATOMIC);
fs_dprintk (FS_DEBUG_ALLOC, "Alloc transd: %p(%zd)\n", td, sizeof (struct FS_BPENTRY));
if (!td) {
        /* Oops out of mem */
        return -ENOMEM;
}

在体系架构方面的Oops，比如arm64的bug Oops:

static int bug_handler(struct pt_regs *regs, unsigned int esr)
{
        switch (report_bug(regs->pc, regs)) {
        case BUG_TRAP_TYPE_BUG:
                die("Oops - BUG", regs, 0);
                break;

如果report_bug()返回的是BUG_TRAP_TYPE_BUG，那就报个Oops log。

再比如非法访问也会走die("Oops", ):

static void die_kernel_fault(const char *msg, unsigned long addr,
                             unsigned int esr, struct pt_regs *regs)
{
        bust_spinlocks(1);

        pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg,
                 addr);

        mem_abort_decode(esr);

        show_pte(addr);
        die("Oops", regs, esr); //tj
        bust_spinlocks(0);
        do_exit(SIGKILL);
}

看下die():

void die(const char *str, struct pt_regs *regs, int err)
{
        int ret;
        unsigned long flags;

        raw_spin_lock_irqsave(&die_lock, flags);

        oops_enter();

        console_verbose();
        bust_spinlocks(1);
        ret = __die(str, err, regs); //tj

        if (regs && kexec_should_crash(current))
                crash_kexec(regs);

        bust_spinlocks(0);
        add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
        oops_exit();

        if (in_interrupt())
                panic("Fatal exception in interrupt"); //tj
        if (panic_on_oops)
                panic("Fatal exception"); //tj

        raw_spin_unlock_irqrestore(&die_lock, flags);

        if (ret != NOTIFY_STOP)
                do_exit(SIGSEGV);
}

再看下__die():

static int __die(const char *str, int err, struct pt_regs *regs)
{
        static int die_counter;
        int ret;

        pr_emerg("Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n",
                 str, err, ++die_counter);

        /* trap and error numbers are mostly meaningless on ARM */
        ret = notify_die(DIE_OOPS, str, regs, err, 0, SIGSEGV);
        if (ret == NOTIFY_STOP)
                return ret;

        print_modules();
        show_regs(regs);

        dump_kernel_instr(KERN_EMERG, regs);

        return ret;
}

打印类似如下log:

35.449887:   <6> Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
35.449893:   <6> Modules linked in:
35.449901:   <6> Process init (pid: 1, stack limit = 0x00000000826895f7)

后面会call panic()，不过是有条件的:

if (in_interrupt())
        panic("Fatal exception in interrupt");
if (panic_on_oops)
        panic("Fatal exception");

如果这个Oops在中断里，会走panic()。如果不在但if (panic_on_oops)成立，也走panic()。

可见，Oops不一定会导致panic。bug_handler()对BUG_TRAP_TYPE_BUG还不默认panic?

btw: arm64的Oops是怎么触发的呢？稍后看。

Kernel panic

kernel panic就是不可恢复的错误了，怎么处理？要么复位重启，要么就停(hang)在这里。

/**
 *	panic - halt the system
 *	@fmt: The text string to print
 *
 *	Display a message, then perform cleanups.
 *
 *	This function never returns.
 */
void panic(const char *fmt, ...)
{
	...
	pr_emerg("Kernel panic - not syncing: %s\n", buf);
	...
	if (panic_timeout > 0) { //tj: 延迟重启
		/*
		 * Delay timeout seconds before rebooting the machine.
		 * We can't use the "normal" timers since we just panicked.
		 */
		pr_emerg("Rebooting in %d seconds..\n", panic_timeout); //tj

		for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {
			touch_nmi_watchdog();
			if (i >= i_next) {
				i += panic_blink(state ^= 1);
				i_next = i + 3600 / PANIC_BLINK_SPD;
			}
			mdelay(PANIC_TIMER_STEP);
		}
	}
	if (panic_timeout != 0) { //tj: 立即reboot
		/*
		 * This will not be a clean reboot, with everything
		 * shutting down.  But if there is a chance of
		 * rebooting the system it will be rebooted.
		 */
		if (panic_reboot_mode != REBOOT_UNDEFINED)
			reboot_mode = panic_reboot_mode;
		emergency_restart();
	}
	...
	pr_emerg("---[ end Kernel panic - not syncing: %s ]---\n", buf); //tj: 一直卡这

	/* Do not scroll important messages printed above */
	suppress_printk = 1;
	local_irq_enable();
	for (i = 0; ; i += PANIC_TIMER_STEP) {
		touch_softlockup_watchdog();
		if (i >= i_next) {
			i += panic_blink(state ^= 1);
			i_next = i + 3600 / PANIC_BLINK_SPD;
		}
		mdelay(PANIC_TIMER_STEP);
	}

config PANIC_TIMEOUT
        int "panic timeout"
        default 0
        help
          Set the timeout value (in seconds) until a reboot occurs when the
          the kernel panics. If n = 0, then we wait forever. A timeout
          value n > 0 will wait n seconds before rebooting, while a timeout
          value n < 0 will reboot immediately.

这个panic timeout在Kconfig里说得很清楚。

BUG() ifndef HAVE_ARCH_BUG

先看代码注释:

/*
 * Don't use BUG() or BUG_ON() unless there's really no way out; one
 * example might be detecting data structure corruption in the middle
 * of an operation that can't be backed out of.  If the (sub)system
 * can somehow continue operating, perhaps with reduced functionality,
 * it's probably not BUG-worthy.
 *
 * If you're tempted to BUG(), think again:  is completely giving up
 * really the *only* solution?  There are usually better options, where
 * users don't need to reboot ASAP and can mostly shut down cleanly.
 */
#ifndef HAVE_ARCH_BUG
#define BUG() do { \
	printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
	barrier_before_unreachable(); \
	panic("BUG!"); \
} while (0)
#endif

无路可走了？那就call me。如果只是丢失功能，系统还能继续跑，那就不算BUG。我就call你debug不行啊:)

这里有个宏HAVE_ARCH_BUG，没有实现arch bug，那就用它了，会直接导致panic，这是bug嘛，当然panic，感觉哪里不对劲？

BUG() on arm64

看下arm64的实现：

#define __BUG_FLAGS(flags)                              \
        asm volatile (__stringify(ASM_BUG_FLAGS(flags)));

#define BUG() do {                                      \
        __BUG_FLAGS(0);                                 \
        unreachable();                                  \
} while (0)

#define __WARN_FLAGS(flags) __BUG_FLAGS(BUGFLAG_WARNING|(flags))

#define HAVE_ARCH_BUG  //tj: define

#include <asm-generic/bug.h>

#ifdef CONFIG_GENERIC_BUG

#define __BUG_ENTRY(flags)                              \
                .pushsection __bug_table,"aw";          \
                .align 2;                               \
        14470:  .long 14471f - 14470b;                  \
_BUGVERBOSE_LOCATION(__FILE__, __LINE__)                \
                .short flags;                           \
                .popsection;                            \
        14471:
#else

#define ASM_BUG_FLAGS(flags)                            \
        __BUG_ENTRY(flags)                              \
        brk     BUG_BRK_IMM

/*
 * #imm16 values used for BRK instruction generation
 * ...
 * 0x800: kernel-mode BUG() and WARN() traps
 * ...
 */
#define BUG_BRK_IMM                     0x800

arm64的BUG()就是抛个brk 0x800指令，注释也写明了。

bug_handler()就是对应这个的处理。

static int bug_handler(struct pt_regs *regs, unsigned int esr)
{
        switch (report_bug(regs->pc, regs)) { //tj: report_bug()
        case BUG_TRAP_TYPE_BUG:
                die("Oops - BUG", regs, 0);
                break;

        case BUG_TRAP_TYPE_WARN:
                break;

        default:
                /* unknown/unrecognised bug trap type */
                return DBG_HOOK_ERROR;
        }

        /* If thread survives, skip over the BUG instruction and continue: */
        arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
        return DBG_HOOK_HANDLED;
}

static struct break_hook bug_break_hook = {
        .fn = bug_handler,
        .imm = BUG_BRK_IMM,
};

report bug:

enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
{
...
        if (!is_valid_bugaddr(bugaddr))
                return BUG_TRAP_TYPE_NONE;

        bug = find_bug(bugaddr);
        if (!bug)
                return BUG_TRAP_TYPE_NONE;
...
        if (file)
                pr_crit("kernel BUG at %s:%u!\n", file, line);
        else
                pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n",
                        (void *)bugaddr);

        return BUG_TRAP_TYPE_BUG;
}

int is_valid_bugaddr(unsigned long addr)
{
        /*
         * bug_handler() only called for BRK #BUG_BRK_IMM.
         * So the answer is trivial -- any spurious instances with no
         * bug table entry will be rejected by report_bug() and passed
         * back to the debug-monitors code and handled as a fatal
         * unexpected debug exception.
         */
        return 1;
}

如果是bug，log里会通过pr_crit()打印出"kernel BUG at ..."。再大概看下bug_handler()的触发：

bug_handler就是bug_break_hook.fn:

void __init trap_init(void)
{
        register_kernel_break_hook(&bug_break_hook);
#ifdef CONFIG_KASAN_SW_TAGS
        register_kernel_break_hook(&kasan_break_hook);
#endif
        debug_traps_init();
}

先是register:

static LIST_HEAD(kernel_break_hook);

void register_kernel_break_hook(struct break_hook *hook)
{
        register_debug_hook(&hook->node, &kernel_break_hook);
}

static void register_debug_hook(struct list_head *node, struct list_head *list)
{
        spin_lock(&debug_hook_lock);
        list_add_rcu(node, list);
        spin_unlock(&debug_hook_lock);

}

增加到list kernel_break_hook。然后初始化：

#define DBG_ESR_EVT_BRK         0x6

void __init debug_traps_init(void)
{
        hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
                              TRAP_TRACE, "single-step handler");
        hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
                              TRAP_BRKPT, "ptrace BRK handler");
}

void __init hook_debug_fault_code(int nr,
                                  int (*fn)(unsigned long, unsigned int, struct pt_regs *),
                                  int sig, int code, const char *name)
{
        BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));

        debug_fault_info[nr].fn         = fn; //tj: brk_handler
        debug_fault_info[nr].sig        = sig;
        debug_fault_info[nr].code       = code;
        debug_fault_info[nr].name       = name;
}

static struct fault_info __refdata debug_fault_info[] = {
        { do_bad,       SIGTRAP,        TRAP_HWBKPT,    "hardware breakpoint"   },
        { do_bad,       SIGTRAP,        TRAP_HWBKPT,    "hardware single-step"  },
        { do_bad,       SIGTRAP,        TRAP_HWBKPT,    "hardware watchpoint"   },
        { do_bad,       SIGKILL,        SI_KERNEL,      "unknown 3"             },
        { do_bad,       SIGTRAP,        TRAP_BRKPT,     "aarch32 BKPT"          },
        { do_bad,       SIGKILL,        SI_KERNEL,      "aarch32 vector catch"  },
        { early_brk64,  SIGTRAP,        TRAP_BRKPT,     "aarch64 BRK"           }, //tj: here, index=6
        { do_bad,       SIGKILL,        SI_KERNEL,      "unknown 7"             },
};

就是这里的early_brk64被替换成了brk_handler。

默认的early_brk64()直接call bug_handler():

/*
 * Initial handler for AArch64 BRK exceptions
 * This handler only used until debug_traps_init().
 */
int __init early_brk64(unsigned long addr, unsigned int esr,
                struct pt_regs *regs)
{
#ifdef CONFIG_KASAN_SW_TAGS
        unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;

        if ((comment & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
                return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
#endif
        return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
}

而brk_handler()会走hook:

static int brk_handler(unsigned long unused, unsigned int esr,
                       struct pt_regs *regs)
{
        if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
                return 0;

        if (user_mode(regs)) {
                send_user_sigtrap(TRAP_BRKPT);
        } else {
                pr_warn("Unexpected kernel BRK exception at EL1\n");
                return -EFAULT;
        }

        return 0;
}

call_break_hook():

static int call_break_hook(struct pt_regs *regs, unsigned int esr)
{
        struct break_hook *hook;
        struct list_head *list;
        int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL;

        list = user_mode(regs) ? &user_break_hook : &kernel_break_hook;

        /*
         * Since brk exception disables interrupt, this function is
         * entirely not preemptible, and we can use rcu list safely here.
         */
        list_for_each_entry_rcu(hook, list, node) {
                unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;

                if ((comment & ~hook->mask) == hook->imm)
                        fn = hook->fn;
        }

        return fn ? fn(regs, esr) : DBG_HOOK_ERROR;
}

就是在list里找到hook->fn,也就是bug_handler()。

那BUG_ON()怎么进入的panic()，除了中断就是靠panic_on_oops控制：

panic_on_oops
Controls the kernel’s behaviour when an oops or BUG is encountered.

= ================================================
0 Try to continue operation.
1 Panic immediately. If the panic sysctl is also non-zero then the
  machine will be rebooted.
= ================================================

Android一般在init.rc开启：

on init
    ...
    write /proc/sys/kernel/panic_on_oops 1

WARN()

先看注释：

/*
 * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report
 * significant kernel issues that need prompt attention if they should ever
 * appear at runtime.
 *
 * Do not use these macros when checking for invalid external inputs
 * (e.g. invalid system call arguments, or invalid data coming from
 * network/devices), and on transient conditions like ENOMEM or EAGAIN.
 * These macros should be used for recoverable kernel issues only.
 * For invalid external inputs, transient conditions, etc use
 * pr_err[_once/_ratelimited]() followed by dump_stack(), if necessary.
 * Do not include "BUG"/"WARNING" in format strings manually to make these
 * conditions distinguishable from kernel issues.
 *
 * Use the versions with printk format strings to provide better diagnostics.
 */

WARN()系是用来报告一些可恢复的(recoverable)内核问题，不是用来检查非法的外部输入(比如非法的系统调用参数)、内存不足(ENOMEM)这类临时状况的。

WARN()系有个区分__WARN_FLAGS，与体系架构有关：

#ifndef __WARN_FLAGS
extern __printf(4, 5)
void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
		       const char *fmt, ...);
#define __WARN()		__WARN_printf(TAINT_WARN, NULL)
#define __WARN_printf(taint, arg...) do {				\
		instrumentation_begin();				\
		warn_slowpath_fmt(__FILE__, __LINE__, taint, arg);	\
		instrumentation_end();					\
	} while (0)
#else
extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
#define __WARN()		__WARN_FLAGS(BUGFLAG_TAINT(TAINT_WARN))
#define __WARN_printf(taint, arg...) do {				\
		instrumentation_begin();				\
		__warn_printk(arg);					\
		__WARN_FLAGS(BUGFLAG_NO_CUT_HERE | BUGFLAG_TAINT(taint));\
		instrumentation_end();					\
	} while (0)
#define WARN_ON_ONCE(condition) ({				\
	int __ret_warn_on = !!(condition);			\
	if (unlikely(__ret_warn_on))				\
		__WARN_FLAGS(BUGFLAG_ONCE |			\
			     BUGFLAG_TAINT(TAINT_WARN));	\
	unlikely(__ret_warn_on);				\
})
#endif

关注arm64：

#define __BUG_FLAGS(flags)                              \
        asm volatile (__stringify(ASM_BUG_FLAGS(flags)));

#define __WARN_FLAGS(flags) __BUG_FLAGS(BUGFLAG_WARNING|(flags))

和arm64的BUG()定义放一起的，BUGFLAG_WARNING:

#ifdef CONFIG_GENERIC_BUG
#define BUGFLAG_WARNING         (1 << 0) //tj: here
#define BUGFLAG_ONCE            (1 << 1)
#define BUGFLAG_DONE            (1 << 2)
#define BUGFLAG_NO_CUT_HERE     (1 << 3)        /* CUT_HERE already sent */
#define BUGFLAG_TAINT(taint)    ((taint) << 8) //tj
#define BUG_GET_TAINT(bug)      ((bug)->flags >> 8)
#endif

#define __WARN()		__WARN_FLAGS(BUGFLAG_TAINT(TAINT_WARN))

主要差异体现在__BUG_ENTRY，具体涉及arm64汇编，这里不关注。

__WARN_printf就是多了个log。

还有个__warn()函数，lib/bug.c里的report_bug()会用到：

enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
{
	...
	if (bug) {
	...
	   warning = (bug->flags & BUGFLAG_WARNING) != 0;
	...
	if ((bug->flags & BUGFLAG_NO_CUT_HERE) == 0)
		printk(KERN_DEFAULT CUT_HERE);

	if (warning) {
		/* this is a WARN_ON rather than BUG/BUG_ON */
		__warn(file, line, (void *)bugaddr, BUG_GET_TAINT(bug), regs,
		       NULL);
		return BUG_TRAP_TYPE_WARN;
	}

	if (file)
		pr_crit("kernel BUG at %s:%u!\n", file, line);

bug->flags就是前面定义的哈，arm64的WARN触发的也是bug_handler()。

__warn():

void __warn(const char *file, int line, void *caller, unsigned taint,
	    struct pt_regs *regs, struct warn_args *args)
{
	disable_trace_on_warning();

	if (file)
		pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n",
			raw_smp_processor_id(), current->pid, file, line,
			caller);
	else
		pr_warn("WARNING: CPU: %d PID: %d at %pS\n",
			raw_smp_processor_id(), current->pid, caller);

	if (args)
		vprintk(args->fmt, args->args);

	if (panic_on_warn) { //tj: here
		/*
		 * This thread may hit another WARN() in the panic path.
		 * Resetting this prevents additional WARN() from panicking the
		 * system on this thread.  Other threads are blocked by the
		 * panic_mutex in panic().
		 */
		panic_on_warn = 0;
		panic("panic_on_warn set ...\n");
	}

	print_modules();

	if (regs)
		show_regs(regs);
	else
		dump_stack();

	print_irqtrace_events(current);

	print_oops_end_marker();

	/* Just a warning, don't kill lockdep. */
	add_taint(taint, LOCKDEP_STILL_OK);

what? panic还能发生在warn上？没错，就是这个panic_on_warn，看下缘由：

panic_on_warn
Calls panic() in the WARN() path when set to 1. This is useful to avoid
a kernel rebuild when attempting to kdump at the location of a WARN().

= ================================================
0 Only WARN(), default behaviour.
1 Call panic() after printing out WARN() location.
= ================================================

kdump用时不用rebuild，ok。

tainted-kernels

一些Oops log会看到Tainted字样如下：

35.449908:   <6> CPU: 0 PID: 1 Comm: init Tainted: G S      W       4.14.117-perf+ #65

就是内核被污染了，查问题时用得上。即使污染源被去除，污染状态也会一直保留。

运行时的污染状态可以通过/proc/sys/kernel/tainted查询；BUG、Oops、panic的log里也都会把它打印出来。

arm64的die()中就会增加污染标记:

void die(const char *str, struct pt_regs *regs, int err)
{
	...
	bust_spinlocks(0);
	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); //tj
	oops_exit();

void add_taint(unsigned flag, enum lockdep_ok lockdep_ok)
{
	if (lockdep_ok == LOCKDEP_NOW_UNRELIABLE && __debug_locks_off())
		pr_warn("Disabling lock debugging due to kernel taint\n");

	set_bit(flag, &tainted_mask); //tj

	if (tainted_mask & panic_on_taint) {
		panic_on_taint = 0;
		panic("panic_on_taint set ...");
	}
}

污染标记就记录到tainted_mask里。

/* This cannot be an enum because some may be used in assembly source. */
#define TAINT_PROPRIETARY_MODULE        0
#define TAINT_FORCED_MODULE             1
#define TAINT_CPU_OUT_OF_SPEC           2
#define TAINT_FORCED_RMMOD              3
#define TAINT_MACHINE_CHECK             4
#define TAINT_BAD_PAGE                  5
#define TAINT_USER                      6
#define TAINT_DIE                       7 //tj
#define TAINT_OVERRIDDEN_ACPI_TABLE     8
#define TAINT_WARN                      9
#define TAINT_CRAP                      10
#define TAINT_FIRMWARE_WORKAROUND       11
#define TAINT_OOT_MODULE                12
#define TAINT_UNSIGNED_MODULE           13
#define TAINT_SOFTLOCKUP                14
#define TAINT_LIVEPATCH                 15
#define TAINT_AUX                       16
#define TAINT_RANDSTRUCT                17
#define TAINT_FLAGS_COUNT               18

这么多污染种类。打印污染状态时：

/**
 * print_tainted - return a string to represent the kernel taint state.
 *
 * For individual taint flag meanings, see Documentation/admin-guide/sysctl/kernel.rst
 *
 * The string is overwritten by the next call to print_tainted(),
 * but is always NULL terminated.
 */
const char *print_tainted(void)
{
	static char buf[TAINT_FLAGS_COUNT + sizeof("Tainted: ")];

	BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT);

	if (tainted_mask) {
		char *s;
		int i;

		s = buf + sprintf(buf, "Tainted: ");
		for (i = 0; i < TAINT_FLAGS_COUNT; i++) {
			const struct taint_flag *t = &taint_flags[i];
			*s++ = test_bit(i, &tainted_mask) ? //tj
					t->c_true : t->c_false;
		}
		*s = 0;
	} else
		snprintf(buf, sizeof(buf), "Not tainted");

	return buf;
}

从tainted_mask里取出之前设置的标记via test_bit(i, &tainted_mask)。像WARN()就设置了TAINT_WARN。

那都被污染了要不要panic？取决于panic_on_taint：

   panic_on_taint= Bitmask for conditionally calling panic() in add_taint()
                   Format: <hex>[,nousertaint]
                   Hexadecimal bitmask representing the set of TAINT flags
                   that will cause the kernel to panic when add_taint() is
                   called with any of the flags in this set.
                   The optional switch "nousertaint" can be utilized to
                   prevent userspace forced crashes by writing to sysctl
                   /proc/sys/kernel/tainted any flagset matching with the
                   bitmask set on panic_on_taint.
                   See Documentation/admin-guide/tainted-kernels.rst for
                   extra details on the taint flags that users can pick
                   to compose the bitmask to assign to panic_on_taint.

哪些污染要panic，你自己决定了。所以才会有tainted_mask & panic_on_taint。

sysctl for panic

/proc/sys/kernel

xxx:/proc/sys/kernel # ls -l panic*
-rw-r--r-- 1 root root 0 2020-07-11 20:32 panic
-rw-r--r-- 1 root root 0 2020-07-11 20:32 panic_on_oops
-rw-r--r-- 1 root root 0 2020-07-11 20:32 panic_on_rcu_stall
-rw-r--r-- 1 root root 0 2020-07-11 20:32 panic_on_warn

reference

Documentation/admin-guide/sysctl/kernel.rst
Documentation/admin-guide/tainted-kernels.rst
Documentation/admin-guide/kernel-parameters.txt

Linux Kernel WARN()&BUG(), Oops&Panic, Tainted分析

Linux Kernel WARN()&BUG(), Oops&Panic, Tainted分析

kernel Oops

Kernel panic

BUG() ifndef HAVE_ARCH_BUG

BUG() on arm64

panic_on_oops

WARN()

panic_on_warn

tainted-kernels

sysctl for panic

reference

Linux Kernel WARN()&BUG(), Oops&Panic, Tainted分析

Linux Kernel WARN()&BUG(), Oops&Panic, Tainted分析