kernel crash 发生后的那些事（一）

本文主要是介绍kernel crash 发生后的那些事（一），希望对大家解决编程问题提供一定的参考价值，需要的开发者们随着小编来一起学习吧！

本文以 echo c > /proc/sysrq-trigger 触发的 kernel crash 为例，分析 kernel crash 的处理过程。

当代码访问虚拟地址 0 时，会发生 data abort。这是由 MMU 决定的：页表没有把虚拟地址 0 映射到任何有访问权限的物理地址。

当发生 data abort 时，PC 会跳转到 W(b) vector_dabt + stubs_offset（arch/arm/kernel/entry-armv.S）。如果 data abort 发生在内核空间，会进而执行到 __dabt_svc。

__dabt_svc

   @
   @ __dabt_svc: code reached when a data abort is taken in kernel space.
   @ It passes the abort to the processor-specific handler through
   @ dabt_helper and then returns from the exception.
   @
   .align   5
__dabt_svc:
   @ NOTE(review): svc_entry is not shown here; presumably it saves the
   @ aborted context on the stack as a struct pt_regs -- confirm.
   svc_entry
   @ r2 = sp: dabt_helper documents r2 as the pt_regs pointer.
   mov   r2, sp
   @ Expands to "bl CPU_DABORT_HANDLER".
   dabt_helper

   @
   @ IRQs off again before pulling preserved data off the stack
   @
   disable_irq_notrace
   svc_exit r5               @ return from exception (r5: aborted context psr)
UNWIND(.fnend       )
ENDPROC(__dabt_svc)

__dabt_svc调用了宏dabt_helper

   @
   @ dabt_helper: macro that expands to a single call of the data abort
   @ handler named by the CPU_DABORT_HANDLER preprocessor symbol.
   @
   .macro   dabt_helper

   @
   @ Call the processor-specific abort handler:
   @
   @ r2 - pt_regs
   @ r4 - aborted context pc
   @ r5 - aborted context psr
   @
   @ The abort handler must return the aborted address in r0, and
   @ the fault status register in r1. r9 must be preserved.
   @

   @ CPU_DABORT_HANDLER is resolved by the preprocessor (see glue-df.h);
   @ bl records the return address in lr before branching.
   bl   CPU_DABORT_HANDLER
   .endm

CPU_DABORT_HANDLER的具体实现

dabt_helper调用了跳转指令bl   CPU_DABORT_HANDLER，标号CPU_DABORT_HANDLER有多个实现，可以根据 .config找到具体的实现。
在文件 arch/arm/include/asm/glue-df.h中找到：
/*
 * Handler selection when CONFIG_CPU_ABRT_EV7 is configured:
 * if CPU_DABORT_HANDLER is already defined at this point, MULTI_DABORT is
 * set to 1; otherwise CPU_DABORT_HANDLER is defined as v7_early_abort.
 * These '#' lines are preprocessor directives, not comments.
 */
#ifdef CONFIG_CPU_ABRT_EV7
# ifdef CPU_DABORT_HANDLER
# define MULTI_DABORT 1
# else
# define CPU_DABORT_HANDLER v7_early_abort
# endif
#endif
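注意：这里以 # 开头的行并不是被注释掉的代码，而是 C 预处理器的条件编译指令（#ifdef / #else / #endif / #define）；# 与指令名之间的空格只是用来体现嵌套层次的缩进。当只配置了 CONFIG_CPU_ABRT_EV7 时，CPU_DABORT_HANDLER 会被定义为 v7_early_abort。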

到文件arch/arm/mm/abort-ev7.S:
   /*
   * v7_early_abort: reads the fault status and fault address from CP15
   * into r1 and r0, then branches to the C function do_DataAbort.
   * r2 is not modified here, so it still holds the value set by the caller.
   */
   .align   5
ENTRY(v7_early_abort)
   /*
   * The effect of data aborts on the exclusive access monitor is
   * UNPREDICTABLE. Do a CLREX to clear the state
   */
   clrex

   mrc   p15, 0, r1, c5, c0, 0       @ get FSR (Fault Status Register) into r1
   mrc   p15, 0, r0, c6, c0, 0       @ get FAR (Fault Address Register) into r0

   /*
   * V6 code adjusts the returned DFSR.
   * New designs should not need to patch up faults.
   */

   @ Plain branch (b, not bl): lr is left untouched, so do_DataAbort
   @ returns to whoever called this handler with bl.
   b   do_DataAbort
ENDPROC(v7_early_abort)
这里只关注 b   do_DataAbort，终于跳转到C 函数do_DataAbort

跳转到C函数 do_DataAbort

arch/arm/mm/fault.c
/*
 * Dispatch a data abort to the relevant handler.
 *
 * addr: faulting address (read from the FAR by the assembly stub, in r0)
 * fsr:  fault status register value (in r1)
 * regs: saved register context of the aborted code (in r2)
 *
 * The handler is looked up in fsr_info[] at index fsr_fs(fsr). If it
 * returns 0 the abort counts as handled; otherwise the fault is logged and
 * reported through arm_notify_die() with the signal/code from the table.
 */
asmlinkage void __exception
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
   /* Table entry for this fault status. */
   const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
   struct siginfo info;

   /*
    * The handler sees fsr with the FSR_LNX_PF bit cleared.
    * NOTE(review): meaning of FSR_LNX_PF is not visible here -- confirm.
    */
   if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
       return;

   /* Handler returned non-zero: nobody dealt with the fault. */
   printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n",
       inf->name, fsr, addr);

   info.si_signo = inf->sig;
   info.si_errno = 0;
   info.si_code = inf->code;
   info.si_addr = (void __user *)addr;
   arm_notify_die("", regs, &info, fsr, 0);
}

请关注从汇编过来的入口参数：

do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
r0: mrc p15, 0, r0, c6, c0, 0 @ get FAR 【Fault Address Register (FAR).】
r1: mrc p15, 0, r1, c5, c0, 0 @ get FSR 【Fault Status Register (FSR)】
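r2: pt_regs 指针。它来自 __dabt_svc 中的 mov r2, sp：在调用 dabt_helper 之前把当前栈指针 sp 放入 r2（对应宏注释中的 “r2 - pt_regs”），v7_early_abort 没有改动 r2，所以它就成为 do_DataAbort 的第三个参数 regs。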
这里涉及到CP15的几个寄存器：
The CP15 register c2 operations control the Translation Table Base (TTB).
The CP15 register c3 operations control the Domain Access Control (DAC) register.
The CP15 register c5 operations control the Fault Status Register (FSR).
The CP15 register c6 operations control the Fault Address Register (FAR).

根据fsr找到对应的处理函数

fsr_info是全局变量，类型为
struct fsr_info {
   int   (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
   int   sig;
   int   code;
   const char *name;
};
以当前 fsr 算出的下标（fsr_fs(fsr)）作为 index，得到对应的处理函数。

/*
 * FSR definition. On this platform CONFIG_ARM_LPAE is not set, so the
 * #else branch applies and fsr-2level.c is the file that gets included.
 */
#ifdef CONFIG_ARM_LPAE
#include "fsr-3level.c"
#else
#include "fsr-2level.c"
#endif

我们要根据 “const struct fsr_info *inf = fsr_info + fsr_fs(fsr);” 得到具体的处理函数。因为 do_DataAbort 在系统运行中会被多次调用（例如按需加载物理页时），
如果在 do_DataAbort 中无条件打印，日志会一直刷屏，看上去好像开不了机。请注意，do_DataAbort(unsigned long addr, ...) 的第一个参数就是 r0：
r0: mrc   p15, 0, r0, c6, c0, 0       @ get FAR 【Fault Address Register (FAR)】，存放的是出问题时 CPU 访问的地址。
如果触发的是空指针 crash，则 r0（也就是 do_DataAbort 的 addr 参数）为 0，可以加上这个条件再打印。

/*
 * Debug variant of do_DataAbort (abridged: the unhandled-fault reporting
 * that follows the handler call is not shown). It logs the fsr_info index
 * only when the faulting address is 0.
 */
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
   const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
   struct siginfo info;
   /*
    * This function is called very often, so print only for a NULL
    * faulting address instead of on every call.
    */
   if(!addr)
       printk(KERN_ALERT "do_DataAbort: NULL index:0x%x\n",fsr_fs(fsr));
   if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
       return;

}
输出：do_DataAbort: NULL index:0x5，这样可知对应的函数为do_translation_fault。
/*
 * Fault dispatch table, indexed by fsr_fs(fsr) in do_DataAbort().
 * Each entry is { handler, signal, si_code, description }.
 * This excerpt lists indices 0x0-0xf; index 0x5 (the one printed for the
 * NULL-pointer crash) selects do_translation_fault.
 */
static struct fsr_info fsr_info[] = {
   /*
   * The following are the standard ARMv3 and ARMv4 aborts. ARMv5
   * defines these to be "precise" aborts.
   */
   /* 0x0 */ { do_bad,       SIGSEGV, 0,       "vector exception"           },
   /* 0x1 */ { do_bad,       SIGBUS,   BUS_ADRALN,   "alignment exception"           },
   /* 0x2 */ { do_bad,       SIGKILL, 0,       "terminal exception"           },
   /* 0x3 */ { do_bad,       SIGBUS,   BUS_ADRALN,   "alignment exception"           },
   /* 0x4 */ { do_bad,       SIGBUS,   0,       "external abort on linefetch"       },
   /* 0x5 */ { do_translation_fault,   SIGSEGV, SEGV_MAPERR,   "section translation fault"       },
   /* 0x6 */ { do_bad,       SIGBUS,   0,       "external abort on linefetch"       },
   /* 0x7 */ { do_page_fault,   SIGSEGV, SEGV_MAPERR,   "page translation fault"       },
   /* 0x8 */ { do_bad,       SIGBUS,   0,       "external abort on non-linefetch" },
   /* 0x9 */ { do_bad,       SIGSEGV, SEGV_ACCERR,   "section domain fault"           },
   /* 0xa */ { do_bad,       SIGBUS,   0,       "external abort on non-linefetch" },
   /* 0xb */ { do_bad,       SIGSEGV, SEGV_ACCERR,   "page domain fault"           },
   /* 0xc */ { do_bad,       SIGBUS,   0,       "external abort on translation"       },
   /* 0xd */ { do_sect_fault,   SIGSEGV, SEGV_ACCERR,   "section permission fault"       },
   /* 0xe */ { do_bad,       SIGBUS,   0,       "external abort on translation"       },
   /* 0xf */ { do_page_fault,   SIGSEGV, SEGV_ACCERR,   "page permission fault"           },
};

kernel mode空指针处理函数do_translation_fault

/*
 * Translation-fault handler (abridged excerpt).
 * For an address below TASK_SIZE the fault is passed on to do_page_fault().
 * NOTE(review): the path for addr >= TASK_SIZE is not shown in this excerpt,
 * and the index/pgd/pud/pmd locals declared below are unused here --
 * presumably they belong to the omitted part.
 */
do_translation_fault(unsigned long addr, unsigned int fsr,
             struct pt_regs *regs)
{
   unsigned int index;
   pgd_t *pgd, *pgd_k;
   pud_t *pud, *pud_k;
   pmd_t *pmd, *pmd_k;

   /* Address 0 (the NULL-pointer case) is below TASK_SIZE and takes this branch. */
   if (addr < TASK_SIZE)
       return do_page_fault(addr, fsr, regs);
}

/*
 * Page-fault handler, abridged and instrumented with debug printk calls
 * that show which check sends the fault to no_context.
 *
 * In this excerpt every path ends at the no_context label, which calls
 * __do_kernel_fault() and returns 0.
 *
 * NOTE(review): tsk and mm are declared but never assigned in this excerpt
 * before they are read; presumably their initialization was dropped when
 * the code was shortened -- confirm against arch/arm/mm/fault.c.
 * NOTE(review): the debug printk calls print the pointer mm with "0x%x";
 * "%p" would be the matching conversion for a pointer.
 */
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
   struct task_struct *tsk;
   struct mm_struct *mm;
   int fault, sig, code;
   /* Non-zero when the FSR_WRITE bit is set in fsr. */
   int write = fsr & FSR_WRITE;
   unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
               (write ? FAULT_FLAG_WRITE : 0);

   /*
   * If we're in an interrupt or have no user
   * context, we must not take the fault..
   */
   if (in_atomic() || !mm){
       printk(KERN_ALERT "do_page_fault:mm_struct_1 0x%x\n", mm);
       goto no_context;
   }

   /* Try to take mmap_sem for reading without blocking. */
   if (!down_read_trylock(&mm->mmap_sem)) {
       /*
        * Lock not available: a kernel-mode fault whose pc has no
        * exception-table entry goes straight to no_context.
        */
       if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc)){
           printk(KERN_ALERT "do_page_fault:mm_struct_2 0x%x\n", mm);
           goto no_context;
       }
       down_read(&mm->mmap_sem);
   }
   else{
       fault = __do_page_fault(mm, addr, fsr, flags, tsk);
       /*
       * If we are in kernel mode at this point, we
       * have no context to handle this fault with.
       */
       if (!user_mode(regs)){
           printk(KERN_ALERT "do_page_fault:mm_struct_3 0x%x\n", mm);
           goto no_context;
       }
   }
no_context:
   /* Log the fsr_info index, then hand the fault to the kernel-fault path. */
   printk(KERN_ALERT "__do_kernel_fault: do_page_fault:index 0x%x\n", fsr_fs(fsr));
   __do_kernel_fault(mm, addr, fsr, regs);
   return 0;

}

如果 crash 发生在内核态（kernel mode），最终会调用到 __do_kernel_fault。

对 echo c > /proc/sysrq-trigger 触发的空指针 crash 而言：空指针的虚拟地址为 0，0 < TASK_SIZE，所以 do_translation_fault 会调用 do_page_fault；由于找不到对应的 VMA 等信息，且发生异常时处于内核空间，所以 goto no_context，最后调到 __do_kernel_fault。

这篇关于kernel crash 发生后的那些事（一）的文章就介绍到这儿，希望我们推荐的文章对编程师们有所帮助！