本文主要是介绍kernel crash 发生后的那些事(三),希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
__do_kernel_fault -> die:
继续分析__do_kernel_fault的die函数
arch/arm/kernel/traps.c
/*************************************************************************************/
/*
 * die() - report a fatal kernel exception (Oops) and decide how to go on.
 * @str:  short description printed in the "Internal error" line
 * @regs: register state saved when the exception was taken
 * @err:  error code printed together with @str
 *
 * With die_lock held and IRQs off, this checks for a BUG() trap (kernel mode
 * only), lets __die() print the report, and hands over to crash_kexec() when
 * kexec_should_crash() says so. After dropping the lock it panics if called
 * in interrupt context or if panic_on_oops is set; otherwise the current
 * task is terminated with SIGSEGV unless __die() returned NOTIFY_STOP.
 *
 * The extra printk() progress markers are the article author's debug output.
 */
void die(const char *str, struct pt_regs *regs, int err)
{
	struct thread_info *thread = current_thread_info();
	int ret;
	enum bug_trap_type bug_type = BUG_TRAP_TYPE_NONE;

	oops_enter();

	raw_spin_lock_irq(&die_lock);
	console_verbose();
	bust_spinlocks(1);

	/* Only a fault taken in kernel mode can be a BUG() trap. */
	if (!user_mode(regs))
		bug_type = report_bug(regs->ARM_pc, regs);
	if (bug_type != BUG_TRAP_TYPE_NONE)
		str = "Oops - BUG";

	/***************************************************/
	/* dead in cpu 1 not skip it to check the re-start */
	/***************************************************/
	ret = __die(str, err, thread, regs);
	/* regs is a pointer, so it is printed with %p rather than %x. */
	printk("Had process the __die>>>>...0x%p\n", regs);

	/*
	 * NOTE(review): regs has already been used above, so the NULL test
	 * here looks purely defensive.
	 */
	if (regs && kexec_should_crash(thread->task)) {
		printk("kexec_should_crash...\n");
		crash_kexec(regs);
	}
	printk("Had process the crash_kexec...\n");

	bust_spinlocks(0);
	add_taint(TAINT_DIE);
	raw_spin_unlock_irq(&die_lock);
	oops_exit();

	if (in_interrupt())
		panic("Fatal exception in interrupt");
	if (panic_on_oops)
		panic("Fatal exception");
	/* A die notifier that returned NOTIFY_STOP keeps the task alive. */
	if (ret != NOTIFY_STOP)
		do_exit(SIGSEGV);
}
/**************************************************************************/
Oops的大量打印信息是从该函数及子函数输出的.
1] 其中的oops_enter/console_verbose看不出有用信息,不再关注;
2] report_bug(regs->ARM_pc, regs);
3] 函数主要由__die 和 crash_kexec两部分组成,后面会详细解析;
die -> report_bug:
/*
 * report_bug() - classify the trap at @bugaddr (abridged excerpt).
 * @bugaddr: address of the instruction that trapped (die() passes regs->ARM_pc)
 * @regs:    saved register state; not used in the part shown here
 *
 * Only the early-exit path is reproduced: when is_valid_bugaddr() rejects
 * @bugaddr the function returns BUG_TRAP_TYPE_NONE.
 */
enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs){
/* Declared but not used in the part of the function shown here. */
const struct bug_entry *bug;
const char *file;
unsigned line, warning;
if (!is_valid_bugaddr(bugaddr))
return BUG_TRAP_TYPE_NONE;
/*
 * NOTE(review): the excerpt stops here, so nothing is returned when the
 * address is valid; the handling of a real BUG entry is not shown.
 */
}
/*
 * is_valid_bugaddr() - tell whether the instruction at @pc is the BUG trap.
 * @pc: address of the instruction to inspect
 *
 * Returns 1 when the word read from @pc equals BUG_INSTR_VALUE, and 0 when it
 * differs or when the address cannot be read.
 */
int is_valid_bugaddr(unsigned long pc)
{
	unsigned long insn;

	/* A non-zero result means the read failed. */
	if (probe_kernel_address((unsigned *)pc, insn) != 0)
		return 0;

	return (insn == BUG_INSTR_VALUE) ? 1 : 0;
}
/*
 * Author's note: this also shows how kernel code can run a user-copy helper
 * on a kernel address.
 *
 * probe_kernel_address(): safely attempt to read from a location
 * @addr: address to read from - its type is type typeof(retval)*
 * @retval: read into this variable
 *
 * Safely read from address @addr into variable @retval. If a kernel fault
 * happens, handle that and return -EFAULT.
 *
 * The address limit is switched to KERNEL_DS around the copy and restored
 * afterwards; the copy itself runs between pagefault_disable() and
 * pagefault_enable(). Every line of the body must end in a backslash so
 * that the restore of the address limit and the result stay inside the
 * macro.
 */
#define probe_kernel_address(addr, retval) \
	({ \
		long ret; \
		mm_segment_t old_fs = get_fs(); \
		\
		set_fs(KERNEL_DS); \
		pagefault_disable(); \
		ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \
		pagefault_enable(); \
		set_fs(old_fs); \
		ret; \
	})
从打印信息可知: fs:0xbf000000, ds:0x0, pc:0xc01dda94。
这里涉及到thread_info的mm_segment_t addr_limit数据成员,当系统调用的时候,[包括用户空间调用syscall,注意也包括内核空间调用syscall]
会使用thread_info的 addr_limit去检查入口指针参数,检查的方法很简单:看是否小于addr_limit。
在内核中使用时,如果入口参数大于addr_limit, 而又有使用系统调用函数就要临时改变addr_limit.如此例pc:0xc01dda94 大于0xbf000000,需要更改,
否则,参数检查失败,系统函数调用失败。
/****************************************************************************/
有关 fs/ds
/****************************************************************************/
/arch/arm/include/asm/thread_info.h
/* Type of thread_info's addr_limit member, i.e. the value get_fs() yields. */
typedef unsigned long mm_segment_t;
/arch/arm/include/asm/uaccess.h
/arch/arm/include/asm/memory.h
/* Configured value behind PAGE_OFFSET and TASK_SIZE below. */
#define CONFIG_PAGE_OFFSET 0xC0000000
/*
 * PAGE_OFFSET - the virtual address of the start of the kernel image
 * TASK_SIZE - the maximum size of a user space task.
 */
#define PAGE_OFFSET UL(CONFIG_PAGE_OFFSET)
/* With CONFIG_PAGE_OFFSET = 0xC0000000 this is 0xBF000000, 16 MiB below PAGE_OFFSET. */
#define TASK_SIZE (UL(CONFIG_PAGE_OFFSET) - UL(0x01000000))
/* Address-limit value for kernel accesses; get_ds() returns it. */
#define KERNEL_DS 0x00000000
#define get_ds() (KERNEL_DS)
/* Address-limit value for user accesses: the top of the user task range. */
#define USER_DS TASK_SIZE
/* The current limit is kept per thread, in thread_info's addr_limit. */
#define get_fs() (current_thread_info()->addr_limit)
struct thread_info -> mm_segment_t addr_limit; /* address limit */
die -> __die:
/*
 * __die() - print the body of the Oops report.
 * @str:    description for the "Internal error" line
 * @err:    error code printed in hex on that line
 * @thread: thread_info of the faulting context
 * @regs:   register state saved at the fault
 *
 * Bumps the oops counter, notifies the die chain and, unless a notifier
 * answered NOTIFY_STOP, prints the module list, the registers and the process
 * line. The stack dump, backtrace and instruction dump follow only when the
 * fault was taken in kernel mode or while in interrupt context.
 *
 * Returns the value produced by notify_die().
 */
static int __die(const char *str, int err, struct thread_info *thread, struct pt_regs *regs)
{
	static int die_counter;
	struct task_struct *task = thread->task;
	unsigned long stack_end;
	int notify_rc;

	die_counter++;
	printk(KERN_EMERG "Internal error: %s: %x [#%d]" S_PREEMPT S_SMP
	       S_ISA "\n", str, err, die_counter);

	/* trap and error numbers are mostly meaningless on ARM */
	notify_rc = notify_die(DIE_OOPS, str, regs, err, task->thread.trap_no, SIGSEGV);
	if (notify_rc == NOTIFY_STOP)
		return notify_rc;

	print_modules();
	__show_regs(regs);
	printk(KERN_EMERG "Process %.*s (pid: %d, stack limit = 0x%p)\n",
	       TASK_COMM_LEN, task->comm, task_pid_nr(task), &thread[1]);

	/* A user-mode fault outside interrupt context gets no stack dump. */
	if (user_mode(regs) && !in_interrupt())
		return notify_rc;

	stack_end = THREAD_SIZE + (unsigned long)task_stack_page(task);
	dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp, stack_end);
	dump_backtrace(regs, task);
	dump_instr(KERN_EMERG, regs);

	return notify_rc;
}
1] notify_die通知关注die的程序,就是调用register_die_notifier的程序,一般为调试程序如kgdb 等。
int kgdb_arch_init ->register_die_notifier(&kgdb_notifier);
2] print_modules();可能是动态加载的 modules.
crash> modules
modules = $1 = {
next = 0xc05d64d0 <modules>,
prev = 0xc05d64d0 <modules>
}
__die -> __show_regs(regs);
/arch/arm/kernel/process.cvoid __show_regs(struct pt_regs *regs)
{
unsigned long flags;
char buf[64];
/*CPU ID, tainted reason, verson*/
printk("CPU: %d %s (%s %.*s)\n",
raw_smp_processor_id(), print_tainted(),
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
/*print crash时的代码位置,和调用该函数的位置
*这里打印出了函数名
*/
print_symbol("PC is at %s\n", instruction_pointer(regs));
print_symbol("LR is at %s\n", regs->ARM_lr);
/*打印出现问题时ARM的所有寄存器,不是此时的,这个参数是出问题时保存的*/
printk("pc : [<%08lx>] lr : [<%08lx>] psr: %08lx\n"
"sp : %08lx ip : %08lx fp : %08lx\n",
regs->ARM_pc, regs->ARM_lr, regs->ARM_cpsr,
regs->ARM_sp, regs->ARM_ip, regs->ARM_fp);
printk("r10: %08lx r9 : %08lx r8 : %08lx\n",
regs->ARM_r10, regs->ARM_r9,
regs->ARM_r8);
printk("r7 : %08lx r6 : %08lx r5 : %08lx r4 : %08lx\n",
regs->ARM_r7, regs->ARM_r6,
regs->ARM_r5, regs->ARM_r4);
printk("r3 : %08lx r2 : %08lx r1 : %08lx r0 : %08lx\n",
regs->ARM_r3, regs->ARM_r2,
regs->ARM_r1, regs->ARM_r0);
flags = regs->ARM_cpsr;
buf[0] = flags & PSR_N_BIT ? 'N' : 'n';
buf[1] = flags & PSR_Z_BIT ? 'Z' : 'z';
buf[2] = flags & PSR_C_BIT ? 'C' : 'c';
buf[3] = flags & PSR_V_BIT ? 'V' : 'v';
buf[4] = '\0';
/*打印出现问题时ARM的IRQ, FIQ的状态,processor mode, isa[指令集],地址空间
* isa = {"ARM" , "Thumb" , "Jazelle", "ThumbEE"};
*/
printk("Flags: %s IRQs o%s FIQs o%s Mode %s ISA %s Segment %s\n",
buf, interrupts_enabled(regs) ? "n" : "ff",
fast_interrupts_enabled(regs) ? "n" : "ff",
processor_modes[processor_mode(regs)],
isa_modes[isa_mode(regs)],
get_fs() == get_ds() ? "kernel" : "user");
/*这里的dac是什么?
*通过cp15得到,transbase, dac and control,这些都是干什么的?
* 这里的transbase和pgd是什么关系?transbase指的是pte的base?
*/
{
unsigned int ctrl;
buf[0] = '\0';
{
unsigned int transbase, dac;
asm("mrc p15, 0, %0, c2, c0\n\t"
"mrc p15, 0, %1, c3, c0\n"
: "=r" (transbase), "=r" (dac));
snprintf(buf, sizeof(buf), " Table: %08x DAC: %08x",
transbase, dac);
}
asm("mrc p15, 0, %0, c1, c0\n" : "=r" (ctrl));
printk("Control: %08x%s\n", ctrl, buf);
}
show_extra_register_data(regs, 128);
}
__show_regs -> show_extra_register_data(regs, 128);
/*
 * show_extra_register_data() - dump the memory surrounding each register value.
 * @regs:   saved register state
 * @nbytes: number of bytes to show on each side of a register's value
 *
 * For PC, LR, SP, IP, FP and R0-R10, in that order, show_data() is called on
 * the 2 * @nbytes window that starts @nbytes below the register's value. The
 * address limit is set to KERNEL_DS for the duration and restored afterwards.
 */
static void show_extra_register_data(struct pt_regs *regs, int nbytes)
{
	const struct {
		unsigned long value;
		const char *label;
	} windows[] = {
		{ regs->ARM_pc,  "PC"  },
		{ regs->ARM_lr,  "LR"  },
		{ regs->ARM_sp,  "SP"  },
		{ regs->ARM_ip,  "IP"  },
		{ regs->ARM_fp,  "FP"  },
		{ regs->ARM_r0,  "R0"  },
		{ regs->ARM_r1,  "R1"  },
		{ regs->ARM_r2,  "R2"  },
		{ regs->ARM_r3,  "R3"  },
		{ regs->ARM_r4,  "R4"  },
		{ regs->ARM_r5,  "R5"  },
		{ regs->ARM_r6,  "R6"  },
		{ regs->ARM_r7,  "R7"  },
		{ regs->ARM_r8,  "R8"  },
		{ regs->ARM_r9,  "R9"  },
		{ regs->ARM_r10, "R10" },
	};
	const mm_segment_t saved_fs = get_fs();
	unsigned int i;

	set_fs(KERNEL_DS);
	for (i = 0; i < sizeof(windows) / sizeof(windows[0]); i++)
		show_data(windows[i].value - nbytes, nbytes * 2, windows[i].label);
	set_fs(saved_fs);
}
/* 注意这里的内容是从cache里得到的,从这里可以判断,cache里的内容或者说CPU执行的内容是否和 DDR中的一致
* DDR中的可以从crash dump 中读出.
* dump a block of kernel memory from around the given address
*/
__die -> dump_mem:
这个函数涉及task_struct, thread_info, sp之间的关系。在文件arch/arm/include/asm/thread_info.h中可看到函数current_thread_info的定义:
static inline struct thread_info *current_thread_info(void)
{
register unsigned long sp asm ("sp");
return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}
通过该函数由任意栈指针都可得到栈底,也就是 thread_info的地址。
/* Stack size in bytes (8 KiB); current_thread_info() masks sp with ~(THREAD_SIZE - 1). */
#define THREAD_SIZE 8192
栈的大小是8K, task_struct结构体的成员stack是栈底,也是对应thread_info结构体的地址。堆栈数据是从
栈底+8K的地方开始向下存的。
如:
SP:0xe6a35f6c, 通过current_thread_info得到 thread_info的地址eval (0xe6a35f6c & 0xffffe000):0xe6a34000
由thread_info得到:#define task_thread_info(task) ((struct thread_info *)(task)->stack)
crash> thread_info 0xe6a34000
struct thread_info {
flags = 0,
preempt_count = 0,
addr_limit = 3204448256,
task = 0xe68dcca0,
}
由 task_struct得到:#define task_stack_page(task) ((task)->stack)
crash> task_struct 0xe68dcca0
struct task_struct {
state = 0,
stack = 0xe6a34000,
}
从上我们可以看出task_struct, thread_info, sp,之间的关系。
搞清了上述关系,dump_mem就是格式化打印从当前SP(regs->ARM_sp)到 栈底 + THREAD_SIZE 之间的内容。
dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
THREAD_SIZE + (unsigned long)task_stack_page(tsk));
__die -> dump_backtrace:
fp是帧指针,每发生一次函数调用(跳转)就形成一个栈帧,每个帧中保留一些临时变量。dump_backtrace先对帧指针做一些判断,看是否能得到backtrace.
如果检查成功,则调用汇编函数c_backtrace,该函数实现在文件arch\arm\lib\backtrace.S中。
/*
 * dump_backtrace() - print a call backtrace starting at the saved frame pointer.
 * @regs: register state saved at the fault; supplies fp and the processor mode
 * @tsk:  task whose stack is walked; used for the underflow check
 *
 * The frame pointer is checked first: a zero or invalid fp suppresses the
 * walk, while an fp below end_of_stack(@tsk) is only reported. If the walk
 * is still allowed, c_backtrace() does it.
 *
 * NOTE(review): the upstream function also derives fp from the task or from
 * the live fp register when @regs is NULL; this excerpt omits those branches.
 */
static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
	/*
	 * Start from zero so that a NULL @regs is reported as "no frame
	 * pointer" instead of reading indeterminate values.
	 */
	unsigned int fp = 0, mode = 0;
	int ok = 1;

	printk("Backtrace: ");

	if (regs) {
		fp = regs->ARM_fp;
		mode = processor_mode(regs);
	}

	if (!fp) {
		printk("no frame pointer");
		ok = 0;
	} else if (verify_stack(fp)) {
		printk("invalid frame pointer 0x%08x", fp);
		ok = 0;
	} else if (fp < (unsigned long)end_of_stack(tsk)) {
		/* Reported only: ok stays set and the walk is still attempted. */
		printk("frame pointer underflow");
	}
	/* Finish the "Backtrace: " line before anything else is printed. */
	printk("\n");

	if (ok)
		c_backtrace(fp, mode);
}
__die -> dump_instr:
根据PC指针和指令mode, 打印出当前执行的指令码。
至此, kernel crash 后的 oops信息全部打印完毕,后面会根据系统的设置决定是否重启系统。
这篇关于kernel crash 发生后的那些事(三)的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!