捉虫日记 0016: vsftpd run failed in NetLogic XLS 64bit kernel

来自Jack's Lab
跳转到: 导航, 搜索

1 Phenomenon

环境:

  • NetLogic XLS 416 Reference Board
  • WindRiver Linux 4.2 (kernel 2.6.34)
  • 内核 64bit,文件系统 32bit,O32 ABI


运行 vsftpd 失败,内核出现 call trace:

root@localhost:/root> vsftpd

Unhandled kernel unaligned access[#1]:
Cpu 0
$ 0   : 0000000000000000 000000001000dce0 0000000000000000 ffffff0000000000
$ 4   : 0000000000000010 0000000000d00000 ffffffffffffffff 0000000000003705
$ 8   : ffffffffffffffff 0000000000003fff 0000000000003703 ffffffff83b0d1b8
$12   : ffffffff83b0d198 ffffffff83b0d1a0 ffffffff83b0d170 ffffffff83b0d180
$16   : a80000008dfe7eb0 ffffff0000000000 ffffffff8e050000 000000002aace000
$20   : 000000000000118e 0000000000000000 00000000004d2860 00000000004a0000
$24   : ffffffff83b0d188 ffffffff83b10000
$28   : a80000008dfe4000 a80000008dfe7e80 00000000004c0000 ffffffff834259e0
Hi    : 0000000000000000
Lo    : 000000849be81000
epc   : ffffffff83425d68 do_ade+0x3b0/0x5d8
    Tainted: G        W
ra    : ffffffff834259e0 do_ade+0x28/0x5d8
Status: 1000dce3    KX SX UX KERNEL EXL IE
Cause : 00800010
BadVA : 000000002aace000
PrId  : 000c4402 (XLS416 Rev B0)
Modules linked in:
Process vsftpd (pid: 299, threadinfo=a80000008dfe4000, task=a80000008d4ea558, tls=000000002aad3b20)
Stack : 000000007fdeb088 000000002aace000 000000007fdeb0d8 000000000000118e
        000000007fdeb088 ffffffff83401dc4 0000000000000000 000000001000dce0
        0000000000000000 0000000000000e72 000000002aace000 0000000000001000
        0000000000000001 0000000000000000 000000000000000a 00000000733d5945
        0000000000000000 000000000002aace 0000000000000000 ffffffffc0000008
        ffffffff83515ed0 0000000000430000 000000002aace000 000000007fdeb0d8
        000000000000118e 000000007fdeb088 000000000000118e 0000000000000000
        00000000004d2860 00000000004a0000 000000000042e5c8 000000002ae2b84c
        000000002aad1000 0000000000000000 000000002aebb960 000000007fdeb050
        00000000004c0000 0000000000409b74 000000000000dcf3 00000000001a9e56
        ...
Call Trace:
[<ffffffff83425d68>] do_ade+0x3b0/0x5d8
[<ffffffff83401dc4>] ret_from_exception+0x0/0x10

Code: 00431024  5440ff99  de030100 <8a630000> 9a630003  24020000  1440ffd0  0060982d  08d09709
Disabling lock debugging due to kernel taint
Segmentation fault



2 Analysis

一个 "Unaligned access exception", 出错地址居然是 0x000000002aace000,一个绝对对齐的地址,而且我记得 XLS 和 XLR 一样都是硬件解决非对齐访问的问题的,这就奇了怪了。


不太甘心,于是在非对齐访问异常处理函数 do_ade() 中用 dump_tlb_all() 把当时的 TLB 内容给打印出来:

Index:  0 pgmask=4kb va=0000042e000 asid=64
    [pa=0008aa06000 c=3 d=1 v=1 g=0] [pa=0008e000000 c=3 d=0 v=1 g=0]
Index:  1 pgmask=4kb va=0002ae2a000 asid=64
    [pa=00000000000 c=0 d=0 v=0 g=0] [pa=00083e63000 c=3 d=0 v=1 g=0]
Index:  2 pgmask=4kb va=0007fa90000 asid=64
    [pa=0008ae43000 c=3 d=1 v=1 g=0] [pa=00000000000 c=0 d=0 v=0 g=0]
Index:  3 pgmask=4kb va=0007fa8e000 asid=64
    [pa=0008ae3c000 c=3 d=1 v=1 g=0] [pa=0008b4b3000 c=3 d=1 v=1 g=0]
Index:  4 pgmask=4kb va=00000402000 asid=64
    [pa=0008abc5000 c=3 d=0 v=1 g=0] [pa=0008aa02000 c=3 d=0 v=1 g=0]
Index:  5 pgmask=4kb va=0000040c000 asid=64
    [pa=0008aacf000 c=3 d=0 v=1 g=0] [pa=0008afac000 c=3 d=0 v=1 g=0]
Index:  6 pgmask=4kb va=0002aace000 asid=64
    [pa=0008a037000 c=7 d=0 v=1 g=0] [pa=0008a036000 c=3 d=1 v=1 g=0]
Index:  7 pgmask=4kb va=0002aabe000 asid=64
    [pa=00081f87000 c=3 d=0 v=1 g=0] [pa=00081f74000 c=3 d=0 v=1 g=0]
Index:  8 pgmask=4kb va=00000414000 asid=64
......
......


在一堆 c=3 中,一个 c=7 的项,就显得那么的光明耀眼!弄不明白 c=7 是啥含义,索性查了一下 XLS 的手册,我惊喜地发现 XLS 上 cache coherency attribute bit 是这样实现的:

Coherency attribute of the page
000: Unimplemented — will cause Address Error exception
001: Unimplemented — will cause Address Error exception
010: Uncached
011: Cachable
100: Cachable
101: Cachable
110: Cachable
111: Unimplemented — will cause Address Error exception


我靠,内核把进程 vsftpd 的虚拟地址 0x000000002aace000 对应页的 cache coherency 属性位改成了 0x7,然后只要一访问这个地址就出 "Address Error exception",而这个 exception 在内核里正是由 do_ade() 处理、并被报告为 "Unaligned access" 的那个异常!


由于每次出错的虚拟地址基本固定,不存在随机现象,而且每次改的都是 cca 属性位,这个太确定了,因此不太可能是 TLB 的已有项被覆盖,但为了更确定,还是在 "Unaligned access exception" 的出错处理函数里把此时的 PTE dump 出来了:

--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -509,11 +509,49 @@ sigill:
    force_sig(SIGILL, current);
 }
 
+void dump_page_table(struct pt_regs *regs)
+{
+   struct mm_struct *mm =  current->mm;
+   //struct pt_regs *p_regs = current_thread_info()->regs;
+   unsigned long address = regs->cp0_badvaddr;
+
+   pgd_t *pgd;
+   pud_t *pud;
+   pmd_t *pmd;
+   pte_t *pte;
+
+   printk("<0> Dumped the vaddr 0x%016lx PTE of process %s\n", address, current->comm);
+
+   pgd = pgd_offset(mm, address);
+   if (!pgd_none(*pgd)) {
+       pud = pud_offset(pgd, address);
+       if (!pud_none(*pud)) {
+           pmd = pmd_offset(pud, address);
+           if (!pmd_none(*pmd)) {
+               pte = pte_offset_map(pmd, address);
+               printk("<0> vaddr 0x%016lx, PTE: 0x%016llx, TLB Entry: 0x%016llx, PA = 0x%016llx, cca = 0x%llx\n",
+                   address,
+                   pte_val(*pte),
+                   pte_val(*pte) >> _PAGE_GLOBAL_SHIFT,
+                   (pte_val(*pte) >> (_PAGE_GLOBAL_SHIFT + 6)) << PAGE_SHIFT,      
+                   (pte_val(*pte) >> (_PAGE_GLOBAL_SHIFT + 3)) & 0x7);     
+
+           }
+       }
+   }
+}
+
+extern void dump_tlb_all(void);
+
 asmlinkage void do_ade(struct pt_regs *regs)
 {
    unsigned int __user *pc;
    mm_segment_t seg;
 
+   dump_page_table(regs);
+
+   dump_tlb_all();
+
    trace_trap_entry(regs, CAUSE_EXCCODE(regs->cp0_cause));
 
    /*


运行一下,得到:

root@localhost:/root> vsftpd
 Dumped the vaddr 0x000000002aace000 PTE of process vsftpd
 vaddr 0x000000002aace000, PTE: 0x0000000046612f5b, TLB Entry: 0x000000000233097a, PA = 0x000000008cc25000, cca = 0x7
 Dumped the vaddr 0x000000002aace000 PTE of process vsftpd
 vaddr 0x000000002aace000, PTE: 0x0000000046612f5b, TLB Entry: 0x000000000233097a, PA = 0x000000008cc25000, cca = 0x7
Unhandled kernel unaligned access[#1]:
Cpu 0
$ 0   : 0000000000000000 000000001000dce0 0000000000000000 ffffff0000000000
$ 4   : 0000000000000010 0000000000d00000 ffffffffffffffff 0000000000003705
$ 8   : ffffffffffffffff 0000000000003fff 0000000000003703 ffffffff83b0d1b8
$12   : ffffffff83b0d198 ffffffff83b0d1a0 ffffffff83b0d170 ffffffff83b0d180
$16   : a80000008dfe7eb0 ffffff0000000000 ffffffff8e050000 000000002aace000
$20   : 000000000000118e 0000000000000000 00000000004d2860 00000000004a0000
$24   : ffffffff83b0d188 ffffffff83b10000
$28   : a80000008dfe4000 a80000008dfe7e80 00000000004c0000 ffffffff834259e0
Hi    : 0000000000000000
Lo    : 000000849be81000
epc   : ffffffff83425d68 do_ade+0x3b0/0x5d8
    Tainted: G        W
ra    : ffffffff834259e0 do_ade+0x28/0x5d8
Status: 1000dce3    KX SX UX KERNEL EXL IE
Cause : 00800010
BadVA : 000000002aace000
PrId  : 000c4402 (XLS416 Rev B0)
Modules linked in:
Process vsftpd (pid: 299, threadinfo=a80000008dfe4000, task=a80000008d4ea558, tls=000000002aad3b20)
Stack : 000000007fdeb088 000000002aace000 000000007fdeb0d8 000000000000118e
        000000007fdeb088 ffffffff83401dc4 0000000000000000 000000001000dce0
        0000000000000000 0000000000000e72 000000002aace000 0000000000001000
        0000000000000001 0000000000000000 000000000000000a 00000000733d5945
        0000000000000000 000000000002aace 0000000000000000 ffffffffc0000008
        ffffffff83515ed0 0000000000430000 000000002aace000 000000007fdeb0d8
        000000000000118e 000000007fdeb088 000000000000118e 0000000000000000
        00000000004d2860 00000000004a0000 000000000042e5c8 000000002ae2b84c
        000000002aad1000 0000000000000000 000000002aebb960 000000007fdeb050
        00000000004c0000 0000000000409b74 000000000000dcf3 00000000001a9e56
        ...
Call Trace:
[<ffffffff83425d68>] do_ade+0x3b0/0x5d8
[<ffffffff83401dc4>] ret_from_exception+0x0/0x10


Code: 00431024  5440ff99  de030100 <8a630000> 9a630003  24020000  1440ffd0  0060982d  08d09709
Disabling lock debugging due to kernel taint
Segmentation fault


很显然,不是 TLB 项被覆盖,而是 PTE 本身就已经被写成了 cca = 0x7,于是问题自然变成:找出是谁改写了该虚拟地址对应的 PTE。

于是给这个常常固定被改写的虚拟地址加了一个 watch point:(其原理请参考:http://hi.baidu.com/comcat/blog/item/d2a9682492f70e378644f903.html)

--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1024,6 +1024,7 @@ asmlinkage void do_mdmx(struct pt_regs *regs)
  */
 asmlinkage void do_watch(struct pt_regs *regs)
 {
+#if 0
     u32 cause;
 
     /*
@@ -1047,6 +1048,11 @@ asmlinkage void do_watch(struct pt_regs *regs)
         mips_clear_watch_registers();
         local_irq_enable();
     }
+#else
+    dump_tlb_all();
+    show_registers(regs);
+    compute_return_epc(regs);
+#endif
 }
 
 asmlinkage void do_mcheck(struct pt_regs *regs)
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -509,6 +509,8 @@ sigill:
     force_sig(SIGILL, current);
 }
 
+extern void dump_tlb_all(void);
+
 asmlinkage void do_ade(struct pt_regs *regs)
 {
     unsigned int __user *pc;
@@ -538,6 +540,8 @@ asmlinkage void do_ade(struct pt_regs *regs)
     seg = get_fs();
     if (!user_mode(regs))
         set_fs(KERNEL_DS);
+    dump_tlb_all();
+    printk("<0> cp0_badvaddr = 0x%016x\n", (unsigned int)regs->cp0_badvaddr);
     emulate_load_store_insn(regs, (void __user *)regs->cp0_badvaddr, pc);
     set_fs(seg);
 
--- a/init/main.c
+++ b/init/main.c
@@ -703,6 +703,12 @@ asmlinkage void __init start_kernel(void)
 
     ftrace_init();
 
+    unsigned long cat_wlo1 = 0x000000002aace003;
+    unsigned long cat_whi1 = 0x40000000;
+
+    __write_64bit_c0_register($18, 0, cat_wlo1);
+    __write_32bit_c0_register($19, 0, cat_whi1);
+
     /* Do the rest non-__init'ed, we're now alive */
     rest_init();
 }


排除一些干扰后得到如下的一个信息:

Got watch at ffffffff8350dbe0
Cpu 0
$ 0   : 0000000000000000 0000000000100000 0000000000000000 0000000000000001
$ 4   : 0000000040000000 000000008d542000 6db6db6db6db6db7 0000000000000000
$ 8   : 0000000000010000 ffffffff83c2c780 000000000000002f 0000000000000002
$12   : 000000001000dce1 000000001000001e ffffffff83515fc0 000000007ffbb6c8
$16   : a80000008d510000 0000000046dd7397 000000002aace000 a80000008df6eb80
$20   : a80000008d691f80 a80000008d542670 a800000081f00e10 a800000081eea680
$24   : fffffffff0000000 000000002ae2b84c 
$28   : a80000008df68000 a80000008df6bd40 ffffffff83c30000 ffffffff8350dbc4
Hi    : 00000000001a7fc6
Lo    : 000000000008d542
epc   : ffffffff8350dbe0 handle_mm_fault+0x898/0xff0
    Tainted: G        W
ra    : ffffffff8350dbc4 handle_mm_fault+0x87c/0xff0
Status: 1000dce3    KX SX UX KERNEL EXL IE
Cause : 8080005c
PrId  : 000c4402 (XLS416 Rev B0)
Modules linked in:
Process vsftpd (pid: 303, threadinfo=a80000008df68000, task=a80000008d6c3448, tls=000000002aad3b20)
Stack : a80000008d542690 000000002aad2000 0000000000000670 0000000000100070
        ffffffffffffff18 ffffffff8340edf8 000000001000dce1 ffffffff8340e8e8
        a80000008df6beb0 000000002aace000 a80000008d6c3448 a80000008d691f80
        0000000000000001 a80000008df6eb80 a80000008d691fe0 0000000000000001
        0000000000000003 ffffffff8342fe18 0000000000030002 ffffffff837faaf0
        a80000008d691fe4 000000001000dce1 000000002aad2000 0000000000000000
        0000000000000000 000000002aad2000 fffffffffffffff8 0000000000000000
        000000002aad1000 ffffffff8340e8e8 00000000004c0000 ffffffff835161c8
        a80000008d53fe30 0000000000000000 0000000000000000 ffffffff8340e8e8
        0000000000001000 000000007ffbb928 0000000000000e72 0000000000004000
        ...
Call Trace:
[<ffffffff8350dbe0>] handle_mm_fault+0x898/0xff0
[<ffffffff8342fe18>] do_page_fault+0x1c0/0x450
[<ffffffff83401dc4>] ret_from_exception+0x0/0x10


则确定是在 handle_mm_fault() 中重写了原来地址的 cache coherency 属性,于是集中所有的精力来定位。可能是内核更新 PTE 的属性位比较频繁,此时 watch point 已经不能帮助我们精确定位修改时的上下文,因此改为从正面突破。


大概看了一下 handle_mm_fault() 里对 PTE 的操作,最终基本都是通过 set_pte() 和 pfn_pte() 这两个接口(前者是内联函数,后者是宏)来写 PTE,于是在这两处各加了一个判断检测:

--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -222,7 +222,22 @@ static inline void pud_clear(pud_t *pudp)
 #define pfn_pte(pfn, prot) __pte(((pfn) << (PAGE_SHIFT + 2)) | pgprot_val(prot))
 #else
 #define pte_pfn(x)     ((unsigned long)((x).pte >> _PFN_SHIFT))
-#define pfn_pte(pfn, prot) __pte(((pfn) << _PFN_SHIFT) | pgprot_val(prot))
+//#define pfn_pte(pfn, prot)   __pte(((pfn) << _PFN_SHIFT) | pgprot_val(prot))
+extern void dump_tlb_all(void);
+extern void dump_stack(void);
+static inline pte_t pfn_pte_func(unsigned long pfn, pgprot_t prot)
+{
+   if ((((pgprot_val(prot) >> _CACHE_SHIFT) & 7)== 7))
+   {
+       printk("<0> ------------- Notice: PTE cache coherent bit equals 7 !! -----------------\n");
+       printk("<0> prot = 0x%lx\n", pgprot_val(prot));
+       dump_tlb_all();
+       dump_stack();
+   }
+   return __pte(((pfn) << _PFN_SHIFT) | pgprot_val(prot));
+}
+
+#define pfn_pte(pfn, prot) pfn_pte_func(pfn, prot)
 #endif

 #define __pgd_offset(address)  pgd_index(address)
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -146,6 +146,15 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
 static inline void set_pte(pte_t *ptep, pte_t pteval)
 {
    *ptep = pteval;
+
+   if (((pte_val(pteval) >> (_PAGE_GLOBAL_SHIFT + 3)) & 0x7) == 0x7)
+   {
+       printk("<0>------------- Notice: PTE cache coherent bit equals 7 !! -----------------\n");
+       printk("<0>PTE: 0x%016llx, TLB entry: 0x%016llx, PA = 0x%016llx\n",
+           pte_val(pteval), pte_val(pteval) >> _PAGE_GLOBAL_SHIFT,
+           (pte_val(pteval) >> (_PAGE_GLOBAL_SHIFT + 6)) << PAGE_SHIFT);      
+       dump_stack();
+   }
 #if !defined(CONFIG_CPU_R3000) && !defined(CONFIG_CPU_TX39XX)
    if (pte_val(pteval) & _PAGE_GLOBAL) {
        pte_t *buddy = ptep_buddy(ptep);


得到如下有效信息:

------------- Notice: PTE cache coherent bit equals 7 !! -----------------
PTE: 0x0000000046df3f19, TLB entry: 0x000000000236f9f8, PA = 0x000000008dbe7000
Call Trace:
[<ffffffff8340a4a0>] dump_stack+0x8/0x34
[<ffffffff835165cc>] mprotect_fixup+0x51c/0x550
[<ffffffff83516784>] SyS_mprotect+0x184/0x240
[<ffffffff8340344c>] handle_sys+0x16c/0x188

------------- Notice: PTE cache coherent bit equals 7 !! -----------------
PTE: 0x0000000046df3f1b, TLB entry: 0x000000000236f9f8, PA = 0x000000008dbe7000
Call Trace:
[<ffffffff8340a4a0>] dump_stack+0x8/0x34
[<ffffffff835165cc>] mprotect_fixup+0x51c/0x550
[<ffffffff83516784>] SyS_mprotect+0x184/0x240
[<ffffffff8340344c>] handle_sys+0x16c/0x188


因此修改 cca 属性位的操作肯定出现在 mprotect_fixup() 这个函数中,很快,修改 PTE 之 cca 属性位的调用链就出来了:

 mprotect_fixup() ---> change_protection() ---> change_pud_range() ---> change_pmd_range() ---> change_pte_range() 


可以看见,change_pte_range() 中,是通过这一句,修改了 PTE 的 prot 属性位:

 ptent = pte_modify(ptent, newprot);


于是问题就变成了:什么时候 newprot 被改了?


往上回溯,newprot 是 mprotect_fixup() 中调 change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable) 时,由 vma->vm_page_prot 传入,因此密切关注 vma->vm_page_prot 这个变量值的演变,在 mprotect_fixup() 中有关 vma->vm_page_prot 的前后加了一些 printk 后,得到其新值来自于 vm_get_page_prot() 这个函数:

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
        return __pgprot(pgprot_val(protection_map[vm_flags &
                                (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
                        pgprot_val(arch_vm_get_page_prot(vm_flags)));
}                


其值来自于 protection_map[] 这个数组(一张从 vm 权限标志到页属性位的映射表),其定义于 mm/mmap.c

pgprot_t protection_map[16] = {
        __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
        __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};


__Pxxx, __Sxxx 只是它的静态初始值,真正的初始化是在 arch/mips/mm/cache.c 中的 setup_protection_map() 里完成的:

static inline void setup_protection_map(void)
{
        if (kernel_uses_smartmips_rixi) {
                protection_map[0]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
                protection_map[1]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC);
                protection_map[2]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
                protection_map[3]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC);
                protection_map[4]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_READ);
                protection_map[5]  = __pgprot(_page_cachable_default | _PAGE_PRESENT);
                protection_map[6]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_READ);
                protection_map[7]  = __pgprot(_page_cachable_default | _PAGE_PRESENT);

                protection_map[8]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
                protection_map[9]  = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC);
                protection_map[10] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | _PAGE_NO_READ);
                protection_map[11] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
                protection_map[12] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_READ);
                protection_map[13] = __pgprot(_page_cachable_default | _PAGE_PRESENT);
                protection_map[14] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE  | _PAGE_NO_READ);
                protection_map[15] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE);

        } else {
                protection_map[0] = PAGE_NONE;
                protection_map[1] = PAGE_READONLY;
                protection_map[2] = PAGE_COPY;
                protection_map[3] = PAGE_COPY;
                protection_map[4] = PAGE_READONLY;
                protection_map[5] = PAGE_READONLY;
                protection_map[6] = PAGE_COPY;
                protection_map[7] = PAGE_COPY;
                protection_map[8] = PAGE_NONE;
                protection_map[9] = PAGE_READONLY;
                protection_map[10] = PAGE_SHARED;
                protection_map[11] = PAGE_SHARED;
                protection_map[12] = PAGE_READONLY;
                protection_map[13] = PAGE_READONLY;
                protection_map[14] = PAGE_SHARED;
                protection_map[15] = PAGE_SHARED;
        }
}


映射使用了 PAGE_NONE 这个宏,进到 PAGE_NONE 的定义处一看:

 #define PAGE_NONE       __pgprot(_PAGE_PRESENT | _CACHE_CACHABLE_NONCOHERENT)

则  _CACHE_CACHABLE_NONCOHERENT 定义于 arch/mips/include/asm/pgtable-bits.h

#elif defined(CONFIG_CPU_PHOENIX)
#define _CACHE_UNCACHED             (2<<9)
#define _CACHE_CACHABLE_COW         (3<<9)
#define _CACHE_CACHABLE_NONCOHERENT (3<<9)

这里把偏移写死成了 9,而 cache 属性位的实际偏移(_CACHE_SHIFT)在 CONFIG_HUGETLB_PAGE 关闭时并不是 9,造成 _CACHE_CACHABLE_NONCOHERENT 定义错误!



3 Solution

找到了根原因,修正起来就很容易:

Subject: [PATCH] nlm_xls_atx_64_be: fixed the definition of cache coherency attribute bit

SDK 1.7 uses a fixed offset value in the three cache attribute macros.
This is not correct when HUGETLB_PAGE is disabled.

_CACHE_CACHABLE_NONCOHERENT affects the PAGE_NONE macro used in protection_map[],
which is the mapping table from vm protection attributes to hardware protection
attributes. The kernel would get incorrect attribute bits when using the table.

It can set the cache attribute to 0x7 when the kernel modifies the protection
bits via the mprotect() system call. But 0x7 is illegal on the XLS/XLR platform
and it will cause an Address Error Exception:

Unhandled kernel unaligned access[#1]:
Cpu 0
$ 0   : 0000000000000000 000000001000dce0 0000000000000000 ffffff0000000000
......
......
$28   : a80000008d614000 a80000008d617e80 00000000004c0000 ffffffff83401dc4
Hi    : 0000000000000000
Lo    : 00000082d126a800
epc   : ffffffff83425c28 do_ade+0x3a8/0x5d0
    Tainted: G        W
ra    : ffffffff83401dc4 ret_from_exception+0x0/0x10
Status: 1000dce3    KX SX UX KERNEL EXL IE
Cause : 00800010
BadVA : 000000002aace000
PrId  : 000c4402 (XLS416 Rev B0)
Modules linked in:
Process vsftpd (pid: 298, threadinfo=a80000008d614000, task=a80000008df86890...)
Stack : 000000007fdc8498 000000002aace000 000000007fdc84e8 000000000000118e
        000000007fdc8498 ffffffff83401dc4 0000000000000000 000000001000dce0
        ...
        ...
Call Trace:
[<ffffffff83425c28>] do_ade+0x3a8/0x5d0
[<ffffffff83401dc4>] ret_from_exception+0x0/0x10

Signed-off-by: Jack Tan <jiankemeng@gmail.com>
---
 arch/mips/include/asm/pgtable-bits.h |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h
index cccecd0..fdd3784 100644
--- a/arch/mips/include/asm/pgtable-bits.h
+++ b/arch/mips/include/asm/pgtable-bits.h
@@ -194,9 +194,9 @@ static inline uint64_t pte_to_entrylo(unsigned long pte_val)
 
 #elif defined(CONFIG_CPU_PHOENIX)
 
-#define _CACHE_UNCACHED             (2<<9)
-#define _CACHE_CACHABLE_COW         (3<<9)
-#define _CACHE_CACHABLE_NONCOHERENT (3<<9)
+#define _CACHE_UNCACHED             (2<<_CACHE_SHIFT)
+#define _CACHE_CACHABLE_COW         (3<<_CACHE_SHIFT)
+#define _CACHE_CACHABLE_NONCOHERENT (3<<_CACHE_SHIFT)
 
 #elif defined(CONFIG_CPU_RM9000)
 
-- 
1.7.4.1



















个人工具
名字空间

变换
操作
导航
工具箱