捉虫日记 0008: Cache aliases issue (2)

来自Jack's Lab
跳转到: 导航, 搜索

1 Solution

1.1 保守的

参照 [Documentation/cachetlb.txt] 的指引,和引入 kmap_coherent 的 patch,发现焦点集中在 3 个函数中:

copy_to_user_page()
copy_from_user_page()
copy_user_highpage()


kmap_coherent 就在这三个函数中被调用,且它们都会根据 cpu_has_dc_aliases 的值判断是否使用 kmap_coherent;若其值为 0,这些函数就会走另一条路径:

/*
 * copy_user_highpage - copy the contents of page @from into page @to.
 *
 * @to:    destination page, temporarily mapped with kmap_atomic(KM_USER1).
 * @from:  source page.
 * @vaddr: user virtual address the page is associated with; used to pick
 *         the kmap_coherent() mapping and for the alias check below.
 * @vma:   VMA of the mapping; only vm_flags is read here.
 *
 * When cpu_has_dc_aliases is non-zero the source page is mapped through
 * kmap_coherent() at @vaddr; otherwise it is mapped with kmap_atomic().
 */
void copy_user_highpage(struct page *to, struct page *from,
    unsigned long vaddr, struct vm_area_struct *vma)
{
    void *vfrom, *vto;

    vto = kmap_atomic(to, KM_USER1);
    if (cpu_has_dc_aliases) {
        vfrom = kmap_coherent(from, vaddr);
        copy_page(vto, vfrom);
        kunmap_coherent(from);
    } else {
        vfrom = kmap_atomic(from, KM_USER0);
        copy_page(vto, vfrom);
        kunmap_atomic(vfrom, KM_USER0);
    }

    /*
     * Flush the destination's kernel mapping from the data cache when the
     * VMA is executable and cpu_has_ic_fills_f_dc is clear, or when the
     * kernel mapping (vto) aliases with the user address.
     */
    if (((vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc) ||
        pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK))
        flush_data_cache_page((unsigned long)vto);
    kunmap_atomic(vto, KM_USER1);
    /* Make sure this page is cleared on other CPU's too before using it */
    smp_wmb();
}

/*
 * copy_to_user_page - copy @len bytes from @src into a user page.
 *
 * @vma:   VMA of the user mapping; vm_flags is checked for VM_EXEC.
 * @page:  the user page being written.
 * @vaddr: user virtual address of the write within @page.
 * @dst:   destination address supplied by the caller; only used when
 *         cpu_has_dc_aliases is zero.
 * @src:   source buffer.
 * @len:   number of bytes to copy.
 *
 * When cpu_has_dc_aliases is non-zero the write goes through a
 * kmap_coherent() mapping of @page at @vaddr (plus the in-page offset)
 * instead of @dst.
 */
void copy_to_user_page(struct vm_area_struct *vma,
            struct page *page, unsigned long vaddr, void *dst, const void *src,
            unsigned long len)
{
    if (cpu_has_dc_aliases) {
        void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);

        memcpy(vto, src, len);
        kunmap_coherent(page);
    } else {
        memcpy(dst, src, len);
    }

    /* Executable mapping and cpu_has_ic_fills_f_dc clear: flush the user page. */
    if ((vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc)
        flush_cache_page(vma, vaddr, page_to_pfn(page));
}

/*
 * copy_from_user_page - copy @len bytes out of a user page into @dst.
 *
 * @vma:   VMA of the user mapping (not read by this function).
 * @page:  the user page being read.
 * @vaddr: user virtual address of the read within @page.
 * @dst:   destination buffer.
 * @src:   source address supplied by the caller; only used when
 *         cpu_has_dc_aliases is zero.
 * @len:   number of bytes to copy.
 */
void copy_from_user_page(struct vm_area_struct *vma,
                struct page *page, unsigned long vaddr, void *dst, const void *src,
                unsigned long len)
{
    void *coherent_src;

    /* Without cache aliases, read straight from the caller's address. */
    if (!cpu_has_dc_aliases) {
        memcpy(dst, src, len);
        return;
    }

    /* Otherwise read through a kmap_coherent() mapping at the same in-page offset. */
    coherent_src = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
    memcpy(dst, coherent_src, len);
    kunmap_coherent(page);
}


改进的做法有点流氓 :) 引入了 cpu_use_kmap_coherent,让它们都走 else 分支,然后在 else 的程序块中做额外的 Cache flush,以消除因 Cache Aliases 引起的数据不一致问题:

--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -207,11 +207,13 @@
void *vfrom, *vto;
vto = kmap_atomic(to, KM_USER1);
- if (cpu_has_dc_aliases) {
+ if (cpu_has_dc_aliases && cpu_use_kmap_coherent) {
vfrom = kmap_coherent(from, vaddr);
copy_page(vto, vfrom);
kunmap_coherent(from);
} else {
vfrom = kmap_atomic(from, KM_USER0);
+ if(pages_do_alias((unsigned long)vfrom, vaddr & PAGE_MASK))
+ flush_cache_page(vma, vaddr, page_to_pfn(from));
copy_page(vto, vfrom);
kunmap_atomic(vfrom, KM_USER0);
@@ -230,13 +231,14 @@
struct page *page, unsigned long vaddr, void *dst, const void *src,
unsigned long len)
{
- if (cpu_has_dc_aliases) {
+ if (cpu_has_dc_aliases && cpu_use_kmap_coherent) {
void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
memcpy(vto, src, len);
kunmap_coherent(page);
} else
memcpy(dst, src, len);
- if ((vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc)
+ if ((vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc ||
+ pages_do_alias((unsigned long)dst, vaddr & PAGE_MASK))
flush_cache_page(vma, vaddr, page_to_pfn(page));
}
@@ -246,13 +248,16 @@
struct page *page, unsigned long vaddr, void *dst, const void *src,
unsigned long len)
{
- if (cpu_has_dc_aliases) {
+ if (cpu_has_dc_aliases && cpu_use_kmap_coherent) {
    void *vfrom =
        kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
    memcpy(dst, vfrom, len);
    kunmap_coherent(page);
- } else
+ } else {
+ if(pages_do_alias((unsigned long)src, vaddr & PAGE_MASK))
+ flush_cache_page(vma, vaddr, page_to_pfn(page));
    memcpy(dst, src, len);
+ }
}

EXPORT_SYMBOL(copy_from_user_page);

--- a/include/asm-mips/mach-bcm56218/cpu-feature-overrides.h
+++ b/include/asm-mips/mach-bcm56218/cpu-feature-overrides.h
@@ -11,6 +11,6 @@
#define __ASM_MACH_BCM56218_CPU_FEATURE_OVERRIDES_H
#define cpu_has_llsc 1
-#define cpu_has_dc_aliases 0
+#define cpu_use_kmap_coherent 0


至于为何要如此 flush,可参考这个文档的描述: http://www.jackslab.org/people/comcat/mydoc/mips.cache.arch.pdf


1.2 激进的

此后轻松地盯了 kmap_coherent() 近一天,最后浮出水面的石头几乎让人崩溃:怀疑了很多不该怀疑的,可从来没有怀疑过 Broadcom 的这个 MIPS 实现。它的 KSEG2 高端有一段地址空间居然不经过 TLB,而 kmap_coherent 恰恰是用位于这个区间的地址来做 fixed map 的。经测试,在清空所有 TLB 项的情形下,访问这段区域不会出现异常,且始终返回 0 值,感觉就像固定映射到了一个外设的内部 RAM(我称之为 black hole),要命的是 Broadcom 没有任何的文档描述。下面这张图是以页为步进单位,多次访问得出:

Bcm-cache-alias.png


FIXADDR_TOP 往下 8 页左右的区域,正是 kmap_coherent 用来做临时固定映射的。知道了这一点,修正的手段也就很容易了:只要把 FIXADDR_TOP 往下移到 Black Hole 区以下即可,最终的修正比较简洁:

--- a/include/asm-mips/fixmap.h.orig
+++ b/include/asm-mips/fixmap.h
@@ -79,6 +79,7 @@
*/
-#if defined(CONFIG_CPU_TX39XX) || defined(CONFIG_CPU_TX49XX)
+#if defined(CONFIG_CPU_TX39XX) || defined(CONFIG_CPU_TX49XX) || defined (CONFIG_BCM5621X)
#define FIXADDR_TOP ((unsigned long)(long)(int)(0xff000000 - 0x20000))
#else
#define FIXADDR_TOP ((unsigned long)(long)(int)0xfffe0000)
#endif









个人工具
名字空间

变换
操作
导航
工具箱