Linux Memory Management: An Analysis of the sys_brk Implementation (Part 2)

2023-12-31 12:50


4       Analysis of the sbrk() system call code

// sbrk: grows or shrinks the boundary of the process's data segment; the brk argument is the new data segment boundary. The function is implemented in mm/mmap.c.
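Before the kernel side, a minimal user-space sketch of what the syscall is asked to do (sbrk()/brk() are the standard glibc wrappers around it; the printed addresses are illustrative):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	void *old = sbrk(0);            /* query the current program break */
	printf("break at %p\n", old);

	if (sbrk(8192) == (void *)-1)   /* grow the heap by two 4 KiB pages */
		return 1;
	printf("after growing:   %p\n", sbrk(0));

	if (brk(old) != 0)              /* shrink back to the old boundary */
		return 1;
	printf("after shrinking: %p\n", sbrk(0));
	return 0;
}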

The kernel implementation is as follows:

SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long rlim, retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;
	unsigned long min_brk;

	/* take the mmap_sem read-write semaphore for writing:
	 * we may modify the process's vma list below */
	down_write(&mm->mmap_sem);

#ifdef CONFIG_COMPAT_BRK
	min_brk = mm->end_code;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/*
	 * Check against rlimit here. If this check is done later after the test
	 * of oldbrk with newbrk then it can escape the test and let the data
	 * segment grow beyond its set limit the in case where the limit is
	 * not page aligned -Ram Gupta
	 */
	/* validate the argument: the new break may not push the data
	 * segment past RLIMIT_DATA (the data segment limit in bytes) */
	rlim = rlimit(RLIMIT_DATA);
	if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
			(mm->end_data - mm->start_data) > rlim)
		goto out;

	/* align both boundaries to page frames */
	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Always allow shrinking brk. */
	/* the new boundary is below the current one: shrink the heap */
	if (brk <= mm->brk) {
		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
			goto set_brk;
		goto out;
	}

	/* Check against existing mmap mappings. */
	/* the range we want to grow into is already mapped */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
		goto out;

	/* Ok, looks good - let it rip. */
	/* perform the expansion */
	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
		goto out;
set_brk:
	mm->brk = brk;
out:
	retval = mm->brk;
	up_write(&mm->mmap_sem);
	return retval;
}
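The RLIMIT_DATA value checked above is the same per-process limit that user space can inspect with getrlimit(2); a small sketch using that real API:

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
	struct rlimit rl;

	/* the limit sys_brk compares against via rlimit(RLIMIT_DATA) */
	if (getrlimit(RLIMIT_DATA, &rl) != 0)
		return 1;
	if (rl.rlim_cur == RLIM_INFINITY)
		printf("data segment size: unlimited\n");
	else
		printf("data segment size: %llu bytes\n",
		       (unsigned long long)rl.rlim_cur);
	return 0;
}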


 

This function can both allocate space, by pushing the boundary at the bottom of the dynamic allocation area upward, and release space, by returning it. Its code therefore falls roughly into two parts, the shrink path and the grow path, and we analyze the two cases separately, starting with shrinking.

4.1   Shrinking the user address space

4.1.1   do_munmap

From the code above we can see that the interface behind the shrink operation is do_munmap().
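The split cases do_munmap() handles below are easy to trigger from user space: unmapping the middle of a mapping forces the kernel to cut one vma into two. A minimal sketch with the standard mmap(2)/munmap(2) API (assuming 4 KiB pages):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	long pg = 4096;	/* assumed page size, for brevity */

	/* one anonymous vma spanning three pages */
	char *p = mmap(NULL, 3 * pg, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	/* unmap the middle page: the kernel must split the vma,
	 * leaving [p, p+pg) and [p+2*pg, p+3*pg) */
	if (munmap(p + pg, pg) != 0)
		return 1;

	printf("middle page unmapped; see /proc/self/maps\n");
	return 0;
}

The kernel code: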

/* Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and the areas themselves, which do the
 * work.  This now handles partial unmappings.
 * Jeremy Fitzhardinge <jeremy@goop.org>
 */
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
{
	unsigned long end;
	struct vm_area_struct *vma, *prev, *last;

	if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
		return -EINVAL;

	if ((len = PAGE_ALIGN(len)) == 0)
		return -EINVAL;

	/* Find the first overlapping VMA */
	/* find the first vma whose end address is above start;
	 * prev is the vma just before it */
	vma = find_vma_prev(mm, start, &prev);
	if (!vma)
		return 0;
	/* we have  start < vma->vm_end  */

	/* if it doesn't overlap, we have nothing.. */
	/* start is the new boundary and len the amount to shrink by,
	 * so start + len is the old boundary; the current heap tail
	 * cannot fall into a hole, so start + len must be a valid
	 * linear address of the process */
	end = start + len;
	if (vma->vm_start >= end)
		return 0;

	/*
	 * If we need to split any vma, do it now to save pain later.
	 * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
	 * unmapped vm_area_struct will remain in use: so lower split_vma
	 * places tmp vma above, and higher split_vma places tmp vma below.
	 */
	/* if start lies above the vma's start address, split the vma in two */
	if (start > vma->vm_start) {
		int error;

		/*
		 * Make sure that map_count on return from munmap() will
		 * not exceed its limit; but let map_count go just above
		 * its limit temporarily, to help free resources as expected.
		 */
		if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
			return -ENOMEM;

		error = __split_vma(mm, vma, start, 0);
		if (error)
			return error;
		prev = vma;
	}

	/* Does it split the last one? */
	/* find the last affected vma */
	last = find_vma(mm, end);
	/* the case where the last vma must be split in two as well */
	if (last && end > last->vm_start) {
		int error = __split_vma(mm, last, end, 1);
		if (error)
			return error;
	}
	vma = prev? prev->vm_next: mm->mmap;

	/*
	 * unlock any mlock()ed ranges before detaching vmas
	 */
	if (mm->locked_vm) {
		struct vm_area_struct *tmp = vma;
		while (tmp && tmp->vm_start < end) {
			if (tmp->vm_flags & VM_LOCKED) {
				mm->locked_vm -= vma_pages(tmp);
				munlock_vma_pages_all(tmp);
			}
			tmp = tmp->vm_next;
		}
	}

	/*
	 * Remove the vma's, and unmap the actual pages
	 */
	/* unlink the affected vmas from the process's vma list and tree */
	detach_vmas_to_be_unmapped(mm, vma, prev, end);
	/* update the page table entries and free the page frames */
	unmap_region(mm, vma, prev, start, end);

	/* Fix up all other VM information */
	/* by now every vma to be freed is chained off vma;
	 * remove_vma_list disposes of that chain */
	remove_vma_list(mm, vma);

	return 0;
}
4.1.1.1      __split_vma

To understand the whole shrinking process, it is worth examining the helper functions it calls in detail.

__split_vma() splits one vma into two:

// Parameters:
// mm: the process's memory descriptor; vma: the vma to split;
// addr: the split boundary address; new_below: 0 means vma keeps the
// lower half, 1 means vma keeps the upper half
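For intuition, here is a toy sketch of the net boundary arithmetic (in the kernel, the new vma's bounds are set in __split_vma() itself while the original's are fixed up by vma_adjust(); plain ranges and made-up values here, not kernel types):

#include <stdio.h>

struct range { unsigned long start, end; };

/* split r at addr; new_below = 1 makes the new range the lower half */
static struct range split(struct range *r, unsigned long addr, int new_below)
{
	struct range new = *r;

	if (new_below) {
		new.end = addr;		/* new = lower half */
		r->start = addr;	/* original keeps the upper half */
	} else {
		new.start = addr;	/* new = upper half */
		r->end = addr;		/* original keeps the lower half */
	}
	return new;
}

int main(void)
{
	struct range vma = { 0x8048000, 0x804b000 };
	struct range new = split(&vma, 0x8049000, 0);

	printf("vma [%lx,%lx)  new [%lx,%lx)\n",
	       vma.start, vma.end, new.start, new.end);
	return 0;
}

The kernel code: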

/*
 * __split_vma() bypasses sysctl_max_map_count checking.  We use this on the
 * munmap path where it doesn't make sense to fail.
 */
static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
	  unsigned long addr, int new_below)
{
	struct mempolicy *pol;
	struct vm_area_struct *new;
	int err = -ENOMEM;

	/* a hugetlb vma may only be split at a huge-page boundary */
	if (is_vm_hugetlb_page(vma) && (addr &
					~(huge_page_mask(hstate_vma(vma)))))
		return -EINVAL;

	/* allocate a new vma */
	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
	if (!new)
		goto out_err;

	/* most fields are the same, copy all, and then fixup */
	*new = *vma;

	INIT_LIST_HEAD(&new->anon_vma_chain);
	/* when new_below is 1, vma becomes the upper half and new the lower */
	if (new_below)
		new->vm_end = addr;
	else {
		new->vm_start = addr;
		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
	}

	pol = mpol_dup(vma_policy(vma));
	if (IS_ERR(pol)) {
		err = PTR_ERR(pol);
		goto out_free_vma;
	}
	vma_set_policy(new, pol);

	if (anon_vma_clone(new, vma))
		goto out_free_mpol;

	if (new->vm_file) {
		get_file(new->vm_file);
		if (vma->vm_flags & VM_EXECUTABLE)
			added_exe_file_vma(mm);
	}

	/* if an open operation is defined, call it */
	if (new->vm_ops && new->vm_ops->open)
		new->vm_ops->open(new);

	/* after the setup above, vma_adjust fixes up the vma boundaries
	 * and links the new vma into the process's vma list */
	if (new_below)
		err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
			((addr - new->vm_start) >> PAGE_SHIFT), new);
	else
		err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);

	/* Success. */
	if (!err)
		return 0;

	/* Clean everything up if vma_adjust failed. */
	if (new->vm_ops && new->vm_ops->close)
		new->vm_ops->close(new);
	if (new->vm_file) {
		if (vma->vm_flags & VM_EXECUTABLE)
			removed_exe_file_vma(mm);
		fput(new->vm_file);
	}
	unlink_anon_vmas(new);
 out_free_mpol:
	mpol_put(pol);
 out_free_vma:
	kmem_cache_free(vm_area_cachep, new);
 out_err:
	return err;
}
4.1.1.1.1   vma_adjust

Next comes vma_adjust(), which adjusts a vma's start and end boundaries and inserts the new vma into the process's vma list, among other things. Its prototype:

/*
 * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
 * is already present in an i_mmap tree without adjusting the tree.
 * The following helper function should be used when such adjustments
 * are necessary.  The "insert" vma (if any) is to be inserted
 * before we drop the necessary locks.
 */
int vma_adjust(struct vm_area_struct *vma, unsigned long start,
	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
4.1.1.2         detach_vmas_to_be_unmapped

The second function to analyze is detach_vmas_to_be_unmapped(). It chains the vmas to be deleted together while unhooking them from mm.

// Parameters:
/*
	mm:   the process's memory descriptor
	vma:  the first vma to delete
	prev: the vma immediately preceding vma
	end:  the end address
*/
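The list surgery is easier to see on a plain singly linked list first; a self-contained sketch (illustrative types, not the kernel's):

#include <stdio.h>

struct node {
	unsigned long start, end;
	struct node *next;
};

/* detach every node with start < end_addr, beginning at vma, from the
 * list; prev is the node before vma (or NULL), and head plays the role
 * of mm->mmap. Returns the detached chain. */
static struct node *detach_range(struct node **head, struct node *vma,
				 struct node *prev, unsigned long end_addr)
{
	struct node **insertion_point = prev ? &prev->next : head;
	struct node *detached = *insertion_point;
	struct node *tail = NULL;

	while (vma && vma->start < end_addr) {
		tail = vma;
		vma = vma->next;
	}
	*insertion_point = vma;		/* unhook the chain from the list */
	if (tail)
		tail->next = NULL;	/* terminate the detached chain */
	return detached;
}

int main(void)
{
	struct node c = { 0x3000, 0x4000, NULL };
	struct node b = { 0x2000, 0x3000, &c };
	struct node a = { 0x1000, 0x2000, &b };
	struct node *head = &a;

	/* detach the nodes overlapping [0x2000, 0x3000) */
	struct node *gone = detach_range(&head, &b, &a, 0x3000);
	printf("detached %lx; list is now %lx -> %lx\n",
	       gone->start, head->start, head->next->start);
	return 0;
}

The kernel version additionally erases each vma from the red-black tree and invalidates the mmap_cache: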

/*
 * Create a list of vma's touched by the unmap, removing them from the mm's
 * vma list as we go..
 */
static void
detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, unsigned long end)
{
	struct vm_area_struct **insertion_point;
	struct vm_area_struct *tail_vma = NULL;
	unsigned long addr;

	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
	vma->vm_prev = NULL;
	do {
		/* remove the vma from the red-black tree */
		rb_erase(&vma->vm_rb, &mm->mm_rb);
		/* update the vma count */
		mm->map_count--;
		tail_vma = vma;
		vma = vma->vm_next;
	} while (vma && vma->vm_start < end);
	/* unhook the vmas to be deleted from the list */
	*insertion_point = vma;
	if (vma)
		vma->vm_prev = prev;
	/* terminate the detached chain */
	tail_vma->vm_next = NULL;
	if (mm->unmap_area == arch_unmap_area)
		addr = prev ? prev->vm_end : mm->mmap_base;
	else
		addr = vma ? vma->vm_start : mm->mmap_base;
	mm->unmap_area(mm, addr);
	/* vmas were just removed, so the mmap_cache lookup cache is stale */
	mm->mmap_cache = NULL;		/* Kill the cache. */
}
4.1.1.3         remove_vma_list

The next function to analyze is remove_vma_list(), which disposes of the chain of detached vmas. The code is shown below:

// Parameters:
// mm:  the process's memory descriptor
// vma: the head of the chain of vmas to delete

/*
 * Ok - we have the memory areas we should free on the vma list,
 * so release them, and do the vma updates.
 *
 * Called with the mm semaphore held.
 */
static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
{
	/* Update high watermark before we lower total_vm */
	update_hiwater_vm(mm);
	do {
		long nrpages = vma_pages(vma);

		mm->total_vm -= nrpages;
		vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
		vma = remove_vma(vma);
	} while (vma);
	validate_mm(mm);
}
4.1.1.3.1       update_hiwater_vm

update_hiwater_vm() is defined in mm.h:

static inline void update_hiwater_vm(struct mm_struct *mm)
{
	if (mm->hiwater_vm < mm->total_vm)
		mm->hiwater_vm = mm->total_vm;
}
4.1.1.3.2       vma_pages

vma_pages() simply computes the number of page frames a vma spans; for example, with 4 KiB pages (PAGE_SHIFT = 12), a vma covering [0xb7000000, 0xb7003000) spans (0xb7003000 - 0xb7000000) >> 12 = 3 pages:

static inline unsigned long vma_pages(struct vm_area_struct *vma)
{
	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
}
4.1.1.4         unmap_region

unmap_region() is the core of the whole shrink path: it updates the affected page table entries and frees the mapped page frames. The code is as follows:

/*
 * Get rid of page table information in the indicated region.
 *
 * Called with the mm semaphore held.
 */
static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end)
{
	struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
	struct mmu_gather *tlb;
	unsigned long nr_accounted = 0;

	lru_add_drain();
	tlb = tlb_gather_mmu(mm, 0);
	update_hiwater_rss(mm);
	/* tear down the page mappings of each vma */
	unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
	vm_unacct_memory(nr_accounted);
	/* removing the mappings may leave entire page tables empty;
	 * reclaim the pages those page tables occupy */
	free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
				next? next->vm_start: 0);
	tlb_finish_mmu(tlb, start, end);
}
4.1.1.4.1       unmap_vmas

unmap_vmas() releases the pages mapped by the pte entries. The code is as follows:

// Parameters:
// mm: the process descriptor; vma: the first vma to delete;
// start_addr: start of the linear region to delete;
// end_addr: end of the linear region to delete;
// details: NULL on this call path
/**
 * unmap_vmas - unmap a range of memory covered by a list of vma's
 * @tlbp: address of the caller's struct mmu_gather
 * @vma: the starting vma
 * @start_addr: virtual address at which to start unmapping
 * @end_addr: virtual address at which to end unmapping
 * @nr_accounted: Place number of unmapped pages in vm-accountable vma's here
 * @details: details of nonlinear truncation or shared cache invalidation
 *
 * Returns the end address of the unmapping (restart addr if interrupted).
 *
 * Unmap all pages in the vma list.
 *
 * We aim to not hold locks for too long (for scheduling latency reasons).
 * So zap pages in ZAP_BLOCK_SIZE bytecounts.  This means we need to
 * return the ending mmu_gather to the caller.
 *
 * Only addresses between `start' and `end' will be unmapped.
 *
 * The VMA list must be sorted in ascending virtual address order.
 *
 * unmap_vmas() assumes that the caller will flush the whole unmapped address
 * range after unmap_vmas() returns.  So the only responsibility here is to
 * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
 * drops the lock and schedules.
 */
unsigned long unmap_vmas(struct mmu_gather **tlbp,
		struct vm_area_struct *vma, unsigned long start_addr,
		unsigned long end_addr, unsigned long *nr_accounted,
		struct zap_details *details)
{
	long zap_work = ZAP_BLOCK_SIZE;
	unsigned long tlb_start = 0;	/* For tlb_finish_mmu */
	int tlb_start_valid = 0;
	unsigned long start = start_addr;
	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
	int fullmm = (*tlbp)->fullmm;
	struct mm_struct *mm = vma->vm_mm;

	mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
	/* walk the list of vmas to delete */
	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
		unsigned long end;

		/* clamp the range to unmap to this vma */
		start = max(vma->vm_start, start_addr);
		if (start >= vma->vm_end)
			continue;
		end = min(vma->vm_end, end_addr);
		if (end <= vma->vm_start)
			continue;

		if (vma->vm_flags & VM_ACCOUNT)
			*nr_accounted += (end - start) >> PAGE_SHIFT;

		if (unlikely(is_pfn_mapping(vma)))
			untrack_pfn_vma(vma, 0, 0);

		/* unmap every mapped page frame between start and end,
		 * releasing up to ZAP_BLOCK_SIZE worth per pass */
		while (start != end) {
			if (!tlb_start_valid) {
				tlb_start = start;
				tlb_start_valid = 1;
			}
			/* with CONFIG_HUGETLB_PAGE disabled,
			 * is_vm_hugetlb_page() is always false */
			if (unlikely(is_vm_hugetlb_page(vma))) {
				/*
				 * It is undesirable to test vma->vm_file as it
				 * should be non-null for valid hugetlb area.
				 * However, vm_file will be NULL in the error
				 * cleanup path of do_mmap_pgoff. When
				 * hugetlbfs ->mmap method fails,
				 * do_mmap_pgoff() nullifies vma->vm_file
				 * before calling this function to clean up.
				 * Since no pte has actually been setup, it is
				 * safe to do nothing in this case.
				 */
				if (vma->vm_file) {
					unmap_hugepage_range(vma, start, end, NULL);
					zap_work -= (end - start) /
					pages_per_huge_page(hstate_vma(vma));
				}

				start = end;
			} else
				start = unmap_page_range(*tlbp, vma,
						start, end, &zap_work, details);

			if (zap_work > 0) {
				BUG_ON(start != end);
				break;
			}

			tlb_finish_mmu(*tlbp, tlb_start, start);

			if (need_resched() ||
				(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
				if (i_mmap_lock) {
					*tlbp = NULL;
					goto out;
				}
				cond_resched();
			}

			*tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
			tlb_start_valid = 0;
			zap_work = ZAP_BLOCK_SIZE;
		}
	}
out:
	mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
	return start;	/* which is now the end (or restart) address */
}
4.1.1.4.1.1 unmap_page_range

Stepping into unmap_page_range():

static unsigned long unmap_page_range(struct mmu_gather *tlb,
				struct vm_area_struct *vma,
				unsigned long addr, unsigned long end,
				long *zap_work, struct zap_details *details)
{
	pgd_t *pgd;
	unsigned long next;

	if (details && !details->check_mapping && !details->nonlinear_vma)
		details = NULL;

	BUG_ON(addr >= end);
	mem_cgroup_uncharge_start();
	tlb_start_vma(tlb, vma);
	/* get the page global directory entry for addr */
	pgd = pgd_offset(vma->vm_mm, addr);
	/* walk the pgd entries, tearing down everything beneath each */
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd)) {
			(*zap_work)--;
			continue;
		}
		next = zap_pud_range(tlb, vma, pgd, addr, next,
						zap_work, details);
	} while (pgd++, addr = next, (addr != end && *zap_work > 0));
	tlb_end_vma(tlb, vma);
	mem_cgroup_uncharge_end();
	return addr;
}


 

4.1.1.4.1.1.1         zap_pud_range

Next, zap_pud_range():

static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
				struct vm_area_struct *vma, pgd_t *pgd,
				unsigned long addr, unsigned long end,
				long *zap_work, struct zap_details *details)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud)) {
			(*zap_work)--;
			continue;
		}
		next = zap_pmd_range(tlb, vma, pud, addr, next,
						zap_work, details);
	} while (pud++, addr = next, (addr != end && *zap_work > 0));

	return addr;
}

4.1.1.4.1.1.1.1  zap_pmd_range

Then zap_pmd_range():

static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
				struct vm_area_struct *vma, pud_t *pud,
				unsigned long addr, unsigned long end,
				long *zap_work, struct zap_details *details)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd)) {
			(*zap_work)--;
			continue;
		}
		next = zap_pte_range(tlb, vma, pmd, addr, next,
						zap_work, details);
	} while (pmd++, addr = next, (addr != end && *zap_work > 0));

	return addr;
}


4.1.1.4.1.1.1.1.1 zap_pte_range

Finally, zap_pte_range():

static unsigned long zap_pte_range(struct mmu_gather *tlb,
				struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end,
				long *zap_work, struct zap_details *details)
{
	struct mm_struct *mm = tlb->mm;
	pte_t *pte;
	spinlock_t *ptl;
	int rss[NR_MM_COUNTERS];

	init_rss_vec(rss);

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	arch_enter_lazy_mmu_mode();
	do {
		pte_t ptent = *pte;
		/* the pte maps no page */
		if (pte_none(ptent)) {
			(*zap_work)--;
			continue;
		}

		(*zap_work) -= PAGE_SIZE;

		/* the corresponding page is resident in main memory */
		if (pte_present(ptent)) {
			struct page *page;

			page = vm_normal_page(vma, addr, ptent);
			if (unlikely(details) && page) {
				/*
				 * unmap_shared_mapping_pages() wants to
				 * invalidate cache without truncating:
				 * unmap shared but keep private pages.
				 */
				if (details->check_mapping &&
				    details->check_mapping != page->mapping)
					continue;
				/*
				 * Each page->index must be checked when
				 * invalidating or truncating nonlinear.
				 */
				if (details->nonlinear_vma &&
				    (page->index < details->first_index ||
				     page->index > details->last_index))
					continue;
			}
			ptent = ptep_get_and_clear_full(mm, addr, pte,
							tlb->fullmm);
			tlb_remove_tlb_entry(tlb, pte, addr);
			if (unlikely(!page))
				continue;
			if (unlikely(details) && details->nonlinear_vma
			    && linear_page_index(details->nonlinear_vma,
					addr) != page->index)
				set_pte_at(mm, addr, pte,
					  pgoff_to_pte(page->index));
			if (PageAnon(page))
				rss[MM_ANONPAGES]--;
			else {
				if (pte_dirty(ptent))
					set_page_dirty(page);
				if (pte_young(ptent) &&
				    likely(!VM_SequentialReadHint(vma)))
					mark_page_accessed(page);
				rss[MM_FILEPAGES]--;
			}
			page_remove_rmap(page);
			if (unlikely(page_mapcount(page) < 0))
				print_bad_pte(vma, addr, ptent, page);
			tlb_remove_page(tlb, page);
			continue;
		}
		/*
		 * If details->check_mapping, we leave swap entries;
		 * if details->nonlinear_vma, we leave file entries.
		 */
		if (unlikely(details))
			continue;
		if (pte_file(ptent)) {
			if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
				print_bad_pte(vma, addr, ptent, NULL);
		} else {
			swp_entry_t entry = pte_to_swp_entry(ptent);

			if (!non_swap_entry(entry))
				rss[MM_SWAPENTS]--;
			if (unlikely(!free_swap_and_cache(entry)))
				print_bad_pte(vma, addr, ptent, NULL);
		}
		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));

	add_mm_rss_vec(mm, rss);
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(pte - 1, ptl);

	return addr;
}
4.1.1.4.2       free_pgtables

The walk above shows how the kernel goes from a linear address through the pgd down to the pte and frees the pages mapped there. Note that at this point only the page frames mapped by the ptes have been freed, which can leave many pte entries mapping nothing; the memory occupied by those now-empty page tables can itself be reclaimed. That is the job of free_pgtables(). The code is as follows:

void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
		unsigned long floor, unsigned long ceiling)
{
	while (vma) {
		struct vm_area_struct *next = vma->vm_next;
		unsigned long addr = vma->vm_start; /* start of the area */

		/*
		 * Hide vma from rmap and truncate_pagecache before freeing
		 * pgtables
		 */
		unlink_anon_vmas(vma);
		unlink_file_vma(vma);

		if (is_vm_hugetlb_page(vma)) {
			hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
				floor, next? next->vm_start: ceiling);
		} else {
			/*
			 * Optimization: gather nearby vmas into one call down
			 */
			while (next && next->vm_start <= vma->vm_end + PMD_SIZE
			       && !is_vm_hugetlb_page(next)) {
				vma = next;
				next = vma->vm_next;
				unlink_anon_vmas(vma);
				unlink_file_vma(vma);
			}
			free_pgd_range(tlb, addr, vma->vm_end,
				floor, next? next->vm_start: ceiling);
		}
		vma = next;
	}
}
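A back-of-the-envelope sketch of when whole page-table pages become reclaimable (assuming x86-64 with 4 KiB pages and 512 PTEs per table, so one PTE page covers 2 MiB; the addresses are made up):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PTRS_PER_PTE	512UL
#define PMD_SIZE	(PAGE_SIZE * PTRS_PER_PTE)	/* 2 MiB */

int main(void)
{
	unsigned long start = 0x601000, end = 0xa00000;

	/* a PTE page can only be freed once no part of the 2 MiB block
	 * it maps is still in use, so round start up and end down */
	unsigned long first = (start + PMD_SIZE - 1) & ~(PMD_SIZE - 1);
	unsigned long last  = end & ~(PMD_SIZE - 1);

	if (first < last)
		printf("%lu PTE page(s) reclaimable\n",
		       (last - first) / PMD_SIZE);
	else
		printf("no whole PTE page inside the range\n");
	return 0;
}

This is why free_pgtables() takes the extra floor and ceiling arguments: they stop it from freeing a page table that is still partly covered by the neighbouring vmas.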

What does prev point to?

When free_pgtables() is called, which vma does prev point to? The state of the list before and after detach_vmas_to_be_unmapped() is sketched below; from it one can see that the linear address range free_pgtables() operates on is exactly the hole between prev and prev->vm_next.
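A rough reconstruction from the code above (a hedged sketch; vma1 and vma2 stand for the vmas covering [start, end)):

Initially:

	mm->mmap: ... -> prev -> vma1 -> vma2 -> next -> ...

After detach_vmas_to_be_unmapped():

	mm->mmap: ... -> prev -> next -> ...	(the process's own list)
	vma1 -> vma2 -> NULL			(detached chain, handed on to
						 unmap_region and remove_vma_list)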
