在32位的系统上,线性地址空间可达到4GB,这4GB一般按照3:1的比例进行分配,也就是说用户进程享有前3GB线性地址空间,而内核独享最后1GB线性地址空间。由于虚拟内存的引入,每个进程都可拥有3GB的虚拟内存,并且用户进程之间的地址空间是互不可见、互不影响的,也就是说即使两个进程对同一个地址进行操作,也不会产生问题。在前面介绍的一些分配内存的途径中,无论是伙伴系统中分配页的函数,还是slab分配器中分配对象的函数,它们都会尽量快速地响应内核的分配请求,将相应的内存提交给内核使用,而内核对待用户空间显然不能如此。用户空间动态申请内存时往往只是获得一块线性地址的使用权,而并没有将这块线性地址区域与实际的物理内存对应上,只有当用户空间真正操作申请的内存时,才会触发一次缺页异常,这时内核才会分配实际的物理内存给用户空间。
用户进程的虚拟地址空间包含了若干区域,这些区域的分布方式是特定于体系结构的,不过所有的方式都包含下列成分:
- 可执行文件的二进制代码,也就是程序的代码段
- 存储全局变量的数据段
- 用于保存局部变量和实现函数调用的栈
- 环境变量和命令行参数
- 程序使用的动态库的代码
- 用于映射文件内容的区域
由此可以看到进程的虚拟内存空间会被分成不同的若干区域,每个区域都有其相关的属性和用途,一个合法的地址总是落在某个区域当中的,这些区域也不会重叠。在linux内核中,这样的区域被称为虚拟内存区域(virtual memory areas),简称vma。一个vma就是一块连续的线性地址空间的抽象,它拥有自身的权限(可读,可写,可执行等等),每一个虚拟内存区域都由一个相关的struct vm_area_struct结构来描述:
/*
 * Describes one virtual memory area (VMA): a contiguous range of a
 * process's linear address space that carries its own access permissions.
 */
- <span style="font-size:12px;">struct vm_area_struct {
- struct mm_struct * vm_mm; /* address space this VMA belongs to */
- unsigned long vm_start; /* first address inside the area */
- unsigned long vm_end; /* first address past the area (find_vma() treats it as exclusive) */
-
- /* neighbours in the per-mm VMA list whose head is mm->mmap */
- struct vm_area_struct *vm_next, *vm_prev;
-
- pgprot_t vm_page_prot; /* NOTE(review): presumably the page protection bits for this area -- confirm */
- unsigned long vm_flags; /* VM_* flag bits, e.g. VM_SPECIAL, VM_NONLINEAR, VM_ACCOUNT, VM_EXECUTABLE */
-
- struct rb_node vm_rb; /* node in the per-mm red-black tree rooted at mm->mm_rb */
-
-
- /* NOTE(review): presumably links the VMA into the mapped file's address_space (i_mmap tree) -- confirm */
- union {
- struct {
- struct list_head list;
- void *parent;
- struct vm_area_struct *head;
- } vm_set;
-
- struct raw_prio_tree_node prio_tree_node;
- } shared;
-
-
- /* anonymous-memory bookkeeping; vma_adjust() takes anon_vma->lock while resizing */
- struct list_head anon_vma_node;
- struct anon_vma *anon_vma;
-
-
- /* NOTE(review): presumably the table of callbacks for this area -- not used by the code shown here */
- const struct vm_operations_struct *vm_ops;
-
-
- unsigned long vm_pgoff; /* offset in page units (insert_vm_struct() sets it to vm_start >> PAGE_SHIFT when there is no file) */
- struct file * vm_file; /* mapped file, or NULL when the area is not backed by a file */
- void * vm_private_data; /* opaque pointer; not interpreted by the code shown here */
- unsigned long vm_truncate_count; /* copied from mapping->truncate_count in vma_link() */
-
- #ifndef CONFIG_MMU
- struct vm_region *vm_region; /* only present when CONFIG_MMU is not defined */
- #endif
- #ifdef CONFIG_NUMA
- struct mempolicy *vm_policy; /* NUMA memory policy; only present with CONFIG_NUMA */
- #endif
- };
- </span>
进程的若干个vma区域都得按一定的形式组织在一起,这些vma都包含在进程的内存描述符中,也就是struct mm_struct中,这些vma在mm_struct中以两种方式进行组织,一种是链表方式,对应于mm_struct中的mmap链表头,一种是红黑树方式,对应于mm_struct中的mm_rb根节点,和内核其他地方一样,链表用于遍历,红黑树用于查找。
下面以文件映射为例,来阐述文件的address_space和与其建立映射关系的vma是如何联系上的。首先来看看struct address_space中与vma相关的变量
/*
 * Excerpt of struct address_space showing only the members that connect a
 * file's mapping to the VMAs that map it ("..." marks omitted members).
 */
- struct address_space {
- struct inode *host; /* NOTE(review): presumably the inode that owns this mapping -- confirm */
- ...
- struct prio_tree_root i_mmap; /* priority tree of VMAs mapping the file; vma_adjust() uses it when VM_NONLINEAR is clear */
- struct list_head i_mmap_nonlinear; /* NOTE(review): presumably the list of VM_NONLINEAR VMAs -- confirm */
- ...
- } __attr /* NOTE(review): the text is cut off here; presumably an __attribute__((...)) qualifier and a ';' followed -- confirm against the kernel header */
与此同时,struct file和struct inode中都包含有一个struct address_space的指针,分别为f_mapping和i_mapping。struct file是一个特定于进程的数据结构,而struct inode则是一个特定于文件的数据结构。每当进程打开一个文件时,都会将file->f_mapping设置为inode->i_mapping,下图则给出了文件和与其建立映射关系的vma的联系。
下面来看几个vma的基本操作函数,这些函数都是后面实现具体功能的基础
find_vma()用来寻找一个针对于指定地址的vma,该vma要么包含了指定的地址,要么位于该地址之后并且离该地址最近,或者说寻找第一个满足addr < vma->vm_end的vma。
/*
 * find_vma - look up the first VMA in @mm whose vm_end is greater than @addr.
 *
 * The VMA returned either contains @addr or is the closest one that lies
 * entirely above it.  Returns NULL when @mm is NULL or when no VMA ends
 * above @addr.  A successful tree lookup is remembered in mm->mmap_cache.
 */
- struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
- {
- struct vm_area_struct *vma = NULL;
-
- if (mm) {
-
- /* fast path: try the VMA remembered from the previous lookup */
- vma = mm->mmap_cache;
- /* the cached VMA is only accepted when it actually contains addr */
- if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
- struct rb_node * rb_node;
-
- rb_node = mm->mm_rb.rb_node;
- vma = NULL;
- /* slow path: walk the red-black tree from its root */
- while (rb_node) {
- struct vm_area_struct * vma_tmp;
-
- vma_tmp = rb_entry(rb_node,
- struct vm_area_struct, vm_rb);
-
- /* a VMA ending above addr is a candidate; one ending at or below addr sends the walk right */
- if (vma_tmp->vm_end > addr) {
- vma = vma_tmp;
- if (vma_tmp->vm_start <= addr) /* candidate contains addr: done */
- break;
- rb_node = rb_node->rb_left; /* look for a closer candidate on the left */
- } else
- rb_node = rb_node->rb_right;
- }
- if (vma)
- mm->mmap_cache = vma; /* remember the result for the next lookup */
- }
- }
- return vma;
- }
当一个新区域被加到进程的地址空间时,内核会检查它是否可以与一个或多个现存区域合并,vma_merge()函数在可能的情况下,将一个新区域与周边区域进行合并。参数:
mm:新区域所属的进程地址空间
prev:在地址上紧接着新区域的前面一个vma
addr:新区域的起始地址
end:新区域的结束地址
vm_flags:新区域的标识集
anon_vma:新区域所属的匿名映射
file:新区域映射的文件
pgoff:新区域映射文件的偏移
policy:和NUMA相关
/*
 * vma_merge - try to merge the new range [addr, end) into its neighbours.
 *
 * @mm:       address space the new range belongs to
 * @prev:     VMA immediately preceding the new range, or NULL
 * @addr:     start of the new range
 * @end:      end of the new range
 * @vm_flags: flags of the new range
 * @anon_vma: anon_vma of the new range
 * @file:     file mapped by the new range
 * @pgoff:    file offset (in pages) of the new range
 * @policy:   NUMA memory policy of the new range
 *
 * Returns the VMA that absorbed the range (after resizing it through
 * vma_adjust()), or NULL when no merge was performed.
 *
 * NOTE(review): can_vma_merge_after(), can_vma_merge_before(),
 * is_mergeable_anon_vma() and mpol_equal() are defined elsewhere; they
 * presumably check that flags, file, offset, anon_vma and policy are
 * compatible -- confirm against their definitions.
 */
- struct vm_area_struct *vma_merge(struct mm_struct *mm,
- struct vm_area_struct *prev, unsigned long addr,
- unsigned long end, unsigned long vm_flags,
- struct anon_vma *anon_vma, struct file *file,
- pgoff_t pgoff, struct mempolicy *policy)
- {
- pgoff_t pglen = (end - addr) >> PAGE_SHIFT; /* length of the new range in pages */
- struct vm_area_struct *area, *next;
-
- /* a range carrying any VM_SPECIAL flag is never merged */
- if (vm_flags & VM_SPECIAL)
- return NULL;
- /* next: the VMA following prev, or the first VMA of mm when there is no prev */
- if (prev)
- next = prev->vm_next;
- else
- next = mm->mmap;
- area = next; /* remember it: it is the VMA resized and returned by the merge-before path */
-
- /* the range ends exactly where that VMA ends, so the right-hand merge candidate is the VMA after it */
- if (next && next->vm_end == end)
- next = next->vm_next;
-
- /* first choice: extend prev forwards, which requires prev to end exactly at addr */
- if (prev && prev->vm_end == addr &&
- mpol_equal(vma_policy(prev), policy) &&
- can_vma_merge_after(prev, vm_flags,
- anon_vma, file, pgoff)) {
- /* prev can take the range; check whether next starts exactly at end and can be folded in too */
- if (next && end == next->vm_start &&
- mpol_equal(policy, vma_policy(next)) &&
- can_vma_merge_before(next, vm_flags,
- anon_vma, file, pgoff+pglen) &&
- is_mergeable_anon_vma(prev->anon_vma,
- next->anon_vma)) {
- /* both sides merge: prev grows up to next->vm_end */
- vma_adjust(prev, prev->vm_start,
- next->vm_end, prev->vm_pgoff, NULL);
- } else
- vma_adjust(prev, prev->vm_start,
- end, prev->vm_pgoff, NULL); /* only prev merges: prev grows up to end */
- return prev;
- }
-
-
- /* second choice: merge with next alone, which requires next to start exactly at end */
- if (next && end == next->vm_start &&
- mpol_equal(policy, vma_policy(next)) &&
- can_vma_merge_before(next, vm_flags,
- anon_vma, file, pgoff+pglen)) {
- if (prev && addr < prev->vm_end) /* range overlaps the tail of prev: shrink prev to end at addr */
- vma_adjust(prev, prev->vm_start,
- addr, prev->vm_pgoff, NULL);
- else /* otherwise area is resized to [addr, next->vm_end) with its offset moved back by pglen */
- vma_adjust(area, addr, next->vm_end,
- next->vm_pgoff - pglen, NULL);
- return area;
- }
-
- return NULL;
- }
vma_adjust会执行具体的合并调整操作
/*
 * vma_adjust - set @vma to the range [@start, @end) with page offset @pgoff,
 * fixing up or removing the following VMA(s) as needed.
 *
 * @vma:    VMA to resize
 * @start:  new vm_start
 * @end:    new vm_end
 * @pgoff:  new vm_pgoff
 * @insert: optional additional VMA to link into the mm; when it is non-NULL
 *          the neighbour analysis at the top of the function is skipped
 *
 * Holds the file mapping's i_mmap_lock and the anon_vma lock (when
 * applicable) while the ranges change, and releases them in reverse order.
 */
- void vma_adjust(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
- {
- struct mm_struct *mm = vma->vm_mm;
- struct vm_area_struct *next = vma->vm_next;
- struct vm_area_struct *importer = NULL;
- struct address_space *mapping = NULL;
- struct prio_tree_root *root = NULL;
- struct file *file = vma->vm_file;
- struct anon_vma *anon_vma = NULL;
- long adjust_next = 0;
- int remove_next = 0;
-
- if (next && !insert) {
- /* work out how the new end relates to the following VMA */
- if (end >= next->vm_end) {
- /* vma covers all of next: remove_next is 1, or 2 when end also reaches past next */
- again: remove_next = 1 + (end > next->vm_end);
- end = next->vm_end;
- anon_vma = next->anon_vma;
- importer = vma;
- } else if (end > next->vm_start) {
-
- /* vma grows into part of next: next's start moves up by adjust_next pages */
- adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
- anon_vma = next->anon_vma;
- importer = vma;
- } else if (end < vma->vm_end) {
- /* vma shrinks: adjust_next is negative, so next's start moves down */
- adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
- anon_vma = next->anon_vma;
- importer = next;
- }
- }
-
- if (file) {
- mapping = file->f_mapping;
- if (!(vma->vm_flags & VM_NONLINEAR))
- root = &mapping->i_mmap;
- spin_lock(&mapping->i_mmap_lock);
- if (importer &&
- vma->vm_truncate_count != next->vm_truncate_count) {
- /* the two VMAs disagree on the truncate count, so the importer's count is reset */
- importer->vm_truncate_count = 0;
- }
-
- if (insert) {
- insert->vm_truncate_count = vma->vm_truncate_count;
- /* link the inserted VMA into the file mapping while i_mmap_lock is held */
- __vma_link_file(insert);
- }
- }
-
- /* prefer vma's own anon_vma for locking, except when only vm_end changes */
- if (vma->anon_vma && (insert || importer || start != vma->vm_start))
- anon_vma = vma->anon_vma;
- if (anon_vma) {
- spin_lock(&anon_vma->lock);
- /* an importer that has no anon_vma yet adopts the locked one */
- if (importer && !importer->anon_vma) {
- importer->anon_vma = anon_vma;
- __anon_vma_link(importer);
- }
- }
-
- if (root) {
- flush_dcache_mmap_lock(mapping);
- vma_prio_tree_remove(vma, root);
- if (adjust_next)
- vma_prio_tree_remove(next, root);
- }
-
- /* the affected VMAs are out of the prio tree here (when root is set); apply the new range */
- vma->vm_start = start;
- vma->vm_end = end;
- vma->vm_pgoff = pgoff;
- if (adjust_next) {
- next->vm_start += adjust_next << PAGE_SHIFT;
- next->vm_pgoff += adjust_next;
- }
- /* re-insert into the prio tree with the updated values */
- if (root) {
- if (adjust_next)
- vma_prio_tree_insert(next, root);
- vma_prio_tree_insert(vma, root);
- flush_dcache_mmap_unlock(mapping);
- }
-
- if (remove_next) {
- /* next was swallowed: unlink it from the mm, from the file mapping and merge its anon_vma state */
- __vma_unlink(mm, next, vma);
- if (file)
- __remove_shared_vm_struct(next, file, mapping);
- if (next->anon_vma)
- __anon_vma_merge(vma, next);
- } else if (insert) {
- /* link the extra VMA into the mm (its file linkage was done above) */
- __insert_vm_struct(mm, insert);
-
- }
- /* release the locks in the reverse order of acquisition */
- if (anon_vma)
- spin_unlock(&anon_vma->lock);
- if (mapping)
- spin_unlock(&mapping->i_mmap_lock);
-
- if (remove_next) {
- if (file) {
- fput(file); /* NOTE(review): presumably drops the file reference that next was holding -- confirm */
- if (next->vm_flags & VM_EXECUTABLE)
- removed_exe_file_vma(mm);
- }
- mm->map_count--;
- mpol_put(vma_policy(next));
- kmem_cache_free(vm_area_cachep, next);
- /* remove_next == 2: the VMA now following vma must be removed as well, so run the whole sequence again */
- if (remove_next == 2) {
- next = vma->vm_next;
- goto again;
- }
- }
-
- validate_mm(mm);
- }
insert_vm_struct()函数用于插入一块新区域
/*
 * insert_vm_struct - link the new area @vma into the address space @mm.
 *
 * Returns 0 on success, or -ENOMEM when the new area would overlap an
 * existing VMA or when the VM_ACCOUNT memory check fails.
 */
- int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
- {
- struct vm_area_struct * __vma, * prev;
- struct rb_node ** rb_link, * rb_parent;
-
- /* an area without a file must not have an anon_vma yet; its vm_pgoff becomes the page number of vm_start */
- if (!vma->vm_file) {
- BUG_ON(vma->anon_vma);
- vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
- }
- /* find_vma_prepare() is defined elsewhere; it fills in prev, rb_link and rb_parent for vma_link() */
- __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
- if (__vma && __vma->vm_start < vma->vm_end) /* the VMA found starts before the new area ends: overlap */
- return -ENOMEM;
- if ((vma->vm_flags & VM_ACCOUNT) &&
- security_vm_enough_memory_mm(mm, vma_pages(vma))) /* a non-zero result rejects the insertion */
- return -ENOMEM;
- vma_link(mm, vma, prev, rb_link, rb_parent);
- return 0;
- }
/*
 * vma_link - link @vma into @mm at the position described by @prev,
 * @rb_link and @rb_parent, and into its file mapping when it has one.
 *
 * Takes the mapping's i_mmap_lock (file-backed areas only) and then the
 * anon_vma lock, releases them in reverse order, and finally increments
 * mm->map_count.
 */
- static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
- struct vm_area_struct *prev, struct rb_node **rb_link,
- struct rb_node *rb_parent)
- {
- struct address_space *mapping = NULL;
-
- if (vma->vm_file)
- mapping = vma->vm_file->f_mapping;
-
- if (mapping) {
- spin_lock(&mapping->i_mmap_lock);
- vma->vm_truncate_count = mapping->truncate_count; /* start from the mapping's current truncate count */
- }
- anon_vma_lock(vma);
-
- /* both locks are held (where applicable) while the VMA is linked */
- __vma_link(mm, vma, prev, rb_link, rb_parent);
- __vma_link_file(vma);
-
- anon_vma_unlock(vma);
- if (mapping)
- spin_unlock(&mapping->i_mmap_lock);
-
- mm->map_count++;
- validate_mm(mm);
- }
在创建新的vma区域之前先要寻找一块足够大小的空闲区域,该项工作由get_unmapped_area()函数完成,而实际的工作将会由mm_struct中定义的辅助函数来完成。根据进程虚拟地址空间的布局,会选择使用不同的映射函数,在这里考虑大多数系统上采用的标准函数arch_get_unmapped_area():
/*
 * arch_get_unmapped_area - find a free range of @len bytes in the address
 * space of the current process.
 *
 * @filp:  not used by this implementation
 * @addr:  preferred start address, or 0 for no preference
 * @len:   length of the range wanted
 * @pgoff: not used by this implementation
 * @flags: mapping flags; only MAP_FIXED is examined here
 *
 * Returns the start address of the range, or -ENOMEM converted to
 * unsigned long when nothing suitable is found.
 */
- unsigned long
- arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
- {
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long start_addr;
-
- if (len > TASK_SIZE)
- return -ENOMEM;
- /* MAP_FIXED: addr is returned as given, with no check against existing VMAs here */
- if (flags & MAP_FIXED)
- return addr;
- /* a non-zero addr is a hint: use it (page aligned) if the range fits below TASK_SIZE and before the next VMA */
- if (addr) {
- addr = PAGE_ALIGN(addr);
- vma = find_vma(mm, addr);
-
- if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
- return addr;
- }
- /* start at free_area_cache when len exceeds cached_hole_size; otherwise restart from TASK_UNMAPPED_BASE */
- if (len > mm->cached_hole_size) {
- start_addr = addr = mm->free_area_cache;
- } else {
- start_addr = addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- }
-
- full_search:
- /* walk the VMAs upwards from addr looking for a gap of at least len bytes */
- for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
-
- if (TASK_SIZE - len < addr) {
- /* ran past the top of the usable range */
- /* if the scan did not begin at TASK_UNMAPPED_BASE, retry once from there before giving up */
- if (start_addr != TASK_UNMAPPED_BASE) {
- addr = TASK_UNMAPPED_BASE;
- start_addr = addr;
- mm->cached_hole_size = 0;
- goto full_search;
- }
- return -ENOMEM;
- }
- if (!vma || addr + len <= vma->vm_start) {
- /* gap found: the next search will start right after this allocation */
- mm->free_area_cache = addr + len;
- return addr;
- }
- /* gap below this VMA is too small; record it if it is larger than cached_hole_size */
- if (addr + mm->cached_hole_size < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
- addr = vma->vm_end; /* continue just past this VMA */
- }
- }
本文转自张昺华-sky博客园博客,原文链接:http://www.cnblogs.com/sky-heaven/p/5663379.html,如需转载请自行联系原作者