在 Linux 中,当App需要读取Disk文件中的数据时,Linux先分配一些内存,将数据从Disk读入到这些内存中,然后再将数据传给App。当需要往文件中写数据时,Linux先分配内存接收用户数据,然后再将数据从内存写到Disk上。Linux Cache 管理指的就是对这些由Linux分配,并用来存储文件数据的内存的管理。
参数说明:
file :就是用户层想要映射的file
addr :欲映射的起始地址,即用户层的start
prot :用户层传入的prot
flag :同上
offset:同上
从这里可以知道,这里面的参数几乎均是用户层传入的参数。
do_mmap 的实现如下:
static inline unsigned long do_mmap(struct file *file, unsigned long addr,unsigned long len, unsigned long prot,
unsigned long flag, unsigned long offset)
{
unsigned long ret = -EINVAL;
if ((offset + PAGE_ALIGN(len)) < offset) --页对齐len,检测传入参数是否有误。
goto out;
if (!(offset & ~PAGE_MASK)) --检测offset是否页对齐。映射时只能映射页对齐的长度。
ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
out:
return ret;
}
/*
 * Describes one virtual memory area (vma): an address range [vm_start, vm_end)
 * belonging to a memory descriptor, together with its permissions, flags,
 * backing file (if any) and the links that tie it into the per-mm and
 * per-file lookup structures.
 * NOTE(review): the half-open range reading is an assumption to be confirmed;
 * the comments below are translated from the original notes.
 */
struct vm_area_struct {
struct mm_struct * vm_mm; /* Memory descriptor this vma belongs to */
unsigned long vm_start; /* Start address of the vma */
unsigned long vm_end; /* End address of the vma */
/* Next and previous vma in the owning process's vma list; per the original notes the list is kept sorted by address */
struct vm_area_struct *vm_next, *vm_prev;
pgprot_t vm_page_prot; /* Access permissions of the vma */
unsigned long vm_flags; /* Flag set */
struct rb_node vm_rb; /* This vma's node in the red-black tree */
/*
* For areas with an address space and backing store,
* linkage into the address_space->i_mmap prio tree, or
* linkage to the list of like vmas hanging off its node, or
* linkage of vma in the address_space->i_mmap_nonlinear list.
*/
/* The shared union ties the vma to an address space */
union {
struct {
struct list_head list;/* Used to link into the non-linear mapping list */
void *parent; /* aligns with prio_tree_node parent */
struct vm_area_struct *head;
} vm_set;
struct raw_prio_tree_node prio_tree_node;/* Linear mappings are linked into the i_mmap prio tree */
} shared;
/*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
* list, after a COW of one of the file pages. A MAP_SHARED vma
* can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
* or brk vma (with NULL file) can only be in an anon_vma list.
*/
/* anon_vma_node and anon_vma: per the original notes, used to manage shared pages that originate from anonymous mappings */
struct list_head anon_vma_node; /* Serialized by anon_vma->lock */
struct anon_vma *anon_vma; /* Serialized by page_table_lock */
/* Function pointers to deal with this struct. */
/* Set of standard operation callbacks for this vma */
const struct vm_operations_struct *vm_ops;
/* Information about our backing store: */
unsigned long vm_pgoff; /* Offset into the mapped file, in PAGE_SIZE units */
struct file * vm_file; /* Mapped file, or NULL if there is none */
void * vm_private_data; /* was vm_pte (shared mem) */
unsigned long vm_truncate_count;/* truncate_count or restart_addr */
#ifndef CONFIG_MMU
struct vm_region *vm_region; /* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
struct mempolicy *vm_policy; /* NUMA policy for the VMA */
#endif
};
/*
 * vma_adjust - change the range and file offset of an existing vma.
 *
 * @vma:    the vma to modify
 * @start:  new vm_start for @vma
 * @end:    new vm_end for @vma (clamped to next->vm_end when @vma swallows
 *          its successor)
 * @pgoff:  new vm_pgoff for @vma
 * @insert: optional additional vma to link in while the locks are held;
 *          when it is set, the overlap analysis against the successor is
 *          skipped
 *
 * Depending on how @end relates to the successor vma ("next"), the function
 * either removes next entirely (once or twice, via the "again" label),
 * shifts the boundary between @vma and next, or leaves next alone.
 * The file's i_mmap_lock and the relevant anon_vma lock are taken around the
 * updates and released before the removed vma is freed.
 */
void vma_adjust(struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *next = vma->vm_next;
struct vm_area_struct *importer = NULL;
struct address_space *mapping = NULL;
struct prio_tree_root *root = NULL;
struct file *file = vma->vm_file;
struct anon_vma *anon_vma = NULL;
long adjust_next = 0;
int remove_next = 0;
if (next && !insert) {
/* The requested end covers the whole successor vma and may extend beyond it */
if (end >= next->vm_end) {
/*
* vma expands, overlapping all the next, and
* perhaps the one after too (mprotect case 6).
*/
again: remove_next = 1 + (end > next->vm_end);// 2 when end runs past next's end, otherwise 1
end = next->vm_end;
anon_vma = next->anon_vma;
importer = vma;
} else if (end > next->vm_start) {/* The requested end overlaps only part of the successor vma */
/*
* vma expands, overlapping part of the next:
* mprotect case 5 shifting the boundary up.
*/
adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
anon_vma = next->anon_vma;
importer = vma;
} else if (end < vma->vm_end) {/* The requested end is below vma's current end: vma shrinks */
/*
* vma shrinks, and !insert tells it's not
* split_vma inserting another: so it must be
* mprotect case 4 shifting the boundary down.
*/
adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
anon_vma = next->anon_vma;
importer = next;
}
}
if (file) {// vma maps a file
mapping = file->f_mapping;// the file's address_space
if (!(vma->vm_flags & VM_NONLINEAR))
root = &mapping->i_mmap;
spin_lock(&mapping->i_mmap_lock);
if (importer &&
vma->vm_truncate_count != next->vm_truncate_count) {
/*
* unmap_mapping_range might be in progress:
* ensure that the expanding vma is rescanned.
*/
importer->vm_truncate_count = 0;
}
/* If a vma to insert was supplied, link it to the file's address_space now.
Per the original notes, the helper picks the prio tree for linear mappings
and the list for non-linear ones (helper body not shown here). */
if (insert) {
insert->vm_truncate_count = vma->vm_truncate_count;
/*
* Put into prio_tree now, so instantiated pages
* are visible to arm/parisc __flush_dcache_page
* throughout; but we cannot insert into address
* space until vma start or end is updated.
*/
__vma_link_file(insert);
}
}
/*
* When changing only vma->vm_end, we don't really need
* anon_vma lock.
*/
if (vma->anon_vma && (insert || importer || start != vma->vm_start))
anon_vma = vma->anon_vma;
if (anon_vma) {
spin_lock(&anon_vma->lock);
/*
* Easily overlooked: when mprotect shifts the boundary,
* make sure the expanding vma has anon_vma set if the
* shrinking vma had, to cover any anon pages imported.
*/
if (importer && !importer->anon_vma) {
importer->anon_vma = anon_vma;
__anon_vma_link(importer);// presumably links importer onto that anon_vma's list (helper not shown here)
}
}
if (root) {
flush_dcache_mmap_lock(mapping);
vma_prio_tree_remove(vma, root);
if (adjust_next)
vma_prio_tree_remove(next, root);
}
/* Update vma's own range and file offset */
vma->vm_start = start;
vma->vm_end = end;
vma->vm_pgoff = pgoff;
if (adjust_next) {// shift next's start and pgoff by the same number of pages
/* NOTE(review): adjust_next is negative in the shrink case, so this is a left shift of a negative value */
next->vm_start += adjust_next << PAGE_SHIFT;
next->vm_pgoff += adjust_next;
}
if (root) {
if (adjust_next)// next was removed from the prio tree above, so put it back
vma_prio_tree_insert(next, root);
vma_prio_tree_insert(vma, root);// re-insert vma with its updated range
flush_dcache_mmap_unlock(mapping);
}
if (remove_next) {// next is being absorbed into vma
/*
* vma_merge has merged next into vma, and needs
* us to remove next before dropping the locks.
*/
__vma_unlink(mm, next, vma);// unlink next from mm (original notes: removes it from the red-black tree)
if (file)// detach next from the file's address_space
__remove_shared_vm_struct(next, file, mapping);
if (next->anon_vma)// detach next from its anonymous-mapping list
__anon_vma_merge(vma, next);
} else if (insert) {
/*
* split_vma has split insert from vma, and needs
* us to insert it before dropping the locks
* (it may either follow vma or precede it).
*/
__insert_vm_struct(mm, insert);// original notes: links insert into mm's red-black tree, its vma list
// and the anonymous-mapping list
}
if (anon_vma)
spin_unlock(&anon_vma->lock);
if (mapping)
spin_unlock(&mapping->i_mmap_lock);
if (remove_next) {
if (file) {
fput(file);
if (next->vm_flags & VM_EXECUTABLE)
removed_exe_file_vma(mm);
}
mm->map_count--;
mpol_put(vma_policy(next));
kmem_cache_free(vm_area_cachep, next);
/*
* In mprotect's case 6 (see comments on vma_merge),
* we must remove another next too. It would clutter
* up the code too much to do both in one go.
*/
if (remove_next == 2) {// one more vma still has to be removed
next = vma->vm_next;
goto again;
}
}
validate_mm(mm);
}
/*
 * insert_vm_struct - link a prepared vma into an mm.
 *
 * @mm:  memory descriptor receiving the vma
 * @vma: the vma to link; vm_start, vm_end, vm_flags and vm_file must be set
 *
 * Returns 0 after the vma has been handed to vma_link(), or -ENOMEM when
 * the vma found by find_vma_prepare() starts below the new vma's end, or
 * when the VM_ACCOUNT memory check reports failure.
 */
int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
{
	struct vm_area_struct *hit;
	struct vm_area_struct *before;
	struct rb_node **link;
	struct rb_node *parent;

	/*
	 * A vma without a backing file gets a vm_pgoff derived from its
	 * virtual start address. The value only becomes meaningful at the
	 * first write fault, but fixing it up front keeps /proc/pid/maps
	 * consistent and lets merges and splits reuse the ordinary file
	 * pgoff checks (do_mmap_pgoff and do_brk follow the same rule).
	 * Such a vma must not already carry an anon_vma.
	 */
	if (!vma->vm_file) {
		BUG_ON(vma->anon_vma);
		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
	}

	/*
	 * Look up the position for vma->vm_start. The call returns the vma
	 * found for that address and fills in the predecessor vma plus the
	 * red-black tree link and parent needed for the insertion.
	 */
	hit = find_vma_prepare(mm, vma->vm_start, &before, &link, &parent);

	/* The found vma begins before the new one ends: ranges collide. */
	if (hit && vma->vm_end > hit->vm_start)
		return -ENOMEM;

	/* Accounted mappings must pass the memory availability check. */
	if (vma->vm_flags & VM_ACCOUNT) {
		if (security_vm_enough_memory_mm(mm, vma_pages(vma)))
			return -ENOMEM;
	}

	/* Hook the vma into all of the mm's bookkeeping structures. */
	vma_link(mm, vma, before, link, parent);
	return 0;
}