arch/tile/mm/hugetlbpage.c
/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 *
 * TILE Huge TLB Page Support for Kernel.
 * Taken from i386 hugetlb implementation:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <linux/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/setup.h>

#ifdef CONFIG_HUGETLB_SUPER_PAGES

/*
 * Provide an additional huge page size (in addition to the regular default
 * huge page size) if no "hugepagesz" arguments are specified.
 * Note that it must be smaller than the default huge page size so
 * that such pages can be allocated on demand from the buddy allocator.
 * You can change this to 64K (on a 16K build), 256K, 1M, or 4M,
 * or not define it at all.
 */
#define ADDITIONAL_HUGE_SIZE (1024 * 1024UL)

/* "Extra" page-size multipliers, one per level of the page table. */
int huge_shift[HUGE_SHIFT_ENTRIES] = {
#ifdef ADDITIONAL_HUGE_SIZE
#define ADDITIONAL_HUGE_SHIFT __builtin_ctzl(ADDITIONAL_HUGE_SIZE / PAGE_SIZE)
        [HUGE_SHIFT_PAGE] = ADDITIONAL_HUGE_SHIFT
#endif
};

/*
 * This routine is a hybrid of pte_alloc_map() and pte_alloc_kernel().
 * It assumes that L2 PTEs are never in HIGHMEM (we don't support that).
 * It locks the user page table and bumps up the mm->nr_ptes field,
 * but otherwise allocates the page table using the kernel versions.
 */
static pte_t *pte_alloc_hugetlb(struct mm_struct *mm, pmd_t *pmd,
                                unsigned long address)
{
        pte_t *new;

        if (pmd_none(*pmd)) {
                new = pte_alloc_one_kernel(mm, address);
                if (!new)
                        return NULL;

                smp_wmb(); /* See comment in __pte_alloc */

                spin_lock(&mm->page_table_lock);
                if (likely(pmd_none(*pmd))) {  /* Has another populated it? */
                        mm->nr_ptes++;
                        pmd_populate_kernel(mm, pmd, new);
                        new = NULL;
                } else
                        VM_BUG_ON(pmd_trans_splitting(*pmd));
                spin_unlock(&mm->page_table_lock);
                if (new)
                        pte_free_kernel(mm, new);
        }

        return pte_offset_kernel(pmd, address);
}
#endif

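/*
 * Allocate (or locate) the page-table entry that will map a huge page
 * of size "sz" at "addr".  Depending on the size, the returned pointer
 * is actually a PUD, a PMD, or (for super pages smaller than PMD_SIZE)
 * a level-2 PTE.
 */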
pte_t *huge_pte_alloc(struct mm_struct *mm,
                      unsigned long addr, unsigned long sz)
{
        pgd_t *pgd;
        pud_t *pud;

        addr &= -sz;   /* Mask off any low bits in the address. */

        pgd = pgd_offset(mm, addr);
        pud = pud_alloc(mm, pgd, addr);

#ifdef CONFIG_HUGETLB_SUPER_PAGES
        if (sz >= PGDIR_SIZE) {
                BUG_ON(sz != PGDIR_SIZE &&
                       sz != PGDIR_SIZE << huge_shift[HUGE_SHIFT_PGDIR]);
                return (pte_t *)pud;
        } else {
                pmd_t *pmd = pmd_alloc(mm, pud, addr);
                if (sz >= PMD_SIZE) {
                        BUG_ON(sz != PMD_SIZE &&
                               sz != (PMD_SIZE << huge_shift[HUGE_SHIFT_PMD]));
                        return (pte_t *)pmd;
                } else {
                        if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE])
                                panic("Unexpected page size %#lx\n", sz);
                        return pte_alloc_hugetlb(mm, pmd, addr);
                }
        }
#else
        BUG_ON(sz != PMD_SIZE);
        return (pte_t *) pmd_alloc(mm, pud, addr);
#endif
}

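/*
 * Return the PTE at "index" within the table at "base".  If that entry
 * is not present but super pages are enabled at this level, look instead
 * at the first entry of the naturally aligned super-page group and use
 * it if it is a present super PTE.
 */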
static pte_t *get_pte(pte_t *base, int index, int level)
{
        pte_t *ptep = base + index;
#ifdef CONFIG_HUGETLB_SUPER_PAGES
        if (!pte_present(*ptep) && huge_shift[level] != 0) {
                unsigned long mask = -1UL << huge_shift[level];
                pte_t *super_ptep = base + (index & mask);
                pte_t pte = *super_ptep;
                if (pte_present(pte) && pte_super(pte))
                        ptep = super_ptep;
        }
#endif
        return ptep;
}

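/*
 * Walk the page table for "addr" and return a pointer to the (possibly
 * super) huge PTE mapping it, at whatever level it is found, or NULL
 * if there is no huge mapping.
 */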
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
#ifdef CONFIG_HUGETLB_SUPER_PAGES
        pte_t *pte;
#endif

        /* Get the top-level page table entry. */
        pgd = (pgd_t *)get_pte((pte_t *)mm->pgd, pgd_index(addr), 0);
        if (!pgd_present(*pgd))
                return NULL;

        /* We don't have four levels. */
        pud = pud_offset(pgd, addr);
#ifndef __PAGETABLE_PUD_FOLDED
# error support fourth page table level
#endif

        /* Check for an L0 huge PTE, if we have three levels. */
#ifndef __PAGETABLE_PMD_FOLDED
        if (pud_huge(*pud))
                return (pte_t *)pud;

        pmd = (pmd_t *)get_pte((pte_t *)pud_page_vaddr(*pud),
                               pmd_index(addr), 1);
        if (!pmd_present(*pmd))
                return NULL;
#else
        pmd = pmd_offset(pud, addr);
#endif

        /* Check for an L1 huge PTE. */
        if (pmd_huge(*pmd))
                return (pte_t *)pmd;

#ifdef CONFIG_HUGETLB_SUPER_PAGES
        /* Check for an L2 huge PTE. */
        pte = get_pte((pte_t *)pmd_page_vaddr(*pmd), pte_index(addr), 2);
        if (!pte_present(*pte))
                return NULL;
        if (pte_super(*pte))
                return pte;
#endif

        return NULL;
}

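/*
 * Looking up a huge page by raw address is not supported on tile;
 * callers fall back to the pmd/pud-based helpers below.
 */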
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
                              int write)
{
        return ERR_PTR(-EINVAL);
}

int pmd_huge(pmd_t pmd)
{
        return !!(pmd_val(pmd) & _PAGE_HUGE_PAGE);
}

int pud_huge(pud_t pud)
{
        return !!(pud_val(pud) & _PAGE_HUGE_PAGE);
}

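/*
 * Return the struct page for the base page within the huge PMD (or,
 * below, PUD) mapping that contains "address".
 */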
struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                             pmd_t *pmd, int write)
{
        struct page *page;

        page = pte_page(*(pte_t *)pmd);
        if (page)
                page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
        return page;
}

struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
                             pud_t *pud, int write)
{
        struct page *page;

        page = pte_page(*(pte_t *)pud);
        if (page)
                page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
        return page;
}

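/*
 * Huge PMD sharing is not implemented on tile, so there is never
 * anything to unshare.
 */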
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
        return 0;
}

#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
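/*
 * Bottom-up search: starting from the cached free-area hint (or
 * TASK_UNMAPPED_BASE), walk the VMAs upward looking for a
 * huge-page-aligned gap of at least "len" bytes.
 */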
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
                unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags)
{
        struct hstate *h = hstate_file(file);
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long start_addr;

        if (len > mm->cached_hole_size) {
                start_addr = mm->free_area_cache;
        } else {
                start_addr = TASK_UNMAPPED_BASE;
                mm->cached_hole_size = 0;
        }

full_search:
        addr = ALIGN(start_addr, huge_page_size(h));

        for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
                /* At this point:  (!vma || addr < vma->vm_end). */
                if (TASK_SIZE - len < addr) {
                        /*
                         * Start a new search - just in case we missed
                         * some holes.
                         */
                        if (start_addr != TASK_UNMAPPED_BASE) {
                                start_addr = TASK_UNMAPPED_BASE;
                                mm->cached_hole_size = 0;
                                goto full_search;
                        }
                        return -ENOMEM;
                }
                if (!vma || addr + len <= vma->vm_start) {
                        mm->free_area_cache = addr + len;
                        return addr;
                }
                if (addr + mm->cached_hole_size < vma->vm_start)
                        mm->cached_hole_size = vma->vm_start - addr;
                addr = ALIGN(vma->vm_end, huge_page_size(h));
        }
}

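/*
 * Top-down search: walk downward from mmap_base looking for a
 * huge-page-aligned hole of "len" bytes, and fall back to the
 * bottom-up search if none is found.
 */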
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
                unsigned long addr0, unsigned long len,
                unsigned long pgoff, unsigned long flags)
{
        struct hstate *h = hstate_file(file);
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma, *prev_vma;
        unsigned long base = mm->mmap_base, addr = addr0;
        unsigned long largest_hole = mm->cached_hole_size;
        int first_time = 1;

        /* don't allow allocations above current base */
        if (mm->free_area_cache > base)
                mm->free_area_cache = base;

        if (len <= largest_hole) {
                largest_hole = 0;
                mm->free_area_cache = base;
        }
try_again:
        /* make sure it can fit in the remaining address space */
        if (mm->free_area_cache < len)
                goto fail;

        /* either no address requested or can't fit in requested address hole */
        addr = (mm->free_area_cache - len) & huge_page_mask(h);
        do {
                /*
                 * Lookup failure means no vma is above this address,
                 * i.e. return with success:
                 */
                vma = find_vma_prev(mm, addr, &prev_vma);
                if (!vma)
                        return addr;

                /*
                 * new region fits between prev_vma->vm_end and
                 * vma->vm_start, use it:
                 */
                if (addr + len <= vma->vm_start &&
                            (!prev_vma || (addr >= prev_vma->vm_end))) {
                        /* remember the address as a hint for next time */
                        mm->cached_hole_size = largest_hole;
                        mm->free_area_cache = addr;
                        return addr;
                } else {
                        /* pull free_area_cache down to the first hole */
                        if (mm->free_area_cache == vma->vm_end) {
                                mm->free_area_cache = vma->vm_start;
                                mm->cached_hole_size = largest_hole;
                        }
                }

                /* remember the largest hole we saw so far */
                if (addr + largest_hole < vma->vm_start)
                        largest_hole = vma->vm_start - addr;

                /* try just below the current vma->vm_start */
                addr = (vma->vm_start - len) & huge_page_mask(h);

        } while (len <= vma->vm_start);

fail:
        /*
         * if hint left us with no space for the requested
         * mapping then try again:
         */
        if (first_time) {
                mm->free_area_cache = base;
                largest_hole = 0;
                first_time = 0;
                goto try_again;
        }
        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here. This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        mm->cached_hole_size = ~0UL;
        addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
                        len, pgoff, flags);

        /*
         * Restore the topdown base:
         */
        mm->free_area_cache = base;
        mm->cached_hole_size = ~0UL;

        return addr;
}

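/*
 * Arch hook for hugetlbfs mmap(): validate the length, honor MAP_FIXED
 * and any address hint, then dispatch to the bottom-up or top-down
 * search to match the process's normal mmap layout.
 */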
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
{
        struct hstate *h = hstate_file(file);
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;

        if (len & ~huge_page_mask(h))
                return -EINVAL;
        if (len > TASK_SIZE)
                return -ENOMEM;

        if (flags & MAP_FIXED) {
                if (prepare_hugepage_range(file, addr, len))
                        return -EINVAL;
                return addr;
        }

        if (addr) {
                addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
                    (!vma || addr + len <= vma->vm_start))
                        return addr;
        }
        if (current->mm->get_unmapped_area == arch_get_unmapped_area)
                return hugetlb_get_unmapped_area_bottomup(file, addr, len,
                                pgoff, flags);
        else
                return hugetlb_get_unmapped_area_topdown(file, addr, len,
                                pgoff, flags);
}
#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */

#ifdef CONFIG_HUGETLB_SUPER_PAGES
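/*
 * Validate and enable one huge page size.  The size must be a power of
 * four and no larger than 64 GB; it is mapped onto page-table level 0
 * (PUD), 1 (PMD), or 2 (PTE), and any size that is not the native size
 * for its level additionally requires hypervisor super-page support.
 */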
static __init int __setup_hugepagesz(unsigned long ps)
{
        int log_ps = __builtin_ctzl(ps);
        int level, base_shift;

        if ((1UL << log_ps) != ps || (log_ps & 1) != 0) {
                pr_warn("Not enabling %ld byte huge pages;"
                        " must be a power of four.\n", ps);
                return -EINVAL;
        }

        if (ps > 64*1024*1024*1024UL) {
                pr_warn("Not enabling %ld MB huge pages;"
                        " largest legal value is 64 GB.\n", ps >> 20);
                return -EINVAL;
        } else if (ps >= PUD_SIZE) {
                static long hv_jpage_size;
                if (hv_jpage_size == 0)
                        hv_jpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO);
                if (hv_jpage_size != PUD_SIZE) {
                        pr_warn("Not enabling >= %ld MB huge pages:"
                                " hypervisor reports size %ld\n",
                                PUD_SIZE >> 20, hv_jpage_size);
                        return -EINVAL;
                }
                level = 0;
                base_shift = PUD_SHIFT;
        } else if (ps >= PMD_SIZE) {
                level = 1;
                base_shift = PMD_SHIFT;
        } else if (ps > PAGE_SIZE) {
                level = 2;
                base_shift = PAGE_SHIFT;
        } else {
                pr_err("hugepagesz: huge page size %ld too small\n", ps);
                return -EINVAL;
        }

        if (log_ps != base_shift) {
                int shift_val = log_ps - base_shift;
                if (huge_shift[level] != 0) {
                        int old_shift = base_shift + huge_shift[level];
                        pr_warn("Not enabling %ld MB huge pages;"
                                " already have size %ld MB.\n",
                                ps >> 20, (1UL << old_shift) >> 20);
                        return -EINVAL;
                }
                if (hv_set_pte_super_shift(level, shift_val) != 0) {
                        pr_warn("Not enabling %ld MB huge pages;"
                                " no hypervisor support.\n", ps >> 20);
                        return -EINVAL;
                }
                printk(KERN_DEBUG "Enabled %ld MB huge pages\n", ps >> 20);
                huge_shift[level] = shift_val;
        }

        hugetlb_add_hstate(log_ps - PAGE_SHIFT);

        return 0;
}

static bool saw_hugepagesz;

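/*
 * Handle the "hugepagesz=" boot argument.  The first explicit size seen
 * clears the built-in huge_shift defaults (ADDITIONAL_HUGE_SIZE), so
 * only explicitly requested extra sizes are enabled.
 */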
static __init int setup_hugepagesz(char *opt)
{
        if (!saw_hugepagesz) {
                saw_hugepagesz = true;
                memset(huge_shift, 0, sizeof(huge_shift));
        }
        return __setup_hugepagesz(memparse(opt, NULL));
}
__setup("hugepagesz=", setup_hugepagesz);

#ifdef ADDITIONAL_HUGE_SIZE
/*
 * Provide an additional huge page size if no "hugepagesz" args are given.
 * In that case, all the cores have properly set up their hv super_shift
 * already, but we need to notify the hugetlb code to enable the
 * new huge page size from the Linux point of view.
 */
static __init int add_default_hugepagesz(void)
{
        if (!saw_hugepagesz) {
                BUILD_BUG_ON(ADDITIONAL_HUGE_SIZE >= PMD_SIZE ||
                             ADDITIONAL_HUGE_SIZE <= PAGE_SIZE);
                BUILD_BUG_ON((PAGE_SIZE << ADDITIONAL_HUGE_SHIFT) !=
                             ADDITIONAL_HUGE_SIZE);
                BUILD_BUG_ON(ADDITIONAL_HUGE_SHIFT & 1);
                hugetlb_add_hstate(ADDITIONAL_HUGE_SHIFT);
        }
        return 0;
}
arch_initcall(add_default_hugepagesz);
#endif

#endif /* CONFIG_HUGETLB_SUPER_PAGES */