pgtable_64.c
/*
 * This file contains ioremap and related functions for 64-bit machines.
 *
 * Derived from arch/ppc64/mm/init.c
 *   Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
 * Copyright (C) 1996 Paul Mackerras
 *
 * Derived from "arch/i386/mm/init.c"
 *   Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Dave Engebretsen <engebret@us.ibm.com>
 *     Rework for PPC64 port.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/stddef.h>
#include <linux/vmalloc.h>
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>

#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/tlb.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/firmware.h>
#include <asm/dma.h>

#include "mmu_decl.h"

#ifdef CONFIG_PPC_STD_MMU_64
#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
#error TASK_SIZE_USER64 exceeds user VSID range
#endif
#endif
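/*
 * Editor's note: worked numbers for the check above, under typical hash MMU
 * values (an assumption, these constants are defined elsewhere): with
 * SID_SHIFT = 28 (256MB segments) and ESID_BITS = 18, the user VSID range
 * is 1UL << 46 = 64TB, so TASK_SIZE_USER64 must not exceed 64TB.
 */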

#ifdef CONFIG_PPC_BOOK3S_64
/*
 * partition table and process table for ISA 3.0
 */
struct prtb_entry *process_tb;
struct patb_entry *partition_tb;
/*
 * page table size
 */
unsigned long __pte_index_size;
EXPORT_SYMBOL(__pte_index_size);
unsigned long __pmd_index_size;
EXPORT_SYMBOL(__pmd_index_size);
unsigned long __pud_index_size;
EXPORT_SYMBOL(__pud_index_size);
unsigned long __pgd_index_size;
EXPORT_SYMBOL(__pgd_index_size);
unsigned long __pmd_cache_index;
EXPORT_SYMBOL(__pmd_cache_index);
unsigned long __pte_table_size;
EXPORT_SYMBOL(__pte_table_size);
unsigned long __pmd_table_size;
EXPORT_SYMBOL(__pmd_table_size);
unsigned long __pud_table_size;
EXPORT_SYMBOL(__pud_table_size);
unsigned long __pgd_table_size;
EXPORT_SYMBOL(__pgd_table_size);
unsigned long __pmd_val_bits;
EXPORT_SYMBOL(__pmd_val_bits);
unsigned long __pud_val_bits;
EXPORT_SYMBOL(__pud_val_bits);
unsigned long __pgd_val_bits;
EXPORT_SYMBOL(__pgd_val_bits);
unsigned long __kernel_virt_start;
EXPORT_SYMBOL(__kernel_virt_start);
unsigned long __kernel_virt_size;
EXPORT_SYMBOL(__kernel_virt_size);
unsigned long __vmalloc_start;
EXPORT_SYMBOL(__vmalloc_start);
unsigned long __vmalloc_end;
EXPORT_SYMBOL(__vmalloc_end);
struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long __pte_frag_nr;
EXPORT_SYMBOL(__pte_frag_nr);
unsigned long __pte_frag_size_shift;
EXPORT_SYMBOL(__pte_frag_size_shift);
unsigned long ioremap_bot;
#else /* !CONFIG_PPC_BOOK3S_64 */
unsigned long ioremap_bot = IOREMAP_BASE;
#endif

/**
 * __ioremap_at - Low level function to establish the page tables
 *                for an IO mapping
 */
void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
			    unsigned long flags)
{
	unsigned long i;

	/* Make sure we have the base flags */
	if ((flags & _PAGE_PRESENT) == 0)
		flags |= pgprot_val(PAGE_KERNEL);

	/* We don't support the 4K PFN hack with ioremap */
	if (flags & H_PAGE_4K_PFN)
		return NULL;

	WARN_ON(pa & ~PAGE_MASK);
	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
	WARN_ON(size & ~PAGE_MASK);

	for (i = 0; i < size; i += PAGE_SIZE)
		if (map_kernel_page((unsigned long)ea+i, pa+i, flags))
			return NULL;

	return (void __iomem *)ea;
}
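
/*
 * Editor's note: a minimal usage sketch, not part of the original file.
 * Platform code that has reserved its own chunk of virtual space can bolt
 * a fixed IO mapping with __ioremap_at() and later tear it down with
 * __iounmap_at(). All addresses below are hypothetical; they only need to
 * be page aligned, as the WARN_ONs above enforce:
 *
 *	void __iomem *va;
 *
 *	va = __ioremap_at(0x0000000f00000000UL,		// phys addr (made up)
 *			  (void *)0xd000080080000000UL,	// virt addr (made up)
 *			  0x10000,			// size, page aligned
 *			  pgprot_val(pgprot_noncached(__pgprot(0))));
 *	if (va)
 *		__iounmap_at((void *)0xd000080080000000UL, 0x10000);
 */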

/**
 * __iounmap_at - Low level function to tear down the page tables
 *                for an IO mapping. This is used for mappings that
 *                are manipulated manually, like partial unmapping of
 *                PCI IOs or ISA space.
 */
void __iounmap_at(void *ea, unsigned long size)
{
	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
	WARN_ON(size & ~PAGE_MASK);

	unmap_kernel_range((unsigned long)ea, size);
}

void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
				unsigned long flags, void *caller)
{
	phys_addr_t paligned;
	void __iomem *ret;

	/*
	 * Choose an address to map it to. Once the vmalloc system is
	 * running, we use it. Before that, we map using addresses going
	 * up from ioremap_bot. vmalloc will use the addresses from
	 * ioremap_bot through IOREMAP_END.
	 */
	paligned = addr & PAGE_MASK;
	size = PAGE_ALIGN(addr + size) - paligned;

	if ((size == 0) || (paligned == 0))
		return NULL;

	if (slab_is_available()) {
		struct vm_struct *area;

		area = __get_vm_area_caller(size, VM_IOREMAP,
					    ioremap_bot, IOREMAP_END,
					    caller);
		if (area == NULL)
			return NULL;

		area->phys_addr = paligned;
		ret = __ioremap_at(paligned, area->addr, size, flags);
		if (!ret)
			vunmap(area->addr);
	} else {
		ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
		if (ret)
			ioremap_bot += size;
	}

	if (ret)
		ret += addr & ~PAGE_MASK;
	return ret;
}
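
/*
 * Editor's note: a worked example of the alignment handling above, using
 * hypothetical numbers and 4K pages. For addr = 0x10000004 and size = 0x8:
 * paligned = 0x10000000, the mapped size becomes
 * PAGE_ALIGN(0x1000000c) - 0x10000000 = 0x1000, and the returned cookie is
 * the mapping address plus the sub-page offset 0x4. Callers can therefore
 * pass an unaligned physical address and still get a pointer to the exact
 * byte they asked for.
 */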

void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
			 unsigned long flags)
{
	return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
}

void __iomem * ioremap(phys_addr_t addr, unsigned long size)
{
	unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0)));
	void *caller = __builtin_return_address(0);

	if (ppc_md.ioremap)
		return ppc_md.ioremap(addr, size, flags, caller);
	return __ioremap_caller(addr, size, flags, caller);
}
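
/*
 * Editor's note: a minimal driver-side sketch, not part of the original
 * file, showing the usual ioremap()/iounmap() pairing. The device address
 * and register offset are hypothetical:
 *
 *	void __iomem *regs = ioremap(0xf0000000, 0x1000);
 *
 *	if (!regs)
 *		return -ENOMEM;
 *	writel(1, regs + 0x10);		// hypothetical control register
 *	iounmap(regs);
 */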

void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
{
	unsigned long flags = pgprot_val(pgprot_noncached_wc(__pgprot(0)));
	void *caller = __builtin_return_address(0);

	if (ppc_md.ioremap)
		return ppc_md.ioremap(addr, size, flags, caller);
	return __ioremap_caller(addr, size, flags, caller);
}

void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
			    unsigned long flags)
{
	void *caller = __builtin_return_address(0);

	/* writeable implies dirty for kernel addresses */
	if (flags & _PAGE_WRITE)
		flags |= _PAGE_DIRTY;

	/* we don't want to let _PAGE_EXEC leak out */
	flags &= ~_PAGE_EXEC;
	/*
	 * Force kernel mapping.
	 */
#if defined(CONFIG_PPC_BOOK3S_64)
	flags |= _PAGE_PRIVILEGED;
#else
	flags &= ~_PAGE_USER;
#endif

#ifdef _PAGE_BAP_SR
	/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
	 * which means that we just cleared supervisor access... oops ;-) This
	 * restores it
	 */
	flags |= _PAGE_BAP_SR;
#endif

	if (ppc_md.ioremap)
		return ppc_md.ioremap(addr, size, flags, caller);
	return __ioremap_caller(addr, size, flags, caller);
}
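
/*
 * Editor's note: a hypothetical ioremap_prot() call sketching how the flag
 * fixups above behave. Passing _PAGE_WRITE gets _PAGE_DIRTY added, and any
 * _PAGE_EXEC bit is stripped before the mapping is created:
 *
 *	void __iomem *p;
 *
 *	p = ioremap_prot(0xf0000000, 0x1000,
 *			 pgprot_val(pgprot_noncached(__pgprot(0)))
 *			 | _PAGE_WRITE | _PAGE_EXEC);
 *	// p's PTEs carry _PAGE_WRITE | _PAGE_DIRTY, never _PAGE_EXEC
 */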

/*
 * Unmap an IO region and remove it from the vmalloc'd list.
 * Access to IO memory should be serialized by the driver.
 */
void __iounmap(volatile void __iomem *token)
{
	void *addr;

	if (!slab_is_available())
		return;

	addr = (void *) ((unsigned long __force)
			 PCI_FIX_ADDR(token) & PAGE_MASK);
	if ((unsigned long)addr < ioremap_bot) {
		printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
		       " at 0x%p\n", addr);
		return;
	}
	vunmap(addr);
}

void iounmap(volatile void __iomem *token)
{
	if (ppc_md.iounmap)
		ppc_md.iounmap(token);
	else
		__iounmap(token);
}

EXPORT_SYMBOL(ioremap);
EXPORT_SYMBOL(ioremap_wc);
EXPORT_SYMBOL(ioremap_prot);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(__ioremap_at);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(__iounmap);
EXPORT_SYMBOL(__iounmap_at);

#ifndef __PAGETABLE_PUD_FOLDED
/* 4 level page table */
struct page *pgd_page(pgd_t pgd)
{
	if (pgd_huge(pgd))
		return pte_page(pgd_pte(pgd));
	return virt_to_page(pgd_page_vaddr(pgd));
}
#endif

struct page *pud_page(pud_t pud)
{
	if (pud_huge(pud))
		return pte_page(pud_pte(pud));
	return virt_to_page(pud_page_vaddr(pud));
}

/*
 * For a hugepage we have the pfn in the pmd itself and use PTE_RPN_SHIFT
 * bits for flags. For a PTE page, we have a PTE_FRAG_SIZE (4K) aligned
 * virtual address.
 */
struct page *pmd_page(pmd_t pmd)
{
	if (pmd_trans_huge(pmd) || pmd_huge(pmd))
		return pte_page(pmd_pte(pmd));
	return virt_to_page(pmd_page_vaddr(pmd));
}

#ifdef CONFIG_PPC_64K_PAGES
static pte_t *get_from_cache(struct mm_struct *mm)
{
	void *pte_frag, *ret;

	spin_lock(&mm->page_table_lock);
	ret = mm->context.pte_frag;
	if (ret) {
		pte_frag = ret + PTE_FRAG_SIZE;
		/*
		 * If we have used up all the fragments in this page,
		 * set the per-mm fragment pointer to NULL.
		 */
		if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
			pte_frag = NULL;
		mm->context.pte_frag = pte_frag;
	}
	spin_unlock(&mm->page_table_lock);
	return (pte_t *)ret;
}

static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
{
	void *ret = NULL;
	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);

	if (!page)
		return NULL;
	if (!kernel && !pgtable_page_ctor(page)) {
		__free_page(page);
		return NULL;
	}

	ret = page_address(page);
	spin_lock(&mm->page_table_lock);
	/*
	 * If another thread raced in and set mm->context.pte_frag first,
	 * return the freshly allocated page with a single fragment
	 * reference count.
	 */
	if (likely(!mm->context.pte_frag)) {
		set_page_count(page, PTE_FRAG_NR);
		mm->context.pte_frag = ret + PTE_FRAG_SIZE;
	}
	spin_unlock(&mm->page_table_lock);

	return (pte_t *)ret;
}

pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
{
	pte_t *pte;

	pte = get_from_cache(mm);
	if (pte)
		return pte;

	return __alloc_for_cache(mm, kernel);
}
#endif /* CONFIG_PPC_64K_PAGES */
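
/*
 * Editor's note: a worked example of the fragment arithmetic above,
 * assuming the common 64K-page configuration where PTE_FRAG_SIZE is 4K.
 * One 64K page then yields PTE_FRAG_NR = 64K / 4K = 16 page-table
 * fragments; set_page_count(page, PTE_FRAG_NR) hands out one reference
 * per fragment, and the page is only returned to the allocator once
 * pte_fragment_free() has dropped all 16 via put_page_testzero().
 */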

void pte_fragment_free(unsigned long *table, int kernel)
{
	struct page *page = virt_to_page(table);

	if (put_page_testzero(page)) {
		if (!kernel)
			pgtable_page_dtor(page);
		free_hot_cold_page(page, 0);
	}
}

#ifdef CONFIG_SMP
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
	unsigned long pgf = (unsigned long)table;

	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
	pgf |= shift;
	tlb_remove_table(tlb, (void *)pgf);
}
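
/*
 * Editor's note: a worked example of the pointer-tagging trick above, with
 * hypothetical values. Page-table pages are aligned well beyond
 * MAX_PGTABLE_INDEX_SIZE, so the low bits of the table pointer are free to
 * carry the index-size tag: for table = 0xc000000001234000 and shift = 9,
 * pgf = 0xc000000001234009. __tlb_remove_table() below splits the cookie
 * back apart with '& ~MAX_PGTABLE_INDEX_SIZE' (pointer) and
 * '& MAX_PGTABLE_INDEX_SIZE' (shift).
 */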

void __tlb_remove_table(void *_table)
{
	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;

	if (!shift)
		/* PTE page needs special handling */
		pte_fragment_free(table, 0);
	else {
		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
		kmem_cache_free(PGT_CACHE(shift), table);
	}
}
#else
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
	if (!shift) {
		/* PTE page needs special handling */
		pte_fragment_free(table, 0);
	} else {
		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
		kmem_cache_free(PGT_CACHE(shift), table);
	}
}
#endif