entry_32.S

/*
 * Copyright (C) 1991,1992 Linus Torvalds
 *
 * entry_32.S contains the system-call and low-level fault and trap handling routines.
 *
 * Stack layout while running C code:
 * ptrace needs to have all registers on the stack.
 * If the order here is changed, it needs to be
 * updated in fork.c:copy_process(), signal.c:do_signal(),
 * ptrace.c and ptrace.h
 *
 *  0(%esp) - %ebx
 *  4(%esp) - %ecx
 *  8(%esp) - %edx
 *  C(%esp) - %esi
 * 10(%esp) - %edi
 * 14(%esp) - %ebp
 * 18(%esp) - %eax
 * 1C(%esp) - %ds
 * 20(%esp) - %es
 * 24(%esp) - %fs
 * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
 * 2C(%esp) - orig_eax
 * 30(%esp) - %eip
 * 34(%esp) - %cs
 * 38(%esp) - %eflags
 * 3C(%esp) - %oldesp
 * 40(%esp) - %oldss
 */
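/*
 * These offsets describe the same layout as struct pt_regs; throughout
 * this file they are referenced via the generated asm-offsets constants
 * (PT_EBX ... PT_OLDSS, e.g. PT_EFLAGS and PT_OLDESP below).
 */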
#include <linux/linkage.h>
#include <linux/err.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

.section .entry.text, "ax"

/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization. The following will never clobber any registers:
 * INTERRUPT_RETURN (aka. "iret")
 * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
 * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */

#ifdef CONFIG_PREEMPT
# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
# define resume_kernel restore_all
#endif

.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off?
jz 1f
TRACE_IRQS_ON
1:
#endif
.endm

/*
 * User gs save/restore
 *
 * %gs is used for userland TLS and kernel only uses it for stack
 * canary which is required to be at %gs:20 by gcc. Read the comment
 * at the top of stackprotector.h for more info.
 *
 * Local labels 98 and 99 are used.
 */
#ifdef CONFIG_X86_32_LAZY_GS

/* unfortunately push/pop can't be no-op */
.macro PUSH_GS
pushl $0
.endm
.macro POP_GS pop=0
addl $(4 + \pop), %esp
.endm
.macro POP_GS_EX
.endm

/* all the rest are no-op */
.macro PTGS_TO_GS
.endm
.macro PTGS_TO_GS_EX
.endm
.macro GS_TO_REG reg
.endm
.macro REG_TO_PTGS reg
.endm
.macro SET_KERNEL_GS reg
.endm

#else /* CONFIG_X86_32_LAZY_GS */

.macro PUSH_GS
pushl %gs
.endm

.macro POP_GS pop=0
98: popl %gs
.if \pop <> 0
add $\pop, %esp
.endif
.endm
.macro POP_GS_EX
.pushsection .fixup, "ax"
99: movl $0, (%esp)
jmp 98b
.popsection
_ASM_EXTABLE(98b, 99b)
.endm

.macro PTGS_TO_GS
98: mov PT_GS(%esp), %gs
.endm
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99: movl $0, PT_GS(%esp)
jmp 98b
.popsection
_ASM_EXTABLE(98b, 99b)
.endm

.macro GS_TO_REG reg
movl %gs, \reg
.endm
.macro REG_TO_PTGS reg
movl \reg, PT_GS(%esp)
.endm
.macro SET_KERNEL_GS reg
movl $(__KERNEL_STACK_CANARY), \reg
movl \reg, %gs
.endm

#endif /* CONFIG_X86_32_LAZY_GS */
.macro SAVE_ALL pt_regs_ax=%eax
cld
PUSH_GS
pushl %fs
pushl %es
pushl %ds
pushl \pt_regs_ax
pushl %ebp
pushl %edi
pushl %esi
pushl %edx
pushl %ecx
pushl %ebx
movl $(__USER_DS), %edx
movl %edx, %ds
movl %edx, %es
movl $(__KERNEL_PERCPU), %edx
movl %edx, %fs
SET_KERNEL_GS %edx
.endm

.macro RESTORE_INT_REGS
popl %ebx
popl %ecx
popl %edx
popl %esi
popl %edi
popl %ebp
popl %eax
.endm

.macro RESTORE_REGS pop=0
RESTORE_INT_REGS
1: popl %ds
2: popl %es
3: popl %fs
POP_GS \pop
.pushsection .fixup, "ax"
4: movl $0, (%esp)
jmp 1b
5: movl $0, (%esp)
jmp 2b
6: movl $0, (%esp)
jmp 3b
.popsection
_ASM_EXTABLE(1b, 4b)
_ASM_EXTABLE(2b, 5b)
_ASM_EXTABLE(3b, 6b)
POP_GS_EX
.endm

/*
 * %eax: prev task
 * %edx: next task
 */
ENTRY(__switch_to_asm)
/*
 * Save callee-saved registers
 * This must match the order in struct inactive_task_frame
 */
pushl %ebp
pushl %ebx
pushl %edi
pushl %esi

/* switch stack */
movl %esp, TASK_threadsp(%eax)
movl TASK_threadsp(%edx), %esp

#ifdef CONFIG_CC_STACKPROTECTOR
movl TASK_stack_canary(%edx), %ebx
movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif

/* restore callee-saved registers */
popl %esi
popl %edi
popl %ebx
popl %ebp

jmp __switch_to
END(__switch_to_asm)

/*
 * A newly forked process directly context switches into this address.
 *
 * eax: prev task we switched from
 * ebx: kernel thread func (NULL for user thread)
 * edi: kernel thread arg
 */
ENTRY(ret_from_fork)
pushl %eax
call schedule_tail
popl %eax

testl %ebx, %ebx
jnz 1f /* kernel threads are uncommon */

2:
/* When we fork, we trace the syscall return in the child, too. */
movl %esp, %eax
call syscall_return_slowpath
jmp restore_all

/* kernel thread */
1: movl %edi, %eax
call *%ebx
/*
 * A kernel thread is allowed to return here after successfully
 * calling do_execve(). Exit to userspace to complete the execve()
 * syscall.
 */
movl $0, PT_EAX(%esp)
jmp 2b
END(ret_from_fork)
/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

# userspace resumption stub bypassing syscall exit tracing
ALIGN
ret_from_exception:
preempt_stop(CLBR_ANY)
ret_from_intr:
#ifdef CONFIG_VM86
movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
movb PT_CS(%esp), %al
andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
#else
/*
 * We can be coming here from child spawned by kernel_thread().
 */
movl PT_CS(%esp), %eax
andl $SEGMENT_RPL_MASK, %eax
#endif
cmpl $USER_RPL, %eax
jb resume_kernel # not returning to v8086 or userspace

ENTRY(resume_userspace)
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
movl %esp, %eax
call prepare_exit_to_usermode
jmp restore_all
END(ret_from_exception)

#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
DISABLE_INTERRUPTS(CLBR_ANY)
need_resched:
cmpl $0, PER_CPU_VAR(__preempt_count)
jnz restore_all
testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
jz restore_all
call preempt_schedule_irq
jmp need_resched
END(resume_kernel)
#endif

GLOBAL(__begin_SYSENTER_singlestep_region)
/*
 * All code from here through __end_SYSENTER_singlestep_region is subject
 * to being single-stepped if a user program sets TF and executes SYSENTER.
 * There is absolutely nothing that we can do to prevent this from happening
 * (thanks Intel!). To keep our handling of this situation as simple as
 * possible, we handle TF just like AC and NT, except that our #DB handler
 * will ignore all of the single-step traps generated in this range.
 */

#ifdef CONFIG_XEN
/*
 * Xen doesn't set %esp to be precisely what the normal SYSENTER
 * entry point expects, so fix it up before using the normal path.
 */
ENTRY(xen_sysenter_target)
addl $5*4, %esp /* remove xen-provided frame */
jmp sysenter_past_esp
#endif

/*
 * 32-bit SYSENTER entry.
 *
 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
 * if X86_FEATURE_SEP is available. This is the preferred system call
 * entry on 32-bit systems.
 *
 * The SYSENTER instruction, in principle, should *only* occur in the
 * vDSO. In practice, a small number of Android devices were shipped
 * with a copy of Bionic that inlined a SYSENTER instruction. This
 * never happened in any of Google's Bionic versions -- it only happened
 * in a narrow range of Intel-provided versions.
 *
 * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
 * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
 * SYSENTER does not save anything on the stack,
 * and does not save old EIP (!!!), ESP, or EFLAGS.
 *
 * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
 * user and/or vm86 state), we explicitly disable the SYSENTER
 * instruction in vm86 mode by reprogramming the MSRs.
 *
 * Arguments:
 * eax system call number
 * ebx arg1
 * ecx arg2
 * edx arg3
 * esi arg4
 * edi arg5
 * ebp user stack
 * 0(%ebp) arg6
 */
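/*
 * For reference, the vDSO's __kernel_vsyscall reaches this entry point
 * with a sequence roughly along these lines (the authoritative version
 * lives in the vDSO sources):
 *
 *	push	%ecx
 *	push	%edx
 *	push	%ebp
 *	movl	%esp, %ebp	# ebp = user stack; 0(%ebp) = saved ebp = arg6
 *	sysenter
 *	...			# SYSEXIT resumes here; pops restore ebp/edx/ecx
 *
 * which is why arg6 is reachable at 0(%ebp) in the argument list above.
 */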
ENTRY(entry_SYSENTER_32)
movl TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp:
pushl $__USER_DS /* pt_regs->ss */
pushl %ebp /* pt_regs->sp (stashed in bp) */
pushfl /* pt_regs->flags (except IF = 0) */
orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
pushl $__USER_CS /* pt_regs->cs */
pushl $0 /* pt_regs->ip = 0 (placeholder) */
pushl %eax /* pt_regs->orig_ax */
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */

/*
 * SYSENTER doesn't filter flags, so we need to clear NT, AC
 * and TF ourselves. To save a few cycles, we can check whether
 * either was set instead of doing an unconditional popfl.
 * This needs to happen before enabling interrupts so that
 * we don't get preempted with NT set.
 *
 * If TF is set, we will single-step all the way to here -- do_debug
 * will ignore all the traps. (Yes, this is slow, but so is
 * single-stepping in general. This allows us to avoid having
 * more complicated code to handle the case where a user program
 * forces us to single-step through the SYSENTER entry code.)
 *
 * NB.: .Lsysenter_fix_flags is a label with the code under it moved
 * out-of-line as an optimization: NT is unlikely to be set in the
 * majority of the cases and instead of polluting the I$ unnecessarily,
 * we're keeping that code behind a branch which will predict as
 * not-taken and therefore its instructions won't be fetched.
 */
testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:

/*
 * User mode is traced as though IRQs are on, and SYSENTER
 * turned them off.
 */
TRACE_IRQS_OFF

movl %esp, %eax
call do_fast_syscall_32
/* XEN PV guests always use IRET path */
ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
"jmp .Lsyscall_32_done", X86_FEATURE_XENPV

/* Opportunistic SYSEXIT */
TRACE_IRQS_ON /* User mode traces as IRQs on. */
movl PT_EIP(%esp), %edx /* pt_regs->ip */
movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */
1: mov PT_FS(%esp), %fs
PTGS_TO_GS
popl %ebx /* pt_regs->bx */
addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */
popl %esi /* pt_regs->si */
popl %edi /* pt_regs->di */
popl %ebp /* pt_regs->bp */
popl %eax /* pt_regs->ax */

/*
 * Restore all flags except IF. (We restore IF separately because
 * STI gives a one-instruction window in which we won't be interrupted,
 * whereas POPF does not.)
 */
addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */
btr $X86_EFLAGS_IF_BIT, (%esp)
popfl

/*
 * Return back to the vDSO, which will pop ecx and edx.
 * Don't bother with DS and ES (they already contain __USER_DS).
 */
sti
sysexit

.pushsection .fixup, "ax"
2: movl $0, PT_FS(%esp)
jmp 1b
.popsection
_ASM_EXTABLE(1b, 2b)
PTGS_TO_GS_EX

.Lsysenter_fix_flags:
pushl $X86_EFLAGS_FIXED
popfl
jmp .Lsysenter_flags_fixed
GLOBAL(__end_SYSENTER_singlestep_region)
ENDPROC(entry_SYSENTER_32)
/*
 * 32-bit legacy system call entry.
 *
 * 32-bit x86 Linux system calls traditionally used the INT $0x80
 * instruction. INT $0x80 lands here.
 *
 * This entry point can be used by any 32-bit program to perform
 * system calls. Instances of INT $0x80 can be found inline in
 * various programs and libraries. It is also used by the vDSO's
 * __kernel_vsyscall fallback for hardware that doesn't support a
 * faster entry method. Restarted 32-bit system calls also fall
 * back to INT $0x80 regardless of what instruction was originally
 * used to do the system call. (64-bit programs can use INT $0x80
 * as well, but they can only run on 64-bit kernels and therefore
 * land in entry_INT80_compat.)
 *
 * This is considered a slow path. It is not used by most libc
 * implementations on modern hardware except during process startup.
 *
 * Arguments:
 * eax system call number
 * ebx arg1
 * ecx arg2
 * edx arg3
 * esi arg4
 * edi arg5
 * ebp arg6
 */
ENTRY(entry_INT80_32)
ASM_CLAC
pushl %eax /* pt_regs->orig_ax */
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */

/*
 * User mode is traced as though IRQs are on, and the interrupt gate
 * turned them off.
 */
TRACE_IRQS_OFF

movl %esp, %eax
call do_int80_syscall_32
.Lsyscall_32_done:

restore_all:
TRACE_IRQS_IRET
restore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
ALTERNATIVE "jmp restore_nocheck", "", X86_BUG_ESPFIX

movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
/*
 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
 * are returning to the kernel.
 * See comments in process.c:copy_thread() for details.
 */
movb PT_OLDSS(%esp), %ah
movb PT_CS(%esp), %al
andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
je ldt_ss # returning to user-space with LDT SS
#endif
restore_nocheck:
RESTORE_REGS 4 # skip orig_eax/error_code
irq_return:
INTERRUPT_RETURN

.section .fixup, "ax"
ENTRY(iret_exc )
pushl $0 # no error code
pushl $do_iret_error
jmp error_code
.previous
_ASM_EXTABLE(irq_return, iret_exc)

#ifdef CONFIG_X86_ESPFIX32
ldt_ss:
/*
 * Setup and switch to ESPFIX stack
 *
 * We're returning to userspace with a 16 bit stack. The CPU will not
 * restore the high word of ESP for us on executing iret... This is an
 * "official" bug of all the x86-compatible CPUs, which we can work
 * around to make dosemu and wine happy. We do this by preloading the
 * high word of ESP with the high word of the userspace ESP while
 * compensating for the offset by changing to the ESPFIX segment with
 * a base address that matches for the difference.
 */
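/*
 * A worked example (the values are made up for illustration): with a
 * kernel %esp of 0xc1234a00 and a userspace %esp of 0x00405b00, the
 * code below builds %eax = 0x00404a00 (user high word, kernel low word)
 * and programs the ESPFIX segment base so that its bits 16..31 hold
 * 0xc123 - 0x0040 = 0xc0e3. The linear address base + %eax still points
 * at the real kernel stack, but the value left in %esp carries the
 * user's own high word, so the CPU's failure to restore the high word
 * of ESP on the 16-bit iret leaks nothing of the kernel's.
 */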
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)

mov %esp, %edx /* load kernel esp */
mov PT_OLDESP(%esp), %eax /* load userspace esp */
mov %dx, %ax /* eax: new kernel esp */
sub %eax, %edx /* offset (low word is 0) */
shr $16, %edx
mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
pushl $__ESPFIX_SS
pushl %eax /* new kernel esp */
/*
 * Disable interrupts, but do not irqtrace this section: we
 * will soon execute iret and the tracer was already set to
 * the irqstate after the IRET:
 */
DISABLE_INTERRUPTS(CLBR_EAX)
lss (%esp), %esp /* switch to espfix segment */
jmp restore_nocheck
#endif
ENDPROC(entry_INT80_32)

.macro FIXUP_ESPFIX_STACK
/*
 * Switch back from the ESPFIX stack to the normal zero-based stack
 *
 * We can't call C functions using the ESPFIX stack. This code reads
 * the high word of the segment base from the GDT and switches to the
 * normal stack and adjusts ESP with the matching offset.
 */
#ifdef CONFIG_X86_ESPFIX32
/* fixup the stack */
mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
shl $16, %eax
addl %esp, %eax /* the adjusted stack pointer */
pushl $__KERNEL_DS
pushl %eax
lss (%esp), %esp /* switch to the normal stack segment */
#endif
.endm
.macro UNWIND_ESPFIX_STACK
#ifdef CONFIG_X86_ESPFIX32
movl %ss, %eax
/* see if on espfix stack */
cmpw $__ESPFIX_SS, %ax
jne 27f
movl $__KERNEL_DS, %eax
movl %eax, %ds
movl %eax, %es
/* switch to normal stack */
FIXUP_ESPFIX_STACK
27:
#endif
.endm

/*
 * Build the entry stubs with some assembler magic.
 * We pack 1 stub into every 8-byte block.
 */
.align 8
ENTRY(irq_entries_start)
vector=FIRST_EXTERNAL_VECTOR
.rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
pushl $(~vector+0x80) /* Note: always in signed byte range */
vector=vector+1
jmp common_interrupt
.align 8
.endr
END(irq_entries_start)
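/*
 * Note on the encoding used by the stubs above: pushing (~vector + 0x80)
 * keeps the immediate in signed-byte range, so each stub is a 2-byte
 * push plus a jmp and fits in its 8-byte slot. common_interrupt below
 * subtracts the 0x80 again, leaving ~vector in orig_eax, and do_IRQ
 * recovers the vector number by complementing that value.
 */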
/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 */
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
ASM_CLAC
addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
SAVE_ALL
TRACE_IRQS_OFF
movl %esp, %eax
call do_IRQ
jmp ret_from_intr
ENDPROC(common_interrupt)

#define BUILD_INTERRUPT3(name, nr, fn) \
ENTRY(name) \
ASM_CLAC; \
pushl $~(nr); \
SAVE_ALL; \
TRACE_IRQS_OFF \
movl %esp, %eax; \
call fn; \
jmp ret_from_intr; \
ENDPROC(name)

#ifdef CONFIG_TRACING
# define TRACE_BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
#else
# define TRACE_BUILD_INTERRUPT(name, nr)
#endif

#define BUILD_INTERRUPT(name, nr) \
BUILD_INTERRUPT3(name, nr, smp_##name); \
TRACE_BUILD_INTERRUPT(name, nr)

/* The include is where all of the SMP etc. interrupts come from */
#include <asm/entry_arch.h>

ENTRY(coprocessor_error)
ASM_CLAC
pushl $0
pushl $do_coprocessor_error
jmp error_code
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
ASM_CLAC
pushl $0
#ifdef CONFIG_X86_INVD_BUG
/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
ALTERNATIVE "pushl $do_general_protection", \
"pushl $do_simd_coprocessor_error", \
X86_FEATURE_XMM
#else
pushl $do_simd_coprocessor_error
#endif
jmp error_code
END(simd_coprocessor_error)

ENTRY(device_not_available)
ASM_CLAC
pushl $-1 # mark this as an int
pushl $do_device_not_available
jmp error_code
END(device_not_available)

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
iret
_ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)
#endif

ENTRY(overflow)
ASM_CLAC
pushl $0
pushl $do_overflow
jmp error_code
END(overflow)

ENTRY(bounds)
ASM_CLAC
pushl $0
pushl $do_bounds
jmp error_code
END(bounds)

ENTRY(invalid_op)
ASM_CLAC
pushl $0
pushl $do_invalid_op
jmp error_code
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
ASM_CLAC
pushl $0
pushl $do_coprocessor_segment_overrun
jmp error_code
END(coprocessor_segment_overrun)

ENTRY(invalid_TSS)
ASM_CLAC
pushl $do_invalid_TSS
jmp error_code
END(invalid_TSS)

ENTRY(segment_not_present)
ASM_CLAC
pushl $do_segment_not_present
jmp error_code
END(segment_not_present)

ENTRY(stack_segment)
ASM_CLAC
pushl $do_stack_segment
jmp error_code
END(stack_segment)

ENTRY(alignment_check)
ASM_CLAC
pushl $do_alignment_check
jmp error_code
END(alignment_check)

ENTRY(divide_error)
ASM_CLAC
pushl $0 # no error code
pushl $do_divide_error
jmp error_code
END(divide_error)

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
ASM_CLAC
pushl $0
pushl machine_check_vector
jmp error_code
END(machine_check)
#endif

ENTRY(spurious_interrupt_bug)
ASM_CLAC
pushl $0
pushl $do_spurious_interrupt_bug
jmp error_code
END(spurious_interrupt_bug)
#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
TRACE_IRQS_OFF

/*
 * Check to see if we got the event in the critical
 * region in xen_iret_direct, after we've reenabled
 * events and checked for pending events. This simulates
 * iret instruction's behaviour where it delivers a
 * pending interrupt when enabling interrupts:
 */
movl PT_EIP(%esp), %eax
cmpl $xen_iret_start_crit, %eax
jb 1f
cmpl $xen_iret_end_crit, %eax
jae 1f

jmp xen_iret_crit_fixup

ENTRY(xen_do_upcall)
1: mov %esp, %eax
call xen_evtchn_do_upcall
#ifndef CONFIG_PREEMPT
call xen_maybe_preempt_hcall
#endif
jmp ret_from_intr
ENDPROC(xen_hypervisor_callback)

/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 * 1. Fault while reloading DS, ES, FS or GS
 * 2. Fault while executing IRET
 * Category 1 we fix up by reattempting the load, and zeroing the segment
 * register if the load fails.
 * Category 2 we fix up by jumping to do_iret_error. We cannot use the
 * normal Linux return path in this case because if we use the IRET hypercall
 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
 * We distinguish between categories by maintaining a status value in EAX.
 */
ENTRY(xen_failsafe_callback)
pushl %eax
movl $1, %eax
1: mov 4(%esp), %ds
2: mov 8(%esp), %es
3: mov 12(%esp), %fs
4: mov 16(%esp), %gs
/*
 * EAX == 0 => Category 1 (Bad segment)
 * EAX != 0 => Category 2 (Bad IRET)
 */
testl %eax, %eax
popl %eax
lea 16(%esp), %esp
jz 5f
jmp iret_exc
5: pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
jmp ret_from_exception

.section .fixup, "ax"
6: xorl %eax, %eax
movl %eax, 4(%esp)
jmp 1b
7: xorl %eax, %eax
movl %eax, 8(%esp)
jmp 2b
8: xorl %eax, %eax
movl %eax, 12(%esp)
jmp 3b
9: xorl %eax, %eax
movl %eax, 16(%esp)
jmp 4b
.previous
_ASM_EXTABLE(1b, 6b)
_ASM_EXTABLE(2b, 7b)
_ASM_EXTABLE(3b, 8b)
_ASM_EXTABLE(4b, 9b)
ENDPROC(xen_failsafe_callback)

BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
xen_evtchn_do_upcall)

#endif /* CONFIG_XEN */

#if IS_ENABLED(CONFIG_HYPERV)
BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
hyperv_vector_handler)
#endif /* CONFIG_HYPERV */
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE

ENTRY(mcount)
ret
END(mcount)

ENTRY(ftrace_caller)
pushl %eax
pushl %ecx
pushl %edx
pushl $0 /* Pass NULL as regs pointer */
movl 4*4(%esp), %eax
movl 0x4(%ebp), %edx
movl function_trace_op, %ecx
subl $MCOUNT_INSN_SIZE, %eax

.globl ftrace_call
ftrace_call:
call ftrace_stub

addl $4, %esp /* skip NULL pointer */
popl %edx
popl %ecx
popl %eax
ftrace_ret:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
jmp ftrace_stub
#endif

/* This is weak to keep gas from relaxing the jumps */
WEAK(ftrace_stub)
ret
END(ftrace_caller)

ENTRY(ftrace_regs_caller)
pushf /* push flags before compare (in cs location) */

/*
 * i386 does not save SS and ESP when coming from kernel.
 * Instead, to get sp, &regs->sp is used (see ptrace.h).
 * Unfortunately, that means eflags must be at the same location
 * as the current return ip is. We move the return ip into the
 * ip location, and move flags into the return ip location.
 */
pushl 4(%esp) /* save return ip into ip slot */

pushl $0 /* Load 0 into orig_ax */
pushl %gs
pushl %fs
pushl %es
pushl %ds
pushl %eax
pushl %ebp
pushl %edi
pushl %esi
pushl %edx
pushl %ecx
pushl %ebx

movl 13*4(%esp), %eax /* Get the saved flags */
movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */
/* clobbering return ip */
movl $__KERNEL_CS, 13*4(%esp)

movl 12*4(%esp), %eax /* Load ip (1st parameter) */
subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */
movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
pushl %esp /* Save pt_regs as 4th parameter */

GLOBAL(ftrace_regs_call)
call ftrace_stub

addl $4, %esp /* Skip pt_regs */
movl 14*4(%esp), %eax /* Move flags back into cs */
movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */
movl 12*4(%esp), %eax /* Get return ip from regs->ip */
movl %eax, 14*4(%esp) /* Put return ip back for ret */

popl %ebx
popl %ecx
popl %edx
popl %esi
popl %edi
popl %ebp
popl %eax
popl %ds
popl %es
popl %fs
popl %gs
addl $8, %esp /* Skip orig_ax and ip */
popf /* Pop flags at end (no addl to corrupt flags) */
jmp ftrace_ret

popf
jmp ftrace_stub
#else /* ! CONFIG_DYNAMIC_FTRACE */

ENTRY(mcount)
cmpl $__PAGE_OFFSET, %esp
jb ftrace_stub /* Paging not enabled yet? */

cmpl $ftrace_stub, ftrace_trace_function
jnz trace
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
cmpl $ftrace_stub, ftrace_graph_return
jnz ftrace_graph_caller

cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
jnz ftrace_graph_caller
#endif
.globl ftrace_stub
ftrace_stub:
ret

/* taken from glibc */
trace:
pushl %eax
pushl %ecx
pushl %edx
movl 0xc(%esp), %eax
movl 0x4(%ebp), %edx
subl $MCOUNT_INSN_SIZE, %eax

call *ftrace_trace_function

popl %edx
popl %ecx
popl %eax
jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
EXPORT_SYMBOL(mcount)
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
pushl %eax
pushl %ecx
pushl %edx
movl 0xc(%esp), %eax
lea 0x4(%ebp), %edx
movl (%ebp), %ecx
subl $MCOUNT_INSN_SIZE, %eax
call prepare_ftrace_return
popl %edx
popl %ecx
popl %eax
ret
END(ftrace_graph_caller)

.globl return_to_handler
return_to_handler:
pushl %eax
pushl %edx
movl %ebp, %eax
call ftrace_return_to_handler
movl %eax, %ecx
popl %edx
popl %eax
jmp *%ecx
#endif

#ifdef CONFIG_TRACING
ENTRY(trace_page_fault)
ASM_CLAC
pushl $trace_do_page_fault
jmp error_code
END(trace_page_fault)
#endif

ENTRY(page_fault)
ASM_CLAC
pushl $do_page_fault
ALIGN
error_code:
/* the function address is in %gs's slot on the stack */
pushl %fs
pushl %es
pushl %ds
pushl %eax
pushl %ebp
pushl %edi
pushl %esi
pushl %edx
pushl %ecx
pushl %ebx
cld
movl $(__KERNEL_PERCPU), %ecx
movl %ecx, %fs
UNWIND_ESPFIX_STACK
GS_TO_REG %ecx
movl PT_GS(%esp), %edi # get the function address
movl PT_ORIG_EAX(%esp), %edx # get the error code
movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
REG_TO_PTGS %ecx
SET_KERNEL_GS %ecx
movl $(__USER_DS), %ecx
movl %ecx, %ds
movl %ecx, %es
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
call *%edi
jmp ret_from_exception
END(page_fault)

ENTRY(debug)
/*
 * #DB can happen at the first instruction of
 * entry_SYSENTER_32 or in Xen's SYSENTER prologue. If this
 * happens, then we will be running on a very small stack. We
 * need to detect this condition and switch to the thread
 * stack before calling any C code at all.
 *
 * If you edit this code, keep in mind that NMIs can happen in here.
 */
ASM_CLAC
pushl $-1 # mark this as an int
SAVE_ALL
xorl %edx, %edx # error code 0
movl %esp, %eax # pt_regs pointer

/* Are we currently on the SYSENTER stack? */
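/*
 * The check below loads the end of the SYSENTER stack into %ecx and
 * computes (end - %esp); a single unsigned compare against the stack
 * size then catches exactly the case where %esp lies within the
 * SYSENTER stack, without needing a separate lower-bound test.
 */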
PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
cmpl $SIZEOF_SYSENTER_stack, %ecx
jb .Ldebug_from_sysenter_stack

TRACE_IRQS_OFF
call do_debug
jmp ret_from_exception

.Ldebug_from_sysenter_stack:
/* We're on the SYSENTER stack. Switch off. */
movl %esp, %ebp
movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
TRACE_IRQS_OFF
call do_debug
movl %ebp, %esp
jmp ret_from_exception
END(debug)

/*
 * NMI is doubly nasty. It can happen on the first instruction of
 * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
 * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
 * switched stacks. We handle both conditions by simply checking whether we
 * interrupted kernel code running on the SYSENTER stack.
 */
ENTRY(nmi)
ASM_CLAC
#ifdef CONFIG_X86_ESPFIX32
pushl %eax
movl %ss, %eax
cmpw $__ESPFIX_SS, %ax
popl %eax
je nmi_espfix_stack
#endif

pushl %eax # pt_regs->orig_ax
SAVE_ALL
xorl %edx, %edx # zero error code
movl %esp, %eax # pt_regs pointer

/* Are we currently on the SYSENTER stack? */
PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
cmpl $SIZEOF_SYSENTER_stack, %ecx
jb .Lnmi_from_sysenter_stack

/* Not on SYSENTER stack. */
call do_nmi
jmp restore_all_notrace

.Lnmi_from_sysenter_stack:
/*
 * We're on the SYSENTER stack. Switch off. No one (not even debug)
 * is using the thread stack right now, so it's safe for us to use it.
 */
movl %esp, %ebp
movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
call do_nmi
movl %ebp, %esp
jmp restore_all_notrace

#ifdef CONFIG_X86_ESPFIX32
nmi_espfix_stack:
/*
 * create the pointer to lss back
 */
pushl %ss
pushl %esp
addl $4, (%esp)
/* copy the iret frame of 12 bytes */
.rept 3
pushl 16(%esp)
.endr
pushl %eax
SAVE_ALL
FIXUP_ESPFIX_STACK # %eax == %esp
xorl %edx, %edx # zero error code
call do_nmi
RESTORE_REGS
lss 12+4(%esp), %esp # back to espfix stack
jmp irq_return
#endif
END(nmi)

ENTRY(int3)
ASM_CLAC
pushl $-1 # mark this as an int
SAVE_ALL
TRACE_IRQS_OFF
xorl %edx, %edx # zero error code
movl %esp, %eax # pt_regs pointer
call do_int3
jmp ret_from_exception
END(int3)

ENTRY(general_protection)
pushl $do_general_protection
jmp error_code
END(general_protection)

#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
ASM_CLAC
pushl $do_async_page_fault
jmp error_code
END(async_page_fault)
#endif

ENTRY(rewind_stack_do_exit)
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp

movl PER_CPU_VAR(cpu_current_top_of_stack), %esi
leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp

call do_exit
1: jmp 1b
END(rewind_stack_do_exit)