From 51b26ada79b605ed709ddcedbb6012e8f8e0ebed Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Apr 2009 10:12:47 -0700 Subject: x86: unify arch/x86/boot/compressed/vmlinux_*.lds Look at the: diff -u arch/x86/boot/compressed/vmlinux_*.lds output and realize that they're basially exactly the same except for trivial naming differences, and the fact that the 64-bit version has a "pgtable" thing. So unify them. There's some trivial cleanup there (make the output format a Kconfig thing rather than doing #ifdef's for it, and unify both 32-bit and 64-bit BSS end to "_ebss", where 32-bit used to use the traditional "_end"), but other than that it's really very mindless and straigt conversion. For example, I think we should aim to remove "startup_32" vs "startup_64", and just call it "startup", and get rid of one more difference. I didn't do that. Also, notice the comment in the unified vmlinux.lds.S talks about "head_64" and "startup_32" which is an odd and incorrect mix, but that was actually what the old 64-bit only lds file had, so the confusion isn't new, and now that mixing is arguably more accurate thanks to the vmlinux.lds.S file being shared between the two cases ;) [ Impact: cleanup, unification ] Signed-off-by: Linus Torvalds Acked-by: Sam Ravnborg Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/head_32.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 3a8a866fb2e..85bd3285706 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -88,9 +88,9 @@ ENTRY(startup_32) * where decompression in place becomes safe. */ pushl %esi - leal _end(%ebp), %esi - leal _end(%ebx), %edi - movl $(_end - startup_32), %ecx + leal _ebss(%ebp), %esi + leal _ebss(%ebx), %edi + movl $(_ebss - startup_32), %ecx std rep movsb @@ -121,7 +121,7 @@ relocated: */ xorl %eax,%eax leal _edata(%ebx),%edi - leal _end(%ebx), %ecx + leal _ebss(%ebx), %ecx subl %edi,%ecx cld rep -- cgit v1.2.3 From bd2a36984c50bb546a7d04cb395fddcf98a1092c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 5 May 2009 23:24:50 -0700 Subject: x86, boot: use BP_scratch in arch/x86/boot/compressed/head_*.S Use the BP_scratch symbol from asm-offsets.h instead of hard-coding the location. [ Impact: cleanup ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 85bd3285706..e3398f3d1b3 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -53,7 +53,7 @@ ENTRY(startup_32) * data at 0x1e4 (defined as a scratch field) are used as the stack * for this calculation. Only 4 bytes are needed. */ - leal (0x1e4+4)(%esi), %esp + leal (BP_scratch+4)(%esi), %esp call 1f 1: popl %ebp subl $1b, %ebp -- cgit v1.2.3 From 5f64ec64e7f9b246c0a94f34cdf7782f98a6e55d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 8 May 2009 15:45:17 -0700 Subject: x86, boot: stylistic cleanups for boot/compressed/head_32.S Reformat arch/x86/boot/compressed/head_32.S to be closer to currently preferred kernel assembly style, that is: - opcode and operand separated by tab - operands separated by ", " - C-style comments This also makes it more similar to head_64.S. [ Impact: cleanup, no object code change ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 170 +++++++++++++++++++------------------ 1 file changed, 89 insertions(+), 81 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index e3398f3d1b3..7bd7766ffab 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -12,16 +12,16 @@ * the page directory. [According to comments etc elsewhere on a compressed * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC] * - * Page 0 is deliberately kept safe, since System Management Mode code in + * Page 0 is deliberately kept safe, since System Management Mode code in * laptops may need to access the BIOS data stored there. This is also - * useful for future device drivers that either access the BIOS via VM86 + * useful for future device drivers that either access the BIOS via VM86 * mode. */ /* * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 */ -.text + .text #include #include @@ -29,75 +29,80 @@ #include #include -.section ".text.head","ax",@progbits + .section ".text.head","ax",@progbits ENTRY(startup_32) cld - /* test KEEP_SEGMENTS flag to see if the bootloader is asking - * us to not reload segments */ - testb $(1<<6), BP_loadflags(%esi) - jnz 1f + /* + * Test KEEP_SEGMENTS flag to see if the bootloader is asking + * us to not reload segments + */ + testb $(1<<6), BP_loadflags(%esi) + jnz 1f cli - movl $(__BOOT_DS),%eax - movl %eax,%ds - movl %eax,%es - movl %eax,%fs - movl %eax,%gs - movl %eax,%ss + movl $__BOOT_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %fs + movl %eax, %gs + movl %eax, %ss 1: -/* Calculate the delta between where we were compiled to run +/* + * Calculate the delta between where we were compiled to run * at and where we were actually loaded at. This can only be done * with a short local call on x86. Nothing else will tell us what * address we are running at. The reserved chunk of the real-mode * data at 0x1e4 (defined as a scratch field) are used as the stack * for this calculation. Only 4 bytes are needed. */ - leal (BP_scratch+4)(%esi), %esp - call 1f -1: popl %ebp - subl $1b, %ebp + leal (BP_scratch+4)(%esi), %esp + call 1f +1: popl %ebp + subl $1b, %ebp -/* %ebp contains the address we are loaded at by the boot loader and %ebx +/* + * %ebp contains the address we are loaded at by the boot loader and %ebx * contains the address where we should move the kernel image temporarily * for safe in-place decompression. */ #ifdef CONFIG_RELOCATABLE - movl %ebp, %ebx + movl %ebp, %ebx addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebx andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx #else - movl $LOAD_PHYSICAL_ADDR, %ebx + movl $LOAD_PHYSICAL_ADDR, %ebx #endif /* Replace the compressed data size with the uncompressed size */ - subl input_len(%ebp), %ebx - movl output_len(%ebp), %eax - addl %eax, %ebx + subl input_len(%ebp), %ebx + movl output_len(%ebp), %eax + addl %eax, %ebx /* Add 8 bytes for every 32K input block */ - shrl $12, %eax - addl %eax, %ebx + shrl $12, %eax + addl %eax, %ebx /* Add 32K + 18 bytes of extra slack */ - addl $(32768 + 18), %ebx + addl $(32768 + 18), %ebx /* Align on a 4K boundary */ - addl $4095, %ebx - andl $~4095, %ebx + addl $4095, %ebx + andl $~4095, %ebx -/* Copy the compressed kernel to the end of our buffer +/* + * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. */ - pushl %esi - leal _ebss(%ebp), %esi - leal _ebss(%ebx), %edi - movl $(_ebss - startup_32), %ecx + pushl %esi + leal _ebss(%ebp), %esi + leal _ebss(%ebx), %edi + movl $(_ebss - startup_32), %ecx std - rep - movsb + rep movsb cld - popl %esi + popl %esi -/* Compute the kernel start address. +/* + * Compute the kernel start address. */ #ifdef CONFIG_RELOCATABLE addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebp @@ -109,81 +114,84 @@ ENTRY(startup_32) /* * Jump to the relocated address. */ - leal relocated(%ebx), %eax - jmp *%eax + leal relocated(%ebx), %eax + jmp *%eax ENDPROC(startup_32) -.section ".text" + .text relocated: /* * Clear BSS */ - xorl %eax,%eax - leal _edata(%ebx),%edi - leal _ebss(%ebx), %ecx - subl %edi,%ecx + xorl %eax, %eax + leal _edata(%ebx), %edi + leal _ebss(%ebx), %ecx + subl %edi, %ecx cld - rep - stosb + rep stosb /* * Setup the stack for the decompressor */ - leal boot_stack_end(%ebx), %esp + leal boot_stack_end(%ebx), %esp /* * Do the decompression, and jump to the new kernel.. */ - movl output_len(%ebx), %eax - pushl %eax - # push arguments for decompress_kernel: - pushl %ebp # output address - movl input_len(%ebx), %eax - pushl %eax # input_len - leal input_data(%ebx), %eax - pushl %eax # input_data - leal boot_heap(%ebx), %eax - pushl %eax # heap area - pushl %esi # real mode pointer - call decompress_kernel - addl $20, %esp - popl %ecx + movl output_len(%ebx), %eax + pushl %eax + /* push arguments for decompress_kernel: */ + pushl %ebp /* output address */ + movl input_len(%ebx), %eax + pushl %eax /* input_len */ + leal input_data(%ebx), %eax + pushl %eax /* input_data */ + leal boot_heap(%ebx), %eax + pushl %eax /* heap area */ + pushl %esi /* real mode pointer */ + call decompress_kernel + addl $20, %esp + popl %ecx #if CONFIG_RELOCATABLE -/* Find the address of the relocations. +/* + * Find the address of the relocations. */ - movl %ebp, %edi - addl %ecx, %edi + movl %ebp, %edi + addl %ecx, %edi -/* Calculate the delta between where vmlinux was compiled to run +/* + * Calculate the delta between where vmlinux was compiled to run * and where it was actually loaded. */ - movl %ebp, %ebx - subl $LOAD_PHYSICAL_ADDR, %ebx - jz 2f /* Nothing to be done if loaded at compiled addr. */ + movl %ebp, %ebx + subl $LOAD_PHYSICAL_ADDR, %ebx + jz 2f /* Nothing to be done if loaded at compiled addr. */ /* * Process relocations. */ -1: subl $4, %edi - movl 0(%edi), %ecx - testl %ecx, %ecx - jz 2f - addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) - jmp 1b +1: subl $4, %edi + movl (%edi), %ecx + testl %ecx, %ecx + jz 2f + addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) + jmp 1b 2: #endif /* * Jump to the decompressed kernel. */ - xorl %ebx,%ebx - jmp *%ebp + xorl %ebx, %ebx + jmp *%ebp -.bss -/* Stack and heap for uncompression */ -.balign 4 +/* + * Stack and heap for uncompression + */ + .bss + .balign 4 boot_heap: .fill BOOT_HEAP_SIZE, 1, 0 boot_stack: -- cgit v1.2.3 From 5b11f1cee5797b38d16b94d8745b12b6727a8373 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 8 May 2009 16:20:34 -0700 Subject: x86, boot: straighten out ranges to copy/zero in compressed/head*.S Both on 32 and 64 bits, we copy all the way up to the end of bss, except that on 64 bits there is a hack to avoid copying on top of the page tables. There is no point in copying bss at all, especially since we are just about to zero it all anyway. To clean up and unify the handling, we now do: - copy from startup_32 to _bss. - zero from _bss to _ebss. - the _ebss symbol is aligned to an 8-byte boundary. - the page tables are moved to a separate section. Use _bss as the copy endpoint since _edata may be misaligned. [ Impact: cleanup, trivial performance improvement ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 7bd7766ffab..59425e157df 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -93,9 +93,9 @@ ENTRY(startup_32) * where decompression in place becomes safe. */ pushl %esi - leal _ebss(%ebp), %esi - leal _ebss(%ebx), %edi - movl $(_ebss - startup_32), %ecx + leal _bss(%ebp), %esi + leal _bss(%ebx), %edi + movl $(_bss - startup_32), %ecx std rep movsb cld @@ -125,7 +125,7 @@ relocated: * Clear BSS */ xorl %eax, %eax - leal _edata(%ebx), %edi + leal _bss(%ebx), %edi leal _ebss(%ebx), %ecx subl %edi, %ecx cld -- cgit v1.2.3 From 0a137736704ef9af719409933b3c33e138461786 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 8 May 2009 16:27:41 -0700 Subject: x86, boot: set up the decompression stack as early as possible Set up the decompression stack as soon as we know where it needs to go. That way we have a full-service stack as soon as possible, rather than relying on the BP_scratch field. Note that the stack does need to be empty during bss zeroing (or else the stack needs to be moved out of the bss segment, which is also an option.) [ Impact: cleanup, minor paranoia ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 59425e157df..d7245cf8026 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -88,6 +88,9 @@ ENTRY(startup_32) addl $4095, %ebx andl $~4095, %ebx + /* Set up the stack */ + leal boot_stack_end(%ebx), %esp + /* * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. @@ -122,7 +125,7 @@ ENDPROC(startup_32) relocated: /* - * Clear BSS + * Clear BSS (stack is currently empty) */ xorl %eax, %eax leal _bss(%ebx), %edi @@ -131,11 +134,6 @@ relocated: cld rep stosb -/* - * Setup the stack for the decompressor - */ - leal boot_stack_end(%ebx), %esp - /* * Do the decompression, and jump to the new kernel.. */ -- cgit v1.2.3 From 97541912785369925723b6255438ad9fce2ddf04 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 6 May 2009 17:56:51 -0700 Subject: x86, boot: zero EFLAGS on 32 bits The 64-bit code already clears EFLAGS as soon as it has a stack. This seems like a reasonable precaution, so do it on 32 bits as well. [ Impact: extra paranoia ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index d7245cf8026..d02a4f02be1 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -91,6 +91,10 @@ ENTRY(startup_32) /* Set up the stack */ leal boot_stack_end(%ebx), %esp + /* Zero EFLAGS */ + pushl $0 + popfl + /* * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. -- cgit v1.2.3 From 36d3793c947f1ef7ba3d24eeeddc1be41adc5ab4 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 8 May 2009 16:45:15 -0700 Subject: x86, boot: use appropriate rep string for move and clear In the pre-decompression code, use the appropriate largest possible rep movs and rep stos to move code and clear bss, respectively. For reverse copy, do note that the initial values are supposed to be the address of the first (highest) copy datum, not one byte beyond the end of the buffer. rep strings are not necessarily the fastest way to perform these operations on all current processors, but are likely to be in the future, and perhaps more importantly, we want to encourage the architecturally right thing to do here. This also fixes a couple of trivial inefficiencies on 64 bits. [ Impact: trivial performance enhancement, increase code similarity ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index d02a4f02be1..6710dc78ac5 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -100,11 +100,12 @@ ENTRY(startup_32) * where decompression in place becomes safe. */ pushl %esi - leal _bss(%ebp), %esi - leal _bss(%ebx), %edi + leal (_bss-4)(%ebp), %esi + leal (_bss-4)(%ebx), %edi movl $(_bss - startup_32), %ecx + shrl $2, %ecx std - rep movsb + rep movsl cld popl %esi @@ -135,8 +136,8 @@ relocated: leal _bss(%ebx), %edi leal _ebss(%ebx), %ecx subl %edi, %ecx - cld - rep stosb + shrl $2, %ecx + rep stosl /* * Do the decompression, and jump to the new kernel.. -- cgit v1.2.3 From 02a884c0fe7ec8459d00d34b7d4101af21fc4a86 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 8 May 2009 17:42:16 -0700 Subject: x86, boot: determine compressed code offset at compile time Determine the compressed code offset (from the kernel runtime address) at compile time. This allows some minor optimizations in arch/x86/boot/compressed/head_*.S, but more importantly it makes this value available to the build process, which will enable a future patch to export the necessary linear memory footprint into the bzImage header. [ Impact: cleanup, future patch enabling ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 6710dc78ac5..470474bafc4 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -75,18 +75,8 @@ ENTRY(startup_32) movl $LOAD_PHYSICAL_ADDR, %ebx #endif - /* Replace the compressed data size with the uncompressed size */ - subl input_len(%ebp), %ebx - movl output_len(%ebp), %eax - addl %eax, %ebx - /* Add 8 bytes for every 32K input block */ - shrl $12, %eax - addl %eax, %ebx - /* Add 32K + 18 bytes of extra slack */ - addl $(32768 + 18), %ebx - /* Align on a 4K boundary */ - addl $4095, %ebx - andl $~4095, %ebx + /* Target address to relocate to for decompression */ + addl $z_extract_offset, %ebx /* Set up the stack */ leal boot_stack_end(%ebx), %esp @@ -142,12 +132,10 @@ relocated: /* * Do the decompression, and jump to the new kernel.. */ - movl output_len(%ebx), %eax - pushl %eax + leal z_extract_offset_negative(%ebx), %ebp /* push arguments for decompress_kernel: */ pushl %ebp /* output address */ - movl input_len(%ebx), %eax - pushl %eax /* input_len */ + pushl $z_input_len /* input_len */ leal input_data(%ebx), %eax pushl %eax /* input_data */ leal boot_heap(%ebx), %eax @@ -155,14 +143,12 @@ relocated: pushl %esi /* real mode pointer */ call decompress_kernel addl $20, %esp - popl %ecx #if CONFIG_RELOCATABLE /* * Find the address of the relocations. */ - movl %ebp, %edi - addl %ecx, %edi + leal z_output_len(%ebp), %edi /* * Calculate the delta between where vmlinux was compiled to run -- cgit v1.2.3 From 99aa45595f45603526513d5e29fc00f8afbf3913 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 11 May 2009 16:02:10 -0700 Subject: x86, boot: remove dead code from boot/compressed/head_*.S Remove a couple of lines of dead code from arch/x86/boot/compressed/head_*.S; all of these update registers that are dead in the current code. [ Impact: cleanup ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 470474bafc4..2b8e0dfa4b2 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -99,16 +99,6 @@ ENTRY(startup_32) cld popl %esi -/* - * Compute the kernel start address. - */ -#ifdef CONFIG_RELOCATABLE - addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebp - andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp -#else - movl $LOAD_PHYSICAL_ADDR, %ebp -#endif - /* * Jump to the relocated address. */ -- cgit v1.2.3 From 37ba7ab5e33cebc25c68fffe33e9f21e7c2014e8 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 11 May 2009 15:56:08 -0700 Subject: x86, boot: make kernel_alignment adjustable; new bzImage fields Make the kernel_alignment field adjustable; this allows us to set it to a large value (intended to be 16 MB to avoid ZONE_DMA contention, memory holes and other weirdness) while a smart bootloader can still force a loading at a lesser alignment if absolutely necessary. Also export pref_address (preferred loading address, corresponding to the link-time address) and init_size, the total amount of linear memory the kernel will require during initialization. [ Impact: allows better kernel placement, gives bootloader more info ] Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_32.S | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/boot/compressed/head_32.S') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 2b8e0dfa4b2..75e4f001e70 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -69,8 +69,11 @@ ENTRY(startup_32) #ifdef CONFIG_RELOCATABLE movl %ebp, %ebx - addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebx - andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx + movl BP_kernel_alignment(%esi), %eax + decl %eax + addl %eax, %ebx + notl %eax + andl %eax, %ebx #else movl $LOAD_PHYSICAL_ADDR, %ebx #endif -- cgit v1.2.3