[PATCH 1/2] x86: add max_addr boot option

May 22nd, 2012 - 03:00 am ET by Wen Congyang | Report spam
Currently, the boot option max_addr is only supported on the ia64 platform.
We also need it on the x86 platform.
For example:
There are two nodes:
NODE#0 address range 0x00000000 00000000 - 0x00010000 00000000
NODE#1 address range 0x00010000 00000000 - 0x00020000 00000000
If we only want to use node0, we can specify max_addr. The boot
option "mem=" can currently do the same thing, but "mem=" is documented
as the total amount of memory used by the system. If we tell users
that "mem=" also acts as an address limit, it will confuse them.
So we need a new boot option "max_addr" on the x86 platform.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>

Documentation/kernel-parameters.txt | 2 +-
arch/x86/kernel/e820.c | 36 +++++++++++++++++++++++++++++++++++
2 files changed, 37 insertions(+), 1 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c1601e5..fe80e58 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1409,7 +1409,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
yeeloong laptop.
Example: machtype=lemote-yeeloong-2f-7inch

- max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater
+ max_addr=nn[KMG] [KNL,BOOT,ia64,X86] All physical memory greater
than or equal to this physical address is ignored.

maxcpus= [SMP] Maximum number of processors that an SMP kernel
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 62d61e9..2a6bec7 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -47,6 +47,7 @@ unsigned long pci_mem_start = 0xaeedbabe;
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif
+static u64 max_addr = ~0ULL;

/*
* This function checks if any part of the range <start,end> is mapped
@@ -117,6 +118,20 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
return;
}

+ if (start >= max_addr) {
+ printk(KERN_INFO "Ingoring memory: %016Lx - %016Lx",
+ (unsigned long long)start,
+ (unsigned long long)(start + size));
+ return;
+ }
+
+ if (max_addr - start < size) {
+ printk(KERN_INFO "Ingoring memory: %016Lx - %016Lx",
+ (unsigned long long)max_addr,
+ (unsigned long long)(start + size));
+ size = max_addr - start;
+ }
+
e820x->map[x].addr = start;
e820x->map[x].size = size;
e820x->map[x].type = type;
@@ -835,6 +850,22 @@ static int __init parse_memopt(char *p)
}
early_param("mem", parse_memopt);

+static int __init parse_memmax_opt(char *p)
+{
+ char *oldp;
+
+ if (!p)
+ return -EINVAL;
+
+ oldp = p;
+ max_addr = memparse(p, &p);
+ if (p == oldp)
+ return -EINVAL;
+
+ return 0;
+}
+early_param("max_addr", parse_memmax_opt);
+
static int __init parse_memmap_opt(char *p)
{
char *oldp;
@@ -881,6 +912,11 @@ early_param("memmap", parse_memmap_opt);

void __init finish_e820_parsing(void)
{
+ if (max_addr != ~0ULL) {
+ userdef = 1;
+ e820_remove_range(max_addr, ULLONG_MAX - max_addr, E820_RAM, 1);
+ }
+
if (userdef) {
u32 nr = e820.nr_map;

1.7.1
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
email Follow the discussionReplies 6 repliesReplies Make a reply

Similar topics

Replies

#1 Wen Congyang
May 22nd, 2012 - 03:10 am ET | Report spam
The boot option "mem=" specifies the total memory that the system can
use. But we implement it as max_addr.

An x86 system can be booted via EFI. If the user specifies the boot
option "add_efi_memmap", we add the whole memory map from EFI, but we
do not trim that memory map according to the boot option "mem=".

This patch reimplements the boot option "mem=" and applies the limit to
the memory map after calling efi_init().

Signed-off-by: Wen Congyang

arch/x86/include/asm/e820.h | 1 +
arch/x86/kernel/e820.c | 36 +++++++++++++++++++++++++++++++--
arch/x86/kernel/setup.c | 1 +
3 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 3778256..d1bb772 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -127,6 +127,7 @@ extern void e820_reserve_resources(void);
extern void e820_reserve_resources_late(void);
extern void setup_memory_map(void);
extern char *default_machine_specific_memory_setup(void);
+extern void set_memlimit(void);

/*
* Returns true iff the specified range [s,e) is completely contained inside
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 2a6bec7..0148944 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -48,6 +48,7 @@ unsigned long pci_mem_start = 0xaeedbabe;
EXPORT_SYMBOL(pci_mem_start);
#endif
static u64 max_addr = ~0ULL;
+static u64 mem_limit = ~0ULL;

/*
* This function checks if any part of the range <start,end> is mapped
@@ -824,8 +825,6 @@ static int userdef __initdata;
/* "mem=nopentium" disables the 4MB page tables. */
static int __init parse_memopt(char *p)
{
- u64 mem_size;
-
if (!p)
return -EINVAL;

@@ -840,16 +839,43 @@ static int __init parse_memopt(char *p)
}

userdef = 1;
- mem_size = memparse(p, &p);
+ mem_limit = memparse(p, &p);
/* don't remove all of memory when handling "mem={invalid}" param */
- if (mem_size == 0)
+ if (mem_limit == 0)
return -EINVAL;
- e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);

return 0;
}
early_param("mem", parse_memopt);

+void __init set_memlimit(void)
+{
+ u64 total_size = 0;
+ int i;
+
+ if (mem_limit == ~0ULL)
+ return;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+
+ if (ei->type != E820_RAM)
+ continue;
+
+ if (total_size >= mem_limit) {
+ memset(ei, 0, sizeof(struct e820entry));
+ continue;
+ }
+
+ if (mem_limit - total_size <= ei->size)
+ ei->size = mem_limit - total_size;
+
+ total_size += ei->size;
+ }
+
+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+}
+
static int __init parse_memmax_opt(char *p)
{
char *oldp;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1a29015..7938fae 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -823,6 +823,7 @@ void __init setup_arch(char **cmdline_p)

if (efi_enabled)
efi_init();
+ set_memlimit();

dmi_scan_machine();

1.7.1

To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Replies Reply to this message
#2 Rob Landley
May 22nd, 2012 - 04:00 pm ET | Report spam
On 05/22/2012 02:02 AM, Wen Congyang wrote:
If we only want to use node0, we can specify the max_addr. The boot
option "mem=" can do the same thing now. But the boot option "mem="
means the total memory used by the system. If we tell the user
that the boot option "mem=" can do this, it will confuse the user.
So we need an new boot option "max_addr" on x86 platform.



I didn't follow that reasoning at all. Care to try again?

(mem= can do this, but telling users that would confuse them? What?)

Rob
GNU/Linux isn't: Linux=GPLv2, GNU=GPLv3+, they can't share code.
Either it's "mere aggregation", or a license violation. Pick one.
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Replies Reply to this message
#3 Wen Congyang
May 22nd, 2012 - 09:20 pm ET | Report spam
At 05/23/2012 03:51 AM, Rob Landley Wrote:
On 05/22/2012 02:02 AM, Wen Congyang wrote:
If we only want to use node0, we can specify the max_addr. The boot
option "mem=" can do the same thing now. But the boot option "mem="
means the total memory used by the system. If we tell the user
that the boot option "mem=" can do this, it will confuse the user.
So we need an new boot option "max_addr" on x86 platform.



I didn't follow that reasoning at all. Care to try again?

(mem= can do this, but telling users that would confuse them? What?)



mem= means the total memory, but we implement it as a maximum address.
I do not know why we implement it as a maximum address. The users do not
know how it is implemented; they only know that they can use
mem= to set the total memory. If you tell the users that mem= can set the max address, it will confuse them.

Thanks
Wen Congyang


Rob



To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Replies Reply to this message
#4 KAMEZAWA Hiroyuki
May 23rd, 2012 - 12:10 am ET | Report spam
(2012/05/23 4:51), Rob Landley wrote:

On 05/22/2012 02:02 AM, Wen Congyang wrote:
If we only want to use node0, we can specify the max_addr. The boot
option "mem=" can do the same thing now. But the boot option "mem="
means the total memory used by the system. If we tell the user
that the boot option "mem=" can do this, it will confuse the user.
So we need an new boot option "max_addr" on x86 platform.



I didn't follow that reasoning at all. Care to try again?

(mem= can do this, but telling users that would confuse them? What?)





Kernel doc says

mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
Amount of memory to be used when the kernel is not able
to see the whole system memory or for test.
[X86-32] Use together with memmap= to avoid physical
address space collisions. Without memmap= PCI devices
could be placed at addresses belonging to unused RAM.

max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater
than or equal to this physical address is ignored.

And currently, on x86 with e820, the mem= option works like the max_addr= option.

This has sometimes caused trouble for our customers. On many servers, the address
range 3G-4G is reserved for PCI.

This is my host's dmesg.
=BIOS-provided physical RAM map:
BIOS-e820: 0000000000000000 - 000000000009e800 (usable)
BIOS-e820: 000000000009e800 - 00000000000a0000 (reserved)
BIOS-e820: 00000000000ce000 - 00000000000d0000 (reserved)
BIOS-e820: 00000000000e0000 - 0000000000100000 (reserved)
BIOS-e820: 0000000000100000 - 00000000bfee0000 (usable)
BIOS-e820: 00000000bfee0000 - 00000000bfee7000 (ACPI data)
BIOS-e820: 00000000bfee7000 - 00000000bff00000 (ACPI NVS)
BIOS-e820: 00000000bff00000 - 00000000bff80000 (usable)
BIOS-e820: 00000000bff80000 - 00000000c0000000 (reserved)
BIOS-e820: 00000000e0000000 - 00000000f0000000 (reserved)
BIOS-e820: 00000000fe000000 - 00000000fed00000 (reserved)
BIOS-e820: 00000000fee00000 - 00000000fef00000 (reserved)
BIOS-e820: 00000000ffb00000 - 0000000100000000 (reserved)
BIOS-e820: 0000000100000000 - 0000000640000000 (usable)
=
So, if a customer sets mem=10G, the system will boot with only 9G of memory
(about 3G below the PCI hole plus the 4G-10G range).
I think this is a bug and mem= should mean 'amount of memory'.

For users who want to hide memory at higher addresses, I think the
max_addr= option is suitable.

Thanks,
-Kame











To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Replies Reply to this message
#5 Bjorn Helgaas
May 23rd, 2012 - 10:30 am ET | Report spam
On Tue, May 22, 2012 at 1:02 AM, Wen Congyang wrote:

+       if (start >= max_addr) {
+               printk(KERN_INFO "Ingoring memory: %016Lx - %016Lx",
+                      (unsigned long long)start,
+                      (unsigned long long)(start + size));
+               return;
+       }
+
+       if (max_addr - start < size) {
+               printk(KERN_INFO "Ingoring memory: %016Lx - %016Lx",
+                      (unsigned long long)max_addr,
+                      (unsigned long long)(start + size));
+               size = max_addr - start;



s/Ingoring/Ignoring/

Please use a format like "e820: ignoring [mem %#010Lx-%#010Lx]" so it
matches other places where we print physical address ranges. See
https://lkml.org/lkml/2012/2/13/436

Bjorn
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Replies Reply to this message
Help Create a new topicNext page Replies Make a reply
Search Make your own search