From: Robin Holt <holt@sgi.com>
Subject: x86 / PAT: Update the page flags for memtype atomically instead of using memtype_lock. -V4
References: bnc#588008, FATE#306952
Patch-mainline: not yet

While testing an application using the xpmem (out-of-kernel) driver, we
noticed a significantly reduced page fault rate on x86_64 compared with
ia64.  For one test running with 32 cpus, one thread per cpu, it took
01:08 for each of the threads to vm_insert_pfn 2GB worth of pages.  For
the same test running on 256 cpus, one thread per cpu, it took 14:48 to
vm_insert_pfn 2GB worth of pages.

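The shape of the test is roughly the following sketch.  This is
illustrative only: the xpmem driver is out of tree, and
insert_test_range() and its parameters are hypothetical names, not code
from that driver.

#include <linux/mm.h>

/*
 * Hypothetical sketch of the per-thread test loop: insert 2GB worth of
 * pages into a VM_PFNMAP vma.  Each vm_insert_pfn() call goes through
 * the PAT memtype tracking, which is where lookup_memtype() shows up.
 */
static int insert_test_range(struct vm_area_struct *vma,
			     unsigned long start_pfn)
{
	unsigned long npages = (2UL << 30) >> PAGE_SHIFT;	/* 2GB */
	unsigned long i;
	int ret;

	for (i = 0; i < npages; i++) {
		ret = vm_insert_pfn(vma, vma->vm_start + (i << PAGE_SHIFT),
				    start_pfn + i);
		if (ret)
			return ret;
	}
	return 0;
}
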
The slowdown was tracked to lookup_memtype(), which acquires the
spinlock memtype_lock.  This heavily contended lock was slowing down
vm_insert_pfn().

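For RAM pages the memtype is encoded in two bits of page->flags, so a
reader such as lookup_memtype() only needs a single load and no lock.
The following is a sketch based on the mainline (non-Xen) counterpart
of this patch; the _PGMT_* mask names come from that version and may
differ in this tree.

#include <linux/mm.h>	/* struct page, PG_arch_1, PG_uncached */

/* Two page flags encode four memtype states. */
#define _PGMT_DEFAULT		0	/* memtype never set: default, WB */
#define _PGMT_WC		(1UL << PG_arch_1)
#define _PGMT_UC_MINUS		(1UL << PG_uncached)
#define _PGMT_WB		(1UL << PG_uncached | 1UL << PG_arch_1)
#define _PGMT_MASK		(1UL << PG_uncached | 1UL << PG_arch_1)

/*
 * Lock-free reader: one load of page->flags suffices, so callers such
 * as lookup_memtype() no longer need memtype_lock.
 */
static inline unsigned long get_page_memtype(struct page *pg)
{
	unsigned long pg_flags = pg->flags & _PGMT_MASK;

	if (pg_flags == _PGMT_DEFAULT)
		return -1;		/* default state, treated as WB */
	else if (pg_flags == _PGMT_WC)
		return _PAGE_CACHE_WC;
	else if (pg_flags == _PGMT_UC_MINUS)
		return _PAGE_CACHE_UC_MINUS;
	else
		return _PAGE_CACHE_WB;
}
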
With the cmpxchg on page->flags method, both the 32 cpu and 256 cpu
cases take approx 00:01.3 seconds to complete.

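The writer side updates the same two bits with a cmpxchg retry loop
instead of taking memtype_lock.  Again a sketch following the mainline
counterpart of this patch, using the _PGMT_* names from above:

#define _PGMT_CLEAR_MASK	(~_PGMT_MASK)

static inline void set_page_memtype(struct page *pg, unsigned long memtype)
{
	unsigned long memtype_flags;
	unsigned long old_flags;
	unsigned long new_flags;

	switch (memtype) {
	case _PAGE_CACHE_WC:
		memtype_flags = _PGMT_WC;
		break;
	case _PAGE_CACHE_UC_MINUS:
		memtype_flags = _PGMT_UC_MINUS;
		break;
	case _PAGE_CACHE_WB:
		memtype_flags = _PGMT_WB;
		break;
	default:
		memtype_flags = _PGMT_DEFAULT;
		break;
	}

	/*
	 * Retry until no other flag update raced with our read-modify-
	 * write; this replaces taking the global memtype_lock.
	 */
	do {
		old_flags = pg->flags;
		new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags;
	} while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags);
}
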
Changes since -V2:
1) Cleared up the naming of the masks used in setting and clearing
the flags.

Changes since -V1:
1) Introduced atomically setting and clearing the page flags rather
than using the global memtype_lock to protect page->flags.

2) This gave me the opportunity to convert the rwlock back into a
spinlock without affecting _MY_ test's performance, as all the pages my
test was utilizing are tracked by struct pages.

3) Corrected the commit log.  The timings were for 32 cpus and not 256.

Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Rafael J. Wysocki <rjw@suse.de>
Automatically created from "patches.arch/x86-pat-Update-page-flags-for-memtype-without-using-memtype_lock-V4.patch" by xen-port-patches.py

--- sle11sp1-2010-03-22.orig/arch/x86/mm/pat-xen.c	2010-02-05 11:22:27.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/mm/pat-xen.c	2010-03-22 12:52:42.000000000 +0100
@@ -317,8 +317,6 @@ static int pat_pagerange_is_ram(resource
  * Here we do two pass:
  * - Find the memtype of all the pages in the range, look for any conflicts
  * - In case of no conflicts, set the new memtype for pages in the range
- *
- * Caller must hold memtype_lock for atomicity.
  */
 static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
 				  unsigned long *new_type)
@@ -434,9 +432,7 @@ int reserve_memtype(u64 start, u64 end,
 	is_range_ram = pat_pagerange_is_ram(start, end);
 	if (is_range_ram == 1) {
 
-		spin_lock(&memtype_lock);
 		err = reserve_ram_pages_type(start, end, req_type, new_type);
-		spin_unlock(&memtype_lock);
 
 		return err;
 	} else if (is_range_ram < 0) {
@@ -531,9 +527,7 @@ int free_memtype(u64 start, u64 end)
 	is_range_ram = pat_pagerange_is_ram(start, end);
 	if (is_range_ram == 1) {
 
-		spin_lock(&memtype_lock);
 		err = free_ram_pages_type(start, end);
-		spin_unlock(&memtype_lock);
 
 		return err;
 	} else if (is_range_ram < 0) {
@@ -614,10 +608,8 @@ static unsigned long lookup_memtype(u64
 
 	if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
 		struct page *page;
-		spin_lock(&memtype_lock);
 		page = pfn_to_page(paddr >> PAGE_SHIFT);
 		rettype = get_page_memtype(page);
-		spin_unlock(&memtype_lock);
 		/*
 		 * -1 from get_page_memtype() implies RAM page is in its
 		 * default state and not reserved, and hence of type WB