[PATCH 0/7] zram/xvmalloc: 64K page fixes and optimizations

January 26th, 2011 - 12:30 pm ET by Robert Jennings
The xvmalloc allocator is non-functional when running with a 64K page
size. The first two patches fix 64K page related issues.

The remaining patches provide some small optimizations for zram and
xvmalloc.

Regards,
Robert Jennings

#1 Robert Jennings
January 26th, 2011 - 12:30 pm ET
xvmalloc will not currently function with 64K pages. Newly allocated
pages will be inserted at an offset beyond the end of the first-level
index. This tuning is needed to properly size the allocator for 64K
pages.

The default shift of 3 (an 8-byte delta between free lists) produces more
second-level lists than the 64 bits of the flbitmap in the xv_pool structure
can cover. The shift must increase to 4 (a 16-byte delta between second-level
list entries) so the first-level bitmap can index them all.

Here are a few statistics for structure sizes on 32- and 64-bit CPUs
with 4KB and 64KB page sizes.

bits_per_long              32        64        64
page_size               4,096     4,096    65,536
xv_align                    4         8         8
fl_delta                    3         3         4
num_free_lists            508       508     4,094
xv_pool size            4,144b    8,216b   66,040b
per object overhead        32        64        64
zram struct 0.5GB disk    512KB    1024KB      64KB
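
To make the sizing above easy to check, here is a small userspace sketch of
the arithmetic (illustrative only, not part of the patch; the local names
mirror the macros in xvmalloc_int.h):

  #include <stdio.h>

  /* Recompute the xvmalloc sizing for a given page size; the names mirror
   * XV_MAX_ALLOC_SIZE, NUM_FREE_LISTS and MAX_FLI (64-bit longs assumed). */
  static void show(unsigned long page_size, unsigned long xv_align,
                   unsigned long fl_delta_shift)
  {
          unsigned long fl_delta = 1UL << fl_delta_shift;
          unsigned long max_alloc = page_size - xv_align;
          unsigned long num_free_lists = (max_alloc - 32) / fl_delta + 1;
          unsigned long max_fli = (num_free_lists + 63) / 64;

          printf("page %6lu, fl_delta %2lu: %4lu free lists, %3lu first-level bits\n",
                 page_size, fl_delta, num_free_lists, max_fli);
  }

  int main(void)
  {
          show(4096, 8, 3);   /*  508 lists,   8 bits: fits the 64-bit flbitmap */
          show(65536, 8, 3);  /* 8188 lists, 128 bits: overflows the flbitmap   */
          show(65536, 8, 4);  /* 4094 lists,  64 bits: fits again               */
          return 0;
  }

The middle case is the failure mode described above: with the old shift a 64K
page needs 128 first-level bits, which a single 64-bit flbitmap cannot provide.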

This patch maintains the current tunings for 4K pages, adds an optimal
sizing for 64K pages, and adds a safe tuning for any other page size.

Signed-off-by: Robert Jennings

drivers/staging/zram/xvmalloc_int.h | 18 ++++++++++++++++--
1 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/zram/xvmalloc_int.h b/drivers/staging/zram/xvmalloc_int.h
index e23ed5c..051a49b 100644
--- a/drivers/staging/zram/xvmalloc_int.h
+++ b/drivers/staging/zram/xvmalloc_int.h
@@ -19,7 +19,11 @@
/* User configurable params */

/* Must be power of two */
+#ifdef CONFIG_64BIT
+#define XV_ALIGN_SHIFT 3
+#else
#define XV_ALIGN_SHIFT 2
+#endif
#define XV_ALIGN (1 << XV_ALIGN_SHIFT)
#define XV_ALIGN_MASK (XV_ALIGN - 1)

@@ -27,8 +31,18 @@
#define XV_MIN_ALLOC_SIZE 32
#define XV_MAX_ALLOC_SIZE (PAGE_SIZE - XV_ALIGN)

-/* Free lists are separated by FL_DELTA bytes */
-#define FL_DELTA_SHIFT 3
+/*
+ * Free lists are separated by FL_DELTA bytes
+ * This value is 3 for 4k pages and 4 for 64k pages, for any
+ * other page size, a conservative (PAGE_SHIFT - 9) is used.
+ */
+#if PAGE_SHIFT == 12
+#define FL_DELTA_SHIFT 3
+#elif PAGE_SHIFT == 16
+#define FL_DELTA_SHIFT 4
+#else
+#define FL_DELTA_SHIFT (PAGE_SHIFT - 9)
+#endif
#define FL_DELTA (1 << FL_DELTA_SHIFT)
#define FL_DELTA_MASK (FL_DELTA - 1)
#define NUM_FREE_LISTS ((XV_MAX_ALLOC_SIZE - XV_MIN_ALLOC_SIZE) \
1.6.0.2

#2 Robert Jennings
January 26th, 2011 - 12:30 pm ET
This wraps the code that its comments mark as debug-only in #ifdef DEBUG,
so that it is compiled out of non-debug kernels.
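
For anyone who wants to keep exercising those paths during development, the
wrapped blocks can be compiled back in by defining DEBUG for this file (this
knob is an illustration, not part of the patch):

  /* Development only: define before the #ifdef DEBUG blocks in xvmalloc.c */
  #define DEBUG 1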

Signed-off-by: Robert Jennings

drivers/staging/zram/xvmalloc.c | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/drivers/staging/zram/xvmalloc.c b/drivers/staging/zram/xvmalloc.c
index b3622f1..172514e 100644
--- a/drivers/staging/zram/xvmalloc.c
+++ b/drivers/staging/zram/xvmalloc.c
@@ -219,7 +219,6 @@ static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
static void remove_block_head(struct xv_pool *pool,
struct block_header *block, u32 slindex)
{
- struct block_header *tmpblock;
u32 flindex = slindex / BITS_PER_LONG;

pool->freelist[slindex].page = block->link.next_page;
@@ -232,6 +231,8 @@ static void remove_block_head(struct xv_pool *pool,
if (!pool->slbitmap[flindex])
__clear_bit(flindex, &pool->flbitmap);
} else {
+#ifdef DEBUG
+ struct block_header *tmpblock;
/*
* DEBUG ONLY: We need not reinitialize freelist head previous
* pointer to 0 - we never depend on its value. But just for
@@ -242,6 +243,7 @@ static void remove_block_head(struct xv_pool *pool,
tmpblock->link.prev_page = 0;
tmpblock->link.prev_offset = 0;
put_ptr_atomic(tmpblock, KM_USER1);
+#endif
}
}

1.6.0.2

#3 Robert Jennings
January 26th, 2011 - 12:40 pm ET
By swapping the total_pages statistic with the lock, we close a
hole in the structure on 64-bit CPUs.
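
For reference, with lock debugging disabled spinlock_t is 4 bytes, and the old
64-bit layout looks roughly as follows (hand-computed offsets for 4K pages,
not pahole output):

  struct xv_pool {                 /* old layout, 64-bit, 4K pages */
          ulong flbitmap;          /* offset  0, size  8 */
          ulong slbitmap[MAX_FLI]; /* offset  8, size 64 */
          spinlock_t lock;         /* offset 72, size  4 */
                                   /* offset 76: 4-byte hole, since freelist
                                    * entries contain a pointer and need
                                    * 8-byte alignment */
          struct freelist_entry freelist[NUM_FREE_LISTS]; /* offset 80 */

          /* stats */
          u64 total_pages;
  };

Placing total_pages directly after the bitmaps (where it is already 8-byte
aligned) and moving the 4-byte lock to the tail removes that internal hole.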

Signed-off-by: Robert Jennings

drivers/staging/zram/xvmalloc_int.h | 7 ++--
1 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/zram/xvmalloc_int.h b/drivers/staging/zram/xvmalloc_int.h
index 68db384..e51eea4 100644
--- a/drivers/staging/zram/xvmalloc_int.h
+++ b/drivers/staging/zram/xvmalloc_int.h
@@ -93,12 +93,9 @@ struct block_header {
struct xv_pool {
ulong flbitmap;
ulong slbitmap[MAX_FLI];
- spinlock_t lock;
-
+ u64 total_pages; /* stats */
struct freelist_entry freelist[NUM_FREE_LISTS];
-
- /* stats */
- u64 total_pages;
+ spinlock_t lock;
};

#endif
1.6.0.2

#4 Pekka Enberg
January 26th, 2011 - 12:50 pm ET
On Wed, Jan 26, 2011 at 7:27 PM, Robert Jennings wrote:
Adding QUEUE_FLAG_NONROT to the request_queue flags.  When used
as a swap device, scan_swap_map will avoid the extra work of optimizing
for rotating media.
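
(The hunk itself is trimmed from this quote; for context, the usual pattern
for a block driver of this era is a single call when the queue is set up,
sketched below rather than reproduced from the patch.)

  /* Sketch: mark the zram queue non-rotational at device setup so that
   * swap allocation (scan_swap_map) skips rotating-media optimizations. */
  queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);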

Signed-off-by: Robert Jennings



Reviewed-by: Pekka Enberg
#5 Pekka Enberg
January 26th, 2011 - 12:50 pm ET
On Wed, Jan 26, 2011 at 7:22 PM, Robert Jennings wrote:
[...]
-/* Free lists are separated by FL_DELTA bytes */
-#define FL_DELTA_SHIFT 3
+/*
+ * Free lists are separated by FL_DELTA bytes
+ * This value is 3 for 4k pages and 4 for 64k pages, for any
+ * other page size, a conservative (PAGE_SHIFT - 9) is used.
+ */
+#if PAGE_SHIFT == 12
+#define FL_DELTA_SHIFT 3



This is handled by the else branch already, no?

+#elif PAGE_SHIFT == 16
+#define FL_DELTA_SHIFT 4
+#else
+#define FL_DELTA_SHIFT (PAGE_SHIFT - 9)
+#endif

