[PATCHSET] blkcg: update locking and fix stacking

February 16th, 2012 - 05:40 pm ET by Tejun Heo | Report spam
Hey, guys.

This is the third patchset of blkcg API cleanup series and does the
following two things.

* Drops RCU and use double locking. As with ioc, we might want to
re-introduce RCU in more restricted form to walk blkg from blkcg but
for now there's no need. cgroup deletion is much colder than task
exit and there's no other path which requires reverse double
locking.

* Before this patchset, blk-throttle and cfq-iosched propio couldn't
be used together because for any bios delayed by blk-throttle would
be issued by a worker thread and get dumped to the default cgroup.
So, bios will be nondeterministically thrown into the root cg. This
isn't a problem limited to blk-throttle. Any mechanism which punts
bios to a different task (e.g. btrfs) would mess up IO scheduling
for both cgroups and iocontexts.

This patchset implements a mechanism to associate a bio to a task
(its cgroup and ioc actually) and block layer will handle the
request as if it were issued by the associated task no matter which
task actually ends up issuing it to block layer. It's applied to
blk-throttle such that any delayed bio is associated to the original
task.

This patchset contains the following 9 patches.

0001-blkcg-use-double-locking-instead-of-RCU-for-blkg-syn.patch
0002-blkcg-drop-unnecessary-RCU-locking.patch
0003-block-restructure-get_request.patch
0004-block-interface-update-for-ioc-icq-creation-function.patch
0005-block-ioc_task_link-can-t-fail.patch
0006-block-add-io_context-active_ref.patch
0007-block-implement-bio_associate_current.patch
0008-block-make-block-cgroup-policies-follow-bio-task-ass.patch
0009-block-make-blk-throttle-preserve-the-issuing-task-on.patch

0001-0002 updates locking and strips out RCU.

0003-0006 prepare for bio task association.

0007-0009 implement bio task association and apply it to
blk-throttle.

This patchset is on top of

block/for-linus 621032ad6eaabf2fe771c4fa0d8f58e1fcfcdba6
+ [1] blkcg: kill policy node and blkg->dev, take#4
+ [2] blkcg: unify blkgs for different policies (updated)

and is also available in the following git branch.

git://git.kernel.org/pub/scm/linux/...j/misc.git blkcg-locking

The git branch has separate branches for previous patchsets, so using
it probably is the easiest way to review this series. Jens, if you
want the whole thing reposted, please let me know.

diffstat follows.

block/blk-cgroup.c | 167 +++++++++++++++++--
block/blk-cgroup.h | 12 +--
block/blk-core.c | 92 ++++++++++++++--
block/blk-ioc.c | 58 +++++++++
block/blk-throttle.c | 41 +-
block/blk.h | 24 +++
block/cfq-iosched.c | 54 ++++-
block/cfq.h | 10 --
block/elevator.c | 5 -
fs/bio.c | 61 ++++++++++++++++
include/linux/bio.h | 8 ++
include/linux/blk_types.h | 10 ++
include/linux/blkdev.h | 1
include/linux/elevator.h | 6 +
include/linux/iocontext.h | 32 ++++++--
kernel/fork.c | 5 -
16 files changed, 299 insertions(+), 287 deletions(-)

Thanks.

tejun

[1] http://thread.gmane.org/gmane.linux.kernel/1247152
[2] http://thread.gmane.org/gmane.linux.kernel/1247287
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
email Follow the discussionReplies 39 repliesReplies Make a reply

Replies

#1 Tejun Heo
February 16th, 2012 - 05:40 pm ET | Report spam
Currently ioc->nr_tasks is used to decide two things - whether an ioc
is done issuing IOs and whether it's shared by multiple tasks. This
patch separate out the first into ioc->active_ref, which is acquired
and released using {get|put}_io_context_active() respectively.

This will be used to associate bio's with a given task. This patch
doesn't introduce any visible behavior change.

Signed-off-by: Tejun Heo
Cc: Vivek Goyal

block/blk-ioc.c | 36 +++++++++++++++++++++++++--
block/cfq-iosched.c | 4 ++--
include/linux/iocontext.h | 22 ++++++++++++++++++++--
3 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 1092874..439ec21 100644
a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -149,20 +149,20 @@ void put_io_context(struct io_context *ioc)
}
EXPORT_SYMBOL(put_io_context);

-/* Called by the exiting task */
-void exit_io_context(struct task_struct *task)
+/**
+ * put_io_context_active - put active reference on ioc
+ * @ioc: ioc of interest
+ *
+ * Undo get_io_context_active(). If active reference reaches zero after
+ * put, @ioc can never issue further IOs and ioscheds are notified.
+ */
+void put_io_context_active(struct io_context *ioc)
{
- struct io_context *ioc;
- struct io_cq *icq;
struct hlist_node *n;
unsigned long flags;
+ struct io_cq *icq;

- task_lock(task);
- ioc = task->io_context;
- task->io_context = NULL;
- task_unlock(task);
-
- if (!atomic_dec_and_test(&ioc->nr_tasks)) {
+ if (!atomic_dec_and_test(&ioc->active_ref)) {
put_io_context(ioc);
return;
}
@@ -191,6 +191,20 @@ retry:
put_io_context(ioc);
}

+/* Called by the exiting task */
+void exit_io_context(struct task_struct *task)
+{
+ struct io_context *ioc;
+
+ task_lock(task);
+ ioc = task->io_context;
+ task->io_context = NULL;
+ task_unlock(task);
+
+ atomic_dec(&ioc->nr_tasks);
+ put_io_context_active(ioc);
+}
+
/**
* ioc_clear_queue - break any ioc association with the specified queue
* @q: request_queue being cleared
@@ -223,7 +237,7 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node)

/* initialize */
atomic_long_set(&ioc->refcount, 1);
- atomic_set(&ioc->nr_tasks, 1);
+ atomic_set(&ioc->active_ref, 1);
spin_lock_init(&ioc->lock);
INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
INIT_HLIST_HEAD(&ioc->icq_list);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index cc8e9ef..00e28a3 100644
a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1865,7 +1865,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
* task has exited, don't wait
*/
cic = cfqd->active_cic;
- if (!cic || !atomic_read(&cic->icq.ioc->nr_tasks))
+ if (!cic || !atomic_read(&cic->icq.ioc->active_ref))
return;

/*
@@ -2841,7 +2841,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,

if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE))
enable_idle = 0;
- else if (!atomic_read(&cic->icq.ioc->nr_tasks) ||
+ else if (!atomic_read(&cic->icq.ioc->active_ref) ||
!cfqd->cfq_slice_idle ||
(!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
enable_idle = 0;
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 81a8870..6f1a260 100644
a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -100,6 +100,7 @@ struct io_cq {
*/
struct io_context {
atomic_long_t refcount;
+ atomic_t active_ref;
atomic_t nr_tasks;

/* all the fields below are protected by this lock */
@@ -120,17 +121,34 @@ struct io_context {
struct work_struct release_work;
};

-static inline void ioc_task_link(struct io_context *ioc)
+/**
+ * get_io_context_active - get active reference on ioc
+ * @ioc: ioc of interest
+ *
+ * Only iocs with active reference can issue new IOs. This function
+ * acquires an active reference on @ioc. The caller must already have an
+ * active reference on @ioc.
+ */
+static inline void get_io_context_active(struct io_context *ioc)
{
WARN_ON_ONCE(atomic_long_read(&ioc->refcount) <= 0);
- WARN_ON_ONCE(atomic_read(&ioc->nr_tasks) <= 0);
+ WARN_ON_ONCE(atomic_read(&ioc->active_ref) <= 0);
atomic_long_inc(&ioc->refcount);
+ atomic_inc(&ioc->active_ref);
+}
+
+static inline void ioc_task_link(struct io_context *ioc)
+{
+ get_io_context_active(ioc);
+
+ WARN_ON_ONCE(atomic_read(&ioc->nr_tasks) <= 0);
atomic_inc(&ioc->nr_tasks);
}

struct task_struct;
#ifdef CONFIG_BLOCK
void put_io_context(struct io_context *ioc);
+void put_io_context_active(struct io_context *ioc);
void exit_io_context(struct task_struct *task);
struct io_context *get_task_io_context(struct task_struct *task,
gfp_t gfp_flags, int node);
1.7.7.3

To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Similar topics