[PATCH 0/3] target: Per session tag pooling WIP using lib/tags

2 views
Skip to first unread message

Nicholas A. Bellinger

unread,
Jun 7, 2013, 10:42:15 PM6/7/13
to target-devel, linux-scsi, Christoph Hellwig, Roland Dreier, Kent Overstreet, Asias He, Michael S. Tsirkin, Or Gerlitz, Moussa Ba, Nicholas Bellinger
From: Nicholas Bellinger <n...@linux-iscsi.org>

Hi folks.

Here is an initial series for adding target descriptor pre-allocation
support based upon Kent's excellent per-cpu struct tag_pool library code.

It allows fabric drivers to avoid fast path memory allocation/free
overhead up to the number of registered struct tag_pool tags known at
se_session creation time, similar to how blk-mq currently works using
hw tag resource pre-allocation of struct request and friends.

Thus far vhost/scsi has been updated to use se_sess->sess_tag_pool,
and patches doing similar fabric driver pre-allocation conversions
are currently a WIP..

Also given lib/tags is not upstream just yet, Kent's patch is included
here for review context.

Thank you,

--nab

Kent Overstreet (1):
Percpu tag allocator

Nicholas Bellinger (2):
target: Add transport_init_session_tagpool using per-cpu command map
vhost/scsi: Convert to generic tag_alloc + tag_free command map

drivers/target/target_core_transport.c | 33 +++++++
drivers/vhost/scsi.c | 31 ++++---
include/linux/tags.h | 38 +++++++
include/target/target_core_base.h | 5 +
include/target/target_core_fabric.h | 1 +
lib/Kconfig | 4 +
lib/Makefile | 5 +-
lib/tags.c | 166 ++++++++++++++++++++++++++++++++
8 files changed, 269 insertions(+), 14 deletions(-)
create mode 100644 include/linux/tags.h
create mode 100644 lib/tags.c

--
1.7.2.5

Nicholas A. Bellinger

unread,
Jun 7, 2013, 10:42:16 PM6/7/13
to target-devel, linux-scsi, Christoph Hellwig, Roland Dreier, Kent Overstreet, Asias He, Michael S. Tsirkin, Or Gerlitz, Moussa Ba, Tejun Heo, Oleg Nesterov, Christoph Lameter, Ingo Molnar
From: Kent Overstreet <kover...@google.com>

Allocates integers out of a predefined range - for use by e.g. a driver
to allocate tags for communicating with the device.

Signed-off-by: Kent Overstreet <kover...@google.com>
Cc: Tejun Heo <t...@kernel.org>
Cc: Oleg Nesterov <ol...@redhat.com>
Cc: Christoph Lameter <c...@linux-foundation.org>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: Michael S. Tsirkin <m...@redhat.com>
Cc: Asias He <as...@redhat.com>
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
---
include/linux/tags.h | 38 +++++++++++
lib/Kconfig | 4 +
lib/Makefile | 5 +-
lib/tags.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 211 insertions(+), 2 deletions(-)
create mode 100644 include/linux/tags.h
create mode 100644 lib/tags.c

diff --git a/include/linux/tags.h b/include/linux/tags.h
new file mode 100644
index 0000000..1b8cfca
--- /dev/null
+++ b/include/linux/tags.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2012 Google Inc. All Rights Reserved.
+ * Author: kover...@google.com (Kent Overstreet)
+ *
+ * Per cpu tag allocator.
+ */
+
+#ifndef _LINUX_TAGS_H
+#define _LINUX_TAGS_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct tag_cpu_freelist;
+
+struct tag_pool {
+ unsigned watermark;
+ unsigned nr_tags;
+
+ struct tag_cpu_freelist *tag_cpu;
+
+ struct {
+ /* Global freelist */
+ unsigned nr_free;
+ unsigned *free;
+ spinlock_t lock;
+ struct list_head wait;
+ } ____cacheline_aligned;
+};
+
+unsigned tag_alloc(struct tag_pool *pool, bool wait);
+void tag_free(struct tag_pool *pool, unsigned tag);
+
+void tag_pool_free(struct tag_pool *pool);
+int tag_pool_init(struct tag_pool *pool, unsigned long nr_tags);
+
+
+#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index fe01d41..401bf01 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -407,4 +407,8 @@ config OID_REGISTRY
config UCS2_STRING
tristate

+config PERCPU_TAG
+ boolean
+ default y
+
endmenu
diff --git a/lib/Makefile b/lib/Makefile
index e9c52e1..2669d4c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -13,7 +13,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
- earlycpio.o
+ earlycpio.o tags.o

obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
lib-$(CONFIG_MMU) += ioremap.o
@@ -24,7 +24,8 @@ lib-y += kobject.o klist.o
obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
gcd.o lcm.o list_sort.o uuid.o flex_array.o \
- bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o
+ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
+ tags.o
obj-y += string_helpers.o
obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
obj-y += kstrtox.o
diff --git a/lib/tags.c b/lib/tags.c
new file mode 100644
index 0000000..47745c3
--- /dev/null
+++ b/lib/tags.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright 2012 Google Inc. All Rights Reserved.
+ * Author: kover...@google.com (Kent Overstreet)
+ *
+ * Per cpu tag allocator.
+ */
+
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/tags.h>
+
+struct tag_cpu_freelist {
+ unsigned nr_free;
+ unsigned free[];
+};
+
+struct tag_waiter {
+ struct list_head list;
+ struct task_struct *task;
+};
+
+static inline void move_tags(unsigned *dst, unsigned *dst_nr,
+ unsigned *src, unsigned *src_nr,
+ unsigned nr)
+{
+ *src_nr -= nr;
+ memcpy(dst + *dst_nr, src + *src_nr, sizeof(unsigned) * nr);
+ *dst_nr += nr;
+}
+
+unsigned tag_alloc(struct tag_pool *pool, bool wait)
+{
+ struct tag_cpu_freelist *tags;
+ unsigned long flags;
+ unsigned ret;
+retry:
+ preempt_disable();
+ local_irq_save(flags);
+ tags = this_cpu_ptr(pool->tag_cpu);
+
+ while (!tags->nr_free) {
+ spin_lock(&pool->lock);
+
+ if (pool->nr_free)
+ move_tags(tags->free, &tags->nr_free,
+ pool->free, &pool->nr_free,
+ min(pool->nr_free, pool->watermark));
+ else if (wait) {
+ struct tag_waiter wait = { .task = current };
+
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ list_add(&wait.list, &pool->wait);
+
+ spin_unlock(&pool->lock);
+ local_irq_restore(flags);
+ preempt_enable();
+
+ schedule();
+
+ if (!list_empty_careful(&wait.list)) {
+ spin_lock_irqsave(&pool->lock, flags);
+ list_del_init(&wait.list);
+ spin_unlock_irqrestore(&pool->lock, flags);
+ }
+
+ goto retry;
+ } else
+ goto fail;
+
+ spin_unlock(&pool->lock);
+ }
+
+ ret = tags->free[--tags->nr_free];
+
+ local_irq_restore(flags);
+ preempt_enable();
+
+ return ret;
+fail:
+ local_irq_restore(flags);
+ preempt_enable();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tag_alloc);
+
+void tag_free(struct tag_pool *pool, unsigned tag)
+{
+ struct tag_cpu_freelist *tags;
+ unsigned long flags;
+
+ preempt_disable();
+ local_irq_save(flags);
+ tags = this_cpu_ptr(pool->tag_cpu);
+
+ tags->free[tags->nr_free++] = tag;
+
+ if (tags->nr_free == pool->watermark * 2) {
+ spin_lock(&pool->lock);
+
+ move_tags(pool->free, &pool->nr_free,
+ tags->free, &tags->nr_free,
+ pool->watermark);
+
+ while (!list_empty(&pool->wait)) {
+ struct tag_waiter *wait;
+ wait = list_first_entry(&pool->wait,
+ struct tag_waiter, list);
+ list_del_init(&wait->list);
+ wake_up_process(wait->task);
+ }
+
+ spin_unlock(&pool->lock);
+ }
+
+ local_irq_restore(flags);
+ preempt_enable();
+}
+EXPORT_SYMBOL_GPL(tag_free);
+
+void tag_pool_free(struct tag_pool *pool)
+{
+ free_percpu(pool->tag_cpu);
+
+ free_pages((unsigned long) pool->free,
+ get_order(pool->nr_tags * sizeof(unsigned)));
+}
+EXPORT_SYMBOL_GPL(tag_pool_free);
+
+int tag_pool_init(struct tag_pool *pool, unsigned long nr_tags)
+{
+ unsigned i, order;
+
+ spin_lock_init(&pool->lock);
+ INIT_LIST_HEAD(&pool->wait);
+ pool->nr_tags = nr_tags;
+
+ /* Guard against overflow */
+ if (nr_tags > UINT_MAX)
+ return -ENOMEM;
+
+ order = get_order(nr_tags * sizeof(unsigned));
+ pool->free = (void *) __get_free_pages(GFP_KERNEL, order);
+ if (!pool->free)
+ return -ENOMEM;
+
+ for (i = 1; i < nr_tags; i++)
+ pool->free[pool->nr_free++] = i;
+
+ /* nr_possible_cpus would be more correct */
+ pool->watermark = nr_tags / (num_possible_cpus() * 4);
+
+ pool->watermark = min(pool->watermark, 128);
+
+ if (pool->watermark > 64)
+ pool->watermark = round_down(pool->watermark, 32);
+
+ pool->tag_cpu = __alloc_percpu(sizeof(struct tag_cpu_freelist) +
+ pool->watermark * 2 * sizeof(unsigned),
+ sizeof(unsigned));
+ if (!pool->tag_cpu)
+ return -ENOMEM;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tag_pool_init);
--
1.7.2.5

Nicholas A. Bellinger

unread,
Jun 7, 2013, 10:42:17 PM6/7/13
to target-devel, linux-scsi, Christoph Hellwig, Roland Dreier, Kent Overstreet, Asias He, Michael S. Tsirkin, Or Gerlitz, Moussa Ba, Nicholas Bellinger
From: Nicholas Bellinger <n...@linux-iscsi.org>

This patch adds lib/tag.c based transport_init_session_tagpool() logic
that allows fabric drivers to setup a per-cpu se_sess->sess_tag_pool
and associated se_sess->sess_cmd_map for basic tagged pre-allocation
of fabric descriptor sized memory.

Cc: Kent Overstreet <kover...@google.com>
Cc: Roland Dreier <rol...@kernel.org>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Asias He <as...@redhat.com>
Cc: Michael S. Tsirkin <m...@redhat.com>
Cc: Or Gerlitz <oger...@mellanox.com>
Cc: Moussa Ba <mous...@micron.com>
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
---
drivers/target/target_core_transport.c | 33 ++++++++++++++++++++++++++++++++
include/target/target_core_base.h | 5 ++++
include/target/target_core_fabric.h | 1 +
3 files changed, 39 insertions(+), 0 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index abb7e40..4715f97 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -228,6 +228,35 @@ struct se_session *transport_init_session(void)
}
EXPORT_SYMBOL(transport_init_session);

+struct se_session *transport_init_session_tagpool(unsigned int tag_num,
+ unsigned int tag_size)
+{
+ struct se_session *se_sess;
+ int rc;
+
+ se_sess = transport_init_session();
+ if (IS_ERR(se_sess))
+ return se_sess;
+
+ se_sess->sess_cmd_map = kzalloc(tag_num * tag_size, GFP_KERNEL);
+ if (!se_sess->sess_cmd_map) {
+ pr_err("Unable to allocate se_sess->sess_cmd_map\n");
+ transport_free_session(se_sess);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ rc = tag_pool_init(&se_sess->sess_tag_pool, tag_num);
+ if (rc < 0) {
+ pr_err("Unable to init se_sess->sess_tag_pool,"
+ " tag_num: %u\n", tag_num);
+ transport_free_session(se_sess);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return se_sess;
+}
+EXPORT_SYMBOL(transport_init_session_tagpool);
+
/*
* Called with spin_lock_irqsave(&struct se_portal_group->session_lock called.
*/
@@ -363,6 +392,10 @@ EXPORT_SYMBOL(transport_deregister_session_configfs);

void transport_free_session(struct se_session *se_sess)
{
+ if (se_sess->sess_cmd_map) {
+ tag_pool_free(&se_sess->sess_tag_pool);
+ kfree(se_sess->sess_cmd_map);
+ }
kmem_cache_free(se_sess_cache, se_sess);
}
EXPORT_SYMBOL(transport_free_session);
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index a5c97db..8afd995 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -5,6 +5,7 @@
#include <linux/configfs.h>
#include <linux/dma-mapping.h>
#include <linux/blkdev.h>
+#include <linux/tags.h>
#include <scsi/scsi_cmnd.h>
#include <net/sock.h>
#include <net/tcp.h>
@@ -422,6 +423,8 @@ struct se_cmd {
enum dma_data_direction data_direction;
/* For SAM Task Attribute */
int sam_task_attr;
+ /* Used for se_sess->sess_tag_pool */
+ unsigned int map_tag;
/* Transport protocol dependent state, see transport_state_table */
enum transport_state_table t_state;
unsigned cmd_wait_set:1;
@@ -542,6 +545,8 @@ struct se_session {
struct list_head sess_cmd_list;
spinlock_t sess_cmd_lock;
struct kref sess_kref;
+ void *sess_cmd_map;
+ struct tag_pool sess_tag_pool;
};

struct se_device;
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index ba3471b..318520b 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -84,6 +84,7 @@ struct target_core_fabric_ops {
};

struct se_session *transport_init_session(void);
+struct se_session *transport_init_session_tagpool(unsigned int, unsigned int);
void __transport_register_session(struct se_portal_group *,
struct se_node_acl *, struct se_session *, void *);
void transport_register_session(struct se_portal_group *,
--
1.7.2.5

Nicholas A. Bellinger

unread,
Jun 7, 2013, 10:42:18 PM6/7/13
to target-devel, linux-scsi, Christoph Hellwig, Roland Dreier, Kent Overstreet, Asias He, Michael S. Tsirkin, Or Gerlitz, Moussa Ba, Nicholas Bellinger
From: Nicholas Bellinger <n...@linux-iscsi.org>

This patch changes vhost/scsi to use transport_init_session_tagpool()
pre-allocation logic for per-cpu session tag pooling with internal
tag_alloc() + tag_free() calls based upon the saved se_cmd->map_tag id.

FIXME: Make transport_init_session_tagpool() number of tags
configurable per vring client setting

Cc: Kent Overstreet <kover...@google.com>
Cc: Michael S. Tsirkin <m...@redhat.com>
Cc: Asias He <as...@redhat.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Roland Dreier <rol...@kernel.org>
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
---
drivers/vhost/scsi.c | 31 +++++++++++++++++++------------
1 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 1e5e820..0f33451 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -48,6 +48,7 @@
#include <linux/virtio_scsi.h>
#include <linux/llist.h>
#include <linux/bitmap.h>
+#include <linux/tags.h>

#include "vhost.c"
#include "vhost.h"
@@ -448,6 +449,7 @@ static void tcm_vhost_release_cmd(struct se_cmd *se_cmd)
{
struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd,
struct tcm_vhost_cmd, tvc_se_cmd);
+ struct se_session *se_sess = se_cmd->se_sess;

if (tv_cmd->tvc_sgl_count) {
u32 i;
@@ -458,7 +460,7 @@ static void tcm_vhost_release_cmd(struct se_cmd *se_cmd)
}

tcm_vhost_put_inflight(tv_cmd->inflight);
- kfree(tv_cmd);
+ tag_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
}

static int tcm_vhost_shutdown_session(struct se_session *se_sess)
@@ -700,7 +702,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
vhost_signal(&vs->dev, &vs->vqs[vq].vq);
}

-static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd(
+static struct tcm_vhost_cmd *vhost_scsi_get_tag(
struct vhost_virtqueue *vq,
struct tcm_vhost_tpg *tv_tpg,
struct virtio_scsi_cmd_req *v_req,
@@ -709,18 +711,21 @@ static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd(
{
struct tcm_vhost_cmd *tv_cmd;
struct tcm_vhost_nexus *tv_nexus;
+ struct se_session *se_sess;
+ int tag;

tv_nexus = tv_tpg->tpg_nexus;
if (!tv_nexus) {
pr_err("Unable to locate active struct tcm_vhost_nexus\n");
return ERR_PTR(-EIO);
}
+ se_sess = tv_nexus->tvn_se_sess;

- tv_cmd = kzalloc(sizeof(struct tcm_vhost_cmd), GFP_ATOMIC);
- if (!tv_cmd) {
- pr_err("Unable to allocate struct tcm_vhost_cmd\n");
- return ERR_PTR(-ENOMEM);
- }
+ tag = tag_alloc(&se_sess->sess_tag_pool, true);
+ tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[tag];
+ memset(tv_cmd, 0, sizeof(struct tcm_vhost_cmd));
+
+ tv_cmd->tvc_se_cmd.map_tag = tag;
tv_cmd->tvc_tag = v_req->tag;
tv_cmd->tvc_task_attr = v_req->task_attr;
tv_cmd->tvc_exp_data_len = exp_data_len;
@@ -982,10 +987,10 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
for (i = 0; i < data_num; i++)
exp_data_len += vq->iov[data_first + i].iov_len;

- tv_cmd = vhost_scsi_allocate_cmd(vq, tv_tpg, &v_req,
- exp_data_len, data_direction);
+ tv_cmd = vhost_scsi_get_tag(vq, tv_tpg, &v_req,
+ exp_data_len, data_direction);
if (IS_ERR(tv_cmd)) {
- vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n",
+ vq_err(vq, "vhost_scsi_get_tag failed %ld\n",
PTR_ERR(tv_cmd));
goto err_cmd;
}
@@ -1662,9 +1667,11 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tv_tpg,
return -ENOMEM;
}
/*
- * Initialize the struct se_session pointer
+ * Initialize the struct se_session pointer and setup tagpool
+ * for struct tcm_vhost_cmd descriptors
*/
- tv_nexus->tvn_se_sess = transport_init_session();
+ tv_nexus->tvn_se_sess = transport_init_session_tagpool(256,
+ sizeof(struct tcm_vhost_cmd));
if (IS_ERR(tv_nexus->tvn_se_sess)) {
mutex_unlock(&tv_tpg->tv_tpg_mutex);
kfree(tv_nexus);
--
1.7.2.5

Reply all
Reply to author
Forward
0 new messages