[PATCH v1 0/7] add SPDK support

449 views
Skip to first unread message

Kefu Chai

<tchaikov@gmail.com>
unread,
Sep 27, 2021, 11:33:08 AM9/27/21
to seastar-dev@googlegroups.com, Kefu Chai
hi folks,

i've been thinking about integrating Seastar into SPDK, or, put in
another way, integrating SPDK to Seastar for a while. and i just
managed to pull together a prototype and pushed it to github [0]
last night. the patches surely need more polishment before a
thorough review.

but i am sending the patches for early inputs. the short-term goal
is to wire up Seastar with the bdev library offered by SPDK. the
bdev support would serve as an example for those who are interested
in programming SPDK with Seastar.

i will revise the patches to address comments and suggestions. but
what i really want to know first is if the general structure makes sense.

FWIW, i am still working on bumping [1] up the DPDK version for Seastar
as i want to support the latest release of SPDK which is using
a recent DPDK.

cheers,

---
[0] https://github.com/tchaikov/seastar/tree/wip-spdk
[1] http://mails.dpdk.org/archives/dev/2021-September/219069.html

Kefu Chai (7):
build: build with dpdk v20.02
dpdk: s/ether_addr/rte_ether_addr/
dpdk: do not set TOEPLITZ as RSS hash
dpdk: s/rte_vfio_container_dma_map()/rte_vfio_dma_map()/
build: find dpdk shared libraries as well
spdk: add spdk submodule
*: add spdk support

.gitmodules | 3 +
CMakeLists.txt | 51 +++--
cmake/Finddpdk.cmake | 72 +++++--
cmake/Findspdk.cmake | 114 ++++++++++
cmake/SeastarDependencies.cmake | 10 +
configure.py | 9 +
cooking_recipe.cmake | 68 +++---
demos/CMakeLists.txt | 3 +
demos/spdk_bdev_demo.cc | 90 ++++++++
dpdk | 2 +-
dpdk_config | 24 ---
include/seastar/core/smp.hh | 3 +-
include/seastar/core/spdk_app.hh | 54 +++++
include/seastar/core/spdk_bdev.hh | 57 +++++
include/seastar/core/spdk_lib.hh | 33 +++
include/seastar/core/spdk_thread.hh | 100 +++++++++
spdk | 1 +
src/core/app-template.cc | 5 +-
src/core/reactor.cc | 21 ++
src/core/spdk_app.cc | 310 ++++++++++++++++++++++++++++
src/core/spdk_bdev.cc | 158 ++++++++++++++
src/core/spdk_lib.cc | 39 ++++
src/core/spdk_thread.cc | 158 ++++++++++++++
src/net/dpdk.cc | 29 +--
24 files changed, 1310 insertions(+), 104 deletions(-)
create mode 100644 cmake/Findspdk.cmake
create mode 100644 demos/spdk_bdev_demo.cc
delete mode 100644 dpdk_config
create mode 100644 include/seastar/core/spdk_app.hh
create mode 100644 include/seastar/core/spdk_bdev.hh
create mode 100644 include/seastar/core/spdk_lib.hh
create mode 100644 include/seastar/core/spdk_thread.hh
create mode 160000 spdk
create mode 100644 src/core/spdk_app.cc
create mode 100644 src/core/spdk_bdev.cc
create mode 100644 src/core/spdk_lib.cc
create mode 100644 src/core/spdk_thread.cc

--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Sep 27, 2021, 11:33:10 AM9/27/21
to seastar-dev@googlegroups.com, Kefu Chai
* build dpdk using meson and ninja
* override the options using meson options
* disable the build of drivers using meson options
* update the libraries name to be in sync with the latest dpdk

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
CMakeLists.txt | 22 ++++++-----------
cmake/Finddpdk.cmake | 52 ++++++++++++++++++++++++++++++----------
cooking_recipe.cmake | 57 +++++++++++++++++++++++---------------------
dpdk | 2 +-
dpdk_config | 24 -------------------
5 files changed, 77 insertions(+), 80 deletions(-)
delete mode 100644 dpdk_config

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a63b3b4c..4248aad8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -418,14 +418,9 @@ seastar_generate_ragel (
IN_FILE ${CMAKE_CURRENT_SOURCE_DIR}/src/http/response_parser.rl
OUT_FILE ${Seastar_GEN_BINARY_DIR}/include/seastar/http/response_parser.hh)

-if (Seastar_DPDK)
- set (seastar_dpdk_obj seastar-dpdk.o)
-endif ()
-
add_library (seastar STATIC
${http_chunk_parsers_file}
${http_request_parser_file}
- ${seastar_dpdk_obj}
include/seastar/core/abort_source.hh
include/seastar/core/alien.hh
include/seastar/core/align.hh
@@ -730,6 +725,11 @@ target_link_libraries (seastar
rt::rt
yaml-cpp::yaml-cpp
Threads::Threads)
+if (Seastar_DPDK)
+ target_link_libraries (seastar
+ PRIVATE
+ dpdk::dpdk)
+endif()

set (Seastar_SANITIZE_MODES "Debug" "Sanitize")
if ((Seastar_SANITIZE STREQUAL "ON") OR
@@ -879,17 +879,9 @@ if (Seastar_DPDK)
target_compile_definitions (seastar
PUBLIC SEASTAR_HAVE_DPDK)

- # No pmd driver code will be pulled in without "--whole-archive". To
- # avoid exposing that to seastar users, combine dpdk into a single
- # .o file.
- add_custom_command (
- OUTPUT seastar-dpdk.o
- COMMAND ld -r -o seastar-dpdk.o --whole-archive ${dpdk_LIBRARIES} --no-whole-archive
- )
-
# This just provides the include path to cmake
- target_link_libraries (seastar
- PUBLIC dpdk::dpdk)
+ target_include_directories (seastar
+ PUBLIC $<TARGET_PROPERTY:dpdk::dpdk,INTERFACE_INCLUDE_DIRECTORIES>)
endif ()

if (Seastar_HWLOC)
diff --git a/cmake/Finddpdk.cmake b/cmake/Finddpdk.cmake
index c70b8e02..5ecd359b 100644
--- a/cmake/Finddpdk.cmake
+++ b/cmake/Finddpdk.cmake
@@ -20,27 +20,53 @@
# Copyright (C) 2018 Scylladb, Ltd.
#

+find_package (PkgConfig REQUIRED)
+pkg_check_modules (dpdk_PC libdpdk)
+
+if (dpdk_PC_FOUND)
+ find_package_handle_standard_args (dpdk
+ REQUIRED_VARS
+ dpdk_PC_STATIC_CFLAGS
+ dpdk_PC_STATIC_INCLUDEDIR
+ dpdk_PC_STATIC_INCLUDE_DIRS
+ dpdk_PC_STATIC_LIBRARIES
+ dpdk_PC_STATIC_LIBRARY_DIRS)
+ if (dpdk_FOUND AND NOT (TARGET dpdk::dpdk))
+ set (dpdk_INCLUDE_DIR ${dpdk_PC_STATIC_INCLUDEDIR})
+ set (dpdk_LINK_DIRECTORIES ${dpdk_PC_STATIC_LIBRARY_DIRS})
+ set (dpdk_LIBRARIES ${dpdk_PC_STATIC_LIBRARIES})
+ add_library (dpdk::dpdk INTERFACE IMPORTED)
+ set_target_properties (dpdk::dpdk
+ PROPERTIES
+ INTERFACE_COMPILE_OPTIONS "${dpdk_PC_STATIC_CFLAGS}"
+ INTERFACE_INCLUDE_DIRECTORIES "${dpdk_PC_STATIC_INCLUDE_DIRS}"
+ INTERFACE_LINK_LIBRARIES "${dpdk_LIBRARIES}"
+ INTERFACE_LINK_DIRECTORIES "${dpdk_LINK_DIRECTORIES}")
+ return ()
+ endif ()
+endif ()
+
find_path (dpdk_INCLUDE_DIR
NAMES rte_atomic.h
PATH_SUFFIXES dpdk)

-find_library (dpdk_PMD_VMXNET3_UIO_LIBRARY rte_pmd_vmxnet3_uio)
-find_library (dpdk_PMD_I40E_LIBRARY rte_pmd_i40e)
-find_library (dpdk_PMD_IXGBE_LIBRARY rte_pmd_ixgbe)
-find_library (dpdk_PMD_E1000_LIBRARY rte_pmd_e1000)
-find_library (dpdk_PMD_BNXT_LIBRARY rte_pmd_bnxt)
-find_library (dpdk_PMD_RING_LIBRARY rte_pmd_ring)
-find_library (dpdk_PMD_CXGBE_LIBRARY rte_pmd_cxgbe)
-find_library (dpdk_PMD_ENA_LIBRARY rte_pmd_ena)
-find_library (dpdk_PMD_ENIC_LIBRARY rte_pmd_enic)
-find_library (dpdk_PMD_FM10K_LIBRARY rte_pmd_fm10k)
-find_library (dpdk_PMD_NFP_LIBRARY rte_pmd_nfp)
-find_library (dpdk_PMD_QEDE_LIBRARY rte_pmd_qede)
+find_library (dpdk_PMD_VMXNET3_UIO_LIBRARY rte_net_vmxnet3)
+find_library (dpdk_PMD_I40E_LIBRARY rte_net_i40e)
+find_library (dpdk_PMD_IXGBE_LIBRARY rte_net_ixgbe)
+find_library (dpdk_PMD_E1000_LIBRARY rte_net_e1000)
+find_library (dpdk_PMD_BNXT_LIBRARY rte_net_bnxt)
+find_library (dpdk_PMD_RING_LIBRARY rte_net_ring)
+find_library (dpdk_PMD_CXGBE_LIBRARY rte_net_cxgbe)
+find_library (dpdk_PMD_ENA_LIBRARY rte_net_ena)
+find_library (dpdk_PMD_ENIC_LIBRARY rte_net_enic)
+find_library (dpdk_PMD_FM10K_LIBRARY rte_net_fm10k)
+find_library (dpdk_PMD_NFP_LIBRARY rte_net_nfp)
+find_library (dpdk_PMD_QEDE_LIBRARY rte_net_qede)
find_library (dpdk_RING_LIBRARY rte_ring)
find_library (dpdk_KVARGS_LIBRARY rte_kvargs)
find_library (dpdk_MEMPOOL_LIBRARY rte_mempool)
find_library (dpdk_MEMPOOL_RING_LIBRARY rte_mempool_ring)
-find_library (dpdk_PMD_SFC_EFX_LIBRARY rte_pmd_sfc_efx)
+find_library (dpdk_PMD_SFC_EFX_LIBRARY rte_net_sfc)
find_library (dpdk_HASH_LIBRARY rte_hash)
find_library (dpdk_CMDLINE_LIBRARY rte_cmdline)
find_library (dpdk_MBUF_LIBRARY rte_mbuf)
diff --git a/cooking_recipe.cmake b/cooking_recipe.cmake
index c53d3aee..b098132e 100644
--- a/cooking_recipe.cmake
+++ b/cooking_recipe.cmake
@@ -244,40 +244,43 @@ cooking_ingredient (cryptopp
URL https://github.com/weidai11/cryptopp/archive/CRYPTOPP_5_6_5.tar.gz
URL_MD5 88224d9c0322f63aa1fb5b8ae78170f0)

-
-# Use the "native" profile that DPDK defines in `dpdk/config`, but in `dpdk_configure.cmake` we override
-# CONFIG_RTE_MACHINE with `Seastar_DPDK_MACHINE`.
-if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
- set (dpdk_quadruple arm64-armv8a-linuxapp-gcc)
-else()
- set (dpdk_quadruple ${CMAKE_SYSTEM_PROCESSOR}-native-linuxapp-gcc)
-endif()
-
set (dpdk_args
- # gcc 10 defaults to -fno-common, which dpdk is not prepared for
- "EXTRA_CFLAGS=-Wno-error -fcommon"
- O=<BINARY_DIR>
- DESTDIR=<INSTALL_DIR>
- T=${dpdk_quadruple})
+ --default-library=static
+ -Dc_args="-Wno-error"
+ -Denable_docs=false
+ -Dtests=false
+ -Dexamples=
+ -Dmbuf_refcnt_atomic=false
+ -Dmax_memseg_lists=8192
+ -Ddisable_drivers="net/softnic,net/bonding"
+ -Ddisable_libs="kni,jobstats,lpm,acl,power,ip_frag,distributor,reorder,port,table,pipeline,flow_classify,bpf,efd,member"
+ -Dcpu_instruction_set=${Seastar_DPDK_MACHINE})
+
+if (CMAKE_BUILD_TYPE STREQUAL Debug)
+ list (APPEND dpdk_args -Dbuildtype=debug)
+endif ()
+
+find_program (Meson_EXECUTABLE
+ meson)
+if (NOT Meson_EXECUTABLE)
+ message (FATAL_ERROR "Cooking: Meson is required!")
+endif ()
+
+find_program (Ninja_EXECUTABLE
+ ninja)
+if (NOT Ninja_EXECUTABLE)
+ message (FATAL_ERROR "Cooking: Ninja is required!")
+endif ()

cooking_ingredient (dpdk
EXTERNAL_PROJECT_ARGS
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dpdk
CONFIGURE_COMMAND
- COMMAND
- ${CMAKE_COMMAND} -E chdir <SOURCE_DIR>
- make ${dpdk_args} config
- COMMAND
- ${CMAKE_COMMAND}
- -DSeastar_DPDK_MACHINE=${Seastar_DPDK_MACHINE}
- -DSeastar_DPDK_CONFIG_FILE_IN=<BINARY_DIR>/.config
- -DSeastar_DPDK_CONFIG_FILE_CHANGES=${CMAKE_CURRENT_SOURCE_DIR}/dpdk_config
- -DSeastar_DPDK_CONFIG_FILE_OUT=<BINARY_DIR>/${dpdk_quadruple}/.config
- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/dpdk_configure.cmake
- BUILD_COMMAND <DISABLE>
+ env CC=${CMAKE_C_COMPILER} ${Meson_EXECUTABLE} ${dpdk_args} --prefix=<INSTALL_DIR> <BINARY_DIR> <SOURCE_DIR>
+ BUILD_COMMAND
+ ${Ninja_EXECUTABLE} -C <BINARY_DIR>
INSTALL_COMMAND
- ${CMAKE_COMMAND} -E chdir <SOURCE_DIR>
- ${make_command} ${dpdk_args} install)
+ ${Ninja_EXECUTABLE} -C <BINARY_DIR> install)

cooking_ingredient (fmt
EXTERNAL_PROJECT_ARGS
diff --git a/dpdk b/dpdk
index 0f486994..98edb0f7 160000
--- a/dpdk
+++ b/dpdk
@@ -1 +1 @@
-Subproject commit 0f486994114dbc68da07e7345674f7d83b54e0fc
+Subproject commit 98edb0f7fa1d80c9b6b80dc762f48a6157897b34
diff --git a/dpdk_config b/dpdk_config
deleted file mode 100644
index 8b8c60db..00000000
--- a/dpdk_config
+++ /dev/null
@@ -1,24 +0,0 @@
-CONFIG_RTE_LIBRTE_PMD_BOND=n
-CONFIG_RTE_LIBRTE_PMD_SOFTNIC=n
-CONFIG_RTE_APP_TEST=n
-CONFIG_RTE_TEST_PMD=n
-CONFIG_RTE_MBUF_REFCNT_ATOMIC=n
-CONFIG_RTE_MAX_MEMSEG_LISTS=8192
-CONFIG_RTE_EAL_IGB_UIO=n
-CONFIG_RTE_LIBRTE_KNI=n
-CONFIG_RTE_KNI_KMOD=n
-CONFIG_RTE_LIBRTE_JOBSTATS=n
-CONFIG_RTE_LIBRTE_LPM=n
-CONFIG_RTE_LIBRTE_ACL=n
-CONFIG_RTE_LIBRTE_POWER=n
-CONFIG_RTE_LIBRTE_IP_FRAG=n
-CONFIG_RTE_LIBRTE_DISTRIBUTOR=n
-CONFIG_RTE_LIBRTE_PMD_CRYPTO_SCHEDULER=n
-CONFIG_RTE_LIBRTE_REORDER=n
-CONFIG_RTE_LIBRTE_PORT=n
-CONFIG_RTE_LIBRTE_TABLE=n
-CONFIG_RTE_LIBRTE_PIPELINE=n
-CONFIG_RTE_LIBRTE_FLOW_CLASSIFY=n
-CONFIG_RTE_LIBRTE_BPF=n
-CONFIG_RTE_LIBRTE_EFD=n
-CONFIG_RTE_LIBRTE_MEMBER=n
--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Sep 27, 2021, 11:33:11 AM9/27/21
to seastar-dev@googlegroups.com, Kefu Chai
* s/ether_addr/rte_ether_addr/
* s/ether_hdr/rte_ether_hdr/

ether_addr was renamed to ether_addr in 6d13ea8e8e49ab957deae2bba5ecf4a4bfe747d1,
see https://github.com/DPDK/dpdk/commit/6d13ea8e8e49ab957deae2bba5ecf4a4bfe747d1
and the first release including this change was v19.08. let's update
accordingly.

the same applies to ether_hdr

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
src/net/dpdk.cc | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/net/dpdk.cc b/src/net/dpdk.cc
index 5f60bfb2..4e0ce8bd 100644
--- a/src/net/dpdk.cc
+++ b/src/net/dpdk.cc
@@ -468,7 +468,7 @@ class dpdk_device : public device {
}

ethernet_address hw_address() override {
- struct ether_addr mac;
+ struct rte_ether_addr mac;
rte_eth_macaddr_get(_port_idx, &mac);

return mac.addr_bytes;
@@ -621,14 +621,14 @@ class dpdk_qp : public net::qp {
if (oi.needs_ip_csum) {
head->ol_flags |= PKT_TX_IP_CKSUM;
// TODO: Take a VLAN header into an account here
- head->l2_len = sizeof(struct ether_hdr);
+ head->l2_len = sizeof(struct rte_ether_hdr);
head->l3_len = oi.ip_hdr_len;
}
if (qp.port().hw_features().tx_csum_l4_offload) {
if (oi.protocol == ip_protocol_num::tcp) {
head->ol_flags |= PKT_TX_TCP_CKSUM;
// TODO: Take a VLAN header into an account here
- head->l2_len = sizeof(struct ether_hdr);
+ head->l2_len = sizeof(struct rte_ether_hdr);
head->l3_len = oi.ip_hdr_len;

if (oi.tso_seg_size) {
@@ -640,7 +640,7 @@ class dpdk_qp : public net::qp {
} else if (oi.protocol == ip_protocol_num::udp) {
head->ol_flags |= PKT_TX_UDP_CKSUM;
// TODO: Take a VLAN header into an account here
- head->l2_len = sizeof(struct ether_hdr);
+ head->l2_len = sizeof(struct rte_ether_hdr);
head->l3_len = oi.ip_hdr_len;
}
}
--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Sep 27, 2021, 11:33:14 AM9/27/21
to seastar-dev@googlegroups.com, Kefu Chai
this change pratically reverts eec89a57a43c070df9d3e29fa649df7237be9f39,
because DPDK v21.08 includes the fix of
ef4c16fd9148215897abadf8e8a965488c82ba03 and c725221d09113bde89faa1e3c468e805fb335939
which set the RSS hash function properly for i40e.
so let's drop this fix to set TOEPLITZ as RSS hash for
unconditionally.

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
src/net/dpdk.cc | 18 ------------------
1 file changed, 18 deletions(-)

diff --git a/src/net/dpdk.cc b/src/net/dpdk.cc
index 4e0ce8bd..ed04cb34 100644
--- a/src/net/dpdk.cc
+++ b/src/net/dpdk.cc
@@ -1716,27 +1716,9 @@ void dpdk_device::init_port_fini()
});

// TODO: replace deprecated filter api with generic flow api
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
if (_num_queues > 1) {
- if (!rte_eth_dev_filter_supported(_port_idx, RTE_ETH_FILTER_HASH)) {
- printf("Port %d: HASH FILTER configuration is supported\n", _port_idx);
-
- // Setup HW touse the TOEPLITZ hash function as an RSS hash function
- struct rte_eth_hash_filter_info info = {};
-
- info.info_type = RTE_ETH_HASH_FILTER_GLOBAL_CONFIG;
- info.info.global_conf.hash_func = RTE_ETH_HASH_FUNCTION_TOEPLITZ;
-
- if (rte_eth_dev_filter_ctrl(_port_idx, RTE_ETH_FILTER_HASH,
- RTE_ETH_FILTER_SET, &info) < 0) {
- rte_exit(EXIT_FAILURE, "Cannot set hash function on a port %d\n", _port_idx);
- }
- }
-
set_rss_table();
}
- #pragma GCC diagnostic pop

// Wait for a link
check_port_link_status();
--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Sep 27, 2021, 11:33:15 AM9/27/21
to seastar-dev@googlegroups.com, Kefu Chai
the latter was deprecated in v19.11. see
dpdk/doc/guides/rel_notes/release_19_11.rst

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
src/net/dpdk.cc | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/net/dpdk.cc b/src/net/dpdk.cc
index ed04cb34..77260d75 100644
--- a/src/net/dpdk.cc
+++ b/src/net/dpdk.cc
@@ -1858,7 +1858,8 @@ bool dpdk_qp<HugetlbfsMemBackend>::map_dma()
auto m = memory::get_memory_layout();
rte_iova_t iova = rte_mem_virt2iova((const void*)m.start);

- return rte_vfio_dma_map(m.start, iova, m.end - m.start) == 0;
+ return rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+ m.start, iova, m.end - m.start) == 0;
}

void dpdk_device::check_port_link_status()
--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Sep 27, 2021, 11:33:16 AM9/27/21
to seastar-dev@googlegroups.com, Kefu Chai
Debian packages dpdk as shared libraries, would be great
if we can use them if they are installed.

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
cmake/Finddpdk.cmake | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/cmake/Finddpdk.cmake b/cmake/Finddpdk.cmake
index 5ecd359b..7e8390af 100644
--- a/cmake/Finddpdk.cmake
+++ b/cmake/Finddpdk.cmake
@@ -23,7 +23,7 @@
find_package (PkgConfig REQUIRED)
pkg_check_modules (dpdk_PC libdpdk)

-if (dpdk_PC_FOUND)
+if (dpdk_PC_STATIC_FOUND)
find_package_handle_standard_args (dpdk
REQUIRED_VARS
dpdk_PC_STATIC_CFLAGS
@@ -44,6 +44,26 @@ if (dpdk_PC_FOUND)
INTERFACE_LINK_DIRECTORIES "${dpdk_LINK_DIRECTORIES}")
return ()
endif ()
+elseif (dpdk_PC_FOUND)
+ find_package_handle_standard_args (dpdk
+ REQUIRED_VARS
+ dpdk_PC_CFLAGS
+ dpdk_PC_INCLUDEDIR
+ dpdk_PC_INCLUDE_DIRS
+ dpdk_PC_LIBRARIES)
+ if (dpdk_FOUND AND NOT (TARGET dpdk::dpdk))
+ set (dpdk_INCLUDE_DIR ${dpdk_PC_INCLUDEDIR})
+ set (dpdk_LINK_DIRECTORIES ${dpdk_PC_LIBRARY_DIRS})
+ set (dpdk_LIBRARIES ${dpdk_PC_LIBRARIES})
+ add_library (dpdk::dpdk INTERFACE IMPORTED)
+ set_target_properties (dpdk::dpdk
+ PROPERTIES
+ INTERFACE_COMPILE_OPTIONS "${dpdk_PC_CFLAGS}"
+ INTERFACE_INCLUDE_DIRECTORIES "${dpdk_PC_INCLUDE_DIRS}"
+ INTERFACE_LINK_LIBRARIES "${dpdk_LIBRARIES}"
+ INTERFACE_LINK_DIRECTORIES "${dpdk_LINK_DIRECTORIES}")
+ return ()
+ endif ()
endif ()

find_path (dpdk_INCLUDE_DIR
--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Sep 27, 2021, 11:33:18 AM9/27/21
to seastar-dev@googlegroups.com, Kefu Chai
---
.gitmodules | 3 +++
spdk | 1 +
2 files changed, 4 insertions(+)
create mode 160000 spdk

diff --git a/.gitmodules b/.gitmodules
index c5e41966..0cad1323 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
[submodule "dpdk"]
path = dpdk
url = ../dpdk
+[submodule "spdk"]
+ path = spdk
+ url = ../spdk
diff --git a/spdk b/spdk
new file mode 160000
index 00000000..85bf4c81
--- /dev/null
+++ b/spdk
@@ -0,0 +1 @@
+Subproject commit 85bf4c81a4f9114eebab1891862326e362da7ebd
--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Sep 27, 2021, 11:33:21 AM9/27/21
to seastar-dev@googlegroups.com, Kefu Chai
Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
CMakeLists.txt | 31 +++
cmake/Findspdk.cmake | 114 ++++++++++
cmake/SeastarDependencies.cmake | 10 +
configure.py | 9 +
cooking_recipe.cmake | 11 +
demos/CMakeLists.txt | 3 +
demos/spdk_bdev_demo.cc | 90 ++++++++
include/seastar/core/smp.hh | 3 +-
include/seastar/core/spdk_app.hh | 54 +++++
include/seastar/core/spdk_bdev.hh | 57 +++++
include/seastar/core/spdk_lib.hh | 33 +++
include/seastar/core/spdk_thread.hh | 100 +++++++++
src/core/app-template.cc | 5 +-
src/core/reactor.cc | 21 ++
src/core/spdk_app.cc | 310 ++++++++++++++++++++++++++++
src/core/spdk_bdev.cc | 158 ++++++++++++++
src/core/spdk_lib.cc | 39 ++++
src/core/spdk_thread.cc | 158 ++++++++++++++
18 files changed, 1204 insertions(+), 2 deletions(-)
create mode 100644 cmake/Findspdk.cmake
create mode 100644 demos/spdk_bdev_demo.cc
create mode 100644 include/seastar/core/spdk_app.hh
create mode 100644 include/seastar/core/spdk_bdev.hh
create mode 100644 include/seastar/core/spdk_lib.hh
create mode 100644 include/seastar/core/spdk_thread.hh
create mode 100644 src/core/spdk_app.cc
create mode 100644 src/core/spdk_bdev.cc
create mode 100644 src/core/spdk_lib.cc
create mode 100644 src/core/spdk_thread.cc

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4248aad8..f2886840 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -206,6 +206,10 @@ option (Seastar_DPDK
"Enable DPDK support."
OFF)

+option (Seastar_SPDK
+ "Enable SPDK support."
+ OFF)
+
option (Seastar_EXCLUDE_APPS_FROM_ALL
"When enabled alongside Seastar_APPS, do not build applications by default."
OFF)
@@ -494,6 +498,10 @@ add_library (seastar STATIC
include/seastar/core/simple-stream.hh
include/seastar/core/slab.hh
include/seastar/core/sleep.hh
+ include/seastar/core/spdk_app.hh
+ include/seastar/core/spdk_bdev.hh
+ include/seastar/core/spdk_lib.hh
+ include/seastar/core/spdk_thread.hh
include/seastar/core/sstring.hh
include/seastar/core/stall_sampler.hh
include/seastar/core/stream.hh
@@ -623,6 +631,10 @@ add_library (seastar STATIC
src/core/scollectd-impl.hh
src/core/systemwide_memory_barrier.cc
src/core/smp.cc
+ src/core/spdk_app.cc
+ src/core/spdk_bdev.cc
+ src/core/spdk_lib.cc
+ src/core/spdk_thread.cc
src/core/sstring.cc
src/core/thread.cc
src/core/uname.cc
@@ -730,6 +742,17 @@ if (Seastar_DPDK)
PRIVATE
dpdk::dpdk)
endif()
+if (Seastar_SPDK)
+ target_link_libraries (seastar
+ PRIVATE
+ spdk::event_bdev
+ spdk::event_accel
+ spdk::bdev
+ spdk::accel
+ spdk::init
+ spdk::env_dpdk
+ dpdk::dpdk)
+endif()

set (Seastar_SANITIZE_MODES "Debug" "Sanitize")
if ((Seastar_SANITIZE STREQUAL "ON") OR
@@ -884,6 +907,13 @@ if (Seastar_DPDK)
PUBLIC $<TARGET_PROPERTY:dpdk::dpdk,INTERFACE_INCLUDE_DIRECTORIES>)
endif ()

+if (Seastar_SPDK)
+ target_compile_definitions (seastar
+ PUBLIC SEASTAR_HAVE_SPDK)
+ target_link_libraries (seastar
+ PUBLIC spdk::spdk)
+endif ()
+
if (Seastar_HWLOC)
if (NOT hwloc_FOUND)
message (FATAL_ERROR "`hwloc` support is enabled but it is not available!")
@@ -1210,6 +1240,7 @@ if (Seastar_INSTALL)
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findlksctp-tools.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findlz4.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findnumactl.cmake
+ ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findspdk.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findragel.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findrt.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findyaml-cpp.cmake
diff --git a/cmake/Findspdk.cmake b/cmake/Findspdk.cmake
new file mode 100644
index 00000000..6ce9bd8b
--- /dev/null
+++ b/cmake/Findspdk.cmake
@@ -0,0 +1,114 @@
+#
+# This file is open source software, licensed to you under the terms
+# of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+# distributed with this work for additional information regarding copyright
+# ownership. You may not use this file except in compliance with the License.
+#
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+#
+# Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+#
+
+find_package (PkgConfig REQUIRED)
+
+if(spdk_FIND_COMPONENTS)
+ if(NOT bdev IN_LIST spdk_FIND_COMPONENTS)
+ list (APPEND spdk_FIND_COMPONENTS bdev)
+ endif()
+else()
+ set(spdk_FIND_COMPONENTS
+ bdev
+ blobfs
+ env_dpdk
+ event
+ ftl
+ iscsi
+ json
+ jsonrpc
+ log
+ lvol
+ nvme
+ syslibs
+ thread
+ vhost)
+endif()
+
+include (FindPackageHandleStandardArgs)
+set (spdk_INCLUDE_DIR)
+set (spdk_LINK_DIRECTORIES)
+
+set(_spdk_bdev_aio_deps aio)
+set(_spdk_util_deps uuid)
+
+foreach (component ${spdk_FIND_COMPONENTS})
+ pkg_check_modules (spdk_PC spdk_${component})
+ add_library (spdk::${component} INTERFACE IMPORTED)
+ set (prefix spdk_PC_STATIC)
+ foreach (spdk_lib bdev_aio util)
+ foreach (dep ${_spdk_${spdk_lib}_deps})
+ find_package (${dep} QUIET REQUIRED)
+ list (APPEND ${prefix}_LIBRARIES ${dep})
+ endforeach ()
+ endforeach ()
+ set_target_properties (spdk::${component}
+ PROPERTIES
+ INTERFACE_COMPILE_OPTIONS ${${prefix}_CFLAGS}
+ INTERFACE_INCLUDE_DIRECTORIES ${${prefix}_INCLUDE_DIRS}
+ INTERFACE_LINK_OPTIONS "-Wl,--whole-archive;${${prefix}_LDFLAGS};-Wl,--no-whole-archive"
+ INTERFACE_LINK_LIBRARIES "${${prefix}_LIBRARIES}"
+ INTERFACE_LINK_DIRECTORIES ${${prefix}_LIBRARY_DIRS})
+ if (NOT spdk_INCLUDE_DIR)
+ set (spdk_INCLUDE_DIR ${${prefix}_INCLUDE_DIRS})
+ endif ()
+ if (NOT spdk_LINK_DIRECTORIES)
+ set (spdk_LINK_DIRECTORIES ${${prefix}_LIBRARY_DIRS})
+ endif ()
+ list (APPEND spdk_link_opts "${${prefix}_LDFLAGS}")
+ list (APPEND spdk_libs ${${prefix}_LIBRARIES})
+ list (APPEND spdk_lib_vars ${prefix}_LIBRARIES)
+endforeach ()
+
+if (spdk_INCLUDE_DIR AND EXISTS "${spdk_INCLUDE_DIR}/spdk/version.h")
+ foreach(ver "MAJOR" "MINOR" "PATCH")
+ file(STRINGS "${spdk_INCLUDE_DIR}/spdk/version.h" spdk_VER_${ver}_LINE
+ REGEX "^#define[ \t ]+SPDK_VERSION_${ver}[ \t]+[0-9]+$")
+ string(REGEX REPLACE "^#define[ \t]+SPDK_VERSION_${ver}[ \t]+([0-9]+)$"
+ "\\1" spdk_VERSION_${ver} "${spdk_VER_${ver}_LINE}")
+ unset(${spdk_VER_${ver}_LINE})
+ endforeach()
+ set(spdk_VERSION_STRING
+ "${spdk_VERSION_MAJOR}.${spdk_VERSION_MINOR}.${spdk_VERSION_PATCH}")
+endif ()
+
+find_package_handle_standard_args (spdk
+ REQUIRED_VARS
+ spdk_INCLUDE_DIR
+ spdk_LINK_DIRECTORIES
+ ${spdk_lib_vars}
+ VERSION_VAR
+ spdk_VERSION_STRING)
+
+if (spdk_FOUND AND NOT (TARGET spdk::spdk))
+ set (spdk_LIBRARIES ${spdk_libs})
+ set (whole_archive_link_opts
+ -Wl,--whole-archive -Wl,-Bstatic ${spdk_link_opts} -Wl,--no-whole-archive -Wl,-Bdynamic)
+ add_library (spdk::spdk INTERFACE IMPORTED)
+ set_target_properties (spdk::spdk
+ PROPERTIES
+ INTERFACE_COMPILE_OPTIONS "${spdk_PC_STATIC_bdev_CFLAGS}"
+ INTERFACE_INCLUDE_DIRECTORIES "${spdk_INCLUDE_DIR}"
+ INTERFACE_LINK_OPTIONS "${whole_archive_link_opts}"
+ INTERFACE_LINK_LIBRARIES "${spdk_LIBRARIES}"
+ INTERFACE_LINK_DIRECTORIES "${spdk_LINK_DIRECTORIES}")
+endif ()
diff --git a/cmake/SeastarDependencies.cmake b/cmake/SeastarDependencies.cmake
index 51a8a65a..07358c3d 100644
--- a/cmake/SeastarDependencies.cmake
+++ b/cmake/SeastarDependencies.cmake
@@ -53,6 +53,7 @@ macro (seastar_find_dependencies)
c-ares
cryptopp
dpdk # No version information published.
+ spdk
fmt
lz4
# Private and private/public dependencies.
@@ -92,6 +93,15 @@ macro (seastar_find_dependencies)
set (_seastar_dep_args_lksctp-tools REQUIRED)
set (_seastar_dep_args_rt REQUIRED)
set (_seastar_dep_args_yaml-cpp 0.5.1 REQUIRED)
+ set (_seastar_dep_args_spdk
+ 21.10.0
+ COMPONENTS
+ event_bdev
+ event_accel
+ bdev
+ accel
+ init
+ env_dpdk)

foreach (third_party ${_seastar_all_dependencies})
find_package ("${third_party}" ${_seastar_dep_args_${third_party}})
diff --git a/configure.py b/configure.py
index 27a40fd9..5767455d 100755
--- a/configure.py
+++ b/configure.py
@@ -81,6 +81,11 @@ add_tristate(
name = 'dpdk',
dest = 'dpdk',
help = 'DPDK support')
+add_tristate(
+ arg_parser,
+ name = 'spdk',
+ dest = 'spdk',
+ help = 'SPDK support')
add_tristate(
arg_parser,
name = 'hwloc',
@@ -189,6 +194,7 @@ def configure_mode(mode):
tr(LDFLAGS, 'LD_FLAGS'),
tr(args.cpp_dialect, 'CXX_DIALECT'),
tr(args.dpdk, 'DPDK'),
+ tr(args.spdk, 'SPDK'),
tr(infer_dpdk_machine(args.user_cflags), 'DPDK_MACHINE'),
tr(args.hwloc, 'HWLOC', value_when_none='yes'),
tr(args.alloc_failure_injection, 'ALLOC_FAILURE_INJECTION', value_when_none='DEFAULT'),
@@ -206,6 +212,9 @@ def configure_mode(mode):
if args.dpdk:
ingredients_to_cook.add('dpdk')

+ if args.spdk:
+ ingredients_to_cook.add('spdk')
+
# Generate a new build by pointing to the source directory.
if ingredients_to_cook:
# We need to use cmake-cooking for some dependencies.
diff --git a/cooking_recipe.cmake b/cooking_recipe.cmake
index b098132e..a2f5c566 100644
--- a/cooking_recipe.cmake
+++ b/cooking_recipe.cmake
@@ -299,3 +299,14 @@ cooking_ingredient (lz4
CONFIGURE_COMMAND <DISABLE>
BUILD_COMMAND <DISABLE>
INSTALL_COMMAND ${make_command} PREFIX=<INSTALL_DIR> install)
+
+cooking_ingredient (spdk
+ EXTERNAL_PROJECT_ARGS
+ SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/spdk
+ BUILD_IN_SOURCE ON
+ CONFIGURE_COMMAND
+ <SOURCE_DIR>/configure --with-dpdk --without-isal --disable-tests --disable-unit-tests --disable-examples --disable-apps --prefix=<INSTALL_DIR>
+ BUILD_COMMAND
+ ${make_command}
+ INSTALL_COMMAND
+ ${make_command} install)
diff --git a/demos/CMakeLists.txt b/demos/CMakeLists.txt
index 084e8298..245c98d9 100644
--- a/demos/CMakeLists.txt
+++ b/demos/CMakeLists.txt
@@ -111,5 +111,8 @@ seastar_add_demo (sharded_parameter
seastar_add_demo (file
SOURCES file_demo.cc)

+seastar_add_demo (spdk_bdev
+ SOURCES spdk_bdev_demo.cc)
+
seastar_add_demo (tutorial_examples
SOURCES tutorial_examples.cc)
diff --git a/demos/spdk_bdev_demo.cc b/demos/spdk_bdev_demo.cc
new file mode 100644
index 00000000..c03d8ad4
--- /dev/null
+++ b/demos/spdk_bdev_demo.cc
@@ -0,0 +1,90 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#include <cstring>
+#include <limits>
+#include <random>
+
+#include <seastar/core/app-template.hh>
+
+#include <seastar/core/aligned_buffer.hh>
+#include <seastar/core/file.hh>
+#include <seastar/core/fstream.hh>
+#include <seastar/core/seastar.hh>
+#include <seastar/core/spdk_app.hh>
+#include <seastar/core/spdk_bdev.hh>
+#include <seastar/core/spdk_lib.hh>
+#include <seastar/core/sstring.hh>
+#include <seastar/core/temporary_buffer.hh>
+#include <seastar/core/loop.hh>
+#include <seastar/core/io_intent.hh>
+#include <seastar/util/log.hh>
+#include <seastar/util/tmp_file.hh>
+
+using namespace seastar;
+namespace bpo = boost::program_options;
+
+seastar::logger spdk_logger("spdk_demo");
+
+int main(int ac, char** av) {
+ seastar::app_template seastar_app;
+ seastar_app.add_positional_options({
+ { "bdev", bpo::value<std::string>()->default_value("Malloc0"),
+ "bdev", 1 },
+ });
+ spdk::app spdk_app;
+ spdk_logger.info("app run");
+ return seastar_app.run(ac, av, [&] {
+ spdk_logger.info("demo running");
+ auto bdev_name = seastar_app.configuration()["bdev"].as<std::string>();
+ return spdk_app.run(seastar_app.configuration(), [bdev_name] {
+ spdk_logger.info("bdev.open");
+ auto bdev = spdk::block_device::open(bdev_name);
+ uint32_t block_size = bdev->block_size();
+ size_t buf_align = bdev->memory_dma_alignment();
+ auto buf = spdk::dma_zmalloc(block_size, buf_align);
+ return do_with(temporary_buffer<char>(std::move(buf)),
+ std::unique_ptr<spdk::block_device>(std::move(bdev)),
+ [] (temporary_buffer<char>& buf,
+ std::unique_ptr<spdk::block_device>& bdev) {
+ spdk_logger.info("bdev.write");
+ return bdev->write(0, buf.get(), buf.size()).then([&] {
+ spdk_logger.info("bdev.read");
+ memset(buf.get_write(), 0xff, buf.size());
+ return bdev->read(0, buf.get_write(), buf.size());
+ }).then([&buf] {
+ spdk_logger.info("bdev.read");
+ temporary_buffer<char> good{buf.size()};
+ memset(good.get_write(), 0, good.size());
+ if (int where = memcmp(good.get(), buf.get(), buf.size());
+ where != 0) {
+ spdk_logger.error("buf mismatches at {}!", where);
+ } else {
+ spdk_logger.info("buf matches!");
+ }
+ });
+ }).handle_exception_type([&] (std::system_error& e) {
+ spdk_logger.error("error while writing/reading {}", e.what());
+ });
+ });
+ });
+}
diff --git a/include/seastar/core/smp.hh b/include/seastar/core/smp.hh
index 1f58a08d..ab3e86b7 100644
--- a/include/seastar/core/smp.hh
+++ b/include/seastar/core/smp.hh
@@ -292,7 +292,7 @@ class smp_message_queue {
class smp : public std::enable_shared_from_this<smp> {
alien::instance& _alien;
std::vector<posix_thread> _threads;
- std::vector<std::function<void ()>> _thread_loops; // for dpdk
+ std::vector<std::function<void ()>> _thread_loops; // for dpdk/spdk
std::optional<boost::barrier> _all_event_loops_done;
struct qs_deleter {
void operator()(smp_message_queue** qs) const;
@@ -301,6 +301,7 @@ class smp : public std::enable_shared_from_this<smp> {
static thread_local smp_message_queue**_qs;
static thread_local std::thread::id _tmain;
bool _using_dpdk = false;
+ bool _using_spdk = false;

template <typename Func>
using returns_future = is_future<std::result_of_t<Func()>>;
diff --git a/include/seastar/core/spdk_app.hh b/include/seastar/core/spdk_app.hh
new file mode 100644
index 00000000..041c16ee
--- /dev/null
+++ b/include/seastar/core/spdk_app.hh
@@ -0,0 +1,54 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#pragma once
+
+#include <boost/program_options.hpp>
+#include <seastar/core/future.hh>
+#include <seastar/core/resource.hh>
+#include <seastar/core/sharded.hh>
+#include <seastar/core/spdk_thread.hh>
+
+struct spdk_thread;
+
+namespace seastar::spdk {
+
+namespace env {
+ void start(const std::vector<resource::cpu>& cpuset,
+ const boost::program_options::variables_map& opts);
+ void stop() noexcept;
+};
+
+class app {
+public:
+ future<> run(const boost::program_options::variables_map& opts,
+ std::function<future<> ()>&& func) noexcept;
+ static boost::program_options::options_description get_options_description();
+private:
+ future<> start(const boost::program_options::variables_map& opts);
+ future<> stop();
+private:
+ sharded<executor> sharded_executor;
+ spdk_thread* app_thread = nullptr;
+};
+
+}
diff --git a/include/seastar/core/spdk_bdev.hh b/include/seastar/core/spdk_bdev.hh
new file mode 100644
index 00000000..0c10f70f
--- /dev/null
+++ b/include/seastar/core/spdk_bdev.hh
@@ -0,0 +1,57 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#pragma once
+
+#include <memory>
+#include <seastar/core/future.hh>
+
+struct spdk_bdev;
+struct spdk_bdev_desc;
+struct spdk_io_channel;
+
+namespace seastar::spdk {
+
+class block_device {
+public:
+ static std::unique_ptr<block_device> open(const std::string& bdev_name);
+ ~block_device();
+
+ future<> write(uint64_t pos, const void* buffer, size_t len);
+ future<> read(uint64_t pos, void* buffer, size_t len);
+
+ uint32_t block_size() const;
+ size_t memory_dma_alignment() const;
+
+private:
+ block_device() = default;
+ static void event_cb(int /* spdk_bdev_event_type */ type,
+ struct spdk_bdev* bdev,
+ void* event_ctx);
+
+private:
+ spdk_bdev* bdev = nullptr;
+ spdk_bdev_desc* desc = nullptr;
+ spdk_io_channel* io_channel = nullptr;
+};
+
+}
diff --git a/include/seastar/core/spdk_lib.hh b/include/seastar/core/spdk_lib.hh
new file mode 100644
index 00000000..ef9f818c
--- /dev/null
+++ b/include/seastar/core/spdk_lib.hh
@@ -0,0 +1,33 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#pragma once
+
+#include <bitset>
+#include <seastar/core/future.hh>
+#include <boost/program_options.hpp>
+
+namespace seastar::spdk {
+
+temporary_buffer<char> dma_zmalloc(size_t size, size_t align);
+
+}
diff --git a/include/seastar/core/spdk_thread.hh b/include/seastar/core/spdk_thread.hh
new file mode 100644
index 00000000..670af3ba
--- /dev/null
+++ b/include/seastar/core/spdk_thread.hh
@@ -0,0 +1,100 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#pragma once
+
+#include <boost/intrusive/list.hpp>
+#include <seastar/core/future.hh>
+#include <seastar/core/reactor.hh>
+#include <seastar/core/sharded.hh>
+#include <memory>
+
+struct spdk_thread;
+
+namespace bi = boost::intrusive;
+
+namespace seastar::spdk {
+
+class thread_entry {
+ bi::list_member_hook<> _hook;
+public:
+ using container_list_t = bi::list<thread_entry,
+ bi::member_hook<thread_entry, bi::list_member_hook<>, &thread_entry::_hook>>;
+ spdk_thread* thread() noexcept;
+ static thread_entry* from_thread(spdk_thread* thread);
+};
+
+namespace internal {
+
+class thread_msg {
+public:
+ virtual void run_and_dispose() noexcept = 0;
+ seastar::future<> get_future() {
+ return _pr.get_future();
+ }
+protected:
+ seastar::promise<> _pr;
+ ~thread_msg() = default;
+};
+
+template <typename Func>
+class lambda_thread_msg final : public thread_msg {
+ Func _func;
+public:
+ lambda_thread_msg(Func&& func) : _func(std::move(func)) {}
+ void run_and_dispose() noexcept final {
+ std::move(_func)();
+ _pr.set_value();
+ delete this;
+ }
+};
+}
+
+class executor : public peering_sharded_service<executor> {
+ using sharded_executor_t = sharded<executor>;
+public:
+ future<> start();
+ future<> stop();
+ bool poll();
+ void schedule_thread(spdk_thread* thread);
+
+ template <typename Func>
+ static future<> send_to(spdk_thread *thread, Func&& func) noexcept {
+ auto msg = new internal::lambda_thread_msg<Func>(std::move(func));
+ return do_send_to(thread, msg);
+ }
+ static sharded_executor_t& instance();
+
+private:
+ static future<> do_send_to(spdk_thread* thread, internal::thread_msg* msg);
+ std::unique_ptr<reactor::poller> poller;
+ thread_entry::container_list_t _threads;
+ uint64_t _tsc_last;
+ static sharded_executor_t* s_executor;
+};
+
+struct run_with_spdk_thread {
+ run_with_spdk_thread(spdk_thread* thread);
+ ~run_with_spdk_thread();
+};
+
+}
diff --git a/src/core/app-template.cc b/src/core/app-template.cc
index e6698662..22e74262 100644
--- a/src/core/app-template.cc
+++ b/src/core/app-template.cc
@@ -23,6 +23,7 @@
#include <seastar/core/reactor.hh>
#include <seastar/core/alien.hh>
#include <seastar/core/scollectd.hh>
+#include <seastar/core/spdk_app.hh>
#include <seastar/core/metrics_api.hh>
#include <boost/program_options.hpp>
#include <seastar/core/print.hh>
@@ -67,7 +68,9 @@ app_template::app_template(app_template::config cfg)
_opts_conf_file.add(smp::get_options_description());
_opts_conf_file.add(scollectd::get_options_description());
_opts_conf_file.add(log_cli::get_options_description());
-
+#ifdef SEASTAR_HAVE_SPDK
+ _opts_conf_file.add(spdk::app::get_options_description());
+#endif
_opts.add(_opts_conf_file);
}

diff --git a/src/core/reactor.cc b/src/core/reactor.cc
index 97d557c5..374b9055 100644
--- a/src/core/reactor.cc
+++ b/src/core/reactor.cc
@@ -93,6 +93,9 @@
#include <seastar/core/dpdk_rte.hh>
#include <rte_lcore.h>
#include <rte_launch.h>
+#elif defined(SEASTAR_HAVE_SPDK)
+#include <seastar/core/spdk_app.hh>
+#include <spdk/env.h>
#endif
#include <seastar/core/prefetch.hh>
#include <exception>
@@ -3669,6 +3672,11 @@ void smp::allocate_reactor(unsigned id, reactor_backend_selector rbs, reactor_co
void smp::cleanup() noexcept {
smp::_threads = std::vector<posix_thread>();
_thread_loops.clear();
+#ifdef SEASTAR_HAVE_SPDK
+ if (_using_spdk) {
+ spdk::env::stop();
+ }
+#endif
}

void smp::cleanup_cpu() {
@@ -3927,6 +3935,8 @@ void smp::configure(boost::program_options::variables_map configuration, reactor

#ifdef SEASTAR_HAVE_DPDK
_using_dpdk = configuration.count("dpdk-pmd");
+#elif defined(SEASTAR_HAVE_SPDK)
+ _using_spdk = configuration.count("spdk-pmd");
#endif
auto thread_affinity = configuration["thread-affinity"].as<bool>();
if (configuration.count("overprovisioned")
@@ -3935,6 +3945,8 @@ void smp::configure(boost::program_options::variables_map configuration, reactor
}
if (!thread_affinity && _using_dpdk) {
fmt::print("warning: --thread-affinity 0 ignored in dpdk mode\n");
+ } else if (!thread_affinity && _using_spdk) {
+ fmt::print("warning: --thread-affinity 0 ignored in spdk mode\n");
}
auto mbind = configuration["mbind"].as<bool>();
if (!thread_affinity) {
@@ -4080,6 +4092,15 @@ void smp::configure(boost::program_options::variables_map configuration, reactor
}
dpdk::eal::init(cpus, configuration);
}
+#elif defined(SEASTAR_HAVE_SPDK)
+ if (_using_spdk) {
+ try {
+ spdk::env::start(allocations, configuration);
+ } catch (const std::exception& e) {
+ seastar_logger.error(e.what());
+ _exit(1);
+ }
+ }
#endif

// Better to put it into the smp class, but at smp construction time
diff --git a/src/core/spdk_app.cc b/src/core/spdk_app.cc
new file mode 100644
index 00000000..f1a21d7a
--- /dev/null
+++ b/src/core/spdk_app.cc
@@ -0,0 +1,310 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#include <seastar/core/spdk_app.hh>
+#include <seastar/util/defer.hh>
+#include <seastar/core/thread.hh>
+#include <spdk/cpuset.h>
+#include <spdk/env.h>
+#include <spdk/init.h>
+#include <spdk/log.h>
+#include <spdk/string.h>
+#include <spdk/thread.h>
+
+namespace seastar::spdk {
+
+seastar::logger logger("spdk");
+
+namespace env {
+
+void start(const std::vector<resource::cpu>& cpuset,
+ const boost::program_options::variables_map& opts)
+{
+ logger.info("env starting");
+ spdk_env_opts env_opts = {};
+ spdk_env_opts_init(&env_opts);
+
+ std::string core_list;
+ for (auto& cpu : cpuset) {
+ if (!core_list.empty()) {
+ core_list.append(",");
+ }
+ core_list.append(std::to_string(cpu.cpu_id));
+ }
+ core_list = fmt::format("[{}]", core_list);
+ env_opts.core_mask = core_list.c_str();
+
+ if (opts.count("main-core")) {
+ env_opts.main_core = std::stoi(opts["main-core"].as<std::string>());
+ }
+ if (opts.count("mem-size")) {
+ const std::string mem_size_str = opts["mem-size"].as<std::string>();
+ uint64_t mem_size_mb;
+ bool mem_size_has_prefix;
+ if (spdk_parse_capacity(mem_size_str.c_str(),
+ &mem_size_mb,
+ &mem_size_has_prefix) != 0) {
+ throw std::invalid_argument(
+ fmt::format("invalid memory pool size `--mem-size {}`",
+ mem_size_str));
+ }
+ if (mem_size_has_prefix) {
+ // convert mem size to MiB
+ mem_size_mb >>= 20;
+ }
+ if (mem_size_mb > std::numeric_limits<int>::max()) {
+ throw std::invalid_argument(
+ fmt::format("memory pool size too large `--mem-size {}`",
+ mem_size_mb));
+ }
+ env_opts.mem_size = static_cast<int>(mem_size_mb);
+ }
+ if (opts.count("no-pci")) {
+ env_opts.no_pci = true;
+ }
+ if (opts.count("single-file-segments")) {
+ env_opts.hugepage_single_segments = true;
+ }
+ if (opts.count("huge-unlink")) {
+ env_opts.unlink_hugepage = true;
+ }
+ std::string hugedir;
+ if (opts.count("hugepages")) {
+ hugedir = opts["hugepages"].as<std::string>();
+ env_opts.hugedir = hugedir.c_str();
+ }
+ std::vector<spdk_pci_addr> pci_addrs;
+ if (opts.count("pci-blocked") && opts.count("pci-allowed")) {
+ throw std::invalid_argument("--pci-blocked and --pci-allowed cannot be used at the same time");
+ } else if (opts.count("pci-blocked")) {
+ for (const auto& bdf : opts["pci-blocked"].as<std::vector<std::string>>()) {
+ spdk_pci_addr pci_addr;
+ spdk_pci_addr_parse(&pci_addr, bdf.c_str());
+ pci_addrs.push_back(pci_addr);
+ }
+ env_opts.pci_blocked = &pci_addrs[0];
+ env_opts.num_pci_addr = pci_addrs.size();
+ } else if (opts.count("pci-allowed")) {
+ for (const auto& bdf : opts["pci-allowed"].as<std::vector<std::string>>()) {
+ spdk_pci_addr pci_addr;
+ spdk_pci_addr_parse(&pci_addr, bdf.c_str());
+ pci_addrs.push_back(pci_addr);
+ }
+ env_opts.pci_allowed = &pci_addrs[0];
+ env_opts.num_pci_addr = pci_addrs.size();
+ }
+ std::string iova_mode;
+ if (opts.count("iova-mode")) {
+ iova_mode = opts["iova-mode"].as<std::string>();
+ env_opts.iova_mode = iova_mode.c_str();
+ }
+ if (spdk_env_init(&env_opts) < 0) {
+ throw std::runtime_error("unable to initialize SPDK env");
+ }
+ logger.info("env starting: done");
+}
+
+void stop() noexcept
+{
+ logger.info("env stopping");
+ spdk_env_fini();
+}
+}
+}
+
+namespace {
+class subsystem_init_desc {
+ seastar::promise<> _pr;
+public:
+ void complete_with(int rc) {
+ seastar::spdk::logger.info("subsystem initialized: {}", rc);
+ if (rc) {
+ _pr.set_exception(std::runtime_error("unable to init SPDK subsystem"));
+ } else {
+ _pr.set_value();
+ }
+ delete this;
+ }
+ seastar::future<> get_future() {
+ return _pr.get_future();
+ }
+};
+
+class msg_desc {
+ seastar::promise<> _pr;
+public:
+ void complete() {
+ _pr.set_value();
+ delete this;
+ }
+ seastar::future<> get_future() {
+ return _pr.get_future();
+ }
+};
+
+constexpr seastar::log_level spdk_log_to_seastar_level(int level)
+{
+ switch (level) {
+ case SPDK_LOG_DISABLED:
+ return seastar::log_level(static_cast<int>(seastar::log_level::trace) + 1);
+ case SPDK_LOG_ERROR:
+ return seastar::log_level::error;
+ case SPDK_LOG_WARN:
+ return seastar::log_level::warn;
+ case SPDK_LOG_NOTICE:
+ return seastar::log_level::info;
+ case SPDK_LOG_INFO:
+ return seastar::log_level::debug;
+ case SPDK_LOG_DEBUG:
+ return seastar::log_level::trace;
+ default:
+ return seastar::log_level::info;
+ }
+}
+
+void spdk_do_log(int level, const char *file, const int line,
+ const char *func, const char *format, va_list args)
+{
+ static const int MAX_TMPBUF = 1024;
+ char buf[MAX_TMPBUF];
+ int len = vsnprintf(buf, sizeof(buf), format, args);
+ if (len > 0 && buf[len - 1] == '\n') {
+ // remove the trailing newline, as seastar always add it for us
+ buf[len - 1] = '\0';
+ }
+ seastar::spdk::logger.log(spdk_log_to_seastar_level(level),
+ "{}:{:4d}:{}: {}",
+ file, line, func, buf);
+}
+
+}
+
+namespace seastar::spdk {
+
+future<> app::run(const boost::program_options::variables_map& opts,
+ std::function<future<> ()>&& func) noexcept
+{
+ spdk_log_open(spdk_do_log);
+
+ return seastar::async([opts, func = std::move(func), this] {
+ sharded_executor.start().then([this] {
+ return sharded_executor.invoke_on_all(&executor::start);
+ }).get();
+ auto stop_executor = seastar::defer([&] () noexcept {
+ sharded_executor.stop().get();
+ });
+ assert(app_thread == nullptr);
+ spdk_cpuset cpu_mask = {};
+ spdk_cpuset_set_cpu(&cpu_mask, spdk_env_get_current_core(), true);
+ app_thread = spdk_thread_create("app_thread", &cpu_mask);
+ if (app_thread == nullptr) {
+ throw std::bad_alloc();
+ }
+ run_with_spdk_thread run_with(app_thread);
+ start(opts).get();
+ auto stop_me = seastar::defer([&] () noexcept {
+ stop().get();
+ });
+
+ futurize_invoke(func).get();
+ });
+}
+
+static void spdk_subsystem_init_cpl(int rc, void* arg)
+{
+ auto* desc = static_cast<subsystem_init_desc*>(arg);
+ desc->complete_with(rc);
+}
+
+future<> app::start(const boost::program_options::variables_map& opts)
+{
+ logger.info("app start");
+ // ensure that start() is able to find app_thread using spdk_get_thread(),
+ // the underlying SPDK functions need to hook poolers to "this" thread.
+ auto init_desc = std::make_unique<subsystem_init_desc>();
+ auto init_done = init_desc->get_future();
+ auto rpc_addr = opts["spdk-rpc-socket"].as<std::string>();
+ if (opts.count("spdk-config")) {
+
+ auto spdk_config = opts["spdk-config"].as<std::string>();
+ spdk_subsystem_init_from_json_config(
+ spdk_config.c_str(),
+ rpc_addr.c_str(),
+ spdk_subsystem_init_cpl,
+ init_desc.release(),
+ opts.count("spdk-json-ignore-init-errors"));
+ } else {
+ spdk_subsystem_init(
+ spdk_subsystem_init_cpl,
+ init_desc.release());
+ }
+ return init_done.then([rpc_addr] {
+ if (int rc = spdk_rpc_initialize(rpc_addr.c_str()); rc != 0) {
+ throw std::runtime_error("unable to init SPDK RPC");
+ }
+ });
+}
+
+// seastar takes care of the cleanup, so just use a dummy callback here
+static void spdk_subsystem_fini_cpl(void* arg)
+{
+ auto *desc = static_cast<msg_desc*>(arg);
+ desc->complete();
+}
+
+future<> app::stop()
+{
+ logger.info("app stopping");
+ spdk_rpc_finish();
+ auto fini_desc = std::make_unique<msg_desc>();
+ auto fini_done = fini_desc->get_future();
+ spdk_subsystem_fini(spdk_subsystem_fini_cpl, fini_desc.release());
+ return fini_done;
+}
+
+boost::program_options::options_description app::get_options_description()
+{
+ namespace bpo = boost::program_options;
+ bpo::options_description opts("SPDK options");
+ opts.add_options()
+ ("spdk-pmd", "Use SPDK PMD drivers")
+ ("spdk-rpc-socket",
+ bpo::value<std::string>()->default_value(SPDK_DEFAULT_RPC_ADDR),
+ "RPC listen address")
+ ("spdk-config", bpo::value<std::string>(), "JSON config file")
+ ("spdk-json-ignore-init-errors", "don't exit on invalid config entry")
+ ("iova-mode", bpo::value<std::string>(),
+ "set IOVA mode ('pa' for IOVA_PA and 'va' for IOVA_VA)")
+ ("huge-unlink", "unlink huge files after initialization")
+ ("mem-size", bpo::value<std::string>(),
+ "memory size in MB for DPDK")
+ ("no-pci", "disable PCI access")
+ ("single-file-segments", "force creating just one hugetlbfs file")
+ ("pci-blocked", bpo::value<std::vector<std::string>>()->multitoken(),
+ "pci addr to block (can be used more than once)")
+ ("pci-allowed", bpo::value<std::vector<std::string>>()->multitoken(),
+ "pci addr to allow (--pci-blocked and --pci-allowed cannot be used at the same time)")
+ ;
+ return opts;
+}
+
+}
diff --git a/src/core/spdk_bdev.cc b/src/core/spdk_bdev.cc
new file mode 100644
index 00000000..9c3828c3
--- /dev/null
+++ b/src/core/spdk_bdev.cc
@@ -0,0 +1,158 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#include <seastar/core/spdk_bdev.hh>
+#include <seastar/util/log.hh>
+#include <spdk/bdev.h>
+#include <spdk/string.h>
+#include <spdk/thread.h>
+#include <memory>
+
+namespace {
+
+class io_completion_desc {
+ seastar::promise<> _pr;
+public:
+ void complete_with(struct spdk_bdev_io* bdev_io, bool success) {
+ if (success) {
+ _pr.set_value();
+ } else {
+ _pr.set_exception(
+ std::system_error(ECONNABORTED,
+ std::system_category(),
+ "bdev IO error"));
+ }
+ if (bdev_io != nullptr) {
+ spdk_bdev_free_io(bdev_io);
+ }
+ delete this;
+ }
+ template <class E>
+ void fail_with(E&& e) {
+ _pr.set_exception(std::make_exception_ptr(std::move(e)));
+ }
+ seastar::future<> get_future() {
+ return _pr.get_future();
+ }
+};
+
+}
+
+namespace seastar::spdk {
+
+extern logger logger;
+
+block_device::~block_device()
+{
+ if (io_channel) {
+ spdk_put_io_channel(io_channel);
+ }
+ if (desc) {
+ spdk_bdev_close(desc);
+ }
+}
+
+std::unique_ptr<block_device> block_device::open(const std::string& bdev_name)
+{
+ std::unique_ptr<block_device> bdev{new block_device};
+ int rc = spdk_bdev_open_ext(bdev_name.c_str(),
+ true,
+ reinterpret_cast<spdk_bdev_event_cb_t>(event_cb),
+ bdev.get(),
+ &bdev->desc);
+ if (rc) {
+ logger.error("unable to open bdev {}: {}",
+ bdev_name, spdk_strerror(-rc));
+ throw std::runtime_error(fmt::format("unable to open bdev {}", bdev_name));
+ }
+ bdev->bdev = spdk_bdev_desc_get_bdev(bdev->desc);
+ bdev->io_channel = spdk_bdev_get_io_channel(bdev->desc);
+ if (bdev->io_channel == nullptr) {
+ logger.error("unable to open bdev I/O channel");
+ throw std::runtime_error(fmt::format("unable to open io channel"));
+ }
+ return bdev;
+}
+
+void block_device::event_cb(int type, spdk_bdev* bdev, void* event_ctx)
+{}
+
+static void spdk_bdev_io_cpl(spdk_bdev_io* bdev_io, bool success, void* arg)
+{
+ logger.info("io done");
+ auto* desc = static_cast<io_completion_desc*>(arg);
+ desc->complete_with(bdev_io, success);
+}
+
+future<> block_device::write(uint64_t pos, const void* buffer, size_t len)
+{
+ assert(bdev);
+ logger.info("write({}, {})", pos, len);
+ auto io_desc = std::make_unique<io_completion_desc>();
+ auto io_done = io_desc->get_future();
+ int rc = spdk_bdev_write(desc, io_channel,
+ const_cast<void*>(buffer), pos, len,
+ spdk_bdev_io_cpl, io_desc.release());
+ if (rc == 0) {
+ return io_done;
+ }
+ if (rc == -ENOMEM) {
+ io_desc->fail_with(std::bad_alloc());
+ } else {
+ // -EBADF or -EINVAL
+ io_desc->fail_with(std::invalid_argument("out of range"));
+ }
+ return io_done;
+}
+
+future<> block_device::read(uint64_t pos, void* buffer, size_t len)
+{
+ assert(bdev);
+ auto io_desc = std::make_unique<io_completion_desc>();
+ auto io_done = io_desc->get_future();
+ int rc = spdk_bdev_read(desc, io_channel, buffer, pos, len,
+ spdk_bdev_io_cpl, io_desc.release());
+ if (rc == 0) {
+ return io_done;
+ }
+ if (rc == -ENOMEM) {
+ io_desc->fail_with(std::bad_alloc());
+ } else {
+ // --EINVAL
+ io_desc->fail_with(std::invalid_argument("out of range"));
+ }
+ return io_done;
+}
+
+uint32_t block_device::block_size() const
+{
+ assert(bdev);
+ return spdk_bdev_get_block_size(bdev);
+}
+
+size_t block_device::memory_dma_alignment() const
+{
+ assert(bdev);
+ return spdk_bdev_get_buf_align(bdev);
+}
+
+}
diff --git a/src/core/spdk_lib.cc b/src/core/spdk_lib.cc
new file mode 100644
index 00000000..f21899fb
--- /dev/null
+++ b/src/core/spdk_lib.cc
@@ -0,0 +1,39 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#include <seastar/core/spdk_lib.hh>
+#include <spdk/env.h>
+
+namespace seastar::spdk {
+
+temporary_buffer<char> dma_zmalloc(size_t size, size_t align)
+{
+ void* buf = spdk_dma_zmalloc_socket(size, align, nullptr, SPDK_ENV_SOCKET_ID_ANY);
+ if (!buf) {
+ throw std::bad_alloc();
+ }
+ return {static_cast<char*>(buf), size, seastar::make_deleter([buf] {
+ spdk_dma_free(buf);
+ })};
+}
+
+}
diff --git a/src/core/spdk_thread.cc b/src/core/spdk_thread.cc
new file mode 100644
index 00000000..07b8dc4e
--- /dev/null
+++ b/src/core/spdk_thread.cc
@@ -0,0 +1,158 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#include <seastar/util/log.hh>
+#include <seastar/core/spdk_thread.hh>
+#include <spdk/thread.h>
+
+namespace seastar::spdk {
+
+extern logger logger;
+
+spdk_thread* thread_entry::thread() noexcept
+{
+ return spdk_thread_get_from_ctx(reinterpret_cast<void*>(this));
+}
+
+thread_entry* thread_entry::from_thread(spdk_thread* thread)
+{
+ return static_cast<thread_entry*>(spdk_thread_get_ctx(thread));
+}
+
+static int thread_do_op(spdk_thread* thread, spdk_thread_op op)
+{
+ switch (op) {
+ case SPDK_THREAD_OP_NEW: {
+ spdk_cpuset* cpumask = spdk_thread_get_cpumask(thread);
+ unsigned shard = 0;
+ while (shard < smp::count) {
+ if (spdk_cpuset_get_cpu(cpumask, shard)) {
+ break;
+ }
+ }
+ if (shard == smp::count) {
+ logger.error("unable to find executor for new thread");
+ return -1;
+ }
+ // FIXME: future is discarded
+ (void)executor::instance().invoke_on(
+ shard, [thread] (executor& group) {
+ group.schedule_thread(thread);
+ });
+ return 0;
+ }
+ case SPDK_THREAD_OP_RESCHED:
+ return -ENOTSUP;
+ default:
+ return -ENOTSUP;
+ }
+}
+
+static bool thread_op_supported(spdk_thread_op op)
+{
+ switch (op) {
+ case SPDK_THREAD_OP_NEW:
+ return true;
+ case SPDK_THREAD_OP_RESCHED:
+ return false;
+ default:
+ return false;
+ }
+}
+
+future<> executor::start()
+{
+ logger.info("executor#{} start", seastar::this_shard_id());
+ poller = std::make_unique<reactor::poller>(reactor::poller::simple([this] {
+ return poll();
+ }));
+ if (seastar::this_shard_id() == 0) {
+ spdk_thread_lib_init_ext(thread_do_op, thread_op_supported,
+ sizeof(thread_entry));
+ sharded_executor_t& instance = container();
+ s_executor = &instance;
+ }
+ return make_ready_future<>();
+}
+
+future<> executor::stop()
+{
+ if (seastar::this_shard_id() == 0) {
+ s_executor = nullptr;
+ spdk_thread_lib_fini();
+ }
+ poller.reset();
+ return make_ready_future<>();
+}
+
+bool executor::poll()
+{
+ int nr = 0;
+ for (auto& entry : _threads) {
+ spdk_thread *thread = entry.thread();
+ nr += spdk_thread_poll(thread, 0, _tsc_last);
+ _tsc_last = spdk_thread_get_last_tsc(thread);
+ if (__builtin_expect(spdk_thread_is_exited(thread) &&
+ spdk_thread_is_idle(thread), false)) {
+ _threads.erase(thread_entry::container_list_t::s_iterator_to(entry));
+ spdk_thread_destroy(thread);
+ }
+ }
+ logger.trace("poll(): {}", nr);
+ return nr > 0;
+}
+
+void executor::schedule_thread(spdk_thread* thread)
+{
+ _threads.push_back(*thread_entry::from_thread(thread));
+}
+
+executor::sharded_executor_t& executor::instance()
+{
+ assert(s_executor);
+ return *s_executor;
+}
+
+static void spdk_msg_call(void* ctx)
+{
+ auto* task = static_cast<internal::thread_msg*>(ctx);
+ task->run_and_dispose();
+}
+
+future<> executor::do_send_to(spdk_thread* thread,
+ internal::thread_msg* msg)
+{
+ spdk_thread_send_msg(thread, spdk_msg_call, msg);
+ return msg->get_future();
+}
+
+executor::sharded_executor_t* executor::s_executor = nullptr;
+
+run_with_spdk_thread::run_with_spdk_thread(spdk_thread* thread) {
+ spdk_set_thread(thread);
+}
+
+run_with_spdk_thread::~run_with_spdk_thread() {
+ spdk_set_thread(nullptr);
+}
+
+}
--
2.33.0

kefu chai

<tchaikov@gmail.com>
unread,
Sep 28, 2021, 11:25:24 AM9/28/21
to Nicolas Le Scouarnec, seastar-dev
On Tue, Sep 28, 2021 at 12:16 AM Nicolas Le Scouarnec
<Nicolas.L...@broadpeak.tv> wrote:
>
> Hi,
>

hello, Nicolas,

> Just curious, are you working on updating the DPDK embedded into Seastar ?

yeah, i am.

> If you're interested I have a patched seastar working with (cooking-based) meson-build DPDK 20.11 , I can push it to github if you want : I've fixed the compilation (cmake-cooking calling meson, and cmake fetching relevant info from dpdk generated pkg-config). It compiles, and runs well as we've used it for one year.

the same change is also included in the changeset. could you help review it?

> However, I have not yet proposed it for inclusion into seastar as it currently lacks two code snippet to get "feature" parity : multiqueue settings (using rte_flow) and some stuff around memory allocation . Indeed in our application, we don't use multiqueue nor the zero-copy mechanism : I couldn't find time work on these.
>

i think the rte_flow thing has been fixed in a recent DPDK version.
could you be more specific on the memory allocation part?

> Let me know if you're interested in it.
>
> Nicolas
>
>
> > -----Original Message-----
> > From: seast...@googlegroups.com <seast...@googlegroups.com> On
> > Behalf Of Kefu Chai
> > Sent: Monday, 27 September 2021 17:33
> > To: seast...@googlegroups.com
> > Cc: Kefu Chai <tcha...@gmail.com>
> > Subject: [seastar-dev] [PATCH v1 2/7] dpdk: s/ether_addr/rte_ether_addr/
> >
> > * s/ether_addr/rte_ether_addr/
> > * s/ether_hdr/rte_ether_hdr/
> >
> > ether_addr was renamed to ether_addr in
> > 6d13ea8e8e49ab957deae2bba5ecf4a4bfe747d1,
> > see
> > https://eur02.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.co
> > m%2FDPDK%2Fdpdk%2Fcommit%2F6d13ea8e8e49ab957deae2bba5ecf4a4bfe7
> > 47d1&amp;data=04%7C01%7Cnicolas.lescouarnec%40broadpeak.tv%7C15f962
> > c584df4be30b5d08d981cc1e64%7C0ebe44eac9c9438da0407e699f358ed4%7C0
> > %7C0%7C637683535954172914%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4
> > wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&am
> > p;sdata=i8tPPcsfEh1ANvttAnoOykOD8ouRxkHGH56kpXdk1ew%3D&amp;reserve
> > d=0
> > --
> > You received this message because you are subscribed to the Google Groups
> > "seastar-dev" group.
> > To unsubscribe from this group and stop receiving emails from it, send an email
> > to seastar-dev...@googlegroups.com.
> > To view this discussion on the web visit
> > https://eur02.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgroups.g
> > oogle.com%2Fd%2Fmsgid%2Fseastar-dev%2F20210927153259.190008-3-
> > tchaikov%2540gmail.com&amp;data=04%7C01%7Cnicolas.lescouarnec%40broa
> > dpeak.tv%7C15f962c584df4be30b5d08d981cc1e64%7C0ebe44eac9c9438da040
> > 7e699f358ed4%7C0%7C0%7C637683535954172914%7CUnknown%7CTWFpbGZ
> > sb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0
> > %3D%7C1000&amp;sdata=XqPmr6q0vLGwZ0r9EALlebfV3UMMNt8T20POQMbd
> > hAA%3D&amp;reserved=0.



--
Regards
Kefu Chai

Nicolas Le Scouarnec

<Nicolas.LeScouarnec@broadpeak.tv>
unread,
Sep 28, 2021, 1:38:58 PM9/28/21
to Kefu Chai, seastar-dev@googlegroups.com
Hi,

I don't intend to say that my approach was better or worse. I think yours is better in several point actually, so I'll just post a few diff snippet I noticed in case you think the changes could be merged advantageously.

> -----Original Message-----
> From: seast...@googlegroups.com <seast...@googlegroups.com> On
> Behalf Of Kefu Chai
> Sent: Monday, 27 September 2021 17:33
> To: seast...@googlegroups.com
> Cc: Kefu Chai <tcha...@gmail.com>
> Subject: [seastar-dev] [PATCH v1 1/7] build: build with dpdk v20.02
>
> * build dpdk using meson and ninja
> * override the options using meson options
> * disable the build of drivers using meson options
> * update the libraries name to be in sync with the latest dpdk
>
> - # No pmd driver code will be pulled in without "--whole-archive". To
> - # avoid exposing that to seastar users, combine dpdk into a single
> - # .o file.
> - add_custom_command (
> - OUTPUT seastar-dpdk.o
> - COMMAND ld -r -o seastar-dpdk.o --whole-archive ${dpdk_LIBRARIES} --no-
> whole-archive
> - )

I see that you include DPDK similarly to any library. If this arise as an issue to seastar maintainers and you would like to explore an alternative I had kept the original approach of re-combining all of dpdk as a single .o from the .a libraries declared in the dpdk pkg-config (but still had to pull some shared libraries used by DPDK such as zlib, pcap or mlx5).


> -
> set (dpdk_args
> - # gcc 10 defaults to -fno-common, which dpdk is not prepared for
> - "EXTRA_CFLAGS=-Wno-error -fcommon"
> - O=<BINARY_DIR>
> - DESTDIR=<INSTALL_DIR>
> - T=${dpdk_quadruple})
> + --default-library=static
> + -Dc_args="-Wno-error"
> + -Denable_docs=false
> + -Dtests=false
> + -Dexamples=
> + -Dmbuf_refcnt_atomic=false
> + -Dmax_memseg_lists=8192
> + -Ddisable_drivers="net/softnic,net/bonding"
> + -
> Ddisable_libs="kni,jobstats,lpm,acl,power,ip_frag,distributor,reorder,port,table,
> pipeline,flow_classify,bpf,efd,member"
> + -Dcpu_instruction_set=${Seastar_DPDK_MACHINE})

I disabled additional drivers and libs because they pulled additional dependencies I wasn't interested in or cause various issues during compilation on Centos 8 / Ubuntu 20.04 because I missed some headers. I don't know if it could be applicable (especially basedband, regex, event, .... if seastar uses mostly net/* ). For example crypto/*,cryptodev links openssl into seastar, which conflicted with our application that uses boringssl.

index c53d3aee..a49dbc42 100644
--- a/cooking_recipe.cmake
+++ b/cooking_recipe.cmake
@@ -247,37 +247,31 @@ cooking_ingredient (cryptopp

# Use the "native" profile that DPDK defines in `dpdk/config`, but in `dpdk_configure.cmake` we override
# CONFIG_RTE_MACHINE with `Seastar_DPDK_MACHINE`.
-if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
- set (dpdk_quadruple arm64-armv8a-linuxapp-gcc)
-else()
- set (dpdk_quadruple ${CMAKE_SYSTEM_PROCESSOR}-native-linuxapp-gcc)
-endif()
-
-set (dpdk_args
- # gcc 10 defaults to -fno-common, which dpdk is not prepared for
- "EXTRA_CFLAGS=-Wno-error -fcommon"
- O=<BINARY_DIR>
- DESTDIR=<INSTALL_DIR>
- T=${dpdk_quadruple})
+set ( dpdk_args
+ -Dibverbs_link=shared
+ -Dmax_memseg_lists=8192
+ -Dmax_memzone=7680
+ -Dmbuf_refcnf_atomic=false
+ -Ddisable_drivers=common/dpaax,common/octeontx,common/octeontx2,common/sfc_efx,common/qat,common/cpt,bus/dpaa,net/sfc,bus/ifpga,bus/fslmc,compress/*,crypto/*,baseband/*,regex/*,vdpa/*,event/*
+ -Ddisable_libs=compressdev,cryptodev
+ -Dmachine=${Seastar_DPDK_MACHINE}
+ )
+
If the folder already exists, meson won't redo the configuration, so to force it I added a first attempt with --reconfigure with a fallback to plain call.

cooking_ingredient (dpdk
EXTERNAL_PROJECT_ARGS
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dpdk
CONFIGURE_COMMAND
COMMAND
- ${CMAKE_COMMAND} -E chdir <SOURCE_DIR>
- make ${dpdk_args} config
- COMMAND
- ${CMAKE_COMMAND}
- -DSeastar_DPDK_MACHINE=${Seastar_DPDK_MACHINE}
- -DSeastar_DPDK_CONFIG_FILE_IN=<BINARY_DIR>/.config
- -DSeastar_DPDK_CONFIG_FILE_CHANGES=${CMAKE_CURRENT_SOURCE_DIR}/dpdk_config
- -DSeastar_DPDK_CONFIG_FILE_OUT=<BINARY_DIR>/${dpdk_quadruple}/.config
- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/dpdk_configure.cmake
+ meson --reconfigure --prefix=<INSTALL_DIR> --libdir lib ${dpdk_args} build <SOURCE_DIR> || meson --prefix=<INSTALL_DIR> --libdir lib ${dpdk_args} build <SOURCE_DIR>
BUILD_COMMAND <DISABLE>
INSTALL_COMMAND
- ${CMAKE_COMMAND} -E chdir <SOURCE_DIR>
- ${make_command} ${dpdk_args} install)
+ ${CMAKE_COMMAND} -E chdir build
+ ninja install)



Avi Kivity

<avi@scylladb.com>
unread,
Sep 29, 2021, 5:39:03 AM9/29/21
to Kefu Chai, seastar-dev@googlegroups.com

On 27/09/2021 18.32, Kefu Chai wrote:
> * build dpdk using meson and ninja


Please add meson to install-dependencies.sh.


> * override the options using meson options
> * disable the build of drivers using meson options
> * update the libraries name to be in sync with the latest dpdk


And add a note here about testing.


> Signed-off-by: Kefu Chai <tcha...@gmail.com>
> ---
> CMakeLists.txt | 22 ++++++-----------
> cmake/Finddpdk.cmake | 52 ++++++++++++++++++++++++++++++----------
> cooking_recipe.cmake | 57 +++++++++++++++++++++++---------------------
> dpdk | 2 +-


dpdk has some local patches in our tree:


0f486994114dbc68da07e7345674f7d83b54e0fc pmdinfogen: fix build with gcc 11
71db32d74c013f70bc455557f7ba098bb738115e avoid combining -r and
-export-dynamic linker options
7c29bbc804687fca5a2f71d05a120e81b2bd0066 net: i40e: add VLAN tag size to
RXMAX
6a70dd057977dbec7f4b20e341afa14391daef04 ixgbevf:
ixgbevf_dev_info_get(): return default tx_rs_thresh=1
d42481c44c2f4b555794bf15c47b1e0add9a1d8c ixgbe_pmd: forbid tx_rs_thresh
above 1 for all NICs but 82598


Some are backports and no doubt have to be dropped, but the others may
still be needed. Can you check?

Avi Kivity

<avi@scylladb.com>
unread,
Sep 29, 2021, 5:41:39 AM9/29/21
to Kefu Chai, seastar-dev@googlegroups.com

On 27/09/2021 18.32, Kefu Chai wrote:
> * s/ether_addr/rte_ether_addr/
> * s/ether_hdr/rte_ether_hdr/
>
> ether_addr was renamed to ether_addr in 6d13ea8e8e49ab957deae2bba5ecf4a4bfe747d1,
> see https://github.com/DPDK/dpdk/commit/6d13ea8e8e49ab957deae2bba5ecf4a4bfe747d1
> and the first release including this change was v19.08. let's update
> accordingly.
>
> the same applies to ether_hdr


Will the first patch compile without this? If not, they have to be squashed.

Avi Kivity

<avi@scylladb.com>
unread,
Sep 29, 2021, 5:43:30 AM9/29/21
to Kefu Chai, seastar-dev@googlegroups.com

On 27/09/2021 18.32, Kefu Chai wrote:
> this change pratically reverts eec89a57a43c070df9d3e29fa649df7237be9f39,
> because DPDK v21.08 includes the fix of


The first patch says v20.02 (why not the latest?). Does v20.02 also
contains the fix?

Avi Kivity

<avi@scylladb.com>
unread,
Sep 29, 2021, 5:45:13 AM9/29/21
to Kefu Chai, seastar-dev@googlegroups.com

On 27/09/2021 18.32, Kefu Chai wrote:
> the latter was deprecated in v19.11. see
> dpdk/doc/guides/rel_notes/release_19_11.rst


I see it was removed, so this also has to be squashed.

Avi Kivity

<avi@scylladb.com>
unread,
Sep 29, 2021, 5:46:32 AM9/29/21
to Kefu Chai, seastar-dev@googlegroups.com

On 27/09/2021 18.32, Kefu Chai wrote:
> Debian packages dpdk as shared libraries, would be great
> if we can use them if they are installed.


Do you know whether the shared libraries use atomics for managing
reference count? IIRC that was the main configuration choice.


Probably the option was removed, I remember some discussion about it.

Avi Kivity

<avi@scylladb.com>
unread,
Sep 29, 2021, 5:48:16 AM9/29/21
to Kefu Chai, seastar-dev@googlegroups.com
I forked spdk into github.com/scylladb in anticipation of this.


Which version does the commit reference? Please make a note here.

On 27/09/2021 18.32, Kefu Chai wrote:

Avi Kivity

<avi@scylladb.com>
unread,
Sep 29, 2021, 6:13:21 AM9/29/21
to Kefu Chai, seastar-dev@googlegroups.com

On 27/09/2021 18.32, Kefu Chai wrote:
> Signed-off-by: Kefu Chai <tcha...@gmail.com>


<snip cmake goo>


> diff --git a/include/seastar/core/smp.hh b/include/seastar/core/smp.hh
> index 1f58a08d..ab3e86b7 100644
> --- a/include/seastar/core/smp.hh
> +++ b/include/seastar/core/smp.hh
> @@ -292,7 +292,7 @@ class smp_message_queue {
> class smp : public std::enable_shared_from_this<smp> {
> alien::instance& _alien;
> std::vector<posix_thread> _threads;
> - std::vector<std::function<void ()>> _thread_loops; // for dpdk
> + std::vector<std::function<void ()>> _thread_loops; // for dpdk/spdk


IIRC this was needed because dpdk insisted on creating its own threads
rather than using caller-provided threads (just like Seastar). Probably
it's relaxed now and we can go back to using our own threads (no
requirement for this series of course). But does spdk create its own
threads too? I think it's more library like and doesn't care who creates
the threads.
Please put everything that's not supposed to be visible to the
application in the internal subnamespace. And anything not in the
internal subnamespace has to be documented.
Shouldn't this be a subclass of file::impl?


> + static std::unique_ptr<block_device> open(const std::string& bdev_name);
> + ~block_device();
> +
> + future<> write(uint64_t pos, const void* buffer, size_t len);
> + future<> read(uint64_t pos, void* buffer, size_t len);
> +
> + uint32_t block_size() const;
> + size_t memory_dma_alignment() const;
> +
> +private:
> + block_device() = default;
> + static void event_cb(int /* spdk_bdev_event_type */ type,
> + struct spdk_bdev* bdev,
> + void* event_ctx);
> +
> +private:
> + spdk_bdev* bdev = nullptr;
> + spdk_bdev_desc* desc = nullptr;
> + spdk_io_channel* io_channel = nullptr;


Please follow the convention that members start with _.
Too many #includes.


> +namespace seastar::spdk {
> +
> +temporary_buffer<char> dma_zmalloc(size_t size, size_t align);


Not sure why we need this. If we need zeroing allocation of temporary
buffers, but it in temporary_buffer.hh. Why is there special dma
allocation? Can't spdk use an iommu?
Please add explanations about the threading model. I wasn't able to
reverse-engineer it from the code.
Let's prefix all spdk options with spdk-, so it's not too confusing.


> + if (opts.count("huge-unlink")) {
> + env_opts.unlink_hugepage = true;
> + }
> + std::string hugedir;
> + if (opts.count("hugepages")) {
> + hugedir = opts["hugepages"].as<std::string>();
> + env_opts.hugedir = hugedir.c_str();
> + }


That's so sad. Can't it use an iommu? Requiring hugepages in 2021 is
backwards.


> + std::vector<spdk_pci_addr> pci_addrs;
> + if (opts.count("pci-blocked") && opts.count("pci-allowed")) {
> + throw std::invalid_argument("--pci-blocked and --pci-allowed cannot be used at the same time");
> + } else if (opts.count("pci-blocked")) {
> + for (const auto& bdf : opts["pci-blocked"].as<std::vector<std::string>>()) {
> + spdk_pci_addr pci_addr;
> + spdk_pci_addr_parse(&pci_addr, bdf.c_str());
> + pci_addrs.push_back(pci_addr);
> + }
> + env_opts.pci_blocked = &pci_addrs[0];
> + env_opts.num_pci_addr = pci_addrs.size();
> + } else if (opts.count("pci-allowed")) {
> + for (const auto& bdf : opts["pci-allowed"].as<std::vector<std::string>>()) {
> + spdk_pci_addr pci_addr;
> + spdk_pci_addr_parse(&pci_addr, bdf.c_str());
> + pci_addrs.push_back(pci_addr);
> + }
> + env_opts.pci_allowed = &pci_addrs[0];
> + env_opts.num_pci_addr = pci_addrs.size();
> + }
> + std::string iova_mode;
> + if (opts.count("iova-mode")) {
> + iova_mode = opts["iova-mode"].as<std::string>();


This seems to indicate iommu is supported.
For future-proofing, might be better to return a future here.
This is pretty sad, we now have a separate memory pool to keep track of.


If at all possible, let's use iommu and the regular Seastar memory.
Does spdk create threads for polling? It kind of defeats the purpose.
Can't it poll frrom out pollers?

tcha...@gmail.com

<tchaikov@gmail.com>
unread,
Sep 29, 2021, 12:18:06 PM9/29/21
to seastar-dev
On Wednesday, September 29, 2021 at 1:38:58 AM UTC+8 nicolas.l...@broadpeak.tv wrote:
Hi,

I don't intend to say that my approach was better or worse. I think yours is better in several point actually, so I'll just post a few diff snippet I noticed in case you think the changes could be merged advantageously.

thank you for your suggestions, Nicolas. i intended to keep the change minimal and to stick to the recommended practice by CMake and DPDK document. will include your change in a follow-up patchset.
 


> -----Original Message-----
> From: seast...@googlegroups.com <seast...@googlegroups.com> On
> Behalf Of Kefu Chai
> Sent: Monday, 27 September 2021 17:33
> To: seast...@googlegroups.com
> Cc: Kefu Chai <tcha...@gmail.com>
> Subject: [seastar-dev] [PATCH v1 1/7] build: build with dpdk v20.02
>
> * build dpdk using meson and ninja
> * override the options using meson options
> * disable the build of drivers using meson options
> * update the libraries name to be in sync with the latest dpdk
>
> - # No pmd driver code will be pulled in without "--whole-archive". To
> - # avoid exposing that to seastar users, combine dpdk into a single
> - # .o file.
> - add_custom_command (
> - OUTPUT seastar-dpdk.o
> - COMMAND ld -r -o seastar-dpdk.o --whole-archive ${dpdk_LIBRARIES} --no-
> whole-archive
> - )

I see that you include DPDK similarly to any library. If this arise as an issue to seastar maintainers and you would like to explore an alternative I had kept the original approach of re-combining all of dpdk as a single .o from the .a libraries declared in the dpdk pkg-config (but still had to pull some shared libraries used by DPDK such as zlib, pcap or mlx5).

yeah, i made this change on purpose. wanted to follow the suggestion proposed by DPDK document.
 



> -
> set (dpdk_args
> - # gcc 10 defaults to -fno-common, which dpdk is not prepared for
> - "EXTRA_CFLAGS=-Wno-error -fcommon"
> - O=<BINARY_DIR>
> - DESTDIR=<INSTALL_DIR>
> - T=${dpdk_quadruple})
> + --default-library=static
> + -Dc_args="-Wno-error"
> + -Denable_docs=false
> + -Dtests=false
> + -Dexamples=
> + -Dmbuf_refcnt_atomic=false
> + -Dmax_memseg_lists=8192
> + -Ddisable_drivers="net/softnic,net/bonding"
> + -
> Ddisable_libs="kni,jobstats,lpm,acl,power,ip_frag,distributor,reorder,port,table,
> pipeline,flow_classify,bpf,efd,member"
> + -Dcpu_instruction_set=${Seastar_DPDK_MACHINE})

I disabled additional drivers and libs because they pulled additional dependencies I wasn't interested in or cause various issues during compilation on Centos 8 / Ubuntu 20.04 because I missed some headers. I don't know if it could be applicable (especially basedband, regex, event, .... if seastar uses mostly net/* ). For example crypto/*,cryptodev links openssl into seastar, which conflicted with our application that uses boringssl.

i am trying to mirror the settings in dpdk_config. once this changeset is merged, i will create another patch to include more drivers to incorporate the disable_drivers list proposed by you.
i see, in case the "dpdk_args" is changed over time, we need to rerun meson with an existing folder. i will create a follow up change to address this needs.

tcha...@gmail.com

<tchaikov@gmail.com>
unread,
Sep 30, 2021, 11:54:49 AM9/30/21
to seastar-dev
On Wednesday, September 29, 2021 at 5:39:03 PM UTC+8 Avi Kivity wrote:

On 27/09/2021 18.32, Kefu Chai wrote:
> * build dpdk using meson and ninja


Please add meson to install-dependencies.sh.

sure. done.
 


> * override the options using meson options
> * disable the build of drivers using meson options
> * update the libraries name to be in sync with the latest dpdk


And add a note here about testing.

i will try to test the dpdk support later on hopefully using a virtual machine setup. will do so before this change gets merged.
 


> Signed-off-by: Kefu Chai <tcha...@gmail.com>
> ---
> CMakeLists.txt | 22 ++++++-----------
> cmake/Finddpdk.cmake | 52 ++++++++++++++++++++++++++++++----------
> cooking_recipe.cmake | 57 +++++++++++++++++++++++---------------------
> dpdk | 2 +-


dpdk has some local patches in our tree:


0f486994114dbc68da07e7345674f7d83b54e0fc pmdinfogen: fix build with gcc 11
71db32d74c013f70bc455557f7ba098bb738115e avoid combining -r and
-export-dynamic linker options
7c29bbc804687fca5a2f71d05a120e81b2bd0066 net: i40e: add VLAN tag size to
RXMAX
6a70dd057977dbec7f4b20e341afa14391daef04 ixgbevf:
ixgbevf_dev_info_get(): return default tx_rs_thresh=1
d42481c44c2f4b555794bf15c47b1e0add9a1d8c ixgbe_pmd: forbid tx_rs_thresh
above 1 for all NICs but 82598


Some are backports and no doubt have to be dropped, but the others may
still be needed. Can you check?

sure. i pushed the my changes to https://github.com/tchaikov/dpdk/tree/wip-seastar-v2. the branch also includes the change i proposed at http://mails.dpdk.org/archives/dev/2021-September/219069.html .

tcha...@gmail.com

<tchaikov@gmail.com>
unread,
Sep 30, 2021, 1:30:11 PM9/30/21
to seastar-dev
On Wednesday, September 29, 2021 at 6:13:21 PM UTC+8 Avi Kivity wrote:

On 27/09/2021 18.32, Kefu Chai wrote:
> Signed-off-by: Kefu Chai <tcha...@gmail.com>


<snip cmake goo>


> diff --git a/include/seastar/core/smp.hh b/include/seastar/core/smp.hh
> index 1f58a08d..ab3e86b7 100644
> --- a/include/seastar/core/smp.hh
> +++ b/include/seastar/core/smp.hh
> @@ -292,7 +292,7 @@ class smp_message_queue {
> class smp : public std::enable_shared_from_this<smp> {
> alien::instance& _alien;
> std::vector<posix_thread> _threads;
> - std::vector<std::function<void ()>> _thread_loops; // for dpdk
> + std::vector<std::function<void ()>> _thread_loops; // for dpdk/spdk


IIRC this was needed because dpdk insisted on creating its own threads
rather than using caller-provided threads (just like Seastar). Probably
it's relaxed now and we can go back to using our own threads (no
requirement for this series of course). But does spdk create its own
threads too? I think it's more library like and doesn't care who creates
the threads.

this change was dropped in my latest revision. indeed, spdk creates its own threads by default, but it allows the developer to plugin a customized executor on which the spdk threads are scheduled. and in this patch, we are using the "bring my own threads" model for running spdk threads.
sure, will document the public interfaces. 
no, i don't think so. because, IMHO, block devices are not files. there a a handful of differences between these two concepts. for instance
- as one cannot enumerate block devices managed by SPDK using `ls`. i plan to expose some methods allowing developers to enumerate SPDK bdevs.
- SPDK block devices do not support fcntl. as the they not provided by a filesystem.

also, apart from the SPDK bdev versus file differences, there are reasons from the implementation perspective. SPDK implements a basic QoS support using token buckets categorized by number of rw ops, and size of rw ops. and SPDK allows application to issue I/O to a given device using different channels from multiple cores for better performance. so i am not sure if we want to use the userspace io scheduler offered by seastar for QoS, despite that it might perform better under some circumstances.
 


> + static std::unique_ptr<block_device> open(const std::string& bdev_name);
> + ~block_device();
> +
> + future<> write(uint64_t pos, const void* buffer, size_t len);
> + future<> read(uint64_t pos, void* buffer, size_t len);
> +
> + uint32_t block_size() const;
> + size_t memory_dma_alignment() const;
> +
> +private:
> + block_device() = default;
> + static void event_cb(int /* spdk_bdev_event_type */ type,
> + struct spdk_bdev* bdev,
> + void* event_ctx);
> +
> +private:
> + spdk_bdev* bdev = nullptr;
> + spdk_bdev_desc* desc = nullptr;
> + spdk_io_channel* io_channel = nullptr;


Please follow the convention that members start with _.
sure. 
removed. 


> +namespace seastar::spdk {
> +
> +temporary_buffer<char> dma_zmalloc(size_t size, size_t align);


Not sure why we need this. If we need zeroing allocation of temporary
buffers, but it in temporary_buffer.hh. Why is there special dma
allocation? Can't spdk use an iommu?

this function calls rte_malloc_socket() under the hood. the later prefers hugepage over regular heap. so i think SPDK should be happy with the memory chunk allocated by temporary_buffer.hh. will give a try.
sure. i admit that this part is a little bit hackerish. will add comment it in the next revision.
sure. 


> + if (opts.count("huge-unlink")) {
> + env_opts.unlink_hugepage = true;
> + }
> + std::string hugedir;
> + if (opts.count("hugepages")) {
> + hugedir = opts["hugepages"].as<std::string>();
> + env_opts.hugedir = hugedir.c_str();
> + }


That's so sad. Can't it use an iommu? Requiring hugepages in 2021 is
backwards.

yeah, that's my understanding: hugepage is required even iommu is available. i am not able to launch an SPDK application without hugepage. i also came across https://lists.ofono.org/hyperkitty/list/sp...@lists.01.org/thread/H2RKQGZHN3UVSQUTFIV3AZM3G6O4LKL4/ .
 


> + std::vector<spdk_pci_addr> pci_addrs;
> + if (opts.count("pci-blocked") && opts.count("pci-allowed")) {
> + throw std::invalid_argument("--pci-blocked and --pci-allowed cannot be used at the same time");
> + } else if (opts.count("pci-blocked")) {
> + for (const auto& bdf : opts["pci-blocked"].as<std::vector<std::string>>()) {
> + spdk_pci_addr pci_addr;
> + spdk_pci_addr_parse(&pci_addr, bdf.c_str());
> + pci_addrs.push_back(pci_addr);
> + }
> + env_opts.pci_blocked = &pci_addrs[0];
> + env_opts.num_pci_addr = pci_addrs.size();
> + } else if (opts.count("pci-allowed")) {
> + for (const auto& bdf : opts["pci-allowed"].as<std::vector<std::string>>()) {
> + spdk_pci_addr pci_addr;
> + spdk_pci_addr_parse(&pci_addr, bdf.c_str());
> + pci_addrs.push_back(pci_addr);
> + }
> + env_opts.pci_allowed = &pci_addrs[0];
> + env_opts.num_pci_addr = pci_addrs.size();
> + }
> + std::string iova_mode;
> + if (opts.count("iova-mode")) {
> + iova_mode = opts["iova-mode"].as<std::string>();


This seems to indicate iommu is supported.

yup, iommu is supported.
i am inlined to keep as it is. as the bdevs in SPDK are registered when SPDK environment starts. so we can just open a bdev in a sync call. this is part of SPDK public interface.
agreed. as long as we can ensure a buffer is "pinned", we can just hand the pinned buffer to SPDK without worrying about the separate memory pool dedicated to SPDK.



If at all possible, let's use iommu and the regular Seastar memory.

i am not very confident of figuring out a way to address this one despite that, in theory, we *can* fix this. as it seems like a task on the DPDK front.
 no. these are just "spdk threads". they are but user threads with their own event queues, pollers and timers. they are like mini reactors which can be moved across the shards depending on the load. this features is not implemented yet. i will implement this later on.

tcha...@gmail.com

<tchaikov@gmail.com>
unread,
Sep 30, 2021, 1:32:34 PM9/30/21
to seastar-dev

thank you for your reviews, Avi! will post the revised patchset once i have it cooked.

On Friday, October 1, 2021 at 1:30:11 AM UTC+8 tcha...@gmail.com wrote:
On Wednesday, September 29, 2021 at 6:13:21 PM UTC+8 Avi Kivity wrote:

On 27/09/2021 18.32, Kefu Chai wrote:
<snip>

Kefu Chai

<tchaikov@gmail.com>
unread,
Oct 3, 2021, 1:08:28 AM10/3/21
to seastar-dev@googlegroups.com, Kefu Chai
hi Avi, i've addressed most of your comments. but there are still some left
unadressed:

- another memory pool. even SPDK tries hard to be a library which is
relatively not tied to DPDK. it still use DPDK RTE EAL for managing
its memory pools. unless it offers a compile-time/runtime mechinary
allowing us to use another allocator for either regular memory or
pinned memory, we have to stay with yet another memory pool in SPDK.
- hugepage versus iommu: yeah. it's sad in 2021. but i tried to use
regular memory buffer allocated using
temporary_buffer<char>::aligned(), my test in spdk_bdev_demo.cc
fails because of the mismatch read and write with Malloc bdev,
which is backed by memory.
- SPDK thread: SPDK uses spdk_thread in its implementation. and
spdk_thread as an execution unit can even moved across cores for
better load balance. so it'd be better to keep it as it is and
manage the spdk threads using a dedicated sharded service.
- file_impl: i think the SPDK bdevs are quite different from
files in a handful perspectives. to minimize the confusion,
i am inclined to keep it as a different class not related to
file, despite that they do share some methods. i think they
will diverge even more over time.

change since v1:

- rebased against latest Seastar master (637af3add047ff2c668169cf4b5fa8679133c8e1)
- squash the commits into the commit bumping up SPDK, to address the FTBFS with newer DPDK.
- install-dependencies.sh: add meson to packages list
- cooking_recipe.cmake: use atomic_mbuf_ref_counts instead mbuf_refcnt_atomic as per the review comment of upstream DPDK change
- cmake: fail if Seastar_DPDK and Seastar_SPDK are enabled at the same time.
- cmake: link against libarchive if available. DPDK detects it and links against it if it is available. so SPDK have to do the same.
- core/reactor.cc: use the threads spawned by SPDK for running reactor. despite that SPDK allows us to poll from the threads spawned by Seastar, it still, for instance, uses rte_lcore_count(), for evaluating the cache size when creating memory pool. spdk_thread_lib_init_ext() creates a memory pool for the caching messages. spdk_env_get_core_count() is also used for the similar purpose elsewhere. but we cannot set the number of lcores without launching reactor threads from DPDK's RTE EAL.
- core/spdk_bdev: add _ before the name of member variables to more more consistent with the naming convention in Seastar
- core/spdk_app: pass the cpumask of "0x00" to SPDK to prevent it from spawning its own reactor threads
- core/spdk_app: reorder option parsing logic to group the related options together
- core/spdk_app: add "spdk" prefix to all SPDK options to be less confusing
- core/spdk_app: correct some indent
- core/spdk_app: return 1 if an exception is caught
- core/spdk_thread
- move the internal stuff into "internal" namespace
- rename "send_to()" to "submit_to()" to be aligned with alien::submit_to() and smp::submit_to()
- move run_with_spdk_thread into spdk_app.cc as the later is the only consumer of this class so far, and add comment
to explain the rationales behind this class
- core/spdk_lib:
- document public API
- reorder "alignment" and "size" parameters to be consistent with temporary_buffer<>::aligned()
- wrap spdk_dma_malloc() instead of spdk_dma_zmalloc()
- cmake/Findspdk.cmake:
- be QUIET
- use better names when setting spdk_lib_vars which is in turn passed to find_package_handle_standard_args() as REQUIRED_VARS
- do not add TARGET for non-existent library, as SPDK does not necessarily exist if it is disabled. but we alway check for it in cmake/SeastarDependencies.cmake
- demos/spdk_bdev_demo: pass --spdk-pmd if it is not passed in as command line argument.

----

Kefu Chai (7):
build: build with dpdk v20.02
build: find dpdk shared libraries as well
reactor: s/RTE_LCORE_FOREACH_SLAVE/RTE_LCORE_FOREACH_WORKER/
configure.py: drop "import sys"
cmake: check for DPDK only if enabled
spdk: add spdk submodule
*: add spdk support

.gitmodules | 3 +
CMakeLists.txt | 53 +++--
cmake/Findarchive.cmake | 12 +
cmake/Finddpdk.cmake | 82 +++++--
cmake/Findspdk.cmake | 126 +++++++++++
cmake/SeastarDependencies.cmake | 20 +-
configure.py | 10 +-
cooking_recipe.cmake | 68 +++---
demos/CMakeLists.txt | 5 +
demos/spdk-config.json | 17 ++
demos/spdk_bdev_demo.cc | 104 +++++++++
dpdk | 2 +-
dpdk_config | 24 --
include/seastar/core/smp.hh | 1 +
include/seastar/core/spdk_app.hh | 61 +++++
include/seastar/core/spdk_bdev.hh | 57 +++++
include/seastar/core/spdk_lib.hh | 35 +++
include/seastar/core/spdk_thread.hh | 105 +++++++++
install-dependencies.sh | 4 +
spdk | 1 +
src/core/reactor.cc | 53 ++++-
src/core/spdk_app.cc | 339 ++++++++++++++++++++++++++++
src/core/spdk_bdev.cc | 159 +++++++++++++
src/core/spdk_lib.cc | 39 ++++
src/core/spdk_thread.cc | 152 +++++++++++++
src/net/dpdk.cc | 29 +--
26 files changed, 1452 insertions(+), 109 deletions(-)
create mode 100644 cmake/Findarchive.cmake
create mode 100644 cmake/Findspdk.cmake
create mode 100644 demos/spdk-config.json

Kefu Chai

<tchaikov@gmail.com>
unread,
Oct 3, 2021, 1:08:33 AM10/3/21
to seastar-dev@googlegroups.com, Kefu Chai
* build dpdk using meson and ninja
* override the options using meson options
* disable the build of drivers using meson options
* update the libraries name to be in sync with the latest dpdk
* dpdk
- s/ether_addr/rte_ether_addr/
ether_addr was renamed to ether_addr in 6d13ea8e8e49ab957deae2bba5ecf4a4bfe747d1,
see https://github.com/DPDK/dpdk/commit/6d13ea8e8e49ab957deae2bba5ecf4a4bfe747d1
the first release including this change was v19.08. so let's
update accordingly
- s/ether_hdr/rte_ether_hdr/
the same applies to "ether_hdr"
- do not set TOEPLITZ as RSS hash
this change pratically reverts eec89a57a43c070df9d3e29fa649df7237be9f39,
because DPDK v21.08 includes the fix of
ef4c16fd9148215897abadf8e8a965488c82ba03 and c725221d09113bde89faa1e3c468e805fb335939
which set the RSS hash function properly for i40e.
so let's drop this fix to set TOEPLITZ as RSS hash for
unconditionally.
- s/rte_vfio_container_dma_map()/rte_vfio_dma_map()/
the latter was deprecated in v19.11. see
dpdk/doc/guides/rel_notes/release_19_11.rst
* install-dependencies: add meson to the packages list.
because we need to use meson to build newer DPDK.

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
CMakeLists.txt | 22 +++++-----------
cmake/Finddpdk.cmake | 52 +++++++++++++++++++++++++++----------
cooking_recipe.cmake | 57 ++++++++++++++++++++++-------------------
dpdk | 2 +-
dpdk_config | 24 -----------------
install-dependencies.sh | 4 +++
src/net/dpdk.cc | 29 +++++----------------
7 files changed, 87 insertions(+), 103 deletions(-)
delete mode 100644 dpdk_config

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a63b3b4c..4248aad8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -418,14 +418,9 @@ seastar_generate_ragel (
IN_FILE ${CMAKE_CURRENT_SOURCE_DIR}/src/http/response_parser.rl
OUT_FILE ${Seastar_GEN_BINARY_DIR}/include/seastar/http/response_parser.hh)

-if (Seastar_DPDK)
- set (seastar_dpdk_obj seastar-dpdk.o)
-endif ()
-
add_library (seastar STATIC
${http_chunk_parsers_file}
${http_request_parser_file}
- ${seastar_dpdk_obj}
include/seastar/core/abort_source.hh
include/seastar/core/alien.hh
include/seastar/core/align.hh
@@ -730,6 +725,11 @@ target_link_libraries (seastar
rt::rt
yaml-cpp::yaml-cpp
Threads::Threads)
+if (Seastar_DPDK)
+ target_link_libraries (seastar
+ PRIVATE
+ dpdk::dpdk)
+endif()

set (Seastar_SANITIZE_MODES "Debug" "Sanitize")
if ((Seastar_SANITIZE STREQUAL "ON") OR
@@ -879,17 +879,9 @@ if (Seastar_DPDK)
target_compile_definitions (seastar
PUBLIC SEASTAR_HAVE_DPDK)

- # No pmd driver code will be pulled in without "--whole-archive". To
- # avoid exposing that to seastar users, combine dpdk into a single
- # .o file.
- add_custom_command (
- OUTPUT seastar-dpdk.o
- COMMAND ld -r -o seastar-dpdk.o --whole-archive ${dpdk_LIBRARIES} --no-whole-archive
- )
-
# This just provides the include path to cmake
- target_link_libraries (seastar
- PUBLIC dpdk::dpdk)
+ target_include_directories (seastar
+ PUBLIC $<TARGET_PROPERTY:dpdk::dpdk,INTERFACE_INCLUDE_DIRECTORIES>)
endif ()

if (Seastar_HWLOC)
diff --git a/cmake/Finddpdk.cmake b/cmake/Finddpdk.cmake
index c70b8e02..5ecd359b 100644
--- a/cmake/Finddpdk.cmake
+++ b/cmake/Finddpdk.cmake
@@ -20,27 +20,53 @@
# Copyright (C) 2018 Scylladb, Ltd.
#

+find_package (PkgConfig REQUIRED)
+pkg_check_modules (dpdk_PC libdpdk)
+
+if (dpdk_PC_FOUND)
+ find_package_handle_standard_args (dpdk
+ REQUIRED_VARS
+ dpdk_PC_STATIC_CFLAGS
+ dpdk_PC_STATIC_INCLUDEDIR
+ dpdk_PC_STATIC_INCLUDE_DIRS
+ dpdk_PC_STATIC_LIBRARIES
+ dpdk_PC_STATIC_LIBRARY_DIRS)
+ if (dpdk_FOUND AND NOT (TARGET dpdk::dpdk))
+ set (dpdk_INCLUDE_DIR ${dpdk_PC_STATIC_INCLUDEDIR})
+ set (dpdk_LINK_DIRECTORIES ${dpdk_PC_STATIC_LIBRARY_DIRS})
+ set (dpdk_LIBRARIES ${dpdk_PC_STATIC_LIBRARIES})
+ add_library (dpdk::dpdk INTERFACE IMPORTED)
+ set_target_properties (dpdk::dpdk
+ PROPERTIES
+ INTERFACE_COMPILE_OPTIONS "${dpdk_PC_STATIC_CFLAGS}"
+ INTERFACE_INCLUDE_DIRECTORIES "${dpdk_PC_STATIC_INCLUDE_DIRS}"
+ INTERFACE_LINK_LIBRARIES "${dpdk_LIBRARIES}"
+ INTERFACE_LINK_DIRECTORIES "${dpdk_LINK_DIRECTORIES}")
+ return ()
+ endif ()
diff --git a/cooking_recipe.cmake b/cooking_recipe.cmake
index c53d3aee..035b7527 100644
--- a/cooking_recipe.cmake
+++ b/cooking_recipe.cmake
@@ -244,40 +244,43 @@ cooking_ingredient (cryptopp
URL https://github.com/weidai11/cryptopp/archive/CRYPTOPP_5_6_5.tar.gz
URL_MD5 88224d9c0322f63aa1fb5b8ae78170f0)

-
-# Use the "native" profile that DPDK defines in `dpdk/config`, but in `dpdk_configure.cmake` we override
-# CONFIG_RTE_MACHINE with `Seastar_DPDK_MACHINE`.
-if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
- set (dpdk_quadruple arm64-armv8a-linuxapp-gcc)
-else()
- set (dpdk_quadruple ${CMAKE_SYSTEM_PROCESSOR}-native-linuxapp-gcc)
-endif()
-
set (dpdk_args
- # gcc 10 defaults to -fno-common, which dpdk is not prepared for
- "EXTRA_CFLAGS=-Wno-error -fcommon"
- O=<BINARY_DIR>
- DESTDIR=<INSTALL_DIR>
- T=${dpdk_quadruple})
+ --default-library=static
+ -Dc_args="-Wno-error"
+ -Denable_docs=false
+ -Dtests=false
+ -Dexamples=
+ -Datomic_mbuf_ref_counts=false
+ -Dmax_memseg_lists=8192
+ -Ddisable_drivers="net/softnic,net/bonding"
+ -Ddisable_libs="kni,jobstats,lpm,acl,power,ip_frag,distributor,reorder,port,table,pipeline,flow_classify,bpf,efd,member"
+ -Dcpu_instruction_set=${Seastar_DPDK_MACHINE})
+
+if (CMAKE_BUILD_TYPE STREQUAL Debug)
+ list (APPEND dpdk_args -Dbuildtype=debug)
+endif ()
+
+find_program (Meson_EXECUTABLE
+ meson)
+if (NOT Meson_EXECUTABLE)
+ message (FATAL_ERROR "Cooking: Meson is required!")
+endif ()
+
+find_program (Ninja_EXECUTABLE
+ ninja)
+if (NOT Ninja_EXECUTABLE)
+ message (FATAL_ERROR "Cooking: Ninja is required!")
+endif ()

cooking_ingredient (dpdk
EXTERNAL_PROJECT_ARGS
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dpdk
CONFIGURE_COMMAND
- COMMAND
- ${CMAKE_COMMAND} -E chdir <SOURCE_DIR>
- make ${dpdk_args} config
- COMMAND
- ${CMAKE_COMMAND}
- -DSeastar_DPDK_MACHINE=${Seastar_DPDK_MACHINE}
- -DSeastar_DPDK_CONFIG_FILE_IN=<BINARY_DIR>/.config
- -DSeastar_DPDK_CONFIG_FILE_CHANGES=${CMAKE_CURRENT_SOURCE_DIR}/dpdk_config
- -DSeastar_DPDK_CONFIG_FILE_OUT=<BINARY_DIR>/${dpdk_quadruple}/.config
- -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/dpdk_configure.cmake
- BUILD_COMMAND <DISABLE>
+ env CC=${CMAKE_C_COMPILER} ${Meson_EXECUTABLE} ${dpdk_args} --prefix=<INSTALL_DIR> <BINARY_DIR> <SOURCE_DIR>
+ BUILD_COMMAND
+ ${Ninja_EXECUTABLE} -C <BINARY_DIR>
INSTALL_COMMAND
- ${CMAKE_COMMAND} -E chdir <SOURCE_DIR>
- ${make_command} ${dpdk_args} install)
+ ${Ninja_EXECUTABLE} -C <BINARY_DIR> install)

cooking_ingredient (fmt
EXTERNAL_PROJECT_ARGS
diff --git a/dpdk b/dpdk
index 0f486994..826e11d5 160000
--- a/dpdk
+++ b/dpdk
@@ -1 +1 @@
-Subproject commit 0f486994114dbc68da07e7345674f7d83b54e0fc
+Subproject commit 826e11d55516f53c58592ac4c4a052a80595067c
diff --git a/install-dependencies.sh b/install-dependencies.sh
index 8cf45f77..340da71f 100755
--- a/install-dependencies.sh
+++ b/install-dependencies.sh
@@ -42,6 +42,7 @@ debian_packages=(
libsctp-dev
gcc
make
+ meson
python3
systemtap-sdt-dev
libtool
@@ -77,6 +78,7 @@ redhat_packages=(
lz4-devel
gcc
make
+ meson
python3
systemtap-sdt-devel
libtool
@@ -152,6 +154,7 @@ arch_packages=(
lksctp-tools
lz4
make
+ meson
libtool
cmake
yaml-cpp
@@ -192,6 +195,7 @@ opensuse_packages=(
liblz4-devel
libnuma-devel
lksctp-tools-devel
+ meson
ninja
ragel
xfsprogs-devel
diff --git a/src/net/dpdk.cc b/src/net/dpdk.cc
index 5f60bfb2..77260d75 100644
--- a/src/net/dpdk.cc
+++ b/src/net/dpdk.cc
@@ -1876,7 +1858,8 @@ bool dpdk_qp<HugetlbfsMemBackend>::map_dma()
auto m = memory::get_memory_layout();
rte_iova_t iova = rte_mem_virt2iova((const void*)m.start);

- return rte_vfio_dma_map(m.start, iova, m.end - m.start) == 0;
+ return rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+ m.start, iova, m.end - m.start) == 0;
}

void dpdk_device::check_port_link_status()
--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Oct 3, 2021, 1:08:35 AM10/3/21
to seastar-dev@googlegroups.com, Kefu Chai
there is chance that DPDK is compiled as shared libraries, would be great
if we can use them if they are installed.

in this change

* check for static DPDK libraries before checking for shared libraries.
and prefer static ones if found.
* check the rte_config.h to see if RTE_MBUF_REFCNT_ATOMIC is defined.
bail out if this macro is defined. as atomic refcnt of mbuf is not
necessary if dpdk is used by a Seastar task.

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
cmake/Finddpdk.cmake | 32 +++++++++++++++++++++++++++++++-
1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/cmake/Finddpdk.cmake b/cmake/Finddpdk.cmake
index 5ecd359b..e306dcd3 100644
--- a/cmake/Finddpdk.cmake
+++ b/cmake/Finddpdk.cmake
@@ -23,7 +23,7 @@
find_package (PkgConfig REQUIRED)
pkg_check_modules (dpdk_PC libdpdk)

-if (dpdk_PC_FOUND)
+if (dpdk_PC_STATIC_FOUND)
find_package_handle_standard_args (dpdk
REQUIRED_VARS
dpdk_PC_STATIC_CFLAGS
@@ -44,12 +44,42 @@ if (dpdk_PC_FOUND)
INTERFACE_LINK_DIRECTORIES "${dpdk_LINK_DIRECTORIES}")
return ()
endif ()
+elseif (dpdk_PC_FOUND)
+ find_package_handle_standard_args (dpdk
+ REQUIRED_VARS
+ dpdk_PC_CFLAGS
+ dpdk_PC_INCLUDEDIR
+ dpdk_PC_INCLUDE_DIRS
+ dpdk_PC_LIBRARIES)
+ if (dpdk_FOUND AND NOT (TARGET dpdk::dpdk))
+ set (dpdk_INCLUDE_DIR ${dpdk_PC_INCLUDEDIR})
+ set (dpdk_LINK_DIRECTORIES ${dpdk_PC_LIBRARY_DIRS})
+ set (dpdk_LIBRARIES ${dpdk_PC_LIBRARIES})
+ add_library (dpdk::dpdk INTERFACE IMPORTED)
+ set_target_properties (dpdk::dpdk
+ PROPERTIES
+ INTERFACE_COMPILE_OPTIONS "${dpdk_PC_CFLAGS}"
+ INTERFACE_INCLUDE_DIRECTORIES "${dpdk_PC_INCLUDE_DIRS}"
+ INTERFACE_LINK_LIBRARIES "${dpdk_LIBRARIES}"
+ INTERFACE_LINK_DIRECTORIES "${dpdk_LINK_DIRECTORIES}")
+ return ()
+ endif ()
endif ()

find_path (dpdk_INCLUDE_DIR
NAMES rte_atomic.h
PATH_SUFFIXES dpdk)

+if (dpdk_INCLUDE_DIR AND EXISTS "${dpdk_INCLUDE_DIR}/rte_config.h")
+ file (STRINGS "${dpdk_INCLUDE_DIR}/rte_config.h" rte_mbuf_refcnt_atomic
+ REGEX "^#define[ \t ]+RTE_MBUF_REFCNT_ATOMIC")
+ if (rte_mbuf_refcnt_atomic)
+ message (SEND_ERROR
+ "DPDK is configured with RTE_MBUF_REFCNT_ATOMIC enabled, "
+ "please disable this option and recompile DPDK.")
+ endif ()
+endif ()
+
find_library (dpdk_PMD_VMXNET3_UIO_LIBRARY rte_net_vmxnet3)
find_library (dpdk_PMD_I40E_LIBRARY rte_net_i40e)
find_library (dpdk_PMD_IXGBE_LIBRARY rte_net_ixgbe)
--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Oct 3, 2021, 1:08:36 AM10/3/21
to seastar-dev@googlegroups.com, Kefu Chai
RTE_LCORE_FOREACH_SLAVE is deprecated in DPDK in favor of
RTE_LCORE_FOREACH_WORKER, so let's update accordingly.

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
src/core/reactor.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/core/reactor.cc b/src/core/reactor.cc
index 97d557c5..fd75211a 100644
--- a/src/core/reactor.cc
+++ b/src/core/reactor.cc
@@ -4194,7 +4194,7 @@ void smp::configure(boost::program_options::variables_map configuration, reactor
#ifdef SEASTAR_HAVE_DPDK
if (_using_dpdk) {
auto it = _thread_loops.begin();
- RTE_LCORE_FOREACH_SLAVE(i) {
+ RTE_LCORE_FOREACH_WORKER(i) {
rte_eal_remote_launch(dpdk_thread_adaptor, static_cast<void*>(&*(it++)), i);
}
}
--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Oct 3, 2021, 1:08:38 AM10/3/21
to seastar-dev@googlegroups.com, Kefu Chai
this python module is imported, but never used. so let's remove its
import

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
configure.py | 1 -
1 file changed, 1 deletion(-)

diff --git a/configure.py b/configure.py
index 27a40fd9..2c33d738 100755
--- a/configure.py
+++ b/configure.py
@@ -21,7 +21,6 @@ import distutils.dir_util
import os
import seastar_cmake
import subprocess
-import sys
import tempfile

tempfile.tempdir = "./build/tmp"
--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Oct 3, 2021, 1:08:39 AM10/3/21
to seastar-dev@googlegroups.com, Kefu Chai
so the configure.py is less chatty. and it's less confusing if
SPDK is enabled when configuration Seastar. because SPDK comes
with a bundled DPDK, whose version is not necessarily identical to
the one in the top-level dpdk submodule. so the output of cmake
when checking DPDK does not imply the existence of the DPDK used
by SPDK. since the DPDK and SPDK supports cannot be enabled at
the same time, it'd be less confusing if DPDK is checked only if
it is enabled.

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
cmake/SeastarDependencies.cmake | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/cmake/SeastarDependencies.cmake b/cmake/SeastarDependencies.cmake
index 51a8a65a..ca1007f5 100644
--- a/cmake/SeastarDependencies.cmake
+++ b/cmake/SeastarDependencies.cmake
@@ -52,7 +52,6 @@ macro (seastar_find_dependencies)
Boost
c-ares
cryptopp
- dpdk # No version information published.
fmt
lz4
# Private and private/public dependencies.
@@ -67,6 +66,11 @@ macro (seastar_find_dependencies)
rt
yaml-cpp)

+ if (Seastar_DPDK)
+ list (APPEND _seastar_all_dependencies
+ dpdk)
+ endif ()
+
# Arguments to `find_package` for each 3rd-party dependency.
# Note that the version specification is a "minimal" version requirement.

--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Oct 3, 2021, 1:08:40 AM10/3/21
to seastar-dev@googlegroups.com, Kefu Chai
include spdk b3ddd46e0de0f909cd2dec8aa67d92fde9dc3e6d from
https://github.com/spdk/spdk. we will use the latest stable release
of SPDK once the spdk support in Seastar is relatively stablized.

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
.gitmodules | 3 +++
spdk | 1 +
2 files changed, 4 insertions(+)
create mode 160000 spdk

diff --git a/.gitmodules b/.gitmodules
index c5e41966..0cad1323 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
[submodule "dpdk"]
path = dpdk
url = ../dpdk
+[submodule "spdk"]
+ path = spdk
+ url = ../spdk
diff --git a/spdk b/spdk
new file mode 160000
index 00000000..b3ddd46e
--- /dev/null
+++ b/spdk
@@ -0,0 +1 @@
+Subproject commit b3ddd46e0de0f909cd2dec8aa67d92fde9dc3e6d
--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Oct 3, 2021, 1:08:44 AM10/3/21
to seastar-dev@googlegroups.com, Kefu Chai
* include/seastar/core,src/core: add spdk support
* demos: add a demo for SPDK bdev
* cmake:
- add Findspdk.cmake: for finding SPDK libraries
- add Findarchive.cmake: libarchive is linked against by DPDK
but the .pc file generated by SPDK does not take it into
consideration, so we need to link against by ourselves.
* configure.py: add meson.
the newer releases of DPDK are configured using meson. to cook
SPDK with the bundeled DPDK, we have to install meson.

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
CMakeLists.txt | 33 +++
cmake/Findarchive.cmake | 12 +
cmake/Findspdk.cmake | 126 +++++++++++
cmake/SeastarDependencies.cmake | 14 ++
configure.py | 9 +
cooking_recipe.cmake | 11 +
demos/CMakeLists.txt | 5 +
demos/spdk-config.json | 17 ++
demos/spdk_bdev_demo.cc | 104 +++++++++
include/seastar/core/smp.hh | 1 +
include/seastar/core/spdk_app.hh | 61 +++++
include/seastar/core/spdk_bdev.hh | 57 +++++
include/seastar/core/spdk_lib.hh | 35 +++
include/seastar/core/spdk_thread.hh | 105 +++++++++
src/core/reactor.cc | 51 ++++-
src/core/spdk_app.cc | 339 ++++++++++++++++++++++++++++
src/core/spdk_bdev.cc | 159 +++++++++++++
src/core/spdk_lib.cc | 39 ++++
src/core/spdk_thread.cc | 152 +++++++++++++
19 files changed, 1326 insertions(+), 4 deletions(-)
create mode 100644 cmake/Findarchive.cmake
create mode 100644 cmake/Findspdk.cmake
create mode 100644 demos/spdk-config.json
create mode 100644 demos/spdk_bdev_demo.cc
create mode 100644 include/seastar/core/spdk_app.hh
create mode 100644 include/seastar/core/spdk_bdev.hh
create mode 100644 include/seastar/core/spdk_lib.hh
create mode 100644 include/seastar/core/spdk_thread.hh
create mode 100644 src/core/spdk_app.cc
create mode 100644 src/core/spdk_bdev.cc
create mode 100644 src/core/spdk_lib.cc
create mode 100644 src/core/spdk_thread.cc

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4248aad8..d9bc8b0e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -730,6 +742,16 @@ if (Seastar_DPDK)
PRIVATE
dpdk::dpdk)
endif()
+if (Seastar_SPDK)
+ target_link_libraries (seastar
+ PRIVATE
+ spdk::event_bdev
+ spdk::event_accel
+ spdk::bdev
+ spdk::accel
+ spdk::init
+ spdk::env_dpdk)
+endif ()

set (Seastar_SANITIZE_MODES "Debug" "Sanitize")
if ((Seastar_SANITIZE STREQUAL "ON") OR
@@ -884,6 +906,16 @@ if (Seastar_DPDK)
PUBLIC $<TARGET_PROPERTY:dpdk::dpdk,INTERFACE_INCLUDE_DIRECTORIES>)
endif ()

+if (Seastar_SPDK)
+ target_compile_definitions (seastar
+ PUBLIC SEASTAR_HAVE_SPDK)
+ target_link_libraries (seastar
+ PUBLIC spdk::spdk)
+endif ()
+if (Seastar_DPDK AND Seastar_SPDK)
+ message (FATAL_ERROR "DPDK and SPDK support cannot be enabled at the same time!")
+endif ()
+
if (Seastar_HWLOC)
if (NOT hwloc_FOUND)
message (FATAL_ERROR "`hwloc` support is enabled but it is not available!")
@@ -1210,6 +1242,7 @@ if (Seastar_INSTALL)
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findlksctp-tools.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findlz4.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findnumactl.cmake
+ ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findspdk.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findragel.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findrt.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findyaml-cpp.cmake
diff --git a/cmake/Findarchive.cmake b/cmake/Findarchive.cmake
new file mode 100644
index 00000000..e9e48b96
--- /dev/null
+++ b/cmake/Findarchive.cmake
@@ -0,0 +1,12 @@
+if(archive_INCLUDE_DIR AND archive_LIBRARIES)
+ set(archive_FIND_QUIETLY TRUE)
+endif()
+
+find_path(archive_INCLUDE_DIR archive.h)
+find_library(archive_LIBRARIES archive)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(archive
+ DEFAULT_MSG archive_INCLUDE_DIR archive_LIBRARIES)
+
+mark_as_advanced(archive_INCLUDE_DIR archive_LIBRARIES)
diff --git a/cmake/Findspdk.cmake b/cmake/Findspdk.cmake
new file mode 100644
index 00000000..02eefd46
--- /dev/null
+++ b/cmake/Findspdk.cmake
@@ -0,0 +1,126 @@
+set (_spdk_env_dpdk_deps archive)
+set (_spdk_bdev_aio_deps aio)
+set (_spdk_util_deps uuid)
+
+foreach (component ${spdk_FIND_COMPONENTS})
+ pkg_check_modules (spdk_${component} spdk_${component} QUIET)
+ set (prefix spdk_${component}_STATIC)
+ list (APPEND spdk_lib_vars ${prefix}_LIBRARIES)
+ if (NOT spdk_${component}_FOUND)
+ continue ()
+ endif ()
+ add_library (spdk::${component} INTERFACE IMPORTED)
+ # add the dependencies of the linked SPDK libraries if any
+ foreach (spdk_lib ${${prefix}_LIBRARIES} )
+ foreach (dep ${_${spdk_lib}_deps})
+ find_package (${dep} QUIET)
+ if (NOT ${dep}_FOUND)
+ continue ()
+ endif ()
+ if (NOT ${dep} IN_LIST "${${prefix}_LIBRARIES}")
+ list (APPEND ${prefix}_LIBRARIES ${dep})
+ endif ()
+ endforeach ()
+ endforeach ()
+
+ set_target_properties (spdk::${component}
+ PROPERTIES
+ INTERFACE_COMPILE_OPTIONS ${${prefix}_CFLAGS}
+ INTERFACE_INCLUDE_DIRECTORIES ${${prefix}_INCLUDE_DIRS}
+ INTERFACE_LINK_OPTIONS "-Wl,--whole-archive;${${prefix}_LDFLAGS};-Wl,--no-whole-archive"
+ INTERFACE_LINK_LIBRARIES "${${prefix}_LIBRARIES}"
+ INTERFACE_LINK_DIRECTORIES "${${prefix}_LIBRARY_DIRS}")
+ if (NOT spdk_INCLUDE_DIR)
+ set (spdk_INCLUDE_DIR ${${prefix}_INCLUDE_DIRS})
+ endif ()
+ if (NOT spdk_LINK_DIRECTORIES)
+ set (spdk_LINK_DIRECTORIES ${${prefix}_LIBRARY_DIRS})
+ endif ()
+ list (APPEND spdk_link_opts "${${prefix}_LDFLAGS}")
+ list (APPEND spdk_libs ${${prefix}_LIBRARIES})
+endforeach ()
+
+if (spdk_INCLUDE_DIR AND EXISTS "${spdk_INCLUDE_DIR}/spdk/version.h")
+ foreach(ver "MAJOR" "MINOR" "PATCH")
+ file(STRINGS "${spdk_INCLUDE_DIR}/spdk/version.h" spdk_VER_${ver}_LINE
+ REGEX "^#define[ \t ]+SPDK_VERSION_${ver}[ \t]+[0-9]+$")
+ string(REGEX REPLACE "^#define[ \t]+SPDK_VERSION_${ver}[ \t]+([0-9]+)$"
+ "\\1" spdk_VERSION_${ver} "${spdk_VER_${ver}_LINE}")
+ unset(${spdk_VER_${ver}_LINE})
+ endforeach()
+ set(spdk_VERSION_STRING
+ "${spdk_VERSION_MAJOR}.${spdk_VERSION_MINOR}.${spdk_VERSION_PATCH}")
+endif ()
+
+list(REMOVE_DUPLICATES spdk_lib_vars)
+find_package_handle_standard_args (spdk
+ REQUIRED_VARS
+ spdk_INCLUDE_DIR
+ spdk_LINK_DIRECTORIES
+ ${spdk_lib_vars}
+ VERSION_VAR
+ spdk_VERSION_STRING)
+
+if (spdk_FOUND AND NOT (TARGET spdk::spdk))
+ set (spdk_LIBRARIES ${spdk_libs})
+ set (whole_archive_link_opts
+ -Wl,--whole-archive -Wl,-Bstatic ${spdk_link_opts} -Wl,--no-whole-archive -Wl,-Bdynamic)
+ add_library (spdk::spdk INTERFACE IMPORTED)
+ set_target_properties (spdk::spdk
+ PROPERTIES
+ INTERFACE_COMPILE_OPTIONS "${spdk_PC_STATIC_bdev_CFLAGS}"
+ INTERFACE_INCLUDE_DIRECTORIES "${spdk_INCLUDE_DIR}"
+ INTERFACE_LINK_OPTIONS "${whole_archive_link_opts}"
+ INTERFACE_LINK_LIBRARIES "${spdk_LIBRARIES}"
+ INTERFACE_LINK_DIRECTORIES "${spdk_LINK_DIRECTORIES}")
+endif ()
diff --git a/cmake/SeastarDependencies.cmake b/cmake/SeastarDependencies.cmake
index ca1007f5..8cf1cb67 100644
--- a/cmake/SeastarDependencies.cmake
+++ b/cmake/SeastarDependencies.cmake
@@ -71,6 +71,11 @@ macro (seastar_find_dependencies)
dpdk)
endif ()

+ if (Seastar_SPDK)
+ list (APPEND _seastar_all_dependencies
+ spdk)
+ endif ()
+
# Arguments to `find_package` for each 3rd-party dependency.
# Note that the version specification is a "minimal" version requirement.

@@ -96,6 +101,15 @@ macro (seastar_find_dependencies)
set (_seastar_dep_args_lksctp-tools REQUIRED)
set (_seastar_dep_args_rt REQUIRED)
set (_seastar_dep_args_yaml-cpp 0.5.1 REQUIRED)
+ set (_seastar_dep_args_spdk
+ 21.10.0
+ COMPONENTS
+ event_bdev
+ event_accel
+ bdev
+ accel
+ init
+ env_dpdk)

foreach (third_party ${_seastar_all_dependencies})
find_package ("${third_party}" ${_seastar_dep_args_${third_party}})
diff --git a/configure.py b/configure.py
index 2c33d738..d52fedd2 100755
--- a/configure.py
+++ b/configure.py
@@ -80,6 +80,11 @@ add_tristate(
name = 'dpdk',
dest = 'dpdk',
help = 'DPDK support')
+add_tristate(
+ arg_parser,
+ name = 'spdk',
+ dest = 'spdk',
+ help = 'SPDK support')
add_tristate(
arg_parser,
name = 'hwloc',
@@ -188,6 +193,7 @@ def configure_mode(mode):
tr(LDFLAGS, 'LD_FLAGS'),
tr(args.cpp_dialect, 'CXX_DIALECT'),
tr(args.dpdk, 'DPDK'),
+ tr(args.spdk, 'SPDK'),
tr(infer_dpdk_machine(args.user_cflags), 'DPDK_MACHINE'),
tr(args.hwloc, 'HWLOC', value_when_none='yes'),
tr(args.alloc_failure_injection, 'ALLOC_FAILURE_INJECTION', value_when_none='DEFAULT'),
@@ -205,6 +211,9 @@ def configure_mode(mode):
if args.dpdk:
ingredients_to_cook.add('dpdk')

+ if args.spdk:
+ ingredients_to_cook.add('spdk')
+
# Generate a new build by pointing to the source directory.
if ingredients_to_cook:
# We need to use cmake-cooking for some dependencies.
diff --git a/cooking_recipe.cmake b/cooking_recipe.cmake
index 035b7527..42ef9b82 100644
--- a/cooking_recipe.cmake
+++ b/cooking_recipe.cmake
@@ -299,3 +299,14 @@ cooking_ingredient (lz4
CONFIGURE_COMMAND <DISABLE>
BUILD_COMMAND <DISABLE>
INSTALL_COMMAND ${make_command} PREFIX=<INSTALL_DIR> install)
+
+cooking_ingredient (spdk
+ EXTERNAL_PROJECT_ARGS
+ SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/spdk
+ BUILD_IN_SOURCE ON
+ CONFIGURE_COMMAND
+ <SOURCE_DIR>/configure --without-isal --disable-tests --disable-unit-tests --disable-examples --disable-apps --prefix=<INSTALL_DIR>
+ BUILD_COMMAND
+ ${make_command}
+ INSTALL_COMMAND
+ ${make_command} install)
diff --git a/demos/CMakeLists.txt b/demos/CMakeLists.txt
index 084e8298..611950aa 100644
--- a/demos/CMakeLists.txt
+++ b/demos/CMakeLists.txt
@@ -111,5 +111,10 @@ seastar_add_demo (sharded_parameter
seastar_add_demo (file
SOURCES file_demo.cc)

+if (Seastar_SPDK)
+ seastar_add_demo (spdk_bdev
+ SOURCES spdk_bdev_demo.cc)
+endif ()
+
seastar_add_demo (tutorial_examples
SOURCES tutorial_examples.cc)
diff --git a/demos/spdk-config.json b/demos/spdk-config.json
new file mode 100644
index 00000000..83bab0cb
--- /dev/null
+++ b/demos/spdk-config.json
@@ -0,0 +1,17 @@
+{
+ "subsystems": [
+ {
+ "subsystem": "bdev",
+ "config": [
+ {
+ "method": "bdev_malloc_create",
+ "params": {
+ "name": "Malloc0",
+ "num_blocks": 32768,
+ "block_size": 512
+ }
+ }
+ ]
+ }
+ ]
+}
diff --git a/demos/spdk_bdev_demo.cc b/demos/spdk_bdev_demo.cc
new file mode 100644
index 00000000..8185ef87
--- /dev/null
+++ b/demos/spdk_bdev_demo.cc
@@ -0,0 +1,104 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#include <cstring>
+#include <limits>
+#include <vector>
+
+#include <seastar/core/app-template.hh>
+
+#include <seastar/core/aligned_buffer.hh>
+#include <seastar/core/file.hh>
+#include <seastar/core/fstream.hh>
+#include <seastar/core/seastar.hh>
+#include <seastar/core/spdk_app.hh>
+#include <seastar/core/spdk_bdev.hh>
+#include <seastar/core/spdk_lib.hh>
+#include <seastar/core/sstring.hh>
+#include <seastar/core/temporary_buffer.hh>
+#include <seastar/core/loop.hh>
+#include <seastar/core/io_intent.hh>
+#include <seastar/util/log.hh>
+#include <seastar/util/tmp_file.hh>
+
+using namespace seastar;
+namespace bpo = boost::program_options;
+
+seastar::logger spdk_logger("spdk_demo");
+
+int main(int ac, char** av) {
+ seastar::app_template seastar_app;
+ spdk::app spdk_app;
+ seastar_app.add_options()
+ ( "bdev", bpo::value<std::string>()->default_value("Malloc0"),
+ "bdev name");
+ seastar_app.get_options_description().add(spdk_app.get_options_description());
+ std::vector<const char*> argv(av, av + ac);
+ if (std::find_if(argv.begin(),
+ argv.end(),
+ [](const char* s) { return strcmp(s, "--spdk-pmd") == 0; }) ==
+ argv.end()) {
+ argv.push_back("--spdk-pmd");
+ }
+ return seastar_app.run(argv.size(), const_cast<char**>(argv.data()), [&] {
+ spdk_logger.info("demo running");
+ auto bdev_name = seastar_app.configuration()["bdev"].as<std::string>();
+ return spdk_app.run(seastar_app.configuration(), [bdev_name] {
+ spdk_logger.info("bdev.open");
+ auto bdev = spdk::block_device::open(bdev_name);
+ auto buf = spdk::dma_malloc(bdev->memory_dma_alignment(),
+ bdev->block_size());
+ return do_with(temporary_buffer<char>(std::move(buf)),
+ std::unique_ptr<spdk::block_device>(std::move(bdev)),
+ [] (temporary_buffer<char>& buf,
+ std::unique_ptr<spdk::block_device>& bdev) {
+ spdk_logger.info("bdev.write");
+ return bdev->write(0, buf.get(), buf.size()).then([&] {
+ spdk_logger.info("bdev.read");
+ memset(buf.get_write(), 0xff, buf.size());
+ return bdev->read(0, buf.get_write(), buf.size());
+ }).then([&buf] {
+ spdk_logger.info("bdev.read");
+ temporary_buffer<char> good{buf.size()};
+ memset(good.get_write(), 0, good.size());
+ if (int where = memcmp(good.get(), buf.get(), buf.size());
+ where != 0) {
+ spdk_logger.error("buf mismatches at {}!", where);
+ } else {
+ spdk_logger.info("buf matches!");
+ }
+ });
+ });
+ }).then_wrapped([] (auto f) {
+ try {
+ f.get();
+ return 1;
+ } catch (std::exception& e) {
+ std::cerr << e.what() << std::endl;
+ return 1;
+ } catch (...) {
+ std::cout << "unknown exception" << std::endl;
+ return 1;
+ }
+ });
+ });
+}
diff --git a/include/seastar/core/smp.hh b/include/seastar/core/smp.hh
index 1f58a08d..2e7aaa0a 100644
--- a/include/seastar/core/smp.hh
+++ b/include/seastar/core/smp.hh
@@ -301,6 +301,7 @@ class smp : public std::enable_shared_from_this<smp> {
static thread_local smp_message_queue**_qs;
static thread_local std::thread::id _tmain;
bool _using_dpdk = false;
+ bool _using_spdk = false;

template <typename Func>
using returns_future = is_future<std::result_of_t<Func()>>;
diff --git a/include/seastar/core/spdk_app.hh b/include/seastar/core/spdk_app.hh
new file mode 100644
index 00000000..55d4c5c3
--- /dev/null
+++ b/include/seastar/core/spdk_app.hh
@@ -0,0 +1,61 @@
+// Helper to setup and tear down the SPDK environment
+//
+// \c app
+// -# accepts a bunch of command line options which mirror the ones
+// recognized by \c spdk_app_parse_args().
+// -# initializes the SPDK subsystems specified by the options.
+// -# starts an RPC server offering JSON-RPC remote access.
+class app {
+public:
+ future<int> run(const boost::program_options::variables_map& opts,
+ std::function<future<> ()>&& func) noexcept;
+ static boost::program_options::options_description get_options_description();
+private:
+ future<> start(const boost::program_options::variables_map& opts);
+ future<> stop();
+private:
+ sharded<executor> sharded_executor;
+ spdk_thread* app_thread = nullptr;
+};
+
+}
diff --git a/include/seastar/core/spdk_bdev.hh b/include/seastar/core/spdk_bdev.hh
new file mode 100644
index 00000000..03debdd8
+ static std::unique_ptr<block_device> open(const std::string& bdev_name);
+ ~block_device();
+
+ future<> write(uint64_t pos, const void* buffer, size_t len);
+ future<> read(uint64_t pos, void* buffer, size_t len);
+
+ uint32_t block_size() const;
+ size_t memory_dma_alignment() const;
+
+private:
+ block_device() = default;
+ static void event_cb(int /* spdk_bdev_event_type */ type,
+ struct spdk_bdev* bdev,
+ void* event_ctx);
+
+private:
+ spdk_bdev* _bdev = nullptr;
+ spdk_bdev_desc* _desc = nullptr;
+ spdk_io_channel* _io_channel = nullptr;
+};
+
+}
diff --git a/include/seastar/core/spdk_lib.hh b/include/seastar/core/spdk_lib.hh
new file mode 100644
index 00000000..8bd40515
--- /dev/null
+++ b/include/seastar/core/spdk_lib.hh
@@ -0,0 +1,35 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#pragma once
+
+#include <seastar/core/temporary_buffer.hh>
+
+namespace seastar::spdk {
+
+/// Creates a \c temporary_buffer object with a specified size, with
+/// memory aligned to a specified boundary. The underlying memory
+/// buffer is pinned, which is required for data buffers used for
+/// SPDK I/O operations.
+temporary_buffer<char> dma_malloc(size_t alignment, size_t size);
+
+}
diff --git a/include/seastar/core/spdk_thread.hh b/include/seastar/core/spdk_thread.hh
new file mode 100644
index 00000000..fc48e243
--- /dev/null
+++ b/include/seastar/core/spdk_thread.hh
@@ -0,0 +1,105 @@
+namespace seastar::spdk {
+
+namespace internal {
+
+namespace bi = boost::intrusive;
+
+class thread_entry {
+ bi::list_member_hook<> _hook;
+public:
+ using container_list_t = bi::list<thread_entry,
+ bi::member_hook<thread_entry, bi::list_member_hook<>, &thread_entry::_hook>>;
+ spdk_thread* thread() noexcept;
+ static thread_entry* from_thread(spdk_thread* thread);
+};
+
+class thread_msg {
+public:
+ virtual void run_and_dispose() noexcept = 0;
+ seastar::future<> get_future() {
+ return _pr.get_future();
+ }
+protected:
+ seastar::promise<> _pr;
+ ~thread_msg() = default;
+};
+
+template <typename Func>
+class lambda_thread_msg final : public thread_msg {
+ Func _func;
+public:
+ lambda_thread_msg(Func&& func) : _func(std::move(func)) {}
+ void run_and_dispose() noexcept final {
+ std::move(_func)();
+ _pr.set_value();
+ delete this;
+ }
+};
+}
+
+/// An executor of a group of SPDK threads.
+///
+/// \c spdk_thread is a user-space lightweight thread. SPDK uses it to
+/// perform tasks on demand and to poll for events. In general, developer
+/// should use \c seastar::spdk::app instead for setting up the SPDK
+/// environment, but \c seastar::spdk::executor is also exposed when
+/// a full-blown \c seastar::spdk::app is not necessary.
+class executor : public peering_sharded_service<executor> {
+ using sharded_executor_t = sharded<executor>;
+public:
+ future<> start();
+ future<> stop();
+ bool poll();
+ void schedule_thread(spdk_thread* thread);
+
+ /// run the specified function on a SPDK thread
+ template <typename Func>
+ static future<> submit_to(spdk_thread *thread, Func&& func) noexcept {
+ auto msg = new internal::lambda_thread_msg<Func>(std::move(func));
+ return do_submit_to(thread, msg);
+ }
+ /// returns an singleton of executor which is used for scheduling a new
+ /// thread
+ static sharded_executor_t& instance();
+
+private:
+ static future<> do_submit_to(spdk_thread* thread, internal::thread_msg* msg);
+ std::unique_ptr<reactor::poller> poller;
+ internal::thread_entry::container_list_t _threads;
+ uint64_t _tsc_last;
+ static sharded_executor_t* s_executor;
+};
+
+}
diff --git a/src/core/reactor.cc b/src/core/reactor.cc
index fd75211a..3993ef9f 100644
--- a/src/core/reactor.cc
+++ b/src/core/reactor.cc
@@ -93,6 +93,9 @@
#include <seastar/core/dpdk_rte.hh>
#include <rte_lcore.h>
#include <rte_launch.h>
+#elif defined(SEASTAR_HAVE_SPDK)
+#include <seastar/core/spdk_app.hh>
+#include <spdk/env.h>
#endif
#include <seastar/core/prefetch.hh>
#include <exception>
@@ -3581,6 +3584,9 @@ smp::get_options_description()
#endif
#ifdef SEASTAR_HAVE_HWLOC
("allow-cpus-in-remote-numa-nodes", bpo::value<bool>()->default_value(true), "if some CPUs are found not to have any local NUMA nodes, allow assigning them to remote ones")
+#endif
+#ifdef SEASTAR_HAVE_SPDK
+ ("spdk-pmd", "Use SPDK PMD drivers")
#endif
;
return opts;
@@ -3615,7 +3621,7 @@ void smp::start_all_queues()
_alien._qs[this_shard_id()].start();
}

-#ifdef SEASTAR_HAVE_DPDK
+#if defined(SEASTAR_HAVE_DPDK) || defined(SEASTAR_HAVE_SPDK)

int dpdk_thread_adaptor(void* f)
{
@@ -3632,6 +3638,11 @@ void smp::join_all()
rte_eal_mp_wait_lcore();
return;
}
+#elif defined(SEASTAR_HAVE_SPDK)
+ if (_using_spdk) {
+ spdk_env_thread_wait_all();
+ return;
+ }
#endif
for (auto&& t: smp::_threads) {
t.join();
@@ -3639,8 +3650,8 @@ void smp::join_all()
}

void smp::pin(unsigned cpu_id) {
- if (_using_dpdk) {
- // dpdk does its own pinning
+ if (_using_dpdk || _using_spdk) {
+ // dpdk/spdk does its own pinning
return;
}
pin_this_thread(cpu_id);
@@ -3669,6 +3680,11 @@ void smp::allocate_reactor(unsigned id, reactor_backend_selector rbs, reactor_co
void smp::cleanup() noexcept {
smp::_threads = std::vector<posix_thread>();
_thread_loops.clear();
+#ifdef SEASTAR_HAVE_SPDK
+ if (_using_spdk) {
+ spdk::env::stop();
+ }
+#endif
}

void smp::cleanup_cpu() {
@@ -3685,7 +3701,7 @@ void smp::cleanup_cpu() {
}

void smp::create_thread(std::function<void ()> thread_loop) {
- if (_using_dpdk) {
+ if (_using_dpdk || _using_spdk) {
_thread_loops.push_back(std::move(thread_loop));
} else {
_threads.emplace_back(std::move(thread_loop));
@@ -3927,6 +3943,8 @@ void smp::configure(boost::program_options::variables_map configuration, reactor

#ifdef SEASTAR_HAVE_DPDK
_using_dpdk = configuration.count("dpdk-pmd");
+#elif defined(SEASTAR_HAVE_SPDK)
+ _using_spdk = configuration.count("spdk-pmd");
#endif
auto thread_affinity = configuration["thread-affinity"].as<bool>();
if (configuration.count("overprovisioned")
@@ -3935,6 +3953,8 @@ void smp::configure(boost::program_options::variables_map configuration, reactor
}
if (!thread_affinity && _using_dpdk) {
fmt::print("warning: --thread-affinity 0 ignored in dpdk mode\n");
+ } else if (!thread_affinity && _using_spdk) {
+ fmt::print("warning: --thread-affinity 0 ignored in spdk mode\n");
}
auto mbind = configuration["mbind"].as<bool>();
if (!thread_affinity) {
@@ -4080,6 +4100,15 @@ void smp::configure(boost::program_options::variables_map configuration, reactor
}
dpdk::eal::init(cpus, configuration);
}
+#elif defined(SEASTAR_HAVE_SPDK)
+ if (_using_spdk) {
+ try {
+ spdk::env::start(allocations, configuration);
+ } catch (const std::exception& e) {
+ seastar_logger.error(e.what());
+ _exit(1);
+ }
+ }
#endif

// Better to put it into the smp class, but at smp construction time
@@ -4198,6 +4227,20 @@ void smp::configure(boost::program_options::variables_map configuration, reactor
rte_eal_remote_launch(dpdk_thread_adaptor, static_cast<void*>(&*(it++)), i);
}
}
+#elif defined(SEASTAR_HAVE_SPDK)
+ if (_using_spdk) {
+ auto it = _thread_loops.begin();
+ SPDK_ENV_FOREACH_CORE(i) {
+ if (i == spdk_env_get_current_core()) {
+ continue;
+ }
+ int rc = spdk_env_thread_launch_pinned(i, dpdk_thread_adaptor, static_cast<void*>(&*(it++)));
+ if (rc < 0) {
+ seastar_logger.error("Unable to start reactor thread on core {} using SPDK", i);
+ _exit(1);
+ }
+ }
+ }
#endif

reactors_registered.wait();
diff --git a/src/core/spdk_app.cc b/src/core/spdk_app.cc
new file mode 100644
index 00000000..8b861d29
--- /dev/null
+++ b/src/core/spdk_app.cc
@@ -0,0 +1,339 @@
+ if (opts.count("spdk-mem-size")) {
+ const std::string mem_size_str = opts["spdk-mem-size"].as<std::string>();
+ uint64_t mem_size_mb;
+ bool mem_size_has_prefix;
+ if (spdk_parse_capacity(mem_size_str.c_str(),
+ &mem_size_mb,
+ &mem_size_has_prefix) != 0) {
+ throw std::invalid_argument(
+ fmt::format("invalid memory pool size `--mem-size {}`",
+ mem_size_str));
+ }
+ if (mem_size_has_prefix) {
+ // convert mem size to MiB
+ mem_size_mb >>= 20;
+ }
+ if (mem_size_mb > std::numeric_limits<int>::max()) {
+ throw std::invalid_argument(
+ fmt::format("memory pool size too large `--mem-size {}`",
+ mem_size_mb));
+ }
+ env_opts.mem_size = static_cast<int>(mem_size_mb);
+ }
+ std::string hugedir;
+ if (opts.count("spdk-huge-dir")) {
+ hugedir = opts["spdk-huge-dir"].as<std::string>();
+ env_opts.hugedir = hugedir.c_str();
+ }
+ if (opts.count("spdk-single-file-segments")) {
+ env_opts.hugepage_single_segments = true;
+ }
+ if (opts.count("spdk-huge-unlink")) {
+ env_opts.unlink_hugepage = true;
+ }
+ if (opts.count("spdk-no-pci")) {
+ env_opts.no_pci = true;
+ }
+ std::vector<spdk_pci_addr> pci_addrs;
+ if (opts.count("spdk-pci-blocked") && opts.count("spdk-pci-allowed")) {
+ throw std::invalid_argument("--spdk-pci-blocked and --spdk-pci-allowed cannot be used at the same time");
+ } else if (opts.count("spdk-pci-blocked")) {
+ for (const auto& bdf : opts["spdk-pci-blocked"].as<std::vector<std::string>>()) {
+ spdk_pci_addr pci_addr;
+ spdk_pci_addr_parse(&pci_addr, bdf.c_str());
+ pci_addrs.push_back(pci_addr);
+ }
+ env_opts.pci_blocked = &pci_addrs[0];
+ env_opts.num_pci_addr = pci_addrs.size();
+ } else if (opts.count("spdk-pci-allowed")) {
+ for (const auto& bdf : opts["spdk-pci-allowed"].as<std::vector<std::string>>()) {
+ spdk_pci_addr pci_addr;
+ spdk_pci_addr_parse(&pci_addr, bdf.c_str());
+ pci_addrs.push_back(pci_addr);
+ }
+ env_opts.pci_allowed = &pci_addrs[0];
+ env_opts.num_pci_addr = pci_addrs.size();
+ }
+ std::string iova_mode;
+ if (opts.count("spdk-iova-mode")) {
+ iova_mode = opts["spdk-iova-mode"].as<std::string>();
+// SPDK keeps track of the "current" spdk_thread using a thread local storage
+// variable. and it uses a dedicated spdk_thread (app_thread) for setting up
+// SPDK app environment after the reactors are up and running. but seastar::spdk::app
+// is a little bit different, it also spawns an "app_thread". but it does not
+// schedule the setting up task using spdk_thread_send_msg() call, which runs
+// the task when the spdk thread is scheduled. instead, seastar::spdk::app
+// schedule the setup tasks using Seastar primitives directly. To ensure that
+// the tasks have access to the "current" spdk_thread, we have to set the
+// TLS variable manually. run_with_spdk_thread is defined to do this job.
+struct run_with_spdk_thread {
+ run_with_spdk_thread(spdk_thread* thread) {
+ spdk_set_thread(thread);
+ }
+ ~run_with_spdk_thread() {
+ spdk_set_thread(nullptr);
+ }
+};
+
+}
+
+namespace seastar::spdk {
+
+future<int> app::run(const boost::program_options::variables_map& opts,
+ std::function<future<> ()>&& func) noexcept
+{
+ if (!opts.count("spdk-pmd")) {
+ std::cerr << "SPDK app cannot start without --spdk-pmd enabled!" << std::endl;
+ return make_ready_future<int>(1);
+ }
+ spdk_log_open(spdk_do_log);
+
+ return seastar::async([opts, func = std::move(func), this] {
+ sharded_executor.start().then([this] {
+ return sharded_executor.invoke_on_all(&executor::start);
+ }).get();
+ auto stop_executor = seastar::defer([&] () noexcept {
+ sharded_executor.stop().get();
+ });
+ assert(app_thread == nullptr);
+ spdk_cpuset cpu_mask = {};
+ spdk_cpuset_set_cpu(&cpu_mask, spdk_env_get_current_core(), true);
+ app_thread = spdk_thread_create("app_thread", &cpu_mask);
+ if (app_thread == nullptr) {
+ throw std::bad_alloc();
+ }
+ run_with_spdk_thread run_with(app_thread);
+ start(opts).get();
+ auto stop_me = seastar::defer([&] () noexcept {
+ stop().get();
+ });
+
+ try {
+ futurize_invoke(func).get();
+ return 0;
+ } catch (std::exception& e) {
+ std::cerr << e.what() << std::endl;
+ return 1;
+ } catch (...) {
+ std::cerr << "unknown exception" << std::endl;
+ return 1;
+ ("spdk-rpc-socket",
+ bpo::value<std::string>()->default_value(SPDK_DEFAULT_RPC_ADDR),
+ "RPC listen address")
+ ("spdk-config", bpo::value<std::string>(), "JSON config file")
+ ("spdk-json-ignore-init-errors", "don't exit on invalid config entry")
+ ("spdk-iova-mode", bpo::value<std::string>(),
+ "set IOVA mode ('pa' for IOVA_PA and 'va' for IOVA_VA)")
+ ("spdk-huge-dir", bpo::value<std::string>(),
+ "use a specific hugetlbfs mount to reserve memory from")
+ ("spdk-huge-unlink", "unlink huge files after initialization")
+ ("spdk-mem-size", bpo::value<std::string>(),
+ "memory size in MB for DPDK")
+ ("spdk-no-pci", "disable PCI access")
+ ("spdk-single-file-segments", "force creating just one hugetlbfs file")
+ ("spdk-pci-blocked", bpo::value<std::vector<std::string>>()->multitoken(),
+ "pci addr to block (can be used more than once)")
+ ("spdk-pci-allowed", bpo::value<std::vector<std::string>>()->multitoken(),
+ "pci addr to allow (--spdk-pci-blocked and --spdk-pci-allowed cannot be used at the same time)")
+ ;
+ return opts;
+}
+
+}
diff --git a/src/core/spdk_bdev.cc b/src/core/spdk_bdev.cc
new file mode 100644
index 00000000..94b4cb34
--- /dev/null
+++ b/src/core/spdk_bdev.cc
@@ -0,0 +1,159 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#include <memory>
+
+#include <seastar/core/spdk_bdev.hh>
+#include <seastar/util/log.hh>
+#include <spdk/bdev.h>
+#include <spdk/string.h>
+#include <spdk/thread.h>
+ if (_io_channel) {
+ spdk_put_io_channel(_io_channel);
+ }
+ if (_desc) {
+ spdk_bdev_close(_desc);
+ }
+}
+
+std::unique_ptr<block_device> block_device::open(const std::string& bdev_name)
+{
+ std::unique_ptr<block_device> bdev{new block_device};
+ int rc = spdk_bdev_open_ext(bdev_name.c_str(),
+ true,
+ reinterpret_cast<spdk_bdev_event_cb_t>(event_cb),
+ bdev.get(),
+ &bdev->_desc);
+ if (rc) {
+ logger.error("unable to open bdev {}: {}",
+ bdev_name, spdk_strerror(-rc));
+ throw std::runtime_error(fmt::format("unable to open bdev {}", bdev_name));
+ }
+ bdev->_bdev = spdk_bdev_desc_get_bdev(bdev->_desc);
+ bdev->_io_channel = spdk_bdev_get_io_channel(bdev->_desc);
+ if (bdev->_io_channel == nullptr) {
+ logger.error("unable to open bdev I/O channel");
+ throw std::runtime_error(fmt::format("unable to open io channel"));
+ }
+ return bdev;
+}
+
+void block_device::event_cb(int type, spdk_bdev* bdev, void* event_ctx)
+{}
+
+static void spdk_bdev_io_cpl(spdk_bdev_io* bdev_io, bool success, void* arg)
+{
+ logger.info("io done");
+ auto* desc = static_cast<io_completion_desc*>(arg);
+ desc->complete_with(bdev_io, success);
+}
+
+future<> block_device::write(uint64_t pos, const void* buffer, size_t len)
+{
+ assert(_bdev);
+ logger.info("write({}, {})", pos, len);
+ auto io_desc = std::make_unique<io_completion_desc>();
+ auto io_done = io_desc->get_future();
+ int rc = spdk_bdev_write(_desc, _io_channel,
+ const_cast<void*>(buffer), pos, len,
+ spdk_bdev_io_cpl, io_desc.release());
+ if (rc == 0) {
+ return io_done;
+ }
+ if (rc == -ENOMEM) {
+ io_desc->fail_with(std::bad_alloc());
+ } else {
+ // -EBADF or -EINVAL
+ io_desc->fail_with(std::invalid_argument("out of range"));
+ }
+ return io_done;
+}
+
+future<> block_device::read(uint64_t pos, void* buffer, size_t len)
+{
+ assert(_bdev);
+ auto io_desc = std::make_unique<io_completion_desc>();
+ auto io_done = io_desc->get_future();
+ int rc = spdk_bdev_read(_desc, _io_channel, buffer, pos, len,
+ spdk_bdev_io_cpl, io_desc.release());
+ if (rc == 0) {
+ return io_done;
+ }
+ if (rc == -ENOMEM) {
+ io_desc->fail_with(std::bad_alloc());
+ } else {
+ // --EINVAL
+ io_desc->fail_with(std::invalid_argument("out of range"));
+ }
+ return io_done;
+}
+
+uint32_t block_device::block_size() const
+{
+ assert(_bdev);
+ return spdk_bdev_get_block_size(_bdev);
+}
+
+size_t block_device::memory_dma_alignment() const
+{
+ assert(_bdev);
+ return spdk_bdev_get_buf_align(_bdev);
+}
+
+}
diff --git a/src/core/spdk_lib.cc b/src/core/spdk_lib.cc
new file mode 100644
index 00000000..3f463c6e
+temporary_buffer<char> dma_malloc(size_t alignment, size_t size)
+{
+ void* buf = spdk_dma_malloc(size, alignment, nullptr);
+ if (!buf) {
+ throw std::bad_alloc();
+ }
+ return {static_cast<char*>(buf), size, seastar::make_deleter([buf] {
+ spdk_dma_free(buf);
+ })};
+}
+
+}
diff --git a/src/core/spdk_thread.cc b/src/core/spdk_thread.cc
new file mode 100644
index 00000000..bc1e9b0e
--- /dev/null
+++ b/src/core/spdk_thread.cc
@@ -0,0 +1,152 @@
+namespace internal {
+
+spdk_thread* thread_entry::thread() noexcept
+{
+ return spdk_thread_get_from_ctx(reinterpret_cast<void*>(this));
+}
+
+internal::thread_entry* thread_entry::from_thread(spdk_thread* thread)
+ sizeof(internal::thread_entry));
+ _threads.erase(internal::thread_entry::container_list_t::s_iterator_to(entry));
+ spdk_thread_destroy(thread);
+ }
+ }
+ return nr > 0;
+}
+
+void executor::schedule_thread(spdk_thread* thread)
+{
+ _threads.push_back(*internal::thread_entry::from_thread(thread));
+}
+
+executor::sharded_executor_t& executor::instance()
+{
+ assert(s_executor);
+ return *s_executor;
+}
+
+static void spdk_msg_call(void* ctx)
+{
+ auto* task = static_cast<internal::thread_msg*>(ctx);
+ task->run_and_dispose();
+}
+
+future<> executor::do_submit_to(spdk_thread* thread,
+ internal::thread_msg* msg)
+{
+ spdk_thread_send_msg(thread, spdk_msg_call, msg);
+ return msg->get_future();
+}
+
+executor::sharded_executor_t* executor::s_executor = nullptr;
+
+}
--
2.33.0

tcha...@gmail.com

<tchaikov@gmail.com>
unread,
Oct 3, 2021, 3:23:06 AM10/3/21
to seastar-dev
this changeset is also available at https://github.com/tchaikov/seastar/tree/wip-spdk-v2

On Sunday, October 3, 2021 at 1:08:44 PM UTC+8 tcha...@gmail.com wrote:
* include/seastar/core,src/core: add spdk support
* demos: add a demo for SPDK bdev
* cmake:
<snip> 

Avi Kivity

<avi@scylladb.com>
unread,
Oct 3, 2021, 4:11:01 AM10/3/21
to Kefu Chai, seastar-dev@googlegroups.com

On 03/10/2021 08.08, Kefu Chai wrote:
> hi Avi, i've addressed most of your comments. but there are still some left
> unadressed:
>
> - another memory pool. even SPDK tries hard to be a library which is
> relatively not tied to DPDK. it still use DPDK RTE EAL for managing
> its memory pools. unless it offers a compile-time/runtime mechinary
> allowing us to use another allocator for either regular memory or
> pinned memory, we have to stay with yet another memory pool in SPDK.
> - hugepage versus iommu: yeah. it's sad in 2021. but i tried to use
> regular memory buffer allocated using
> temporary_buffer<char>::aligned(), my test in spdk_bdev_demo.cc
> fails because of the mismatch read and write with Malloc bdev,
> which is backed by memory.


Can you explain? Is malloc bdev some kind of fake block device for
testing? I'd much rather have a simple memory model and not support
that, than require splitting memory into two pools (and lose zero-copy).


I looked at SPDK code and it does support iommu.


> - SPDK thread: SPDK uses spdk_thread in its implementation. and
> spdk_thread as an execution unit can even moved across cores for
> better load balance. so it'd be better to keep it as it is and
> manage the spdk threads using a dedicated sharded service.
> - file_impl: i think the SPDK bdevs are quite different from
> files in a handful perspectives. to minimize the confusion,
> i am inclined to keep it as a different class not related to
> file, despite that they do share some methods. i think they
> will diverge even more over time.


But that means that code using seastar::blockdev_file_impl cannot use
spdk and vice versa.


Let's split the dpdk update from spdk support, the dpdk update can be
merged quickly once tested.


btw, I'd like to add iommu support to dpdk too, it's much nicer than
hugepages.

Nicolas Le Scouarnec

<Nicolas.LeScouarnec@broadpeak.tv>
unread,
Oct 4, 2021, 6:27:01 AM10/4/21
to tcha...@gmail.com, seastar-dev


> From: seast...@googlegroups.com <seast...@googlegroups.com> On Behalf Of tcha...@gmail.com

>>> -
>>> set (dpdk_args
>>> - # gcc 10 defaults to -fno-common, which dpdk is not prepared for
>>> - "EXTRA_CFLAGS=-Wno-error -fcommon"
>>> - O=<BINARY_DIR>
>>> - DESTDIR=<INSTALL_DIR>
>>> - T=${dpdk_quadruple})
>>> + --default-library=static
>>> + -Dc_args="-Wno-error"
>>> + -Denable_docs=false
>>> + -Dtests=false
>>> + -Dexamples=
>>> + -Dmbuf_refcnt_atomic=false
>>> + -Dmax_memseg_lists=8192
>>> + -Ddisable_drivers="net/softnic,net/bonding"
>>> + -
>>> Ddisable_libs="kni,jobstats,lpm,acl,power,ip_frag,distributor,reorder,port,table,
>>> pipeline,flow_classify,bpf,efd,member"
>>> + -Dcpu_instruction_set=${Seastar_DPDK_MACHINE})

>> I disabled additional drivers and libs because they pulled additional dependencies I wasn't interested in or cause various issues during compilation on Centos 8 / Ubuntu 20.04 because I missed some headers. I don't know if it could be applicable (especially basedband, regex, event, .... if seastar uses mostly net/* ). For example crypto/*,cryptodev >> links openssl into seastar, which conflicted with our application that uses boringssl.

> i am trying to mirror the settings in dpdk_config. once this changeset is merged, i will create another patch to include more drivers to incorporate the disable_drivers list proposed by you.

In understand, my point is that DPDK nowadays includes way more functionality than it used to have when included initially in Seastar, and if they are not exposed through an API in Seastar, they are not "required". Yet, this may be better/as well handled by not pulling in unused shared libraries from DPDK during "linking" (thus avoiding the issue I had that many dependencies where brought in, which could lead to conflicts for applications using Seastar.).

By the way, non-cooked DPDK (from Debian for example) would not include mbuf_refcnt_atomic / max_memseg_lists
I found these in the .deb generating package
override_dh_auto_configure:
dh_auto_configure -- $(DPDK_CONFIG_OPTIONS) \
--includedir=include/dpdk \
--default-library=shared \
-Dper_library_versions=false \
-Dinclude_subdir_arch=../$(DEB_HOST_MULTIARCH)/dpdk \
-Dmachine=$(RTE_MACHINE)




>>> + BUILD_COMMAND
>>> + ${Ninja_EXECUTABLE} -C <BINARY_DIR>
>>> INSTALL_COMMAND
>>> - ${CMAKE_COMMAND} -E chdir <SOURCE_DIR>
>>> - ${make_command} ${dpdk_args} install)
>>> + ${Ninja_EXECUTABLE} -C <BINARY_DIR> install)

>> If the folder already exists, meson won't redo the configuration, so to force it I added a first attempt with --reconfigure with a fallback to plain call.

> i see, in case the "dpdk_args" is changed over time, we need to rerun meson with an existing folder. i will create a follow up change to address this needs.

The multiple cases I see that could lead to this issues are
(i) when doing iterative development within Seastar / Dpdk where either dpdk_args are changed in Seastar, or DPDK's meson is altered. The DPDK wouldn't be reconfigured unless build/xx is deleted.
(ii) when preparing builds for multiple architectures: indeed, DPDK inherits "Seastar_DPDK_MACHINE" but this work only if meson --reconfigure is called. Otherwise, again DPDK wouldn't be reconfigured unless build/xx is deleted (while the rest of seastar would be properly rebuilt).

Avi Kivity

<avi@scylladb.com>
unread,
Oct 4, 2021, 6:49:43 AM10/4/21
to Nicolas Le Scouarnec, tcha...@gmail.com, seastar-dev
We can probably live without without disabling REFCNT_ATOMIC (yes it add
some atomic operations, but probably tolerable).


MAX_MEMSEG_LISTS will cause failures on many-core machines. They're
probably unnecessary with iommu though (not sure).

Nicolas Le Scouarnec

<Nicolas.LeScouarnec@broadpeak.tv>
unread,
Oct 4, 2021, 8:19:54 AM10/4/21
to Kefu Chai, seastar-dev
Hi,

Just curious, are you working on updating the DPDK embedded into Seastar ? If you're interested I have a patched seastar working with (cooking-based) meson-build DPDK 20.11 , I can push it to github if you want : I've fixed the compilation (cmake-cooking calling meson, and cmake fetching relevant info from dpdk generated pkg-config). It compiles, and runs well as we've used it for one year. However, I have not yet proposed it for inclusion into seastar as it currently lacks two code snippet to get "feature" parity : multiqueue settings (using rte_flow) and some stuff around memory allocation . Indeed in our application, we don't use multiqueue nor the zero-copy mechanism : I couldn't find time work on these.

Let me know if you're interested in it.

Nicolas


> -----Original Message-----
> From: seast...@googlegroups.com <seast...@googlegroups.com> On
> Behalf Of Kefu Chai
> Sent: Monday, 27 September 2021 17:33
> To: seast...@googlegroups.com
> Cc: Kefu Chai <tcha...@gmail.com>
> Subject: [seastar-dev] [PATCH v1 2/7] dpdk: s/ether_addr/rte_ether_addr/
>
> * s/ether_addr/rte_ether_addr/
> * s/ether_hdr/rte_ether_hdr/
>
> ether_addr was renamed to ether_addr in
> 6d13ea8e8e49ab957deae2bba5ecf4a4bfe747d1,
> see
> https://eur02.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.co
> m%2FDPDK%2Fdpdk%2Fcommit%2F6d13ea8e8e49ab957deae2bba5ecf4a4bfe7
> 47d1&amp;data=04%7C01%7Cnicolas.lescouarnec%40broadpeak.tv%7C15f962
> c584df4be30b5d08d981cc1e64%7C0ebe44eac9c9438da0407e699f358ed4%7C0
> %7C0%7C637683535954172914%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4
> wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&am
> p;sdata=i8tPPcsfEh1ANvttAnoOykOD8ouRxkHGH56kpXdk1ew%3D&amp;reserve
> d=0
> and the first release including this change was v19.08. let's update accordingly.
>
> the same applies to ether_hdr
>
> Signed-off-by: Kefu Chai <tcha...@gmail.com>
> ---
> src/net/dpdk.cc | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/src/net/dpdk.cc b/src/net/dpdk.cc index 5f60bfb2..4e0ce8bd 100644

tcha...@gmail.com

<tchaikov@gmail.com>
unread,
Oct 4, 2021, 10:48:03 AM10/4/21
to seastar-dev
On Monday, October 4, 2021 at 6:27:01 PM UTC+8 nicolas.l...@broadpeak.tv wrote:


> From: seast...@googlegroups.com <seast...@googlegroups.com> On Behalf Of tcha...@gmail.com

>>> -
>>> set (dpdk_args
>>> - # gcc 10 defaults to -fno-common, which dpdk is not prepared for
>>> - "EXTRA_CFLAGS=-Wno-error -fcommon"
>>> - O=<BINARY_DIR>
>>> - DESTDIR=<INSTALL_DIR>
>>> - T=${dpdk_quadruple})
>>> + --default-library=static
>>> + -Dc_args="-Wno-error"
>>> + -Denable_docs=false
>>> + -Dtests=false
>>> + -Dexamples=
>>> + -Dmbuf_refcnt_atomic=false
>>> + -Dmax_memseg_lists=8192
>>> + -Ddisable_drivers="net/softnic,net/bonding"
>>> + -
>>> Ddisable_libs="kni,jobstats,lpm,acl,power,ip_frag,distributor,reorder,port,table,
>>> pipeline,flow_classify,bpf,efd,member"
>>> + -Dcpu_instruction_set=${Seastar_DPDK_MACHINE})

>> I disabled additional drivers and libs because they pulled additional dependencies I wasn't interested in or cause various issues during compilation on Centos 8 / Ubuntu 20.04 because I missed some headers. I don't know if it could be applicable (especially basedband, regex, event, .... if seastar uses mostly net/* ). For example crypto/*,cryptodev >> links openssl into seastar, which conflicted with our application that uses boringssl.

> i am trying to mirror the settings in dpdk_config. once this changeset is merged, i will create another patch to include more drivers to incorporate the disable_drivers list proposed by you.

In understand, my point is that DPDK nowadays includes way more functionality than it used to have when included initially in Seastar, and if they are not exposed through an API in Seastar, they are not "required". Yet, this may be better/as well handled by not pulling in unused shared libraries from DPDK during "linking" (thus avoiding the issue I had that many dependencies where brought in, which could lead to conflicts for applications using Seastar.).

hi Nicolas, if i remove any of them, i'd have to review the changes. i am quoting some of the disabled drivers here:

+ -Ddisable_drivers=common/dpaax,common/octeontx,common/octeontx2,common/sfc_efx,common/qat,common/cpt,bus/dpaa,net/sfc,bus/ifpga,bus/fslmc,compress/*,crypto/*,baseband/*,regex/*,vdpa/*,event/*
+ -Ddisable_libs=compressdev,cryptodev 
 
for instance, octeontx is listed in the disabled drivers. and per https://doc.dpdk.org/guides/nics/octeontx.html, octeontx is a PMD driver. strictly speaking, i cannot tell if there is any Seastar+DPDK user uses octeontx or not. the same applies to some other drivers. i appreciate your insistency. as i am not an expert in this area, i really need more time on understanding these libraries and drivers before dropping them. and i still prefer not doing so in this changeset.


By the way, non-cooked DPDK (from Debian for example) would not include mbuf_refcnt_atomic / max_memseg_lists
I found these in the .deb generating package
override_dh_auto_configure:
dh_auto_configure -- $(DPDK_CONFIG_OPTIONS) \
--includedir=include/dpdk \
--default-library=shared \
-Dper_library_versions=false \
-Dinclude_subdir_arch=../$(DEB_HOST_MULTIARCH)/dpdk \
-Dmachine=$(RTE_MACHINE)




>>> + BUILD_COMMAND
>>> + ${Ninja_EXECUTABLE} -C <BINARY_DIR>
>>> INSTALL_COMMAND
>>> - ${CMAKE_COMMAND} -E chdir <SOURCE_DIR>
>>> - ${make_command} ${dpdk_args} install)
>>> + ${Ninja_EXECUTABLE} -C <BINARY_DIR> install)

>> If the folder already exists, meson won't redo the configuration, so to force it I added a first attempt with --reconfigure with a fallback to plain call.

> i see, in case the "dpdk_args" is changed over time, we need to rerun meson with an existing folder. i will create a follow up change to address this needs.

The multiple cases I see that could lead to this issues are
(i) when doing iterative development within Seastar / Dpdk where either dpdk_args are changed in Seastar, or DPDK's meson is altered. The DPDK wouldn't be reconfigured unless build/xx is deleted.
(ii) when preparing builds for multiple architectures: indeed, DPDK inherits "Seastar_DPDK_MACHINE" but this work only if meson --reconfigure is called. Otherwise, again DPDK wouldn't be reconfigured unless build/xx is deleted (while the rest of seastar would be properly rebuilt).

thank you, Nicolas. i just wanted to have a minimal changeset which bumps up the DPDK version. on top of which, i think we can/should include your change to help with the use cases you elaborated above.

Kefu Chai

<tchaikov@gmail.com>
unread,
Oct 4, 2021, 11:03:24 AM10/4/21
to seastar-dev@googlegroups.com, Kefu Chai
there is chance that DPDK is compiled as shared libraries, would be great
if we can use them if they are installed.

in this change

* check for static DPDK libraries before checking for shared libraries.
and prefer static ones if found.
* check the rte_config.h to see if RTE_MBUF_REFCNT_ATOMIC is defined.
bail out if this macro is defined. as atomic refcnt of mbuf is not
necessary if dpdk is used by a Seastar task.

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
cmake/Finddpdk.cmake | 31 +++++++++++++++++++++++++++++--
1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/cmake/Finddpdk.cmake b/cmake/Finddpdk.cmake
index 6b2b0260..c8a381ea 100644
--- a/cmake/Finddpdk.cmake
+++ b/cmake/Finddpdk.cmake
@@ -21,9 +21,9 @@
#

find_package (PkgConfig REQUIRED)
-pkg_check_modules (dpdk_PC libdpdk)
+pkg_check_modules (dpdk_PC QUIET libdpdk)

-if (dpdk_PC_FOUND)
+if (dpdk_PC_FOUND AND dpdk_PC_STATIC_LIBRARIES)
find_package_handle_standard_args (dpdk
REQUIRED_VARS
dpdk_PC_STATIC_CFLAGS
@@ -40,12 +40,39 @@ if (dpdk_PC_FOUND)
INTERFACE_LINK_DIRECTORIES "${dpdk_PC_STATIC_LIBRARY_DIRS}")
return ()
endif ()
+elseif (dpdk_PC_FOUND)
+ find_package_handle_standard_args (dpdk
+ REQUIRED_VARS
+ dpdk_PC_CFLAGS
+ dpdk_PC_INCLUDE_DIRS
+ dpdk_PC_LDFLAGS
+ dpdk_PC_LIBRARY_DIRS)
+ if (dpdk_FOUND AND NOT (TARGET dpdk::dpdk))
+ add_library (dpdk::dpdk INTERFACE IMPORTED)
+ set_target_properties (dpdk::dpdk
+ PROPERTIES
+ INTERFACE_COMPILE_OPTIONS "${dpdk_PC_CFLAGS}"
+ INTERFACE_INCLUDE_DIRECTORIES "${dpdk_PC_INCLUDE_DIRS}"
+ INTERFACE_LINK_OPTIONS "${dpdk_PC_LDFLAGS}"
+ INTERFACE_LINK_DIRECTORIES "${dpdk_PC_LIBRARY_DIRS}")

Kefu Chai

<tchaikov@gmail.com>
unread,
Oct 5, 2021, 3:04:15 AM10/5/21
to seastar-dev@googlegroups.com, Kefu Chai
change since v2:

- core/spdk_bdev:
- lower the debug level in spdk_bdev_io_cpl() to logger.trace()
- remove unused #includes
- add list_devices class to enumerate all bdev devices managed by
SPDK
- demos/spdk_bdev_demo:
- use regular temporary_buffer<char> instead of pinned buffer for read/write
- check if the specified device is listed in the known devices,
before writing to it.
- core/spdk_lib: drop spdk_lib.{hh,cc}. it turns out they are not necessary
- use the latest SPDK


Kefu Chai (2):
spdk: add spdk submodule
*: add spdk support

.gitmodules | 3 +
CMakeLists.txt | 31 +++
cmake/Findarchive.cmake | 12 +
cmake/Findspdk.cmake | 126 ++++++++++
cmake/SeastarDependencies.cmake | 14 ++
configure.py | 9 +
cooking_recipe.cmake | 11 +
demos/CMakeLists.txt | 5 +
demos/spdk-config.json | 17 ++
demos/spdk_bdev_demo.cc | 117 ++++++++++
include/seastar/core/smp.hh | 1 +
include/seastar/core/spdk_app.hh | 65 ++++++
include/seastar/core/spdk_bdev.hh | 119 ++++++++++
include/seastar/core/spdk_thread.hh | 109 +++++++++
spdk | 1 +
src/core/reactor.cc | 51 ++++-
src/core/spdk_app.cc | 342 ++++++++++++++++++++++++++++
src/core/spdk_bdev.cc | 298 ++++++++++++++++++++++++
src/core/spdk_thread.cc | 154 +++++++++++++
19 files changed, 1481 insertions(+), 4 deletions(-)
create mode 100644 cmake/Findarchive.cmake
create mode 100644 cmake/Findspdk.cmake
create mode 100644 demos/spdk-config.json
create mode 100644 demos/spdk_bdev_demo.cc
create mode 100644 include/seastar/core/spdk_app.hh
create mode 100644 include/seastar/core/spdk_bdev.hh
create mode 100644 include/seastar/core/spdk_thread.hh
create mode 160000 spdk
create mode 100644 src/core/spdk_app.cc
create mode 100644 src/core/spdk_bdev.cc
create mode 100644 src/core/spdk_thread.cc

--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Oct 5, 2021, 3:04:16 AM10/5/21
to seastar-dev@googlegroups.com, Kefu Chai
include spdk d42045b2af1082ee7bc3582108c4bd77619c8c34 from
https://github.com/spdk/spdk. we will use the latest stable release
of SPDK once the spdk support in Seastar is relatively stablized.

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
.gitmodules | 3 +++
spdk | 1 +
2 files changed, 4 insertions(+)
create mode 160000 spdk

diff --git a/.gitmodules b/.gitmodules
index c5e41966..0cad1323 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
[submodule "dpdk"]
path = dpdk
url = ../dpdk
+[submodule "spdk"]
+ path = spdk
+ url = ../spdk
diff --git a/spdk b/spdk
new file mode 160000
index 00000000..d42045b2
--- /dev/null
+++ b/spdk
@@ -0,0 +1 @@
+Subproject commit d42045b2af1082ee7bc3582108c4bd77619c8c34
--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Oct 5, 2021, 3:04:19 AM10/5/21
to seastar-dev@googlegroups.com, Kefu Chai
* include/seastar/core,src/core: add spdk support
* demos: add a demo for SPDK bdev
* cmake:
- add Findspdk.cmake: for finding SPDK libraries
- add Findarchive.cmake: libarchive is linked against by DPDK
but the .pc file generated by SPDK does not take it into
consideration, so we need to link against by ourselves.
* configure.py: add meson.
the newer releases of DPDK are configured using meson. to cook
SPDK with the bundeled DPDK, we have to install meson.

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
CMakeLists.txt | 31 +++
cmake/Findarchive.cmake | 12 +
cmake/Findspdk.cmake | 126 ++++++++++
cmake/SeastarDependencies.cmake | 14 ++
configure.py | 9 +
cooking_recipe.cmake | 11 +
demos/CMakeLists.txt | 5 +
demos/spdk-config.json | 17 ++
demos/spdk_bdev_demo.cc | 117 ++++++++++
include/seastar/core/smp.hh | 1 +
include/seastar/core/spdk_app.hh | 65 ++++++
include/seastar/core/spdk_bdev.hh | 119 ++++++++++
include/seastar/core/spdk_thread.hh | 109 +++++++++
src/core/reactor.cc | 51 ++++-
src/core/spdk_app.cc | 342 ++++++++++++++++++++++++++++
src/core/spdk_bdev.cc | 298 ++++++++++++++++++++++++
src/core/spdk_thread.cc | 154 +++++++++++++
17 files changed, 1477 insertions(+), 4 deletions(-)
create mode 100644 cmake/Findarchive.cmake
create mode 100644 cmake/Findspdk.cmake
create mode 100644 demos/spdk-config.json
create mode 100644 demos/spdk_bdev_demo.cc
create mode 100644 include/seastar/core/spdk_app.hh
create mode 100644 include/seastar/core/spdk_bdev.hh
create mode 100644 include/seastar/core/spdk_thread.hh
create mode 100644 src/core/spdk_app.cc
create mode 100644 src/core/spdk_bdev.cc
create mode 100644 src/core/spdk_thread.cc

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a63b3b4c..4909b1a1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -206,6 +206,10 @@ option (Seastar_DPDK
"Enable DPDK support."
OFF)

+option (Seastar_SPDK
+ "Enable SPDK support."
+ OFF)
+
option (Seastar_EXCLUDE_APPS_FROM_ALL
"When enabled alongside Seastar_APPS, do not build applications by default."
OFF)
@@ -499,6 +503,9 @@ add_library (seastar STATIC
include/seastar/core/simple-stream.hh
include/seastar/core/slab.hh
include/seastar/core/sleep.hh
+ include/seastar/core/spdk_app.hh
+ include/seastar/core/spdk_bdev.hh
+ include/seastar/core/spdk_thread.hh
include/seastar/core/sstring.hh
include/seastar/core/stall_sampler.hh
include/seastar/core/stream.hh
@@ -628,6 +635,9 @@ add_library (seastar STATIC
src/core/scollectd-impl.hh
src/core/systemwide_memory_barrier.cc
src/core/smp.cc
+ src/core/spdk_app.cc
+ src/core/spdk_bdev.cc
+ src/core/spdk_thread.cc
src/core/sstring.cc
src/core/thread.cc
src/core/uname.cc
@@ -730,6 +740,16 @@ target_link_libraries (seastar
rt::rt
yaml-cpp::yaml-cpp
Threads::Threads)
+if (Seastar_SPDK)
+ target_link_libraries (seastar
+ PRIVATE
+ spdk::event_bdev
+ spdk::event_accel
+ spdk::bdev
+ spdk::accel
+ spdk::init
+ spdk::env_dpdk)
+endif ()

set (Seastar_SANITIZE_MODES "Debug" "Sanitize")
if ((Seastar_SANITIZE STREQUAL "ON") OR
@@ -892,6 +912,16 @@ if (Seastar_DPDK)
PUBLIC dpdk::dpdk)
endif ()

+if (Seastar_SPDK)
+ target_compile_definitions (seastar
+ PUBLIC SEASTAR_HAVE_SPDK)
+ target_link_libraries (seastar
+ PUBLIC spdk::spdk)
+endif ()
+if (Seastar_DPDK AND Seastar_SPDK)
+ message (FATAL_ERROR "DPDK and SPDK support cannot be enabled at the same time!")
+endif ()
+
if (Seastar_HWLOC)
if (NOT hwloc_FOUND)
message (FATAL_ERROR "`hwloc` support is enabled but it is not available!")
@@ -1218,6 +1248,7 @@ if (Seastar_INSTALL)
+ PROPERTIES
+ PROPERTIES
+ INTERFACE_COMPILE_OPTIONS "${spdk_PC_STATIC_bdev_CFLAGS}"
+ INTERFACE_INCLUDE_DIRECTORIES "${spdk_INCLUDE_DIR}"
+ INTERFACE_LINK_OPTIONS "${whole_archive_link_opts}"
+ INTERFACE_LINK_LIBRARIES "${spdk_LIBRARIES}"
+ INTERFACE_LINK_DIRECTORIES "${spdk_LINK_DIRECTORIES}")
+endif ()
diff --git a/cmake/SeastarDependencies.cmake b/cmake/SeastarDependencies.cmake
index 51a8a65a..bc458e2e 100644
--- a/cmake/SeastarDependencies.cmake
+++ b/cmake/SeastarDependencies.cmake
@@ -67,6 +67,11 @@ macro (seastar_find_dependencies)
rt
yaml-cpp)

+ if (Seastar_SPDK)
+ list (APPEND _seastar_all_dependencies
+ spdk)
+ endif ()
+
# Arguments to `find_package` for each 3rd-party dependency.
# Note that the version specification is a "minimal" version requirement.

@@ -92,6 +97,15 @@ macro (seastar_find_dependencies)
set (_seastar_dep_args_lksctp-tools REQUIRED)
set (_seastar_dep_args_rt REQUIRED)
set (_seastar_dep_args_yaml-cpp 0.5.1 REQUIRED)
+ set (_seastar_dep_args_spdk
+ 21.10.0
+ COMPONENTS
+ event_bdev
+ event_accel
+ bdev
+ accel
+ init
+ env_dpdk)

foreach (third_party ${_seastar_all_dependencies})
find_package ("${third_party}" ${_seastar_dep_args_${third_party}})
diff --git a/configure.py b/configure.py
index 27a40fd9..5767455d 100755
--- a/configure.py
+++ b/configure.py
@@ -81,6 +81,11 @@ add_tristate(
name = 'dpdk',
dest = 'dpdk',
help = 'DPDK support')
+add_tristate(
+ arg_parser,
+ name = 'spdk',
+ dest = 'spdk',
+ help = 'SPDK support')
add_tristate(
arg_parser,
name = 'hwloc',
@@ -189,6 +194,7 @@ def configure_mode(mode):
tr(LDFLAGS, 'LD_FLAGS'),
tr(args.cpp_dialect, 'CXX_DIALECT'),
tr(args.dpdk, 'DPDK'),
+ tr(args.spdk, 'SPDK'),
tr(infer_dpdk_machine(args.user_cflags), 'DPDK_MACHINE'),
tr(args.hwloc, 'HWLOC', value_when_none='yes'),
tr(args.alloc_failure_injection, 'ALLOC_FAILURE_INJECTION', value_when_none='DEFAULT'),
@@ -206,6 +212,9 @@ def configure_mode(mode):
if args.dpdk:
ingredients_to_cook.add('dpdk')

+ if args.spdk:
+ ingredients_to_cook.add('spdk')
+
# Generate a new build by pointing to the source directory.
if ingredients_to_cook:
# We need to use cmake-cooking for some dependencies.
diff --git a/cooking_recipe.cmake b/cooking_recipe.cmake
index c53d3aee..57d4e398 100644
--- a/cooking_recipe.cmake
+++ b/cooking_recipe.cmake
@@ -296,3 +296,14 @@ cooking_ingredient (lz4
index 00000000..e3f85034
--- /dev/null
+++ b/demos/spdk_bdev_demo.cc
@@ -0,0 +1,117 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#include <algorithm>
+#include <cstring>
+#include <iomanip>
+#include <vector>
+
+#include <seastar/core/app-template.hh>
+#include <seastar/core/file.hh>
+#include <seastar/core/seastar.hh>
+#include <seastar/core/spdk_app.hh>
+#include <seastar/core/spdk_bdev.hh>
+#include <seastar/core/temporary_buffer.hh>
+#include <seastar/util/log.hh>
+
+using namespace seastar;
+namespace bpo = boost::program_options;
+
+seastar::logger spdk_logger("spdk_demo");
+
+static constexpr int MEMSET_PATTERN = 0x42;
+
+int main(int ac, char** av) {
+ seastar::app_template seastar_app;
+ spdk::app spdk_app;
+ seastar_app.add_options()
+ ( "bdev", bpo::value<std::string>()->default_value("Malloc0"),
+ "bdev name");
+ seastar_app.get_options_description().add(spdk_app.get_options_description());
+ std::vector<const char*> argv(av, av + ac);
+ if (std::find_if(argv.begin(),
+ argv.end(),
+ [](const char* s) { return strcmp(s, "--spdk-pmd") == 0; }) ==
+ argv.end()) {
+ argv.push_back("--spdk-pmd");
+ }
+ return seastar_app.run(argv.size(), const_cast<char**>(argv.data()), [&] {
+ spdk_logger.info("demo running");
+ auto bdev_name = seastar_app.configuration()["bdev"].as<std::string>();
+ return spdk_app.run(seastar_app.configuration(), [bdev_name] {
+ spdk::list_devices all_devices{false};
+ if (std::none_of(all_devices.begin(), all_devices.end(),
+ [&] (const std::string& name){
+ return bdev_name == name;
+ })) {
+ std::cerr << "Device " << std::quoted(bdev_name) << " not found. "
+ << "Please specify one of the following device(s): ";
+ std::copy(all_devices.begin(),
+ all_devices.end(),
+ std::ostream_iterator<std::string>(std::cerr, ", "));
+ std::cerr << std::endl;
+ throw std::invalid_argument("unknown device");
+ }
+ spdk_logger.info("bdev.open");
+ std::unique_ptr<spdk::block_device> bdev = spdk::block_device::open(bdev_name);
+ uint32_t block_size = bdev->block_size();
+ temporary_buffer<char> buf;
+ if (size_t alignment = bdev->memory_dma_alignment() == 1) {
+ buf = temporary_buffer<char>{block_size};
+ } else {
+ buf = temporary_buffer<char>::aligned(alignment, block_size);
+ }
+ return do_with(std::move(bdev),
+ std::move(buf),
+ [] (std::unique_ptr<spdk::block_device>& bdev,
+ temporary_buffer<char>& buf) {
+ spdk_logger.info("bdev.write");
+ memset(buf.get_write(), MEMSET_PATTERN, buf.size());
+ return bdev->write(0, buf.get(), buf.size()).then([&] {
+ memset(buf.get_write(), 0xff, buf.size());
+ spdk_logger.info("bdev.read");
+ return bdev->read(0, buf.get_write(), buf.size());
+ }).then([&buf] {
+ temporary_buffer<char> good{buf.size()};
+ memset(good.get_write(), MEMSET_PATTERN, good.size());
+ if (int where = memcmp(good.get(), buf.get(), buf.size());
+ where != 0) {
+ spdk_logger.error("buf mismatches at {}!", where);
+ } else {
+ spdk_logger.info("buf matches!");
+ }
+ });
+ });
+ }).then_wrapped([] (auto f) {
+ try {
+ f.get();
+ return 1;
+ } catch (std::exception& e) {
+ std::cerr << e.what() << std::endl;
+ return 1;
+ } catch (...) {
+ std::cout << "unknown exception" << std::endl;
+ return 1;
+ }
+ });
+ });
+}
diff --git a/include/seastar/core/smp.hh b/include/seastar/core/smp.hh
index eda8e8c7..929d9791 100644
--- a/include/seastar/core/smp.hh
+++ b/include/seastar/core/smp.hh
@@ -301,6 +301,7 @@ class smp : public std::enable_shared_from_this<smp> {
static thread_local smp_message_queue**_qs;
static thread_local std::thread::id _tmain;
bool _using_dpdk = false;
+ bool _using_spdk = false;

template <typename Func>
using returns_future = is_future<std::result_of_t<Func()>>;
diff --git a/include/seastar/core/spdk_app.hh b/include/seastar/core/spdk_app.hh
new file mode 100644
index 00000000..56a8bc4f
--- /dev/null
+++ b/include/seastar/core/spdk_app.hh
@@ -0,0 +1,65 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#pragma once
+
+#ifdef SEASTAR_HAVE_SPDK
+
+#endif // SEASTAR_HAVE_SPDK
diff --git a/include/seastar/core/spdk_bdev.hh b/include/seastar/core/spdk_bdev.hh
new file mode 100644
index 00000000..fe27d106
--- /dev/null
+++ b/include/seastar/core/spdk_bdev.hh
@@ -0,0 +1,119 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#pragma once
+
+#ifdef SEASTAR_HAVE_SPDK
+
+#include <seastar/core/future.hh>
+#include <iterator>
+#include <memory>
+#include <sys/uio.h>
+
+struct spdk_bdev;
+struct spdk_bdev_desc;
+struct spdk_io_channel;
+
+namespace seastar::spdk {
+
+class block_device {
+public:
+ /// mirrors spdk_bdev_io_stat, so seastar application does not need to have
+ /// access spdk header files to compile.
+ struct io_stats {
+ uint64_t bytes_read;
+ uint64_t num_read_ops;
+ uint64_t bytes_written;
+ uint64_t num_write_ops;
+ uint64_t bytes_unmapped;
+ uint64_t num_unmap_ops;
+ uint64_t read_latency_ticks;
+ uint64_t write_latency_ticks;
+ uint64_t unmap_latency_ticks;
+ uint64_t ticks_rate;
+ };
+public:
+ static std::unique_ptr<block_device> open(const std::string& bdev_name);
+ ~block_device();
+
+ future<> write(uint64_t pos, const void* buffer, size_t len);
+ future<> writev(uint64_t pos, std::vector<iovec> iov);
+ future<> read(uint64_t pos, void* buffer, size_t len);
+ future<> readv(uint64_t pos, std::vector<iovec> iov);
+ future<> flush(uint64_t pos, size_t len);
+ future<> unmap(uint64_t pos, size_t len);
+ future<io_stats> stat();
+
+ uint32_t block_size() const;
+ size_t memory_dma_alignment() const;
+
+private:
+ block_device() = default;
+ static void event_cb(int /* spdk_bdev_event_type */ type,
+ struct spdk_bdev* bdev,
+ void* event_ctx);
+
+private:
+ spdk_bdev* _bdev = nullptr;
+ spdk_bdev_desc* _desc = nullptr;
+ spdk_io_channel* _io_channel = nullptr;
+};
+
+class dev_iterator : public std::iterator<std::forward_iterator_tag, std::string> {
+ void advance() noexcept;
+public:
+ dev_iterator(spdk_bdev* bdev, bool with_vbdev)
+ : _bdev{bdev}
+ , _with_vbdev{with_vbdev}
+ {}
+ dev_iterator& operator++() noexcept {
+ advance();
+ return *this;
+ }
+ dev_iterator operator++(int) noexcept {
+ auto ret = *this;
+ advance();
+ return ret;
+ }
+ std::string operator*() const noexcept;
+ bool operator==(const dev_iterator& other) const noexcept {
+ return _bdev == other._bdev;
+ }
+private:
+ spdk_bdev* _bdev;
+ const bool _with_vbdev;
+};
+
+class list_devices {
+public:
+ explicit list_devices(bool with_vbdev)
+ : _with_vbdev{with_vbdev}
+ {}
+ dev_iterator begin() const;
+ dev_iterator end() const;
+private:
+ const bool _with_vbdev;
+};
+
+}
+
+#endif // SEASTAR_HAVE_SPDK
diff --git a/include/seastar/core/spdk_thread.hh b/include/seastar/core/spdk_thread.hh
new file mode 100644
index 00000000..643afb13
--- /dev/null
+++ b/include/seastar/core/spdk_thread.hh
@@ -0,0 +1,109 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+
+#pragma once
+
+#ifdef SEASTAR_HAVE_SPDK
+
+#endif // SEASTAR_HAVE_SPDK
diff --git a/src/core/reactor.cc b/src/core/reactor.cc
index 97d557c5..a16a7666 100644
index 00000000..2b910a9f
--- /dev/null
+++ b/src/core/spdk_app.cc
@@ -0,0 +1,342 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+#ifdef SEASTAR_HAVE_SPDK
+ if (rc) {
+ seastar::spdk::logger.error("unable to initialize subsystem: {}", spdk_strerror(-rc));
+
+#endif // SEASTAR_HAVE_SPDK
diff --git a/src/core/spdk_bdev.cc b/src/core/spdk_bdev.cc
new file mode 100644
index 00000000..4a43ee48
--- /dev/null
+++ b/src/core/spdk_bdev.cc
@@ -0,0 +1,298 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+#ifdef SEASTAR_HAVE_SPDK
+
+#include <memory>
+#include <numeric>
+
+#include <seastar/core/do_with.hh>
+class stat_completion_desc {
+ seastar::promise<seastar::spdk::block_device::io_stats> _pr;
+public:
+ void complete_with(spdk_bdev_io_stat* stat, int rc) {
+ assert(stat);
+ if (rc == 0) {
+ seastar::spdk::block_device::io_stats io_stats;
+ static_assert(sizeof(io_stats) == sizeof(*stat));
+ memcpy(&io_stats, stat, sizeof(io_stats));
+ _pr.set_value(std::move(io_stats));
+ } else {
+ _pr.set_exception(
+ std::system_error(-rc,
+ std::system_category(),
+ "bdev stat error"));
+ }
+ delete this;
+ }
+ seastar::future<seastar::spdk::block_device::io_stats> get_future() {
+ logger.trace("io done");
+future<> block_device::writev(uint64_t pos, std::vector<iovec> iov)
+{
+ auto len = std::accumulate(iov.begin(), iov.end(), size_t(0),
+ [](size_t sum, const iovec& iov) {
+ return sum + iov.iov_len;
+ });
+ auto io_desc = std::make_unique<io_completion_desc>();
+ auto io_done = io_desc->get_future();
+ int rc = spdk_bdev_writev(_desc, _io_channel,
+ iov.data(), iov.size(), pos, len,
+ spdk_bdev_io_cpl, io_desc.release());
+ if (rc == -ENOMEM) {
+ io_desc->fail_with(std::bad_alloc());
+ } else {
+ // -EBADF or -EINVAL
+ io_desc->fail_with(std::invalid_argument("out of range"));
+ }
+ return io_done.finally([iov = std::move(iov)] () {});
+}
+
+future<> block_device::read(uint64_t pos, void* buffer, size_t len)
+{
+ assert(_bdev);
+ auto io_desc = std::make_unique<io_completion_desc>();
+ auto io_done = io_desc->get_future();
+ int rc = spdk_bdev_read(_desc, _io_channel, buffer, pos, len,
+ spdk_bdev_io_cpl, io_desc.release());
+ if (rc == 0) {
+ return io_done;
+ }
+ if (rc == -ENOMEM) {
+ io_desc->fail_with(std::bad_alloc());
+ } else {
+ // -EINVAL
+ io_desc->fail_with(std::invalid_argument("out of range"));
+ }
+ return io_done;
+}
+
+future<> block_device::readv(uint64_t pos, std::vector<iovec> iov)
+{
+ auto len = std::accumulate(iov.begin(), iov.end(), size_t(0),
+ [](size_t sum, const iovec& iov) {
+ return sum + iov.iov_len;
+ });
+ auto io_desc = std::make_unique<io_completion_desc>();
+ auto io_done = io_desc->get_future();
+ int rc = spdk_bdev_readv(_desc, _io_channel,
+ iov.data(), iov.size(), pos, len,
+ spdk_bdev_io_cpl, io_desc.release());
+ if (rc == -ENOMEM) {
+ io_desc->fail_with(std::bad_alloc());
+ } else {
+ // -EINVAL
+ io_desc->fail_with(std::invalid_argument("out of range"));
+ }
+ return io_done.finally([iov = std::move(iov)] () {});
+}
+
+static void spdk_bdev_get_device_stat_cpl(spdk_bdev* bdev,
+ spdk_bdev_io_stat* stat,
+ void* arg, int rc)
+{
+ logger.trace("stat done");
+ auto* desc = static_cast<stat_completion_desc*>(arg);
+ desc->complete_with(stat, rc);
+}
+
+future<block_device::io_stats> block_device::stat()
+{
+ return do_with(spdk_bdev_io_stat{}, [this] (spdk_bdev_io_stat& stat) {
+ auto stat_desc = std::make_unique<stat_completion_desc>();
+ auto stat_done = stat_desc->get_future();
+ spdk_bdev_get_device_stat(_bdev, &stat,
+ spdk_bdev_get_device_stat_cpl,
+ stat_desc.release());
+ return stat_done;
+ });
+}
+
+future<> block_device::flush(uint64_t pos, size_t len)
+{
+ assert(_bdev);
+ auto io_desc = std::make_unique<io_completion_desc>();
+ auto io_done = io_desc->get_future();
+ int rc = spdk_bdev_flush(_desc, _io_channel, pos, len,
+ spdk_bdev_io_cpl, io_desc.release());
+ if (rc == 0) {
+ return io_done;
+ }
+ if (rc == -ENOMEM) {
+ io_desc->fail_with(std::bad_alloc());
+ } else {
+ // -EINVAL or -EBADF
+ io_desc->fail_with(std::invalid_argument(spdk_strerror(-rc)));
+ }
+ return io_done;
+}
+
+uint32_t block_device::block_size() const
+{
+ assert(_bdev);
+ return spdk_bdev_get_block_size(_bdev);
+}
+
+size_t block_device::memory_dma_alignment() const
+{
+ assert(_bdev);
+ return spdk_bdev_get_buf_align(_bdev);
+}
+
+void dev_iterator::advance() noexcept
+{
+ assert(_bdev);
+ if (_with_vbdev) {
+ _bdev = spdk_bdev_next(_bdev);
+ } else {
+ _bdev = spdk_bdev_next_leaf(_bdev);
+ }
+}
+
+std::string dev_iterator::operator*() const noexcept
+{
+ assert(_bdev);
+ return spdk_bdev_get_name(_bdev);
+}
+
+dev_iterator list_devices::begin() const
+{
+ spdk_bdev* bdev = nullptr;
+ if (_with_vbdev) {
+ bdev = spdk_bdev_first();
+ } else {
+ bdev = spdk_bdev_first_leaf();
+ }
+ return dev_iterator{bdev, _with_vbdev};
+}
+
+dev_iterator list_devices::end() const
+{
+ return dev_iterator{nullptr, _with_vbdev};
+}
+
+}
+#endif // SEASTAR_HAVE_SPDK
diff --git a/src/core/spdk_thread.cc b/src/core/spdk_thread.cc
new file mode 100644
index 00000000..53a437f6
--- /dev/null
+++ b/src/core/spdk_thread.cc
@@ -0,0 +1,154 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:4; indent-tabs-mode:nil -*-
+/*
+ * This file is open source software, licensed to you under the terms
+ * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
+ * distributed with this work for additional information regarding copyright
+ * ownership. You may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Copyright (C) 2021 Kefu Chai <tcha...@gmail.com>
+ */
+#ifdef SEASTAR_HAVE_SPDK
+#endif // SEASTAR_HAVE_SPDK
--
2.33.0

kefu chai

<tchaikov@gmail.com>
unread,
Oct 5, 2021, 3:16:49 AM10/5/21
to Avi Kivity, seastar-dev@googlegroups.com
On Sun, Oct 3, 2021 at 4:11 PM Avi Kivity <a...@scylladb.com> wrote:
>
>
> On 03/10/2021 08.08, Kefu Chai wrote:
> > hi Avi, i've addressed most of your comments. but there are still some left
> > unadressed:
> >
> > - another memory pool. even SPDK tries hard to be a library which is
> > relatively not tied to DPDK. it still use DPDK RTE EAL for managing
> > its memory pools. unless it offers a compile-time/runtime mechinary
> > allowing us to use another allocator for either regular memory or
> > pinned memory, we have to stay with yet another memory pool in SPDK.
> > - hugepage versus iommu: yeah. it's sad in 2021. but i tried to use
> > regular memory buffer allocated using
> > temporary_buffer<char>::aligned(), my test in spdk_bdev_demo.cc
> > fails because of the mismatch read and write with Malloc bdev,
> > which is backed by memory.
>
>
> Can you explain? Is malloc bdev some kind of fake block device for
> testing? I'd much rather have a simple memory model and not support
> that, than require splitting memory into two pools (and lose zero-copy).
>

i retested the demo. now it passed even the regular memory buffer is
used to write/read from the malloc device. yeah, the malloc device is
a fake block device for testing. its block size is 1. so i dropped the
change to use the pinned memory from the changeset.

>
> I looked at SPDK code and it does support iommu.
>
>
> > - SPDK thread: SPDK uses spdk_thread in its implementation. and
> > spdk_thread as an execution unit can even moved across cores for
> > better load balance. so it'd be better to keep it as it is and
> > manage the spdk threads using a dedicated sharded service.
> > - file_impl: i think the SPDK bdevs are quite different from
> > files in a handful perspectives. to minimize the confusion,
> > i am inclined to keep it as a different class not related to
> > file, despite that they do share some methods. i think they
> > will diverge even more over time.
>
>
> But that means that code using seastar::blockdev_file_impl cannot use
> spdk and vice versa.

yeah. that's a shame. but IMHO, seastar::file can be opened using a
*path*, but SPDK does not understand a path. i can add a factory
method to seastar like reactor::open_file_dma(), but it will have a
set of different semantics. for instance,

- it won't understand open_flags or file_open_options
- it will ignore io_priority_class and io_intent, before the QoS
feature is integrated / implemented
- the "dma" in dma_read is kind of misleading. as SPDK always does DMA.
- it does not understand fcntl as i mentioned last time. as it's not
supported by a filesystem.
- its stat is not the same thing as the io stat collected by SPDK for
a bdev or an io channel talking to an underlying bdev.

>
>
> Let's split the dpdk update from spdk support, the dpdk update can be
> merged quickly once tested.

sure. already sent for review.

>
>
> btw, I'd like to add iommu support to dpdk too, it's much nicer than
> hugepages.

fully agree. iommu is much easier/nicer than pinned hugepages.
--
Regards
Kefu Chai

Kefu Chai

<tchaikov@gmail.com>
unread,
Oct 16, 2021, 9:56:39 AM10/16/21
to seastar-dev@googlegroups.com, Kefu Chai
---

change since v3:

- open spdk log when setting up spdk env, so the options passed
to dpdk eal are printed in a more readable way
- add missing cmake/Findaio.cmake and cmake/Finduuid.cmake

---

change since v2:

- core/spdk_bdev:
- lower the debug level in spdk_bdev_io_cpl() to logger.trace()
- remove unused #includes
- add list_devices class to enumerate all bdev devices managed by SPDK
- demos/spdk_bdev_demo:
- use regular temporary_buffer<char> instead of pinned buffer for read/write
- check if the specified device is listed in the known devices,
before writing to it.
- core/spdk_lib: drop spdk_lib.{hh,cc}. it turns out they are not necessary
- use the latest SPDK

---
Kefu Chai (2):
spdk: add spdk submodule
*: add spdk support

.gitmodules | 3 +
CMakeLists.txt | 33 +++
cmake/Findaio.cmake | 12 +
cmake/Findarchive.cmake | 12 +
cmake/Findspdk.cmake | 126 ++++++++++
cmake/Finduuid.cmake | 12 +
cmake/SeastarDependencies.cmake | 13 +
configure.py | 9 +
cooking_recipe.cmake | 11 +
demos/CMakeLists.txt | 5 +
demos/spdk-config.json | 17 ++
demos/spdk_bdev_demo.cc | 117 +++++++++
include/seastar/core/smp.hh | 1 +
include/seastar/core/spdk_app.hh | 65 +++++
include/seastar/core/spdk_bdev.hh | 119 ++++++++++
include/seastar/core/spdk_thread.hh | 109 +++++++++
spdk | 1 +
src/core/reactor.cc | 51 +++-
src/core/spdk_app.cc | 353 ++++++++++++++++++++++++++++
src/core/spdk_bdev.cc | 298 +++++++++++++++++++++++
src/core/spdk_thread.cc | 154 ++++++++++++
21 files changed, 1517 insertions(+), 4 deletions(-)
create mode 100644 cmake/Findaio.cmake
create mode 100644 cmake/Findarchive.cmake
create mode 100644 cmake/Findspdk.cmake
create mode 100644 cmake/Finduuid.cmake
create mode 100644 demos/spdk-config.json
create mode 100644 demos/spdk_bdev_demo.cc
create mode 100644 include/seastar/core/spdk_app.hh
create mode 100644 include/seastar/core/spdk_bdev.hh
create mode 100644 include/seastar/core/spdk_thread.hh
create mode 160000 spdk
create mode 100644 src/core/spdk_app.cc
create mode 100644 src/core/spdk_bdev.cc
create mode 100644 src/core/spdk_thread.cc

--
2.33.0

Kefu Chai

<tchaikov@gmail.com>
unread,
Oct 16, 2021, 9:56:42 AM10/16/21
to seastar-dev@googlegroups.com, Kefu Chai
include spdk d42045b2af1082ee7bc3582108c4bd77619c8c34 from
https://github.com/spdk/spdk. we will use the latest stable release
of SPDK once the spdk support in Seastar is relatively stablized.

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---

Kefu Chai

<tchaikov@gmail.com>
unread,
Oct 16, 2021, 9:56:47 AM10/16/21
to seastar-dev@googlegroups.com, Kefu Chai
* include/seastar/core,src/core: add spdk support
* demos: add a demo for SPDK bdev
* cmake:
- add Findspdk.cmake: for finding SPDK libraries
- add Findarchive.cmake: libarchive is linked against by DPDK
but the .pc file generated by SPDK does not take it into
consideration, so we need to link against by ourselves.
the same applies to Finduuid.cmake and Findaio.cmake
* configure.py: add meson.
the newer releases of DPDK are configured using meson. to cook
SPDK with the bundeled DPDK, we have to install meson.

Signed-off-by: Kefu Chai <tcha...@gmail.com>
---
CMakeLists.txt | 33 +++
cmake/Findaio.cmake | 12 +
cmake/Findarchive.cmake | 12 +
cmake/Findspdk.cmake | 126 ++++++++++
cmake/Finduuid.cmake | 12 +
cmake/SeastarDependencies.cmake | 13 +
configure.py | 9 +
cooking_recipe.cmake | 11 +
demos/CMakeLists.txt | 5 +
demos/spdk-config.json | 17 ++
demos/spdk_bdev_demo.cc | 117 +++++++++
include/seastar/core/smp.hh | 1 +
include/seastar/core/spdk_app.hh | 65 +++++
include/seastar/core/spdk_bdev.hh | 119 ++++++++++
include/seastar/core/spdk_thread.hh | 109 +++++++++
src/core/reactor.cc | 51 +++-
src/core/spdk_app.cc | 353 ++++++++++++++++++++++++++++
src/core/spdk_bdev.cc | 298 +++++++++++++++++++++++
src/core/spdk_thread.cc | 154 ++++++++++++
19 files changed, 1513 insertions(+), 4 deletions(-)
create mode 100644 cmake/Findaio.cmake
create mode 100644 cmake/Findarchive.cmake
create mode 100644 cmake/Findspdk.cmake
create mode 100644 cmake/Finduuid.cmake
create mode 100644 demos/spdk-config.json
create mode 100644 demos/spdk_bdev_demo.cc
create mode 100644 include/seastar/core/spdk_app.hh
create mode 100644 include/seastar/core/spdk_bdev.hh
create mode 100644 include/seastar/core/spdk_thread.hh
create mode 100644 src/core/spdk_app.cc
create mode 100644 src/core/spdk_bdev.cc
create mode 100644 src/core/spdk_thread.cc

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2fd462d8..d5d47a77 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -216,6 +216,10 @@ option (Seastar_DPDK
"Enable DPDK support."
OFF)

+option (Seastar_SPDK
+ "Enable SPDK support."
+ OFF)
+
option (Seastar_EXCLUDE_APPS_FROM_ALL
"When enabled alongside Seastar_APPS, do not build applications by default."
OFF)
@@ -504,6 +508,9 @@ add_library (seastar STATIC
include/seastar/core/simple-stream.hh
include/seastar/core/slab.hh
include/seastar/core/sleep.hh
+ include/seastar/core/spdk_app.hh
+ include/seastar/core/spdk_bdev.hh
+ include/seastar/core/spdk_thread.hh
include/seastar/core/sstring.hh
include/seastar/core/stall_sampler.hh
include/seastar/core/stream.hh
@@ -633,6 +640,9 @@ add_library (seastar STATIC
src/core/scollectd-impl.hh
src/core/systemwide_memory_barrier.cc
src/core/smp.cc
+ src/core/spdk_app.cc
+ src/core/spdk_bdev.cc
+ src/core/spdk_thread.cc
src/core/sstring.cc
src/core/thread.cc
src/core/uname.cc
@@ -735,12 +745,24 @@ target_link_libraries (seastar
rt::rt
yaml-cpp::yaml-cpp
Threads::Threads)
+
if (Seastar_DPDK)
target_link_libraries (seastar
PUBLIC
dpdk::dpdk)
endif()

+if (Seastar_SPDK)
+ target_link_libraries (seastar
+ PRIVATE
+ spdk::event_bdev
+ spdk::event_accel
+ spdk::bdev
+ spdk::accel
+ spdk::init
+ spdk::env_dpdk)
+endif ()
+
set (Seastar_SANITIZE_MODES "Debug" "Sanitize")
if ((Seastar_SANITIZE STREQUAL "ON") OR
((Seastar_SANITIZE STREQUAL "DEFAULT") AND
@@ -897,6 +919,16 @@ if (Seastar_DPDK)
PUBLIC $<TARGET_PROPERTY:dpdk::dpdk,INTERFACE_INCLUDE_DIRECTORIES>)
endif ()

+if (Seastar_SPDK)
+ target_compile_definitions (seastar
+ PUBLIC SEASTAR_HAVE_SPDK)
+ target_link_libraries (seastar
+ PUBLIC spdk::spdk)
+endif ()
+if (Seastar_DPDK AND Seastar_SPDK)
+ message (FATAL_ERROR "DPDK and SPDK support cannot be enabled at the same time!")
+endif ()
+
if (Seastar_HWLOC)
if (NOT hwloc_FOUND)
message (FATAL_ERROR "`hwloc` support is enabled but it is not available!")
@@ -1223,6 +1255,7 @@ if (Seastar_INSTALL)
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findlksctp-tools.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findlz4.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findnumactl.cmake
+ ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findspdk.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findragel.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findrt.cmake
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findyaml-cpp.cmake
diff --git a/cmake/Findaio.cmake b/cmake/Findaio.cmake
new file mode 100644
index 00000000..e846d063
--- /dev/null
+++ b/cmake/Findaio.cmake
@@ -0,0 +1,12 @@
+if(aio_INCLUDE_DIR AND aio_LIBRARIES)
+ set(aio_FIND_QUIETLY TRUE)
+endif()
+
+find_path(aio_INCLUDE_DIR libaio.h)
+find_library(aio_LIBRARIES aio)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(aio
+ DEFAULT_MSG aio_INCLUDE_DIR aio_LIBRARIES)
+
+mark_as_advanced(aio_INCLUDE_DIR aio_LIBRARIES)
diff --git a/cmake/Finduuid.cmake b/cmake/Finduuid.cmake
new file mode 100644
index 00000000..ff4bfb65
--- /dev/null
+++ b/cmake/Finduuid.cmake
@@ -0,0 +1,12 @@
+if(uuid_INCLUDE_DIR AND uuid_LIBRARIES)
+ set(uuid_FIND_QUIETLY TRUE)
+endif()
+
+find_path(uuid_INCLUDE_DIR uuid/uuid.h)
+find_library(uuid_LIBRARIES uuid)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(uuid
+ DEFAULT_MSG uuid_INCLUDE_DIR uuid_LIBRARIES)
+
+mark_as_advanced(UUID_INCLUDE_DIR uuid_LIBRARIES)
diff --git a/cmake/SeastarDependencies.cmake b/cmake/SeastarDependencies.cmake
index ca1007f5..3c476a45 100644
--- a/cmake/SeastarDependencies.cmake
+++ b/cmake/SeastarDependencies.cmake
@@ -70,6 +70,10 @@ macro (seastar_find_dependencies)
list (APPEND _seastar_all_dependencies
dpdk)
endif ()
+ if (Seastar_SPDK)
+ list (APPEND _seastar_all_dependencies
+ spdk)
+ endif ()

# Arguments to `find_package` for each 3rd-party dependency.
# Note that the version specification is a "minimal" version requirement.
@@ -96,6 +100,15 @@ macro (seastar_find_dependencies)
set (_seastar_dep_args_lksctp-tools REQUIRED)
set (_seastar_dep_args_rt REQUIRED)
set (_seastar_dep_args_yaml-cpp 0.5.1 REQUIRED)
+ set (_seastar_dep_args_spdk
+ 21.10.0
+ COMPONENTS
+ event_bdev
+ event_accel
+ bdev
+ accel
+ init
+ env_dpdk)

foreach (third_party ${_seastar_all_dependencies})
find_package ("${third_party}" ${_seastar_dep_args_${third_party}})
diff --git a/configure.py b/configure.py
index 236df730..eaebb99f 100755
--- a/configure.py
+++ b/configure.py
@@ -83,6 +83,11 @@ add_tristate(
name = 'dpdk',
dest = 'dpdk',
help = 'DPDK support')
+add_tristate(
+ arg_parser,
+ name = 'spdk',
+ dest = 'spdk',
+ help = 'SPDK support')
add_tristate(
arg_parser,
name = 'hwloc',
@@ -192,6 +197,7 @@ def configure_mode(mode):
tr(LDFLAGS, 'LD_FLAGS'),
tr(args.cpp_dialect, 'CXX_DIALECT'),
tr(args.dpdk, 'DPDK'),
+ tr(args.spdk, 'SPDK'),
tr(infer_dpdk_machine(args.user_cflags), 'DPDK_MACHINE'),
tr(args.hwloc, 'HWLOC', value_when_none='yes'),
tr(args.alloc_failure_injection, 'ALLOC_FAILURE_INJECTION', value_when_none='DEFAULT'),
@@ -209,6 +215,9 @@ def configure_mode(mode):
if args.dpdk:
ingredients_to_cook.add('dpdk')

+ if args.spdk:
+ ingredients_to_cook.add('spdk')
+
# Generate a new build by pointing to the source directory.
if ingredients_to_cook:
# We need to use cmake-cooking for some dependencies.
diff --git a/cooking_recipe.cmake b/cooking_recipe.cmake
index 38506194..9df831d3 100644
--- a/cooking_recipe.cmake
+++ b/cooking_recipe.cmake
@@ -298,3 +298,14 @@ cooking_ingredient (lz4
index 0cbea69f..606f6ee6 100644
--- a/src/core/reactor.cc
+++ b/src/core/reactor.cc
@@ -93,6 +93,9 @@
#include <seastar/core/dpdk_rte.hh>
#include <rte_lcore.h>
#include <rte_launch.h>
+#elif defined(SEASTAR_HAVE_SPDK)
+#include <seastar/core/spdk_app.hh>
+#include <spdk/env.h>
#endif
#include <seastar/core/prefetch.hh>
#include <exception>
@@ -3572,6 +3575,9 @@ smp::get_options_description()
#endif
#ifdef SEASTAR_HAVE_HWLOC
("allow-cpus-in-remote-numa-nodes", bpo::value<bool>()->default_value(true), "if some CPUs are found not to have any local NUMA nodes, allow assigning them to remote ones")
+#endif
+#ifdef SEASTAR_HAVE_SPDK
+ ("spdk-pmd", "Use SPDK PMD drivers")
#endif
;
return opts;
@@ -3606,7 +3612,7 @@ void smp::start_all_queues()
_alien._qs[this_shard_id()].start();
}

-#ifdef SEASTAR_HAVE_DPDK
+#if defined(SEASTAR_HAVE_DPDK) || defined(SEASTAR_HAVE_SPDK)

int dpdk_thread_adaptor(void* f)
{
@@ -3623,6 +3629,11 @@ void smp::join_all()
rte_eal_mp_wait_lcore();
return;
}
+#elif defined(SEASTAR_HAVE_SPDK)
+ if (_using_spdk) {
+ spdk_env_thread_wait_all();
+ return;
+ }
#endif
for (auto&& t: smp::_threads) {
t.join();
@@ -3630,8 +3641,8 @@ void smp::join_all()
}

void smp::pin(unsigned cpu_id) {
- if (_using_dpdk) {
- // dpdk does its own pinning
+ if (_using_dpdk || _using_spdk) {
+ // dpdk/spdk does its own pinning
return;
}
pin_this_thread(cpu_id);
@@ -3660,6 +3671,11 @@ void smp::allocate_reactor(unsigned id, reactor_backend_selector rbs, reactor_co
void smp::cleanup() noexcept {
smp::_threads = std::vector<posix_thread>();
_thread_loops.clear();
+#ifdef SEASTAR_HAVE_SPDK
+ if (_using_spdk) {
+ spdk::env::stop();
+ }
+#endif
}

void smp::cleanup_cpu() {
@@ -3676,7 +3692,7 @@ void smp::cleanup_cpu() {
}

void smp::create_thread(std::function<void ()> thread_loop) {
- if (_using_dpdk) {
+ if (_using_dpdk || _using_spdk) {
_thread_loops.push_back(std::move(thread_loop));
} else {
_threads.emplace_back(std::move(thread_loop));
@@ -3918,6 +3934,8 @@ void smp::configure(boost::program_options::variables_map configuration, reactor

#ifdef SEASTAR_HAVE_DPDK
_using_dpdk = configuration.count("dpdk-pmd");
+#elif defined(SEASTAR_HAVE_SPDK)
+ _using_spdk = configuration.count("spdk-pmd");
#endif
auto thread_affinity = configuration["thread-affinity"].as<bool>();
if (configuration.count("overprovisioned")
@@ -3926,6 +3944,8 @@ void smp::configure(boost::program_options::variables_map configuration, reactor
}
if (!thread_affinity && _using_dpdk) {
fmt::print("warning: --thread-affinity 0 ignored in dpdk mode\n");
+ } else if (!thread_affinity && _using_spdk) {
+ fmt::print("warning: --thread-affinity 0 ignored in spdk mode\n");
}
auto mbind = configuration["mbind"].as<bool>();
if (!thread_affinity) {
@@ -4055,6 +4075,15 @@ void smp::configure(boost::program_options::variables_map configuration, reactor
}
dpdk::eal::init(cpus, configuration);
}
+#elif defined(SEASTAR_HAVE_SPDK)
+ if (_using_spdk) {
+ try {
+ spdk::env::start(allocations, configuration);
+ } catch (const std::exception& e) {
+ seastar_logger.error(e.what());
+ _exit(1);
+ }
+ }
#endif

memory::configure(allocations[0].mem, mbind, hugepages_path);
@@ -4190,6 +4219,20 @@ void smp::configure(boost::program_options::variables_map configuration, reactor
rte_eal_remote_launch(dpdk_thread_adaptor, static_cast<void*>(&*(it++)), i);
}
}
+#elif defined(SEASTAR_HAVE_SPDK)
+ if (_using_spdk) {
+ auto it = _thread_loops.begin();
+ SPDK_ENV_FOREACH_CORE(i) {
+ if (i == spdk_env_get_current_core()) {
+ continue;
+ }
+ int rc = spdk_env_thread_launch_pinned(i, dpdk_thread_adaptor, static_cast<void*>(&*(it++)));
+ if (rc < 0) {
+ seastar_logger.error("Unable to start reactor thread on core {} using SPDK", i);
+ _exit(1);
+ }
+ }
+ }
#endif

reactors_registered.wait();
diff --git a/src/core/spdk_app.cc b/src/core/spdk_app.cc
new file mode 100644
index 00000000..df714da3
--- /dev/null
+++ b/src/core/spdk_app.cc
@@ -0,0 +1,353 @@
+
+namespace {
+ if (file) {
+ seastar::spdk::logger.log(spdk_log_to_seastar_level(level),
+ "{}:{:4d}:{}: {}",
+ file, line, func, buf);
+ } else {
+ // SPDK_PRINTF() passes NULL as file
+ seastar::spdk::logger.log(spdk_log_to_seastar_level(level),
+ buf);
+ }
+}
+}
+
+namespace seastar::spdk::env {
+ spdk_log_open(spdk_do_log);
+ if (spdk_env_init(&env_opts) < 0) {
+ throw std::runtime_error("unable to initialize SPDK env");
+ }
+ logger.info("env starting: done");
+}
+
+void stop() noexcept
+{
+ logger.info("env stopping");
+ spdk_env_fini();
+ spdk_log_close();
+}
+}
+
+namespace {
+
Reply all
Reply to author
Forward
0 new messages