Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: connect over tcp if initiator isn't rdma capable #875

Merged
merged 1 commit into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion control-plane/agents/src/bin/ha/node/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ async fn disconnect_controller(
// changed, for example due to interface name modified in io-engine args.
let same_controller = is_same_controller(&new_path_uri, &subsystem)?;
if same_controller {
tracing::info!(path, "Not disconnecting same NVMe controller");
tracing::info!(path, "Not disconnecting same NVMe controller. old subsys {subsystem:?}, new path {new_path_uri:?}");
Ok(None)
} else {
tracing::info!(path, "Disconnecting NVMe controller");
Expand Down Expand Up @@ -476,6 +476,16 @@ struct ParsedUri {
nqn: String,
}

impl std::fmt::Debug for ParsedUri {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ParsedUri")
.field("host", &self.host())
.field("port", &self.port())
.field("transport", &self.transport())
.field("nqn", &self.nqn())
.finish()
}
}
impl ParsedUri {
fn new(uri: Uri) -> Result<ParsedUri, SvcError> {
let host = uri.host().ok_or(SvcError::InvalidArguments {})?.to_string();
Expand Down
28 changes: 27 additions & 1 deletion control-plane/csi-driver/src/bin/node/dev/nvmf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use csi_driver::PublishParams;
use glob::glob;
use nvmeadm::nvmf_subsystem::Subsystem;
use regex::Regex;
use tracing::warn;
use udev::{Device, Enumerator};
use url::Url;
use uuid::Uuid;
Expand All @@ -22,6 +23,7 @@ use crate::{
config::{config, NvmeConfig, NvmeParseParams},
dev::util::extract_uuid,
match_dev::match_nvmf_device,
node::RDMA_CONNECT_CHECK,
};

use super::{Attach, Detach, DeviceError, DeviceName};
Expand Down Expand Up @@ -307,6 +309,16 @@ pub(crate) fn check_nvme_tcp_module() -> Result<(), std::io::Error> {
Ok(())
}

/// Check for the presence of nvme tcp kernel module.
/// TODO: Handle the case where this(and for that matter nvme_tcp too)
tiagolobocastro marked this conversation as resolved.
Show resolved Hide resolved
/// could be builtin module.
#[allow(unused)]
pub(crate) fn check_nvme_rdma_module() -> Result<(), std::io::Error> {
let path = "/sys/module/nvme_rdma";
std::fs::metadata(path)?;
Ok(())
}

/// Set the nvme_core module IO timeout
/// (note, this is a system-wide parameter)
pub(crate) fn set_nvmecore_iotimeout(io_timeout_secs: u32) -> Result<(), std::io::Error> {
Expand Down Expand Up @@ -359,5 +371,19 @@ pub(crate) fn transport_from_url(url: &Url) -> Result<TrType, DeviceError> {
.split('+')
.nth(1)
.unwrap_or(default_xprt.as_str());
TrType::from_str(xprt).map_err(|e| DeviceError::new(format!("{e:?}").as_str()))

let ret_xprt = TrType::from_str(xprt).map_err(|e| DeviceError::new(format!("{e:?}").as_str()));
let connect_cap_check = RDMA_CONNECT_CHECK.get().unwrap_or(&(false, false));

if !connect_cap_check.0 {
dsharma-dc marked this conversation as resolved.
Show resolved Hide resolved
ret_xprt
} else {
match ret_xprt {
Ok(t) if t == TrType::rdma && !connect_cap_check.1 => {
warn!("rdma incapable node, connecting over tcp");
Ok(TrType::tcp)
}
_else => _else,
}
}
}
44 changes: 43 additions & 1 deletion control-plane/csi-driver/src/bin/node/main_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::{
identity::Identity,
k8s::patch_k8s_node,
mount::probe_filesystems,
node::Node,
node::{Node, RDMA_CONNECT_CHECK},
nodeplugin_grpc::NodePluginGrpcServer,
nodeplugin_nvme::NvmeOperationsSvc,
registration::run_registration_loop,
Expand Down Expand Up @@ -221,6 +221,16 @@ pub(super) async fn main() -> anyhow::Result<()> {
.value_parser(clap::value_parser!(bool))
.help("Enable ansi color for logs")
)
.arg(
Arg::new("nvme-connect-fallback")
dsharma-dc marked this conversation as resolved.
Show resolved Hide resolved
.long("nvme-connect-fallback")
.default_value("false")
.value_parser(clap::value_parser!(bool))
.help(
"Enable falling back to nvme connect over tcp if initiator node is not rdma capable, \n\
even though volume target is rdma capable."
)
)
.subcommand(
clap::Command::new("fs-freeze")
.arg(
Expand Down Expand Up @@ -327,6 +337,38 @@ pub(super) async fn main() -> anyhow::Result<()> {
check_ana_and_label_node(&kube_client, node_name, nvme_enabled).await?;
}

let conn_fallback = matches.get_flag("nvme-connect-fallback");
// Check and store if this initiator node has an RDMA device so that nvme connect over RDMA
// can be done. If this node is also an io-engine node with rdma enabled and working,
// then this check will always set capability true.
let ibv_output = tokio::process::Command::new("ibv_devinfo")
dsharma-dc marked this conversation as resolved.
Show resolved Hide resolved
.arg("-l")
.output()
.await;

let cap = match ibv_output {
Ok(okstdout) if okstdout.status.success() => {
// In an unlikely case string from utf8 fails, err towards incapability.
let outstr = String::from_utf8(okstdout.stdout).unwrap_or("0 HCA".to_string());
if !outstr.starts_with("0 HCA") {
info!("host node rdma capable. conn_fallback({conn_fallback:?}), {outstr:?}");
// todo: check for the presence of nvme_rdma too. But don't bail out as we do for
// nvme_tcp.
true
} else {
info!(
"host node is not rdma capable. conn_fallback({conn_fallback:?}), {outstr:?}"
);
false
}
}
Ok(_) | Err(_) => {
error!("Error executing ibv_devinfo, or invalid command output. conn_fallback({conn_fallback:?}), {ibv_output:?}");
false
}
};
let _ = RDMA_CONNECT_CHECK.set((conn_fallback, cap));
dsharma-dc marked this conversation as resolved.
Show resolved Hide resolved

// Parse the CSI socket file name from the command line arguments.
let csi_socket = matches
.get_one::<String>("csi-socket")
Expand Down
7 changes: 7 additions & 0 deletions control-plane/csi-driver/src/bin/node/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use csi_driver::{
filesystem::FileSystem as Fs,
limiter::VolumeOpGuard,
};
use once_cell::sync::OnceCell;
use rpc::{
csi,
csi::{
Expand Down Expand Up @@ -76,6 +77,12 @@ impl Node {

const ATTACH_TIMEOUT_INTERVAL: Duration = Duration::from_millis(100);
const ATTACH_RETRIES: u32 = 100;
// A type and static variable used to check and set node's rdma capability during startup.
// This is used to decide if initiator can indeed connect over rdma.
// First index bool tells if we need to fallback to tcp connect in case initiator node is
// not rdma capable. Second index bool is the actual capability indicator.
type CheckAndFallbackNvmeConnect = (bool, bool);
pub(crate) static RDMA_CONNECT_CHECK: OnceCell<CheckAndFallbackNvmeConnect> = OnceCell::new();

// Determine if given access mode in conjunction with ro mount flag makes
// sense or not. If access mode is not supported or the combination does
Expand Down
3 changes: 2 additions & 1 deletion nix/pkgs/control-plane/cargo-project.nix
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
, gitVersions
, openapi-generator
, which
, rdma-core
, systemdMinimal
, utillinux
, sourcer
Expand Down Expand Up @@ -71,7 +72,7 @@ let

inherit LIBCLANG_PATH PROTOC PROTOC_INCLUDE;
nativeBuildInputs = [ clang pkg-config openapi-generator which git llvmPackages.bintools ];
buildInputs = [ llvmPackages.libclang protobuf openssl.dev systemdMinimal.dev utillinux.dev ];
buildInputs = [ llvmPackages.libclang protobuf openssl.dev systemdMinimal.dev utillinux.dev rdma-core ];
doCheck = false;
};
release_build = { "release" = true; "debug" = false; };
Expand Down
4 changes: 2 additions & 2 deletions nix/pkgs/images/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# avoid dependency on docker tool chain. Though the maturity of OCI
# builder in nixpkgs is questionable which is why we postpone this step.

{ pkgs, xfsprogs_5_16, busybox, dockerTools, lib, e2fsprogs, btrfs-progs, utillinux, fetchurl, control-plane, tini, sourcer, img_tag ? "", img_org ? "", img_prefix }:
{ pkgs, xfsprogs_5_16, rdma-core, busybox, dockerTools, lib, e2fsprogs, btrfs-progs, utillinux, fetchurl, control-plane, tini, sourcer, img_tag ? "", img_org ? "", img_prefix }:
let
repo-org = if img_org != "" then img_org else "${builtins.readFile (pkgs.runCommand "repo_org" {
buildInputs = with pkgs; [ git ];
Expand Down Expand Up @@ -104,7 +104,7 @@ let
inherit buildType;
name = "node";
config = {
Env = [ "PATH=${lib.makeBinPath [ "/" xfsprogs e2fsprogs_1_46_2 btrfs-progs utillinux ]}" ];
Env = [ "PATH=${lib.makeBinPath [ "/" xfsprogs rdma-core e2fsprogs_1_46_2 btrfs-progs utillinux ]}" ];
};
};
};
Expand Down
1 change: 1 addition & 0 deletions shell.nix
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ mkShell {
pre-commit
python3
utillinux
rdma-core
which
] ++ pkgs.lib.optional (!norust) rust
++ pkgs.lib.optionals (system != "aarch64-darwin") [
Expand Down