From db4f985e2314f8caecb030851104d66561ed1e3c Mon Sep 17 00:00:00 2001 From: Tom Caruso Date: Tue, 12 Apr 2022 16:11:02 -0400 Subject: [PATCH 1/2] initial commit: option to interpret null byte as an empty cell --- src/main.rs | 28 ++++++++++++++++++++++------ tests/tests.rs | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 6 deletions(-) diff --git a/src/main.rs b/src/main.rs index defa4be..d09360b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -99,6 +99,13 @@ struct Opt { /// quoting. #[structopt(value_name = "CHAR", long = "quote", default_value = "\"")] quote: CharSpecifier, + + /// Should a null-byte (\x0) be interpreted as an empty cell + /// this option exists because it is not always possible to pass + /// a null byte into --null on the command line. + /// https://github.com/faradayio/scrubcsv/issues/17 + #[structopt(long = "null-byte-as-empty")] + null_byte_as_empty: bool } lazy_static! { @@ -122,16 +129,25 @@ fn run() -> Result<()> { // Remember the time we started. let start_time = now(); - // Build a regex containing our `--null` value. - let null_re = if let Some(null_re_str) = opt.null.as_ref() { - // Always match the full CSV value. - let s = format!("^{}$", null_re_str); - let re = Regex::new(&s).context("can't compile regular expression")?; - Some(re) + let mut pattern= if opt.null_byte_as_empty == true { + Some(String::from(r"\x00")) } else { None }; + if let Some(null_re_str) = opt.null.as_ref() { + pattern = match pattern { + Some(p) => Some(format!(r"{}|^{}$", p, null_re_str)), + None => Some(format!("^{}$", null_re_str)), + } + } + + // Build a regex containing our `--null` value(s). + let null_re = match pattern { + Some(p) => Some(Regex::new(&p).context("can't build regular expression")?), + None => None, + }; + // Fetch our input from either standard input or a file. The only tricky // detail here is that we use a `Box` to represent "some object // implementing `Read`, stored on the heap." This allows us to do runtime diff --git a/tests/tests.rs b/tests/tests.rs index b47dd37..83787a2 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -200,3 +200,45 @@ a,b,c "# ); } + +#[test] +fn test_null_byte_as_empty_cell() { + let testdir = TestDir::new("scrubcsv", "null_byte_as_empty"); + let output = testdir + .cmd() + .arg("--null-byte-as-empty") + .output_with_stdin( + br#"c1,c2,c3 +1,2,3 +\x00,\x00,1 +"#, + ).expect("error running scrubcsv test"); + assert_eq!( + output.stdout_str(), + r#"c1,c2,c3 +1,2,3 +,,1 +"# + ); + +}#[test] +fn test_null_byte_as_empty_cell_with_null_re() { + let testdir = TestDir::new("scrubcsv", "null_byte_as_empty_with_null_re"); + let output = testdir + .cmd() + .arg("--null-byte-as-empty") + .args(&["--null", "NULL"]) + .output_with_stdin( + br#"c1,c2,c3 +1,2,NULL +\x00,\x00,1 +"#, + ).expect("error running scrubcsv test"); + assert_eq!( + output.stdout_str(), + r#"c1,c2,c3 +1,2, +,,1 +"# + ); +} From beef518d40dbba5f90a3918cf9d5e5463258e3d2 Mon Sep 17 00:00:00 2001 From: Tom Caruso Date: Tue, 12 Apr 2022 16:13:25 -0400 Subject: [PATCH 2/2] cargo fmt --- src/main.rs | 4 ++-- tests/tests.rs | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/main.rs b/src/main.rs index d09360b..d74ce90 100644 --- a/src/main.rs +++ b/src/main.rs @@ -105,7 +105,7 @@ struct Opt { /// a null byte into --null on the command line. /// https://github.com/faradayio/scrubcsv/issues/17 #[structopt(long = "null-byte-as-empty")] - null_byte_as_empty: bool + null_byte_as_empty: bool, } lazy_static! { @@ -129,7 +129,7 @@ fn run() -> Result<()> { // Remember the time we started. let start_time = now(); - let mut pattern= if opt.null_byte_as_empty == true { + let mut pattern = if opt.null_byte_as_empty == true { Some(String::from(r"\x00")) } else { None diff --git a/tests/tests.rs b/tests/tests.rs index 83787a2..f86f782 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -212,7 +212,8 @@ fn test_null_byte_as_empty_cell() { 1,2,3 \x00,\x00,1 "#, - ).expect("error running scrubcsv test"); + ) + .expect("error running scrubcsv test"); assert_eq!( output.stdout_str(), r#"c1,c2,c3 @@ -220,8 +221,8 @@ fn test_null_byte_as_empty_cell() { ,,1 "# ); - -}#[test] +} +#[test] fn test_null_byte_as_empty_cell_with_null_re() { let testdir = TestDir::new("scrubcsv", "null_byte_as_empty_with_null_re"); let output = testdir @@ -233,7 +234,8 @@ fn test_null_byte_as_empty_cell_with_null_re() { 1,2,NULL \x00,\x00,1 "#, - ).expect("error running scrubcsv test"); + ) + .expect("error running scrubcsv test"); assert_eq!( output.stdout_str(), r#"c1,c2,c3