Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement $struct$unnest() #1010

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@
- New methods `$select_seq()` and `$with_columns_seq()` for `DataFrame` and
`LazyFrame` (#1003).
- New method `$clear()` for `DataFrame`, `LazyFrame`, and `Series` (#1004).
- New method `$struct$unnest()` for `Series` (#1010).

### Bug fixes

Expand Down
2 changes: 2 additions & 0 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -1324,6 +1324,8 @@ RPolarsSeries$set_sorted_mut <- function(descending) invisible(.Call(wrap__RPola

RPolarsSeries$struct_fields <- function() .Call(wrap__RPolarsSeries__struct_fields, self)

RPolarsSeries$struct_unnest <- function() .Call(wrap__RPolarsSeries__struct_unnest, self)

RPolarsSeries$from_arrow_array_stream_str <- function(name, robj_str) .Call(wrap__RPolarsSeries__from_arrow_array_stream_str, name, robj_str)

RPolarsSeries$from_arrow_array_robj <- function(name, array) .Call(wrap__RPolarsSeries__from_arrow_array_robj, name, array)
Expand Down
16 changes: 13 additions & 3 deletions R/series__series.R
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,19 @@ Series_struct = method_as_active_binding(
self, expr_struct_make_sub_ns,
fields = method_as_active_binding(function() {
unwrap(.pr$Series$struct_fields(pl_series), "in $struct$fields:")
})
}),
#' Convert this struct Series to a DataFrame with a separate column for
#' each field
#'
#' @name Series_struct_unnest
#' @return A DataFrame
#' @examples
#' s = pl$Series(values = c(1, 2), dtype = pl$Struct(foo = pl$Float64))
#' s$struct$unnest()
unnest = function() {
  # Call the private Rust binding on the captured Series, then unwrap the
  # result, attaching the method name as error context.
  unwrap(.pr$Series$struct_unnest(pl_series), "in $struct$unnest():")
}
)
}
)
Expand Down Expand Up @@ -1101,8 +1113,6 @@ Series_item = function(index = NULL) {
#'
#' s$clear(n = 5)
Series_clear = function(n = 0) {
# TODO: check whether n < 0 should be removed when resolved upstream
# https://github.com/pola-rs/polars/issues/15421
if (length(n) > 1 || !is.numeric(n) || n < 0) {
Err_plain("`n` must be an integer greater or equal to 0.") |>
unwrap("in $clear():")
Expand Down
21 changes: 21 additions & 0 deletions man/Series_struct_unnest.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions src/rust/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,12 @@ impl RPolarsSeries {
Ok(ca.fields().iter().map(|s| s.name()).collect())
}

/// Converts this struct Series into a DataFrame.
///
/// # Errors
///
/// Returns the error from `struct_()` (mapped through
/// `polars_to_rpolars_err`) when the inner Series cannot be viewed as a
/// struct chunked array — presumably when its dtype is not a struct.
fn struct_unnest(&self) -> RResult<RPolarsDataFrame> {
    let fields = self.0.struct_().map_err(polars_to_rpolars_err)?;
    // `struct_()` only lends the chunked array; the conversion into a
    // DataFrame consumes its input, so an owned copy is required here.
    let frame = pl::DataFrame::from(fields.clone());
    Ok(frame.into())
}

pub fn from_arrow_array_stream_str(name: Robj, robj_str: Robj) -> RResult<Robj> {
let name = robj_to!(str, name)?;
let s = crate::arrow_interop::to_rust::arrow_stream_to_series_internal(robj_str)?
Expand Down
9 changes: 5 additions & 4 deletions tests/testthat/_snaps/after-wrappers.md
Original file line number Diff line number Diff line change
Expand Up @@ -717,10 +717,11 @@
[37] "rep" "set_sorted_mut"
[39] "shape" "sleep"
[41] "sort_mut" "std"
[43] "struct_fields" "sub"
[45] "sum" "to_fmt_char"
[47] "to_frame" "to_r"
[49] "value_counts" "var"
[43] "struct_fields" "struct_unnest"
[45] "sub" "sum"
[47] "to_fmt_char" "to_frame"
[49] "to_r" "value_counts"
[51] "var"

# public and private methods of each class RThreadHandle

Expand Down
22 changes: 22 additions & 0 deletions tests/testthat/test-series.R
Original file line number Diff line number Diff line change
Expand Up @@ -679,3 +679,25 @@ test_that("$clear() works", {
"greater or equal to 0"
)
})

test_that("$struct$unnest() works", {
  # Series with values: the struct field becomes a column of the same name
  filled = pl$Series(values = c(1, 2), dtype = pl$Struct(foo = pl$Float64))
  unnested = filled$struct$unnest()
  expect_identical(unnested$to_list(), list(foo = c(1, 2)))

  # Series with no values: the column is still present, with zero rows
  empty = pl$Series(dtype = pl$Struct(foo = pl$Float64))
  unnested = empty$struct$unnest()
  expect_identical(unnested$to_list(), list(foo = numeric(0)))

  # The Series' own name ("bar") does not appear in the output; only the
  # field name declared in pl$Struct() is used as the column name
  named = pl$Series(name = "bar", dtype = pl$Struct(foo = pl$Float64))
  unnested = named$struct$unnest()
  expect_identical(unnested$to_list(), list(foo = numeric(0)))
})
Loading