Mutating joins add columns from `y` to `x`, matching observations based on
the keys.
Usage
# S3 method for class 'RPolarsDataFrame'
left_join(
x,
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = NULL,
na_matches = "na",
relationship = NULL
)
# S3 method for class 'RPolarsDataFrame'
right_join(
x,
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = NULL,
na_matches = "na",
relationship = NULL
)
# S3 method for class 'RPolarsDataFrame'
full_join(
x,
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = NULL,
na_matches = "na",
relationship = NULL
)
# S3 method for class 'RPolarsDataFrame'
inner_join(
x,
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = NULL,
na_matches = "na",
relationship = NULL
)
# S3 method for class 'RPolarsLazyFrame'
left_join(
x,
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = NULL,
na_matches = "na",
relationship = NULL
)
# S3 method for class 'RPolarsLazyFrame'
right_join(
x,
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = NULL,
na_matches = "na",
relationship = NULL
)
# S3 method for class 'RPolarsLazyFrame'
full_join(
x,
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = NULL,
na_matches = "na",
relationship = NULL
)
# S3 method for class 'RPolarsLazyFrame'
inner_join(
x,
y,
by = NULL,
copy = FALSE,
suffix = c(".x", ".y"),
...,
keep = NULL,
na_matches = "na",
relationship = NULL
)
Arguments
- x, y
Two Polars Data/LazyFrames
- by
Variables to join by. If `NULL` (default), `*_join()` will perform a natural join, using all variables in common across `x` and `y`. A message lists the variables so that you can check they're correct; suppress the message by supplying `by` explicitly.
`by` can take a character vector, like `c("x", "y")` if `x` and `y` are in both datasets. To join on variables that don't have the same name, use equalities in the character vector, like `c("x1" = "x2", "y")`. If you use a character vector, the join can only be done using strict equality.
`by` can also be a specification created by `dplyr::join_by()`. Contrary to the input as character vector shown above, `join_by()` uses unquoted column names, e.g. `join_by(x1 == x2, y)`.
Finally, `inner_join()` also supports inequality joins, e.g. `join_by(x1 >= x2)`, and the helpers `between()`, `overlaps()`, and `within()`. See the documentation of `dplyr::join_by()` for more information. Other join types will likely support inequality joins in the future.
- copy, keep
Not supported.
- suffix
If there are non-joined duplicate variables in `x` and `y`, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.
- ...
Dots which should be empty.
- na_matches
Should two `NA` values match?
`"na"`, the default, treats two `NA` values as equal.
`"never"` treats two `NA` values as different and will never match them together or to any other values.
Note that when joining Polars Data/LazyFrames, `NaN` values are always considered equal, no matter the value of `na_matches`. This differs from the original `dplyr` implementation.
- relationship
Handling of the expected relationship between the keys of `x` and `y`. Must be one of the following:
`NULL`, the default, is equivalent to `"many-to-many"`. It doesn't expect any relationship between `x` and `y`.
`"one-to-one"` expects each row in `x` to match at most 1 row in `y` and each row in `y` to match at most 1 row in `x`.
`"one-to-many"` expects each row in `y` to match at most 1 row in `x`.
`"many-to-one"` expects each row in `x` to match at most 1 row in `y`.
Unknown arguments
Arguments that are supported by the original implementation in the tidyverse
but are not listed above will throw a warning by default if they are
specified. To change this behavior to error instead, use
`options(tidypolars_unknown_args = "error")`.
Examples
test <- polars::pl$DataFrame(
x = c(1, 2, 3),
y1 = c(1, 2, 3),
z = c(1, 2, 3)
)
test2 <- polars::pl$DataFrame(
x = c(1, 2, 4),
y2 = c(1, 2, 4),
z2 = c(4, 5, 7)
)
test
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ x ┆ y1 ┆ z │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ 1.0 ┆ 1.0 ┆ 1.0 │
#> │ 2.0 ┆ 2.0 ┆ 2.0 │
#> │ 3.0 ┆ 3.0 ┆ 3.0 │
#> └─────┴─────┴─────┘
test2
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ x ┆ y2 ┆ z2 │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ 1.0 ┆ 1.0 ┆ 4.0 │
#> │ 2.0 ┆ 2.0 ┆ 5.0 │
#> │ 4.0 ┆ 4.0 ┆ 7.0 │
#> └─────┴─────┴─────┘
# default is to use common columns, here "x" only
left_join(test, test2)
#> Joining by `x`
#> shape: (3, 5)
#> ┌─────┬─────┬─────┬──────┬──────┐
#> │ x ┆ y1 ┆ z ┆ y2 ┆ z2 │
#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╪══════╪══════╡
#> │ 1.0 ┆ 1.0 ┆ 1.0 ┆ 1.0 ┆ 4.0 │
#> │ 2.0 ┆ 2.0 ┆ 2.0 ┆ 2.0 ┆ 5.0 │
#> │ 3.0 ┆ 3.0 ┆ 3.0 ┆ null ┆ null │
#> └─────┴─────┴─────┴──────┴──────┘
# we can specify the columns on which to join with join_by()...
left_join(test, test2, by = join_by(x, y1 == y2))
#> shape: (3, 4)
#> ┌─────┬─────┬─────┬──────┐
#> │ x ┆ y1 ┆ z ┆ z2 │
#> │ --- ┆ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╪══════╡
#> │ 1.0 ┆ 1.0 ┆ 1.0 ┆ 4.0 │
#> │ 2.0 ┆ 2.0 ┆ 2.0 ┆ 5.0 │
#> │ 3.0 ┆ 3.0 ┆ 3.0 ┆ null │
#> └─────┴─────┴─────┴──────┘
# ... or with a character vector
left_join(test, test2, by = c("x", "y1" = "y2"))
#> shape: (3, 4)
#> ┌─────┬─────┬─────┬──────┐
#> │ x ┆ y1 ┆ z ┆ z2 │
#> │ --- ┆ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╪══════╡
#> │ 1.0 ┆ 1.0 ┆ 1.0 ┆ 4.0 │
#> │ 2.0 ┆ 2.0 ┆ 2.0 ┆ 5.0 │
#> │ 3.0 ┆ 3.0 ┆ 3.0 ┆ null │
#> └─────┴─────┴─────┴──────┘
# we can customize the suffix of common column names not used to join
test2 <- polars::pl$DataFrame(
x = c(1, 2, 4),
y1 = c(1, 2, 4),
z = c(4, 5, 7)
)
left_join(test, test2, by = "x", suffix = c("_left", "_right"))
#> shape: (3, 5)
#> ┌─────┬─────────┬────────┬──────────┬─────────┐
#> │ x ┆ y1_left ┆ z_left ┆ y1_right ┆ z_right │
#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════════╪════════╪══════════╪═════════╡
#> │ 1.0 ┆ 1.0 ┆ 1.0 ┆ 1.0 ┆ 4.0 │
#> │ 2.0 ┆ 2.0 ┆ 2.0 ┆ 2.0 ┆ 5.0 │
#> │ 3.0 ┆ 3.0 ┆ 3.0 ┆ null ┆ null │
#> └─────┴─────────┴────────┴──────────┴─────────┘
# the argument "relationship" ensures the join matches the expectation
country <- polars::pl$DataFrame(
iso = c("FRA", "DEU"),
value = 1:2
)
country
#> shape: (2, 2)
#> ┌─────┬───────┐
#> │ iso ┆ value │
#> │ --- ┆ --- │
#> │ str ┆ i32 │
#> ╞═════╪═══════╡
#> │ FRA ┆ 1 │
#> │ DEU ┆ 2 │
#> └─────┴───────┘
country_year <- polars::pl$DataFrame(
iso = rep(c("FRA", "DEU"), each = 2),
year = rep(2019:2020, 2),
value2 = 3:6
)
country_year
#> shape: (4, 3)
#> ┌─────┬──────┬────────┐
#> │ iso ┆ year ┆ value2 │
#> │ --- ┆ --- ┆ --- │
#> │ str ┆ i32 ┆ i32 │
#> ╞═════╪══════╪════════╡
#> │ FRA ┆ 2019 ┆ 3 │
#> │ FRA ┆ 2020 ┆ 4 │
#> │ DEU ┆ 2019 ┆ 5 │
#> │ DEU ┆ 2020 ┆ 6 │
#> └─────┴──────┴────────┘
# We expect each row in "x" to match only one row in "y", but that's not
# true: each row of "x" matches two rows of "y"
tryCatch(
left_join(country, country_year, join_by(iso), relationship = "one-to-one"),
error = function(e) e
)
#> <RPolarsErr_error: Execution halted with the following contexts
#> 0: In R: in $collect():
#> 0: During function call [pkgdown::build_site_github_pages(new_process = FALSE, install = TRUE)]
#> 1: Encountered the following error in Rust-Polars:
#> join keys did not fulfill 1:1 validation
#> >
# A correct expectation would be "one-to-many":
left_join(country, country_year, join_by(iso), relationship = "one-to-many")
#> shape: (4, 4)
#> ┌─────┬───────┬──────┬────────┐
#> │ iso ┆ value ┆ year ┆ value2 │
#> │ --- ┆ --- ┆ --- ┆ --- │
#> │ str ┆ i32 ┆ i32 ┆ i32 │
#> ╞═════╪═══════╪══════╪════════╡
#> │ FRA ┆ 1 ┆ 2019 ┆ 3 │
#> │ FRA ┆ 1 ┆ 2020 ┆ 4 │
#> │ DEU ┆ 2 ┆ 2019 ┆ 5 │
#> │ DEU ┆ 2 ┆ 2020 ┆ 6 │
#> └─────┴───────┴──────┴────────┘