Skip to content

Commit

Permalink
Merge pull request #120 from codetalker7/multiple-joins
Browse files Browse the repository at this point in the history
Adding function to `join` multiple `TSFrame`s.
  • Loading branch information
chiraganand authored Dec 5, 2022
2 parents 55b9aac + 7be0db9 commit c2078d0
Show file tree
Hide file tree
Showing 2 changed files with 227 additions and 70 deletions.
166 changes: 108 additions & 58 deletions src/join.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
struct JoinInner # inner
end
struct JoinBoth # inner
end
struct JoinAll # outer
end
struct JoinOuter # outer
end
struct JoinLeft # left
end
struct JoinRight # right
end
joinmap = Dict(
:JoinInner=>DataFrames.innerjoin,
:JoinBoth=>DataFrames.innerjoin,
:JoinOuter=>DataFrames.outerjoin,
:JoinAll=>DataFrames.outerjoin,
:JoinLeft=>DataFrames.leftjoin,
:JoinRight=>DataFrames.rightjoin
)

"""
# Joins/Column-binding
Expand All @@ -24,26 +20,26 @@ column names amongst the TSFrame objects.
The following join types are supported:
`join(ts1::TSFrame, ts2::TSFrame, ::Type{JoinInner})` and
`join(ts1::TSFrame, ts2::TSFrame, ::Type{JoinBoth})`
`join(ts1::TSFrame, ts2::TSFrame; jointype=:JoinInner)` and
`join(ts1::TSFrame, ts2::TSFrame; jointype=:JoinBoth)`
a.k.a. inner join, takes the intersection of the indexes of `ts1` and
`ts2`, and then merges the columns of both the objects. The resulting
object will only contain rows which are present in both the objects'
indexes. The function will rename columns in the final object if
they had same names in the TSFrame objects.
`join(ts1::TSFrame, ts2::TSFrame, ::Type{JoinOuter})` and
`join(ts1::TSFrame, ts2::TSFrame, ::Type{JoinAll})`:
`join(ts1::TSFrame, ts2::TSFrame; jointype=:JoinOuter)` and
`join(ts1::TSFrame, ts2::TSFrame; jointype=:JoinAll)`:
a.k.a. outer join, takes the union of the indexes of `ts1` and `ts2`
before merging the other columns of input objects. The output will
contain rows which are present in all the input objects while
inserting `missing` values where a row was not present in any of the
objects. This is the default behaviour if no `JoinType` object is
objects. This is the default behaviour if no `jointype` is
provided.
`join(ts1::TSFrame, ts2::TSFrame, ::Type{JoinLeft})`:
`join(ts1::TSFrame, ts2::TSFrame; jointype=:JoinLeft)`:
Left join takes the index values which are present in the left
object `ts1` and finds matching index values in the right object
Expand All @@ -53,15 +49,21 @@ associated with matching index rows on the right. The operation
inserts `missing` values where in the unmatched rows of the right
object.
`join(ts1::TSFrame, ts2::TSFrame, ::Type{JoinRight})`
`join(ts1::TSFrame, ts2::TSFrame; jointype=:JoinRight)`
Right join, similar to left join but works in the opposite
direction. The final object contains all the rows from the right
object while inserting `missing` values in rows missing from the left
object.
The default behaviour is to assume `JoinAll` if no `JoinType` object
is provided to the `join` method.
The default behaviour is to assume `jointype=:JoinAll` if no `jointype` is provided to the `join` method.
Joining multiple `TSFrame`s is also supported. The syntax is
`join(ts1::TSFrame, ts2::TSFrame, ts...; jointype::Symbol)`
where `jointype` must be one of `:JoinInner`, `:JoinBoth`, `:JoinOuter`, `:JoinAll`,
`:JoinLeft` or `:JoinRight`. Note that `join` on multiple `TSFrame`s is left associative.
`cbind` is an alias for `join` method.
Expand All @@ -73,7 +75,7 @@ julia> random(x) = rand(MersenneTwister(123), x);
julia> dates = collect(Date(2017,1,1):Day(1):Date(2017,1,10));
julia> ts1 = TSFrame(random(length(dates)), dates)
julia> ts1 = TSFrame(random(length(dates)), dates);
julia> show(ts1)
(10 x 1) TSFrame with Dates.Date Index
Expand All @@ -95,8 +97,7 @@ julia> dates = collect(Date(2017,1,1):Day(1):Date(2017,1,30));
julia> ts2 = TSFrame(random(length(dates)), dates);
julia> show(ts2)
(30 x 1) TSFrame with Dates.Date Index
30×1 TSFrame with Date Index
Index x1
Date Float64
───────────────────────
Expand All @@ -108,7 +109,20 @@ julia> show(ts2)
2017-01-06 0.662555
2017-01-07 0.586022
2017-01-08 0.0521332
⋮ ⋮
2017-01-09 0.26864
2017-01-10 0.108871
2017-01-11 0.163666
2017-01-12 0.473017
2017-01-13 0.865412
2017-01-14 0.617492
2017-01-15 0.285698
2017-01-16 0.463847
2017-01-17 0.275819
2017-01-18 0.446568
2017-01-19 0.582318
2017-01-20 0.255981
2017-01-21 0.70586
2017-01-22 0.291978
2017-01-23 0.281066
2017-01-24 0.792931
2017-01-25 0.20923
Expand All @@ -117,14 +131,11 @@ julia> show(ts2)
2017-01-28 0.802665
2017-01-29 0.555668
2017-01-30 0.940782
14 rows omitted
# join on all index values
# equivalent to `join(ts1, ts2, JoinAll)` call
# equivalent to `join(ts1, ts2; jointype=:JoinAll)` call
julia> join(ts1, ts2)
(30 x 2) TSFrame with Date Index
Index x1 x1_1
Date Float64? Float64?
────────────────────────────────────────
Expand Down Expand Up @@ -154,9 +165,8 @@ julia> join(ts1, ts2)
julia> cbind(ts1, ts2);
# join only the common index values
julia> join(ts1, ts2, JoinBoth)
julia> join(ts1, ts2; jointype=:JoinBoth)
(10 x 2) TSFrame with Date Index
Index x1 x1_1
Date Float64 Float64
──────────────────────────────────
Expand All @@ -171,11 +181,9 @@ julia> join(ts1, ts2, JoinBoth)
2017-01-09 0.26864 0.26864
2017-01-10 0.108871 0.108871
# keep index values of `ts1`
julia> join(ts1, ts2, JoinLeft)
julia> join(ts1, ts2; jointype=:JoinLeft)
(10 x 2) TSFrame with Date Index
Index x1 x1_1
Date Float64 Float64?
──────────────────────────────────
Expand All @@ -190,11 +198,9 @@ julia> join(ts1, ts2, JoinLeft)
2017-01-09 0.26864 0.26864
2017-01-10 0.108871 0.108871
# keep index values of `ts2`
julia> join(ts1, ts2, JoinRight)
julia> join(ts1, ts2; jointype=:JoinRight)
(30 x 2) TSFrame with Date Index
Index x1 x1_1
Date Float64? Float64
────────────────────────────────────────
Expand All @@ -220,32 +226,76 @@ julia> join(ts1, ts2, JoinRight)
2017-01-30 missing 0.940782
11 rows omitted
```
"""
function Base.join(ts1::TSFrame, ts2::TSFrame)
join(ts1, ts2, JoinAll)
end
julia> dates = collect(Date(2017,1,1):Day(1):Date(2017,1,30));
function Base.join(ts1::TSFrame, ts2::TSFrame, ::Type{JoinBoth})
result = DataFrames.innerjoin(ts1.coredata, ts2.coredata, on=:Index, makeunique=true)
return TSFrame(result)
end
Base.join(ts1::TSFrame, ts2::TSFrame, ::Type{JoinInner}) = Base.join(ts1, ts2, JoinBoth)
julia> ts3 = TSFrame(random(length(dates)), dates);
julia> show(ts3)
30×1 TSFrame with Date Index
Index x1
Date Float64
───────────────────────
2017-01-01 0.768448
2017-01-02 0.940515
2017-01-03 0.673959
2017-01-04 0.395453
2017-01-05 0.313244
2017-01-06 0.662555
2017-01-07 0.586022
2017-01-08 0.0521332
2017-01-09 0.26864
2017-01-10 0.108871
2017-01-11 0.163666
2017-01-12 0.473017
2017-01-13 0.865412
2017-01-14 0.617492
2017-01-15 0.285698
2017-01-16 0.463847
2017-01-17 0.275819
2017-01-18 0.446568
2017-01-19 0.582318
2017-01-20 0.255981
2017-01-21 0.70586
2017-01-22 0.291978
2017-01-23 0.281066
2017-01-24 0.792931
2017-01-25 0.20923
2017-01-26 0.918165
2017-01-27 0.614255
2017-01-28 0.802665
2017-01-29 0.555668
2017-01-30 0.940782
function Base.join(ts1::TSFrame, ts2::TSFrame, ::Type{JoinAll})
result = DataFrames.outerjoin(ts1.coredata, ts2.coredata, on=:Index, makeunique=true)
return TSFrame(result)
end
Base.join(ts1::TSFrame, ts2::TSFrame, ::Type{JoinOuter}) = Base.join(ts1, ts2, JoinAll)
# joining multiple TSFrame objects
julia> join(ts1, ts2, ts3; jointype=:JoinLeft)
10×3 TSFrame with Date Index
Index x1 x1_1 x1_2
Date Float64 Float64? Float64?
─────────────────────────────────────────────
2017-01-01 0.768448 0.768448 0.768448
2017-01-02 0.940515 0.940515 0.940515
2017-01-03 0.673959 0.673959 0.673959
2017-01-04 0.395453 0.395453 0.395453
2017-01-05 0.313244 0.313244 0.313244
2017-01-06 0.662555 0.662555 0.662555
2017-01-07 0.586022 0.586022 0.586022
2017-01-08 0.0521332 0.0521332 0.0521332
2017-01-09 0.26864 0.26864 0.26864
2017-01-10 0.108871 0.108871 0.108871
function Base.join(ts1::TSFrame, ts2::TSFrame, ::Type{JoinLeft})
result = DataFrames.leftjoin(ts1.coredata, ts2.coredata, on=:Index, makeunique=true)
```
"""
function Base.join(
ts1::TSFrame,
ts2::TSFrame,
ts...;
jointype::Symbol=:JoinAll
)
result = joinmap[jointype](ts1.coredata, ts2.coredata, on=:Index, makeunique=true)
for tsf in ts
result = joinmap[jointype](result, tsf.coredata, on=:Index, makeunique=true)
end
return TSFrame(result)
end

function Base.join(ts1::TSFrame, ts2::TSFrame, ::Type{JoinRight})
result = DataFrames.rightjoin(ts1.coredata, ts2.coredata, on=:Index, makeunique=true)
return TSFrame(result)
end
# alias
cbind = join
Loading

0 comments on commit c2078d0

Please sign in to comment.