Skip to content

Commit

Permalink
Merge pull request #207 from IBM/floops_integration
Browse files Browse the repository at this point in the history
Floops integration
  • Loading branch information
ppalmes authored May 9, 2023
2 parents 26d5cd1 + e3e6d5b commit 0d40d3f
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 22 deletions.
10 changes: 8 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,36 +1,42 @@
name = "TSML"
uuid = "198dc43e-9e51-5cd7-9d40-d9794d335912"
authors = ["Paulito Palmes <[email protected]>"]
version = "2.7.5"
version = "2.7.6"

[deps]
AMLPipelineBase = "e3c3008a-8869-4d53-9f34-c96f99c8a2b6"
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
FLoops = "cc61a311-1640-44b5-9fba-1b764f453329"
GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
Impute = "f7bf1975-0170-51b9-8c5f-a992d46b9575"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MLDataUtils = "cc2ba9b6-d476-5e6d-8eaf-a92d5412d41d"
MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Transducers = "28d57a85-8fef-5791-bfe6-a80928e7c999"

[compat]
AMLPipelineBase = "0.1"
ArgParse = "0.5, 0.6, 0.7, 1.0, 1.1"
CSV = "0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.10"
DataFrames = "0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 1.0, 1.1, 1.2"
FLoops = "0.1, 0.2"
GR = "0.72"
Impute = "0.5, 0.6"
MLDataUtils = "0.2, 0.3, 0.4, 0.5"
MultivariateStats = "0.5, 0.6, 0.7, 0.8, 0.9, 0.10"
Plots = "1.38"
StatsBase = "0.29, 0.30, 0.31, 0.32, 0.33"
ProgressMeter = "1.0, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7"
StatsBase = "0.29, 0.30, 0.31, 0.32, 0.33, 0.34"
Transducers = "0.3, 0.4"
julia = "1"

[extras]
Expand Down
4 changes: 3 additions & 1 deletion src/TSML.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ export VoteEnsemble, StackEnsemble, BestLearner
export FeatureSelector, CatFeatureSelector, NumFeatureSelector, CatNumDiscriminator
export crossvalidate
export NARemover
export @pipeline @pipelinex, @pipelinez
export @pipeline
export @pipelinex
export @pipelinez
export +, |>, *, |, >>
export Pipeline, ComboPipeline

Expand Down
56 changes: 41 additions & 15 deletions src/tsclassifier.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ using ..AbsTypes
using ..Utils
import ..AbsTypes: fit, fit!, transform, transform!

using FLoops
using Transducers
using ProgressMeter

export fit, fit!, transform, transform!
export TSClassifier, getstats

Expand Down Expand Up @@ -178,14 +182,20 @@ function getfilestat(ldirname::AbstractString,lfname::AbstractString)
dtype in string.(instances(TSType)) || error(dtype * ", filename does not indicate known data type.")
# create a pipeline to get stat
fname = joinpath(ldirname,lfname)
csvfilter = CSVDateValReader(Dict(:filename=>fname,:dateformat=>"dd/mm/yyyy HH:MM"))
valgator = DateValgator(Dict(:dateinterval=>Dates.Hour(1)))
valnner = DateValNNer(Dict(:dateinterval=>Dates.Hour(1)))
stfier = Statifier(Dict(:processmissing=>false))
mpipeline = @pipeline csvfilter |> valgator |> valnner |> stfier
df = fit_transform!(mpipeline)
df.dtype = repeat([dtype],nrow(df))
df.fname = repeat([lfname],nrow(df))
df = DataFrame()
try
csvfilter = CSVDateValReader(Dict(:filename=>fname,:dateformat=>"dd/mm/yyyy HH:MM"))
valgator = DateValgator(Dict(:dateinterval=>Dates.Hour(1)))
valnner = DateValNNer(Dict(:dateinterval=>Dates.Hour(1)))
stfier = Statifier(Dict(:processmissing=>false))
mpipeline = @pipeline csvfilter |> valgator |> valnner |> stfier
df = fit_transform!(mpipeline)
df.dtype = repeat([dtype],nrow(df))
df.fname = repeat([lfname],nrow(df))
catch errormsg
println("skipping "*fname*": "*string(errormsg))
df = DataFrame()
end
return (df)
end

Expand Down Expand Up @@ -223,6 +233,20 @@ function threadloop(ldirname,mfiles)
return trdata
end


# gather the stats of every file in mfiles (located under ldirname)
# and stack them row-wise into a single dataframe;
# the loop is written with FLoops' @floop/@reduce so that the per-file
# work and the concatenation are expressed as a reduction
# a progress meter is advanced once per processed file
function transducersloop(ldirname, mfiles)
    progress = Progress(length(mfiles); dt=0.01, showspeed=true)
    @floop for csvfile in mfiles
        filestats = getfilestat(ldirname, csvfile)
        # tick the progress meter and show the file that was just handled
        next!(progress; showvalues=[(:fname, csvfile)])
        # accumulator starts as an empty DataFrame and grows by vcat
        @reduce() do (stattable = DataFrame(); filestats)
            stattable = vcat(stattable, filestats)
        end
    end
    return stattable
end

# loop over the directory and get stats of each file
# return a dataframe containing stat features and ts type for target
function getstats(ldirname::AbstractString)
Expand All @@ -231,13 +255,15 @@ function getstats(ldirname::AbstractString)
mfiles != [] || error("empty csv directory")
#df = serialloop(ldirname,mfiles)
# legacy dispatch on julia version: threadloop on julia 1.5.x, serialloop otherwise (superseded by transducersloop)
jversion = string(Base.VERSION)
df = DataFrame()
if match(r"^1.5",jversion) === nothing
df = serialloop(ldirname,mfiles)
else
df = threadloop(ldirname,mfiles)
end
#jversion = string(Base.VERSION)
#df = DataFrame()
#if match(r"^1.5",jversion) === nothing
# df = serialloop(ldirname,mfiles)
#else
# df = threadloop(ldirname,mfiles)
#end
#println(first(df))
df = transducersloop(ldirname, mfiles)
return df
end

Expand Down
8 changes: 4 additions & 4 deletions src/valdatefilters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ end
:missdirection => :symmetric, #:reverse, # or :forward or :symmetric
:dateinterval => Dates.Hour(1),
:nnsize => 1,
:strict => true,
:strict => false,
:aggregator => :median
)
)
Expand Down Expand Up @@ -515,7 +515,7 @@ mutable struct DateValNNer <: Transformer
:missdirection => :symmetric, #:reverse, # or :forward or :symmetric
:dateinterval => Dates.Hour(1),
:nnsize => 1,
:strict => true,
:strict => false,
:aggregator => :median
)
cargs=nested_dict_merge(default_args,args)
Expand Down Expand Up @@ -860,7 +860,7 @@ end
:missdirection => :symmetric, #:reverse, # or :forward or :symmetric
:dateinterval => Dates.Hour(1),
:nnsize => 1,
:strict => true,
:strict => false,
:aggregator => :median
)
)
Expand Down Expand Up @@ -897,7 +897,7 @@ Example:
:dateinterval=>Dates.Hour(1),
:nnsize=>10,
:missdirection => :symmetric,
:strict=>true,
:strict=>false,
:aggregator => :mean))
fit!(dnnr,X)
transform!(dnnr,X)
Expand Down

2 comments on commit 0d40d3f

@ppalmes
Copy link
Collaborator Author

@ppalmes ppalmes commented on 0d40d3f May 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/83193

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v2.7.6 -m "<description of version>" 0d40d3f36a3d0eb0757091acb4cc2b13b4e377a3
git push origin v2.7.6

Please sign in to comment.