diff --git a/Project.toml b/Project.toml index 559d028..d271a31 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TSML" uuid = "198dc43e-9e51-5cd7-9d40-d9794d335912" authors = ["Paulito Palmes "] -version = "2.7.5" +version = "2.7.6" [deps] AMLPipelineBase = "e3c3008a-8869-4d53-9f34-c96f99c8a2b6" @@ -9,28 +9,34 @@ ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +FLoops = "cc61a311-1640-44b5-9fba-1b764f453329" GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" Impute = "f7bf1975-0170-51b9-8c5f-a992d46b9575" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MLDataUtils = "cc2ba9b6-d476-5e6d-8eaf-a92d5412d41d" MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +Transducers = "28d57a85-8fef-5791-bfe6-a80928e7c999" [compat] AMLPipelineBase = "0.1" ArgParse = "0.5, 0.6, 0.7, 1.0, 1.1" CSV = "0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.10" DataFrames = "0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 1.0, 1.1, 1.2" +FLoops = "0.1, 0.2" GR = "0.72" Impute = "0.5, 0.6" MLDataUtils = "0.2, 0.3, 0.4, 0.5" MultivariateStats = "0.5, 0.6, 0.7, 0.8, 0.9, 0.10" Plots = "1.38" -StatsBase = "0.29, 0.30, 0.31, 0.32, 0.33" +ProgressMeter = "1.0, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7" +StatsBase = "0.29, 0.30, 0.31, 0.32, 0.33, 0.34" +Transducers = "0.3, 0.4" julia = "1" [extras] diff --git a/src/TSML.jl b/src/TSML.jl index 843566c..67c8151 100644 --- a/src/TSML.jl +++ b/src/TSML.jl @@ -24,7 +24,9 @@ export VoteEnsemble, StackEnsemble, BestLearner export FeatureSelector, CatFeatureSelector, NumFeatureSelector, CatNumDiscriminator export crossvalidate export NARemover -export @pipeline @pipelinex, @pipelinez +export @pipeline +export @pipelinex +export @pipelinez export +, |>, *, |, >> export Pipeline, ComboPipeline diff --git a/src/tsclassifier.jl b/src/tsclassifier.jl index 21a32ba..7af03c4 100644 --- a/src/tsclassifier.jl +++ b/src/tsclassifier.jl @@ -14,6 +14,10 @@ using ..AbsTypes using ..Utils import ..AbsTypes: fit, fit!, transform, transform! +using FLoops +using Transducers +using ProgressMeter + export fit, fit!, transform, transform! export TSClassifier, getstats @@ -178,14 +182,20 @@ function getfilestat(ldirname::AbstractString,lfname::AbstractString) dtype in string.(instances(TSType)) || error(dtype * ", filename does not indicate known data type.") # create a pipeline to get stat fname = joinpath(ldirname,lfname) - csvfilter = CSVDateValReader(Dict(:filename=>fname,:dateformat=>"dd/mm/yyyy HH:MM")) - valgator = DateValgator(Dict(:dateinterval=>Dates.Hour(1))) - valnner = DateValNNer(Dict(:dateinterval=>Dates.Hour(1))) - stfier = Statifier(Dict(:processmissing=>false)) - mpipeline = @pipeline csvfilter |> valgator |> valnner |> stfier - df = fit_transform!(mpipeline) - df.dtype = repeat([dtype],nrow(df)) - df.fname = repeat([lfname],nrow(df)) + df = DataFrame() + try + csvfilter = CSVDateValReader(Dict(:filename=>fname,:dateformat=>"dd/mm/yyyy HH:MM")) + valgator = DateValgator(Dict(:dateinterval=>Dates.Hour(1))) + valnner = DateValNNer(Dict(:dateinterval=>Dates.Hour(1))) + stfier = Statifier(Dict(:processmissing=>false)) + mpipeline = @pipeline csvfilter |> valgator |> valnner |> stfier + df = fit_transform!(mpipeline) + df.dtype = repeat([dtype],nrow(df)) + df.fname = repeat([lfname],nrow(df)) + catch errormsg + println("skipping "*fname*": "*string(errormsg)) + df = DataFrame() + end return (df) end @@ -223,6 +233,20 @@ function threadloop(ldirname,mfiles) return trdata end + +function transducersloop(ldirname,mfiles) + n = length(mfiles) + p = Progress(n, dt=0.01, showspeed=true) + @floop for mfile in mfiles + df=getfilestat(ldirname,mfile) + next!(p; showvalues = [(:fname,mfile)]) + @reduce() do (dftable = DataFrame(); df) + dftable = vcat(dftable,df) + end + end + return dftable +end + # loop over the directory and get stats of each file # return a dataframe containing stat features and ts type for target function getstats(ldirname::AbstractString) @@ -231,13 +255,15 @@ function getstats(ldirname::AbstractString) mfiles != [] || error("empty csv directory") #df = serialloop(ldirname,mfiles) # get julia version and run threads if julia 1.3 - jversion = string(Base.VERSION) - df = DataFrame() - if match(r"^1.5",jversion) === nothing - df = serialloop(ldirname,mfiles) - else - df = threadloop(ldirname,mfiles) - end + #jversion = string(Base.VERSION) + #df = DataFrame() + #if match(r"^1.5",jversion) === nothing + # df = serialloop(ldirname,mfiles) + #else + # df = threadloop(ldirname,mfiles) + #end + #println(first(df)) + df = transducersloop(ldirname, mfiles) return df end diff --git a/src/valdatefilters.jl b/src/valdatefilters.jl index f431e11..fb7f7f2 100644 --- a/src/valdatefilters.jl +++ b/src/valdatefilters.jl @@ -470,7 +470,7 @@ end :missdirection => :symmetric, #:reverse, # or :forward or :symmetric :dateinterval => Dates.Hour(1), :nnsize => 1, - :strict => true, + :strict => false, :aggregator => :median ) ) @@ -515,7 +515,7 @@ mutable struct DateValNNer <: Transformer :missdirection => :symmetric, #:reverse, # or :forward or :symmetric :dateinterval => Dates.Hour(1), :nnsize => 1, - :strict => true, + :strict => false, :aggregator => :median ) cargs=nested_dict_merge(default_args,args) @@ -860,7 +860,7 @@ end :missdirection => :symmetric, #:reverse, # or :forward or :symmetric :dateinterval => Dates.Hour(1), :nnsize => 1, - :strict => true, + :strict => false, :aggregator => :median ) ) @@ -897,7 +897,7 @@ Example: :dateinterval=>Dates.Hour(1), :nnsize=>10, :missdirection => :symmetric, - :strict=>true, + :strict=>false, :aggregator => :mean)) fit!(dnnr,X) transform!(dnnr,X)