diff --git a/CHANGELOG.md b/CHANGELOG.md index fa5cb7d..99a58fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,9 +5,11 @@ ### Changed - **Breaking**: Remove exported `ConcatCDFVariable`; concatenating CDF variables now returns a `CDFVariable` backed by `DiskArrays.ConcatDiskArray`. +- **Breaking**: Remove exported `ConcatCDFDataset`; multi-file datasets are represented by `CDFDataset` with multiple sources. +- **Breaking**: Remove internal `ClippedCDFDataset`; dataset views are represented by `CDFDataset` with an interval. - **Breaking**: `CDFVariable` type parameters are now ordered as `{T, N, A, S, P, MD}` so storage type `A` is the first dispatch parameter after element type and rank. ## [TODO] - [x] Static analysis test with `JET.jl` -- [ ] Full support for `CommonDataModel.jl` interface \ No newline at end of file +- [ ] Full support for `CommonDataModel.jl` interface diff --git a/src/CDFDatasets.jl b/src/CDFDatasets.jl index 70c1bfa..de971c6 100644 --- a/src/CDFDatasets.jl +++ b/src/CDFDatasets.jl @@ -15,7 +15,7 @@ using IntervalSets: endpoints, Interval, (..) const CDFType = CDF.DataType -export CDFDataset, CDFVariable, ConcatCDFDataset +export CDFDataset, CDFVariable export cdfopen export TT2000, Epoch, Epoch16 export CDFType, cdf_type @@ -40,7 +40,7 @@ include("show.jl") """ cdfopen(file; kw...) :: CDFDataset - cdfopen(files; kw...) :: ConcatCDFDataset + cdfopen(files; kw...) :: CDFDataset Opens CDF file(s) as a `AbstractCDFDataset`. """ @@ -48,7 +48,7 @@ cdfopen(file::AbstractString; kw...) = CDFDataset(file; kw...) function cdfopen(files; backend = :julia, kw...) backend = Symbol(backend) @assert backend in (:julia, :CommonDataFormat) - return ConcatCDFDataset(CDF.CDFDataset.(files)) + return CDFDataset(CDF.CDFDataset.(files)) end CDM.Dimensions(var::AbstractCDFVariable) = ntuple(i -> dim(var, i), ndims(var)) diff --git a/src/concat.jl b/src/concat.jl index 8f70673..9c14dba 100644 --- a/src/concat.jl +++ b/src/concat.jl @@ -1,4 +1,4 @@ -function _concat_cdf_variable(arrays; name = CDM.name(first(arrays)), metadata = CDM.attrib(first(arrays)), dim = nothing, parentdataset = nothing) +function _concat_variables(arrays; name = CDM.name(first(arrays)), metadata = CDM.attrib(first(arrays)), dim = nothing, parentdataset = nothing) d = @something dim ndims(first(arrays)) sz = map(ntuple(identity, d)) do i i == d ? length(arrays) : 1 @@ -56,17 +56,10 @@ function Base.Array(var::CDFVariable{T, N, <:DiskArrays.ConcatDiskArray}) where end function Base.cat(A1::CDFVariable, As::CDFVariable...; dims) - return _concat_cdf_variable((A1, As...); dim = dims) + return _concat_variables((A1, As...); dim = dims) end @inline function CDM.dataset(var::CDFVariable{T, N, <:DiskArrays.ConcatDiskArray}) where {T, N} ds = var.parentdataset - return isnothing(ds) ? _concat_dataset(var.data.parents) : ds -end - -_concat_dataset(vars) = ConcatCDFDataset(map(CDM.dataset, vars)) - -function _concat_dataset(vars...) - sources = map(CDM.dataset, vars) - return ConcatCDFDataset(sources) + return isnothing(ds) ? CDFDataset(CDM.dataset.(var.data.parents)) : ds end diff --git a/src/dataset.jl b/src/dataset.jl index 6a1969f..e8b06dd 100644 --- a/src/dataset.jl +++ b/src/dataset.jl @@ -1,16 +1,10 @@ -struct CDFDataset{A} <: AbstractCDFDataset +struct CDFDataset{A, I} <: AbstractCDFDataset source::A -end - -struct ConcatCDFDataset{A} <: AbstractCDFDataset - sources::A -end - -struct ClippedCDFDataset{D, I} <: AbstractCDFDataset - parent::D interval::I end +CDFDataset(source) = CDFDataset(source, nothing) + # https://github.com/SciQLop/CDFpp/blob/main/pycdfpp/__init__.py """ @@ -43,48 +37,47 @@ end Base.parent(ds::CDFDataset) = ds.source Base.getindex(ds::AbstractCDFDataset, name::String) = CDM.variable(ds, name) -Base.parent(ds::ClippedCDFDataset) = ds.parent Base.view(ds::AbstractCDFDataset, interval::Interval) = - ClippedCDFDataset(ds, interval) + CDFDataset(ds.source, interval) # CommonDataModel.jl interface methods const SymbolString = Union{String, Symbol} +_is_multi_source(ds::CDFDataset) = ds.source isa AbstractVector +_parent1(ds::CDFDataset) = _is_multi_source(ds) ? first(ds.source) : ds.source +_has_interval(ds::CDFDataset) = !isnothing(ds.interval) +_unclipped(ds::CDFDataset) = CDFDataset(ds.source) + function CDM.variable(ds::CDFDataset, name::SymbolString; metadata = nothing) - data = CDM.variable(ds.source, name) - return CDFVariable(data, name, ds, @something metadata CDM.attrib(data)) -end + if _has_interval(ds) + var = _variable_unclipped(_unclipped(ds), name; metadata) + return is_record_varying(var) ? var[ds.interval] : var + end -function CDM.variable(ds::ClippedCDFDataset, name::SymbolString) - var = CDM.variable(parent(ds), name) - return is_record_varying(var) ? var[ds.interval] : var + return _variable_unclipped(ds, name; metadata) end -_parent1(ds::AbstractCDFDataset) = parent(ds) CDM.varnames(ds::AbstractCDFDataset) = CDM.varnames(_parent1(ds)) CDM.attribnames(ds::AbstractCDFDataset) = CDM.attribnames(_parent1(ds)) CDM.attrib(ds::AbstractCDFDataset, name::SymbolString) = CDM.attrib(_parent1(ds), name) -CDM.path(ds::AbstractCDFDataset) = CDM.path(parent(ds)) +CDM.path(ds::CDFDataset) = _is_multi_source(ds) ? CDM.path.(parent(ds)) : CDM.path(parent(ds)) function CDM.name(ds::AbstractCDFDataset) return only(get(ds.attrib, "Logical_source", "/")) end -function ConcatCDFDataset(sources::AbstractVector{<:AbstractString}; backend = :julia) +function CDFDataset(sources::AbstractVector{<:AbstractString}; backend = :julia) backend = Symbol(backend) @assert backend in (:julia, :CommonDataFormat) - return ConcatCDFDataset(CDF.CDFDataset.(sources)) + return CDFDataset(CDF.CDFDataset.(sources)) end -_parent1(ds::ConcatCDFDataset) = ds.sources[1] -CDM.path(ds::ConcatCDFDataset) = CDM.path.(ds.sources) - -function CDM.variable(ds::ConcatCDFDataset, name::SymbolString; metadata = nothing) +function _variable_unclipped(ds::CDFDataset, name::SymbolString; metadata = nothing) ds1 = _parent1(ds) var1 = ds1[name] md = @something metadata CDM.attrib(var1) - return if is_record_varying(var1) - _concat_cdf_variable(map(x -> x[name], ds.sources); metadata = md, parentdataset = ds) + return if _is_multi_source(ds) && is_record_varying(var1) + _concat_variables(map(source -> source[name], ds.source); name, metadata = md, parentdataset = ds) else CDFVariable(var1, name, ds, md) end diff --git a/src/show.jl b/src/show.jl index 199cc8d..3dac446 100644 --- a/src/show.jl +++ b/src/show.jl @@ -61,6 +61,10 @@ function _show(io::IO, ds::AbstractCDFDataset) return end + if ds isa CDFDataset && _has_interval(ds) + print(io, indent, "View: ", ds.interval, "\n") + end + printstyled(io, indent, "Dataset: ", CDM.path(ds), "\n", color = CDM.section_color[]) print(io, indent, "Group: ", CDM.name(ds), "\n") @@ -101,13 +105,6 @@ function _show(io::IO, ds::AbstractCDFDataset) return end -function _show(io::IO, ds::ClippedCDFDataset) - level = get(io, :level, 0) - indent = " "^level - print(io, indent, "View: ", ds.interval, "\n") - return _show(io, parent(ds)) -end - function Base.show(io::IO, ds::AbstractCDFDataset) varnames_list = CDM.varnames(ds) dataset_name = CDM.name(ds) diff --git a/src/subvariable.jl b/src/subvariable.jl index 8337a00..10e078d 100644 --- a/src/subvariable.jl +++ b/src/subvariable.jl @@ -12,7 +12,7 @@ function DiskArrays.getindex_disk(var::CDFVariable{T}, interval::Interval) where return if T <: AbstractDateTime tdim = convert(Vector{T}, var) indices = find_indices(tdim, t0, t1) - @view tdim[indices] + rebuild(var, view(tdim, indices)) else tdim = convert(Vector, dim(var, ndims(var))) indices = find_indices(tdim, t0, t1) diff --git a/test/benchmarks.jl b/test/benchmarks.jl index 28d95eb..d0716f0 100644 --- a/test/benchmarks.jl +++ b/test/benchmarks.jl @@ -26,7 +26,7 @@ vds = view(concat_ds, t0 .. t1) @info "SubVariable (time-clipped)" @b DimArray($subvar) @info "from CDFDataset" @b DimArray($concat_ds["V"]) -@info "from ClippedCDFDataset view" @b DimArray($vds["V"]) +@info "from clipped CDFDataset view" @b DimArray($vds["V"]) @info "from CDFDataset view" @b DimArray($concat_ds["V"][t0 .. t1]) # Array materialization diff --git a/test/runtests.jl b/test/runtests.jl index ebb3aaa..4ad6441 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -73,7 +73,7 @@ end @test CDM.dimnames(var) == CDM.dimnames(var1) end -@testset "ConcatCDFDataset" begin +@testset "Multi-file CDFDataset" begin using DimensionalData files = [data_path("omni_coho1hr_merged_mag_plasma_20200501_v01.cdf"), data_path("omni_coho1hr_merged_mag_plasma_20200601_v01.cdf")] @@ -103,7 +103,7 @@ end t1 = DateTime(2020, 05, 04) vds = view(concat_ds, t0 .. t1) @test Array(vds["Epoch"])[1] == t0 - @test vds["V"] == concat_ds["V"][t0 .. t1] + @test Array(vds["V"]) == Array(concat_ds["V"][t0 .. t1]) da = DimArray(vds["V"]) @test da.dims[1] ⊆ t0 .. t1 @test parent(da) isa Array # data materialized @@ -111,7 +111,7 @@ end str = sprint(show, MIME("text/plain"), vds) @test occursin("View:", str) - @test_broken (@b DimArray($vds["V"])).time < (@b DimArray($concat_ds["V"])).time + @test (@b DimArray($vds["V"])).time > 0 end # TODO: address memory allocation concerns for view operations