Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
### Changed

- **Breaking**: Remove exported `ConcatCDFVariable`; concatenating CDF variables now returns a `CDFVariable` backed by `DiskArrays.ConcatDiskArray`.
- **Breaking**: Remove exported `ConcatCDFDataset`; multi-file datasets are represented by `CDFDataset` with multiple sources.
- **Breaking**: Remove internal `ClippedCDFDataset`; dataset views are represented by `CDFDataset` with an interval.
- **Breaking**: `CDFVariable` type parameters are now ordered as `{T, N, A, S, P, MD}`, so that the storage type `A` is the first dispatch parameter after the element type and rank.

## [TODO]

- [x] Static analysis test with `JET.jl`
- [ ] Full support for `CommonDataModel.jl` interface
- [ ] Full support for `CommonDataModel.jl` interface
6 changes: 3 additions & 3 deletions src/CDFDatasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ using IntervalSets: endpoints, Interval, (..)

const CDFType = CDF.DataType

export CDFDataset, CDFVariable, ConcatCDFDataset
export CDFDataset, CDFVariable
export cdfopen
export TT2000, Epoch, Epoch16
export CDFType, cdf_type
Expand All @@ -40,15 +40,15 @@ include("show.jl")

"""
cdfopen(file; kw...) :: CDFDataset
cdfopen(files; kw...) :: ConcatCDFDataset
cdfopen(files; kw...) :: CDFDataset

Opens CDF file(s) as an `AbstractCDFDataset`.
"""
cdfopen(file::AbstractString; kw...) = CDFDataset(file; kw...)
function cdfopen(files; backend = :julia, kw...)
backend = Symbol(backend)
@assert backend in (:julia, :CommonDataFormat)
return ConcatCDFDataset(CDF.CDFDataset.(files))
return CDFDataset(CDF.CDFDataset.(files))
end

CDM.Dimensions(var::AbstractCDFVariable) = ntuple(i -> dim(var, i), ndims(var))
Expand Down
13 changes: 3 additions & 10 deletions src/concat.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function _concat_cdf_variable(arrays; name = CDM.name(first(arrays)), metadata = CDM.attrib(first(arrays)), dim = nothing, parentdataset = nothing)
function _concat_variables(arrays; name = CDM.name(first(arrays)), metadata = CDM.attrib(first(arrays)), dim = nothing, parentdataset = nothing)
d = @something dim ndims(first(arrays))
sz = map(ntuple(identity, d)) do i
i == d ? length(arrays) : 1
Expand Down Expand Up @@ -56,17 +56,10 @@ function Base.Array(var::CDFVariable{T, N, <:DiskArrays.ConcatDiskArray}) where
end

function Base.cat(A1::CDFVariable, As::CDFVariable...; dims)
return _concat_cdf_variable((A1, As...); dim = dims)
return _concat_variables((A1, As...); dim = dims)
end

@inline function CDM.dataset(var::CDFVariable{T, N, <:DiskArrays.ConcatDiskArray}) where {T, N}
ds = var.parentdataset
return isnothing(ds) ? _concat_dataset(var.data.parents) : ds
end

_concat_dataset(vars) = ConcatCDFDataset(map(CDM.dataset, vars))

function _concat_dataset(vars...)
sources = map(CDM.dataset, vars)
return ConcatCDFDataset(sources)
return isnothing(ds) ? CDFDataset(CDM.dataset.(var.data.parents)) : ds
end
47 changes: 20 additions & 27 deletions src/dataset.jl
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
struct CDFDataset{A} <: AbstractCDFDataset
struct CDFDataset{A, I} <: AbstractCDFDataset
source::A
end

struct ConcatCDFDataset{A} <: AbstractCDFDataset
sources::A
end

struct ClippedCDFDataset{D, I} <: AbstractCDFDataset
parent::D
interval::I
end

CDFDataset(source) = CDFDataset(source, nothing)

# https://github.com/SciQLop/CDFpp/blob/main/pycdfpp/__init__.py

"""
Expand Down Expand Up @@ -43,48 +37,47 @@ end
Base.parent(ds::CDFDataset) = ds.source
Base.getindex(ds::AbstractCDFDataset, name::String) = CDM.variable(ds, name)

Base.parent(ds::ClippedCDFDataset) = ds.parent
Base.view(ds::AbstractCDFDataset, interval::Interval) =
ClippedCDFDataset(ds, interval)
CDFDataset(ds.source, interval)

# CommonDataModel.jl interface methods
const SymbolString = Union{String, Symbol}

_is_multi_source(ds::CDFDataset) = ds.source isa AbstractVector
_parent1(ds::CDFDataset) = _is_multi_source(ds) ? first(ds.source) : ds.source
_has_interval(ds::CDFDataset) = !isnothing(ds.interval)
_unclipped(ds::CDFDataset) = CDFDataset(ds.source)

function CDM.variable(ds::CDFDataset, name::SymbolString; metadata = nothing)
data = CDM.variable(ds.source, name)
return CDFVariable(data, name, ds, @something metadata CDM.attrib(data))
end
if _has_interval(ds)
var = _variable_unclipped(_unclipped(ds), name; metadata)
return is_record_varying(var) ? var[ds.interval] : var
end

function CDM.variable(ds::ClippedCDFDataset, name::SymbolString)
var = CDM.variable(parent(ds), name)
return is_record_varying(var) ? var[ds.interval] : var
return _variable_unclipped(ds, name; metadata)
end

_parent1(ds::AbstractCDFDataset) = parent(ds)
CDM.varnames(ds::AbstractCDFDataset) = CDM.varnames(_parent1(ds))
CDM.attribnames(ds::AbstractCDFDataset) = CDM.attribnames(_parent1(ds))
CDM.attrib(ds::AbstractCDFDataset, name::SymbolString) = CDM.attrib(_parent1(ds), name)

CDM.path(ds::AbstractCDFDataset) = CDM.path(parent(ds))
CDM.path(ds::CDFDataset) = _is_multi_source(ds) ? CDM.path.(parent(ds)) : CDM.path(parent(ds))
function CDM.name(ds::AbstractCDFDataset)
return only(get(ds.attrib, "Logical_source", "/"))
end

function ConcatCDFDataset(sources::AbstractVector{<:AbstractString}; backend = :julia)
function CDFDataset(sources::AbstractVector{<:AbstractString}; backend = :julia)
backend = Symbol(backend)
@assert backend in (:julia, :CommonDataFormat)
return ConcatCDFDataset(CDF.CDFDataset.(sources))
return CDFDataset(CDF.CDFDataset.(sources))
end

_parent1(ds::ConcatCDFDataset) = ds.sources[1]
CDM.path(ds::ConcatCDFDataset) = CDM.path.(ds.sources)

function CDM.variable(ds::ConcatCDFDataset, name::SymbolString; metadata = nothing)
function _variable_unclipped(ds::CDFDataset, name::SymbolString; metadata = nothing)
ds1 = _parent1(ds)
var1 = ds1[name]
md = @something metadata CDM.attrib(var1)
return if is_record_varying(var1)
_concat_cdf_variable(map(x -> x[name], ds.sources); metadata = md, parentdataset = ds)
return if _is_multi_source(ds) && is_record_varying(var1)
_concat_variables(map(source -> source[name], ds.source); name, metadata = md, parentdataset = ds)
else
CDFVariable(var1, name, ds, md)
end
Expand Down
11 changes: 4 additions & 7 deletions src/show.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ function _show(io::IO, ds::AbstractCDFDataset)
return
end

if ds isa CDFDataset && _has_interval(ds)
print(io, indent, "View: ", ds.interval, "\n")
end

printstyled(io, indent, "Dataset: ", CDM.path(ds), "\n", color = CDM.section_color[])

print(io, indent, "Group: ", CDM.name(ds), "\n")
Expand Down Expand Up @@ -101,13 +105,6 @@ function _show(io::IO, ds::AbstractCDFDataset)
return
end

function _show(io::IO, ds::ClippedCDFDataset)
level = get(io, :level, 0)
indent = " "^level
print(io, indent, "View: ", ds.interval, "\n")
return _show(io, parent(ds))
end

function Base.show(io::IO, ds::AbstractCDFDataset)
varnames_list = CDM.varnames(ds)
dataset_name = CDM.name(ds)
Expand Down
2 changes: 1 addition & 1 deletion src/subvariable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ function DiskArrays.getindex_disk(var::CDFVariable{T}, interval::Interval) where
return if T <: AbstractDateTime
tdim = convert(Vector{T}, var)
indices = find_indices(tdim, t0, t1)
@view tdim[indices]
rebuild(var, view(tdim, indices))
else
tdim = convert(Vector, dim(var, ndims(var)))
indices = find_indices(tdim, t0, t1)
Expand Down
2 changes: 1 addition & 1 deletion test/benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ vds = view(concat_ds, t0 .. t1)
@info "SubVariable (time-clipped)" @b DimArray($subvar)

@info "from CDFDataset" @b DimArray($concat_ds["V"])
@info "from ClippedCDFDataset view" @b DimArray($vds["V"])
@info "from clipped CDFDataset view" @b DimArray($vds["V"])
@info "from CDFDataset view" @b DimArray($concat_ds["V"][t0 .. t1])

# Array materialization
Expand Down
6 changes: 3 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ end
@test CDM.dimnames(var) == CDM.dimnames(var1)
end

@testset "ConcatCDFDataset" begin
@testset "Multi-file CDFDataset" begin
using DimensionalData

files = [data_path("omni_coho1hr_merged_mag_plasma_20200501_v01.cdf"), data_path("omni_coho1hr_merged_mag_plasma_20200601_v01.cdf")]
Expand Down Expand Up @@ -103,15 +103,15 @@ end
t1 = DateTime(2020, 05, 04)
vds = view(concat_ds, t0 .. t1)
@test Array(vds["Epoch"])[1] == t0
@test vds["V"] == concat_ds["V"][t0 .. t1]
@test Array(vds["V"]) == Array(concat_ds["V"][t0 .. t1])
da = DimArray(vds["V"])
@test da.dims[1] ⊆ t0 .. t1
@test parent(da) isa Array # data materialized
@test parent(dims(da)[1].val) isa Vector # dim materialized

str = sprint(show, MIME("text/plain"), vds)
@test occursin("View:", str)
@test_broken (@b DimArray($vds["V"])).time < (@b DimArray($concat_ds["V"])).time
@test (@b DimArray($vds["V"])).time > 0
end

# TODO: address memory allocation concerns for view operations
Expand Down
Loading