diff --git a/.gitignore b/.gitignore index acc813d..6661b9f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ Manifest.toml .CondaPkg themis_data omni_data -.claude/settings.local.json \ No newline at end of file +.claude/settings.local.json +*.cdf diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..9fa5faf --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,16 @@ +# Changelog + +## [Unreleased] + +### Added + +- Added `PySPEDASSchema` for `pyspedas` CDF/VATT metadata +- Added `XArrayDataArray` coordinate and dimension support for `xarray.DataArray` + values, including multidimensional coordinates. + +### Changed + +- **Breaking**: `get_data` now returns `XArrayDataArray` instead of the + previous `TplotVariable` structure. +- `DimensionalData.jl` is now an optional weak dependency instead of a hard + dependency. diff --git a/Project.toml b/Project.toml index 7771f4a..4852c44 100644 --- a/Project.toml +++ b/Project.toml @@ -8,12 +8,17 @@ projects = ["test", "benchmark"] [deps] ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471" -DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" NetCDF_jll = "7243133f-43d8-5620-bbf4-c2c921802cf3" PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" SpaceDataModel = "0b37b92c-f0c5-4a52-bd5c-390dec20857c" UnixTimes = "ab1a18e7-b408-4913-896c-624bb82ed7f4" +[weakdeps] +DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" + +[extensions] +PySPEDASDimensionalDataExt = "DimensionalData" + [compat] ConcreteStructs = "0.2" DimensionalData = "0.29, 0.30" diff --git a/README.md b/README.md index a3d2941..5dda0c0 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,6 @@ Load and plot THEMIS FGM data. ```julia using Pkg; Pkg.add("PySPEDAS") using PySPEDAS -using DimensionalData trange=["2007-03-23", "2007-03-24"] # Load THEMIS FGM data for probe A @@ -26,6 +25,9 @@ get_data("tha_fgl_dsl") pytplot("tha_fgl_dsl") ``` +`DimensionalData.jl` support is optional. If it is loaded, `TplotVariable` +can be converted with `DimArray(get_data("tha_fgl_dsl"))`. + You can load projects into scope for quick access: ```julia diff --git a/ext/PySPEDASDimensionalDataExt.jl b/ext/PySPEDASDimensionalDataExt.jl new file mode 100644 index 0000000..164fe68 --- /dev/null +++ b/ext/PySPEDASDimensionalDataExt.jl @@ -0,0 +1,26 @@ +module PySPEDASDimensionalDataExt + +using DimensionalData +import DimensionalData: DimArray, dims +import PySPEDAS +using PySPEDAS: XArrayDataArray +using SpaceDataModel: getmeta, dim + +import Base: convert + +# DimensionalData.dims only support 1D dimensions +function DimensionalData.dims(var::XArrayDataArray, i::Int) + dimvar = dim(var, i) + name = PySPEDAS.dimnames(var, i) + return Dim{Symbol(name)}(length(dimvar) == size(var, i) ? dimvar : axes(dimvar, i)) +end + +DimensionalData.dims(var::XArrayDataArray) = ntuple(i -> dims(var, i), ndims(var)) + +function DimensionalData.DimArray(var::XArrayDataArray; kw...) + return DimArray(parent(var), dims(var); name = var.name, metadata = getmeta(var), kw...) +end + +Base.convert(::Type{T}, var::XArrayDataArray) where {T <: AbstractDimArray} = T(var) + +end diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl deleted file mode 100644 index a1b58b0..0000000 --- a/src/DimensionalData.jl +++ /dev/null @@ -1,36 +0,0 @@ -using DimensionalData -import DimensionalData: dims, DimArray -using DimensionalData.Lookups: NoLookup -import Base: convert - -""" -Get the dimensions of a tplot variable from the underlying `xarray.DataArray`. - -# Reference: -- https://github.com/rafaqz/DimensionalData.jl/blob/main/ext/DimensionalDataPythonCall.jl -""" -function get_xarray_dims(x; collect = false) - dim_names = Tuple(Symbol(d) for d in x."dims") - coord_names = Tuple(Symbol(d) for d in x."coords"."keys"()) - return map(dim_names) do dim - lookup = if dim == :time - pyconvert_time(x."time"."data") - elseif dim == :v_dim && hasproperty(x, :v) - PyArray(x."v"."data"; copy = false) - elseif dim in coord_names - PyArray(getproperty(x, dim)."data"; copy = false) - else - NoLookup() - end - Dim{dim}(collect ? Base.collect(lookup) : lookup) - end -end - -DimensionalData.dims(v::TplotVariable) = v.dims - -# https://discourse.julialang.org/t/convert-vs-constructors/4159 -function DimensionalData.DimArray(var::TplotVariable; kw...) - return DimArray(parent(var), dims(var); name = var.name, metadata = var.metadata, kw...) -end - -Base.convert(::Type{T}, var::TplotVariable) where {T <: AbstractDimArray} = T(var) diff --git a/src/PySPEDAS.jl b/src/PySPEDAS.jl index 8073da3..2ee3f30 100644 --- a/src/PySPEDAS.jl +++ b/src/PySPEDAS.jl @@ -12,11 +12,13 @@ using ConcreteStructs: @concrete export pyspedas export pytplot, get_data export Project, TplotVariable +export PySPEDASSchema include("types.jl") include("utils.jl") include("projects.jl") -include("DimensionalData.jl") +include("schema.jl") +include("dimensions.jl") using .Projects @@ -60,23 +62,20 @@ pytplot(args...) = @pyconst(pyspedas.tplot)(args...) pytplot(tnames::TnamesType, args...) = @pyconst(pyspedas.tplot)(pylist(tnames), args...) """ - get_data(name; collect = false, kw...)::TplotVariable + get_data(name; kw...)::XArrayDataArray -Retrieve data from `pyspedas` by `name`. If `collect` is true, the data will be collected into a Julia array. +Retrieve data from `pyspedas` by `name`. """ -function get_data(name; collect = false, kw...) +function get_data(name; schema = PySPEDASSchema(), kw...) py = py_get_data(name; kw...) - py_data = PyArray(@py py.data; copy = false) - data = collect ? Base.collect(py_data) : py_data - py_metadata = PyDict{String, PyDict{String, Any}}(@py py.attrs) - metadata = Dict{Any, Any}(k => v for (k, v) in py_metadata) - dims = get_xarray_dims(py; collect) - return TplotVariable(name, data, dims, metadata, py) + py_attrs = PyDict{String, Any}(@py py.attrs) + attrs = SchemaDict(schema, py_attrs) + return XArrayDataArray(py; name, attrs) end function demo_get_data(; trange = ["2017-03-23/00:00:00", "2017-03-23/23:59:59"]) pyspedas.projects.omni.data(; trange) - return DimArray(get_data("SYM_H")) + return get_data("SYM_H") end function demo(; trange = ["2020-04-20/06:00", "2020-04-20/08:00"]) diff --git a/src/dimensions.jl b/src/dimensions.jl new file mode 100644 index 0000000..77ed468 --- /dev/null +++ b/src/dimensions.jl @@ -0,0 +1,41 @@ +function coordinate(var::XArrayDataArray, i::Integer) + py = var.py + name = @py(py.dims)[i - 1] + coords = @py py.coords + pyin(name, coords) && return XArrayDataArray(coords[name]) + for coord in @py coords.values() + pyin(name, coord.dims) && return XArrayDataArray(coord) + end + return nothing +end + +function coordinate(var::XArrayDataArray, name) + py = var.py + return XArrayDataArray(@py py.coords[name]) +end + +function SpaceDataModel.dim(var::XArrayDataArray, i::Integer) + coord = coordinate(var, i) + return isnothing(coord) ? axes(var, i) : coord +end + +function dimnames(var::XArrayDataArray, i::Integer) + py = var.py + return pyconvert(String, @py(py.dims)[i - 1]) +end + +function dimname(var::XArrayDataArray) + py = var.py + T = NTuple{ndims(var), String} + return pyconvert(T, @py py.dims) +end + +function SpaceDataModel.tdimnum(var::XArrayDataArray) + i = findfirst(==("time"), var.dims) + return isnothing(i) ? ndims(var) : i +end + +function _xarray_values(py) + data = @py py.data + return is_datetime64_ns(data) ? pyconvert_time(data) : PyArray(data; copy = false) +end diff --git a/src/schema.jl b/src/schema.jl new file mode 100644 index 0000000..8dd2a7e --- /dev/null +++ b/src/schema.jl @@ -0,0 +1,46 @@ +using SpaceDataModel: MetadataSchema, SchemaDict, Via +using SpaceDataModel: getmeta +import SpaceDataModel: rules, resolve + +""" + PySPEDASSchema <: MetadataSchema + +Schema for pyspedas-loaded variables. Metadata is nested as `CDF` → `VATT` +holding ISTP-style variable attributes. +""" +struct PySPEDASSchema <: MetadataSchema end + +_get(x, key) = isnothing(x) ? nothing : get(x, key, nothing) + +cdf(x) = _get(getmeta(x), "CDF") +vatt(x) = _get(cdf(x), "VATT")::PyDict{Any, Any} + +struct Path{Ks} + keys::Ks +end + +Path(ks...) = Path(ks) + +const _PYSPEDAS_SCHEMA = ( + desc = Via(vatt, "CATDESC"), + name = (Via(vatt, "LABLAXIS"), SpaceDataModel.name), + long_name = Via(vatt, "FIELDNAM"), + unit = Via(vatt, "UNITS"), + scale = Via(vatt, ("SCALETYP", "SCALE_TYP")), + labels = Via(cdf, "LABELS"), + display_type = Via(vatt, "DISPLAY_TYPE"), + depend_1_name = Path("plot_options", "yaxis_opt", "axis_label"), + depend_1_unit = Path("data_att", "depend_1_units"), + depend_1_scale = Path("plot_options", "yaxis_opt", "y_axis_type"), +) + +rules(::PySPEDASSchema) = _PYSPEDAS_SCHEMA + +function resolve(data, p::Path) + cur = data + for k in p.keys + isnothing(cur) && return nothing + cur = resolve(cur, k) + end + return cur +end diff --git a/src/types.jl b/src/types.jl index 24c89f0..30ffaa7 100644 --- a/src/types.jl +++ b/src/types.jl @@ -34,24 +34,40 @@ Project(name) = Project(name, pynew(), Ref{Vector{Symbol}}()) # This somehow could prevent the Segmentation fault, see also https://github.com/JuliaPy/PythonCall.jl/issues/586 attributes(p::Project) = p.attributes[] -@concrete struct TplotVariable{T, N, A <: AbstractArray{T, N}} <: AbstractDataVariable{T, N} +@concrete struct XArrayDataArray{T, N, A <: AbstractArray{T, N}} <: AbstractDataVariable{T, N} name data::A - dims - metadata + attrs py::Py end +function XArrayDataArray(py; name = nothing, attrs = nothing) + name = @something name pyconvert(Any, @py py.name) "" + data = _xarray_values(py) + attrs = @something attrs PyDict(@py py.attrs) + return XArrayDataArray(name, data, attrs, py) +end + +@inline function Base.getproperty(var::XArrayDataArray, s::Symbol) + s in fieldnames(XArrayDataArray) && return getfield(var, s) + s == :metadata && return getmeta(var) + s == :dims && return dimname(var) + return getproperty(var.py, s) +end + +const TplotVariable = XArrayDataArray + +SpaceDataModel.getmeta(var::XArrayDataArray) = var.attrs SpaceDataModel.times(var::TplotVariable) = pyconvert_time(var.py.time.data) struct LoadFunction py::Py end -function (f::LoadFunction)(args...; collect = false, kwargs...) +function (f::LoadFunction)(args...; kwargs...) tvars_py = f.py(args...; kwargs...) tvars = Tuple(pyconvert(Vector{Symbol}, tvars_py)) - return NamedTuple{tvars}(get_data.(tvars; collect)) + return NamedTuple{tvars}(get_data.(tvars)) end # Allow calling methods on the Python module @@ -68,6 +84,6 @@ end Base.show(io::IO, p::Project) = print(io, "SPEDAS Project: $(p.name)") Base.show(io::IO, var::TplotVariable) = print(io, var.py.data) function Base.show(io::IO, m::MIME"text/plain", var::TplotVariable) - println(io, "Tplot Variable: $(var.name)") + println(io, "XArray DataArray: $(var.name)") return show(io, m, var.py) end diff --git a/src/utils.jl b/src/utils.jl index accdb73..730e8ad 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,4 +1,4 @@ -is_datetime64_ns(py) = pyeq(Bool, py.dtype, @pyconst(np.dtype("datetime64[ns]"))) +is_datetime64_ns(py) = pyeq(Bool, @py(py.dtype), @pyconst(np.dtype("datetime64[ns]"))) """ pyconvert_time(time) @@ -12,16 +12,3 @@ function pyconvert_time(times) py_ns = PyArray{Int64, 1, false, true, Int64}(times."view"("i8"), copy = false) return length(py_ns) == 0 ? UnixTime[] : reinterpret(UnixTime, py_ns) end - -""" - promote_cdf_attributes!(meta) - -Promotes the nested CDF variable attributes to the top level of the metadata. -""" -function promote_cdf_attributes!(meta) - cdf_attrs = meta["CDF"]["VATT"] - for (k, v) in cdf_attrs - meta[k] = v - end - return -end diff --git a/test/Project.toml b/test/Project.toml index 3733219..52f2969 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,7 +1,9 @@ [deps] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" PySPEDAS = "668d2cef-03a7-4c36-a521-7d1c6ddc8846" +SpaceDataModel = "0b37b92c-f0c5-4a52-bd5c-390dec20857c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe" diff --git a/test/runtests.jl b/test/runtests.jl index 0fffcac..f778f30 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,13 +8,16 @@ using TestItems, TestItemRunner end @testitem "PySPEDAS.jl" begin - using PythonCall: Py - using PySPEDAS.DimensionalData - @test PySPEDAS.demo_get_data() isa DimArray - @test pyspedas.geopack isa Py + using PySPEDAS: XArrayDataArray + + var = PySPEDAS.demo_get_data() + @test var.dims == ("time",) + coord = PySPEDAS.coordinate(var, "time") + @test coord isa XArrayDataArray + @test coord.dims == ("time",) using PySPEDAS.Projects - @test themis.fgm(["2020-04-20/06:00", "2020-04-20/08:00"], time_clip=true, probe="d")[1] isa TplotVariable + @test themis.fgm(["2020-04-20/06:00", "2020-04-20/08:00"], time_clip = true, probe = "d")[1] isa XArrayDataArray end @testitem "Project initialization" begin @@ -32,3 +35,60 @@ end @test Projects.wind.attributes[] isa Vector{Symbol} @test !isempty(Projects.wind.attributes[]) end + + +@testitem "PySPEDASSchema (1D)" begin + using PySPEDAS: PySPEDASSchema, get_data, pyspedas + using SpaceDataModel: get_schema + + pyspedas.projects.omni.data(; trange = ["2017-03-23/00:00:00", "2017-03-23/01:00:00"]) + var = get_data("SYM_H") + schema = get_schema(var) + @test schema isa PySPEDASSchema + + attrs = schema(var) + @test attrs[:long_name] == "SYM/H index" + @test attrs[:desc] == "SYM/H - 1-minute SYM/H index,from WDC Kyoto (1981/001-2024/244)" + @test attrs[:name] == "SYM/H index" + @test attrs[:unit] == "nT" +end + +@testitem "PySPEDASSchema (2D, depend_1)" begin + using PySPEDAS: PySPEDASSchema, get_data, pyspedas + using SpaceDataModel: get_schema + + pyspedas.projects.themis.fgm(; + trange = ["2020-04-20/06:00", "2020-04-20/06:05"], + probe = "d", + time_clip = true, + ) + var = get_data("thd_fgs_gsm") + schema = get_schema(var) + attrs = schema(var) + @test attrs[:desc] == "FGS magnetic field B in XYZ GSM Coordinates" + @test attrs[:unit] == attrs[:depend_1_unit] == "nT GSM" + @test attrs[:labels] == ["Bx FGS-D", "By FGS-D", "Bz FGS-D"] + @test attrs[:depend_1_name] == "THD FGS" + @test attrs[:depend_1_scale] == "linear" +end + +@testitem "MMS FPI multidimensional data" begin + using DimensionalData + using PySPEDAS: get_data, pyspedas + + pyspedas.projects.mms.fpi(; + trange = ["2015-10-16/04:00", "2015-10-16/04:10"], + datatype = "des-moms", + ) + + spectr = get_data("mms1_des_energyspectr_omni_fast") + @test spectr.dims == ("time", "v_dim") + @test size(spectr, 2) == 32 + @test PySPEDAS.coordinate(spectr, "spec_bins").dims == ("time", "v_dim") + @test DimArray(spectr) isa DimArray + + tensor = get_data("mms1_des_temptensor_gse_fast") + @test tensor.dims == ("time", "v1_dim", "v2_dim") + @test size(tensor)[2:3] == (3, 3) + @test DimArray(tensor) isa DimArray +end