Skip to content
SamuelGilbert42 edited this page May 17, 2021 · 5 revisions

Version 2020 initial thoughts

=

Table of Contents

Performance consideration

  • Streaming: Needed on model output and processing
  • Searching: Needed on searching using metadata
  • Chunking size: allow for subgrid access. Possible to store many resolutions, keeping the differences between them
  • Compressor on dlopen

Data structures

  • 2D,3D,clouds,profiles,timeseries,xsection,tsection,meshes,trajectory.
(have to figure a way to properly tag/store/compress data along the 3 x/y/z axis in any combination)

Metadata

  • 2 levels of metadata ?
    • Table/IPs for searching
    • Free format for anything else (or key/value pairs ?)
  • Standard for file level and field level metadata. Free format is simple but it needs to be structured, use versioning.
  • Manage data status, stats and ranges
    • Avg,min,max (over time, layer, ???), bias, stddev,
    • How about processing applied to data(filtered, clamped, interpolated,...)
    • ex: Average of the maximum wind gust computed over a vertical column, averaged over a period of time and clamped
  • Need an exhaustive list of metadata
  • Now
    • VAR,TYPVAR,ETIKET,DATEO,DATEV,DEET,NPAS,IP1,IP2,IP3,NI,NJ,NK,NPACK
  • Wished
    • Per field
      • Unit
      • ShortName (VAR)
      • LongName
      • Ancillary (vector components ?)(generalized tuples ?)
      • Description (ETIKET)
      • Type (TYPVAR)
      • Level/Depth (IP1)
      • LevelType (IP1)
      • Level Increment (increment direction)(goes with level type)
      • ForecastTime (DATEV)
      • ValidTime (DATE) (the good old dateo/datev conundrum)
      • TimeStepNo (IP2,NPAS)
      • TimeStepLen (DEET)
      • Precision (NPACK)
      • Process/Method (min,max,average,…) over interval (associate with extended TYPVAR ?)
        • Time
        • Vertical
        • Horizontal
      • Missing value (many, or fillValue, flags)
      • Mask (Many)
      • Informative (Min/Max/Avg/StdDev) (as byproducts of hierarchical compression ?)
    • Per File
      • Discipline (Meteo,Hydrology,Surface,Oceanography,Airquality,Emergency)
      • Type of generating process ( analysis, forecast, ensemble forecast, nowcast, hindcast, …)
      • Ensemble
        • Member number
        • Member type (unperturbed control forecast, positively perturbed forecast, …)
      • Model/Provenance
      • Version
      • Status (Dev,Stage,Ops,…)
    • Georeference (add WKT, Meshes)
      • Projection
      • Datum
      • Spheroid
      • Transform
      • Dimension (NI,NJ)
      • Cell Measures/Area/Volume
      • Hide << ^^ !! HY ^> … (the metadata duplication conundrum !)
    • Calendar type ?

Other considerations

  • Unit decoupled from var (UDUnits for unit conversion)
  • Time in milliseconds (unix time + negative) (extended julian with milli/micro seconds)
  • Georef formalized + correct datum
  • 2 formats possible (streaming vs local) for writing 2D slice of vertical prof.
  • Manage 3D (NK ?) ( associate i/j/k dimensions with appropriate x/y/z direction ? )
  • Explore distributed options (chunking, hierarchical storage, ...)
  • Multiple missing values (different kinds of "missing" data)
    • (Does this cover? An explicit value to represent any missing data (e.g. NaN))
  • Mask management, multiple masks (implicit vs explicit mask because of compression side effects)
  • An export format will be supported from rmnlib (Grib,netcdf, GDAL ?)
  • Should we use tables for metadata info and if so, use standard ones (grib, CF, ...)

API

Current

c_fstinf( iun, &ni, &nj, &nk,datev,etiket,ip1,ip2,ip3,typvar,nomvar)
FSTINF( iun,ni,nj,nk,datev,etiket,ip1,ip2,ip3,typvar,nomvar)

c_fstinfx( handle,iun,&ni,&nj,&nk,datev,etiket,ip1,ip2,ip3,typvar,nomvar )
FSTINFX( handle,iun,ni,nj,nk,datev,etiket,ip1,ip2,ip3,typvar,nomvar)

c_fstinl( iun, &ni, &nj, &nk,datev,etiket,ip1,ip2,ip3,typvar,nomvar,liste,nliste,nmax);
FSTINL( iun,ni,nj,nk,datev,etiket,ip1,ip2,ip3,typvar,nomvar, liste, nliste, nmax)

c_fstsui( iun,&ni,&nj,&nk )
FSTSUI( iun, ni, nj, nk)

c_fstprm(handle, &dateo, &deet, &npas, &ni, &nj, &nk, &nbits,&datyp, &ip1, &ip2, &ip3, typvar, nomvar, etiket, grtyp, &ig1, &ig2, &ig3, &ig4, &swa, &lng, &dltf, &ubc, &extra1, &extra2, &extra3)
FSTPRM( handle, dateo, deet, npas, ni, nj, nk, nbits,datyp,ip1,ip2, ip3, typvar, nomvar, etiket, grtyp, ig1, ig2, ig3,ig4,swa, lng, dltf, ubc, extra1, extra2, extra3)

c_fstmsq( iun, mip1, mip2, mip3, metik, getmode);
FSTMSQ( iun,mip1,mip2,mip3,metik,mode)

c_fstlir( field,iun, &ni, &nj, &nk,datev,etiket,ip1,ip2,ip3,typvar,nomvar )
FSTLIR( field,iun,ni,nj,nk,datev,etiket,ip1,ip2,ip3,typvar,nomvar)

c_fstlirx( field,handle,iun, &ni, &nj, &nk,datev,etiket,ip1,ip2,ip3,typvar,nomvar )
FSTLIRX( field,handle,iun,ni,nj,nk,datev,etiket,ip1,ip2,ip3,typvar,nomvar)

c_fstlis( field, iun, &ni, &nj, &nk )
FSTLIS( field, iun,ni,nj,nk)

c_fstluk( field, handle, &ni, &nj, &nk )
FSTLUK( field, handle, ni,nj,nk)

FSTECR( field, work, npak, iun, dateo, deet, npas, ni,nj,nk, ip1, ip2, ip3, typvar, nomvar, etiket, grtyp,ig1, ig2, ig3, ig4, datyp, rewrit)
c_fstecr( field, work, npak, iun, dateo, deet, npas, ni, nj,nk, ip1, ip2, ip3, typvar, nomvar, etiket, grtyp, ig1, ig2, ig3, ig4, datyp, rewrit);

New

TFieldMeta {
char version[8] // 1.0 would be current metadata
...
// Should we include ni,nj,nk in metadata, what about datyp and npak ?
}

TFileMeta {
   char version[8]
...
}

TGeoref {
   char *Name
   char Type

   char *GeoSpec;   // Pointer on grid specific parameters per grid type 

   Func *Project    // Pointer to functions per grid type
   Func *UnProject
   ...
}

TGeoSpecZ {

   IG1,IG2,IG3,IG4
   ...
}
TGeoSpecR {
   Loc,CTH,STH,ResRadius,ResAzimuth,Bin... ///
}
TGeoSpecW {
   String,Transform,InvTransform... ///
}

TField {
  char *Data;   // field data
  char *NoData; // int,float,or double, multiple no data ?
  char *Mask;   // Multiple masks ?
  
   ...
}
c_fstinf  (iun,ni,nj,nk,TFieldMeta *meta)
c_fstinfx (handle,iun,ni,nj,nk,TFieldMeta *meta) // Could be merged with fstinf with a handle of -1
c_fstinl  (iun,ni,nj,nk,TFieldMeta *meta, liste, nliste, nmax)
c_fstsui  (iun,&ni,&nj,&nk ) // Still needed ? looks like fstinfx, not threadsafe
c_fstprm  (handle, TFieldMeta *meta) // All meta
c_fstmsq  ( ) // Is this used anywhere ? We could just use the TMeta fields as is in the fstinf
c_fstlir  (char *field, iun, TFieldMeta *meta)
c_fstlirx (char *field, handle, iun, TFieldMeta *meta)
c_fstlis  (char *field, iun )
c_fstluk  (char *field, handle)  // Why ni,nj,nk, size already has to be known at this point
c_fstluktile (char *field, handle, x0,y0,x1,y1)  // different function for subtile or only 1 and use 0 0 0 0 for whole field ?
c_fstecr  (char *field, npak, iun, ni, nj, nk, TFieldMeta *meta, TGeoref *georef, datyp, rewrit);

c_fstfmetaget(iun,TFileMeta *meta)

FORTRAN:
   fstdef(TField,Data,Mask,Nodata)

FST_GeorefGet(handle)
FST_GeoRefCreate(...)

Clone this wiki locally