ccdavis
diff --git a/Cargo.lock b/Cargo.lock
Lines changed: 1 addition & 0 deletions
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 5 additions & 0 deletions
diff --git a/src/bin/dataversion.rs b/src/bin/dataversion.rs
Lines changed: 84 additions & 0 deletions
diff --git a/src/conventions.rs b/src/conventions.rs
Lines changed: 5 additions & 8 deletions
@@ -12,6 +12,7 @@ sql-builder="3.1"
 interner="*"
 compressed_string = "*"
 csv = "1.1"
+flate2 = "1.0"
 extended = "*"
 ascii = "*"
 bstr = "1.7.0"
@@ -33,6 +34,10 @@ path = "src/lib.rs"
 name = "abacus"
 path = "src/bin/abacus.rs"
 
+[[bin]]
+name = "dataversion"
+path = "src/bin/dataversion.rs"
+
 [[bench]]
 name = "tabulate_simple_request_benchmark"
 harness = false
@@ -0,0 +1,84 @@
+//! A command-line utility to extract version information from IPUMS data files.
+//!
+//! This tool reads version metadata from both Parquet and fixed-width IPUMS data files
+//! and outputs it in either JSON or human-readable text format.
+//!
+//! # Usage
+//!
+//! ```bash
+//! # For parquet data (directory containing .parquet files)
+//! dataversion /pkg/ipums/usa/output_data/current/parquet/us2015b
+//!
+//! # For fixed-width data (.dat.gz file)
+//! dataversion /pkg/ipums/usa/output_data/current/us2015b_usa.dat.gz
+//!
+//! # Output as JSON (default is text)
+//! dataversion --format json /path/to/data
+//! ```
+
+use cimdea::data_version::{extract_version, DataVersion};
+use clap::{Parser, ValueEnum};
+use std::process;
+
+#[derive(Parser, Debug)]
+#[command(
+    name = "dataversion",
+    version,
+    about = "Extract version information from IPUMS data files",
+    long_about = "Extract version information from IPUMS data files.\n\n\
+                  Supports both Parquet and fixed-width (.dat.gz) formats.\n\
+                  Version information includes release numbers, commit hashes,\n\
+                  branch names, and other build metadata."
+)]
+struct Args { // Command-line arguments; the parser is derived by clap from this struct.
+    /// Path to the data file or directory.
+    ///
+    /// For Parquet: path to a directory containing .parquet files
+    /// (e.g., /pkg/ipums/usa/output_data/current/parquet/us2015b)
+    ///
+    /// For fixed-width: path to a .dat.gz file
+    /// (e.g., /pkg/ipums/usa/output_data/current/us2015b_usa.dat.gz)
+    #[arg(value_name = "PATH")] // no short/long flag, so clap treats this as a positional argument
+    path: String, // handed to extract_version() unchanged by main()
+
+    /// Output format
+    #[arg(short, long, value_enum, default_value = "text")] // -f / --format; "text" when omitted
+    format: OutputFormat,
+}
+
+#[derive(Debug, Clone, Copy, ValueEnum)] // ValueEnum lets clap parse the variants from the command line
+enum OutputFormat { // Renderings that output_version() can print.
+    /// Human-readable text output
+    Text, // the default, per `default_value = "text"` on Args::format
+    /// Machine-readable JSON output
+    Json, // produced through DataVersion::to_json(), which can fail
+}
+
+fn main() { // Entry point: parse the arguments, extract the version, print it; exit with status 1 on error.
+    let args = Args::parse();
+
+    match extract_version(&args.path) {
+        Ok(version) => {
+            output_version(&version, args.format);
+        }
+        Err(e) => {
+            eprintln!("Error: {}", e); // failures are reported on stderr, not stdout
+            process::exit(1); // non-zero status tells the calling shell or script that extraction failed
+        }
+    }
+}
+
+fn output_version(version: &DataVersion, format: OutputFormat) { // Print `version` to stdout in the requested `format`.
+    match format {
+        OutputFormat::Text => {
+            println!("{}", version.to_text()); // to_text() is printed directly; no error path here
+        }
+        OutputFormat::Json => match version.to_json() { // to_json() returns a Result, so it gets its own match
+            Ok(json) => println!("{}", json),
+            Err(e) => {
+                eprintln!("Error serializing to JSON: {}", e); // reported on stderr
+                process::exit(1); // same failure status that main() uses
+            }
+        },
+    }
+}
@@ -155,7 +155,8 @@ impl MicroDataCollection {
             })?;
 
         // Collect record types and filenames first to avoid borrow issues
-        let record_types: Vec<(String, String)> = self.record_types
+        let record_types: Vec<(String, String)> = self
+            .record_types
             .keys()
             .map(|k| {
                 let base_filename = self.base_filename_for_dataset_and_rectype(dataset_name, k);
@@ -254,10 +255,7 @@ impl MicroDataCollection {
     /// Takes a path like ../output_data/current/parquet/, which could be derived
     /// automatically from defaults based on data root or product root. Scans all
     /// parquet schema information and embedded metadata.
-    pub fn load_metadata_from_all_parquet(
-        &mut self,
-        parquet_path: &Path,
-    ) -> Result<(), MdError> {
+    pub fn load_metadata_from_all_parquet(&mut self, parquet_path: &Path) -> Result<(), MdError> {
         if !parquet_path.exists() {
             return Err(metadata_error!(
                 "Parquet path does not exist: {}",
@@ -278,9 +276,8 @@ impl MicroDataCollection {
         let mut errors = Vec::new();
 
         for entry in entries {
-            let entry = entry.map_err(|e| {
-                metadata_error!("Failed to read directory entry: {}", e)
-            })?;
+            let entry =
+                entry.map_err(|e| metadata_error!("Failed to read directory entry: {}", e))?;
 
             let path = entry.path();
             if path.is_dir() {