1 change: 1 addition & 0 deletions .gitignore
@@ -7,6 +7,7 @@ htmlcov/
 .mypy_cache/
 *.egg-info/
 *.pyc
+output.avro
 *~
 .#*
 \#*
27 changes: 27 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,27 @@
+{
+    "cSpell.words": [
+        "addopts",
+        "asctime",
+        "Bobrov",
+        "cacheprovider",
+        "duckdb",
+        "envlist",
+        "fastavro",
+        "isort",
+        "iterchunks",
+        "Kirill",
+        "levelname",
+        "millis",
+        "mypy",
+        "nans",
+        "ndarray",
+        "numpy",
+        "pyarrow",
+        "pytest",
+        "rtype",
+        "setuptools",
+        "skipsdist",
+        "subclassing",
+        "testpaths"
+    ]
+}
1 change: 1 addition & 0 deletions CODEOWNERS
@@ -1 +1,2 @@
 * @luminousmen
+* @crjaensch
2 changes: 1 addition & 1 deletion LICENSE
@@ -1,6 +1,6 @@
MIT License

-Copyright (c) 2023 Kirill Bobrov
+Copyright (c) 2023-2025 Kirill Bobrov, Christian R. Jaensch

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
40 changes: 19 additions & 21 deletions README.md
@@ -2,33 +2,31 @@
<img src="https://raw.githubusercontent.com/luminousmen/data-toolset/master/branding/logo/logo.png" width="200">
</div>

-[![Master](https://github.com/luminousmen/data-toolset/actions/workflows/master.yml/badge.svg?branch=master)](https://github.com/luminousmen/data-toolset/actions/workflows/master.yml)
-[![codecov](https://codecov.io/gh/luminousmen/data-toolset/branch/master/graph/badge.svg?token=6V9IPSRCB0)](https://codecov.io/gh/luminousmen/data-toolset)
+[![Master](https://github.com/luminousmen/data-toolset/actions/workflows/master.yml/badge.svg?branch=code-improvement)](https://github.com/luminousmen/data-toolset/actions/workflows/master.yml)
+[![codecov](https://codecov.io/gh/luminousmen/data-toolset/branch/code-improvement/graph/badge.svg?token=6V9IPSRCB0)](https://codecov.io/gh/luminousmen/data-toolset)

# data-tools(et)

data-toolset is designed to simplify your data processing tasks by providing a more user-friendly alternative to the traditional JAR utilities like avro-tools and parquet-tools. With this Python package, you can effortlessly handle various data file formats, including Avro and Parquet, using a simple and intuitive command-line interface.

## Installation

-Python 3.8, Python 3.9 and 3.10 are supported and tested (to some extent).
+Python 3.10, Python 3.12 are supported and tested (to some extent).

```bash
-python -m pip install data-toolset
+pip install data-toolset
```

## Legacy

-Do you want polars to run on an old CPU (e.g. dating from before 2011), or on an x86-64 build of Python on Apple Silicon under Rosetta? Install `pip install polars-lts-cpu`. This version of polars is compiled without AVX target features.
+> **Note:** The legacy `data-toolset` entrypoint (Argparse-based) is deprecated. Please use the new Typer-based `data-toolset-cli` command.

## Usage

```bash
-$ data-toolset -h
-usage: data-toolset [-h] {head,tail,meta,schema,stats,query,validate,merge,count,to_json,to_csv,to_avro,to_parquet,random_sample} ...
+$ data-toolset-cli -h
+usage: data-toolset-cli [-h] {head,tail,meta,schema,stats,query,validate,merge,count,to-json,to-csv,to-avro,to-parquet,random-sample} ...

positional arguments:
-  {head,tail,meta,schema,stats,query,validate,merge,count,to_json,to_csv,to_avro,to_parquet,random_sample}
+  {head,tail,meta,schema,stats,query,validate,merge,count,to-json,to-csv,to-avro,to-parquet,random-sample}
commands
head Print the first N records from a file
tail Print the last N records from a file
@@ -39,19 +37,19 @@ positional arguments:
validate Validate a file
merge Merge multiple files into one
count Count the number of records in a file
-    to_json             Convert a file to JSON format
-    to_csv              Convert a file to CSV format
-    to_avro             Convert a file to Avro format
-    to_parquet          Convert a file to Parquet format
-    random_sample       Randomly sample records from a file
+    to-json             Convert a file to JSON format
+    to-csv              Convert a file to CSV format
+    to-avro             Convert a file to Avro format
+    to-parquet          Convert a file to Parquet format
+    random-sample       Randomly sample records from a file
```
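The usage listing above includes a `random-sample` command. The package's actual implementation isn't shown in this diff, but uniform sampling over a record stream is typically done with reservoir sampling; here is a minimal stdlib sketch of that idea (the function name and signature are illustrative, not the package's API):

```python
import random

def random_sample(records, n, seed=None):
    """Uniformly sample up to n records from an iterable (Algorithm R)."""
    rng = random.Random(seed)
    reservoir = []
    for i, rec in enumerate(records):
        if i < n:
            reservoir.append(rec)   # fill the reservoir with the first n records
        else:
            j = rng.randint(0, i)   # later records survive with probability n/(i+1)
            if j < n:
                reservoir[j] = rec
    return reservoir

print(len(random_sample(range(1000), 5, seed=42)))  # 5
```

Reservoir sampling makes a single pass and keeps only `n` records in memory, which is why it suits files too large to load whole.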

## Examples

Print the first 10 records of a Parquet file:

```bash
-$ data-toolset head my_data.parquet -n 10
+$ data-toolset-cli head my_data.parquet -n 10
shape: (1, 7)
┌───────────┬─────┬──────────┬────────┬──────────────────────────┬────────────────────────────┬──────────────────┐
│ character ┆ age ┆ is_human ┆ height ┆ quote ┆ friends ┆ appearance │
@@ -65,7 +63,7 @@
Query a Parquet file using a SQL-like expression:

```bash
-$ data-toolset query my_data.parquet "SELECT * FROM 'my_data.parquet' WHERE height > 165"
+$ data-toolset-cli query my_data.parquet "SELECT * FROM 'my_data.parquet' WHERE height > 165"
shape: (2, 7)
┌─────────────────┬─────┬──────────┬────────┬───────────────────────┬────────────────────────────────────┬───────────────────┐
│ character ┆ age ┆ is_human ┆ height ┆ quote ┆ friends ┆ appearance │
Expand All @@ -80,7 +78,7 @@ shape: (2, 7)
Get basic data statistics:

```bash
-$ data-toolset stats my_data.avro
+$ data-toolset-cli stats my_data.avro
shape: (9, 8)
┌────────────┬─────────────────┬───────────┬──────────┬────────────┬──────────────────────────┬─────────┬────────────┐
│ describe ┆ character ┆ age ┆ is_human ┆ height ┆ quote ┆ friends ┆ appearance │
@@ -102,19 +100,19 @@
Merge multiple Avro files into one:

```bash
-$ data-toolset merge file1.avro file2.avro file3.avro merged_file.avro
+$ data-toolset-cli merge file1.avro file2.avro file3.avro merged_file.avro
```
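Conceptually, `merge` concatenates the record streams of its inputs into one output file. A rough stdlib sketch of that idea follows; the schema check and the function name are assumptions for illustration, not the tool's actual logic (which operates on Avro/Parquet files):

```python
import itertools

def merge_records(batches):
    """Concatenate record batches, refusing inputs whose field sets differ."""
    batches = [list(b) for b in batches]
    field_sets = {frozenset(rec) for batch in batches for rec in batch}
    if len(field_sets) > 1:
        raise ValueError("inputs have differing schemas")
    return list(itertools.chain.from_iterable(batches))

merged = merge_records([[{"character": "Yoda", "age": 900}],
                        [{"character": "Han Solo", "age": 29}]])
print(len(merged))  # 2
```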

Convert Avro file into Parquet:

```bash
-$ data-toolset to_parquet my_data.avro output.parquet
+$ data-toolset-cli to-parquet my_data.avro output.parquet
```

Convert Parquet file into JSON:

```bash
-$ data-toolset to_json my_data.parquet output.json
+$ data-toolset-cli to-json my_data.parquet output.json
```
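Both conversion commands above read records from one format and re-serialize them in another. A stdlib-only sketch of the JSON and CSV output halves of that pipeline, assuming records arrive as plain dicts (the helper names are invented for illustration; the real tool presumably serializes through its pyarrow/fastavro backends):

```python
import csv
import io
import json

def records_to_json(records):
    """Serialize a list of dict records as pretty-printed JSON."""
    return json.dumps(records, indent=2)

def records_to_csv(records):
    """Serialize dict records as CSV, taking the header from the first record."""
    buf = io.StringIO()
    writer = csv.DictWriter(buf, fieldnames=list(records[0]), lineterminator="\n")
    writer.writeheader()
    writer.writerows(records)
    return buf.getvalue()

rows = [{"character": "Yoda", "age": 900}, {"character": "Han Solo", "age": 29}]
print(records_to_csv(rows))
```

Deriving the CSV header from the first record mirrors the flat, uniform schema that columnar formats like Parquet guarantee.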

## Contributing