Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
python-version: ["3.11", "3.12", "3.13", "3.14"]
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
Expand Down
79 changes: 48 additions & 31 deletions jsoncsv/dumptool.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,112 +2,123 @@
# 2015.10.09

import csv
import io
import json
from typing import Any

import xlwt

from jsoncsv.utils import JsonType


class Dump:
def __init__(self, fin, fout, **kwargs):
def __init__(self, fin: io.TextIOBase, fout: io.TextIOBase | io.BytesIO, **kwargs: Any) -> None:
self.fin = fin
self.fout = fout
self.initialize(**kwargs)

def initialize(self, **kwargs):
def initialize(self, **kwargs: Any) -> None:
pass

def prepare(self):
def prepare(self) -> None:
pass

def dump_file(self, obj):
def dump_file(self) -> None:
raise NotImplementedError

def on_finish(self):
def on_finish(self) -> None:
pass

def dump(self):
def dump(self) -> None:
self.prepare()
self.dump_file()
self.on_finish()


class ReadHeadersMixin:
    """Mixin that pre-reads JSON-lines input to collect column headers."""

    @staticmethod
    def load_headers(
        fin: io.TextIOBase,
        read_row: int | None = None,
        sort_type: bool | None = None,  # noqa: ARG004 - reserved for future use
    ) -> tuple[list[str], list[dict[str, JsonType]]]:
        """Scan up to *read_row* lines of *fin* for header names.

        Returns ``(sorted header names, the objects read so far)``.
        A falsy or non-positive *read_row* means "read the whole stream".
        """
        seen: set[str] = set()
        rows: list[dict[str, JsonType]] = []

        # Normalize the line budget: -1 never reaches zero, so the loop
        # consumes the entire stream.
        remaining = -1 if not read_row or read_row < 1 else read_row

        for line in fin:
            record = json.loads(line)
            assert isinstance(record, dict)
            seen.update(record)
            rows.append(record)

            remaining -= 1
            if not remaining:
                break

        # TODO: add some sort_type here
        return (sorted(seen), rows)


class DumpExcel(Dump, ReadHeadersMixin):
    """Shared logic for tabular dumps: gather headers, then emit rows."""

    def initialize(self, **kwargs: Any) -> None:
        super().initialize(**kwargs)
        # Options controlling the header pre-scan in prepare().
        self._read_row = kwargs.get('read_row')
        self._sort_type = kwargs.get('sort_type')

    def prepare(self) -> None:
        # Pre-read part of the input so every row shares one header set.
        self._headers, self._datas = self.load_headers(
            self.fin, self._read_row, self._sort_type
        )

    def write_headers(self) -> None:
        # Format-specific: emit the header row.
        raise NotImplementedError

    def write_obj(self, obj: dict[str, JsonType]) -> None:
        # Format-specific: emit one data row.
        raise NotImplementedError

    def dump_file(self) -> None:
        self.write_headers()

        # First the rows buffered during the header pre-scan ...
        for buffered in self._datas:
            self.write_obj(buffered)

        # ... then whatever remains unread in the input stream.
        for line in self.fin:
            record = json.loads(line)
            assert isinstance(record, dict)
            self.write_obj(record)


class DumpCSV(DumpExcel):
    """Dump JSON-lines input as CSV via ``csv.DictWriter``."""

    def initialize(self, **kwargs: Any) -> None:
        super().initialize(**kwargs)
        # Created lazily in write_headers(), once the header list is known.
        self.csv_writer: csv.DictWriter[str] | None = None

    def write_headers(self) -> None:
        # CSV output requires a text stream.
        assert isinstance(self.fout, io.TextIOBase)
        writer = csv.DictWriter(self.fout, self._headers)
        writer.writeheader()
        self.csv_writer = writer

    def write_obj(self, obj: dict[str, JsonType]) -> None:
        assert self.csv_writer is not None
        self.csv_writer.writerow(
            {key: self.patch_value(value) for key, value in obj.items()}
        )

    def patch_value(self, value: JsonType) -> str:
        """Render one cell: None/empty containers become "", all else str()."""
        if value in (None, {}, []):
            return ""
        return str(value)


class DumpXLS(DumpExcel):
def initialize(self, **kwargs):
def initialize(self, **kwargs: Any) -> None:
super().initialize(**kwargs)

self.sheet = kwargs.get('sheet', 'Sheet1')
Expand All @@ -116,30 +127,36 @@ def initialize(self, **kwargs):
self.row = 0
self.cloumn = 0

def write_headers(self):
def write_headers(self) -> None:
for head in self._headers:
self.ws.write(self.row, self.cloumn, head)
self.cloumn += 1
self.row += 1

def write_obj(self, obj):
def write_obj(self, obj: dict[str, JsonType]) -> None:
self.cloumn = 0

for head in self._headers:
value = obj.get(head)
# patch
if value in ({},):
if value == {}:
value = "{}"
self.ws.write(self.row, self.cloumn, value)
self.cloumn += 1

self.row += 1

def on_finish(self):
def on_finish(self) -> None:
assert isinstance(self.fout, io.BufferedIOBase)
self.wb.save(self.fout)


def dump_excel(fin, fout, klass, **kwargs):
def dump_excel(
fin: io.TextIOBase,
fout: io.TextIOBase | io.BytesIO,
klass: type[DumpExcel],
**kwargs: Any,
) -> None:
if not isinstance(klass, type) or not issubclass(klass, DumpExcel):
raise ValueError("unknow dumpexcel type")

Expand Down
58 changes: 40 additions & 18 deletions jsoncsv/jsontool.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,40 @@
# author@alingse
# 2016.05.27

import io
import json
from collections.abc import Callable, Iterable, Iterator
from copy import deepcopy
from itertools import groupby
from operator import itemgetter

from jsoncsv.utils import decode_safe_key, encode_safe_key
from jsoncsv.utils import (
DecodedPathType,
JsonType,
LeafInputType,
LeafType,
PathType,
decode_safe_key,
encode_safe_key,
)

__all__ = [
'convert_json',
'expand',
'restore',
]

# Type alias for the func parameter in convert_json
# Use ... to indicate additional keyword arguments are accepted
ConvertFunc = Callable[..., dict[str, JsonType]] | Callable[..., JsonType]

def gen_leaf(root, path=None):

def gen_leaf(root: JsonType, path: PathType | None = None) -> Iterator[LeafType]:
if path is None:
path = []

if not isinstance(root, (dict, list)) or not root:
leaf = (path, root)
leaf: LeafType = (path, root)
yield leaf
else:
items = root.items() if isinstance(root, dict) else enumerate(root)
Expand All @@ -32,7 +46,7 @@ def gen_leaf(root, path=None):
yield leaf


def is_array_index(keys, enable_str=True):
def is_array_index(keys: Iterable[int | str], enable_str: bool = True) -> bool:
keys = list(deepcopy(keys))
# 不强调有序
key_map = dict.fromkeys(keys, True)
Expand All @@ -44,7 +58,7 @@ def is_array_index(keys, enable_str=True):
return bool(enable_str and all(str(key) in key_map for key in int_keys))


def from_leaf(leafs):
def from_leaf(leafs: Iterable[LeafInputType]) -> JsonType:
# [(path, value), (path, value)]
leafs = list(leafs)

Expand All @@ -58,10 +72,10 @@ def from_leaf(leafs):
_get_head = itemgetter(0)
_get_leaf = itemgetter(1)

zlist = list(zip(heads, leafs))
zlist = list(zip(heads, leafs, strict=True))
glist = groupby(sorted(zlist, key=_get_head), key=_get_head)

child = []
child: list[tuple[int | str, JsonType]] = []
for g in glist:
head, _zlist = g
_leafs = map(_get_leaf, _zlist)
Expand All @@ -73,30 +87,31 @@ def from_leaf(leafs):
child.sort(key=lambda x: int(x[0]))
return list(map(_get_leaf, child))

return dict(child)
return dict(child) # type: ignore[arg-type]


def expand(origin: JsonType, separator: str = '.', safe: bool = False) -> dict[str, JsonType]:
    """Flatten *origin* into a single-level dict keyed by joined leaf paths.

    With ``safe=True`` the path is encoded via ``encode_safe_key`` so that
    keys containing the separator survive a later ``restore``.
    """
    flat: dict[str, JsonType] = {}

    for path, value in gen_leaf(origin):
        # Path components may be ints (list indices); keys must be strings.
        parts = [str(part) for part in path]
        if safe:
            key = encode_safe_key(parts, separator)
        else:
            key = separator.join(parts)
        flat[key] = value

    return flat


def restore(expobj, separator='.', safe=False):
leafs = []
def restore(expobj: dict[str, JsonType], separator: str = '.', safe: bool = False) -> JsonType:
leafs: list[tuple[DecodedPathType, JsonType]] = []

items = expobj.items()

for key, value in items:
path = decode_safe_key(key, separator) if safe else key.split(separator)
path: DecodedPathType = decode_safe_key(key, separator) if safe else key.split(separator)

if key == '':
path = []
Expand All @@ -107,7 +122,14 @@ def restore(expobj, separator='.', safe=False):
return origin


def convert_json(fin, fout, func, separator=".", safe=False, json_array=False):
def convert_json(
fin: io.TextIOBase,
fout: io.TextIOBase,
func: ConvertFunc,
separator: str = '.',
safe: bool = False,
json_array: bool = False,
) -> None:
'''
ensure fin/fout is TextIO
'''
Expand All @@ -116,7 +138,7 @@ def convert_json(fin, fout, func, separator=".", safe=False, json_array=False):
raise ValueError("unknow convert_json type")

# default: read json objects from each line
def gen_objs():
def gen_objs() -> Iterator[JsonType]:
for line in fin:
obj = json.loads(line)
yield obj
Expand All @@ -125,7 +147,7 @@ def gen_objs():

if json_array:
# read all input as json array
def gen_objs_from_array():
def gen_objs_from_array() -> Iterator[JsonType]:
objs = json.load(fin)
assert isinstance(objs, list)
yield from objs
Expand Down
Loading