-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjson2json.py
More file actions
144 lines (126 loc) · 3.92 KB
/
json2json.py
File metadata and controls
144 lines (126 loc) · 3.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
"""json2json will convert JSON compatible objects from one encoding
to UTF-8.
"""
import argparse
import asyncio
import glob
import json
import logging
import os
import sys
from typing import Any, Tuple
# Set up logging.
logging.basicConfig(
format="%(asctime)-15s %(levelname)s :: %(filename)s:%(lineno)s:%(funcName)s() :: %(message)s", # noqa: E501
datefmt="%Y-%m-%d %H:%M:%S",
level="INFO",
handlers=[
logging.StreamHandler(),
],
)
logger = logging.getLogger(__name__)
async def identify_plaintext_bytestream(path: str) -> Tuple[bool, Any]:
    """Ensure that the file is a plaintext bytestream and can be
    processed as JSON.

    Candidate encodings are tried in order: UTF-8, UTF-16 (honours a
    BOM), then explicit little- and big-endian UTF-16 for BOM-less
    input. The first encoding that both decodes and parses wins.

    :param path: path of the file to inspect.
    :returns: ``(True, parsed_json)`` on success, ``(False, None)``
        when no candidate encoding yields valid JSON.
    """
    # Local handle keeps this function testable in isolation; it is
    # the same singleton as the module-level logger.
    log = logging.getLogger(__name__)
    log.debug("attempting to open: %s", path)
    for encoding in ("utf-8", "utf-16", "utf-16-le", "utf-16-be"):
        try:
            with open(path, "r", encoding=encoding) as obj:
                return True, json.loads(obj.read())
        except (UnicodeError, json.JSONDecodeError):
            # Not decodable/parsable under this encoding; try the next.
            continue
    return False, None
async def identify_json(paths: list[str]) -> None:
    """Identify objects.

    Each path that decodes and parses as JSON is re-serialized to
    stdout as indented (UTF-8) JSON; paths that do not parse are
    silently skipped.

    :param paths: file paths to inspect.
    """
    # enumerate() was dropped here: the index was never used.
    for path in paths:
        valid, data = await identify_plaintext_bytestream(path)
        if not valid:
            continue
        print(json.dumps(data, indent=2))
async def create_manifest(path: str) -> list[str]:
    """Get a list of paths to process.

    Recursively walks *path* and collects every file found, logging
    each one at debug level.
    """
    manifest: list[str] = []
    for dirpath, _, filenames in os.walk(path):
        for name in filenames:
            full_path = os.path.join(dirpath, name)
            logger.debug(full_path)
            manifest.append(full_path)
    return manifest
async def process_glob(glob_path: str):
    """Process glob patterns provided by the user.

    Directory matches are walked recursively for files; file matches
    are taken as-is. Everything collected is handed to
    ``identify_json``.
    """
    collected: list[str] = []
    for match in glob.glob(glob_path):
        if os.path.isdir(match):
            collected.extend(await create_manifest(match))
        if os.path.isfile(match):
            collected.append(match)
    await identify_json(collected)
async def process_data(path: str):
    """Process all objects at a given path.

    Wildcard patterns are expanded via ``process_glob``; a single
    file is identified directly; a directory is walked for files
    first. Exits the process when the path is missing or the
    directory is empty.
    """
    logger.debug("processing: %s", path)
    if "*" in path:
        # Anything containing a wildcard is treated as a glob pattern.
        return await process_glob(path)
    if not os.path.exists(path):
        logger.error("path: '%s' does not exist", path)
        sys.exit(1)
    if os.path.isfile(path):
        await identify_json([path])
        sys.exit(0)
    file_list = await create_manifest(path)
    if not file_list:
        logger.info("no files in directory: %s", path)
        sys.exit(1)
    await identify_json(file_list)
def main() -> None:
    """Primary entry point for this script.

    Parses command-line arguments, adjusts the log level, and runs
    the async processing pipeline. Prints help and exits when no
    path is supplied.
    """
    parser = argparse.ArgumentParser(
        prog="json2json",
        description=(
            "parse JSON UTF-16 (BE-LE) objects and output them as UTF-8 "
            "for the sake of developer ergonomics"
        ),
        epilog="for more information visit https://github.com/ffdev-info/jsonid",
    )
    parser.add_argument(
        "--debug",
        # Typo fix: was "use debug loggng".
        help="use debug logging",
        required=False,
        action="store_true",
    )
    parser.add_argument(
        "--path",
        help="file path to process",
        required=False,
    )
    args = parser.parse_args()
    logging.getLogger(__name__).setLevel(
        logging.DEBUG if args.debug else logging.INFO
    )
    logger.debug("debug logging is configured")
    if not args.path:
        # Nothing to do without a path; show usage and stop.
        parser.print_help(sys.stderr)
        sys.exit()
    asyncio.run(
        process_data(
            path=args.path,
        )
    )


if __name__ == "__main__":
    main()