-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_generator.py
More file actions
128 lines (114 loc) · 3.72 KB
/
data_generator.py
File metadata and controls
128 lines (114 loc) · 3.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/env python3
"""
Top-level data_generator.py
Orchestrates dataset generation across all submissions.
"""
import argparse
import os
import shutil
import subprocess
import sys
from pathlib import Path

from PIL import Image
def count_clean_images(clean_dir) -> int:
    """Count the files directly inside *clean_dir* that pass image verification.

    Only regular files at the top level of the directory are considered;
    sub-directories are not descended into and ``.DS_Store`` entries are
    skipped. A file is counted when ``Image.open`` followed by ``verify()``
    completes without raising.

    Args:
        clean_dir: Directory to scan, as a ``pathlib.Path`` or a path string.

    Returns:
        The number of files that opened and verified successfully.

    Raises:
        OSError: If *clean_dir* cannot be listed (for example, it does not
            exist or is not a directory).
    """
    count = 0
    for entry in Path(clean_dir).iterdir():
        if not entry.is_file() or entry.name == ".DS_Store":
            continue
        try:
            with Image.open(entry) as img:
                img.verify()
        except Exception:
            # Best effort: any file that cannot be opened or verified is
            # simply not counted. ``Exception`` (rather than a bare
            # ``except``) lets KeyboardInterrupt/SystemExit propagate.
            continue
        count += 1
    return count
_SUBMISSION_SUFFIX = "_submission_2025"
_SAMPLE_SUBMISSION = "sample_submission_2025"
_MAYA_SUBMISSION = "maya_submission_2025"


def _remove_tree(path):
    """Delete the directory tree at *path* if it exists."""
    if path.exists():
        shutil.rmtree(path)


def _run_generator(workdir, extra_args=()):
    """Run ``data_generator.py`` with *workdir* as its working directory.

    The child is started with ``cwd=workdir`` instead of changing this
    process's working directory, so a failure cannot leave the parent
    stranded in the wrong directory. A non-zero exit status is reported on
    stderr but does not abort the overall run.

    Returns:
        The child process's exit status.
    """
    # Prefer the interpreter running this script; fall back to "python" in
    # the rare case the interpreter path is unknown.
    interpreter = sys.executable or "python"
    completed = subprocess.run(
        [interpreter, "data_generator.py", *extra_args],
        cwd=workdir,
    )
    if completed.returncode != 0:
        print(
            f"warning: data_generator.py in {workdir} exited with status "
            f"{completed.returncode}",
            file=sys.stderr,
        )
    return completed.returncode


def _percentage_limit(clean_dir, limit):
    """Convert *limit* (a percentage) into an image count for *clean_dir*.

    When *clean_dir* does not exist, *limit* is returned unchanged.
    Integer arithmetic is used so the result is exact; the floating-point
    form ``int(count * (limit / 100.0))`` can truncate one too low
    (e.g. 100 images at 29% yields 28).
    """
    if not clean_dir.exists():
        return limit
    return count_clean_images(clean_dir) * limit // 100


def main():
    """Regenerate the datasets for every ``*_submission_2025`` directory.

    All paths are relative to the current working directory, which must
    contain a ``datasets`` directory. Steps, in order:

    1. For every ``datasets/*_submission_2025`` directory, remove its
       ``stego`` directory. Its ``clean`` directory is removed as well,
       except for ``sample_submission_2025`` and ``maya_submission_2025``.
    2. Run each submission's own ``data_generator.py`` (the sample
       submission is skipped here). ``maya_submission_2025`` is run without
       arguments; every other submission receives ``--limit N``.
    3. Remove ``datasets/val/stego`` and the sample submission's ``stego``.
    4. Run the sample submission's generator twice: once on its own
       ``clean`` directory and once on ``datasets/val/clean``. For these
       runs ``--limit`` is treated as a percentage of the verified images
       in the respective ``clean`` directory (or passed through unchanged
       when that directory is missing).

    Raises:
        FileNotFoundError: If ``datasets`` is missing, or if the sample
            submission directory needed in step 4 does not exist.
    """
    parser = argparse.ArgumentParser(description="Top-level dataset generator")
    parser.add_argument(
        "--limit",
        type=int,
        default=10,
        help="Percentage for sample and val limits, or number for other submissions (default 10)",
    )
    args = parser.parse_args()

    datasets_dir = Path("datasets")
    if not datasets_dir.is_dir():
        raise FileNotFoundError(
            f"datasets directory not found: {datasets_dir.resolve()}"
        )

    # Snapshot the submission directories once; sorting makes the processing
    # order deterministic across platforms.
    submissions = sorted(
        entry
        for entry in datasets_dir.iterdir()
        if entry.is_dir() and entry.name.endswith(_SUBMISSION_SUFFIX)
    )

    # Step 1: clear previous outputs. The sample and maya submissions keep
    # their clean images; everything else is regenerated from scratch.
    for sub_dir in submissions:
        _remove_tree(sub_dir / "stego")
        if sub_dir.name not in (_SAMPLE_SUBMISSION, _MAYA_SUBMISSION):
            _remove_tree(sub_dir / "clean")

    # Step 2: run each submission's own generator.
    for sub_dir in submissions:
        if sub_dir.name == _SAMPLE_SUBMISSION:
            continue
        if sub_dir.name == _MAYA_SUBMISSION:
            _run_generator(sub_dir)
        else:
            _run_generator(sub_dir, ["--limit", str(args.limit)])

    sample_dir = datasets_dir / _SAMPLE_SUBMISSION
    val_dir = datasets_dir / "val"

    # Step 3: clear the outputs produced by the sample generator.
    _remove_tree(val_dir / "stego")
    _remove_tree(sample_dir / "stego")

    # Step 4a: sample submission on its own clean images.
    limit_sample = _percentage_limit(sample_dir / "clean", args.limit)
    _run_generator(
        sample_dir,
        [
            "--clean_source",
            "clean",
            "--output_stego",
            "stego",
            "--seeds_dir",
            "seeds",
            "--limit",
            str(limit_sample),
        ],
    )

    # Step 4b: the sample generator again, this time on the validation set.
    # Paths are relative to the sample submission directory (the child's cwd).
    limit_val = _percentage_limit(val_dir / "clean", args.limit)
    _run_generator(
        sample_dir,
        [
            "--clean_source",
            "../val/clean",
            "--output_stego",
            "../val/stego",
            "--seeds_dir",
            "../val/seeds",
            "--limit",
            str(limit_val),
        ],
    )
# Script entry point: run the orchestration only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()