-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextend.py
More file actions
123 lines (96 loc) · 4.14 KB
/
extend.py
File metadata and controls
123 lines (96 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import sys
import random
import argparse
from ezr import * # import everything from ezr
# Data lists
# Dumb vs Smart Scoring Policies
def run_dumb_strategy(N, d):
"""Run the dumb strategy and return the results."""
dumb_results = []
dumb = [guess(N, d) for _ in range(20)] # Generate dumb strategies
dumb_results += [d.chebyshev(lst[0]) for lst in dumb] # Calculate Chebyshev for dumb results
dumb_results.sort() # Sort the dumb results
return dumb_results
def run_smart_strategy(d):
"""Run the smart strategy and return the results."""
smart_results = []
smart = [d.shuffle().activeLearning() for _ in range(20)] # Generate smart strategies
smart_results += [d.chebyshev(lst[0]) for lst in smart] # Calculate Chebyshev for smart results
smart_results.sort() # Sort the smart results
return smart_results
# Updated report function to include dumb vs smart scoring
def branch_extend():
scoring_policies = [('dumb', run_dumb_strategy),
('smart', run_smart_strategy)]
print(the.train, flush=True, file=sys.stderr)
print("\n" + the.train)
repeats = 20
d = DATA().adds(csv(the.train))
b4 = [d.chebyshev(row) for row in d.rows]
asIs, div = medianSd(b4)
rnd = lambda z: z
print(f"asIs\t: {asIs:.3f}")
print(f"div\t: {div:.3f}")
print(f"rows\t: {len(d.rows)}")
print(f"xcols\t: {len(d.cols.x)}")
print(f"ycols\t: {len(d.cols.y)}\n")
somes = [stats.SOME(b4, f"asIs,{len(d.rows)}")]
for what, how in scoring_policies:
for the.Last in [20, 30, 40, 50]:
start = time()
result = []
runs = 0
if what == 'dumb':
tmp_result = how(the.Last, d)
else: # smart strategy
tmp_result = how(d)
result += tmp_result
runs += len(tmp_result)
# Identify whether the data is low or high dimensional
param_parts = the.train.split('/')
sliced_train = '/'.join(param_parts[3:]) #From data/optimize/ezr , join back
if len(d.cols.x) < 6 : # Atleast one xcol should be there
dimension_category = "low_dim"
elif len(d.cols.x) >=6:
dimension_category = "high_dim"
pre = f"{what}/dim={dimension_category}"
tag = f"{pre},{the.Last}"
print(tag, f": {(time() - start) / repeats:.2f} secs")
somes += [stats.SOME(result, tag)]
stats.report(somes, 0.01)
# Check if it's a dataset path or 'low'/'high' option
# if option.endswith('.csv'):
# data_list = [option]
# elif option == 'low':
# data_list = Low_dim_data_list
# elif option == 'high':
# data_list = High_dim_data_list
# else:
# raise ValueError("Invalid option. Choose 'low', 'high', or a valid dataset path.")
# for data in data_list:
# print(f"THE DATA {data} ----")
# for N in (20, 30, 40, 50):
# somes = []
# d = DATA().adds(csv(data))
# dumb = [guess(N, d) for _ in range(20)]
# dumb = [d.chebyshev(lst[0]) for lst in dumb]
# dumb.sort()
# the.Last = N
# smart = [d.shuffle().activeLearning() for _ in range(20)]
# smart = [d.chebyshev(lst[0]) for lst in smart]
# smart.sort()
# somes += [stats.SOME(dumb, f"dumb,{N}")]
# somes += [stats.SOME(smart, f"smart,{N}")]
# stats.report(somes)
# print("------")
def guess(N, d):
some = random.choices(d.rows, k=N)
return d.clone().adds(some).chebyshevs().rows
# Command-line argument parsing
if __name__ == "__main__":
the.cli()
parser = argparse.ArgumentParser(description="Process some datasets.")
parser.add_argument('-D', action='store_true', help="An optional flag for the.Dull cohen's D")
parser.add_argument('-t', '--dataset', type=str, help="Path to the dataset")
args = parser.parse_args()
branch_extend()