-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstats_cli.py
More file actions
100 lines (81 loc) · 3.85 KB
/
stats_cli.py
File metadata and controls
100 lines (81 loc) · 3.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import argparse
from Core_stats import *
import sys
import csv
def read_data_from_file(file_path):
    """Read numeric data from a CSV file.

    Every line of the file is parsed into a list of floats. Cells that are
    empty or contain only whitespace are skipped, and lines that end up with
    no numeric cells at all (blank lines, whitespace-only lines, lines made
    of bare separators) are dropped instead of being returned as empty lists.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list of rows in file order; each row is a non-empty list of floats.
        The list itself is empty when the file holds no numeric data.

    Exits:
        Prints an error message and terminates the process with status 1 when
        the file does not exist or a cell cannot be converted to a float.
    """
    try:
        # newline='' is what the csv module requires of its input file.
        # utf-8-sig reads plain ASCII/UTF-8 unchanged but strips a leading
        # byte-order mark, which float() would otherwise reject. Undecodable
        # bytes raise UnicodeDecodeError, a ValueError subclass, so they are
        # reported by the ValueError handler below.
        with open(file_path, 'r', newline='', encoding='utf-8-sig') as file:
            rows = []
            for row in csv.reader(file):
                values = [float(cell) for cell in row if cell.strip()]
                if values:  # skip lines that carry no numbers
                    rows.append(values)
            return rows
    except FileNotFoundError:
        print(f"Error: File {file_path} not found")
        sys.exit(1)
    except ValueError:
        print("Error: File must contain only numeric values")
        sys.exit(1)
def _single_series(rows):
    """Return the one data series contained in *rows* (a list of float rows)."""
    rows = [row for row in rows if row]  # ignore lines without numeric cells
    if not rows:
        raise ValueError("Input file contains no numeric data")
    if len(rows) == 1:
        # A one-line file is a series laid out horizontally ("1,2,3,4").
        return rows[0]
    # One observation per line: the series is the first column.
    return [row[0] for row in rows]


def _paired_series(rows):
    """Return the (x, y) series for correlation from *rows* (float rows)."""
    rows = [row for row in rows if row]  # ignore lines without numeric cells
    if len(rows) == 2 and len(rows[0]) > 2 and len(rows[0]) == len(rows[1]):
        # Backward compatibility: two long lines are read as x on the first
        # line and y on the second, which is how files were consumed before.
        return rows[0], rows[1]
    if not rows or any(len(row) < 2 for row in rows):
        raise ValueError("File must contain two columns for correlation")
    return [row[0] for row in rows], [row[1] for row in rows]


def main():
    """Command-line entry point of the statistical analysis tool.

    Parses the command line, loads the requested input file(s) with
    read_data_from_file and prints the results of the chosen operation:

    * basic       -- mean, sample standard deviation and sample variance of
                     the series in --file.
    * ttest       -- independent_ttest on the series of exactly two --groups
                     files (presumably it prints its own report; its return
                     value is not used here).
    * anova       -- F statistic, p-value and partial eta squared for two or
                     more --groups files, followed by Tukey HSD results when
                     the p-value is below --alpha.
    * correlation -- Pearson correlation and r-squared of the first two
                     columns of --file.

    A series is read column-wise (one observation per line); a file that
    consists of a single line is read as one series laid out in a row.

    Missing or invalid arguments, an operation without an implementation,
    and StatisticalError / ValueError raised while computing are reported as
    "Error: ..." and end the process with exit status 1.
    """
    parser = argparse.ArgumentParser(description='Statistical Analysis Tool')
    parser.add_argument('operation', choices=[
        'basic', 'ttest', 'anova', 'correlation', 'ztest'
    ], help='Type of statistical analysis to perform')
    parser.add_argument('--file', '-f', help='Input CSV file path')
    parser.add_argument('--groups', '-g', nargs='+', help='Multiple input files for group comparison')
    parser.add_argument('--alpha', '-a', type=float, default=0.05, help='Significance level (default: 0.05)')
    args = parser.parse_args()

    try:
        if args.operation == 'basic':
            if not args.file:
                print("Error: Please provide an input file")
                sys.exit(1)
            data = _single_series(read_data_from_file(args.file))
            print("\nBasic Statistics:")
            print(f"Mean: {mean(data)}")
            print(f"Standard Deviation: {std_dev_samp(data)}")
            print(f"Variance: {var_sample(data)}")

        elif args.operation == 'ttest':
            if not args.groups or len(args.groups) != 2:
                print("Error: Please provide exactly two input files for t-test")
                sys.exit(1)
            group1 = _single_series(read_data_from_file(args.groups[0]))
            group2 = _single_series(read_data_from_file(args.groups[1]))
            print("\nIndependent T-Test Results:")
            independent_ttest(group1, group2,
                              label1=f"Group 1 ({args.groups[0]})",
                              label2=f"Group 2 ({args.groups[1]})")

        elif args.operation == 'anova':
            if not args.groups or len(args.groups) < 2:
                print("Error: Please provide at least two input files for ANOVA")
                sys.exit(1)
            groups = [_single_series(read_data_from_file(path))
                      for path in args.groups]
            print("\nANOVA Results:")
            print(f"F-statistic: {F(groups)}")
            # Computed once and reused for the report and the post-hoc check.
            p_value = calculate_pvalue(groups)
            print(f"p-value: {p_value}")
            print(f"Effect size (partial eta squared): {partial_eta_squared(groups)}")
            if p_value < args.alpha:
                print("\nPost-hoc Tukey HSD Results:")
                results = perform_tukey_hsd(groups, alpha=args.alpha)
                # A plain string result is printed as-is; otherwise each
                # entry is a sequence of text lines.
                if isinstance(results, str):
                    print(results)
                else:
                    for result in results:
                        print("\n".join(result))

        elif args.operation == 'correlation':
            if not args.file:
                print("Error: Please provide an input file with two columns")
                sys.exit(1)
            x, y = _paired_series(read_data_from_file(args.file))
            print("\nCorrelation Results:")
            print(f"Pearson correlation: {pCorrelation(x, y)}")
            print(f"Effect size (r-squared): {effect_size(x, y)}")

        else:
            # 'ztest' is accepted by the parser but has no handler; fail
            # loudly instead of exiting successfully without any output.
            print(f"Error: Operation '{args.operation}' is not implemented")
            sys.exit(1)
    except (StatisticalError, ValueError) as e:
        print(f"Error: {str(e)}")
        sys.exit(1)
# Start the CLI only when this file is executed as a script; importing the
# module must not parse arguments or run an analysis.
if __name__ == '__main__':
    main()