# stats_cli.py - command-line interface for StatisticalToolbox (RoseRahimi/StatisticalToolbox, branch main).
import argparse
from Core_stats import *
import sys
import csv

def read_data_from_file(file_path):
    """Read numeric data from a CSV file.

    Every CSV row is converted to a list of floats. Cells that are empty or
    contain only whitespace are skipped, and rows that end up with no numeric
    values at all (blank lines, rows of empty cells) are dropped so callers
    never receive an empty inner list.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list with one list of floats per non-empty row, in file order.
        The list is empty when the file holds no numeric values.

    Exits:
        Prints an error message and calls ``sys.exit(1)`` when the file is
        missing, cannot be read, or contains a non-numeric cell.
    """
    try:
        # newline='' lets the csv module do its own newline handling, as its
        # documentation requires; 'utf-8-sig' transparently strips the BOM
        # that spreadsheet exports often prepend (plain ASCII/UTF-8 numeric
        # files are read unchanged).
        with open(file_path, 'r', newline='', encoding='utf-8-sig') as file:
            data = []
            for row in csv.reader(file):
                values = [float(cell) for cell in row if cell.strip()]
                if values:
                    data.append(values)
            return data
    except FileNotFoundError:
        print(f"Error: File {file_path} not found")
        sys.exit(1)
    except OSError as exc:
        # Permission problems, directories passed as files, etc.
        print(f"Error: Could not read file {file_path}: {exc}")
        sys.exit(1)
    except (ValueError, csv.Error):
        # float() failures, undecodable bytes (UnicodeDecodeError is a
        # ValueError) and malformed CSV all mean "not a numeric CSV file".
        print("Error: File must contain only numeric values")
        sys.exit(1)

def _fail(message):
    """Print ``Error: <message>`` and terminate the process with exit status 1."""
    print(f"Error: {message}")
    sys.exit(1)


def _first_row(file_path):
    """Return the first row of numeric values read from ``file_path``.

    Raises:
        ValueError: if the file yields no numeric values. Raising here (rather
            than indexing blindly) turns what used to be an uncaught
            IndexError into a regular, reported error.
    """
    rows = read_data_from_file(file_path)
    if not rows or not rows[0]:
        raise ValueError(f"File {file_path} contains no numeric data")
    return rows[0]


def main():
    """Parse the command line and run the requested statistical analysis.

    Operations (positional ``operation`` argument):
        basic        mean, sample standard deviation and sample variance of
                     the data in ``--file``.
        ttest        independent t-test between the two files in ``--groups``.
        anova        F-statistic, p-value and partial eta squared for the files
                     in ``--groups``; when the p-value is below ``--alpha`` a
                     Tukey HSD post-hoc analysis is printed as well.
        correlation  Pearson correlation and r-squared between the first two
                     rows of ``--file``.
        ztest        accepted by the parser but not implemented; reported as
                     an error instead of silently doing nothing.

    The statistical functions and ``StatisticalError`` are not defined in this
    file; they are expected to come from the ``Core_stats`` star import.

    NOTE(review): each data series is taken from a *row* of the parsed CSV
    (``rows[0]``, ``rows[1]``), while the user-facing messages talk about
    "columns" -- confirm the intended file layout.

    All failures are reported as ``Error: ...`` on standard output and end the
    process with exit status 1.
    """
    parser = argparse.ArgumentParser(description='Statistical Analysis Tool')
    parser.add_argument('operation', choices=[
        'basic', 'ttest', 'anova', 'correlation', 'ztest'
    ], help='Type of statistical analysis to perform')

    parser.add_argument('--file', '-f', help='Input CSV file path')
    parser.add_argument('--groups', '-g', nargs='+', help='Multiple input files for group comparison')
    parser.add_argument('--alpha', '-a', type=float, default=0.05, help='Significance level (default: 0.05)')

    args = parser.parse_args()
    operation = args.operation

    # Validate the arguments up front, before any file is opened, so the
    # try-block below only wraps data loading and computation.
    if operation == 'basic':
        if not args.file:
            _fail("Please provide an input file")
    elif operation == 'ttest':
        if not args.groups or len(args.groups) != 2:
            _fail("Please provide exactly two input files for t-test")
    elif operation == 'anova':
        if not args.groups or len(args.groups) < 2:
            _fail("Please provide at least two input files for ANOVA")
        # A significance level outside (0, 1) is meaningless (also rejects NaN).
        if not 0 < args.alpha < 1:
            _fail("Significance level (--alpha) must be between 0 and 1")
    elif operation == 'correlation':
        if not args.file:
            _fail("Please provide an input file with two columns")
    else:
        # 'ztest' is a valid choice for the parser but has no implementation.
        _fail(f"Operation '{operation}' is not implemented")

    try:
        if operation == 'basic':
            data = _first_row(args.file)
            print("\nBasic Statistics:")
            print(f"Mean: {mean(data)}")
            print(f"Standard Deviation: {std_dev_samp(data)}")
            print(f"Variance: {var_sample(data)}")

        elif operation == 'ttest':
            group1 = _first_row(args.groups[0])
            group2 = _first_row(args.groups[1])

            print("\nIndependent T-Test Results:")
            # The return value is not used here; presumably the function
            # prints its own report -- confirm against Core_stats.
            independent_ttest(group1, group2,
                              label1=f"Group 1 ({args.groups[0]})",
                              label2=f"Group 2 ({args.groups[1]})")

        elif operation == 'anova':
            groups = [_first_row(path) for path in args.groups]

            print("\nANOVA Results:")
            print(f"F-statistic: {F(groups)}")
            # Computed once and reused for the post-hoc decision below.
            p_value = calculate_pvalue(groups)
            print(f"p-value: {p_value}")
            print(f"Effect size (partial eta squared): {partial_eta_squared(groups)}")

            if p_value < args.alpha:
                print("\nPost-hoc Tukey HSD Results:")
                results = perform_tukey_hsd(groups, alpha=args.alpha)
                # A plain string result is printed as-is; otherwise each
                # entry is a sequence of lines.
                if isinstance(results, str):
                    print(results)
                else:
                    for result in results:
                        print("\n".join(result))

        elif operation == 'correlation':
            data = read_data_from_file(args.file)
            if len(data) < 2:
                raise ValueError("File must contain two columns for correlation")

            x, y = data[0], data[1]
            # Correlation is only defined for paired observations.
            if len(x) != len(y):
                raise ValueError(
                    "Both data series must contain the same number of values for correlation"
                )

            print("\nCorrelation Results:")
            print(f"Pearson correlation: {pCorrelation(x, y)}")
            print(f"Effect size (r-squared): {effect_size(x, y)}")

    except (StatisticalError, ValueError) as e:
        print(f"Error: {str(e)}")
        sys.exit(1)

# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()