tolerance_python/umatolint.py at main · baileyarzate/tolerance_python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import numpy as np
import pandas as pd
import scipy.stats as st

def length(x):
    if type(x) == int or type(x) == float:
        return 1
    else:
        return len(x)

def umatolint(x, N = None, n = None, dist = 'Bin', alpha = 0.05, P = 0.99):
    '''
Uniformly Most Accurate Upper Tolerance Limits for Certain Discrete
Distributions

Description
-----------
    Provides uniformly most accurate upper tolerance limits for the binomial,
    negative binomial, and Poisson distributions.

    umatolint(x, n = NULL, dist = ["Bin", "NegBin", "Pois"], N, alpha = 0.05,
              P = 0.99)

Parameters
----------
    x : float or list of floats
        A vector of data which is distributed according to one of the
        binomial, negative binomial, or Poisson distributions. If the
        length of x is 1,then it is assumed that this number is the sum of
        iid values from the assumed distribution.

    N : int, optional
        Must be specified for the binomial and negative binomial distributions.
        If dist = "Bin", then N is the number of Bernoulli trials and must be
        a positive integer. If dist = "NegBin", then N is the total number of
        successful trials (or dispersion parameter) and must be strictly
        positive. The default is None.

    n : int, optional
        The sample size of the data. If None, then n is calculated as the
        length of x. The default is None.

    dist : string, optional
        The distribution for the data given by x. The options are "Bin" for
        the binomial distribution, "NegBin" for the negative binomial
        distribution, and "Pois" for the Poisson distribution. The default is
        'Bin'.

    alpha : float, optional
        The level chosen such that 1-alpha is the confidence level. The
        default is 0.05.

    P : float, optional
        The proportion of the population to be covered by this tolerance
        interval. The default is 0.99.

Returns
-------
    umatolint returns a data frame with items:

        alpha:
            The specified significance level.

        P
            The proportion of the population covered by this tolerance
            interval.

        p.hat
            The maximum likelihood estimate for the probability of success in
            each trial; reported if dist = "Bin".

        nu.hat
            The maximum likelihood estimate for the probability of success in
            each trial; reported if dist = "NegBin".

        lambda.hat
            The maximum likelihood estimate for the rate of success; reported
            if dist = "Pois".

        1-sided.upper
            The 1-sided upper tolerance limit.

References
----------
    Derek S. Young (2010). tolerance: An R Package for Estimating Tolerance
        Intervals. Journal of Statistical Software, 36(5), 1-39.
        URL http://www.jstatsoft.org/v36/i05/.

    Zacks, S. (1970), Uniformly Most Accurate Tolerance Limits for Monotone
        Likelihood Ratio Families of Discrete Distributions, Journal of the
        American Statistical Association, 65, 307–316.

Examples
--------
    ## Examples from Zacks (1970).

        umatolint(25, n = 4, dist = "Bin", N = 10, alpha = 0.10,
           P = 0.95)

        umatolint(13, n = 10, dist = "NegBin", N = 2, alpha = 0.10,
           P = 0.95)

        umatolint(37, n = 10, dist = "Pois", alpha = 0.10, P = 0.95)
    '''
    if length(x) == 1 and n == None:
        return 'A value for n must be specified!'
    if length(x) > 1:
        n = len(x)
        y = sum(x)
    else:
        y = x
    if dist == 'Bin' or dist == 'NegBin':
        if N == None:
            print('N must be specfied for the binomial and negative binomial distribution.',
                    'For the Poisson distribution, N doesn\'t need to be specified.')
            return ''
    if dist == 'Bin':
        if y > 0:
            r0 = 1-st.beta.ppf(alpha, N*n-y+1,y)
            r1 = 1-st.beta.ppf(alpha, N*n-y,y+1)
            R = max(r0,r1)
        else:
            R = 1-alpha**(1/(N*n))
        def f2(k, P, N):
            return st.beta.ppf(1-P,k+1,N-k)
        k = -1
        temp = -1
        while temp < R:
            k = k+1
            if k < N:
                temp = f2(k=k,P=P,N=N)
            else:
                temp = 1
        phat = (y/n)/N
        if k < 1:
            print("upper is less than 1, error, results are invalid.")
        return pd.DataFrame({'alpha':[alpha],'P':[P],'p.hat':[phat],'1-sided.upper':k})
    if dist == 'NegBin':
        if y > 0:
            r1 = 1 - st.beta.ppf(alpha,n*N,y+1)
            r0 = 1 - st.beta.ppf(alpha,n*N,y)
            R = max(r0,r1)
        else:
            R = 1-alpha**(1/(n*N))
        k = -1
        temp = 1.1
        while temp > 1-R:
            k = k+1
            temp = st.beta.ppf(P,N,k+1)
        nuhat = N/(N+(y/n))
        return pd.DataFrame({'alpha':[alpha],'P':[P],'nu.hat':[nuhat],'1-sided.upper':k})
    if dist == 'Pois':
        if y > 0:
            r0 = st.chi2.ppf(1-alpha,2*y+2)/(2*n)
            r1 = st.chi2.ppf(1-alpha,2*y)/(2*n)
            R = 2*max(r0,r1)
        else:
            R = -(1/n)*np.log(alpha)
        k = -1
        temp = -1
        while temp < R:
            k = k+1
            temp = st.chi2.ppf(1-P,2*k+2)
        lambdahat = y/n
        return pd.DataFrame({'alpha':[alpha],'P':[P],'nu.hat':[lambdahat],'1-sided.upper':k})