-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy paths12_13_regression.py
More file actions
100 lines (65 loc) · 1.95 KB
/
s12_13_regression.py
File metadata and controls
100 lines (65 loc) · 1.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# -*- coding: utf-8 -*-
"""S12_13 - Regression.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1lbzLHW8Wl8-VABAHH-a53ddofxkiJo0J
"""
import numpy as np
# Simulated regressor: np.random.normal(loc=20, size=100) draws 100 values
# with mean 20 and std 1 (scale defaults to 1).
# NOTE(review): if a spread of 20 was intended, this should be
# np.random.normal(0, 20, size=100) -- confirm with the author.
x = np.random.normal(20,size=100)
# Noise term: mean 0, std 1 (scale defaults to 1).
e = np.random.normal(0,size=100)
# True data-generating process: slope 3, intercept 5, additive Gaussian noise.
y = 3 * x + 5 + e
import matplotlib.pyplot as plt
# Scatter plot of the simulated sample.
plt.plot(x,y,"o")
plt.xlim([0, 25])
plt.ylim([0,75])
plt.show()
"""**OLS**: Math formula
Solving the least squares problem analytically, we get:
$\hat{𝛃}=(X'X)^{-1}(X'y)$
"""
# Design matrix with an intercept column: X = [1 | x], shape (100, 2).
X = np.column_stack((np.ones(100), x))  # or np.vstack((np.ones(100), x)).T
# Normal equations, step by step: beta_hat = (X'X)^{-1} (X'y)
a1 = np.matmul(np.transpose(X), X)
a1inv = np.linalg.inv(a1)
a2 = np.matmul(np.transpose(X), y)
beta = np.matmul(a1inv, a2)
# Short form -- FIXED: the original `(X.T @ X) @ (X.T @ y)` omitted the
# inverse of X'X, so it overwrote the correct estimate above with a wrong
# value.  solve() applies the inverse implicitly and is numerically more
# stable than forming inv(X'X) explicitly.
beta = np.linalg.solve(X.T @ X, X.T @ y)
print(beta)
"""**OLS**: optimize (least squares)"""
def sqerr(b):
return np.sum((y-b[0]-b[1]*x)**2)
from scipy.optimize import minimize
mymin =minimize(sqerr,np.array([1,1]))
print(mymin)
"""**MLE**"""
def negloglik(b):
return -(-100/2*np.log(2*np.pi)-100/2*np.log(b[2]**2)-1/(2*b[2]**2)*np.sum((y-b[0]-b[1]*x)**2))
myminll = minimize(negloglik,np.array([1,1,1]))
print(myminll)
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.graphics.api as smg
import patsy
import matplotlib.pyplot as plt
import numpy as np
"""**OLS** using `numpy.linalg.lstsq`"""
beta, res, rank, sval = np.linalg.lstsq(X,y)
print(beta)
"""**OLS** using `statsmodels`: """
model = sm.OLS(y,X)
result = model.fit()
print(result.params)
print(result.llf) # log-likelihood function value
print(result.rsquared)
print(result.summary2()) # or summary
"""Use names for variables"""
data={"dep":y,"exp":x}
fy, fX = patsy.dmatrices("dep ~ 1 + exp",data)
model = sm.OLS(fy,fX)
result = model.fit()
print(result.summary())
"""**OLS**: Using formula in `statsmodels`
"""
model = smf.ols("dep ~ 1 + exp",data) # data is a dict here, it can be a DataFrame
result = model.fit()
print(result.summary())