Lowess Method

In [1]:
# read the data 
from read_data_fx import read_tb_dataTA
data=read_tb_dataTA(1,1)  # this data is given in table 1.1 in the book
X=data[:,0] # Lot size
Y=data[:,1] #Work Hours   

Scatter plot and Lowess Curve

In [2]:
import numpy as np
import matplotlib.pyplot as plt 
%matplotlib inline 

plt.figure(figsize=(10,8))
plt.plot(X,Y,'bo',ms=20) 
plt.xlim(0,160);
plt.ylim(90,600);
plt.xlabel('Lot size',fontsize=20)
plt.ylabel('Hours',fontsize=20)

# how to import lowess function 

import statsmodels.api as sm
lowess = sm.nonparametric.lowess
Z = lowess(Y,X,frac=0.5,it=2)

# farc = neighborhood size, it= number of iterations 
# See http://statsmodels.sourceforge.net/devel/generated/statsmodels.nonparametric.smoothers_lowess.lowess.html
# for further description 


plt.plot(Z[:,0],Z[:,1],'g-',lw=5);

Confidence bands and Lowess Curve

In [3]:
def lin_reg(X,Y):
    barX=np.mean(X); barY=np.mean(Y)
    XminusbarX=X-barX; YminusbarY=Y-barY
    b1=sum(XminusbarX*YminusbarY)/sum(XminusbarX**2)
    b0=barY-b1*barX
    Yhat=b0+b1*X
    e_i=Y-Yhat
    sse=np.sum(e_i**2)
    ssr=np.sum((Yhat-barY )**2)
    
    return {'Yhat':Yhat,'b0':b0,'b1':b1,'e_i': e_i,'sse':sse,'ssr':ssr}




# confidence band 
from scipy import stats
n=len(X)
linreg=lin_reg(X,Y)
Yhat=linreg['Yhat'];
sse=linreg['sse']; 
MSE=sse/np.float(n-2)
barX=np.mean(X)
XminusbarX=X-barX



s_of_yh_hat=np.sqrt(MSE*(1.0/n+(X-barX)**2/sum(XminusbarX**2)))
W=np.sqrt(2.0*stats.f.ppf(0.95,2,n-2))
cb_upper=Yhat+W*s_of_yh_hat
cb_lower=Yhat-W*s_of_yh_hat
In [4]:
import matplotlib.pyplot as plt 
%matplotlib inline 

plt.figure(figsize=(10,8))
plt.plot(X,Y,'bo',ms=20) 
plt.xlim(0,160);
plt.ylim(90,600);


# X values are not sorted in this example, sort it to plot the confidence band properly  
idx=np.argsort(X)

plt.plot(X[idx],cb_lower[idx],'k--',linewidth=3)
plt.plot(X[idx],cb_upper[idx],'k--',linewidth=3);
plt.plot(Z[:,0],Z[:,1],'g-',lw=5);