Lecture 03: Ridge and LASSO Regression

Jan 28, 2021

Ridge Regression

In [80]:
# Standard linear regression
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import eval_legendre
np.set_printoptions(precision=2, suppress=True)

N = 50
x = np.sort(2*np.random.rand(N)-1)
a = np.array([-0.001, 0.01, 0.55, 1.5, 1.2])
y = a[0]*eval_legendre(0,x) + a[1]*eval_legendre(1,x) + \
    a[2]*eval_legendre(2,x) + a[3]*eval_legendre(3,x) + \
    a[4]*eval_legendre(4,x) + 0.2*np.random.randn(N)

P = 30
X = np.zeros((N, P))
for p in range(P):
  X[:,p] = eval_legendre(p,x)
beta = np.linalg.lstsq(X, y, rcond=None)[0]

t    = np.linspace(-1, 1, 500)
Xhat = np.zeros((500,P))
for p in range(P):
  Xhat[:,p] = eval_legendre(p,t)
yhat = np.dot(Xhat, beta)  
plt.plot(x,y,'o',markersize=12)
plt.plot(t,yhat, linewidth=4)
plt.axis((-1, 1, -3, 3));
plt.show()
print(beta)
[ -81790.08 -243603.86 -399906.37 -546593.01 -678927.28 -791988.2
 -880736.7  -941059.49 -969926.3  -966507.33 -931918.46 -869880.99
 -785752.24 -686590.53 -579715.01 -472523.56 -371134.81 -280444.26
 -203302.49 -141038.51  -93221.11  -58473.89  -34553.15  -19108.2
   -9755.02   -4541.79   -1870.42    -661.19    -180.97     -33.27]
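
With only N = 50 noisy samples and P = 30 Legendre basis functions, the unregularized fit above overfits badly, as the enormous coefficients in the printout show. In matrix form, the cell solves

$$\hat{\beta} = \arg\min_{\beta} \|X\beta - y\|_2^2, \qquad X_{n,p} = L_p(x_n),$$

where $L_p$ is the degree-$p$ Legendre polynomial and the data follow $y_n = \sum_{p=0}^{4} a_p L_p(x_n) + \varepsilon_n$ with noise standard deviation 0.2.
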
In [81]:
# Regularized linear regression
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import eval_legendre
np.set_printoptions(precision=2, suppress=True)

N = 50
x = np.sort(2*np.random.rand(N)-1)
a = np.array([-0.001, 0.01, 0.55, 1.5, 1.2])
y = a[0]*eval_legendre(0,x) + a[1]*eval_legendre(1,x) + \
    a[2]*eval_legendre(2,x) + a[3]*eval_legendre(3,x) + \
    a[4]*eval_legendre(4,x) + 0.2*np.random.randn(N)

P = 30
X = np.zeros((N, P))
for p in range(P):
  X[:,p] = eval_legendre(p,x)

# Ridge via augmented least squares: minimize ||X beta - y||^2 + lambd*||beta||^2
# by stacking sqrt(lambd)*I under X and padding y with P zeros
lambd = 0.8
A = np.vstack((X, np.sqrt(lambd)*np.eye(P)))
b = np.hstack((y, np.zeros(P)))
beta = np.linalg.lstsq(A, b, rcond=None)[0]

t    = np.linspace(-1, 1, 500)
Xhat = np.zeros((500,P))
for p in range(P):
  Xhat[:,p] = eval_legendre(p,t)
yhat = np.dot(Xhat, beta)  

plt.plot(x,y,'o',markersize=12)
plt.plot(t,yhat, linewidth=4)
plt.show()
print(beta)
[ 0.   -0.    0.55  1.34  1.11  0.18  0.05 -0.03 -0.04 -0.02 -0.11 -0.04
  0.11 -0.02  0.07  0.23  0.14 -0.11  0.    0.14 -0.07 -0.05 -0.03 -0.15
 -0.01 -0.06 -0.17  0.13  0.05 -0.06]
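
The vstack construction above is the standard augmented least-squares formulation of ridge regression:

$$\min_{\beta} \|X\beta - y\|_2^2 + \lambda\|\beta\|_2^2 \;=\; \min_{\beta} \left\| \begin{bmatrix} X \\ \sqrt{\lambda}\,I \end{bmatrix}\beta - \begin{bmatrix} y \\ 0 \end{bmatrix} \right\|_2^2,$$

whose normal equations give the closed form $\hat{\beta} = (X^\top X + \lambda I)^{-1} X^\top y$. A minimal sanity check, a sketch rather than part of the lecture, reusing X, y, lambd, and beta from the cell above:

# Closed-form ridge via the normal equations; should match the lstsq solution
beta_closed = np.linalg.solve(X.T @ X + lambd*np.eye(P), X.T @ y)
print(np.allclose(beta, beta_closed))   # expect True, up to numerical tolerance
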

LASSO Regression

In [79]:
# LASSO linear regression
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import eval_legendre
import cvxpy as cvx
np.set_printoptions(precision=2, suppress=True)

N = 50
x = np.sort(2*np.random.rand(N)-1)
a = np.array([-0.001, 0.01, 0.55, 1.5, 1.2])
y = a[0]*eval_legendre(0,x) + a[1]*eval_legendre(1,x) + \
    a[2]*eval_legendre(2,x) + a[3]*eval_legendre(3,x) + \
    a[4]*eval_legendre(4,x) + 0.2*np.random.randn(N)
ysum = np.sum(np.abs(a))   # l1 norm of the true coefficients; used as the l1-ball radius

P = 30
X = np.zeros((N, P))
for p in range(P):
  X[:,p] = eval_legendre(p,x)

# ===== Call CVX ==========
# Solve: minimize ||X theta - y||^2  subject to  ||theta||_1 <= ysum
theta = cvx.Variable(P)
objective   = cvx.Minimize( cvx.sum_squares(X @ theta - y) )  # use @ (not *) for matrix multiply in modern CVXPY
constraints = [cvx.norm1(theta) <= ysum]
prob        = cvx.Problem(objective, constraints)
prob.solve()
beta = theta.value
# =========================

t    = np.linspace(-1, 1, 500)
Xhat = np.zeros((500,P))
for p in range(P):
  Xhat[:,p] = eval_legendre(p,t)
yhat = np.dot(Xhat, beta)  
plt.plot(x,y,'o',markersize=12)
plt.plot(t,yhat, linewidth=4)
plt.axis((-1, 1, -3, 3));
plt.show()
print(beta)
[ 0.   -0.    0.57  1.52  1.07  0.03  0.   -0.   -0.   -0.   -0.   -0.
  0.   -0.    0.   -0.    0.   -0.    0.   -0.    0.   -0.    0.   -0.
  0.   -0.    0.   -0.08  0.   -0.  ]
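
The constrained form above ($\min_\theta \|X\theta - y\|_2^2$ subject to $\|\theta\|_1 \le$ ysum) has an equivalent penalized (Lagrangian) form, $\min_\theta \frac{1}{2N}\|X\theta - y\|_2^2 + \alpha\|\theta\|_1$, which is what scikit-learn's Lasso solves. A sketch of the penalized route, where alpha = 0.01 is an illustrative assumption rather than a value tuned to match the constraint:

# Penalized-form LASSO via scikit-learn; alpha is an assumed illustrative value
from sklearn.linear_model import Lasso
lasso = Lasso(alpha=0.01, fit_intercept=False, max_iter=100000)  # no intercept: column 0 is the constant basis
lasso.fit(X, y)
print(lasso.coef_)   # sparse coefficients, like the CVXPY solution above
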