# 2/4/2021
import numpy as np
import matplotlib.pyplot as plt

np.set_printoptions(precision=2, suppress=True)
plt.rcParams.update({'font.size': 16})

# Problem setup: N noisy samples of a degree-4 polynomial on [-1, 1].
N = 50
x = 2 * np.random.rand(N) - 1
a = np.array([0.5, -2, -3, 4, 6])
y = a[0] + a[1]*x + a[2]*x**2 + a[3]*x**3 + a[4]*x**4 \
    + 0.25 * np.random.randn(N)

# Vandermonde-style design matrix: column j holds x**j.
d = 5
X = np.column_stack([x**j for j in range(d)])

# Gradient descent with exact line search on the least-squares cost.
theta = np.zeros(d)
num_iters = 1000
cost = np.zeros(num_iters)
XtX = X.T @ X

for itr in range(num_iters):
    grad = X.T @ (X @ theta - y)      # gradient of 0.5*||X theta - y||^2
    direction = grad                  # steepest-descent direction
    # Exact step size for a quadratic cost: argmin over alpha of J(theta - alpha*direction).
    alpha = (grad @ direction) / ((XtX @ direction) @ direction)
    theta = theta - alpha * direction
    cost[itr] = np.linalg.norm(X @ theta - y)**2 / N

# Training cost per iteration (log-scaled iteration axis).
fig = plt.figure(figsize=(8, 5))
plt.semilogx(cost, 'o')
import numpy as np
import matplotlib.pyplot as plt

np.set_printoptions(precision=2, suppress=True)
plt.rcParams.update({'font.size': 22})

# Problem setup: N noisy samples of the line 0.5 + 0.25*x on [-1, 1].
N = 50
x = 2 * np.random.rand(N) - 1
a = np.array([0.5, 0.25])
y = a[0] + a[1] * x + 0.25 * np.random.randn(N)

# Design matrix with columns [1, x].
d = 2
X = np.column_stack([x**j for j in range(d)])

# Log-cost surface over a grid of (theta0, theta1), for the contour plot.
xgrid = np.linspace(0.5 - 1, 0.5 + 1, 200)
ygrid = np.linspace(0.25 - 1, 0.25 + 1, 200)
theta0, theta1 = np.meshgrid(xgrid, ygrid)
cost = np.zeros((len(xgrid), len(ygrid)))
for r in range(len(xgrid)):
    for c in range(len(ygrid)):
        candidate = np.hstack((theta0[r, c], theta1[r, c]))
        cost[r, c] = np.log(np.linalg.norm(X @ candidate - y)**2 / N)

# Gradient descent (exact line search) from a fixed start, recording
# the whole trajectory so the path can be drawn on the contour plot.
max_itr = 1000
theta = np.array([-0.5, -0.75])
XtX = X.T @ X
theta_store = np.zeros((d, max_itr + 1))
theta_store[0, 0] = -0.5
theta_store[1, 0] = -0.75

for itr in range(max_itr):
    grad = X.T @ (X @ theta - y)
    direction = grad
    alpha = (grad @ direction) / ((XtX @ direction) @ direction)
    theta = theta - alpha * direction
    theta_store[:, itr + 1] = theta

# Contour of the log-cost with the first ten descent steps overlaid.
fig = plt.figure(figsize=(6, 6))
plt.contourf(xgrid, ygrid, cost)
for k in range(10):
    plt.plot(theta_store[0, k:k+2], theta_store[1, k:k+2], c='w', linewidth=2)
import numpy as np
import matplotlib.pyplot as plt

np.set_printoptions(precision=2, suppress=True)
plt.rcParams.update({'font.size': 22})

# Problem setup: N noisy samples of a degree-4 polynomial on [-1, 1].
N = 50
x = 2 * np.random.rand(N) - 1
a = np.array([0.5, -2, -3, 4, 6])
y = a[0] + a[1]*x + a[2]*x**2 + a[3]*x**3 + a[4]*x**4 \
    + 0.1 * np.random.randn(N)

# Design matrix: column j holds x**j.
d = 5
X = np.column_stack([x**j for j in range(d)])

# Momentum (heavy-ball) descent: the search direction is an exponential
# moving average of the previous and current gradients.
theta = np.zeros(d)
num_iters = 1000
cost = np.zeros(num_iters)
XtX = X.T @ X
grad_prev = np.zeros(d)
beta = 0.9

for itr in range(num_iters):
    grad = X.T @ (X @ theta - y)
    direction = beta * grad_prev + (1 - beta) * grad
    # Exact line-search step size along the momentum direction.
    alpha = (grad @ direction) / ((XtX @ direction) @ direction)
    theta = theta - alpha * direction
    grad_prev = grad
    cost[itr] = np.linalg.norm(X @ theta - y)**2 / N

# Training cost per iteration (log-scaled iteration axis).
fig = plt.figure(figsize=(8, 5))
plt.semilogx(cost, 'o')
import numpy as np
import matplotlib.pyplot as plt

np.set_printoptions(precision=2, suppress=True)
plt.rcParams.update({'font.size': 22})

# Problem setup: N noisy samples of the line 0.5 + 0.25*x on [-1, 1].
N = 50
x = 2 * np.random.rand(N) - 1
a = np.array([0.5, 0.25])
y = a[0] + a[1] * x + 0.25 * np.random.randn(N)

# Design matrix with columns [1, x].
d = 2
X = np.column_stack([x**j for j in range(d)])

# Log-cost surface over a grid of (theta0, theta1), for the contour plot.
xgrid = np.linspace(0.5 - 1, 0.5 + 1, 100)
ygrid = np.linspace(0.25 - 1, 0.25 + 1, 100)
theta0, theta1 = np.meshgrid(xgrid, ygrid)
cost = np.zeros((len(xgrid), len(ygrid)))
for r in range(len(xgrid)):
    for c in range(len(ygrid)):
        candidate = np.hstack((theta0[r, c], theta1[r, c]))
        cost[r, c] = np.log(np.linalg.norm(X @ candidate - y)**2 / N)

# Momentum descent from a fixed start, recording the trajectory.
# NOTE: this cell uses the heavy-ball form beta*grad_prev + grad
# (no (1-beta) factor on the current gradient).
max_itr = 1000
theta = np.array([-0.5, -0.75])
XtX = X.T @ X
grad_prev = np.zeros(d)
theta_store = np.zeros((d, max_itr + 1))
theta_store[0, 0] = -0.5
theta_store[1, 0] = -0.75
beta = 0.9

for itr in range(max_itr):
    grad = X.T @ (X @ theta - y)
    direction = beta * grad_prev + grad
    alpha = (grad @ direction) / ((XtX @ direction) @ direction)
    theta = theta - alpha * direction
    grad_prev = grad
    theta_store[:, itr + 1] = theta

# Contour of the log-cost with the first ten momentum steps overlaid.
fig = plt.figure(figsize=(8, 5))
plt.contourf(xgrid, ygrid, cost)
plt.axis('square')
for k in range(10):
    plt.plot(theta_store[0, k:k+2], theta_store[1, k:k+2], c='w', linewidth=2)
import numpy as np
import matplotlib.pyplot as plt

np.set_printoptions(precision=2, suppress=True)
plt.rcParams.update({'font.size': 16})

# Problem setup: N noisy samples of a degree-4 polynomial on [-1, 1].
N = 5000
x = 2 * np.random.rand(N) - 1
a = np.array([0.5, -2, -3, 4, 6])
y = a[0] + a[1]*x + a[2]*x**2 + a[3]*x**3 + a[4]*x**4 \
    + 0.25 * np.random.randn(N)

# Design matrix: column j holds x**j.
d = 5
X = np.column_stack([x**j for j in range(d)])

# Mini-batch stochastic gradient descent: each iteration samples a batch
# (with replacement) and takes an exact line-search step on the batch cost.
theta = np.zeros(d)
num_iters = 1000
cost = np.zeros(num_iters)
B = 50   # batch size

for itr in range(num_iters):
    idx = np.random.randint(N, size=B)
    Xb = X[idx, :]
    yb = y[idx]
    XbtXb = Xb.T @ Xb
    grad = Xb.T @ (Xb @ theta - yb)
    direction = grad
    alpha = (grad @ direction) / ((XbtXb @ direction) @ direction)
    theta = theta - alpha * direction
    # Cost is tracked on the FULL data set, not just the batch.
    cost[itr] = np.linalg.norm(X @ theta - y)**2 / N

# Full-data cost per iteration (log-scaled iteration axis).
fig = plt.figure(figsize=(8, 5))
plt.semilogx(cost, 'o')
import numpy as np
import matplotlib.pyplot as plt

np.set_printoptions(precision=2, suppress=True)
plt.rcParams.update({'font.size': 22})

# Setup the problem: N noisy samples of the line 0.5 + 0.25*x on [-1, 1].
N = 50
x = 2*np.random.rand(N)-1
a = np.array([0.5, 0.25])
y = a[0] + a[1]*x + 0.25*np.random.randn(N)

# Construct X matrix with columns [1, x].
d = 2
X = np.zeros((N, d))
for j in range(d):
    X[:,j] = x**j

# Setup the grid for visualization of the log-cost surface.
xgrid = np.linspace(0.5-1, 0.5+1, 100)
ygrid = np.linspace(0.25-1,0.25+1,100)
theta0, theta1 = np.meshgrid(xgrid, ygrid)
cost = np.zeros((len(xgrid),len(ygrid)))
for i in range(len(xgrid)):
    for j in range(len(ygrid)):
        theta_tmp = np.hstack((theta0[i,j],theta1[i,j]))
        cost[i,j] = np.log(np.linalg.norm(np.dot(X, theta_tmp)-y)**2/N)

# Initialize stochastic gradient descent.
# (Dead code removed from the original: a full-data XtX that was
# immediately overwritten inside the loop, and an unused dJ_old buffer
# left over from the momentum cell.)
max_itr = 1000
theta = np.array([-0.5, -0.75])
theta_store = np.zeros((d,max_itr+1))
theta_store[0,0] = -0.5
theta_store[1,0] = -0.75
B = 50   # batch size (equals N here, so each batch is a bootstrap resample)

# Stochastic gradient descent: each iteration draws a random batch with
# replacement and takes an exact line-search step on the batch cost.
for itr in range(max_itr):
    idx = np.random.randint(N, size=B)
    Xsub = X[idx,:]
    ysub = y[idx]
    XtX = np.dot( np.transpose(Xsub), Xsub)   # per-batch Gram matrix
    dJ = np.dot(np.transpose(Xsub), np.dot(Xsub, theta)-ysub)
    dd = dJ
    alpha = np.dot(dJ, dd) / np.dot(np.dot(XtX, dd), dd)
    theta = theta - alpha*dd
    theta_store[:,itr+1] = theta

# Plotting: contour of the log-cost with the first ten SGD steps overlaid.
fig = plt.figure(figsize = (8,5))
plt.contourf(xgrid, ygrid, cost)
plt.axis('square')
for i in range(10):
    plt.plot(theta_store[0,i:i+2], theta_store[1,i:i+2], c='w',linewidth=2)
# Notebook-only cell: `%%shell` is IPython magic and is a syntax error in a
# plain .py file, so it is kept here as a comment for reference.
# %%shell
# jupyter nbconvert --to html /content/ECE595_lecture05_gradient.ipynb