# Example 1: Baseline logistic regression
import numpy as np
import matplotlib.pyplot as plt
import cvxpy as cvx
import scipy.stats as stats
np.set_printoptions(precision=4, suppress=True)
# Generate data
mu0 = 0
mu1 = 10
sigma0 = 3
sigma1 = 2
N0 = 25
N1 = 25
N = N0+N1
x0 = stats.norm.rvs(mu0,sigma0,N0)
x1 = stats.norm.rvs(mu1,sigma1,N1)
y0 = np.zeros(N0)
y1 = np.ones(N1)
# Formulate logistic regression
x = np.hstack((x0,x1))
y = np.hstack((y0,y1)).reshape((N,1))
X = np.vstack((x, np.ones(N))).T
lambd = 0.0001
#==== CVX =====#
theta = cvx.Variable((2,1))
loss = - cvx.sum(cvx.multiply(y, X @ theta)) \
+ cvx.sum(cvx.log_sum_exp( cvx.hstack([np.zeros((N,1)), X @ theta]), axis=1 ) )
reg = cvx.sum_squares(theta)
prob = cvx.Problem(cvx.Minimize(loss/N + lambd*reg))
prob.solve()
w = theta.value
#==============#
# Display
xs = np.linspace(-10,20,1000)
ys = 1/(1+np.exp(-(w[0]*xs + w[1])))
plt.plot(x,y,'o',markersize=10)
plt.plot(xs,ys,linewidth=6)
# Example 2: If there are outliers
import numpy as np
import matplotlib.pyplot as plt
import cvxpy as cvx
import scipy.stats as stats
np.set_printoptions(precision=4, suppress=True)
# Generate data
mu0 = 0
mu1 = 10
sigma0 = 3
sigma1 = 2
N0 = 25
N1 = 25
N = N0+N1
x0 = stats.norm.rvs(mu0,sigma0,N0)
x1 = stats.norm.rvs(mu1,sigma1,N1)
y0 = np.zeros(N0)
y1 = np.ones(N1)
# outliers
x0[0] = -8
x0[1] = -15
x1[0] = 20
x1[1] = 25
# Formulate logistic regression
x = np.hstack((x0,x1))
y = np.hstack((y0,y1)).reshape((N,1))
X = np.vstack((x, np.ones(N))).T
lambd = 0.0001
#==== CVX =====#
theta = cvx.Variable((2,1))
loss = - cvx.sum(cvx.multiply(y, X @ theta)) \
+ cvx.sum(cvx.log_sum_exp( cvx.hstack([np.zeros((N,1)), X @ theta]), axis=1 ) )
reg = cvx.sum_squares(theta)
prob = cvx.Problem(cvx.Minimize(loss/N + lambd*reg))
prob.solve()
w = theta.value
#==============#
# Display
xs = np.linspace(-20,25,1000)
ys = 1/(1+np.exp(-(w[0]*xs + w[1])))
zs = (np.sign(ys-0.5)+1)/2
plt.plot(x,y,'o',markersize=10)
plt.plot(xs,ys,linewidth=6)
plt.plot(xs,zs,linewidth=6)
plt.axis([-20,25,-0.2,1.2])
# Example 3: Compare with linear regression
lambd = 0.00001
#==== CVX =====#
theta = cvx.Variable((2,1))
loss = cvx.sum_squares( X@theta - y)
reg = cvx.sum_squares(theta)
prob = cvx.Problem(cvx.Minimize(loss/N + lambd*reg))
prob.solve()
w = theta.value
#==============#
# Display
xs = np.linspace(-20,25,1000)
ys = xs*w[0] + w[1]
zs = (np.sign(ys-0.5)+1)/2
plt.plot(x,y,'o',markersize=10)
plt.plot(xs,ys,linewidth=6)
plt.plot(xs,zs,linewidth=6)
plt.axis([-20,25,-0.2,1.2])
%%shell
jupyter nbconvert --to html /content/ECE595_lecture11.ipynb