Logistic regression demo

In [28]:
# Example 1: Baseline logistic regression
import numpy as np
import matplotlib.pyplot as plt
import cvxpy as cvx
import scipy.stats as stats
np.set_printoptions(precision=4, suppress=True)

# Generate data
mu0 = 0
mu1 = 10
sigma0 = 3
sigma1 = 2

N0 = 25
N1 = 25
N  = N0+N1

x0 = stats.norm.rvs(mu0,sigma0,N0)
x1 = stats.norm.rvs(mu1,sigma1,N1)
y0 = np.zeros(N0)
y1 = np.ones(N1)


# Formulate logistic regression
x = np.hstack((x0,x1))
y = np.hstack((y0,y1)).reshape((N,1))
X = np.vstack((x, np.ones(N))).T

lambd       = 0.0001

#==== CVX =====#
theta       = cvx.Variable((2,1))
loss        = - cvx.sum(cvx.multiply(y, X @ theta)) \
              + cvx.sum(cvx.log_sum_exp( cvx.hstack([np.zeros((N,1)), X @ theta]), axis=1 ) )
reg         = cvx.sum_squares(theta)
prob        = cvx.Problem(cvx.Minimize(loss/N + lambd*reg))
prob.solve()
w = theta.value
#==============#


# Display
xs = np.linspace(-10,20,1000)
ys = 1/(1+np.exp(-(w[0]*xs + w[1])))
plt.plot(x,y,'o',markersize=10)
plt.plot(xs,ys,linewidth=6)
Out[28]:
[<matplotlib.lines.Line2D at 0x7f717e323150>]
In [25]:
# Example 2: If there are outliers

import numpy as np
import matplotlib.pyplot as plt
import cvxpy as cvx
import scipy.stats as stats
np.set_printoptions(precision=4, suppress=True)

# Generate data
mu0 = 0
mu1 = 10
sigma0 = 3
sigma1 = 2

N0 = 25
N1 = 25
N  = N0+N1

x0 = stats.norm.rvs(mu0,sigma0,N0)
x1 = stats.norm.rvs(mu1,sigma1,N1)
y0 = np.zeros(N0)
y1 = np.ones(N1)

# outliers
x0[0] = -8
x0[1] = -15
x1[0] = 20
x1[1] = 25


# Formulate logistic regression
x = np.hstack((x0,x1))
y = np.hstack((y0,y1)).reshape((N,1))
X = np.vstack((x, np.ones(N))).T

lambd       = 0.0001
#==== CVX =====#
theta       = cvx.Variable((2,1))
loss        = - cvx.sum(cvx.multiply(y, X @ theta)) \
              + cvx.sum(cvx.log_sum_exp( cvx.hstack([np.zeros((N,1)), X @ theta]), axis=1 ) )
reg         = cvx.sum_squares(theta)
prob        = cvx.Problem(cvx.Minimize(loss/N + lambd*reg))
prob.solve()
w = theta.value
#==============#


# Display
xs = np.linspace(-20,25,1000)
ys = 1/(1+np.exp(-(w[0]*xs + w[1])))
zs = (np.sign(ys-0.5)+1)/2
plt.plot(x,y,'o',markersize=10)
plt.plot(xs,ys,linewidth=6)
plt.plot(xs,zs,linewidth=6)
plt.axis([-20,25,-0.2,1.2])
Out[25]:
(-20.0, 25.0, -0.2, 1.2)
In [27]:
# Example 3: Compare with linear regression

lambd       = 0.00001
#==== CVX =====#
theta       = cvx.Variable((2,1))
loss        = cvx.sum_squares( X@theta - y)
reg         = cvx.sum_squares(theta)
prob        = cvx.Problem(cvx.Minimize(loss/N + lambd*reg))
prob.solve()
w = theta.value
#==============#

# Display
xs = np.linspace(-20,25,1000)
ys = xs*w[0] + w[1]
zs = (np.sign(ys-0.5)+1)/2
plt.plot(x,y,'o',markersize=10)
plt.plot(xs,ys,linewidth=6)
plt.plot(xs,zs,linewidth=6)
plt.axis([-20,25,-0.2,1.2])
Out[27]:
(-20.0, 25.0, -0.2, 1.2)
In [ ]:
%%shell
jupyter nbconvert --to html /content/ECE595_lecture11.ipynb