backprop-Fs-stalingrad.vlad

(include "common-stalingrad")

;;; Representation for weights:
;;;  list with one element for each layer following the input;
;;;  each such list has one element for each unit in that layer;
;;;  which consists of a bias, followed by the weights for each
;;;  unit in the previous layer.

;;; Basic MLP

(define ((sum-activities activities) bias ws)
 ((reduce + bias) ((map2 *) ws activities)))

(define (sum-layer activities ws-layer)
 ((map (sum-activities activities)) ws-layer))

(define (sigmoid x) (/ 1 (+ (exp (- 0 x)) 1)))

(define ((forward-pass ws-layers) in)
 (if (null? ws-layers)
     in
     ((forward-pass (cdr ws-layers))
      ((map sigmoid) (sum-layer in (first ws-layers))))))

(define ((error-on-dataset dataset) ws-layers)
 ((reduce + 0)
  ((map (lambda ((list in target))
	 (* 0.5
	    (magnitude-squared (v- ((forward-pass ws-layers) in) target)))))
   dataset)))

;;; Optimization of the sort used with MLPs and backpropagation,
;;; often called "vanilla backprop"

;;; Scaled structure subtraction

(define (s-k* x k y)
 (cond ((real? x) (- x (* k y)))
       ((pair? x) (cons (s-k* (car x) k (car y))
			(s-k* (cdr x) k (cdr y))))
       (else x)))

;;; Vanilla gradient optimization.
;;; Gradient minimize f starting at w0 for n iterations via
;;; w(t+1) = w(t) - eta * grad_w f.
;;; returns the last f(w)

(define (weight-e ws l u w)
 ((map-n
   (lambda (li)
    (let ((ll (list-ref ws li)))
     ((map-n (lambda (ui)
	      ((map-n (lambda (wi) (if (and (= li l) (= ui u) (= wi w)) 1 0)))
	       (length (list-ref ll ui)))))
      (length ll)))))
  (length ws)))

(define ((weight-gradient f) ws)
 ((map-n
   (lambda (li)
    (let ((ll (list-ref ws li)))
     ((map-n
       (lambda (ui)
	((map-n (lambda (wi)
		 (unperturb
		  (tangent
		   ((j* f) (bundle ws (perturb (weight-e ws li ui wi))))))))
	 (length (list-ref ll ui)))))
      (length ll)))))
  (length ws)))

(define (vanilla f w0 n eta)
 (if (zero? n)
     (f w0)
     (vanilla f (s-k* w0 eta ((weight-gradient f) w0)) (- n 1) eta)))

;;; Allow compiler to grok structure of sexpr but not the numbers at
;;; the leaves

(define (map-real x)
 (cond ((real? x) (real x))
       ((pair? x) (cons (map-real (car x)) (map-real (cdr x))))
       (else x)))

;;; XOR network

(define (xor-ws0)
 (map-real '(((0 -0.284227 1.16054) (0 0.617194 1.30467))
	     ((0 -0.084395 0.648461)))))

(define (xor-data)
 '(((0 0) (0))
   ((0 1) (1))
   ((1 0) (1))
   ((1 1) (0))))

(write-real
 (vanilla (error-on-dataset (xor-data)) (xor-ws0) (real 1000000) 0.3))

Generated by GNU enscript 1.6.4.