;;; backprop-R-stalingrad.vlad
(include "common-stalingrad")
;;; Representation for weights:
;;; list with one element for each layer following the input;
;;; each such list has one element for each unit in that layer;
;;; which consists of a bias, followed by the weights for each
;;; unit in the previous layer.
;;; Basic MLP
;;; Weighted input of one unit: bias + sum_i w_i * activity_i.
;;; The (bias ws) parameter list destructures one unit's weight list —
;;; a bias followed by the weights for each unit in the previous layer —
;;; per the representation comment above.
(define ((sum-activities activities) bias ws)
((reduce + bias) ((map2 *) ws activities)))
;;; Weighted inputs of every unit in one layer: maps the per-unit
;;; summation over ws-layer, the list of that layer's weight lists,
;;; given activities, the previous layer's activation vector.
(define (sum-layer activities ws-layer)
((map (sum-activities activities)) ws-layer))
;;; Logistic activation: 1/(1+e^-x), squashing any real into (0,1).
(define (sigmoid x)
 (let ((e (exp (- 0 x)))) (/ 1 (+ 1 e))))
;;; Run the network on input activations in: for each layer, compute
;;; the weighted sums and squash them through sigmoid, then recurse on
;;; the remaining layers. An empty ws-layers means in is already the
;;; output, so it is returned as-is.
(define ((forward-pass ws-layers) in)
(if (null? ws-layers)
in
((forward-pass (cdr ws-layers))
((map sigmoid) (sum-layer in (first ws-layers))))))
;;; Sum-of-squares error of the network ws-layers over dataset.
;;; Each dataset entry destructures as (list in target); per-example
;;; error is 0.5 * |forward(in) - target|^2, summed over all examples.
(define ((error-on-dataset dataset) ws-layers)
((reduce + 0)
((map (lambda ((list in target))
(* 0.5
(magnitude-squared (v- ((forward-pass ws-layers) in) target)))))
dataset)))
;;; Optimization of the sort used with MLPs and backpropagation,
;;; often called "vanilla backprop"
;;; Scaled structure subtraction
;;; Scaled structure subtraction: walk x and y in parallel, replacing
;;; every real leaf x_i of x with x_i - k*y_i. Leaves that are neither
;;; reals nor pairs (e.g. the '() terminating a list) pass through
;;; unchanged.
(define (s-k* x k y)
 (cond ((pair? x)
        (cons (s-k* (car x) k (car y))
              (s-k* (cdr x) k (cdr y))))
       ((real? x) (- x (* k y)))
       (else x)))
;;; Vanilla gradient optimization.
;;; Gradient minimize f starting at w0 for n iterations via
;;; w(t+1) = w(t) - eta * grad_w f.
;;; returns the last f(w)
;;; Vanilla gradient descent: w(t+1) = w(t) - eta * grad_w f(w(t)),
;;; for n iterations starting from w0; returns the last f(w).
;;; (*j f) applied to the tagged point (*j w0) yields a pair of the
;;; tagged primal value fw and the reverse-phase closure f-reverse;
;;; feeding f-reverse a sensitivity of 1 yields the sensitivities.
;;; NOTE(review): the cdr appears to select the argument (weight)
;;; sensitivity from the pair the reverse phase returns — confirm
;;; against the Stalingrad documentation.
(define (vanilla f w0 n eta)
(let (((cons fw f-reverse) ((*j f) (*j w0))))
(if (zero? n)
(*j-inverse fw)
(vanilla f
(s-k* w0 eta (cdr (unsensitize (f-reverse (sensitize 1)))))
(- n 1)
eta))))
;;; Allow compiler to grok structure of sexpr but not the numbers at
;;; the leaves
;;; Rebuild a nested sexpr, wrapping every real leaf with the (real ...)
;;; primitive so the compiler specializes on the structure but treats
;;; the numbers at the leaves as unknown. Other leaves pass through.
(define (map-real x)
 (cond ((pair? x)
        (cons (map-real (car x)) (map-real (cdr x))))
       ((real? x) (real x))
       (else x)))
;;; XOR network
;;; Initial weights for a 2-2-1 XOR network: a hidden layer of two
;;; units and an output layer of one, each unit a zero bias followed by
;;; its input weights. map-real hides the literal numbers from the
;;; compiler so it specializes on structure only (see comment above).
(define (xor-ws0)
(map-real '(((0 -0.284227 1.16054) (0 0.617194 1.30467))
((0 -0.084395 0.648461)))))
;;; XOR truth table as ((inputs) (target)) training pairs.
(define (xor-data)
'(((0 0) (0))
((0 1) (1))
((1 0) (1))
((1 1) (0))))
;;; Driver: train the XOR network for 1000000 gradient steps with
;;; learning rate 0.3 and print the final sum-of-squares error.
(write-real
(vanilla (error-on-dataset (xor-data)) (xor-ws0) (real 1000000) 0.3))
;;; Generated by GNU enscript 1.6.4.