;;; Commentary:
-;;; Lm is a relatively non-participatory game in which a robot
-;;; attempts to maneuver towards a tree at the center of the window
-;;; based on unique olfactory cues from each of the 4 directions. If
-;;; the smell of the tree increases, then the weights in the robot's
-;;; brain are adjusted to encourage this odor-driven behavior in the
-;;; future. If the smell of the tree decreases, the robots weights are
-;;; adjusted to discourage a correct move.
-
-;;; In laymen's terms, the search space is initially flat. The point
-;;; of training is to "turn up the edges of the search space" so that
-;;; the robot rolls toward the center.
-
-;;; Further, do not become alarmed if the robot appears to oscillate
-;;; back and forth between two or a few positions. This simply means
-;;; it is currently caught in a local minimum and is doing its best to
-;;; work its way out.
-
-;;; The version of this program as described has a small problem. a
-;;; move in a net direction can produce gross credit assignment. for
-;;; example, if moving south will produce positive payoff, then, if in
-;;; a single move, one moves east,west and south, then both east and
-;;; west will be improved when they shouldn't
-
-;;; Many thanks to Yuri Pryadkin (yuri@rana.usc.edu) for this
-;;; concise problem description.
+;; Lm is a relatively non-participatory game in which a robot
+;; attempts to maneuver towards a tree at the center of the window
+;; based on unique olfactory cues from each of the 4 directions. If
+;; the smell of the tree increases, then the weights in the robot's
+;; brain are adjusted to encourage this odor-driven behavior in the
+;; future. If the smell of the tree decreases, the robots weights are
+;; adjusted to discourage a correct move.
+
+;; In laymen's terms, the search space is initially flat. The point
+;; of training is to "turn up the edges of the search space" so that
+;; the robot rolls toward the center.
+
+;; Further, do not become alarmed if the robot appears to oscillate
+;; back and forth between two or a few positions. This simply means
+;; it is currently caught in a local minimum and is doing its best to
+;; work its way out.
+
+;; The version of this program as described has a small problem. a
+;; move in a net direction can produce gross credit assignment. for
+;; example, if moving south will produce positive payoff, then, if in
+;; a single move, one moves east,west and south, then both east and
+;; west will be improved when they shouldn't
+
+;; Many thanks to Yuri Pryadkin (yuri@rana.usc.edu) for this
+;; concise problem description.
;;;_* Require
(eval-when-compile (require 'cl))
:type 'hook
:group 'lm)
-(defvar lm-mode-map nil
+(defvar lm-mode-map
+ (let ((map (make-sparse-keymap)))
+ ;; Key bindings for cursor motion.
+ (define-key map "y" 'lm-move-nw) ; y
+ (define-key map "u" 'lm-move-ne) ; u
+ (define-key map "b" 'lm-move-sw) ; b
+ (define-key map "n" 'lm-move-se) ; n
+ (define-key map "h" 'backward-char) ; h
+ (define-key map "l" 'forward-char) ; l
+ (define-key map "j" 'lm-move-down) ; j
+ (define-key map "k" 'lm-move-up) ; k
+
+ (define-key map [kp-7] 'lm-move-nw)
+ (define-key map [kp-9] 'lm-move-ne)
+ (define-key map [kp-1] 'lm-move-sw)
+ (define-key map [kp-3] 'lm-move-se)
+ (define-key map [kp-4] 'backward-char)
+ (define-key map [kp-6] 'forward-char)
+ (define-key map [kp-2] 'lm-move-down)
+ (define-key map [kp-8] 'lm-move-up)
+
+ (define-key map "\C-n" 'lm-move-down) ; C-n
+ (define-key map "\C-p" 'lm-move-up) ; C-p
+
+ ;; Key bindings for entering Human moves.
+ (define-key map "X" 'lm-human-plays) ; X
+ (define-key map "x" 'lm-human-plays) ; x
+
+ (define-key map " " 'lm-start-robot) ; SPC
+ (define-key map [down-mouse-1] 'lm-start-robot)
+ (define-key map [drag-mouse-1] 'lm-click)
+ (define-key map [mouse-1] 'lm-click)
+ (define-key map [down-mouse-2] 'lm-click)
+ (define-key map [mouse-2] 'lm-mouse-play)
+ (define-key map [drag-mouse-2] 'lm-mouse-play)
+
+ (define-key map [remap previous-line] 'lm-move-up)
+ (define-key map [remap next-line] 'lm-move-down)
+ (define-key map [remap beginning-of-line] 'lm-beginning-of-line)
+ (define-key map [remap end-of-line] 'lm-end-of-line)
+ (define-key map [remap undo] 'lm-human-takes-back)
+ (define-key map [remap advertised-undo] 'lm-human-takes-back)
+ map)
"Local keymap to use in Lm mode.")
-(if lm-mode-map nil
- (setq lm-mode-map (make-sparse-keymap))
-
- ;; Key bindings for cursor motion.
- (define-key lm-mode-map "y" 'lm-move-nw) ; y
- (define-key lm-mode-map "u" 'lm-move-ne) ; u
- (define-key lm-mode-map "b" 'lm-move-sw) ; b
- (define-key lm-mode-map "n" 'lm-move-se) ; n
- (define-key lm-mode-map "h" 'backward-char) ; h
- (define-key lm-mode-map "l" 'forward-char) ; l
- (define-key lm-mode-map "j" 'lm-move-down) ; j
- (define-key lm-mode-map "k" 'lm-move-up) ; k
-
- (define-key lm-mode-map [kp-7] 'lm-move-nw)
- (define-key lm-mode-map [kp-9] 'lm-move-ne)
- (define-key lm-mode-map [kp-1] 'lm-move-sw)
- (define-key lm-mode-map [kp-3] 'lm-move-se)
- (define-key lm-mode-map [kp-4] 'backward-char)
- (define-key lm-mode-map [kp-6] 'forward-char)
- (define-key lm-mode-map [kp-2] 'lm-move-down)
- (define-key lm-mode-map [kp-8] 'lm-move-up)
-
- (define-key lm-mode-map "\C-n" 'lm-move-down) ; C-n
- (define-key lm-mode-map "\C-p" 'lm-move-up) ; C-p
-
- ;; Key bindings for entering Human moves.
- (define-key lm-mode-map "X" 'lm-human-plays) ; X
- (define-key lm-mode-map "x" 'lm-human-plays) ; x
-
- (define-key lm-mode-map " " 'lm-start-robot) ; SPC
- (define-key lm-mode-map [down-mouse-1] 'lm-start-robot)
- (define-key lm-mode-map [drag-mouse-1] 'lm-click)
- (define-key lm-mode-map [mouse-1] 'lm-click)
- (define-key lm-mode-map [down-mouse-2] 'lm-click)
- (define-key lm-mode-map [mouse-2] 'lm-mouse-play)
- (define-key lm-mode-map [drag-mouse-2] 'lm-mouse-play)
-
- (define-key lm-mode-map [remap previous-line] 'lm-move-up)
- (define-key lm-mode-map [remap next-line] 'lm-move-down)
- (define-key lm-mode-map [remap beginning-of-line] 'lm-beginning-of-line)
- (define-key lm-mode-map [remap end-of-line] 'lm-end-of-line)
- (define-key lm-mode-map [remap undo] 'lm-human-takes-back)
- (define-key lm-mode-map [remap advertised-undo] 'lm-human-takes-back))
+
(defvar lm-emacs-won ()
"*For making font-lock use the winner's face for the line.")
;; its contents as a set, i.e. not considering the order of its elements. The
;; highest score is given to the "OOOO" qtuples because playing in such a
;; qtuple is winning the game. Just after this comes the "XXXX" qtuple because
-;; not playing in it is just loosing the game, and so on. Note that a
+;; not playing in it is just losing the game, and so on. Note that a
;; "polluted" qtuple, i.e. one containing at least one X and at least one O,
;; has score zero because there is no more any point in playing in it, from
;; both an attacking and a defending point of view.
;; these values will change (hopefully improve) the strength of the program
;; and may change its style (rather aggressive here).
-(defconst nil-score 7 "Score of an empty qtuple.")
-(defconst Xscore 15 "Score of a qtuple containing one X.")
-(defconst XXscore 400 "Score of a qtuple containing two X's.")
-(defconst XXXscore 1800 "Score of a qtuple containing three X's.")
-(defconst XXXXscore 100000 "Score of a qtuple containing four X's.")
-(defconst Oscore 35 "Score of a qtuple containing one O.")
-(defconst OOscore 800 "Score of a qtuple containing two O's.")
-(defconst OOOscore 15000 "Score of a qtuple containing three O's.")
-(defconst OOOOscore 800000 "Score of a qtuple containing four O's.")
-
-;; These values are not just random: if, given the following situation:
-;;
-;; . . . . . . . O .
-;; . X X a . . . X .
-;; . . . X . . . X .
-;; . . . X . . . X .
-;; . . . . . . . b .
-;;
-;; you want Emacs to play in "a" and not in "b", then the parameters must
-;; satisfy the inequality:
-;;
-;; 6 * XXscore > XXXscore + XXscore
-;;
-;; because "a" mainly belongs to six "XX" qtuples (the others are less
-;; important) while "b" belongs to one "XXX" and one "XX" qtuples. Other
-;; conditions are required to obtain sensible moves, but the previous example
-;; should illustrate the point. If you manage to improve on these values,
-;; please send me a note. Thanks.
-
-
-;; As we chose values 0, 1 and 6 to denote empty, X and O squares, the
-;; contents of a qtuple are uniquely determined by the sum of its elements and
-;; we just have to set up a translation table.
+(defconst lm-nil-score 7 "Score of an empty qtuple.")
(defconst lm-score-trans-table
- (vector nil-score Xscore XXscore XXXscore XXXXscore 0
- Oscore 0 0 0 0 0
- OOscore 0 0 0 0 0
- OOOscore 0 0 0 0 0
- OOOOscore 0 0 0 0 0
- 0)
+ (let ((Xscore 15) ; Score of a qtuple containing one X.
+ (XXscore 400) ; Score of a qtuple containing two X's.
+ (XXXscore 1800) ; Score of a qtuple containing three X's.
+ (XXXXscore 100000) ; Score of a qtuple containing four X's.
+ (Oscore 35) ; Score of a qtuple containing one O.
+ (OOscore 800) ; Score of a qtuple containing two O's.
+ (OOOscore 15000) ; Score of a qtuple containing three O's.
+ (OOOOscore 800000)) ; Score of a qtuple containing four O's.
+
+ ;; These values are not just random: if, given the following situation:
+ ;;
+ ;; . . . . . . . O .
+ ;; . X X a . . . X .
+ ;; . . . X . . . X .
+ ;; . . . X . . . X .
+ ;; . . . . . . . b .
+ ;;
+ ;; you want Emacs to play in "a" and not in "b", then the parameters must
+ ;; satisfy the inequality:
+ ;;
+ ;; 6 * XXscore > XXXscore + XXscore
+ ;;
+ ;; because "a" mainly belongs to six "XX" qtuples (the others are less
+ ;; important) while "b" belongs to one "XXX" and one "XX" qtuples.
+ ;; Other conditions are required to obtain sensible moves, but the
+ ;; previous example should illustrate the point. If you manage to
+ ;; improve on these values, please send me a note. Thanks.
+
+
+ ;; As we chose values 0, 1 and 6 to denote empty, X and O squares,
+ ;; the contents of a qtuple are uniquely determined by the sum of
+ ;; its elements and we just have to set up a translation table.
+ (vector lm-nil-score Xscore XXscore XXXscore XXXXscore 0
+ Oscore 0 0 0 0 0
+ OOscore 0 0 0 0 0
+ OOOscore 0 0 0 0 0
+ OOOOscore 0 0 0 0 0
+ 0))
"Vector associating qtuple contents to their score.")
;; qtuple, thus to be a winning move. Similarly, the only way for a square to
;; have a score between XXXXscore and OOOOscore is to belong to a "XXXX"
;; qtuple. We may use these considerations to detect when a given move is
-;; winning or loosing.
+;; winning or losing.
-(defconst lm-winning-threshold OOOOscore
+(defconst lm-winning-threshold
+ (aref lm-score-trans-table (+ 6 6 6 6)) ;; OOOOscore
"Threshold score beyond which an Emacs move is winning.")
-(defconst lm-loosing-threshold XXXXscore
+(defconst lm-losing-threshold
+ (aref lm-score-trans-table (+ 1 1 1 1)) ;; XXXXscore
"Threshold score beyond which a human move is winning.")
(setq lm-score-table (copy-sequence lm-saved-score-table))
;; No, compute it:
(setq lm-score-table
- (make-vector lm-vector-length (* 20 nil-score)))
+ (make-vector lm-vector-length (* 20 lm-nil-score)))
(let (i j maxi maxj maxi2 maxj2)
(setq maxi (/ (1+ lm-board-width) 2)
maxj (/ (1+ lm-board-height) 2)
(t
(setq score (aref lm-score-table square))
(lm-play-move square 1)
- (cond ((and (>= score lm-loosing-threshold)
+ (cond ((and (>= score lm-losing-threshold)
;; Just testing SCORE > THRESHOLD is not enough for
;; detecting wins, it just gives an indication that
;; we confirm with LM-FIND-FILLED-QTUPLE.
"Display a message asking for Human's move."
(message (if (zerop lm-number-of-human-moves)
"Your move? (move to a free square and hit X, RET ...)"
- "Your move?"))
- ;; This may seem silly, but if one omits the following line (or a similar
- ;; one), the cursor may very well go to some place where POINT is not.
- ;; FIXME: this can't be right!! --Stef
- (save-excursion (set-buffer (other-buffer))))
+ "Your move?")))
(defun lm-prompt-for-other-game ()
"Ask for another game, and start it."
(defun lm-weights-debug ()
(if lm-debug
- (progn (lm-print-wts) (lm-blackbox) (lm-print-y,s,noise)
+ (progn (lm-print-wts) (lm-blackbox) (lm-print-y-s-noise)
(lm-print-smell))))
;;;_ - Printing various things
(insert (format "%S\n" moves))))
-(defun lm-print-y,s,noise-int (direction)
+(defun lm-print-y-s-noise-int (direction)
(insert (format "%S:lm-y %S, s %S, noise %S \n"
(symbol-name direction)
(get direction 'y_t)
(get direction 'noise)
)))
-(defun lm-print-y,s,noise ()
+(defun lm-print-y-s-noise ()
(interactive)
(with-current-buffer "*lm-y,s,noise*"
(insert "==============================\n")
- (mapc 'lm-print-y,s,noise-int lm-directions)))
+ (mapc 'lm-print-y-s-noise-int lm-directions)))
(defun lm-print-smell-int (direction)
(insert (format "%S: smell: %S \n"