;; -*- Mode: Irken -*-

(include "lib/basis.scm")
(include "lib/map.scm")
(include "lib/codecs/base64.scm")
(include "lib/codecs/hex.scm")

(define (get-ct)
  (pipe list->string
        generator->list
        b64-dec
        file-char-generator
        (file/open-read "01_06.txt")
        ))

(define (str-ref-int s i)
  (char->int (string-ref s i)))

(define (hamming-distance a b)
  (let ((alen (string-length a))
        (blen (string-length b))
        (dist 0))
    (assert (= alen blen))
    (for-range i alen
      (inc! dist (popcount
                  (logxor (str-ref-int a i)
                          (str-ref-int b i)))))
    dist))

(define (string->chunks s size)
  (let ((slen (string-length s))
        (n (/ slen size))
        (chunks '()))
    (for-range i n
      (PUSH chunks (substring s (* i size) (* (+ 1 i) size))))
    (reverse chunks)))

;; generate combinations in the more 'interesting' order (i.e., reverse lexicographic).
(define (rcombinations xs k)
  (makegen emit
    (if (= k 0)
        (emit (list:nil))
        (match xs with
          () -> #u
          (hd . tl)
          -> (begin
               (for sub (rcombinations tl k)
                 (emit sub))
               (for sub (rcombinations tl (- k 1))
                 (emit (list:cons hd sub))))
          ))))

(define (guess-keysize ct)
  (let ((min-dist 100000)
        (best-guess 0))
    (for-range* keysize 2 40
      (let ((chunks (string->chunks ct keysize))
            (combs (gen-take 20 (rcombinations chunks 2)))
            (dist 0))
        (for-list comb combs
          (inc! dist (hamming-distance (first comb) (second comb))))
        (let ((ndist (/ (* 100 dist) (* 20 keysize))))
          (printf "keysize: " (lpad 2 (int keysize))
                  " distance: " (lpad 4 (int dist))
                  " normalized distance: " (int ndist)
                  "\n")
          (when (< ndist min-dist)
            (set! min-dist ndist)
            (set! best-guess keysize)))))
    best-guess))

(define (transpose-blocks ct keysize)
  (let ((ctlen (string-length ct))
        (blen (/ ctlen keysize))
        (blocks '()))
    (for-range i keysize
      (let ((block '()))
        (for-range j blen
          (PUSH block (string-ref ct (+ i (* j keysize)))))
        (PUSH blocks (list->string (reverse block)))))
    (reverse blocks)))

;; --------------------------------------------------------------------------------
;; from 01_04.scm

(define (xor-char ch n)
  (int->char (logxor n (char->int ch))))

(define (xor-onebyte s key)
  (let ((slen (string-length s))
        (r (copy-string s slen)))
    (for-range i slen
      (string-set! r i (xor-char (string-ref s i) key)))
    r))

;; https://en.wikipedia.org/wiki/Letter_frequency#Relative_frequencies_of_letters_in_the_English_language
;; the percentages are encoded as integers. A-Z.  e.g. A == 8.167%, E = 12.702%.
(define letter-scores
  #(8167 1492 2782 4253 12702 2228 2015 6094 6966 0153 0772 4025 2406
    6749 7507 1929 0095 5987 6327 9056 2758 0978 2360 0150 1974 0074))

;; make an int[256] vector of scores, filling in A-Za-z.
(define char-scores
  (let ((v (make-vector 256 -1000))) ;; note default.
    (for-range i 256
      (cond ((and (>= i 65) (< i (+ 65 26))) ;; uppercase letter
             (set! v[i] letter-scores[(- i 65)]))
            ((and (>= i 97) (< i (+ 97 26)))
             (set! v[i] letter-scores[(- i 97)]))
            )
      ;; bias for printable characters
      (when (printable? (int->char i))
        (inc! v[i] 1000)))
    ;; these values I just made up.
    (inc! v[32] 2000) ;; space
    (inc! v[46] 1000) ;; period
    v))

;; score a string
(define (get-score s)
  (let ((r 0))
    (for-string ch s
      (inc! r char-scores[(char->int ch)]))
    r))

(define (get-scores ct)
  (let ((scores (tree/empty))
        (results '()))
    (for-range i 256
      (let ((pt (xor-onebyte ct i))
            (score (get-score pt)))
        ;;(printf "i: " (int i) " score: " (int score) " pt: " (string (substring pt 0 80)) "\n")
        (tree/insert! scores int-cmp score (:tuple i pt))))
    scores))

(define (get-top5 m)
  (gen-take 5 (tree/make-reverse-generator m)))

(define (print-top top)
  (for-list item top
    (match item with
      (:tuple score (:tuple key pt))
      -> (printf (lpad 2 (hex key)) " " (lpad 8 (int score)) " " (string pt) "\n")
      )))
;; --------------------------------------------------------------------------------

(define (apply-key ct key)
  (let ((ctlen (string-length ct))
        (klen (string-length key))
        (result (make-string ctlen)))
    (for-range i ctlen
      (string-set! result i
                   (xor-char
                    (string-ref ct i)
                    (str-ref-int key (mod i klen)))))
    result))

(let ((ct (get-ct))
      (keysize (guess-keysize ct))
      ;;(keysize 29)
      (blocks (transpose-blocks ct keysize))
      (keychars '()))
  (printf "best-guess keysize: " (int keysize) "\n")
  (for-list block blocks
    (printf "block: " (string->hex block) "\n")
    (let ((scores (get-scores block)))
      (print-top (get-top5 scores))
      (match (tree/max scores) with
        (:tuple score (:tuple key pt))
        -> (PUSH keychars (int->char key)))
      ))
  (printf " -----------\n")
  (let ((key (list->string (reverse keychars))))
    (printf "decode? " (string (apply-key ct key)) "\n")
    (printf "key? " (string key) "\n")
    ))

;;(printf "ct: " (string->hex (get-ct)) "\n")
;;(printf "popcount 73 = " (int (popcount 73)) "\n")
;;(printf "dist " (int (hamming-distance "this is a test" "wokka wokka!!!")) "\n")