;; -*- Mode: Irken -*-

(include "lib/basis.scm")
(include "lib/map.scm")
(include "lib/codecs/hex.scm")

;; XOR the integer code of character `ch` with the integer `n` and
;; return the result as a character.
(define (xor-char ch n)
  (let ((code (char->int ch)))
    (int->char (logxor n code))))

;; Return a string of the same length as `s` in which every character
;; of `s` has been XORed with the single-byte `key` (see xor-char).
;; The result is built in a copy obtained from copy-string; each of its
;; positions is then overwritten from the corresponding position of `s`.
(define (xor-onebyte s key)
  (let ((n (string-length s))
        (out (copy-string s n)))
    (for-range pos n
      (let ((xored (xor-char (string-ref s pos) key)))
        (string-set! out pos xored)))
    out))

;; Relative frequencies of letters in English text, see
;; https://en.wikipedia.org/wiki/Letter_frequency#Relative_frequencies_of_letters_in_the_English_language
;; One entry per letter, A-Z in order (index 0 = A, index 25 = Z).
;; Each percentage is stored as an integer scaled by 1000,
;; e.g. A = 8.167% -> 8167, E = 12.702% -> 12702.
;; Values below 1% are written with a leading zero (e.g. 0153 for J = 0.153%).
;; NOTE(review): assumes the reader parses leading-zero literals as decimal -- confirm.
(define letter-scores
  #(8167 1492 2782 4253 12702 2228 2015 6094 6966 0153 0772 4025 2406
    6749 7507 1929 0095 5987 6327 9056 2758 0978 2360 0150 1974 0074))

;; Score table indexed by byte value (0..255).  The uppercase (65..90)
;; and lowercase (97..122) ASCII letters both receive the letter-scores
;; entry for their letter; every other byte keeps the initial score 0.
(define char-scores
  (let ((table (make-vector 256 0)))
    ;; walk the 26 letters once, filling both case variants per step
    (for-range n 26
      (set! table[(+ 65 n)] letter-scores[n])
      (set! table[(+ 97 n)] letter-scores[n]))
    table))

;; Score a string: the sum of the char-scores entries of all of its
;; characters.  Only letters contribute; everything else adds 0.
(define (score s)
  (let ((total 0))
    (for-range pos (string-length s)
      (set! total (+ total char-scores[(char->int (string-ref s pos))])))
    total))

;; The given ciphertext, written as a hex string and converted with
;; hex->string (presumably into the raw bytes it encodes -- see
;; lib/codecs/hex.scm).  It is the input passed to get-scores by the
;; top-level expression at the end of this file.
(define ct (hex->string "1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736"))

;; Try every single-byte key (0..255) against the ciphertext `ct`.
;; Each candidate plaintext is scored with `score` and inserted into a
;; tree keyed by that score; the stored value is (:tuple key plaintext).
;; Returns the populated tree.
;; NOTE(review): different keys can produce the same score; confirm how
;; tree/insert! treats duplicate keys so that no candidate is lost.
(define (get-scores ct)
  (let ((scores (tree/empty)))
    (for-range i 256
      (let ((pt (xor-onebyte ct i)))
        (tree/insert! scores int-cmp (score pt) (:tuple i pt))))
    scores))

;; Return the first five entries produced by walking the tree `m` with
;; its reverse generator, as a list.
(define (get-top5 m)
  (let ((gen (tree/make-reverse-generator m))
        (entries (generator->list gen)))
    (slice entries 0 5)))

;; Print one line per entry of `top`.  Each entry has the shape
;; (:tuple score (:tuple key plaintext)); the line shows the key in hex
;; (padded to 2), the score (padded to 8) and the plaintext.
(define (print-top top)
  (for-list entry top
    (match entry with
      (:tuple total decoded)
      -> (match decoded with
           (:tuple key text)
           -> (printf (lpad 2 (hex key)) " "
                      (lpad 8 (int total)) " "
                      (string text) "\n")))))

;; Entry point: score every single-byte key against `ct`, keep the five
;; entries returned by get-top5, and print them.
(let ((scored (get-scores ct))
      (best (get-top5 scored)))
  (printf "top 5 decodes:\n")
  (print-top best))