;; -*- Mode: Irken -*- (include "lib/basis.scm") (include "lib/map.scm") (include "lib/codecs/base64.scm") (include "lib/codecs/hex.scm") (define (get-ct) (pipe list->string generator->list b64-dec file-char-generator (file/open-read "01_06.txt") )) (define (str-ref-int s i) (char->int (string-ref s i))) (define (hamming-distance a b) (let ((alen (string-length a)) (blen (string-length b)) (dist 0)) (assert (= alen blen)) (for-range i alen (inc! dist (popcount (logxor (str-ref-int a i) (str-ref-int b i))))) dist)) (define (string->chunks s size) (let ((slen (string-length s)) (n (/ slen size)) (chunks '())) (for-range i n (PUSH chunks (substring s (* i size) (* (+ 1 i) size)))) (reverse chunks))) ;; generate combinations in the more 'interesting' order (i.e., reverse lexicographic). (define (rcombinations xs k) (makegen emit (if (= k 0) (emit (list:nil)) (match xs with () -> #u (hd . tl) -> (begin (for sub (rcombinations tl k) (emit sub)) (for sub (rcombinations tl (- k 1)) (emit (list:cons hd sub)))) )))) (define (guess-keysize ct) (let ((min-dist 100000) (best-guess 0)) (for-range* keysize 2 40 (let ((chunks (string->chunks ct keysize)) (combs (gen-take 20 (rcombinations chunks 2))) (dist 0)) (for-list comb combs (inc! dist (hamming-distance (first comb) (second comb)))) (let ((ndist (/ (* 100 dist) (* 20 keysize)))) (printf "keysize: " (lpad 2 (int keysize)) " distance: " (lpad 4 (int dist)) " normalized distance: " (int ndist) "\n") (when (< ndist min-dist) (set! min-dist ndist) (set! best-guess keysize))))) best-guess)) (define (transpose-blocks ct keysize) (let ((ctlen (string-length ct)) (blen (/ ctlen keysize)) (blocks '())) (for-range i keysize (let ((block '())) (for-range j blen (PUSH block (string-ref ct (+ i (* j keysize))))) (PUSH blocks (list->string (reverse block))))) (reverse blocks))) ;; -------------------------------------------------------------------------------- ;; from 01_04.scm (define (xor-char ch n) (int->char (logxor n (char->int ch)))) (define (xor-onebyte s key) (let ((slen (string-length s)) (r (copy-string s slen))) (for-range i slen (string-set! r i (xor-char (string-ref s i) key))) r)) ;; https://en.wikipedia.org/wiki/Letter_frequency#Relative_frequencies_of_letters_in_the_English_language ;; the percentages are encoded as integers. A-Z. e.g. A == 8.167%, E = 12.702%. (define letter-scores #(8167 1492 2782 4253 12702 2228 2015 6094 6966 0153 0772 4025 2406 6749 7507 1929 0095 5987 6327 9056 2758 0978 2360 0150 1974 0074)) ;; make an int[256] vector of scores, filling in A-Za-z. (define char-scores (let ((v (make-vector 256 -1000))) ;; note default. (for-range i 256 (cond ((and (>= i 65) (< i (+ 65 26))) ;; uppercase letter (set! v[i] letter-scores[(- i 65)])) ((and (>= i 97) (< i (+ 97 26))) (set! v[i] letter-scores[(- i 97)])) ) ;; bias for printable characters (when (printable? (int->char i)) (inc! v[i] 1000))) ;; these values I just made up. (inc! v[32] 2000) ;; space (inc! v[46] 1000) ;; period v)) ;; score a string (define (get-score s) (let ((r 0)) (for-string ch s (inc! r char-scores[(char->int ch)])) r)) (define (get-scores ct) (let ((scores (tree/empty)) (results '())) (for-range i 256 (let ((pt (xor-onebyte ct i)) (score (get-score pt))) ;;(printf "i: " (int i) " score: " (int score) " pt: " (string (substring pt 0 80)) "\n") (tree/insert! scores int-cmp score (:tuple i pt)))) scores)) (define (get-top5 m) (gen-take 5 (tree/make-reverse-generator m))) (define (print-top top) (for-list item top (match item with (:tuple score (:tuple key pt)) -> (printf (lpad 2 (hex key)) " " (lpad 8 (int score)) " " (string pt) "\n") ))) ;; -------------------------------------------------------------------------------- (define (apply-key ct key) (let ((ctlen (string-length ct)) (klen (string-length key)) (result (make-string ctlen))) (for-range i ctlen (string-set! result i (xor-char (string-ref ct i) (str-ref-int key (mod i klen))))) result)) (let ((ct (get-ct)) (keysize (guess-keysize ct)) ;;(keysize 29) (blocks (transpose-blocks ct keysize)) (keychars '())) (printf "best-guess keysize: " (int keysize) "\n") (for-list block blocks (printf "block: " (string->hex block) "\n") (let ((scores (get-scores block))) (print-top (get-top5 scores)) (match (tree/max scores) with (:tuple score (:tuple key pt)) -> (PUSH keychars (int->char key))) )) (printf " -----------\n") (let ((key (list->string (reverse keychars)))) (printf "decode? " (string (apply-key ct key)) "\n") (printf "key? " (string key) "\n") )) ;;(printf "ct: " (string->hex (get-ct)) "\n") ;;(printf "popcount 73 = " (int (popcount 73)) "\n") ;;(printf "dist " (int (hamming-distance "this is a test" "wokka wokka!!!")) "\n")