# Quick script to generate test data using 
# Reverend. 
#
# Usage: generate-tests.py > testdata.txt
# License: WTFPL
# 
# USE THIS SOFTWARE AT YOUR OWN RISK

from reverend import Bayes
import sys

import random

# Vocabulary for the generated test data: 100 random "words", each made of
# 1-5 distinct lowercase letters.
letters = list("abcdefghijklmnopqrstuvwxyz")


def _random_word():
    """Return a word of 1-5 distinct letters, in the order they were drawn."""
    length = random.randint(1, 5)
    return "".join(random.sample(letters, length))


words = [_random_word() for i in range(100)]

# Classifier that gets trained below and then queried for the expected
# probabilities.
guesser = Bayes()

# Skew the vocabulary: add extra copies of the first eleven words so that
# random.sample() draws them more often than the rest.
#
# Each word is wrapped in a list before multiplying. Multiplying the bare
# string would repeat its characters, and extend() would then append
# single letters instead of whole words.
for index, copies in enumerate((20, 15, 10, 10, 5, 5, 5, 5, 3, 3, 3)):
    words.extend([words[index]] * copies)
# Train three pools ("pool0".."pool2") and emit the matching JavaScript:
# a variable holding each pool's training text, followed by the train()
# call that feeds that variable to the JavaScript guesser.
for i in range(3):
    # 100 entries drawn without replacement from the word list.
    text = " ".join(random.sample(words, 100))
    pool = "pool%s" % i
    # print() with a single parenthesised argument behaves the same on
    # Python 2 (print statement) and Python 3 (print function).
    print('var %s = "%s";' % (pool, text))
    guesser.train(pool, text)
    print('guesser.train("%s", %s);' % (pool, pool))

# Emit 30 JavaScript assertions. Each one checks that guesser.guess() on a
# random 20-word sample returns the same result that Reverend produced here.
i = 0
while i < 30:
    test = " ".join(random.sample(words, 20))
    probs = guesser.guess(test)

    # Sometimes the probabilities will be the same but the
    # order will be different -- different sorting algorithms
    # or something. Easiest to just not print those tests.
    #
    # Comparing through a set covers every pair of results and, unlike
    # indexing probs[0], probs[1] and probs[2] directly, does not raise
    # IndexError when fewer than three entries come back.
    # NOTE(review): presumably guess() can omit a pool -- confirm against
    # Reverend.
    scores = [entry[1] for entry in probs]
    if len(set(scores)) != len(scores):
        continue

    # Convert each result to a list so it prints with square brackets
    # rather than a tuple's parentheses.
    probs = [list(tup) for tup in probs]
    # print() with a single parenthesised argument behaves the same on
    # Python 2 (print statement) and Python 3 (print function).
    print('var test%s = "%s";' % (i, test))
    print('var probs%s = %s;' % (i, probs))
    print('assertEquals(probs%s, guesser.guess(test%s));' % (i, i))
    i += 1