## A303
## David Martín Alaminos

from cdi import rd_float, rd_int

'''
Consider the following binary encoding of the alphabet A = {'a', 'b', 'c', 'd', ..., 'z'}:
'''
R = [('a','0000'), ('b','001000'), ('c','00101'), ('d','10000'), ('e','1100'),
     ('f','111000'), ('g','001001'), ('h','10001'), ('i','1001'), ('j','1101000000'),
     ('k','1010000'), ('l','11101'), ('m','110101'), ('n','0001'), ('o','1011'),
     ('p','111001'), ('q','110100001'), ('r','11011'), ('s','0011'), ('t','1111'),
     ('u','10101'), ('v','11010001'), ('w','1101001'), ('x','1010001'), ('y','101001'),
     ('z','1101000001'), (' ','01')]

Freq = [('a',575), ('b',128), ('c',263), ('d',285), ('e',913),
        ('f',173), ('g',133), ('h',313), ('i',599), ('j',6),
        ('k',84), ('l',335), ('m',235), ('n',596), ('o',689),
        ('p',192), ('q',8), ('r',508), ('s',567), ('t',706),
        ('u',334), ('v',69), ('w',119), ('x',73), ('y',164),
        ('z',7), (' ',1928)]

#encoding dictionary
x2c = dict(R)

#decoding dictionary
c2x = dict([(c,x) for x, c in R])

#char frequencies dictionary
x2freq = dict(Freq)

#Generic class for exception handling. Useful for
#generating the error traceback and additional info.
class CDIException(Exception):
    pass



'''
1.
Define a function E(X) that yields, given a message X in the alphabet A,
the encoded message. For example, E("aae") should yield '000000001100'.
'''

def E(X):
    return ''.join([x2c[x] for x in X]) #KeyError exception raised if x is not in x2c

print("a =", E("a"), "b =", E("b"), "c =", E("c"), "d =", E("d"), "e =", E("e"))
print("aae =", E("aae"))
print()



'''
2.
Generate a random message X of length 50 (say) in the alphabet A
with expected frequences Freq for the characters
'a', 'b', 'c', 'd', ..., 'z' and find the coded message C.
'''

def select(X, W): #from L219
    r = rd_float(0, sum(W))
    acum = 0
    for i in range(len(X)):
        acum += W[i]
        if (r <= acum): return X[i]

def random_message(A, F, length=50):
    return ''.join(select(A,F) for i in range(length))

A = list(x2c.keys()) #alphabet list
F = [x2freq[x] for x in A] #frequency list
X = random_message(A,F)
C = E(X)

print("X:", X)
print("C:", C)



'''
3.
In an ASCII-like encoding the message X requires at least 5 bits per character.
Find how many bits per character are needed for the encoding C.
What is the compression ratio?
'''

bits_C = {x:len(c) for (x,c) in x2c.items()}
#bits_C[x] contains the bit size of character x encoded with x2c
#len(C) = sum([bits_C[x] for x in X]) is the bit length of C

cf = len(C)/(5*len(X))
print("Compression ratio: ", cf)
print()



'''
4.
Define a function D(C) that yields, given an encoded message C,
the original message X. Test it with the C obtained in the previous point.
'''

max_tk_length = max(bits_C.values())

def D(C):
    X = ""
    token = ""
    for i in range(len(C)):
        token += C[i]
        if token in c2x:
            X += c2x[token]
            token = ""
        elif len(token) > max_tk_length:
            raise CDIException("Could not decode the (corrupted) message") #not necessary unless encoding could be wrong?

    if token != "":
        raise CDIException("Could not decode the (corrupted) message")
    return X


print("Checking some random-length messages... ", end="")

m_cf = 0
N_checks = 500
for _ in range(N_checks):
    X = random_message(A, F, rd_int(1,200))
    C = E(X)
    m_cf += len(C)/(5*len(X))
    assert(X == D(C))

print("done!")
print("Mean compression ratio:", round(m_cf/N_checks,4))

