## A317
## Joan Ginés i Ametllé

import operator
from cdi import *
from cdi_huffman import *
from math import log, ceil

'''
Construction of an optimal encoding and related questions
'''

# Characters and relative frequencies ('_' stands for ' ').

CF= [('a',575), ('b',128), ('c',263), ('d',285), ('e',913), ('f',173), 
    ('g',133), ('h',313), ('i',599), ('j',6),   ('k',84),  ('l',335), 
    ('m',235), ('n',596), ('o',689), ('p',192), ('q',8),   ('r',508), 
    ('s',567), ('t',706), ('u',334), ('v',69),  ('w',119), ('x',73), 
    ('y',164), ('z',7), ('_',1928)]
    
'''
1. Extract from CF the list F of relative frequencies and the list A of characters
'''
A,F = zip(*CF)
print('A: ' + str(A))
print('F: ' + str(F), end='\n\n')


'''
2. Compute the list L of binary "Shannon lengths" corresponding to F.
   Use the formula E.3.2 on T3.10, which in terms of frequencies
   becomes ceil(log2(S)-log2(f_j)), S = sum(f_j).
   The function ceil is defined in the math module.
'''
S = log(sum(F),2)
L = [ceil(S-log(f,2)) for f in F]
print('L: ' + str(L), end='\n\n')


'''
3. Consider the list H below. Check that H[j] <= L[j] for all j
   and yet that it satisfies kraft_sum(H) <= 1.
'''
H = [4,6,5,5,4,6,6,5,4,10,7,5,6,4,4,6,9,5,4,4,5,8,7,7,6,10,2]

p = all(map(lambda x,y: x <= y, H, L))
q = kraft(H)
print('H[j] <= L[j]: ' + str(p))
print('kraft_sum(H) <= 1: ' + str(q), end='\n\n')


'''
4. Use the output C = ells2code(H) (see cdi) to construct a binary
   encoding E of A such that len(E(A[j]) = H[j]. Be very careful
   in how to relate the ordering of C to the orderings of A and H.
'''
C = ells2code(H)

# get indexes of sorted list H
idx = [i for (i,j) in sorted(enumerate(H), key=operator.itemgetter(1))]

# replace each key in C with a character in A
E = {}
for i in range(len(A)): E[ A[idx[i]] ] = C[i]
print('E: ' + str(E), end='\n\n')


'''
5. Compute the average length l of the encoding E and compare
   it with the entropy e of the distribution F
'''
# average length
l = weighted_average([len(E[x]) for x in A], F)
print('l: ' + str(l))

# entropy of F
e = -sum(x*log(x,2) for x in normalize(F))
print('e: ' + str(e))

if l >= e:
    print('Shannon theorem holds')
else:
    print('Shannon theorem does not hold!')
