## A324
## David Martin Alaminos

from cdi import ells2code, entropy


'''
Finding the average time units needed for the
transmission of one character using the Morse code.
'''

## 0. Data

# Frequencies of characters, including space '_'

F = {
    'a': 651738, 'b':  124248, 'c': 217339, 'd': 349835, 'e': 1041442,
    'f': 197881, 'g':  158610, 'h': 492888, 'i': 558094, 'j':    9033,
    'k':  50529, 'l':  331490, 'm': 202124, 'n': 564513, 'o':  596302,
    'p': 137645, 'q':    8606, 'r': 497563, 's': 515760, 't':  729357,
    'u': 225134, 'v':   82903, 'w': 171272, 'x':  13692, 'y':  145984,
    'z':   7836, '_': 1918182
}


# Dictionary M whose entries have the form x:(a,b), where
# x is a character, and a, b are the number of dots and dashes
# in the Morse encoding of x. Note the entry '_':(1,0)

M = {
    'a': (1,1), 'b': (3,1), 'c': (2,2), 'd': (2,1), 'e': (1,0),
    'f': (3,1), 'g': (1,2), 'h': (4,0), 'i': (2,0), 'j': (1,3),
    'k': (1,2), 'l': (3,1), 'm': (0,2), 'n': (1,1), 'o': (0,3),
    'p': (2,2), 'q': (1,3), 'r': (2,1), 's': (3,0), 't': (0,1),
    'u': (2,1), 'v': (3,1), 'w': (1,2), 'x': (2,2), 'y': (1,3),
    'z': (2,2), '_': (1,0)
}

# 1. The length of a dot is one unit
# 2. A dash is three units
# 3. The space between elements of the same letter is one unit
# 4. The space between letters is three units
# 5. The space between words is seven units

len_dot = 1
len_dash = 3
len_btwn_letter_elements = 1
len_btwn_letters = 3



##  1.
''' Define a function time_units(x) that gives the
    the number of time units needed for the transmission of x.
    Add comments that document your reasoning.
'''

"""
The number of time units needed to send a letter x is the sum of:
 - unit length of a dot times the number of dots encoding x
 - unit length of a dash times the number of dashes encoding x
 - unit length of inter-elements space times the number of such
   spaces, i.e., sum of dots + sum of dashes - 1 = sum(M[x])-1
 - unit length of an inter-letters space to represent the
   division with the next letter

The space '_' is a letter consisting of a dot (1); along with its
leading (3) and trailing (3) inter-letter separators, it adds up
to the requirement of 1+3+3=7 time units between words
"""

def time_units(x):
    return len_dot*M[x][0] + len_dash*M[x][1] + len_btwn_letter_elements*(sum(M[x])-1) + len_btwn_letters



##  2.
''' Produce an expression that gives the (weighted) average
    A of time units required to transmit a character.
'''

A = sum([time_units(c)*F[c] for c in F])/sum(F.values())
print("Average time units of Morse encoding:", A)



##  3.
''' Which characters have maximum (minimum) time-length?
'''

C = list(F.keys()) #characters
TU = [time_units(c) for c in C] #time unit lengths of characters

xmax = max(TU)
xmin = min(TU)
print(', '.join([str("\'"+C[i]+"\'") for i in range(len(TU)) if TU[i]==xmax]), "have maximum time-length:", xmax, "units")
print(', '.join([str("\'"+C[i]+"\'") for i in range(len(TU)) if TU[i]==xmin]), "have minimum time-length:", xmin, "units")



##  4.
''' Compare the average A with the average time-length
    of Huffman encoding assuming that a bit transmission
    amounts to a time unit.
'''

HC = ells2code(TU) #TU now represents bit lengths
C_sorted = [c for (c,_) in sorted(list(F.items()), key=lambda t: t[1], reverse=True)] #chars sorted by desc. frequency
HX = list(HC.values()) #binary codes already sorted by asc. length
E = list(zip(C_sorted,HX)) #shortest codes are assigned to most frequent chars, and so on

HA = sum([(len(x)*F[c]) for (c,x) in E])/sum(F.values()) #Huffman encoding avg.
entr = entropy(F.values())
print("Average length of Morse encoding (w/ Huffman):", HA)
print("Entropy of the frequency distribution:", entr)

"""
Huffman encoding slightly improves the average length of the
raw Morse encoding, but still far from the entropy boundary
because the inherent codification/time units rules of Morse
lead to longer bit lengths per character (TU list)
"""
