|
import itertools |
|
import string |
|
import random |
|
|
|
|
|
def color_new_words(new: str, old: str, color: str = "#eefa66") -> str:
    """Highlight the words of ``new`` that differ from ``old``.

    Both strings are split on whitespace and compared word by word, once
    from the front and once from the back. The words of ``new`` lying
    between the first mismatch found from each end are wrapped in a single
    ``<span style="background-color: ...">`` element.

    Args:
        new: Text whose changed words should be highlighted.
        old: Reference text to compare against.
        color: CSS colour used as the span background.

    Returns:
        ``new`` unchanged when no mismatch is found from either end, or
        when the two scans cross so that no word of ``new`` lies between
        them. Otherwise the words of ``new`` re-joined with single spaces,
        with the changed run wrapped in the span.
    """

    def first_mismatch(left, right):
        """Return the index of the first differing pair, or None."""
        return next(
            (idx for idx, (a, b) in enumerate(zip(left, right)) if a != b),
            None,
        )

    new_words = new.split()
    old_words = old.split()
    start = first_mismatch(new_words, old_words)
    from_end = first_mismatch(new_words[::-1], old_words[::-1])

    if start is None or from_end is None:
        return new

    end = len(new_words) - from_end
    if end <= start:
        # The scans met or crossed: nothing in ``new`` to highlight.
        # Slicing with end < start would repeat words in the output.
        return new

    highlighted = (
        f'<span style="background-color: {color}">'
        + " ".join(new_words[start:end])
        + "</span>"
    )
    pieces = [
        " ".join(new_words[:start]),
        highlighted,
        " ".join(new_words[end:]),
    ]
    # Drop empty prefix/suffix so no stray leading or trailing space appears.
    return " ".join(piece for piece in pieces if piece)
|
|
|
|
|
def find_last_word(s):
    """Return the last word of ``s``, ignoring non-letter characters.

    Every character that is neither alphabetic nor whitespace is dropped
    (so ``"don't"`` becomes ``"dont"``), then the remaining text is split
    on whitespace.

    Args:
        s: The string to search.

    Returns:
        The last whitespace-separated run of letters, or ``""`` when ``s``
        contains no letters at all.
    """
    # Keep every kind of whitespace, not just " ", so that words separated
    # by a newline or tab are not fused together.
    letters_and_gaps = "".join(c for c in s if c.isalpha() or c.isspace())
    words = letters_and_gaps.split()
    return words[-1] if words else ""
|
|
|
|
|
def pairwise(iterable):
    """Yield overlapping neighbours: (s0, s1), (s1, s2), (s2, s3), ...

    An input with fewer than two items produces no pairs.
    """
    trailing, leading = itertools.tee(iterable, 2)
    # Move the second copy one step ahead; the default keeps an empty
    # input from raising StopIteration here.
    next(leading, None)
    return zip(trailing, leading)
|
|
|
|
|
def sanitize(s):
    """Return ``s`` with every ASCII punctuation character removed."""
    # Mapping a character to None in a translation table deletes it.
    drop_punctuation = str.maketrans(dict.fromkeys(string.punctuation))
    return s.translate(drop_punctuation)
|
|
|
def extract(filename):
    """Read the first two fields of every data row of a TSV file.

    The first line of the file is treated as a header and skipped.

    Args:
        filename: Path to a tab-separated file whose first two columns are
            the spelling (ortho) and the phonetic form (phon).

    Returns:
        A list of ``(ortho, phon)`` tuples, in file order. Rows with fewer
        than two tab-separated fields (e.g. blank lines) are skipped.
    """
    words = []
    # NOTE(review): assumes the lexicon is UTF-8 encoded -- confirm. An
    # explicit encoding keeps decoding independent of the platform default.
    with open(filename, "r", encoding="utf-8") as f:
        f.readline()  # skip the header row
        for line in f:
            # Strip the newline first so it cannot end up attached to the
            # phonetic form when a row has exactly two columns.
            fields = line.rstrip("\n").split("\t")
            if len(fields) < 2:
                continue
            words.append((fields[0], fields[1]))
    return words
|
|
|
def mk_dico(lexique, n):
    """Build a rhyme dictionary from a phonetised lexicon.

    Args:
        lexique: Sequence of ``(ortho, phon)`` entries.
        n: Length of the phonetic ending used as the rhyme key.

    Returns:
        A dict mapping each ending of length ``n`` to the list of
        spellings whose phonetic form ends with it, in lexicon order.
        Entries whose phonetic form is shorter than ``n`` are left out.
    """
    rhymes_by_ending = {}
    for entry in lexique:
        phonetic = entry[1]
        if len(phonetic) < n:
            continue
        ending = phonetic[-n:]
        if ending not in rhymes_by_ending:
            rhymes_by_ending[ending] = []
        rhymes_by_ending[ending].append(entry[0])
    return rhymes_by_ending
|
|
|
def ortho2phon(word, words_list):
    """Look up the phonetic form of ``word`` in ``words_list``.

    When several entries share the same spelling, the first one found
    wins.

    Args:
        word: Spelling to look for.
        words_list: Sequence of ``(ortho, phon)`` entries.

    Returns:
        The phonetic form of the first matching entry, or ``""`` when
        ``word`` is not in the list.
    """
    matches = (entry[1] for entry in words_list if word == entry[0])
    return next(matches, "")
|
|
|
def find_rhyme_french(word, dico, lexique, n=3):
    """Pick a random word whose phonetic ending matches that of ``word``.

    Args:
        word: Spelling of the word to find a rhyme for.
        dico: Rhyme dictionary mapping a phonetic ending to a list of
            spellings.
        lexique: Sequence of ``(ortho, phon)`` entries used to look up the
            phonetic form of ``word``.
        n: Length of the phonetic ending to match.

    Returns:
        A randomly chosen spelling sharing the ending and different from
        ``word``, or ``None`` when ``word`` is not in ``lexique``, its
        ending is not in ``dico``, or no other word shares that ending.
    """
    phon = ortho2phon(word, lexique)
    if not phon:
        return None

    # Filter into a new list instead of calling list.remove(): ``dico`` is
    # owned by the caller, and removing in place would permanently drop
    # ``word`` from it for every later lookup.
    candidates = [rhyme for rhyme in dico.get(phon[-n:], ()) if rhyme != word]
    if not candidates:
        # ``word`` was the only entry for this ending.
        return None
    return random.choice(candidates)