Camille
committed on
Commit
·
43b8437
1
Parent(s):
8eb8892
find_rhyme_french
Browse files- app.py +1 -1
- rhyme_with_ai/noms-lexique.org.txt +0 -0
- rhyme_with_ai/rhyme.py +7 -0
- rhyme_with_ai/utils.py +64 -1
app.py
CHANGED
@@ -25,7 +25,7 @@ elif LANGUAGE == "french":
|
|
25 |
MODEL_PATH = "camembert-base"
|
26 |
ITER_FACTOR = 5
|
27 |
else:
|
28 |
-
raise NotImplementedError(f"Unsupported language ({LANGUAGE}) expected 'english' or '
|
29 |
|
30 |
def main():
|
31 |
st.markdown(
|
|
|
25 |
MODEL_PATH = "camembert-base"
|
26 |
ITER_FACTOR = 5
|
27 |
else:
|
28 |
+
raise NotImplementedError(f"Unsupported language ({LANGUAGE}) expected 'english','dutch' or 'french.")
|
29 |
|
30 |
def main():
|
31 |
st.markdown(
|
rhyme_with_ai/noms-lexique.org.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
rhyme_with_ai/rhyme.py
CHANGED
@@ -6,6 +6,9 @@ import requests
|
|
6 |
from gazpacho import Soup, get
|
7 |
|
8 |
from rhyme_with_ai.utils import find_last_word
|
|
|
|
|
|
|
9 |
|
10 |
|
11 |
def query_rhyme_words(sentence: str, n_rhymes: int, language:str="english") -> List[str]:
|
@@ -23,6 +26,10 @@ def query_rhyme_words(sentence: str, n_rhymes: int, language:str="english") -> L
|
|
23 |
return query_datamuse_api(last_word, n_rhymes)
|
24 |
elif language == "dutch":
|
25 |
return mick_rijmwoordenboek(last_word, n_rhymes)
|
|
|
|
|
|
|
|
|
26 |
else:
|
27 |
raise NotImplementedError(f"Unsupported language ({language}) expected 'english' or 'dutch'.")
|
28 |
|
|
|
6 |
from gazpacho import Soup, get
|
7 |
|
8 |
from rhyme_with_ai.utils import find_last_word
|
9 |
+
from rhyme_with_ai.utils import find_rhyme_french
|
10 |
+
from rhyme_with_ai.utils import extract
|
11 |
+
from rhyme_with_ai.utils import mk_dico
|
12 |
|
13 |
|
14 |
def query_rhyme_words(sentence: str, n_rhymes: int, language:str="english") -> List[str]:
|
|
|
26 |
return query_datamuse_api(last_word, n_rhymes)
|
27 |
elif language == "dutch":
|
28 |
return mick_rijmwoordenboek(last_word, n_rhymes)
|
29 |
+
elif language == "french":
|
30 |
+
lexique = extract('noms-lexique.org.txt')
|
31 |
+
dico_3 = mk_dico(lexique, 3)
|
32 |
+
return find_rhyme_french(last_word, dico_3, lexique, n_rhymes)
|
33 |
else:
|
34 |
raise NotImplementedError(f"Unsupported language ({language}) expected 'english' or 'dutch'.")
|
35 |
|
rhyme_with_ai/utils.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import itertools
|
2 |
import string
|
|
|
3 |
|
4 |
|
5 |
def color_new_words(new: str, old: str, color: str = "#eefa66") -> str:
|
@@ -46,4 +47,66 @@ def pairwise(iterable):
|
|
46 |
|
47 |
def sanitize(s):
|
48 |
"""Remove punctuation from a string."""
|
49 |
-
return s.translate(str.maketrans("", "", string.punctuation))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import itertools
|
2 |
import string
|
3 |
+
import random
|
4 |
|
5 |
|
6 |
def color_new_words(new: str, old: str, color: str = "#eefa66") -> str:
|
|
|
47 |
|
48 |
def sanitize(s):
    """Return *s* with every ``string.punctuation`` character removed."""
    # A translation table with an empty mapping and a "delete" set strips
    # all listed characters in a single pass.
    punctuation_stripper = str.maketrans("", "", string.punctuation)
    return s.translate(punctuation_stripper)
|
51 |
+
|
52 |
+
def extract(filename):
    """Read the first two fields of every data row of a tab-separated file.

    The first line of the file is treated as a header and skipped.

    Args:
        filename: path of the TSV file to read; it is decoded as UTF-8.

    Returns:
        A list of ``(ortho, phon)`` tuples, one per usable row, in file order.
        Rows with fewer than two tab-separated fields (e.g. blank lines) are
        skipped.
    """
    words = []
    # Pin the encoding instead of relying on the platform default.
    # NOTE(review): presumably the lexicon is UTF-8 French text - confirm.
    with open(filename, "r", encoding="utf-8") as f:
        next(f, None)  # skip the header row (no error on an empty file)
        for line in f:
            # Strip the line terminator first: on a two-column row it would
            # otherwise stay glued to the phonetic field.
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) < 2:
                continue
            words.append((fields[0], fields[1]))
    return words
|
64 |
+
|
65 |
+
def mk_dico(lexique, n):
    """Build a rhyme dictionary keyed on the last *n* phonetic characters.

    Args:
        lexique: iterable of ``(ortho, phon)`` entries.
        n: length of the phonetic suffix used as the rhyme key.

    Returns:
        A dict ``{suffix: [ortho1, ortho2, ...]}``. Entries whose phonetic
        form is shorter than *n* are left out; spellings keep the order in
        which they appear in *lexique*.
    """
    rhyme_index = {}
    for entry in lexique:
        spelling, phonemes = entry[0], entry[1]
        if len(phonemes) < n:
            # Too short to provide an n-character suffix.
            continue
        suffix = phonemes[-n:]
        if suffix not in rhyme_index:
            rhyme_index[suffix] = []
        rhyme_index[suffix].append(spelling)
    return rhyme_index
|
78 |
+
|
79 |
+
def ortho2phon(word, words_list):
    """Look up the phonetic form recorded for a spelling.

    *words_list* is scanned in order and the search stops at the first entry
    whose spelling equals *word*, so for homographs with different
    pronunciations the earliest entry wins.

    Args:
        word: spelling to look up.
        words_list: sequence of ``(ortho, phon)`` entries.

    Returns:
        The phonetic form of the first matching entry, or ``""`` when *word*
        is not in *words_list*.
    """
    matching_phons = (entry[1] for entry in words_list if word == entry[0])
    return next(matching_phons, "")
|
91 |
+
|
92 |
+
def find_rhyme_french(word, dico, lexique, n=3):
    """Return a random word whose last *n* phonetic characters match *word*'s.

    Args:
        word: spelling to find a rhyme for.
        dico: rhyme dictionary mapping a phonetic suffix to a list of
            spellings (the shape returned by ``mk_dico``).
        lexique: list of ``(ortho, phon)`` entries used to look up the
            phonetic form of *word*.
        n: number of terminal phonetic characters that must match.

    Returns:
        A randomly chosen spelling other than *word* that shares the suffix,
        or ``None`` when *word* is not in *lexique*, when the suffix has no
        entry in *dico*, or when no other word shares it.
    """
    # 1. Find the phonetic transcription.
    phon = ortho2phon(word, lexique)
    if not phon:
        return None
    # 2. Take the last n characters of the transcription. There is no
    #    fallback to a shorter suffix: the key is looked up as-is.
    suffix = phon[-n:]
    # 3. Collect the words sharing that suffix. A new list is built instead of
    #    calling list.remove() so the caller's dico is never mutated, and so
    #    every occurrence of word (not just the first) is excluded.
    candidates = [rhyme for rhyme in dico.get(suffix, ()) if rhyme != word]
    if not candidates:
        # word was the only entry: nothing to pick from.
        return None
    # 4. Pick one at random.
    return random.choice(candidates)
|