Spaces:

temp-late
/

rhyme-ai

Runtime error

Camille

find_rhyme_french

43b8437 over 2 years ago

3.51 kB

	import itertools
	import string
	import random


	def color_new_words(new: str, old: str, color: str = "#eefa66") -> str:
	    """Highlight the words of ``new`` that differ from ``old``.

	    Both strings are split on whitespace and compared word by word, once
	    from the front and once from the back. The words of ``new`` lying
	    between the first mismatch found from each end are wrapped in a single
	    ``<span>`` carrying ``color`` as its background colour.

	    Args:
	        new: Updated text; its words make up the returned string.
	        old: Reference text that ``new`` is compared against.
	        color: CSS colour used for the span background.

	    Returns:
	        ``new`` unchanged when either scan finds no mismatch. Otherwise the
	        words of ``new`` joined by single spaces, with the changed region
	        wrapped in the span and no leading or trailing space.
	    """

	    def find_diff(new_, old_):
	        # zip() stops at the shorter sequence, so only positions present in
	        # both word lists are compared.
	        return [ii for ii, (n, o) in enumerate(zip(new_, old_)) if n != o]

	    new_words = new.split()
	    old_words = old.split()
	    forward = find_diff(new_words, old_words)
	    backward = find_diff(new_words[::-1], old_words[::-1])

	    if not forward or not backward:
	        # No difference
	        return new

	    start = forward[0]
	    # With inputs of different lengths the backward scan can stop before
	    # the forward one; clamp so the suffix never repeats prefix words.
	    end = max(start, len(new_words) - backward[0])

	    prefix = " ".join(new_words[:start])
	    highlighted = (
	        f'<span style="background-color: {color}">'
	        + " ".join(new_words[start:end])
	        + "</span>"
	    )
	    suffix = " ".join(new_words[end:])
	    # Join only the non-empty pieces so a change at the very start or end
	    # of the text does not produce a stray space.
	    return " ".join(part for part in (prefix, highlighted, suffix) if part)


	def find_last_word(s):
	    """Return the last word of ``s``, ignoring punctuation and digits.

	    Only alphabetic and whitespace characters are kept before the text is
	    split into words, so trailing punctuation does not end up in the result.

	    Args:
	        s: Text to inspect.

	    Returns:
	        The last run of alphabetic characters, or ``""`` when ``s`` contains
	        no alphabetic word at all.
	    """
	    # Keep every kind of whitespace (not only " ") so that words separated
	    # by newlines or tabs are not fused together when other characters are
	    # dropped.
	    letters_and_gaps = "".join(c for c in s if c.isalpha() or c.isspace())
	    words = letters_and_gaps.split()
	    return words[-1] if words else ""


	def pairwise(iterable):
	    """Yield overlapping neighbours: (s0, s1), (s1, s2), (s2, s3), ...

	    Fewer than two items in ``iterable`` yields nothing.
	    """
	    # Recipe from
	    # https://stackoverflow.com/questions/5434891/iterate-a-list-as-pair-current-next-in-python
	    current_items, following_items = itertools.tee(iterable, 2)
	    # Move the second copy one step ahead; the None default keeps an empty
	    # iterable from raising StopIteration here.
	    next(following_items, None)
	    return zip(current_items, following_items)


	def sanitize(s):
	    """Return ``s`` with every character of ``string.punctuation`` deleted."""
	    # A translation table that maps a code point to None deletes it.
	    deletion_table = dict.fromkeys(map(ord, string.punctuation))
	    return s.translate(deletion_table)

	def extract(filename):
	    """Read the first two columns of a tab-separated lexicon file.

	    The first line of the file is treated as a header and skipped. Lines
	    with fewer than two tab-separated fields (blank lines, for instance)
	    are ignored.

	    Args:
	        filename: Path of the TSV file; its first two columns are taken as
	            the spelling and the phonetic transcription.

	    Returns:
	        A list of ``(ortho, phon)`` tuples in file order.
	    """
	    words = []
	    # Decode explicitly as UTF-8 so accented spellings are read the same way
	    # whatever the platform's default encoding is.
	    with open(filename, "r", encoding="utf-8") as f:
	        f.readline()  # header row
	        for line in f:
	            # Drop the line ending first: in a two-column file it would
	            # otherwise stay attached to the transcription.
	            fields = line.rstrip("\n").split("\t")
	            if len(fields) < 2:
	                continue
	            words.append((fields[0], fields[1]))
	    return words

	def mk_dico(lexique, n):
	    """Build a rhyme dictionary keyed on the last ``n`` transcription characters.

	    Args:
	        lexique: Phonetised lexicon, a list of ``(ortho, phon)`` entries.
	        n: Length of the ending used as the rhyme key.

	    Returns:
	        A dict ``{rhyme: [word1, word2, ...]}``. Entries whose transcription
	        is shorter than ``n`` are left out.
	    """
	    rhyme_index = {}
	    for entry in lexique:
	        transcription = entry[1]
	        if len(transcription) < n:
	            continue
	        ending = transcription[-n:]
	        if ending in rhyme_index:
	            rhyme_index[ending].append(entry[0])
	        else:
	            rhyme_index[ending] = [entry[0]]
	    return rhyme_index

	def ortho2phon(word, words_list):
	    """Look up the phonetic form of ``word`` in ``words_list``.

	    When several entries share the same spelling, the transcription of the
	    first matching entry is returned.

	    Args:
	        word: Spelling to search for.
	        words_list: List of ``(ortho, phon)`` entries.

	    Returns:
	        The matching transcription, or ``""`` when ``word`` is not in the list.
	    """
	    matching_transcriptions = (
	        entry[1] for entry in words_list if word == entry[0]
	    )
	    return next(matching_transcriptions, "")

	def find_rhyme_french(word, dico, lexique, n=3):
	    """Pick a random word whose transcription ends like that of ``word``.

	    Args:
	        word: Spelling to find a rhyme for.
	        dico: Rhyme dictionary mapping a transcription ending to the list of
	            spellings that share it.
	        lexique: List of ``(ortho, phon)`` entries used to transcribe ``word``.
	        n: Number of final transcription characters that must match.

	    Returns:
	        A randomly chosen rhyming word other than ``word``, or ``None`` when
	        ``word`` is not in ``lexique``, its ending is not a key of ``dico``,
	        or no other word shares that ending.
	    """
	    # 1. Find the phonetic transcription.
	    phon = ortho2phon(word, lexique)
	    if not phon:
	        return None
	    # 2. Take the last n characters of the transcription and look up the
	    #    words sharing that ending.
	    ending = phon[-n:]
	    if ending not in dico:
	        return None
	    # 3. Filter into a new list instead of calling remove(): this leaves the
	    #    caller's dictionary untouched and drops every occurrence of the
	    #    word, not just the first.
	    candidates = [rhyme for rhyme in dico[ending] if rhyme != word]
	    if not candidates:
	        return None
	    # 4. Draw one word at random.
	    return random.choice(candidates)