awacke1 committed on
Commit
3dd6d81
1 Parent(s): e1b66b5

Create new file

Browse files
Files changed (1) hide show
  1. rhyme-with-ai/rhyme.py +64 -0
rhyme-with-ai/rhyme.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import functools
2
+ import random
3
+ from typing import List, Optional
4
+
5
+ import requests
6
+ from gazpacho import Soup, get
7
+
8
+ from rhyme_with_ai.utils import find_last_word
9
+
10
+
11
def query_rhyme_words(sentence: str, n_rhymes: int, language: str = "english") -> List[str]:
    """Look up rhymes for the last word of a sentence.

    The last word is extracted with ``find_last_word`` and then handed to the
    language-specific backend.

    Parameters
    ----------
    sentence : Sentence that may end with punctuation
    n_rhymes : Maximum number of rhymes to return
    language : Either "english" (DataMuse API) or "dutch" (rijmwoordenboek.nl)

    Returns
    -------
    List[str] -- List of words that rhyme with the final word

    Raises
    ------
    NotImplementedError
        If ``language`` is neither "english" nor "dutch".
    """
    # The last word is extracted before the language is validated, so any
    # error raised by find_last_word takes precedence over the language check.
    target = find_last_word(sentence)

    if language == "english":
        return query_datamuse_api(target, n_rhymes)
    if language == "dutch":
        return mick_rijmwoordenboek(target, n_rhymes)

    raise NotImplementedError(f"Unsupported language ({language}) expected 'english' or 'dutch'.")
28
+
29
+
30
def query_datamuse_api(
    word: str, n_rhymes: Optional[int] = None, timeout: float = 10.0
) -> List[str]:
    """Query the DataMuse API for words that rhyme with ``word``.

    Parameters
    ----------
    word : Word to rhyme with
    n_rhymes : Max rhymes to return; ``None`` returns every rhyme in the response
    timeout : Seconds to wait for the API before giving up

    Returns
    -------
    Rhyme words, in the order the API returned them

    Raises
    ------
    requests.RequestException
        If the request times out, cannot be completed, or the API answers
        with an HTTP error status.
    """
    # Without an explicit timeout, requests may block indefinitely on a
    # stalled connection.
    response = requests.get(
        "https://api.datamuse.com/words",
        params={"rel_rhy": word},
        timeout=timeout,
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error body as rhymes.
    response.raise_for_status()

    words = [entry["word"] for entry in response.json()]
    # Slicing with None keeps the whole list, so no separate branch is needed.
    return words[:n_rhymes]
47
+
48
+
49
@functools.lru_cache(maxsize=128, typed=False)
def mick_rijmwoordenboek(word: str, n_words: int) -> List[str]:
    """Scrape rijmwoordenboek.nl for words that rhyme with ``word``.

    Results are memoized per ``(word, n_words)`` pair by ``lru_cache``, so a
    repeated call returns the same random sample (and the same list object)
    instead of fetching the page again. Callers should not mutate the
    returned list in place.

    Parameters
    ----------
    word : Word to rhyme with
    n_words : Maximum number of rhymes to return

    Returns
    -------
    A random sample of at most ``n_words`` rhyme words; empty when the page
    contains no results container.
    """
    url = f"https://rijmwoordenboek.nl/rijm/{word}"
    html = get(url)
    soup = Soup(html)

    container = soup.find("div", {"id": "rhymeResultsWords"})
    # find() may hand back a list (several/zero matches) or None (no match)
    # depending on the page and gazpacho version; normalise before using .html.
    if isinstance(container, list):
        container = container[0] if container else None
    if container is None:
        return []

    results = container.html.split("<br>")

    # clean up
    results = [r.replace("\n", "").replace(" ", "") for r in results]

    # filter html and empty strings
    results = [r for r in results if ("<" not in r) and (len(r) > 0)]

    return random.sample(results, min(len(results), n_words))
64
+