File size: 1,871 Bytes
5ddb621
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import functools
import random
from typing import List, Optional

import requests
from gazpacho import Soup, get

from rhyme_with_ai.utils import find_last_word


def query_rhyme_words(sentence: str, n_rhymes: int, language:str="english") -> List[str]:
    """Returns a list of rhyme words for a sentence.

    Parameters
    ----------
    sentence : Sentence that may end with punctuation
    n_rhymes : Maximum number of rhymes to return

    Returns
    -------
        List[str] -- List of words that rhyme with the final word
    """
    last_word = find_last_word(sentence)
    if language == "english":
       return query_datamuse_api(last_word, n_rhymes)
    elif language == "dutch":
        return mick_rijmwoordenboek(last_word, n_rhymes)
    else:
        raise NotImplementedError(f"Unsupported language ({language}) expected 'english' or 'dutch'.")


def query_datamuse_api(word: str, n_rhymes: Optional[int] = None) -> List[str]:
    """Query the DataMuse API.

    Parameters
    ----------
    word : Word to rhyme with
    n_rhymes : Max rhymes to return

    Returns
    -------
    Rhyme words
    """
    out = requests.get(
        "https://api.datamuse.com/words", params={"rel_rhy": word}
    ).json()
    words = [_["word"] for _ in out]
    if n_rhymes is None:
        return words
    return words[:n_rhymes]


@functools.lru_cache(maxsize=128, typed=False)
def mick_rijmwoordenboek(word: str, n_words: int):
    url = f"https://rijmwoordenboek.nl/rijm/{word}"
    html = get(url)
    soup = Soup(html)

    results = soup.find("div", {"id": "rhymeResultsWords"}).html.split("<br>")

    # clean up
    results = [r.replace("\n", "").replace(" ", "") for r in results]

    # filter html and empty strings
    results = [r for r in results if ("<" not in r) and (len(r) > 0)]

    return random.sample(results, min(len(results), n_words))