import random
from collections import defaultdict
|
|
|
def generate_text(corpus, start_word, max_length=100):
    """Generate text by walking a bigram (word-pair) model of ``corpus``.

    Every word in ``corpus`` is linked to the words observed immediately
    after it. Starting from ``start_word``, the next word is repeatedly
    drawn at random from the followers of the current word. A follower
    that occurs more often after the current word is proportionally more
    likely to be picked.

    Args:
        corpus: A list of words from the training text, in reading order.
        start_word: The word to start the generation.
        max_length: The maximum number of words appended after
            ``start_word``.

    Returns:
        A string of generated words joined by single spaces. Generation
        stops early when the current word has no recorded follower, so
        the result is just ``start_word`` if it never appears before
        another word in ``corpus``.
    """
    # Build the bigram table once: word -> every word seen right after it.
    # Duplicates are kept on purpose so random.choice samples by frequency.
    followers = defaultdict(list)
    for current, following in zip(corpus, corpus[1:]):
        followers[current].append(following)

    words = [start_word]
    prev_word = start_word
    for _ in range(max_length):
        # .get() avoids inserting empty entries into the defaultdict.
        candidates = followers.get(prev_word)
        if not candidates:
            # Dead end (unknown word or last word of the corpus): stop
            # instead of sampling from an empty population.
            break
        prev_word = random.choice(candidates)
        words.append(prev_word)

    return " ".join(words)
|
|
|
|
|
# Demo: generate a sentence from a tiny sample corpus. Guarded so that
# importing this module does not run the demo or print anything.
if __name__ == "__main__":
    corpus = [
        "hello", "world", "how", "are", "you", "today",
        "feeling", "great", "is", "a", "beautiful", "day",
    ]
    start_word = "hello"
    generated_text = generate_text(corpus, start_word)
    print(generated_text)
|
|