import nltk
import numpy as np
import random

class Chatbot:

    def __init__(self, train_data):
        self.train_data = train_data
        self.vocabulary = set()
        self.word_to_index = {}
        self.index_to_word = {}
        self.create_vocabulary()
        self.build_model()

    def create_vocabulary(self):
        for sentence in self.train_data:
            for word in sentence:
                self.vocabulary.add(word)
        self.vocabulary = sorted(self.vocabulary)
        self.word_to_index = {word: i for i, word in enumerate(self.vocabulary)}
        self.index_to_word = {i: word for i, word in enumerate(self.vocabulary)}

    def build_model(self):
        # Note: these weights are randomly initialized and never trained,
        # so the "model" produces essentially arbitrary predictions.
        self.num_words = len(self.vocabulary)
        self.W = np.random.randn(self.num_words, self.num_words)
        self.b = np.random.randn(self.num_words)

    def predict(self, sentence):
        # Convert the sentence to a bag-of-words count vector over the vocabulary,
        # skipping words that never appeared in the training data.
        counts = np.zeros(self.num_words)
        for word in sentence:
            index = self.word_to_index.get(word)
            if index is not None:
                counts[index] += 1

        # Score each possible next word.
        scores = np.dot(counts, self.W) + self.b

        # Choose the word with the highest score.
        next_word = self.index_to_word[int(np.argmax(scores))]

        return next_word

    def generate_text(self, start_text, max_length=100):
        # Tokenize the seed text so predict() sees words rather than characters.
        words = nltk.word_tokenize(start_text)
        for _ in range(max_length):
            next_word = self.predict(words)
            words.append(next_word)
        return " ".join(words)

    def respond_to(self, input_text):
        input_words = nltk.word_tokenize(input_text.lower())
        if not input_words:
            return ""
        # Check for special commands.
        if input_words[0] == "repeat":
            return " ".join(input_words[1:])
        elif input_words[0] == "generate":
            start_text = " ".join(input_words[1:])
            return self.generate_text(start_text)
        else:
            # Find the most similar sentence in the training data.
            # jaccard_distance is a distance, so lower values mean more overlap
            # and np.argmin picks the closest match.
            similarity_scores = []
            for sentence in self.train_data:
                similarity_score = nltk.jaccard_distance(set(sentence), set(input_words))
                similarity_scores.append(similarity_score)
            most_similar_index = int(np.argmin(similarity_scores))
            most_similar_sentence = self.train_data[most_similar_index]
            # Generate a response based on the most similar sentence.
            response = ""
            for word in most_similar_sentence:
                response += self.predict([word]) + " "
            return response.strip()
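The retrieval branch of respond_to relies on NLTK's Jaccard distance: one minus the size of the intersection of the two word sets divided by the size of their union, so lower values mean more shared words and np.argmin selects the closest training sentence. A quick standalone check of that metric (an illustrative sketch with made-up word sets, not part of the script itself):

import nltk

a = {"how", "are", "you"}
b = {"how", "are", "things"}
c = {"completely", "different", "words"}

# 1 - |intersection| / |union|: smaller means more similar.
print(nltk.jaccard_distance(a, b))  # 0.5 (2 shared words out of 4 distinct)
print(nltk.jaccard_distance(a, c))  # 1.0 (no overlap at all)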

def main():
    # Load the training data.
    train_data = nltk.corpus.reuters.sents()

    # Create the chatbot.
    chatbot = Chatbot(train_data)

    # Start a conversation.
    print("Chatbot: Hi, I'm a chatbot. What can I help you with?")
    while True:
        user_input = input("User: ")
        if user_input.lower() in ["bye", "goodbye", "exit", "quit"]:
            print("Chatbot: Goodbye!")
            break
        response = chatbot.respond_to(user_input)
        print("Chatbot:", response)


if __name__ == "__main__":
    main()
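Two practical notes before running the script. First, it assumes the NLTK data it uses is already installed: the Punkt tokenizer models behind nltk.word_tokenize and the Reuters corpus loaded in main(). Second, build_model allocates a dense num_words x num_words matrix, and the full Reuters vocabulary runs to tens of thousands of word types, so that matrix can easily exhaust memory; a small hand-written corpus is a safer first test. A minimal sketch, assuming the Chatbot class above is available in the same session (the toy sentences and variable names below are placeholders, not part of the original script):

import nltk

# One-time downloads (recent NLTK releases may also need "punkt_tab").
nltk.download("punkt")
nltk.download("reuters")

# Exercise the chatbot on a tiny corpus so the weight matrix stays small.
toy_corpus = [
    ["hello", "how", "are", "you"],
    ["i", "am", "a", "simple", "chatbot"],
    ["what", "can", "i", "help", "you", "with"],
]
bot = Chatbot(toy_corpus)
print(bot.respond_to("hello how are you"))
print(bot.respond_to("repeat after me"))  # prints "after me"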
