# tcgyver/chatbot1 · Hugging Face

import random

import nltk
import numpy as np

class Chatbot:
    """Toy next-word chatbot driven by a word-by-word weight matrix.

    The constructor builds a vocabulary from ``train_data`` and allocates a
    randomly initialised weight matrix ``W`` and bias vector ``b``.  Nothing
    in this class trains those parameters, so predictions are arbitrary
    unless the caller assigns ``W`` and ``b`` afterwards.
    """

    def __init__(self, train_data):
        """Build the vocabulary and model parameters from ``train_data``.

        Args:
            train_data: Re-iterable collection of sentences, each sentence
                being an iterable of word tokens.  It is iterated here and
                again by ``respond_to``, so a one-shot generator will not
                work.
        """
        self.train_data = train_data
        self.vocabulary = set()
        self.word_to_index = {}
        self.index_to_word = {}
        self.create_vocabulary()
        self.build_model()

    def create_vocabulary(self):
        """Collect the distinct training words into sorted lookup tables."""
        unique_words = {
            word for sentence in self.train_data for word in sentence
        }
        # Sorting gives every word a stable index across runs.
        self.vocabulary = sorted(unique_words)
        self.word_to_index = {
            word: i for i, word in enumerate(self.vocabulary)
        }
        self.index_to_word = dict(enumerate(self.vocabulary))

    def build_model(self):
        """Allocate random parameters sized to the vocabulary.

        ``W`` is ``num_words x num_words``, so memory grows quadratically
        with the vocabulary size.
        """
        self.num_words = len(self.vocabulary)
        self.W = np.random.randn(self.num_words, self.num_words)
        self.b = np.random.randn(self.num_words)

    def predict(self, sentence):
        """Return the highest-scoring next word for a sequence of words.

        Args:
            sentence: Iterable of word tokens.  Tokens that are not in the
                vocabulary are ignored; if none are known the result is
                decided by the bias vector alone.

        Returns:
            The vocabulary word with the largest score.

        Raises:
            ValueError: If the vocabulary is empty (raised by ``np.argmax``).
        """
        known = [
            self.word_to_index[word]
            for word in sentence
            if word in self.word_to_index
        ]
        # Summing the rows of W selected by the word indices is the same as
        # multiplying a bag-of-words count vector by W.  The raw index list
        # is not a feature vector and cannot be fed to np.dot directly.
        rows = np.asarray(known, dtype=np.intp)
        scores = self.W[rows].sum(axis=0) + self.b
        return self.index_to_word[int(np.argmax(scores))]

    def generate_text(self, start_text, max_length=100):
        """Extend ``start_text`` by ``max_length`` predicted words.

        Args:
            start_text: Seed text; it is split on whitespace into words.
            max_length: Number of words to append.

        Returns:
            The seed words followed by the generated words, joined by single
            spaces.
        """
        # predict() expects a list of words, not a string of characters.
        words = start_text.split()
        for _ in range(max_length):
            words.append(self.predict(words))
        return " ".join(words)

    def respond_to(self, input_text):
        """Produce a reply to one line of user input.

        The input is lower-cased and tokenised.  ``repeat ...`` echoes the
        remaining tokens, ``generate ...`` continues the remaining text via
        ``generate_text``, and anything else is answered by predicting one
        word for each word of the training sentence that has the smallest
        Jaccard distance to the input.

        Args:
            input_text: Raw text typed by the user.

        Returns:
            The reply, or an empty string when the input has no tokens or
            there is no training data to compare against.
        """
        input_words = nltk.word_tokenize(input_text.lower())
        if not input_words:
            return ""

        # Check for special commands.
        command, *rest = input_words
        if command == "repeat":
            return " ".join(rest)
        if command == "generate":
            return self.generate_text(" ".join(rest))

        # Find the most similar sentence in the training data.  min() keeps
        # the first sentence on ties, matching np.argmin over a score list.
        query = set(input_words)
        closest = min(
            self.train_data,
            key=lambda sentence: nltk.jaccard_distance(set(sentence), query),
            default=None,
        )
        if closest is None:
            return ""

        # Generate a response based on the most similar sentence.
        return " ".join(self.predict([word]) for word in closest)

def main():
    """Run an interactive console chat trained on the NLTK Reuters corpus.

    Reads lines from standard input until the user types one of the exit
    words or input ends, printing the chatbot's reply to each line.
    """
    # Load the training data.
    # NOTE(review): presumably needs the NLTK "reuters" corpus (and the
    # tokenizer data used by nltk.word_tokenize) to be installed - confirm.
    train_data = nltk.corpus.reuters.sents()

    # Create the chatbot.
    chatbot = Chatbot(train_data)

    # Start a conversation.
    print("Chatbot: Hi, I'm a chatbot. What can I help you with?")
    while True:
        try:
            user_input = input("User: ")
        except EOFError:
            # Treat end of input (Ctrl-D or a closed pipe) like a goodbye.
            print()
            print("Chatbot: Goodbye!")
            break
        if user_input.strip().lower() in ["bye", "goodbye", "exit", "quit"]:
            print("Chatbot: Goodbye!")
            break
        response = chatbot.respond_to(user_input)
        print("Chatbot:", response)

# Run the chat loop only when executed as a script, not when imported.
if __name__ == "__main__":
    main()