from statistics import mode
import urllib.request
import gradio as gr
import subprocess
import unidecode
import requests
import string
import json

# Download files using wget
keys_url = "https://github.com/Nkluge-correa/Aira/raw/master/Aira-1/data/generated_data/keys_en.json"
answers_url = "https://github.com/Nkluge-correa/Aira/raw/master/Aira-1/data/original_data/answers_en.txt"

subprocess.run(["wget", keys_url, "-O", "keys_en.json"])
subprocess.run(["wget", answers_url, "-O", "answers_en.txt"])

# Load data from files (keys_en.json is read twice, so `vocabulary` and
# `dictionary` hold the same key-to-answer-index mapping)
with open('answers_en.txt', encoding='utf-8') as fp:
    answers = [line.strip() for line in fp]

with open('keys_en.json', 'r') as fp:
    vocabulary = json.load(fp)

with open('keys_en.json') as json_file:
    dictionary = json.load(json_file)


def generate_ngrams(text, WordsToCombine):
    """
    Generates n-grams of length WordsToCombine from the input text.

    Args:
        text: A string representing the input text.
        WordsToCombine: An integer representing the size of the n-grams to
            be generated.

    Returns:
        A list of n-grams generated from the input text, where each n-gram
        is a list of WordsToCombine words.
    """
    words = text.split()
    output = []
    for i in range(len(words) - WordsToCombine + 1):
        output.append(words[i:i + WordsToCombine])
    return output


def make_keys(text, WordsToCombine):
    """
    Given a text and a number of words to combine, returns a list of keys
    that correspond to all possible n-grams (sequences of n consecutive
    words) in the text.

    Args:
        text (str): The input text.
        WordsToCombine (int): The number of words to combine.

    Returns:
        sentences (list of str): A list of all the keys, which are the
            n-grams in the text joined back into strings.
    """
    gram = generate_ngrams(text, WordsToCombine)
    sentences = []
    for i in range(0, len(gram)):
        sentence = ' '.join(gram[i])
        sentences.append(sentence)
    return sentences


def chat(message, history):
    """
    Generates a response to a user input message based on a pre-built
    dictionary of responses.

    Args:
        message (str): A string representing the user's input message.
        history (list): A list of tuples containing previous messages and
            responses.

    Returns:
        tuple: The updated history returned twice, i.e. two references to
            the same list of (message, response) tuples with the new
            exchange appended.
    """
    history = history or []
    text = message.lower()
    sentences = []
    values = []

    # Normalize the input: remove punctuation and accents
    new_text = text.translate(str.maketrans('', '', string.punctuation))
    new_text = unidecode.unidecode(new_text)

    if len(new_text.split()) == 1:
        # Single-word message: an exact key match gets a heavy weight, and the
        # word is duplicated so the n-gram loops below still have work to do
        if new_text in dictionary.keys():
            l = [dictionary[new_text]] * 100
            values.append(l)
        new_text = new_text + ' ' + new_text
    else:
        # An exact match of the whole normalized message also gets a heavy weight
        if new_text in dictionary.keys():
            l = [dictionary[new_text]] * 100
            values.append(l)

    # Build every n-gram of the message, from unigrams up to the full sentence
    for i in range(1, len(new_text.split()) + 1):
        sentence = make_keys(new_text, i)
        sentences.append(sentence)

    # Collect the answer index of every n-gram that matches a known key,
    # weighting each match by the n-gram length (i + 1) so that longer,
    # more specific matches carry more votes
    for i in range(len(sentences)):
        attention = sentences[i]
        for j in range(len(attention)):
            if attention[j] in dictionary.keys():
                l = [dictionary[attention[j]]] * (i + 1)
                values.append(l)

    if len([item for sublist in values for item in sublist]) == 0:
        bot_input_ids = "I'm sorry, either I didn't understand the question, or it is not part of my domain of expertise... :( Try asking it in another way or using other words. Maybe then I can help you!"
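        # None of the message's n-grams matched a key in the dictionary, so
        # the canned apology above is used as the reply instead of guessing
        # an answer; it is appended to the history exactly like a normal reply.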
        history.append((message, bot_input_ids))
        return history, history

    else:
        # Flatten the collected votes and answer with the most frequent index
        values = [item for sublist in values for item in sublist]
        prediction = mode(values)
        bot_input_ids = answers[int(prediction) - 1]
        history.append((message, bot_input_ids))
        return history, history


title = "Basic Chatbot - By Teeny-Tiny Castle 🏰"

head = ( "