"""Gradio chat UI backed by a local WizardLM GGUF model.

Each user message is sent to the model, the reply is passed through a
word-level spell checker, and the result is streamed to the chat window.
"""

import asyncio
import time

import gradio as gr
from ctransformers import AutoModelForCausalLM
from spellchecker import SpellChecker

llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/WizardLM-7B-uncensored-GGUF",
    model_file="WizardLM-7B-uncensored.Q3_K_M.gguf",
    model_type="llama",
    stream=True,
)

# Module-level transcript buffer: a prefix entry followed by the latest reply.
# NOTE(review): the buffer starts as [""] but is reset to ["Chatbot:"] after
# every reply, so only the very first reply lacks the "Chatbot:" prefix --
# confirm whether that is intended.
history = [""]
spell = SpellChecker()


def correct_words(text):
    """Return *text* with every whitespace-separated word spell-corrected.

    Words are re-joined with single spaces, so the original spacing and
    line breaks of *text* are not preserved.
    """
    # ``SpellChecker.correction`` may return None when it has no candidate;
    # fall back to the original word so ``join`` never receives None.
    return " ".join(spell.correction(word) or word for word in text.split())


async def generate_async(tokens):
    """Asynchronously yield each item of *tokens* in order."""
    for token in tokens:
        yield token


def _generate_text(prompt):
    """Run one blocking, non-streaming completion of *prompt*; return its text."""
    # Calling the model object tokenizes the prompt and detokenizes the output
    # itself; ``stream=False`` overrides the ``stream=True`` load-time default
    # so a single string is returned.
    return llm(
        prompt,
        top_k=50,
        top_p=0.95,
        temperature=1.0,
        repetition_penalty=1.0,
        last_n_tokens=1,
        stream=False,
    )


async def generate_response_async(message):
    """Yield spell-corrected model replies to *message*.

    Two independent completions are generated. After each one the corrected
    reply is appended to the global ``history`` buffer, the space-joined
    buffer is yielded, and the buffer is reset to ``["Chatbot:"]``.
    """
    global history
    loop = asyncio.get_running_loop()
    for _ in range(2):
        # The model call is blocking; run it in the default executor so the
        # event loop stays responsive while the reply is generated.
        response_text = await loop.run_in_executor(None, _generate_text, message)
        await asyncio.sleep(2)
        history.append(correct_words(response_text))
        yield " ".join(history)
        # Clear the history buffer once the reply has been delivered.
        history = ["Chatbot:"]


async def chatbot(message, history):
    """Chat handler for ``gr.ChatInterface``: stream the replies to *message*.

    *history* (the conversation supplied by Gradio) is accepted to satisfy the
    handler signature but is not used.
    """
    async for response in generate_response_async(message):
        await asyncio.sleep(0.1)  # Optional delay for a natural chat feel
        yield response


iface = gr.ChatInterface(chatbot)
iface.launch()