"""Gradio demo that streams the output of the ``llama2.mojo`` program."""

import asyncio
import codecs
import os
import subprocess
import sys
from typing import AsyncIterator, Optional

import gradio as gr

# Upper bound on the number of bytes taken from the child's stdout per read.
# A read returns as soon as any data is available, so streaming stays
# responsive without waking up once per byte.
_READ_CHUNK_SIZE = 1024


async def generate(prompt: Optional[str] = None) -> AsyncIterator[str]:
    """Run ``mojo llama2.mojo`` and stream its standard output.

    The child's stdout is read in chunks without blocking the event loop,
    decoded incrementally as UTF-8 (so multi-byte characters that straddle a
    read boundary are handled), echoed to this process's stdout, and
    accumulated.

    Args:
        prompt: Currently unused; it is not forwarded to the child process.
            It is optional because the interface below declares no input
            components.

    Yields:
        The full text generated so far, once per decoded chunk.

    Raises:
        OSError: If the ``mojo`` executable cannot be started.
    """
    process = await asyncio.create_subprocess_exec(
        "mojo",
        "llama2.mojo",
        stdout=asyncio.subprocess.PIPE,
        # stderr is never displayed; discarding it (rather than piping it and
        # never reading) keeps the child from stalling on a full pipe buffer.
        stderr=asyncio.subprocess.DEVNULL,
    )
    # Invalid byte sequences become U+FFFD instead of aborting the stream.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    text = ""
    reached_eof = False
    try:
        while True:
            chunk = await process.stdout.read(_READ_CHUNK_SIZE)
            if not chunk:
                break
            piece = decoder.decode(chunk)
            if not piece:
                # Only part of a multi-byte character has arrived so far.
                continue
            sys.stdout.write(piece)
            text += piece
            yield text

        # Flush whatever the decoder is still holding (a truncated sequence).
        tail = decoder.decode(b"", final=True)
        if tail:
            sys.stdout.write(tail)
            text += tail
            yield text
        reached_eof = True
    finally:
        # If the consumer stopped early (cancelled or disconnected), do not
        # leave the generation running in the background.
        if not reached_eof and process.returncode is None:
            process.kill()
        # Always reap the child so it does not linger as a zombie.
        await process.wait()


output_text = gr.Textbox(label="Generated Text")

demo = gr.Interface(
    fn=generate,
    inputs=None,
    outputs=output_text,
    description="""
# llama2.🔥
## [Mojo](https://docs.modular.com/mojo/) implementation of [llama2.c](https://github.com/karpathy/llama2.c) by [@tairov](https://github.com/tairov)
Source: https://github.com/tairov/llama2.mojo
""",
    allow_flagging="never",
)

# Queuing is enabled before launch; the handler above streams its result as a
# generator.
demo.queue()
demo.launch(server_name="0.0.0.0")