dolphin / app.py
nroggendorff's picture
Update app.py
5c3c196 verified
raw
history blame
537 Bytes
import gradio as gr
import spaces
from vllm import LLM, SamplingParams
# Load the chat model into a single shared vLLM engine at import time.
# NOTE(review): this downloads/loads a 7B model — expect a long cold start.
MODEL_ID = "meta-llama/Llama-2-7B-Chat-hf"
llm = LLM(model=MODEL_ID)
# Default decoding settings shared by every request.
sampling_params = SamplingParams(top_p=0.95, temperature=0.8)
@spaces.GPU
def pipe(text: str) -> str:
    """Generate one completion for *text* with the shared vLLM engine.

    Args:
        text: The raw prompt string entered by the user.

    Returns:
        The generated continuation text of the first (and only) request.
    """
    # llm.generate takes a list of prompts and returns one RequestOutput
    # per prompt; we submit a single prompt, so index 0 is our request.
    # BUG FIX: the original built a generator expression and then
    # subscripted it (`output[0]`), which raises TypeError at runtime.
    tokens = llm.generate([text], sampling_params)
    # .outputs holds the candidate completions for that request; take
    # the first candidate's text.
    return tokens[0].outputs[0].text
if __name__ == "__main__":
    # Wire the completion function into a simple text-in / text-out UI.
    prompt_box = gr.Textbox(label="Prompt")
    response_box = gr.Textbox(label="Response")
    demo = gr.Interface(
        fn=pipe,
        inputs=prompt_box,
        outputs=response_box,
        title="Text Completion",
    )
    demo.launch()