import gradio as gr
import torch
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM
|
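# Load the Typhoon 7B RAG-instruct checkpoint. The model is moved to "cuda"
# to match the device of the tokenized inputs below; float16 is an assumed
# dtype so the 7B weights fit comfortably on a single GPU.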
tokenizer = AutoTokenizer.from_pretrained("ping98k/typhoon-7b-rag-instruct-th")
model = AutoModelForCausalLM.from_pretrained(
    "ping98k/typhoon-7b-rag-instruct-th",
    torch_dtype=torch.float16,  # assumption: the original loaded full precision
).to("cuda")  # the model must sit on the same device as the inputs
|
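# ZeroGPU decorator: requests a GPU slot for up to 120 seconds per call.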
@spaces.GPU(duration=120)
def response(instruction, history, inputText):
    # `history` is supplied by gr.ChatInterface but unused here: each turn
    # is answered from the pasted document alone.
    inp = f"""### Instruction:
{instruction}

### Input:
=======START OF DOCUMENT=======
{inputText}
=======END OF DOCUMENT=======

### Response:"""

    inputs = tokenizer(inp, return_tensors="pt").to("cuda")
    # The library default of 20 new tokens truncates most answers; 512 is
    # an assumed cap, not an author-specified value.
    output = model.generate(**inputs, max_new_tokens=512)
    # Decode only the newly generated tokens so the prompt is not echoed
    # back into the chat reply.
    outputText = tokenizer.decode(
        output[0][inputs["input_ids"].shape[1]:],
        skip_special_tokens=True,
    )
    return outputText
|
|
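# gr.ChatInterface calls response(message, history, *additional_inputs);
# the extra textbox supplies the document passed in as `inputText`.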
gr.ChatInterface(
    response,
    additional_inputs=[
        gr.Textbox("You are helpful AI.", label="Input Text"),
    ],
).launch()