import gradio as gr
import torch
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the Thai RAG-instruct fine-tune of Typhoon-7B once at startup.
tokenizer = AutoTokenizer.from_pretrained("ping98k/typhoon-7b-rag-instruct-th")
model = AutoModelForCausalLM.from_pretrained(
    "ping98k/typhoon-7b-rag-instruct-th",
    torch_dtype=torch.float16,  # assumed dtype; fp32 weights for a 7B model need ~28 GB
).to("cuda")  # move the model to GPU so it matches the device of the inputs below


@spaces.GPU(duration=120)
def response(instruction, history, inputText):
    # Build the prompt in the instruction/input/response format the model was fine-tuned on.
    inp = f"""### Instruction:
{instruction}

### Input:
=======START OF DOCUMENT=======
{inputText}
=======END OF DOCUMENT=======

### Response:"""
    input_ids = tokenizer(inp, return_tensors="pt").to("cuda")
    # max_new_tokens is an assumed value; without it generate() stops after ~20 tokens.
    output = model.generate(**input_ids, max_new_tokens=512)
    # Decode only the newly generated tokens, skipping the echoed prompt
    # and any special tokens (note: the keyword is skip_special_tokens).
    prompt_len = input_ids["input_ids"].shape[1]
    outputText = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
    return outputText


gr.ChatInterface(
    response,
    additional_inputs=[
        gr.Textbox("You are a helpful AI.", label="Input Text"),
    ],
).launch()