|
import gradio as gr |
|
import cuml.umap |
|
import cuml.hdbscan |
|
|
|
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig |
|
|
|
# Quantize model weights to 4-bit (via bitsandbytes) so the 27B model fits in GPU memory.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
|
|
|
|
|
|
|
def answer_llm(text, tokenizer=None, model=None):
    """Generate an LLM answer for *text*.

    Args:
        text: The prompt string to send to the model.
        tokenizer: Optional tokenizer; falls back to the module-level
            ``tokenizer`` loaded in the Gradio block.
        model: Optional causal LM; falls back to the module-level ``model``.

    Returns:
        The decoded generation (including special tokens) as a string.
    """
    # BUG fix: the original used `tokenizer=tokenizer, model=model` as defaults,
    # which are evaluated at def-time — before those globals exist — raising
    # NameError on import. Resolve them lazily at call time instead.
    if tokenizer is None:
        tokenizer = globals()["tokenizer"]
    if model is None:
        model = globals()["model"]

    # BUG fix: the original referenced an undefined name `input_text`;
    # the parameter is `text`.
    input_ids = tokenizer(text, return_tensors="pt").to("cuda")
    outputs = model.generate(**input_ids)
    return tokenizer.decode(outputs[0])
|
|
|
with gr.Blocks() as demo:
    # Load the tokenizer and the 4-bit quantized model once at app startup.
    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b")
    model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-2-27b",
        quantization_config=quantization_config)

    # BUG fix: `upload_file` was referenced below but never defined anywhere
    # in the file (NameError). Echo the uploaded file back so gr.File shows it.
    def upload_file(file):
        return file

    file_output = gr.File()
    # NOTE(review): `opt` is never wired to any event — presumably a leftover
    # placeholder; kept to preserve the rendered UI.
    opt = gr.Label()
    upload_button = gr.UploadButton("Click to Upload a File")
    upload_button.upload(upload_file, upload_button, file_output)

    text = gr.Textbox(label="Input Text")
    output = gr.Textbox(label="Answer")

    # BUG fix: Gradio event listeners take `inputs=` / `outputs=` (plural);
    # the original `input=` / `output=` keywords raise a TypeError.
    text.change(fn=answer_llm, inputs=text, outputs=output)

demo.launch()
|
|
|
|
|
|
|
|