# Gradio chat demo ("CodeGuru") backed by the PEFT model named in `model_id`.
import random
import time

import gradio as gr
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer

# load model once
model_id = "hikinegi/Llama-JAVA_tuned"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoPeftModelForCausalLM.from_pretrained(
    model_id, device_map='auto', torch_dtype=torch.float16
)


def generate_pred(text: str) -> str:
    """Generate the model's answer to a single user instruction.

    Args:
        text: The user's question; it is inserted into the instruction
            prompt template below.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        not echoed back and special tokens are stripped).
    """
    # NOTE(review): the "<>" markers look like system-prompt delimiters that
    # lost characters somewhere; kept verbatim -- confirm against the format
    # the model was fine-tuned with.
    prompt = f"[INST]<>\nBelow is an instruction that describes a task. Write a response that appropriately completes the request.\n<>\n{text}[/INST]"

    # The model is loaded with device_map='auto', so the input tensors must
    # be moved to whatever device the model ended up on before generating.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Disable gradient calculation: this is inference only.
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=1024,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt tokens followed by the continuation for
    # decoder-only models; slice the prompt off so the chat shows only the
    # answer.
    prompt_length = inputs["input_ids"].shape[-1]
    answer_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(answer_tokens, skip_special_tokens=True)


with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    # NOTE(review): user-facing banner kept verbatim; "your'e" is presumably
    # a typo for "your" -- confirm before changing the displayed text.
    gr.Markdown("\n\nCodeGuru will answer all of your'e JAVA coding Question\n\n")
    chatbot = gr.Chatbot(label="CodeGuru")
    msg = gr.Textbox(label="Question")
    clear = gr.ClearButton([msg, chatbot])

    def user(user_message: str, history: list) -> tuple:
        """Append the submitted question to the history and clear the textbox.

        Returns:
            A pair of (new textbox value, new history); the new history ends
            with ``[user_message, None]`` so `bot` can fill in the reply.
        """
        return "", history + [[user_message, None]]

    def bot(history: list):
        """Answer the latest question and stream the reply into the chat.

        Yields:
            The history after each appended character of the reply, pausing
            0.05 s between characters to produce a typing effect.
        """
        bot_message = generate_pred(history[-1][0])
        history[-1][1] = ""
        for character in bot_message:
            history[-1][1] += character
            time.sleep(0.05)
            yield history

    # First record the question (unqueued), then stream the model's reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

    # Feedback buttons; no click handlers are attached to them here.
    with gr.Row(visible=True) as button_row:
        upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
        downvote_btn = gr.Button(value="👎 Downvote", interactive=True)

demo.queue()
demo.launch(debug=True)