import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load the fine-tuned TinyLlama checkpoint and its tokenizer once at startup.
tokenizer = AutoTokenizer.from_pretrained("Gbssreejith/new_TinyLlama3")
model = AutoModelForCausalLM.from_pretrained("Gbssreejith/new_TinyLlama3")

# Build the generation pipeline once, rather than on every request.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=50,  # total length (prompt + completion); raise for longer answers
)


def generate_response(prompt):
    # Wrap the prompt in the [INST] ... [/INST] instruction template the model was trained on.
    result = pipe(f"[INST] {prompt} [/INST]")
    generated = result[0]["generated_text"]
    # The pipeline echoes the prompt; return only the text after the instruction tag.
    return generated.split("[/INST]", 1)[-1].strip()


iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Text Generation",
    description="Enter a prompt and get a generated response.",
    examples=[
        ["I'm having trouble sleeping. Any advice?"],
        ["I'm sad and I don't know what to do."],
    ],
)

iface.launch(share=True, debug=True)