Spaces:

LLMproj1
/

my_persona

Build error

App Files Files Community

my_persona / app.py

LLMproj1

Upload folder using huggingface_hub

783a29f verified over 1 year ago

raw

history blame contribute delete

2.39 kB

	# -*- coding: utf-8 -*-
	"""Untitled18.ipynb

	Automatically generated by Colab.

	Original file is located at
	https://colab.research.google.com/drive/1_vTVH3hBX8wVXIgrW1T2Q4N1DSkWoXV8
	"""



	import gradio as gr
	import torch
	from transformers import TextStreamer
	from unsloth import FastLanguageModel
	from google.colab import drive
	import os

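	# NOTE: the packages imported above (gradio, torch, transformers, unsloth)
	# must already be installed, e.g. via requirements.txt; this script does not
	# install anything itself.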



	# ---- Model loading configuration ----
	# 4-bit quantization lowers memory usage; it may be set to False.
	load_in_4bit = True
	# None lets the dtype be auto-detected (Float16 for Tesla T4/V100,
	# Bfloat16 for Ampere and newer).
	dtype = None
	# Any value may be chosen here; RoPE scaling is handled internally.
	max_seq_length = 2048

	# Load the fine-tuned weights (the model that was used for training)
	# together with the matching tokenizer.
	model, tokenizer = FastLanguageModel.from_pretrained(
	    max_seq_length=max_seq_length,
	    load_in_4bit=load_in_4bit,
	    dtype=dtype,
	    model_name="lora_model",
	)

	# Switch the model to Unsloth's native (2x faster) inference mode.
	FastLanguageModel.for_inference(model)

	# Alpaca-style prompt template with two positional `{}` slots: the first is
	# filled with the user's message (under "### Input:") and the second with the
	# response text (under "### Response:"), which chat_alpaca leaves empty.
	alpaca_prompt = """
	### Input:
	{}

	### Response:
	{}"""

	# Generate a single reply from the fine-tuned model.
	def chat_alpaca(
	    message: str,
	    history: list,
	    temperature: float,
	    max_new_tokens: int,
	    top_p: "float | None" = None,
	) -> str:
	    """Generate the model's reply to one user message.

	    The message is inserted into ``alpaca_prompt`` (response slot left
	    empty), tokenized, and passed to ``model.generate``. Only the tokens
	    produced after the prompt are decoded, so the returned text does not
	    repeat the prompt back to the user.

	    Args:
	        message: Latest user message.
	        history: Earlier chat turns as supplied by the caller. Currently
	            unused: every message is answered independently.
	        temperature: Sampling temperature.
	        max_new_tokens: Upper bound on the number of generated tokens;
	            coerced to ``int`` because UI sliders may deliver floats.
	        top_p: Optional nucleus-sampling threshold. When ``None`` the
	            argument is not forwarded and the model's own default applies.

	    Returns:
	        The decoded reply with special tokens removed and surrounding
	        whitespace stripped.
	    """
	    prompt = alpaca_prompt.format(message, "")
	    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
	    # Remember where the prompt ends so it can be cut from the output.
	    prompt_length = inputs["input_ids"].shape[1]

	    # Sampling must be enabled explicitly; otherwise temperature/top_p are
	    # ignored by greedy decoding.
	    generation_kwargs = {
	        "max_new_tokens": int(max_new_tokens),
	        "do_sample": True,
	        "temperature": float(temperature),
	    }
	    if top_p is not None:
	        generation_kwargs["top_p"] = float(top_p)

	    # The streamer echoes tokens to stdout while they are generated.
	    text_streamer = TextStreamer(tokenizer)
	    outputs = model.generate(**inputs, streamer=text_streamer, **generation_kwargs)

	    # `outputs` holds prompt + completion; keep only the completion tokens.
	    completion_ids = outputs[:, prompt_length:]
	    response = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]
	    return response.strip()


	# Callback wired into the Gradio chat interface.
	def respond(message, history, system_message, max_new_tokens, temperature, top_p):
	    """Adapt the Gradio callback arguments to ``chat_alpaca``.

	    Args:
	        message: Latest user message.
	        history: Chat history provided by Gradio.
	        system_message: Text from the "System message" box. Accepted because
	            the UI supplies it, but not used: ``alpaca_prompt`` has no slot
	            for a system message.
	        max_new_tokens: Value of the "Max new tokens" slider.
	        temperature: Value of the "Temperature" slider.
	        top_p: Value of the "Top-p (nucleus sampling)" slider.

	    Returns:
	        The reply text produced by ``chat_alpaca``.
	    """
	    return chat_alpaca(message, history, temperature, max_new_tokens, top_p=top_p)

	# Extra controls for the chat UI. Their order matches the parameters of
	# `respond` that follow (message, history).
	_extra_controls = [
	    gr.Textbox(label="System message", value="You are a friendly Chatbot."),
	    gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
	    gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
	    gr.Slider(
	        label="Top-p (nucleus sampling)",
	        minimum=0.1,
	        maximum=1.0,
	        step=0.05,
	        value=0.95,
	    ),
	]

	# Chat interface that routes every user message through `respond`.
	demo = gr.ChatInterface(fn=respond, additional_inputs=_extra_controls)

	# Start the app (with a public share link) only when run as a script.
	if __name__ == "__main__":
	    demo.launch(share=True)