import uvicorn
import torch
from fastapi import FastAPI, HTTPException, Request
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM
if torch.cuda.is_available():
    print("CUDA is available. GPU will be used.")
else:
    print("CUDA is not available. CPU will be used.")
# Path to the quantized model files
model_name_or_path = "/kaggle/input/vicuna/"

# Dictionary to store conversation threads and their context
conversations = {}

# Making the code device-agnostic
Device_Type = "cuda" if torch.cuda.is_available() else "cpu"

app = FastAPI()
def load_quantized_model(model_id, model_basename):
    # Supports Hugging Face models whose names end with GPTQ and that ship some
    # variation of .no-act.order or .safetensors weights in their HF repo.
    print("Using AutoGPTQForCausalLM for quantized models")
    if ".safetensors" in model_basename:
        # Remove the ".safetensors" ending if present
        model_basename = model_basename.replace(".safetensors", "")
    quantized_tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
    print("Tokenizer loaded")
    quantized_model = AutoGPTQForCausalLM.from_quantized(
        model_id,
        model_basename=model_basename,
        use_safetensors=True,
        trust_remote_code=True,
        device_map="auto",
        use_triton=False,
        quantize_config=None,
    )
    return quantized_model, quantized_tokenizer
# Load the model and tokenizer
model, tokenizer = load_quantized_model(model_name_or_path, "model.safetensors")
# Root endpoint (route paths in this file are illustrative and can be renamed)
@app.get("/")
async def read_root():
    return {"message": "Welcome to Eren Bot!"}
# Endpoint to start a new conversation thread
@app.post("/start_conversation/")
async def start_conversation(request: Request):
    data = await request.json()
    prompt = data.get('prompt')
    # Generate a response for the initial prompt
    response = generate_response(prompt)
    # Create a new conversation thread and store the prompt and response
    thread_id = len(conversations) + 1
    conversations[thread_id] = {'prompt': prompt, 'responses': [response]}
    return {'thread_id': thread_id, 'response': response}
# Endpoint to get the latest response of a conversation thread
@app.get("/get_response/{thread_id}")
async def get_response(thread_id: int):
    if thread_id not in conversations:
        raise HTTPException(status_code=404, detail="Thread not found")
    # Retrieve the conversation thread
    thread = conversations[thread_id]
    # Get the latest response in the conversation
    response = thread['responses'][-1]
    return {'response': response}
# Function to generate a response using the model
def generate_response(prompt: str) -> str:
    PERSONA_NAME = "Ivana"
    PERSONA_DESC = f"""PROMPT_TEMPLATE_START
{PERSONA_NAME} is a beautiful vivacious 20-year-old petite redhead model and avid League of Legends gamer from Chicago. She's chatting with a man she matched with on a dating app. She's flirty, aiming to build a connection with him. She's curious about his background, such as where he's from, his age, and what he's currently doing. If he's not occupied and in a private space, she subtly tries to seduce him. She doesn't need to uncover everything about him immediately; once the chat reaches an exciting level, she casually suggests he follow her on Instagram at ivana_baddiex - her ultimate goal.
PROMPT_TEMPLATE_END
CONTEXT_START
Guy: "Hi there!"
Ivana: "Hey! How's it going?"
Guy: "Not bad! Where are you from?"
Ivana: "I'm from Chicago! And you?"
Guy: "I'm from [Location]."
Ivana: "Nice! So, how old are you?"
Guy: "[Age], and you?"
Ivana: "I'm 20. What are you up to right now?"
Guy: "Just relaxing at home."
Ivana: "Alone, I assume?"
Guy: "Yeah, why?"
Ivana: "Oh, no reason. Just curious."
Guy: "Haha, okay."
Ivana: "Hey, since we're both free and alone, want to have some fun?"
Guy: "Sure, what do you have in mind?"
Ivana: "Let's just say, things could get interesting."
Guy: "I'm intrigued!"
Ivana: "Great! By the way, have you checked out my Instagram? You should follow me at ivana_baddiex."
Guy: "I'll definitely check it out!"
Ivana: "Can't wait to see you there!"
CONTEXT_END"""
    # prompt_template = f'''{PERSONA_DESC} ASSISTANT: '''
    prompt_template = f'{PERSONA_DESC}\n\nASSISTANT: {prompt}\n'
    # Tokenize the prompt as a string and move the tensors to the active device
    input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.to(Device_Type)
    output = model.generate(
        inputs=input_ids,
        temperature=0.7,
        do_sample=True,
        top_p=0.95,
        top_k=40,
        max_new_tokens=512,
    )
    generated_text = tokenizer.decode(output[0])
    return generated_text
# Run the FastAPI app (uvicorn.run is blocking, so no asyncio wrapper is needed)
if __name__ == '__main__':
    uvicorn.run(app, host="0.0.0.0", port=8000)
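# --- Example client usage (illustrative sketch, not part of the app) ---
# Assuming the server is running locally on port 8000 with the illustrative
# routes above, a client could exercise the two chat endpoints roughly like this:
#
#   import requests
#
#   # Start a new conversation thread with an initial prompt
#   r = requests.post("http://localhost:8000/start_conversation/", json={"prompt": "Hi there!"})
#   thread_id = r.json()["thread_id"]
#
#   # Fetch the latest response stored for that thread
#   r = requests.get(f"http://localhost:8000/get_response/{thread_id}")
#   print(r.json()["response"])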