ask-away

Runtime error

App Files Files Community

ask-away / app.py

botsi

Update app.py

5306510 verified almost 2 years ago

raw

history blame contribute delete

17 kB

	import gradio as gr
	import time
	import random
	import json
	import mysql.connector
	import os
	import csv
	import spaces
	import torch

	from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
	from threading import Thread
	from typing import Iterator
	from huggingface_hub import Repository, hf_hub_download
	from datetime import datetime

	# for fetch_personalized_data
	import mysql.connector
	import urllib.parse
	import urllib.request

	# for saving chat history as JSON - not used
	import atexit
	import os
	from huggingface_hub import HfApi, HfFolder

	# for saving chat history as dataset - not used
	import huggingface_hub
	from huggingface_hub import Repository
	from datetime import datetime

	# for saving chat history as dataset - used
	import sqlite3
	import huggingface_hub
	import gradio as gr
	import pandas as pd
	import shutil
	import os
	import datetime
	from apscheduler.schedulers.background import BackgroundScheduler

	# Hub dataset that receives the chat logs, and the local directory it is
	# cloned into when this module is imported.
	DATASET_REPO_URL = "https://huggingface.co/datasets/botsi/trust-game-llama-2-chat-history"
	DATA_DIRECTORY = "data"  # local clone of the dataset repository
	DATA_FILENAME = "newmarion.csv"  # CSV file the chat log is written to
	DATA_FILE = os.path.join(DATA_DIRECTORY, DATA_FILENAME)

	# Startup diagnostics: report whether a token is configured (never its value)
	# and which huggingface_hub version is installed.
	HF_TOKEN = os.getenv("HF_TOKEN")
	print("is none?", HF_TOKEN is None)
	print("hfh", huggingface_hub.__version__)

	# Clone the dataset repository into DATA_DIRECTORY; the handle is kept so
	# that later code can push updated files back to the Hub.
	repo = Repository(clone_from=DATASET_REPO_URL, local_dir=DATA_DIRECTORY)

	# Generation limits. MAX_INPUT_TOKEN_LENGTH can be overridden through the
	# environment variable of the same name and falls back to 4096.
	MAX_MAX_NEW_TOKENS = 2048
	DEFAULT_MAX_NEW_TOKENS = 1024
	MAX_INPUT_TOKEN_LENGTH = int(os.environ.get("MAX_INPUT_TOKEN_LENGTH", "4096"))

	# Markdown shown above the chat widget.
	DESCRIPTION = """\
	# This is your personal space to chat.
	You can ask anything.
	"""

	# Markdown footer: license and acceptable use policy by Meta.
	LICENSE = """
	<p/>

	---
	This demo is governed by the [original license](https://ai.meta.com/llama/license/) and [acceptable use policy](https://ai.meta.com/llama/use-policy/).
	The most recent copy of this policy can be found at ai.meta.com/llama/use-policy.
	"""

	# The model and tokenizer are only created when a CUDA device is present;
	# without one, the page description gets a notice instead and `model` /
	# `tokenizer` stay undefined.
	if torch.cuda.is_available():
	    model_id = "meta-llama/Llama-2-7b-chat-hf"
	    model = AutoModelForCausalLM.from_pretrained(
	        model_id,
	        torch_dtype=torch.float16,
	        device_map="auto",
	    )
	    tokenizer = AutoTokenizer.from_pretrained(model_id)
	    tokenizer.use_default_system_prompt = False
	else:
	    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"

	## trust-game-llama-2-7b-chat
	# app.py
	def get_default_system_prompt():
	    """Return the system prompt that sets the assistant's persona.

	    The prompt text is also printed to stdout on every call, which makes it
	    visible in the application log.

	    Returns:
	        The system prompt as a plain string, without any chat-format markup.
	    """
	    # Plain string literal: the text has no placeholders, so no f-string is
	    # needed.
	    system_prompt = """You are a friendly chat assistant. First, wait for a question. Do not talk before being asked. When the user says hi, politely ask what he/she wants to know.
	Do not tell any more!
	If asked about Marion, you can answer the following: There are only two Marions, one is currently working at Accenture and the other one will shortly begin a new job at BMW.
	She recently quit Accenture. Nicknames by her friends include botsi and botsokrates. Her favorite project was Hypercare. Kathy/Kathi is her colleague at Accenture. Anita is her colleague at Accenture.
	"""
	    print(system_prompt)
	    return system_prompt

	## trust-game-llama-2-7b-chat
	# app.py
	def construct_input_prompt(chat_history, message):
	    """Assemble a single prompt string from the system prompt and the chat.

	    The string opens with the system prompt wrapped in ``<<SYS>>`` markers,
	    continues with every ``(user, assistant)`` pair from ``chat_history`` and
	    ends with ``message`` followed by ``[/INST] ``.

	    NOTE(review): turns are separated by `` <s>[INST] `` with no ``</s>``
	    after the assistant text -- confirm this against the Llama-2 chat format.
	    """
	    # Collect the segments and concatenate once at the end.
	    segments = [f"<s>[INST] <<SYS>>\n{get_default_system_prompt()}\n<</SYS>>\n\n "]
	    segments.extend(
	        f"{user} [/INST] {assistant} <s>[INST] "
	        for user, assistant in chat_history
	    )
	    segments.append(f"{message} [/INST] ")
	    return "".join(segments)

	## trust-game-llama-2-7b-chat
	# app.py
	def _append_conversation_log(conversation, reply_text, ip_address, timestamp):
	    """Append one chat exchange to the CSV log inside the dataset clone.

	    Each message dict in ``conversation`` becomes one row; the reply text,
	    client IP and timestamp are repeated on every row of the exchange. Rows
	    already present in the CSV are kept and the new rows are added after them.
	    """
	    data_file = os.path.join(DATA_DIRECTORY, DATA_FILENAME)

	    conversation_df = pd.DataFrame(conversation)
	    conversation_df['ip_address'] = ip_address
	    conversation_df['readable_sentence'] = reply_text
	    conversation_df['timestamp'] = timestamp

	    if os.path.exists(data_file):
	        existing_data = pd.read_csv(data_file)
	        conversation_df = pd.concat([existing_data, conversation_df], ignore_index=True)
	    conversation_df.to_csv(data_file, index=False)


	@spaces.GPU
	def generate(
	    request: gr.Request,  # To fetch query params
	    message: str,
	    chat_history: list[tuple[str, str]],
	    max_new_tokens: int = 1024,
	    temperature: float = 0.6,
	    top_p: float = 0.9,
	    top_k: int = 50,
	    repetition_penalty: float = 1.2,
	) -> Iterator[str]:
	    """Stream the model's reply to ``message`` and log the exchange.

	    Args:
	        request: The incoming request; its query params, headers and client
	            host are printed, and the client host is stored in the log.
	        message: The user's new message.
	        chat_history: Earlier ``(user, assistant)`` turns of this chat.
	        max_new_tokens: Forwarded to ``model.generate``.
	        temperature: Forwarded to ``model.generate``.
	        top_p: Forwarded to ``model.generate``.
	        top_k: Forwarded to ``model.generate``.
	        repetition_penalty: Forwarded to ``model.generate``.

	    Yields:
	        The reply text accumulated so far, once per streamed chunk.

	    After the stream is exhausted the exchange is appended to the CSV log
	    and the dataset repository is pushed to the Hub without blocking.
	    """
	    # Fetch query params and print request metadata to the application log.
	    params = dict(request.query_params.items())
	    print('those are the query params')
	    print(params)

	    print("Request headers dictionary:", request.headers)
	    print("IP address:", request.client.host)
	    print("Query parameters:", params)

	    # The system message carries only the system prompt. The chat template
	    # applied below adds the chat-format markup and the history itself, so
	    # passing a pre-formatted prompt here would wrap the markup twice and
	    # feed the history to the model two times.
	    conversation = [{"role": "system", "content": get_default_system_prompt()}]
	    for user, assistant in chat_history:
	        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
	    conversation.append({"role": "user", "content": message})

	    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
	    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
	        # Keep only the last MAX_INPUT_TOKEN_LENGTH tokens of the prompt.
	        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
	        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
	    input_ids = input_ids.to(model.device)

	    # Set up the TextIteratorStreamer
	    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

	    # Set up the generation arguments
	    generate_kwargs = dict(
	        input_ids=input_ids,
	        streamer=streamer,
	        max_new_tokens=max_new_tokens,
	        do_sample=True,
	        top_p=top_p,
	        top_k=top_k,
	        temperature=temperature,
	        num_beams=1,
	        repetition_penalty=repetition_penalty,
	    )

	    # Run generation in a worker thread so this generator can consume the
	    # streamer while the model is still producing tokens.
	    t = Thread(target=model.generate, kwargs=generate_kwargs)
	    t.start()

	    # Yield generated text chunks
	    outputs = []
	    for text in streamer:
	        outputs.append(text)
	        yield "".join(outputs)

	    # Everything below runs once the stream has ended. The logged reply uses
	    # the same concatenation as the text yielded above, so the log matches
	    # what the user saw.
	    readable_sentence = "".join(outputs).strip()
	    print(readable_sentence)

	    # `datetime` is the module here: the file's last datetime import is a
	    # plain `import datetime`, which rebinds the name.
	    timestamp = datetime.datetime.now()

	    # Save chat history to .csv file on HuggingFace Hub
	    _append_conversation_log(conversation, readable_sentence, request.client.host, timestamp)

	    print("Updating .csv")
	    repo.push_to_hub(blocking=False, commit_message=f"Updating data at {timestamp}")

	# Chat widget backed by `generate`. The retry, undo and clear buttons are
	# passed as None, and two example prompts are offered.
	chat_interface = gr.ChatInterface(
	    fn=generate,
	    chatbot=gr.Chatbot(
	        avatar_images=('user.png', 'bot.png'),
	        bubble_full_width=False,
	    ),
	    examples=[
	        ["What is your favorite fruit?"],
	        ["What do you think about AI in the workplace?"],
	    ],
	    retry_btn=None,
	    undo_btn=None,
	    clear_btn=None,
	)

	# Page layout, top to bottom: description, chat widget, license footer.
	with gr.Blocks(
	    css="style.css",
	    theme=gr.themes.Default(
	        primary_hue=gr.themes.colors.emerald,
	        secondary_hue=gr.themes.colors.indigo,
	    ),
	) as demo:
	    gr.Markdown(DESCRIPTION)
	    chat_interface.render()
	    gr.Markdown(LICENSE)

	if __name__ == "__main__":
	    # Enable the request queue with max_size=20, then start the app.
	    demo.queue(max_size=20)
	    demo.launch()
	    # Alternative for sharing and debug mode: demo.launch(share=True, debug=True)






	# NOTE: An earlier revision of this module (adapted from
	# https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat and
	# modified for trust game purposes) used to be kept here inside a bare
	# triple-quoted string. It was never executed and duplicated the live code
	# above, so it has been removed; use the repository history to consult it.