| | import gradio as gr |
| | import time |
| | import random |
| | import json |
| | import mysql.connector |
| | import os |
| | import csv |
| | import spaces |
| | import torch |
| |
|
| | from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer |
| | from threading import Thread |
| | from typing import Iterator |
| | from huggingface_hub import Repository, hf_hub_download |
| | from datetime import datetime |
| |
|
| | |
| | import mysql.connector |
| | import urllib.parse |
| | import urllib.request |
| |
|
| | |
| | import atexit |
| | import os |
| | from huggingface_hub import HfApi, HfFolder |
| |
|
| | |
| | import huggingface_hub |
| | from huggingface_hub import Repository |
| | from datetime import datetime |
| |
|
| | |
| | import sqlite3 |
| | import huggingface_hub |
| | import gradio as gr |
| | import pandas as pd |
| | import shutil |
| | import os |
| | import datetime |
| | from apscheduler.schedulers.background import BackgroundScheduler |
| |
|
# Where the chat log lives: a Hugging Face dataset repository that is cloned
# into a local directory, and the CSV file inside that clone.
DATASET_REPO_URL = (
    "https://huggingface.co/datasets/botsi/trust-game-llama-2-chat-history"
)
DATA_DIRECTORY = "data"  # local directory used for the dataset clone
DATA_FILENAME = "newmarion.csv"  # CSV file name inside DATA_DIRECTORY
# Full path of the CSV log, derived from the two constants above.
DATA_FILE = os.path.join(DATA_DIRECTORY, DATA_FILENAME)
| |
|
# Read the Hub token from the environment and report only whether it is set
# (never its value), together with the installed huggingface_hub version.
# NOTE(review): HF_TOKEN is not passed to Repository below - authentication
# presumably comes from the environment/credential store; confirm.
HF_TOKEN = os.getenv("HF_TOKEN")
print("is none?", HF_TOKEN is None)
print("hfh", huggingface_hub.__version__)

# Clone the dataset repository into DATA_DIRECTORY; the resulting handle is
# used later to push the updated chat log.
repo = Repository(
    clone_from=DATASET_REPO_URL,
    local_dir=DATA_DIRECTORY,
)
| |
|
# Generation limits. MAX_INPUT_TOKEN_LENGTH can be overridden through the
# environment variable of the same name and falls back to 4096.
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.environ.get("MAX_INPUT_TOKEN_LENGTH", "4096"))

# Markdown shown above the chat window.
DESCRIPTION = (
    "# This is your personal space to chat.\n"
    "You can ask anything.\n"
)

# Markdown shown below the chat window: license and acceptable-use notice.
LICENSE = (
    "\n"
    "<p/>\n"
    "\n"
    "---\n"
    "This demo is governed by the [original license](https://ai.meta.com/llama/license/) and [acceptable use policy](https://ai.meta.com/llama/use-policy/).\n"
    "The most recent copy of this policy can be found at ai.meta.com/llama/use-policy.\n"
)
| |
|
# The model and tokenizer are only loaded when a CUDA device is available.
# Without one, nothing is loaded and the page description gets a notice
# instead (the names `model` and `tokenizer` then stay undefined).
if torch.cuda.is_available():
    model_id = "meta-llama/Llama-2-7b-chat-hf"
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.use_default_system_prompt = False
else:
    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
| | |
| | |
| | |
def get_default_system_prompt():
    """Return the fixed system prompt used for every conversation.

    The prompt text is also printed to stdout on each call, as before.

    Returns:
        str: The system prompt, ending with a newline.
    """
    # Plain string on purpose: the text contains no placeholders, so an
    # f-string would only make a stray "{" or "}" in the prompt a hazard.
    system_prompt = """You are a friendly chat assistant. First, wait for a question. Do not talk before being asked. When the user says hi, politely ask what he/she wants to know.
Do not tell any more!
If asked about Marion, you can answer the following: There are only two Marions, one is currently working at Accenture and the other one will shortly begin a new job at BMW.
She recently quit Accenture. Nicknames by her friends include botsi and botsokrates. Her favorite project was Hypercare. Kathy/Kathi is her colleague at Accenture. Anita is her colleague at Accenture.
"""
    print(system_prompt)
    return system_prompt
| |
|
| | |
| | |
def construct_input_prompt(chat_history, message):
    """Render the system prompt, prior turns and new message as one prompt.

    The text uses the Llama-2 chat markers: the system prompt sits inside
    ``<<SYS>> ... <</SYS>>`` within the first ``[INST]`` block, and every
    completed turn is closed with ``</s>`` before the next ``<s>[INST]``.

    Args:
        chat_history: Iterable of ``(user, assistant)`` pairs of earlier
            turns. ``None`` is treated as an empty history.
        message: The new user message.

    Returns:
        str: The rendered prompt, ending with an open ``[/INST] `` so the
        model continues with the assistant reply.
    """
    pieces = [f"<s>[INST] <<SYS>>\n{get_default_system_prompt()}\n<</SYS>>\n\n "]
    for user, assistant in chat_history or []:
        # "</s>" ends the finished assistant turn; it was missing before,
        # which ran consecutive turns together without an end-of-sequence.
        pieces.append(f"{user} [/INST] {assistant} </s><s>[INST] ")
    pieces.append(f"{message} [/INST] ")
    return "".join(pieces)
| |
|
| | |
| | |
def _client_host(request):
    """Return the caller's IP address, or None when it is not available."""
    client = getattr(request, "client", None)
    return getattr(client, "host", None)


def _append_chat_log(conversation, response_text, ip_address, timestamp):
    """Append the conversation rows plus reply metadata to the CSV log."""
    data_file = os.path.join(DATA_DIRECTORY, DATA_FILENAME)

    # One row per conversation message; the reply, caller IP and timestamp
    # are repeated on every row of this exchange.
    log_df = pd.DataFrame(conversation)
    log_df['ip_address'] = ip_address
    log_df['readable_sentence'] = response_text
    log_df['timestamp'] = timestamp

    # Keep earlier rows when a non-empty log already exists (a zero-byte
    # file cannot be parsed by read_csv, so it is treated as absent).
    if os.path.exists(data_file) and os.path.getsize(data_file) > 0:
        existing_data = pd.read_csv(data_file)
        log_df = pd.concat([existing_data, log_df], ignore_index=True)
    log_df.to_csv(data_file, index=False)


@spaces.GPU
def generate(
    request: gr.Request,
    message: str,
    chat_history: list[tuple[str, str]],
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[str]:
    """Stream the model's reply to ``message`` and log the exchange.

    The conversation (system prompt, earlier turns, new message) is rendered
    with the tokenizer's chat template, generation runs in a background
    thread, and the partial reply is yielded as it grows. Once streaming
    ends, the exchange is appended to the CSV log in ``DATA_DIRECTORY`` and
    the dataset repository is pushed without blocking.

    Args:
        request: Gradio request; its query parameters, headers and client
            host are printed, and the client host is logged. When it is
            ``None`` the request details are skipped and the IP is logged
            as ``None``.
        message: The new user message.
        chat_history: Earlier ``(user, assistant)`` turns.
        max_new_tokens: Forwarded to ``model.generate``.
        temperature: Forwarded to ``model.generate``.
        top_p: Forwarded to ``model.generate``.
        top_k: Forwarded to ``model.generate``.
        repetition_penalty: Forwarded to ``model.generate``.

    Yields:
        str: The reply text accumulated so far, after each streamed chunk.
    """
    if request is not None:
        params = dict(request.query_params.items())
        print('those are the query params')
        print(params)

        print("Request headers dictionary:", request.headers)
        print("IP address:", _client_host(request))
        print("Query parameters:", params)

    # Only the system prompt goes into the system message. Previously the
    # fully rendered prompt (already containing history and message with raw
    # [INST] markers) was used here, so the model saw every turn twice.
    conversation = []
    system_prompt = get_default_system_prompt()
    if system_prompt:
        conversation.append({"role": "system", "content": system_prompt})

    for user, assistant in chat_history:
        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        # Keep the most recent tokens when the conversation is too long.
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        num_beams=1,
        repetition_penalty=repetition_penalty,
    )

    # Generation runs in its own thread so the streamer can be consumed here.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)

    # Log exactly the text that was streamed to the user. Re-joining the
    # chunks with spaces and dropping whitespace-only chunks (as before)
    # altered spacing and lost line breaks.
    response_text = "".join(outputs)
    print(response_text)

    # `datetime` is the module here: the file's last `import datetime`
    # rebinds the name after the earlier `from datetime import datetime`.
    timestamp = datetime.datetime.now()

    _append_chat_log(conversation, response_text, _client_host(request), timestamp)

    print("Updating .csv")
    repo.push_to_hub(blocking=False, commit_message=f"Updating data at {timestamp}")
| |
|
# Chat widget with custom avatars; the retry/undo/clear buttons are disabled
# and two example prompts are offered.
chatbot = gr.Chatbot(
    avatar_images=('user.png', 'bot.png'),
    bubble_full_width=False,
)
chat_interface = gr.ChatInterface(
    fn=generate,
    retry_btn=None,
    clear_btn=None,
    undo_btn=None,
    chatbot=chatbot,
    examples=[
        ["What is your favorite fruit?"],
        ["What do you think about AI in the workplace?"],
    ],
)

# Page layout: description on top, the chat in the middle, license notice
# at the bottom.
theme = gr.themes.Default(
    primary_hue=gr.themes.colors.emerald,
    secondary_hue=gr.themes.colors.indigo,
)
with gr.Blocks(css="style.css", theme=theme) as demo:
    gr.Markdown(DESCRIPTION)
    chat_interface.render()
    gr.Markdown(LICENSE)

# Start the app only when the file is run as a script; at most 20 requests
# are queued.
if __name__ == "__main__":
    demo.queue(max_size=20).launch()
| | |
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| | '''# Original code from https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat |
| | # Modified for trust game purposes |
| | |
| | import gradio as gr |
| | import time |
| | import random |
| | import json |
| | import mysql.connector |
| | import os |
| | import csv |
| | import spaces |
| | import torch |
| | |
| | from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer |
| | from threading import Thread |
| | from typing import Iterator |
| | from huggingface_hub import Repository, hf_hub_download |
| | from datetime import datetime |
| | |
| | # for fetch_personalized_data |
| | import mysql.connector |
| | import urllib.parse |
| | import urllib.request |
| | |
| | # for saving chat history as JSON - not used |
| | import atexit |
| | import os |
| | from huggingface_hub import HfApi, HfFolder |
| | |
| | # for saving chat history as dataset - not used |
| | import huggingface_hub |
| | from huggingface_hub import Repository |
| | from datetime import datetime |
| | |
| | # for saving chat history as dataset - used |
| | import sqlite3 |
| | import huggingface_hub |
| | import gradio as gr |
| | import pandas as pd |
| | import shutil |
| | import os |
| | import datetime |
| | from apscheduler.schedulers.background import BackgroundScheduler |
| | |
| | |
| | DATASET_REPO_URL = "https://huggingface.co/datasets/botsi/trust-game-llama-2-chat-history" |
| | DATA_DIRECTORY = "data" # Separate directory for storing data files |
| | DATA_FILENAME = "marion.csv" # Default filename |
| | DATA_FILE = os.path.join("data", DATA_FILENAME) |
| | |
| | HF_TOKEN = os.environ.get("HF_TOKEN") |
| | print("is none?", HF_TOKEN is None) |
| | print("hfh", huggingface_hub.__version__) |
| | |
| | repo = Repository( |
| | local_dir=DATA_DIRECTORY, clone_from=DATASET_REPO_URL |
| | ) |
| | |
| | MAX_MAX_NEW_TOKENS = 2048 |
| | DEFAULT_MAX_NEW_TOKENS = 1024 |
| | MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096")) |
| | |
| | DESCRIPTION = """\ |
| | # This is your personal space to chat. |
| | You can ask anything. |
| | """ |
| | |
| | # License and Acceptable Use Policy by Meta |
| | LICENSE = """ |
| | <p/> |
| | |
| | --- |
| | This demo is governed by the [original license](https://ai.meta.com/llama/license/) and [acceptable use policy](https://ai.meta.com/llama/use-policy/). |
| | The most recent copy of this policy can be found at ai.meta.com/llama/use-policy. |
| | """ |
| | |
| | if not torch.cuda.is_available(): |
| | DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>" |
| | |
| | |
| | if torch.cuda.is_available(): |
| | model_id = "meta-llama/Llama-2-7b-chat-hf" |
| | model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto") |
| | tokenizer = AutoTokenizer.from_pretrained(model_id) |
| | tokenizer.use_default_system_prompt = False |
| | |
| | ## trust-game-llama-2-7b-chat |
| | # app.py |
| | def get_default_system_prompt(): |
| | #BOS, EOS = "<s>", "</s>" |
| | #BINST, EINST = "[INST]", "[/INST]" |
| | BSYS, ESYS = "<<SYS>>\n", "\n<</SYS>>\n\n" |
| | |
| | DEFAULT_SYSTEM_PROMPT = f"""You are a friendly chat assistant. First, wait for a question. Do not talk before being asked. When the user says hi, politely ask what he/she wants to know. |
| | Do not tell any more! |
| | If asked about Marion, you can answer the following: There are only two Marions, one is currently working at Accenture and the other one will shortly begin a new job at BMW. |
| | She recently quit Accenture. Nicknames by her friends include botsi and botsokrates. Her favorite project was Hypercare. Kathy/Kathi is her colleague at Accenture. Anita is her colleague at Accenture. |
| | """ |
| | print(DEFAULT_SYSTEM_PROMPT) |
| | return DEFAULT_SYSTEM_PROMPT |
| | |
| | |
| | ## trust-game-llama-2-7b-chat |
| | # app.py |
| | def construct_input_prompt(chat_history, message): |
| | input_prompt = f"<s>[INST] <<SYS>>\n{get_default_system_prompt()}\n<</SYS>>\n\n " |
| | for user, assistant in chat_history: |
| | input_prompt += f"{user} [/INST] {assistant} <s>[INST] " |
| | input_prompt += f"{message} [/INST] " |
| | return input_prompt |
| | |
| | ## trust-game-llama-2-7b-chat |
| | # app.py |
| | @spaces.GPU |
| | def generate( |
| | request: gr.Request, # To fetch query params |
| | message: str, |
| | chat_history: list[tuple[str, str]], |
| | # input_prompt: str, |
| | max_new_tokens: int = 1024, |
| | temperature: float = 0.6, |
| | top_p: float = 0.9, |
| | top_k: int = 50, |
| | repetition_penalty: float = 1.2, |
| | ) -> Iterator[str]: # Change return type hint to Iterator[str] |
| | |
| | conversation = [] |
| | |
| | # Fetch query params |
| | params = { |
| | key: value for key, value in gr.Request.query_params.items() |
| | } |
| | print('those are the query params') |
| | print(params) |
| | |
| | print("Request headers dictionary:", gr.Request.headers) |
| | print("IP address:", gr.Request.client.host) |
| | print("Query parameters:", params) |
| | |
| | # Construct the input prompt using the functions from the system_prompt_config module |
| | input_prompt = construct_input_prompt(chat_history, message) |
| | |
| | # Move the condition here after the assignment |
| | if input_prompt: |
| | conversation.append({"role": "system", "content": input_prompt}) |
| | |
| | # Convert input prompt to tensor |
| | input_ids = tokenizer(input_prompt, return_tensors="pt").to(model.device) |
| | |
| | for user, assistant in chat_history: |
| | conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}]) |
| | conversation.append({"role": "user", "content": message}) |
| | |
| | input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt") |
| | if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH: |
| | input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:] |
| | gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.") |
| | input_ids = input_ids.to(model.device) |
| | |
| | # Set up the TextIteratorStreamer |
| | streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True) |
| | |
| | # Set up the generation arguments |
| | generate_kwargs = dict( |
| | {"input_ids": input_ids}, |
| | streamer=streamer, |
| | max_new_tokens=max_new_tokens, |
| | do_sample=True, |
| | top_p=top_p, |
| | top_k=top_k, |
| | temperature=temperature, |
| | num_beams=1, |
| | repetition_penalty=repetition_penalty, |
| | ) |
| | |
| | # Start the model generation thread |
| | t = Thread(target=model.generate, kwargs=generate_kwargs) |
| | t.start() |
| | |
| | # Yield generated text chunks |
| | outputs = [] |
| | for text in streamer: |
| | outputs.append(text) |
| | yield "".join(outputs) |
| | |
| | # Fix bug that last answer is not recorded! |
| | # Parse the outputs into a readable sentence and record them |
| | # Filter out empty strings and join the remaining strings with spaces |
| | readable_sentence = ' '.join(filter(lambda x: x.strip(), outputs)) |
| | # Print the readable sentence |
| | print(readable_sentence) |
| | |
| | # Save chat history to .csv file on HuggingFace Hub |
| | # Generate filename with bot id and session id |
| | filename = f"{DATA_FILENAME}" |
| | data_file = os.path.join(DATA_DIRECTORY, filename) |
| | |
| | # Generate timestamp |
| | timestamp = datetime.datetime.now() |
| | |
| | # Check if the file already exists |
| | if os.path.exists(data_file): |
| | # If file exists, load existing data |
| | existing_data = pd.read_csv(data_file) |
| | |
| | # Add timestamp column |
| | conversation_df = pd.DataFrame(conversation) |
| | conversation_df['ip_address'] = request.client.host |
| | conversation_df['readable_sentence'] = readable_sentence |
| | conversation_df['timestamp'] = timestamp |
| | |
| | # Append new conversation to existing data |
| | updated_data = pd.concat([existing_data, conversation_df], ignore_index=True) |
| | updated_data.to_csv(data_file, index=False) |
| | else: |
| | # If file doesn't exist, create new file with conversation data |
| | conversation_df = pd.DataFrame(conversation) |
| | conversation_df['ip_address'] = request.client.host |
| | conversation_df['readable_sentence'] = readable_sentence |
| | conversation_df['timestamp'] = timestamp |
| | conversation_df.to_csv(data_file, index=False) |
| | |
| | print("Updating .csv") |
| | repo.push_to_hub(blocking=False, commit_message=f"Updating data at {timestamp}") |
| | |
| | chat_interface = gr.ChatInterface( |
| | fn=generate, |
| | retry_btn=None, |
| | clear_btn=None, |
| | undo_btn=None, |
| | chatbot=gr.Chatbot(avatar_images=('user.png', 'bot.png'), bubble_full_width = False), |
| | examples=[ |
| | ["What is your favorite fruit?"], |
| | ["What do you think about AI in the workplace?"], |
| | ], |
| | ) |
| | |
| | with gr.Blocks(css="style.css", theme=gr.themes.Default(primary_hue=gr.themes.colors.emerald,secondary_hue=gr.themes.colors.indigo)) as demo: |
| | gr.Markdown(DESCRIPTION) |
| | chat_interface.render() |
| | gr.Markdown(LICENSE) |
| | |
| | if __name__ == "__main__": |
| | demo.queue(max_size=20).launch() |
| | #demo.queue(max_size=20) |
| | demo.launch(share=True, debug=True) |
| | ''' |