# Notes / strings_chat.py
# Uploaded by MatsRooth with huggingface_hub (commit f7e7fda)
# Portions copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
# Connect to a compute node interactively
# srun --partition=gpu-interactive --gpus=a5000:1 --mem=16000 --pty /bin/bash
# source env/hugh/bin/activate
# cd /share/compling/speech/llama_tuning
# Run it
# torchrun --nproc_per_node 1 strings_chat.py --ckpt_dir llama-2-7b-chat/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 6 --benchmark_path benchmark/cl23.txt
# torchrun --nproc_per_node 1 strings_chat.py --ckpt_dir llama-2-7b-chat/ --tokenizer_path tokenizer.model --max_seq_len 512 --max_batch_size 6 --benchmark_path benchmark/cl23.txt --tuple
from typing import List, Optional
import fire
from llama import Llama, Dialog
import sys
from ast import literal_eval
import json
def tuple2dialog(x):
    """Build the single-turn dialog that asks the model about one benchmark item.

    Args:
        x: A benchmark tuple; x[1] is the statement to judge and x[2] is the
            string the statement is about.

    Returns:
        A list holding one user message dict with "role" and "content" keys.
    """
    # No system prompt is sent: per the original author's note, the llama
    # assistant already starts its answer with True/False and includes an
    # explanation of suitable scope.
    return [
        {
            "role": "user",
            "content": f"Consider the string '{x[2]}'. True or False: {x[1]}",
        }
    ]
def tuple_add_dialog(x):
    """Extend a benchmark tuple with the dialog prompt built from it.

    Example input:
        ('jeremy_a_2_ab', 'every letter is a consonant', 'ab', 'f')

    Args:
        x: A benchmark tuple with at least four fields; x[1] is the statement
            to judge and x[2] is the string the statement is about.

    Returns:
        A 5-tuple: the first four fields of x unchanged, followed by a list
        holding one user message dict with "role" and "content" keys.
    """
    userprompt = {
        "role": "user",
        "content": f"Consider the string '{x[2]}'. True or False: {x[1]}",
    }
    return (x[0], x[1], x[2], x[3], [userprompt])
# Number of dialogs sent to the model in one chat_completion call.
_DIALOGS_PER_BATCH = 5
# Upper bound on benchmark items handled in one run (100 batches of five).
_MAX_BENCHMARK_ITEMS = 500


def _load_benchmark(benchmark_path, limit):
    """Read up to `limit` benchmark tuples, each extended with its dialog prompt."""
    records = []
    # Context manager so the file is closed even when a line fails to parse.
    with open(benchmark_path) as handle:
        for line in handle:
            if len(records) >= limit:
                break
            if not line.strip():
                # Tolerate blank lines, e.g. an empty line at the end of the file.
                continue
            records.append(tuple_add_dialog(literal_eval(line)))
    return records


def _format_result_tuple(tpl, content):
    """Render a benchmark tuple plus the model answer as one tuple-like line."""
    # json.dumps supplies the surrounding double quotes and escapes embedded
    # quotes and newlines, so no additional quotes are wrapped around fields.
    quoted = ",".join(json.dumps(str(field)) for field in tpl[:4])
    return f"({quoted},{tpl[4]},{json.dumps(content)})"


def main(
    ckpt_dir: str,
    tokenizer_path: str,
    benchmark_path: str,
    temperature: float = 0.6,
    top_p: float = 0.9,
    max_seq_len: int = 512,
    max_batch_size: int = 8,
    max_gen_len: Optional[int] = None,
    tuple: Optional[bool] = False,
):
    """
    Run the benchmark prompts through a pretrained chat model and print the answers.

    Each line of the benchmark file is a Python tuple literal such as
    ('jeremy_a_2_ab', 'every letter is a consonant', 'ab', 'f'). At most
    500 items are processed, in batches of up to five dialogs.

    Args:
        ckpt_dir (str): The directory containing checkpoint files for the pretrained model.
        tokenizer_path (str): The path to the tokenizer model used for text encoding/decoding.
        benchmark_path (str): The path to the benchmark e.g. benchmark/cl23.txt.
        temperature (float, optional): The temperature value for controlling randomness in generation.
            Defaults to 0.6.
        top_p (float, optional): The top-p sampling parameter for controlling diversity in generation.
            Defaults to 0.9.
        max_seq_len (int, optional): The maximum sequence length for input prompts. Defaults to 512.
        max_batch_size (int, optional): The maximum batch size for generating sequences. Defaults to 8.
        max_gen_len (int, optional): The maximum length of generated sequences. If None, it will be
            set to the model's max sequence length. Defaults to None.
        tuple (bool, optional): If true, print one tuple-like line per item (the four benchmark
            fields, the dialog, and the model's answer) instead of the readable transcript.
            The name shadows the builtin but is kept because it is the --tuple command-line flag.
            Defaults to False.
    """
    # Parse the benchmark before loading the model so that a bad path or a
    # malformed line fails fast.
    records = _load_benchmark(benchmark_path, _MAX_BENCHMARK_ITEMS)

    # Never hand the model more dialogs than it was built to accept.
    batch_size = max(1, min(_DIALOGS_PER_BATCH, max_batch_size))

    generator = Llama.build(
        ckpt_dir=ckpt_dir,
        tokenizer_path=tokenizer_path,
        max_seq_len=max_seq_len,
        max_batch_size=max_batch_size,
    )

    # Slicing keeps a trailing partial batch instead of silently dropping it.
    for start in range(0, len(records), batch_size):
        batch = records[start:start + batch_size]
        dialogs = [record[4] for record in batch]  # the dialog prompt of each item
        results = generator.chat_completion(
            dialogs,  # type: ignore
            max_gen_len=max_gen_len,
            temperature=temperature,
            top_p=top_p,
        )
        for tpl, result in zip(batch, results):
            generation = result['generation']
            if tuple:
                print(_format_result_tuple(tpl, generation['content']))
            else:
                # Readable transcript: every message of the prompt, then the answer.
                for msg in tpl[4]:
                    print(f"{msg['role'].capitalize()}: {msg['content']}\n")
                print(
                    f"> {generation['role'].capitalize()}: {generation['content']}"
                )
                print("\n==================================\n")
# Script entry point: python-fire exposes main's parameters as command-line
# flags (e.g. --ckpt_dir, --benchmark_path, --tuple).
if __name__ == "__main__":
    fire.Fire(main)