LacDa2 Model Card Readme
Model Information
Model Name: LacDa
Description: LacDa is a specialized language model fine-tuned from the Llama 2 model. It is designed to provide advanced natural language processing capabilities in specific domains or applications.
Fine-tuned from: Llama 2
Open LLM Leaderboard Evaluation Results
Metric | Value |
---|---|
Avg. | 43.91 |
ARC (25-shot) | 53.07 |
HellaSwag (10-shot) | 77.57 |
MMLU (5-shot) | 46.03 |
TruthfulQA (0-shot) | 44.57 |
Winogrande (5-shot) | 74.19 |
GSM8K (5-shot) | 6.29 |
DROP (3-shot) | 5.65 |
Instruction format
from transformers import AutoModelForCausalLM, LlamaTokenizer, BitsAndBytesConfig, TextStreamer, StoppingCriteria, StoppingCriteriaList
import torch
class StopTokenCriteria(StoppingCriteria):
    """Stop generation once a stop string shows up in the generated text.

    The criterion decodes the first sequence of the batch, drops the first
    ``prompt_length`` characters (the prompt) and reports "done" as soon as
    any stop string occurs in what remains.

    Args:
        stop_tokens: Strings that terminate generation. The sequence is
            copied, so the caller's object is never modified.
        tokenizer: Tokenizer used to decode ``input_ids``; its pad, BOS and
            EOS token strings are added to the stop strings when set.
        prompt_length: Number of leading *characters* of the decoded text to
            skip before searching for stop strings.
            NOTE(review): this assumes the decoded prompt has the same
            character length as the original prompt string -- confirm for
            the tokenizer in use.
    """

    def __init__(self, stop_tokens, tokenizer, prompt_length):
        # Work on a copy: appending to the caller's list would leak the
        # special tokens into every later use of that list.
        self.stop_tokens = list(stop_tokens)
        for special in (tokenizer.pad_token, tokenizer.bos_token, tokenizer.eos_token):
            # An unset special token is None, and `None in "text"` raises
            # TypeError; an empty string would match everything. Skip both.
            if not special:
                continue
            if special not in self.stop_tokens:
                self.stop_tokens.append(special)
        self.tokenizer = tokenizer
        self.prompt_length = prompt_length

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the text generated so far contains a stop string.

        Only ``input_ids[0]`` is inspected, i.e. the first sequence of the
        batch.
        """
        # Use the tokenizer stored on the instance rather than relying on a
        # module-level global that happens to share the name.
        generated = self.tokenizer.decode(input_ids[0])[self.prompt_length:]
        return any(stop in generated for stop in self.stop_tokens)
# Load the tokenizer and the fp16 model from the Hugging Face Hub.
model_name = "willnguyen/lacda-2-7B-chat-v0.1"
tokenizer = LlamaTokenizer.from_pretrained(
    model_name,
    use_fast=False,
    padding_side="right",
    tokenizer_type='llama',
)
# Use token id 0 for padding.
tokenizer.pad_token_id = 0
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)

# The prompt already contains the BOS marker and the [INST] ... [/INST]
# wrapper, which is why tokenization below disables add_special_tokens.
prompt = "<s> [INST] who is Hồ Chí Minh [/INST]"
stopping_criteria = StoppingCriteriaList(
    [StopTokenCriteria(["[INST]", "[/INST]"], tokenizer, len(prompt))]
)

with torch.inference_mode():
    # Send the inputs to wherever device_map="auto" placed the model instead
    # of assuming a CUDA device is present.
    input_ids = tokenizer(
        prompt, return_tensors="pt", add_special_tokens=False
    ).input_ids.to(model.device)
    # Prints the text to stdout as it is generated.
    streamer = TextStreamer(tokenizer)
    _ = model.generate(
        input_ids=input_ids,
        max_new_tokens=1024,
        do_sample=False,  # greedy decoding
        temperature=1.0,
        top_p=1.0,
        top_k=50,
        repetition_penalty=1.0,
        use_cache=True,
        streamer=streamer,
        stopping_criteria=stopping_criteria,
    )
- Downloads last month
- 20
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.