---
library_name: peft
language:
- pt
---

## Training procedure

The following `bitsandbytes` quantization config was used during training:
- load_in_8bit: False
- load_in_4bit: True
- llm_int8_threshold: 6.0
- llm_int8_skip_modules: None
- llm_int8_enable_fp32_cpu_offload: False
- llm_int8_has_fp16_weight: False
- bnb_4bit_quant_type: nf4
- bnb_4bit_use_double_quant: False
- bnb_4bit_compute_dtype: float16

## How to use the model

```python
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

# Quantization settings
use_4bit = True
bnb_4bit_compute_dtype = "float16"
bnb_4bit_quant_type = "nf4"
use_nested_quant = False

# Build the QLoRA quantization config
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Load the base model with the quantization config and apply the PEFT adapter
config = PeftConfig.from_pretrained("MatNLP/Sectrum")
base_model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-7b-chat-hf",
    quantization_config=bnb_config,
)
model = PeftModel.from_pretrained(base_model, "MatNLP/Sectrum")

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    "NousResearch/Llama-2-7b-chat-hf",
    trust_remote_code=True,
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Prepare the prompt
prompt = "Como proteger meu e-mail?"

# Create the text-generation pipeline
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)

# Run the pipeline and generate text from the initial prompt
resultado = pipe(f"[INST] {prompt} [/INST]")
print(resultado[0]["generated_text"].split("[/INST]")[1].split("</s>")[0])
```
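
The original snippet also hinted at streaming output via a commented-out `TextStreamer` line. Below is a minimal sketch of that option, assuming the `model`, `tokenizer`, and `prompt` objects defined above; the `max_length=200` value simply mirrors the pipeline setting and can be adjusted.

```python
from transformers import TextStreamer

# Stream generated tokens to stdout as they are produced (sketch, not part of the original card)
streamer = TextStreamer(tokenizer, skip_prompt=True)

inputs = tokenizer(f"[INST] {prompt} [/INST]", return_tensors="pt").to(model.device)
_ = model.generate(**inputs, max_length=200, streamer=streamer)
```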