m-polignano-uniba committed
Commit 8f1b234 (parent: 9135d96)

Update README.md

Files changed (1): README.md (+7, -2)
README.md CHANGED
@@ -85,7 +85,12 @@ tokenizer.chat_template = "{% set ns = namespace(i=0) %}" \
     "{% set ns.i = ns.i+1 %}" \
     "{% endfor %}"
 
-
+model = AutoModelForCausalLM.from_pretrained(
+    model,
+    torch_dtype=torch.float16,
+    device_map='balanced',
+    use_flash_attention_2=True
+)
 
 pipe = transformers.pipeline(model=model,
     device_map="balanced",
@@ -93,7 +98,7 @@ pipe = transformers.pipeline(model=model,
     return_full_text=False, # langchain expects the full text
     task='text-generation',
     max_new_tokens=512, # max number of tokens to generate in the output
-    temperature=0.8 #temperature
+    temperature=0.7 #temperature
 )
 messages = [{"role": "user", "content": "Cosa sono i word embeddings?"}]
 text = tokenizer.apply_chat_template(messages, tokenize=False)
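
Read together, the two hunks (1) load the model in fp16 with a balanced device map and FlashAttention 2 before the pipeline is built, and (2) lower the sampling temperature from 0.8 to 0.7. Below is a minimal, self-contained sketch of how the README snippet reads after this commit. It is not the verbatim README: the model id, the tokenizer loading, the imports, and the final generation call are assumptions added for completeness, while the `from_pretrained` and `pipeline` arguments mirror the diff (the README also sets a custom `tokenizer.chat_template`, elided here).

```python
# Sketch of the snippet as it reads after this commit (not the verbatim README).
# "model_id" is a placeholder repo id; the real identifier is defined earlier in the README.
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "<namespace>/<model-name>"  # placeholder (assumption)

tokenizer = AutoTokenizer.from_pretrained(model_id)
# The README also assigns a custom tokenizer.chat_template at this point (omitted here).

# Added by this commit: fp16 weights, balanced multi-GPU placement, FlashAttention 2.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="balanced",
    use_flash_attention_2=True,
)

pipe = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,            # passed explicitly here (assumption, for self-containment)
    device_map="balanced",
    return_full_text=False,          # return only the generated continuation
    task="text-generation",
    max_new_tokens=512,              # max number of tokens to generate in the output
    temperature=0.7,                 # lowered from 0.8 by this commit
)

messages = [{"role": "user", "content": "Cosa sono i word embeddings?"}]
text = tokenizer.apply_chat_template(messages, tokenize=False)
print(pipe(text)[0]["generated_text"])  # assumed final step: run the prompt through the pipeline
```

Note that the README reuses the `model` variable for both the repo id string and the loaded model object; the sketch uses a separate `model_id` name only for readability.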