---
license: other
language:
- en
pipeline_tag: text-generation
inference: false
tags:
- transformers
- gguf
- imatrix
- Azzurro
---
GGUF quantizations (imatrix) of https://huggingface.co/MoxoffSpA/Azzurro
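
The GGUF files can be used without `transformers`, for example with `llama-cpp-python`. A minimal sketch, assuming a downloaded quant file named `Azzurro.Q4_K_M.gguf` (the actual filename depends on which quantization you pick from this repo):

```python
# Sketch (not from the original readme): run a GGUF quant locally
# with llama-cpp-python. The model filename below is an example;
# substitute the quant file you actually downloaded.
from llama_cpp import Llama

llm = Llama(model_path="Azzurro.Q4_K_M.gguf", n_ctx=2048)

response = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Quanto è alta la torre di Pisa?"}],
    max_tokens=128,
    temperature=0.1,
)
print(response["choices"][0]["message"]["content"])
```
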
# From original readme

## Usage

Be sure to install these dependencies before running the program:

```python
!pip install transformers torch sentencepiece
```

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cpu"  # to use the GPU, make sure the CUDA toolkit is installed and change this to "cuda"

model = AutoModelForCausalLM.from_pretrained("MoxoffSpA/Azzurro")
tokenizer = AutoTokenizer.from_pretrained("MoxoffSpA/Azzurro")

# Azzurro is an Italian-language model; the question asks "How tall is the
# Tower of Pisa?" and the context is a short passage about the tower.
question = """Quanto è alta la torre di Pisa?"""
context = """
La Torre di Pisa è un campanile del XII secolo, famoso per la sua inclinazione. Alta circa 56 metri.
"""

prompt = f"Domanda: {question}, contesto: {context}"

messages = [
    {"role": "user", "content": prompt}
]

encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")

model_inputs = encodeds.to(device)
model.to(device)

generated_ids = model.generate(
    model_inputs,                        # the tokenized chat-template input
    max_new_tokens=128,                  # limit the number of newly generated tokens
    do_sample=True,                      # enable sampling to introduce randomness
    temperature=0.1,                     # low temperature keeps the output nearly deterministic
    top_p=0.95,                          # nucleus sampling for more coherent generation
    eos_token_id=tokenizer.eos_token_id  # token that marks the end of a sequence
)

decoded_output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
trimmed_output = decoded_output.strip()
print(trimmed_output)
```
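
Note that `generate` returns the prompt tokens followed by the newly generated ones, so the decoded output above also contains the question and context. A small variation (a sketch, not part of the original readme) that decodes only the new tokens:

```python
# Slice off the prompt tokens so only the model's answer is decoded
new_tokens = generated_ids[0][model_inputs.shape[-1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True).strip())
```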