matheusrdgsf committed on
Commit
95444e3
·
1 Parent(s): 670dcc4

Upload model

Browse files
Files changed (3) hide show
  1. README.md +1 -79
  2. adapter_config.json +1 -1
  3. adapter_model.bin +1 -1
README.md CHANGED
@@ -1,13 +1,5 @@
1
  ---
2
  library_name: peft
3
- base_model: TheBloke/zephyr-7B-beta-GPTQ
4
- revision: gptq-8bit-32g-actorder_True
5
- license: mit
6
- language:
7
- - pt
8
- tags:
9
- - gptq
10
- - ptbr
11
  ---
12
  ## Training procedure
13
 
@@ -32,75 +24,5 @@ The following `bitsandbytes` quantization config was used during training:
32
  - max_input_length: None
33
  ### Framework versions
34
 
35
- # Load model
36
- ```python
37
- from transformers import AutoModelForCausalLM, GPTQConfig
38
- from peft import PeftModel
39
 
40
- bnb_config = GPTQConfig(
41
- bits=8,
42
- disable_exllama=True,
43
- )
44
-
45
- _model = AutoModelForCausalLM.from_pretrained(
46
- 'TheBloke/zephyr-7B-beta-GPTQ',
47
- quantization_config=bnb_config,
48
- device_map='auto',
49
- revision='gptq-8bit-32g-actorder_True',
50
- )
51
-
52
- model = PeftModel.from_pretrained(_model, 'matheusrdgsf/cesar-ptbr')
53
- ```
54
-
55
- # Easy inference
56
- ```python
57
- from transformers import GenerationConfig
58
- from transformers import AutoTokenizer
59
-
60
- tokenizer_model = AutoTokenizer.from_pretrained('TheBloke/zephyr-7B-beta-GPTQ')
61
- tokenizer_template = AutoTokenizer.from_pretrained('HuggingFaceH4/zephyr-7b-alpha')
62
-
63
- generation_config = GenerationConfig(
64
- do_sample=True,
65
- temperature=0.1,
66
- top_p=0.25,
67
- top_k=0,
68
- max_new_tokens=512,
69
- repetition_penalty=1.1,
70
- eos_token_id=tokenizer_model.eos_token_id,
71
- pad_token_id=tokenizer_model.eos_token_id,
72
- )
73
-
74
-
75
- def get_inference(
76
- text,
77
- model,
78
- tokenizer_model=tokenizer_model,
79
- tokenizer_template=tokenizer_template,
80
- generation_config=generation_config,
81
- ):
82
- st_time = time.time()
83
- inputs = tokenizer_model(
84
- tokenizer_template.apply_chat_template(
85
- [
86
- {
87
- "role": "system",
88
- "content": "Você é um chatbot para indicação de filmes. Responda de maneira educada sugestões de filmes para os usuários.",
89
- },
90
- {"role": "user", "content": text},
91
- ],
92
- tokenize=False,
93
- ),
94
- return_tensors="pt",
95
- ).to("cuda")
96
-
97
- outputs = model.generate(**inputs, generation_config=generation_config)
98
-
99
- print('inference time:', time.time() - st_time)
100
- return tokenizer_model.decode(outputs[0], skip_special_tokens=True).split('\n')[-1]
101
-
102
- get_inference('Poderia indicar filmes de ação de até 2 horas?', model)
103
- ```
104
-
105
-
106
- - PEFT 0.5.0
 
1
  ---
2
  library_name: peft
 
 
 
 
 
 
 
 
3
  ---
4
  ## Training procedure
5
 
 
24
  - max_input_length: None
25
  ### Framework versions
26
 
 
 
 
 
27
 
28
+ - PEFT 0.5.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adapter_config.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
  "auto_mapping": null,
3
  "base_model_name_or_path": "TheBloke/zephyr-7B-beta-GPTQ",
4
- "revision": "gptq-8bit-32g-actorder_True",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
@@ -13,6 +12,7 @@
13
  "modules_to_save": null,
14
  "peft_type": "LORA",
15
  "r": 16,
 
16
  "target_modules": [
17
  "q_proj",
18
  "v_proj"
 
1
  {
2
  "auto_mapping": null,
3
  "base_model_name_or_path": "TheBloke/zephyr-7B-beta-GPTQ",
 
4
  "bias": "none",
5
  "fan_in_fan_out": false,
6
  "inference_mode": true,
 
12
  "modules_to_save": null,
13
  "peft_type": "LORA",
14
  "r": 16,
15
+ "revision": null,
16
  "target_modules": [
17
  "q_proj",
18
  "v_proj"
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb45b0e39fc1e82f20fb661a914f5951451557ee03efd76add36aff361041aad
3
  size 27308941
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7435d98205bd733689ce9bf2192cc23b005f25ac6404b21b055022513d4092b5
3
  size 27308941