Text Generation
Transformers
English
alpaca
bloom
LLM
mrm8488 committed on
Commit 70cb66f
1 Parent(s): 48bed71

Update README.md

Files changed (1)
  1. README.md +76 -0
README.md CHANGED
@@ -8,6 +8,8 @@ tags:
 - alpaca
 - bloom
 - LLM
+datasets:
+- tatsu-lab/alpaca
 ---
 
 # AlpacOOM: Alpaca + BLOOM
@@ -45,5 +47,79 @@ TBA
 
 ## How to use
 ```py
+import torch
+from peft import PeftModel, PeftConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+
+peft_model_id = "mrm8488/Alpacoom"
+config = PeftConfig.from_pretrained(peft_model_id)
+
+# Load the BLOOM base model in 8-bit (requires `bitsandbytes`)
+model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map="auto")
+tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-7b1")
+
+# Attach the Alpaca LoRA adapter to the base model
+model = PeftModel.from_pretrained(model, peft_model_id)
+model.eval()
+
+# Based on the inference code by `tloen/alpaca-lora`
+def generate_prompt(instruction, input=None):
+    if input:
+        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+### Instruction:
+{instruction}
+### Input:
+{input}
+### Response:"""
+    else:
+        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
+### Instruction:
+{instruction}
+### Response:"""
+
+def generate(
+    instruction,
+    input=None,
+    temperature=0.1,
+    top_p=0.75,
+    top_k=40,
+    num_beams=4,
+    **kwargs,
+):
+    prompt = generate_prompt(instruction, input)
+    inputs = tokenizer(prompt, return_tensors="pt")
+    input_ids = inputs["input_ids"].cuda()
+    generation_config = GenerationConfig(
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        num_beams=num_beams,
+        **kwargs,
+    )
+    with torch.no_grad():
+        generation_output = model.generate(
+            input_ids=input_ids,
+            generation_config=generation_config,
+            return_dict_in_generate=True,
+            output_scores=True,
+            max_new_tokens=256,
+        )
+    s = generation_output.sequences[0]
+    output = tokenizer.decode(s)
+    # Keep only the text after "### Response:" and drop any repeated prompt
+    return output.split("### Response:")[1].strip().split("Below")[0]
+
+instruction = "Tell me about alpacas"
+
+print("Instruction:", instruction)
+print("Response:", generate(instruction))
+```
 
+## Citation
+```
+@misc{manuel_romero_2023,
+  author    = { {Manuel Romero} },
+  title     = { Alpacoom (Revision 874f989) },
+  year      = 2023,
+  url       = { https://huggingface.co/mrm8488/Alpacoom },
+  doi       = { 10.57967/hf/0449 },
+  publisher = { Hugging Face }
+}
 ```
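
For reference, the example in the diff only exercises the instruction-only branch of the prompt template. A minimal usage sketch for the instruction-plus-input branch, reusing the `generate` function defined in the snippet above (the instruction and context strings here are illustrative, not from the card):

```py
# Illustrative only: pass extra context through the optional `input` argument,
# which routes through the template's "### Input:" branch.
instruction = "Summarize the following text in one sentence."
context = (
    "Alpacas are domesticated South American camelids, closely related to "
    "llamas, kept in herds and prized for their soft fleece."
)
print("Response:", generate(instruction, input=context))
```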
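
If 8-bit loading via `bitsandbytes` is not available, the adapter can also be attached to the base model loaded in half precision instead. A sketch under that assumption (it presumes enough GPU memory to hold BLOOM-7B1 in fp16, roughly 14 GB; this variant is not part of the original card):

```py
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumption: the GPU can hold BLOOM-7B1 in fp16 (~14 GB).
base_model = AutoModelForCausalLM.from_pretrained(
    "bigscience/bloom-7b1", torch_dtype=torch.float16, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-7b1")

# Attach the Alpaca LoRA adapter exactly as in the 8-bit example
model = PeftModel.from_pretrained(base_model, "mrm8488/Alpacoom")
model.eval()
```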