samdeniyi committed on
Commit
369b113
·
1 Parent(s): 5eb3cba

Added custom inference handler and requirements

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .idea
.idea/lora_lesson_plan_model.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
handler.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
3
+ from peft import PeftModel, PeftConfig
4
+ import torch
5
+ import time
6
+
7
+
8
class EndpointHandler:
    """Custom inference handler that generates lesson plans with a LoRA adapter.

    The handler loads a 4-bit (NF4) quantized base causal language model,
    attaches the PEFT/LoRA adapter found at ``path``, and answers requests by
    filling an instruction/input/response prompt and generating a completion.

    Note: 4-bit loading through ``BitsAndBytesConfig`` needs the
    ``bitsandbytes`` and ``accelerate`` packages at runtime in addition to
    ``torch``, ``transformers`` and ``peft``.
    """

    def __init__(self, path="samadeniyi/lora_lesson_plan_model"):
        """Load the quantized base model, its tokenizer, and the LoRA adapter.

        Args:
            path: Directory or Hub repo id holding the PEFT adapter; the base
                model name is read from the adapter's configuration.
                NOTE(review): confirm the spelling of the default repo id.
        """
        # The adapter config records which base model it was trained on.
        config = PeftConfig.from_pretrained(path)

        # 4-bit NF4 quantization with double quantization, fp16 compute.
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
        )

        # Quantization is requested ONLY through `quantization_config`:
        # passing `load_in_4bit=True` as an extra keyword at the same time is
        # rejected by `from_pretrained` with a ValueError.
        self.model = AutoModelForCausalLM.from_pretrained(
            config.base_model_name_or_path,
            return_dict=True,
            device_map={"": 0},
            trust_remote_code=True,
            quantization_config=bnb_config,
        )

        # The tokenizer comes from the base model; EOS doubles as the pad token.
        self.tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
        self.tokenizer.pad_token = self.tokenizer.eos_token

        # Wrap the quantized base model with the fine-tuned LoRA weights.
        self.model = PeftModel.from_pretrained(self.model, path)

    @staticmethod
    def _parse_inputs(data: Any):
        """Extract ``(instruction, input_context)`` from a request payload.

        Accepts either ``{"inputs": {...}}`` or the bare inner dict. A plain
        string payload is treated as the instruction with an empty input. The
        caller's dict is never mutated.

        Raises:
            ValueError: If the payload is neither a dict nor a string.
        """
        payload = data.get("inputs", data) if isinstance(data, dict) else data
        if isinstance(payload, str):
            return payload, ""
        if not isinstance(payload, dict):
            raise ValueError(
                'Expected a payload like {"instruction": "...", "input": "..."}, '
                f"got {type(payload).__name__}"
            )

        def _text(key):
            value = payload.get(key)
            return "" if value is None else str(value)

        return _text("instruction"), _text("input")

    @staticmethod
    def _build_prompt(instruction: str, input_context: str) -> str:
        """Fill the instruction/input/response prompt template."""
        return (
            "Below is an instruction that describes how to create a lesson plan, "
            "paired with an input that provides further context. "
            "Write a response that appropriately completes the request.\n"
            "\n"
            "### Instruction:\n"
            f"{instruction}\n"
            "\n"
            "### Input:\n"
            f"{input_context}\n"
            "\n"
            "### Response:\n"
        )

    def __call__(self, data: Any) -> Dict[str, Any]:
        """Generate a lesson plan for one request.

        Args:
            data: Either ``{"inputs": {"instruction": ..., "input": ...}}`` or
                the inner dict directly. ``"instruction"`` describes what to
                generate and ``"input"`` gives additional context; both default
                to an empty string. A string under ``"inputs"`` is used as the
                instruction.

        Returns:
            A dict with:
              - ``"generated_text"``: the decoded output sequence (the decoded
                tokens include the prompt followed by the completion).
              - ``"time"``: wall-clock generation time, formatted ``"<s.ss> s"``.

        Raises:
            ValueError: If the payload is neither a dict nor a string.
        """
        instruction, input_context = self._parse_inputs(data)
        lesson_prompt = self._build_prompt(instruction, input_context)

        # Tokenize and move the tensors to the GPU the model was mapped to.
        batch = self.tokenizer(
            lesson_prompt,
            padding=True,
            truncation=True,
            return_tensors="pt",
        )
        batch = batch.to("cuda:0")

        # Time only the generation step; perf_counter is monotonic.
        start = time.perf_counter()
        with torch.autocast(device_type="cuda"):
            # Settings are passed per call instead of mutating the model's
            # shared generation_config. `do_sample=True` is required for
            # `temperature`/`top_p` to have any effect.
            output_tokens = self.model.generate(
                input_ids=batch.input_ids,
                attention_mask=batch.attention_mask,
                do_sample=True,
                top_p=0.7,
                temperature=0.7,
                max_new_tokens=256,
                num_return_sequences=1,
                pad_token_id=self.tokenizer.eos_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
            )
        end = time.perf_counter()

        # Decode the single returned sequence into text.
        generated_text = self.tokenizer.decode(output_tokens[0], skip_special_tokens=True)

        return {"generated_text": generated_text, "time": f"{(end - start):.2f} s"}
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ optimum
4
+ peft