AhmedSSoliman committed on
Commit 7bef3fd
1 Parent(s): 8fed7be

Update README.md

Files changed (1):
  1. README.md +34 -10
README.md CHANGED
@@ -28,6 +28,9 @@ This model is [**LlaMa2-7b**](https://huggingface.co/meta-llama/Llama-2-7b) which
 # You can load the LlaMa2-CodeGen model on google colab.
 
 
+
+
+
 ### Example
 ```py
 
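The README states the model can be loaded on Google Colab. The setup cell is not part of this diff; a typical one (an assumption, covering the libraries the example imports) might look like:

```py
# Typical Colab setup cell (assumption, not shown in this diff): install the
# libraries the README example imports.
!pip install -q torch transformers peft accelerate
```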
@@ -46,9 +49,10 @@ model = PeftModel.from_pretrained(model, peft_model_id)
 
 
 
+
 def create_prompt(instruction):
-    system = "You are a coding assistant that will help the user to resolve the following instruction:"
-    instruction = "\n### Input: " + instruction
+    system = "You are using the LlaMa2-CodeGen model, a coding assistant that will help the user to resolve the following instruction:\n"
+    instruction = "### Input: " + instruction
     return system + "\n" + instruction + "\n\n" + "### Response:" + "\n"
 
 def generate(
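For reference, the prompt the updated `create_prompt` builds looks like this (an illustration only, not part of the commit):

```py
# Illustration: what the new create_prompt returns for a sample instruction.
print(create_prompt("Write a function that adds two numbers"))
# You are using the LlaMa2-CodeGen model, a coding assistant that will help the user to resolve the following instruction:
#
# ### Input: Write a function that adds two numbers
#
# ### Response:
```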
@@ -62,9 +66,10 @@ def generate(
 ):
     prompt = create_prompt(instruction)
     print(prompt)
-    inputs = tokenizer(prompt, return_tensors="pt")
-    input_ids = inputs["input_ids"].to("cuda")
-    attention_mask = inputs["attention_mask"].to("cuda")
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+    #input_ids = inputs["input_ids"].to("cuda")
+    #attention_mask = inputs["attention_mask"].to("cuda")
+
     generation_config = GenerationConfig(
         temperature=temperature,
         top_p=top_p,
@@ -74,17 +79,36 @@ def generate(
     )
     with torch.no_grad():
        generation_output = model.generate(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
+            #input_ids=input_ids,
+            #attention_mask=attention_mask,
+            **inputs,
             generation_config=generation_config,
             return_dict_in_generate=True,
             output_scores=True,
             max_new_tokens=max_new_tokens,
             early_stopping=True
         )
-    s = generation_output.sequences[0]
-    output = tokenizer.decode(s)
-    return output.split("### Response:")[1].lstrip("\n")
+
+    # Decode the full sequence, keep the text after "### Response:", and cut
+    # it off at any follow-up "### Input" block the model hallucinates.
+    generated_response = tokenizer.decode(generation_output.sequences[0], skip_special_tokens=True)
+    stop_output = "### Input"
+    gen_response = generated_response.split("### Response:")[1].split(stop_output)[0].lstrip("\n")
+    return gen_response
 
 
 instruction = """
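The loading code referenced by the hunk headers above (`model = PeftModel.from_pretrained(model, peft_model_id)`) is collapsed out of this diff. A minimal end-to-end sketch of how the updated example is typically wired up, assuming the adapter repo id `AhmedSSoliman/LlaMa2-CodeGen`, a CUDA device, and that `generate` (as defined in the README) supplies defaults for its sampling parameters:

```py
# Sketch only, not part of the commit. Assumed: the adapter id below, access
# to the base Llama-2-7b weights, and a CUDA-capable GPU.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftConfig, PeftModel

peft_model_id = "AhmedSSoliman/LlaMa2-CodeGen"  # assumed adapter repo id
config = PeftConfig.from_pretrained(peft_model_id)

# Load the base model and tokenizer, then attach the adapter, mirroring the
# hunk context line: model = PeftModel.from_pretrained(model, peft_model_id)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path, torch_dtype=torch.float16, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(model, peft_model_id)

print(generate("Write a Python function that checks whether a number is prime."))
```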