jeevavijay10 committed
Commit: 5cc1d21
Parent: f464a86

change model to WizardCoder

Files changed (2):
  1. app-salesforce.py +26 -0
  2. app.py +57 -11
app-salesforce.py ADDED
@@ -0,0 +1,26 @@
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+# checkpoint = "Salesforce/codegen25-7b-instruct"
+# checkpoint = "Salesforce/codegen-2B-nl"
+checkpoint = "Salesforce/codegen2-1B"
+
+tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
+# model = AutoModelForCausalLM.from_pretrained(checkpoint, cache_dir="models/")
+model = AutoModelForCausalLM.from_pretrained(checkpoint)
+
+def code_gen(text):
+    input_ids = tokenizer(text, return_tensors="pt").input_ids
+    generated_ids = model.generate(input_ids, max_length=128)
+    response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+    print(response)
+    return response
+
+
+iface = gr.Interface(fn=code_gen,
+                     inputs=gr.inputs.Textbox(
+                         label="Input Source Code"),
+                     outputs="text",
+                     title="Code Generation")
+
+iface.launch()
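Note: gr.inputs.Textbox is the legacy Gradio 2.x component namespace; the gr.inputs module is deprecated in Gradio 3.x and removed in 4.x in favor of top-level components. A minimal sketch of the equivalent interface on current Gradio, assuming the code_gen function defined in app-salesforce.py above:

    import gradio as gr

    # code_gen is assumed to be the function defined in app-salesforce.py above.
    iface = gr.Interface(fn=code_gen,
                         inputs=gr.Textbox(label="Input Source Code"),
                         outputs="text",
                         title="Code Generation")

    iface.launch()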
app.py CHANGED
@@ -1,20 +1,66 @@
+import torch
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, pipeline, logging
+from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
 
-# checkpoint = "Salesforce/codegen25-7b-instruct"
-# checkpoint = "Salesforce/codegen-2B-nl"
-checkpoint = "Salesforce/codegen2-1B"
+model_name_or_path = "TheBloke/WizardCoder-Guanaco-15B-V1.1-GPTQ"
+model_basename = "gptq_model-4bit-128g"
 
-tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
-# model = AutoModelForCausalLM.from_pretrained(checkpoint, cache_dir="models/")
-model = AutoModelForCausalLM.from_pretrained(checkpoint)
+use_triton = False
+
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
+tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
+
+model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
+                                           model_basename=model_basename,
+                                           use_safetensors=True,
+                                           trust_remote_code=False,
+                                           device=device,
+                                           use_triton=use_triton,
+                                           quantize_config=None,
+                                           cache_dir="models/"
+                                           )
+
+"""
+To download from a specific branch, use the revision parameter, as in this example:
+
+model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
+                                           revision="gptq-4bit-32g-actorder_True",
+                                           model_basename=model_basename,
+                                           use_safetensors=True,
+                                           trust_remote_code=False,
+                                           device="cuda:0",
+                                           quantize_config=None)
+"""
+
+
 def code_gen(text):
-    input_ids = tokenizer(text, return_tensors="pt").input_ids
-    generated_ids = model.generate(input_ids, max_length=128)
-    response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+    # input_ids = tokenizer(text, return_tensors='pt').input_ids.to(device)
+    # output = model.generate(
+    #     inputs=input_ids, temperature=0.7, max_new_tokens=124)
+    # print(tokenizer.decode(output[0]))
+
+    # Inference can also be done using transformers' pipeline
+
+    # Prevent printing spurious transformers error when using pipeline with AutoGPTQ
+    logging.set_verbosity(logging.CRITICAL)
+
+    print("*** Pipeline:")
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_new_tokens=124,
+        temperature=0.7,
+        top_p=0.95,
+        repetition_penalty=1.15
+    )
+
+    response = pipe(text)
     print(response)
-    return response
+
+    return response[0]['generated_text']
 
 
 iface = gr.Interface(fn=code_gen,
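
The rewritten app.py leaves a direct model.generate path commented out inside code_gen. For reference, a runnable sketch of that path, assuming the model, tokenizer, and device objects set up above; do_sample=True is added here because temperature has no effect under the default greedy decoding:

    def code_gen_direct(text):
        # Direct generation without the transformers pipeline wrapper,
        # reconstructed from the commented-out lines in the diff above.
        input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
        output = model.generate(inputs=input_ids,
                                do_sample=True,  # required for temperature to take effect
                                temperature=0.7,
                                max_new_tokens=124)
        return tokenizer.decode(output[0], skip_special_tokens=True)

Note that this commit also grows the dependency set: besides torch, transformers, and gradio, the Space now needs the auto-gptq package (and a CUDA-capable torch build) to load the 4-bit GPTQ checkpoint.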