bragour committed on
Commit a40bb81
1 Parent(s): 8008ee1

Update app.py

Files changed (1)
  1. app.py +15 -6
app.py CHANGED
@@ -1,9 +1,13 @@
 import gradio as gr
 import torch
-from transformers import pipeline
+from transformers import AutoTokenizer
+from awq import AutoAWQForCausalLM
+
+model_path = "bragour/Camel-7b-chat-awq"
+
+model = AutoAWQForCausalLM.from_quantized(model_path, fuse_layers=True, trust_remote_code=False, safetensors=True)
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=False)
 
-# Initialize the inference client with the model ID
-client = pipeline(model="bragour/Camel-7b-chat")
 
 def respond(
     message,
@@ -11,15 +15,20 @@ def respond(
     temperature,
     top_p,
 ):
+    formatted_prompt = f"<s>[INST]{message}[/INST]"  # Llama-2-chat instruction template
+
+    tokens = tokenizer(formatted_prompt, return_tensors='pt').input_ids.cuda()
+
     # Generate the response from the API
-    result = client(
-        message,
+    result = model.generate(
+        tokens,
+        do_sample=True,  # sampling must be enabled for temperature/top_p to take effect
         max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
     )
 
-    response = result[0]['generated_text']
+    response = tokenizer.decode(result[0], skip_special_tokens=True)
 
     return response
 
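With this change the Space loads the AWQ-quantized checkpoint locally instead of calling a hosted transformers pipeline, so generation runs on the Space's own GPU. The commit does not show how respond() is wired into the Gradio UI; the sketch below is one plausible wiring, assuming the elided parameter between message and temperature is max_tokens (it is passed as max_new_tokens=max_tokens in the body) and that each generation parameter is exposed as a slider. Labels and ranges are illustrative, not from the commit:

import gradio as gr

# Hypothetical UI wiring, not part of this commit; input order must match
# respond()'s positional parameters: (message, max_tokens, temperature, top_p).
demo = gr.Interface(
    fn=respond,
    inputs=[
        gr.Textbox(label="Message"),
        gr.Slider(minimum=1, maximum=2048, value=256, step=1, label="max_tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="temperature"),
        gr.Slider(minimum=0.05, maximum=1.0, value=0.95, step=0.05, label="top_p"),
    ],
    outputs=gr.Textbox(label="Response"),
)

if __name__ == "__main__":
    demo.launch()

The <s>[INST] ... [/INST] wrapper applied inside respond() is the Llama-2-chat prompt template, which the Camel-7b-chat fine-tune appears to follow.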