abdfajar707 commited on
Commit
c7e793e
1 Parent(s): 5cf9c20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -3
app.py CHANGED
@@ -4,8 +4,24 @@ from huggingface_hub import InferenceClient
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
- client = InferenceClient("abdfajar707/rkp_lora_model")
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  def respond(
11
  message,
@@ -27,7 +43,7 @@ def respond(
27
 
28
  response = ""
29
 
30
- for message in client.chat_completion(
31
  messages,
32
  max_tokens=max_tokens,
33
  stream=True,
@@ -43,7 +59,7 @@ def respond(
43
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
  """
45
  demo = gr.ChatInterface(
46
- respond,
47
  additional_inputs=[
48
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
49
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
 
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
+ model = InferenceClient("abdfajar707/rkp_lora_model")
8
 
9
def get_completion(query: str, model, tokenizer, device: str = "cuda:0") -> str:
    """Generate a model response for *query* using a Gemma-style chat prompt.

    Args:
        query: The user's instruction/question, interpolated into the
            prompt template below.
        model: A loaded causal-LM with a HF-style ``generate`` method.
        tokenizer: The matching tokenizer; must support ``__call__`` returning
            tensors, ``decode``, and expose ``eos_token_id``.
        device: Torch device string the encoded inputs are moved to.
            Defaults to ``"cuda:0"`` (the original hard-coded value).

    Returns:
        The full decoded generation (prompt included) with special tokens
        stripped.
    """
    # NOTE(review): the literal "\\n" below produces a backslash-n in the
    # prompt text at runtime, not a newline — preserved as-is from the
    # original, but confirm against the Gemma chat template.
    prompt_template = """
    <start_of_turn>user
    Below is an instruction that describes a task. Write a response that appropriately completes the request.
    {query}
    <end_of_turn>\\n<start_of_turn>model

    """
    prompt = prompt_template.format(query=query)
    encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
    model_inputs = encodeds.to(device)
    # pad_token_id is set explicitly to silence the HF warning when the
    # tokenizer has no dedicated pad token.
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=1000,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return decoded
25
 
26
  def respond(
27
  message,
 
43
 
44
  response = ""
45
 
46
+ for message in model.chat_completion(
47
  messages,
48
  max_tokens=max_tokens,
49
  stream=True,
 
59
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
60
  """
61
  demo = gr.ChatInterface(
62
+ get_completion,
63
  additional_inputs=[
64
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
65
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),