abdfajar707 committed
Commit f3c3d6f
1 parent: c7e793e

Update app.py

Files changed (1): app.py (+24, -75)
app.py CHANGED
@@ -1,79 +1,28 @@
+from transformers import AutoModel, AutoTokenizer
+import torch
 import gradio as gr
-from huggingface_hub import InferenceClient
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-model = InferenceClient("abdfajar707/rkp_lora_model")
-
-def get_completion(query: str, model, tokenizer) -> str:
-    device = "cuda:0"
-    prompt_template = """
-    <start_of_turn>user
-    Below is an instruction that describes a task. Write a response that appropriately completes the request.
-    {query}
-    <end_of_turn>\n<start_of_turn>model
-
-    """
-    prompt = prompt_template.format(query=query)
-    encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
-    model_inputs = encodeds.to(device)
-    generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True, pad_token_id=tokenizer.eos_token_id)
-    # decoded = tokenizer.batch_decode(generated_ids)
-    decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-    return (decoded)
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in model.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    get_completion,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
+# Load the model and tokenizer
+model_name = "abdfajar707/rkp_llama3_lora_model"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModel.from_pretrained(model_name)
+
+# Define the function for text generation
+def generate_text(prompt):
+    inputs = tokenizer(prompt, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model(**inputs)
+    logits = outputs.last_hidden_state  # Adjust depending on your model's output
+    predicted_indices = torch.argmax(logits, dim=-1)
+    predicted_text = tokenizer.decode(predicted_indices[0], skip_special_tokens=True)
+    return predicted_text
+
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=gr.inputs.Textbox(lines=2, placeholder="Enter your prompt here..."),
+    outputs="text"
 )
 
-
-if __name__ == "__main__":
-    demo.launch()
+# Launch the Gradio interface
+iface.launch()
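
Note on the new version: `AutoModel.from_pretrained` loads the bare transformer backbone, so `outputs.last_hidden_state` has shape (batch, seq_len, hidden_size), and `torch.argmax(..., dim=-1)` yields indices into the hidden dimension rather than token IDs; decoding them will not produce a meaningful completion. `gr.inputs.Textbox` is also the legacy Gradio namespace (deprecated in 3.x, removed in 4.x). Below is a minimal sketch of the same app with actual autoregressive decoding, assuming the checkpoint is a causal LM; the sampling settings are illustrative, not from this commit.

import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "abdfajar707/rkp_llama3_lora_model"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# AutoModelForCausalLM attaches the LM head, so generate() is available
model = AutoModelForCausalLM.from_pretrained(model_name)

def generate_text(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        # Decode token by token instead of a single argmax over hidden states
        output_ids = model.generate(
            **inputs,
            max_new_tokens=256,  # illustrative budget, not from the commit
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),  # current API; gr.inputs is gone in Gradio 4
    outputs="text",
)

iface.launch()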