Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,59 +1,43 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import
|
|
|
3 |
|
4 |
-
|
5 |
-
|
|
|
|
|
6 |
|
7 |
-
def
|
8 |
-
|
9 |
-
|
10 |
-
user_input,
|
11 |
-
temperature=0.4,
|
12 |
-
top_p=0.95,
|
13 |
-
top_k=50,
|
14 |
-
max_new_tokens=256,
|
15 |
-
):
|
16 |
-
pipe = load_model(model_name)
|
17 |
-
# Need to add additional options later.
|
18 |
-
if template_name == "Falcon 1B Template":
|
19 |
-
message_template = [
|
20 |
-
{"role": "user", "content": "Hello!"},
|
21 |
-
{"role": "assistant", "content": "Hello! How can I assist you today?"},
|
22 |
-
{"role": "user", "content": user_input},
|
23 |
-
]
|
24 |
-
else: # Default to "TinyLlama Template"
|
25 |
-
message_template = [
|
26 |
-
{
|
27 |
-
"role": "system",
|
28 |
-
"content": "You are a highly knowledgeable and friendly chatbot equipped with extensive information across various domains. Your goal is to understand and respond to user inquiries with accuracy and clarity. You're adept at providing detailed explanations, concise summaries, and insightful responses. Your interactions are always respectful, helpful, and focused on delivering the most relevant information to the user.",
|
29 |
-
},
|
30 |
-
{"role": "user", "content": user_input},
|
31 |
-
]
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
g = gr.Interface(
|
43 |
-
fn=generate,
|
44 |
inputs=[
|
45 |
-
gr.
|
46 |
-
gr.
|
47 |
-
gr.
|
48 |
-
gr.
|
49 |
-
gr.
|
50 |
-
gr.components.Slider(minimum=0, maximum=100, step=1, value=50, label="Top k"),
|
51 |
-
gr.components.Slider(minimum=1, maximum=1024, step=1, value=256, label="Max tokens"),
|
52 |
],
|
53 |
-
outputs=
|
54 |
-
title="
|
55 |
-
description="
|
56 |
-
concurrency_limit=1
|
57 |
)
|
58 |
|
59 |
-
|
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
3 |
+
import torch
|
4 |
|
5 |
+
# Load the model and tokenizer once at module import time so every
# generation request reuses the same weights.
# NOTE(review): from_pretrained downloads from the Hugging Face Hub on
# first run — this is an import-time network side effect.
model_name = "dumb-dev/TinyLlama-1.1B-Chat-rust-cpp-encodings"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
|
9 |
|
10 |
+
def generate_text(prompt, top_p, top_k, max_tokens, temperature):
    """Generate a sampled completion for *prompt* with the module-level model.

    Parameters
    ----------
    prompt : str
        Input text to complete.
    top_p : float
        Nucleus-sampling probability mass in [0, 1].
    top_k : int
        Number of highest-probability tokens kept at each sampling step.
    max_tokens : int
        Maximum number of NEW tokens to generate (prompt excluded).
    temperature : float
        Sampling temperature; higher values increase randomness.

    Returns
    -------
    str
        Decoded text (prompt plus continuation) with special tokens stripped.
    """
    inputs = tokenizer(prompt, return_tensors="pt")

    # Inference only: disable autograd to save memory and time.
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            # Pass the attention mask explicitly; otherwise generate()
            # warns and may mishandle the input.
            attention_mask=inputs["attention_mask"],
            do_sample=True,
            # Bug fix: max_length counts the prompt tokens too, so a prompt
            # longer than max_tokens left no room for generation at all.
            # max_new_tokens matches what the "Max tokens" slider promises.
            max_new_tokens=max_tokens,
            top_p=top_p,
            top_k=top_k,
            temperature=temperature,
            # TinyLlama defines no pad token; fall back to EOS to silence
            # the "pad_token_id not set" warning during generation.
            pad_token_id=tokenizer.eos_token_id,
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
26 |
|
27 |
+
# Assemble the Gradio UI: a prompt box plus four sampling-parameter
# sliders, wired to generate_text, with a plain-text output pane.
_prompt_input = gr.Textbox(lines=2, placeholder="Enter your prompt here...")
_sampling_controls = [
    gr.Slider(0, 1, step=0.01, value=0.9, label="Top-p"),
    gr.Slider(0, 100, step=1, value=50, label="Top-k"),
    gr.Slider(1, 512, step=1, value=100, label="Max tokens"),
    gr.Slider(0.1, 2, step=0.1, value=1, label="Temperature"),
]

interface = gr.Interface(
    fn=generate_text,
    inputs=[_prompt_input, *_sampling_controls],
    outputs="text",
    title="TinyLlama-1.1B Chat",
    description="Generate text using TinyLlama-1.1B-Chat model with adjustable parameters.",
)
|
41 |
|
42 |
+
# Start the web server only when executed as a script (not on import),
# so the module can be imported without launching the app.
if __name__ == "__main__":
    interface.launch()
|