Boning c committed on
Commit 6d34d27 · verified · 1 Parent(s): 3a351f3

Update app.py

Files changed (1)
  1. app.py +100 -87
app.py CHANGED
@@ -1,115 +1,128 @@
  import gradio as gr
  import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM

- MODELS = [
-     "Smilyai-labs/Sam-reason-A1",
      "Smilyai-labs/Sam-reason-S1",
      "Smilyai-labs/Sam-reason-S1.5",
-     "Smilyai-labs/Sam-reason-S2",
-     "Smilyai-labs/Sam-reason-S3",
      "Smilyai-labs/Sam-reason-v1",
      "Smilyai-labs/Sam-reason-v2",
-     "Smilyai-labs/Sam-flash-mini-v1"
  ]

  device = "cuda" if torch.cuda.is_available() else "cpu"

  model = None
  tokenizer = None

  def load_model(model_name):
-     global model, tokenizer
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-     model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
-     model.eval()
-     return f"Loaded model: {model_name}"
-
- def build_prompt(chat_history):
      prompt = ""
-     for role, text in chat_history:
-         prompt += f"{role}: {text}\n"
-     prompt += "Assistant: "
-     return prompt
-
- def generate_stream(chat_history, max_length=100, temperature=0.7, top_p=0.9):
-     global model, tokenizer
-     if model is None or tokenizer is None:
-         yield "Model not loaded. Please select a model first."
-         return
-
-     prompt = build_prompt(chat_history)
-     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
-     generated_ids = input_ids
-     output_text = prompt
-
-     for _ in range(max_length):
-         outputs = model(generated_ids)
-         logits = outputs.logits
-         next_token_logits = logits[:, -1, :] / temperature
-
-         sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
-         cumulative_probs = torch.softmax(sorted_logits, dim=-1).cumsum(dim=-1)
-
-         sorted_indices_to_remove = cumulative_probs > top_p
-         sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
-         sorted_indices_to_remove[..., 0] = 0
-
-         filtered_logits = next_token_logits.clone()
-         filtered_logits[:, sorted_indices[sorted_indices_to_remove]] = -float('Inf')
-
-         probabilities = torch.softmax(filtered_logits, dim=-1)
-         next_token = torch.multinomial(probabilities, num_samples=1)
-         generated_ids = torch.cat([generated_ids, next_token], dim=-1)
-
-         new_token_text = tokenizer.decode(next_token[0])
-         output_text += new_token_text
-
-         assistant_reply = output_text.split("Assistant:")[-1].strip()
-
-         yield assistant_reply
-
-         if next_token.item() == tokenizer.eos_token_id:
-             break
-
- def chatbot_step(user_input, chat_history):
-     if not user_input.strip():
-         # Return chat_history and error message (no change to chat display)
-         return chat_history, chat_history
-
-     # Append user message to chat history
-     chat_history = chat_history + [("User", user_input)]
-
-     assistant_response = ""
-
-     def response_generator():
-         nonlocal assistant_response
-         for partial_reply in generate_stream(chat_history):
-             assistant_response = partial_reply
-             # Yield tuple: (chatbot content, updated state)
-             yield (chat_history + [("Assistant", assistant_response)], chat_history + [("Assistant", assistant_response)])
-
-     return response_generator()

  with gr.Blocks() as demo:
-     gr.Markdown("# SmilyAI Sam Multi-turn Chatbot with Token Streaming")

      with gr.Row():
-         model_selector = gr.Dropdown(choices=MODELS, value=MODELS[0], label="Select Model")
-         status = gr.Textbox(label="Status", interactive=False)

      chatbot = gr.Chatbot()
-     msg = gr.Textbox(label="Your message")
-     send_btn = gr.Button("Send")

-     # Load default model
-     status.value = load_model(MODELS[0])

-     model_selector.change(lambda m: load_model(m), inputs=model_selector, outputs=status)

-     state = gr.State([])

-     send_btn.click(chatbot_step, inputs=[msg, state], outputs=[chatbot, state])
-     msg.submit(chatbot_step, inputs=[msg, state], outputs=[chatbot, state])

  demo.launch()
 
  import gradio as gr
  import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+ import requests

+ # Verified existing SmilyAI models on Huggingface
+ MODEL_CANDIDATES = [
      "Smilyai-labs/Sam-reason-S1",
      "Smilyai-labs/Sam-reason-S1.5",
+     "Smilyai-labs/Sam-reason-S2.1",
      "Smilyai-labs/Sam-reason-v1",
      "Smilyai-labs/Sam-reason-v2",
+     "Smilyai-labs/Sam-large-v1",
+     "Smilyai-labs/Sam-flash-mini-v1",
+     "Smilyai-labs/Sam-reason-A1"
  ]

+ def model_exists(repo_id):
+     url = f"https://huggingface.co/api/models/{repo_id}"
+     try:
+         response = requests.get(url)
+         return response.status_code == 200
+     except Exception:
+         return False
+
+ # Filter models that actually exist
+ AVAILABLE_MODELS = [m for m in MODEL_CANDIDATES if model_exists(m)]
+
+ if not AVAILABLE_MODELS:
+     raise RuntimeError("No verified SmilyAI models are available from Huggingface!")
+
  device = "cuda" if torch.cuda.is_available() else "cpu"

+ # Globals to hold current model and tokenizer
  model = None
  tokenizer = None
+ generator = None

  def load_model(model_name):
+     global model, tokenizer, generator
+     try:
+         tokenizer = AutoTokenizer.from_pretrained(model_name)
+         model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
+         model.eval()
+         # Use pipeline for generation with streaming support
+         generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if device == "cuda" else -1)
+         return f"✅ Loaded model: {model_name} on {device}"
+     except Exception as e:
+         return f"❌ Failed to load model: {model_name}\n{str(e)}"
+
+ def chat_stream(user_message, history, model_name):
+     global model, tokenizer, generator
+
+     if model is None or tokenizer is None or generator is None:
+         load_status = load_model(model_name)
+         if load_status.startswith("❌"):
+             yield history, load_status
+             return
+
+     if history is None:
+         history = []
+
+     # Append user input to history
+     history.append((user_message, ""))
+
+     # Prepare prompt with conversation history for multi-turn chat
      prompt = ""
+     for user, bot in history[:-1]:
+         prompt += f"User: {user}\nSam: {bot}\n"
+     prompt += f"User: {user_message}\nSam:"
+
+     # Streaming token generation
+     response_text = ""
+     try:
+         # Set parameters to generate text token by token
+         # Use generator with `stream=True` if supported (Huggingface pipeline streaming)
+         # Note: some transformers versions or models may not support streaming in pipeline.
+         # We'll simulate streaming here by chunking output.
+
+         # Generate full text first (fallback)
+         output = generator(prompt, max_new_tokens=128, do_sample=True, top_p=0.9, temperature=0.8)[0]['generated_text']
+
+         # Extract the new bot output (everything after prompt)
+         bot_reply = output[len(prompt):].strip()
+
+         # Stream output token-by-token to Gradio
+         for i in range(1, len(bot_reply) + 1):
+             partial = bot_reply[:i]
+             history[-1] = (user_message, partial)
+             yield history, ""
+     except Exception as e:
+         history[-1] = (user_message, f"Error during generation: {str(e)}")
+         yield history, ""
+
+ def reset_chat():
+     return [], ""

  with gr.Blocks() as demo:
+     gr.Markdown("# SmilyAI Sam Chatbot")

      with gr.Row():
+         model_dropdown = gr.Dropdown(choices=AVAILABLE_MODELS, value=AVAILABLE_MODELS[0], label="Select Sam Model")

      chatbot = gr.Chatbot()
+     state = gr.State([])  # conversation history
+     status = gr.Textbox(value="", interactive=False, visible=True, label="Status")
+
+     with gr.Row():
+         user_input = gr.Textbox(show_label=False, placeholder="Type your message and hit Enter", lines=2)
+         submit_btn = gr.Button("Send")
+
+     def on_submit(user_message, history, model_name):
+         return chat_stream(user_message, history, model_name)

+     submit_btn.click(on_submit, inputs=[user_input, state, model_dropdown], outputs=[chatbot, status], queue=True)
+     user_input.submit(on_submit, inputs=[user_input, state, model_dropdown], outputs=[chatbot, status], queue=True)

+     def on_model_change(new_model):
+         # Reload model on change, reset chat
+         status_message = load_model(new_model)
+         return [], status_message

+     model_dropdown.change(on_model_change, inputs=model_dropdown, outputs=[chatbot, status])

+     reset_btn = gr.Button("Reset Chat")
+     reset_btn.click(reset_chat, outputs=[chatbot, status])

  demo.launch()
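
Note on the new model_exists() helper: it probes https://huggingface.co/api/models/{repo_id} with an unauthenticated requests.get, so rate limiting or gated repos can make an existing model look missing. A minimal sketch of the same check via huggingface_hub is shown below; it assumes that library is installed in the Space and is illustrative only, not part of this commit.

# Sketch only: repo existence check via huggingface_hub instead of a raw HTTP call.
# Assumption: huggingface_hub is available; this helper is not part of the commit above.
from huggingface_hub import HfApi
from huggingface_hub.utils import RepositoryNotFoundError

def model_exists(repo_id):
    try:
        HfApi().model_info(repo_id)
        return True
    except RepositoryNotFoundError:
        return False
    except Exception:
        # Network or auth problems: treat the repo as unavailable rather than crash at startup.
        return False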
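
As the comments in chat_stream() note, this version generates the full reply first and then replays it character by character to simulate streaming. For reference, below is a minimal sketch of true token-level streaming with transformers' TextIteratorStreamer, reusing the model, tokenizer, and device globals defined above; the function name and parameters are illustrative and not part of this commit.

# Sketch only: true token streaming with TextIteratorStreamer (assumes a recent transformers release).
from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(prompt, max_new_tokens=128):
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks, so run it in a background thread and read tokens from the streamer.
    thread = Thread(
        target=model.generate,
        kwargs=dict(**inputs, max_new_tokens=max_new_tokens, do_sample=True,
                    top_p=0.9, temperature=0.8, streamer=streamer),
    )
    thread.start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # chat_stream() could yield the history with this partial reply instead
    thread.join()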