Update app.py
app.py CHANGED
@@ -1,5 +1,6 @@
 # app.py
 
+import transformers
 import torch
 import gradio as gr
 from transformers import (
@@ -9,26 +10,44 @@ from transformers import (
 )
 
 ###############################################################################
-#
+# Debug Print Section
 ###############################################################################
-MODEL_ID = "FrameRateTech/DamageScan-llama-8b-instruct-merged"
+MODEL_ID = "FrameRateTech/DamageScan-llama-8b-instruct-merged"
+print("Transformers version:", transformers.__version__)
+
+# Attempt to load the tokenizer once just to see what happens
+try:
+    tokenizer_test = AutoTokenizer.from_pretrained(
+        MODEL_ID,
+        use_fast=False,
+        trust_remote_code=True
+    )
+    print("tokenizer_test =", tokenizer_test)
+    print("type(tokenizer_test) =", type(tokenizer_test))
+except Exception as e:
+    print("AutoTokenizer failed with exception:", e)
+    raise e
+
+# If it's returning False, bail out early so we don't crash below
+if tokenizer_test is False:
+    raise ValueError("AutoTokenizer returned False, meaning it failed to load properly.")
 
 ###############################################################################
-#
+# 1. Load Tokenizer
 ###############################################################################
+# Now load the real tokenizer for your app
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_ID,
     use_fast=False,
     trust_remote_code=True
 )
 
-#
+# If `tokenizer` is not False, set pad_token_id if needed
 if getattr(tokenizer, "pad_token_id", None) is None:
-    # If no pad token is defined, fall back to eos_token_id
     tokenizer.pad_token_id = getattr(tokenizer, "eos_token_id", None)
 
 ###############################################################################
-#
+# 2. Load Model
 ###############################################################################
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
@@ -39,7 +58,7 @@ model = AutoModelForCausalLM.from_pretrained(
 model.eval()
 
 ###############################################################################
-#
+# 3. Default Generation Settings
 ###############################################################################
 default_gen_config = GenerationConfig(
     temperature=0.7,
@@ -50,17 +69,9 @@ default_gen_config = GenerationConfig(
 )
 
 ###############################################################################
-#
+# 4. Helper: Convert Chatbot Messages to Prompt
 ###############################################################################
 def messages_to_prompt(messages):
-    """
-    Convert a list of chat messages (role/content) into a text prompt.
-    Example of messages:
-    [
-        {"role": "user", "content": "..."},
-        {"role": "assistant", "content": "..."}
-    ]
-    """
     conversation = ""
     for msg in messages:
         if msg["role"] == "user":
@@ -70,16 +81,10 @@ def messages_to_prompt(messages):
     return conversation
 
 ###############################################################################
-#
+# 5. Generation Function
 ###############################################################################
 def predict(messages, temperature, top_p, max_new_tokens):
-    """
-    Takes the current conversation (messages) and returns an updated list
-    of messages with the model's response appended.
-    """
    prompt_text = messages_to_prompt(messages) + "Assistant:"
-
-    # Create a GenerationConfig on the fly with user settings
    gen_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
@@ -87,31 +92,22 @@ def predict(messages, temperature, top_p, max_new_tokens):
        repetition_penalty=1.1,
        max_new_tokens=max_new_tokens,
    )
-
    with torch.no_grad():
-        # Tokenize and move to GPU
        inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)
-        # Generate
        outputs = model.generate(**inputs, generation_config=gen_config)
-        # Decode the output
        full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    # The newly generated text is the difference between the prompt and the full output
    generated_reply = full_text[len(prompt_text):].strip()
-
-    # Append the model's reply
    messages.append({"role": "assistant", "content": generated_reply})
    return messages
 
 ###############################################################################
-#
+# 6. Build the Gradio Interface
 ###############################################################################
 with gr.Blocks() as demo:
     gr.Markdown("<h1 align='center'>DamageScan 8B Instruct Chatbot</h1>")
 
     with gr.Row():
         with gr.Column():
-            # "type='messages'" => each item is a dict {"role": ..., "content": ...}
             chatbot = gr.Chatbot(label="Chat History", type="messages")
         with gr.Column():
             gr.Markdown("### Generation Settings")
@@ -125,17 +121,14 @@ with gr.Blocks() as demo:
                minimum=64, maximum=2048, value=256, step=64, label="Max New Tokens"
            )
 
-    # User input box
    user_input = gr.Textbox(lines=1, label="Your Message", placeholder="Type here...")
    send_btn = gr.Button("Send")
 
-    # Function that appends the user's input to the chat, calls the model, and returns the updated chat
    def user_submit(message_history, user_text, temp, top_p, max_tokens):
        message_history.append({"role": "user", "content": user_text})
        updated_messages = predict(message_history, temp, top_p, max_tokens)
        return updated_messages, ""
 
-    # Link the button and the textbox "Enter" key to user_submit
    send_btn.click(
        user_submit,
        inputs=[chatbot, user_input, temperature_slider, top_p_slider, max_tokens_slider],
@@ -147,5 +140,4 @@
        outputs=[chatbot, user_input],
    )
 
-# Launch the Gradio interface with a queue for concurrency
 demo.queue().launch()
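For reference, the prompt `predict` feeds the model is just the flattened chat history with a bare `Assistant:` suffix. The hunks above elide the body of the role loop in `messages_to_prompt`, so the sketch below fills it in with hypothetical `User:`/`Assistant:` line prefixes; the trailing `"Assistant:"` in `predict` suggests this shape, but the exact separators are an assumption, not something the commit shows.

```python
# Minimal sketch of the prompt flattening. The "User: "/"Assistant: " prefixes
# are assumed; the diff only shows the trailing "Assistant:" suffix.
def messages_to_prompt(messages):
    conversation = ""
    for msg in messages:
        if msg["role"] == "user":
            conversation += f"User: {msg['content']}\n"        # assumed format
        elif msg["role"] == "assistant":
            conversation += f"Assistant: {msg['content']}\n"   # assumed format
    return conversation

history = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi, how can I help?"},
    {"role": "user", "content": "Describe the model."},
]
print(messages_to_prompt(history) + "Assistant:")
# User: Hello
# Assistant: Hi, how can I help?
# User: Describe the model.
# Assistant:
```

Note that recovering the reply with `full_text[len(prompt_text):]` only works while decoding with `skip_special_tokens=True` reproduces the prompt text byte-for-byte; a tokenizer that normalizes whitespace on round-trip would shift that offset.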
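One detail both sides of the diff rely on: many causal-LM checkpoints ship without a `pad_token_id`, and `generate` expects one as soon as padding or attention masks are involved, hence the `eos_token_id` fallback. A standalone illustration of the same fallback, using `gpt2` purely as a stand-in for a pad-less checkpoint (the DamageScan tokenizer may or may not define one):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")  # stand-in: gpt2 ships without a pad token
print(tok.pad_token_id)                      # None

# Same fallback the commit applies: reuse the end-of-sequence token for padding.
if getattr(tok, "pad_token_id", None) is None:
    tok.pad_token_id = getattr(tok, "eos_token_id", None)

print(tok.pad_token_id)                      # 50256, gpt2's eos_token_id
```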