Spaces:

SandLogicTechnologies
/

Shakti-2.5B

Running on T4

App Files Files Community

SandLogicTechnologies committed on 11 days ago

Commit

d816a8a

•

1 Parent(s): 3080342

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -37

app.py CHANGED Viewed

@@ -8,9 +8,8 @@ import torch
 import json
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 DESCRIPTION = """\
-Shakti is a 2.5 billion parameter language model specifically optimized for resource-constrained environments such as edge devices, including smartphones, wearables, and IoT systems. With support for vernacular languages and domain-specific tasks, Shakti excels in industries such as healthcare, finance, and customer service
 For more details, please check [here](https://arxiv.org/pdf/2410.11331v1).
 """
@@ -20,17 +19,31 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "2048"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-model_id = "SandLogicTechnologies/Shakti-2.5B"
-tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.getenv("SHAKTI"))
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    device_map="auto",
-    torch_dtype=torch.bfloat16,
-    token=os.getenv("SHAKTI")
-)
-model.eval()
 @spaces.GPU(duration=90)
 def generate(
@@ -79,6 +92,28 @@ def generate(
         outputs.append(text)
         yield "".join(outputs)
 chat_interface = gr.ChatInterface(
     fn=generate,
@@ -97,39 +132,28 @@ chat_interface = gr.ChatInterface(
             step=0.1,
             value=0.6,
         ),
-        # gr.Slider(
-        #     label="Top-p (nucleus sampling)",
-        #     minimum=0.05,
-        #     maximum=1.0,
-        #     step=0.05,
-        #     value=0.9,
-        # ),
-        # gr.Slider(
-        #     label="Top-k",
-        #     minimum=1,
-        #     maximum=1000,
-        #     step=1,
-        #     value=50,
-        # ),
-        # gr.Slider(
-        #     label="Repetition penalty",
-        #     minimum=1.0,
-        #     maximum=2.0,
-        #     step=0.05,
-        #     value=1.2,
-        # ),
     ],
     stop_btn=None,
-    examples=[
-            ["Tell me a story"], ["write a short poem which is hard to sing"], ['मुझे भारतीय इतिहास के बारे में बताएं']
-    ],
     cache_examples=False,
 )
 with gr.Blocks(css="style.css", fill_height=True) as demo:
     gr.Markdown(DESCRIPTION)
     gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
     chat_interface.render()
 if __name__ == "__main__":
-    demo.queue(max_size=20).launch()

 import json
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 DESCRIPTION = """\
+Shakti is a 2.5 billion parameter language model specifically optimized for resource-constrained environments such as edge devices, including smartphones, wearables, and IoT systems. With support for vernacular languages and domain-specific tasks, Shakti excels in industries such as healthcare, finance, and customer service.
 For more details, please check [here](https://arxiv.org/pdf/2410.11331v1).
 """
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+# Model configurations
+model_options = {
+    "Shakti-100M": "SandLogicTechnologies/Shakti-100M",
+    "Shakti-250M": "SandLogicTechnologies/Shakti-250M",
+    "Shakti-2.5B": "SandLogicTechnologies/Shakti-2.5B"
+}
+# Initialize tokenizer and model variables
+tokenizer = None
+model = None
+# Load the tokenizer and model for `selected_model` and publish them through the
+# module-level `tokenizer` and `model` globals.
+# `selected_model` must be a key of `model_options`; any other value raises
+# KeyError at the lookup below.
+# NOTE(review): the previously loaded model is only rebound here, not explicitly
+# released; confirm memory is reclaimed before the next model is loaded.
+def load_model(selected_model: str):
+    global tokenizer, model
+    model_id = model_options[selected_model]
+    # The access token is read from the SHAKTI environment variable (None if unset).
+    tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.getenv("SHAKTI"))
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        device_map="auto",
+        torch_dtype=torch.bfloat16,
+        token=os.getenv("SHAKTI")
+    )
+    # Switch the freshly loaded model to evaluation mode.
+    model.eval()
+# Initial model load (default to 2.5B)
+load_model("Shakti-2.5B")
 @spaces.GPU(duration=90)
 def generate(
         outputs.append(text)
         yield "".join(outputs)
+# Return the example prompts for `selected_model` as a list of single-element
+# lists (one prompt string per inner list).
+# Any name other than "Shakti-100M" or "Shakti-250M" falls through to the last
+# branch, which is also what the initial "Shakti-2.5B" selection uses.
+def update_examples(selected_model):
+    if selected_model == "Shakti-100M":
+        return [["Tell me a story"],
+            ["Write a short poem on Rose"],
+            ["What are computers"]]
+    elif selected_model == "Shakti-250M":
+        # NOTE(review): the last six rows below are tab-indented while the rest of
+        # the function uses spaces.
+        return [["Can you explain the pathophysiology of hypertension and its impact on the cardiovascular system?"],
+            ["What are the potential side effects of beta-blockers in the treatment of arrhythmias?"],
+            ["What foods are good for boosting the immune system?"],
+			["What is the difference between a stock and a bond?"],
+			["How can I start saving for retirement?"],
+			["What are some low-risk investment options?"],
+			["What is a power of attorney and when is it used?"],
+			["What are the key differences between a will and a trust?"],
+			["How do I legally protect my business name?"]]
+    else:
+        return [["Tell me a story"], ["write a short poem which is hard to sing"], ['मुझे भारतीय इतिहास के बारे में बताएं']]
+# Handler for a model selection: replaces the global tokenizer/model via
+# load_model, then returns the example prompts for the same model name.
+# NOTE(review): the return value is a plain list of prompt lists; confirm the
+# component this handler is wired to can accept that as an update.
+def on_model_select(selected_model):
+    load_model(selected_model)  # Load the selected model
+    return update_examples(selected_model)  # Return new examples based on the selected model
 chat_interface = gr.ChatInterface(
     fn=generate,
             step=0.1,
             value=0.6,
         ),
     ],
     stop_btn=None,
+    examples=update_examples("Shakti-2.5B"),  # Set initial examples for 2.5B model
     cache_examples=False,
 )
 with gr.Blocks(css="style.css", fill_height=True) as demo:
     gr.Markdown(DESCRIPTION)
     gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
+    # Dropdown for model selection
+    # NOTE(review): these choices repeat the keys of `model_options`; the two lists
+    # must be kept in sync by hand.
+    model_dropdown = gr.Dropdown(
+        label="Select Model",
+        choices=["Shakti-100M", "Shakti-250M", "Shakti-2.5B"],
+        value="Shakti-2.5B",
+        interactive=True,
+    )
+    # Function to handle model change and update examples dynamically
+    # NOTE(review): on_model_select returns a list of example prompts, but the only
+    # output target is the ChatInterface object itself; confirm Gradio accepts this
+    # and actually refreshes the displayed examples.
+    model_dropdown.change(on_model_select, inputs=model_dropdown, outputs=[chat_interface])
     chat_interface.render()
 if __name__ == "__main__":
+    demo.queue(max_size=20).launch()