farhananis005 committed
Commit ec0af28 · verified · 1 Parent(s): e32f84f

LLM finetuning demo

Files changed (4)
  1. app.py +206 -0
  2. backend.py +103 -0
  3. config.py +83 -0
  4. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,206 @@
+ import gradio as gr
+ import random
+ from threading import Thread
+ from queue import Queue
+
+ # Import our new modules
+ import config
+ import backend
+
+ # --- HELPER FUNCTIONS (Unchanged) ---
+ def get_random_question(domain):
+     data_conf = config.DATASET_CONFIG[domain]
+     dataset = data_conf["dataset"]
+
+     if not dataset:
+         return "Failed to load dataset.", "N/A"
+
+     random_index = random.randint(0, len(dataset) - 1)
+     sample = dataset[random_index]
+
+     if domain == "Math":
+         question = sample[data_conf["question_col"]]
+         answer = sample[data_conf["answer_col"]]
+     elif domain == "Bio":
+         instruction = sample[data_conf["instruction_col"]]
+         bio_input = sample[data_conf["input_col"]]
+         answer = sample[data_conf["answer_col"]]
+         if bio_input and bio_input.strip():
+             question = f"**Instruction:**\n{instruction}\n\n**Input:**\n{bio_input}"
+         else:
+             question = instruction
+
+     return question, answer
+
+ def update_domain_settings(domain):
+     models = list(config.ALL_MODELS[domain].keys())
+     def_base = next((m for m in models if "Base" in m), models[0])
+     def_ft = next((m for m in models if "Finetuned" in m), models[0])
+
+     q, a = get_random_question(domain)
+     return [
+         gr.Dropdown(choices=models, value=def_base),
+         gr.Dropdown(choices=models, value=def_ft),
+         gr.Textbox(value=q),
+         a,
+         gr.Markdown(visible=False)
+     ]
+
+ def load_next_question(domain):
+     q, a = get_random_question(domain)
+     return [gr.Textbox(value=q), a, gr.Markdown(visible=False, value="")]
+
+ def reveal_answer(hidden_answer):
+     return gr.Markdown(value=f"**Ground Truth Answer:**\n\n{hidden_answer}", visible=True)
+
+ # --- CORE LOGIC (REBUILT FOR TRUE PARALLEL STREAMING) ---
+
+ def stream_to_queue(model_id, prompt, lane, queue, key):
+     """
+     A worker function that runs in a thread.
+     It calls the streaming API and puts tokens into the queue.
+     """
+     try:
+         # call_modal_api is a generator
+         for token in backend.call_modal_api(model_id, prompt, lane):
+             queue.put((key, token))
+     except Exception as e:
+         queue.put((key, f"\n\nTHREAD ERROR: {e}"))
+     finally:
+         # When the stream is done, put a 'None' sentinel
+         queue.put((key, None))
+
+ def run_comparison(domain, question, model_1_name, model_2_name):
+     # 1. Get IDs
+     id_1 = config.ALL_MODELS[domain].get(model_1_name)
+     id_2 = config.ALL_MODELS[domain].get(model_2_name)
+
+     # 2. Ask the Smart Router
+     lane_for_m1, lane_for_m2 = backend.router.get_routing_plan(id_1, id_2)
+
+     # 3. Create the Queue and Threads
+     q = Queue()
+
+     Thread(
+         target=stream_to_queue,
+         args=(id_1, question, lane_for_m1, q, 'm1')
+     ).start()
+
+     Thread(
+         target=stream_to_queue,
+         args=(id_2, question, lane_for_m2, q, 'm2')
+     ).start()
+
+     # 4. Listen to the Queue
+     text1 = ""
+     text2 = ""
+     m1_done = False
+     m2_done = False
+
+     # Clear boxes and start
+     yield "", "", gr.Markdown(visible=False)
+
+     while not (m1_done and m2_done):
+         # Wait for the next token from *either* thread
+         try:
+             key, token = q.get()
+         except Exception as e:
+             # This should ideally not happen
+             print(f"Queue error: {e}")
+             continue
+
+         # Check for the 'None' sentinel
+         if token is None:
+             if key == 'm1':
+                 m1_done = True
+             elif key == 'm2':
+                 m2_done = True
+         else:
+             # Append the new token
+             if key == 'm1':
+                 text1 += token
+             elif key == 'm2':
+                 text2 += token
+
+         # Yield the updated full text
+         yield text1, text2, gr.Markdown(visible=False)
+
+
+ # --- UI BUILD (Unchanged) ---
+ initial_question, initial_answer = get_random_question("Math")
+
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown(
+         """
+         # 🔬 LLM Finetuning Arena
+         ### Comparing Finetuned vs. Base Models on Specialized Tasks
+         """
+     )
+
+     hidden_answer_state = gr.State(value=initial_answer)
+
+     with gr.Row():
+         domain_radio = gr.Radio(
+             ["Math", "Bio"], label="1. Select Domain", value="Math"
+         )
+
+     with gr.Row():
+         question_box = gr.Textbox(
+             label="2. Question Prompt (Editable)",
+             value=initial_question, lines=5, scale=4
+         )
+         next_btn = gr.Button("Load Random Question 🔄", scale=1, min_width=100)
+
+     with gr.Row():
+         model_1_dd = gr.Dropdown(
+             label="3. Select Model 1 (Left)",
+             choices=list(config.ALL_MODELS["Math"].keys()),
+             value=next((m for m in config.ALL_MODELS["Math"] if "Base" in m))
+         )
+         model_2_dd = gr.Dropdown(
+             label="4. Select Model 2 (Right)",
+             choices=list(config.ALL_MODELS["Math"].keys()),
+             value=next((m for m in config.ALL_MODELS["Math"] if "Finetuned" in m))
+         )
+
+     with gr.Row():
+         run_btn = gr.Button("🚀 Run Comparison", variant="primary", scale=3)
+         show_answer_btn = gr.Button("Show Ground Truth Answer", scale=1)
+
+     answer_display_box = gr.Markdown(label="Ground Truth Answer", visible=False)
+
+     gr.Markdown("---")
+
+     with gr.Row():
+         output_1_box = gr.Markdown(label="Output: Model 1")
+         output_2_box = gr.Markdown(label="Output: Model 2")
+
+     # --- EVENTS (Unchanged) ---
+     domain_radio.change(
+         fn=update_domain_settings,
+         inputs=[domain_radio],
+         outputs=[model_1_dd, model_2_dd, question_box, hidden_answer_state, answer_display_box]
+     )
+
+     next_btn.click(
+         fn=load_next_question,
+         inputs=[domain_radio],
+         outputs=[question_box, hidden_answer_state, answer_display_box]
+     )
+
+     show_answer_btn.click(
+         fn=reveal_answer,
+         inputs=[hidden_answer_state],
+         outputs=[answer_display_box]
+     )
+
+     run_btn.click(
+         fn=run_comparison,
+         inputs=[domain_radio, question_box, model_1_dd, model_2_dd],
+         outputs=[output_1_box, output_2_box, answer_display_box]
+     )
+
+ if __name__ == "__main__":
+     if not config.MY_AUTH_TOKEN:
+         print("⚠️ WARNING: ARENA_AUTH_TOKEN is not set.")
+     demo.launch()
backend.py ADDED
@@ -0,0 +1,103 @@
+ import requests
+ import config
+
+ class SmartRouter:
+     def __init__(self):
+         # Tracks what is currently loaded in the backend (best guess)
+         self.lane_state = {
+             "primary": None,    # URL: ...-generate-primary.modal.run
+             "secondary": None   # URL: ...-generate-secondary.modal.run
+         }
+
+     def get_routing_plan(self, model_left_id, model_right_id):
+         """
+         Decides which model goes to which lane to minimize cold starts.
+         Returns: (lane_for_left_model, lane_for_right_model)
+         """
+         primary_model = self.lane_state["primary"]
+         secondary_model = self.lane_state["secondary"]
+
+         # Score: 0 = Cache Hit (Good), 1 = Cache Miss (Bad)
+
+         # Option A: Straight (Left -> Primary, Right -> Secondary)
+         cost_straight = (0 if primary_model == model_left_id else 1) + \
+                         (0 if secondary_model == model_right_id else 1)
+
+         # Option B: Swapped (Left -> Secondary, Right -> Primary)
+         cost_swapped = (0 if secondary_model == model_left_id else 1) + \
+                        (0 if primary_model == model_right_id else 1)
+
+         if cost_swapped < cost_straight:
+             print("🔀 Smart Router: swapping lanes to optimize cache!")
+             # Update state for next time
+             self.lane_state["secondary"] = model_left_id
+             self.lane_state["primary"] = model_right_id
+             return "secondary", "primary"
+         else:
+             print("⬇️ Smart Router: keeping straight lanes.")
+             # Update state for next time
+             self.lane_state["primary"] = model_left_id
+             self.lane_state["secondary"] = model_right_id
+             return "primary", "secondary"
+
+ # Create a global instance
+ router = SmartRouter()
+
+ # --- call_modal_api: STREAMING VERSION ---
+ def call_modal_api(model_repo_id, prompt, lane):
+     """
+     Calls the Modal API on a specific lane and yields tokens as they arrive.
+     This is a GENERATOR.
+     """
+     if not model_repo_id:
+         yield "Please select a model from the dropdown."
+         return  # Stop the generator
+
+     if not config.MY_AUTH_TOKEN:
+         yield "Error: `ARENA_AUTH_TOKEN` is not set on the Gradio server."
+         return
+
+     # Construct the URL based on the lane
+     if lane == "primary":
+         endpoint = f"{config.MODAL_BASE_URL}-generate-primary.modal.run"
+     else:
+         endpoint = f"{config.MODAL_BASE_URL}-generate-secondary.modal.run"
+
+     print(f"🚀 Streaming from {model_repo_id} on [{lane.upper()}]...")
+
+     headers = {
+         "Content-Type": "application/json",
+         "Authorization": f"Bearer {config.MY_AUTH_TOKEN}"
+     }
+     payload = {"model_id": model_repo_id, "prompt": prompt}
+
+     try:
+         # stream=True keeps the connection open so chunks can be read as they arrive
+         response = requests.post(
+             endpoint,
+             json=payload,
+             timeout=300,
+             headers=headers,
+             stream=True
+         )
+         response.raise_for_status()
+
+         # Yield tokens as they arrive
+         for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
+             if chunk:
+                 yield chunk
+
+     except requests.exceptions.RequestException as e:
+         # NOTE: a Response object is falsy for 4xx/5xx statuses, so compare
+         # against None here; a plain `if e.response` would never catch a 401.
+         if e.response is not None and e.response.status_code == 401:
+             yield "Error: Authentication failed. The token is invalid."
+         elif e.response is not None:
+             # Try to get error detail from the streaming API
+             try:
+                 error_detail = e.response.json().get("detail", str(e))
+                 yield f"API Error: {e.response.status_code} - {error_detail}"
+             except Exception:
+                 yield f"API Error: {e}"
+         else:
+             yield f"API Error: {e}"
+     except Exception as e:
+         yield f"An unexpected error occurred: {e}"
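The router's payoff shows up when a user swaps the two dropdowns between runs: straight routing would reload both lanes, while swapping keeps each model where it is already warm. A quick illustration with hypothetical model IDs (running it imports `config` and therefore triggers the dataset downloads):

```python
# Illustrative session against the SmartRouter above (model IDs are made up).
import backend

# First request: no lanes are warm yet, so straight assignment wins by default.
print(backend.router.get_routing_plan("model-a", "model-b"))
# -> ('primary', 'secondary'); state is now {primary: model-a, secondary: model-b}

# The user swaps the dropdowns. Straight routing would miss both caches (cost 2);
# swapped routing hits both (cost 0), so the router crosses the lanes.
print(backend.router.get_routing_plan("model-b", "model-a"))
# -> ('secondary', 'primary'); neither lane has to reload its model
```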
config.py ADDED
@@ -0,0 +1,83 @@
+ import os
+ from dotenv import load_dotenv
+ from datasets import load_dataset
+
+ load_dotenv()
+
+ # --- CONFIGURATION ---
+ MODAL_BASE_URL = "https://mohdfanis--unsloth-model-arena-backend"  # Base URL
+ MY_AUTH_TOKEN = os.environ.get("ARENA_AUTH_TOKEN")
+
+ # --- DATASETS ---
+ print("Loading Hugging Face datasets...")
+ try:
+     math_dataset = load_dataset("microsoft/orca-math-word-problems-200k", split="train")
+     bio_dataset = load_dataset("bio-nlp-umass/bioinstruct", split="train")
+     print("✅ Datasets loaded successfully.")
+ except Exception as e:
+     print(f"❌ Failed to load datasets: {e}")
+     math_dataset, bio_dataset = [], []
+
+ DATASET_CONFIG = {
+     "Math": {
+         "dataset": math_dataset,
+         "question_col": "question",
+         "answer_col": "answer"
+     },
+     "Bio": {
+         "dataset": bio_dataset,
+         "instruction_col": "instruction",
+         "input_col": "input",
+         "answer_col": "output"
+     }
+ }
+
+ # --- MODEL DEFINITIONS ---
+ BASE_MODELS = {
+     "Base Llama-3.1 8B Instruct": "unsloth/llama-3.1-8b-instruct-bnb-4bit",
+     "Base Llama-3 8B Instruct": "unsloth/llama-3-8b-instruct-bnb-4bit",
+     "Base Llama-2 7B Chat": "unsloth/llama-2-7b-chat-bnb-4bit",
+     "Base Mistral 7B Instruct": "unsloth/mistral-7b-v0.3-instruct-bnb-4bit",
+     "Base Qwen-2 7B Instruct": "unsloth/qwen2-7B-instruct-bnb-4bit",
+     "Base Gemma-2 9B Instruct": "unsloth/gemma-2-9b-it-bnb-4bit",
+     "Base Gemma 7B Instruct": "unsloth/gemma-7b-it-bnb-4bit",
+ }
+
+ FINETUNED_MATH = {
+     "Finetuned Llama-3.1 8B (e3) - MATH": "farhananis005/lora-llama-3.1-8b-Math-e3",
+     "Finetuned Llama-3.1 8B (e1) - MATH": "farhananis005/lora-llama-3.1-8b-Math-e1",
+     "Finetuned Llama-3 8B (e3) - MATH": "farhananis005/lora-llama-3-8b-Math-e3",
+     "Finetuned Llama-3 8B (e1) - MATH": "farhananis005/lora-llama-3-8b-Math-e1",
+     "Finetuned Llama-2 7B (e3) - MATH": "farhananis005/lora-llama-2-7b-Math-e3",
+     "Finetuned Llama-2 7B (e1) - MATH": "farhananis005/lora-llama-2-7b-Math-e1",
+     "Finetuned Mistral 7B (e3) - MATH": "farhananis005/lora-mistral-7b-v0.3-Math-e3",
+     "Finetuned Mistral 7B (e1) - MATH": "farhananis005/lora-mistral-7b-v0.3-Math-e1",
+     "Finetuned Qwen-2 7B (e3) - MATH": "farhananis005/lora-qwen-2-7b-Math-e3",
+     "Finetuned Qwen-2 7B (e1) - MATH": "farhananis005/lora-qwen-2-7b-Math-e1",
+     "Finetuned Gemma-2 9B (e3) - MATH": "farhananis005/lora-gemma-2-9b-Math-e3",
+     "Finetuned Gemma-2 9B (e1) - MATH": "farhananis005/lora-gemma-2-9b-Math-e1",
+     "Finetuned Gemma 7B (e3) - MATH": "farhananis005/lora-gemma-7b-Math-e3",
+     "Finetuned Gemma 7B (e1) - MATH": "farhananis005/lora-gemma-7b-Math-e1",
+ }
+
+ FINETUNED_BIO = {
+     "Finetuned Llama-3.1 8B (e3) - BIO": "farhananis005/lora-llama-3.1-8b-Bio-e3",
+     "Finetuned Llama-3.1 8B (e1) - BIO": "farhananis005/lora-llama-3.1-8b-Bio-e1",
+     "Finetuned Llama-3 8B (e3) - BIO": "farhananis005/lora-llama-3-8b-Bio-e3",
+     "Finetuned Llama-3 8B (e1) - BIO": "farhananis005/lora-llama-3-8b-Bio-e1",
+     "Finetuned Llama-2 7B (e3) - BIO": "farhananis005/lora-llama-2-7b-Bio-e3",
+     "Finetuned Llama-2 7B (e1) - BIO": "farhananis005/lora-llama-2-7b-Bio-e1",
+     "Finetuned Mistral 7B (e3) - BIO": "farhananis005/lora-mistral-7b-v0.3-Bio-e3",
+     "Finetuned Mistral 7B (e1) - BIO": "farhananis005/lora-mistral-7b-v0.3-Bio-e1",
+     "Finetuned Qwen-2 7B (e3) - BIO": "farhananis005/lora-qwen-2-7b-Bio-e3",
+     "Finetuned Qwen-2 7B (e1) - BIO": "farhananis005/lora-qwen-2-7b-Bio-e1",
+     "Finetuned Gemma-2 9B (e3) - BIO": "farhananis005/lora-gemma-2-9b-Bio-e3",
+     "Finetuned Gemma-2 9B (e1) - BIO": "farhananis005/lora-gemma-2-9b-Bio-e1",
+     "Finetuned Gemma 7B (e3) - BIO": "farhananis005/lora-gemma-7b-Bio-e3",
+     "Finetuned Gemma 7B (e1) - BIO": "farhananis005/lora-gemma-7b-Bio-e1",
+ }
+
+ ALL_MODELS = {
+     "Math": {"-- Select Math Model --": None, **BASE_MODELS, **FINETUNED_MATH},
+     "Bio": {"-- Select Bio Model --": None, **BASE_MODELS, **FINETUNED_BIO}
+ }
requirements.txt ADDED
Binary file (3.07 kB).
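`requirements.txt` is stored as a binary blob, so its exact contents are not viewable in this diff. Judging purely from the imports across the three modules above, a minimal equivalent would need at least the following (an inference, not the actual file):

```text
gradio
requests
datasets
python-dotenv
```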