kshitijthakkar committed
Commit 1a4f599 · 1 Parent(s): c6f336e

initial working code

Files changed (6)
  1. .gitignore +187 -0
  2. Dockerfile +33 -0
  3. app.py +611 -0
  4. enhanced_app.py +745 -0
  5. model_handler.py +434 -0
  6. requirements.txt +0 -0
.gitignore ADDED
@@ -0,0 +1,187 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+ .sh
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
175
+
176
+ .xml
177
+ .png
178
+ .pdf
179
+ .pptx
180
+ .zip
181
+ .log
182
+ .gradio
183
+ .idea
184
+ *conversation_logs/
185
+ push_to_hub.sh
186
+ init_repos.sh
187
+ generated_images/
Dockerfile ADDED
@@ -0,0 +1,33 @@
1
+ # Dockerfile for a Python application with user permissions
2
+ FROM python:3.11-slim
3
+
4
+ # Install system dependencies as root
5
+ RUN apt-get update && apt-get install -y build-essential && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
6
+
7
+ # Create user and set up directory structure as root
8
+ RUN useradd -m -u 1000 user && \
9
+ mkdir -p /app && \
10
+ chown -R user:user /app
11
+
12
+ # Set working directory
13
+ WORKDIR /app
14
+
15
+ # Switch to user AFTER setting up permissions
16
+ USER user
17
+ ENV PATH="/home/user/.local/bin:$PATH"
18
+
19
+ # Copy files with proper ownership
20
+ COPY --chown=user:user . /app
21
+
22
+ # Install Python dependencies
23
+ COPY --chown=user:user ./requirements.txt requirements.txt
24
+ RUN pip install --no-cache-dir --upgrade pip && \
25
+ pip install --no-cache-dir --user -r requirements.txt
26
+
27
+ # Make run.sh executable
28
+ RUN chmod +x run.sh
29
+ EXPOSE 8000 7860
30
+ # Run the startup script
31
+ #CMD ["sh", "-c", "bash run.sh"]
32
+ #CMD bash -c "python /app/mcp_server.py & sleep 60 && python /app/app.py"
33
+ CMD bash -c "python /app/enhanced_app.py"
app.py ADDED
@@ -0,0 +1,611 @@
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from datasets import load_dataset
4
+ import plotly.graph_objects as go
5
+ import datetime
6
+ import json
7
+ import random
8
+ import os
9
+ from model_handler import generate_response, get_inference_configs
10
+ import torch
11
+
12
+ # Configuration for datasets
13
+ DATASET_CONFIGS = {
14
+ 'Loggenix Synthetic AI Tasks Eval (with outputs)': {
15
+ 'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval-with-outputs',
16
+ 'split': 'train'
17
+ },
18
+ 'Loggenix Synthetic AI Tasks Eval (with outputs) v5': {
19
+ 'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval_v5-with-outputs',
20
+ 'split': 'train'
21
+ }
22
+ }
23
+
24
+
25
+ # Load main dataset for inference tab
26
+ def load_inference_dataset():
27
+ """Load the main dataset for inference use case"""
28
+ try:
29
+ print("Loading synthetic-ai-tasks-eval-v5 dataset...")
30
+ dataset = load_dataset(
31
+ 'kshitijthakkar/synthetic-ai-tasks-eval-v5',
32
+ split='train',
33
+ trust_remote_code=True
34
+ )
35
+ df = dataset.to_pandas()
36
+ print(f"✓ Successfully loaded: {len(df)} rows, {len(df.columns)} columns")
37
+ return df
38
+ except Exception as e:
39
+ print(f"✗ Error loading dataset: {str(e)}")
40
+ return pd.DataFrame({'Error': [f'Failed to load: {str(e)}']})
41
+
42
+
43
+ # Load dataset for eval samples tab
44
+ def load_eval_datasets():
45
+ """Load all datasets for evaluation samples"""
46
+ datasets = {}
47
+ for display_name, config in DATASET_CONFIGS.items():
48
+ try:
49
+ print(f"Loading {display_name}...")
50
+ dataset = load_dataset(
51
+ config['repo_id'],
52
+ split=config['split'],
53
+ trust_remote_code=True
54
+ )
55
+ df = dataset.to_pandas()
56
+ datasets[display_name] = df
57
+ print(f"✓ Successfully loaded {display_name}: {len(df)} rows")
58
+ except Exception as e:
59
+ print(f"✗ Error loading {display_name}: {str(e)}")
60
+ datasets[display_name] = pd.DataFrame({
61
+ 'Error': [f'Failed to load: {str(e)}'],
62
+ 'Dataset': [config['repo_id']]
63
+ })
64
+ return datasets
65
+
66
+
67
+ # Load datasets
68
+ INFERENCE_DATASET = load_inference_dataset()
69
+ EVAL_DATASETS = load_eval_datasets()
70
+
71
+
72
+ # ===== TAB 1: INFERENCE USE CASE =====
73
+
74
+ def get_task_types():
75
+ """Get unique task types from inference dataset"""
76
+ if 'task_type' in INFERENCE_DATASET.columns:
77
+ task_types = INFERENCE_DATASET['task_type'].unique().tolist()
78
+ return [str(t) for t in task_types if pd.notna(t)]
79
+ return ["No task types available"]
80
+
81
+
82
+ def get_task_by_type(task_type):
83
+ """Get task content by task type"""
84
+ if 'task_type' in INFERENCE_DATASET.columns and 'task' in INFERENCE_DATASET.columns:
85
+ filtered = INFERENCE_DATASET[INFERENCE_DATASET['task_type'] == task_type]
86
+ if len(filtered) > 0:
87
+ return str(filtered.iloc[0]['task'])
88
+ return "No task found for this type"
89
+
90
+
91
+ def run_inference(task_type, system_prompt, user_input, inference_config):
92
+ """Run model inference"""
93
+ if not user_input.strip():
94
+ return "Please enter a user input"
95
+
96
+ if not system_prompt.strip():
97
+ return "Please select a task type to load system prompt"
98
+
99
+ try:
100
+ # Get inference configuration
101
+ configs = get_inference_configs()
102
+ config = configs.get(inference_config, configs["Optimized for Speed"])
103
+
104
+ # Run inference
105
+ response = generate_response(
106
+ system_prompt=system_prompt,
107
+ user_input=user_input,
108
+ config_name=inference_config
109
+ )
110
+ return response
111
+ except Exception as e:
112
+ return f"Error during inference: {str(e)}"
113
+
114
+
115
+ # ===== TAB 2: EVAL SAMPLES =====
116
+
117
+ def update_eval_table(dataset_name):
118
+ """Update eval table based on selected dataset"""
119
+ if dataset_name in EVAL_DATASETS:
120
+ return EVAL_DATASETS[dataset_name].head(100)
121
+ return pd.DataFrame()
122
+
123
+
124
+ def get_eval_dataset_info(dataset_name):
125
+ """Get info about selected eval dataset"""
126
+ if dataset_name in EVAL_DATASETS:
127
+ df = EVAL_DATASETS[dataset_name]
128
+ return f"""
129
+ **Dataset**: {dataset_name}
130
+ - **Rows**: {len(df):,}
131
+ - **Columns**: {len(df.columns)}
132
+ - **Column Names**: {', '.join(df.columns.tolist())}
133
+ """
134
+ return "No dataset selected"
135
+
136
+
137
+ # ===== TAB 3 & 4: FLAGGING FUNCTIONALITY =====
138
+
139
+ def generate_chart():
140
+ """Generate a sample Plotly chart"""
141
+ x = list(range(10))
142
+ y = [random.randint(1, 100) for _ in x]
143
+ fig = go.Figure()
144
+ fig.add_trace(go.Scatter(x=x, y=y, mode="lines+markers", name="Random Data"))
145
+ fig.update_layout(title="Sample Chart", xaxis_title="X-axis", yaxis_title="Y-axis")
146
+ return fig.to_html(full_html=False)
147
+
148
+
149
+ def chat_interface(prompt, history):
150
+ """Handle chat interface with history"""
151
+ if not prompt.strip():
152
+ return history, ""
153
+
154
+ history.append(("You", prompt))
155
+
156
+ try:
157
+ if "chart" in prompt.lower() or "graph" in prompt.lower():
158
+ response = generate_chart()
159
+ else:
160
+ response = f"This is a demo response to: {prompt}"
161
+
162
+ if isinstance(response, str):
163
+ formatted_response = f"**AI Assistant:**\n{response}"
164
+ history.append(("AI Assistant", formatted_response))
165
+ else:
166
+ history.append(("AI Assistant", response))
167
+ except Exception as e:
168
+ error_msg = f"**AI Assistant:**\nSorry, an error occurred: {str(e)}"
169
+ history.append(("AI Assistant", error_msg))
170
+
171
+ return history, ""
172
+
173
+
174
+ def flag_response(history, flagged_message, flag_reason):
175
+ """Flag a response"""
176
+ if not flagged_message or flagged_message == "No responses available":
177
+ return "Invalid message selection."
178
+
179
+ try:
180
+ flagged_index = int(flagged_message.split()[1][:-1])
181
+ if flagged_index >= len(history) or history[flagged_index][0] != "AI Assistant":
182
+ return "You can only flag assistant responses."
183
+
184
+ flagged_message_content = history[flagged_index][1]
185
+
186
+ log_entry = {
187
+ "timestamp": datetime.datetime.now().isoformat(),
188
+ "flag_reason": str(flag_reason),
189
+ "flagged_message": str(flagged_message_content),
190
+ "conversation_context": history,
191
+ }
192
+
193
+ os.makedirs("logs", exist_ok=True)
194
+ with open("logs/flagged_responses.log", "a") as f:
195
+ f.write(json.dumps(log_entry) + "\n")
196
+
197
+ return f"Response flagged successfully"
198
+ except Exception as e:
199
+ return f"Error flagging response: {str(e)}"
200
+
201
+
202
+ def get_assistant_responses(history):
203
+ """Get dropdown options for assistant responses"""
204
+ responses = [
205
+ f"Response {i}: {str(msg[1])[:50]}..."
206
+ for i, msg in enumerate(history)
207
+ if msg[0] == "AI Assistant"
208
+ ]
209
+
210
+ if not responses:
211
+ responses = ["No responses available"]
212
+
213
+ return gr.update(choices=responses, value=responses[0])
214
+
215
+
216
+ def display_selected_message(selected_index, history):
217
+ """Display the selected flagged message"""
218
+ if selected_index == "No responses available":
219
+ return "No responses available"
220
+
221
+ try:
222
+ flagged_index = int(selected_index.split()[1][:-1])
223
+ if flagged_index < len(history) and history[flagged_index][0] == "AI Assistant":
224
+ return history[flagged_index][1]
225
+ else:
226
+ return "Invalid selection."
227
+ except Exception as e:
228
+ return f"Error: {str(e)}"
229
+
230
+
231
+ def read_flagged_messages():
232
+ """Read flagged messages from log file"""
233
+ try:
234
+ if not os.path.exists("logs/flagged_responses.log"):
235
+ return pd.DataFrame()
236
+
237
+ with open("logs/flagged_responses.log", "r") as f:
238
+ flagged_messages = f.readlines()
239
+
240
+ if not flagged_messages:
241
+ return pd.DataFrame()
242
+
243
+ table_data = []
244
+ for entry in flagged_messages:
245
+ data = json.loads(entry)
246
+ table_data.append({
247
+ "Timestamp": data.get("timestamp", "N/A"),
248
+ "Flag Reason": data.get("flag_reason", "N/A"),
249
+ "Flagged Message": data.get("flagged_message", "N/A")[:100] + "...",
250
+ "Conversation Context": str(len(data.get("conversation_context", []))) + " messages"
251
+ })
252
+ return pd.DataFrame(table_data)
253
+ except Exception as e:
254
+ return pd.DataFrame({"Error": [f"Error reading flagged messages: {str(e)}"]})
255
+
256
+
257
+ def handle_row_select(evt: gr.SelectData):
258
+ """Handle row selection in flagged messages table"""
259
+ try:
260
+ if not os.path.exists("logs/flagged_responses.log"):
261
+ return []
262
+
263
+ with open("logs/flagged_responses.log", "r") as f:
264
+ flagged_messages_log = f.readlines()
265
+
266
+ if evt.index[0] < len(flagged_messages_log):
267
+ selected_entry = json.loads(flagged_messages_log[evt.index[0]])
268
+ conversation_context = selected_entry.get("conversation_context", [])
269
+ return conversation_context
270
+ return []
271
+ except Exception as e:
272
+ return [("System", f"Error loading conversation: {str(e)}")]
273
+
274
+
275
+ def clear_history():
276
+ """Clear chat history"""
277
+ return [], gr.update(choices=["No responses available"], value="No responses available")
278
+
279
+
280
+ # ===== MAIN INTERFACE =====
281
+
282
+ def create_interface():
283
+ with gr.Blocks(title="AI Tasks Evaluation Suite", theme=gr.themes.Soft()) as demo:
284
+ gr.Markdown("# 🤖 AI Tasks Evaluation Suite")
285
+ gr.Markdown("Comprehensive platform for AI model evaluation and testing")
286
+
287
+ with gr.Tabs():
288
+ # TAB 1: INFERENCE USE CASE
289
+ with gr.Tab("🚀 Inference Use Case"):
290
+ gr.Markdown("## Model Inference Testing")
291
+
292
+ with gr.Row():
293
+ with gr.Column(scale=1):
294
+ # Task type dropdown
295
+ task_type_dropdown = gr.Dropdown(
296
+ choices=get_task_types(),
297
+ value=get_task_types()[0] if get_task_types() else None,
298
+ label="Task Type",
299
+ info="Select task type to load system prompt"
300
+ )
301
+
302
+ # Inference configuration
303
+ inference_config = gr.Dropdown(
304
+ choices=list(get_inference_configs().keys()),
305
+ value="Optimized for Speed",
306
+ label="Inference Configuration",
307
+ info="Select inference optimization level"
308
+ )
309
+
310
+ with gr.Column(scale=2):
311
+ # System prompt (editable)
312
+ system_prompt = gr.Textbox(
313
+ label="System Prompt (Editable)",
314
+ lines=6,
315
+ max_lines=10,
316
+ placeholder="Select a task type to load system prompt...",
317
+ interactive=True
318
+ )
319
+
320
+ with gr.Row():
321
+ with gr.Column():
322
+ # User input
323
+ user_input = gr.Textbox(
324
+ label="User Input",
325
+ lines=4,
326
+ placeholder="Enter your input here...",
327
+ interactive=True
328
+ )
329
+
330
+ with gr.Column():
331
+ # Model response
332
+ model_response = gr.Textbox(
333
+ label="Model Response",
334
+ lines=8,
335
+ interactive=False
336
+ )
337
+
338
+ with gr.Row():
339
+ submit_btn = gr.Button("🔥 Run Inference", variant="primary", size="lg")
340
+ clear_btn = gr.Button("🗑️ Clear", variant="secondary")
341
+
342
+ # Event handlers for Tab 1
343
+ task_type_dropdown.change(
344
+ fn=get_task_by_type,
345
+ inputs=[task_type_dropdown],
346
+ outputs=[system_prompt]
347
+ )
348
+
349
+ submit_btn.click(
350
+ fn=run_inference,
351
+ inputs=[task_type_dropdown, system_prompt, user_input, inference_config],
352
+ outputs=[model_response]
353
+ )
354
+
355
+ clear_btn.click(
356
+ fn=lambda: ("", "", ""),
357
+ outputs=[system_prompt, user_input, model_response]
358
+ )
359
+
360
+ # TAB 2: EVAL SAMPLES
361
+ with gr.Tab("📊 Eval Samples"):
362
+ gr.Markdown("## Dataset Evaluation Samples")
363
+
364
+ with gr.Row():
365
+ with gr.Column(scale=1):
366
+ eval_dataset_dropdown = gr.Dropdown(
367
+ choices=list(EVAL_DATASETS.keys()),
368
+ value=list(EVAL_DATASETS.keys())[0] if EVAL_DATASETS else None,
369
+ label="Select Dataset",
370
+ info="Choose evaluation dataset to view"
371
+ )
372
+
373
+ eval_dataset_info = gr.Markdown(
374
+ get_eval_dataset_info(list(EVAL_DATASETS.keys())[0] if EVAL_DATASETS else "")
375
+ )
376
+
377
+ with gr.Row():
378
+ eval_table = gr.Dataframe(
379
+ value=update_eval_table(list(EVAL_DATASETS.keys())[0]) if EVAL_DATASETS else pd.DataFrame(),
380
+ label="Dataset Table",
381
+ max_height=800,
382
+ min_width=800,
383
+ interactive=False,
384
+ wrap=True,
385
+ show_fullscreen_button=True,
386
+ show_copy_button=True,
387
+ show_row_numbers=True,
388
+ show_search="filter",
389
+ )
390
+
391
+ # Event handlers for Tab 2
392
+ eval_dataset_dropdown.change(
393
+ fn=lambda x: (update_eval_table(x), get_eval_dataset_info(x)),
394
+ inputs=[eval_dataset_dropdown],
395
+ outputs=[eval_table, eval_dataset_info]
396
+ )
397
+
398
+ # TAB 3: FLAG RESPONSES
399
+ with gr.Tab("🚩 Flag Responses"):
400
+ gr.Markdown("## Chat Interface with Response Flagging")
401
+
402
+ with gr.Row():
403
+ with gr.Column():
404
+ chat_input = gr.Textbox(placeholder="Ask something...", label="Your Message")
405
+
406
+ with gr.Row():
407
+ chat_submit_btn = gr.Button("Send", variant="primary")
408
+ chat_clear_btn = gr.Button("Clear History", variant="secondary")
409
+
410
+ with gr.Column():
411
+ chat_display = gr.Chatbot(label="Chat History", height=400)
412
+ chat_history_state = gr.State([])
413
+
414
+ gr.Markdown("### Flag Response")
415
+ with gr.Row():
416
+ with gr.Column():
417
+ flagged_message_index = gr.Dropdown(
418
+ label="Select a response to flag",
419
+ choices=["No responses available"],
420
+ value="No responses available",
421
+ interactive=True
422
+ )
423
+
424
+ selected_message_display = gr.Textbox(
425
+ label="Selected Response",
426
+ interactive=False,
427
+ lines=4
428
+ )
429
+
430
+ with gr.Column():
431
+ flag_reason = gr.Textbox(
432
+ placeholder="Enter reason for flagging...",
433
+ label="Reason for Flagging"
434
+ )
435
+
436
+ flag_btn = gr.Button("Flag Response", variant="stop")
437
+ flag_output = gr.Textbox(label="Flagging Feedback", visible=True)
438
+
439
+ # Event handlers for Tab 3
440
+ chat_submit_btn.click(
441
+ chat_interface,
442
+ inputs=[chat_input, chat_history_state],
443
+ outputs=[chat_display, chat_input]
444
+ ).then(
445
+ get_assistant_responses,
446
+ inputs=[chat_history_state],
447
+ outputs=[flagged_message_index]
448
+ )
449
+
450
+ chat_clear_btn.click(
451
+ clear_history,
452
+ outputs=[chat_display, flagged_message_index]
453
+ )
454
+
455
+ flagged_message_index.change(
456
+ display_selected_message,
457
+ inputs=[flagged_message_index, chat_history_state],
458
+ outputs=[selected_message_display]
459
+ )
460
+
461
+ flag_btn.click(
462
+ flag_response,
463
+ inputs=[chat_history_state, flagged_message_index, flag_reason],
464
+ outputs=[flag_output]
465
+ )
466
+
467
+ # TAB 4: VIEW FLAGGED RESPONSES
468
+ with gr.Tab("👀 View Flagged Responses"):
469
+ gr.Markdown("## Review Flagged Responses")
470
+
471
+ with gr.Row():
472
+ with gr.Column():
473
+ flagged_messages_display = gr.Dataframe(
474
+ headers=["Timestamp", "Flag Reason", "Flagged Message", "Conversation Context"],
475
+ interactive=False,
476
+ max_height=400
477
+ )
478
+ refresh_btn = gr.Button("🔄 Refresh", variant="primary")
479
+
480
+ with gr.Column():
481
+ conversation_context_display = gr.Chatbot(
482
+ label="Conversation Context",
483
+ height=400
484
+ )
485
+
486
+ # Event handlers for Tab 4
487
+ flagged_messages_display.select(
488
+ handle_row_select,
489
+ outputs=[conversation_context_display]
490
+ )
491
+
492
+ refresh_btn.click(
493
+ read_flagged_messages,
494
+ outputs=[flagged_messages_display]
495
+ )
496
+
497
+ # TAB 5: MODEL EVAL RESULTS
498
+ with gr.Tab("📈 Model Eval Results"):
499
+ gr.Markdown("## Model Evaluation Results")
500
+ gr.Markdown("### 🚧 Coming Soon")
501
+ gr.Markdown(
502
+ "This section will display comprehensive model evaluation metrics, charts, and performance analysis.")
503
+
504
+ # Placeholder content
505
+ with gr.Row():
506
+ with gr.Column():
507
+ gr.Markdown("#### Evaluation Metrics")
508
+ gr.Markdown("- Accuracy scores")
509
+ gr.Markdown("- Performance benchmarks")
510
+ gr.Markdown("- Comparative analysis")
511
+
512
+ with gr.Column():
513
+ gr.Markdown("#### Visualization")
514
+ gr.Markdown("- Performance charts")
515
+ gr.Markdown("- Score distributions")
516
+ gr.Markdown("- Trend analysis")
517
+
518
+ # TAB 6: ABOUT
519
+ with gr.Tab("ℹ️ About"):
520
+ gr.Markdown("## About Loggenix MOE Model")
521
+
522
+ gr.Markdown("""
523
+ ### Model: `kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.2-finetuned-tool`
524
+
525
+ This is a fine-tuned Mixture of Experts (MOE) model designed for specialized AI tasks with tool calling capabilities.
526
+
527
+ #### Key Features:
528
+ - **Architecture**: MOE with 0.3B total parameters, 0.1B active parameters
529
+ - **Training**: Fine-tuned with learning rate 7e-5, batch size 16
530
+ - **Hardware**: Optimized for RTX 4090 GPU
531
+ - **Capabilities**: Tool calling, instruction following, task-specific responses
532
+
533
+ #### Model Specifications:
534
+ - **Total Parameters**: 0.3B
535
+ - **Active Parameters**: 0.1B
536
+ - **Context Length**: 4096 tokens
537
+ - **Precision**: FP16 for optimal performance
538
+ - **Flash Attention**: Supported for faster inference
539
+
540
+ #### Sample Inference Code:
541
+ ```python
542
+ from transformers import AutoModelForCausalLM, AutoTokenizer
543
+ import torch
544
+
545
+ # Load model and tokenizer
546
+ model_id = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.2-finetuned-tool"
547
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
548
+ model = AutoModelForCausalLM.from_pretrained(
549
+ model_id,
550
+ device_map="auto",
551
+ torch_dtype=torch.float16,
552
+ attn_implementation="flash_attention_2"
553
+ ).eval()
554
+
555
+ # Prepare messages
556
+ messages = [
557
+ {"role": "system", "content": "You are a helpful AI assistant."},
558
+ {"role": "user", "content": "Calculate 25 + 37"}
559
+ ]
560
+
561
+ # Format and generate
562
+ prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
563
+ inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
564
+
565
+ with torch.no_grad():
566
+ outputs = model.generate(
567
+ **inputs,
568
+ max_new_tokens=512,
569
+ do_sample=True,
570
+ temperature=0.7,
571
+ pad_token_id=tokenizer.pad_token_id
572
+ )
573
+
574
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
575
+ print(response)
576
+ ```
577
+
578
+ #### Tool Calling Support:
579
+ The model supports structured tool calling for mathematical operations, data analysis, and other specialized tasks.
580
+
581
+ #### Performance Optimizations:
582
+ - **Speed Mode**: Max 512 new tokens for fast responses
583
+ - **Balanced Mode**: Max 2048 new tokens for comprehensive answers
584
+ - **Full Capacity**: Dynamic token allocation up to context limit
585
+
586
+ ---
587
+
588
+ **Developed by**: Kshitij Thakkar
589
+ **Version**: v6.2
590
+ **License**: Please check model repository for licensing details
591
+ """)
592
+
593
+ # Load initial data
594
+ demo.load(
595
+ fn=read_flagged_messages,
596
+ outputs=[flagged_messages_display]
597
+ )
598
+
599
+ return demo
600
+
601
+
602
+ # Launch the application
603
+ if __name__ == "__main__":
604
+ print("Starting AI Tasks Evaluation Suite...")
605
+ demo = create_interface()
606
+ demo.launch(
607
+ server_name="0.0.0.0",
608
+ server_port=7860,
609
+ share=False,
610
+ debug=True
611
+ )
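
app.py drives inference entirely through `model_handler.generate_response` and `get_inference_configs`. A minimal sketch of calling that path directly, outside Gradio, using the same keyword arguments `run_inference()` passes above (this snippet is not part of the commit; it assumes model_handler.py is on the import path and that the environment can actually load the quantized model, i.e. a CUDA GPU with bitsandbytes installed):

```python
# Sketch only (not part of this commit): exercise the same inference path app.py uses.
from model_handler import generate_response, get_inference_configs

if __name__ == "__main__":
    # Preset names defined in model_handler.INFERENCE_CONFIGS
    print("Available configs:", list(get_inference_configs().keys()))

    # Same keyword arguments run_inference() passes in app.py
    reply = generate_response(
        system_prompt="You are a helpful AI assistant.",
        user_input="Calculate 25 + 37",
        config_name="Optimized for Speed",
    )
    print(reply)
```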
enhanced_app.py ADDED
@@ -0,0 +1,745 @@
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from datasets import load_dataset
4
+ import plotly.graph_objects as go
5
+ import datetime
6
+ import json
7
+ import random
8
+ import os
9
+ from model_handler import generate_response, get_inference_configs
10
+ import torch
11
+
12
+ # Configuration for datasets
13
+ DATASET_CONFIGS = {
14
+ 'Loggenix Synthetic AI Tasks Eval (with outputs)-small': {
15
+ 'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval-with-outputs',
16
+ 'split': 'train'
17
+ },
18
+ 'Loggenix Synthetic AI Tasks Eval (with outputs) v5-large': {
19
+ 'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval_v5-with-outputs',
20
+ 'split': 'train'
21
+ },
22
+ 'Loggenix Synthetic AI Tasks Eval (with outputs) v6-large': {
23
+ 'repo_id': 'kshitijthakkar/loggenix-synthetic-ai-tasks-eval_v6-with-outputs',
24
+ 'split': 'train'
25
+ }
26
+ }
27
+
28
+
29
+ # Load main dataset for inference tab
30
+ def load_inference_dataset():
31
+ """Load the main dataset for inference use case"""
32
+ try:
33
+ print("Loading synthetic-ai-tasks-eval-v5 dataset...")
34
+ dataset = load_dataset(
35
+ 'kshitijthakkar/synthetic-ai-tasks-eval-v5',
36
+ split='train',
37
+ trust_remote_code=True
38
+ )
39
+ df = dataset.to_pandas()
40
+ print(f"✓ Successfully loaded: {len(df)} rows, {len(df.columns)} columns")
41
+ return df
42
+ except Exception as e:
43
+ print(f"✗ Error loading dataset: {str(e)}")
44
+ return pd.DataFrame({'Error': [f'Failed to load: {str(e)}']})
45
+
46
+
47
+ # Load dataset for eval samples tab
48
+ def load_eval_datasets():
49
+ """Load all datasets for evaluation samples"""
50
+ datasets = {}
51
+ for display_name, config in DATASET_CONFIGS.items():
52
+ try:
53
+ print(f"Loading {display_name}...")
54
+ dataset = load_dataset(
55
+ config['repo_id'],
56
+ split=config['split'],
57
+ trust_remote_code=True
58
+ )
59
+ df = dataset.to_pandas()
60
+ datasets[display_name] = df
61
+ print(f"✓ Successfully loaded {display_name}: {len(df)} rows")
62
+ except Exception as e:
63
+ print(f"✗ Error loading {display_name}: {str(e)}")
64
+ datasets[display_name] = pd.DataFrame({
65
+ 'Error': [f'Failed to load: {str(e)}'],
66
+ 'Dataset': [config['repo_id']]
67
+ })
68
+ return datasets
69
+
70
+
71
+ # Load datasets
72
+ INFERENCE_DATASET = load_inference_dataset()
73
+ EVAL_DATASETS = load_eval_datasets()
74
+
75
+
76
+ # ===== TAB 1: INFERENCE USE CASE WITH INTEGRATED FLAGGING =====
77
+
78
+ def get_task_types():
79
+ """Get unique task types from inference dataset"""
80
+ if 'task_type' in INFERENCE_DATASET.columns:
81
+ task_types = INFERENCE_DATASET['task_type'].unique().tolist()
82
+ return [str(t) for t in task_types if pd.notna(t)]
83
+ return ["No task types available"]
84
+
85
+
86
+ def get_task_by_type(task_type):
87
+ """Get task content by task type"""
88
+ if 'task_type' in INFERENCE_DATASET.columns and 'task' in INFERENCE_DATASET.columns:
89
+ filtered = INFERENCE_DATASET[INFERENCE_DATASET['task_type'] == task_type]
90
+ if len(filtered) > 0:
91
+ return str(filtered.iloc[0]['task'])
92
+ return "No task found for this type"
93
+
94
+
95
+ def chat_interface_with_inference(prompt, history, system_prompt, inference_config):
96
+ """Enhanced chat interface with model inference and history"""
97
+ if not prompt.strip():
98
+ return history, ""
99
+
100
+ # Add user message to history
101
+ history.append(("You", prompt))
102
+
103
+ try:
104
+ if not system_prompt.strip():
105
+ response = "Please select a task type to load system prompt first."
106
+ else:
107
+ # Get inference configuration
108
+ configs = get_inference_configs()
109
+ config = configs.get(inference_config, configs["Optimized for Speed"])
110
+
111
+ # Run inference using the model
112
+ response = generate_response(
113
+ system_prompt=system_prompt,
114
+ user_input=prompt,
115
+ config_name=inference_config
116
+ )
117
+
118
+ # Format and add AI response to history
119
+ formatted_response = f"**AI Assistant:**\n{response}"
120
+ history.append(("AI Assistant", formatted_response))
121
+
122
+ except Exception as e:
123
+ error_msg = f"**AI Assistant:**\nError during inference: {str(e)}"
124
+ history.append(("AI Assistant", error_msg))
125
+
126
+ return history, ""
127
+
128
+
129
+ def flag_response(history, flagged_message, flag_reason):
130
+ """Flag a response"""
131
+ if not flagged_message or flagged_message == "No responses available":
132
+ return "Invalid message selection."
133
+
134
+ try:
135
+ flagged_index = int(flagged_message.split()[1][:-1])
136
+ if flagged_index >= len(history) or history[flagged_index][0] != "AI Assistant":
137
+ return "You can only flag assistant responses."
138
+
139
+ flagged_message_content = history[flagged_index][1]
140
+
141
+ log_entry = {
142
+ "timestamp": datetime.datetime.now().isoformat(),
143
+ "flag_reason": str(flag_reason),
144
+ "flagged_message": str(flagged_message_content),
145
+ "conversation_context": history,
146
+ }
147
+
148
+ os.makedirs("logs", exist_ok=True)
149
+ with open("logs/flagged_responses.log", "a") as f:
150
+ f.write(json.dumps(log_entry) + "\n")
151
+
152
+ return f"Response flagged successfully: {flag_reason}"
153
+ except Exception as e:
154
+ return f"Error flagging response: {str(e)}"
155
+
156
+
157
+ def get_assistant_responses(history):
158
+ """Get dropdown options for assistant responses"""
159
+ responses = [
160
+ f"Response {i}: {str(msg[1])[:50]}..."
161
+ for i, msg in enumerate(history)
162
+ if msg[0] == "AI Assistant"
163
+ ]
164
+
165
+ if not responses:
166
+ responses = ["No responses available"]
167
+
168
+ return gr.update(choices=responses, value=responses[0] if responses else "No responses available")
169
+
170
+
171
+ def display_selected_message(selected_index, history):
172
+ """Display the selected flagged message"""
173
+ if selected_index == "No responses available":
174
+ return "No responses available"
175
+
176
+ try:
177
+ flagged_index = int(selected_index.split()[1][:-1])
178
+ if flagged_index < len(history) and history[flagged_index][0] == "AI Assistant":
179
+ return history[flagged_index][1]
180
+ else:
181
+ return "Invalid selection."
182
+ except Exception as e:
183
+ return f"Error: {str(e)}"
184
+
185
+
186
+ def clear_inference_history():
187
+ """Clear chat history for inference tab"""
188
+ return [], gr.update(choices=["No responses available"], value="No responses available")
189
+
190
+
191
+ # ===== TAB 2: EVAL SAMPLES =====
192
+
193
+ def update_eval_table(dataset_name):
194
+ """Update eval table based on selected dataset"""
195
+ if dataset_name in EVAL_DATASETS:
196
+ return EVAL_DATASETS[dataset_name].head(100)
197
+ return pd.DataFrame()
198
+
199
+
200
+ def get_eval_dataset_info(dataset_name):
201
+ """Get info about selected eval dataset"""
202
+ if dataset_name in EVAL_DATASETS:
203
+ df = EVAL_DATASETS[dataset_name]
204
+ return f"""
205
+ **Dataset**: {dataset_name}
206
+ - **Rows**: {len(df):,}
207
+ - **Columns**: {len(df.columns)}
208
+ - **Column Names**: {', '.join(df.columns.tolist())}
209
+ """
210
+ return "No dataset selected"
211
+
212
+
213
+ def get_task_types_for_eval(dataset_name):
214
+ """Get unique task types from selected eval dataset"""
215
+ if dataset_name in EVAL_DATASETS and 'task_type' in EVAL_DATASETS[dataset_name].columns:
216
+ task_types = EVAL_DATASETS[dataset_name]['task_type'].unique().tolist()
217
+ return [str(t) for t in task_types if pd.notna(t)]
218
+ return ["No task types available"]
219
+
220
+
221
+ def get_tasks_by_type_eval(dataset_name, task_type):
222
+ """Get tasks filtered by dataset and task type"""
223
+ if (dataset_name in EVAL_DATASETS and
224
+ 'task_type' in EVAL_DATASETS[dataset_name].columns and
225
+ 'task' in EVAL_DATASETS[dataset_name].columns):
226
+
227
+ filtered = EVAL_DATASETS[dataset_name][EVAL_DATASETS[dataset_name]['task_type'] == task_type]
228
+ if len(filtered) > 0:
229
+ # Create display options with index and truncated task content
230
+ tasks = []
231
+ for idx, row in filtered.iterrows():
232
+ task_preview = str(row['task'])[:100] + "..." if len(str(row['task'])) > 100 else str(row['task'])
233
+ tasks.append(f"Row {idx}: {task_preview}")
234
+ return tasks
235
+ return ["No tasks found"]
236
+
237
+
238
+ def get_selected_row_data(dataset_name, task_type, selected_task):
239
+ """Get all data for the selected row"""
240
+ if not selected_task or selected_task == "No tasks found":
241
+ return "", "", "", "", "", "", ""
242
+
243
+ try:
244
+ # Extract row index from selected_task
245
+ row_idx = int(selected_task.split("Row ")[1].split(":")[0])
246
+
247
+ if dataset_name in EVAL_DATASETS:
248
+ df = EVAL_DATASETS[dataset_name]
249
+ if row_idx in df.index:
250
+ row = df.loc[row_idx]
251
+
252
+ # Extract all fields with safe handling for missing columns
253
+ task = str(row.get('task', 'N/A'))
254
+ task_type_val = str(row.get('task_type', 'N/A'))
255
+ input_model = str(row.get('input_model', 'N/A'))
256
+ expected_response = str(row.get('expected_response', 'N/A'))
257
+ loggenix_output = str(row.get('loggenix_output', 'N/A'))
258
+ output_model = str(row.get('output_model', 'N/A'))
259
+ input_text = str(row.get('input', 'N/A'))
260
+
261
+
262
+ return task_type_val, input_model, output_model, task, input_text, expected_response, loggenix_output
263
+
264
+ except Exception as e:
265
+ return f"Error: {str(e)}", "", "", "", "", "", ""
266
+
267
+ return "", "", "", "", "", "", ""
268
+
269
+ # ===== TAB 3: VIEW FLAGGED RESPONSES =====
270
+
271
+ def read_flagged_messages():
272
+ """Read flagged messages from log file"""
273
+ try:
274
+ if not os.path.exists("logs/flagged_responses.log"):
275
+ return pd.DataFrame()
276
+
277
+ with open("logs/flagged_responses.log", "r") as f:
278
+ flagged_messages = f.readlines()
279
+
280
+ if not flagged_messages:
281
+ return pd.DataFrame()
282
+
283
+ table_data = []
284
+ for entry in flagged_messages:
285
+ data = json.loads(entry)
286
+ table_data.append({
287
+ "Timestamp": data.get("timestamp", "N/A"),
288
+ "Flag Reason": data.get("flag_reason", "N/A"),
289
+ "Flagged Message": data.get("flagged_message", "N/A")[:100] + "...",
290
+ "Conversation Context": str(len(data.get("conversation_context", []))) + " messages"
291
+ })
292
+ return pd.DataFrame(table_data)
293
+ except Exception as e:
294
+ return pd.DataFrame({"Error": [f"Error reading flagged messages: {str(e)}"]})
295
+
296
+
297
+ def handle_row_select(evt: gr.SelectData):
298
+ """Handle row selection in flagged messages table"""
299
+ try:
300
+ if not os.path.exists("logs/flagged_responses.log"):
301
+ return []
302
+
303
+ with open("logs/flagged_responses.log", "r") as f:
304
+ flagged_messages_log = f.readlines()
305
+
306
+ if evt.index[0] < len(flagged_messages_log):
307
+ selected_entry = json.loads(flagged_messages_log[evt.index[0]])
308
+ conversation_context = selected_entry.get("conversation_context", [])
309
+ return conversation_context
310
+ return []
311
+ except Exception as e:
312
+ return [("System", f"Error loading conversation: {str(e)}")]
313
+
314
+
315
+ # ===== MAIN INTERFACE =====
316
+
317
+ def create_interface():
318
+ with gr.Blocks(title="AI Tasks Evaluation Suite", theme=gr.themes.Soft()) as demo:
319
+ gr.Markdown("# 🤖 AI Tasks Evaluation Suite")
320
+ gr.Markdown("Comprehensive platform for AI model evaluation and testing")
321
+
322
+ with gr.Tabs():
323
+ # TAB 1: INFERENCE USE CASE WITH INTEGRATED FLAGGING
324
+ with gr.Tab("🚀 Inference Use Case"):
325
+ gr.Markdown("## Model Inference Testing with Response Flagging")
326
+
327
+ with gr.Row():
328
+ with gr.Column(scale=1):
329
+ # Task type dropdown
330
+ task_type_dropdown = gr.Dropdown(
331
+ choices=get_task_types(),
332
+ value=get_task_types()[0] if get_task_types() else None,
333
+ label="Task Type",
334
+ info="Select task type to load system prompt"
335
+ )
336
+
337
+ # Inference configuration
338
+ inference_config = gr.Dropdown(
339
+ choices=list(get_inference_configs().keys()),
340
+ value="Optimized for Speed",
341
+ label="Inference Configuration",
342
+ info="Select inference optimization level"
343
+ )
344
+
345
+ with gr.Column(scale=2):
346
+ # System prompt (editable)
347
+ system_prompt = gr.Textbox(
348
+ label="System Prompt (Editable)",
349
+ lines=6,
350
+ max_lines=10,
351
+ placeholder="Select a task type to load system prompt...",
352
+ interactive=True
353
+ )
354
+
355
+ # Chat interface section
356
+ gr.Markdown("### 💬 Chat Interface")
357
+ with gr.Row():
358
+ with gr.Column(scale=2):
359
+ # Chat display (replacing the old textbox)
360
+ chat_display = gr.Chatbot(label="Conversation History", height=400)
361
+ chat_history_state = gr.State([])
362
+
363
+ # Chat input
364
+ with gr.Row():
365
+ chat_input = gr.Textbox(
366
+ placeholder="Enter your message here...",
367
+ label="Your Message",
368
+ scale=4
369
+ )
370
+ send_btn = gr.Button("Send", variant="primary", scale=1)
371
+
372
+ with gr.Row():
373
+ clear_chat_btn = gr.Button("🗑️ Clear History", variant="secondary")
374
+
375
+ # Flagging section
376
+ with gr.Column(scale=1):
377
+ gr.Markdown("### 🚩 Flag Response")
378
+
379
+ flagged_message_index = gr.Dropdown(
380
+ label="Select a response to flag",
381
+ choices=["No responses available"],
382
+ value="No responses available",
383
+ interactive=True
384
+ )
385
+
386
+ selected_message_display = gr.Textbox(
387
+ label="Selected Response",
388
+ interactive=False,
389
+ lines=4,
390
+ max_lines=6
391
+ )
392
+
393
+ flag_reason = gr.Textbox(
394
+ placeholder="Enter reason for flagging...",
395
+ label="Reason for Flagging"
396
+ )
397
+
398
+ flag_btn = gr.Button("🚩 Flag Response", variant="stop")
399
+ flag_output = gr.Textbox(label="Flagging Status", visible=True, lines=2)
400
+
401
+ # Event handlers for Tab 1
402
+ task_type_dropdown.change(
403
+ fn=get_task_by_type,
404
+ inputs=[task_type_dropdown],
405
+ outputs=[system_prompt]
406
+ )
407
+
408
+ # Chat functionality
409
+ send_btn.click(
410
+ chat_interface_with_inference,
411
+ inputs=[chat_input, chat_history_state, system_prompt, inference_config],
412
+ outputs=[chat_display, chat_input]
413
+ ).then(
414
+ lambda x: x, # Update state
415
+ inputs=[chat_display],
416
+ outputs=[chat_history_state]
417
+ ).then(
418
+ get_assistant_responses,
419
+ inputs=[chat_history_state],
420
+ outputs=[flagged_message_index]
421
+ )
422
+
423
+ # Enter key support for chat input
424
+ chat_input.submit(
425
+ chat_interface_with_inference,
426
+ inputs=[chat_input, chat_history_state, system_prompt, inference_config],
427
+ outputs=[chat_display, chat_input]
428
+ ).then(
429
+ lambda x: x, # Update state
430
+ inputs=[chat_display],
431
+ outputs=[chat_history_state]
432
+ ).then(
433
+ get_assistant_responses,
434
+ inputs=[chat_history_state],
435
+ outputs=[flagged_message_index]
436
+ )
437
+
438
+ clear_chat_btn.click(
439
+ clear_inference_history,
440
+ outputs=[chat_display, flagged_message_index]
441
+ ).then(
442
+ lambda: [],
443
+ outputs=[chat_history_state]
444
+ )
445
+
446
+ # Flagging functionality
447
+ flagged_message_index.change(
448
+ display_selected_message,
449
+ inputs=[flagged_message_index, chat_history_state],
450
+ outputs=[selected_message_display]
451
+ )
452
+
453
+ flag_btn.click(
454
+ flag_response,
455
+ inputs=[chat_history_state, flagged_message_index, flag_reason],
456
+ outputs=[flag_output]
457
+ )
458
+
459
+ # TAB 2: EVAL SAMPLES
460
+ with gr.Tab("📊 Eval Samples"):
461
+ gr.Markdown("## Dataset Evaluation Samples")
462
+
463
+ with gr.Row():
464
+ with gr.Column(scale=1):
465
+ eval_dataset_dropdown = gr.Dropdown(
466
+ choices=list(EVAL_DATASETS.keys()),
467
+ value=list(EVAL_DATASETS.keys())[0] if EVAL_DATASETS else None,
468
+ label="Select Dataset",
469
+ info="Choose evaluation dataset to view"
470
+ )
471
+
472
+ eval_dataset_info = gr.Markdown(
473
+ get_eval_dataset_info(list(EVAL_DATASETS.keys())[0] if EVAL_DATASETS else "")
474
+ )
475
+
476
+ with gr.Row():
477
+ eval_table = gr.Dataframe(
478
+ value=update_eval_table(list(EVAL_DATASETS.keys())[0]) if EVAL_DATASETS else pd.DataFrame(),
479
+ label="Dataset Table",
480
+ max_height=800,
481
+ min_width=800,
482
+ interactive=True,
483
+ wrap=True,
484
+ show_fullscreen_button=True,
485
+ show_copy_button=True,
486
+ show_row_numbers=True,
487
+ show_search="search",
488
+ column_widths=["80px","80px","80px","150px","250px","250px","250px"]
489
+ )
490
+
491
+ # Event handlers for Tab 2
492
+ eval_dataset_dropdown.change(
493
+ fn=lambda x: (update_eval_table(x), get_eval_dataset_info(x)),
494
+ inputs=[eval_dataset_dropdown],
495
+ outputs=[eval_table, eval_dataset_info]
496
+ )
497
+ with gr.Tab("📊 Eval Samples 2"):
498
+ gr.Markdown("## Dataset Evaluation Samples")
499
+ gr.Markdown("Select dataset, task type, and specific task to view detailed information")
500
+
501
+ with gr.Row():
502
+ with gr.Column(scale=1):
503
+ eval_dataset_dropdown = gr.Dropdown(
504
+ choices=list(EVAL_DATASETS.keys()),
505
+ value=list(EVAL_DATASETS.keys())[0] if EVAL_DATASETS else None,
506
+ label="Select Dataset",
507
+ info="Choose evaluation dataset to view"
508
+ )
509
+
510
+ eval_task_type_dropdown = gr.Dropdown(
511
+ choices=[],
512
+ label="Select Task Type",
513
+ info="Choose task type from selected dataset"
514
+ )
515
+
516
+ eval_task_dropdown = gr.Dropdown(
517
+ choices=[],
518
+ label="Select Specific Task",
519
+ info="Choose specific task to view details"
520
+ )
521
+
522
+ with gr.Column(scale=1):
523
+ eval_dataset_info = gr.Markdown(
524
+ get_eval_dataset_info(list(EVAL_DATASETS.keys())[0] if EVAL_DATASETS else "")
525
+ )
526
+
527
+ # Task details section
528
+ gr.Markdown("### Task Details")
529
+
530
+ with gr.Row():
531
+ with gr.Column():
532
+ task_field = gr.Textbox(
533
+ label="Task",
534
+ lines=8,
535
+ max_lines=12,
536
+ interactive=False
537
+ )
538
+
539
+ task_type_field = gr.Textbox(
540
+ label="Task Type",
541
+ lines=1,
542
+ interactive=False
543
+ )
544
+
545
+ input_model_field = gr.Textbox(
546
+ label="input_model",
547
+ lines=1,
548
+ interactive=False
549
+ )
550
+
551
+ input_field = gr.Textbox(
552
+ label="input",
553
+ lines=8,
554
+ max_lines=12,
555
+ interactive=False
556
+ )
557
+ output_model_field = gr.Textbox(
558
+ label="output_model",
559
+ lines=1,
560
+ interactive=False
561
+ )
562
+
563
+ # Large text fields for outputs side by side
564
+ gr.Markdown("### Expected vs Actual Response Comparison")
565
+
566
+ with gr.Row():
567
+ expected_response_field = gr.Textbox(
568
+ label="Expected Response",
569
+ lines=30,
570
+ max_lines=40,
571
+ interactive=False
572
+ )
573
+
574
+ loggenix_output_field = gr.Textbox(
575
+ label="Loggenix Output",
576
+ lines=30,
577
+ max_lines=40,
578
+ interactive=False
579
+ )
580
+
581
+ # Event handlers for Tab 2
582
+ eval_dataset_dropdown.change(
583
+ fn=lambda x: (get_eval_dataset_info(x), get_task_types_for_eval(x), []),
584
+ inputs=[eval_dataset_dropdown],
585
+ outputs=[eval_dataset_info, eval_task_type_dropdown, eval_task_dropdown]
586
+ )
587
+
588
+ eval_task_type_dropdown.change(
589
+ fn=get_tasks_by_type_eval,
590
+ inputs=[eval_dataset_dropdown, eval_task_type_dropdown],
591
+ outputs=[eval_task_dropdown]
592
+ )
593
+
594
+ eval_task_dropdown.change(
595
+ fn=get_selected_row_data,
596
+ inputs=[eval_dataset_dropdown, eval_task_type_dropdown, eval_task_dropdown],
597
+ outputs=[task_type_field, input_model_field, output_model_field, task_field, input_field,
598
+ expected_response_field, loggenix_output_field]
599
+ )
600
+
601
+ # TAB 3: VIEW FLAGGED RESPONSES (RENAMED FROM TAB 4)
602
+ with gr.Tab("👀 View Flagged Responses"):
603
+ gr.Markdown("## Review Flagged Responses")
604
+
605
+ with gr.Row():
606
+ with gr.Column():
607
+ flagged_messages_display = gr.Dataframe(
608
+ headers=["Timestamp", "Flag Reason", "Flagged Message", "Conversation Context"],
609
+ interactive=False,
610
+ max_height=400
611
+ )
612
+ refresh_btn = gr.Button("🔄 Refresh", variant="primary")
613
+
614
+ with gr.Column():
615
+ conversation_context_display = gr.Chatbot(
616
+ label="Conversation Context",
617
+ height=400
618
+ )
619
+
620
+ # Event handlers for Tab 3
621
+ flagged_messages_display.select(
622
+ handle_row_select,
623
+ outputs=[conversation_context_display]
624
+ )
625
+
626
+ refresh_btn.click(
627
+ read_flagged_messages,
628
+ outputs=[flagged_messages_display]
629
+ )
630
+
631
+ # TAB 4: MODEL EVAL RESULTS (MOVED FROM TAB 5)
632
+ with gr.Tab("📈 Model Eval Results"):
633
+ gr.Markdown("## Model Evaluation Results")
634
+ gr.Markdown("### 🚧 Coming Soon")
635
+ gr.Markdown(
636
+ "This section will display comprehensive model evaluation metrics, charts, and performance analysis.")
637
+
638
+ # Placeholder content
639
+ with gr.Row():
640
+ with gr.Column():
641
+ gr.Markdown("#### Evaluation Metrics")
642
+ gr.Markdown("- Accuracy scores")
643
+ gr.Markdown("- Performance benchmarks")
644
+ gr.Markdown("- Comparative analysis")
645
+
646
+ with gr.Column():
647
+ gr.Markdown("#### Visualization")
648
+ gr.Markdown("- Performance charts")
649
+ gr.Markdown("- Score distributions")
650
+ gr.Markdown("- Trend analysis")
651
+
652
+ # TAB 5: ABOUT (MOVED FROM TAB 6)
653
+ with gr.Tab("ℹ️ About"):
654
+ gr.Markdown("## About Loggenix MOE Model")
655
+
656
+ gr.Markdown("""
657
+ ### Model: `kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.2-finetuned-tool`
658
+
659
+ This is a fine-tuned Mixture of Experts (MOE) model designed for specialized AI tasks with tool calling capabilities.
660
+
661
+ #### Key Features:
662
+ - **Architecture**: MOE with 0.3B total parameters, 0.1B active parameters
663
+ - **Training**: Fine-tuned with learning rate 7e-5, batch size 16
664
+ - **Hardware**: Optimized for RTX 4090 GPU
665
+ - **Capabilities**: Tool calling, instruction following, task-specific responses
666
+
667
+ #### Model Specifications:
668
+ - **Total Parameters**: 0.3B
669
+ - **Active Parameters**: 0.1B
670
+ - **Context Length**: 4096 tokens
671
+ - **Precision**: FP16 for optimal performance
672
+ - **Flash Attention**: Supported for faster inference
673
+
674
+ #### Sample Inference Code:
675
+ ```python
676
+ from transformers import AutoModelForCausalLM, AutoTokenizer
677
+ import torch
678
+
679
+ # Load model and tokenizer
680
+ model_id = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.2-finetuned-tool"
681
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
682
+ model = AutoModelForCausalLM.from_pretrained(
683
+ model_id,
684
+ device_map="auto",
685
+ torch_dtype=torch.float16,
686
+ attn_implementation="flash_attention_2"
687
+ ).eval()
688
+
689
+ # Prepare messages
690
+ messages = [
691
+ {"role": "system", "content": "You are a helpful AI assistant."},
692
+ {"role": "user", "content": "Calculate 25 + 37"}
693
+ ]
694
+
695
+ # Format and generate
696
+ prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
697
+ inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
698
+
699
+ with torch.no_grad():
700
+ outputs = model.generate(
701
+ **inputs,
702
+ max_new_tokens=512,
703
+ do_sample=True,
704
+ temperature=0.7,
705
+ pad_token_id=tokenizer.pad_token_id
706
+ )
707
+
708
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
709
+ print(response)
710
+ ```
711
+
712
+ #### Tool Calling Support:
713
+ The model supports structured tool calling for mathematical operations, data analysis, and other specialized tasks.
714
+
715
+ #### Performance Optimizations:
716
+ - **Speed Mode**: Max 512 new tokens for fast responses
717
+ - **Balanced Mode**: Max 2048 new tokens for comprehensive answers
718
+ - **Full Capacity**: Dynamic token allocation up to context limit
719
+
720
+ ---
721
+
722
+ **Developed by**: Kshitij Thakkar
723
+ **Version**: v6.2
724
+ **License**: Please check model repository for licensing details
725
+ """)
726
+
727
+ # Load initial data
728
+ demo.load(
729
+ fn=read_flagged_messages,
730
+ outputs=[flagged_messages_display]
731
+ )
732
+
733
+ return demo
734
+
735
+
736
+ # Launch the application
737
+ if __name__ == "__main__":
738
+ print("Starting AI Tasks Evaluation Suite...")
739
+ demo = create_interface()
740
+ demo.launch(
741
+ server_name="0.0.0.0",
742
+ server_port=7860,
743
+ share=False,
744
+ debug=True
745
+ )
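
Both apps persist flags through `flag_response()`, which appends one JSON object per line to `logs/flagged_responses.log` with the keys `timestamp`, `flag_reason`, `flagged_message` and `conversation_context`. A small sketch for inspecting that log outside the UI (path and keys are taken from the code above; the script itself is illustrative and not part of the commit):

```python
# Sketch only (not part of this commit): read the JSONL log written by flag_response().
import json
from pathlib import Path

log_path = Path("logs/flagged_responses.log")
if log_path.exists():
    for line in log_path.read_text().splitlines():
        entry = json.loads(line)
        print(entry["timestamp"], "-", entry["flag_reason"])
        print("  flagged:", str(entry["flagged_message"])[:80])
else:
    print("No flagged responses recorded yet.")
```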
model_handler.py ADDED
@@ -0,0 +1,434 @@
1
+ import torch
2
+ import time
3
+ import gc
4
+ import json
5
+ import re
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
7
+ from typing import Dict, Any, Optional
8
+
9
+ # Performance optimizations
10
+ torch.backends.cudnn.benchmark = True
11
+ torch.backends.cuda.matmul.allow_tf32 = True
12
+ torch.backends.cudnn.allow_tf32 = True
13
+
14
+ # Global model and tokenizer variables
15
+ model = None
16
+ tokenizer = None
17
+ MODEL_ID = "kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v6.2-finetuned-tool"
18
+
19
+ # Inference configurations
20
+ INFERENCE_CONFIGS = {
21
+ "Optimized for Speed": {
22
+ "max_new_tokens_base": 512,
23
+ "max_new_tokens_cap": 512,
24
+ "min_tokens": 50,
25
+ "temperature": 0.7,
26
+ "top_p": 0.9,
27
+ "do_sample": True,
28
+ "use_cache": False,
29
+ "description": "Fast responses with limited output length"
30
+ },
31
+ "Middle-ground": {
32
+ "max_new_tokens_base": 2048,
33
+ "max_new_tokens_cap": 2048,
34
+ "min_tokens": 50,
35
+ "temperature": 0.7,
36
+ "top_p": 0.9,
37
+ "do_sample": True,
38
+ "use_cache": False,
39
+ "description": "Balanced performance and output quality"
40
+ },
41
+ "Full Capacity": {
42
+ "max_new_tokens_base": 4096,
43
+ "max_new_tokens_cap": 4096,
44
+ "min_tokens": 1,
45
+ "temperature": 0.7,
46
+ "top_p": 0.9,
47
+ "do_sample": True,
48
+ "use_cache": False,
49
+ "description": "Maximum output length with dynamic allocation"
50
+ }
51
+ }
52
+
53
+
54
+ def get_inference_configs():
55
+ """Get available inference configurations"""
56
+ return INFERENCE_CONFIGS
+
+
+ def load_model():
+     """Load model and tokenizer with optimizations"""
+     global model, tokenizer
+
+     if model is not None and tokenizer is not None:
+         return model, tokenizer
+
+     print("Loading tokenizer...")
+     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+
+     # Load 8-bit quantized weights
+     quantization_config = BitsAndBytesConfig(
+         load_in_8bit=True,
+         llm_int8_threshold=6.0,
+         llm_int8_has_fp16_weight=False,
+     )
+     # Or 4-bit for even more memory savings:
+     # quantization_config = BitsAndBytesConfig(
+     #     load_in_4bit=True,
+     #     bnb_4bit_compute_dtype=torch.float16,
+     #     bnb_4bit_quant_type="nf4",
+     #     bnb_4bit_use_double_quant=True,
+     # )
+
+     # FlashAttention-2 requires the flash-attn package; checking torch.nn for
+     # scaled_dot_product_attention (which actually lives in torch.nn.functional)
+     # says nothing about that. Fall back to PyTorch's built-in SDPA otherwise.
+     try:
+         import flash_attn  # noqa: F401
+         attn_implementation = "flash_attention_2"
+     except ImportError:
+         attn_implementation = "sdpa"
+
+     print("Loading model...")
+     model = AutoModelForCausalLM.from_pretrained(
+         MODEL_ID,
+         device_map="auto",
+         dtype=torch.float16,  # Half precision for speed (older transformers releases call this `torch_dtype`)
+         attn_implementation=attn_implementation,
+         use_cache=True,
+         quantization_config=quantization_config,
+     ).eval()
+
+     # Gradient checkpointing only saves memory during training; under eval-mode
+     # inference with torch.no_grad() it is effectively a no-op.
+     if hasattr(model, 'gradient_checkpointing_enable'):
+         model.gradient_checkpointing_enable()
+
+     # Ensure a pad token exists so generation does not fall back with a warning
+     if tokenizer.pad_token_id is None:
+         tokenizer.pad_token = tokenizer.eos_token
+
+     # Set pad_token_id
+     if model.config.pad_token_id is None and tokenizer.pad_token_id is not None:
+         model.config.pad_token_id = tokenizer.pad_token_id
+
+     # Set padding side to left for better batching
+     tokenizer.padding_side = "left"
+
+     memory = model.get_memory_footprint() / 1e6
+     print(f"Memory footprint: {memory:,.1f} MB")
+
+     return model, tokenizer
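+ # Example (illustrative sketch): load_model() is idempotent, so callers can
+ # invoke it eagerly at startup and again before each request without paying
+ # the load cost twice:
+ #
+ #     model, tokenizer = load_model()   # first call loads the quantized weights
+ #     model, tokenizer = load_model()   # later calls return the cached objects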
+
+
+ # ===== TOOL DEFINITIONS =====
+
+ def calculate_numbers(operation: str, num1: float, num2: float) -> Dict[str, Any]:
+     """
+     Sample tool to perform basic mathematical operations on two numbers.
+
+     Args:
+         operation: The operation to perform ('add', 'subtract', 'multiply', 'divide')
+         num1: First number
+         num2: Second number
+
+     Returns:
+         Dictionary with result and operation details
+     """
+     try:
+         num1, num2 = float(num1), float(num2)
+
+         if operation.lower() == 'add':
+             result = num1 + num2
+         elif operation.lower() == 'subtract':
+             result = num1 - num2
+         elif operation.lower() == 'multiply':
+             result = num1 * num2
+         elif operation.lower() == 'divide':
+             if num2 == 0:
+                 return {"error": "Division by zero is not allowed"}
+             result = num1 / num2
+         else:
+             return {"error": f"Unknown operation: {operation}"}
+
+         return {
+             "result": result,
+             "operation": operation,
+             "operands": [num1, num2],
+             "formatted": f"{num1} {operation} {num2} = {result}"
+         }
+     except ValueError as e:
+         return {"error": f"Invalid number format: {str(e)}"}
+     except Exception as e:
+         return {"error": f"Calculation error: {str(e)}"}
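+ # Example (illustrative sketch) of the tool's return shape:
+ #
+ #     calculate_numbers("divide", 10, 4)
+ #     # -> {"result": 2.5, "operation": "divide", "operands": [10.0, 4.0],
+ #     #     "formatted": "10.0 divide 4.0 = 2.5"}
+ #     calculate_numbers("divide", 1, 0)
+ #     # -> {"error": "Division by zero is not allowed"}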
+
+
+ # Tool registry
+ AVAILABLE_TOOLS = {
+     "calculate_numbers": {
+         "function": calculate_numbers,
+         "description": "Perform basic mathematical operations (add, subtract, multiply, divide) on two numbers",
+         "parameters": {
+             "operation": "The mathematical operation to perform",
+             "num1": "First number",
+             "num2": "Second number"
+         }
+     }
+ }
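+ # A new tool can be registered by adding an entry with the same shape.
+ # Minimal sketch using a hypothetical echo_text tool (not part of this module):
+ #
+ #     def echo_text(text: str) -> Dict[str, Any]:
+ #         return {"result": text, "formatted": text}
+ #
+ #     AVAILABLE_TOOLS["echo_text"] = {
+ #         "function": echo_text,
+ #         "description": "Echo the given text back to the caller",
+ #         "parameters": {"text": "The text to echo"},
+ #     }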
+
+
+ def execute_tool_call(tool_name: str, **kwargs) -> Dict[str, Any]:
+     """Execute a tool call with given parameters"""
+     print(f"Executing tool: {tool_name} with parameters: {kwargs}")
+     if tool_name not in AVAILABLE_TOOLS:
+         return {"error": f"Unknown tool: {tool_name}"}
+
+     try:
+         tool_function = AVAILABLE_TOOLS[tool_name]["function"]
+         result = tool_function(**kwargs)
+         return {
+             "tool_name": tool_name,
+             "parameters": kwargs,
+             "result": result
+         }
+     except Exception as e:
+         print(f"Tool execution failed: {str(e)}")
+         return {
+             "tool_name": tool_name,
+             "parameters": kwargs,
+             "error": f"Tool execution error: {str(e)}"
+         }
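+ # Example (illustrative sketch) of the wrapper's return shape:
+ #
+ #     execute_tool_call("calculate_numbers", operation="add", num1="125", num2="675")
+ #     # -> {"tool_name": "calculate_numbers",
+ #     #     "parameters": {"operation": "add", "num1": "125", "num2": "675"},
+ #     #     "result": {"result": 800.0, ..., "formatted": "125.0 add 675.0 = 800.0"}}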
+
+
+ def parse_tool_calls(text: str) -> list:
+     """
+     Parse tool calls from model output.
+     Supports both formats:
+     - [TOOL_CALL:tool_name(param1=value1, param2=value2)]
+     - <tool_call>{"name": "tool_name", "parameters": {"param1": "value1", "param2": "value2"}}</tool_call>
+     """
+     tool_calls = []
+
+     # One pattern covering both formats. Parameters are assumed to be flat
+     # key/value pairs; nested objects or values containing commas are not supported.
+     pattern = r'(\[TOOL_CALL:(\w+)\((.*?)\)\]|<tool_call>\s*{"name":\s*"(\w+)",\s*"parameters":\s*{([^}]*)}\s*}\s*</tool_call>)'
+     matches = re.findall(pattern, text)
+     print("Raw matches:", matches)
+
+     for match in matches:
+         full_match, old_tool_name, old_params, json_tool_name, json_params = match
+
+         # Determine which format was matched
+         if old_tool_name:  # Old format: [TOOL_CALL:tool_name(params)]
+             tool_name = old_tool_name
+             params_str = old_params
+             original_call = f"[TOOL_CALL:{tool_name}({params_str})]"
+
+             try:
+                 params = {}
+                 if params_str.strip():
+                     param_pairs = params_str.split(',')
+                     for pair in param_pairs:
+                         if '=' in pair:
+                             key, value = pair.split('=', 1)
+                             key = key.strip()
+                             value = value.strip().strip('"\'')  # Remove quotes
+                             params[key] = value
+
+                 tool_calls.append({
+                     "tool_name": tool_name,
+                     "parameters": params,
+                     "original_call": original_call
+                 })
+
+             except Exception as e:
+                 print(f"Error parsing old format tool call '{tool_name}({params_str})': {e}")
+                 continue
+
+         elif json_tool_name:  # JSON format: <tool_call>...</tool_call>
+             tool_name = json_tool_name
+             params_str = json_params
+             original_call = full_match
+
+             try:
+                 params = {}
+                 if params_str.strip():
+                     # Parse JSON-like parameters of the form:
+                     # "operation": "add", "num1": "125", "num2": "675"
+                     param_pairs = params_str.split(',')
+                     for pair in param_pairs:
+                         if ':' in pair:
+                             key, value = pair.split(':', 1)
+                             key = key.strip().strip('"\'')  # Remove quotes and whitespace
+                             value = value.strip().strip('"\'')  # Remove quotes and whitespace
+                             params[key] = value
+
+                 tool_calls.append({
+                     "tool_name": tool_name,
+                     "parameters": params,
+                     "original_call": original_call
+                 })
+
+             except Exception as e:
+                 print(f"Error parsing JSON format tool call '{tool_name}': {e}")
+                 continue
+
+     return tool_calls
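+ # Example (illustrative sketch) of both supported call formats:
+ #
+ #     parse_tool_calls('[TOOL_CALL:calculate_numbers(operation=add, num1=2, num2=3)]')
+ #     # -> [{"tool_name": "calculate_numbers",
+ #     #      "parameters": {"operation": "add", "num1": "2", "num2": "3"},
+ #     #      "original_call": "[TOOL_CALL:calculate_numbers(operation=add, num1=2, num2=3)]"}]
+ #
+ #     parse_tool_calls('<tool_call>{"name": "calculate_numbers", "parameters": {"operation": "add", "num1": "2", "num2": "3"}}</tool_call>')
+ #     # -> one entry with the same tool name and string-valued parameters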
+
+ def process_tool_calls(text: str) -> str:
+     """Process tool calls in the generated text and replace with results"""
+     tool_calls = parse_tool_calls(text)
+
+     if not tool_calls:
+         return text
+
+     processed_text = text
+
+     for tool_call in tool_calls:
+         tool_name = tool_call["tool_name"]
+         parameters = tool_call["parameters"]
+         original_call = tool_call["original_call"]
+
+         try:
+             # Validate parameters before execution
+             if not isinstance(parameters, dict):
+                 raise ValueError(f"Invalid parameters for tool {tool_name}: {parameters}")
+
+             # Execute tool
+             result = execute_tool_call(tool_name, **parameters)
+
+             # Create replacement text; the tool's own output may also carry an
+             # "error" key (e.g. division by zero), so check both levels
+             tool_output = result.get("result", {})
+             if "error" in result:
+                 replacement = f"[TOOL_ERROR: {result['error']}]"
+             elif isinstance(tool_output, dict) and "error" in tool_output:
+                 replacement = f"[TOOL_ERROR: {tool_output['error']}]"
+             elif isinstance(tool_output, dict) and "formatted" in tool_output:
+                 replacement = f"[TOOL_RESULT: {tool_output['formatted']}]"
+             else:
+                 replacement = f"[TOOL_RESULT: {tool_output}]"
+
+             # Replace tool call with result
+             processed_text = processed_text.replace(original_call, replacement)
+
+         except Exception as e:
+             print(f"Error processing tool call '{tool_name}': {e}")
+             replacement = f"[TOOL_ERROR: Failed to process tool call: {str(e)}]"
+             processed_text = processed_text.replace(original_call, replacement)
+
+     return processed_text
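+ # Example (illustrative sketch): a tool call in the model's output is replaced
+ # in place by its result:
+ #
+ #     process_tool_calls("The answer is [TOOL_CALL:calculate_numbers(operation=add, num1=2, num2=3)].")
+ #     # -> "The answer is [TOOL_RESULT: 2.0 add 3.0 = 5.0]."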
+
+ def monitor_memory():
+     """Print currently allocated and reserved GPU memory (no-op on CPU-only hosts)"""
+     if torch.cuda.is_available():
+         allocated = torch.cuda.memory_allocated() / 1e9
+         cached = torch.cuda.memory_reserved() / 1e9
+         print(f"GPU Memory - Allocated: {allocated:.2f}GB, Cached: {cached:.2f}GB")
+
+ def generate_response(system_prompt: str, user_input: str, config_name: str = "Middle-ground") -> str:
+     """
+     Run inference with the given task (system prompt) and user input using the specified config.
+     """
+     load_model()
+
+     config = INFERENCE_CONFIGS[config_name]
+
+     input_messages = [
+         {"role": "system", "content": system_prompt},
+         {"role": "user", "content": user_input}
+     ]
+
+     prompt_text = tokenizer.apply_chat_template(
+         input_messages,
+         tokenize=False,
+         add_generation_prompt=True
+     )
+
+     input_length = len(tokenizer.encode(prompt_text))
+     context_length = min(input_length, 3584)  # Leave room for generation
+
+     inputs = tokenizer(
+         prompt_text,
+         return_tensors="pt",
+         truncation=True,
+         max_length=context_length,
+         padding=False
+     ).to(model.device)
+
+     actual_input_length = inputs['input_ids'].shape[1]
+     max_new_tokens = min(config["max_new_tokens_cap"], 4096 - actual_input_length - 10)
+     max_new_tokens = max(config["min_tokens"], max_new_tokens)
+
+     with torch.no_grad():
+         start_time = time.time()
+         outputs = model.generate(
+             **inputs,
+             do_sample=config["do_sample"],
+             temperature=config["temperature"],
+             top_p=config["top_p"],
+             use_cache=config["use_cache"],
+             max_new_tokens=max_new_tokens,
+             pad_token_id=tokenizer.pad_token_id,
+             eos_token_id=tokenizer.eos_token_id,
+             # Memory optimizations
+             output_attentions=False,
+             output_hidden_states=False,
+             return_dict_in_generate=False,
+         )
+         inference_time = time.time() - start_time
+         print(f"Inference time: {inference_time:.2f} seconds")
+
+     memory = model.get_memory_footprint() / 1e6
+     monitor_memory()
+     print(f"Memory footprint: {memory:,.1f} MB")
+
+     # Clean up
+     gc.collect()
+
+     # Decode only the newly generated tokens. Searching the decoded text for
+     # prompt_text is unreliable because skip_special_tokens strips the chat
+     # template markers that prompt_text still contains.
+     generated_tokens = outputs[0][actual_input_length:]
+     generated_response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
+
+     # Safety net: if the decoded text still echoes the conversation, keep only
+     # the part after the last assistant/response indicator
+     response_indicators = ["Assistant:", "<|assistant|>", "[/INST]", "Response:"]
+     for indicator in response_indicators:
+         if indicator in generated_response:
+             generated_response = generated_response.split(indicator)[-1].strip()
+             break
+
+     # Process any tool calls in the generated response
+     generated_response = process_tool_calls(generated_response)
+     return generated_response
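+ # End-to-end example (illustrative sketch; assumes a CUDA GPU, bitsandbytes,
+ # and access to the model weights):
+ #
+ #     if __name__ == "__main__":
+ #         system = "You are a helpful assistant. Use tools when calculations are needed."
+ #         print(generate_response(system, "What is 125 plus 675?", config_name="Optimized for Speed"))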
requirements.txt ADDED
Binary file (3.03 kB). View file