|
|
|
""" |
|
Enhanced LLM Compatibility Advisor - Complete with Quantization & Advanced Features |
|
Author: Assistant |
|
Description: Comprehensive device-based LLM recommendations with quantization, comparison, and download assistance |
|
Requirements: streamlit, pandas, plotly, openpyxl |
|
""" |
|
|
|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import re |
|
import plotly.express as px |
|
import plotly.graph_objects as go |
|
from typing import Optional, Tuple, List, Dict |
|
import json |
|
|
|
|
|
st.set_page_config( |
|
page_title="Enhanced LLM Compatibility Advisor", |
|
layout="wide", |
|
    page_icon="🧠",
|
initial_sidebar_state="expanded" |
|
) |
|
|
|
|
|
@st.cache_data |
|
def load_data(): |
|
paths = [ |
|
"src/BITS_INTERNS.xlsx", |
|
"src/Summer of AI - ICFAI (Responses) (3).xlsx" |
|
] |
|
|
|
combined_df = pd.DataFrame() |
|
for path in paths: |
|
try: |
|
df = pd.read_excel(path, sheet_name="Form Responses 1") |
|
df.columns = df.columns.str.strip() |
|
combined_df = pd.concat([combined_df, df], ignore_index=True) |
|
except FileNotFoundError: |
|
return None, f"Excel file '{path}' not found. Please upload the file." |
|
except Exception as e: |
|
return None, f"Error loading '{path}': {str(e)}" |
|
|
|
if combined_df.empty: |
|
return None, "No data found in Excel files." |
|
else: |
|
return combined_df, None |
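# Illustrative contract of load_data() (a sketch, not executed): it always returns a
# (DataFrame, error_message) pair, so callers can branch on the error, e.g.:
#   df, error = load_data()
#   if error or df is None or df.empty:
#       df = generate_demo_data()   # fall back to the synthetic rows defined below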
|
|
|
|
|
def extract_numeric_ram(ram) -> Optional[int]: |
|
if pd.isna(ram): |
|
return None |
|
|
|
ram_str = str(ram).lower().replace(" ", "") |
|
|
|
|
|
gb_match = re.search(r"(\d+(?:\.\d+)?)(?:gb|g)", ram_str) |
|
if gb_match: |
|
return int(float(gb_match.group(1))) |
|
|
|
|
|
mb_match = re.search(r"(\d+)(?:mb|m)", ram_str) |
|
if mb_match: |
|
return max(1, int(int(mb_match.group(1)) / 1024)) |
|
|
|
|
|
plain_match = re.search(r"(\d+)", ram_str) |
|
if plain_match: |
|
return int(plain_match.group(1)) |
|
|
|
return None |
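# Illustrative parsing behaviour of extract_numeric_ram() (expected values given the
# regexes above; shown as comments rather than executed checks):
#   extract_numeric_ram("8GB")     -> 8
#   extract_numeric_ram("16 GB")   -> 16
#   extract_numeric_ram("8192MB")  -> 8      (MB values are converted to GB)
#   extract_numeric_ram("12")      -> 12     (bare numbers are treated as GB)
#   extract_numeric_ram(None)      -> None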
|
|
|
|
|
QUANTIZATION_FORMATS = {
    "FP16": {
        "multiplier": 1.0,
        "description": "Full precision, best quality",
        "icon": "🔥",
        "quality": "Excellent",
        "speed": "Moderate",
        "memory_efficiency": "Low"
    },
    "8-bit": {
        "multiplier": 0.5,
        "description": "50% smaller, good quality",
        "icon": "⚡",
        "quality": "Very Good",
        "speed": "Good",
        "memory_efficiency": "Good"
    },
    "4-bit": {
        "multiplier": 0.25,
        "description": "75% smaller, acceptable quality",
        "icon": "🚀",
        "quality": "Good",
        "speed": "Very Good",
        "memory_efficiency": "Excellent"
    },
    "2-bit": {
        "multiplier": 0.125,
        "description": "87.5% smaller, experimental",
        "icon": "🧪",
        "quality": "Fair",
        "speed": "Excellent",
        "memory_efficiency": "Outstanding"
    }
}
|
|
|
def calculate_quantized_size(base_size_str, quant_format): |
|
"""Calculate quantized model size with better formatting""" |
|
size_match = re.search(r'(\d+\.?\d*)', base_size_str) |
|
if not size_match: |
|
return base_size_str |
|
|
|
base_size = float(size_match.group(1)) |
|
unit = base_size_str.replace(size_match.group(1), "").strip() |
|
|
|
multiplier = QUANTIZATION_FORMATS[quant_format]["multiplier"] |
|
new_size = base_size * multiplier |
|
|
|
|
|
if unit.upper() == "GB" and new_size < 1: |
|
return f"{new_size * 1024:.0f}MB" |
|
elif unit.upper() == "MB" and new_size > 1024: |
|
return f"{new_size / 1024:.1f}GB" |
|
else: |
|
return f"{new_size:.1f}{unit}" |
|
|
|
|
|
LLM_DATABASE = { |
|
"ultra_low": { |
|
"general": [ |
|
{ "name": "TinyLlama-1.1B-Chat", "size": "2.2GB", "description": "Ultra-compact conversational model" }, |
|
{ "name": "DistilBERT-base", "size": "0.3GB", "description": "Efficient BERT variant for NLP tasks" }, |
|
{ "name": "all-MiniLM-L6-v2", "size": "0.1GB", "description": "Sentence embeddings specialist" }, |
|
{ "name": "OPT-125M", "size": "0.5GB", "description": "Meta's lightweight language model" }, |
|
{ "name": "GPT-Neo-125M", "size": "0.5GB", "description": "EleutherAI's compact model" }, |
|
{ "name": "DistilGPT-2", "size": "0.3GB", "description": "Distilled version of GPT-2" }, |
|
{ "name": "MobileBERT", "size": "0.2GB", "description": "Google's mobile-optimized BERT" }, |
|
{ "name": "ALBERT-base", "size": "0.4GB", "description": "A Lite BERT for self-supervised learning" }, |
|
{ "name": "RoBERTa-base", "size": "0.5GB", "description": "Robustly optimized BERT pretraining" }, |
|
{ "name": "ELECTRA-small", "size": "0.2GB", "description": "Efficiently learning encoder representations" }, |
|
{ "name": "MobileLLaMA-1B", "size": "1.0GB", "description": "Mobile-optimized Llama variant" }, |
|
{ "name": "GPT-2-small", "size": "0.5GB", "description": "OpenAI's original small model" }, |
|
{ "name": "T5-small", "size": "0.2GB", "description": "Text-to-Text Transfer Transformer" }, |
|
{ "name": "FLAN-T5-small", "size": "0.3GB", "description": "Instruction-tuned T5" }, |
|
{ "name": "UL2-small", "size": "0.8GB", "description": "Unified Language Learner" }, |
|
{ "name": "DeBERTa-v3-small", "size": "0.4GB", "description": "Microsoft's enhanced BERT" }, |
|
{ "name": "CANINE-s", "size": "0.5GB", "description": "Character-level model" }, |
|
{ "name": "Longformer-base", "size": "0.6GB", "description": "Long document understanding" }, |
|
{ "name": "BigBird-small", "size": "0.7GB", "description": "Sparse attention model" }, |
|
{ "name": "Reformer-small", "size": "0.3GB", "description": "Memory-efficient transformer" }, |
|
{ "name": "FNet-small", "size": "0.4GB", "description": "Fourier transform model" }, |
|
{ "name": "Synthesizer-small", "size": "0.3GB", "description": "Synthetic attention patterns" }, |
|
{ "name": "GPT-Neo-1.3B", "size": "1.3GB", "description": "EleutherAI's 1.3B model" }, |
|
{ "name": "OPT-350M", "size": "0.7GB", "description": "Meta's 350M parameter model" }, |
|
{ "name": "BLOOM-560M", "size": "1.1GB", "description": "BigScience's small multilingual" } |
|
|
|
|
|
], |
|
"code": [ |
|
|
|
{ "name": "CodeT5-small", "size": "0.3GB", "description": "Compact code generation model" }, |
|
{ "name": "Replit-code-v1-3B", "size": "1.2GB", "description": "Code completion specialist" }, |
|
{ "name": "UnixCoder-base", "size": "0.5GB", "description": "Microsoft's code understanding model" }, |
|
{ "name": "CodeBERT-base", "size": "0.5GB", "description": "Bimodal pre-trained model for programming" }, |
|
{ "name": "GraphCodeBERT-base", "size": "0.5GB", "description": "Pre-trained model with data flow" }, |
|
{ "name": "CodeT5-base", "size": "0.9GB", "description": "Identifier-aware unified pre-trained encoder-decoder" }, |
|
{ "name": "PyCodeGPT-110M", "size": "0.4GB", "description": "Python code generation specialist" }, |
|
{ "name": "CodeParrot-110M", "size": "0.4GB", "description": "GPT-2 model trained on Python code" }, |
|
{ "name": "CodeSearchNet-small", "size": "0.6GB", "description": "Code search and understanding" }, |
|
{ "name": "CuBERT-small", "size": "0.4GB", "description": "Google's code understanding" }, |
|
{ "name": "CodeGPT-small", "size": "0.5GB", "description": "Microsoft's code GPT" }, |
|
{ "name": "PLBART-small", "size": "0.7GB", "description": "Programming language BART" }, |
|
{ "name": "TreeBERT-small", "size": "0.6GB", "description": "Tree-based code representation" }, |
|
{ "name": "CoTexT-small", "size": "0.5GB", "description": "Code and text pre-training" }, |
|
{ "name": "SynCoBERT-small", "size": "0.6GB", "description": "Syntax-guided code BERT" } |
|
|
|
|
|
] |
|
}, |
|
"low": { |
|
"general": [ |
|
|
|
{ "name": "Phi-1.5", "size": "2.8GB", "description": "Microsoft's efficient reasoning model" }, |
|
{ "name": "Gemma-2B", "size": "1.4GB", "description": "Google's compact foundation model" }, |
|
{ "name": "OpenLLaMA-3B", "size": "2.1GB", "description": "Open source LLaMA reproduction" }, |
|
{ "name": "RedPajama-3B", "size": "2.0GB", "description": "Together AI's open model" }, |
|
{ "name": "StableLM-3B", "size": "2.3GB", "description": "Stability AI's language model" }, |
|
{ "name": "Pythia-2.8B", "size": "2.8GB", "description": "EleutherAI's training suite model" }, |
|
{ "name": "GPT-Neo-2.7B", "size": "2.7GB", "description": "EleutherAI's open GPT model" }, |
|
{ "name": "OPT-2.7B", "size": "2.7GB", "description": "Meta's open pre-trained transformer" }, |
|
{ "name": "BLOOM-3B", "size": "3.0GB", "description": "BigScience's multilingual model" }, |
|
{ "name": "GPT-J-6B", "size": "3.5GB", "description": "EleutherAI's 6B parameter model" }, |
|
{ "name": "Cerebras-GPT-2.7B", "size": "2.7GB", "description": "Cerebras Systems' open model" }, |
|
{ "name": "PaLM-2B", "size": "2.0GB", "description": "Google's Pathways Language Model" }, |
|
{ "name": "LaMDA-2B", "size": "2.2GB", "description": "Google's Language Model for Dialogue" }, |
|
{ "name": "FairSeq-2.7B", "size": "2.7GB", "description": "Facebook's sequence-to-sequence toolkit" }, |
|
{ "name": "Megatron-2.5B", "size": "2.5GB", "description": "NVIDIA's transformer model" }, |
|
{ "name": "GLM-2B", "size": "2.0GB", "description": "General Language Model pretraining" }, |
|
{ "name": "CPM-2", "size": "2.6GB", "description": "Chinese"}, |
|
|
|
], |
|
"code": [ |
|
|
|
{ "name": "CodeGen-2B", "size": "1.8GB", "description": "Salesforce's code generation model" }, |
|
{ "name": "StarCoder-1B", "size": "1.1GB", "description": "BigCode's programming assistant" }, |
|
{ "name": "InCoder-1B", "size": "1.0GB", "description": "Facebook's code infilling model" }, |
|
{ "name": "PolyCoder-2.7B", "size": "2.7GB", "description": "Carnegie Mellon's code model" }, |
|
{ "name": "CodeParrot-small", "size": "1.5GB", "description": "HuggingFace's Python code model" }, |
|
{ "name": "SantaCoder-1.1B", "size": "1.1GB", "description": "BigCode's multilingual code model" }, |
|
{ "name": "GPT-Code-2B", "size": "2.0GB", "description": "Code-specialized GPT variant" }, |
|
{ "name": "AlphaCode-2B", "size": "2.2GB", "description": "DeepMind's programming model" }, |
|
{ "name": "Codex-2B", "size": "2.0GB", "description": "OpenAI's code generation model" }, |
|
{ "name": "TabNine-2B", "size": "2.1GB", "description": "AI code completion assistant" } |
|
|
|
], |
|
"chat": [ |
|
|
|
{ "name": "Alpaca-3B", "size": "2.0GB", "description": "Stanford's instruction-following model" }, |
|
{ "name": "Vicuna-3B", "size": "2.1GB", "description": "UC Berkeley's chat model" }, |
|
{ "name": "Dolly-3B", "size": "2.2GB", "description": "Databricks' instruction-tuned model" }, |
|
{ "name": "OpenAssistant-3B", "size": "2.3GB", "description": "LAION's assistant model" }, |
|
{ "name": "StableVicuna-3B", "size": "2.1GB", "description": "Stable version of Vicuna" }, |
|
{ "name": "MPT-3B-Chat", "size": "2.0GB", "description": "MosaicML's chat variant" }, |
|
{ "name": "RedPajama-Chat-3B", "size": "2.1GB", "description": "Together AI's chat model" }, |
|
{ "name": "OpenChatKit-3B", "size": "2.2GB", "description": "Together AI's open chat model" }, |
|
{ "name": "Koala-3B", "size": "2.0GB", "description": "UC Berkeley's dialogue model" }, |
|
{ "name": "Guanaco-3B", "size": "2.1GB", "description": "QLoRA fine-tuned model" } |
|
|
|
|
|
], |
|
|
|
"reasoning": [ |
|
{ "name": "WizardMath-7B", "size": "4.0GB", "description": "Mathematical reasoning specialist" }, |
|
{ "name": "MAmmoTH-7B", "size": "4.1GB", "description": "Mathematical reasoning model" }, |
|
{ "name": "MetaMath-7B", "size": "3.9GB", "description": "Mathematical problem solver" }, |
|
{ "name": "Abel-7B", "size": "4.0GB", "description": "Advanced reasoning capabilities" }, |
|
{ "name": "Orca-2-7B", "size": "4.1GB", "description": "Microsoft's reasoning specialist" } |
|
] |
|
}, |
|
"moderate_low": { |
|
"general": [ |
|
|
|
{ "name": "Phi-2", "size": "5.2GB", "description": "Microsoft's advanced 2.7B parameter model" }, |
|
{ "name": "Gemma-7B", "size": "4.2GB", "description": "Google's efficient 7B model" }, |
|
{ "name": "Mistral-7B-v0.1", "size": "4.1GB", "description": "Mistral AI's foundation model" }, |
|
{ "name": "OpenLLaMA-7B", "size": "4.0GB", "description": "Open source 7B language model" }, |
|
{ "name": "MPT-7B", "size": "4.3GB", "description": "MosaicML's transformer model" }, |
|
{ "name": "Falcon-7B", "size": "4.1GB", "description": "TII's instruction model" }, |
|
{ "name": "Pythia-6.9B", "size": "6.9GB", "description": "EleutherAI's large training model" }, |
|
{ "name": "BLOOM-7B", "size": "7.0GB", "description": "BigScience's multilingual foundation model" }, |
|
{ "name": "OLMo-7B", "size": "4.2GB", "description": "Allen AI's open language model" }, |
|
{ "name": "Llama-7B", "size": "4.0GB", "description": "Meta's foundation model" }, |
|
{ "name": "StableLM-7B", "size": "4.1GB", "description": "Stability AI's larger model" }, |
|
{ "name": "RedPajama-7B", "size": "4.0GB", "description": "Together AI's 7B model" }, |
|
{ "name": "OpenLLaMA-7B-v2", "size": "4.1GB", "description": "Improved OpenLLaMA version" }, |
|
{ "name": "Vicuna-7B", "size": "3.9GB", "description": "UC Berkeley's 7B chat model" }, |
|
{ "name": "Alpaca-7B", "size": "3.8GB", "description": "Stanford's instruction model" }, |
|
{ "name": "GPT-NeoX-6B", "size": "6.0GB", "description": "EleutherAI's improved model" }, |
|
{ "name": "OPT-6.7B", "size": "6.7GB", "description": "Meta's 6.7B parameter model" }, |
|
{ "name": "T5-large", "size": "3.0GB", "description": "Large Text-to-Text Transfer" }, |
|
{ "name": "FLAN-T5-large", "size": "3.2GB", "description": "Instruction-tuned T5 large" }, |
|
{ "name": "UL2-base", "size": "4.0GB", "description": "Unified Language Learner base" } |
|
], |
|
|
|
"code": [ |
|
|
|
{ "name": "CodeLlama-7B", "size": "3.8GB", "description": "Meta's specialized code model" }, |
|
{ "name": "StarCoder-7B", "size": "4.0GB", "description": "Advanced code generation model" }, |
|
{ "name": "SantaCoder-1.1B", "size": "1.2GB", "description": "Multilingual code model" }, |
|
{ "name": "CodeGen-6B", "size": "6.0GB", "description": "Salesforce's larger code model" }, |
|
{ "name": "CodeT5p-6B", "size": "6.2GB", "description": "Salesforce's code understanding model" }, |
|
{ "name": "InCoder-6B", "size": "6.0GB", "description": "Facebook's large infilling model" }, |
|
{ "name": "PolyCoder-6B", "size": "6.1GB", "description": "Carnegie Mellon's large code model" }, |
|
{ "name": "AlphaCode-7B", "size": "4.0GB", "description": "DeepMind's competitive programming" }, |
|
{ "name": "Codex-7B", "size": "4.1GB", "description": "OpenAI's advanced code model" }, |
|
{ "name": "WizardCoder-7B", "size": "4.0GB", "description": "Microsoft's coding wizard" } |
|
|
|
|
|
], |
|
"chat": [ |
|
|
|
{ "name": "Zephyr-7B-beta", "size": "4.2GB", "description": "HuggingFace's chat specialist" }, |
|
{ "name": "Neural-Chat-7B", "size": "4.1GB", "description": "Intel's optimized chat model" }, |
|
{ "name": "OpenChat-7B", "size": "4.0GB", "description": "High-quality conversation model" }, |
|
{ "name": "Nous-Hermes-7B", "size": "4.1GB", "description": "NousResearch's assistant model" }, |
|
{ "name": "StableBeluga-7B", "size": "4.2GB", "description": "Stability AI's chat model" }, |
|
{ "name": "Llama-2-7B-Chat", "size": "3.9GB", "description": "Meta's chat-optimized model" }, |
|
{ "name": "Vicuna-7B-v1.3", "size": "3.9GB", "description": "Improved Vicuna chat model" }, |
|
{ "name": "WizardLM-7B", "size": "4.0GB", "description": "Microsoft's instruction model" }, |
|
{ "name": "Orca-Mini-7B", "size": "4.1GB", "description": "Microsoft's reasoning model" }, |
|
{ "name": "Samantha-7B", "size": "4.0GB", "description": "Eric Hartford's assistant model" } |
|
|
|
|
|
] |
|
}, |
|
"moderate": { |
|
"general": [ |
|
|
|
{ "name": "Llama-2-7B-Chat", "size": "3.5GB", "description": "Meta's popular chat model (4-bit)" }, |
|
{ "name": "Mistral-7B-Instruct-v0.2", "size": "4.1GB", "description": "Latest Mistral instruction model" }, |
|
{ "name": "Qwen-7B-Chat", "size": "4.0GB", "description": "Alibaba's multilingual model" }, |
|
{ "name": "Baichuan2-7B-Chat", "size": "4.1GB", "description": "Chinese LLM with strong capabilities" }, |
|
{ "name": "Yi-6B-Chat", "size": "3.8GB", "description": "01.AI's bilingual chat model" }, |
|
{ "name": "InternLM-7B-Chat", "size": "4.0GB", "description": "Shanghai AI Lab's model" }, |
|
{ "name": "ChatGLM3-6B", "size": "3.7GB", "description": "Tsinghua's latest chat model" }, |
|
{ "name": "Aquila-7B", "size": "4.1GB", "description": "BAAI's Chinese-English model" }, |
|
{ "name": "Skywork-13B", "size": "7.2GB", "description": "Kunlun's bilingual model" }, |
|
{ "name": "Llama-2-7B", "size": "3.8GB", "description": "Meta's base foundation model" }, |
|
{ "name": "Mistral-7B-v0.1", "size": "4.0GB", "description": "Original Mistral foundation" }, |
|
{ "name": "Solar-10.7B", "size": "5.4GB", "description": "Upstage's efficient model" }, |
|
{ "name": "Nous-Hermes-2-7B", "size": "4.0GB", "description": "NousResearch's improved model" }, |
|
{ "name": "OpenHermes-2.5-7B", "size": "4.1GB", "description": "Teknium's assistant model" }, |
|
{ "name": "Starling-LM-7B", "size": "4.0GB", "description": "Berkeley's RLAIF model" }, |
|
{ "name": "Openchat-3.5-7B", "size": "4.0GB", "description": "OpenChat's latest version" }, |
|
{ "name": "Dolphin-2.2.1-7B", "size": "4.1GB", "description": "Eric Hartford's uncensored model" }, |
|
{ "name": "PlatYi-7B", "size": "4.0GB", "description": "01.AI's chat-optimized model" }, |
|
{ "name": "TinyLlama-1.1B-Chat", "size": "1.1GB", "description": "Compact conversational model" }, |
|
{ "name": "DeepSeek-LLM-7B", "size": "4.2GB", "description": "DeepSeek's language model" } |
|
], |
|
|
|
|
|
"code": [ |
|
|
|
{ "name": "CodeLlama-7B-Instruct", "size": "3.8GB", "description": "Instruction-tuned code specialist" }, |
|
{ "name": "WizardCoder-7B", "size": "4.0GB", "description": "Enhanced coding capabilities" }, |
|
{ "name": "Phind-CodeLlama-7B-v2", "size": "3.9GB", "description": "Code search optimized model" }, |
|
{ "name": "Magicoder-7B", "size": "4.0GB", "description": "OSS-Instruct trained code model" }, |
|
{ "name": "DeepSeek-Coder-7B", "size": "3.9GB", "description": "DeepSeek's coding specialist" }, |
|
{ "name": "WizardCoder-Python-7B", "size": "4.0GB", "description": "Python-specialized coding model" }, |
|
{ "name": "StarCoder-7B", "size": "4.0GB", "description": "BigCode's 7B programming model" }, |
|
{ "name": "CodeT5p-7B", "size": "4.1GB", "description": "Salesforce's code understanding" }, |
|
{ "name": "InstructCodeT5p-7B", "size": "4.2GB", "description": "Instruction-tuned CodeT5p" }, |
|
{ "name": "CodeGen2-7B", "size": "4.0GB", "description": "Salesforce's improved code model" }, |
|
{ "name": "SantaCoder-7B", "size": "4.1GB", "description": "BigCode's multilingual coder" }, |
|
{ "name": "Replit-Code-7B", "size": "4.0GB", "description": "Replit's code completion model" }, |
|
{ "name": "Code-Alpaca-7B", "size": "3.9GB", "description": "Stanford's code instruction model" }, |
|
{ "name": "UnixCoder-7B", "size": "4.0GB", "description": "Microsoft's large code model" } |
|
], |
|
"chat": [ |
|
|
|
{ "name": "Vicuna-7B-v1.5", "size": "3.9GB", "description": "Enhanced conversational model" }, |
|
{ "name": "ChatGLM2-6B", "size": "3.7GB", "description": "Tsinghua's bilingual chat model" }, |
|
{ "name": "Baize-7B", "size": "4.0GB", "description": "Self-chat trained model" }, |
|
{ "name": "OpenBuddy-7B", "size": "4.0GB", "description": "Cross-lingual AI assistant" }, |
|
{ "name": "Koala-7B", "size": "3.9GB", "description": "UC Berkeley's dialogue model" }, |
|
{ "name": "GPT4All-7B", "size": "4.0GB", "description": "Nomic AI's local chat model" }, |
|
{ "name": "Wizard-Vicuna-7B", "size": "4.1GB", "description": "Combined instruction model" }, |
|
{ "name": "Manticore-7B", "size": "4.0GB", "description": "Multi-domain chat model" }, |
|
{ "name": "Airoboros-7B", "size": "4.1GB", "description": "Context-aware chat model" }, |
|
{ "name": "Samantha-1.2-7B", "size": "4.0GB", "description": "Empathetic AI assistant" } |
|
|
|
|
|
|
|
], |
|
"reasoning": [ |
|
{ "name": "MetaMath-7B", "size": "3.9GB", "description": "Mathematical problem solving" }, |
|
{ "name": "Abel-7B", "size": "4.0GB", "description": "Advanced reasoning capabilities" }, |
|
{ "name": "WizardMath-7B-V1.1", "size": "4.0GB", "description": "Enhanced math reasoning" }, |
|
{ "name": "MAmmoTH-7B", "size": "4.1GB", "description": "Mathematical reasoning model" }, |
|
{ "name": "Orca-2-7B", "size": "4.2GB", "description": "Microsoft's reasoning model" }, |
|
{ "name": "OpenOrca-7B", "size": "4.0GB", "description": "Open-source Orca variant" } |
|
], |
|
"multilingual": [ |
|
|
|
{ "name": "Qwen-7B", "size": "4.0GB", "description": "Alibaba's multilingual foundation" }, |
|
{ "name": "Baichuan2-7B", "size": "4.1GB", "description": "Chinese-English bilingual" }, |
|
{ "name": "InternLM-7B", "size": "4.0GB", "description": "Shanghai AI Lab multilingual" }, |
|
{ "name": "Chinese-LLaMA-2-7B", "size": "4.0GB", "description": "Chinese-optimized Llama" }, |
|
{ "name": "Vigogne-7B", "size": "4.1GB", "description": "French instruction model" } |
|
|
|
|
|
] |
|
}, |
|
"good": { |
|
"general": [ |
|
|
|
{ "name": "Llama-2-13B-Chat", "size": "7.3GB", "description": "Larger Llama variant (4-bit)" }, |
|
{ "name": "Vicuna-13B-v1.5", "size": "7.2GB", "description": "Enhanced large chat model" }, |
|
{ "name": "OpenChat-3.5-13B", "size": "7.1GB", "description": "High-quality large chat model" }, |
|
{ "name": "Qwen-14B-Chat", "size": "7.8GB", "description": "Alibaba's advanced model" }, |
|
{ "name": "Baichuan2-13B-Chat", "size": "7.5GB", "description": "Large Chinese language model" }, |
|
{ "name": "Yi-34B-Chat (8-bit)", "size": "19.5GB", "description": "01.AI's flagship model" }, |
|
{ "name": "Nous-Hermes-13B", "size": "7.3GB", "description": "NousResearch's large assistant" }, |
|
{ "name": "WizardLM-13B", "size": "7.2GB", "description": "Microsoft's instruction model" }, |
|
{ "name": "Alpaca-13B", "size": "7.0GB", "description": "Stanford's large instruction model" }, |
|
{ "name": "Llama-2-13B", "size": "7.0GB", "description": "Meta's 13B foundation model" }, |
|
{ "name": "MPT-30B", "size": "15.0GB", "description": "MosaicML's large transformer" }, |
|
{ "name": "Falcon-40B (8-bit)", "size": "20.0GB", "description": "TII's large instruction model" }, |
|
{ "name": "Guanaco-13B", "size": "7.1GB", "description": "QLoRA fine-tuned model" }, |
|
{ "name": "Orca-13B", "size": "7.4GB", "description": "Microsoft's reasoning model" }, |
|
{ "name": "Platypus-13B", "size": "7.2GB", "description": "Fine-tuned Llama variant" }, |
|
{ "name": "WizardLM-13B-V1.2", "size": "7.3GB", "description": "Improved WizardLM" }, |
|
{ "name": "Nous-Hermes-2-13B", "size": "7.4GB", "description": "Enhanced Hermes model" }, |
|
{ "name": "OpenOrca-13B", "size": "7.2GB", "description": "Open-source Orca recreation" }, |
|
{ "name": "Airoboros-13B", "size": "7.3GB", "description": "Context-aware large model" }, |
|
{ "name": "MythoMax-13B", "size": "7.2GB", "description": "Roleplay-optimized model" } |
|
|
|
|
|
], |
|
"code": [ |
|
|
|
{ "name": "CodeLlama-13B-Instruct", "size": "7.3GB", "description": "Large code generation model" }, |
|
{ "name": "WizardCoder-15B", "size": "8.2GB", "description": "Advanced coding assistant" }, |
|
{ "name": "StarCoder-15B", "size": "8.5GB", "description": "Large programming model" }, |
|
{ "name": "CodeT5p-16B", "size": "8.8GB", "description": "Salesforce's large code model" }, |
|
{ "name": "Phind-CodeLlama-34B (8-bit)", "size": "19.0GB", "description": "Large code search model" }, |
|
{ "name": "DeepSeek-Coder-33B (8-bit)", "size": "18.5GB", "description": "Large coding specialist" }, |
|
{ "name": "CodeLlama-13B-Python", "size": "7.4GB", "description": "Python-specialized CodeLlama" }, |
|
{ "name": "WizardCoder-Python-13B", "size": "7.3GB", "description": "Python coding wizard" }, |
|
{ "name": "InstructCodeT5p-16B", "size": "8.9GB", "description": "Large instruction code model" }, |
|
{ "name": "CodeGen2-16B", "size": "8.7GB", "description": "Salesforce's large code model" } |
|
|
|
|
|
], |
|
"multimodal": [ |
|
|
|
{ "name": "LLaVA-13B", "size": "7.5GB", "description": "Large vision-language model" }, |
|
{ "name": "MiniGPT-4-13B", "size": "7.2GB", "description": "Multimodal conversational AI" }, |
|
{ "name": "InstructBLIP-13B", "size": "7.8GB", "description": "Vision-language instruction model" }, |
|
{ "name": "BLIP-2-FlanT5-XL", "size": "4.8GB", "description": "Salesforce's vision-language model" }, |
|
{ "name": "Flamingo-9B", "size": "9.0GB", "description": "DeepMind's few-shot learning model" }, |
|
{ "name": "LLaVA-1.5-13B", "size": "7.6GB", "description": "Improved LLaVA model" }, |
|
{ "name": "Otter-13B", "size": "7.4GB", "description": "Multi-modal instruction tuned" }, |
|
{ "name": "mPLUG-Owl-14B", "size": "8.0GB", "description": "Alibaba's multimodal model" }, |
|
{ "name": "InternLM-XComposer-7B", "size": "7.0GB", "description": "Vision-language composition" }, |
|
{ "name": "Qwen-VL-7B", "size": "7.2GB", "description": "Qwen vision-language model" } |
|
|
|
|
|
], |
|
"reasoning": [ |
|
|
|
{ "name": "WizardMath-13B", "size": "7.3GB", "description": "Advanced mathematical reasoning" }, |
|
{ "name": "Orca-2-13B", "size": "7.4GB", "description": "Microsoft's reasoning specialist" }, |
|
{ "name": "MetaMath-13B", "size": "7.2GB", "description": "Mathematical problem solver" }, |
|
{ "name": "MAmmoTH-13B", "size": "7.3GB", "description": "Large mathematical reasoning model" }, |
|
{ "name": "Abel-13B", "size": "7.4GB", "description": "Advanced reasoning capabilities" }, |
|
{ "name": "Goat-13B", "size": "7.2GB", "description": "Arithmetic reasoning specialist" }, |
|
{ "name": "OpenOrca-Platypus-13B", "size": "7.3GB", "description": "Combined reasoning model" } |
|
|
|
], |
|
|
|
}, |
|
"high": { |
|
"general": [ |
|
|
|
{ "name": "Mixtral-8x7B-Instruct-v0.1", "size": "26.9GB", "description": "Mixture of experts model (4-bit)" }, |
|
{ "name": "Llama-2-70B-Chat (8-bit)", "size": "38.0GB", "description": "Large language model" }, |
|
{ "name": "Yi-34B-Chat", "size": "19.5GB", "description": "01.AI's flagship model" }, |
|
{ "name": "Qwen-72B (4-bit)", "size": "36.0GB", "description": "Alibaba's largest model" }, |
|
{ "name": "DeepSeek-67B", "size": "35.0GB", "description": "Advanced reasoning model" }, |
|
{ "name": "Nous-Hermes-2-Mixtral-8x7B", "size": "26.9GB", "description": "NousResearch's MoE model" }, |
|
{ "name": "Solar-10.7B", "size": "10.7GB", "description": "Upstage's efficient model" }, |
|
{ "name": "Dolphin-2.5-Mixtral-8x7B", "size": "26.9GB", "description": "Uncensored Mixtral variant" }, |
|
{ "name": "Llama-2-70B", "size": "35.0GB", "description": "Meta's flagship model (8-bit)" }, |
|
{ "name": "Falcon-40B", "size": "20.0GB", "description": "TII's large model" }, |
|
{ "name": "MPT-30B", "size": "15.0GB", "description": "MosaicML's 30B model" }, |
|
{ "name": "Nous-Hermes-2-Yi-34B", "size": "19.6GB", "description": "Enhanced Yi model" }, |
|
{ "name": "OpenHermes-2.5-Mistral-7B", "size": "4.1GB", "description": "Teknium's Mistral variant" }, |
|
{ "name": "Starling-LM-7B-alpha", "size": "4.2GB", "description": "Berkeley's RLAIF model" }, |
|
{ "name": "NeuralBeagle-14B", "size": "8.0GB", "description": "MLP KAT merged model" }, |
|
{ "name": "Goliath-120B (4-bit)", "size": "60.0GB", "description": "Large merged model" }, |
|
{ "name": "Xwin-LM-70B (8-bit)", "size": "38.5GB", "description": "Xwin team's large model" }, |
|
{ "name": "Airoboros-L2-70B (8-bit)", "size": "38.0GB", "description": "Large context model" } |
|
|
|
|
|
], |
|
"code": [ |
|
|
|
{ "name": "CodeLlama-34B-Instruct", "size": "19.0GB", "description": "Large specialized coder" }, |
|
{ "name": "DeepSeek-Coder-33B", "size": "18.5GB", "description": "Advanced code generation" }, |
|
{ "name": "WizardCoder-34B", "size": "19.2GB", "description": "Enterprise-grade coding" }, |
|
{ "name": "StarCoder2-15B", "size": "8.5GB", "description": "Next-gen programming model" }, |
|
{ "name": "Phind-CodeLlama-34B", "size": "19.0GB", "description": "Code search specialized model" }, |
|
{ "name": "Magicoder-34B", "size": "19.1GB", "description": "Large OSS-Instruct model" }, |
|
{ "name": "CodeLlama-34B-Python", "size": "19.1GB", "description": "Python-specialized large model" }, |
|
{ "name": "WizardCoder-Python-34B", "size": "19.2GB", "description": "Large Python specialist" }, |
|
{ "name": "StarCoder-15.5B", "size": "8.8GB", "description": "Enhanced StarCoder" }, |
|
{ "name": "Code-Alpaca-34B", "size": "18.9GB", "description": "Large code instruction model" } |
|
|
|
|
|
], |
|
"chat": [ |
|
|
|
|
|
{ "name": "Vicuna-33B", "size": "18.5GB", "description": "Large conversational model" }, |
|
{ "name": "Guanaco-65B (4-bit)", "size": "33.0GB", "description": "Large instruction-tuned model" }, |
|
{ "name": "Alpaca-30B", "size": "18.0GB", "description": "Large Stanford model" }, |
|
{ "name": "OpenBuddy-34B", "size": "19.0GB", "description": "Large cross-lingual assistant" }, |
|
{ "name": "WizardLM-30B", "size": "17.0GB", "description": "Large instruction model" }, |
|
{ "name": "Nous-Hermes-Llama2-70B (8-bit)", "size": "38.2GB", "description": "Large Hermes variant" }, |
|
{ "name": "Airoboros-65B (4-bit)", "size": "33.5GB", "description": "Large context chat model" }, |
|
{ "name": "MythoMax-L2-13B", "size": "7.4GB", "description": "Roleplay optimized" } |
|
], |
|
"reasoning": [ |
|
|
|
{ "name": "WizardMath-70B (8-bit)", "size": "38.5GB", "description": "Premier math reasoning" }, |
|
{ "name": "MetaMath-70B (8-bit)", "size": "38.0GB", "description": "Advanced mathematical AI" }, |
|
{ "name": "Goat-70B (8-bit)", "size": "35.0GB", "description": "Arithmetic reasoning specialist" }, |
|
{ "name": "MAmmoTH-70B (8-bit)", "size": "38.2GB", "description": "Large mathematical model" }, |
|
{ "name": "Orca-2-13B", "size": "7.4GB", "description": "Microsoft's reasoning model" }, |
|
{ "name": "Abel-70B (8-bit)", "size": "38.1GB", "description": "Large reasoning model" } |
|
] |
|
|
|
}, |
|
"ultra_high": { |
|
"general": [ |
|
{"name": "Llama-2-70B", "size": "130GB", "description": "Full precision", "parameters": "70B", "context": "4K"}, |
|
{"name": "Mixtral-8x22B", "size": "176GB", "description": "Latest mixture model", "parameters": "141B", "context": "64K"}, |
|
{"name": "Qwen-72B", "size": "145GB", "description": "Alibaba's flagship", "parameters": "72B", "context": "32K"}, |
|
{"name": "Llama-3-70B", "size": "140GB", "description": "Meta's latest", "parameters": "70B", "context": "8K"} |
|
], |
|
"code": [ |
|
{"name": "CodeLlama-34B", "size": "68GB", "description": "Full precision code", "parameters": "34B", "context": "16K"}, |
|
{"name": "DeepSeek-Coder-33B", "size": "66GB", "description": "Full precision coding", "parameters": "33B", "context": "16K"} |
|
], |
|
"reasoning": [ |
|
{"name": "WizardMath-70B", "size": "130GB", "description": "Full precision math", "parameters": "70B", "context": "2K"}, |
|
{"name": "Goat-70B", "size": "132GB", "description": "Arithmetic reasoning", "parameters": "70B", "context": "2K"} |
|
] |
|
} |
|
} |
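# Shape of LLM_DATABASE (for reference): tier key -> category -> list of model dicts.
# Every entry has "name", "size" and "description"; only the "ultra_high" tier also
# carries "parameters" and "context", which is why the display code below falls back
# to model.get("parameters", "Unknown") / model.get("context", "Unknown").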
|
|
|
|
|
|
|
GPU_DATABASE = { |
|
"RTX 3060": {"vram": 8, "performance": "mid", "architecture": "Ampere", "tensor_cores": "2nd gen", "memory_bandwidth": "360 GB/s"}, |
|
"RTX 3070": {"vram": 8, "performance": "high", "architecture": "Ampere", "tensor_cores": "2nd gen", "memory_bandwidth": "448 GB/s"}, |
|
"RTX 3080": {"vram": 10, "performance": "high", "architecture": "Ampere", "tensor_cores": "2nd gen", "memory_bandwidth": "760 GB/s"}, |
|
"RTX 3090": {"vram": 24, "performance": "ultra", "architecture": "Ampere", "tensor_cores": "2nd gen", "memory_bandwidth": "936 GB/s"}, |
|
"RTX 4060": {"vram": 8, "performance": "mid", "architecture": "Ada Lovelace", "tensor_cores": "4th gen", "memory_bandwidth": "272 GB/s"}, |
|
"RTX 4070": {"vram": 12, "performance": "high", "architecture": "Ada Lovelace", "tensor_cores": "4th gen", "memory_bandwidth": "504 GB/s"}, |
|
"RTX 4080": {"vram": 16, "performance": "ultra", "architecture": "Ada Lovelace", "tensor_cores": "4th gen", "memory_bandwidth": "716 GB/s"}, |
|
"RTX 4090": {"vram": 24, "performance": "ultra", "architecture": "Ada Lovelace", "tensor_cores": "4th gen", "memory_bandwidth": "1008 GB/s"}, |
|
"Apple M1": {"vram": 8, "performance": "mid", "architecture": "Apple Silicon", "tensor_cores": "None", "memory_bandwidth": "68.25 GB/s"}, |
|
"Apple M2": {"vram": 16, "performance": "high", "architecture": "Apple Silicon", "tensor_cores": "None", "memory_bandwidth": "100 GB/s"}, |
|
"Apple M3": {"vram": 24, "performance": "ultra", "architecture": "Apple Silicon", "tensor_cores": "None", "memory_bandwidth": "150 GB/s"}, |
|
"RX 6700 XT": {"vram": 12, "performance": "mid", "architecture": "RDNA 2", "tensor_cores": "None", "memory_bandwidth": "384 GB/s"}, |
|
"RX 7900 XTX": {"vram": 24, "performance": "ultra", "architecture": "RDNA 3", "tensor_cores": "None", "memory_bandwidth": "960 GB/s"}, |
|
} |
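# Note: GPU_DATABASE keys are bare GPU names. The lookups below strip any parenthesised
# suffix first (gpu_name.split(" (")[0]), so a UI label such as "RTX 4090 (24GB VRAM)"
# would still resolve to the "RTX 4090" entry; that suffix format is an assumption about
# how the GPU string is supplied, not something enforced here.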
|
|
|
def get_gpu_recommendations(gpu_name, ram_gb): |
|
"""Get GPU-specific model recommendations""" |
|
if gpu_name == "No GPU": |
|
return "CPU-only models recommended", "Use 4-bit quantization for better performance" |
|
|
|
gpu_info = GPU_DATABASE.get(gpu_name.split(" (")[0], {"vram": 0, "performance": "low"}) |
|
vram = gpu_info["vram"] |
|
|
|
    if vram <= 8:
        return "7B models with 4-bit quantization", f"Estimated VRAM usage: ~{vram-1}GB"
    elif vram <= 12:
        return "13B models with 8-bit quantization", f"Estimated VRAM usage: ~{vram-1}GB"
    elif vram <= 16:
        return "13B models at FP16 or 30B with 4-bit", f"Estimated VRAM usage: ~{vram-1}GB"
    else:
        return "70B models with 4-bit quantization", f"Estimated VRAM usage: ~{vram-2}GB"
|
|
|
def predict_inference_speed(model_size_gb, ram_gb, has_gpu=False, gpu_name=""): |
|
"""Predict approximate inference speed""" |
|
if model_size_gb > ram_gb: |
|
return "โ Insufficient RAM", "Consider smaller model or quantization" |
|
|
|
if has_gpu and gpu_name != "No GPU": |
|
gpu_info = GPU_DATABASE.get(gpu_name.split(" (")[0], {"performance": "low"}) |
|
perf = gpu_info["performance"] |
|
|
|
if perf == "ultra": |
|
if model_size_gb <= 4: |
|
return "โก Blazing Fast", "~50-100 tokens/sec" |
|
elif model_size_gb <= 8: |
|
return "๐ Very Fast", "~30-60 tokens/sec" |
|
elif model_size_gb <= 16: |
|
return "๐ Fast", "~15-30 tokens/sec" |
|
else: |
|
return "๐ Moderate", "~5-15 tokens/sec" |
|
elif perf == "high": |
|
if model_size_gb <= 4: |
|
return "โก Very Fast", "~30-50 tokens/sec" |
|
elif model_size_gb <= 8: |
|
return "๐ Fast", "~15-30 tokens/sec" |
|
else: |
|
return "๐ Moderate", "~5-15 tokens/sec" |
|
else: |
|
if model_size_gb <= 4: |
|
return "โก Fast", "~15-30 tokens/sec" |
|
else: |
|
return "๐ Slow", "~3-10 tokens/sec" |
|
else: |
|
|
|
        if model_size_gb <= 2:
            return "⚡ Acceptable", "~5-15 tokens/sec"
        elif model_size_gb <= 4:
            return "🐢 Slow", "~1-5 tokens/sec"
        else:
            return "🐌 Very Slow", "~0.5-2 tokens/sec"
|
|
|
|
|
def recommend_llm(ram_str) -> Tuple[str, str, str, Dict[str, List[Dict]]]: |
|
"""Returns (recommendation, performance_tier, additional_info, detailed_models)""" |
|
ram = extract_numeric_ram(ram_str) |
|
|
|
if ram is None: |
|
return ("โช Check exact specs or test with quantized models.", |
|
"Unknown", |
|
"Verify RAM specifications", |
|
{}) |
|
|
|
if ram <= 2: |
|
models = LLM_DATABASE["ultra_low"] |
|
return ("๐ธ Ultra-lightweight models - basic NLP tasks", |
|
"Ultra Low", |
|
"Mobile-optimized, simple tasks, limited context", |
|
models) |
|
elif ram <= 4: |
|
models = LLM_DATABASE["low"] |
|
return ("๐ธ Small language models - decent capabilities", |
|
"Low", |
|
"Basic chat, simple reasoning, text classification", |
|
models) |
|
elif ram <= 6: |
|
models = LLM_DATABASE["moderate_low"] |
|
return ("๐ Mid-range models - good general performance", |
|
"Moderate-Low", |
|
"Solid reasoning, coding help, longer conversations", |
|
models) |
|
elif ram <= 8: |
|
models = LLM_DATABASE["moderate"] |
|
return ("๐ Strong 7B models - excellent capabilities", |
|
"Moderate", |
|
"Professional use, coding assistance, complex reasoning", |
|
models) |
|
elif ram <= 16: |
|
models = LLM_DATABASE["good"] |
|
return ("๐ข High-quality models - premium performance", |
|
"Good", |
|
"Advanced tasks, multimodal support, research use", |
|
models) |
|
elif ram <= 32: |
|
models = LLM_DATABASE["high"] |
|
return ("๐ต Premium models - professional grade", |
|
"High", |
|
"Enterprise ready, complex reasoning, specialized tasks", |
|
models) |
|
else: |
|
models = LLM_DATABASE["ultra_high"] |
|
return ("๐ต Top-tier models - enterprise capabilities", |
|
"Ultra High", |
|
"Research grade, maximum performance, domain expertise", |
|
models) |
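# Illustrative tier mapping used by recommend_llm() (derived from the thresholds above):
#   RAM <= 2GB  -> "Ultra Low"      RAM <= 16GB -> "Good"
#   RAM <= 4GB  -> "Low"            RAM <= 32GB -> "High"
#   RAM <= 6GB  -> "Moderate-Low"   RAM >  32GB -> "Ultra High"
#   RAM <= 8GB  -> "Moderate"       unparseable -> "Unknown"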
|
|
|
|
|
def get_os_info(os_name) -> Tuple[str, str]:
    """Returns (icon, clean_name)"""
    if pd.isna(os_name):
        return "💻", "Not specified"

    os = str(os_name).lower()
    if "windows" in os:
        return "🪟", os_name
    elif "mac" in os or "darwin" in os:
        return "🍎", os_name
    elif "linux" in os or "ubuntu" in os:
        return "🐧", os_name
    elif "android" in os:
        return "🤖", os_name
    elif "ios" in os:
        return "📱", os_name
    else:
        return "💻", os_name
|
|
|
|
|
def create_model_comparison_table(selected_models, quantization_type="FP16"): |
|
"""Create a comparison table for selected models""" |
|
comparison_data = [] |
|
|
|
for model_info in selected_models: |
|
quant_size = calculate_quantized_size(model_info['size'], quantization_type) |
|
|
|
|
|
size_match = re.search(r'(\d+\.?\d*)', quant_size) |
|
if size_match: |
|
size_num = float(size_match.group(1)) |
|
estimated_vram = f"{size_num * 1.2:.1f}GB" |
|
else: |
|
estimated_vram = "Unknown" |
|
|
|
comparison_data.append({ |
|
'Model': model_info['name'], |
|
'Parameters': model_info.get('parameters', 'Unknown'), |
|
'Context': model_info.get('context', 'Unknown'), |
|
'Original Size': model_info['size'], |
|
f'{quantization_type} Size': quant_size, |
|
'Est. VRAM': estimated_vram, |
|
'Description': model_info['description'] |
|
}) |
|
|
|
return pd.DataFrame(comparison_data) |
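# Example usage of create_model_comparison_table() (a sketch; the 1.2x VRAM factor above
# is a rough rule of thumb for weights plus activation overhead):
#   models = LLM_DATABASE["moderate"]["general"][:3]
#   table = create_model_comparison_table(models, quantization_type="4-bit")
#   # table columns: Model, Parameters, Context, Original Size, "4-bit Size",
#   # Est. VRAM, Description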
|
|
|
|
|
def display_model_categories(models_dict: Dict[str, List[Dict]], ram_gb: int, show_quantization=True): |
|
"""Display models with quantization options""" |
|
if not models_dict: |
|
return |
|
|
|
st.markdown(f"### ๐ฏ Recommended Models for {ram_gb}GB RAM:") |
|
|
|
for category, model_list in models_dict.items(): |
|
if model_list: |
|
with st.expander(f"๐ {category.replace('_', ' ').title()} Models"): |
|
for model in model_list[:6]: |
|
st.markdown(f"**{model['name']}**") |
|
|
|
|
|
detail_col1, detail_col2, detail_col3 = st.columns(3) |
|
with detail_col1: |
|
st.caption(f"๐ {model.get('parameters', 'Unknown')} params") |
|
with detail_col2: |
|
st.caption(f"๐ {model.get('context', 'Unknown')} context") |
|
with detail_col3: |
|
st.caption(f"๐พ {model['size']} original") |
|
|
|
st.markdown(f"*{model['description']}*") |
|
|
|
if show_quantization: |
|
|
|
quant_cols = st.columns(4) |
|
for i, (quant_type, quant_info) in enumerate(QUANTIZATION_FORMATS.items()): |
|
with quant_cols[i]: |
|
quant_size = calculate_quantized_size(model['size'], quant_type) |
|
st.metric( |
|
label=f"{quant_info['icon']} {quant_type}", |
|
value=quant_size, |
|
help=quant_info['description'] |
|
) |
|
|
|
st.markdown("---") |
|
|
|
|
|
def create_performance_chart(df): |
|
"""Create a performance distribution chart""" |
|
laptop_rams = df["Laptop RAM"].apply(extract_numeric_ram).dropna() |
|
mobile_rams = df["Mobile RAM"].apply(extract_numeric_ram).dropna() |
|
|
|
fig = go.Figure() |
|
|
|
fig.add_trace(go.Histogram( |
|
x=laptop_rams, |
|
name="Laptop RAM", |
|
opacity=0.7, |
|
nbinsx=10, |
|
marker_color='#1f77b4' |
|
)) |
|
|
|
fig.add_trace(go.Histogram( |
|
x=mobile_rams, |
|
name="Mobile RAM", |
|
opacity=0.7, |
|
nbinsx=10, |
|
marker_color='#ff7f0e' |
|
)) |
|
|
|
fig.update_layout( |
|
title="RAM Distribution Across Devices", |
|
xaxis_title="RAM (GB)", |
|
yaxis_title="Number of Students", |
|
barmode='overlay', |
|
height=400, |
|
showlegend=True |
|
) |
|
|
|
return fig |
|
|
|
|
|
def generate_demo_data(): |
|
"""Generate demo data for testing when Excel files are missing""" |
|
demo_data = { |
|
"Full Name": [ |
|
"Demo Student 1", "Demo Student 2", "Demo Student 3", "Demo Student 4", |
|
"Demo Student 5", "Demo Student 6", "Demo Student 7", "Demo Student 8", |
|
"Demo Student 9", "Demo Student 10", "Demo Student 11", "Demo Student 12" |
|
], |
|
"Laptop RAM": ["8GB", "16GB", "4GB", "32GB", "6GB", "12GB", "2GB", "24GB", "64GB", "3GB", "20GB", "10GB"], |
|
"Mobile RAM": ["4GB", "8GB", "3GB", "12GB", "6GB", "4GB", "2GB", "8GB", "16GB", "3GB", "6GB", "8GB"], |
|
"Laptop Operating System": [ |
|
"Windows 11", "macOS Monterey", "Ubuntu 22.04", "Windows 10", |
|
"macOS Big Sur", "Fedora 36", "Windows 11", "macOS Ventura", |
|
"Ubuntu 20.04", "Windows 10", "macOS Sonoma", "Pop!_OS 22.04" |
|
], |
|
"Mobile Operating System": [ |
|
"Android 13", "iOS 16", "Android 12", "iOS 15", |
|
"Android 14", "iOS 17", "Android 11", "iOS 16", |
|
"Android 13", "iOS 15", "Android 14", "iOS 17" |
|
] |
|
} |
|
return pd.DataFrame(demo_data) |
|
|
|
|
|
def prepare_user_options(df): |
|
"""Safely prepare user options for selectbox, handling NaN values and mixed types""" |
|
try: |
|
unique_names = df["Full Name"].dropna().unique() |
|
|
|
valid_names = [] |
|
for name in unique_names: |
|
try: |
|
str_name = str(name).strip() |
|
if str_name and str_name.lower() != 'nan': |
|
valid_names.append(str_name) |
|
except: |
|
continue |
|
|
|
options = ["Select a student..."] + sorted(valid_names) |
|
return options |
|
except Exception as e: |
|
st.error(f"Error preparing user options: {e}") |
|
return ["Select a student..."] |
|
|
|
|
|
st.title("๐ง LLM Compatibility Advisor") |
|
st.markdown("Get personalized recommendations from **150+ popular open source AI models** with download sizes!") |
|
|
|
|
|
df, error = load_data() |
|
|
|
if error or df is None or df.empty: |
|
st.warning("โ ๏ธ Excel files not found. Running with demo data for testing.") |
|
st.info("๐ To use real data, place 'BITS_INTERNS.xlsx' and 'Summer of AI - ICFAI (Responses) (3).xlsx' in the 'src/' directory.") |
|
df = generate_demo_data() |
|
|
|
with st.expander("๐ Expected Data Format"): |
|
st.markdown(""" |
|
The app expects Excel files with the following columns: |
|
- **Full Name**: Student name |
|
- **Laptop RAM**: RAM specification (e.g., "8GB", "16 GB", "8192MB") |
|
- **Mobile RAM**: Mobile device RAM |
|
- **Laptop Operating System**: OS name |
|
- **Mobile Operating System**: Mobile OS name |
|
""") |
|
|
|
|
|
required_columns = ["Full Name", "Laptop RAM", "Mobile RAM"] |
|
missing_columns = [col for col in required_columns if col not in df.columns] |
|
|
|
if missing_columns: |
|
st.error(f"Missing required columns: {missing_columns}") |
|
st.info("Please ensure your Excel file contains the required columns.") |
|
st.stop() |
|
|
|
|
|
df = df.copy() |
|
df["Full Name"] = df["Full Name"].astype(str).str.strip() |
|
|
|
|
|
with st.sidebar: |
|
st.header("๐ Filters & Info") |
|
|
|
|
|
performance_filter = st.multiselect( |
|
"Filter by Performance Tier:", |
|
["Ultra Low", "Low", "Moderate-Low", "Moderate", "Good", "High", "Ultra High", "Unknown"], |
|
default=["Ultra Low", "Low", "Moderate-Low", "Moderate", "Good", "High", "Ultra High", "Unknown"] |
|
) |
|
|
|
|
|
st.subheader("Model Categories") |
|
show_categories = st.multiselect( |
|
"Show specific categories:", |
|
["general", "code", "chat", "reasoning", "multimodal"], |
|
default=["general", "code", "chat"] |
|
) |
|
|
|
st.markdown("---") |
|
st.markdown("### ๐ Quick Stats") |
|
st.metric("Total Students", len(df)) |
|
st.metric("Popular Models", "150+") |
|
|
|
|
|
avg_laptop_ram = df["Laptop RAM"].apply(extract_numeric_ram).mean() |
|
avg_mobile_ram = df["Mobile RAM"].apply(extract_numeric_ram).mean() |
|
|
|
if not pd.isna(avg_laptop_ram): |
|
st.metric("Avg Laptop RAM", f"{avg_laptop_ram:.1f} GB") |
|
if not pd.isna(avg_mobile_ram): |
|
st.metric("Avg Mobile RAM", f"{avg_mobile_ram:.1f} GB") |
|
|
|
|
|
st.subheader("๐ค Individual Student Analysis") |
|
|
|
|
|
user_options = prepare_user_options(df) |
|
|
|
selected_user = st.selectbox( |
|
"Choose a student:", |
|
options=user_options, |
|
index=0 |
|
) |
|
|
|
if selected_user and selected_user != "Select a student...": |
|
|
|
user_data_mask = df["Full Name"].astype(str).str.strip() == selected_user |
|
if user_data_mask.any(): |
|
user_data = df[user_data_mask].iloc[0] |
|
|
|
|
|
col1, col2 = st.columns(2) |
|
|
|
with col1: |
|
st.markdown("### ๐ป Laptop Configuration") |
|
laptop_os_icon, laptop_os_name = get_os_info(user_data.get('Laptop Operating System')) |
|
laptop_ram = user_data.get('Laptop RAM', 'Not specified') |
|
laptop_rec, laptop_tier, laptop_info, laptop_models = recommend_llm(laptop_ram) |
|
laptop_ram_gb = extract_numeric_ram(laptop_ram) or 0 |
|
|
|
st.markdown(f"**OS:** {laptop_os_icon} {laptop_os_name}") |
|
st.markdown(f"**RAM:** {laptop_ram}") |
|
st.markdown(f"**Performance Tier:** {laptop_tier}") |
|
|
|
st.success(f"**๐ก Recommendation:** {laptop_rec}") |
|
st.info(f"**โน๏ธ Notes:** {laptop_info}") |
|
|
|
|
|
if laptop_models: |
|
filtered_models = {k: v for k, v in laptop_models.items() if k in show_categories} |
|
display_model_categories(filtered_models, laptop_ram_gb) |
|
|
|
with col2: |
|
st.markdown("### ๐ฑ Mobile Configuration") |
|
mobile_os_icon, mobile_os_name = get_os_info(user_data.get('Mobile Operating System')) |
|
mobile_ram = user_data.get('Mobile RAM', 'Not specified') |
|
mobile_rec, mobile_tier, mobile_info, mobile_models = recommend_llm(mobile_ram) |
|
mobile_ram_gb = extract_numeric_ram(mobile_ram) or 0 |
|
|
|
st.markdown(f"**OS:** {mobile_os_icon} {mobile_os_name}") |
|
st.markdown(f"**RAM:** {mobile_ram}") |
|
st.markdown(f"**Performance Tier:** {mobile_tier}") |
|
|
|
st.success(f"**๐ก Recommendation:** {mobile_rec}") |
|
st.info(f"**โน๏ธ Notes:** {mobile_info}") |
|
|
|
|
|
if mobile_models: |
|
filtered_models = {k: v for k, v in mobile_models.items() if k in show_categories} |
|
display_model_categories(filtered_models, mobile_ram_gb) |
|
|
|
|
|
st.markdown("---") |
|
st.header("๐ Batch Analysis & Insights") |
|
|
|
|
|
df_display = df[["Full Name", "Laptop RAM", "Mobile RAM"]].copy() |
|
|
|
|
|
laptop_recommendations = df["Laptop RAM"].apply(lambda x: recommend_llm(x)[0]) |
|
mobile_recommendations = df["Mobile RAM"].apply(lambda x: recommend_llm(x)[0]) |
|
laptop_tiers = df["Laptop RAM"].apply(lambda x: recommend_llm(x)[1]) |
|
mobile_tiers = df["Mobile RAM"].apply(lambda x: recommend_llm(x)[1]) |
|
|
|
df_display["Laptop LLM"] = laptop_recommendations |
|
df_display["Mobile LLM"] = mobile_recommendations |
|
df_display["Laptop Tier"] = laptop_tiers |
|
df_display["Mobile Tier"] = mobile_tiers |
|
|
|
|
|
mask = (laptop_tiers.isin(performance_filter) | mobile_tiers.isin(performance_filter)) |
|
df_filtered = df_display[mask] |
|
|
|
|
|
st.subheader(f"๐ Student Recommendations ({len(df_filtered)} students)") |
|
st.dataframe( |
|
df_filtered, |
|
use_container_width=True, |
|
column_config={ |
|
"Full Name": st.column_config.TextColumn("Student Name", width="medium"), |
|
"Laptop RAM": st.column_config.TextColumn("Laptop RAM", width="small"), |
|
"Mobile RAM": st.column_config.TextColumn("Mobile RAM", width="small"), |
|
"Laptop LLM": st.column_config.TextColumn("Laptop Recommendation", width="large"), |
|
"Mobile LLM": st.column_config.TextColumn("Mobile Recommendation", width="large"), |
|
"Laptop Tier": st.column_config.TextColumn("L-Tier", width="small"), |
|
"Mobile Tier": st.column_config.TextColumn("M-Tier", width="small"), |
|
} |
|
) |
|
|
|
|
|
if len(df) > 1: |
|
st.subheader("๐ RAM Distribution Analysis") |
|
fig = create_performance_chart(df) |
|
st.plotly_chart(fig, use_container_width=True) |
|
|
|
|
|
st.subheader("๐ฏ Performance Tier Summary") |
|
tier_col1, tier_col2 = st.columns(2) |
|
|
|
with tier_col1: |
|
st.markdown("**Laptop Performance Tiers:**") |
|
laptop_tier_counts = laptop_tiers.value_counts() |
|
for tier, count in laptop_tier_counts.items(): |
|
percentage = (count / len(laptop_tiers)) * 100 |
|
st.write(f"โข {tier}: {count} students ({percentage:.1f}%)") |
|
|
|
with tier_col2: |
|
st.markdown("**Mobile Performance Tiers:**") |
|
mobile_tier_counts = mobile_tiers.value_counts() |
|
    for tier, count in mobile_tier_counts.items():
        # Divide by the total number of mobile entries (not the number of distinct tiers)
        # so the percentages mirror the laptop column above.
        percentage = (count / len(mobile_tiers)) * 100
        st.write(f"• {tier}: {count} students ({percentage:.1f}%)")
|
|
|
|
|
st.markdown("---") |
|
st.header("๐ Popular Model Explorer") |
|
|
|
explorer_col1, explorer_col2 = st.columns(2) |
|
|
|
with explorer_col1: |
|
selected_ram_range = st.selectbox( |
|
"Select RAM range to explore models:", |
|
["โค2GB (Ultra Low)", "3-4GB (Low)", "5-6GB (Moderate-Low)", |
|
"7-8GB (Moderate)", "9-16GB (Good)", "17-32GB (High)", ">32GB (Ultra High)"] |
|
) |
|
|
|
with explorer_col2: |
|
selected_category = st.selectbox( |
|
"Select model category:", |
|
["general", "code", "chat", "reasoning", "multimodal"] |
|
) |
|
|
|
|
|
ram_mapping = { |
|
"โค2GB (Ultra Low)": "ultra_low", |
|
"3-4GB (Low)": "low", |
|
"5-6GB (Moderate-Low)": "moderate_low", |
|
"7-8GB (Moderate)": "moderate", |
|
"9-16GB (Good)": "good", |
|
"17-32GB (High)": "high", |
|
">32GB (Ultra High)": "ultra_high" |
|
} |
|
|
|
selected_ram_key = ram_mapping[selected_ram_range] |
|
if selected_ram_key in LLM_DATABASE and selected_category in LLM_DATABASE[selected_ram_key]: |
|
models = LLM_DATABASE[selected_ram_key][selected_category] |
|
|
|
st.subheader(f"๐ฏ {selected_category.title()} Models for {selected_ram_range}") |
|
|
|
|
|
for model in models: |
|
with st.container(): |
|
col1, col2, col3 = st.columns([3, 1, 3]) |
|
with col1: |
|
st.markdown(f"### {model['name']}") |
|
with col2: |
|
st.markdown(f"**{model['size']}**") |
|
st.caption("Download Size") |
|
with col3: |
|
st.markdown(f"*{model['description']}*") |
|
|
|
if "Llama" in model['name']: |
|
st.caption("๐ Available on Hugging Face & Ollama") |
|
elif "Mistral" in model['name']: |
|
st.caption("๐ Available on Hugging Face & Mistral AI") |
|
elif "Gemma" in model['name']: |
|
st.caption("๐ Available on Hugging Face & Google") |
|
else: |
|
st.caption("๐ Available on Hugging Face") |
|
st.markdown("---") |
|
else: |
|
st.info(f"No {selected_category} models available for {selected_ram_range}") |
|
|
|
|
|
with st.expander("๐ Model Guide & Download Information"): |
|
st.markdown(""" |
|
## 🏆 Popular Models by Category
|
|
|
### 🎯 **General Purpose Champions**
|
- **Llama-2 Series**: Meta's flagship models (7B, 13B, 70B) |
|
- **Mistral Series**: Excellent efficiency and performance |
|
- **Gemma**: Google's efficient models (2B, 7B) |
|
- **Phi**: Microsoft's compact powerhouses |
|
|
|
### 💻 **Code Specialists**
|
- **CodeLlama**: Meta's dedicated coding models |
|
- **StarCoder**: BigCode's programming experts |
|
- **WizardCoder**: Enhanced coding capabilities |
|
- **DeepSeek-Coder**: Chinese tech giant's coder |
|
|
|
### 💬 **Chat Optimized**
|
- **Vicuna**: UC Berkeley's ChatGPT alternative |
|
- **Zephyr**: HuggingFace's chat specialist |
|
- **OpenChat**: High-quality conversation models |
|
- **Neural-Chat**: Intel-optimized chat models |
|
|
|
### 🧮 **Reasoning Masters**
|
- **WizardMath**: Mathematical problem solving |
|
- **MetaMath**: Advanced arithmetic reasoning |
|
- **Orca-2**: Microsoft's reasoning specialist |
|
- **Goat**: Specialized arithmetic model |
|
|
|
### 👁️ **Multimodal Models**
|
- **LLaVA**: Large Language and Vision Assistant |
|
- **MiniGPT-4**: Multimodal conversational AI |
|
|
|
## 💾 Download Size Reference
|
|
|
| Model Size | FP16 | 8-bit | 4-bit | Use Case | |
|
|------------|------|-------|-------|----------| |
|
| **1-3B** | 2-6GB | 1-3GB | 0.5-1.5GB | Mobile, Edge | |
|
| **7B** | 13GB | 7GB | 3.5GB | Desktop, Laptop | |
|
| **13B** | 26GB | 13GB | 7GB | Workstation | |
|
| **30-34B** | 60GB | 30GB | 15GB | Server, Cloud | |
|
| **70B** | 140GB | 70GB | 35GB | High-end Server | |
|
|
|
## 🛠️ Where to Download
|
|
|
### **Primary Sources** |
|
- **🤗 Hugging Face**: Largest repository with 400,000+ models
|
- **🦙 Ollama**: Simple CLI tool for local deployment
|
- **📦 LM Studio**: User-friendly GUI for model management
|
|
|
### **Quantized Formats** |
|
- **GGUF**: Best for CPU inference (llama.cpp) |
|
- **GPTQ**: GPU-optimized quantization |
|
- **AWQ**: Advanced weight quantization |
|
|
|
### **Download Tips** |
|
- Use `git lfs` for large models from Hugging Face
|
- Consider bandwidth and storage before downloading |
|
- Start with 4-bit quantized versions for testing |
|
- Use `ollama pull <model_name>` for the easiest setup
|
|
|
## 🔧 Optimization Strategies
|
|
|
### **Memory Reduction** |
|
- **4-bit quantization**: 75% memory reduction |
|
- **8-bit quantization**: 50% memory reduction |
|
- **CPU offloading**: Use system RAM for overflow |
|
|
|
### **Speed Optimization** |
|
- **GPU acceleration**: CUDA, ROCm, Metal |
|
- **Batch processing**: Process multiple requests |
|
- **Context caching**: Reuse computations |
|
""") |
|
|
|
|
|
st.markdown("---") |
|
st.markdown(""" |
|
### ๐ Essential Download & Deployment Tools |
|
**๐ฆ Easy Model Deployment:** |
|
- [**Ollama**](https://ollama.ai/) โ curl -fsSL https://ollama.ai/install.sh | sh |
|
- [**LM Studio**](https://lmstudio.ai/) โ Drag-and-drop GUI for running models locally |
|
- [**GPT4All**](https://gpt4all.io/) โ Cross-platform desktop app for local LLMs |
|
**๐ค Model Repositories:** |
|
- [**Hugging Face Hub**](https://huggingface.co/models) โ Filter by model size, task, and license |
|
- [**TheBloke's Quantizations**](https://huggingface.co/TheBloke) โ Pre-quantized models in GGUF/GPTQ format |
|
- [**Awesome LLM**](https://github.com/Hannibal046/Awesome-LLMs) โ Curated list of models and resources |
|
--- |
|
""") |