| """ | |
| FairFate Embeddings API - Qwen3-Embedding-4B | |
| Multilingual semantic embeddings for tabletop RPG product classification | |
| """ | |
| import gradio as gr | |
| from sentence_transformers import SentenceTransformer | |
| import numpy as np | |
| from typing import List, Union | |
| import spaces # ZeroGPU decorator | |

# Load model once at startup
# Using Qwen3-Embedding-4B for 2560 native dimensions (truncate to 1536 for production)
# Qwen3-4B is optimal for 1536 dims: 60% retention (vs 42.9% for GTE-Qwen2-7B)
MODEL_NAME = "Qwen/Qwen3-Embedding-4B"

print(f"Loading model: {MODEL_NAME}")
model = SentenceTransformer(MODEL_NAME, trust_remote_code=True)
print("Model loaded successfully")
print(f"Native Dimensions: {model.get_sentence_embedding_dimension()}")
print(f"Max Seq Length: {model.max_seq_length}")
print(f"Matryoshka Support: Yes (truncate to any dimension ≤ {model.get_sentence_embedding_dimension()})")

# Optional: add an instruction prefix for the RPG domain (improves accuracy by 1-5%)
INSTRUCTION_PREFIX = "Represent this tabletop RPG product for semantic search: "
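
# Note (assumption): the Qwen3-Embedding model card documents an
# "Instruct: {task}\nQuery: {text}" prompt format for queries; the simpler
# plain prefix above is this Space's own convention and works directly with
# SentenceTransformer.encode().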

@spaces.GPU(duration=60)  # ZeroGPU: allocate a GPU for up to 60 seconds per call
def generate_embeddings(
    texts: Union[str, List[str]],
    use_instruction: bool = True,
    output_dimensions: int = 1536
) -> List[List[float]]:
| """ | |
| Generate embeddings for text(s) with matryoshka truncation | |
| Args: | |
| texts: Single string or list of strings | |
| use_instruction: Whether to prepend instruction prefix (recommended) | |
| output_dimensions: Output embedding size (32-2560, default 1536 for production) | |
| Returns: | |
| List of embedding vectors (L2 normalized) | |
| """ | |
| # Handle single string | |
| if isinstance(texts, str): | |
| texts = [texts] | |
| # Add instruction prefix if enabled (Qwen3-Embedding models are instruction-aware) | |
| if use_instruction: | |
| texts = [INSTRUCTION_PREFIX + text for text in texts] | |

    # Generate embeddings
    embeddings = model.encode(
        texts,
        convert_to_numpy=True,
        normalize_embeddings=True,  # L2 normalize for cosine similarity
        show_progress_bar=False,
        batch_size=32
    )

    # Get native dimensions
    native_dims = model.get_sentence_embedding_dimension()

    # Matryoshka truncation: simply take the first N dimensions.
    # Qwen3-Embedding models support truncation to any dimension ≤ native_dims.
    if output_dimensions != native_dims:
        if output_dimensions > native_dims:
            print(f"⚠️ Warning: Requested {output_dimensions} dims but model has {native_dims}. Using {native_dims}.")
            output_dimensions = native_dims
        embeddings = embeddings[:, :output_dimensions]
        # Truncation shrinks the norm below 1, so re-normalize to keep the
        # returned vectors L2-normalized (dot product == cosine similarity).
        embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)

    # Convert to list for JSON serialization
    return embeddings.tolist()
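
# A minimal local usage sketch (illustrative only, not the HTTP API):
#   vecs = generate_embeddings(["Curse of Strahd - Gothic horror campaign"])
#   assert len(vecs) == 1 and len(vecs[0]) == 1536
# Returned vectors are L2-normalized, so np.dot(v1, v2) is a cosine similarity.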

def batch_generate(texts_input: str, use_instruction: bool, output_dims: int):
    """
    Gradio interface for batch embedding generation.
    Expects newline-separated texts.
    """
    if not texts_input.strip():
        return {"error": "Please provide at least one text"}

    texts = [t.strip() for t in texts_input.split('\n') if t.strip()]

    try:
        # Coerce to int: Gradio sliders can deliver floats, which break slicing
        embeddings = generate_embeddings(texts, use_instruction, int(output_dims))
        return embeddings
    except Exception as e:
        return {"error": str(e)}

def calculate_similarity(text1: str, text2: str, use_instruction: bool) -> dict:
    """
    Calculate comprehensive similarity metrics between two texts.
    Returns a dict with all similarity metrics.
    """
    if not text1.strip() or not text2.strip():
        return {"error": "Please provide both texts"}

    try:
        embeddings = generate_embeddings([text1, text2], use_instruction)

        # Calculate all similarity metrics
        emb1 = np.array(embeddings[0])
        emb2 = np.array(embeddings[1])

        # Cosine similarity (for normalized vectors, just the dot product)
        cosine = float(np.dot(emb1, emb2))

        # Euclidean distance (plus a bounded similarity transform of it)
        euclidean_dist = float(np.linalg.norm(emb1 - emb2))
        euclidean_sim = 1 / (1 + euclidean_dist)

        # Jaccard similarity (min/max interpretation for continuous vectors)
        intersection = np.sum(np.minimum(np.abs(emb1), np.abs(emb2)))
        union = np.sum(np.maximum(np.abs(emb1), np.abs(emb2)))
        jaccard = float(intersection / union if union > 0 else 0)

        # Sørensen-Dice coefficient (reuses the same min-intersection)
        sum_magnitudes = np.sum(np.abs(emb1)) + np.sum(np.abs(emb2))
        sorensen_dice = float(2 * intersection / sum_magnitudes if sum_magnitudes > 0 else 0)

        # Manhattan distance (L1 norm)
        manhattan = float(np.sum(np.abs(emb1 - emb2)))

        # Pearson correlation
        pearson = float(np.corrcoef(emb1, emb2)[0, 1])

        return {
            'cosine': cosine,
            'euclidean_distance': euclidean_dist,
            'euclidean_similarity': euclidean_sim,
            'jaccard': jaccard,
            'sorensen_dice': sorensen_dice,
            'manhattan': manhattan,
            'pearson': pearson
        }
    except Exception as e:
        return {"error": str(e)}

# Create Gradio interface
with gr.Blocks(title="FairFate Embeddings API - Qwen3", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # FairFate Embeddings API

    **Powered by Qwen3-Embedding-4B** - Advanced Multilingual Embedding Model

    - **100+ Languages** (English, Spanish, French, German, Chinese, Japanese, etc.)
    - **2560 Native Dimensions** (matryoshka truncation to 1536 for production)
    - **32K Context** (massive text support)
    - **Instruction-Aware** (optimized for RPG content)
    - **Matryoshka Support** (flexible 32-2560 dimensions)
    - **Optimal for 1536 dims** (60% dimension retention)

    Perfect for: product classification, semantic search, recommendations, multilingual matching
    """)
| with gr.Tab("🔮 Generate Embeddings"): | |
| gr.Markdown(""" | |
| Generate semantic embeddings for product descriptions, titles, or any text. | |
| Enter one text per line for batch processing. | |
| """) | |
| with gr.Row(): | |
| with gr.Column(): | |
| input_text = gr.Textbox( | |
| label="Input Texts (one per line)", | |
| placeholder="Example:\nStorm King's Thunder - Epic D&D 5E adventure\nCurse of Strahd - Gothic horror campaign\nPathfinder 2E Core Rulebook", | |
| lines=8 | |
| ) | |
| use_inst = gr.Checkbox(label="Use instruction prefix (recommended for RPG content)", value=True) | |
| output_dims = gr.Slider( | |
| minimum=32, maximum=2560, value=1536, step=32, | |
| label="Output Dimensions (Production: 1536)" | |
| ) | |
| submit_btn = gr.Button("Generate Embeddings", variant="primary") | |
| with gr.Column(): | |
| output_json = gr.JSON(label="Results") | |
| # Register as API endpoint AND UI button | |
| # api_name parameter makes this callable via @gradio/client as "/batch_generate" | |
| submit_btn.click( | |
| batch_generate, | |
| inputs=[input_text, use_inst, output_dims], | |
| outputs=output_json, | |
| api_name="batch_generate" # Expose as /batch_generate API endpoint | |
| ) | |
| gr.Examples( | |
| examples=[ | |
| ["D&D 5E epic fantasy adventure with dragons and dungeons", True, 1536], | |
| ["Cyberpunk shadowrun detective noir campaign\nPathfinder 2E beginner box starter set\nCall of Cthulhu horror investigation", True, 1536], | |
| ], | |
| inputs=[input_text, use_inst, output_dims], | |
| ) | |
| with gr.Tab("Similarity Calculator"): | |
| gr.Markdown(""" | |
| **Comprehensive Similarity Analysis** - Compare two texts using multiple metrics: | |
| - **Cosine Similarity**: Angle between vectors (best for semantic meaning) | |
| - **Jaccard Similarity**: Intersection over union (set-like comparison) | |
| - **Sørensen-Dice**: Weighted intersection (emphasizes shared features) | |
| - **Euclidean Distance/Similarity**: Straight-line distance in vector space | |
| - **Manhattan Distance**: Grid-based distance (L1 norm) | |
| - **Pearson Correlation**: Linear relationship between vectors | |
| Perfect for duplicate detection, classification testing, and understanding product relationships! | |
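
        For L2-normalized embeddings, several of these metrics reduce to one-line
        NumPy expressions; a minimal sketch (the two unit vectors are illustrative
        stand-ins for real embeddings from the API):

        ```python
        import numpy as np

        # Two illustrative unit vectors (real embeddings come from /batch_generate)
        a = np.array([0.6, 0.8])
        b = np.array([0.8, 0.6])

        cosine = float(a @ b)                     # dot product == cosine for unit vectors
        euclidean = float(np.linalg.norm(a - b))  # straight-line (L2) distance
        manhattan = float(np.abs(a - b).sum())    # grid-based (L1) distance
        ```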
| """) | |

        with gr.Row():
            with gr.Column():
                text1 = gr.Textbox(
                    label="First Text",
                    placeholder="Storm King's Thunder - Giant-themed D&D adventure",
                    lines=3
                )
                text2 = gr.Textbox(
                    label="Second Text",
                    placeholder="Princes of the Apocalypse - Elemental evil campaign",
                    lines=3
                )
                use_inst_sim = gr.Checkbox(label="Use instruction prefix", value=True)
                calc_btn = gr.Button("Calculate Similarity", variant="primary")
            with gr.Column():
                similarity_output = gr.JSON(label="Similarity Result")

        # Register as an API endpoint AND a UI button
        calc_btn.click(
            calculate_similarity,
            inputs=[text1, text2, use_inst_sim],
            outputs=similarity_output,
            api_name="calculate_similarity"  # Expose as the /calculate_similarity API endpoint
        )

        gr.Examples(
            examples=[
                ["D&D 5E fantasy adventure", "Dungeons and Dragons fifth edition module", True],
                ["Horror investigation mystery", "Comedy fantasy lighthearted fun", True],
                ["Pathfinder 2E rulebook", "D&D 5E Player's Handbook", True],
            ],
            inputs=[text1, text2, use_inst_sim],
        )
| with gr.Tab("API Documentation"): | |
| gr.Markdown(""" | |
| ## Quick Start | |
| ### Python | |
| ```python | |
| import requests | |
| import numpy as np | |
| url = "https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict" | |
| # Generate embeddings | |
| texts = [ | |
| "Storm King's Thunder - Epic D&D 5E adventure", | |
| "Curse of Strahd - Gothic horror campaign" | |
| ] | |
| response = requests.post( | |
| url, | |
| json={ | |
| "data": [texts, True, 1536], # [texts, use_instruction, dimensions] | |
| "fn_index": 0 # Index of generate_embeddings function | |
| } | |
| ) | |
| result = response.json() | |
| embeddings = result["data"][0] | |
| print(f"Generated {len(embeddings)} embeddings") | |
| print(f"Dimensions: {len(embeddings[0])}") # Should output 1536 | |
| ``` | |
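
        Note: `/api/predict` with `fn_index` is Gradio's legacy REST route and may
        differ across Gradio versions. A version-agnostic sketch using the official
        `gradio_client` package (the Space id below is a placeholder):

        ```python
        from gradio_client import Client

        client = Client("YOUR_USERNAME/fairfate-embeddings")  # placeholder Space id
        embeddings = client.predict(
            "Storm King's Thunder - Epic D&D 5E adventure\\nCurse of Strahd",
            True,   # use_instruction
            1536,   # output_dimensions
            api_name="/batch_generate"
        )
        ```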

        ### TypeScript/JavaScript
        ```typescript
        const url = 'https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict';

        const response = await fetch(url, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            data: [
              "Your text here\\nAnother text",  // newline-separated texts
              true,   // use_instruction
              1536    // output_dimensions (production default)
            ],
            fn_index: 0
          })
        });

        const result = await response.json();
        const embeddings = result.data[0];  // Array of 1536-dim vectors
        ```

        ### cURL
        ```bash
        curl -X POST \\
          https://YOUR_USERNAME-fairfate-embeddings.hf.space/api/predict \\
          -H "Content-Type: application/json" \\
          -d '{
            "data": ["Your text here", true, 1536],
            "fn_index": 0
          }'
        ```

        ## Parameters

        | Parameter | Type | Default | Description |
        |-----------|------|---------|-------------|
        | `texts_input` | string | required | Newline-separated texts to embed |
        | `use_instruction` | boolean | `true` | Add instruction prefix (improves accuracy) |
        | `output_dimensions` | number | `1536` | Output size (32-2560; production default 1536) |

        ## Use Cases
        - **Product Classification**: auto-tag by genre, system, theme
        - **Semantic Search**: find by meaning, not keywords
        - **Recommendations**: "similar products"
        - **Duplicate Detection**: find similar listings
        - **Multilingual Matching**: cross-language similarity

        ## Performance
        | Batch Size | GPU Throughput | CPU Throughput |
        |------------|----------------|----------------|
        | 1 | ~800/sec | ~80/sec |
        | 32 | ~4000/sec | ~250/sec |

        ## Supported Languages
        English, Spanish, French, German, Italian, Portuguese, Russian, Polish, Dutch, Czech,
        Chinese, Japanese, Korean, Arabic, Hebrew, Hindi, Thai, Vietnamese, Indonesian,
        Turkish, Swedish, Norwegian, Danish, Finnish, Greek, Romanian, Hungarian, and 80+ more!

        ## Citation
        ```bibtex
        @misc{qwen3-embedding-2025,
          title={Qwen3-Embedding: Multilingual Text Embedding Models},
          author={Qwen Team, Alibaba Cloud},
          year={2025},
          url={https://github.com/QwenLM/Qwen3-Embedding}
        }
        ```
        """)
| with gr.Tab("Model Info"): | |
| gr.Markdown(f""" | |
| ## Model Details | |
| - **Model:** {MODEL_NAME} | |
| - **Dimensions:** {model.get_sentence_embedding_dimension()} | |
| - **Max Sequence Length:** {model.max_seq_length} tokens | |
| - **Languages:** 100+ | |
| - **License:** Apache 2.0 | |
| - **Normalization:** L2 normalized (ready for cosine similarity) | |
| ## Advantages | |
| **Best Multilingual Performance** - Top tier on MTEB leaderboard | |
| **Massive Context** - 32K tokens (vs 512 for most models) | |
| **Instruction-Aware** - Can customize for specific domains | |
| **Flexible Dimensions** - 32 to 2560 dimensions (matryoshka truncation) | |
| **Code-Switching** - Handles mixed-language text | |
| **Production Optimized** - 60% retention at 1536 dims (best in class) | |
| ## Resources | |
| - [Model Card](https://huggingface.co/Qwen/Qwen3-Embedding-4B) | |
| - [Qwen3-Embedding GitHub](https://github.com/QwenLM/Qwen3-Embedding) | |
| - [Qwen Blog](https://qwenlm.github.io/) | |
| - [MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard) | |
| """) | |

# Launch with API enabled
if __name__ == "__main__":
    demo.launch()