add inference provider code

Changed files:
- anycoder_app/deploy.py +396 -6
- anycoder_app/ui.py +56 -5
anycoder_app/deploy.py CHANGED

```diff
@@ -2030,9 +2030,301 @@ def import_space_from_hf(space_id: str) -> Tuple[str, str, str, str]:
     return status, code, language, space_url
 
 
-def import_model_from_hf(model_id: str) -> Tuple[str, str, str, str]:
+def _generate_inference_code_template(model_id: str, pipeline_tag: Optional[str], has_inference_providers: bool) -> Optional[str]:
+    """
+    Generate inference provider code template based on model's pipeline tag.
+
+    Args:
+        model_id: The HuggingFace model ID
+        pipeline_tag: The model's pipeline tag (e.g., "text-generation", "text-to-image")
+        has_inference_providers: Whether the model has inference providers available
+
+    Returns:
+        Generated code snippet or None
+    """
+    if not has_inference_providers:
+        return None
+
+    # Map pipeline tags to code templates based on HuggingFace Inference Providers docs
+    # https://huggingface.co/docs/inference-providers
+
+    # Chat Completion / Text Generation models
+    if pipeline_tag in ["text-generation", "conversational"]:
+        return f'''import os
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+    api_key=os.environ["HF_TOKEN"],
+)
+
+completion = client.chat.completions.create(
+    model="{model_id}",
+    messages=[
+        {{
+            "role": "user",
+            "content": "What is the capital of France?"
+        }}
+    ],
+)
+
+print(completion.choices[0].message)'''
+
+    # Vision-Language Models (Image-Text to Text)
+    elif pipeline_tag in ["image-text-to-text", "visual-question-answering"]:
+        return f'''import os
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+    api_key=os.environ["HF_TOKEN"],
+)
+
+completion = client.chat.completions.create(
+    model="{model_id}",
+    messages=[
+        {{
+            "role": "user",
+            "content": [
+                {{
+                    "type": "text",
+                    "text": "Describe this image in one sentence."
+                }},
+                {{
+                    "type": "image_url",
+                    "image_url": {{
+                        "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+                    }}
+                }}
+            ]
+        }}
+    ],
+)
+
+print(completion.choices[0].message)'''
+
+    # Text to Image models
+    elif pipeline_tag == "text-to-image":
+        return f'''import os
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+    api_key=os.environ["HF_TOKEN"],
+)
+
+# output is a PIL.Image object
+image = client.text_to_image(
+    "Astronaut riding a horse",
+    model="{model_id}",
+)
+
+# Save the image
+image.save("output.png")'''
+
+    # Text to Video models
+    elif pipeline_tag == "text-to-video":
+        return f'''import os
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+    api_key=os.environ["HF_TOKEN"],
+)
+
+video = client.text_to_video(
+    "A young man walking on the street",
+    model="{model_id}",
+)
+
+# Save the video
+with open("output.mp4", "wb") as f:
+    f.write(video)'''
+
+    # Image to Image models
+    elif pipeline_tag == "image-to-image":
+        return f'''import os
+from huggingface_hub import InferenceClient
+from PIL import Image
+
+client = InferenceClient(
+    api_key=os.environ["HF_TOKEN"],
+)
+
+# Load input image
+input_image = Image.open("input.jpg")
+
+# output is a PIL.Image object
+output_image = client.image_to_image(
+    input_image,
+    model="{model_id}",
+    prompt="Make it more vibrant"
+)
+
+# Save the output
+output_image.save("output.png")'''
+
+    # Text to Speech models
+    elif pipeline_tag == "text-to-speech":
+        return f'''import os
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+    api_key=os.environ["HF_TOKEN"],
+)
+
+audio = client.text_to_speech(
+    "Hello world",
+    model="{model_id}",
+)
+
+# Save the audio
+with open("output.mp3", "wb") as f:
+    f.write(audio)'''
+
+    # Automatic Speech Recognition
+    elif pipeline_tag == "automatic-speech-recognition":
+        return f'''import os
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+    api_key=os.environ["HF_TOKEN"],
+)
+
+with open("audio.mp3", "rb") as f:
+    audio_data = f.read()
+
+result = client.automatic_speech_recognition(
+    audio_data,
+    model="{model_id}",
+)
+
+print(result)'''
+
+    # Feature Extraction / Embeddings
+    elif pipeline_tag == "feature-extraction":
+        return f'''import os
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+    api_key=os.environ["HF_TOKEN"],
+)
+
+embeddings = client.feature_extraction(
+    "Hello world",
+    model="{model_id}",
+)
+
+print(embeddings)'''
+
+    # Default: try chat completion for conversational models
+    else:
+        # If it has inference providers but unknown task, try chat completion
+        return f'''import os
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+    api_key=os.environ["HF_TOKEN"],
+)
+
+completion = client.chat.completions.create(
+    model="{model_id}",
+    messages=[
+        {{
+            "role": "user",
+            "content": "Hello, how are you?"
+        }}
+    ],
+)
+
+print(completion.choices[0].message)'''
+
+
+def _fetch_inference_provider_code(model_id: str) -> Optional[str]:
+    """
+    Fetch inference provider information from HuggingFace API and generate code template.
+
+    Args:
+        model_id: The HuggingFace model ID (e.g., "moonshotai/Kimi-K2-Thinking")
+
+    Returns:
+        The code snippet if model has inference providers, None otherwise
+    """
+    try:
+        # Fetch trending models data from HuggingFace API
+        response = requests.get("https://huggingface.co/api/trending", timeout=10)
+
+        if response.status_code != 200:
+            print(f"Failed to fetch trending models API: HTTP {response.status_code}")
+            return None
+
+        trending_data = response.json()
+        recently_trending = trending_data.get("recentlyTrending", [])
+
+        # Find the specific model in trending data
+        model_info = None
+        for item in recently_trending:
+            repo_data = item.get("repoData", {})
+            if repo_data.get("id") == model_id:
+                model_info = repo_data
+                break
+
+        # If not found in trending, try to get model info directly from API
+        if not model_info:
+            try:
+                api = HfApi()
+                info = api.model_info(model_id)
+                pipeline_tag = getattr(info, "pipeline_tag", None)
+
+                # Check if model has inference providers via model info
+                # Note: The direct API might not have availableInferenceProviders
+                # In this case, we'll generate a generic template
+                has_inference = pipeline_tag is not None
+
+                if has_inference:
+                    return _generate_inference_code_template(model_id, pipeline_tag, True)
+            except Exception as e:
+                print(f"Could not fetch model info for {model_id}: {e}")
+                return None
+        else:
+            # Extract pipeline tag and inference providers info
+            pipeline_tag = model_info.get("pipeline_tag")
+            inference_providers = model_info.get("availableInferenceProviders", [])
+            has_inference_providers = len(inference_providers) > 0
+
+            # Generate code template based on pipeline tag
+            return _generate_inference_code_template(model_id, pipeline_tag, has_inference_providers)
+
+        return None
+
+    except Exception as e:
+        print(f"Error fetching inference provider code: {e}")
+        return None
+
+
+# Global storage for code alternatives (used when both inference and local code are available)
+_model_code_alternatives = {}
+
+
+def store_model_code_alternatives(model_id: str, inference_code: Optional[str], local_code: Optional[str]):
+    """Store both code alternatives for a model for later retrieval."""
+    global _model_code_alternatives
+    _model_code_alternatives[model_id] = {
+        'inference': inference_code,
+        'local': local_code
+    }
+
+
+def get_model_code_alternatives(model_id: str) -> Dict[str, Optional[str]]:
+    """Retrieve stored code alternatives for a model."""
+    global _model_code_alternatives
+    return _model_code_alternatives.get(model_id, {'inference': None, 'local': None})
+
+
+def import_model_from_hf(model_id: str, prefer_local: bool = False) -> Tuple[str, str, str, str]:
     """
     Import a HuggingFace model by ID and extract code snippet.
+    Tries to fetch both inference provider code and transformers/diffusers code from README.
+
+    Args:
+        model_id: The HuggingFace model ID
+        prefer_local: If True and both options available, return local code instead of inference code
 
     Returns: (status, code, language, model_url)
     """
```
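`_fetch_inference_provider_code` above assumes a particular response shape from `https://huggingface.co/api/trending`. For reference, a minimal sketch of that shape, reduced to just the fields the lookup reads; the provider entry is invented and the model ID reuses the docstring's example:

```python
# Hypothetical /api/trending excerpt, reduced to the fields read above.
trending_data = {
    "recentlyTrending": [
        {
            "repoData": {
                "id": "moonshotai/Kimi-K2-Thinking",       # matched against model_id
                "pipeline_tag": "text-generation",         # selects the template branch
                "availableInferenceProviders": ["novita"], # non-empty => providers exist
            }
        }
    ]
}
```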
```diff
@@ -2042,13 +2334,111 @@ def import_model_from_hf(model_id: str) -> Tuple[str, str, str, str]:
     # Build model URL
     model_url = f"https://huggingface.co/{model_id}"
 
-    #
-
+    # Try to fetch both types of code
+    inference_code = _fetch_inference_provider_code(model_id)
+
+    # Also try to extract transformers/diffusers code from README
+    readme_status, readme_code, _ = import_repo_to_app(model_url)
+    has_readme_code = readme_code and ("transformers" in readme_code or "diffusers" in readme_code)
 
-    #
-
+    # Store both alternatives for later switching
+    store_model_code_alternatives(model_id, inference_code, readme_code if has_readme_code else None)
 
-
+    # Build status message and code based on what's available
+    if inference_code and has_readme_code:
+        # Both available - provide choice
+        if prefer_local:
+            status = f"""✅ **Found multiple code options for `{model_id}`**
+
+**Currently showing:** Local Transformers/Diffusers Code (Option 2) 💻
+
+**Option 1: Inference Provider Code (Serverless)** ⚡
+- Uses HuggingFace Inference API (serverless, pay-per-use)
+- No GPU required, instant startup
+- Requires `HF_TOKEN` environment variable
+
+**Option 2: Local Transformers/Diffusers Code (Currently Active)** 💻
+- Runs locally on your hardware
+- Requires GPU for optimal performance
+- Full control over model parameters
+
+---
+
+To switch to inference provider code, click the button below or ask: "Show me the inference provider code instead"
+"""
+            code = readme_code
+        else:
+            status = f"""✅ **Found multiple code options for `{model_id}`**
+
+**Currently showing:** Inference Provider Code (Option 1) ⚡ *Recommended*
+
+**Option 1: Inference Provider Code (Serverless - Currently Active)** ⚡
+- Uses HuggingFace Inference API (serverless, pay-per-use)
+- No GPU required, instant startup
+- Requires `HF_TOKEN` environment variable
+
+**Option 2: Local Transformers/Diffusers Code** 💻
+- Runs locally on your hardware
+- Requires GPU for optimal performance
+- Full control over model parameters
+
+---
+
+To switch to local transformers/diffusers code, click the button below or ask: "Show me the local transformers code instead"
+"""
+            code = inference_code
+
+        language = "gradio"
+        return status, code, language, model_url
+
+    elif inference_code:
+        # Only inference provider code available
+        status = f"✅ Imported inference provider code for `{model_id}` (serverless inference)"
+        language = "gradio"
+        return status, inference_code, language, model_url
+
+    elif has_readme_code:
+        # Only README code available
+        status = f"✅ Imported transformers/diffusers code from README for `{model_id}` (local inference)"
+        language = "gradio"
+        return status, readme_code, language, model_url
+
+    else:
+        # No code found
+        status = f"⚠️ No inference provider or transformers/diffusers code found for `{model_id}`"
+        return status, "", "python", model_url
+
+
+def switch_model_code_type(model_id: str, current_code: str) -> Tuple[str, str]:
+    """
+    Switch between inference provider code and local transformers/diffusers code.
+
+    Args:
+        model_id: The model ID
+        current_code: The currently displayed code
+
+    Returns: (status_message, new_code)
+    """
+    alternatives = get_model_code_alternatives(model_id)
+    inference_code = alternatives['inference']
+    local_code = alternatives['local']
+
+    if not inference_code and not local_code:
+        return "⚠️ No alternative code available for this model.", current_code
+
+    # Determine which code is currently shown
+    is_showing_inference = current_code == inference_code
+
+    if is_showing_inference and local_code:
+        # Switch to local code
+        status = f"✅ Switched to **Local Transformers/Diffusers Code** for `{model_id}` 💻\n\nThis code runs locally on your hardware."
+        return status, local_code
+    elif not is_showing_inference and inference_code:
+        # Switch to inference provider code
+        status = f"✅ Switched to **Inference Provider Code** for `{model_id}` ⚡\n\nThis code uses serverless HuggingFace Inference API."
+        return status, inference_code
+    else:
+        return "⚠️ Alternative code type not available for this model.", current_code
 
 
 def import_repo_to_app(url: str, framework: str = "Gradio") -> Tuple[str, str, str]:
```
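Not visible in the hunks above: the new helpers reference `requests`, `HfApi`, and the `typing` names `Dict`, `Optional`, and `Tuple`, none of which the diff adds, so they are presumably already imported at the top of `deploy.py`. For reference, the set the new code relies on:

```python
# Names the new deploy.py helpers use; assumed to be imported at module top
# (the diff itself does not touch any import lines).
from typing import Dict, Optional, Tuple

import requests
from huggingface_hub import HfApi
```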
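Taken together, a minimal sketch of the intended flow, assuming both code variants are found for the model (the model ID reuses the docstring's example; the inline comments describe the behavior implemented above):

```python
# Sketch: import a trending model's code, then toggle between the two variants.
status, code, language, model_url = import_model_from_hf("moonshotai/Kimi-K2-Thinking")
# status   -> markdown listing both options (inference provider code shown by default)
# language -> "gradio"

# switch_model_code_type compares the passed-in code against the stored
# alternatives, so feeding the current snippet back in flips to the other one.
status_msg, code = switch_model_code_type("moonshotai/Kimi-K2-Thinking", code)  # -> local code
status_msg, code = switch_model_code_type("moonshotai/Kimi-K2-Thinking", code)  # -> inference code again
```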
anycoder_app/ui.py CHANGED

```diff
@@ -45,7 +45,8 @@ from .deploy import (
     _parse_repo_or_model_url, load_project_from_url, check_hf_space_url,
     import_repo_to_app, extract_import_statements,
     generate_requirements_txt_with_llm, prettify_comfyui_json_for_html,
-    get_trending_models, import_model_from_hf, get_trending_spaces, import_space_from_hf
+    get_trending_models, import_model_from_hf, get_trending_spaces, import_space_from_hf,
+    switch_model_code_type
 )
 from .agent import (
     agent_generate_with_questions, agent_process_answers_and_generate
@@ -113,6 +114,7 @@ with gr.Blocks(
     models_first_change = gr.State(True)
     spaces_first_change = gr.State(True)
     agent_mode_enabled = gr.State(False)
+    current_trending_model_id = gr.State("")  # Track current trending model for code switching
     agent_conversation_state = gr.State({
         "stage": "initial",  # initial, waiting_for_answers, generating
         "original_query": "",
@@ -155,6 +157,7 @@ with gr.Blocks(
         visible=True
     )
     trending_models_status = gr.Markdown(visible=False)
+    switch_model_code_btn = gr.Button("🔄 Switch Code Type", visible=False, size="sm", variant="secondary")
 
     # Trending HuggingFace Spaces section
     trending_spaces_dropdown = gr.Dropdown(
```
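`current_trending_model_id` is a `gr.State`: a per-session value that renders nothing but can be read (as an input) and written (as an output) by event handlers. A tiny self-contained illustration of the mechanism, with all component names invented:

```python
import gradio as gr

# Tiny illustration of gr.State: a per-session value that renders nothing but
# can be read and written by handlers. Names here are invented.
with gr.Blocks() as demo:
    last_model = gr.State("")          # analogous to current_trending_model_id
    box = gr.Textbox(label="Model ID")
    out = gr.Markdown()

    def remember(new_id, previous):
        return f"previous: `{previous}`, current: `{new_id}`", new_id

    box.submit(remember, inputs=[box, last_model], outputs=[out, last_model])

demo.launch()
```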
```diff
@@ -1934,7 +1937,9 @@ CMD ["streamlit", "run", "streamlit_app.py", "--server.port=7860", "--server.add
             hist,  # history
             history_to_chatbot_messages(hist),  # history_output
             history_to_chatbot_messages(hist),  # chat_history
-            False  # Set first_change to False after first trigger
+            False,  # Set first_change to False after first trigger
+            gr.update(visible=False),  # switch_model_code_btn
+            ""  # current_trending_model_id
         ]
 
     if not model_id or model_id == "":
@@ -1945,7 +1950,9 @@ CMD ["streamlit", "run", "streamlit_app.py", "--server.port=7860", "--server.add
             hist,  # history
             history_to_chatbot_messages(hist),  # history_output
             history_to_chatbot_messages(hist),  # chat_history
-            False  # Keep first_change as False
+            False,  # Keep first_change as False
+            gr.update(visible=False),  # switch_model_code_btn
+            ""  # current_trending_model_id
         ]
 
     # Import the model
@@ -1957,6 +1964,9 @@ CMD ["streamlit", "run", "streamlit_app.py", "--server.port=7860", "--server.add
         # Determine code language for display
         code_lang = "python"
 
+        # Check if button should be visible (both code types available)
+        show_switch_btn = "Found multiple code options" in status
+
         return [
             gr.update(value=status, visible=True),  # status
             gr.update(value=code, language=code_lang),  # code_output
@@ -1964,7 +1974,9 @@ CMD ["streamlit", "run", "streamlit_app.py", "--server.port=7860", "--server.add
             loaded_history,  # history
             history_to_chatbot_messages(loaded_history),  # history_output
             history_to_chatbot_messages(loaded_history),  # chat_history
-            False  # Keep first_change as False
+            False,  # Keep first_change as False
+            gr.update(visible=show_switch_btn),  # switch_model_code_btn
+            model_id  # current_trending_model_id
         ]
 
     trending_models_dropdown.change(
```
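The final hunk extends `trending_models_dropdown.change(...)` with two new outputs. Gradio maps a handler's returned list onto `outputs` positionally, which is why every return branch above gained exactly the same two trailing values. A reduced, runnable sketch of that contract, with names and logic as invented stand-ins for the real handler:

```python
import gradio as gr

# Reduced sketch of the positional contract: Gradio zips the handler's
# returned values with `outputs`, so adding outputs forces every return
# branch to supply matching values in the same order.
with gr.Blocks() as demo:
    model_state = gr.State("")                       # like current_trending_model_id
    switch_btn = gr.Button("Switch", visible=False)  # like switch_model_code_btn
    dropdown = gr.Dropdown(choices=["org/model-a", "org/model-b"], label="Trending models")

    def on_select(model_id):
        if not model_id:
            # Same arity as the happy path: one value per output.
            return gr.update(visible=False), ""
        return gr.update(visible=True), model_id

    dropdown.change(on_select, inputs=dropdown, outputs=[switch_btn, model_state])

demo.launch()
```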
```diff
@@ -1977,7 +1989,46 @@ CMD ["streamlit", "run", "streamlit_app.py", "--server.port=7860", "--server.add
             history,
             history_output,
             chat_history,
-            models_first_change
+            models_first_change,
+            switch_model_code_btn,
+            current_trending_model_id
+        ]
+    )
+
+    # Handle switching between inference provider and local code
+    def handle_switch_model_code(model_id, current_code, hist):
+        """Switch between inference provider and local transformers/diffusers code"""
+        if not model_id:
+            return [
+                gr.update(),  # status
+                gr.update(),  # code_output
+                hist,  # history
+                history_to_chatbot_messages(hist),  # history_output
+                history_to_chatbot_messages(hist)  # chat_history
+            ]
+
+        status_msg, new_code = switch_model_code_type(model_id, current_code)
+
+        # Update history with switch message
+        switch_history = hist + [[f"Switched code type for {model_id}", new_code]]
+
+        return [
+            gr.update(value=status_msg, visible=True),  # status
+            gr.update(value=new_code, language="python"),  # code_output
+            switch_history,  # history
+            history_to_chatbot_messages(switch_history),  # history_output
+            history_to_chatbot_messages(switch_history)  # chat_history
+        ]
+
+    switch_model_code_btn.click(
+        handle_switch_model_code,
+        inputs=[current_trending_model_id, code_output, history],
+        outputs=[
+            trending_models_status,
+            code_output,
+            history,
+            history_output,
+            chat_history
+        ]
+    )
         ]
     )
 
```
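`handle_switch_model_code` records each switch in the chat history, which the surrounding code treats as a list of `[user_message, assistant_message]` pairs. A reduced sketch of that bookkeeping, with placeholder snippet strings invented:

```python
# Reduced sketch of the history bookkeeping in handle_switch_model_code.
# The history is a list of [user_message, assistant_message] pairs.
model_id = "moonshotai/Kimi-K2-Thinking"
hist = [["Import " + model_id, "<initially shown code>"]]
new_code = "<alternative code variant>"

# The switch is appended as one more user/assistant turn; the chat views are
# then re-rendered from the updated list via history_to_chatbot_messages(...).
switch_history = hist + [[f"Switched code type for {model_id}", new_code]]
```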