Merge pull request #11 from gperdrizet/dev
Files changed:

- .github/workflows/python_ci.yml  +1 -1
- .gitignore  +2 -1
- README.md  +3 -5
- configuration.py  +11 -11
- functions/job_call.py  +7 -8
- functions/writer_agent.py  +1 -1
- inference_endpoints/deepseekR1-qwen-32B.py  +0 -74
- inference_endpoints/llama3-1-8B-instruct.py  +0 -76
- inference_endpoints/qwen2-5-coder-14B-instruct.py  +0 -74
- requirements.txt  +0 -1
.github/workflows/python_ci.yml
CHANGED

@@ -25,7 +25,7 @@ jobs:
       - name: Test with unittest
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-
+         ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          python -m unittest tests/test_gradio.py
          python -m unittest tests/test_linkedin_resume.py
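Note: this change only exposes the new secret to the unittest step as an ordinary environment variable. A minimal, hypothetical sketch of how a test could confirm the key is available (this test class is illustrative, not part of the repo):

```python
import os
import unittest


class TestAnthropicKeyAvailable(unittest.TestCase):
    """Illustrative check that the CI step injected the secret."""

    def test_key_is_present(self):
        # The workflow maps secrets.ANTHROPIC_API_KEY into the environment,
        # so any test that calls the Anthropic API can read it from os.environ.
        self.assertTrue(os.environ.get("ANTHROPIC_API_KEY"))


if __name__ == "__main__":
    unittest.main()
```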
.gitignore
CHANGED

@@ -2,4 +2,5 @@ __pycache__
 .vscode
 .venv
 .env
-data
+data
+inference_endopints
README.md
CHANGED

@@ -2,7 +2,7 @@
 
 [](https://github.com/gperdrizet/resumate/actions/workflows/python_ci.yml)[](https://github.com/gperdrizet/resumate/actions/workflows/publish_hf_space.yml)[](https://github.com/gperdrizet/resumate/actions/workflows/codespaces/create_codespaces_prebuilds)
 
-Resumate is a simple web app that helps you generate a tailored resume for a specific job post. It collects your LinkedIn profile (PDF export), GitHub profile URL, and the job post text, then processes this information to help you create a resume that matches the job requirements.
+Resumate is a simple web app that helps you generate a tailored resume for a specific job post using the Antropic API. It collects your LinkedIn profile (PDF export), GitHub profile URL, and the job post text, then processes this information to help you create a resume that matches the job requirements.
 
 
 ## Features

@@ -20,8 +20,7 @@
 
 2. **Add API keys as secrets**
    - In your fork, go to **Settings > Secrets and variables > Codespaces**.
-   - Add
-   - These will be available as environment variables in your Codespace.
+   - Add `ANTHROPIC_API_KEY` with your API key as value.
 
 3. **Start a Codespace**
    - Click the "Code" button on your fork and select "Open with Codespaces".

@@ -48,8 +47,7 @@
 4. **Set your API keys as environment variables:**
    Add your API keys to `.venv/bin/activate`:
    ```bash
-   export
-   export MODAL_API_KEY=your_modal_api_key
+   export ANTHROPIC_API_KEY=your_anthropic_api_key
    ```
 5. **Activate the virtual environment:**
    ```bash
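As a quick sanity check after editing `.venv/bin/activate`, something like the following (hypothetical, not part of the repo) confirms the key is visible to Python:

```python
import os

# Prints a masked confirmation rather than the key itself.
key = os.environ.get("ANTHROPIC_API_KEY")
print("ANTHROPIC_API_KEY set" if key else "ANTHROPIC_API_KEY is missing")
```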
configuration.py
CHANGED

@@ -6,30 +6,30 @@ from smolagents import OpenAIServerModel
 
 DEFAULT_GITHUB_PROFILE = "https://github.com/gperdrizet"
 
-# AGENT_MODEL = OpenAIServerModel(
-#     model_id="gpt-4.1",
-#     max_tokens=8000
-# )
-
 # Will be used for single shot summarization with no-frills prompting
 # (e.g. job call extraction). It needs to output JSON formatted text,
 # but this task does not require any complex reasoning or planning.
-
-    base_url="https://
-    api_key=os.environ[
+SUMMARIZER_CLIENT = OpenAI(
+    base_url="https://api.anthropic.com/v1/",
+    api_key=os.environ["ANTHROPIC_API_KEY"]
 )
 
+SUMMARIZER_MODEL = "claude-3-5-haiku-20241022"
+
 # Will be used for resume resume writing agent via HuggingFace smolagents
 # Including selection of relevant projects from GitHub profile
 #
 # Notes:
 # - DeepSeek-R1-Distill-Qwen-32B does not seem to work well with smolagents,
 #   has trouble correctly formatting responses as code.
+# - Qwen2.5-Coder-14B-Instruct works OK, but is not great at markdown formatting
+#   and tends to get some details wrong.
+# - Claude-3-5-Haiku is the best model for this task so far.
 
 AGENT_MODEL = OpenAIServerModel(
-    model_id="
-    api_base="https://
-    api_key=os.environ["
+    model_id="claude-3-5-haiku-20241022", # Same as HF model string
+    api_base="https://api.anthropic.com/v1/",
+    api_key=os.environ["ANTHROPIC_API_KEY"],
 )
 
 INSTRUCTIONS = """
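For context, the new summarizer client points the standard `openai` SDK at Anthropic's OpenAI-compatible endpoint, so `SUMMARIZER_CLIENT` is used like any other chat-completions client. A minimal sketch under that assumption (the prompt text is illustrative, not from the repo):

```python
import os
from openai import OpenAI

# Same wiring as the new configuration.py: the Anthropic OpenAI-compatible
# endpoint is addressed with the regular OpenAI client.
client = OpenAI(
    base_url="https://api.anthropic.com/v1/",
    api_key=os.environ["ANTHROPIC_API_KEY"],
)

# Illustrative one-shot call; the real prompts live in configuration.py.
response = client.chat.completions.create(
    model="claude-3-5-haiku-20241022",
    messages=[{"role": "user", "content": "Summarize this job post: ..."}],
)
print(response.choices[0].message.content)
```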
functions/job_call.py
CHANGED

@@ -4,7 +4,11 @@ import json
 import logging
 from pathlib import Path
 from datetime import datetime
-from configuration import
+from configuration import (
+    JOB_CALL_EXTRACTION_PROMPT,
+    SUMMARIZER_MODEL,
+    SUMMARIZER_CLIENT
+)
 
 # pylint: disable=broad-exception-caught
 

@@ -57,11 +61,6 @@ def summarize_job_call(job_call: str) -> str:
 
     logger.info("Summarizing job call (%d characters)", len(job_call))
 
-    # Default to first available model
-    model = SUMMARIZER_MODEL.models.list().data[0]
-    model_id = model.id
-    print(f"Using model: {model_id}")
-
     messages = [
         {
             'role': 'system',

@@ -70,12 +69,12 @@ def summarize_job_call(job_call: str) -> str:
     ]
 
     completion_args = {
-        'model':
+        'model': SUMMARIZER_MODEL,
         'messages': messages,
     }
 
     try:
-        response =
+        response = SUMMARIZER_CLIENT.chat.completions.create(**completion_args)
 
     except Exception as e:
         response = None
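A hedged usage sketch of the updated summarizer path; the sample job text is made up, and the behavior on failure is an assumption based on the `response = None` fallback shown above:

```python
from functions.job_call import summarize_job_call

# Illustrative input only; real job posts are pasted into the Gradio UI.
job_post = "Machine learning engineer, remote, Python and LLM experience required."

summary = summarize_job_call(job_post)

# If the Anthropic call raises, the function sets response to None, so the
# caller should be prepared for an empty result (assumption, see diff above).
if summary:
    print(summary)
else:
    print("Summarization failed; check ANTHROPIC_API_KEY.")
```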
functions/writer_agent.py
CHANGED

@@ -30,7 +30,7 @@ def write_resume(content: str, user_instructions: str = None, job_summary: str =
     agent = CodeAgent(
         model=AGENT_MODEL,
         tools=[],
-        additional_authorized_imports=['json'],
+        additional_authorized_imports=['json', 'pandas'],
         name="writer_agent",
         verbosity_level=5,
         max_steps=20,
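`additional_authorized_imports` whitelists modules that the smolagents `CodeAgent` may import in the Python it generates, so adding `'pandas'` lets the writer agent work with tabular data. A minimal sketch of the idea (the model wiring and task string are placeholders, not the repo's values):

```python
from smolagents import CodeAgent, OpenAIServerModel

# Placeholder model wiring; the real AGENT_MODEL comes from configuration.py.
model = OpenAIServerModel(
    model_id="claude-3-5-haiku-20241022",
    api_base="https://api.anthropic.com/v1/",
    api_key="sk-...",  # placeholder, use ANTHROPIC_API_KEY in practice
)

# With 'pandas' authorized, code the agent writes can import pandas without
# being rejected by the sandboxed Python executor.
agent = CodeAgent(
    model=model,
    tools=[],
    additional_authorized_imports=['json', 'pandas'],
)

# agent.run("Organize these projects into a table and pick the three most relevant.")
```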
inference_endpoints/deepseekR1-qwen-32B.py
DELETED

@@ -1,74 +0,0 @@
-"""Run OpenAI-compatible LLM inference with DeepSeek-V3 and vLLM
-Usage: modal deploy deepseek-v3.py"""
-
-## Set up the container image
-import os
-import subprocess
-import modal
-
-vllm_image = (
-    modal.Image.debian_slim(python_version="3.12")
-    .pip_install(
-        "vllm==0.7.2",
-        "huggingface_hub[hf_transfer]==0.26.2",
-        "flashinfer-python==0.2.0.post2", # pinning, very unstable
-        extra_index_url="https://flashinfer.ai/whl/cu124/torch2.5",
-    )
-    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"}) # faster model transfers
-)
-
-# Turn on V1 backend engine. Needs CUDA >=8, excluding 8.6 and 8.9.
-vllm_image = vllm_image.env({"VLLM_USE_V1": "1"})
-
-# Download the model weights
-MODELS_DIR = "/models"
-MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
-
-# Cache model weights
-hf_cache_vol = modal.Volume.from_name("huggingface-cache", create_if_missing=True)
-vllm_cache_vol = modal.Volume.from_name("vllm-cache", create_if_missing=True)
-
-
-## Build a vLLM engine and serve it
-app = modal.App("deepseek-R1-qwen-32B")
-
-N_GPU = 2
-MINUTES = 60 # seconds
-VLLM_PORT = 8000
-
-
-@app.function(
-    image=vllm_image,
-    gpu=f"H100:{N_GPU}",
-    scaledown_window=15 * MINUTES, # how long should we stay up with no requests?
-    timeout=10 * MINUTES, # how long should we wait for container start?
-    volumes={
-        "/root/.cache/huggingface": hf_cache_vol,
-        "/root/.cache/vllm": vllm_cache_vol,
-    },
-    secrets=[modal.Secret.from_name("resumate_key")]
-)
-
-@modal.concurrent(
-    max_inputs=100
-) # how many requests can one replica handle? tune carefully!
-
-@modal.web_server(port=VLLM_PORT, startup_timeout=15 * MINUTES)
-def serve():
-    """Run vLLM inference server with DeepSeek model."""
-
-    cmd = [
-        "vllm",
-        "serve",
-        "--uvicorn-log-level=info",
-        MODEL_NAME,
-        "--served-model-name", MODEL_NAME,
-        "--tensor-parallel-size", "2",
-        "--max-model-len", "32768",
-        "--host", "0.0.0.0",
-        "--port", str(VLLM_PORT),
-        "--api-key", os.environ["MODAL_TOKEN_SECRET"],
-        "--enforce-eager"
-    ]
-
-    subprocess.Popen(" ".join(cmd), shell=True)
inference_endpoints/llama3-1-8B-instruct.py
DELETED

@@ -1,76 +0,0 @@
-"""Run OpenAI-compatible LLM text summarization with LLaMA 3.1-8B and vLLM
-Usage: modal deploy vllm_summarization_server.py"""
-
-import os
-import subprocess
-import modal
-
-vllm_image = (
-    modal.Image.debian_slim(python_version="3.12")
-    .pip_install(
-        "vllm==0.7.2",
-        "huggingface_hub[hf_transfer]==0.26.2",
-        "flashinfer-python==0.2.0.post2", # pinning, very unstable
-        extra_index_url="https://flashinfer.ai/whl/cu124/torch2.5",
-    )
-    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"}) # faster model transfers
-)
-
-
-# Turn on V1 backend engine. Note: NVIDIA T4 does not seem to support
-# this due to CUDA incompatibility. Needs CUDA >=8, excluding 8.6 and 8.9.
-# For V1 backend use L40S
-vllm_image = vllm_image.env({"VLLM_USE_V1": "1"})
-
-# Download the model weights
-MODELS_DIR = "/llamas"
-MODEL_NAME = "neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16"
-MODEL_REVISION = "a7c09948d9a632c2c840722f519672cd94af885d"
-
-# Cache model weights
-hf_cache_vol = modal.Volume.from_name("huggingface-cache", create_if_missing=True)
-vllm_cache_vol = modal.Volume.from_name("vllm-cache", create_if_missing=True)
-
-
-## Build a vLLM engine and serve it
-app = modal.App("llama-3-1-8B-instruct")
-
-N_GPU = 1
-MINUTES = 60 # seconds
-VLLM_PORT = 8000
-
-
-@app.function(
-    image=vllm_image,
-    gpu=f"L40S:{N_GPU}",
-    scaledown_window=15 * MINUTES, # how long should we stay up with no requests?
-    timeout=10 * MINUTES, # how long should we wait for container start?
-    volumes={
-        "/root/.cache/huggingface": hf_cache_vol,
-        "/root/.cache/vllm": vllm_cache_vol,
-    },
-    secrets=[modal.Secret.from_name("resumate_key")]
-)
-
-@modal.concurrent(
-    max_inputs=100
-) # how many requests can one replica handle? tune carefully!
-
-@modal.web_server(port=VLLM_PORT, startup_timeout=5 * MINUTES)
-def serve():
-    """Serve the LLaMA 3.1-8B Instruct model with vLLM."""
-
-    cmd = [
-        "vllm",
-        "serve",
-        "--uvicorn-log-level=info",
-        MODEL_NAME,
-        "--served-model-name", MODEL_NAME,
-        "--revision", MODEL_REVISION,
-        "--host", "0.0.0.0",
-        "--port", str(VLLM_PORT),
-        "--api-key", os.environ["MODAL_TOKEN_SECRET"],
-        "--enforce-eager"
-    ]
-
-    subprocess.Popen(" ".join(cmd), shell=True)
inference_endpoints/qwen2-5-coder-14B-instruct.py
DELETED

@@ -1,74 +0,0 @@
-"""Run OpenAI-compatible LLM inference with DeepSeek-V3 and vLLM
-Usage: modal deploy deepseek-v3.py"""
-
-## Set up the container image
-import os
-import subprocess
-import modal
-
-vllm_image = (
-    modal.Image.debian_slim(python_version="3.12")
-    .pip_install(
-        "vllm==0.7.2",
-        "huggingface_hub[hf_transfer]==0.26.2",
-        "flashinfer-python==0.2.0.post2", # pinning, very unstable
-        extra_index_url="https://flashinfer.ai/whl/cu124/torch2.5",
-    )
-    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"}) # faster model transfers
-)
-
-# Turn on V1 backend engine. Needs CUDA >=8, excluding 8.6 and 8.9.
-vllm_image = vllm_image.env({"VLLM_USE_V1": "1"})
-
-# Download the model weights
-MODELS_DIR = "/models"
-MODEL_NAME = "Qwen/Qwen2.5-Coder-14B-Instruct"
-
-# Cache model weights
-hf_cache_vol = modal.Volume.from_name("huggingface-cache", create_if_missing=True)
-vllm_cache_vol = modal.Volume.from_name("vllm-cache", create_if_missing=True)
-
-
-## Build a vLLM engine and serve it
-app = modal.App("qwen2-5-coder-14B-instruct")
-
-N_GPU = 1
-MINUTES = 60 # seconds
-VLLM_PORT = 8000
-
-
-@app.function(
-    image=vllm_image,
-    gpu=f"L40S:{N_GPU}",
-    scaledown_window=15 * MINUTES, # how long should we stay up with no requests?
-    timeout=10 * MINUTES, # how long should we wait for container start?
-    volumes={
-        "/root/.cache/huggingface": hf_cache_vol,
-        "/root/.cache/vllm": vllm_cache_vol,
-    },
-    secrets=[modal.Secret.from_name("resumate_key")]
-)
-
-@modal.concurrent(
-    max_inputs=100
-) # how many requests can one replica handle? tune carefully!
-
-@modal.web_server(port=VLLM_PORT, startup_timeout=15 * MINUTES)
-def serve():
-    """Run vLLM inference server with DeepSeek model."""
-
-    cmd = [
-        "vllm",
-        "serve",
-        "--uvicorn-log-level=info",
-        MODEL_NAME,
-        "--served-model-name", MODEL_NAME,
-        "--tensor-parallel-size", "2",
-        "--max-model-len", "16000",
-        "--host", "0.0.0.0",
-        "--port", str(VLLM_PORT),
-        "--api-key", os.environ["MODAL_TOKEN_SECRET"],
-        "--enforce-eager"
-    ]
-
-    subprocess.Popen(" ".join(cmd), shell=True)
requirements.txt
CHANGED

@@ -1,5 +1,4 @@
 gradio==5.35.0
-modal
 openai
 PyPDF2
 requests