lewtun (HF staff) committed
Commit 118ffe4 • 2 parents: 533bc81 efe936d

Merge branch 'main' into add-nli

.env.example DELETED
@@ -1,4 +0,0 @@
-AUTOTRAIN_USERNAME=autoevaluator # The bot that authors evaluation jobs
-HF_TOKEN=hf_xxx # An API token of the `autoevaluator` user
-AUTOTRAIN_BACKEND_API=https://api-staging.autotrain.huggingface.co # The AutoTrain backend to send jobs to. Use https://api.autotrain.huggingface.co for prod
-DATASETS_PREVIEW_API=https://datasets-server.huggingface.co # The API to grab dataset information from
.env.template ADDED
@@ -0,0 +1,4 @@
+AUTOTRAIN_USERNAME=autoevaluator # The bot or user that authors evaluation jobs
+HF_TOKEN=hf_xxx # An API token of the `autoevaluator` user
+AUTOTRAIN_BACKEND_API=https://api-staging.autotrain.huggingface.co # The AutoTrain backend to send jobs to. Use https://api.autotrain.huggingface.co for prod or http://localhost:8000 for local development
+DATASETS_PREVIEW_API=https://datasets-server.huggingface.co # The API to grab dataset information from
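These variables are read at start-up by the Streamlit app and by the scoring script added in this commit. For reference, a minimal sketch of how they can be loaded, mirroring the python-dotenv pattern used in run_evaluation_jobs.py below (the fallback values are illustrative, not part of the template):

```
import os

from dotenv import load_dotenv

# Copy .env.template to .env and fill in the values, then load it if present
if os.path.isfile(".env"):
    load_dotenv(".env")

HF_TOKEN = os.getenv("HF_TOKEN")
AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME", "autoevaluator")
AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API", "https://api-staging.autotrain.huggingface.co")
```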
.github/workflows/run_evaluation_jobs.yml ADDED
@@ -0,0 +1,30 @@
+name: Start evaluation jobs
+
+on:
+  schedule:
+    - cron: '*/15 * * * *' # Start evaluations every 15th minute
+
+jobs:
+
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Setup Python Environment
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+
+      - name: Install requirements
+        run: pip install -r requirements.txt
+
+      - name: Execute scoring script
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          AUTOTRAIN_USERNAME: ${{ secrets.AUTOTRAIN_USERNAME }}
+          AUTOTRAIN_BACKEND_API: ${{ secrets.AUTOTRAIN_BACKEND_API }}
+        run: |
+          HF_TOKEN=$HF_TOKEN AUTOTRAIN_USERNAME=$AUTOTRAIN_USERNAME AUTOTRAIN_BACKEND_API=$AUTOTRAIN_BACKEND_API python run_evaluation_jobs.py
README.md CHANGED
@@ -39,7 +39,7 @@ pip install -r requirements.txt
 Next, copy the example file of environment variables:
 
 ```
-cp .env.examples .env
+cp .env.template .env
 ```
 
 and set the `HF_TOKEN` variable with a valid API token from the `autoevaluator` user. Finally, spin up the application by running:
@@ -53,5 +53,11 @@ streamlit run app.py
 Models are evaluated by AutoTrain, with the payload sent to the `AUTOTRAIN_BACKEND_API` environment variable. The current configuration for evaluation jobs running on Spaces is:
 
 ```
-AUTOTRAIN_BACKEND_API=https://api.autotrain.huggingface.co
+AUTOTRAIN_BACKEND_API=https://api-staging.autotrain.huggingface.co
+```
+
+To evaluate models with a _local_ instance of AutoTrain, change the environment to:
+
+```
+AUTOTRAIN_BACKEND_API=http://localhost:8000
 ```
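The app uses this variable to decide how submissions are approved: with a local backend it processes and approves the project inline, while staging/prod submissions are picked up later by the run_evaluation_jobs.py cron job (see the app.py diff below). A minimal sketch of that branch, with the submission logic elided and the default value shown only for illustration:

```
import os

AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API", "https://api-staging.autotrain.huggingface.co")

if "localhost" in AUTOTRAIN_BACKEND_API:
    # Local development: poll the AutoTrain project status and approve it on the fly
    ...
else:
    # Staging/prod: approval happens later in run_evaluation_jobs.py
    ...
```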
app.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import time
 from pathlib import Path
 
 import pandas as pd
@@ -561,50 +562,77 @@ with st.form(key="form"):
                 ).json()
                 print(f"INFO -- Dataset creation response: {data_json_resp}")
                 if data_json_resp["download_status"] == 1:
-                    train_json_resp = http_get(
-                        path=f"/projects/{project_json_resp['id']}/data/start_process",
+                    train_json_resp = http_post(
+                        path=f"/projects/{project_json_resp['id']}/data/start_processing",
                         token=HF_TOKEN,
                         domain=AUTOTRAIN_BACKEND_API,
                     ).json()
-                    print(f"INFO -- AutoTrain job response: {train_json_resp}")
-                    if train_json_resp["success"]:
-                        train_eval_index = {
-                            "train-eval-index": [
-                                {
-                                    "config": selected_config,
-                                    "task": AUTOTRAIN_TASK_TO_HUB_TASK[selected_task],
-                                    "task_id": selected_task,
-                                    "splits": {"eval_split": selected_split},
-                                    "col_mapping": col_mapping,
-                                }
-                            ]
-                        }
-                        selected_metadata = yaml.dump(train_eval_index, sort_keys=False)
-                        dataset_card_url = get_dataset_card_url(selected_dataset)
-                        st.success("✅ Successfully submitted evaluation job!")
-                        st.markdown(
-                            f"""
-                            Evaluation can take up to 1 hour to complete, so grab a ☕️ or 🍵 while you wait:
-
-                            * 🔔 A [Hub pull request](https://huggingface.co/docs/hub/repositories-pull-requests-discussions) with the evaluation results will be opened for each model you selected. Check your email for notifications.
-                            * 📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) to view the results from your submission once the Hub pull request is merged.
-                            * 🥱 Tired of configuring evaluations? Add the following metadata to the [dataset card]({dataset_card_url}) to enable 1-click evaluations:
-                            """  # noqa
-                        )
-                        st.markdown(
-                            f"""
-                            ```yaml
-                            {selected_metadata}
-                            """
-                        )
-                        print("INFO -- Pushing evaluation job logs to the Hub")
-                        evaluation_log = {}
-                        evaluation_log["payload"] = project_payload
-                        evaluation_log["project_creation_response"] = project_json_resp
-                        evaluation_log["dataset_creation_response"] = data_json_resp
-                        evaluation_log["autotrain_job_response"] = train_json_resp
-                        commit_evaluation_log(evaluation_log, hf_access_token=HF_TOKEN)
+                    # For local development we process and approve projects on-the-fly
+                    if "localhost" in AUTOTRAIN_BACKEND_API:
+                        with st.spinner("⏳ Waiting for data processing to complete ..."):
+                            is_data_processing_success = False
+                            while is_data_processing_success is not True:
+                                project_status = http_get(
+                                    path=f"/projects/{project_json_resp['id']}",
+                                    token=HF_TOKEN,
+                                    domain=AUTOTRAIN_BACKEND_API,
+                                ).json()
+                                if project_status["status"] == 3:
+                                    is_data_processing_success = True
+                                time.sleep(10)
+
+                            # Approve training job
+                            train_job_resp = http_post(
+                                path=f"/projects/{project_json_resp['id']}/start_training",
+                                token=HF_TOKEN,
+                                domain=AUTOTRAIN_BACKEND_API,
+                            ).json()
+                        st.success("✅ Data processing and project approval complete - go forth and evaluate!")
                     else:
-                        st.error("🙈 Oh no, there was an error submitting your evaluation job!")
+                        # Prod/staging submissions are evaluated in a cron job via run_evaluation_jobs.py
+                        print(f"INFO -- AutoTrain job response: {train_json_resp}")
+                        if train_json_resp["success"]:
+                            train_eval_index = {
+                                "train-eval-index": [
+                                    {
+                                        "config": selected_config,
+                                        "task": AUTOTRAIN_TASK_TO_HUB_TASK[selected_task],
+                                        "task_id": selected_task,
+                                        "splits": {"eval_split": selected_split},
+                                        "col_mapping": col_mapping,
+                                    }
+                                ]
+                            }
+                            selected_metadata = yaml.dump(train_eval_index, sort_keys=False)
+                            dataset_card_url = get_dataset_card_url(selected_dataset)
+                            st.success("✅ Successfully submitted evaluation job!")
+                            st.markdown(
+                                f"""
+                                Evaluation can take up to 1 hour to complete, so grab a ☕️ or 🍵 while you wait:
+
+                                * 🔔 A [Hub pull request](https://huggingface.co/docs/hub/repositories-pull-requests-discussions) with the evaluation results will be opened for each model you selected. Check your email for notifications.
+                                * 📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) to view the results from your submission once the Hub pull request is merged.
+                                * 🥱 Tired of configuring evaluations? Add the following metadata to the [dataset card]({dataset_card_url}) to enable 1-click evaluations:
+                                """  # noqa
+                            )
+                            st.markdown(
+                                f"""
+                                ```yaml
+                                {selected_metadata}
+                                """
+                            )
+                            print("INFO -- Pushing evaluation job logs to the Hub")
+                            evaluation_log = {}
+                            evaluation_log["project_id"] = project_json_resp["id"]
+                            evaluation_log["autotrain_env"] = (
+                                "staging" if "staging" in AUTOTRAIN_BACKEND_API else "prod"
+                            )
+                            evaluation_log["payload"] = project_payload
+                            evaluation_log["project_creation_response"] = project_json_resp
+                            evaluation_log["dataset_creation_response"] = data_json_resp
+                            evaluation_log["autotrain_job_response"] = train_json_resp
+                            commit_evaluation_log(evaluation_log, hf_access_token=HF_TOKEN)
+                        else:
+                            st.error("🙈 Oh no, there was an error submitting your evaluation job!")
         else:
             st.warning("⚠️ No models left to evaluate! Please select other models and try again.")
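The local-development branch above polls the AutoTrain project until data processing completes (status 3) and then approves the training job. That polling loop could be factored into a standalone helper; a hypothetical sketch, assuming the http_get/http_post helpers from utils and the same status convention (the function name and the poll_interval/timeout parameters are illustrative, not part of this commit):

```
import time

from utils import http_get, http_post


def approve_project_when_ready(project_id, token, domain, poll_interval=10, timeout=3600):
    """Poll an AutoTrain project until data processing completes (status == 3), then start training."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        project_status = http_get(path=f"/projects/{project_id}", token=token, domain=domain).json()
        if project_status["status"] == 3:
            # Data processing complete - approve the training job
            return http_post(path=f"/projects/{project_id}/start_training", token=token, domain=domain).json()
        time.sleep(poll_interval)
    raise TimeoutError(f"Project {project_id} was not ready after {timeout} seconds")
```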
requirements.txt CHANGED
@@ -4,6 +4,7 @@ streamlit==1.10.0
 datasets<2.3
 evaluate<0.2
 jsonlines
+typer
 # Dataset specific deps
 py7zr<0.19
 openpyxl<3.1
run_evaluation_jobs.py ADDED
@@ -0,0 +1,64 @@
+import os
+from pathlib import Path
+
+import typer
+from datasets import load_dataset
+from dotenv import load_dotenv
+
+from utils import http_get, http_post
+
+if Path(".env").is_file():
+    load_dotenv(".env")
+
+HF_TOKEN = os.getenv("HF_TOKEN")
+AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME")
+AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API")
+
+if "staging" in AUTOTRAIN_BACKEND_API:
+    AUTOTRAIN_ENV = "staging"
+else:
+    AUTOTRAIN_ENV = "prod"
+
+
+def main():
+    print(f"💡 Starting jobs on {AUTOTRAIN_ENV} environment")
+    logs_df = load_dataset("autoevaluate/evaluation-job-logs", use_auth_token=HF_TOKEN, split="train").to_pandas()
+    # Filter out legacy AutoTrain submissions prior to project approvals requirement
+    projects_df = logs_df.copy()[(~logs_df["project_id"].isnull())]
+    # Filter IDs for appropriate AutoTrain env (staging vs prod)
+    projects_df = projects_df.copy().query(f"autotrain_env == '{AUTOTRAIN_ENV}'")
+    projects_to_approve = projects_df["project_id"].astype(int).tolist()
+    failed_approvals = []
+    print(f"🚀 Found {len(projects_to_approve)} evaluation projects to approve!")
+
+    for project_id in projects_to_approve:
+        print(f"Attempting to evaluate project ID {project_id} ...")
+        try:
+            project_info = http_get(
+                path=f"/projects/{project_id}",
+                token=HF_TOKEN,
+                domain=AUTOTRAIN_BACKEND_API,
+            ).json()
+            print(project_info)
+            # Only start evaluation for projects with completed data processing (status=3)
+            if project_info["status"] == 3 and project_info["training_status"] == "not_started":
+                train_job_resp = http_post(
+                    path=f"/projects/{project_id}/start_training",
+                    token=HF_TOKEN,
+                    domain=AUTOTRAIN_BACKEND_API,
+                ).json()
+                print(f"🤖 Project {project_id} approval response: {train_job_resp}")
+            else:
+                print(f"💪 Project {project_id} either not ready or has already been evaluated. Skipping ...")
+        except Exception as e:
+            print(f"There was a problem obtaining the project info for project ID {project_id}")
+            print(f"Error message: {e}")
+            failed_approvals.append(project_id)
+            pass
+
+    if len(failed_approvals) > 0:
+        print(f"🚨 Failed to approve {len(failed_approvals)} projects: {failed_approvals}")
+
+
+if __name__ == "__main__":
+    typer.run(main)