Spaces:

librarian-bots
/

dataset-language-detection-api

Running

davanstrien HF Staff committed on Jan 24, 2024

Commit

1b8ac52

1 Parent(s): 5a41550

Refactor code and add type annotations

Files changed (1) hide show

main.py CHANGED Viewed

@@ -1,20 +1,20 @@
 import os
 import random
 from datetime import timedelta
-from pathlib import Path
 from statistics import mean
 from typing import Any, Iterator, Union
 import fasttext
 from cashews import cache
 from dotenv import load_dotenv
-from fastapi import FastAPI
 from httpx import AsyncClient, Client, Timeout
 from huggingface_hub import hf_hub_download
-from huggingface_hub.utils import logging
 from iso639 import Lang
 from starlette.responses import RedirectResponse
 from toolz import concat, groupby, valmap
 cache.setup("mem://")
@@ -130,6 +130,8 @@ async def get_random_rows(
 def load_model(repo_id: str) -> fasttext.FastText._FastText:
     Path("code/models").mkdir(parents=True, exist_ok=True)
     model_path = hf_hub_download(
         repo_id,
@@ -237,14 +239,18 @@ def predict_rows(
 def root():
     return RedirectResponse(url="/docs")
 @app.get("/predict_dataset_language/{hub_id:path}")
 @cache(ttl=timedelta(minutes=10))
 async def predict_language(
-    hub_id: str,
     config: str | None = None,
     split: str | None = None,
-    max_request_calls: int = 10,
     number_of_rows: int = 1000,
 ) -> dict[Any, Any] | None:
     is_valid = datasets_server_valid_rows(hub_id)

 import os
 import random
 from datetime import timedelta
 from statistics import mean
 from typing import Any, Iterator, Union
+from typing import Annotated
 import fasttext
 from cashews import cache
 from dotenv import load_dotenv
+from fastapi import FastAPI, Path
 from httpx import AsyncClient, Client, Timeout
 from huggingface_hub import hf_hub_download
 from iso639 import Lang
 from starlette.responses import RedirectResponse
 from toolz import concat, groupby, valmap
+import logging
 cache.setup("mem://")
 def load_model(repo_id: str) -> fasttext.FastText._FastText:
+    from pathlib import Path
     Path("code/models").mkdir(parents=True, exist_ok=True)
     model_path = hf_hub_download(
         repo_id,
 def root():
     return RedirectResponse(url="/docs")
+    # item_id: Annotated[int, Path(title="The ID of the item to get", ge=1)], q: str
 @app.get("/predict_dataset_language/{hub_id:path}")
 @cache(ttl=timedelta(minutes=10))
 async def predict_language(
+    hub_id: Annotated[str, Path(title="The hub id of the dataset to predict")],
     config: str | None = None,
     split: str | None = None,
+    max_request_calls: Annotated[
+        int, Path(title="Max number of requests to datasets server", gt=0, le=20)
+    ] = 10,
     number_of_rows: int = 1000,
 ) -> dict[Any, Any] | None:
     is_valid = datasets_server_valid_rows(hub_id)