from typing import Any
import logging

from langchain_core.tools import tool
import httpx

from ask_candid.base.utils import retry_on_status
from ask_candid.base.config.rest import AUTOCODING, DOCUMENT

logging.basicConfig(format="[%(levelname)s] (%(asctime)s) :: %(message)s")
logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)


@retry_on_status(num_retries=3)
def get_with_retries(url: str, payload: dict[str, Any] | None, headers: dict[str, str] | None):
    """Issues an HTTP GET, retrying at the transport level and, via `retry_on_status`, on retryable status codes."""
    with httpx.Client(transport=httpx.HTTPTransport(retries=3), timeout=30) as client:
        return client.get(url=url, params=payload, headers=headers)


@tool
def autocode(text: str) -> dict[str, list] | str:
    """Uses natural language processing to align some input text to Candid's taxonomy: https://taxonomy.candid.org.
    The taxonomy describes activity in the social and philanthropic sectors.

    Parameters
    ----------
    text : str
        Text describing work in the social sector. This should be related to the social and/or philanthropic sector.

    Returns
    -------
    dict[str, list] | str
        Taxonomy responses. The keys of the dictionary are individual taxonomy facets, and the values are the terms
        which the NLP model has determined are relevant given the input text, along with their confidence scores.
        If the output is a string then an error occurred, and a retry should be considered.
    """

    r = httpx.get(
        url=AUTOCODING.endpoint("predict"),
        params={"text": text},
        headers={**AUTOCODING.header} # type: ignore
    )

    if r.status_code != 200:
        logger.error("Error calling autocoding API %s. Error: %s", str(r.request.url), r.reason_phrase)
        return f"Error calling autocoding. Error: {r.reason_phrase}"

    data: dict = r.json().get("data", {})
    return {k: v for k, v in data.items() if k in {"subject", "population"}}


@tool
def geo_detect(text: str) -> list[dict[str, Any]] | str:
    """Uses natural language processing to find and match named geographies found in the supplied text. The output
    will supply identified geographies from [Geonames](https://www.geonames.org/).

    Parameters
    ----------
    text : str
        Text describing work in the social sector. This should be related to the social and/or philanthropic sector.

    Returns
    -------
    list[dict[str, Any]] | str
        Matched geography responses. This is an array of JSON objects, each containing the `name` of the geography as
        it appeared in the supplied text and the best match to a Geonames geography. For many Candid knowledge tools
        the `geonames_id` value will be the most useful.
        If the output is a string then an error occurred, and a retry should be considered.
    """

    r = get_with_retries(
        url=DOCUMENT.endpoint("entities/geographies"),
        payload={"text": text, "only_best_match": True},
        headers={**DOCUMENT.header}
    )
    assert isinstance(r, httpx.Response)
    if r.status_code != 200:
        logger.error("Error calling geo detection API %s. Error: %s", str(r.request.url), r.reason_phrase)
        return f"Error calling geo detection. Error: {r.reason_phrase}"

    data: list = r.json().get("entities", [])
    return [{"name": entity["name"], "match": entity["match"][:1]} for entity in data if entity.get("type") == "geo"]
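

# Minimal usage sketch (assumes the AUTOCODING and DOCUMENT endpoints are configured with valid
# credentials; the input strings below are illustrative only). Tools built with the `@tool`
# decorator can be called directly through LangChain's `.invoke` interface.
if __name__ == "__main__":
    taxonomy_terms = autocode.invoke({"text": "After-school STEM tutoring for middle school girls"})
    geographies = geo_detect.invoke({"text": "Food assistance programs across Brooklyn, New York"})
    print(taxonomy_terms)
    print(geographies)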