Eurovoc dataset for translation or conversion link

#2
by scampion - opened
European Parliament org

If you need to translate Eurovoc terms, you can download the XML version from the official repository.

European Parliament org
edited Oct 17, 2023

You can also utilize a SPARQL endpoint for this purpose.

import pandas as pd
import time
from SPARQLWrapper import SPARQLWrapper, JSON
from tqdm import tqdm

# Characters that must be backslash-escaped inside a double-quoted SPARQL
# string literal, mapped to their escape sequences.
_SPARQL_LITERAL_ESCAPES = str.maketrans({
    "\\": "\\\\",
    '"': '\\"',
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
})


def _escape_sparql_literal(text):
    """Escape *text* for use inside a double-quoted SPARQL string literal."""
    return str(text).translate(_SPARQL_LITERAL_ESCAPES)


def get_eurovoc_code(label, max_retries=5, sleep_time=5):
    """Return the Eurovoc code for a given English preferred label.

    Sends a SPARQL query to the publications.europa.eu endpoint looking for
    a resource under ``http://eurovoc.europa.eu/`` whose ``skos:prefLabel``
    is exactly ``label`` tagged as English.

    Args:
        label: The English preferred label to look up (exact match).
        max_retries: Maximum number of query attempts when the endpoint
            answers with "HTTP Error 503".
        sleep_time: Seconds to wait after each 503 response before retrying.

    Returns:
        The last path segment of the first matching Eurovoc URI, or ``None``
        if the query returned no bindings.

    Raises:
        Exception: If every attempt failed with "HTTP Error 503"; the last
            such error is attached as the cause. Any other error raised by
            the query is propagated unchanged.
    """
    # Define the SPARQL endpoint and set the return format
    sparql = SPARQLWrapper("https://publications.europa.eu/webapi/rdf/sparql")
    sparql.setReturnFormat(JSON)

    # Escape the label so quotes or backslashes in it cannot break out of
    # the string literal and corrupt (or alter) the query.
    safe_label = _escape_sparql_literal(label)
    query = f"""
    PREFIX cdm: <http://publications.europa.eu/ontology/cdm#>
    SELECT * WHERE {{
        ?eurovoc <http://www.w3.org/2004/02/skos/core#prefLabel> "{safe_label}"@en.
        FILTER(regex(str(?eurovoc), "http://eurovoc.europa.eu/" ))
    }}
    """
    sparql.setQuery(query)

    last_error = None
    for _ in range(max_retries):
        try:
            results = sparql.query().convert()
        except Exception as err:
            # Only a 503 (service temporarily unavailable) is worth retrying;
            # anything else is re-raised with its original traceback.
            if "HTTP Error 503" not in str(err):
                raise
            last_error = err
            time.sleep(sleep_time)
            continue

        bindings = results["results"]["bindings"]
        if not bindings:
            return None  # no code found
        # The Eurovoc code is taken to be the last part of the URI.
        return bindings[0]["eurovoc"]["value"].split("/")[-1]

    raise Exception(
        f"Failed after {max_retries} attempts for label: {label}"
    ) from last_error

Sign up or log in to comment