Spaces:
Sleeping
Sleeping
"""This module contains utility functions for the project""" | |
import mmh3 | |
from haystack import Document | |
def get_unique_docs(
    dataset,
    unique_docs: set,
    *,
    source: str = "QASports",
    category: str = "basketball",
) -> list:
    """Get unique documents from dataset

    A record is converted only when its "context" is not None and its
    "context_id" is not already present in ``unique_docs``.

    Args:
        dataset: iterable of dictionaries. Every record is read through the
            "context" key; records with a non-None context are also read
            through "context_id", and records that are converted are read
            through "context_title" and "url" as well.
        unique_docs: set of context ids that were already converted. It is
            updated in place with the id of every record converted by this
            call, so reusing the same set across calls skips those contexts.
        source: value stored under the "source" key of each document's meta.
        category: value stored under the "category" key of each document's
            meta.
    Returns:
        docs: list of haystack.Document, one per newly seen context, in
            dataset order.
    """
    docs = []
    for record in dataset:
        # Check the context first: records without one are skipped before
        # their "context_id" is ever looked up.
        context = record["context"]
        if context is None:
            continue

        context_id = record["context_id"]
        if context_id in unique_docs:
            continue

        # Record the id before building the Document so later duplicates in
        # this dataset (and in later calls sharing the set) are skipped.
        unique_docs.add(context_id)
        docs.append(
            Document(
                content=context,
                meta={
                    "title": record["context_title"],
                    "context_id": context_id,
                    "url": record["url"],
                    "source": source,
                    "category": category,
                },
            )
        )
    return docs