|
"Log chat messages and feedback to a dataset." |
|
|
|
from typing import Tuple |
|
|
|
import os |
|
import tempfile |
|
import ujson |
|
import uuid |
|
|
|
import huggingface_hub |
|
import pandas as pd |
|
|
|
# Hugging Face dataset repository id that the logging functions upload to.
# NOTE: "DATSET" is a misspelling of "DATASET"; the name is left unchanged
# because other code in this module refers to it.
LOGS_DATSET_PATH = "logikon/benjamin-logs" |
|
|
|
|
|
async def log_messages(
    messages: Tuple[str, str],
    conversation_id: str,
    step: int,
    metadata: dict = None,
):
    """Upload one human/AI exchange as a JSON file to the logs dataset.

    The record is stored at
    ``data/<date>/<conversation_id>/step_<step>.json`` inside the dataset
    repository named by ``LOGS_DATSET_PATH``, where ``<date>`` is the ISO
    date of ``pd.Timestamp.now()``.

    Args:
        messages: Pair whose first item is stored under ``"human"`` and whose
            second item is stored under ``"ai"``.
        conversation_id: Identifier stored in the record and used as a
            directory name in the repository path.
        step: Step number stored in the record and used in the file name.
        metadata: Optional mapping; stored as a list of ``(key, value)``
            pairs, or as an empty list when missing or empty.

    Raises:
        KeyError: If the ``HF_DATASETS_TOKEN`` environment variable is unset.
    """
    # Repository paths always use forward slashes, so build the path with
    # posixpath instead of the platform-dependent os.path.
    import posixpath

    record = {
        "conversation_id": conversation_id,
        "step": step,
        "human": messages[0],
        "ai": messages[1],
        "metadata": list(metadata.items()) if metadata else [],
    }

    # Serialise in memory and upload the bytes directly; this avoids handing
    # over a temporary file whose read position sits at end-of-file.
    payload = ujson.dumps(record).encode("utf-8")

    day = pd.Timestamp.now().date().isoformat()
    path_in_repo = posixpath.join("data", day, conversation_id, f"step_{step}.json")

    # NOTE(review): upload_file is called synchronously here, so this
    # coroutine does not yield to the event loop while the upload runs --
    # confirm that is acceptable for callers.
    api = huggingface_hub.HfApi()
    api.upload_file(
        path_or_fileobj=payload,
        path_in_repo=path_in_repo,
        repo_id=LOGS_DATSET_PATH,
        repo_type="dataset",
        token=os.environ["HF_DATASETS_TOKEN"],
    )
|
|
|
async def log_feedback(
    liked: bool,
    conversation_id: str,
    step: int,
    metadata: dict = None,
):
    """Upload one like/dislike feedback record as a JSON file to the logs dataset.

    The record is stored at
    ``data/<date>/<conversation_id>/feedback_<step>_<uuid4>.json`` inside the
    dataset repository named by ``LOGS_DATSET_PATH``, where ``<date>`` is the
    ISO date of ``pd.Timestamp.now()``. The random UUID keeps several
    feedback events for the same step from overwriting one another.

    Args:
        liked: Feedback flag, stored under ``"liked"``.
        conversation_id: Identifier stored in the record and used as a
            directory name in the repository path.
        step: Step the feedback refers to; stored unchanged in the record.
            Either a plain integer or a subscriptable value (such as a list
            or tuple) whose first element is the step number is accepted;
            only the step number is used in the file name.
        metadata: Optional mapping; stored as a list of ``(key, value)``
            pairs, or as an empty list when missing or empty.

    Raises:
        KeyError: If the ``HF_DATASETS_TOKEN`` environment variable is unset.
    """
    # Repository paths always use forward slashes, so build the path with
    # posixpath instead of the platform-dependent os.path.
    import posixpath

    record = {
        "conversation_id": conversation_id,
        "step": step,
        "liked": liked,
        "metadata": list(metadata.items()) if metadata else [],
    }

    # The file name historically used ``step[0]``, which fails for the plain
    # int the signature advertises. Keep that behaviour for subscriptable
    # values and fall back to the value itself otherwise.
    try:
        step_index = step[0]
    except TypeError:
        step_index = step

    # Serialise in memory and upload the bytes directly; this avoids handing
    # over a temporary file whose read position sits at end-of-file.
    payload = ujson.dumps(record).encode("utf-8")

    day = pd.Timestamp.now().date().isoformat()
    file_name = f"feedback_{step_index}_{uuid.uuid4()}.json"
    path_in_repo = posixpath.join("data", day, conversation_id, file_name)

    # NOTE(review): upload_file is called synchronously here, so this
    # coroutine does not yield to the event loop while the upload runs --
    # confirm that is acceptable for callers.
    api = huggingface_hub.HfApi()
    api.upload_file(
        path_or_fileobj=payload,
        path_in_repo=path_in_repo,
        repo_id=LOGS_DATSET_PATH,
        repo_type="dataset",
        token=os.environ["HF_DATASETS_TOKEN"],
    )