import json
import os
from pydantic import BaseModel
from typing import Literal


class EnvConfig(BaseModel):
	"""Runtime settings taken from environment variables.

	Every default below is looked up in the process environment when this
	class body runs. The lookup yields ``None`` for a variable that is not
	set, so a default can be ``None`` even though the field is annotated
	``str``.
	"""

	# Your access token, from Settings.
	hf_token: str = os.environ.get("HF_TOKEN")
	# Name of the TEI endpoint.
	tei_name: str = os.environ.get("TEI_NAME")
	# Name of the chunked dataset.
	chunked_ds_name: str = os.environ.get("CHUNKED_DS_NAME")
	# Name of the embeddings dataset.
	embed_ds_name: str = os.environ.get("EMBED_DS_NAME")
	# Splits of the input dataset to process, as one comma-separated string.
	input_splits: str = os.environ.get("INPUT_SPLITS")
	# Name of the column to load from the input dataset.
	input_text_col: str = os.environ.get("INPUT_TEXT_COL")


class ChunkConfig(BaseModel):
	"""Chunking settings; this module validates configs/chunk_config.json into it."""

	# Chunking strategy; must be exactly one of the three listed values.
	strategy: Literal["recursive", "sequence", "constant"]
	# Presumably the separator sequence used when splitting text — TODO confirm against the chunking code.
	split_seq: str
	# Presumably the target chunk length; the unit (characters/tokens) is not visible here — TODO confirm.
	chunk_len: int
	# Presumably whether the produced dataset repo should be private — TODO confirm.
	private: bool


class EmbedConfig(BaseModel):
	"""Embedding settings; this module validates configs/embed_config.json into it."""

	# Presumably whether the produced dataset repo should be private — TODO confirm.
	private: bool
	# Presumably the size of a semaphore limiting concurrent requests — TODO confirm against the caller.
	semaphore_bound: int


class WebhookPayloadEvent(BaseModel):
	"""The ``event`` part of a webhook payload."""

	# What happened; must be exactly one of the three listed values.
	action: Literal["create", "update", "delete"]
	# Free-form scope string; the set of valid values is not constrained here.
	scope: str


class WebhookPayloadRepo(BaseModel):
	"""The ``repo`` part of a webhook payload."""

	# Kind of repository; must be exactly one of the three listed values.
	type: Literal["dataset", "model", "space"]
	# Repository name.
	name: str
	# Repository identifier.
	id: str
	# Whether the repository is private.
	private: bool
	# Presumably the commit SHA at the head of the repository — TODO confirm.
	headSha: str


class WebhookPayload(BaseModel):
	"""A webhook payload: the event that occurred plus the repository it concerns."""

	# What happened (action and scope).
	event: WebhookPayloadEvent
	# The repository the event refers to.
	repo: WebhookPayloadRepo


# Load the chunking settings from configs/chunk_config.json (resolved against
# the current working directory) and validate them into a ChunkConfig.
# The file is opened as UTF-8 explicitly so the result does not depend on the
# platform's default encoding, and the parsed data is validated directly
# instead of being serialised back to a JSON string and parsed a second time.
with open(os.path.join(os.getcwd(), "configs/chunk_config.json"), encoding="utf-8") as c:
	data = json.load(c)
	chunk_config = ChunkConfig.model_validate(data)

# Same procedure for the embedding settings in configs/embed_config.json.
with open(os.path.join(os.getcwd(), "configs/embed_config.json"), encoding="utf-8") as c:
	data = json.load(c)
	embed_config = EmbedConfig.model_validate(data)


env_config = EnvConfig()
# Turn the comma-separated INPUT_SPLITS value into a list of split names.
# Each item is stripped BEFORE the emptiness check, so whitespace-only items
# (e.g. the middle one in "train, ,test") are dropped instead of surviving as
# empty names. An unset variable (None) is treated as "no splits" and yields
# an empty list, like the other unset variables which do not fail at import.
# NOTE: the field is annotated ``str`` on EnvConfig but holds a list from here on.
env_config.input_splits = [
	name
	for name in (part.strip() for part in (env_config.input_splits or "").split(","))
	if name
]