| |
| import os |
| import uuid |
| from datetime import datetime |
| from fastapi import BackgroundTasks, HTTPException |
| from langchain.text_splitter import RecursiveCharacterTextSplitter |
| from ..services.llm import get_embeddings |
| from ..config import settings |
| from ..db.mongodb import mongodb |
| from ..db.chat_manager import chat_manager |
| from langchain_community.vectorstores import FAISS |
|
|
| |
# Make sure the video storage directory exists as soon as this module is
# imported; exist_ok=True makes this a no-op when it is already there.
os.makedirs(settings.VIDEOS_DIR, exist_ok=True)


# Handle to the "chunks" collection: process_transcription writes chunk
# documents to it and get_retriever reads them back by session_id.
chunks_collection = mongodb.db.get_collection("chunks")
|
|
|
|
def process_transcription(transcription: str, user_id: str, title: str, source_type: str,
                          source_url: str = None, file_size: int = None) -> str:
    """
    Store a transcription as a new session and return the session ID.

    The transcription is split into chunks, a video document is inserted into
    ``mongodb.videos``, one document per chunk is inserted into the chunks
    collection, and the chat history for the session is initialized.

    Args:
        transcription: Full transcription text.
        user_id: Identifier of the user who owns the video.
        title: Title stored on the video document.
        source_type: Source type stored on the video document.
        source_url: Optional source URL; stored as-is (``None`` when absent).
        file_size: Optional file size; stored under ``"size"``.

    Returns:
        The generated session ID (a UUID4 string), which is also stored as
        the document's ``video_id``.
    """
    # Local import: the module only imports ``datetime`` from ``datetime``.
    from datetime import timezone

    splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=20)
    splits = splitter.split_text(transcription)

    session_id = str(uuid.uuid4())
    mongodb.videos.insert_one({
        "video_id": session_id,
        "user_id": user_id,
        "title": title,
        "source_type": source_type,
        "source_url": source_url,
        # Timezone-aware "now" replaces the deprecated datetime.utcnow();
        # it denotes the same UTC instant.
        "created_at": datetime.now(timezone.utc),
        "transcription": transcription,
        "size": file_size,
    })

    chunk_docs = [{"session_id": session_id, "text": chunk} for chunk in splits]
    # An empty transcription produces no chunks, and PyMongo's insert_many
    # raises on an empty document list. Skipping the insert avoids failing
    # after the video document has already been written.
    if chunk_docs:
        chunks_collection.insert_many(chunk_docs)

    chat_manager.initialize_chat_history(session_id)

    return session_id
|
|
|
|
def get_retriever(session_id: str):
    """
    Return a retriever over the stored chunks of one session.

    Every chunk document matching ``session_id`` is read from the chunks
    collection, the chunk texts are indexed in a freshly built FAISS vector
    store, and that store is exposed as a retriever configured with ``k=3``.

    Raises:
        HTTPException: 404 when no chunks are stored for ``session_id``.
    """
    query = {"session_id": session_id}
    texts = []
    for record in chunks_collection.find(query):
        texts.append(record["text"])

    # Nothing stored for this session: it was never transcribed.
    if len(texts) == 0:
        raise HTTPException(
            status_code=404,
            detail="Session data not found. Please transcribe first.",
        )

    embedding_model = get_embeddings()
    store = FAISS.from_texts(texts, embedding_model)
    return store.as_retriever(search_kwargs={"k": 3})
|
|
|
|
def save_video_file(video_id: str, file_path: str, contents: bytes) -> None:
    """
    Write an uploaded video's bytes to ``file_path``.

    Missing parent directories are created first. An existing file at
    ``file_path`` is overwritten.

    Args:
        video_id: Identifier of the video. Not used when writing; kept so
            existing callers keep working.
        file_path: Destination path of the file.
        contents: Raw bytes to write.
    """
    parent_dir = os.path.dirname(file_path)
    # A bare filename has no directory component, and os.makedirs("") raises
    # FileNotFoundError, so only create the parent when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(file_path, "wb") as f:
        f.write(contents)
|
|