matsuap's picture
Upload folder using huggingface_hub
951d5c6 verified
from fastapi import APIRouter, Depends, UploadFile, File, HTTPException
from typing import List
from sqlalchemy.orm import Session
from services.s3_service import s3_service
from api.auth import get_current_user
from core.database import get_db
from models import db_models
from models.schemas import SourceFileResponse
from services.rag_service import rag_service
# Router for the source-file endpoints defined in this module; every route
# registered on it is served under the /api/sources prefix and tagged "sources".
router = APIRouter(prefix="/api/sources", tags=["sources"])
@router.post("/upload", response_model=dict)
async def upload_source(
    file: UploadFile = File(...),
    current_user: db_models.User = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """Upload a file through ``s3_service`` and record it as a ``Source`` row.

    The whole upload is read into memory, handed to ``s3_service.upload_file``,
    and a ``Source`` row owned by the current user is then committed with the
    returned key and public URL.

    Returns:
        A dict with the new row's ``id``, the ``filename``, the storage
        ``key``, the ``public_url`` and ``private_url`` reported by
        ``s3_service``, and a fixed success ``message``.

    Raises:
        HTTPException: 500 with the underlying error text when any step
            fails. An ``HTTPException`` raised by a callee is propagated
            unchanged instead of being rewrapped.
    """
    uploaded_key = None  # key returned by s3_service once the upload call succeeded
    persisted = False    # True once the metadata row has been committed

    try:
        content = await file.read()
        file_info = await s3_service.upload_file(
            file_content=content,
            filename=file.filename,
            user_id=str(current_user.id)
        )
        uploaded_key = file_info["key"]

        # Save metadata to database
        db_source = db_models.Source(
            filename=file.filename,
            s3_key=uploaded_key,
            s3_url=file_info["public_url"],  # Store public URL in DB
            size=len(content),
            user_id=current_user.id
        )
        db.add(db_source)
        db.commit()
        persisted = True
        db.refresh(db_source)

        return {
            "id": db_source.id,
            "filename": file.filename,
            "key": uploaded_key,
            "public_url": file_info["public_url"],
            "private_url": file_info["private_url"],
            "message": "Upload successful"
        }
    except Exception as e:
        # Leave the session usable for whoever handles it next.
        db.rollback()

        # The object was stored but its metadata row never made it into the
        # database: remove it so it is not orphaned. Best effort only; a
        # cleanup failure must not mask the original error.
        if uploaded_key is not None and not persisted:
            try:
                await s3_service.delete_file(uploaded_key)
            except Exception:
                import logging

                logging.getLogger(__name__).exception(
                    "Failed to clean up uploaded object %s", uploaded_key
                )

        if isinstance(e, HTTPException):
            raise
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.get("/list", response_model=List[SourceFileResponse])
async def list_sources(
    current_user: db_models.User = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """List the current user's sources together with their RAG indexing info.

    Each entry carries the stored source fields, a presigned URL obtained
    from ``s3_service.get_presigned_url`` for the source's key, and the
    ``rag_id`` / ``azure_doc_id`` of the joined ``RAGDocument``.

    Raises:
        HTTPException: 500 with the underlying error text if the query or
            the URL generation fails.
    """
    try:
        # Outer join: sources with no matching RAGDocument are still returned.
        joined = db.query(
            db_models.Source,
            db_models.RAGDocument.id.label("rag_id"),
            db_models.RAGDocument.azure_doc_id
        ).outerjoin(
            db_models.RAGDocument,
            db_models.Source.id == db_models.RAGDocument.source_id
        )
        rows = joined.filter(
            db_models.Source.user_id == current_user.id
        ).all()

        return [
            {
                "id": src.id,
                "filename": src.filename,
                "s3_key": src.s3_key,
                "public_url": src.s3_url,
                "private_url": s3_service.get_presigned_url(src.s3_key),
                "size": src.size,
                "created_at": src.created_at,
                "rag_id": rag_pk,
                "azure_doc_id": azure_id
            }
            for src, rag_pk, azure_id in rows
        ]
    except Exception as err:
        raise HTTPException(status_code=500, detail=str(err))
@router.delete("/{source_id}")
async def delete_source(
    source_id: int,
    current_user: db_models.User = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """Delete one of the current user's sources and everything generated from it.

    All database deletions run in a single transaction that is committed only
    after the external cleanup (search index, S3) has succeeded, so a failure
    at any step rolls the database back to its previous state instead of
    leaving a source stripped of its generated content.

    Args:
        source_id: Primary key of the source to delete.

    Returns:
        A dict with a confirmation ``message``.

    Raises:
        HTTPException: 404 if the source does not exist or belongs to another
            user; 500 if any deletion step fails.
    """
    source = db.query(db_models.Source).filter(
        db_models.Source.id == source_id,
        db_models.Source.user_id == current_user.id
    ).first()

    if not source:
        raise HTTPException(status_code=404, detail="Source not found")

    try:
        # Read what the external cleanup needs before the row is deleted.
        s3_key = source.s3_key

        # 1. RAG documents are useless without the source: remove every one
        #    linked to it (not just the first) so none blocks the source delete.
        rag_docs = db.query(db_models.RAGDocument).filter(
            db_models.RAGDocument.source_id == source.id
        ).all()
        azure_doc_ids = [doc.azure_doc_id for doc in rag_docs]
        for doc in rag_docs:
            db.delete(doc)

        # 2. Children (Flashcards, Questions) must go before their parents
        #    (Sets) because of SQL constraints. Only the ids are fetched.
        flashcard_set_ids = [
            set_id for (set_id,) in db.query(db_models.FlashcardSet.id).filter(
                db_models.FlashcardSet.source_id == source.id
            ).all()
        ]
        if flashcard_set_ids:
            db.query(db_models.Flashcard).filter(
                db_models.Flashcard.flashcard_set_id.in_(flashcard_set_ids)
            ).delete(synchronize_session=False)

        quiz_set_ids = [
            set_id for (set_id,) in db.query(db_models.QuizSet.id).filter(
                db_models.QuizSet.source_id == source.id
            ).all()
        ]
        if quiz_set_ids:
            db.query(db_models.QuizQuestion).filter(
                db_models.QuizQuestion.quiz_set_id.in_(quiz_set_ids)
            ).delete(synchronize_session=False)

        # Now delete the sets and other items
        db.query(db_models.MindMap).filter(db_models.MindMap.source_id == source.id).delete()
        db.query(db_models.FlashcardSet).filter(db_models.FlashcardSet.source_id == source.id).delete()
        db.query(db_models.QuizSet).filter(db_models.QuizSet.source_id == source.id).delete()
        db.query(db_models.Report).filter(db_models.Report.source_id == source.id).delete()
        db.query(db_models.VideoSummary).filter(db_models.VideoSummary.source_id == source.id).delete()

        # 3. Flush (not commit) the dependents first so they are removed
        #    before the source row, then flush the source itself. Any
        #    constraint error surfaces here, before external data is touched,
        #    and everything can still be rolled back.
        db.flush()
        db.delete(source)
        db.flush()

        # 4. External cleanup: search index entries, then the stored file.
        for azure_doc_id in azure_doc_ids:
            rag_service.delete_document(azure_doc_id)
        if s3_key:
            await s3_service.delete_file(s3_key)

        # 5. Make the database deletions permanent.
        db.commit()

        return {"message": "Source and all associated generated content (mind maps, quizzes, etc.) deleted successfully."}
    except Exception as e:
        db.rollback()
        raise HTTPException(status_code=500, detail=f"Failed to delete source: {str(e)}")