Spaces:
Running
Running
Commit
·
7402e0f
1
Parent(s):
2432a11
Update note services logic
Browse files- .gitignore +5 -2
- Dockerfile +6 -2
- app/api/folders/folders_create.py +26 -0
- app/api/folders/folders_delete.py +14 -0
- app/api/folders/folders_get.py +17 -0
- app/api/folders/folders_update.py +25 -0
- app/api/notes.py +0 -35
- app/api/notes/notes_audio.py +43 -0
- app/api/notes/notes_get.py +18 -0
- app/api/notes/notes_regenerate.py +34 -0
- app/api/notes/notes_text.py +45 -0
- app/api/notes/notes_update.py +30 -0
- app/config.py +7 -3
- app/infra/auth.py +10 -0
- app/{services → infra}/firebase.py +3 -7
- app/jobs/enrichment_job.py +45 -44
- app/main.py +17 -4
- app/models/enums.py +11 -0
- app/models/folder.py +9 -0
- app/models/note.py +31 -0
- app/services/{mindmap_service.py → enrichment/mindmap.py} +13 -21
- app/services/enrichment/normalize.py +148 -0
- app/services/enrichment/pipeline.py +49 -0
- app/services/{summary_service.py → enrichment/summary.py} +17 -20
- app/services/enrichment/title_keywords.py +187 -0
- app/services/folder_store.py +23 -0
- app/services/note_store.py +26 -0
- app/services/storage.py +0 -33
- app/utils/id.py +4 -0
- app/utils/time.py +4 -0
- requirements.txt +1 -0
.gitignore
CHANGED
|
@@ -1,4 +1,7 @@
|
|
| 1 |
-
NOTE_SERVICE_FLOW.md
|
| 2 |
.myvenv
|
| 3 |
__pycache__/
|
| 4 |
-
*.pyc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
.myvenv
__pycache__/
*.pyc
*.json
.env
*.txt
# requirements.txt is copied by the Dockerfile and must never be ignored.
!requirements.txt
docs/
|
Dockerfile
CHANGED
|
@@ -1,10 +1,14 @@
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
WORKDIR /app
|
| 3 |
|
| 4 |
-
|
| 5 |
-
COPY requirements.txt ./
|
| 6 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 7 |
|
| 8 |
COPY . .
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
| 1 |
FROM python:3.11-slim
WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 7860

# The slim base image does not include curl, so probe /health with the
# interpreter that is guaranteed to be present. urlopen raises on connection
# failure or a non-2xx status, which makes python exit non-zero.
HEALTHCHECK --interval=30s --timeout=3s --start-period=10s \
  CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health', timeout=2)" || exit 1

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
app/api/folders/folders_create.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from app.services.folder_store import create_folder
|
| 4 |
+
from app.utils.id import new_id
|
| 5 |
+
from app.utils.time import now_ts
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
router = APIRouter(prefix="/folders", tags=["folders"])
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class CreateFolderRequest(BaseModel):
    """Request body for ``POST /folders``."""

    # Folder name; required.
    name: str
    # Optional colour string; stored as given, no format check is done here.
    color_hex: str | None = None
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@router.post("")
def create(req: CreateFolderRequest):
    """Build a folder dict from the request, store it, and return it.

    Args:
        req: Validated request body carrying ``name`` and optional ``color_hex``.

    Returns:
        The folder dict that was passed to ``create_folder``.
    """
    # Take a single timestamp so created_at and updated_at are identical on
    # creation (two separate now_ts() calls could straddle a tick).
    now = now_ts()
    folder = {
        "folder_id": new_id(),
        "name": req.name,
        "color_hex": req.color_hex,
        "created_at": now,
        "updated_at": now,
    }
    create_folder(folder)
    return folder
|
app/api/folders/folders_delete.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException
|
| 2 |
+
from app.services.folder_store import get_folder, delete_folder
|
| 3 |
+
|
| 4 |
+
router = APIRouter(prefix="/folders", tags=["folders"])
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@router.delete("/{folder_id}")
def delete_folder_api(folder_id: str):
    """Delete the folder identified by ``folder_id``.

    Raises:
        HTTPException: 404 when ``get_folder`` returns a falsy value.
    """
    folder = get_folder(folder_id)
    if not folder:
        raise HTTPException(status_code=404, detail="Folder not found")

    delete_folder(folder_id)
    return {"folder_id": folder_id, "deleted": True}
|
app/api/folders/folders_get.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException
|
| 2 |
+
from app.services.folder_store import get_folder, list_folders
|
| 3 |
+
|
| 4 |
+
router = APIRouter(prefix="/folders", tags=["folders"])
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@router.get("")
def get_folders():
    """Return whatever ``list_folders`` yields, unmodified."""
    return list_folders()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@router.get("/{folder_id}")
def get_folder_by_id(folder_id: str):
    """Return a single folder.

    Raises:
        HTTPException: 404 when ``get_folder`` returns a falsy value.
    """
    folder = get_folder(folder_id)
    if not folder:
        raise HTTPException(status_code=404, detail="Folder not found")
    return folder
|
app/api/folders/folders_update.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from typing import Optional
|
| 4 |
+
from app.services.folder_store import get_folder, update_folder
|
| 5 |
+
from app.utils.time import now_ts
|
| 6 |
+
|
| 7 |
+
router = APIRouter(prefix="/folders", tags=["folders"])
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class UpdateFolderRequest(BaseModel):
    """Request body for ``PATCH /folders/{folder_id}``; every field is optional."""

    name: Optional[str] = None
    color_hex: Optional[str] = None
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@router.patch("/{folder_id}")
def update_folder_api(folder_id: str, req: UpdateFolderRequest):
    """Apply a partial update to an existing folder.

    Only fields the client actually sent are forwarded to ``update_folder``;
    ``updated_at`` is always refreshed.

    Raises:
        HTTPException: 404 when ``get_folder`` returns a falsy value.
    """
    folder = get_folder(folder_id)
    if not folder:
        raise HTTPException(status_code=404, detail="Folder not found")

    # exclude_unset keeps omitted fields out of the update payload.
    updates = req.dict(exclude_unset=True)
    updates["updated_at"] = now_ts()

    update_folder(folder_id, updates)
    return {"folder_id": folder_id, "updated": True}
|
app/api/notes.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
from fastapi import APIRouter, BackgroundTasks, HTTPException
|
| 2 |
-
from pydantic import BaseModel
|
| 3 |
-
from typing import Optional, List
|
| 4 |
-
from app.services.storage import create_note as storage_create_note, get_note
|
| 5 |
-
from app.jobs.enrichment_job import run_enrichment
|
| 6 |
-
|
| 7 |
-
router = APIRouter(prefix="/notes")
|
| 8 |
-
|
| 9 |
-
class CreateNoteRequest(BaseModel):
|
| 10 |
-
note_id: str
|
| 11 |
-
raw_text: str
|
| 12 |
-
normalized_text: Optional[str] = None
|
| 13 |
-
keywords: List[str] = []
|
| 14 |
-
chunks: list = []
|
| 15 |
-
duration: Optional[float] = None
|
| 16 |
-
sample_rate: Optional[int] = None
|
| 17 |
-
asr_model: Optional[str] = None
|
| 18 |
-
normalization_model: Optional[str] = None
|
| 19 |
-
generate: List[str] = []
|
| 20 |
-
|
| 21 |
-
@router.post("")
|
| 22 |
-
async def create_note(req: CreateNoteRequest, bg: BackgroundTasks):
|
| 23 |
-
storage_create_note(req.note_id, req.dict())
|
| 24 |
-
|
| 25 |
-
if req.generate:
|
| 26 |
-
bg.add_task(run_enrichment, req.note_id, req.generate)
|
| 27 |
-
|
| 28 |
-
return {"note_id": req.note_id, "status": "stored"}
|
| 29 |
-
|
| 30 |
-
@router.get("/{note_id}")
|
| 31 |
-
def fetch_note(note_id: str):
|
| 32 |
-
note = get_note(note_id)
|
| 33 |
-
if not note:
|
| 34 |
-
raise HTTPException(404, "Note not found")
|
| 35 |
-
return note
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/api/notes/notes_audio.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, BackgroundTasks, HTTPException
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from typing import Dict, List, Optional
|
| 4 |
+
from app.services.note_store import create_note
|
| 5 |
+
from app.jobs.enrichment_job import enrich_note
|
| 6 |
+
from app.models.enums import NoteType, NoteStatus
|
| 7 |
+
from app.utils.time import now_ts
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
router = APIRouter(prefix="/internal/notes", tags=["internal"])
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class CreateAudioNoteRequest(BaseModel):
    """Request body for ``POST /internal/notes/audio``."""

    # The caller supplies the id; this endpoint does not generate one.
    note_id: str
    raw_text: str
    # Free-form mapping stored on the note as-is.
    metadata: Dict
    # Enrichment task names; an empty list means no enrichment is scheduled.
    generate: List[str] = []
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@router.post("/audio")
async def create_audio_note(req: CreateAudioNoteRequest, bg: BackgroundTasks):
    """Store an audio note and optionally schedule enrichment.

    The note is created with status ``processing`` when ``req.generate`` is
    non-empty (and ``enrich_note`` is queued as a background task), otherwise
    with status ``created``.

    Returns:
        A dict with the note id and the status the note was stored with.
    """
    now = now_ts()
    has_enrichment = bool(req.generate)

    note = {
        "note_id": req.note_id,
        "type": NoteType.audio,
        "raw_text": req.raw_text,
        "metadata": req.metadata,
        "status": NoteStatus.processing if has_enrichment else NoteStatus.created,
        "created_at": now,
        "updated_at": now,
    }

    create_note(note)

    if has_enrichment:
        bg.add_task(enrich_note, req.note_id, req.generate)

    return {
        "note_id": req.note_id,
        "status": note["status"],
    }
|
app/api/notes/notes_get.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException
|
| 2 |
+
from app.services.note_store import get_note, list_notes
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
router = APIRouter(prefix="/notes", tags=["notes"])
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@router.get("")
def get_notes(folder_id: str | None = None):
    """Return the result of ``list_notes`` for the optional ``folder_id`` query parameter."""
    return list_notes(folder_id)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@router.get("/{note_id}")
def get_note_by_id(note_id: str):
    """Return a single note.

    Raises:
        HTTPException: 404 when ``get_note`` returns a falsy value.
    """
    note = get_note(note_id)
    if not note:
        raise HTTPException(404, "Note not found")
    return note
|
app/api/notes/notes_regenerate.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.models.enums import NoteStatus
|
| 2 |
+
from app.utils.time import now_ts
|
| 3 |
+
from fastapi import APIRouter, BackgroundTasks, HTTPException
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
+
from typing import List
|
| 6 |
+
from app.services.note_store import get_note, update_note
|
| 7 |
+
from app.jobs.enrichment_job import enrich_note
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
router = APIRouter(prefix="/notes", tags=["notes"])
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class RegenerateRequest(BaseModel):
    """Request body for ``POST /notes/{note_id}/regenerate``."""

    # Enrichment task names to run again; required.
    generate: List[str]
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@router.post("/{note_id}/regenerate")
def regenerate_note(note_id: str, req: RegenerateRequest, bg: BackgroundTasks):
    """Re-run enrichment for an existing note.

    The note is flagged ``processing`` synchronously so clients see the state
    change before the background task starts.

    Raises:
        HTTPException: 404 when ``get_note`` returns a falsy value.
    """
    note = get_note(note_id)
    if not note:
        raise HTTPException(404, "Note not found")

    # mark processing immediately
    update_note(note_id, {
        "status": NoteStatus.processing,
        "updated_at": now_ts(),
    })

    bg.add_task(enrich_note, note_id, req.generate)

    return {
        "note_id": note_id,
        "status": NoteStatus.processing,
    }
|
app/api/notes/notes_text.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, BackgroundTasks
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from typing import List, Optional
|
| 4 |
+
from app.services.note_store import create_note
|
| 5 |
+
from app.jobs.enrichment_job import enrich_note
|
| 6 |
+
from app.models.enums import NoteType, NoteStatus
|
| 7 |
+
from app.utils.id import new_id
|
| 8 |
+
from app.utils.time import now_ts
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
router = APIRouter(prefix="/notes", tags=["notes"])
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class CreateTextNoteRequest(BaseModel):
    """Request body for ``POST /notes/text``."""

    raw_text: str
    folder_id: Optional[str] = None
    # Enrichment task names; an empty list means no enrichment is scheduled.
    generate: List[str] = []
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@router.post("/text")
async def create_text_note(req: CreateTextNoteRequest, bg: BackgroundTasks):
    """Store a text note under a freshly generated id and optionally enrich it.

    The note is created with status ``processing`` when ``req.generate`` is
    non-empty (and ``enrich_note`` is queued as a background task), otherwise
    with status ``created``.

    Returns:
        A dict with the new note id and the status the note was stored with.
    """
    note_id = new_id()
    now = now_ts()

    has_enrichment = bool(req.generate)

    note = {
        "note_id": note_id,
        "type": NoteType.text,
        "raw_text": req.raw_text,
        "folder_id": req.folder_id,
        "status": NoteStatus.processing if has_enrichment else NoteStatus.created,
        "created_at": now,
        "updated_at": now,
    }

    create_note(note)

    if has_enrichment:
        bg.add_task(enrich_note, note_id, req.generate)

    return {
        "note_id": note_id,
        "status": note["status"],
    }
|
app/api/notes/notes_update.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from typing import Optional
|
| 4 |
+
from app.services.note_store import get_note, update_note
|
| 5 |
+
from app.utils.time import now_ts
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
router = APIRouter(prefix="/notes", tags=["notes"])
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class UpdateNoteRequest(BaseModel):
    """Request body for ``PATCH /notes/{note_id}``; every field is optional."""

    folder_id: Optional[str] = None
    title: Optional[str] = None
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@router.patch("/{note_id}")
def update_note_api(note_id: str, req: UpdateNoteRequest):
    """Apply a partial update to an existing note.

    Only fields the client actually sent are forwarded to ``update_note``;
    ``updated_at`` is always refreshed.

    Raises:
        HTTPException: 404 when ``get_note`` returns a falsy value.
    """
    note = get_note(note_id)
    if not note:
        raise HTTPException(404, "Note not found")

    updates = req.dict(exclude_unset=True)

    # Clients must not change the status directly; drop it if it ever appears
    # in the payload.
    updates.pop("status", None)

    updates["updated_at"] = now_ts()

    update_note(note_id, updates)
    return {"note_id": note_id, "updated": True}
|
app/config.py
CHANGED
|
@@ -1,5 +1,9 @@
|
|
| 1 |
import os
|
| 2 |
|
| 3 |
-
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY"
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
|
| 3 |
+
# Fail at import time when the key is absent or empty.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError("GEMINI_API_KEY is required")

# Model name; can be overridden through the environment.
GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")

# Defaults to an empty string when the variable is not set.
FIREBASE_SERVICE_ACCOUNT = os.getenv("FIREBASE_SERVICE_ACCOUNT", "")
|
app/infra/auth.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import Request
|
| 2 |
+
|
| 3 |
+
def get_current_user_id(request: Request) -> str | None:
    """Return the caller's user id from the ``x-user-id`` request header.

    Placeholder for a real auth context. Later this is meant to extract the id
    from a JWT or from API Gateway headers.

    Returns:
        The header value, or ``None`` when the header is absent.
    """
    return request.headers.get("x-user-id")
|
app/{services → infra}/firebase.py
RENAMED
|
@@ -1,13 +1,9 @@
|
|
| 1 |
import firebase_admin
|
| 2 |
from firebase_admin import credentials, firestore
|
| 3 |
-
import json
|
| 4 |
-
from app.config import FIREBASE_SERVICE_ACCOUNT
|
| 5 |
|
| 6 |
if not firebase_admin._apps:
|
| 7 |
-
|
| 8 |
-
raise RuntimeError("Missing FIREBASE_SERVICE_ACCOUNT")
|
| 9 |
-
|
| 10 |
-
cred = credentials.Certificate(json.loads(FIREBASE_SERVICE_ACCOUNT))
|
| 11 |
firebase_admin.initialize_app(cred)
|
| 12 |
|
| 13 |
-
db = firestore.client()
|
|
|
|
| 1 |
import firebase_admin
|
| 2 |
from firebase_admin import credentials, firestore
|
| 3 |
+
import os, json
|
|
|
|
| 4 |
|
| 5 |
# Initialise the default Firebase app only once per process.
if not firebase_admin._apps:
    # FIREBASE_SERVICE_ACCOUNT must hold the service-account JSON document;
    # a missing variable raises KeyError here.
    cred = credentials.Certificate(json.loads(os.environ["FIREBASE_SERVICE_ACCOUNT"]))
    firebase_admin.initialize_app(cred)

# Firestore client created at import time and exposed as a module attribute.
db = firestore.client()
|
app/jobs/enrichment_job.py
CHANGED
|
@@ -1,53 +1,54 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
# note = get_note(note_id)
|
| 7 |
-
# if not note:
|
| 8 |
-
# return
|
| 9 |
-
|
| 10 |
-
# text = note.get("normalized_text") or note["raw_text"]
|
| 11 |
-
|
| 12 |
-
# update_note(note_id, status="processing")
|
| 13 |
-
# updates = {}
|
| 14 |
-
|
| 15 |
-
# if "summary" in tasks:
|
| 16 |
-
# updates["summary"] = await generate_summary(text)
|
| 17 |
-
|
| 18 |
-
# if "mindmap" in tasks:
|
| 19 |
-
# updates["mindmap"] = await generate_mindmap(text)
|
| 20 |
|
| 21 |
-
|
| 22 |
|
| 23 |
-
import logging
|
| 24 |
-
from app.services.storage import get_note, update_note
|
| 25 |
-
from app.services.summary_service import generate_summary
|
| 26 |
-
from app.services.mindmap_service import generate_mindmap
|
| 27 |
|
| 28 |
-
async def
|
| 29 |
note = get_note(note_id)
|
| 30 |
if not note:
|
| 31 |
-
logging.warning(f"[enrichment] Note not found: {note_id}")
|
| 32 |
return
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
try:
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from app.services.note_store import get_note, update_note
|
| 3 |
+
from app.services.enrichment.pipeline import run_pipeline
|
| 4 |
+
from app.models.enums import NoteStatus
|
| 5 |
+
from app.utils.time import now_ts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
logger = logging.getLogger(__name__)


async def enrich_note(note_id: str, tasks: list[str]):
    """Run the enrichment pipeline for one note and persist the outcome.

    The note is marked ``processing`` before the pipeline runs, then ``ready``
    on success or ``error`` when anything inside the ``try`` raises.

    Args:
        note_id: Identifier passed to ``get_note`` / ``update_note``.
        tasks: Enrichment task names forwarded to ``run_pipeline``.
    """
    note = get_note(note_id)
    if not note:
        # Leave a trace instead of returning silently, so a missing note can
        # be diagnosed from the logs.
        logger.warning("Enrichment skipped, note not found note_id=%s", note_id)
        return

    # Mark as processing
    update_note(
        note_id,
        {
            "status": NoteStatus.processing,
            "updated_at": now_ts(),
        },
    )

    try:
        # The loaded note itself (not a copy) is handed to the pipeline.
        enriched = await run_pipeline(note, tasks)

        # Only persist known enrichment fields
        updates = {
            field: enriched[field]
            for field in (
                "title",
                "normalized_text",
                "keywords",
                "summary",
                "mindmap",
            )
            if field in enriched
        }

        updates["status"] = NoteStatus.ready
        updates["updated_at"] = now_ts()

        update_note(note_id, updates)

    except Exception:
        logger.exception("Enrichment failed note_id=%s", note_id)
        update_note(
            note_id,
            {
                "status": NoteStatus.error,
                "updated_at": now_ts(),
            },
        )
|
app/main.py
CHANGED
|
@@ -1,9 +1,22 @@
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
-
from app.api.notes import
|
|
|
|
| 3 |
|
| 4 |
-
app = FastAPI(title="Note
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
@app.get("/health")
|
| 8 |
def health():
|
| 9 |
-
return {"status": "ok"}
|
|
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
+
from app.api.notes import notes_text, notes_audio, notes_get, notes_update, notes_regenerate
|
| 3 |
+
from app.api.folders import folders_create, folders_get, folders_update, folders_delete
|
| 4 |
|
| 5 |
+
app = FastAPI(title="Note Service API")

# Notes
app.include_router(notes_text.router)
app.include_router(notes_audio.router)
app.include_router(notes_get.router)
app.include_router(notes_update.router)
app.include_router(notes_regenerate.router)

# Folders
app.include_router(folders_create.router)
app.include_router(folders_get.router)
app.include_router(folders_update.router)
app.include_router(folders_delete.router)
|
| 19 |
|
| 20 |
@app.get("/health")
def health():
    """Liveness endpoint; always returns ``{"status": "ok"}``."""
    return {"status": "ok"}
|
app/models/enums.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from enum import Enum
|
| 2 |
+
|
| 3 |
+
class NoteType(str, Enum):
    """Kind of note; members are also plain strings."""

    audio = "audio"
    text = "text"
|
| 6 |
+
|
| 7 |
+
class NoteStatus(str, Enum):
    """Processing state of a note; members are also plain strings."""

    created = "created"
    processing = "processing"
    ready = "ready"
    error = "error"
|
app/models/folder.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
|
| 3 |
+
class Folder(BaseModel):
    """Schema of a folder record."""

    folder_id: str
    name: str
    color_hex: str | None = None
    user_id: str | None = None
    # Integer timestamps; NOTE(review): unit (seconds vs ms) is set by
    # app.utils.time.now_ts, which is not visible here - confirm.
    created_at: int
    updated_at: int
|
app/models/note.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import Optional, List, Dict
|
| 3 |
+
from app.models.enums import NoteType, NoteStatus
|
| 4 |
+
|
| 5 |
+
class AudioMetadata(BaseModel):
    """Optional audio-specific details attached to a note."""

    duration: Optional[float] = None
    chunks: Optional[list] = None
    sample_rate: Optional[int] = None
    asr_model: Optional[str] = None
|
| 10 |
+
|
| 11 |
+
class NoteMetadata(BaseModel):
    """Container for per-note metadata sections; both are optional."""

    audio: Optional[AudioMetadata] = None
    client: Optional[Dict] = None
|
| 14 |
+
|
| 15 |
+
class Note(BaseModel):
    """Schema of a note record, including optional enrichment output."""

    note_id: str
    type: NoteType

    # Content and enrichment fields; all optional.
    title: Optional[str] = None
    raw_text: Optional[str] = None
    normalized_text: Optional[str] = None
    keywords: Optional[List[str]] = None
    summary: Optional[str] = None
    mindmap: Optional[Dict] = None

    folder_id: Optional[str] = None
    metadata: Optional[NoteMetadata] = None

    status: NoteStatus
    created_at: int
    updated_at: int
|
app/services/{mindmap_service.py → enrichment/mindmap.py}
RENAMED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
import asyncio
|
| 2 |
-
import json
|
| 3 |
-
import logging
|
| 4 |
-
import random
|
| 5 |
import re
|
| 6 |
-
import
|
| 7 |
-
|
| 8 |
-
from
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
try:
|
| 11 |
import google.genai as genai
|
|
@@ -23,8 +23,7 @@ try:
|
|
| 23 |
except Exception:
|
| 24 |
GoogleAPIError = Exception
|
| 25 |
|
| 26 |
-
|
| 27 |
-
_gemini_client = None
|
| 28 |
|
| 29 |
if not genai:
|
| 30 |
logging.warning("[mindmap_service] google.genai not available, mindmap generation will be disabled")
|
|
@@ -32,23 +31,18 @@ elif not GEMINI_API_KEY:
|
|
| 32 |
logging.warning("[mindmap_service] GEMINI_API_KEY is not set, mindmap generation will be disabled")
|
| 33 |
else:
|
| 34 |
try:
|
| 35 |
-
|
| 36 |
-
logging.info(f"[mindmap_service] Initialized google.genai client with model={
|
| 37 |
except Exception as e:
|
| 38 |
logging.exception(f"[mindmap_service] Failed to init google.genai client: {e}")
|
| 39 |
-
|
| 40 |
-
|
| 41 |
|
| 42 |
async def generate_mindmap(text: str) -> dict:
|
| 43 |
-
|
| 44 |
-
Fallback: trả {{}} nếu không có model hoặc lỗi.
|
| 45 |
-
"""
|
| 46 |
-
if not _gemini_client:
|
| 47 |
return {}
|
| 48 |
|
| 49 |
prompt = f"""
|
| 50 |
Bạn là chuyên gia tạo Sơ đồ tư duy. Hãy phân tích văn bản sau và tạo CẤU TRÚC JSON Mindmap.
|
| 51 |
-
|
| 52 |
Yêu cầu:
|
| 53 |
1. Xác định Ý chính làm Root.
|
| 54 |
2. Phân tách ý phụ thành nhánh con (tối đa 3 cấp).
|
|
@@ -57,7 +51,6 @@ Yêu cầu:
|
|
| 57 |
- Root: "#6200EE"
|
| 58 |
- Các nhánh con: sử dụng một trong các màu: "#F59E2B", "#2ECF9A", "#2F9BFF"
|
| 59 |
5. CHỈ TRẢ VỀ JSON, không giải thích thêm.
|
| 60 |
-
|
| 61 |
Cấu trúc JSON bắt buộc:
|
| 62 |
{{
|
| 63 |
"root": {{
|
|
@@ -85,8 +78,8 @@ Văn bản:
|
|
| 85 |
last_exc = None
|
| 86 |
for attempt in range(1, MAX_RETRIES + 1):
|
| 87 |
try:
|
| 88 |
-
resp =
|
| 89 |
-
model=
|
| 90 |
contents=prompt,
|
| 91 |
)
|
| 92 |
return resp.text or ""
|
|
@@ -160,4 +153,3 @@ Văn bản:
|
|
| 160 |
return fallback
|
| 161 |
except Exception:
|
| 162 |
return {}
|
| 163 |
-
|
|
|
|
| 1 |
import asyncio
|
|
|
|
|
|
|
|
|
|
| 2 |
import re
|
| 3 |
+
import logging
|
| 4 |
+
from random import random
|
| 5 |
+
from time import time
|
| 6 |
+
import google.genai as genai
|
| 7 |
+
import json
|
| 8 |
+
from app.config import GEMINI_API_KEY, GEMINI_MODEL
|
| 9 |
|
| 10 |
try:
|
| 11 |
import google.genai as genai
|
|
|
|
| 23 |
except Exception:
|
| 24 |
GoogleAPIError = Exception
|
| 25 |
|
| 26 |
+
gemini_client = None
|
|
|
|
| 27 |
|
| 28 |
if not genai:
|
| 29 |
logging.warning("[mindmap_service] google.genai not available, mindmap generation will be disabled")
|
|
|
|
| 31 |
logging.warning("[mindmap_service] GEMINI_API_KEY is not set, mindmap generation will be disabled")
|
| 32 |
else:
|
| 33 |
try:
|
| 34 |
+
gemini_client = genai.Client(api_key=GEMINI_API_KEY)
|
| 35 |
+
logging.info(f"[mindmap_service] Initialized google.genai client with model={GEMINI_MODEL}")
|
| 36 |
except Exception as e:
|
| 37 |
logging.exception(f"[mindmap_service] Failed to init google.genai client: {e}")
|
| 38 |
+
gemini_client = None
|
|
|
|
| 39 |
|
| 40 |
async def generate_mindmap(text: str) -> dict:
|
| 41 |
+
if not text:
|
|
|
|
|
|
|
|
|
|
| 42 |
return {}
|
| 43 |
|
| 44 |
prompt = f"""
|
| 45 |
Bạn là chuyên gia tạo Sơ đồ tư duy. Hãy phân tích văn bản sau và tạo CẤU TRÚC JSON Mindmap.
|
|
|
|
| 46 |
Yêu cầu:
|
| 47 |
1. Xác định Ý chính làm Root.
|
| 48 |
2. Phân tách ý phụ thành nhánh con (tối đa 3 cấp).
|
|
|
|
| 51 |
- Root: "#6200EE"
|
| 52 |
- Các nhánh con: sử dụng một trong các màu: "#F59E2B", "#2ECF9A", "#2F9BFF"
|
| 53 |
5. CHỈ TRẢ VỀ JSON, không giải thích thêm.
|
|
|
|
| 54 |
Cấu trúc JSON bắt buộc:
|
| 55 |
{{
|
| 56 |
"root": {{
|
|
|
|
| 78 |
last_exc = None
|
| 79 |
for attempt in range(1, MAX_RETRIES + 1):
|
| 80 |
try:
|
| 81 |
+
resp = gemini_client.models.generate_content(
|
| 82 |
+
model=GEMINI_MODEL,
|
| 83 |
contents=prompt,
|
| 84 |
)
|
| 85 |
return resp.text or ""
|
|
|
|
| 153 |
return fallback
|
| 154 |
except Exception:
|
| 155 |
return {}
|
|
|
app/services/enrichment/normalize.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import os
|
| 3 |
+
import logging
|
| 4 |
+
import json
|
| 5 |
+
import re
|
| 6 |
+
import time
|
| 7 |
+
import random
|
| 8 |
+
from app.config import GEMINI_API_KEY, GEMINI_MODEL
|
| 9 |
+
|
| 10 |
+
try:
|
| 11 |
+
import google.genai as genai
|
| 12 |
+
try:
|
| 13 |
+
from google.genai import errors as genai_errors
|
| 14 |
+
except Exception:
|
| 15 |
+
genai_errors = None
|
| 16 |
+
except Exception:
|
| 17 |
+
genai = None
|
| 18 |
+
genai_errors = None
|
| 19 |
+
logging.warning("[normalize_service] google.genai module not found; normalization disabled")
|
| 20 |
+
|
| 21 |
+
try:
|
| 22 |
+
from google.api_core.exceptions import GoogleAPIError
|
| 23 |
+
except Exception:
|
| 24 |
+
GoogleAPIError = Exception
|
| 25 |
+
|
| 26 |
+
# Module-level Gemini client. Stays None when the SDK is missing, the API key
# is empty, or client construction raises.
gemini_client = None

if not genai:
    logging.warning("[normalize_service] google.genai not available, normalization will be disabled")
elif not GEMINI_API_KEY:
    logging.warning("[normalize_service] GEMINI_API_KEY is not set, normalization will be disabled")
else:
    try:
        gemini_client = genai.Client(api_key=GEMINI_API_KEY)
        logging.info(f"[normalize_service] Initialized google.genai client with model={GEMINI_MODEL}")
    except Exception as e:
        logging.exception(f"[normalize_service] Failed to init google.genai client: {e}")
        gemini_client = None
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _local_normalize(raw_text: str) -> str:
    """Best-effort offline normalization used when Gemini gives no usable text.

    Collapses whitespace runs to single spaces, appends a period when the text
    does not already end in ``.``, ``!`` or ``?``, and upper-cases the first
    character of every sentence. Returns ``raw_text`` unchanged on any error.
    """
    try:
        text = raw_text.strip()
        text = re.sub(r"\s+", " ", text)

        if text and text[-1] not in ".!?":
            text += "."

        # The capture group keeps the separators, so ``parts`` alternates
        # sentence, separator, sentence, ...
        parts = re.split(r'([.!?]\s+)', text)
        out = ""
        for i in range(0, len(parts), 2):
            sentence = parts[i].strip()
            sep = parts[i + 1] if i + 1 < len(parts) else ""
            if sentence:
                sentence = sentence[0].upper() + sentence[1:]
            out += sentence + sep
        return out

    except Exception:
        return raw_text


async def normalize_text(raw_text: str) -> str:
    """Normalize a raw (ASR) transcript with Gemini, falling back to local rules.

    Args:
        raw_text: Transcript to clean up. Falsy input is returned unchanged.

    Returns:
        The model's normalized text when the call succeeds and yields non-empty
        output; otherwise the result of the local fallback. Errors from the
        remote call are logged, never raised to the caller.
    """
    if not raw_text:
        return raw_text

    if not gemini_client:
        # No client was created at import time. The google.genai SDK is
        # client-based (it has no module-level GenerativeModel), so there is
        # nothing to call remotely: go straight to the local fallback.
        return _local_normalize(raw_text)

    prompt = f"""
Bạn là một hệ thống Xử lý Hậu kỳ NLP (NLP Post-Processing) Tiếng Việt.
Đầu vào là văn bản thô (raw transcript), có thể thiếu dấu câu và sai chính tả do nhận dạng giọng nói.

Nhiệm vụ:
- Sửa lỗi chính tả do ASR.
- Thêm dấu câu phù hợp.
- Viết hoa đúng chuẩn tiếng Việt (đầu câu, tên riêng nếu suy luận được).
- Loại bỏ các từ/cụm từ bị lặp lại vô nghĩa.
- Giữ nguyên nội dung và ý nghĩa gốc, không rút gọn, không thêm thông tin mới.

YÊU CẦU ĐẦU RA:
- Chỉ trả về văn bản đã chuẩn hóa
- KHÔNG JSON, KHÔNG giải thích, KHÔNG markdown

Văn bản đầu vào:
\"\"\"{raw_text}\"\"\"
"""

    # We are inside a coroutine, so the running loop is the right handle.
    loop = asyncio.get_running_loop()

    MAX_RETRIES = 3
    BASE_DELAY = 1.0

    def call():
        # Runs in a worker thread, so the blocking SDK call and time.sleep
        # below do not stall the event loop.
        last_exc = None
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                resp = gemini_client.models.generate_content(
                    model=GEMINI_MODEL,
                    contents=prompt,
                )
                return resp.text or ""
            except Exception as e:
                last_exc = e
                msg = str(e)
                # Retry only on overload-style errors, with exponential
                # backoff plus jitter.
                if "503" in msg or "UNAVAILABLE" in msg:
                    if attempt < MAX_RETRIES:
                        delay = BASE_DELAY * (2 ** (attempt - 1))
                        delay += random.uniform(0, 0.5 * delay)
                        logging.warning(
                            f"[normalize_service] model overloaded "
                            f"(attempt {attempt}/{MAX_RETRIES}), retrying after {delay:.2f}s"
                        )
                        time.sleep(delay)
                        continue
                logging.exception(
                    f"[normalize_service] normalize call failed on attempt {attempt}: {e}"
                )
                break

        if last_exc:
            raise last_exc
        return ""

    try:
        raw = await loop.run_in_executor(None, call)

        if raw:
            text = raw.strip()

            # Defensive cleanup in case the model still wraps the answer in a
            # markdown fence or quotes.
            text = re.sub(r"^```.*?\n", "", text, flags=re.DOTALL)
            text = re.sub(r"```$", "", text)
            text = text.strip('"').strip("'").strip()

            if text:
                return text

    except GoogleAPIError as e:
        logging.error(f"[normalize_service] Gemini API error: {e}")
    except Exception as e:
        logging.exception(f"[normalize_service] normalize_text failed: {e}")

    # ===== fallback: best-effort local normalization =====
    return _local_normalize(raw_text)
|
app/services/enrichment/pipeline.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.services.enrichment.normalize import normalize_text
|
| 2 |
+
from app.services.enrichment.title_keywords import extract_title_and_keywords
|
| 3 |
+
from app.services.enrichment.summary import generate_summary
|
| 4 |
+
from app.services.enrichment.mindmap import generate_mindmap
|
| 5 |
+
|
| 6 |
+
async def run_pipeline(note: dict, tasks: list[str]):
    """Run the requested enrichment steps over ``note`` and return it.

    The note dict is mutated in place. Steps run in a fixed order
    (normalize -> title/keywords -> summary -> mindmap); every later step
    works on the normalized text when normalization ran, otherwise on the
    raw text. Each step is best-effort: a failure is logged and the
    remaining steps still run.

    Args:
        note: Note dict; ``note["raw_text"]`` is the input text.
        tasks: Names of the steps to run. Recognised names are
            ``"normalize"``, ``"keywords"``, ``"summary"`` and ``"mindmap"``.
            ``None`` is treated as "no steps".

    Returns:
        The same ``note`` object, possibly with ``normalized_text``,
        ``title``, ``keywords``, ``summary`` and ``mindmap`` set.
    """
    # Local import: the visible module header has no top-level logging import.
    import logging

    tasks = tasks or ()

    raw_text = note.get("raw_text") or ""
    if not raw_text.strip():
        # Nothing to process.
        return note

    text = raw_text

    # 1) Normalize
    if "normalize" in tasks:
        try:
            text = await normalize_text(text)
        except Exception:
            # Fail-safe: `text` still holds the raw text at this point.
            logging.exception("[pipeline] normalize step failed; using raw text")
        note["normalized_text"] = text

    # 2) Title + keywords (single AI call)
    if "keywords" in tasks:
        try:
            title, keywords = await extract_title_and_keywords(text)
        except Exception:
            # Fail-safe: leave any existing title/keywords untouched.
            logging.exception("[pipeline] title/keywords step failed; skipping")
        else:
            if title:
                note["title"] = title
            if keywords:
                note["keywords"] = keywords

    # 3) Summary
    if "summary" in tasks:
        try:
            note["summary"] = await generate_summary(text)
        except Exception:
            logging.exception("[pipeline] summary step failed; skipping")

    # 4) Mindmap
    if "mindmap" in tasks:
        try:
            note["mindmap"] = await generate_mindmap(text)
        except Exception:
            logging.exception("[pipeline] mindmap step failed; skipping")

    return note
|
app/services/{summary_service.py → enrichment/summary.py}
RENAMED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
import asyncio
|
|
|
|
| 2 |
import logging
|
| 3 |
import random
|
| 4 |
-
import re
|
| 5 |
import time
|
|
|
|
| 6 |
|
| 7 |
-
from app.config import GEMINI_API_KEY
|
| 8 |
|
| 9 |
try:
|
| 10 |
import google.genai as genai
|
|
@@ -15,45 +16,41 @@ try:
|
|
| 15 |
except Exception:
|
| 16 |
genai = None
|
| 17 |
genai_errors = None
|
| 18 |
-
logging.warning("[summary_service] google.genai module not found;
|
| 19 |
|
| 20 |
try:
|
| 21 |
from google.api_core.exceptions import GoogleAPIError
|
| 22 |
except Exception:
|
| 23 |
GoogleAPIError = Exception
|
| 24 |
|
| 25 |
-
|
| 26 |
-
_gemini_client = None
|
| 27 |
|
| 28 |
if not genai:
|
| 29 |
-
logging.warning("[summary_service] google.genai not available, summary will be
|
| 30 |
elif not GEMINI_API_KEY:
|
| 31 |
-
logging.warning("[summary_service] GEMINI_API_KEY is not set, summary will be
|
| 32 |
else:
|
| 33 |
try:
|
| 34 |
-
|
| 35 |
-
logging.info(f"[summary_service] Initialized google.genai client with model={
|
| 36 |
except Exception as e:
|
| 37 |
logging.exception(f"[summary_service] Failed to init google.genai client: {e}")
|
| 38 |
-
|
| 39 |
-
|
| 40 |
|
| 41 |
async def generate_summary(text: str) -> str:
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
if not
|
| 46 |
return ""
|
| 47 |
|
| 48 |
prompt = f"""
|
| 49 |
Bạn là chuyên gia tóm tắt. Hãy tóm tắt văn bản sau thành một đoạn văn duy nhất.
|
| 50 |
-
|
| 51 |
Yêu cầu:
|
| 52 |
1. Viết khoảng 3-5 câu, tổng hợp đầy đủ chủ đề và các ý chính.
|
| 53 |
2. Viết liền mạch, KHÔNG xuống dòng, KHÔNG dùng gạch đầu dòng hay đánh số.
|
| 54 |
3. Chỉ dựa trên thông tin được cung cấp, tuyệt đối KHÔNG tự thêm thông tin bên ngoài.
|
| 55 |
4. Trả về VĂN BẢN THUẦN (plain text), không bọc trong ``` hoặc JSON.
|
| 56 |
-
|
| 57 |
Văn bản:
|
| 58 |
\"\"\"{text}\"\"\"
|
| 59 |
"""
|
|
@@ -67,8 +64,8 @@ Văn bản:
|
|
| 67 |
last_exc = None
|
| 68 |
for attempt in range(1, MAX_RETRIES + 1):
|
| 69 |
try:
|
| 70 |
-
resp =
|
| 71 |
-
model=
|
| 72 |
contents=prompt,
|
| 73 |
)
|
| 74 |
return (resp.text or "").strip()
|
|
@@ -121,4 +118,4 @@ Văn bản:
|
|
| 121 |
logging.info("[summary_service] Returning fallback summary after errors")
|
| 122 |
return fallback
|
| 123 |
except Exception:
|
| 124 |
-
return ""
|
|
|
|
| 1 |
import asyncio
|
| 2 |
+
import os
|
| 3 |
import logging
|
| 4 |
import random
|
|
|
|
| 5 |
import time
|
| 6 |
+
import re
|
| 7 |
|
| 8 |
+
from app.config import GEMINI_API_KEY, GEMINI_MODEL
|
| 9 |
|
| 10 |
try:
|
| 11 |
import google.genai as genai
|
|
|
|
| 16 |
except Exception:
|
| 17 |
genai = None
|
| 18 |
genai_errors = None
|
| 19 |
+
logging.warning("[summary_service] google.genai module not found; summary generation disabled")
|
| 20 |
|
| 21 |
try:
|
| 22 |
from google.api_core.exceptions import GoogleAPIError
|
| 23 |
except Exception:
|
| 24 |
GoogleAPIError = Exception
|
| 25 |
|
| 26 |
+
gemini_client = None
|
|
|
|
| 27 |
|
| 28 |
if not genai:
|
| 29 |
+
logging.warning("[summary_service] google.genai not available, summary generation will be disabled")
|
| 30 |
elif not GEMINI_API_KEY:
|
| 31 |
+
logging.warning("[summary_service] GEMINI_API_KEY is not set, summary generation will be disabled")
|
| 32 |
else:
|
| 33 |
try:
|
| 34 |
+
gemini_client = genai.Client(api_key=GEMINI_API_KEY)
|
| 35 |
+
logging.info(f"[summary_service] Initialized google.genai client with model={GEMINI_MODEL}")
|
| 36 |
except Exception as e:
|
| 37 |
logging.exception(f"[summary_service] Failed to init google.genai client: {e}")
|
| 38 |
+
gemini_client = None
|
|
|
|
| 39 |
|
| 40 |
async def generate_summary(text: str) -> str:
|
| 41 |
+
if not gemini_client:
|
| 42 |
+
return ""
|
| 43 |
+
|
| 44 |
+
if not text:
|
| 45 |
return ""
|
| 46 |
|
| 47 |
prompt = f"""
|
| 48 |
Bạn là chuyên gia tóm tắt. Hãy tóm tắt văn bản sau thành một đoạn văn duy nhất.
|
|
|
|
| 49 |
Yêu cầu:
|
| 50 |
1. Viết khoảng 3-5 câu, tổng hợp đầy đủ chủ đề và các ý chính.
|
| 51 |
2. Viết liền mạch, KHÔNG xuống dòng, KHÔNG dùng gạch đầu dòng hay đánh số.
|
| 52 |
3. Chỉ dựa trên thông tin được cung cấp, tuyệt đối KHÔNG tự thêm thông tin bên ngoài.
|
| 53 |
4. Trả về VĂN BẢN THUẦN (plain text), không bọc trong ``` hoặc JSON.
|
|
|
|
| 54 |
Văn bản:
|
| 55 |
\"\"\"{text}\"\"\"
|
| 56 |
"""
|
|
|
|
| 64 |
last_exc = None
|
| 65 |
for attempt in range(1, MAX_RETRIES + 1):
|
| 66 |
try:
|
| 67 |
+
resp = gemini_client.models.generate_content(
|
| 68 |
+
model=GEMINI_MODEL,
|
| 69 |
contents=prompt,
|
| 70 |
)
|
| 71 |
return (resp.text or "").strip()
|
|
|
|
| 118 |
logging.info("[summary_service] Returning fallback summary after errors")
|
| 119 |
return fallback
|
| 120 |
except Exception:
|
| 121 |
+
return ""
|
app/services/enrichment/title_keywords.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import logging
|
| 3 |
+
import json
|
| 4 |
+
import asyncio
|
| 5 |
+
import time
|
| 6 |
+
import random
|
| 7 |
+
import re
|
| 8 |
+
from app.config import GEMINI_API_KEY, GEMINI_MODEL
|
| 9 |
+
|
| 10 |
+
try:
|
| 11 |
+
import google.genai as genai
|
| 12 |
+
try:
|
| 13 |
+
from google.genai import errors as genai_errors
|
| 14 |
+
except Exception:
|
| 15 |
+
genai_errors = None
|
| 16 |
+
except Exception:
|
| 17 |
+
genai = None
|
| 18 |
+
genai_errors = None
|
| 19 |
+
logging.warning("[keywords_service] google.genai module not found; keyword extraction disabled")
|
| 20 |
+
|
| 21 |
+
try:
|
| 22 |
+
from google.api_core.exceptions import GoogleAPIError
|
| 23 |
+
except Exception:
|
| 24 |
+
GoogleAPIError = Exception
|
| 25 |
+
|
| 26 |
+
gemini_client = None
|
| 27 |
+
|
| 28 |
+
if not genai:
|
| 29 |
+
logging.warning("[keywords_service] google.genai not available, keyword extraction will be disabled")
|
| 30 |
+
elif not GEMINI_API_KEY:
|
| 31 |
+
logging.warning("[keywords_service] GEMINI_API_KEY is not set, keyword extraction will be disabled")
|
| 32 |
+
else:
|
| 33 |
+
try:
|
| 34 |
+
gemini_client = genai.Client(api_key=GEMINI_API_KEY)
|
| 35 |
+
logging.info(f"[keywords_service] Initialized google.genai client with model={GEMINI_MODEL}")
|
| 36 |
+
except Exception as e:
|
| 37 |
+
logging.exception(f"[keywords_service] Failed to init google.genai client: {e}")
|
| 38 |
+
gemini_client = None
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
async def extract_title_and_keywords(text: str) -> tuple[str | None, list[str]]:
|
| 42 |
+
if not text or not text.strip():
|
| 43 |
+
return None, []
|
| 44 |
+
|
| 45 |
+
if not gemini_client and not genai:
|
| 46 |
+
# AI not available → safe fallback
|
| 47 |
+
return None, []
|
| 48 |
+
|
| 49 |
+
prompt = f"""
|
| 50 |
+
Bạn là một hệ thống Xử lý Hậu kỳ NLP (NLP Post-Processing) Tiếng Việt.
|
| 51 |
+
|
| 52 |
+
Nhiệm vụ:
|
| 53 |
+
1. Sinh **tiêu đề (title)** ngắn gọn phản ánh đúng chủ đề chính của văn bản:
|
| 54 |
+
- Độ dài tối đa **10 từ**
|
| 55 |
+
- Mang tính mô tả, trung tính, phù hợp làm tiêu đề ghi chú (note)
|
| 56 |
+
- KHÔNG giật tít, KHÔNG suy diễn quá mức
|
| 57 |
+
|
| 58 |
+
2. Rút trích các **từ khóa quan trọng** phản ánh đúng **chủ đề và nội dung chính** của văn bản.
|
| 59 |
+
- Mỗi từ khóa dài từ **1–4 từ**.
|
| 60 |
+
- Ưu tiên danh từ, cụm danh từ, thuật ngữ, khái niệm chính.
|
| 61 |
+
- Loại bỏ từ chung chung, từ đệm, từ cảm thán, từ lặp nghĩa.
|
| 62 |
+
- KHÔNG diễn giải, KHÔNG tóm tắt, KHÔNG chuẩn hóa lại văn bản.
|
| 63 |
+
- KHÔNG tạo từ khóa không xuất hiện hoặc không suy luận hợp lý từ văn bản.
|
| 64 |
+
|
| 65 |
+
Quy tắc:
|
| 66 |
+
- Số lượng từ khóa: 3–10 (tùy độ dài và nội dung văn bản).
|
| 67 |
+
- Giữ nguyên chữ thường/hoa theo cách viết phổ biến.
|
| 68 |
+
- KHÔNG trùng lặp từ khóa.
|
| 69 |
+
- KHÔNG sắp xếp theo bảng chữ cái; ưu tiên theo mức độ quan trọng.
|
| 70 |
+
|
| 71 |
+
Văn bản đầu vào:
|
| 72 |
+
\"\"\"{text}\"\"\"
|
| 73 |
+
|
| 74 |
+
YÊU CẦU ĐẦU RA:
|
| 75 |
+
- Chỉ trả về **JSON hợp lệ**
|
| 76 |
+
- KHÔNG giải thích
|
| 77 |
+
- KHÔNG markdown
|
| 78 |
+
- KHÔNG thêm trường khác ngoài schema dưới đây
|
| 79 |
+
|
| 80 |
+
Cấu trúc JSON bắt buộc:
|
| 81 |
+
{{
|
| 82 |
+
"title": "Tiêu đề ngắn gọn",
|
| 83 |
+
"keywords": ["Từ khóa 1", "Từ khóa 2", "..."]
|
| 84 |
+
}}
|
| 85 |
+
"""
|
| 86 |
+
|
| 87 |
+
loop = asyncio.get_event_loop()
|
| 88 |
+
|
| 89 |
+
MAX_RETRIES = 3
|
| 90 |
+
BASE_DELAY = 1.0
|
| 91 |
+
|
| 92 |
+
def call():
|
| 93 |
+
last_exc = None
|
| 94 |
+
for attempt in range(1, MAX_RETRIES + 1):
|
| 95 |
+
try:
|
| 96 |
+
if gemini_client:
|
| 97 |
+
resp = gemini_client.models.generate_content(
|
| 98 |
+
model=GEMINI_MODEL,
|
| 99 |
+
contents=prompt,
|
| 100 |
+
)
|
| 101 |
+
return getattr(resp, "text", "") or ""
|
| 102 |
+
else:
|
| 103 |
+
model = genai.GenerativeModel(GEMINI_MODEL) if genai else None
|
| 104 |
+
if model:
|
| 105 |
+
resp = model.generate_content(prompt)
|
| 106 |
+
return getattr(resp, "text", "") or ""
|
| 107 |
+
return ""
|
| 108 |
+
except Exception as e:
|
| 109 |
+
last_exc = e
|
| 110 |
+
is_server_error = False
|
| 111 |
+
try:
|
| 112 |
+
if genai_errors and isinstance(e, genai_errors.ServerError):
|
| 113 |
+
is_server_error = True
|
| 114 |
+
except Exception:
|
| 115 |
+
pass
|
| 116 |
+
|
| 117 |
+
msg = str(e)
|
| 118 |
+
if "503" in msg or "UNAVAILABLE" in msg or is_server_error:
|
| 119 |
+
if attempt < MAX_RETRIES:
|
| 120 |
+
delay = BASE_DELAY * (2 ** (attempt - 1))
|
| 121 |
+
delay = delay + random.uniform(0, 0.5 * delay)
|
| 122 |
+
logging.warning(f"[keywords_service] model overloaded (attempt {attempt}/{MAX_RETRIES}), retrying after {delay:.2f}s")
|
| 123 |
+
time.sleep(delay)
|
| 124 |
+
continue
|
| 125 |
+
logging.exception(f"[keywords_service] extract_keywords call failed on attempt {attempt}: {e}")
|
| 126 |
+
break
|
| 127 |
+
|
| 128 |
+
if last_exc:
|
| 129 |
+
raise last_exc
|
| 130 |
+
return ""
|
| 131 |
+
|
| 132 |
+
try:
|
| 133 |
+
raw = await loop.run_in_executor(None, call)
|
| 134 |
+
title, keywords = _parse_response(raw)
|
| 135 |
+
return title, keywords
|
| 136 |
+
except GoogleAPIError as e:
|
| 137 |
+
logging.error("[title_keywords_service] Gemini API error: %s", e)
|
| 138 |
+
except Exception as e:
|
| 139 |
+
logging.exception("[title_keywords_service] extract failed: %s", e)
|
| 140 |
+
|
| 141 |
+
return None, []
|
| 142 |
+
|
| 143 |
+
def _parse_response(raw: str) -> tuple[str | None, list[str]]:
|
| 144 |
+
if not raw:
|
| 145 |
+
return None, []
|
| 146 |
+
|
| 147 |
+
raw = raw.strip()
|
| 148 |
+
|
| 149 |
+
# Try extracting JSON block
|
| 150 |
+
start = raw.find("{")
|
| 151 |
+
end = raw.rfind("}")
|
| 152 |
+
|
| 153 |
+
if start != -1 and end != -1 and end > start:
|
| 154 |
+
raw_json = raw[start : end + 1]
|
| 155 |
+
else:
|
| 156 |
+
raw_json = raw
|
| 157 |
+
|
| 158 |
+
try:
|
| 159 |
+
parsed = json.loads(raw_json)
|
| 160 |
+
except Exception as e:
|
| 161 |
+
logging.warning(
|
| 162 |
+
"[title_keywords_service] Failed to parse JSON: %s | raw=%r",
|
| 163 |
+
e,
|
| 164 |
+
raw[:300],
|
| 165 |
+
)
|
| 166 |
+
return None, []
|
| 167 |
+
|
| 168 |
+
title = parsed.get("title")
|
| 169 |
+
keywords = parsed.get("keywords")
|
| 170 |
+
|
| 171 |
+
# Validate title
|
| 172 |
+
if not isinstance(title, str) or not title.strip():
|
| 173 |
+
title = None
|
| 174 |
+
else:
|
| 175 |
+
title = title.strip()
|
| 176 |
+
|
| 177 |
+
# Validate keywords
|
| 178 |
+
if not isinstance(keywords, list):
|
| 179 |
+
keywords = []
|
| 180 |
+
else:
|
| 181 |
+
keywords = [
|
| 182 |
+
k.strip()
|
| 183 |
+
for k in keywords
|
| 184 |
+
if isinstance(k, str) and k.strip()
|
| 185 |
+
]
|
| 186 |
+
|
| 187 |
+
return title, keywords
|
app/services/folder_store.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.infra.firebase import db
|
| 2 |
+
|
| 3 |
+
COL = "folders"
|
| 4 |
+
|
| 5 |
+
def create_folder(folder: dict):
    """Store ``folder`` as a document keyed by its ``folder_id``.

    Uses ``set``, so the write is unconditional (no existence check is made
    here).
    """
    folder_ref = db.collection(COL).document(folder["folder_id"])
    folder_ref.set(folder)
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def get_folder(folder_id: str):
    """Return the folder document as a dict, or ``None`` if it does not exist."""
    snapshot = db.collection(COL).document(folder_id).get()
    if not snapshot.exists:
        return None
    return snapshot.to_dict()
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def list_folders():
    """Return every document in the folders collection as a list of dicts."""
    folders = []
    for snapshot in db.collection(COL).stream():
        folders.append(snapshot.to_dict())
    return folders
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def update_folder(folder_id: str, data: dict):
    """Apply a partial update to an existing folder document.

    Args:
        folder_id: Id of the folder document to update.
        data: Fields to change. An empty or ``None`` payload is a no-op,
            matching the guard in ``update_note``.
            NOTE(review): Firestore is believed to reject an empty update
            payload — confirm against the client library.
    """
    if not data:
        # Nothing to write; skip the round trip to the store.
        return
    db.collection(COL).document(folder_id).update(data)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def delete_folder(folder_id: str):
    """Delete the folder document with the given id."""
    folder_ref = db.collection(COL).document(folder_id)
    folder_ref.delete()
|
app/services/note_store.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.infra.firebase import db
|
| 2 |
+
|
| 3 |
+
COL = "notes"
|
| 4 |
+
|
| 5 |
+
def create_note(note: dict):
    """Store a new note document keyed by ``note["note_id"]``.

    Raises:
        ValueError: If a document with that id already exists.
    """
    note_id = note["note_id"]
    note_ref = db.collection(COL).document(note_id)

    already_there = note_ref.get().exists
    if already_there:
        raise ValueError(f"Note already exists: {note['note_id']}")

    note_ref.set(note)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def update_note(note_id: str, data: dict):
    """Apply a partial update to a note; an empty payload is a no-op."""
    if data:
        note_ref = db.collection(COL).document(note_id)
        note_ref.update(data)
|
| 16 |
+
|
| 17 |
+
def get_note(note_id: str):
    """Return the note document as a dict, or ``None`` if it does not exist."""
    snapshot = db.collection(COL).document(note_id).get()
    if not snapshot.exists:
        return None
    return snapshot.to_dict()
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def list_notes(folder_id: str | None = None):
    """List notes as dicts, optionally restricted to one folder.

    Args:
        folder_id: When truthy, only notes whose ``folder_id`` field equals
            this value are returned; otherwise all notes are returned.
    """
    query = db.collection(COL)
    if folder_id:
        query = query.where("folder_id", "==", folder_id)

    notes = []
    for snapshot in query.stream():
        notes.append(snapshot.to_dict())
    return notes
|
app/services/storage.py
DELETED
|
@@ -1,33 +0,0 @@
|
|
| 1 |
-
from datetime import datetime
|
| 2 |
-
from app.services.firebase import db
|
| 3 |
-
|
| 4 |
-
COLLECTION = "notes"
|
| 5 |
-
|
| 6 |
-
import logging
|
| 7 |
-
|
| 8 |
-
def create_note(note_id: str, payload: dict):
|
| 9 |
-
now = datetime.utcnow()
|
| 10 |
-
payload.update({
|
| 11 |
-
"status": "created",
|
| 12 |
-
"created_at": now,
|
| 13 |
-
"updated_at": now
|
| 14 |
-
})
|
| 15 |
-
logging.info(f"[NoteService] create_note: id={note_id}, keys={list(payload.keys())}")
|
| 16 |
-
db.collection(COLLECTION).document(note_id).set(payload)
|
| 17 |
-
logging.info(f"[NoteService] create_note: saved id={note_id}")
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
def update_note(note_id: str, data: dict = None, status: str = None):
|
| 21 |
-
updates = {"updated_at": datetime.utcnow()}
|
| 22 |
-
if data:
|
| 23 |
-
updates.update(data)
|
| 24 |
-
if status:
|
| 25 |
-
updates["status"] = status
|
| 26 |
-
|
| 27 |
-
db.collection(COLLECTION).document(note_id).update(updates)
|
| 28 |
-
|
| 29 |
-
def get_note(note_id: str):
|
| 30 |
-
logging.info(f"[NoteService] get_note: id={note_id}")
|
| 31 |
-
doc = db.collection(COLLECTION).document(note_id).get()
|
| 32 |
-
logging.info(f"[NoteService] get_note: exists={doc.exists}")
|
| 33 |
-
return doc.to_dict() if doc.exists else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/utils/id.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import uuid
|
| 2 |
+
|
| 3 |
+
def new_id() -> str:
    """Return a fresh random identifier: a UUID4 as 32 lowercase hex characters."""
    fresh = uuid.uuid4()
    return fresh.hex
|
app/utils/time.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
|
| 3 |
+
def now_ts() -> int:
    """Return the current wall-clock time as whole milliseconds since the Unix epoch."""
    seconds = time.time()
    millis = seconds * 1000
    return int(millis)
|
requirements.txt
CHANGED
|
@@ -2,3 +2,4 @@ fastapi
|
|
| 2 |
uvicorn
|
| 3 |
google-genai
|
| 4 |
firebase-admin
|
|
|
|
|
|
| 2 |
uvicorn
|
| 3 |
google-genai
|
| 4 |
firebase-admin
|
| 5 |
+
pydantic
|