Spaces:
Running
Running
Commit
·
0fee802
1
Parent(s):
7874196
Update broken imports
Browse files
- app.py +1 -1
- utils/api/rotator.py +1 -2
- utils/api/router.py +1 -2
- utils/ingestion/caption.py +1 -2
- utils/ingestion/chunker.py +1 -1
- utils/ingestion/parser.py +1 -1
- utils/rag/embeddings.py +1 -2
- utils/rag/rag.py +1 -1
app.py
CHANGED
|
@@ -19,7 +19,7 @@ from pymongo.errors import PyMongoError, ConnectionFailure, ServerSelectionTimeo
|
|
| 19 |
|
| 20 |
from utils.api.rotator import APIKeyRotator
|
| 21 |
from utils.ingestion.parser import parse_pdf_bytes, parse_docx_bytes
|
| 22 |
-
from utils.caption import BlipCaptioner
|
| 23 |
from utils.ingestion.chunker import build_cards_from_pages
|
| 24 |
from utils.rag.embeddings import EmbeddingClient
|
| 25 |
from utils.rag.rag import RAGStore, ensure_indexes
|
|
|
|
| 19 |
|
| 20 |
from utils.api.rotator import APIKeyRotator
|
| 21 |
from utils.ingestion.parser import parse_pdf_bytes, parse_docx_bytes
|
| 22 |
+
from utils.ingestion.caption import BlipCaptioner
|
| 23 |
from utils.ingestion.chunker import build_cards_from_pages
|
| 24 |
from utils.rag.embeddings import EmbeddingClient
|
| 25 |
from utils.rag.rag import RAGStore, ensure_indexes
|
utils/api/rotator.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
| 1 |
# ────────────────────────────── utils/rotator.py ──────────────────────────────
|
| 2 |
import os
|
| 3 |
import itertools
|
| 4 |
-
import
|
| 5 |
-
from .logger import get_logger
|
| 6 |
from typing import Optional
|
| 7 |
|
| 8 |
import httpx
|
|
|
|
| 1 |
# ────────────────────────────── utils/rotator.py ──────────────────────────────
|
| 2 |
import os
|
| 3 |
import itertools
|
| 4 |
+
from ..logger import get_logger
|
|
|
|
| 5 |
from typing import Optional
|
| 6 |
|
| 7 |
import httpx
|
utils/api/router.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
# ────────────────────────────── utils/router.py ──────────────────────────────
|
| 2 |
import os
|
| 3 |
-
import
|
| 4 |
-
from .logger import get_logger
|
| 5 |
from typing import Dict, Any
|
| 6 |
from .rotator import robust_post_json, APIKeyRotator
|
| 7 |
|
|
|
|
| 1 |
# ────────────────────────────── utils/router.py ──────────────────────────────
|
| 2 |
import os
|
| 3 |
+
from ..logger import get_logger
|
|
|
|
| 4 |
from typing import Dict, Any
|
| 5 |
from .rotator import robust_post_json, APIKeyRotator
|
| 6 |
|
utils/ingestion/caption.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
| 1 |
# ────────────────────────────── utils/caption.py ──────────────────────────────
|
| 2 |
from typing import Optional
|
| 3 |
from PIL import Image
|
| 4 |
-
import
|
| 5 |
-
from utils.logger import get_logger
|
| 6 |
|
| 7 |
# Use transformers BLIP base (CPU friendly)
|
| 8 |
try:
|
|
|
|
| 1 |
# ────────────────────────────── utils/caption.py ──────────────────────────────
|
| 2 |
from typing import Optional
|
| 3 |
from PIL import Image
|
| 4 |
+
from ..logger import get_logger
|
|
|
|
| 5 |
|
| 6 |
# Use transformers BLIP base (CPU friendly)
|
| 7 |
try:
|
utils/ingestion/chunker.py
CHANGED
|
@@ -3,7 +3,7 @@ import re
|
|
| 3 |
from typing import List, Dict, Any
|
| 4 |
from utils.service.summarizer import cheap_summarize, clean_chunk_text
|
| 5 |
from utils.service.common import split_sentences, slugify
|
| 6 |
-
from
|
| 7 |
|
| 8 |
# Enhanced semantic chunker with overlap and better structure:
|
| 9 |
# - Split by headings / numbered sections if present
|
|
|
|
| 3 |
from typing import List, Dict, Any
|
| 4 |
from utils.service.summarizer import cheap_summarize, clean_chunk_text
|
| 5 |
from utils.service.common import split_sentences, slugify
|
| 6 |
+
from ..logger import get_logger
|
| 7 |
|
| 8 |
# Enhanced semantic chunker with overlap and better structure:
|
| 9 |
# - Split by headings / numbered sections if present
|
utils/ingestion/parser.py
CHANGED
|
@@ -4,7 +4,7 @@ import fitz # PyMuPDF
|
|
| 4 |
from docx import Document
|
| 5 |
from PIL import Image
|
| 6 |
import numpy as np
|
| 7 |
-
from
|
| 8 |
|
| 9 |
logger = get_logger("PARSER", __name__)
|
| 10 |
|
|
|
|
| 4 |
from docx import Document
|
| 5 |
from PIL import Image
|
| 6 |
import numpy as np
|
| 7 |
+
from ..logger import get_logger
|
| 8 |
|
| 9 |
logger = get_logger("PARSER", __name__)
|
| 10 |
|
utils/rag/embeddings.py
CHANGED
|
@@ -2,8 +2,7 @@
|
|
| 2 |
import os
|
| 3 |
from typing import List
|
| 4 |
import numpy as np
|
| 5 |
-
import
|
| 6 |
-
from .logger import get_logger
|
| 7 |
|
| 8 |
try:
|
| 9 |
from sentence_transformers import SentenceTransformer
|
|
|
|
| 2 |
import os
|
| 3 |
from typing import List
|
| 4 |
import numpy as np
|
| 5 |
+
from ..logger import get_logger
|
|
|
|
| 6 |
|
| 7 |
try:
|
| 8 |
from sentence_transformers import SentenceTransformer
|
utils/rag/rag.py
CHANGED
|
@@ -7,7 +7,7 @@ from pymongo.collection import Collection
|
|
| 7 |
from pymongo.errors import PyMongoError
|
| 8 |
import numpy as np
|
| 9 |
import logging
|
| 10 |
-
from
|
| 11 |
|
| 12 |
VECTOR_DIM = 384 # all-MiniLM-L6-v2
|
| 13 |
INDEX_NAME = os.getenv("MONGO_VECTOR_INDEX", "vector_index")
|
|
|
|
| 7 |
from pymongo.errors import PyMongoError
|
| 8 |
import numpy as np
|
| 9 |
import logging
|
| 10 |
+
from ..logger import get_logger
|
| 11 |
|
| 12 |
VECTOR_DIM = 384 # all-MiniLM-L6-v2
|
| 13 |
INDEX_NAME = os.getenv("MONGO_VECTOR_INDEX", "vector_index")
|