Spaces:
Running
Running
Update Azure Blob Service Config
Browse files
services/embed_model/embed_model.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
import dotenv
|
| 3 |
-
|
|
|
|
| 4 |
from datetime import datetime
|
| 5 |
from langchain_openai import AzureOpenAIEmbeddings
|
| 6 |
|
|
|
|
| 1 |
import os
|
| 2 |
import dotenv
|
| 3 |
+
from config.env_constant import EnvFilepath
|
| 4 |
+
dotenv.load_dotenv(EnvFilepath.ENVPATH)
|
| 5 |
from datetime import datetime
|
| 6 |
from langchain_openai import AzureOpenAIEmbeddings
|
| 7 |
|
services/uploader/azure_blob_service.py
CHANGED
|
@@ -1,14 +1,15 @@
|
|
| 1 |
import os
|
| 2 |
import dotenv
|
| 3 |
-
|
|
|
|
| 4 |
|
|
|
|
| 5 |
from azure.identity import DefaultAzureCredential
|
| 6 |
from azure.storage.blob.aio import BlobServiceClient, ContainerClient
|
| 7 |
# from azure.storage.blob import BlobServiceClient, ContainerClient
|
| 8 |
from fastapi import UploadFile
|
| 9 |
|
| 10 |
|
| 11 |
-
from config.config import azure_blob_config
|
| 12 |
from utils.logger import get_logger
|
| 13 |
# from src.utils.decorator import trace_runtime
|
| 14 |
|
|
@@ -20,13 +21,32 @@ logger = get_logger("azure blob")
|
|
| 20 |
# CONNECTION_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
|
| 21 |
# CONTAINER_NAME = "pdf-uploads" # The name of your Azure Blob Storage container
|
| 22 |
|
| 23 |
-
async def get_blob_service_client(url=os.environ.get('azureai__container__endpoint')) -> BlobServiceClient:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
try:
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
return blob_service_client
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
| 30 |
|
| 31 |
|
| 32 |
async def get_container_client(url=os.environ.get('azureai__container__endpoint'), container_name=os.environ.get("azureai__container__name")) -> ContainerClient:
|
|
@@ -47,7 +67,7 @@ class AzureBlobStorageService:
|
|
| 47 |
# Initialize the BlobServiceClient for the entire application lifetime
|
| 48 |
self.blob_service_client = get_blob_service_client()
|
| 49 |
self.container_client = get_container_client()
|
| 50 |
-
self.prefix =
|
| 51 |
|
| 52 |
if not self.blob_service_client:
|
| 53 |
raise ValueError("azure service client is not set.")
|
|
|
|
| 1 |
import os
|
| 2 |
import dotenv
|
| 3 |
+
from config.env_constant import EnvFilepath
|
| 4 |
+
dotenv.load_dotenv(EnvFilepath.ENVPATH)
|
| 5 |
|
| 6 |
+
from config.constant import AzureBlobConstants
|
| 7 |
from azure.identity import DefaultAzureCredential
|
| 8 |
from azure.storage.blob.aio import BlobServiceClient, ContainerClient
|
| 9 |
# from azure.storage.blob import BlobServiceClient, ContainerClient
|
| 10 |
from fastapi import UploadFile
|
| 11 |
|
| 12 |
|
|
|
|
| 13 |
from utils.logger import get_logger
|
| 14 |
# from src.utils.decorator import trace_runtime
|
| 15 |
|
|
|
|
| 21 |
# CONNECTION_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
|
| 22 |
# CONTAINER_NAME = "pdf-uploads" # The name of your Azure Blob Storage container
|
| 23 |
|
| 24 |
+
# async def get_blob_service_client(url=os.environ.get('azureai__container__endpoint')) -> BlobServiceClient:
|
| 25 |
+
# try:
|
| 26 |
+
# default_credential = DefaultAzureCredential()
|
| 27 |
+
# blob_service_client = BlobServiceClient(url, credential=default_credential)
|
| 28 |
+
# return blob_service_client
|
| 29 |
+
# except Exception as E:
|
| 30 |
+
# logger.error(f'❌ Getting blob service client error, {E}')
|
| 31 |
+
|
| 32 |
+
async def get_blob_service_client() -> BlobServiceClient:
    """Build a ``BlobServiceClient`` authenticated with a SAS token.

    The account URL is read from the ``azureai__container__endpoint``
    environment variable and the SAS token from ``azureai__search__sas``.

    NOTE(review): this is declared ``async`` although nothing inside is
    awaited; calling it returns a coroutine, so callers must ``await`` it
    to obtain the client.

    Returns:
        The ``BlobServiceClient`` constructed from the two values above.

    Raises:
        ValueError: If either environment variable is unset or empty.
        Exception: Any error raised while building the client is logged
            and then re-raised unchanged.
    """
    try:
        account_url = os.environ.get("azureai__container__endpoint")
        sas_token = os.environ.get("azureai__search__sas")

        if not account_url or not sas_token:
            # Name the variables that are actually read above, so the
            # message points the operator at the right settings.
            raise ValueError(
                "Missing azureai__container__endpoint or azureai__search__sas"
            )

        blob_service_client = BlobServiceClient(
            account_url=account_url,
            credential=sas_token,
        )

        return blob_service_client

    except Exception as e:
        # Log for diagnostics, then propagate so the caller sees the failure.
        logger.error(f"❌ Getting blob service client error: {e}")
        raise
|
| 50 |
|
| 51 |
|
| 52 |
async def get_container_client(url=os.environ.get('azureai__container__endpoint'), container_name=os.environ.get("azureai__container__name")) -> ContainerClient:
|
|
|
|
| 67 |
# Initialize the BlobServiceClient for the entire application lifetime
|
| 68 |
self.blob_service_client = get_blob_service_client()
|
| 69 |
self.container_client = get_container_client()
|
| 70 |
+
self.prefix = AzureBlobConstants.BLOB_PREFIX
|
| 71 |
|
| 72 |
if not self.blob_service_client:
|
| 73 |
raise ValueError("azure service client is not set.")
|
utils/utils.py
CHANGED
|
@@ -2,17 +2,17 @@ import os
|
|
| 2 |
import sys
|
| 3 |
import io
|
| 4 |
import time
|
| 5 |
-
import dotenv
|
| 6 |
import PyPDF2
|
| 7 |
import asyncio
|
| 8 |
-
import pandas as pd
|
| 9 |
# import fitz
|
| 10 |
import pytesseract
|
| 11 |
-
dotenv
|
|
|
|
|
|
|
| 12 |
from PyPDF2 import PdfReader
|
| 13 |
from functools import wraps
|
| 14 |
from typing import ByteString
|
| 15 |
-
from pdf2image import
|
| 16 |
|
| 17 |
|
| 18 |
def measure_runtime(func):
|
|
|
|
| 2 |
import sys
|
| 3 |
import io
|
| 4 |
import time
|
|
|
|
| 5 |
import PyPDF2
|
| 6 |
import asyncio
|
|
|
|
| 7 |
# import fitz
|
| 8 |
import pytesseract
|
| 9 |
+
import dotenv
|
| 10 |
+
from config.env_constant import EnvFilepath
|
| 11 |
+
dotenv.load_dotenv(EnvFilepath.ENVPATH)
|
| 12 |
from PyPDF2 import PdfReader
|
| 13 |
from functools import wraps
|
| 14 |
from typing import ByteString
|
| 15 |
+
from pdf2image import convert_from_bytes
|
| 16 |
|
| 17 |
|
| 18 |
def measure_runtime(func):
|