# SB-PoC / .env
# Chirapath's picture
# First draft coding project
# 963ae98 verified
# =================================================================
# OCR SERVICE CONFIGURATION
# =================================================================
# Settings for the OCR service: Azure Document Intelligence credentials,
# server bind address/port, rate limiting and web-scraping limits.
#
# SECURITY: a real Document Intelligence key was committed in this file.
# Treat it as compromised: rotate it in the Azure Portal and supply the new
# value from an untracked file or a secret store, never from version control.
# Get these from your Azure Portal -> Document Intelligence resource
AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT=https://ocrservice256807.cognitiveservices.azure.com/
AZURE_DOCUMENT_INTELLIGENCE_KEY=your-document-intelligence-key
# Server Configuration (Optional)
# NOTE(review): binding to 0.0.0.0 with OCR_DEBUG=True looks like a
# development setup - confirm before deploying.
OCR_HOST=0.0.0.0
OCR_PORT=8400
OCR_DEBUG=True
OCR_LOG_LEVEL=INFO
# CORS Configuration (Optional - for production)
# ALLOWED_ORIGINS=["http://localhost:3000", "https://yourdomain.com"]
# Rate Limiting (Optional - for production)
RATE_LIMIT_REQUESTS=100
RATE_LIMIT_WINDOW=3600
# Web Scraping Configuration (Optional)
MAX_IMAGES_PER_PAGE=10
# NOTE(review): REQUEST_TIMEOUT is assigned again further down this file
# (30, then 300). With a last-assignment-wins loader this line is not the
# effective value - confirm which loader reads this file.
REQUEST_TIMEOUT=30
USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
# ================================================================
# RAG SERVICE CONFIGURATION
# ================================================================
# Settings for the RAG service: Azure OpenAI embedding deployment,
# PostgreSQL connection, server bind address/port, chunking and search
# defaults, connection pooling and timeouts.
#
# SECURITY: a real Azure OpenAI key and a PostgreSQL password were committed
# in this section. Treat both as compromised: rotate them and supply the new
# values from an untracked file or a secret store, never from version control.
# NOTE(review): AZURE_OPENAI_ENDPOINT is assigned again in the NER section
# with a different resource URL. With a last-assignment-wins loader the value
# below is overridden while AZURE_OPENAI_KEY still belongs to this resource -
# confirm which loader reads this file.
AZURE_OPENAI_ENDPOINT=https://ai-models-service256807.cognitiveservices.azure.com/
AZURE_OPENAI_KEY=your-azure-openai-key
AZURE_OPENAI_DEPLOYMENT=text-embedding-3-small
AZURE_OPENAI_API_VERSION=2024-12-01-preview
PG_HOST=sbaipocpostgresql.postgres.database.azure.com
PG_PORT=5432
PG_DATABASE=vectorsearch
PG_USER=user
PG_PASSWORD="your-postgres-password"
PG_SSL_MODE=require
# NOTE(review): OCR_SERVICE_URL is assigned again in the NER section with the
# same value.
OCR_SERVICE_URL=http://localhost:8400
RAG_HOST=0.0.0.0
RAG_PORT=8401
RAG_DEBUG=True
RAG_LOG_LEVEL=INFO
CHUNK_SIZE=1536
CHUNK_OVERLAP=100
MIN_CHUNK_SIZE=200
# NOTE(review): "*" allows every origin; ALLOWED_ORIGINS is also assigned
# again at the end of the file with the same value.
ALLOWED_ORIGINS=*
DEFAULT_SEARCH_LIMIT=10
DEFAULT_SIMILARITY_THRESHOLD=0.5
MAX_SEARCH_RESULTS=100
# Database connection pooling
DB_POOL_MIN_SIZE=2
DB_POOL_MAX_SIZE=20
DB_COMMAND_TIMEOUT=60
# Request timeouts (seconds)
# NOTE(review): REQUEST_TIMEOUT is also assigned in the OCR section (30) and
# in the NER section (300); only one of the three can take effect.
REQUEST_TIMEOUT=30
EMBEDDING_TIMEOUT=60
SERVICE_VERSION=1.0.0
RAG_SERVICE_URL=http://localhost:8401
TEST_TIMEOUT=30
# =================================================================
# NER SERVICE CONFIGURATION
# =================================================================
# Settings for the NER service: server bind address/port, DeepSeek and
# Azure OpenAI endpoints, Azure Blob Storage (SAS) access, PostgreSQL
# connection and processing limits.
#
# SECURITY: real API keys, a SAS token and a PostgreSQL password were
# committed in this section. Treat all of them as compromised: rotate the
# keys, revoke the SAS token, change the password, and supply the new values
# from an untracked file or a secret store, never from version control.
# Server Configuration
NER_HOST=0.0.0.0
NER_PORT=8500
# NOTE(review): unlike OCR_DEBUG / RAG_DEBUG this key has no service prefix -
# confirm whether other services sharing this file also read DEBUG.
DEBUG=True
NER_LOG_LEVEL=INFO
# OCR Service Configuration (from your existing OCR service)
# NOTE(review): OCR_SERVICE_URL is already assigned in the RAG section with
# the same value.
OCR_SERVICE_URL=http://localhost:8400
# DeepSeek API Configuration
# Get these from your Azure AI service or DeepSeek API
DEEPSEEK_ENDPOINT=https://ai-models-service256807.services.ai.azure.com/models
DEEPSEEK_API_KEY=your-deepseek-api-key
DEEPSEEK_MODEL=DeepSeek-R1-0528
# Azure OpenAI Configuration (for embeddings)
# Get these from your Azure OpenAI resource
# NOTE(review): AZURE_OPENAI_ENDPOINT is already assigned in the RAG section
# with a different resource. This value is a full request URL (deployment
# path plus api-version) rather than a base endpoint - confirm that the
# consumer expects this form and that the redefinition is intended.
AZURE_OPENAI_ENDPOINT=https://openaiservice2568.openai.azure.com/openai/deployments/text-embedding-3-large/embeddings?api-version=2023-05-15
AZURE_OPENAI_API_KEY=your-azure-openai-api-key
EMBEDDING_MODEL=text-embedding-3-large
# Azure Storage Configuration (SAS Authentication)
# Option 1: Use Storage Account URL + SAS Token (Recommended)
AZURE_STORAGE_ACCOUNT_URL=https://historylog256807.blob.core.windows.net/
AZURE_BLOB_SAS_TOKEN="your-container-sas-token"
# Option 2: Use complete SAS URL (Alternative - leave blank if using Option 1)
#AZURE_BLOB_SAS_URL=https://historylog256807.blob.core.windows.net/historylog?<your-container-sas-token>
BLOB_CONTAINER=historylog
# PostgreSQL Configuration (Azure Database for PostgreSQL flexible server)
POSTGRES_HOST=sbaipocpostgresql.postgres.database.azure.com
POSTGRES_PORT=5432
POSTGRES_USER=user
POSTGRES_PASSWORD="your-postgres-password"
POSTGRES_DATABASE=postgres
AZURE_OPENAI_DEPLOYMENT_NAME=text-embedding-3-large
# Processing Configuration
# Comments are kept on their own lines: some loaders treat a trailing
# "# ..." as part of the value.
# Maximum file size in MB
MAX_FILE_SIZE=50
# Request timeout in seconds
# NOTE(review): REQUEST_TIMEOUT is also assigned twice earlier in this file
# with the value 30; only one assignment can take effect.
REQUEST_TIMEOUT=300
# CORS Configuration (optional)
# NOTE(review): "*" allows every origin; ALLOWED_ORIGINS is already assigned
# in the RAG section with the same value.
ALLOWED_ORIGINS=*