diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..a7731138a868329b151cd9091b4a9897a374233d --- /dev/null +++ b/.dockerignore @@ -0,0 +1,39 @@ +env/ +venv/ +*.pyc +__pycache__/ +*.pyo +*.pyd +.Python +db.sqlite3 +db.sqlite3-journal +*.log +.env +.env.local +*.pot +*.mo +.git/ +.github/ +.gitignore +*.md +README.md +DEPLOYMENT.md +CI_CD_SETUP.md +AWS_DEPLOYMENT.md +REQUIREMENTS.md +DESIGN.md +build.sh +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store +Thumbs.db +render.yaml +Procfile +runtime.txt +requirements-windows.txt +media/ +staticfiles/ +node_modules/ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..c3e707acc13f11e8bee20abdd0f446d2dfeea97b --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,165 @@ +name: CI - Tests and Checks + +on: + push: + branches: [ master, develop ] + pull_request: + branches: [ master, develop ] + +jobs: + test: + runs-on: ubuntu-latest + + services: + postgres: + image: postgres:15 + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: test_db + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 + + redis: + image: redis:7 + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 6379:6379 + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install flake8 black isort + + - name: Run linting (flake8) + run: | + # Stop the build if there are Python syntax errors or undefined names + flake8 videocaller/agora --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all 
errors as warnings + flake8 videocaller/agora --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + continue-on-error: true + + - name: Check code formatting (black) + run: | + black --check videocaller/agora + continue-on-error: true + + - name: Check import sorting (isort) + run: | + isort --check-only videocaller/agora + continue-on-error: true + + - name: Run migrations + env: + DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_db + REDIS_URL: redis://localhost:6379/0 + DJANGO_SECRET_KEY: test-secret-key-for-ci + DJANGO_DEBUG: 'true' + PUSHER_APP_ID: 'test-app-id' + PUSHER_KEY: 'test-key' + PUSHER_SECRET: 'test-secret' + PUSHER_CLUSTER: 'test-cluster' + run: | + cd videocaller + python manage.py migrate --no-input + + - name: Run Django tests + env: + DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_db + REDIS_URL: redis://localhost:6379/0 + DJANGO_SECRET_KEY: test-secret-key-for-ci + DJANGO_DEBUG: 'true' + PUSHER_APP_ID: 'test-app-id' + PUSHER_KEY: 'test-key' + PUSHER_SECRET: 'test-secret' + PUSHER_CLUSTER: 'test-cluster' + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + QDRANT_URL: ${{ secrets.QDRANT_URL }} + QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }} + run: | + cd videocaller + python manage.py test + continue-on-error: true + + - name: Check for missing migrations + env: + DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_db + DJANGO_SECRET_KEY: test-secret-key-for-ci + PUSHER_APP_ID: 'test-app-id' + PUSHER_KEY: 'test-key' + PUSHER_SECRET: 'test-secret' + PUSHER_CLUSTER: 'test-cluster' + run: | + cd videocaller + python manage.py makemigrations --check --dry-run --no-input + + security: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install safety + run: | + pip install safety + + - name: Check for security vulnerabilities + run: | + safety 
check --json -r requirements.txt || true + continue-on-error: true + + build: + runs-on: ubuntu-latest + needs: [test, security] + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Collect static files + env: + DJANGO_SECRET_KEY: test-secret-key-for-ci + DJANGO_DEBUG: 'false' + run: | + cd videocaller + python manage.py collectstatic --no-input + + - name: Build validation complete + run: echo "✅ All checks passed!" diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000000000000000000000000000000000000..dab108b172ec3882b6feae238aab0a5ef1d78be0 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,41 @@ +name: CD - Deploy to Render + +on: + push: + branches: [ master ] + workflow_dispatch: + +jobs: + deploy: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Deploy to Render + uses: johnbeynon/render-deploy-action@v0.0.8 + with: + service-id: ${{ secrets.RENDER_SERVICE_ID }} + api-key: ${{ secrets.RENDER_API_KEY }} + + - name: Wait for deployment + run: | + echo "⏳ Waiting for Render deployment to complete..." + sleep 60 + + - name: Health check + run: | + echo "🔍 Running health check..." + response=$(curl -s -o /dev/null -w "%{http_code}" ${{ secrets.RENDER_APP_URL }}/api/health/google/ || echo "000") + if [ "$response" = "200" ] || [ "$response" = "503" ]; then + echo "✅ App is responding (HTTP $response)" + else + echo "⚠️ App returned HTTP $response - may need investigation" + exit 0 + fi + + - name: Deployment notification + run: | + echo "🚀 Deployment to Render completed!" 
+ echo "🌐 App URL: ${{ secrets.RENDER_APP_URL }}" diff --git a/.github/workflows/manual-deploy.yml b/.github/workflows/manual-deploy.yml new file mode 100644 index 0000000000000000000000000000000000000000..f022d3d4a28d95a6f3fedf2bfee8b3408aec55a1 --- /dev/null +++ b/.github/workflows/manual-deploy.yml @@ -0,0 +1,45 @@ +name: Manual Deploy + +on: + workflow_dispatch: + inputs: + environment: + description: 'Environment to deploy to' + required: true + default: 'production' + type: choice + options: + - production + - staging + +jobs: + deploy: + runs-on: ubuntu-latest + environment: ${{ github.event.inputs.environment }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set deployment message + run: | + echo "🚀 Deploying to ${{ github.event.inputs.environment }}..." + + - name: Deploy to Render + uses: johnbeynon/render-deploy-action@v0.0.8 + with: + service-id: ${{ secrets.RENDER_SERVICE_ID }} + api-key: ${{ secrets.RENDER_API_KEY }} + + - name: Post-deployment health check + run: | + echo "⏳ Waiting for deployment..." + sleep 60 + echo "🔍 Running health check..." + response=$(curl -s -o /dev/null -w "%{http_code}" ${{ secrets.RENDER_APP_URL }}/api/health/google/ || echo "000") + echo "Health check returned: HTTP $response" + + - name: Deployment complete + run: | + echo "✅ Deployment to ${{ github.event.inputs.environment }} completed!" 
+ echo "🌐 App URL: ${{ secrets.RENDER_APP_URL }}" diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml new file mode 100644 index 0000000000000000000000000000000000000000..b439d1d4b6d30ff0654c8bfd913a69c5459e8cb7 --- /dev/null +++ b/.github/workflows/security.yml @@ -0,0 +1,56 @@ +name: Weekly Security Scan + +on: + schedule: + # Run every Monday at 9 AM UTC + - cron: '0 9 * * 1' + workflow_dispatch: + +jobs: + security-scan: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install security tools + run: | + pip install safety bandit + + - name: Run Safety check + run: | + safety check --json -r requirements.txt > safety-report.json || true + cat safety-report.json + + - name: Run Bandit security linter + run: | + bandit -r videocaller/agora -f json -o bandit-report.json || true + cat bandit-report.json + + - name: Upload security reports + uses: actions/upload-artifact@v4 + with: + name: security-reports + path: | + safety-report.json + bandit-report.json + retention-days: 30 + + - name: Create issue if vulnerabilities found + if: failure() + uses: actions/github-script@v7 + with: + script: | + github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title: '🚨 Security Vulnerabilities Detected', + body: 'Security scan found vulnerabilities. 
Check the workflow artifacts for details.', + labels: ['security', 'automated'] + }) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..854c726d05b7874528f041e8ba3da56ec01aec6c --- /dev/null +++ b/.gitignore @@ -0,0 +1,36 @@ +# Python +__pycache__/ +*.py[cod] +*.pyd +*.so +*.egg-info/ +.dist/ +.build/ +.venv/ +venv/ +ENV/ +env/ + +# Django +*.sqlite3 +/media/ +/staticfiles/ + +# Environment variables +.env +**/.env + +# Logs +*.log +*.webm +# Media uploads +*.pdf +*.mp3 + +# OS files +.DS_Store +Thumbs.db + +# IDE +.vscode/ +.idea/ diff --git a/AWS_DEPLOYMENT.md b/AWS_DEPLOYMENT.md new file mode 100644 index 0000000000000000000000000000000000000000..d56524a20fa1a88cfb420401ca729deffc1c7781 --- /dev/null +++ b/AWS_DEPLOYMENT.md @@ -0,0 +1,745 @@ +# AWS Deployment Guide - Complete Steps + +## Prerequisites + +### 1. AWS Account Setup +- [ ] Create AWS account at https://aws.amazon.com +- [ ] Enable billing alerts +- [ ] Create IAM user with admin access (don't use root) +- [ ] Install AWS CLI: `aws configure` + +### 2. Local Requirements +- [ ] Git installed +- [ ] Docker installed (for testing containers) +- [ ] AWS CLI installed +- [ ] EB CLI installed: `pip install awsebcli` + +--- + +## Step 1: Prepare Your Application + +### A. Create Production Requirements +```bash +# Already done - verify requirements.txt has: +# - psycopg2-binary (PostgreSQL) +# - gunicorn (WSGI server) +# - whitenoise (static files) +# - dj-database-url (DB config) +``` + +### B. Create Dockerfile +```dockerfile +FROM python:3.11-slim + +# Set environment variables +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=1 + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + postgresql-client \ + && rm -rf /var/lib/apt/lists/* + +# Set work directory +WORKDIR /app + +# Install Python dependencies +COPY requirements.txt . 
+RUN pip install --upgrade pip && \ + pip install -r requirements.txt + +# Copy project +COPY videocaller/ ./videocaller/ + +# Collect static files +WORKDIR /app/videocaller +RUN python manage.py collectstatic --no-input + +# Run migrations and start server +CMD ["daphne", "-b", "0.0.0.0", "-p", "8000", "videocaller.asgi:application"] +``` + +### C. Create .dockerignore +``` +env/ +venv/ +*.pyc +__pycache__/ +db.sqlite3 +.env +.git/ +.github/ +*.md +build.sh +``` + +--- + +## Step 2: AWS Services Setup + +### A. Create RDS PostgreSQL Database + +1. **Go to RDS Console** + - Navigate to https://console.aws.amazon.com/rds + +2. **Create Database** + ``` + Choose: PostgreSQL 15 + Template: Free tier (or Production for real apps) + + Settings: + - DB instance identifier: aimeet-db + - Master username: postgres + - Master password: [generate strong password] + + Instance configuration: + - DB instance class: db.t3.micro (free tier) + + Storage: + - Allocated storage: 20 GB + - Storage autoscaling: Enable + + Connectivity: + - VPC: Default + - Public access: Yes (for now, restrict later) + - VPC security group: Create new + - Database port: 5432 + + Database authentication: + - Password authentication + + Additional configuration: + - Initial database name: aimeet + - Automated backups: Enable (7 days retention) + ``` + +3. **Note the endpoint** (e.g., `aimeet-db.xxxxx.us-east-1.rds.amazonaws.com`) + +### B. Create ElastiCache Redis + +1. **Go to ElastiCache Console** + - Navigate to https://console.aws.amazon.com/elasticache + +2. **Create Redis Cluster** + ``` + Cluster mode: Disabled + Engine: Redis 7.x + + Cluster info: + - Name: aimeet-redis + - Engine version: 7.0 + - Port: 6379 + - Node type: cache.t3.micro + - Number of replicas: 0 (for dev/test) + + Subnet group: Default + Security groups: Create new or use default + ``` + +3. **Note the endpoint** (e.g., `aimeet-redis.xxxxx.cache.amazonaws.com:6379`) + +### C. 
Create S3 Bucket (Already exists for recordings) + +1. **Verify your S3 bucket** from AGORA_STORAGE_BUCKET_NAME +2. **Set CORS policy** if needed for uploads + +### D. Create Application Load Balancer (ALB) + +1. **Go to EC2 > Load Balancers** + +2. **Create ALB** + ``` + Type: Application Load Balancer + Name: aimeet-alb + Scheme: Internet-facing + IP address type: IPv4 + + Network mapping: + - VPC: Default + - Availability Zones: Select 2+ zones + + Security groups: Create new + - Name: aimeet-alb-sg + - Inbound: HTTP (80), HTTPS (443) + + Listeners: + - HTTP:80 → Forward to target group (create below) + - HTTPS:443 → Forward to target group (needs SSL cert) + ``` + +3. **Create Target Group** + ``` + Type: Instances + Name: aimeet-targets + Protocol: HTTP + Port: 8000 + VPC: Default + + Health check: + - Protocol: HTTP + - Path: / + - Interval: 30 seconds + ``` + +### E. Request SSL Certificate (ACM) + +1. **Go to Certificate Manager** + - Region: Same as ALB + +2. **Request Certificate** + ``` + Domain: yourdomain.com + Validation: DNS (recommended) + + Add to DNS: + - Copy CNAME records to your domain registrar + - Wait for validation (~5-30 minutes) + ``` + +3. 
**Attach to ALB** + - Edit HTTPS:443 listener + - Select your certificate + +--- + +## Step 3: Deploy Application + +### Option A: Deploy with Elastic Beanstalk (Easiest) + +#### A.1 Initialize EB +```bash +cd c:\dev\Django-VIdeocall-App + +# Initialize +eb init + +# Prompts: +# Region: us-east-1 +# Application name: aimeet +# Platform: Docker +# SSH: Yes (generate keypair) +``` + +#### A.2 Create Environment +```bash +eb create production + +# Environment name: aimeet-production +# DNS CNAME: aimeet (will be aimeet.us-east-1.elasticbeanstalk.com) +# Load balancer: Application +``` + +#### A.3 Configure Environment Variables +```bash +eb setenv \ + DJANGO_SECRET_KEY="your-secret-key" \ + DJANGO_DEBUG="false" \ + DJANGO_ALLOWED_HOSTS="aimeet-production.us-east-1.elasticbeanstalk.com,yourdomain.com" \ + DATABASE_URL="postgresql://postgres:password@aimeet-db.xxxxx.us-east-1.rds.amazonaws.com:5432/aimeet" \ + REDIS_URL="redis://aimeet-redis.xxxxx.cache.amazonaws.com:6379/0" \ + AWS_ACCESS_KEY_ID="your-key" \ + AWS_SECRET_ACCESS_KEY="your-secret" \ + AWS_STORAGE_BUCKET_NAME="your-bucket" \ + AGORA_APP_ID="..." \ + AGORA_APP_CERTIFICATE="..." \ + ASSEMBLYAI_API_KEY="..." \ + GOOGLE_API_KEY="..." \ + QDRANT_URL="..." \ + QDRANT_API_KEY="..." \ + PUSHER_APP_ID="..." \ + PUSHER_KEY="..." \ + PUSHER_SECRET="..." \ + PUSHER_CLUSTER="..." +``` + +#### A.4 Deploy +```bash +eb deploy +``` + +#### A.5 Run Migrations +```bash +eb ssh +cd /var/app/current/videocaller +python manage.py migrate +python manage.py createsuperuser +exit +``` + +### Option B: Deploy with ECS Fargate (More scalable) + +#### B.1 Build and Push Docker Image +```bash +# Build +docker build -t aimeet:latest . 
+ +# Tag for ECR +aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin YOUR_ACCOUNT_ID.dkr.ecr.us-east-1.amazonaws.com + +# Create repository +aws ecr create-repository --repository-name aimeet --region us-east-1 + +# Tag and push +docker tag aimeet:latest YOUR_ACCOUNT_ID.dkr.ecr.us-east-1.amazonaws.com/aimeet:latest +docker push YOUR_ACCOUNT_ID.dkr.ecr.us-east-1.amazonaws.com/aimeet:latest +``` + +#### B.2 Create ECS Cluster +```bash +aws ecs create-cluster --cluster-name aimeet-cluster --region us-east-1 +``` + +#### B.3 Create Task Definition +Create `ecs-task-definition.json`: +```json +{ + "family": "aimeet-task", + "networkMode": "awsvpc", + "requiresCompatibilities": ["FARGATE"], + "cpu": "512", + "memory": "1024", + "containerDefinitions": [ + { + "name": "aimeet-web", + "image": "YOUR_ACCOUNT_ID.dkr.ecr.us-east-1.amazonaws.com/aimeet:latest", + "portMappings": [ + { + "containerPort": 8000, + "protocol": "tcp" + } + ], + "environment": [ + {"name": "DJANGO_DEBUG", "value": "false"} + ], + "secrets": [ + {"name": "DJANGO_SECRET_KEY", "valueFrom": "arn:aws:secretsmanager:..."}, + {"name": "DATABASE_URL", "valueFrom": "arn:aws:secretsmanager:..."} + ], + "logConfiguration": { + "logDriver": "awslogs", + "options": { + "awslogs-group": "/ecs/aimeet", + "awslogs-region": "us-east-1", + "awslogs-stream-prefix": "ecs" + } + } + } + ] +} +``` + +Register: +```bash +aws ecs register-task-definition --cli-input-json file://ecs-task-definition.json +``` + +#### B.4 Create ECS Service +```bash +aws ecs create-service \ + --cluster aimeet-cluster \ + --service-name aimeet-service \ + --task-definition aimeet-task \ + --desired-count 2 \ + --launch-type FARGATE \ + --network-configuration "awsvpcConfiguration={subnets=[subnet-xxx,subnet-yyy],securityGroups=[sg-xxx],assignPublicIp=ENABLED}" \ + --load-balancers "targetGroupArn=arn:aws:elasticloadbalancing:...,containerName=aimeet-web,containerPort=8000" +``` + +### Option C: Deploy 
on EC2 (Manual, most control) + +#### C.1 Launch EC2 Instance +``` +AMI: Ubuntu 22.04 LTS +Instance type: t3.medium +Security group: Allow 22 (SSH), 8000 (Daphne), 80, 443 +Key pair: Create/select for SSH +Storage: 30 GB +``` + +#### C.2 SSH and Setup +```bash +ssh -i your-key.pem ubuntu@your-ec2-ip + +# Update system +sudo apt update && sudo apt upgrade -y + +# Install dependencies +sudo apt install -y python3.11 python3.11-venv python3-pip git nginx postgresql-client redis-tools + +# Clone repo +git clone https://github.com/prashantdubeypng/Aimeet.git +cd Aimeet + +# Create virtual environment +python3.11 -m venv env +source env/bin/activate + +# Install dependencies +pip install --upgrade pip +pip install -r requirements.txt + +# Set environment variables +sudo nano /etc/environment +# Add all env vars + +# Run migrations +cd videocaller +python manage.py migrate +python manage.py createsuperuser +python manage.py collectstatic --no-input + +# Create systemd service +sudo nano /etc/systemd/system/aimeet.service +``` + +`/etc/systemd/system/aimeet.service`: +```ini +[Unit] +Description=AIMeet Daphne Service +After=network.target + +[Service] +User=ubuntu +Group=ubuntu +WorkingDirectory=/home/ubuntu/Aimeet/videocaller +EnvironmentFile=/etc/environment +ExecStart=/home/ubuntu/Aimeet/env/bin/daphne -b 0.0.0.0 -p 8000 videocaller.asgi:application +Restart=always + +[Install] +WantedBy=multi-user.target +``` + +```bash +# Start service +sudo systemctl daemon-reload +sudo systemctl enable aimeet +sudo systemctl start aimeet + +# Configure Nginx +sudo nano /etc/nginx/sites-available/aimeet +``` + +`/etc/nginx/sites-available/aimeet`: +```nginx +upstream django { + server 127.0.0.1:8000; +} + +server { + listen 80; + server_name yourdomain.com; + + location / { + proxy_pass http://django; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + 
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + location /static/ { + alias /home/ubuntu/Aimeet/videocaller/staticfiles/; + } + + location /media/ { + alias /home/ubuntu/Aimeet/videocaller/media/; + } +} +``` + +```bash +# Enable site +sudo ln -s /etc/nginx/sites-available/aimeet /etc/nginx/sites-enabled/ +sudo nginx -t +sudo systemctl restart nginx + +# Install SSL (Let's Encrypt) +sudo apt install certbot python3-certbot-nginx +sudo certbot --nginx -d yourdomain.com +``` + +--- + +## Step 4: Configure Domain (Route 53) + +### A. Create Hosted Zone +1. Go to Route 53 +2. Create hosted zone: `yourdomain.com` +3. Note the nameservers + +### B. Update Domain Registrar +1. Go to your domain registrar (GoDaddy, Namecheap, etc.) +2. Update nameservers to Route 53's NS records + +### C. Create DNS Records +``` +Type: A +Name: @ (or blank) +Value: [ALB DNS or EC2 Elastic IP] +TTL: 300 + +Type: CNAME +Name: www +Value: yourdomain.com +TTL: 300 +``` + +--- + +## Step 5: Setup CloudWatch Monitoring + +### A. Enable CloudWatch Logs +```bash +# For EB +eb logs --cloudwatch-logs enable + +# For ECS - already enabled in task definition + +# For EC2 - install CloudWatch agent +wget https://s3.amazonaws.com/amazoncloudwatch-agent/ubuntu/amd64/latest/amazon-cloudwatch-agent.deb +sudo dpkg -i amazon-cloudwatch-agent.deb +``` + +### B. 
Create Alarms +``` +Metric: RDSCPUUtilization > 80% +Metric: ALBTargetResponseTime > 2s +Metric: EC2StatusCheckFailed +Action: Send SNS notification +``` + +--- + +## Step 6: Setup Auto-Scaling (Optional) + +### For EB +```bash +eb scale 2 # Start with 2 instances + +# Configure auto-scaling +eb config +# Set min instances: 2 +# Set max instances: 10 +# Scaling trigger: CPU > 70% +``` + +### For ECS +```bash +aws application-autoscaling register-scalable-target \ + --service-namespace ecs \ + --scalable-dimension ecs:service:DesiredCount \ + --resource-id service/aimeet-cluster/aimeet-service \ + --min-capacity 2 \ + --max-capacity 10 +``` + +--- + +## Step 7: Backup Strategy + +### A. RDS Automated Backups +- Already enabled (7-day retention) +- Take manual snapshots before major changes + +### B. S3 Versioning +```bash +aws s3api put-bucket-versioning \ + --bucket your-bucket \ + --versioning-configuration Status=Enabled +``` + +### C. Database Snapshots +```bash +# Manual snapshot +aws rds create-db-snapshot \ + --db-instance-identifier aimeet-db \ + --db-snapshot-identifier aimeet-db-backup-$(date +%Y%m%d) +``` + +--- + +## Step 8: Security Hardening + +### A. IAM Roles +- Create role for EC2/ECS with minimal permissions +- Don't use root credentials + +### B. Security Groups +``` +RDS Security Group: +- Inbound: PostgreSQL (5432) from EC2/ECS security group only + +Redis Security Group: +- Inbound: Redis (6379) from EC2/ECS security group only + +EC2/ECS Security Group: +- Inbound: HTTP/HTTPS from ALB only +- Inbound: SSH from your IP only + +ALB Security Group: +- Inbound: HTTP (80), HTTPS (443) from 0.0.0.0/0 +``` + +### C. 
Enable WAF (Optional) +``` +Go to AWS WAF +Create Web ACL +Attach to ALB +Enable managed rule sets: +- AWS-AWSManagedRulesCommonRuleSet +- AWS-AWSManagedRulesKnownBadInputsRuleSet +``` + +--- + +## Step 9: CI/CD with GitHub Actions + +Already configured in `.github/workflows/deploy.yml` but update for AWS: + +```yaml +name: Deploy to AWS + +on: + push: + branches: [master] + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-east-1 + + - name: Deploy to Elastic Beanstalk + run: | + pip install awsebcli + eb deploy production --staged +``` + +--- + +## Step 10: Cost Optimization + +### Development/Testing +``` +RDS: db.t3.micro (Free tier eligible) +ElastiCache: cache.t3.micro +EC2: t3.small +Total: ~$50-70/month +``` + +### Production (Low traffic) +``` +RDS: db.t3.small with Multi-AZ +ElastiCache: cache.t3.small with replication +EC2/ECS: 2x t3.medium +ALB: ~$16/month +Total: ~$150-200/month +``` + +### Cost Saving Tips +- Use Reserved Instances (save 30-60%) +- Enable RDS auto-scaling storage +- Use S3 Intelligent-Tiering +- Set CloudWatch alarms for billing +- Use Spot Instances for non-critical workloads + +--- + +## Troubleshooting + +### Database Connection Issues +```bash +# Test from EC2 +telnet aimeet-db.xxxxx.rds.amazonaws.com 5432 + +# Check security groups +# Ensure EC2 security group is allowed in RDS inbound rules +``` + +### Static Files Not Loading +```bash +# Ensure STATIC_ROOT is set +# Run collectstatic +python manage.py collectstatic --no-input + +# Check Nginx config +# Verify WhiteNoise middleware order +``` + +### WebSocket Connection Fails +```bash +# Ensure ALB supports WebSocket +# Check target group health +# Verify Daphne is running (not Gunicorn) +``` + +--- + +## Final Checklist + 
+- [ ] RDS database created and accessible +- [ ] Redis cluster running +- [ ] S3 bucket configured +- [ ] Application deployed (EB/ECS/EC2) +- [ ] Migrations run +- [ ] Superuser created +- [ ] SSL certificate installed +- [ ] Domain pointed to ALB/EC2 +- [ ] Environment variables set +- [ ] CloudWatch monitoring enabled +- [ ] Backups configured +- [ ] Security groups hardened +- [ ] CI/CD pipeline tested +- [ ] Cost alerts set + +--- + +## Quick Deployment Commands + +```bash +# Elastic Beanstalk (recommended for quick start) +eb init +eb create production +eb setenv [all env vars] +eb deploy +eb ssh +cd /var/app/current/videocaller +python manage.py migrate +python manage.py createsuperuser + +# Access logs +eb logs + +# Check status +eb status + +# Terminate (be careful!) +eb terminate production +``` + +--- + +**Estimated Setup Time:** +- EB Deploy: 2-3 hours +- ECS Deploy: 4-6 hours +- EC2 Manual: 6-8 hours + +**Support Resources:** +- AWS Documentation: https://docs.aws.amazon.com +- Elastic Beanstalk Guide: https://docs.aws.amazon.com/elasticbeanstalk +- Django Deployment: https://docs.djangoproject.com/en/4.1/howto/deployment/ + +Good luck with your AWS deployment! 🚀 diff --git a/CI_CD_SETUP.md b/CI_CD_SETUP.md new file mode 100644 index 0000000000000000000000000000000000000000..ea8ba9b5c70f05da91481a12164fa0eb57280ac0 --- /dev/null +++ b/CI_CD_SETUP.md @@ -0,0 +1,296 @@ +# CI/CD Setup Guide + +## Overview +This project uses **GitHub Actions** for Continuous Integration and Continuous Deployment. + +--- + +## Workflows + +### 1. **CI - Tests and Checks** (`ci.yml`) +**Triggers:** Push or PR to `master` or `develop` + +**What it does:** +- ✅ Runs Python linting (flake8) +- ✅ Checks code formatting (black, isort) +- ✅ Runs Django tests +- ✅ Checks for missing migrations +- ✅ Security vulnerability scan +- ✅ Builds and validates static files + +**Services:** +- PostgreSQL 15 +- Redis 7 + +--- + +### 2. 
**CD - Deploy to Render** (`deploy.yml`) +**Triggers:** Push to `master` or manual trigger + +**What it does:** +- 🚀 Automatically deploys to Render +- 🔍 Runs post-deployment health check +- 📢 Notifies deployment status + +--- + +### 3. **Manual Deploy** (`manual-deploy.yml`) +**Triggers:** Manual trigger via GitHub Actions UI + +**What it does:** +- 🚀 Deploy to production or staging +- 🔍 Post-deployment health check + +**Usage:** +1. Go to **Actions** tab in GitHub +2. Select "Manual Deploy" +3. Click **Run workflow** +4. Choose environment (production/staging) + +--- + +### 4. **Weekly Security Scan** (`security.yml`) +**Triggers:** Every Monday at 9 AM UTC or manual trigger + +**What it does:** +- 🔒 Scans dependencies for vulnerabilities (Safety) +- 🔒 Code security analysis (Bandit) +- 📊 Uploads reports as artifacts +- 🚨 Creates GitHub issue if vulnerabilities found + +--- + +## Setup Instructions + +### 1. Configure GitHub Secrets +Go to **Settings** → **Secrets and variables** → **Actions** → **New repository secret** + +**Required secrets:** + +```bash +RENDER_SERVICE_ID # Get from Render dashboard +RENDER_API_KEY # Generate at https://dashboard.render.com/u/settings#api-keys +RENDER_APP_URL # Your app URL (e.g., https://aimeet.onrender.com) +``` + +**Optional (for tests):** +```bash +GOOGLE_API_KEY # For running integration tests +QDRANT_URL # For running RAG tests +QDRANT_API_KEY # For Qdrant tests +``` + +### 2. Get Render Credentials + +#### **Service ID:** +1. Go to https://dashboard.render.com +2. Open your web service +3. URL will look like: `https://dashboard.render.com/web/srv-xxxxxxxxxxxxx` +4. Copy the `srv-xxxxxxxxxxxxx` part + +#### **API Key:** +1. Go to https://dashboard.render.com/u/settings#api-keys +2. Click **Generate New Key** +3. Name it: `GitHub Actions` +4. Copy the key (only shown once!) + +### 3. Enable GitHub Actions +1. Go to your repo → **Actions** tab +2. 
Click **"I understand my workflows, go ahead and enable them"** + +--- + +## How Auto-Deploy Works + +``` +Push to master + ↓ +GitHub Actions triggers + ↓ +Runs CI tests (optional, can skip) + ↓ +Calls Render API to deploy + ↓ +Render pulls latest code + ↓ +Runs build.sh (migrations, static files) + ↓ +Restarts services + ↓ +Health check runs + ↓ +✅ Deployment complete! +``` + +--- + +## Branch Protection (Recommended) + +### Protect `master` branch: +1. Go to **Settings** → **Branches** → **Add rule** +2. Branch name pattern: `master` +3. Enable: + - ✅ Require a pull request before merging + - ✅ Require status checks to pass before merging + - Select: `test`, `security`, `build` + - ✅ Require branches to be up to date before merging +4. Save changes + +Now all pushes to `master` must pass CI checks! + +--- + +## Monitoring Deployments + +### View deployment status: +1. Go to **Actions** tab +2. Click on any workflow run +3. View logs for each step + +### View deployment history: +1. Go to Render Dashboard +2. Select your service +3. Click **Events** tab + +--- + +## Rollback a Deployment + +### Quick rollback on Render: +1. Go to Render Dashboard → Your service +2. Click **Events** tab +3. Find last working deployment +4. Click **"Redeploy"** + +### Rollback via GitHub: +```bash +# Revert the commit +git revert <commit-hash> +git push origin master + +# Auto-deploys the previous working version +``` + +--- + +## Manual Deployment + +### Via GitHub Actions: +1. Go to **Actions** tab +2. Select "Manual Deploy" workflow +3. Click **Run workflow** +4. Choose environment +5. Click **Run workflow** + +### Via Render Dashboard: +1. Go to your service +2. Click **Manual Deploy** +3. Select branch: `master` +4. Click **Deploy** + +--- + +## Disable Auto-Deploy + +### Option 1: In GitHub +Disable the workflow: +```bash +git mv .github/workflows/deploy.yml .github/workflows/deploy.yml.disabled +git commit -m "Disable auto-deploy" +git push +``` + +### Option 2: In Render +1. 
Go to your service → **Settings** +2. Find "Auto-Deploy" +3. Toggle **OFF** + +--- + +## Troubleshooting + +### CI tests failing? +- Check the **Actions** tab logs +- Common issues: + - Missing migrations + - Linting errors + - Test failures + +### Deployment failing? +- Check Render Dashboard → **Logs** +- Common issues: + - Missing environment variables + - Database migration errors + - Build script errors + +### Health check failing? +- App may still be starting (wait 2-3 minutes) +- Check if `GOOGLE_API_KEY` is set correctly +- Visit the health endpoint manually + +--- + +## Environment Variables in CI + +The CI workflow uses test values for: +- `DATABASE_URL` → PostgreSQL service +- `REDIS_URL` → Redis service +- `DJANGO_SECRET_KEY` → Test key + +Real credentials (from GitHub Secrets) are only used for: +- Integration tests (optional) +- Deployment to Render + +--- + +## Advanced: Staging Environment + +### Create staging service on Render: +1. Duplicate your web service +2. Name it: `aimeet-staging` +3. Set branch: `develop` + +### Add staging workflow: +```yaml +# .github/workflows/deploy-staging.yml +name: Deploy to Staging + +on: + push: + branches: [ develop ] + +jobs: + deploy: + # ... same as deploy.yml but with staging secrets +``` + +### Add staging secrets: +``` +RENDER_STAGING_SERVICE_ID +RENDER_STAGING_APP_URL +``` + +--- + +## Cost of CI/CD + +**GitHub Actions:** +- Public repos: **FREE** unlimited +- Private repos: 2,000 minutes/month free (more than enough) + +**Total cost:** $0 for public repos! 🎉 + +--- + +## Badge for README + +Add this to your README.md to show build status: + +```markdown +![CI](https://github.com/prashantdubeypng/Aimeet/workflows/CI%20-%20Tests%20and%20Checks/badge.svg) +![Deploy](https://github.com/prashantdubeypng/Aimeet/workflows/CD%20-%20Deploy%20to%20Render/badge.svg) +``` + +--- + +**Need help?** Check the workflow logs in the Actions tab! 
diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 0000000000000000000000000000000000000000..bfdf59cf771e60888b08045518b5ed709ff99442 --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,149 @@ +# Deploy to Render.com + +## Prerequisites +- GitHub account with this repo pushed +- Render.com account (free signup) +- All API keys ready (Agora, AssemblyAI, Google, Qdrant, Pusher) + +## Deployment Steps + +### 1. Push Code to GitHub +```bash +git add . +git commit -m "Add Render deployment config" +git push origin master +``` + +### 2. Create New Web Service on Render +1. Go to https://dashboard.render.com/ +2. Click **"New +"** → **"Blueprint"** +3. Connect your GitHub repository: `prashantdubeypng/Aimeet` +4. Render will detect `render.yaml` automatically +5. Click **"Apply"** + +### 3. Set Environment Variables +In Render Dashboard, go to your web service → **Environment** tab and add: + +**Required Variables:** +```bash +DJANGO_ALLOWED_HOSTS=your-app-name.onrender.com +AWS_ACCESS_KEY_ID=your_aws_key +AWS_SECRET_ACCESS_KEY=your_aws_secret +AWS_STORAGE_BUCKET_NAME=your_bucket_name +AGORA_APP_ID=your_agora_app_id +AGORA_APP_CERTIFICATE=your_agora_cert +AGORA_CUSTOMER_ID=your_agora_customer_id +AGORA_CUSTOMER_SECRET=your_agora_customer_secret +ASSEMBLYAI_API_KEY=your_assemblyai_key +GOOGLE_API_KEY=your_google_api_key +QDRANT_URL=https://your-qdrant-instance.qdrant.io:6333 +QDRANT_API_KEY=your_qdrant_key +PUSHER_APP_ID=your_pusher_app_id +PUSHER_KEY=your_pusher_key +PUSHER_SECRET=your_pusher_secret +PUSHER_CLUSTER=your_pusher_cluster +``` + +**Auto-Generated (already set by render.yaml):** +- `DJANGO_SECRET_KEY` ✓ +- `DATABASE_URL` ✓ +- `REDIS_URL` ✓ + +### 4. Wait for Deployment +- Render will automatically: + - Install dependencies + - Run migrations + - Collect static files + - Start Daphne server + - Start Django-Q worker + +### 5. 
Create Superuser (First Time) +After deployment, go to **Shell** tab in Render Dashboard: +```bash +cd videocaller +python manage.py createsuperuser +``` + +### 6. Test Your App +Visit: `https://your-app-name.onrender.com` + +## Render Services Created + +### 1. Web Service (Daphne) +- Runs Django/WebSocket server +- Auto-scales on demand +- **Cost:** Free tier (500 hrs/month) or Starter ($7/month) + +### 2. Worker Service (Django-Q) +- Processes background tasks (transcription, embeddings) +- **Cost:** Starter ($7/month) + +### 3. PostgreSQL Database +- Persistent storage for meetings/users +- **Cost:** Free for 90 days, then $7/month + +### 4. Redis +- Cache + Django-Q broker +- **Cost:** Free for 90 days, then $7/month + +**Total Cost After Free Trial:** ~$21/month + +## Free Tier Limitations +- Web service sleeps after 15 min inactivity (50 sec cold start) +- 500 build hours/month +- 100 GB bandwidth/month + +## Custom Domain (Optional) +1. Go to **Settings** → **Custom Domain** +2. Add your domain: `yourdomain.com` +3. Update DNS CNAME to point to Render + +## Monitoring +- **Logs:** Dashboard → Logs tab +- **Metrics:** Dashboard → Metrics tab +- **Health Check:** https://your-app.onrender.com/api/health/google/ + +## Troubleshooting + +### Service won't start +Check logs for errors: +```bash +# Common issues: +- Missing environment variables +- PostgreSQL connection failed +- Redis connection failed +``` + +### WebSocket not working +Ensure: +- Daphne is running (not Gunicorn) +- ALLOWED_HOSTS includes your domain +- CSRF_TRUSTED_ORIGINS is set + +### Static files not loading +Run manually: +```bash +cd videocaller +python manage.py collectstatic --no-input +``` + +## Rollback +If deployment fails: +1. Go to **Events** tab +2. Find previous successful deploy +3. Click **"Redeploy"** + +## Auto-Deploy on Push +Render automatically deploys when you push to `master` branch. 
+ +Disable: **Settings** → **Auto-Deploy** → OFF + +## Scale Up +To handle more users: +1. **Settings** → **Instance Type** → Select higher tier +2. Add more worker instances +3. Upgrade PostgreSQL/Redis plans + +--- + +**Support:** Check logs in Render Dashboard or visit https://render.com/docs diff --git a/DESIGN.md b/DESIGN.md new file mode 100644 index 0000000000000000000000000000000000000000..ab53254154b8d93fa5ee985a44599bbe6ae2c7aa --- /dev/null +++ b/DESIGN.md @@ -0,0 +1,993 @@ +# AIMeet - System Design Document + +## 1. Use Case Diagram + +```mermaid +graph TB + User["👤 User"] + Host["👤 Meeting Host"] + Participant["👤 Participant"] + System["🖥️ AIMeet System"] + OpenAI["🤖 OpenAI"] + AssemblyAI["📻 AssemblyAI"] + Qdrant["📊 Qdrant"] + S3["☁️ AWS S3"] + Agora["📱 Agora RTC"] + + User -->|Register/Login| System + User -->|Create Meeting| System + User -->|Join Meeting| System + Host -->|Start Recording| System + Participant -->|Join via Code| System + Host -->|End Meeting| System + Host -->|Upload Recording| System + System -->|Upload Audio| S3 + System -->|Transcribe| AssemblyAI + AssemblyAI -->|Return Transcript| System + User -->|Prepare for Search| System + System -->|Generate Embeddings| OpenAI + System -->|Store Vectors| Qdrant + User -->|Ask Question| System + System -->|Search Vectors| Qdrant + System -->|Generate Response| OpenAI + User -->|Upload Document| System + User -->|Chat| System + System -->|Video Stream| Agora + Agora -->|Audio/Video| System + + style User fill:#e1f5ff + style Host fill:#fff3e0 + style Participant fill:#f3e5f5 + style System fill:#e8f5e9 + style OpenAI fill:#ffebee + style AssemblyAI fill:#fce4ec + style Qdrant fill:#f1f8e9 + style S3 fill:#ede7f6 + style Agora fill:#e0f2f1 +``` + +--- + +## 2. User Flow Diagram + +### 2.1 New User Onboarding Flow + +```mermaid +flowchart TD + Start([User Visits App]) --> Register{Existing User?} + Register -->|No| SignUp["Sign Up
Username, Email, Password"] + Register -->|Yes| Login["Log In
Email, Password"] + SignUp --> CreateAccount["Create Account
Validate & Hash Password"] + CreateAccount --> Dashboard["🏠 View Dashboard
Meetings, Chat, Recordings"] + Login --> Dashboard + Dashboard --> End([Ready to Use App]) + + style Start fill:#e3f2fd + style Register fill:#fff9c4 + style SignUp fill:#f8bbd0 + style Login fill:#f8bbd0 + style CreateAccount fill:#c8e6c9 + style Dashboard fill:#b3e5fc + style End fill:#a5d6a7 +``` + +### 2.2 Meeting Creation & Participation Flow + +```mermaid +flowchart TD + Host["👤 Host"] --> Create["Click 'Create Meeting'"] + Create --> AddDetails["Add Title, Description
Set Max Participants"] + AddDetails --> Generate["System Generates
Room Code"] + Generate --> Share["Share Code with
Participants"] + + Share --> P1["👤 Participant 1"] + Share --> P2["👤 Participant 2"] + Share --> PN["👤 Participant N"] + + P1 --> Join["Join Meeting
Enter Room Code"] + P2 --> Join + PN --> Join + + Join --> GetToken["Request Agora Token
from Server"] + GetToken --> Connect["🎥 Connect to Agora RTC
Start Video/Audio"] + Connect --> Record["🎙️ Recording Starts
MediaRecorder in Browser"] + Record --> Chat["💬 Chat Available
Real-time via WebSocket"] + + Chat --> MeetingActive["✅ Meeting Active"] + MeetingActive --> Discussion["Participants Discuss"] + Discussion --> HostEnd["Host Clicks 'End Meeting'"] + HostEnd --> RecordingStop["🎙️ Recording Stops
Saved Locally"] + RecordingStop --> UploadOption["Show Upload Option"] + + UploadOption --> Upload["Click 'Upload Recording'"] + Upload --> S3Upload["📤 Upload to AWS S3
WebM Format"] + S3Upload --> SaveMetadata["Save Recording URL
in Database"] + SaveMetadata --> Success["✅ Recording Saved"] + + style Host fill:#fff3e0 + style Create fill:#fff9c4 + style AddDetails fill:#c5e1a5 + style Generate fill:#aed581 + style Share fill:#9ccc65 + style P1 fill:#f3e5f5 + style P2 fill:#f3e5f5 + style PN fill:#f3e5f5 + style Join fill:#e1bee7 + style GetToken fill:#ce93d8 + style Connect fill:#ba68c8 + style Record fill:#ab47bc + style Chat fill:#9575cd + style MeetingActive fill:#7986cb + style Discussion fill:#64b5f6 + style HostEnd fill:#42a5f5 + style RecordingStop fill:#2196f3 + style UploadOption fill:#1976d2 + style Upload fill:#1565c0 + style S3Upload fill:#0d47a1 + style SaveMetadata fill:#1565c0 + style Success fill:#1b5e20 +``` + +### 2.3 Transcription & RAG Pipeline Flow + +```mermaid +flowchart TD + Recording["📂 Recording in S3"] --> Transcribe["Click 'Start Transcription'
or Auto-trigger"] + Transcribe --> GetURL["Generate Presigned URL
24-hour Expiry"] + GetURL --> SendAPI["📤 Send to AssemblyAI
with Presigned URL"] + SendAPI --> ReceiveID["Receive Transcript ID
Status: processing"] + ReceiveID --> Poll["🔄 Poll Every 3 Seconds
Check Status"] + + Poll --> Check{Status?} + Check -->|Still Processing| Poll + Check -->|Completed| SaveText["Save Full Transcript
to Database"] + Check -->|Failed| Error["❌ Show Error
Retry Option"] + + SaveText --> Ready["✅ Transcript Ready"] + Ready --> PrepareClick["User Clicks
'Prepare for Search'"] + + PrepareClick --> Chunk["📝 Chunk Transcript
500 tokens, 50 overlap
RecursiveCharacterTextSplitter"] + Chunk --> CreateChunks["Create TranscriptChunk
Records in DB"] + CreateChunks --> Embed["🤖 Generate Embeddings
OpenAI text-embedding-3-small
Batch API"] + Embed --> Vectors["Get 1536-dim Vectors
for All Chunks"] + Vectors --> StoreQdrant["💾 Store in Qdrant
Vector DB
Cosine Similarity"] + StoreQdrant --> UpdateFlags["Update MeetingRoom
chunks_created_at
embeddings_created_at"] + UpdateFlags --> Complete["✅ Ready for Q&A
Searchable"] + + style Recording fill:#f3e5f5 + style Transcribe fill:#e1bee7 + style GetURL fill:#ce93d8 + style SendAPI fill:#ba68c8 + style ReceiveID fill:#ab47bc + style Poll fill:#9575cd + style Check fill:#fff9c4 + style SaveText fill:#7986cb + style Ready fill:#64b5f6 + style PrepareClick fill:#42a5f5 + style Chunk fill:#2196f3 + style CreateChunks fill:#1976d2 + style Embed fill:#1565c0 + style Vectors fill:#0d47a1 + style StoreQdrant fill:#1565c0 + style UpdateFlags fill:#1976d2 + style Complete fill:#1b5e20 + style Error fill:#c62828 +``` + +### 2.4 Question Answering Flow + +```mermaid +flowchart TD + User["👤 User"] --> Question["💭 Ask a Question
About Meeting Content"] + Question --> Input["Type Question in UI"] + Input --> Submit["Click 'Ask'"] + + Submit --> Embed["🤖 Embed Question
OpenAI API
text-embedding-3-small"] + Embed --> QueryVector["Get Query Vector
1536 dimensions"] + + QueryVector --> Search["🔍 Search Qdrant
Cosine Similarity
Top-5 Chunks"] + Search --> Results["Get Similar Chunks
+ Relevance Scores
+ Timestamps"] + + Results --> History["📜 Retrieve Conversation
Last 5 Q&A Turns
from DB"] + History --> HistoryData["Past Questions &
Answers Loaded"] + + HistoryData --> BuildPrompt["🛠️ Build LLM Prompt"] + BuildPrompt --> AddSystem["Add System Message
Analysis Instructions"] + AddSystem --> AddContext["Add Context
Top-5 Chunks
Transcript Sections"] + AddContext --> AddHistory["Add Conversation
Past Q&A Exchanges"] + AddHistory --> AddQuery["Add Current Query"] + AddQuery --> Prompt["Complete Prompt
Ready for LLM"] + + Prompt --> CallGPT["📞 Call OpenAI
GPT-4o-mini
Max 1000 tokens"] + CallGPT --> Generate["Generate Response
with Full Context"] + Generate --> Response["Get Assistant
Response Text"] + + Response --> Save["💾 Save to DB
ConversationHistory
Link Chunks"] + Save --> Display["📺 Display Answer
to User
Show Relevant Chunks
with Timestamps"] + Display --> ShowSources["Show Sources
Chunk Text
Confidence Scores"] + ShowSources --> End["✅ User Sees Answer
with Full Context"] + + style User fill:#e3f2fd + style Question fill:#bbdefb + style Input fill:#90caf9 + style Submit fill:#64b5f6 + style Embed fill:#42a5f5 + style QueryVector fill:#2196f3 + style Search fill:#1976d2 + style Results fill:#1565c0 + style History fill:#0d47a1 + style HistoryData fill:#1565c0 + style BuildPrompt fill:#1976d2 + style AddSystem fill:#2196f3 + style AddContext fill:#42a5f5 + style AddHistory fill:#64b5f6 + style AddQuery fill:#90caf9 + style Prompt fill:#bbdefb + style CallGPT fill:#e3f2fd + style Generate fill:#bbdefb + style Response fill:#90caf9 + style Save fill:#64b5f6 + style Display fill:#42a5f5 + style ShowSources fill:#2196f3 + style End fill:#1b5e20 +``` + +### 2.5 Meeting Preparation (Sticky Notes) Flow + +```mermaid +flowchart TD + User["👤 User"] --> CreateNew["Creating New Meeting
for 'Hiring Interview'"] + CreateNew --> AddTitle["Add Title & Agenda"] + AddTitle --> System["🤖 System Analyzes
Title Keywords"] + + System --> Extract["Extract Keywords
- hiring
- interview
- data science"] + Extract --> SearchQdrant["🔍 Search Past Meetings
in Qdrant
Similar Topics"] + SearchQdrant --> FindPast["Find Related Past
Meetings
- Jan 10: Team Formation
- Jan 15: Hiring Discussion
- Jan 20: DS Skills"] + + FindPast --> StickyNotes["📌 Show Sticky Notes
Related Past Discussions"] + StickyNotes --> Display["Display:
'In your last hiring meeting,
you discussed...'"] + Display --> Expand["User Can Expand
to Read Full Context"] + Expand --> Context["See Relevant Chunks
from Past Meetings
- Requirements discussed
- Decisions made
- Concerns raised"] + + Context --> Prepare["✅ User Prepared
with Full History
Before New Meeting"] + + style User fill:#fff3e0 + style CreateNew fill:#ffe0b2 + style AddTitle fill:#ffcc80 + style System fill:#ffb74d + style Extract fill:#ffa726 + style SearchQdrant fill:#ff9800 + style FindPast fill:#f57c00 + style StickyNotes fill:#e65100 + style Display fill:#fff9c4 + style Expand fill:#fff59d + style Context fill:#fff176 + style Prepare fill:#1b5e20 +``` + +--- + +## 3. System Architecture Diagram + +### 3.1 High-Level Architecture + +```mermaid +graph TB + subgraph Client["🖥️ Client Layer"] + Web["Web UI
HTML/CSS/JS"] + Agora_SDK["Agora RTC SDK
Video/Audio"] + MediaRec["MediaRecorder
Audio Capture"] + end + + subgraph API["🌐 API Layer"] + Django["Django REST
Framework"] + WebSocket["WebSocket
Pusher"] + end + + subgraph Logic["💻 Application Logic"] + Views["Views
Meeting, Recording
Chat, RAG"] + Utils["Utilities
Recording, Transcription
Embedding, RAG"] + Models["Models
Database ORM"] + end + + subgraph Storage["💾 Data Layer"] + DB["PostgreSQL
Relational Data"] + S3["AWS S3
Files & Media"] + end + + subgraph AI["🤖 AI Services"] + OpenAI["OpenAI API
Embeddings
GPT-4o"] + AssemblyAI["AssemblyAI
Transcription"] + Qdrant["Qdrant Cloud
Vector DB"] + end + + subgraph External["📡 External"] + AgoraCloud["Agora Cloud
RTC"] + end + + Web --> Django + Agora_SDK --> Django + MediaRec --> Django + WebSocket --> Django + Django --> Views + Django --> Utils + Views --> Models + Utils --> Models + Models --> DB + Models --> S3 + Views --> S3 + Utils --> S3 + Utils --> OpenAI + Utils --> AssemblyAI + Utils --> Qdrant + Agora_SDK --> AgoraCloud + + style Client fill:#e3f2fd + style API fill:#f3e5f5 + style Logic fill:#e8f5e9 + style Storage fill:#fff3e0 + style AI fill:#ffebee + style External fill:#f1f8e9 +``` + +### 3.2 AWS Deployment Architecture + +```mermaid +graph TB + subgraph AWS["☁️ AWS Region ap-south-1"] + subgraph VPC["VPC"] + subgraph PublicSubnet["Public Subnet"] + ALB["ALB
HTTPS"] + end + + subgraph PrivateSubnet["Private Subnet"] + EC2_1["EC2 Instance 1
Django App"] + EC2_2["EC2 Instance 2
Django App"] + EC2_N["EC2 Instance N
Auto-Scaling"] + end + + subgraph DBSubnet["DB Subnet"] + RDS["RDS PostgreSQL
Multi-AZ"] + end + end + + S3["S3 Bucket
Recordings, Docs"] + CloudFront["CloudFront CDN
Static Assets"] + CloudWatch["CloudWatch
Monitoring"] + Secrets["Secrets Manager
API Keys"] + end + + Users["👥 Users
Internet"] + OpenAI_Cloud["🤖 OpenAI
Cloud"] + Qdrant_Cloud["📊 Qdrant
Cloud"] + AssemblyAI_Cloud["📻 AssemblyAI
Cloud"] + Agora_Cloud["📱 Agora
Cloud"] + + Users -->|HTTPS| ALB + ALB -->|Route| EC2_1 + ALB -->|Route| EC2_2 + ALB -->|Route| EC2_N + EC2_1 -->|Read/Write| RDS + EC2_2 -->|Read/Write| RDS + EC2_N -->|Read/Write| RDS + EC2_1 -->|Upload/Download| S3 + S3 -->|Serve| CloudFront + EC2_1 -->|Logs| CloudWatch + EC2_2 -->|Logs| CloudWatch + EC2_N -->|Logs| CloudWatch + EC2_1 -->|Get Keys| Secrets + EC2_1 -->|API Call| OpenAI_Cloud + EC2_1 -->|API Call| Qdrant_Cloud + EC2_1 -->|API Call| AssemblyAI_Cloud + Users -->|Video| Agora_Cloud + + style AWS fill:#ede7f6 + style VPC fill:#f3e5f5 + style PublicSubnet fill:#e1bee7 + style PrivateSubnet fill:#ce93d8 + style DBSubnet fill:#ba68c8 + style Users fill:#bbdefb + style OpenAI_Cloud fill:#ffebee + style Qdrant_Cloud fill:#f1f8e9 + style AssemblyAI_Cloud fill:#fce4ec + style Agora_Cloud fill:#e0f2f1 +``` + +--- + +## 4. Data Flow Diagram + +### 4.1 Recording & Transcription Flow + +```mermaid +flowchart LR + Browser["Browser
MediaRecorder"] + LocalFile["Local WebM
Audio File
5-50 MB"] + Django["Django
Backend"] + Presigned["Presigned URL
24-hour expiry"] + S3["AWS S3
aimeet-s3-bucket"] + Assembly["AssemblyAI
Service"] + Polling["Polling Loop
Every 3 sec"] + Complete["Transcript
Complete"] + Database["PostgreSQL
transcript_text"] + Ready["✅ Ready
for RAG"] + + Browser -->|Capture Audio| LocalFile + LocalFile -->|User Uploads| Django + Django -->|Generate URL| Presigned + Django -->|Upload| S3 + S3 -->|Notify| Assembly + Assembly -->|Process| Polling + Polling -->|Check Status| Assembly + Assembly -->|Return Result| Complete + Complete -->|Save| Database + Database --> Ready + + style Browser fill:#bbdefb + style LocalFile fill:#90caf9 + style Django fill:#64b5f6 + style Presigned fill:#42a5f5 + style S3 fill:#2196f3 + style Assembly fill:#ff9800 + style Polling fill:#fff9c4 + style Complete fill:#fff176 + style Database fill:#64b5f6 + style Ready fill:#1b5e20 +``` + +### 4.2 Embedding & Storage Flow + +```mermaid +flowchart LR + Transcript["Transcript
Text"] + Splitter["RecursiveCharacter
TextSplitter
500 tokens
50 overlap"] + Chunks["Text Chunks
Array[str]"] + DB_Chunks["Create
TranscriptChunk
Records"] + OpenAI_API["OpenAI API
Batch Embeddings"] + Vectors["1536-dim
Vectors
Array[float]"] + Qdrant["Qdrant Cloud
Collection"] + Indexed["✅ Indexed
Searchable"] + + Transcript -->|Split| Splitter + Splitter -->|Output| Chunks + Chunks -->|Save| DB_Chunks + Chunks -->|Send| OpenAI_API + OpenAI_API -->|Generate| Vectors + Vectors -->|Upsert| Qdrant + Qdrant --> Indexed + + style Transcript fill:#f3e5f5 + style Splitter fill:#e1bee7 + style Chunks fill:#ce93d8 + style DB_Chunks fill:#ba68c8 + style OpenAI_API fill:#ffebee + style Vectors fill:#ffcdd2 + style Qdrant fill:#f1f8e9 + style Indexed fill:#1b5e20 +``` + +### 4.3 Query & Response Flow + +```mermaid +flowchart LR + UserQ["User Question"] + Embed_Q["Embed Question
OpenAI API"] + Vector_Q["Query Vector
1536-dim"] + Search["Search Qdrant
Cosine Similarity
top-k=5"] + TopChunks["Top-5 Chunks
+ Scores"] + History["Fetch Conversation
History
Last 5 turns"] + Prompt_Build["Build LLM
Prompt
System+Context
+History+Query"] + GPT["Call GPT-4o
API"] + Response["Generate
Response"] + Save_History["Save Q&A
to DB"] + Display["Display to
User
+ Sources"] + + UserQ -->|Send| Embed_Q + Embed_Q -->|Return| Vector_Q + Vector_Q -->|Query| Search + Search -->|Return| TopChunks + TopChunks -->|Include| Prompt_Build + History -->|Include| Prompt_Build + Prompt_Build -->|Send| GPT + GPT -->|Generate| Response + Response -->|Save| Save_History + Response -->|Show| Display + + style UserQ fill:#e3f2fd + style Embed_Q fill:#bbdefb + style Vector_Q fill:#90caf9 + style Search fill:#64b5f6 + style TopChunks fill:#42a5f5 + style History fill:#2196f3 + style Prompt_Build fill:#1976d2 + style GPT fill:#ffebee + style Response fill:#ffcdd2 + style Save_History fill:#64b5f6 + style Display fill:#1b5e20 +``` + +--- + +## 5. Database Schema Diagram + +```mermaid +erDiagram + AUTH_USER ||--o{ MEETING_ROOM : hosts + AUTH_USER ||--o{ CHAT_MESSAGE : sends + AUTH_USER ||--o{ CONVERSATION_HISTORY : asks + + MEETING_ROOM ||--o{ TRANSCRIPT_CHUNK : contains + MEETING_ROOM ||--o{ DOCUMENT_UPLOAD : has + MEETING_ROOM ||--o{ CONVERSATION_HISTORY : discusses + + DOCUMENT_UPLOAD ||--o{ DOCUMENT_CHUNK : contains + + AUTH_USER { + int id PK + string username UK + string email + string password_hash + string first_name + string last_name + datetime created_at + } + + MEETING_ROOM { + int id PK + string room_id UK + string room_code UK + int host_id FK + string title + text description + int max_participants + string recording_status + text recording_sid + string s3_recording_url + text transcript_text + string transcript_status + string transcript_id + datetime chunks_created_at + datetime embeddings_created_at + int embedding_version + boolean is_active + datetime created_at + } + + TRANSCRIPT_CHUNK { + int id PK + int meeting_id FK + text chunk_text + int chunk_index + int start_time + int end_time + string embedding_vector_id + datetime created_at + } + + DOCUMENT_UPLOAD { + int id PK + int meeting_id FK + string file_name + string file_type + string s3_url + text raw_text + datetime chunks_created_at + datetime embeddings_created_at + 
datetime created_at + } + + DOCUMENT_CHUNK { + int id PK + int document_id FK + text chunk_text + int chunk_index + string embedding_vector_id + datetime created_at + } + + CONVERSATION_HISTORY { + int id PK + int meeting_id FK + int user_id FK + text user_question + text assistant_response + json relevant_chunks + datetime created_at + } + + CHAT_MESSAGE { + int id PK + int user_id FK + text content + datetime created_at + } +``` + +--- + +## 6. Component Interaction Diagram + +### 6.1 Meeting & Recording Components + +```mermaid +graph TB + FrontEnd["🎨 Frontend
HTML/CSS/JS"] + DjangoView["📝 Django View
meeting()"] + AgoraSDK["📱 Agora RTC
SDK"] + MediaRec["🎙️ MediaRecorder
Audio Capture"] + RecordingUtils["🛠️ RecordingUtils
S3Manager"] + S3["☁️ AWS S3"] + + FrontEnd -->|render page| DjangoView + FrontEnd -->|initialize| AgoraSDK + FrontEnd -->|start recording| MediaRec + DjangoView -->|generate token| AgoraSDK + AgoraSDK -->|connect to| FrontEnd + MediaRec -->|on meeting end| FrontEnd + FrontEnd -->|upload recording| DjangoView + DjangoView -->|call| RecordingUtils + RecordingUtils -->|upload file| S3 + RecordingUtils -->|save metadata| DjangoView + + style FrontEnd fill:#e3f2fd + style DjangoView fill:#c8e6c9 + style AgoraSDK fill:#e0f2f1 + style MediaRec fill:#fff9c4 + style RecordingUtils fill:#f0f4c3 + style S3 fill:#ede7f6 +``` + +### 6.2 RAG Pipeline Components + +```mermaid +graph TB + AssemblyAI["📻 AssemblyAI
Service"] + AssemblyUtils["🛠️ AssemblyAI
Utils"] + DjangoView["📝 Django View
RAG Endpoints"] + RAGUtils["🛠️ RAG Utils
Chunking & Query"] + EmbeddingUtils["🛠️ Embedding
Utils"] + OpenAI["🤖 OpenAI
API"] + Qdrant["📊 Qdrant
Vector DB"] + Database["💾 PostgreSQL
Models"] + + AssemblyAI -->|transcribe| AssemblyUtils + AssemblyUtils -->|save| Database + DjangoView -->|call| RAGUtils + RAGUtils -->|chunk| Database + RAGUtils -->|embed| EmbeddingUtils + EmbeddingUtils -->|call| OpenAI + EmbeddingUtils -->|store| Qdrant + RAGUtils -->|query| Qdrant + RAGUtils -->|generate response| OpenAI + RAGUtils -->|save history| Database + + style AssemblyAI fill:#ff9800 + style AssemblyUtils fill:#fff9c4 + style DjangoView fill:#c8e6c9 + style RAGUtils fill:#a5d6a7 + style EmbeddingUtils fill:#81c784 + style OpenAI fill:#ffcdd2 + style Qdrant fill:#f1f8e9 + style Database fill:#b3e5fc +``` + +--- + +## 7. Sequence Diagrams + +### 7.1 Meeting Creation Sequence + +```mermaid +sequenceDiagram + actor User + participant Frontend + participant Django + participant Database + participant Agora_Cloud + + User->>Frontend: Click 'Create Meeting' + Frontend->>Frontend: Show form + User->>Frontend: Enter title, description + Frontend->>Django: POST /create/ + Django->>Django: Generate room_code + Django->>Django: Create MeetingRoom object + Django->>Database: Save to DB + Database-->>Django: Meeting ID + Django-->>Frontend: Return room_code, meeting_id + Frontend->>Frontend: Display room code + Frontend->>User: "Share this code: abc-def-ghi" + + User->>Frontend: Click 'Join Meeting' + Frontend->>Django: GET /meeting// + Django->>Database: Fetch meeting + Database-->>Django: Meeting object + Django->>Django: Generate Agora token + Django-->>Frontend: Return token + Frontend->>Agora_Cloud: Connect with token + Agora_Cloud-->>Frontend: Connection established + Frontend->>Frontend: Initialize video/audio + Frontend->>Frontend: Start MediaRecorder + Frontend->>User: "Meeting started" +``` + +### 7.2 Question Answering Sequence + +```mermaid +sequenceDiagram + actor User + participant Frontend + participant Django + participant OpenAI_API + participant Qdrant_DB + participant Database + participant GPT4O_API + + User->>Frontend: Type question & click 
'Ask'
+ Frontend->>Django: POST /api/meetings/<id>/query/
+ Django->>OpenAI_API: Embed question
+ OpenAI_API-->>Django: query_vector (1536-dim)
+ Django->>Qdrant_DB: Search with vector
+ Qdrant_DB-->>Django: Top-5 chunks + scores
+ Django->>Database: Fetch conversation history
+ Database-->>Django: Last 5 Q&A turns
+ Django->>Django: Build LLM prompt
+ Django->>GPT4O_API: Send prompt
+ GPT4O_API-->>Django: Generated response
+ Django->>Database: Save Q&A to ConversationHistory
+ Database-->>Django: Saved
+ Django-->>Frontend: Return response + chunks
+ Frontend->>Frontend: Display response
+ Frontend->>Frontend: Show relevant chunks
+ Frontend->>User: Display answer with sources
+```
+
+---
+
+## 8. State Machine Diagrams
+
+### 8.1 Meeting State Machine
+
+```mermaid
+stateDiagram-v2
+ [*] --> Created: create_meeting()
+ Created --> Active: host_joins()
+ Active --> Active: participants_join()
+ Active --> Recording: start_recording()
+ Recording --> Recording: chat_messages()
+ Recording --> Ended: host_ends_meeting()
+ Ended --> Transcribing: upload_recording()
+ Transcribing --> Transcribed: transcription_complete()
+ Transcribed --> Processing: prepare_for_rag()
+ Processing --> Ready: embeddings_stored()
+ Ready --> Archived: archive_meeting()
+
+ note right of Created
+ Room code generated
+ Max participants set
+ end note
+
+ note right of Active
+ Video/audio streaming
+ Chat enabled
+ end note
+
+ note right of Recording
+ Audio recorded locally
+ Chat saved
+ end note
+
+ note right of Ended
+ Recording stopped
+ Waiting for upload
+ end note
+
+ note right of Transcribing
+ AssemblyAI processing
+ Status polling
+ end note
+
+ note right of Transcribed
+ Transcript saved
+ Ready for chunking
+ end note
+
+ note right of Processing
+ Chunks created
+ Embeddings generated
+ end note
+
+ note right of Ready
+ Searchable
+ Q&A enabled
+ end note
+```
+
+### 8.2 Transcription State Machine
+
+```mermaid
+stateDiagram-v2
+ [*] --> NotStarted
+ NotStarted -->
Processing: upload_recording() + Processing --> Processing: poll_status() + Processing --> Completed: transcription_complete() + Processing --> Failed: error_occurred() + Completed --> [*] + Failed --> NotStarted: retry() + + note right of NotStarted + Waiting for upload + end note + + note right of Processing + AssemblyAI job running + Polling every 3 sec + end note + + note right of Completed + Transcript saved to DB + Available for RAG + end note + + note right of Failed + Error occurred + User can retry + end note +``` + +--- + +## 9. API Request/Response Flow + +### 9.1 Create Meeting Request Flow + +```mermaid +graph LR + Client["Client"] + Request["POST /create/
Content-Type: application/json"] + Body["Body:
title
description
max_participants"] + Django["Django View
create_room()"] + Validate["Validate Input
Auth Check"] + Generate["Generate
room_code"] + Save["Save to
Database"] + Response["Response 200
JSON:
room_code
meeting_id"] + + Client -->|Send| Request + Request -->|Include| Body + Body -->|Sent to| Django + Django -->|Process| Validate + Validate -->|Generate| Generate + Generate -->|Save| Save + Save -->|Return| Response + Response -->|Receive| Client + + style Client fill:#e3f2fd + style Request fill:#c5e1a5 + style Body fill:#aed581 + style Django fill:#c8e6c9 + style Validate fill:#a5d6a7 + style Generate fill:#81c784 + style Save fill:#66bb6a + style Response fill:#4caf50 +``` + +### 9.2 Query Meeting Endpoint Request Flow + +```mermaid +graph LR + Client["Client
Frontend"] + Request["POST /api/meetings/<id>/query/
Content-Type: application/json"] + Body["Body:
question: string"] + Auth["Check Auth
JWT Token"] + Parse["Parse
Question"] + Embed["Embed
Question"] + Search["Search
Qdrant"] + GetHistory["Fetch
History"] + BuildPrompt["Build
Prompt"] + CallLLM["Call
GPT-4o"] + Save["Save
Q&A"] + Response["Response 200
JSON:
response
relevant_chunks"] + + Client -->|Send| Request + Request -->|Include| Body + Body -->|Sent to| Auth + Auth -->|Validate| Parse + Parse -->|Call| Embed + Embed -->|Search| Search + Parse -->|Fetch| GetHistory + Search -->|Include| BuildPrompt + GetHistory -->|Include| BuildPrompt + BuildPrompt -->|Call| CallLLM + CallLLM -->|Store| Save + CallLLM -->|Return| Response + Response -->|Receive| Client + + style Client fill:#e3f2fd + style Request fill:#c5e1a5 + style Body fill:#aed581 + style Auth fill:#c8e6c9 + style Parse fill:#a5d6a7 + style Embed fill:#81c784 + style Search fill:#66bb6a + style GetHistory fill:#4caf50 + style BuildPrompt fill:#43a047 + style CallLLM fill:#388e3c + style Save fill:#2e7d32 + style Response fill:#1b5e20 +``` + +--- + +## 10. Error Handling Flow + +```mermaid +flowchart TD + Request["User Request"] + Try["Try to Process"] + Check{Error?} + + Check -->|No| Success["✅ Success
Return Data"] + Check -->|Yes| Type{Error Type?} + + Type -->|404| NotFound["❌ Not Found
Status 404"] + Type -->|401| Unauthorized["❌ Unauthorized
Status 401"] + Type -->|403| Forbidden["❌ Forbidden
Status 403"] + Type -->|400| BadRequest["❌ Bad Request
Status 400"] + Type -->|500| ServerError["❌ Server Error
Status 500"] + Type -->|API Error| APIError["❌ External API
Error"] + + NotFound -->|Log| Log["Log Error
to CloudWatch"] + Unauthorized -->|Log| Log + Forbidden -->|Log| Log + BadRequest -->|Log| Log + ServerError -->|Log| Log + APIError -->|Log| Log + + Log -->|Alert| Alert{Severity?} + Alert -->|Critical| Page["Page On-Call"] + Alert -->|Warning| Notify["Send Notification"] + Alert -->|Info| Store["Store in Log"] + + Page -->|Resolved| Response["Return Error
to User"] + Notify -->|Resolved| Response + Store -->|Timeout| Response + + Request -->|Send| Try + Success -->|Send| Response + + style Request fill:#e3f2fd + style Try fill:#fff9c4 + style Check fill:#fff59d + style Type fill:#fff176 + style Success fill:#c8e6c9 + style NotFound fill:#ffcdd2 + style Unauthorized fill:#ef9a9a + style Forbidden fill:#e57373 + style BadRequest fill:#ef5350 + style ServerError fill:#f44336 + style APIError fill:#e53935 + style Log fill:#fff9c4 + style Alert fill:#fff59d + style Page fill:#ff6f00 + style Notify fill:#ffa726 + style Store fill:#ffb74d + style Response fill:#1b5e20 +``` + +--- + +## Summary + +This design document provides: + +1. **Use Cases** - All system actors and interactions +2. **User Flows** - Step-by-step journeys for key scenarios +3. **System Architecture** - Component relationships and deployment +4. **Data Flows** - How data moves through the system +5. **Database Schema** - Entity relationships and structure +6. **Component Interactions** - How modules communicate +7. **Sequences** - Detailed interaction timelines +8. **State Machines** - Meeting and transcription state transitions +9. **API Flows** - Request/response patterns +10. **Error Handling** - Exception management and alerting + +All diagrams use Mermaid syntax for easy updates and version control. diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..2ca672b355bd4b6637bdc7cf201cf2b0a945f51d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,34 @@ +FROM python:3.11-slim + +# Set environment variables +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=1 + +# Install system dependencies +# gcc is needed for some Python packages; no postgresql-client needed (psycopg2-binary is self-contained) +RUN apt-get update && apt-get install -y \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +# Set work directory +WORKDIR /app + +# Install Python dependencies +COPY requirements.txt . 
+RUN pip install --upgrade pip && \ + pip install -r requirements.txt + +# Copy project source +COPY videocaller/ ./videocaller/ + +# Copy HuggingFace startup script +COPY start.hf.sh /app/start.hf.sh +RUN chmod +x /app/start.hf.sh + +# HuggingFace Spaces requires the app to listen on port 7860 +EXPOSE 7860 + +# Static files are collected at startup (in start.hf.sh) so env vars are available +# CMD runs migrations, optionally starts Django-Q worker, then starts Daphne on port 7860 +CMD ["/app/start.hf.sh"] diff --git a/Dockerfile.huggingface b/Dockerfile.huggingface new file mode 100644 index 0000000000000000000000000000000000000000..2ca672b355bd4b6637bdc7cf201cf2b0a945f51d --- /dev/null +++ b/Dockerfile.huggingface @@ -0,0 +1,34 @@ +FROM python:3.11-slim + +# Set environment variables +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=1 + +# Install system dependencies +# gcc is needed for some Python packages; no postgresql-client needed (psycopg2-binary is self-contained) +RUN apt-get update && apt-get install -y \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +# Set work directory +WORKDIR /app + +# Install Python dependencies +COPY requirements.txt . 
+RUN pip install --upgrade pip && \ + pip install -r requirements.txt + +# Copy project source +COPY videocaller/ ./videocaller/ + +# Copy HuggingFace startup script +COPY start.hf.sh /app/start.hf.sh +RUN chmod +x /app/start.hf.sh + +# HuggingFace Spaces requires the app to listen on port 7860 +EXPOSE 7860 + +# Static files are collected at startup (in start.hf.sh) so env vars are available +# CMD runs migrations, optionally starts Django-Q worker, then starts Daphne on port 7860 +CMD ["/app/start.hf.sh"] diff --git a/Dockerfile.worker b/Dockerfile.worker new file mode 100644 index 0000000000000000000000000000000000000000..bd120d2903a30ea91c752abe166b0cb3c8539420 --- /dev/null +++ b/Dockerfile.worker @@ -0,0 +1,27 @@ +FROM python:3.11-slim + +# Set environment variables +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=1 + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + postgresql-client \ + && rm -rf /var/lib/apt/lists/* + +# Set work directory +WORKDIR /app + +# Install Python dependencies +COPY requirements.txt . +RUN pip install --upgrade pip && \ + pip install -r requirements.txt + +# Copy project +COPY videocaller/ ./videocaller/ + +# Run qcluster worker +WORKDIR /app/videocaller +CMD ["python", "manage.py", "qcluster"] diff --git a/HF_DEPLOYMENT.md b/HF_DEPLOYMENT.md new file mode 100644 index 0000000000000000000000000000000000000000..5fcef05e87bfb8c148c0ea52a377429a0e2862c3 --- /dev/null +++ b/HF_DEPLOYMENT.md @@ -0,0 +1,147 @@ +# Deploying Aimeet to Hugging Face Spaces + +Hugging Face Spaces with the **Docker SDK** lets you run this full Django app for free. +The free tier gives you 2 vCPU and 16 GB RAM. 
+ +## Constraints to Know + +| Constraint | Impact | Solution | +|---|---|---| +| No persistent disk | SQLite data & uploaded files lost on restart | Use external PostgreSQL + AWS S3 | +| Single container | Web server + worker share one process | Handled — `start.hf.sh` runs both | +| Space hibernation | Free Spaces sleep after 48 h inactivity (~30 s wake) | Acceptable for demos | +| Port 7860 | Must serve on this port | Already set in `Dockerfile.huggingface` | + +--- + +## Step 0 — Set Up External Services (free tiers) + +| Service | Provider | What to get | +|---|---|---| +| **PostgreSQL** | [supabase.com](https://supabase.com) or [neon.tech](https://neon.tech) | Connection string (`postgresql://...`) | +| **Redis** | [upstash.com](https://upstash.com) | Redis URL (`rediss://...`) | +| **Vector DB** | [cloud.qdrant.io](https://cloud.qdrant.io) | Cluster URL + API key | +| **Video calls** | [console.agora.io](https://console.agora.io) | App ID + Certificate | +| **Realtime chat** | [pusher.com](https://pusher.com) | App ID, Key, Secret, Cluster | +| **Google Gemini** | [aistudio.google.com](https://aistudio.google.com) | API key | +| **Transcription** | [assemblyai.com](https://assemblyai.com) | API key (optional) | +| **File storage** | [AWS S3](https://aws.amazon.com/s3/) | Bucket + IAM key/secret (optional) | + +--- + +## Step 1 — Create a New Space on Hugging Face + +1. Go to **[huggingface.co](https://huggingface.co)** → **New Space**. +2. Fill in: + - **Space name**: `aimeet` (or anything you like) + - **SDK**: **Docker** + - **Visibility**: Public or Private +3. Click **Create Space**. + +The Space page shows an empty repo. Copy its git URL (looks like +`https://huggingface.co/spaces/YOUR_USERNAME/aimeet`). 
+ +--- + +## Step 2 — Clone the Space Repo and Copy the Project + +```bash +# Clone the empty HF Space repo +git clone https://huggingface.co/spaces/YOUR_USERNAME/aimeet hf-space +cd hf-space + +# Copy all project files into it +cp -r /path/to/Django-VIdeocall-App/. . + +# HF Spaces requires its own README.md with YAML front matter +cp hf_space_README.md README.md +rm hf_space_README.md # remove the copy — README.md is the one HF uses + +# Tell Docker to use the HF-specific Dockerfile +cp Dockerfile.huggingface Dockerfile + +# Remove the local virtualenv and build artifacts (already in .gitignore) +# The .gitignore already excludes env/, __pycache__, *.sqlite3, media/, etc. +``` + +--- + +## Step 3 — Commit and Push + +```bash +git lfs install # HF uses Git LFS; run once +git add . +git commit -m "Initial deploy of Aimeet to HF Spaces" +git push +``` + +The Space starts building automatically. Open the **Logs** tab on the Space page to follow the build. First build takes **5–10 minutes** because of large dependencies (`sentence-transformers`, `unstructured`, etc.). + +--- + +## Step 4 — Set Environment Secrets + +In the Space page go to **Settings → Variables and secrets → New secret** and add: + +| Key | Value | +|---|---| +| `DJANGO_SECRET_KEY` | Run `python -c "import secrets; print(secrets.token_urlsafe(50))"` | +| `DJANGO_DEBUG` | `false` | +| `DATABASE_URL` | Your PostgreSQL connection string | +| `REDIS_URL` | Your Upstash Redis URL | +| `AGORA_APP_ID` | Agora App ID | +| `AGORA_APP_CERTIFICATE` | Agora App Certificate | +| `PUSHER_APP_ID` | Pusher App ID | +| `PUSHER_KEY` | Pusher Key | +| `PUSHER_SECRET` | Pusher Secret | +| `PUSHER_CLUSTER` | Pusher cluster (e.g. 
`ap2`) | +| `GOOGLE_API_KEY` | Gemini API key | +| `QDRANT_URL` | Qdrant cluster URL | +| `QDRANT_API_KEY` | Qdrant API key | +| `ASSEMBLYAI_API_KEY` | AssemblyAI key (optional) | +| `AWS_ACCESS_KEY_ID` | AWS key (optional, for recordings) | +| `AWS_SECRET_ACCESS_KEY` | AWS secret | +| `AWS_STORAGE_BUCKET_NAME` | S3 bucket name | + +> **Important**: Use **Secrets** (not Variables) for all API keys and passwords — they are encrypted and not exposed in build logs. + +After adding secrets, click **Restart Space** in the Settings tab to apply them. + +--- + +## Step 5 — Access Your App + +Once the Space is running, your app is live at: + +``` +https://YOUR_USERNAME-aimeet.hf.space +``` + +HF Spaces automatically sets the `SPACE_HOST` environment variable to this hostname. The `settings.py` already reads it and adds it to `ALLOWED_HOSTS` and `CSRF_TRUSTED_ORIGINS`. + +--- + +## Updating the App + +Push changes to the HF Space git repo to trigger a rebuild: + +```bash +cd hf-space +# make your changes +git add . 
+git commit -m "Update: description of change"
+git push
+```
+
+---
+
+## Troubleshooting
+
+| Symptom | Likely cause | Fix |
+|---|---|---|
+| Build fails at `pip install` | Heavy deps hit memory limit | Try Space tier upgrade or pin lighter versions |
+| `DisallowedHost` error | Hostname not in `ALLOWED_HOSTS` | Verify `SPACE_HOST` env var is set automatically by HF |
+| `500 Internal Server Error` | Missing required secret | Check Logs tab for the traceback; add the missing secret |
+| `CSRF verification failed` | CSRF origin mismatch | Ensure `SPACE_HOST` is set (HF sets it automatically) so it lands in `CSRF_TRUSTED_ORIGINS`; restart the Space after changing secrets |
+| Database errors on first run | Migrations not run yet | Migrations run automatically in `start.hf.sh`; check Logs |
+| Static files returning 404 | `collectstatic` failed | Check build logs; ensure `DJANGO_SECRET_KEY` secret is set |
diff --git a/Procfile b/Procfile
new file mode 100644
index 0000000000000000000000000000000000000000..30078f550499102541e1ea483f7238506bed52b3
--- /dev/null
+++ b/Procfile
@@ -0,0 +1,2 @@
+web: cd videocaller && daphne -b 0.0.0.0 -p $PORT videocaller.asgi:application
+worker: cd videocaller && python manage.py qcluster
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c797ab6c17612fade75347eb795c3693bfc3a6be
--- /dev/null
+++ b/README.md
@@ -0,0 +1,46 @@
+---
+title: Aimeet
+emoji: 🎥
+colorFrom: blue
+colorTo: indigo
+sdk: docker
+app_port: 7860
+pinned: false
+---
+
+# Aimeet — AI-Powered Video Meeting Platform
+
+Real-time video meetings with AI summaries, RAG document chat, and live transcription.
+ +## Features + +- **Video/audio calls** via Agora RTC +- **AI meeting summaries** powered by Google Gemini +- **Document Q&A** with RAG (LangChain + Qdrant) +- **Live transcription** via AssemblyAI +- **Real-time chat** via Pusher + +## Required Environment Variables + +Set these in **Settings → Variables and secrets** of your Space: + +| Key | Required | Description | +|---|---|---| +| `DJANGO_SECRET_KEY` | ✅ | Django secret key (generate a random 50-char string) | +| `DJANGO_DEBUG` | | `false` for production | +| `DATABASE_URL` | ✅ | PostgreSQL URL (Supabase / Neon free tier) | +| `REDIS_URL` | | Redis URL for task queue (Upstash free tier) | +| `AGORA_APP_ID` | ✅ | Agora App ID | +| `AGORA_APP_CERTIFICATE` | ✅ | Agora App Certificate | +| `PUSHER_APP_ID` | ✅ | Pusher App ID | +| `PUSHER_KEY` | ✅ | Pusher Key | +| `PUSHER_SECRET` | ✅ | Pusher Secret | +| `PUSHER_CLUSTER` | ✅ | Pusher Cluster (e.g. `ap2`) | +| `GOOGLE_API_KEY` | ✅ | Google Gemini API key | +| `QDRANT_URL` | ✅ | Qdrant Cloud cluster URL | +| `QDRANT_API_KEY` | ✅ | Qdrant Cloud API key | +| `ASSEMBLYAI_API_KEY` | | AssemblyAI transcription key | +| `AWS_ACCESS_KEY_ID` | | AWS S3 for recording storage | +| `AWS_SECRET_ACCESS_KEY` | | AWS S3 secret | +| `AWS_STORAGE_BUCKET_NAME` | | S3 bucket name | +| `AWS_S3_REGION_NAME` | | S3 region (default: `us-east-1`) | diff --git a/REQUIREMENTS.md b/REQUIREMENTS.md new file mode 100644 index 0000000000000000000000000000000000000000..7605ec975b7a070cd09cf4fa5c8bd7eda504bdc1 --- /dev/null +++ b/REQUIREMENTS.md @@ -0,0 +1,472 @@ +# AIMeet - Requirements Document + +## 1. 
Functional Requirements + +### 1.1 User Management +- FR-1.1: Users must be able to register with username, email, and password +- FR-1.2: Users must be able to log in with credentials +- FR-1.3: Users must be able to log out +- FR-1.4: Users must be able to reset password via email +- FR-1.5: User profiles must store name, email, profile picture + +### 1.2 Meeting Management +- FR-2.1: Users can create a meeting with title, description, and max participants +- FR-2.2: System generates unique shareable room code for each meeting +- FR-2.3: Users can join meetings using room code +- FR-2.4: Meeting host can end the meeting +- FR-2.5: Meeting state tracks: active, ended, archived +- FR-2.6: Users can view list of their meetings (hosted and joined) +- FR-2.7: Users can delete or archive completed meetings + +### 1.3 Real-Time Video & Audio +- FR-3.1: Video streaming using Agora RTC SDK +- FR-3.2: Audio streaming with VP8 codec +- FR-3.3: Dynamic bitrate adjustment based on network +- FR-3.4: Participants can mute/unmute audio and video +- FR-3.5: Host can kick participants +- FR-3.6: Screen sharing capability (optional, future) + +### 1.4 Recording +- FR-4.1: Audio is automatically recorded during meeting using MediaRecorder +- FR-4.2: Recording saved as WebM format locally +- FR-4.3: Users can upload recording after meeting +- FR-4.4: Recording uploaded to AWS S3 +- FR-4.5: System stores recording metadata (size, duration, upload time) +- FR-4.6: Presigned URLs generated for private S3 access + +### 1.5 Transcription +- FR-5.1: Uploaded recordings sent to AssemblyAI for transcription +- FR-5.2: System polls AssemblyAI for transcription status +- FR-5.3: Completed transcripts saved to database +- FR-5.4: Transcript status tracked: not_started, processing, completed, failed +- FR-5.5: Transcript linked to meeting record + +### 1.6 Knowledge Processing (RAG) +- FR-6.1: Users can trigger "Prepare for Search" to process transcript +- FR-6.2: System chunks transcript using 
recursive character splitting (500 tokens, 50 overlap) +- FR-6.3: Chunks stored in TranscriptChunk model +- FR-6.4: Chunks embedded using OpenAI text-embedding-3-small +- FR-6.5: Embeddings stored in Qdrant vector database +- FR-6.6: Idempotent processing: check timestamps to avoid reprocessing + +### 1.7 Question Answering (RAG Query) +- FR-7.1: Users can ask questions about meeting content +- FR-7.2: Question embedded using same OpenAI model +- FR-7.3: System searches Qdrant for top-5 similar chunks +- FR-7.4: Conversation history retrieved for context +- FR-7.5: GPT-4o called with context + history + question +- FR-7.6: Response generated and displayed to user +- FR-7.7: Q&A turn saved to ConversationHistory + +### 1.8 Meeting Preparation (Sticky Notes) +- FR-8.1: When creating new meeting, system suggests related past meetings +- FR-8.2: Suggestions based on meeting title/agenda keywords +- FR-8.3: Shows what was discussed about same topics before +- FR-8.4: Users can expand sticky notes to see full context +- FR-8.5: Helps prevent duplicate discussions + +### 1.9 Document Management +- FR-9.1: Users can upload documents (PDF, DOCX, TXT) +- FR-9.2: Documents stored in S3 +- FR-9.3: Document text extracted and stored +- FR-9.4: Documents chunked same way as transcripts +- FR-9.5: Document chunks embedded and stored in Qdrant +- FR-9.6: Users can view list of documents per meeting +- FR-9.7: Users can delete documents + +### 1.10 Unified Search +- FR-10.1: Questions search both transcripts and documents +- FR-10.2: Results include source type (meeting transcript vs document) +- FR-10.3: Search results show relevance scores +- FR-10.4: Source metadata (timestamps, document names) included + +### 1.11 Chat +- FR-11.1: Real-time chat during meetings using WebSocket +- FR-11.2: Chat messages saved to database +- FR-11.3: Users can view chat history +- FR-11.4: Message timestamps tracked +- FR-11.5: Messages linked to user and meeting + +### 1.12 Reporting & Analytics 
(Future) +- FR-12.1: Meeting duration and participant count +- FR-12.2: Transcript statistics (word count, duration) +- FR-12.3: Q&A usage statistics +- FR-12.4: Most discussed topics across meetings + +--- + +## 2. Non-Functional Requirements + +### 2.1 Performance +- NFR-1.1: Q&A response time: <4 seconds (including LLM latency) +- NFR-1.2: Vector search latency: <500ms +- NFR-1.3: API response time: <1 second for non-AI endpoints +- NFR-1.4: Page load time: <3 seconds +- NFR-1.5: Concurrent users: 100+ with auto-scaling +- NFR-1.6: Transcript processing: <1 minute for typical meeting + +### 2.2 Scalability +- NFR-2.1: Horizontal scaling via EC2 Auto Scaling Groups +- NFR-2.2: Database: RDS with read replicas +- NFR-2.3: S3 handles unlimited storage +- NFR-2.4: Qdrant Cloud manages vector scaling +- NFR-2.5: Support growth from 10 to 10,000 users + +### 2.3 Reliability +- NFR-3.1: 99.5% uptime SLA +- NFR-3.2: Automated daily database backups +- NFR-3.3: Multi-AZ RDS for failover +- NFR-3.4: CloudFront CDN for static assets +- NFR-3.5: Graceful error handling and user feedback + +### 2.4 Security +- NFR-4.1: HTTPS for all communications +- NFR-4.2: Password hashing with bcrypt +- NFR-4.3: JWT tokens for API authentication +- NFR-4.4: SQL injection protection via ORM +- NFR-4.5: XSS protection via template escaping +- NFR-4.6: CSRF protection on forms +- NFR-4.7: S3 encryption at rest (AES-256) +- NFR-4.8: Database encryption (KMS) +- NFR-4.9: API keys in Secrets Manager (no hardcoding) +- NFR-4.10: Private S3 access via presigned URLs +- NFR-4.11: Private subnet for RDS (no public IP) +- NFR-4.12: Rate limiting: 100 requests/minute per user + +### 2.5 Usability +- NFR-5.1: Responsive design for mobile (375px+) and desktop +- NFR-5.2: Accessibility: WCAG 2.1 Level AA compliance +- NFR-5.3: Intuitive UI with clear navigation +- NFR-5.4: Error messages explain what went wrong +- NFR-5.5: Dark and light mode support (future) + +### 2.6 Maintainability +- NFR-6.1: Code 
documented with docstrings
+- NFR-6.2: DRY principle: no code duplication
+- NFR-6.3: Clear separation of concerns
+- NFR-6.4: Comprehensive logging with timestamps
+- NFR-6.5: Automated testing (unit + integration)
+
+### 2.7 Compatibility
+- NFR-7.1: Browser support: Chrome, Firefox, Safari, Edge (latest 2 versions)
+- NFR-7.2: Mobile support: iOS Safari, Android Chrome
+- NFR-7.3: Python 3.11+ support
+- NFR-7.4: PostgreSQL 12+ support
+
+---
+
+## 3. System Requirements
+
+### 3.1 Software Requirements
+- **Backend**: Django 4.x, Python 3.11+
+- **Database**: PostgreSQL 12+ (or SQLite for dev)
+- **Web Server**: Gunicorn + Nginx
+- **Vector DB**: Qdrant 1.x
+- **Message Queue** (future): Celery + Redis
+
+### 3.2 Hardware Requirements (Production)
+- **Compute**: EC2 t3.medium (2 vCPU, 4GB RAM) minimum
+  - Development: t3.small sufficient
+  - Production: t3.large+ with auto-scaling 2-10 instances
+- **Database**: RDS t4g.medium (2 vCPU, 4GB RAM)
+  - Storage: 100GB gp3 (auto-scaling)
+- **Bandwidth**: 10 Mbps minimum (up to 1 Gbps for scaling)
+
+### 3.3 Browser Requirements
+- Minimum: Chrome 90+, Firefox 88+, Safari 14+, Edge 90+
+- WebRTC support required for video
+- LocalStorage and SessionStorage support
+- WebSocket support
+
+---
+
+## 4. 
Dependencies + +### 4.1 Backend Dependencies +``` +Django==4.2 +djangorestframework==3.14.0 +psycopg2-binary==2.9.0 +python-dotenv==1.0.0 + +# AI & ML +openai==2.16.0 +qdrant-client==1.16.2 +requests==2.31.0 + +# Transcription +AssemblyAI (API, no package) + +# Cloud +boto3==1.26.137 + +# Real-time +pusher==3.3.1 + +# Video +agora-rtm (Agora SDK) +agora-token-builder (Token generation) + +# Utilities +python-dateutil==2.8.2 +pytz==2023.3 +Pillow==10.0.0 +``` + +### 4.2 Frontend Dependencies +``` +Agora RTC SDK v4.24.2 (JavaScript) +Bootstrap 5.3 +jQuery 3.6 (optional, for DOM manipulation) +``` + +### 4.3 External Services +- **OpenAI API**: Embeddings (text-embedding-3-small) + LLM (GPT-4o) +- **AssemblyAI API**: Speech-to-text transcription +- **Qdrant Cloud**: Vector database hosting +- **AWS Services**: EC2, RDS, S3, CloudWatch, Secrets Manager, ALB +- **Agora**: Video/audio RTC +- **Pusher**: WebSocket for chat + +--- + +## 5. API Requirements + +### 5.1 REST API Specifications +- **Base URL**: `/api/` or `/` (depending on endpoint) +- **Content-Type**: `application/json` +- **Authentication**: Django session + optional JWT for API clients +- **Response Format**: JSON with status, data, and error fields +- **Pagination**: Limit + offset for list endpoints +- **Versioning**: Not required initially (v1 implicit) + +### 5.2 WebSocket Requirements +- **Protocol**: WebSocket (Pusher-managed) +- **Channels**: Per-meeting chat channels +- **Message Format**: JSON +- **Auto-reconnect**: Client-side retry logic + +### 5.3 Rate Limiting +- 100 requests/minute per user +- 1000 requests/minute per IP +- Q&A queries: 10 per minute per user + +--- + +## 6. 
Infrastructure Requirements + +### 6.1 AWS Services Required +- **Compute**: EC2 (application server) +- **Database**: RDS PostgreSQL (relational data) +- **Storage**: S3 (recordings, documents) +- **CDN**: CloudFront (static assets, S3 downloads) +- **Load Balancer**: Application Load Balancer (ALB) +- **Monitoring**: CloudWatch (logs, metrics, alarms) +- **Secrets**: Secrets Manager (API keys, credentials) +- **Networking**: VPC, Security Groups, NAT Gateway + +### 6.2 Third-Party Services Required +- **Qdrant Cloud**: Vector database (managed) +- **OpenAI**: API access (embeddings + GPT-4o) +- **AssemblyAI**: Transcription API +- **Agora**: RTC infrastructure +- **Pusher**: WebSocket infrastructure + +### 6.3 Monitoring & Logging +- CloudWatch Logs: All application logs +- CloudWatch Metrics: CPU, memory, request latency +- CloudWatch Alarms: Errors, latency spikes, service degradation +- Application Insights: APM for performance tracking (optional) + +--- + +## 7. Data Requirements + +### 7.1 Database Schema +- **Users**: id, username, email, password_hash, created_at +- **MeetingRoom**: id, room_code, host_id, title, description, status, recording data, transcript data, embedding metadata +- **TranscriptChunk**: id, meeting_id, chunk_text, chunk_index, embedding_vector_id +- **DocumentUpload**: id, meeting_id, file_name, file_type, s3_url, raw_text +- **DocumentChunk**: id, document_id, chunk_text, chunk_index, embedding_vector_id +- **ConversationHistory**: id, meeting_id, user_id, user_question, assistant_response, relevant_chunks +- **ChatMessage**: id, user_id, content, created_at + +### 7.2 Vector Database Schema +- **Collection**: meeting_transcripts + - Dimension: 1536 (OpenAI text-embedding-3-small) + - Distance: Cosine Similarity + - Payload: meeting_id, chunk_index, text, timestamps + +### 7.3 Storage (S3) Structure +``` +s3://aimeet-s3-bucket/ +├── recordings/ +│ ├── meeting_123_audio.webm +│ └── meeting_124_audio.webm +├── documents/ +│ ├── 
document_456.pdf +│ └── document_457.txt +└── transcripts/ + ├── transcript_123.txt + └── transcript_124.txt +``` + +### 7.4 Data Retention Policy +- Recordings: Keep indefinitely (archive to Glacier after 90 days) +- Transcripts: Keep indefinitely +- Chat messages: Keep indefinitely +- Documents: Keep indefinitely +- Database backups: 35-day retention +- Logs: 30-day retention + +--- + +## 8. Integration Requirements + +### 8.1 External API Integrations +- **OpenAI API**: Embeddings (batch and single) +- **AssemblyAI API**: Transcription (async polling) +- **Qdrant API**: Vector search and storage +- **AWS SDK (Boto3)**: S3 operations +- **Agora SDK**: Token generation and RTC +- **Pusher API**: WebSocket messaging + +### 8.2 Authentication Integrations +- Django authentication (built-in) +- Optional: OAuth2 (Google, GitHub) - future +- Optional: SAML - future + +--- + +## 9. Testing Requirements + +### 9.1 Unit Testing +- Models: Test data validation and relationships +- Views: Test API endpoints with mocks +- Utilities: Test embedding, chunking, RAG functions +- Target: >80% code coverage + +### 9.2 Integration Testing +- End-to-end meeting flow +- Recording upload and transcription +- RAG pipeline (chunk → embed → search → query) +- Document upload and search + +### 9.3 Performance Testing +- Load test: 100 concurrent users +- Transcription processing time +- Q&A response latency +- Vector search speed + +### 9.4 Security Testing +- OWASP Top 10 vulnerability scanning +- SQL injection attempts +- XSS payloads +- CSRF validation + +--- + +## 10. 
Documentation Requirements + +### 10.1 Code Documentation +- Docstrings for all functions/methods +- Inline comments for complex logic +- README.md for setup and usage +- API documentation (Swagger/OpenAPI) + +### 10.2 User Documentation +- Quick start guide +- Feature tutorials +- FAQ +- Troubleshooting guide + +### 10.3 System Documentation +- ARCHITECTURE.md (system design) +- DESIGN.md (diagrams and flows) +- REQUIREMENTS.md (this document) +- Deployment guide + +--- + +## 11. Future Enhancements + +### 11.1 Planned Features +- Speaker diarization (identify who said what) +- Automatic action item detection +- Topic summaries and key moments +- Calendar integration +- Role-based access control +- Multi-language support +- Slack/Teams integration +- Custom embedding models + +### 11.2 Optimization Opportunities +- Redis caching layer (conversation history, user sessions) +- Celery background jobs (transcription polling, document processing) +- WebRTC data channels (peer-to-peer communication) +- Progressive Web App (PWA) capabilities + +--- + +## 12. Success Criteria + +### 12.1 Functional Success +- All FR requirements fully implemented +- All tests passing +- No critical bugs in production + +### 12.2 Performance Success +- Page load time <3 seconds (95th percentile) +- Q&A response time <4 seconds (95th percentile) +- 99.5% uptime maintained +- <1 second vector search latency + +### 12.3 User Success +- User registration completion rate >90% +- Meeting creation to Q&A within 5 minutes +- >80% of users try Q&A feature within first week + +### 12.4 Business Success +- Support 1000+ concurrent users +- Cost <$1000/month at 1000-user scale +- Document uploaded for >50% of meetings +- Sticky notes used in >40% of meetings + +--- + +## 13. 
Constraints & Assumptions + +### 13.1 Constraints +- OpenAI API rate limits (depends on plan) +- AssemblyAI transcription queue +- AWS service quotas +- Budget limitations for cloud services + +### 13.2 Assumptions +- Users have stable internet connection (>2 Mbps) +- Meetings typically 30 minutes to 2 hours +- Transcripts typically 5K-20K tokens +- Users have modern browsers (2020+) +- Organizations want to keep data private (not shared) + +--- + +## 14. Compliance & Standards + +### 14.1 Security Standards +- SSL/TLS 1.3 for encryption +- OWASP Top 10 compliance +- GDPR compliance (user data protection) +- HIPAA compliance (if health data involved) - future + +### 14.2 Coding Standards +- PEP 8 for Python code style +- Django best practices +- RESTful API design +- Semantic versioning for releases + +### 14.3 Accessibility Standards +- WCAG 2.1 Level AA compliance +- Keyboard navigation support +- Screen reader compatibility +- Color contrast ratios >4.5:1 diff --git a/build.sh b/build.sh new file mode 100644 index 0000000000000000000000000000000000000000..ac11cd17d813ba089092f8c69e8925893eb6d52e --- /dev/null +++ b/build.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# Exit on error +set -o errexit + +# Install dependencies +pip install --upgrade pip +pip install -r requirements.txt + +# Navigate to Django project directory +cd videocaller + +# Collect static files +python manage.py collectstatic --no-input + +# Run migrations +python manage.py migrate --no-input diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000000000000000000000000000000000000..3609e7aa7941c88b4ff2521cd411e41e50dd8d12 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,74 @@ +version: "3.8" + +services: + web: + build: . 
+ working_dir: /app/videocaller + command: daphne -b 0.0.0.0 -p 8000 videocaller.asgi:application + volumes: + - ./videocaller:/app/videocaller + ports: + - "8000:8000" + env_file: + - .env + depends_on: + db: + condition: service_started + redis: + condition: service_healthy + environment: + - DATABASE_URL=postgresql://postgres:postgres@db:5432/aimeet + - REDIS_URL=redis://redis:6379/0 + + worker: + build: . + working_dir: /app/videocaller + command: python manage.py qcluster + volumes: + - ./videocaller:/app/videocaller + env_file: + - .env + depends_on: + db: + condition: service_started + redis: + condition: service_healthy + environment: + - DATABASE_URL=postgresql://postgres:postgres@db:5432/aimeet + - REDIS_URL=redis://redis:6379/0 + + db: + image: postgres:15 + volumes: + - postgres_data:/var/lib/postgresql/data + environment: + - POSTGRES_DB=aimeet + - POSTGRES_USER=postgres + - POSTGRES_PASSWORD=postgres + ports: + - "5432:5432" + + redis: + image: redis:7-alpine + ports: + - "6379:6379" + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 3s + retries: 5 + + nginx: + image: nginx:alpine + ports: + - "80:80" + - "443:443" + volumes: + - ./nginx.conf:/etc/nginx/nginx.conf:ro + - ./videocaller/staticfiles:/app/staticfiles:ro + - ./videocaller/media:/app/media:ro + depends_on: + - web + +volumes: + postgres_data: diff --git a/hf_space_README.md b/hf_space_README.md new file mode 100644 index 0000000000000000000000000000000000000000..c797ab6c17612fade75347eb795c3693bfc3a6be --- /dev/null +++ b/hf_space_README.md @@ -0,0 +1,46 @@ +--- +title: Aimeet +emoji: 🎥 +colorFrom: blue +colorTo: indigo +sdk: docker +app_port: 7860 +pinned: false +--- + +# Aimeet — AI-Powered Video Meeting Platform + +Real-time video meetings with AI summaries, RAG document chat, and live transcription. 
+ +## Features + +- **Video/audio calls** via Agora RTC +- **AI meeting summaries** powered by Google Gemini +- **Document Q&A** with RAG (LangChain + Qdrant) +- **Live transcription** via AssemblyAI +- **Real-time chat** via Pusher + +## Required Environment Variables + +Set these in **Settings → Variables and secrets** of your Space: + +| Key | Required | Description | +|---|---|---| +| `DJANGO_SECRET_KEY` | ✅ | Django secret key (generate a random 50-char string) | +| `DJANGO_DEBUG` | | `false` for production | +| `DATABASE_URL` | ✅ | PostgreSQL URL (Supabase / Neon free tier) | +| `REDIS_URL` | | Redis URL for task queue (Upstash free tier) | +| `AGORA_APP_ID` | ✅ | Agora App ID | +| `AGORA_APP_CERTIFICATE` | ✅ | Agora App Certificate | +| `PUSHER_APP_ID` | ✅ | Pusher App ID | +| `PUSHER_KEY` | ✅ | Pusher Key | +| `PUSHER_SECRET` | ✅ | Pusher Secret | +| `PUSHER_CLUSTER` | ✅ | Pusher Cluster (e.g. `ap2`) | +| `GOOGLE_API_KEY` | ✅ | Google Gemini API key | +| `QDRANT_URL` | ✅ | Qdrant Cloud cluster URL | +| `QDRANT_API_KEY` | ✅ | Qdrant Cloud API key | +| `ASSEMBLYAI_API_KEY` | | AssemblyAI transcription key | +| `AWS_ACCESS_KEY_ID` | | AWS S3 for recording storage | +| `AWS_SECRET_ACCESS_KEY` | | AWS S3 secret | +| `AWS_STORAGE_BUCKET_NAME` | | S3 bucket name | +| `AWS_S3_REGION_NAME` | | S3 region (default: `us-east-1`) | diff --git a/nginx.conf b/nginx.conf new file mode 100644 index 0000000000000000000000000000000000000000..e204ab3c9c7ef4f28caff42561a1720ef04ce8ba --- /dev/null +++ b/nginx.conf @@ -0,0 +1,40 @@ +events { + worker_connections 1024; +} + +http { + upstream django { + server web:8000; + } + + server { + listen 80; + server_name localhost; + client_max_body_size 100M; + + location / { + proxy_pass http://django; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For 
$proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_redirect off; + proxy_buffering off; + } + + location /static/ { + alias /app/staticfiles/; + expires 30d; + add_header Cache-Control "public, immutable"; + } + + location /media/ { + alias /app/media/; + expires 7d; + add_header Cache-Control "public"; + } + } +} diff --git a/render.yaml b/render.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d119b373b2bc067287deee60ef2feab3dafdb121 --- /dev/null +++ b/render.yaml @@ -0,0 +1,135 @@ +services: + - type: web + name: aimeet + runtime: docker + region: oregon + plan: starter + branch: master + dockerfilePath: ./Dockerfile + envVars: + - key: PYTHON_VERSION + value: 3.11.4 + - key: DJANGO_SECRET_KEY + generateValue: true + - key: DJANGO_DEBUG + value: "false" + - key: DJANGO_ALLOWED_HOSTS + sync: false + - key: DATABASE_URL + fromDatabase: + name: aimeet-db + property: connectionString + - key: REDIS_URL + fromDatabase: + name: aimeet-redis + property: connectionString + - key: AWS_ACCESS_KEY_ID + sync: false + - key: AWS_SECRET_ACCESS_KEY + sync: false + - key: AWS_STORAGE_BUCKET_NAME + sync: false + - key: AWS_S3_REGION_NAME + value: us-east-1 + - key: AGORA_APP_ID + sync: false + - key: AGORA_APP_CERTIFICATE + sync: false + - key: AGORA_CUSTOMER_ID + sync: false + - key: AGORA_CUSTOMER_SECRET + sync: false + - key: AGORA_RECORDING_REGION + value: NA + - key: ASSEMBLYAI_API_KEY + sync: false + - key: GOOGLE_API_KEY + sync: false + - key: GOOGLE_GENERATE_MODEL + value: gemini-2.5-flash-lite + - key: GOOGLE_CONNECT_TIMEOUT + value: 10 + - key: GOOGLE_READ_TIMEOUT + value: 600 + - key: GOOGLE_MAX_TOKENS + value: 1000 + - key: HF_EMBEDDING_MODEL + value: sentence-transformers/all-MiniLM-L6-v2 + - key: HF_EMBEDDING_DIMENSION + value: 384 + - key: QDRANT_URL + sync: false + - key: QDRANT_API_KEY + sync: false + - key: QDRANT_COLLECTION_NAME + value: meeting_transcripts + - key: PUSHER_APP_ID + sync: false + - key: 
PUSHER_KEY + sync: false + - key: PUSHER_SECRET + sync: false + - key: PUSHER_CLUSTER + sync: false + + - type: worker + name: aimeet-worker + runtime: docker + region: oregon + plan: starter + branch: master + dockerfilePath: ./Dockerfile.worker + envVars: + - key: PYTHON_VERSION + value: 3.11.4 + - key: DJANGO_SECRET_KEY + sync: false + - key: DATABASE_URL + fromDatabase: + name: aimeet-db + property: connectionString + - key: REDIS_URL + fromDatabase: + name: aimeet-redis + property: connectionString + - key: AWS_ACCESS_KEY_ID + sync: false + - key: AWS_SECRET_ACCESS_KEY + sync: false + - key: AWS_STORAGE_BUCKET_NAME + sync: false + - key: AWS_S3_REGION_NAME + value: us-east-1 + - key: ASSEMBLYAI_API_KEY + sync: false + - key: GOOGLE_API_KEY + sync: false + - key: GOOGLE_GENERATE_MODEL + value: gemini-2.5-flash-lite + - key: GOOGLE_CONNECT_TIMEOUT + value: 10 + - key: GOOGLE_READ_TIMEOUT + value: 600 + - key: GOOGLE_MAX_TOKENS + value: 1000 + - key: HF_EMBEDDING_MODEL + value: sentence-transformers/all-MiniLM-L6-v2 + - key: HF_EMBEDDING_DIMENSION + value: 384 + - key: QDRANT_URL + sync: false + - key: QDRANT_API_KEY + sync: false + - key: QDRANT_COLLECTION_NAME + value: meeting_transcripts + +databases: + - name: aimeet-db + databaseName: aimeet + user: aimeet + region: oregon + plan: starter + + - name: aimeet-redis + plan: starter + region: oregon diff --git a/requirements-windows.txt b/requirements-windows.txt new file mode 100644 index 0000000000000000000000000000000000000000..82d258bf16be46bb86a3a3cd87df0d91f9023b9b --- /dev/null +++ b/requirements-windows.txt @@ -0,0 +1,4 @@ +# Windows-specific dependencies +# Install with: pip install -r requirements.txt -r requirements-windows.txt + +python-magic-bin==0.4.14 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..a9ad198fa00c4078db936796dea6c8d28b95df7e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,24 @@ +Django==4.1.5 +setuptools==69.5.1 
+packaging==24.2 +python-dotenv==0.21.0 +pusher==3.3.1 +boto3==1.26.137 +requests==2.31.0 +daphne==4.1.2 +unstructured[pdf]==0.18.31 +django-q==1.3.9 +redis==3.5.3 +google-generativeai==0.7.2 +langchain==0.2.16 +langchain-core==0.2.38 +langchain-google-genai==1.0.10 +langchain-qdrant==0.1.4 +qdrant-client==1.7.3 +langchain-text-splitters==0.2.4 +langchain-huggingface==0.0.3 +sentence-transformers==2.7.0 +psycopg2-binary==2.9.9 +gunicorn==21.2.0 +whitenoise==6.6.0 +dj-database-url==2.1.0 diff --git a/runtime.txt b/runtime.txt new file mode 100644 index 0000000000000000000000000000000000000000..1f79d441fe9d8203caddccc9029d5ddecfbc6b08 --- /dev/null +++ b/runtime.txt @@ -0,0 +1 @@ +python-3.11.7 diff --git a/start.hf.sh b/start.hf.sh new file mode 100644 index 0000000000000000000000000000000000000000..b3c88814f740d576f02d2bd0a3d48b04a857ef82 --- /dev/null +++ b/start.hf.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -e + +cd /app/videocaller + +echo "=== Collecting static files ===" +python manage.py collectstatic --noinput + +echo "=== Running database migrations ===" +python manage.py migrate --noinput + +# Start Django-Q worker in background only if Redis is configured. +# Without REDIS_URL the Q_CLUSTER falls back to the ORM broker (database-backed). +if [ -n "$REDIS_URL" ]; then + echo "=== Starting Django-Q worker (Redis broker) ===" + python manage.py qcluster & + echo "Worker started with PID $!" +else + echo "=== No REDIS_URL set — starting Django-Q worker with ORM broker ===" + python manage.py qcluster & + echo "Worker started with PID $!" 
+fi + +echo "=== Starting Daphne ASGI server on 0.0.0.0:7860 ===" +exec daphne -b 0.0.0.0 -p 7860 videocaller.asgi:application diff --git a/start.sh b/start.sh new file mode 100644 index 0000000000000000000000000000000000000000..cfc2e922791a052af55ba12e6bd21b3d188eb033 --- /dev/null +++ b/start.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -e + +cd /app/videocaller + +# Run migrations +python manage.py migrate --noinput + +# Collect static files (if needed) +python manage.py collectstatic --noinput + +# Start Daphne web server +exec daphne -b 0.0.0.0 -p 8000 videocaller.asgi:application diff --git a/videocaller/agora/__init__.py b/videocaller/agora/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/videocaller/agora/admin.py b/videocaller/agora/admin.py new file mode 100644 index 0000000000000000000000000000000000000000..8c38f3f3dad51e4585f3984282c2a4bec5349c1e --- /dev/null +++ b/videocaller/agora/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. 
import logging
import re
from typing import List

import requests
from django.conf import settings

from .embedding_utils import search_similar_chunks

logger = logging.getLogger(__name__)

# Gemini configuration pulled from Django settings, with safe fallbacks.
GOOGLE_API_KEY = getattr(settings, 'GOOGLE_API_KEY', '')
GOOGLE_GENERATE_MODEL = getattr(settings, 'GOOGLE_GENERATE_MODEL', 'gemini-2.5-flash-lite')
GOOGLE_API_BASE = "https://generativelanguage.googleapis.com/v1beta/models/"
GOOGLE_CONNECT_TIMEOUT = getattr(settings, 'GOOGLE_CONNECT_TIMEOUT', 10)
GOOGLE_READ_TIMEOUT = getattr(settings, 'GOOGLE_READ_TIMEOUT', 600)


def _google_generate(prompt: str) -> str:
    """Call the Gemini ``generateContent`` REST endpoint with *prompt*.

    Returns the concatenated text of all candidate parts, stripped.
    Raises ``ValueError`` when no API key is configured and
    ``requests.HTTPError`` on a non-2xx response.
    """
    if not GOOGLE_API_KEY:
        raise ValueError("GOOGLE_API_KEY is not configured")

    endpoint = f"{GOOGLE_API_BASE}{GOOGLE_GENERATE_MODEL}:generateContent?key={GOOGLE_API_KEY}"
    body = {
        "contents": [
            {
                "role": "user",
                "parts": [
                    {"text": prompt}
                ]
            }
        ]
    }
    resp = requests.post(
        endpoint,
        json=body,
        timeout=(GOOGLE_CONNECT_TIMEOUT, GOOGLE_READ_TIMEOUT)
    )
    resp.raise_for_status()
    data = resp.json()

    # A response may hold several candidates, each with several parts;
    # keep only non-empty text parts, in order.
    pieces: List[str] = [
        part["text"]
        for candidate in data.get("candidates", [])
        for part in candidate.get("content", {}).get("parts", [])
        if part.get("text")
    ]
    return "".join(pieces).strip()


def _parse_points(text: str, max_points: int = 8) -> List[str]:
    """Turn LLM bullet output into at most *max_points* clean strings.

    Leading bullet markers / numbering are stripped from each line.
    Falls back to the whole text as a single point when no line survives.
    """
    bullet_prefix = re.compile(r"^[\-\*•\.\d\)\(]+\s*")
    points: List[str] = []
    for raw_line in text.splitlines():
        stripped = raw_line.strip()
        if not stripped:
            continue
        candidate = bullet_prefix.sub("", stripped).strip()
        if candidate:
            points.append(candidate)
        if len(points) >= max_points:
            break
    if not points and text.strip():
        points = [text.strip()]
    return points


def generate_agenda_points(meeting_title: str, meeting_description: str, meeting_id: int | None) -> List[str]:
    """Produce 5-8 short agenda points grounded in past meeting notes.

    Retrieves similar chunks from the vector store, then asks Gemini to
    summarize them. Returns an empty list when nothing relevant is found
    or generation fails (best-effort by design).
    """
    agenda_hint = (meeting_title or "").strip()
    query = f"Agenda: {agenda_hint}" if agenda_hint else "meeting agenda"
    relevant_chunks = search_similar_chunks(query, meeting_id, top_k=12)
    if not relevant_chunks:
        return []

    context = "\n\n".join(
        f"[Source: {chunk.get('source_type', 'meeting_transcript')}] {chunk['text']}"
        for chunk in relevant_chunks
    )

    prompt = (
        "You are preparing concise discussion points for a meeting.\n"
        "Use ONLY the PAST NOTES below. Do not add new information or assumptions.\n"
        "If the notes do not support a point, do not include it.\n"
        "Return 5-8 short points (max 14 words each), one per line, no numbering.\n\n"
        f"MEETING TITLE: {agenda_hint or 'N/A'}\n\n"
        f"PAST NOTES:\n{context}"
    )

    try:
        return _parse_points(_google_generate(prompt))
    except Exception as exc:
        # Swallow deliberately: agenda points are a nice-to-have.
        logger.error("Error generating agenda points: %s", str(exc))
        return []
Dict: + """Process a document and store chunks + embeddings.""" + raise NotImplementedError + + def _store_chunks(self, document: DocumentUpload, chunks: List[str], block_types: Optional[List[str]] = None, + metadatas: Optional[List[Dict]] = None) -> int: + chunk_objects = [] + for idx, chunk_text in enumerate(chunks): + block_type = block_types[idx] if block_types and idx < len(block_types) else "text" + metadata = metadatas[idx] if metadatas and idx < len(metadatas) else {} + chunk_objects.append( + DocumentChunk.objects.create( + document=document, + chunk_text=chunk_text, + chunk_index=idx, + block_type=block_type, + metadata=metadata, + ) + ) + + store_document_chunks_in_vector_db(document.meeting_id, document, chunks, chunk_objects) + document.embeddings_created_at = timezone.now() + document.save(update_fields=["embeddings_created_at"]) + return len(chunks) + + +class AudioDocumentStrategy(BaseDocumentStrategy): + """Process audio files by transcribing and chunking text.""" + + def process(self, document: DocumentUpload, local_path: str, s3_url: Optional[str], presigned_url: Optional[str]) -> Dict: + if not settings.ASSEMBLYAI_API_KEY: + raise RuntimeError("AssemblyAI API key is not configured") + + audio_url = presigned_url or s3_url + if not audio_url: + raise RuntimeError("Audio requires S3 upload or presigned URL for transcription") + + assembly_client = AssemblyAIClient() + start_data = assembly_client.start_transcription(audio_url) + transcript_id = start_data.get("id") + status = start_data.get("status", "processing") + + transcript_text = None + if transcript_id: + result = assembly_client.wait_for_transcription(transcript_id, timeout_seconds=120, poll_interval=4) + status = result.get("status", status) + if status == "completed": + transcript_text = result.get("text") + + if status != "completed" or not transcript_text: + error_msg = result.get("error") if transcript_id else "Transcription failed" + raise RuntimeError(error_msg or "Transcription 
failed") + + document.raw_text = transcript_text + document.status = "processing" + document.save(update_fields=["raw_text", "status"]) + + chunks = chunk_transcript(transcript_text) + chunk_count = self._store_chunks(document, chunks) + return {"chunk_count": chunk_count, "status": "completed"} + + +class TextDocumentStrategy(BaseDocumentStrategy): + """Process plain text files.""" + + def process(self, document: DocumentUpload, local_path: str, s3_url: Optional[str], presigned_url: Optional[str]) -> Dict: + with open(local_path, "r", encoding="utf-8", errors="ignore") as handle: + raw_text = handle.read() + + if not raw_text.strip(): + raise RuntimeError("Empty text file") + + document.raw_text = raw_text + document.status = "processing" + document.save(update_fields=["raw_text", "status"]) + + chunks = chunk_transcript(raw_text) + chunk_count = self._store_chunks(document, chunks) + return {"chunk_count": chunk_count, "status": "completed"} + + +class UnstructuredDocumentStrategy(BaseDocumentStrategy): + """Process PDF/DOC/DOCX with unstructured partitioning.""" + + def process(self, document: DocumentUpload, local_path: str, s3_url: Optional[str], presigned_url: Optional[str]) -> Dict: + try: + from unstructured.partition.auto import partition + except Exception as exc: + raise RuntimeError("unstructured library is required for PDF/DOC processing") from exc + + elements = partition(filename=local_path) + blocks: List[str] = [] + block_types: List[str] = [] + metadatas: List[Dict] = [] + + for element in elements: + text = getattr(element, "text", "") or "" + if not text.strip(): + continue + block_type = getattr(element, "category", None) or element.__class__.__name__ + metadata_obj = getattr(element, "metadata", None) + metadata = metadata_obj.to_dict() if metadata_obj and hasattr(metadata_obj, "to_dict") else {} + blocks.append(text.strip()) + block_types.append(str(block_type).lower()) + metadatas.append(metadata) + + if not blocks: + raise RuntimeError("No 
readable content extracted from document") + + raw_text = "\n\n".join(blocks) + document.raw_text = raw_text + document.status = "processing" + document.save(update_fields=["raw_text", "status"]) + + chunks: List[str] = [] + chunk_block_types: List[str] = [] + chunk_metadatas: List[Dict] = [] + + for block_text, block_type, metadata in zip(blocks, block_types, metadatas): + block_chunks = chunk_transcript(block_text) + for chunk in block_chunks: + chunks.append(chunk) + chunk_block_types.append(block_type) + chunk_metadatas.append(metadata) + + chunk_count = self._store_chunks(document, chunks, chunk_block_types, chunk_metadatas) + return {"chunk_count": chunk_count, "status": "completed"} + + +class DocumentProcessorFactory: + """Factory to select processing strategy based on extension.""" + + @staticmethod + def get_strategy(file_path: str) -> BaseDocumentStrategy: + _, ext = os.path.splitext(file_path.lower()) + if ext not in ALLOWED_EXTENSIONS: + raise RuntimeError(f"Unsupported file type: {ext}") + + if ext == ".mp3": + return AudioDocumentStrategy() + if ext == ".txt": + return TextDocumentStrategy() + return UnstructuredDocumentStrategy() + + @staticmethod + def upload_to_s3_if_configured(local_path: str, s3_key: str) -> Dict: + s3_url = None + presigned_url = None + s3_error = None + if settings.AWS_ACCESS_KEY_ID and settings.AWS_SECRET_ACCESS_KEY and settings.AWS_STORAGE_BUCKET_NAME: + try: + s3_manager = S3Manager() + uploaded = s3_manager.upload_file(local_path, s3_key) + if uploaded: + s3_url = s3_manager.get_s3_url(s3_key) + presigned_url = s3_manager.generate_presigned_url(s3_key) + else: + s3_error = "S3 upload failed" + except Exception as upload_error: + s3_error = str(upload_error) + return { + "s3_url": s3_url, + "presigned_url": presigned_url, + "s3_error": s3_error + } diff --git a/videocaller/agora/embedding_utils.py b/videocaller/agora/embedding_utils.py new file mode 100644 index 
"""LangChain embeddings and Qdrant vector DB management"""
import logging
import uuid
from typing import List, Dict

from django.conf import settings
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, Filter, FieldCondition, MatchValue, PayloadSchemaType
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_qdrant import QdrantVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter

logger = logging.getLogger(__name__)

# Module-level client shared by all functions below.
qdrant_client = QdrantClient(url=settings.QDRANT_URL, api_key=getattr(settings, 'QDRANT_API_KEY', None))

EMBEDDING_MODEL = getattr(settings, 'HF_EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
EMBEDDING_DIMENSION = getattr(settings, 'HF_EMBEDDING_DIMENSION', None)
COLLECTION_NAME = getattr(settings, 'QDRANT_COLLECTION_NAME', 'meeting_transcripts')
_embeddings = None


def get_embeddings() -> HuggingFaceEmbeddings:
    """Lazily initialize embeddings to reduce startup memory usage."""
    global _embeddings
    if _embeddings is None:
        _embeddings = HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL,
            model_kwargs={"device": "cpu"},
        )
    return _embeddings


def get_embedding_dimension() -> int:
    """Get embedding dimension from config or derive it from the model.

    Raises ValueError when neither the setting nor the model exposes it.
    """
    if EMBEDDING_DIMENSION:
        return int(EMBEDDING_DIMENSION)

    embeddings = get_embeddings()
    client = getattr(embeddings, "client", None)
    if client and hasattr(client, "get_sentence_embedding_dimension"):
        return int(client.get_sentence_embedding_dimension())

    raise ValueError("HF_EMBEDDING_DIMENSION is not set and model dimension is unavailable")


def ensure_collection_exists():
    """Create the Qdrant collection if missing; recreate it on a size mismatch.

    NOTE(review): the broad `except` treats ANY `get_collection` failure as
    "collection missing" — a transient network error would trigger a create
    attempt. Verify this is acceptable for the deployment.
    """
    try:
        collection = qdrant_client.get_collection(COLLECTION_NAME)
        existing_size = collection.config.params.vectors.size
        desired_size = get_embedding_dimension()
        if existing_size != desired_size:
            # A dimension change (new embedding model) invalidates all stored
            # vectors, so drop and recreate rather than fail on insert.
            logger.warning(
                "Qdrant collection size mismatch (%s != %s), recreating: %s",
                existing_size,
                desired_size,
                COLLECTION_NAME
            )
            qdrant_client.delete_collection(COLLECTION_NAME)
            qdrant_client.create_collection(
                collection_name=COLLECTION_NAME,
                vectors_config=VectorParams(size=desired_size, distance=Distance.COSINE),
            )
    except Exception:
        logger.info(f"Creating Qdrant collection: {COLLECTION_NAME}")
        desired_size = get_embedding_dimension()
        qdrant_client.create_collection(
            collection_name=COLLECTION_NAME,
            vectors_config=VectorParams(size=desired_size, distance=Distance.COSINE),
        )

    ensure_payload_indexes()


def ensure_payload_indexes() -> None:
    """Ensure payload indexes exist for filtered searches."""
    try:
        qdrant_client.create_payload_index(
            collection_name=COLLECTION_NAME,
            field_name="meeting_id",
            field_schema=PayloadSchemaType.INTEGER,
        )
    except Exception as e:
        # Index may already exist — safe to skip.
        logger.info("Skipping payload index creation for meeting_id: %s", str(e))


def chunk_transcript(transcript_text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
    """
    Split transcript into overlapping chunks using RecursiveCharacterTextSplitter

    Args:
        transcript_text: Full transcript text
        chunk_size: Target tokens per chunk (approximate)
        overlap: Token overlap between chunks

    Returns:
        List of text chunks
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=overlap,
        separators=["\n\n", "\n", ". ", " ", ""]
    )
    # Split once and reuse: the previous version split twice (once just to
    # log) and dumped every chunk body at INFO level.
    chunks = splitter.split_text(transcript_text)
    logger.debug("Split transcript into %d chunks", len(chunks))
    return chunks


def get_vectorstore() -> QdrantVectorStore:
    """Return a LangChain vector store bound to the shared Qdrant collection."""
    ensure_collection_exists()
    return QdrantVectorStore(
        client=qdrant_client,
        collection_name=COLLECTION_NAME,
        embedding=get_embeddings(),
    )


def store_chunks_in_vector_db(
    meeting_id: int,
    chunks: List[str],
    chunk_objects: List = None
) -> List[str]:
    """
    Store chunks and their embeddings in Qdrant

    Args:
        meeting_id: ID of the meeting
        chunks: List of text chunks
        chunk_objects: Optional list of TranscriptChunk model instances

    Returns:
        List of vector IDs stored in Qdrant

    Raises:
        Exception: re-raised after logging if the vector store insert fails.
    """
    ensure_collection_exists()

    try:
        # Imported here to avoid a circular import with models.py.
        from .models import MeetingRoom

        vectorstore = get_vectorstore()
        meeting_title = MeetingRoom.objects.filter(id=meeting_id).values_list("title", flat=True).first() or ""
        # uuid5 over a stable name makes re-ingestion idempotent (same chunk
        # index always maps to the same vector id).
        vector_ids = [str(uuid.uuid5(uuid.NAMESPACE_URL, f"meeting:{meeting_id}:{idx}")) for idx in range(len(chunks))]

        metadatas = []
        for idx, chunk in enumerate(chunks):
            payload = {
                "meeting_id": meeting_id,
                "meeting_title": meeting_title,
                "chunk_index": idx,
                "text": chunk[:512],
                "chunk_length": len(chunk),
                "source_type": "meeting_transcript"
            }

            if chunk_objects and idx < len(chunk_objects):
                payload["chunk_db_id"] = chunk_objects[idx].id
                if chunk_objects[idx].start_time:
                    payload["start_time"] = chunk_objects[idx].start_time
                if chunk_objects[idx].end_time:
                    payload["end_time"] = chunk_objects[idx].end_time

            metadatas.append(payload)

        vectorstore.add_texts(texts=chunks, metadatas=metadatas, ids=vector_ids)
        logger.info(f"Stored {len(chunks)} chunks for meeting {meeting_id}")
        return vector_ids
    except Exception as e:
        logger.error(f"Error storing chunks in vector DB: {str(e)}")
        raise


def store_document_chunks_in_vector_db(
    meeting_id: int,
    document,
    chunks: List[str],
    chunk_objects: List = None
) -> List[str]:
    """Store document chunks and their embeddings in Qdrant"""
    ensure_collection_exists()

    try:
        from .models import MeetingRoom

        vectorstore = get_vectorstore()
        meeting_title = MeetingRoom.objects.filter(id=meeting_id).values_list("title", flat=True).first() or ""
        vector_ids = [str(uuid.uuid5(uuid.NAMESPACE_URL, f"document:{document.id}:{idx}")) for idx in range(len(chunks))]

        metadatas = []
        for idx, chunk in enumerate(chunks):
            payload = {
                "meeting_id": meeting_id,
                "meeting_title": meeting_title,
                "document_id": document.id,
                "document_name": document.file_name,
                "chunk_index": idx,
                "text": chunk[:512],
                "chunk_length": len(chunk),
                "source_type": "document"
            }

            if chunk_objects and idx < len(chunk_objects):
                payload["chunk_db_id"] = chunk_objects[idx].id
                payload["block_type"] = chunk_objects[idx].block_type

            metadatas.append(payload)

        vectorstore.add_texts(texts=chunks, metadatas=metadatas, ids=vector_ids)
        logger.info(f"Stored {len(chunks)} document chunks for meeting {meeting_id}")
        return vector_ids
    except Exception as e:
        logger.error(f"Error storing document chunks in vector DB: {str(e)}")
        raise


def search_similar_chunks(query: str, meeting_id: int | None = None, top_k: int = 5) -> List[Dict]:
    """
    Search for chunks similar to query using vector similarity

    Args:
        query: User query
        meeting_id: ID of the meeting to search in (None searches all meetings)
        top_k: Number of top results to return

    Returns:
        List of dicts with chunk text, score, and metadata; empty on error.
    """
    try:
        vectorstore = get_vectorstore()
        filter_ = None
        if meeting_id is not None:
            filter_ = Filter(
                must=[FieldCondition(key="meeting_id", match=MatchValue(value=meeting_id))]
            )

        results = vectorstore.similarity_search_with_score(query, k=top_k, filter=filter_)
        formatted_results = []
        for doc, score in results:
            metadata = doc.metadata or {}
            formatted_results.append({
                "text": doc.page_content,
                "score": score,
                "chunk_index": metadata.get("chunk_index", 0),
                "start_time": metadata.get("start_time"),
                "end_time": metadata.get("end_time"),
                "source_type": metadata.get("source_type", "meeting_transcript"),
                "meeting_title": metadata.get("meeting_title"),
                "document_id": metadata.get("document_id"),
                "document_name": metadata.get("document_name"),
                "metadata": metadata
            })

        return formatted_results

    except Exception as e:
        logger.error(f"Error searching similar chunks: {str(e)}")
        return []


def delete_meeting_embeddings(meeting_id: int):
    """Delete all embeddings for a meeting"""
    try:
        qdrant_client.delete(
            collection_name=COLLECTION_NAME,
            points_selector={
                "filter": {
                    "must": [
                        {
                            "key": "meeting_id",
                            "match": {"value": meeting_id}
                        }
                    ]
                }
            }
        )
        logger.info(f"Deleted embeddings for meeting {meeting_id}")
    except Exception as e:
        logger.error(f"Error deleting embeddings: {str(e)}")
        raise
('title', models.CharField(blank=True, max_length=255)), + ('description', models.TextField(blank=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('is_active', models.BooleanField(default=True)), + ('max_participants', models.IntegerField(default=10)), + ('host', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='hosted_meetings', to=settings.AUTH_USER_MODEL)), + ], + options={ + 'ordering': ['-created_at'], + }, + ), + ] diff --git a/videocaller/agora/migrations/0002_meetingroom_recording_duration_and_more.py b/videocaller/agora/migrations/0002_meetingroom_recording_duration_and_more.py new file mode 100644 index 0000000000000000000000000000000000000000..bb19e42603660a767b91937cfd827b80a532b130 --- /dev/null +++ b/videocaller/agora/migrations/0002_meetingroom_recording_duration_and_more.py @@ -0,0 +1,33 @@ +# Generated by Django 4.1.5 on 2026-02-02 15:21 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('agora', '0001_initial'), + ] + + operations = [ + migrations.AddField( + model_name='meetingroom', + name='recording_duration', + field=models.IntegerField(default=0, help_text='Duration in seconds'), + ), + migrations.AddField( + model_name='meetingroom', + name='recording_enabled', + field=models.BooleanField(default=False), + ), + migrations.AddField( + model_name='meetingroom', + name='recording_file', + field=models.FileField(blank=True, null=True, upload_to='recordings/%Y/%m/%d/'), + ), + migrations.AddField( + model_name='meetingroom', + name='transcript_file', + field=models.FileField(blank=True, null=True, upload_to='transcripts/%Y/%m/%d/'), + ), + ] diff --git a/videocaller/agora/migrations/0003_remove_meetingroom_recording_file_and_more.py b/videocaller/agora/migrations/0003_remove_meetingroom_recording_file_and_more.py new file mode 100644 index 0000000000000000000000000000000000000000..c1b19748b39982a38fba9af123f92daa23ad9cd8 --- /dev/null +++ 
b/videocaller/agora/migrations/0003_remove_meetingroom_recording_file_and_more.py @@ -0,0 +1,51 @@ +# Generated by Django 4.1.5 on 2026-02-02 16:39 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('agora', '0002_meetingroom_recording_duration_and_more'), + ] + + operations = [ + migrations.RemoveField( + model_name='meetingroom', + name='recording_file', + ), + migrations.RemoveField( + model_name='meetingroom', + name='transcript_file', + ), + migrations.AddField( + model_name='meetingroom', + name='recording_resource_id', + field=models.CharField(blank=True, help_text='Agora Resource ID', max_length=255, null=True), + ), + migrations.AddField( + model_name='meetingroom', + name='recording_sid', + field=models.CharField(blank=True, help_text='Agora Recording SID', max_length=255, null=True), + ), + migrations.AddField( + model_name='meetingroom', + name='recording_status', + field=models.CharField(choices=[('not_started', 'Not Started'), ('recording', 'Recording'), ('completed', 'Completed'), ('failed', 'Failed')], default='not_started', max_length=50), + ), + migrations.AddField( + model_name='meetingroom', + name='recording_uid', + field=models.IntegerField(blank=True, help_text='Agora Recording Bot UID', null=True), + ), + migrations.AddField( + model_name='meetingroom', + name='s3_recording_url', + field=models.URLField(blank=True, help_text='S3 URL for recording', max_length=500, null=True), + ), + migrations.AddField( + model_name='meetingroom', + name='s3_transcript_url', + field=models.URLField(blank=True, help_text='S3 URL for transcript', max_length=500, null=True), + ), + ] diff --git a/videocaller/agora/migrations/0004_chatmessage.py b/videocaller/agora/migrations/0004_chatmessage.py new file mode 100644 index 0000000000000000000000000000000000000000..a4a8d9f7cdf2b13915dd4a1a2bf171e7328cf94c --- /dev/null +++ b/videocaller/agora/migrations/0004_chatmessage.py @@ -0,0 +1,28 @@ +# Generated 
by Django 4.1.5 on 2026-02-03 06:12 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('agora', '0003_remove_meetingroom_recording_file_and_more'), + ] + + operations = [ + migrations.CreateModel( + name='ChatMessage', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('content', models.TextField()), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='chat_messages', to=settings.AUTH_USER_MODEL)), + ], + options={ + 'ordering': ['-created_at'], + }, + ), + ] diff --git a/videocaller/agora/migrations/0005_meetingroom_transcript_id_and_more.py b/videocaller/agora/migrations/0005_meetingroom_transcript_id_and_more.py new file mode 100644 index 0000000000000000000000000000000000000000..dff426b24f60a5a1c300d463cc77e1f2a1649a8f --- /dev/null +++ b/videocaller/agora/migrations/0005_meetingroom_transcript_id_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 4.1.5 on 2026-02-03 12:40 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('agora', '0004_chatmessage'), + ] + + operations = [ + migrations.AddField( + model_name='meetingroom', + name='transcript_id', + field=models.CharField(blank=True, help_text='AssemblyAI Transcript ID', max_length=255, null=True), + ), + migrations.AddField( + model_name='meetingroom', + name='transcript_status', + field=models.CharField(choices=[('not_started', 'Not Started'), ('processing', 'Processing'), ('completed', 'Completed'), ('failed', 'Failed')], default='not_started', max_length=50), + ), + migrations.AddField( + model_name='meetingroom', + name='transcript_text', + field=models.TextField(blank=True, null=True), + ), + ] diff 
# ==== file: videocaller/agora/migrations/0006_meetingroom_chunks_created_at_and_more.py (new file) ====
# Generated by Django 4.1.5 on 2026-02-03 16:58
# Adds RAG bookkeeping fields to MeetingRoom and creates TranscriptChunk
# (text chunks + vector-DB pointer) and ConversationHistory (Q&A turns).

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('agora', '0005_meetingroom_transcript_id_and_more'),
    ]

    operations = [
        migrations.AddField(
            model_name='meetingroom',
            name='chunks_created_at',
            field=models.DateTimeField(blank=True, help_text='Timestamp when chunks were created', null=True),
        ),
        migrations.AddField(
            model_name='meetingroom',
            name='embedding_version',
            field=models.IntegerField(default=1, help_text='Version of embeddings (for migration)'),
        ),
        migrations.AddField(
            model_name='meetingroom',
            name='embeddings_created_at',
            field=models.DateTimeField(blank=True, help_text='Timestamp when embeddings were generated', null=True),
        ),
        migrations.CreateModel(
            name='TranscriptChunk',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('chunk_text', models.TextField(help_text='Text chunk from transcript')),
                ('chunk_index', models.IntegerField(help_text='Order of chunk in transcript')),
                ('start_time', models.IntegerField(blank=True, help_text='Start time in seconds', null=True)),
                ('end_time', models.IntegerField(blank=True, help_text='End time in seconds', null=True)),
                ('embedding_vector_id', models.CharField(blank=True, help_text='Vector DB ID', max_length=255, null=True)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('meeting', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='transcript_chunks', to='agora.meetingroom')),
            ],
            options={
                'ordering': ['meeting', 'chunk_index'],
            },
        ),
        migrations.CreateModel(
            name='ConversationHistory',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('user_question', models.TextField()),
                ('assistant_response', models.TextField()),
                ('relevant_chunks', models.JSONField(default=list, help_text='List of chunk IDs used for response')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('meeting', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='conversation_history', to='agora.meetingroom')),
                ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
            ],
            options={
                'ordering': ['-created_at'],
            },
        ),
    ]


# ==== file: videocaller/agora/migrations/0007_document_uploads.py (new file) ====
# Generated by Django 4.1.5 on 2026-02-08 00:00
# Creates DocumentUpload (external files attached to a meeting) and
# DocumentChunk (chunked document text for RAG embedding).

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('agora', '0006_meetingroom_chunks_created_at_and_more'),
    ]

    operations = [
        migrations.CreateModel(
            name='DocumentUpload',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('file_name', models.CharField(max_length=255)),
                ('file_type', models.CharField(help_text='File extension or MIME hint', max_length=50)),
                ('s3_url', models.URLField(blank=True, max_length=500, null=True)),
                ('raw_text', models.TextField(blank=True, null=True)),
                ('status', models.CharField(choices=[('uploaded', 'Uploaded'), ('processing', 'Processing'), ('completed', 'Completed'), ('failed', 'Failed')], default='uploaded', max_length=50)),
                ('error_message', models.TextField(blank=True, null=True)),
                ('processed_at', models.DateTimeField(blank=True, null=True)),
                ('embeddings_created_at', models.DateTimeField(blank=True, null=True)),
                ('embedding_version', models.IntegerField(default=1, help_text='Version of embeddings (for migration)')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('meeting', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='document_uploads', to='agora.meetingroom')),
                ('uploaded_by', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='document_uploads', to=settings.AUTH_USER_MODEL)),
            ],
            options={
                'ordering': ['-created_at'],
            },
        ),
        migrations.CreateModel(
            name='DocumentChunk',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('chunk_text', models.TextField(help_text='Text chunk from document')),
                ('chunk_index', models.IntegerField(help_text='Order of chunk in document')),
                ('block_type', models.CharField(default='text', help_text='text/table/image/other', max_length=50)),
                ('metadata', models.JSONField(default=dict, help_text='Extractor metadata')),
                ('embedding_vector_id', models.CharField(blank=True, help_text='Vector DB ID', max_length=255, null=True)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('document', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='chunks', to='agora.documentupload')),
            ],
            options={
                'ordering': ['document', 'chunk_index'],
            },
        ),
    ]


# ==== file: videocaller/agora/migrations/0008_document_upload_storage_path.py (new file) ====
# Generated by Django 4.1.5 on 2026-02-08 00:00
# Adds a required storage_path to DocumentUpload; preserve_default=False
# means '' is used only to backfill existing rows, not as a model default.

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('agora', '0007_document_uploads'),
    ]

    operations = [
        migrations.AddField(
            model_name='documentupload',
            name='storage_path',
            field=models.CharField(default='', help_text='Storage path for local processing', max_length=500),
            preserve_default=False,
        ),
    ]


# ==== file: videocaller/agora/migrations/0009_split_meeting_models.py (new file) ====
# Generated by Django 4.1.5 on 2026-02-10 00:00
# Splits MeetingRoom into three one-to-one satellites (MeetingRecording,
# MeetingTranscript, MeetingRagState), copies existing row data across via
# RunPython, then drops the now-duplicated MeetingRoom columns.

from django.db import migrations, models
import django.db.models.deletion


def migrate_meeting_fields(apps, schema_editor):
    """Forward data copy: for every MeetingRoom, materialize its recording,
    transcript and RAG-state fields into the three new satellite models.

    Uses historical models via apps.get_model, as required in data migrations.
    """
    MeetingRoom = apps.get_model('agora', 'MeetingRoom')
    MeetingRecording = apps.get_model('agora', 'MeetingRecording')
    MeetingTranscript = apps.get_model('agora', 'MeetingTranscript')
    MeetingRagState = apps.get_model('agora', 'MeetingRagState')

    for room in MeetingRoom.objects.all():
        MeetingRecording.objects.create(
            meeting=room,
            recording_enabled=room.recording_enabled,
            recording_sid=room.recording_sid,
            recording_resource_id=room.recording_resource_id,
            recording_uid=room.recording_uid,
            recording_status=room.recording_status,
            s3_recording_url=room.s3_recording_url,
            recording_duration=room.recording_duration
        )
        MeetingTranscript.objects.create(
            meeting=room,
            transcript_text=room.transcript_text,
            transcript_status=room.transcript_status,
            transcript_id=room.transcript_id,
            s3_transcript_url=room.s3_transcript_url
        )
        MeetingRagState.objects.create(
            meeting=room,
            chunks_created_at=room.chunks_created_at,
            embeddings_created_at=room.embeddings_created_at,
            embedding_version=room.embedding_version
        )


def reverse_migrate_meeting_fields(apps, schema_editor):
    """Reverse data copy: fold satellite-model fields back onto MeetingRoom.

    Missing satellites are tolerated (DoesNotExist is skipped) so a partial
    forward run can still be reversed.
    """
    MeetingRoom = apps.get_model('agora', 'MeetingRoom')
    MeetingRecording = apps.get_model('agora', 'MeetingRecording')
    MeetingTranscript = apps.get_model('agora', 'MeetingTranscript')
    MeetingRagState = apps.get_model('agora', 'MeetingRagState')

    for room in MeetingRoom.objects.all():
        try:
            recording = MeetingRecording.objects.get(meeting=room)
            room.recording_enabled = recording.recording_enabled
            room.recording_sid = recording.recording_sid
            room.recording_resource_id = recording.recording_resource_id
            room.recording_uid = recording.recording_uid
            room.recording_status = recording.recording_status
            room.s3_recording_url = recording.s3_recording_url
            room.recording_duration = recording.recording_duration
        except MeetingRecording.DoesNotExist:
            pass

        try:
            transcript = MeetingTranscript.objects.get(meeting=room)
            room.transcript_text = transcript.transcript_text
            room.transcript_status = transcript.transcript_status
            room.transcript_id = transcript.transcript_id
            room.s3_transcript_url = transcript.s3_transcript_url
        except MeetingTranscript.DoesNotExist:
            pass

        try:
            rag_state = MeetingRagState.objects.get(meeting=room)
            room.chunks_created_at = rag_state.chunks_created_at
            room.embeddings_created_at = rag_state.embeddings_created_at
            room.embedding_version = rag_state.embedding_version
        except MeetingRagState.DoesNotExist:
            pass

        room.save()


class Migration(migrations.Migration):

    dependencies = [
        ('agora', '0008_document_upload_storage_path'),
    ]

    # Order matters: create new tables, copy data, then drop old columns.
    operations = [
        migrations.CreateModel(
            name='MeetingRecording',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('recording_enabled', models.BooleanField(default=False)),
                ('recording_sid', models.CharField(blank=True, help_text='Agora Recording SID', max_length=255, null=True)),
                ('recording_resource_id', models.CharField(blank=True, help_text='Agora Resource ID', max_length=255, null=True)),
                ('recording_uid', models.IntegerField(blank=True, help_text='Agora Recording Bot UID', null=True)),
                ('recording_status', models.CharField(choices=[('not_started', 'Not Started'), ('recording', 'Recording'), ('completed', 'Completed'), ('failed', 'Failed')], default='not_started', max_length=50)),
                ('s3_recording_url', models.URLField(blank=True, help_text='S3 URL for recording', max_length=500, null=True)),
                ('recording_duration', models.IntegerField(default=0, help_text='Duration in seconds')),
                ('meeting', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='recording', to='agora.meetingroom')),
            ],
        ),
        migrations.CreateModel(
            name='MeetingTranscript',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('transcript_text', models.TextField(blank=True, null=True)),
                ('transcript_status', models.CharField(choices=[('not_started', 'Not Started'), ('processing', 'Processing'), ('completed', 'Completed'), ('failed', 'Failed')], default='not_started', max_length=50)),
                ('transcript_id', models.CharField(blank=True, help_text='AssemblyAI Transcript ID', max_length=255, null=True)),
                ('s3_transcript_url', models.URLField(blank=True, help_text='S3 URL for transcript', max_length=500, null=True)),
                ('meeting', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='transcript', to='agora.meetingroom')),
            ],
        ),
        migrations.CreateModel(
            name='MeetingRagState',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('chunks_created_at', models.DateTimeField(blank=True, help_text='Timestamp when chunks were created', null=True)),
                ('embeddings_created_at', models.DateTimeField(blank=True, help_text='Timestamp when embeddings were generated', null=True)),
                ('embedding_version', models.IntegerField(default=1, help_text='Version of embeddings (for migration)')),
                ('meeting', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='rag_state', to='agora.meetingroom')),
            ],
        ),
        migrations.RunPython(migrate_meeting_fields, reverse_migrate_meeting_fields),
        migrations.RemoveField(model_name='meetingroom', name='recording_enabled'),
        migrations.RemoveField(model_name='meetingroom', name='recording_sid'),
        migrations.RemoveField(model_name='meetingroom', name='recording_resource_id'),
        migrations.RemoveField(model_name='meetingroom', name='recording_uid'),
        migrations.RemoveField(model_name='meetingroom', name='recording_status'),
        migrations.RemoveField(model_name='meetingroom', name='s3_recording_url'),
        migrations.RemoveField(model_name='meetingroom', name='s3_transcript_url'),
        migrations.RemoveField(model_name='meetingroom', name='recording_duration'),
        migrations.RemoveField(model_name='meetingroom', name='transcript_text'),
        migrations.RemoveField(model_name='meetingroom', name='transcript_status'),
        migrations.RemoveField(model_name='meetingroom', name='transcript_id'),
        migrations.RemoveField(model_name='meetingroom', name='chunks_created_at'),
        migrations.RemoveField(model_name='meetingroom', name='embeddings_created_at'),
        migrations.RemoveField(model_name='meetingroom', name='embedding_version'),
    ]
# ==== file: videocaller/agora/migrations/0010_documentupload_chunk_count.py (new file) ====
# Generated by Django 4.1.5 on 2026-02-10 00:00
# Adds a denormalized chunk counter to DocumentUpload.

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('agora', '0009_split_meeting_models'),
    ]

    operations = [
        migrations.AddField(
            model_name='documentupload',
            name='chunk_count',
            field=models.IntegerField(default=0, help_text='Number of chunks created'),
        ),
    ]


# ==== file: videocaller/agora/migrations/0011_meeting_agenda_point.py (new file) ====
# Creates MeetingAgendaPoint: ordered, optionally AI-generated agenda items.

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    dependencies = [
        ('agora', '0010_documentupload_chunk_count'),
    ]

    operations = [
        migrations.CreateModel(
            name='MeetingAgendaPoint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('text', models.TextField()),
                ('order', models.IntegerField(default=0)),
                ('is_ai_generated', models.BooleanField(default=True)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                # NOTE(review): hard-codes 'auth.user' instead of
                # settings.AUTH_USER_MODEL + swappable_dependency; fine while
                # the project uses the default User, but breaks if the user
                # model is ever swapped — confirm before reuse elsewhere.
                ('created_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='auth.user')),
                ('meeting', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='agenda_points', to='agora.meetingroom')),
            ],
            options={
                'ordering': ['order', 'created_at'],
            },
        ),
    ]


# ==== file: videocaller/agora/migrations/0012_alter_meetingroom_room_code.py (new file) ====
# Alters MeetingRoom.room_code to max_length=15 (shareable code format).

from django.db import migrations, models


class Migration(migrations.Migration):
    dependencies = [
        ('agora', '0011_meeting_agenda_point'),
    ]

    operations = [
        migrations.AlterField(
            model_name='meetingroom',
            name='room_code',
            field=models.CharField(max_length=15, unique=True),
        ),
    ]


# ==== file: videocaller/agora/migrations/__init__.py (new, empty file) ====


# ==== file: videocaller/agora/models.py (new file) ====
from django.db import models
from django.contrib.auth.models import User
import uuid

class MeetingRoom(models.Model):
    # Core meeting entity. Recording/transcript/RAG state live on one-to-one
    # satellite models (see get_recording/get_transcript/get_rag_state).
    room_id = models.CharField(max_length=100, unique=True, default=uuid.uuid4)
    room_code = models.CharField(max_length=15, unique=True)  # Shareable code like "abc-def-ghi"
    host = models.ForeignKey(User, on_delete=models.CASCADE, related_name='hosted_meetings')
    title = models.CharField(max_length=255, blank=True)
    description = models.TextField(blank=True)
    created_at = models.DateTimeField(auto_now_add=True)
    is_active = models.BooleanField(default=True)
    max_participants = models.IntegerField(default=10)

    def __str__(self):
        return f"{self.title} - {self.room_code}"

    class Meta:
        ordering = ['-created_at']

    # NOTE(review): the three get_* helpers below do a lazy get-then-create;
    # under concurrent callers two requests could both hit DoesNotExist and
    # race on create (OneToOneField uniqueness makes the loser raise).
    # Model.objects.get_or_create(meeting=self) would be the safer idiom.
    def get_recording(self):
        # Return the recording satellite, creating it on first access.
        try:
            return self.recording
        except MeetingRecording.DoesNotExist:
            return MeetingRecording.objects.create(meeting=self)

    def get_transcript(self):
        # Return the transcript satellite, creating it on first access.
        try:
            return self.transcript
        except MeetingTranscript.DoesNotExist:
            return MeetingTranscript.objects.create(meeting=self)

    def get_rag_state(self):
        # Return the RAG-state satellite, creating it on first access.
        try:
            return self.rag_state
        except MeetingRagState.DoesNotExist:
            return MeetingRagState.objects.create(meeting=self)


class MeetingRecording(models.Model):
    # One-to-one satellite of MeetingRoom holding Agora cloud-recording state.
    meeting = models.OneToOneField(MeetingRoom, on_delete=models.CASCADE, related_name='recording')
    recording_enabled = models.BooleanField(default=False)
    recording_sid = models.CharField(max_length=255, null=True, blank=True, help_text="Agora Recording SID")
    recording_resource_id = models.CharField(max_length=255, null=True, blank=True, help_text="Agora Resource ID")
    recording_uid = models.IntegerField(null=True, blank=True, help_text="Agora Recording Bot UID")
    recording_status = models.CharField(
        max_length=50,
        default='not_started',
        choices=[('not_started', 'Not Started'), ('recording', 'Recording'), ('completed', 'Completed'), ('failed', 'Failed')]
    )
    s3_recording_url = models.URLField(max_length=500, null=True, blank=True, help_text="S3 URL for recording")
    recording_duration = models.IntegerField(default=0, help_text="Duration in seconds")

    def __str__(self):
        return f"Recording - {self.meeting.title}"


class MeetingTranscript(models.Model):
    # One-to-one satellite of MeetingRoom holding AssemblyAI transcript state.
    meeting = models.OneToOneField(MeetingRoom, on_delete=models.CASCADE, related_name='transcript')
    transcript_text = models.TextField(null=True, blank=True)
    transcript_status = models.CharField(
        max_length=50,
        default='not_started',
        choices=[('not_started', 'Not Started'), ('processing', 'Processing'), ('completed', 'Completed'), ('failed', 'Failed')]
    )
    transcript_id = models.CharField(max_length=255, null=True, blank=True, help_text="AssemblyAI Transcript ID")
    s3_transcript_url = models.URLField(max_length=500, null=True, blank=True, help_text="S3 URL for transcript")

    def __str__(self):
        return f"Transcript - {self.meeting.title}"


class MeetingRagState(models.Model):
    # One-to-one satellite of MeetingRoom tracking chunking/embedding progress.
    meeting = models.OneToOneField(MeetingRoom, on_delete=models.CASCADE, related_name='rag_state')
    chunks_created_at = models.DateTimeField(null=True, blank=True, help_text="Timestamp when chunks were created")
    embeddings_created_at = models.DateTimeField(null=True, blank=True, help_text="Timestamp when embeddings were generated")
    embedding_version = models.IntegerField(default=1, help_text="Version of embeddings (for migration)")

    def __str__(self):
        return f"RAG State - {self.meeting.title}"


class ChatMessage(models.Model):
    # Simple global chat line; not scoped to a meeting (no FK to MeetingRoom).
    user = models.ForeignKey(User, on_delete=models.CASCADE, related_name='chat_messages')
    content = models.TextField()
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        return f"{self.user.username}: {self.content[:30]}"

    class Meta:
        ordering = ['-created_at']


class TranscriptChunk(models.Model):
    """Store transcript chunks with their embeddings for RAG"""
    meeting = models.ForeignKey(MeetingRoom, on_delete=models.CASCADE, related_name='transcript_chunks')
    chunk_text = models.TextField(help_text="Text chunk from transcript")
    chunk_index = models.IntegerField(help_text="Order of chunk in transcript")
    start_time = models.IntegerField(null=True, blank=True, help_text="Start time in seconds")
    end_time = models.IntegerField(null=True, blank=True, help_text="End time in seconds")
    embedding_vector_id = models.CharField(max_length=255, null=True, blank=True, help_text="Vector DB ID")
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        return f"Chunk {self.chunk_index} - {self.meeting.title}"

    class Meta:
        ordering = ['meeting', 'chunk_index']


class DocumentUpload(models.Model):
    """Store uploaded external documents/audio linked to a meeting"""
    meeting = models.ForeignKey(MeetingRoom, on_delete=models.CASCADE, related_name='document_uploads')
    uploaded_by = models.ForeignKey(User, on_delete=models.CASCADE, related_name='document_uploads')
    file_name = models.CharField(max_length=255)
    file_type = models.CharField(max_length=50, help_text="File extension or MIME hint")
    storage_path = models.CharField(max_length=500, help_text="Storage path for local processing")
    s3_url = models.URLField(max_length=500, null=True, blank=True)
    raw_text = models.TextField(null=True, blank=True)
    status = models.CharField(
        max_length=50,
        default='uploaded',
        choices=[
            ('uploaded', 'Uploaded'),
            ('processing', 'Processing'),
            ('completed', 'Completed'),
            ('failed', 'Failed')
        ]
    )
    error_message = models.TextField(null=True, blank=True)
    processed_at = models.DateTimeField(null=True, blank=True)
    embeddings_created_at = models.DateTimeField(null=True, blank=True)
    embedding_version = models.IntegerField(default=1, help_text="Version of embeddings (for migration)")
    chunk_count = models.IntegerField(default=0, help_text="Number of chunks created")
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        return f"{self.file_name} - {self.meeting.title}"

    class Meta:
        ordering = ['-created_at']


class DocumentChunk(models.Model):
    """Store document chunks with embeddings for RAG"""
    document = models.ForeignKey(DocumentUpload, on_delete=models.CASCADE, related_name='chunks')
    chunk_text = models.TextField(help_text="Text chunk from document")
    chunk_index = models.IntegerField(help_text="Order of chunk in document")
    block_type = models.CharField(max_length=50, default='text', help_text="text/table/image/other")
    metadata = models.JSONField(default=dict, help_text="Extractor metadata")
    embedding_vector_id = models.CharField(max_length=255, null=True, blank=True, help_text="Vector DB ID")
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        return f"DocChunk {self.chunk_index} - {self.document.file_name}"

    class Meta:
        ordering = ['document', 'chunk_index']


class MeetingAgendaPoint(models.Model):
    """Shared agenda points for a meeting."""
    meeting = models.ForeignKey(MeetingRoom, on_delete=models.CASCADE, related_name='agenda_points')
    text = models.TextField()
    order = models.IntegerField(default=0)
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True, blank=True)
    is_ai_generated = models.BooleanField(default=True)
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        return f"Agenda {self.order} - {self.meeting.title}"

    class Meta:
        ordering = ['order', 'created_at']


class ConversationHistory(models.Model):
    """Store Q&A history for context-aware responses"""
    meeting = models.ForeignKey(MeetingRoom, on_delete=models.CASCADE, related_name='conversation_history')
    user = models.ForeignKey(User, on_delete=models.CASCADE)
    user_question = models.TextField()
    assistant_response = models.TextField()
    relevant_chunks = models.JSONField(default=list, help_text="List of chunk IDs used for response")
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        return f"{self.meeting.title} - {self.user.username}"

    class Meta:
        ordering = ['-created_at']


# ==== file: videocaller/agora/rag_utils.py (new file) ====
"""RAG (Retrieval-Augmented Generation) service for intelligent query responses"""
import json
import logging
import queue
import threading
from typing import Iterable, List, Dict, Tuple
import requests
from django.conf import settings
from asgiref.sync import sync_to_async
from .embedding_utils import search_similar_chunks
from .models import ConversationHistory

logger = logging.getLogger(__name__)

# Google Generative Language API configuration; all values overridable via
# Django settings, with defaults matching the CI environment.
GOOGLE_API_KEY = getattr(settings, 'GOOGLE_API_KEY', '')
GOOGLE_GENERATE_MODEL = getattr(settings, 'GOOGLE_GENERATE_MODEL', 'gemini-2.5-flash-lite')
GOOGLE_API_BASE = "https://generativelanguage.googleapis.com/v1beta/models/"
GOOGLE_CONNECT_TIMEOUT = getattr(settings, 'GOOGLE_CONNECT_TIMEOUT', 10)
GOOGLE_READ_TIMEOUT = getattr(settings, 'GOOGLE_READ_TIMEOUT', 600)
MAX_TOKENS = getattr(settings, 'GOOGLE_MAX_TOKENS', 1000)
MAX_CONVERSATION_TURNS = 5  # Limit context window
def _build_google_prompt(system_prompt: str, conversation_context: List[Dict], query: str) -> str:
    """Flatten system prompt + prior turns + the new question into one
    plain-text prompt (the API call below sends a single user part)."""
    parts: List[str] = ["SYSTEM:", system_prompt.strip()]
    if conversation_context:
        parts.append("\nCONVERSATION:")
        for item in conversation_context:
            role = "ASSISTANT" if item["role"] == "assistant" else "USER"
            parts.append(f"{role}: {item['content'].strip()}")
    parts.append("\nUSER QUESTION:")
    parts.append(query.strip())
    parts.append("\nASSISTANT:")
    return "\n".join(parts)


def _google_generate(prompt: str) -> str:
    """Non-streaming generateContent call; returns the concatenated text of
    all candidate parts. Raises on missing key, timeout, HTTP error, or
    non-JSON response (callers are expected to catch and surface)."""
    if not GOOGLE_API_KEY:
        raise ValueError("GOOGLE_API_KEY is not configured")

    # NOTE(review): the API key travels in the URL query string, so it can
    # leak into proxy/access logs; the x-goog-api-key header would be safer.
    url = f"{GOOGLE_API_BASE}{GOOGLE_GENERATE_MODEL}:generateContent?key={GOOGLE_API_KEY}"
    payload = {
        "contents": [
            {
                "role": "user",
                "parts": [{"text": prompt}]
            }
        ],
        "generationConfig": {
            "maxOutputTokens": MAX_TOKENS,
            "temperature": 0.7
        }
    }
    try:
        response = requests.post(
            url,
            json=payload,
            timeout=(GOOGLE_CONNECT_TIMEOUT, GOOGLE_READ_TIMEOUT)
        )
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.ReadTimeout as e:
        logger.error("Google generate timed out: %s", str(e))
        raise
    except requests.exceptions.RequestException as e:
        # e.response may be None (connection errors); getattr guards that.
        status = getattr(e.response, "status_code", None)
        body = getattr(e.response, "text", "")
        logger.error("Google generate request failed (status=%s): %s", status, body[:1000])
        raise
    except ValueError as e:
        logger.error("Google generate invalid JSON response: %s", str(e))
        raise

    text_parts: List[str] = []
    for candidate in data.get("candidates", []):
        for part in candidate.get("content", {}).get("parts", []):
            part_text = part.get("text")
            if part_text:
                text_parts.append(part_text)
    return "".join(text_parts).strip()


def _google_generate_stream(prompt: str) -> Iterable[str]:
    """Streaming generateContent call; yields text tokens as they arrive.

    Parsing strategy: first try each response line as standalone JSON
    (newline-delimited chunks); lines that fail are buffered and, after the
    stream ends, re-parsed as a single JSON document or array (the API can
    stream one JSON array split across lines). On timeout/HTTP error this
    yields a bracketed error string instead of raising, so consumers keep a
    usable stream.
    """
    if not GOOGLE_API_KEY:
        raise ValueError("GOOGLE_API_KEY is not configured")

    url = f"{GOOGLE_API_BASE}{GOOGLE_GENERATE_MODEL}:streamGenerateContent?key={GOOGLE_API_KEY}"
    payload = {
        "contents": [
            {
                "role": "user",
                "parts": [{"text": prompt}]
            }
        ],
        "generationConfig": {
            "maxOutputTokens": MAX_TOKENS,
            "temperature": 0.7
        }
    }
    try:
        with requests.post(
            url,
            json=payload,
            stream=True,
            timeout=(GOOGLE_CONNECT_TIMEOUT, GOOGLE_READ_TIMEOUT)
        ) as response:
            response.raise_for_status()
            buffered_lines: List[str] = []
            for line in response.iter_lines(decode_unicode=True):
                if not line:
                    continue
                try:
                    data = json.loads(line)
                    for candidate in data.get("candidates", []):
                        for part in candidate.get("content", {}).get("parts", []):
                            part_text = part.get("text")
                            if part_text:
                                yield part_text
                    continue
                except json.JSONDecodeError:
                    buffered_lines.append(line)

            if buffered_lines:
                # Fallback: the whole buffered payload may be one JSON value
                # (object or array of chunk objects).
                buffered_payload = "\n".join(buffered_lines).strip()
                try:
                    data = json.loads(buffered_payload)
                    if isinstance(data, list):
                        items = data
                    else:
                        items = [data]
                    for item in items:
                        for candidate in item.get("candidates", []):
                            for part in candidate.get("content", {}).get("parts", []):
                                part_text = part.get("text")
                                if part_text:
                                    yield part_text
                except json.JSONDecodeError:
                    logger.warning("Google stream buffered payload was not JSON")
    except requests.exceptions.ReadTimeout as e:
        logger.error("Google stream timed out: %s", str(e))
        yield "\n[Model timed out. Try again.]"
    except requests.exceptions.RequestException as e:
        status = getattr(e.response, "status_code", None)
        body = getattr(e.response, "text", "")
        logger.error("Google stream request failed (status=%s): %s", status, body[:1000])
        yield "\n[Model error. Please try again.]"


def get_conversation_context(meeting_id: int | None, user_id: int, limit: int = MAX_CONVERSATION_TURNS) -> List[Dict]:
    """
    Retrieve recent conversation history for a user in a meeting

    Args:
        meeting_id: ID of the meeting (None means across all meetings)
        user_id: ID of the user
        limit: Number of recent turns to retrieve

    Returns:
        List of dicts with questions and responses, oldest turn first,
        alternating {"role": "user"/"assistant", "content": ...}.
        Returns [] on any error (best-effort: context is optional).
    """
    try:
        history_query = ConversationHistory.objects.filter(user_id=user_id)
        if meeting_id is not None:
            history_query = history_query.filter(meeting_id=meeting_id)

        history = history_query.order_by('-created_at')[:limit]

        # Reverse to get chronological order
        context = []
        for turn in reversed(history):
            context.append({
                "role": "user",
                "content": turn.user_question
            })
            context.append({
                "role": "assistant",
                "content": turn.assistant_response
            })

        return context
    except Exception as e:
        logger.error(f"Error retrieving conversation context: {str(e)}")
        return []


def _save_conversation_turn(
    meeting_id: int | None,
    user_id: int,
    query: str,
    assistant_response: str,
    relevant_chunks: List[Dict]
) -> None:
    """Persist one Q&A turn for future context. No-op when meeting_id is
    None; errors are logged and swallowed (history is best-effort)."""
    if meeting_id is None:
        return

    try:
        from .models import MeetingRoom
        meeting = MeetingRoom.objects.get(id=meeting_id)
        ConversationHistory.objects.create(
            meeting=meeting,
            user_id=user_id,
            user_question=query,
            assistant_response=assistant_response,
            relevant_chunks=[chunk['chunk_index'] for chunk in relevant_chunks]
        )
    except Exception as e:
        logger.error(f"Error saving conversation history: {str(e)}")


def generate_rag_response(
    meeting_id: int | None,
    user_id: int,
    query: str,
    top_k: int = 5
) -> Tuple[str, List[Dict]]:
    """
    Generate response using RAG: retrieve relevant chunks + conversation history + LLM

    Args:
        meeting_id: ID of the meeting to query
        user_id: ID of the user asking
        query: User's question
        top_k: Number of similar chunks to retrieve

    Returns:
        Tuple of (response_text, relevant_chunks)

    Raises:
        Propagates any retrieval/LLM error after logging it.
    """
    try:
        # Step 1: Retrieve similar chunks from vector DB
        relevant_chunks = search_similar_chunks(query, meeting_id, top_k)

        if not relevant_chunks:
            logger.warning(f"No relevant chunks found for meeting {meeting_id}, query: {query}")
            return "Sorry, I couldn't find relevant information in the available documents or transcripts.", []

        # Step 2: Get conversation history for context
        conversation_context = get_conversation_context(meeting_id, user_id)

        # Step 3: Build system prompt
        chunks_text = "\n\n".join([
            f"[Source: {chunk.get('source_type', 'meeting_transcript')}, "
            f"Chunk {chunk['chunk_index']}, "
            f"Doc: {chunk.get('document_name', 'N/A')}] {chunk['text']}"
            for chunk in relevant_chunks
        ])

        system_prompt = f"""You are a helpful assistant answering questions from meeting transcripts and uploaded documents.

You have access to relevant parts of a transcript provided below. Use this context to answer user questions accurately and concisely.
If the information is not in the provided context, say you don't have that information from the transcript.

RELEVANT TRANSCRIPT SECTIONS:
{chunks_text}

Answer the user's question based ONLY on the provided transcript sections. Be specific and cite which part of the transcript you're referring to when possible."""

        # Step 4: Build prompt for Google
        prompt = _build_google_prompt(system_prompt, conversation_context, query)

        # Step 5: Call Google
        assistant_response = _google_generate(prompt)

        # Step 6: Save conversation turn (for next context)
        _save_conversation_turn(meeting_id, user_id, query, assistant_response, relevant_chunks)

        return assistant_response, relevant_chunks

    except Exception as e:
        logger.error(f"Error generating RAG response: {str(e)}")
        raise


def stream_rag_response(
    meeting_id: int | None,
    user_id: int,
    query: str,
    top_k: int = 5
) -> Tuple[Iterable[str], List[Dict]]:
    """Streaming variant of generate_rag_response: returns (token generator,
    relevant_chunks). The generator yields a "Thinking..." preamble, then
    model tokens, and saves the conversation turn after the stream ends."""
    try:
        relevant_chunks = search_similar_chunks(query, meeting_id, top_k)

        if not relevant_chunks:
            return iter(["Sorry, I couldn't find relevant information in the available documents or transcripts."]), []

        conversation_context = get_conversation_context(meeting_id, user_id)
        chunks_text = "\n\n".join([
            f"[Source: {chunk.get('source_type', 'meeting_transcript')}, "
            f"Chunk {chunk['chunk_index']}, "
            f"Doc: {chunk.get('document_name', 'N/A')}] {chunk['text']}"
            for chunk in relevant_chunks
        ])
        system_prompt = f"""You are a helpful assistant answering questions from meeting transcripts and uploaded documents.

You have access to relevant parts of a transcript provided below. Use this context to answer user questions accurately and concisely.
If the information is not in the provided context, say you don't have that information from the transcript.

RELEVANT TRANSCRIPT SECTIONS:
{chunks_text}

Answer the user's question based ONLY on the provided transcript sections. Be specific and cite which part of the transcript you're referring to when possible."""

        prompt = _build_google_prompt(system_prompt, conversation_context, query)

        def generator() -> Iterable[str]:
            yield "Thinking...\n"
            parts: List[str] = []
            for token in _google_generate_stream(prompt):
                parts.append(token)
                yield token
            # NOTE(review): error strings yielded by the stream helper are
            # included in the saved response — acceptable best-effort history.
            assistant_response = "".join(parts)
            _save_conversation_turn(meeting_id, user_id, query, assistant_response, relevant_chunks)

        return generator(), relevant_chunks
    except Exception as e:
        logger.error(f"Error streaming RAG response: {str(e)}")
        raise


async def stream_rag_response_async(
    meeting_id: int | None,
    user_id: int,
    query: str,
    top_k: int = 5
) -> Tuple[Iterable[str], List[Dict]]:
    """Async-context variant: ORM/vector lookups run via sync_to_async, and
    the HTTP stream is consumed on a daemon worker thread feeding a Queue so
    the returned (sync) generator never blocks the event loop on the request
    itself — only on queue gets."""
    try:
        relevant_chunks = await sync_to_async(search_similar_chunks)(query, meeting_id, top_k)

        if not relevant_chunks:
            return iter(["Sorry, I couldn't find relevant information in the available documents or transcripts."]), []

        conversation_context = await sync_to_async(get_conversation_context)(meeting_id, user_id)
        chunks_text = "\n\n".join([
            f"[Source: {chunk.get('source_type', 'meeting_transcript')}, "
            f"Chunk {chunk['chunk_index']}, "
            f"Doc: {chunk.get('document_name', 'N/A')}] {chunk['text']}"
            for chunk in relevant_chunks
        ])
        system_prompt = f"""You are a helpful assistant answering questions from meeting transcripts and uploaded documents.

You have access to relevant parts of a transcript provided below. Use this context to answer user questions accurately and concisely.
If the information is not in the provided context, say you don't have that information from the transcript.

RELEVANT TRANSCRIPT SECTIONS:
{chunks_text}

Answer the user's question based ONLY on the provided transcript sections. Be specific and cite which part of the transcript you're referring to when possible."""

        prompt = _build_google_prompt(system_prompt, conversation_context, query)
        token_queue: queue.Queue = queue.Queue()
        stop_marker = object()  # sentinel: worker is done (success or failure)

        def worker():
            # Runs on a background thread; drains the HTTP stream into the queue.
            try:
                for token in _google_generate_stream(prompt):
                    token_queue.put(token)
            except Exception:
                token_queue.put("\n[Model error. Please try again.]")
            finally:
                token_queue.put(stop_marker)

        thread = threading.Thread(target=worker, daemon=True)
        thread.start()

        def generator() -> Iterable[str]:
            # Sync generator; queue.get() blocks, so the caller must iterate
            # it off the event loop (e.g. via sync_to_async per item).
            parts: List[str] = []
            yield "Thinking...\n"
            while True:
                item = token_queue.get()
                if item is stop_marker:
                    break
                parts.append(item)
                yield item
            assistant_response = "".join(parts)
            # NOTE(review): this ORM write happens wherever the generator is
            # consumed — must not run directly on the event loop thread.
            _save_conversation_turn(meeting_id, user_id, query, assistant_response, relevant_chunks)

        return generator(), relevant_chunks
    except Exception as e:
        logger.error(f"Error streaming RAG response (async): {str(e)}")
        raise


def process_transcript_for_rag(meeting_id: int) -> Dict:
    """
    Process a completed transcript: chunk it and generate embeddings

    Args:
        meeting_id: ID of the meeting with completed transcript

    Returns:
        Dict with processing status and chunk count
    """
    try:
        from .models import MeetingRoom, TranscriptChunk
        from .embedding_utils import chunk_transcript, store_chunks_in_vector_db
        from django.utils import timezone

        meeting = MeetingRoom.objects.get(id=meeting_id)
        transcript = meeting.get_transcript()
        rag_state = meeting.get_rag_state()

        if not transcript.transcript_text:
            logger.error(f"No transcript text found for meeting {meeting_id}")
            return {"success": False, "error": "No transcript text"}

        # Check if already processed
        if rag_state.chunks_created_at and rag_state.embeddings_created_at:
            logger.info(f"Meeting {meeting_id} already processed for RAG")
            return {"success": True, "message": "Already processed", "chunk_count": 0}

        # Step 1: Chunk the transcript — NOTE(review): the remainder of this
        # function lies outside this diff hunk and is not reproduced here.
"""
Agora Cloud Recording utilities for managing recording lifecycle and S3 uploads
"""
import os
import time
import base64
import requests
import boto3
from django.conf import settings
from botocore.exceptions import ClientError

# Timeout (seconds) applied to every Agora REST call so a hung request
# cannot block a worker indefinitely.
REQUEST_TIMEOUT = 30


class AgoraCloudRecording:
    """Handles Agora Cloud Recording API operations"""

    def __init__(self):
        self.app_id = settings.AGORA_APP_ID
        self.customer_id = settings.AGORA_CUSTOMER_ID
        self.customer_secret = settings.AGORA_CUSTOMER_SECRET
        self.region = settings.AGORA_RECORDING_REGION

        # Base URL for Agora Cloud Recording API, keyed by deployment region;
        # unknown regions fall back to North America.
        region_map = {
            'NA': 'https://api.agora.io/v1/apps',
            'EU': 'https://api-eu.agora.io/v1/apps',
            'AP': 'https://api-ap.agora.io/v1/apps',
            'CN': 'https://api-cn.agora.io/v1/apps'
        }
        self.base_url = region_map.get(self.region, region_map['NA'])

    def _get_auth_header(self):
        """Generate Basic Auth header for Agora API"""
        credentials = f"{self.customer_id}:{self.customer_secret}"
        encoded = base64.b64encode(credentials.encode()).decode()
        return {'Authorization': f'Basic {encoded}', 'Content-Type': 'application/json'}

    def acquire_resource(self, channel_name, uid):
        """
        Acquire a resource ID for cloud recording

        Args:
            channel_name: The channel to record
            uid: The UID for the recording bot (should be unique and not used by any user)

        Returns:
            dict: {'resourceId': 'xxx', 'success': True/False}
        """
        url = f"{self.base_url}/{self.app_id}/cloud_recording/acquire"

        payload = {
            "cname": channel_name,
            "uid": str(uid),
            "clientRequest": {
                "resourceExpiredHour": 24,
                "scene": 0  # 0 for real-time recording
            }
        }

        try:
            response = requests.post(
                url, json=payload, headers=self._get_auth_header(), timeout=REQUEST_TIMEOUT
            )
            response.raise_for_status()
            data = response.json()
            return {
                'resourceId': data.get('resourceId'),
                'success': True
            }
        except requests.exceptions.RequestException as e:
            return {
                'success': False,
                'error': str(e)
            }

    def start_recording(self, channel_name, uid, resource_id, token, bucket_name, bucket_access_key,
                        bucket_secret_key, bucket_region):
        """
        Start cloud recording

        Args:
            channel_name: Channel to record
            uid: Recording bot UID
            resource_id: Resource ID from acquire
            token: Agora RTC token for the recording bot
            bucket_name: S3 bucket name
            bucket_access_key: AWS access key
            bucket_secret_key: AWS secret key
            bucket_region: AWS region name (e.g. 'us-east-1'; mapped to Agora's numeric code)

        Returns:
            dict: {'sid': 'xxx', 'resourceId': 'xxx', 'success': True/False}
        """
        url = f"{self.base_url}/{self.app_id}/cloud_recording/resourceid/{resource_id}/mode/mix/start"

        # Agora expects AWS regions as numeric codes, not names.
        region_code_map = {
            'us-east-1': 0, 'us-east-2': 1, 'us-west-1': 2, 'us-west-2': 3,
            'eu-west-1': 4, 'eu-west-2': 5, 'eu-west-3': 6, 'eu-central-1': 7,
            'ap-southeast-1': 8, 'ap-southeast-2': 9, 'ap-northeast-1': 10,
            'ap-northeast-2': 11, 'sa-east-1': 12, 'ca-central-1': 13,
            'ap-south-1': 14, 'cn-north-1': 15, 'cn-northwest-1': 16
        }

        payload = {
            "cname": channel_name,
            "uid": str(uid),
            "clientRequest": {
                "token": token,
                "recordingConfig": {
                    "maxIdleTime": 30,
                    "streamTypes": 2,  # 0=audio, 1=video, 2=both
                    "channelType": 0,  # 0=communication, 1=live broadcast
                    "videoStreamType": 0,  # 0=high stream, 1=low stream
                    "subscribeUidGroup": 0  # Record all users
                },
                "recordingFileConfig": {
                    "avFileType": ["hls", "mp4"]  # HLS for live, MP4 for download
                },
                "storageConfig": {
                    "vendor": 1,  # 1=AWS S3, 2=Alibaba Cloud, 3=Tencent Cloud
                    "region": region_code_map.get(bucket_region, 0),
                    "bucket": bucket_name,
                    "accessKey": bucket_access_key,
                    "secretKey": bucket_secret_key,
                    "fileNamePrefix": [f"recordings/{channel_name}"]
                }
            }
        }

        # Pre-bind so the except clause can safely reference the response
        # even when requests.post itself raised before assignment.
        response = None
        try:
            response = requests.post(
                url, json=payload, headers=self._get_auth_header(), timeout=REQUEST_TIMEOUT
            )
            response.raise_for_status()
            data = response.json()
            return {
                'sid': data.get('sid'),
                'resourceId': data.get('resourceId'),
                'success': True
            }
        except requests.exceptions.RequestException as e:
            return {
                'success': False,
                'error': str(e),
                'response': response.text if response is not None else None
            }

    def stop_recording(self, channel_name, uid, resource_id, sid):
        """
        Stop cloud recording

        Args:
            channel_name: Channel being recorded
            uid: Recording bot UID
            resource_id: Resource ID from acquire
            sid: Session ID from start

        Returns:
            dict: {'serverResponse': {...}, 'success': True/False}
        """
        url = f"{self.base_url}/{self.app_id}/cloud_recording/resourceid/{resource_id}/sid/{sid}/mode/mix/stop"

        payload = {
            "cname": channel_name,
            "uid": str(uid),
            "clientRequest": {}
        }

        try:
            response = requests.post(
                url, json=payload, headers=self._get_auth_header(), timeout=REQUEST_TIMEOUT
            )
            response.raise_for_status()
            data = response.json()
            return {
                'serverResponse': data.get('serverResponse', {}),
                'success': True
            }
        except requests.exceptions.RequestException as e:
            return {
                'success': False,
                'error': str(e)
            }

    def query_recording(self, resource_id, sid):
        """
        Query recording status

        Args:
            resource_id: Resource ID from acquire
            sid: Session ID from start

        Returns:
            dict: {'serverResponse': {...}, 'success': True/False}
        """
        url = f"{self.base_url}/{self.app_id}/cloud_recording/resourceid/{resource_id}/sid/{sid}/mode/mix/query"

        try:
            response = requests.get(
                url, headers=self._get_auth_header(), timeout=REQUEST_TIMEOUT
            )
            response.raise_for_status()
            data = response.json()
            return {
                'serverResponse': data.get('serverResponse', {}),
                'success': True
            }
        except requests.exceptions.RequestException as e:
            return {
                'success': False,
                'error': str(e)
            }


class S3Manager:
    """Handles AWS S3 operations for recordings"""

    def __init__(self):
        self.s3_client = boto3.client(
            's3',
            aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
            region_name=settings.AWS_S3_REGION_NAME
        )
        self.bucket_name = settings.AWS_STORAGE_BUCKET_NAME

    def generate_presigned_url(self, s3_key, expiration=3600):
        """
        Generate a presigned URL for accessing S3 object

        Args:
            s3_key: S3 object key
            expiration: URL expiration time in seconds (default 1 hour)

        Returns:
            str: Presigned URL or None if error
        """
        try:
            return self.s3_client.generate_presigned_url(
                'get_object',
                Params={'Bucket': self.bucket_name, 'Key': s3_key},
                ExpiresIn=expiration
            )
        except ClientError as e:
            print(f"Error generating presigned URL: {e}")
            return None

    def upload_file(self, file_path, s3_key):
        """
        Upload a file to S3

        Args:
            file_path: Local file path
            s3_key: S3 object key

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            self.s3_client.upload_file(file_path, self.bucket_name, s3_key)
            return True
        except ClientError as e:
            print(f"Error uploading to S3: {e}")
            return False

    def get_s3_url(self, s3_key):
        """
        Get public S3 URL (for public buckets) or object location

        Args:
            s3_key: S3 object key

        Returns:
            str: S3 URL
        """
        return f"https://{self.bucket_name}.s3.{settings.AWS_S3_REGION_NAME}.amazonaws.com/{s3_key}"

    def check_file_exists(self, s3_key):
        """
        Check if a file exists in S3

        Args:
            s3_key: S3 object key

        Returns:
            bool: True if exists, False otherwise
        """
        try:
            self.s3_client.head_object(Bucket=self.bucket_name, Key=s3_key)
            return True
        except ClientError:
            return False
z-index: 2; + cursor: pointer; +} + +#remote-video { + width: 100%; + height: 100%; + position: absolute; + left: 0; + right: 0; + bottom: 0; + top: 0; + z-index: 1; + margin: 0; + padding: 0; + cursor: pointer; +} + +.action-btns { + position: absolute; + bottom: 20px; + left: 50%; + margin-left: -50px; + z-index: 3; + display: flex; + flex-direction: row; + flex-wrap: wrap; +} diff --git a/videocaller/agora/static/agora/index.js b/videocaller/agora/static/agora/index.js new file mode 100644 index 0000000000000000000000000000000000000000..fd9aa901c0959e1340ccbabdfd35a8ee6ce00716 --- /dev/null +++ b/videocaller/agora/static/agora/index.js @@ -0,0 +1,277 @@ +const app = new Vue({ + el: "#app", + delimiters: ["${", "}"], + data: { + callPlaced: false, + client: null, + localStream: null, + mutedAudio: false, + mutedVideo: false, + userOnlineChannel: null, + onlineUsers: [], + incomingCall: false, + incomingCaller: "", + agoraChannel: null, + }, + mounted() { + this.initUserOnlineChannel(); + }, + + methods: { + initUserOnlineChannel() { + const userOnlineChannel = pusher.subscribe("presence-online-channel"); + + // Start Pusher Presence Channel Event Listeners + + userOnlineChannel.bind("pusher:subscription_succeeded", (data) => { + // From Laravel Echo, wrapper for Pusher Js Client + let members = Object.keys(data.members).map((k) => data.members[k]); + this.onlineUsers = members; + }); + + userOnlineChannel.bind("pusher:member_added", (data) => { + let user = data.info; + // check user availability + const joiningUserIndex = this.onlineUsers.findIndex( + (data) => data.id === user.id + ); + if (joiningUserIndex < 0) { + this.onlineUsers.push(user); + } + }); + + userOnlineChannel.bind("pusher:member_removed", (data) => { + let user = data.info; + const leavingUserIndex = this.onlineUsers.findIndex( + (data) => data.id === user.id + ); + this.onlineUsers.splice(leavingUserIndex, 1); + }); + + userOnlineChannel.bind("pusher:subscription_error", (err) => { + 
console.log("Subscription Error", err); + }); + + userOnlineChannel.bind("an_event", (data) => { + console.log("a_channel: ", data); + }); + + userOnlineChannel.bind("make-agora-call", (data) => { + // Listen to incoming call. This can be replaced with a private channel + + if (parseInt(data.userToCall) === parseInt(AUTH_USER_ID)) { + const callerIndex = this.onlineUsers.findIndex( + (user) => user.id === data.from + ); + this.incomingCaller = this.onlineUsers[callerIndex]["name"]; + this.incomingCall = true; + + // the channel that was sent over to the user being called is what + // the receiver will use to join the call when accepting the call. + this.agoraChannel = data.channelName; + } + }); + }, + + getUserOnlineStatus(id) { + const onlineUserIndex = this.onlineUsers.findIndex( + (data) => data.id === id + ); + if (onlineUserIndex < 0) { + return "Offline"; + } + return "Online"; + }, + + async placeCall(id, calleeName) { + try { + // channelName = the caller's and the callee's id. you can use anything. tho. 
+ const channelName = `${AUTH_USER}_${calleeName}`; + const tokenRes = await this.generateToken(channelName); + + // // Broadcasts a call event to the callee and also gets back the token + let placeCallRes = await axios.post( + "/call-user/", + { + user_to_call: id, + channel_name: channelName, + }, + { + headers: { + "Content-Type": "application/json", + "X-CSRFToken": CSRF_TOKEN, + }, + } + ); + + this.initializeAgora(tokenRes.data.appID); + this.joinRoom(tokenRes.data.token, channelName); + } catch (error) { + console.log(error); + } + }, + + async acceptCall() { + const tokenRes = await this.generateToken(this.agoraChannel); + this.initializeAgora(tokenRes.data.appID); + + this.joinRoom(tokenRes.data.token, this.agoraChannel); + this.incomingCall = false; + this.callPlaced = true; + }, + + declineCall() { + // You can send a request to the caller to + // alert them of rejected call + this.incomingCall = false; + }, + + generateToken(channelName) { + return axios.post( + "/token/", + { + channelName, + }, + { + headers: { + "Content-Type": "application/json", + "X-CSRFToken": CSRF_TOKEN, + }, + } + ); + }, + + /** + * Agora Events and Listeners + */ + initializeAgora(agora_app_id) { + this.client = AgoraRTC.createClient({ mode: "rtc", codec: "h264" }); + this.client.init( + agora_app_id, + () => { + console.log("AgoraRTC client initialized"); + }, + (err) => { + console.log("AgoraRTC client init failed", err); + } + ); + }, + + async joinRoom(token, channel) { + this.client.join( + token, + channel, + AUTH_USER, + (uid) => { + console.log("User " + uid + " join channel successfully"); + this.callPlaced = true; + this.createLocalStream(); + this.initializedAgoraListeners(); + }, + (err) => { + console.log("Join channel failed", err); + } + ); + }, + + initializedAgoraListeners() { + // Register event listeners + this.client.on("stream-published", function (evt) { + console.log("Publish local stream successfully"); + console.log(evt); + }); + + //subscribe remote 
stream + this.client.on("stream-added", ({ stream }) => { + console.log("New stream added: " + stream.getId()); + this.client.subscribe(stream, function (err) { + console.log("Subscribe stream failed", err); + }); + }); + + this.client.on("stream-subscribed", (evt) => { + // Attach remote stream to the remote-video div + + console.log("incoming remote stream event: ", evt); + + evt.stream.play("remote-video"); + this.client.publish(evt.stream); + }); + + this.client.on("stream-removed", ({ stream }) => { + console.log(String(stream.getId())); + stream.close(); + }); + + this.client.on("peer-online", (evt) => { + console.log("peer-online", evt.uid); + }); + + this.client.on("peer-leave", (evt) => { + var uid = evt.uid; + var reason = evt.reason; + console.log("remote user left ", uid, "reason: ", reason); + }); + + this.client.on("stream-unpublished", (evt) => { + console.log(evt); + }); + }, + + createLocalStream() { + this.localStream = AgoraRTC.createStream({ + audio: true, + video: true, + }); + + // Initialize the local stream + this.localStream.init( + () => { + // Play the local stream + this.localStream.play("local-video"); + // Publish the local stream + this.client.publish(this.localStream, (err) => { + console.log("publish local stream", err); + }); + }, + (err) => { + console.log(err); + } + ); + }, + + endCall() { + this.localStream.close(); + this.client.leave( + () => { + console.log("Leave channel successfully"); + this.callPlaced = false; + }, + (err) => { + console.log("Leave channel failed"); + } + ); + window.pusher.unsubscribe(); + }, + + handleAudioToggle() { + if (this.mutedAudio) { + this.localStream.unmuteAudio(); + this.mutedAudio = false; + } else { + this.localStream.muteAudio(); + this.mutedAudio = true; + } + }, + + handleVideoToggle() { + if (this.mutedVideo) { + this.localStream.unmuteVideo(); + this.mutedVideo = false; + } else { + this.localStream.muteVideo(); + this.mutedVideo = true; + } + }, + }, +}); diff --git 
a/videocaller/agora/tasks.py b/videocaller/agora/tasks.py new file mode 100644 index 0000000000000000000000000000000000000000..9f7368b5ebc0f9655c583fab447dc41a13274572 --- /dev/null +++ b/videocaller/agora/tasks.py @@ -0,0 +1,61 @@ +"""Background tasks for document processing.""" +from __future__ import annotations + +import logging +import traceback + +from django.core.files.storage import default_storage +from django.utils import timezone + +from .document_processing import DocumentProcessorFactory +from .models import DocumentUpload + +logger = logging.getLogger(__name__) + + +def process_document_upload(document_id: int) -> None: + try: + document = DocumentUpload.objects.get(id=document_id) + except DocumentUpload.DoesNotExist: + logger.error("Document upload %s not found", document_id) + return + + logger.info("Starting document processing: id=%s name=%s", document.id, document.file_name) + document.status = "processing" + document.error_message = None + document.save(update_fields=["status", "error_message"]) + + try: + full_path = default_storage.path(document.storage_path) + s3_key = f"documents/{document.storage_path.split('/')[-1]}" + s3_result = DocumentProcessorFactory.upload_to_s3_if_configured(full_path, s3_key) + + if s3_result.get("s3_url"): + document.s3_url = s3_result["s3_url"] + document.save(update_fields=["s3_url"]) + + strategy = DocumentProcessorFactory.get_strategy(full_path) + result = strategy.process( + document=document, + local_path=full_path, + s3_url=s3_result.get("s3_url"), + presigned_url=s3_result.get("presigned_url") + ) + + document.status = "completed" + document.chunk_count = int(result.get("chunk_count", 0) or 0) + document.processed_at = timezone.now() + document.save(update_fields=["status", "processed_at", "chunk_count"]) + logger.info( + "Completed document processing: id=%s status=%s chunks=%s", + document.id, + document.status, + document.chunk_count + ) + + except Exception as exc: + logger.error("Document processing 
failed: %s", exc) + logger.error(traceback.format_exc()) + document.status = "failed" + document.error_message = str(exc) + document.save(update_fields=["status", "error_message"]) diff --git a/videocaller/agora/templates/agora/create_room.html b/videocaller/agora/templates/agora/create_room.html new file mode 100644 index 0000000000000000000000000000000000000000..3c2555b312774e39389351e8510135f747635cd0 --- /dev/null +++ b/videocaller/agora/templates/agora/create_room.html @@ -0,0 +1,90 @@ + + + + + + Create Meeting + + + + +
+

Create New Meeting

+
+ {% csrf_token %} + +
+ + +
+ +
+ + +
+ + +
+ + +
+ + diff --git a/videocaller/agora/templates/agora/documents.html b/videocaller/agora/templates/agora/documents.html new file mode 100644 index 0000000000000000000000000000000000000000..e8697bde7535ed79a4c96c6ead4d1fb65ea7d6c4 --- /dev/null +++ b/videocaller/agora/templates/agora/documents.html @@ -0,0 +1,105 @@ +{% load static %} + + + + + + Documents - {{ room.title }} + + + + +
+
+

Documents - {{ room.title }}

+ Meeting code: {{ room.room_code }} +
+
+ Back to meeting + +
+
+ +
+
+
+ + + + diff --git a/videocaller/agora/templates/agora/home.html b/videocaller/agora/templates/agora/home.html new file mode 100644 index 0000000000000000000000000000000000000000..d7f9bde8374d9e59ef1a112bd9017b36e6080350 --- /dev/null +++ b/videocaller/agora/templates/agora/home.html @@ -0,0 +1,583 @@ +{% load static %} + + + + + + Video Meetings + + + + + + +
+
+
+
+

Welcome, {{ user.username }}! 👋

+

Start a new meeting or join an existing one

+ +
+ + {% if user_rooms %} +
+

Your Meetings

+ {% for room in user_rooms %} +
+
{{ room.title }}
+ Code: {{ room.room_code }} +
Hosted by you
+ {% if room.description %} +

{{ room.description }}

+ {% endif %} + +
+ {% endfor %} +
+ {% endif %} + +
+

Upload External Data

+ + + + + +
Hosts can upload to their meetings. Supports PDF, TXT, DOC/DOCX, MP3.
+
+ +
+

Available Meetings

+ {% if all_rooms %} + {% for room in all_rooms %} +
+
{{ room.title }}
+ Code: {{ room.room_code }} +
Hosted by {{ room.host.username }}
+ {% if room.description %} +

{{ room.description }}

+ {% endif %} + +
+ {% endfor %} + {% else %} +
+

No active meetings available

+

Create one now!

+
+ {% endif %} +
+
+ +
+
+
+

Chat

+
+ + + +
+
+ +
+
+
+ + +
+
+ + + + +
+
+
+
+ + + + diff --git a/videocaller/agora/templates/agora/index.html b/videocaller/agora/templates/agora/index.html new file mode 100644 index 0000000000000000000000000000000000000000..424690b6d4fb9502c8f7ff665d5e13301568b004 --- /dev/null +++ b/videocaller/agora/templates/agora/index.html @@ -0,0 +1,143 @@ +{% load static %} + + + + + + + + + + + + + Agora Video Chat Django + + +
+
+
+
+
+ Agora Logo +
+
+
+
+
+
+
+ {% for singleUser in allUsers%} + + + {% endfor %} +
+
+
+ + +
+
+

Incoming Call From ${ incomingCaller }

+
+ + +
+
+
+ +
+ +
+
+
+ +
+ + + +
+ +
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/videocaller/agora/templates/agora/join_room.html b/videocaller/agora/templates/agora/join_room.html new file mode 100644 index 0000000000000000000000000000000000000000..3e0ac85b8b45a6414b3f81af92bf8719d144041a --- /dev/null +++ b/videocaller/agora/templates/agora/join_room.html @@ -0,0 +1,104 @@ + + + + + + Join Meeting + + + + +
+

Join Meeting

+ + {% if error %} +
{{ error }}
+ {% endif %} + +
+ {% csrf_token %} + +
+ + +
+ Enter the meeting code (e.g., abc-def-ghi) +
+
+ + +
+ + +
+ + diff --git a/videocaller/agora/templates/agora/login.html b/videocaller/agora/templates/agora/login.html new file mode 100644 index 0000000000000000000000000000000000000000..0ca042691c28274715dcfdeadfdaf42dab1923ff --- /dev/null +++ b/videocaller/agora/templates/agora/login.html @@ -0,0 +1,86 @@ + + + + + + Login - Video Meeting + + + + + + + diff --git a/videocaller/agora/templates/agora/meeting.html b/videocaller/agora/templates/agora/meeting.html new file mode 100644 index 0000000000000000000000000000000000000000..1f5adc8717b6beaa9faf62664766376c5768f7ed --- /dev/null +++ b/videocaller/agora/templates/agora/meeting.html @@ -0,0 +1,1190 @@ +{% load static %} + + + + + + + {{ room.title }} - Meeting + + + + + + +
+
{{ room.title }}
+
Code: {{ room.room_code }}
+
+ +
+ + 👥 Participants: 1 +
+ +
+ + +
+
+
+
+
You (Local)
+
+ +
+ +
+ {% if is_host %} + + {% else %} + + {% endif %} + + + {% if is_host %} + + {% else %} + + {% endif %} +
+ + {% if is_host %} +
+ + + + Supports PDF, TXT, DOC/DOCX, MP3 + Open documents page +
+
+
No uploads yet.
+
+ {% endif %} + +
+
Ask questions about this meeting or uploaded documents.
+
+ + +
+
+ +
+
+ + + + diff --git a/videocaller/agora/templates/agora/register.html b/videocaller/agora/templates/agora/register.html new file mode 100644 index 0000000000000000000000000000000000000000..58d24dc55a9f95377ab8dcc6fc5228cf42bcbec9 --- /dev/null +++ b/videocaller/agora/templates/agora/register.html @@ -0,0 +1,107 @@ + + + + + + Register - Video Meeting + + + + +
+

Create Account

+
+ {% csrf_token %} + +
+ + {{ form.username }} + {% if form.username.errors %} +
    + {% for error in form.username.errors %} +
  • {{ error }}
  • + {% endfor %} +
+ {% endif %} +
+ +
+ + {{ form.password1 }} + {% if form.password1.errors %} +
    + {% for error in form.password1.errors %} +
  • {{ error }}
  • + {% endfor %} +
+ {% endif %} +
+ +
+ + {{ form.password2 }} + {% if form.password2.errors %} +
    + {% for error in form.password2.errors %} +
  • {{ error }}
  • + {% endfor %} +
+ {% endif %} +
+ + +
+ + +
+ + diff --git a/videocaller/agora/tests.py b/videocaller/agora/tests.py new file mode 100644 index 0000000000000000000000000000000000000000..7ce503c2dd97ba78597f6ff6e4393132753573f6 --- /dev/null +++ b/videocaller/agora/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. diff --git a/videocaller/agora/urls.py b/videocaller/agora/urls.py new file mode 100644 index 0000000000000000000000000000000000000000..b011a81ce97d6f2e6930238761d9dc2cc229578f --- /dev/null +++ b/videocaller/agora/urls.py @@ -0,0 +1,42 @@ +from django.urls import path +from . import views +from django.contrib.auth.views import LoginView, LogoutView + +urlpatterns = [ + # Authentication + path('register/', views.register, name='register'), + path('login/', LoginView.as_view(template_name='agora/login.html'), name='login'), + path('logout/', LogoutView.as_view(next_page='login'), name='logout'), + + # Home & Room Management + path('', views.home, name='home'), + path('create/', views.create_room, name='create_room'), + path('join/', views.join_room, name='join_room'), + + # Meeting + path('meeting//', views.meeting, name='meeting'), + path('meeting//end/', views.end_meeting, name='end_meeting'), + path('meeting//upload-recording/', views.upload_recording, name='upload_recording'), + path('meeting//upload-document/', views.upload_document, name='upload_document'), + path('meeting//documents/', views.documents_page, name='documents_page'), + + # Cloud Recording Endpoints + path('meeting//start-recording/', views.start_recording, name='start_recording'), + path('meeting//stop-recording/', views.stop_recording, name='stop_recording'), + path('meeting//query-recording/', views.query_recording, name='query_recording'), + + # API Endpoints + path('pusher/auth/', views.pusher_auth, name='agora-pusher-auth'), + path('token/', views.generate_agora_token, name='agora-token'), + path('chat/messages/', views.chat_messages, name='chat_messages'), + + # RAG Endpoints + 
path('api/meetings//prepare-rag/', views.prepare_meeting_for_rag, name='prepare_rag'), + path('api/meetings//query/', views.query_meeting_transcript, name='query_transcript'), + path('api/query/', views.query_global_rag, name='query_global_rag'), + path('api/meetings//conversation-history/', views.get_conversation_history, name='conversation_history'), + path('api/meetings//documents/', views.list_documents, name='list_documents'), + path('api/meetings//agenda/', views.meeting_agenda, name='meeting_agenda'), + path('api/meetings//agenda//', views.delete_agenda_point, name='delete_agenda_point'), + path('api/health/google/', views.google_llm_health, name='google_llm_health'), +] diff --git a/videocaller/agora/views.py b/videocaller/agora/views.py new file mode 100644 index 0000000000000000000000000000000000000000..ef059a038077c887c27c47b81712d8aec571f138 --- /dev/null +++ b/videocaller/agora/views.py @@ -0,0 +1,1033 @@ +import os +import time +import json +import random +import string +import logging +import requests + +from django.http.response import JsonResponse +from django.http import StreamingHttpResponse, HttpResponseNotAllowed +from django.contrib.auth import get_user_model, authenticate, login +from django.contrib.auth.decorators import login_required +from django.shortcuts import render, redirect, get_object_or_404 +from django.views.decorators.http import require_POST, require_http_methods +from django.contrib.auth.forms import UserCreationForm +from django.db.models import Q +from django.db import models +from django.core.files.base import ContentFile +from django.conf import settings + +from .agora_key.RtcTokenBuilder import RtcTokenBuilder, Role_Attendee +from .models import MeetingRoom, ChatMessage, DocumentUpload, DocumentChunk, MeetingAgendaPoint +from .recording_utils import AgoraCloudRecording, S3Manager +from .assemblyai_utils import AssemblyAIClient +from .rag_utils import generate_rag_response, process_transcript_for_rag, 
stream_rag_response_async
from .agenda_utils import generate_agenda_points
from asgiref.sync import sync_to_async
from django_q.tasks import async_task
from pusher import Pusher

logger = logging.getLogger(__name__)

# Lazy-load Pusher client to avoid crashes when env vars aren't set (e.g., in CI)
_pusher_client = None


def get_pusher_client():
    """Return a singleton Pusher client, or None when not configured.

    Initialization is deferred so importing this module never fails in
    environments (CI, local dev) where the Pusher env vars are absent.
    """
    global _pusher_client
    if _pusher_client is None:
        app_id = os.environ.get('PUSHER_APP_ID')
        key = os.environ.get('PUSHER_KEY')
        secret = os.environ.get('PUSHER_SECRET')
        cluster = os.environ.get('PUSHER_CLUSTER')

        # Only initialize when every required env var is present.
        if all([app_id, key, secret, cluster]):
            _pusher_client = Pusher(
                app_id=app_id,
                key=key,
                secret=secret,
                ssl=True,
                cluster=cluster
            )
    return _pusher_client


def generate_room_code():
    """Generate a random 9-character room code like ``abc-d-ghi``.

    Uniqueness is NOT guaranteed here; callers that persist the code
    (see create_room) must check for collisions and retry.
    """
    chars = string.ascii_lowercase + string.digits
    return '-'.join([
        ''.join(random.choices(chars, k=3)),
        random.choice(chars),
        ''.join(random.choices(chars, k=3)),
    ])


# User Registration
def register(request):
    """Sign up a new user and log them straight in."""
    if request.user.is_authenticated:
        return redirect('home')

    if request.method == 'POST':
        form = UserCreationForm(request.POST)
        if form.is_valid():
            form.save()
            # Re-authenticate with the raw credentials so login() receives
            # a backend-annotated user object.
            user = authenticate(
                username=form.cleaned_data.get('username'),
                password=form.cleaned_data.get('password1')
            )
            login(request, user)
            return redirect('home')
        # Invalid form falls through and is re-rendered with its errors.
    else:
        form = UserCreationForm()

    return render(request, 'agora/register.html', {'form': form})


# Home Page - List Meeting Rooms
@login_required(login_url='/register/')
def home(request):
    """Dashboard: all active rooms, the user's own rooms and recent chat."""
    all_rooms = MeetingRoom.objects.filter(is_active=True)
    user_rooms = request.user.hosted_meetings.filter(is_active=True)

    # Newest 50 messages, re-ordered oldest-first for display.
    chat_messages = ChatMessage.objects.select_related('user').order_by('-created_at')[:50]

    return render(request, 'agora/home.html', {
        'all_rooms': all_rooms,
        'user_rooms': user_rooms,
        'chat_messages': list(reversed(chat_messages))
    })


# Create Meeting Room
@login_required(login_url='/register/')
def create_room(request):
    """Create a meeting room and redirect the host into it."""
    if request.method == 'POST':
        title = request.POST.get('title', 'Untitled Meeting')
        description = request.POST.get('description', '')

        # generate_room_code is random, not unique — retry on the
        # (unlikely) chance of a collision with an existing room.
        room_code = generate_room_code()
        while MeetingRoom.objects.filter(room_code=room_code).exists():
            room_code = generate_room_code()

        room = MeetingRoom.objects.create(
            host=request.user,
            title=title,
            description=description,
            room_code=room_code
        )

        return redirect('meeting', room_code=room.room_code)

    return render(request, 'agora/create_room.html')


# Join Meeting Room by Code
@login_required(login_url='/register/')
def join_room(request):
    """Join an existing, active meeting by its room code."""
    if request.method == 'POST':
        room_code = request.POST.get('room_code', '').strip()

        try:
            room = MeetingRoom.objects.get(room_code=room_code, is_active=True)
        except MeetingRoom.DoesNotExist:
            return render(request, 'agora/join_room.html', {'error': 'Room not found or inactive'})
        return redirect('meeting', room_code=room.room_code)

    return render(request, 'agora/join_room.html')


# Meeting Room Interface
@login_required(login_url='/register/')
def meeting(request, room_code):
    """Render the in-meeting UI for an active room."""
    room = get_object_or_404(MeetingRoom, room_code=room_code, is_active=True)

    return render(request, 'agora/meeting.html', {
        'room': room,
        'room_code': room_code,
        'room_id': str(room.room_id),
        'is_host': room.host == request.user,
        'meeting_db_id': room.id
    })


# End Meeting (Host Only)
@login_required(login_url='/register/')
@require_POST
def end_meeting(request, room_code):
    """Deactivate a meeting; only its host may do so."""
    room = get_object_or_404(MeetingRoom, room_code=room_code)

    if room.host != request.user:
        return JsonResponse({'error': 'Only host can end meeting'}, status=403)

    room.is_active = False
    room.save()

    return JsonResponse({'message': 'Meeting ended'})


# Pusher Authentication
def pusher_auth(request):
    """Authenticate a Pusher presence-channel subscription for this user.

    Returns 503 when Pusher isn't configured, 403 for anonymous users and
    400 when the expected POST fields are missing.
    """
    client = get_pusher_client()
    if not client:
        return JsonResponse({'error': 'Pusher not configured'}, status=503)

    # Anonymous users must not join the presence channel (and
    # request.user.id would be None below).
    if not request.user.is_authenticated:
        return JsonResponse({'error': 'Authentication required'}, status=403)

    channel_name = request.POST.get('channel_name')
    socket_id = request.POST.get('socket_id')
    if not channel_name or not socket_id:
        return JsonResponse({'error': 'channel_name and socket_id are required'}, status=400)

    payload = client.authenticate(
        channel=channel_name,
        socket_id=socket_id,
        custom_data={
            'user_id': request.user.id,
            'user_info': {
                'id': request.user.id,
                'name': request.user.username
            }
        })
    return JsonResponse(payload)


# Generate Agora Token for Room
def generate_agora_token(request):
    """Issue an Agora RTC token for the requested channel.

    Expects a JSON body: {"channelName": "..."}. Returns 403 for
    anonymous users and 400 for a missing/invalid payload.
    """
    # The UID baked into the token is the Django user id, so the caller
    # must be authenticated (AnonymousUser has id None).
    if not request.user.is_authenticated:
        return JsonResponse({'error': 'Authentication required'}, status=403)

    try:
        data = json.loads(request.body.decode('utf-8'))
        channel_name = data['channelName']
    except (ValueError, KeyError):
        return JsonResponse({'error': 'channelName is required'}, status=400)

    app_id = settings.AGORA_APP_ID
    app_certificate = settings.AGORA_APP_CERTIFICATE

    # For Agora SDK v4, use numeric UID
    uid = request.user.id

    expire_time_in_seconds = 3600
    privilege_expired_ts = int(time.time()) + expire_time_in_seconds

    token = RtcTokenBuilder.buildTokenWithAccount(
        app_id, app_certificate, channel_name, str(uid), Role_Attendee, privilege_expired_ts)

    return JsonResponse({'token': token, 'appID': app_id, 'uid': uid})
+ }, status=500) + + room = get_object_or_404(MeetingRoom, room_code=room_code) + + # Only host can start recording + if room.host != request.user: + return JsonResponse({'error': 'Only the host can start recording'}, status=403) + + recording = room.get_recording() + + # Check if already recording + if recording.recording_status == 'recording': + return JsonResponse({'error': 'Recording already in progress'}, status=400) + + # Generate unique UID for recording bot (use a large number to avoid conflicts) + recording_uid = 999000000 + room.id + + # Get channel name (same as room_id) + channel_name = str(room.room_id) + + # Generate token for recording bot + appID = settings.AGORA_APP_ID + appCertificate = settings.AGORA_APP_CERTIFICATE + expireTimeInSeconds = 3600 + currentTimestamp = int(time.time()) + privilegeExpiredTs = currentTimestamp + expireTimeInSeconds + + recording_token = RtcTokenBuilder.buildTokenWithAccount( + appID, appCertificate, channel_name, str(recording_uid), + Role_Attendee, privilegeExpiredTs + ) + + # Initialize cloud recording + cloud_recording = AgoraCloudRecording() + + # Step 1: Acquire resource + acquire_result = cloud_recording.acquire_resource(channel_name, recording_uid) + + if not acquire_result['success']: + return JsonResponse({ + 'error': 'Failed to acquire recording resource', + 'details': acquire_result.get('error') + }, status=500) + + resource_id = acquire_result['resourceId'] + + # Step 2: Start recording + start_result = cloud_recording.start_recording( + channel_name=channel_name, + uid=recording_uid, + resource_id=resource_id, + token=recording_token, + bucket_name=settings.AWS_STORAGE_BUCKET_NAME, + bucket_access_key=settings.AWS_ACCESS_KEY_ID, + bucket_secret_key=settings.AWS_SECRET_ACCESS_KEY, + bucket_region=settings.AWS_S3_REGION_NAME + ) + + if not start_result['success']: + return JsonResponse({ + 'error': 'Failed to start recording', + 'details': start_result.get('error'), + 'response': start_result.get('response') 
+ }, status=500) + + # Update recording info + recording.recording_enabled = True + recording.recording_status = 'recording' + recording.recording_sid = start_result['sid'] + recording.recording_resource_id = resource_id + recording.recording_uid = recording_uid + recording.save() + + return JsonResponse({ + 'message': 'Recording started successfully', + 'sid': start_result['sid'], + 'resourceId': resource_id + }) + + except Exception as e: + return JsonResponse({'error': str(e)}, status=500) + + +# Stop Cloud Recording (Host Only) +@login_required(login_url='/register/') +@require_POST +def stop_recording(request, room_code): + """Stop Agora Cloud Recording and update S3 URL""" + try: + room = get_object_or_404(MeetingRoom, room_code=room_code) + + # Only host can stop recording + if room.host != request.user: + return JsonResponse({'error': 'Only the host can stop recording'}, status=403) + + recording = room.get_recording() + + # Check if recording is active + if recording.recording_status != 'recording': + return JsonResponse({'error': 'No active recording found'}, status=400) + + if not recording.recording_sid or not recording.recording_resource_id: + return JsonResponse({'error': 'Missing recording session data'}, status=400) + + # Initialize cloud recording + cloud_recording = AgoraCloudRecording() + + # Stop recording + stop_result = cloud_recording.stop_recording( + channel_name=str(room.room_id), + uid=recording.recording_uid, + resource_id=recording.recording_resource_id, + sid=recording.recording_sid + ) + + if not stop_result['success']: + return JsonResponse({ + 'error': 'Failed to stop recording', + 'details': stop_result.get('error') + }, status=500) + + # Get recording file info from response + server_response = stop_result.get('serverResponse', {}) + file_list = server_response.get('fileList', []) + + # Update recording status + recording.recording_status = 'completed' + + # Generate S3 URL if files are available + if file_list: + # Get the first 
MP4 file (or HLS if no MP4) + recording_file = None + for file_info in file_list: + filename = file_info.get('fileName', '') + if filename.endswith('.mp4'): + recording_file = filename + break + + if not recording_file and file_list: + recording_file = file_list[0].get('fileName') + + if recording_file: + # Construct S3 URL + s3_manager = S3Manager() + s3_key = f"recordings/{recording_file}" + recording.s3_recording_url = s3_manager.get_s3_url(s3_key) + + recording.save() + + return JsonResponse({ + 'message': 'Recording stopped successfully', + 'fileList': file_list, + 's3_url': recording.s3_recording_url + }) + + except Exception as e: + return JsonResponse({'error': str(e)}, status=500) + + +# Query Recording Status +@login_required(login_url='/register/') +def query_recording(request, room_code): + """Query the current status of cloud recording""" + try: + room = get_object_or_404(MeetingRoom, room_code=room_code) + + recording = room.get_recording() + + if not recording.recording_resource_id or not recording.recording_sid: + return JsonResponse({'error': 'No recording session found'}, status=400) + + cloud_recording = AgoraCloudRecording() + + query_result = cloud_recording.query_recording( + resource_id=recording.recording_resource_id, + sid=recording.recording_sid + ) + + if not query_result['success']: + return JsonResponse({ + 'error': 'Failed to query recording', + 'details': query_result.get('error') + }, status=500) + + return JsonResponse({ + 'status': recording.recording_status, + 'serverResponse': query_result.get('serverResponse', {}) + }) + + except Exception as e: + return JsonResponse({'error': str(e)}, status=500) + + +# Upload Meeting Recording (Local Audio Recording for Testing) +@login_required(login_url='/register/') +@require_POST +def upload_recording(request, room_code): + """Upload locally recorded audio to project directory""" + try: + room = get_object_or_404(MeetingRoom, room_code=room_code) + + # Only host can upload recordings + if 
file_path = f"recordings/{filename}" +        saved_path = default_storage.save(file_path, ContentFile(recording_file.read())) + +        # Get full file path +        full_path = default_storage.path(saved_path) + +        recording = room.get_recording() +        transcript = room.get_transcript() + +        # Try to upload to S3 if configured +        s3_url = None +        s3_error = None +        presigned_url = None +        if settings.AWS_ACCESS_KEY_ID and settings.AWS_SECRET_ACCESS_KEY and settings.AWS_STORAGE_BUCKET_NAME: +            try: +                s3_manager = S3Manager() +                s3_key = f"recordings/{filename}"
transcript_status = result.get('status', transcript_status) + if transcript_status == 'completed': + transcript_text = result.get('text') + elif transcript_status == 'failed': + transcript_error = result.get('error') + except Exception as transcribe_error: + transcript_status = 'failed' + transcript_error = str(transcribe_error) + + # Update recording and transcript info + recording.recording_enabled = True + recording.recording_duration = int(float(duration)) + recording.recording_status = 'completed' + recording.save() + + transcript.transcript_text = transcript_text + transcript.transcript_status = transcript_status + transcript.transcript_id = transcript_id + transcript.save() + + rag_enqueued = False + if transcript_status == 'completed' and transcript_text: + async_task('agora.rag_utils.process_transcript_for_rag', room.id) + rag_enqueued = True + + response_data = { + 'message': 'Audio recording saved successfully', + 'filename': filename, + 'duration': duration, + 'file_path': saved_path, + 'full_path': full_path, + 'size_bytes': recording_file.size, + 's3_url': s3_url, + 's3_error': s3_error, + 'transcript_status': transcript_status, + 'transcript_id': transcript_id, + 'transcript_error': transcript_error, + 'rag_enqueued': rag_enqueued + } + return JsonResponse(response_data) + + except Exception as e: + import traceback + error_details = traceback.format_exc() + print(f"Error uploading recording: {error_details}") + return JsonResponse({'error': str(e), 'details': error_details}, status=500) + + +# Upload External Document/Audio +@login_required(login_url='/register/') +@require_POST +def upload_document(request, room_code): + """Upload external data (pdf/txt/doc/docx/mp3) for RAG""" + try: + room = get_object_or_404(MeetingRoom, room_code=room_code) + + if room.host != request.user: + return JsonResponse({'error': 'Only the host can upload documents'}, status=403) + + if 'document' not in request.FILES: + return JsonResponse({'error': 'No document file 
provided'}, status=400) + + uploaded_file = request.FILES['document'] + original_name = uploaded_file.name + _, ext = os.path.splitext(original_name) + ext = ext.lower() + + timestamp = int(time.time()) + safe_name = f"{room_code}_{timestamp}{ext}" + + from django.core.files.storage import default_storage + file_path = f"documents/{safe_name}" + saved_path = default_storage.save(file_path, ContentFile(uploaded_file.read())) + full_path = default_storage.path(saved_path) + + document = DocumentUpload.objects.create( + meeting=room, + uploaded_by=request.user, + file_name=original_name, + file_type=ext.lstrip('.'), + storage_path=saved_path, + status='uploaded' + ) + + task_id = async_task('agora.tasks.process_document_upload', document.id) + document.status = 'queued' + document.save(update_fields=["status"]) + + return JsonResponse({ + 'message': 'Document queued for processing', + 'document_id': document.id, + 'file_name': document.file_name, + 'file_type': document.file_type, + 'status': document.status, + 'task_id': task_id + }) + + except Exception as e: + if 'document' in locals(): + document.status = 'failed' + document.error_message = str(e) + document.save(update_fields=["status", "error_message"]) + return JsonResponse({'error': str(e)}, status=500) + + +@login_required +@require_http_methods(["GET"]) +def list_documents(request, meeting_id): + """Return document upload statuses for a meeting.""" + try: + meeting = get_object_or_404(MeetingRoom, id=meeting_id) + + if meeting.host != request.user: + return JsonResponse({'error': 'Only host can view documents'}, status=403) + + documents = DocumentUpload.objects.filter(meeting=meeting).order_by('-created_at') + data = [ + { + 'id': doc.id, + 'file_name': doc.file_name, + 'file_type': doc.file_type, + 'status': doc.status, + 's3_url': doc.s3_url, + 'chunk_count': doc.chunk_count, + 'error_message': doc.error_message, + 'created_at': doc.created_at.isoformat(), + 'processed_at': doc.processed_at.isoformat() if 
doc.processed_at else None + } + for doc in documents + ] + + return JsonResponse({'success': True, 'documents': data}) + + except Exception as e: + return JsonResponse({'error': str(e)}, status=500) + + +@login_required +def documents_page(request, room_code): + room = get_object_or_404(MeetingRoom, room_code=room_code, is_active=True) + if room.host != request.user: + return redirect('meeting', room_code=room.room_code) + + return render(request, 'agora/documents.html', { + 'room': room, + 'meeting_db_id': room.id + }) + + +# Chat History (Dashboard) +@login_required(login_url='/register/') +@require_http_methods(["GET", "POST"]) +def chat_messages(request): + if request.method == "GET": + messages = ChatMessage.objects.select_related('user').order_by('-created_at')[:50] + data = [ + { + 'id': msg.id, + 'user': msg.user.username, + 'content': msg.content, + 'created_at': msg.created_at.isoformat() + } + for msg in reversed(messages) + ] + return JsonResponse({'messages': data}) + + # POST - create new message + try: + payload = json.loads(request.body.decode('utf-8')) + content = payload.get('content', '').strip() + if not content: + return JsonResponse({'error': 'Message content is required'}, status=400) + + msg = ChatMessage.objects.create(user=request.user, content=content) + return JsonResponse({ + 'id': msg.id, + 'user': msg.user.username, + 'content': msg.content, + 'created_at': msg.created_at.isoformat() + }) + except Exception as e: + return JsonResponse({'error': str(e)}, status=500) + + +@login_required +@require_http_methods(["GET"]) +def prepare_meeting_for_rag(request, meeting_id): + """ + Process transcript for RAG: chunk and create embeddings + GET /api/meetings/{meeting_id}/prepare-rag/ + """ + try: + meeting = get_object_or_404(MeetingRoom, id=meeting_id) + + # Only allow host to trigger + if meeting.host != request.user: + return JsonResponse({'error': 'Only host can prepare for RAG'}, status=403) + + transcript = meeting.get_transcript() + + 
# Check if transcript is ready + if transcript.transcript_status != 'completed': + return JsonResponse({ + 'error': 'Transcript not yet completed', + 'status': transcript.transcript_status + }, status=400) + + # Process for RAG + result = process_transcript_for_rag(meeting.id) + + if result['success']: + return JsonResponse({ + 'success': True, + 'message': result['message'], + 'chunk_count': result['chunk_count'] + }) + else: + return JsonResponse({ + 'success': False, + 'error': result['error'] + }, status=500) + + except Exception as e: + return JsonResponse({'error': str(e)}, status=500) + + +async def query_meeting_transcript(request, meeting_id): + """ + Query a meeting transcript using RAG with conversation context + POST /api/meetings/{meeting_id}/query/ + Body: {'question': 'user question'} + """ + try: + if request.method != 'POST': + return HttpResponseNotAllowed(['POST']) + is_authenticated = await sync_to_async(lambda: request.user.is_authenticated)() + if not is_authenticated: + return JsonResponse({'error': 'Authentication required'}, status=401) + meeting = await sync_to_async(get_object_or_404)(MeetingRoom, id=meeting_id) + payload = json.loads(request.body.decode('utf-8')) + question = payload.get('question', '').strip() + + if not question: + return JsonResponse({'error': 'Question is required'}, status=400) + + # Check if meeting is prepared for RAG (transcript or documents) + has_doc_chunks = await sync_to_async( + DocumentChunk.objects.filter(document__meeting=meeting).exists + )() + rag_state = await sync_to_async(meeting.get_rag_state)() + if not rag_state.embeddings_created_at and not has_doc_chunks: + return JsonResponse({ + 'error': 'Meeting data not yet processed for RAG. 
Upload a document or prepare transcript.', + 'status': 'not_prepared' + }, status=400) + + stream = request.GET.get('stream') == 'true' + user_id = await sync_to_async(lambda: request.user.id)() + + if stream: + stream_gen, _ = await stream_rag_response_async( + meeting_id=meeting.id, + user_id=user_id, + query=question, + top_k=5 + ) + return StreamingHttpResponse(stream_gen, content_type='text/plain; charset=utf-8') + + response_text, relevant_chunks = await sync_to_async(generate_rag_response)( + meeting_id=meeting.id, + user_id=user_id, + query=question, + top_k=5 + ) + + return JsonResponse({ + 'success': True, + 'response': response_text, + 'relevant_chunks': [ + { + 'index': chunk['chunk_index'], + 'text': chunk['text'], + 'score': round(chunk['score'], 3), + 'start_time': chunk.get('start_time'), + 'end_time': chunk.get('end_time'), + 'source_type': chunk.get('source_type'), + 'meeting_title': chunk.get('meeting_title'), + 'document_id': chunk.get('document_id'), + 'document_name': chunk.get('document_name') + } + for chunk in relevant_chunks + ] + }) + + except Exception as e: + return JsonResponse({'error': str(e)}, status=500) + + +async def query_global_rag(request): + """Query across all meetings and documents for the user.""" + try: + if request.method != 'POST': + return HttpResponseNotAllowed(['POST']) + is_authenticated = await sync_to_async(lambda: request.user.is_authenticated)() + if not is_authenticated: + return JsonResponse({'error': 'Authentication required'}, status=401) + payload = json.loads(request.body.decode('utf-8')) + question = payload.get('question', '').strip() + + if not question: + return JsonResponse({'error': 'Question is required'}, status=400) + + stream = request.GET.get('stream') == 'true' + user_id = await sync_to_async(lambda: request.user.id)() + + if stream: + stream_gen, _ = await stream_rag_response_async( + meeting_id=None, + user_id=user_id, + query=question, + top_k=5 + ) + return StreamingHttpResponse(stream_gen, 
content_type='text/plain; charset=utf-8') + + response_text, relevant_chunks = await sync_to_async(generate_rag_response)( + meeting_id=None, + user_id=user_id, + query=question, + top_k=5 + ) + + return JsonResponse({ + 'success': True, + 'response': response_text, + 'relevant_chunks': [ + { + 'index': chunk['chunk_index'], + 'text': chunk['text'], + 'score': round(chunk['score'], 3), + 'start_time': chunk.get('start_time'), + 'end_time': chunk.get('end_time'), + 'source_type': chunk.get('source_type'), + 'meeting_title': chunk.get('meeting_title'), + 'document_id': chunk.get('document_id'), + 'document_name': chunk.get('document_name') + } + for chunk in relevant_chunks + ] + }) + except Exception as e: + return JsonResponse({'error': str(e)}, status=500) + + +@login_required(login_url='/register/') +@require_http_methods(["GET"]) +def google_llm_health(request): + """ + Health-check for Google LLM configuration. + GET /api/health/google/ + """ + if not settings.GOOGLE_API_KEY: + return JsonResponse({'ok': False, 'error': 'GOOGLE_API_KEY is not configured'}, status=503) + + model_name = settings.GOOGLE_GENERATE_MODEL + url = ( + "https://generativelanguage.googleapis.com/v1beta/models/" + f"{model_name}:generateContent?key={settings.GOOGLE_API_KEY}" + ) + payload = { + "contents": [ + { + "role": "user", + "parts": [{"text": "Reply with OK"}] + } + ], + "generationConfig": { + "maxOutputTokens": 8, + "temperature": 0.0 + } + } + + started = time.time() + try: + response = requests.post( + url, + json=payload, + timeout=(settings.GOOGLE_CONNECT_TIMEOUT, settings.GOOGLE_READ_TIMEOUT) + ) + response.raise_for_status() + data = response.json() + + text_parts = [] + for candidate in data.get("candidates", []): + for part in candidate.get("content", {}).get("parts", []): + part_text = part.get("text") + if part_text: + text_parts.append(part_text) + + output = "".join(text_parts).strip() + latency_ms = int((time.time() - started) * 1000) + return JsonResponse({ + 'ok': 
True, + 'model': model_name, + 'latency_ms': latency_ms, + 'output': output + }) + except requests.exceptions.ReadTimeout as e: + logger.error("Google health-check timed out: %s", str(e)) + return JsonResponse({ + 'ok': False, + 'model': model_name, + 'error': 'Google request timed out' + }, status=503) + except requests.exceptions.RequestException as e: + status = getattr(e.response, "status_code", None) + body = getattr(e.response, "text", "") + logger.error("Google health-check failed (status=%s): %s", status, body[:1000]) + return JsonResponse({ + 'ok': False, + 'model': model_name, + 'error': 'Google request failed' + }, status=503) + except ValueError as e: + logger.error("Google health-check invalid JSON response: %s", str(e)) + return JsonResponse({ + 'ok': False, + 'model': model_name, + 'error': 'Invalid JSON response from Google' + }, status=503) + except Exception as e: + logger.error("Google health-check unexpected error: %s", str(e)) + return JsonResponse({ + 'ok': False, + 'model': model_name, + 'error': str(e) + }, status=503) + + +@login_required +@require_http_methods(["GET"]) +def get_conversation_history(request, meeting_id): + """ + Get conversation history for a meeting + GET /api/meetings/{meeting_id}/conversation-history/ + """ + try: + from .models import ConversationHistory + + meeting = get_object_or_404(MeetingRoom, id=meeting_id) + + history = ConversationHistory.objects.filter( + meeting=meeting, + user=request.user + ).order_by('created_at') + + return JsonResponse({ + 'success': True, + 'conversation': [ + { + 'id': item.id, + 'question': item.user_question, + 'response': item.assistant_response, + 'created_at': item.created_at.isoformat(), + 'relevant_chunks': item.relevant_chunks + } + for item in history + ] + }) + + except Exception as e: + return JsonResponse({'error': str(e)}, status=500) + + +@login_required +@require_http_methods(["GET", "POST"]) +def meeting_agenda(request, meeting_id): + """Get or add agenda points for a 
meeting.""" + meeting = get_object_or_404(MeetingRoom, id=meeting_id) + + if request.method == "POST": + try: + payload = json.loads(request.body.decode('utf-8')) + text = payload.get('text', '').strip() + if not text: + return JsonResponse({'error': 'Text is required'}, status=400) + max_order = meeting.agenda_points.aggregate(models.Max('order')).get('order__max') or 0 + point = MeetingAgendaPoint.objects.create( + meeting=meeting, + text=text, + order=max_order + 1, + created_by=request.user, + is_ai_generated=False + ) + return JsonResponse({ + 'id': point.id, + 'text': point.text, + 'order': point.order, + 'is_ai_generated': point.is_ai_generated + }) + except Exception as e: + return JsonResponse({'error': str(e)}, status=500) + + points = list(meeting.agenda_points.all()) + if not points: + generated = generate_agenda_points(meeting.title, meeting.description, meeting.id) + for idx, text in enumerate(generated, start=1): + MeetingAgendaPoint.objects.create( + meeting=meeting, + text=text, + order=idx, + created_by=None, + is_ai_generated=True + ) + points = list(meeting.agenda_points.all()) + + return JsonResponse({ + 'points': [ + { + 'id': point.id, + 'text': point.text, + 'order': point.order, + 'is_ai_generated': point.is_ai_generated + } + for point in points + ] + }) + + +@login_required +@require_http_methods(["POST", "DELETE"]) +def delete_agenda_point(request, meeting_id, point_id): + """Remove an agenda point and resequence ordering.""" + meeting = get_object_or_404(MeetingRoom, id=meeting_id) + point = get_object_or_404(MeetingAgendaPoint, id=point_id, meeting=meeting) + point.delete() + + remaining = list(meeting.agenda_points.order_by('order', 'created_at')) + for idx, item in enumerate(remaining, start=1): + if item.order != idx: + item.order = idx + item.save(update_fields=['order']) + + return JsonResponse({ + 'success': True, + 'points': [ + { + 'id': item.id, + 'text': item.text, + 'order': item.order, + 'is_ai_generated': 
item.is_ai_generated + } + for item in remaining + ] + }) diff --git a/videocaller/manage.py b/videocaller/manage.py new file mode 100644 index 0000000000000000000000000000000000000000..3d1047f5df6b286d218b88e85631a12006a75466 --- /dev/null +++ b/videocaller/manage.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +"""Django's command-line utility for administrative tasks.""" +import os +import sys + + +def main(): + """Run administrative tasks.""" + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'videocaller.settings') + try: + from django.core.management import execute_from_command_line + except ImportError as exc: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" + ) from exc + execute_from_command_line(sys.argv) + + +if __name__ == '__main__': + main() diff --git a/videocaller/videocaller/__init__.py b/videocaller/videocaller/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/videocaller/videocaller/asgi.py b/videocaller/videocaller/asgi.py new file mode 100644 index 0000000000000000000000000000000000000000..63121125d8936d135aee240986dfecf791cc69c3 --- /dev/null +++ b/videocaller/videocaller/asgi.py @@ -0,0 +1,16 @@ +""" +ASGI config for videocaller project. + +It exposes the ASGI callable as a module-level variable named ``application``. 
+ +For more information on this file, see +https://docs.djangoproject.com/en/4.1/howto/deployment/asgi/ +""" + +import os + +from django.core.asgi import get_asgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'videocaller.settings') + +application = get_asgi_application() diff --git a/videocaller/videocaller/settings.py b/videocaller/videocaller/settings.py new file mode 100644 index 0000000000000000000000000000000000000000..a3a6001855d7d2a1e8a5dc46c60a441c49cc1341 --- /dev/null +++ b/videocaller/videocaller/settings.py @@ -0,0 +1,262 @@ +""" +Django settings for videocaller project. + +Generated by 'django-admin startproject' using Django 4.1.5. + +For more information on this file, see +https://docs.djangoproject.com/en/4.1/topics/settings/ + +For the full list of settings and their values, see +https://docs.djangoproject.com/en/4.1/ref/settings/ +""" + +from pathlib import Path +import os +import dotenv +import dj_database_url + +dotenv.load_dotenv() + +# Build paths inside the project like this: BASE_DIR / 'subdir'. +BASE_DIR = Path(__file__).resolve().parent.parent + + +# Quick-start development settings - unsuitable for production +# See https://docs.djangoproject.com/en/4.1/howto/deployment/checklist/ + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = os.environ.get('DJANGO_SECRET_KEY', 'django-insecure-dev-key-change-in-production') + +# SECURITY WARNING: don't run with debug turned on in production! 
DEBUG = os.environ.get('DJANGO_DEBUG', 'true').lower() == 'true'

# Comma-separated whitelist, e.g. "app.example.com,api.example.com".
allowed_hosts = os.environ.get('DJANGO_ALLOWED_HOSTS', 'localhost,127.0.0.1')
ALLOWED_HOSTS = [host.strip() for host in allowed_hosts.split(',') if host.strip()]

# In production, trust the public hostname Render.com injects for this service.
# (An unused ``render_domains`` list was previously declared here; it was dead
# code — nothing ever read it — and has been removed.)
if not DEBUG:
    current_host = os.environ.get('RENDER_EXTERNAL_HOSTNAME')
    if current_host and current_host not in ALLOWED_HOSTS:
        ALLOWED_HOSTS.append(current_host)

# Add HuggingFace Spaces hostname (auto-set by HF runtime as SPACE_HOST)
hf_space_host = os.environ.get('SPACE_HOST')
if hf_space_host and hf_space_host not in ALLOWED_HOSTS:
    ALLOWED_HOSTS.append(hf_space_host)

# Production hardening: trust the reverse proxy's scheme header and require
# HTTPS for redirects and cookies.
if not DEBUG:
    CSRF_TRUSTED_ORIGINS = [f'https://{host}' for host in ALLOWED_HOSTS if host not in ['localhost', '127.0.0.1']]
    SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https')
    SECURE_SSL_REDIRECT = True
    SESSION_COOKIE_SECURE = True
    CSRF_COOKIE_SECURE = True

# Authentication redirects
LOGIN_REDIRECT_URL = 'home'    # Redirect to home page after login
LOGOUT_REDIRECT_URL = 'login'  # Redirect to login after logout
LOGIN_URL = 'login'            # Where to redirect for @login_required

# Application definition

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'django_q',

    # Project app
    'agora',
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    # WhiteNoise must come directly after SecurityMiddleware to serve statics.
    'whitenoise.middleware.WhiteNoiseMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'videocaller.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'videocaller.wsgi.application'
ASGI_APPLICATION = 'videocaller.asgi.application'


# Database
# https://docs.djangoproject.com/en/4.1/ref/settings/#databases

# Use PostgreSQL when DATABASE_URL is provided (Render), SQLite for local dev.
database_url = os.environ.get('DATABASE_URL', '').strip()
if database_url:
    DATABASES = {
        'default': dj_database_url.config(
            default=database_url,
            conn_max_age=600,
            conn_health_checks=True,
        )
    }
else:
    DATABASES = {
        'default': {
            'ENGINE': 'django.db.backends.sqlite3',
            'NAME': BASE_DIR / 'db.sqlite3',
        }
    }


# Password validation
# https://docs.djangoproject.com/en/4.1/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]


# Internationalization
# https://docs.djangoproject.com/en/4.1/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'UTC'

USE_I18N = True

USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/4.1/howto/static-files/

STATIC_URL = '/static/'
STATIC_ROOT = BASE_DIR / 'staticfiles'
# WhiteNoise: serve hashed, compressed static files without a separate web server.
STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage'

# Media files (User uploads)
MEDIA_URL = '/media/'
MEDIA_ROOT = BASE_DIR / 'media'

# AWS S3 Configuration (recording storage)
AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
AWS_STORAGE_BUCKET_NAME = os.environ.get('AWS_STORAGE_BUCKET_NAME')
AWS_S3_REGION_NAME = os.environ.get('AWS_S3_REGION_NAME', 'us-east-1')

# Agora Cloud Recording Configuration
AGORA_APP_ID = os.environ.get('AGORA_APP_ID')
AGORA_APP_CERTIFICATE = os.environ.get('AGORA_APP_CERTIFICATE')
AGORA_CUSTOMER_ID = os.environ.get('AGORA_CUSTOMER_ID')
AGORA_CUSTOMER_SECRET = os.environ.get('AGORA_CUSTOMER_SECRET')
AGORA_RECORDING_REGION = os.environ.get('AGORA_RECORDING_REGION', 'NA')

# AssemblyAI Configuration (transcription)
ASSEMBLYAI_API_KEY = os.environ.get('ASSEMBLYAI_API_KEY')

# Google Gemini Configuration for RAG
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
GOOGLE_EMBEDDING_MODEL = os.environ.get('GOOGLE_EMBEDDING_MODEL', 'models/embedding-001')
GOOGLE_EMBEDDING_DIMENSION = int(os.environ.get('GOOGLE_EMBEDDING_DIMENSION', 768))
GOOGLE_LLM_MODEL = os.environ.get('GOOGLE_LLM_MODEL', 'gemini-pro')
GOOGLE_GENERATE_MODEL = os.environ.get('GOOGLE_GENERATE_MODEL', 'gemini-2.5-flash-lite')
GOOGLE_CONNECT_TIMEOUT = int(os.environ.get('GOOGLE_CONNECT_TIMEOUT', 10))
GOOGLE_READ_TIMEOUT = int(os.environ.get('GOOGLE_READ_TIMEOUT', 600))
GOOGLE_MAX_TOKENS = int(os.environ.get('GOOGLE_MAX_TOKENS', 1000))


def _get_bool_env(name, default=False):
    """Read environment variable *name* as a boolean flag.

    Returns *default* when the variable is unset; otherwise True only for
    the values 1/true/yes/on (case-insensitive, whitespace ignored).
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in {'1', 'true', 'yes', 'on'}


# Ollama Configuration for RAG
OLLAMA_URL = os.environ.get('OLLAMA_URL', 'http://localhost:11434')
OLLAMA_MODEL = os.environ.get('OLLAMA_MODEL', 'mistral')
OLLAMA_CONNECT_TIMEOUT = int(os.environ.get('OLLAMA_CONNECT_TIMEOUT', 10))
OLLAMA_READ_TIMEOUT = int(os.environ.get('OLLAMA_READ_TIMEOUT', 600))
OLLAMA_NUM_PREDICT = int(os.environ.get('OLLAMA_NUM_PREDICT', 1024))
OLLAMA_STREAM = _get_bool_env('OLLAMA_STREAM', True)

# Hugging Face Embeddings
HF_EMBEDDING_MODEL = os.environ.get('HF_EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
HF_EMBEDDING_DIMENSION = int(os.environ.get('HF_EMBEDDING_DIMENSION', 384))

# Qdrant Vector Database Configuration
QDRANT_URL = os.environ.get('QDRANT_URL', 'http://localhost:6333')
QDRANT_API_KEY = os.environ.get('QDRANT_API_KEY')
QDRANT_COLLECTION_NAME = os.environ.get('QDRANT_COLLECTION_NAME', 'meeting_transcripts')

# Default primary key field type
# https://docs.djangoproject.com/en/4.1/ref/settings/#default-auto-field

DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'

# Django-Q configuration
from urllib.parse import urlparse

redis_url = os.environ.get('REDIS_URL')
redis_config = None
if redis_url:
    parts = urlparse(redis_url)
    # Render Redis URLs use rediss:// (SSL)
    use_ssl = parts.scheme == 'rediss'
    db_component = (parts.path or '/0').lstrip('/')
    redis_config = {
        'host': parts.hostname,
        'port': parts.port or 6379,
        'db': int(db_component or 0),
        'password': parts.password,
        'ssl': use_ssl,
        # NOTE(review): ssl_cert_reqs=None appears to disable certificate
        # verification in redis-py — presumably intentional for Render's
        # managed Redis; confirm before relying on it elsewhere.
        'ssl_cert_reqs': None if use_ssl else False
    }

Q_CLUSTER = dict(
    name='videocaller',
    workers=2,
    recycle=500,
    timeout=1200,
    retry=1300,
    queue_limit=50,
    bulk=5,
)

if redis_config:
    # Use Redis broker when REDIS_URL is provided (recommended for production)
    Q_CLUSTER['redis'] = redis_config
else:
    # Fall back to database-backed ORM broker when no Redis is available
    Q_CLUSTER['orm'] = 'default'
"""videocaller URL Configuration.

Routes the Django admin and delegates every other path to the ``agora`` app.
See https://docs.djangoproject.com/en/4.1/topics/http/urls/ for the URL
dispatcher reference (function views, class-based views, and include()).
"""
from django.contrib import admin
from django.urls import include, path

urlpatterns = [
    # Built-in admin interface
    path('admin/', admin.site.urls),
    # All application routes live in the agora app
    path('', include('agora.urls')),
]