prashantdubeypng commited on
Commit
4db0a21
·
0 Parent(s):

Deploy Aimeet to HuggingFace Spaces

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .dockerignore +39 -0
  2. .github/workflows/ci.yml +165 -0
  3. .github/workflows/deploy.yml +41 -0
  4. .github/workflows/manual-deploy.yml +45 -0
  5. .github/workflows/security.yml +56 -0
  6. .gitignore +36 -0
  7. AWS_DEPLOYMENT.md +745 -0
  8. CI_CD_SETUP.md +296 -0
  9. DEPLOYMENT.md +149 -0
  10. DESIGN.md +993 -0
  11. Dockerfile +34 -0
  12. Dockerfile.huggingface +34 -0
  13. Dockerfile.worker +27 -0
  14. HF_DEPLOYMENT.md +147 -0
  15. Procfile +2 -0
  16. README.md +46 -0
  17. REQUIREMENTS.md +472 -0
  18. build.sh +16 -0
  19. docker-compose.yml +74 -0
  20. hf_space_README.md +46 -0
  21. nginx.conf +40 -0
  22. render.yaml +135 -0
  23. requirements-windows.txt +4 -0
  24. requirements.txt +24 -0
  25. runtime.txt +1 -0
  26. start.hf.sh +25 -0
  27. start.sh +13 -0
  28. videocaller/agora/__init__.py +0 -0
  29. videocaller/agora/admin.py +3 -0
  30. videocaller/agora/agenda_utils.py +90 -0
  31. videocaller/agora/agora_key/AccessToken.py +182 -0
  32. videocaller/agora/agora_key/RtcTokenBuilder.py +53 -0
  33. videocaller/agora/apps.py +6 -0
  34. videocaller/agora/assemblyai_utils.py +40 -0
  35. videocaller/agora/document_processing.py +188 -0
  36. videocaller/agora/embedding_utils.py +280 -0
  37. videocaller/agora/migrations/0001_initial.py +35 -0
  38. videocaller/agora/migrations/0002_meetingroom_recording_duration_and_more.py +33 -0
  39. videocaller/agora/migrations/0003_remove_meetingroom_recording_file_and_more.py +51 -0
  40. videocaller/agora/migrations/0004_chatmessage.py +28 -0
  41. videocaller/agora/migrations/0005_meetingroom_transcript_id_and_more.py +28 -0
  42. videocaller/agora/migrations/0006_meetingroom_chunks_created_at_and_more.py +62 -0
  43. videocaller/agora/migrations/0007_document_uploads.py +53 -0
  44. videocaller/agora/migrations/0008_document_upload_storage_path.py +19 -0
  45. videocaller/agora/migrations/0009_split_meeting_models.py +135 -0
  46. videocaller/agora/migrations/0010_documentupload_chunk_count.py +18 -0
  47. videocaller/agora/migrations/0011_meeting_agenda_point.py +26 -0
  48. videocaller/agora/migrations/0012_alter_meetingroom_room_code.py +15 -0
  49. videocaller/agora/migrations/__init__.py +0 -0
  50. videocaller/agora/models.py +191 -0
.dockerignore ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ env/
2
+ venv/
3
+ *.pyc
4
+ __pycache__/
5
+ *.pyo
6
+ *.pyd
7
+ .Python
8
+ db.sqlite3
9
+ db.sqlite3-journal
10
+ *.log
11
+ .env
12
+ .env.local
13
+ *.pot
14
+ *.mo
15
+ .git/
16
+ .github/
17
+ .gitignore
18
+ *.md
19
+ README.md
20
+ DEPLOYMENT.md
21
+ CI_CD_SETUP.md
22
+ AWS_DEPLOYMENT.md
23
+ REQUIREMENTS.md
24
+ DESIGN.md
25
+ build.sh
26
+ .vscode/
27
+ .idea/
28
+ *.swp
29
+ *.swo
30
+ *~
31
+ .DS_Store
32
+ Thumbs.db
33
+ render.yaml
34
+ Procfile
35
+ runtime.txt
36
+ requirements-windows.txt
37
+ media/
38
+ staticfiles/
39
+ node_modules/
.github/workflows/ci.yml ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: CI - Tests and Checks
2
+
3
+ on:
4
+ push:
5
+ branches: [ master, develop ]
6
+ pull_request:
7
+ branches: [ master, develop ]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+
13
+ services:
14
+ postgres:
15
+ image: postgres:15
16
+ env:
17
+ POSTGRES_USER: postgres
18
+ POSTGRES_PASSWORD: postgres
19
+ POSTGRES_DB: test_db
20
+ options: >-
21
+ --health-cmd pg_isready
22
+ --health-interval 10s
23
+ --health-timeout 5s
24
+ --health-retries 5
25
+ ports:
26
+ - 5432:5432
27
+
28
+ redis:
29
+ image: redis:7
30
+ options: >-
31
+ --health-cmd "redis-cli ping"
32
+ --health-interval 10s
33
+ --health-timeout 5s
34
+ --health-retries 5
35
+ ports:
36
+ - 6379:6379
37
+
38
+ steps:
39
+ - name: Checkout code
40
+ uses: actions/checkout@v4
41
+
42
+ - name: Set up Python 3.11
43
+ uses: actions/setup-python@v5
44
+ with:
45
+ python-version: '3.11'
46
+ cache: 'pip'
47
+
48
+ - name: Install dependencies
49
+ run: |
50
+ python -m pip install --upgrade pip
51
+ pip install -r requirements.txt
52
+ pip install flake8 black isort
53
+
54
+ - name: Run linting (flake8)
55
+ run: |
56
+ # Stop the build if there are Python syntax errors or undefined names
57
+ flake8 videocaller/agora --count --select=E9,F63,F7,F82 --show-source --statistics
58
+ # Exit-zero treats all errors as warnings
59
+ flake8 videocaller/agora --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
60
+ continue-on-error: true
61
+
62
+ - name: Check code formatting (black)
63
+ run: |
64
+ black --check videocaller/agora
65
+ continue-on-error: true
66
+
67
+ - name: Check import sorting (isort)
68
+ run: |
69
+ isort --check-only videocaller/agora
70
+ continue-on-error: true
71
+
72
+ - name: Run migrations
73
+ env:
74
+ DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_db
75
+ REDIS_URL: redis://localhost:6379/0
76
+ DJANGO_SECRET_KEY: test-secret-key-for-ci
77
+ DJANGO_DEBUG: 'true'
78
+ PUSHER_APP_ID: 'test-app-id'
79
+ PUSHER_KEY: 'test-key'
80
+ PUSHER_SECRET: 'test-secret'
81
+ PUSHER_CLUSTER: 'test-cluster'
82
+ run: |
83
+ cd videocaller
84
+ python manage.py migrate --no-input
85
+
86
+ - name: Run Django tests
87
+ env:
88
+ DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_db
89
+ REDIS_URL: redis://localhost:6379/0
90
+ DJANGO_SECRET_KEY: test-secret-key-for-ci
91
+ DJANGO_DEBUG: 'true'
92
+ PUSHER_APP_ID: 'test-app-id'
93
+ PUSHER_KEY: 'test-key'
94
+ PUSHER_SECRET: 'test-secret'
95
+ PUSHER_CLUSTER: 'test-cluster'
96
+ GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
97
+ QDRANT_URL: ${{ secrets.QDRANT_URL }}
98
+ QDRANT_API_KEY: ${{ secrets.QDRANT_API_KEY }}
99
+ run: |
100
+ cd videocaller
101
+ python manage.py test
102
+ continue-on-error: true
103
+
104
+ - name: Check for missing migrations
105
+ env:
106
+ DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_db
107
+ DJANGO_SECRET_KEY: test-secret-key-for-ci
108
+ PUSHER_APP_ID: 'test-app-id'
109
+ PUSHER_KEY: 'test-key'
110
+ PUSHER_SECRET: 'test-secret'
111
+ PUSHER_CLUSTER: 'test-cluster'
112
+ run: |
113
+ cd videocaller
114
+ python manage.py makemigrations --check --dry-run --no-input
115
+
116
+ security:
117
+ runs-on: ubuntu-latest
118
+
119
+ steps:
120
+ - name: Checkout code
121
+ uses: actions/checkout@v4
122
+
123
+ - name: Set up Python 3.11
124
+ uses: actions/setup-python@v5
125
+ with:
126
+ python-version: '3.11'
127
+
128
+ - name: Install safety
129
+ run: |
130
+ pip install safety
131
+
132
+ - name: Check for security vulnerabilities
133
+ run: |
134
+ safety check --json -r requirements.txt || true
135
+ continue-on-error: true
136
+
137
+ build:
138
+ runs-on: ubuntu-latest
139
+ needs: [test, security]
140
+
141
+ steps:
142
+ - name: Checkout code
143
+ uses: actions/checkout@v4
144
+
145
+ - name: Set up Python 3.11
146
+ uses: actions/setup-python@v5
147
+ with:
148
+ python-version: '3.11'
149
+ cache: 'pip'
150
+
151
+ - name: Install dependencies
152
+ run: |
153
+ python -m pip install --upgrade pip
154
+ pip install -r requirements.txt
155
+
156
+ - name: Collect static files
157
+ env:
158
+ DJANGO_SECRET_KEY: test-secret-key-for-ci
159
+ DJANGO_DEBUG: 'false'
160
+ run: |
161
+ cd videocaller
162
+ python manage.py collectstatic --no-input
163
+
164
+ - name: Build validation complete
165
+ run: echo "✅ All checks passed!"
.github/workflows/deploy.yml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: CD - Deploy to Render
2
+
3
+ on:
4
+ push:
5
+ branches: [ master ]
6
+ workflow_dispatch:
7
+
8
+ jobs:
9
+ deploy:
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - name: Checkout code
14
+ uses: actions/checkout@v4
15
+
16
+ - name: Deploy to Render
17
+ uses: johnbeynon/render-deploy-action@v0.0.8
18
+ with:
19
+ service-id: ${{ secrets.RENDER_SERVICE_ID }}
20
+ api-key: ${{ secrets.RENDER_API_KEY }}
21
+
22
+ - name: Wait for deployment
23
+ run: |
24
+ echo "⏳ Waiting for Render deployment to complete..."
25
+ sleep 60
26
+
27
+ - name: Health check
28
+ run: |
29
+ echo "🔍 Running health check..."
30
+ response=$(curl -s -o /dev/null -w "%{http_code}" ${{ secrets.RENDER_APP_URL }}/api/health/google/ || echo "000")
31
+ if [ "$response" = "200" ] || [ "$response" = "503" ]; then
32
+ echo "✅ App is responding (HTTP $response)"
33
+ else
34
+ echo "⚠️ App returned HTTP $response - may need investigation"
35
+ exit 0
36
+ fi
37
+
38
+ - name: Deployment notification
39
+ run: |
40
+ echo "🚀 Deployment to Render completed!"
41
+ echo "🌐 App URL: ${{ secrets.RENDER_APP_URL }}"
.github/workflows/manual-deploy.yml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Manual Deploy
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ inputs:
6
+ environment:
7
+ description: 'Environment to deploy to'
8
+ required: true
9
+ default: 'production'
10
+ type: choice
11
+ options:
12
+ - production
13
+ - staging
14
+
15
+ jobs:
16
+ deploy:
17
+ runs-on: ubuntu-latest
18
+ environment: ${{ github.event.inputs.environment }}
19
+
20
+ steps:
21
+ - name: Checkout code
22
+ uses: actions/checkout@v4
23
+
24
+ - name: Set deployment message
25
+ run: |
26
+ echo "🚀 Deploying to ${{ github.event.inputs.environment }}..."
27
+
28
+ - name: Deploy to Render
29
+ uses: johnbeynon/render-deploy-action@v0.0.8
30
+ with:
31
+ service-id: ${{ secrets.RENDER_SERVICE_ID }}
32
+ api-key: ${{ secrets.RENDER_API_KEY }}
33
+
34
+ - name: Post-deployment health check
35
+ run: |
36
+ echo "⏳ Waiting for deployment..."
37
+ sleep 60
38
+ echo "🔍 Running health check..."
39
+ response=$(curl -s -o /dev/null -w "%{http_code}" ${{ secrets.RENDER_APP_URL }}/api/health/google/ || echo "000")
40
+ echo "Health check returned: HTTP $response"
41
+
42
+ - name: Deployment complete
43
+ run: |
44
+ echo "✅ Deployment to ${{ github.event.inputs.environment }} completed!"
45
+ echo "🌐 App URL: ${{ secrets.RENDER_APP_URL }}"
.github/workflows/security.yml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Weekly Security Scan
2
+
3
+ on:
4
+ schedule:
5
+ # Run every Monday at 9 AM UTC
6
+ - cron: '0 9 * * 1'
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ security-scan:
11
+ runs-on: ubuntu-latest
12
+
13
+ steps:
14
+ - name: Checkout code
15
+ uses: actions/checkout@v4
16
+
17
+ - name: Set up Python 3.11
18
+ uses: actions/setup-python@v5
19
+ with:
20
+ python-version: '3.11'
21
+
22
+ - name: Install security tools
23
+ run: |
24
+ pip install safety bandit
25
+
26
+ - name: Run Safety check
27
+ run: |
28
+ safety check --json -r requirements.txt > safety-report.json || true
29
+ cat safety-report.json
30
+
31
+ - name: Run Bandit security linter
32
+ run: |
33
+ bandit -r videocaller/agora -f json -o bandit-report.json || true
34
+ cat bandit-report.json
35
+
36
+ - name: Upload security reports
37
+ uses: actions/upload-artifact@v4
38
+ with:
39
+ name: security-reports
40
+ path: |
41
+ safety-report.json
42
+ bandit-report.json
43
+ retention-days: 30
44
+
45
+ - name: Create issue if vulnerabilities found
46
+ if: failure()
47
+ uses: actions/github-script@v7
48
+ with:
49
+ script: |
50
+ github.rest.issues.create({
51
+ owner: context.repo.owner,
52
+ repo: context.repo.repo,
53
+ title: '🚨 Security Vulnerabilities Detected',
54
+ body: 'Security scan found vulnerabilities. Check the workflow artifacts for details.',
55
+ labels: ['security', 'automated']
56
+ })
.gitignore ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyd
5
+ *.so
6
+ *.egg-info/
7
+ .dist/
8
+ .build/
9
+ .venv/
10
+ venv/
11
+ ENV/
12
+ env/
13
+
14
+ # Django
15
+ *.sqlite3
16
+ /media/
17
+ /staticfiles/
18
+
19
+ # Environment variables
20
+ .env
21
+ **/.env
22
+
23
+ # Logs
24
+ *.log
25
+ *.webm
26
+ # Media uploads
27
+ *.pdf
28
+ *.mp3
29
+
30
+ # OS files
31
+ .DS_Store
32
+ Thumbs.db
33
+
34
+ # IDE
35
+ .vscode/
36
+ .idea/
AWS_DEPLOYMENT.md ADDED
@@ -0,0 +1,745 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AWS Deployment Guide - Complete Steps
2
+
3
+ ## Prerequisites
4
+
5
+ ### 1. AWS Account Setup
6
+ - [ ] Create AWS account at https://aws.amazon.com
7
+ - [ ] Enable billing alerts
8
+ - [ ] Create IAM user with admin access (don't use root)
9
+ - [ ] Install AWS CLI: `aws configure`
10
+
11
+ ### 2. Local Requirements
12
+ - [ ] Git installed
13
+ - [ ] Docker installed (for testing containers)
14
+ - [ ] AWS CLI installed
15
+ - [ ] EB CLI installed: `pip install awsebcli`
16
+
17
+ ---
18
+
19
+ ## Step 1: Prepare Your Application
20
+
21
+ ### A. Create Production Requirements
22
+ ```bash
23
+ # Already done - verify requirements.txt has:
24
+ # - psycopg2-binary (PostgreSQL)
25
+ # - gunicorn (WSGI server)
26
+ # - whitenoise (static files)
27
+ # - dj-database-url (DB config)
28
+ ```
29
+
30
+ ### B. Create Dockerfile
31
+ ```dockerfile
32
+ FROM python:3.11-slim
33
+
34
+ # Set environment variables
35
+ ENV PYTHONUNBUFFERED=1 \
36
+ PYTHONDONTWRITEBYTECODE=1 \
37
+ PIP_NO_CACHE_DIR=1
38
+
39
+ # Install system dependencies
40
+ RUN apt-get update && apt-get install -y \
41
+ gcc \
42
+ postgresql-client \
43
+ && rm -rf /var/lib/apt/lists/*
44
+
45
+ # Set work directory
46
+ WORKDIR /app
47
+
48
+ # Install Python dependencies
49
+ COPY requirements.txt .
50
+ RUN pip install --upgrade pip && \
51
+ pip install -r requirements.txt
52
+
53
+ # Copy project
54
+ COPY videocaller/ ./videocaller/
55
+
56
+ # Collect static files
57
+ WORKDIR /app/videocaller
58
+ RUN python manage.py collectstatic --no-input
59
+
60
+ # Run migrations and start server
61
+ CMD ["daphne", "-b", "0.0.0.0", "-p", "8000", "videocaller.asgi:application"]
62
+ ```
63
+
64
+ ### C. Create .dockerignore
65
+ ```
66
+ env/
67
+ venv/
68
+ *.pyc
69
+ __pycache__/
70
+ db.sqlite3
71
+ .env
72
+ .git/
73
+ .github/
74
+ *.md
75
+ build.sh
76
+ ```
77
+
78
+ ---
79
+
80
+ ## Step 2: AWS Services Setup
81
+
82
+ ### A. Create RDS PostgreSQL Database
83
+
84
+ 1. **Go to RDS Console**
85
+ - Navigate to https://console.aws.amazon.com/rds
86
+
87
+ 2. **Create Database**
88
+ ```
89
+ Choose: PostgreSQL 15
90
+ Template: Free tier (or Production for real apps)
91
+
92
+ Settings:
93
+ - DB instance identifier: aimeet-db
94
+ - Master username: postgres
95
+ - Master password: [generate strong password]
96
+
97
+ Instance configuration:
98
+ - DB instance class: db.t3.micro (free tier)
99
+
100
+ Storage:
101
+ - Allocated storage: 20 GB
102
+ - Storage autoscaling: Enable
103
+
104
+ Connectivity:
105
+ - VPC: Default
106
+ - Public access: Yes (for now, restrict later)
107
+ - VPC security group: Create new
108
+ - Database port: 5432
109
+
110
+ Database authentication:
111
+ - Password authentication
112
+
113
+ Additional configuration:
114
+ - Initial database name: aimeet
115
+ - Automated backups: Enable (7 days retention)
116
+ ```
117
+
118
+ 3. **Note the endpoint** (e.g., `aimeet-db.xxxxx.us-east-1.rds.amazonaws.com`)
119
+
120
+ ### B. Create ElastiCache Redis
121
+
122
+ 1. **Go to ElastiCache Console**
123
+ - Navigate to https://console.aws.amazon.com/elasticache
124
+
125
+ 2. **Create Redis Cluster**
126
+ ```
127
+ Cluster mode: Disabled
128
+ Engine: Redis 7.x
129
+
130
+ Cluster info:
131
+ - Name: aimeet-redis
132
+ - Engine version: 7.0
133
+ - Port: 6379
134
+ - Node type: cache.t3.micro
135
+ - Number of replicas: 0 (for dev/test)
136
+
137
+ Subnet group: Default
138
+ Security groups: Create new or use default
139
+ ```
140
+
141
+ 3. **Note the endpoint** (e.g., `aimeet-redis.xxxxx.cache.amazonaws.com:6379`)
142
+
143
+ ### C. Create S3 Bucket (Already exists for recordings)
144
+
145
+ 1. **Verify your S3 bucket** from AGORA_STORAGE_BUCKET_NAME
146
+ 2. **Set CORS policy** if needed for uploads
147
+
148
+ ### D. Create Application Load Balancer (ALB)
149
+
150
+ 1. **Go to EC2 > Load Balancers**
151
+
152
+ 2. **Create ALB**
153
+ ```
154
+ Type: Application Load Balancer
155
+ Name: aimeet-alb
156
+ Scheme: Internet-facing
157
+ IP address type: IPv4
158
+
159
+ Network mapping:
160
+ - VPC: Default
161
+ - Availability Zones: Select 2+ zones
162
+
163
+ Security groups: Create new
164
+ - Name: aimeet-alb-sg
165
+ - Inbound: HTTP (80), HTTPS (443)
166
+
167
+ Listeners:
168
+ - HTTP:80 → Forward to target group (create below)
169
+ - HTTPS:443 → Forward to target group (needs SSL cert)
170
+ ```
171
+
172
+ 3. **Create Target Group**
173
+ ```
174
+ Type: Instances
175
+ Name: aimeet-targets
176
+ Protocol: HTTP
177
+ Port: 8000
178
+ VPC: Default
179
+
180
+ Health check:
181
+ - Protocol: HTTP
182
+ - Path: /
183
+ - Interval: 30 seconds
184
+ ```
185
+
186
+ ### E. Request SSL Certificate (ACM)
187
+
188
+ 1. **Go to Certificate Manager**
189
+ - Region: Same as ALB
190
+
191
+ 2. **Request Certificate**
192
+ ```
193
+ Domain: yourdomain.com
194
+ Validation: DNS (recommended)
195
+
196
+ Add to DNS:
197
+ - Copy CNAME records to your domain registrar
198
+ - Wait for validation (~5-30 minutes)
199
+ ```
200
+
201
+ 3. **Attach to ALB**
202
+ - Edit HTTPS:443 listener
203
+ - Select your certificate
204
+
205
+ ---
206
+
207
+ ## Step 3: Deploy Application
208
+
209
+ ### Option A: Deploy with Elastic Beanstalk (Easiest)
210
+
211
+ #### A.1 Initialize EB
212
+ ```bash
213
+ cd c:\dev\Django-VIdeocall-App
214
+
215
+ # Initialize
216
+ eb init
217
+
218
+ # Prompts:
219
+ # Region: us-east-1
220
+ # Application name: aimeet
221
+ # Platform: Docker
222
+ # SSH: Yes (generate keypair)
223
+ ```
224
+
225
+ #### A.2 Create Environment
226
+ ```bash
227
+ eb create production
228
+
229
+ # Environment name: aimeet-production
230
+ # DNS CNAME: aimeet (will be aimeet.us-east-1.elasticbeanstalk.com)
231
+ # Load balancer: Application
232
+ ```
233
+
234
+ #### A.3 Configure Environment Variables
235
+ ```bash
236
+ eb setenv \
237
+ DJANGO_SECRET_KEY="your-secret-key" \
238
+ DJANGO_DEBUG="false" \
239
+ DJANGO_ALLOWED_HOSTS="aimeet-production.us-east-1.elasticbeanstalk.com,yourdomain.com" \
240
+ DATABASE_URL="postgresql://postgres:password@aimeet-db.xxxxx.us-east-1.rds.amazonaws.com:5432/aimeet" \
241
+ REDIS_URL="redis://aimeet-redis.xxxxx.cache.amazonaws.com:6379/0" \
242
+ AWS_ACCESS_KEY_ID="your-key" \
243
+ AWS_SECRET_ACCESS_KEY="your-secret" \
244
+ AWS_STORAGE_BUCKET_NAME="your-bucket" \
245
+ AGORA_APP_ID="..." \
246
+ AGORA_APP_Certificate="..." \
247
+ ASSEMBLYAI_API_KEY="..." \
248
+ GOOGLE_API_KEY="..." \
249
+ QDRANT_URL="..." \
250
+ QDRANT_API_KEY="..." \
251
+ PUSHER_APP_ID="..." \
252
+ PUSHER_KEY="..." \
253
+ PUSHER_SECRET="..." \
254
+ PUSHER_CLUSTER="..."
255
+ ```
256
+
257
+ #### A.4 Deploy
258
+ ```bash
259
+ eb deploy
260
+ ```
261
+
262
+ #### A.5 Run Migrations
263
+ ```bash
264
+ eb ssh
265
+ cd /var/app/current/videocaller
266
+ python manage.py migrate
267
+ python manage.py createsuperuser
268
+ exit
269
+ ```
270
+
271
+ ### Option B: Deploy with ECS Fargate (More scalable)
272
+
273
+ #### B.1 Build and Push Docker Image
274
+ ```bash
275
+ # Build
276
+ docker build -t aimeet:latest .
277
+
278
+ # Tag for ECR
279
+ aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin YOUR_ACCOUNT_ID.dkr.ecr.us-east-1.amazonaws.com
280
+
281
+ # Create repository
282
+ aws ecr create-repository --repository-name aimeet --region us-east-1
283
+
284
+ # Tag and push
285
+ docker tag aimeet:latest YOUR_ACCOUNT_ID.dkr.ecr.us-east-1.amazonaws.com/aimeet:latest
286
+ docker push YOUR_ACCOUNT_ID.dkr.ecr.us-east-1.amazonaws.com/aimeet:latest
287
+ ```
288
+
289
+ #### B.2 Create ECS Cluster
290
+ ```bash
291
+ aws ecs create-cluster --cluster-name aimeet-cluster --region us-east-1
292
+ ```
293
+
294
+ #### B.3 Create Task Definition
295
+ Create `ecs-task-definition.json`:
296
+ ```json
297
+ {
298
+ "family": "aimeet-task",
299
+ "networkMode": "awsvpc",
300
+ "requiresCompatibilities": ["FARGATE"],
301
+ "cpu": "512",
302
+ "memory": "1024",
303
+ "containerDefinitions": [
304
+ {
305
+ "name": "aimeet-web",
306
+ "image": "YOUR_ACCOUNT_ID.dkr.ecr.us-east-1.amazonaws.com/aimeet:latest",
307
+ "portMappings": [
308
+ {
309
+ "containerPort": 8000,
310
+ "protocol": "tcp"
311
+ }
312
+ ],
313
+ "environment": [
314
+ {"name": "DJANGO_DEBUG", "value": "false"}
315
+ ],
316
+ "secrets": [
317
+ {"name": "DJANGO_SECRET_KEY", "valueFrom": "arn:aws:secretsmanager:..."},
318
+ {"name": "DATABASE_URL", "valueFrom": "arn:aws:secretsmanager:..."}
319
+ ],
320
+ "logConfiguration": {
321
+ "logDriver": "awslogs",
322
+ "options": {
323
+ "awslogs-group": "/ecs/aimeet",
324
+ "awslogs-region": "us-east-1",
325
+ "awslogs-stream-prefix": "ecs"
326
+ }
327
+ }
328
+ }
329
+ ]
330
+ }
331
+ ```
332
+
333
+ Register:
334
+ ```bash
335
+ aws ecs register-task-definition --cli-input-json file://ecs-task-definition.json
336
+ ```
337
+
338
+ #### B.4 Create ECS Service
339
+ ```bash
340
+ aws ecs create-service \
341
+ --cluster aimeet-cluster \
342
+ --service-name aimeet-service \
343
+ --task-definition aimeet-task \
344
+ --desired-count 2 \
345
+ --launch-type FARGATE \
346
+ --network-configuration "awsvpcConfiguration={subnets=[subnet-xxx,subnet-yyy],securityGroups=[sg-xxx],assignPublicIp=ENABLED}" \
347
+ --load-balancers "targetGroupArn=arn:aws:elasticloadbalancing:...,containerName=aimeet-web,containerPort=8000"
348
+ ```
349
+
350
+ ### Option C: Deploy on EC2 (Manual, most control)
351
+
352
+ #### C.1 Launch EC2 Instance
353
+ ```
354
+ AMI: Ubuntu 22.04 LTS
355
+ Instance type: t3.medium
356
+ Security group: Allow 22 (SSH), 8000 (Daphne), 80, 443
357
+ Key pair: Create/select for SSH
358
+ Storage: 30 GB
359
+ ```
360
+
361
+ #### C.2 SSH and Setup
362
+ ```bash
363
+ ssh -i your-key.pem ubuntu@your-ec2-ip
364
+
365
+ # Update system
366
+ sudo apt update && sudo apt upgrade -y
367
+
368
+ # Install dependencies
369
+ sudo apt install -y python3.11 python3.11-venv python3-pip git nginx postgresql-client redis-tools
370
+
371
+ # Clone repo
372
+ git clone https://github.com/prashantdubeypng/Aimeet.git
373
+ cd Aimeet
374
+
375
+ # Create virtual environment
376
+ python3.11 -m venv env
377
+ source env/bin/activate
378
+
379
+ # Install dependencies
380
+ pip install --upgrade pip
381
+ pip install -r requirements.txt
382
+
383
+ # Set environment variables
384
+ sudo nano /etc/environment
385
+ # Add all env vars
386
+
387
+ # Run migrations
388
+ cd videocaller
389
+ python manage.py migrate
390
+ python manage.py createsuperuser
391
+ python manage.py collectstatic --no-input
392
+
393
+ # Create systemd service
394
+ sudo nano /etc/systemd/system/aimeet.service
395
+ ```
396
+
397
+ `/etc/systemd/system/aimeet.service`:
398
+ ```ini
399
+ [Unit]
400
+ Description=AIMeet Daphne Service
401
+ After=network.target
402
+
403
+ [Service]
404
+ User=ubuntu
405
+ Group=ubuntu
406
+ WorkingDirectory=/home/ubuntu/Aimeet/videocaller
407
+ EnvironmentFile=/etc/environment
408
+ ExecStart=/home/ubuntu/Aimeet/env/bin/daphne -b 0.0.0.0 -p 8000 videocaller.asgi:application
409
+ Restart=always
410
+
411
+ [Install]
412
+ WantedBy=multi-user.target
413
+ ```
414
+
415
+ ```bash
416
+ # Start service
417
+ sudo systemctl daemon-reload
418
+ sudo systemctl enable aimeet
419
+ sudo systemctl start aimeet
420
+
421
+ # Configure Nginx
422
+ sudo nano /etc/nginx/sites-available/aimeet
423
+ ```
424
+
425
+ `/etc/nginx/sites-available/aimeet`:
426
+ ```nginx
427
+ upstream django {
428
+ server 127.0.0.1:8000;
429
+ }
430
+
431
+ server {
432
+ listen 80;
433
+ server_name yourdomain.com;
434
+
435
+ location / {
436
+ proxy_pass http://django;
437
+ proxy_http_version 1.1;
438
+ proxy_set_header Upgrade $http_upgrade;
439
+ proxy_set_header Connection "upgrade";
440
+ proxy_set_header Host $host;
441
+ proxy_set_header X-Real-IP $remote_addr;
442
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
443
+ proxy_set_header X-Forwarded-Proto $scheme;
444
+ }
445
+
446
+ location /static/ {
447
+ alias /home/ubuntu/Aimeet/videocaller/staticfiles/;
448
+ }
449
+
450
+ location /media/ {
451
+ alias /home/ubuntu/Aimeet/videocaller/media/;
452
+ }
453
+ }
454
+ ```
455
+
456
+ ```bash
457
+ # Enable site
458
+ sudo ln -s /etc/nginx/sites-available/aimeet /etc/nginx/sites-enabled/
459
+ sudo nginx -t
460
+ sudo systemctl restart nginx
461
+
462
+ # Install SSL (Let's Encrypt)
463
+ sudo apt install certbot python3-certbot-nginx
464
+ sudo certbot --nginx -d yourdomain.com
465
+ ```
466
+
467
+ ---
468
+
469
+ ## Step 4: Configure Domain (Route 53)
470
+
471
+ ### A. Create Hosted Zone
472
+ 1. Go to Route 53
473
+ 2. Create hosted zone: `yourdomain.com`
474
+ 3. Note the nameservers
475
+
476
+ ### B. Update Domain Registrar
477
+ 1. Go to your domain registrar (GoDaddy, Namecheap, etc.)
478
+ 2. Update nameservers to Route 53's NS records
479
+
480
+ ### C. Create DNS Records
481
+ ```
482
+ Type: A
483
+ Name: @ (or blank)
484
+ Value: [ALB DNS or EC2 Elastic IP]
485
+ TTL: 300
486
+
487
+ Type: CNAME
488
+ Name: www
489
+ Value: yourdomain.com
490
+ TTL: 300
491
+ ```
492
+
493
+ ---
494
+
495
+ ## Step 5: Setup CloudWatch Monitoring
496
+
497
+ ### A. Enable CloudWatch Logs
498
+ ```bash
499
+ # For EB
500
+ eb logs --cloudwatch-logs enable
501
+
502
+ # For ECS - already enabled in task definition
503
+
504
+ # For EC2 - install CloudWatch agent
505
+ wget https://s3.amazonaws.com/amazoncloudwatch-agent/ubuntu/amd64/latest/amazon-cloudwatch-agent.deb
506
+ sudo dpkg -i amazon-cloudwatch-agent.deb
507
+ ```
508
+
509
+ ### B. Create Alarms
510
+ ```
511
+ Metric: RDSCPUUtilization > 80%
512
+ Metric: ALBTargetResponseTime > 2s
513
+ Metric: EC2StatusCheckFailed
514
+ Action: Send SNS notification
515
+ ```
516
+
517
+ ---
518
+
519
+ ## Step 6: Setup Auto-Scaling (Optional)
520
+
521
+ ### For EB
522
+ ```bash
523
+ eb scale 2 # Start with 2 instances
524
+
525
+ # Configure auto-scaling
526
+ eb config
527
+ # Set min instances: 2
528
+ # Set max instances: 10
529
+ # Scaling trigger: CPU > 70%
530
+ ```
531
+
532
+ ### For ECS
533
+ ```bash
534
+ aws application-autoscaling register-scalable-target \
535
+ --service-namespace ecs \
536
+ --scalable-dimension ecs:service:DesiredCount \
537
+ --resource-id service/aimeet-cluster/aimeet-service \
538
+ --min-capacity 2 \
539
+ --max-capacity 10
540
+ ```
541
+
542
+ ---
543
+
544
+ ## Step 7: Backup Strategy
545
+
546
+ ### A. RDS Automated Backups
547
+ - Already enabled (7-day retention)
548
+ - Take manual snapshots before major changes
549
+
550
+ ### B. S3 Versioning
551
+ ```bash
552
+ aws s3api put-bucket-versioning \
553
+ --bucket your-bucket \
554
+ --versioning-configuration Status=Enabled
555
+ ```
556
+
557
+ ### C. Database Snapshots
558
+ ```bash
559
+ # Manual snapshot
560
+ aws rds create-db-snapshot \
561
+ --db-instance-identifier aimeet-db \
562
+ --db-snapshot-identifier aimeet-db-backup-$(date +%Y%m%d)
563
+ ```
564
+
565
+ ---
566
+
567
+ ## Step 8: Security Hardening
568
+
569
+ ### A. IAM Roles
570
+ - Create role for EC2/ECS with minimal permissions
571
+ - Don't use root credentials
572
+
573
+ ### B. Security Groups
574
+ ```
575
+ RDS Security Group:
576
+ - Inbound: PostgreSQL (5432) from EC2/ECS security group only
577
+
578
+ Redis Security Group:
579
+ - Inbound: Redis (6379) from EC2/ECS security group only
580
+
581
+ EC2/ECS Security Group:
582
+ - Inbound: HTTP/HTTPS from ALB only
583
+ - Inbound: SSH from your IP only
584
+
585
+ ALB Security Group:
586
+ - Inbound: HTTP (80), HTTPS (443) from 0.0.0.0/0
587
+ ```
588
+
589
+ ### C. Enable WAF (Optional)
590
+ ```
591
+ Go to AWS WAF
592
+ Create Web ACL
593
+ Attach to ALB
594
+ Enable managed rule sets:
595
+ - AWS-AWSManagedRulesCommonRuleSet
596
+ - AWS-AWSManagedRulesKnownBadInputsRuleSet
597
+ ```
598
+
599
+ ---
600
+
601
+ ## Step 9: CI/CD with GitHub Actions
602
+
603
+ Already configured in `.github/workflows/deploy.yml` but update for AWS:
604
+
605
+ ```yaml
606
+ name: Deploy to AWS
607
+
608
+ on:
609
+ push:
610
+ branches: [master]
611
+
612
+ jobs:
613
+ deploy:
614
+ runs-on: ubuntu-latest
615
+ steps:
616
+ - uses: actions/checkout@v4
617
+
618
+ - name: Configure AWS credentials
619
+ uses: aws-actions/configure-aws-credentials@v4
620
+ with:
621
+ aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
622
+ aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
623
+ aws-region: us-east-1
624
+
625
+ - name: Deploy to Elastic Beanstalk
626
+ run: |
627
+ pip install awsebcli
628
+ eb deploy production --staged
629
+ ```
630
+
631
+ ---
632
+
633
+ ## Step 10: Cost Optimization
634
+
635
+ ### Development/Testing
636
+ ```
637
+ RDS: db.t3.micro (Free tier eligible)
638
+ ElastiCache: cache.t3.micro
639
+ EC2: t3.small
640
+ Total: ~$50-70/month
641
+ ```
642
+
643
+ ### Production (Low traffic)
644
+ ```
645
+ RDS: db.t3.small with Multi-AZ
646
+ ElastiCache: cache.t3.small with replication
647
+ EC2/ECS: 2x t3.medium
648
+ ALB: ~$16/month
649
+ Total: ~$150-200/month
650
+ ```
651
+
652
+ ### Cost Saving Tips
653
+ - Use Reserved Instances (save 30-60%)
654
+ - Enable RDS auto-scaling storage
655
+ - Use S3 Intelligent-Tiering
656
+ - Set CloudWatch alarms for billing
657
+ - Use Spot Instances for non-critical workloads
658
+
659
+ ---
660
+
661
+ ## Troubleshooting
662
+
663
+ ### Database Connection Issues
664
+ ```bash
665
+ # Test from EC2
666
+ telnet aimeet-db.xxxxx.rds.amazonaws.com 5432
667
+
668
+ # Check security groups
669
+ # Ensure EC2 security group is allowed in RDS inbound rules
670
+ ```
671
+
672
+ ### Static Files Not Loading
673
+ ```bash
674
+ # Ensure STATIC_ROOT is set
675
+ # Run collectstatic
676
+ python manage.py collectstatic --no-input
677
+
678
+ # Check Nginx config
679
+ # Verify WhiteNoise middleware order
680
+ ```
681
+
682
+ ### WebSocket Connection Fails
683
+ ```bash
684
+ # Ensure ALB supports WebSocket
685
+ # Check target group health
686
+ # Verify Daphne is running (not Gunicorn)
687
+ ```
688
+
689
+ ---
690
+
691
+ ## Final Checklist
692
+
693
+ - [ ] RDS database created and accessible
694
+ - [ ] Redis cluster running
695
+ - [ ] S3 bucket configured
696
+ - [ ] Application deployed (EB/ECS/EC2)
697
+ - [ ] Migrations run
698
+ - [ ] Superuser created
699
+ - [ ] SSL certificate installed
700
+ - [ ] Domain pointed to ALB/EC2
701
+ - [ ] Environment variables set
702
+ - [ ] CloudWatch monitoring enabled
703
+ - [ ] Backups configured
704
+ - [ ] Security groups hardened
705
+ - [ ] CI/CD pipeline tested
706
+ - [ ] Cost alerts set
707
+
708
+ ---
709
+
710
+ ## Quick Deployment Commands
711
+
712
+ ```bash
713
+ # Elastic Beanstalk (recommended for quick start)
714
+ eb init
715
+ eb create production
716
+ eb setenv [all env vars]
717
+ eb deploy
718
+ eb ssh
719
+ cd /var/app/current/videocaller
720
+ python manage.py migrate
721
+ python manage.py createsuperuser
722
+
723
+ # Access logs
724
+ eb logs
725
+
726
+ # Check status
727
+ eb status
728
+
729
+ # Terminate (be careful!)
730
+ eb terminate production
731
+ ```
732
+
733
+ ---
734
+
735
+ **Estimated Setup Time:**
736
+ - EB Deploy: 2-3 hours
737
+ - ECS Deploy: 4-6 hours
738
+ - EC2 Manual: 6-8 hours
739
+
740
+ **Support Resources:**
741
+ - AWS Documentation: https://docs.aws.amazon.com
742
+ - Elastic Beanstalk Guide: https://docs.aws.amazon.com/elasticbeanstalk
743
+ - Django Deployment: https://docs.djangoproject.com/en/stable/howto/deployment/
744
+
745
+ Good luck with your AWS deployment! 🚀
CI_CD_SETUP.md ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CI/CD Setup Guide
2
+
3
+ ## Overview
4
+ This project uses **GitHub Actions** for Continuous Integration and Continuous Deployment.
5
+
6
+ ---
7
+
8
+ ## Workflows
9
+
10
+ ### 1. **CI - Tests and Checks** (`ci.yml`)
11
+ **Triggers:** Push or PR to `master` or `develop`
12
+
13
+ **What it does:**
14
+ - ✅ Runs Python linting (flake8)
15
+ - ✅ Checks code formatting (black, isort)
16
+ - ✅ Runs Django tests
17
+ - ✅ Checks for missing migrations
18
+ - ✅ Security vulnerability scan
19
+ - ✅ Builds and validates static files
20
+
21
+ **Services:**
22
+ - PostgreSQL 15
23
+ - Redis 7
24
+
25
+ ---
26
+
27
+ ### 2. **CD - Deploy to Render** (`deploy.yml`)
28
+ **Triggers:** Push to `master` or manual trigger
29
+
30
+ **What it does:**
31
+ - 🚀 Automatically deploys to Render
32
+ - 🔍 Runs post-deployment health check
33
+ - 📢 Notifies deployment status
34
+
35
+ ---
36
+
37
+ ### 3. **Manual Deploy** (`manual-deploy.yml`)
38
+ **Triggers:** Manual trigger via GitHub Actions UI
39
+
40
+ **What it does:**
41
+ - 🚀 Deploy to production or staging
42
+ - 🔍 Post-deployment health check
43
+
44
+ **Usage:**
45
+ 1. Go to **Actions** tab in GitHub
46
+ 2. Select "Manual Deploy"
47
+ 3. Click **Run workflow**
48
+ 4. Choose environment (production/staging)
49
+
50
+ ---
51
+
52
+ ### 4. **Weekly Security Scan** (`security.yml`)
53
+ **Triggers:** Every Monday at 9 AM UTC or manual trigger
54
+
55
+ **What it does:**
56
+ - 🔒 Scans dependencies for vulnerabilities (Safety)
57
+ - 🔒 Code security analysis (Bandit)
58
+ - 📊 Uploads reports as artifacts
59
+ - 🚨 Creates GitHub issue if vulnerabilities found
60
+
61
+ ---
62
+
63
+ ## Setup Instructions
64
+
65
+ ### 1. Configure GitHub Secrets
66
+ Go to **Settings** → **Secrets and variables** → **Actions** → **New repository secret**
67
+
68
+ **Required secrets:**
69
+
70
+ ```bash
71
+ RENDER_SERVICE_ID # Get from Render dashboard
72
+ RENDER_API_KEY # Generate at https://dashboard.render.com/u/settings#api-keys
73
+ RENDER_APP_URL # Your app URL (e.g., https://aimeet.onrender.com)
74
+ ```
75
+
76
+ **Optional (for tests):**
77
+ ```bash
78
+ GOOGLE_API_KEY # For running integration tests
79
+ QDRANT_URL # For running RAG tests
80
+ QDRANT_API_KEY # For Qdrant tests
81
+ ```
82
+
83
+ ### 2. Get Render Credentials
84
+
85
+ #### **Service ID:**
86
+ 1. Go to https://dashboard.render.com
87
+ 2. Open your web service
88
+ 3. URL will look like: `https://dashboard.render.com/web/srv-xxxxxxxxxxxxx`
89
+ 4. Copy the `srv-xxxxxxxxxxxxx` part
90
+
91
+ #### **API Key:**
92
+ 1. Go to https://dashboard.render.com/u/settings#api-keys
93
+ 2. Click **Generate New Key**
94
+ 3. Name it: `GitHub Actions`
95
+ 4. Copy the key (only shown once!)
96
+
97
+ ### 3. Enable GitHub Actions
98
+ 1. Go to your repo → **Actions** tab
99
+ 2. Click **"I understand my workflows, go ahead and enable them"**
100
+
101
+ ---
102
+
103
+ ## How Auto-Deploy Works
104
+
105
+ ```
106
+ Push to master
107
+
108
+ GitHub Actions triggers
109
+
110
+ Runs CI tests (optional, can skip)
111
+
112
+ Calls Render API to deploy
113
+
114
+ Render pulls latest code
115
+
116
+ Runs build.sh (migrations, static files)
117
+
118
+ Restarts services
119
+
120
+ Health check runs
121
+
122
+ ✅ Deployment complete!
123
+ ```
124
+
125
+ ---
126
+
127
+ ## Branch Protection (Recommended)
128
+
129
+ ### Protect `master` branch:
130
+ 1. Go to **Settings** → **Branches** → **Add rule**
131
+ 2. Branch name pattern: `master`
132
+ 3. Enable:
133
+ - ✅ Require a pull request before merging
134
+ - ✅ Require status checks to pass before merging
135
+ - Select: `test`, `security`, `build`
136
+ - ✅ Require branches to be up to date before merging
137
+ 4. Save changes
138
+
139
+ Now all pushes to `master` must pass CI checks!
140
+
141
+ ---
142
+
143
+ ## Monitoring Deployments
144
+
145
+ ### View deployment status:
146
+ 1. Go to **Actions** tab
147
+ 2. Click on any workflow run
148
+ 3. View logs for each step
149
+
150
+ ### View deployment history:
151
+ 1. Go to Render Dashboard
152
+ 2. Select your service
153
+ 3. Click **Events** tab
154
+
155
+ ---
156
+
157
+ ## Rollback a Deployment
158
+
159
+ ### Quick rollback on Render:
160
+ 1. Go to Render Dashboard → Your service
161
+ 2. Click **Events** tab
162
+ 3. Find last working deployment
163
+ 4. Click **"Redeploy"**
164
+
165
+ ### Rollback via GitHub:
166
+ ```bash
167
+ # Revert the commit
168
+ git revert <bad-commit-hash>
169
+ git push origin master
170
+
171
+ # Auto-deploys the previous working version
172
+ ```
173
+
174
+ ---
175
+
176
+ ## Manual Deployment
177
+
178
+ ### Via GitHub Actions:
179
+ 1. Go to **Actions** tab
180
+ 2. Select "Manual Deploy" workflow
181
+ 3. Click **Run workflow**
182
+ 4. Choose environment
183
+ 5. Click **Run workflow**
184
+
185
+ ### Via Render Dashboard:
186
+ 1. Go to your service
187
+ 2. Click **Manual Deploy**
188
+ 3. Select branch: `master`
189
+ 4. Click **Deploy**
190
+
191
+ ---
192
+
193
+ ## Disable Auto-Deploy
194
+
195
+ ### Option 1: In GitHub
196
+ Disable the workflow:
197
+ ```bash
198
+ git mv .github/workflows/deploy.yml .github/workflows/deploy.yml.disabled
199
+ git commit -m "Disable auto-deploy"
200
+ git push
201
+ ```
202
+
203
+ ### Option 2: In Render
204
+ 1. Go to your service → **Settings**
205
+ 2. Find "Auto-Deploy"
206
+ 3. Toggle **OFF**
207
+
208
+ ---
209
+
210
+ ## Troubleshooting
211
+
212
+ ### CI tests failing?
213
+ - Check the **Actions** tab logs
214
+ - Common issues:
215
+ - Missing migrations
216
+ - Linting errors
217
+ - Test failures
218
+
219
+ ### Deployment failing?
220
+ - Check Render Dashboard → **Logs**
221
+ - Common issues:
222
+ - Missing environment variables
223
+ - Database migration errors
224
+ - Build script errors
225
+
226
+ ### Health check failing?
227
+ - App may still be starting (wait 2-3 minutes)
228
+ - Check if `GOOGLE_API_KEY` is set correctly
229
+ - Visit the health endpoint manually
230
+
231
+ ---
232
+
233
+ ## Environment Variables in CI
234
+
235
+ The CI workflow uses test values for:
236
+ - `DATABASE_URL` → PostgreSQL service
237
+ - `REDIS_URL` → Redis service
238
+ - `DJANGO_SECRET_KEY` → Test key
239
+
240
+ Real credentials (from GitHub Secrets) are only used for:
241
+ - Integration tests (optional)
242
+ - Deployment to Render
243
+
244
+ ---
245
+
246
+ ## Advanced: Staging Environment
247
+
248
+ ### Create staging service on Render:
249
+ 1. Duplicate your web service
250
+ 2. Name it: `aimeet-staging`
251
+ 3. Set branch: `develop`
252
+
253
+ ### Add staging workflow:
254
+ ```yaml
255
+ # .github/workflows/deploy-staging.yml
256
+ name: Deploy to Staging
257
+
258
+ on:
259
+ push:
260
+ branches: [ develop ]
261
+
262
+ jobs:
263
+ deploy:
264
+ # ... same as deploy.yml but with staging secrets
265
+ ```
266
+
267
+ ### Add staging secrets:
268
+ ```
269
+ RENDER_STAGING_SERVICE_ID
270
+ RENDER_STAGING_APP_URL
271
+ ```
272
+
273
+ ---
274
+
275
+ ## Cost of CI/CD
276
+
277
+ **GitHub Actions:**
278
+ - Public repos: **FREE** unlimited
279
+ - Private repos: 2,000 minutes/month free (more than enough)
280
+
281
+ **Total cost:** $0 for public repos! 🎉
282
+
283
+ ---
284
+
285
+ ## Badge for README
286
+
287
+ Add this to your README.md to show build status:
288
+
289
+ ```markdown
290
+ ![CI](https://github.com/prashantdubeypng/Aimeet/actions/workflows/ci.yml/badge.svg)
291
+ ![Deploy](https://github.com/prashantdubeypng/Aimeet/actions/workflows/deploy.yml/badge.svg)
292
+ ```
293
+
294
+ ---
295
+
296
+ **Need help?** Check the workflow logs in the Actions tab!
DEPLOYMENT.md ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Deploy to Render.com
2
+
3
+ ## Prerequisites
4
+ - GitHub account with this repo pushed
5
+ - Render.com account (free signup)
6
+ - All API keys ready (Agora, AssemblyAI, Google, Qdrant, Pusher)
7
+
8
+ ## Deployment Steps
9
+
10
+ ### 1. Push Code to GitHub
11
+ ```bash
12
+ git add .
13
+ git commit -m "Add Render deployment config"
14
+ git push origin master
15
+ ```
16
+
17
+ ### 2. Create New Web Service on Render
18
+ 1. Go to https://dashboard.render.com/
19
+ 2. Click **"New +"** → **"Blueprint"**
20
+ 3. Connect your GitHub repository: `prashantdubeypng/Aimeet`
21
+ 4. Render will detect `render.yaml` automatically
22
+ 5. Click **"Apply"**
23
+
24
+ ### 3. Set Environment Variables
25
+ In Render Dashboard, go to your web service → **Environment** tab and add:
26
+
27
+ **Required Variables:**
28
+ ```bash
29
+ DJANGO_ALLOWED_HOSTS=your-app-name.onrender.com
30
+ AWS_ACCESS_KEY_ID=your_aws_key
31
+ AWS_SECRET_ACCESS_KEY=your_aws_secret
32
+ AWS_STORAGE_BUCKET_NAME=your_bucket_name
33
+ AGORA_APP_ID=your_agora_app_id
34
+ AGORA_APP_CERTIFICATE=your_agora_cert
35
+ AGORA_CUSTOMER_ID=your_agora_customer_id
36
+ AGORA_CUSTOMER_SECRET=your_agora_customer_secret
37
+ ASSEMBLYAI_API_KEY=your_assemblyai_key
38
+ GOOGLE_API_KEY=your_google_api_key
39
+ QDRANT_URL=https://your-qdrant-instance.qdrant.io:6333
40
+ QDRANT_API_KEY=your_qdrant_key
41
+ PUSHER_APP_ID=your_pusher_app_id
42
+ PUSHER_KEY=your_pusher_key
43
+ PUSHER_SECRET=your_pusher_secret
44
+ PUSHER_CLUSTER=your_pusher_cluster
45
+ ```
46
+
47
+ **Auto-Generated (already set by render.yaml):**
48
+ - `DJANGO_SECRET_KEY` ✓
49
+ - `DATABASE_URL` ✓
50
+ - `REDIS_URL` ✓
51
+
52
+ ### 4. Wait for Deployment
53
+ - Render will automatically:
54
+ - Install dependencies
55
+ - Run migrations
56
+ - Collect static files
57
+ - Start Daphne server
58
+ - Start Django-Q worker
59
+
60
+ ### 5. Create Superuser (First Time)
61
+ After deployment, go to **Shell** tab in Render Dashboard:
62
+ ```bash
63
+ cd videocaller
64
+ python manage.py createsuperuser
65
+ ```
66
+
67
+ ### 6. Test Your App
68
+ Visit: `https://your-app-name.onrender.com`
69
+
70
+ ## Render Services Created
71
+
72
+ ### 1. Web Service (Daphne)
73
+ - Runs Django/WebSocket server
74
+ - Auto-scales on demand
75
+ - **Cost:** Free tier (500 hrs/month) or Starter ($7/month)
76
+
77
+ ### 2. Worker Service (Django-Q)
78
+ - Processes background tasks (transcription, embeddings)
79
+ - **Cost:** Starter ($7/month)
80
+
81
+ ### 3. PostgreSQL Database
82
+ - Persistent storage for meetings/users
83
+ - **Cost:** Free for 90 days, then $7/month
84
+
85
+ ### 4. Redis
86
+ - Cache + Django-Q broker
87
+ - **Cost:** Free for 90 days, then $7/month
88
+
89
+ **Total Cost After Free Trial:** ~$21/month (assuming the web service stays on the free tier; ~$28/month if all four services run on Starter plans)
90
+
91
+ ## Free Tier Limitations
92
+ - Web service sleeps after 15 min inactivity (50 sec cold start)
93
+ - 500 build hours/month
94
+ - 100 GB bandwidth/month
95
+
96
+ ## Custom Domain (Optional)
97
+ 1. Go to **Settings** → **Custom Domain**
98
+ 2. Add your domain: `yourdomain.com`
99
+ 3. Update DNS CNAME to point to Render
100
+
101
+ ## Monitoring
102
+ - **Logs:** Dashboard → Logs tab
103
+ - **Metrics:** Dashboard → Metrics tab
104
+ - **Health Check:** https://your-app.onrender.com/api/health/google/
105
+
106
+ ## Troubleshooting
107
+
108
+ ### Service won't start
109
+ Check logs for errors:
110
+ ```bash
111
+ # Common issues:
112
+ - Missing environment variables
113
+ - PostgreSQL connection failed
114
+ - Redis connection failed
115
+ ```
116
+
117
+ ### WebSocket not working
118
+ Ensure:
119
+ - Daphne is running (not Gunicorn)
120
+ - ALLOWED_HOSTS includes your domain
121
+ - CSRF_TRUSTED_ORIGINS is set
122
+
123
+ ### Static files not loading
124
+ Run manually:
125
+ ```bash
126
+ cd videocaller
127
+ python manage.py collectstatic --no-input
128
+ ```
129
+
130
+ ## Rollback
131
+ If deployment fails:
132
+ 1. Go to **Events** tab
133
+ 2. Find previous successful deploy
134
+ 3. Click **"Redeploy"**
135
+
136
+ ## Auto-Deploy on Push
137
+ Render automatically deploys when you push to `master` branch.
138
+
139
+ Disable: **Settings** → **Auto-Deploy** → OFF
140
+
141
+ ## Scale Up
142
+ To handle more users:
143
+ 1. **Settings** → **Instance Type** → Select higher tier
144
+ 2. Add more worker instances
145
+ 3. Upgrade PostgreSQL/Redis plans
146
+
147
+ ---
148
+
149
+ **Support:** Check logs in Render Dashboard or visit https://render.com/docs
DESIGN.md ADDED
@@ -0,0 +1,993 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AIMeet - System Design Document
2
+
3
+ ## 1. Use Case Diagram
4
+
5
+ ```mermaid
6
+ graph TB
7
+ User["👤 User"]
8
+ Host["👤 Meeting Host"]
9
+ Participant["👤 Participant"]
10
+ System["🖥️ AIMeet System"]
11
+ OpenAI["🤖 OpenAI"]
12
+ AssemblyAI["📻 AssemblyAI"]
13
+ Qdrant["📊 Qdrant"]
14
+ S3["☁️ AWS S3"]
15
+ Agora["📱 Agora RTC"]
16
+
17
+ User -->|Register/Login| System
18
+ User -->|Create Meeting| System
19
+ User -->|Join Meeting| System
20
+ Host -->|Start Recording| System
21
+ Participant -->|Join via Code| System
22
+ Host -->|End Meeting| System
23
+ Host -->|Upload Recording| System
24
+ System -->|Upload Audio| S3
25
+ System -->|Transcribe| AssemblyAI
26
+ AssemblyAI -->|Return Transcript| System
27
+ User -->|Prepare for Search| System
28
+ System -->|Generate Embeddings| OpenAI
29
+ System -->|Store Vectors| Qdrant
30
+ User -->|Ask Question| System
31
+ System -->|Search Vectors| Qdrant
32
+ System -->|Generate Response| OpenAI
33
+ User -->|Upload Document| System
34
+ User -->|Chat| System
35
+ System -->|Video Stream| Agora
36
+ Agora -->|Audio/Video| System
37
+
38
+ style User fill:#e1f5ff
39
+ style Host fill:#fff3e0
40
+ style Participant fill:#f3e5f5
41
+ style System fill:#e8f5e9
42
+ style OpenAI fill:#ffebee
43
+ style AssemblyAI fill:#fce4ec
44
+ style Qdrant fill:#f1f8e9
45
+ style S3 fill:#ede7f6
46
+ style Agora fill:#e0f2f1
47
+ ```
48
+
49
+ ---
50
+
51
+ ## 2. User Flow Diagram
52
+
53
+ ### 2.1 New User Onboarding Flow
54
+
55
+ ```mermaid
56
+ flowchart TD
57
+ Start([User Visits App]) --> Register{Existing User?}
58
+ Register -->|No| SignUp["Sign Up<br/>Username, Email, Password"]
59
+ Register -->|Yes| Login["Log In<br/>Email, Password"]
60
+ SignUp --> CreateAccount["Create Account<br/>Validate & Hash Password"]
61
+ CreateAccount --> Dashboard["🏠 View Dashboard<br/>Meetings, Chat, Recordings"]
62
+ Login --> Dashboard
63
+ Dashboard --> End([Ready to Use App])
64
+
65
+ style Start fill:#e3f2fd
66
+ style Register fill:#fff9c4
67
+ style SignUp fill:#f8bbd0
68
+ style Login fill:#f8bbd0
69
+ style CreateAccount fill:#c8e6c9
70
+ style Dashboard fill:#b3e5fc
71
+ style End fill:#a5d6a7
72
+ ```
73
+
74
+ ### 2.2 Meeting Creation & Participation Flow
75
+
76
+ ```mermaid
77
+ flowchart TD
78
+ Host["👤 Host"] --> Create["Click 'Create Meeting'"]
79
+ Create --> AddDetails["Add Title, Description<br/>Set Max Participants"]
80
+ AddDetails --> Generate["System Generates<br/>Room Code"]
81
+ Generate --> Share["Share Code with<br/>Participants"]
82
+
83
+ Share --> P1["👤 Participant 1"]
84
+ Share --> P2["👤 Participant 2"]
85
+ Share --> PN["👤 Participant N"]
86
+
87
+ P1 --> Join["Join Meeting<br/>Enter Room Code"]
88
+ P2 --> Join
89
+ PN --> Join
90
+
91
+ Join --> GetToken["Request Agora Token<br/>from Server"]
92
+ GetToken --> Connect["🎥 Connect to Agora RTC<br/>Start Video/Audio"]
93
+ Connect --> Record["🎙️ Recording Starts<br/>MediaRecorder in Browser"]
94
+ Record --> Chat["💬 Chat Available<br/>Real-time via WebSocket"]
95
+
96
+ Chat --> MeetingActive["✅ Meeting Active"]
97
+ MeetingActive --> Discussion["Participants Discuss"]
98
+ Discussion --> HostEnd["Host Clicks 'End Meeting'"]
99
+ HostEnd --> RecordingStop["🎙️ Recording Stops<br/>Saved Locally"]
100
+ RecordingStop --> UploadOption["Show Upload Option"]
101
+
102
+ UploadOption --> Upload["Click 'Upload Recording'"]
103
+ Upload --> S3Upload["📤 Upload to AWS S3<br/>WebM Format"]
104
+ S3Upload --> SaveMetadata["Save Recording URL<br/>in Database"]
105
+ SaveMetadata --> Success["✅ Recording Saved"]
106
+
107
+ style Host fill:#fff3e0
108
+ style Create fill:#fff9c4
109
+ style AddDetails fill:#c5e1a5
110
+ style Generate fill:#aed581
111
+ style Share fill:#9ccc65
112
+ style P1 fill:#f3e5f5
113
+ style P2 fill:#f3e5f5
114
+ style PN fill:#f3e5f5
115
+ style Join fill:#e1bee7
116
+ style GetToken fill:#ce93d8
117
+ style Connect fill:#ba68c8
118
+ style Record fill:#ab47bc
119
+ style Chat fill:#9575cd
120
+ style MeetingActive fill:#7986cb
121
+ style Discussion fill:#64b5f6
122
+ style HostEnd fill:#42a5f5
123
+ style RecordingStop fill:#2196f3
124
+ style UploadOption fill:#1976d2
125
+ style Upload fill:#1565c0
126
+ style S3Upload fill:#0d47a1
127
+ style SaveMetadata fill:#1565c0
128
+ style Success fill:#1b5e20
129
+ ```
130
+
131
+ ### 2.3 Transcription & RAG Pipeline Flow
132
+
133
+ ```mermaid
134
+ flowchart TD
135
+ Recording["📂 Recording in S3"] --> Transcribe["Click 'Start Transcription'<br/>or Auto-trigger"]
136
+ Transcribe --> GetURL["Generate Presigned URL<br/>24-hour Expiry"]
137
+ GetURL --> SendAPI["📤 Send to AssemblyAI<br/>with Presigned URL"]
138
+ SendAPI --> ReceiveID["Receive Transcript ID<br/>Status: processing"]
139
+ ReceiveID --> Poll["🔄 Poll Every 3 Seconds<br/>Check Status"]
140
+
141
+ Poll --> Check{Status?}
142
+ Check -->|Still Processing| Poll
143
+ Check -->|Completed| SaveText["Save Full Transcript<br/>to Database"]
144
+ Check -->|Failed| Error["❌ Show Error<br/>Retry Option"]
145
+
146
+ SaveText --> Ready["✅ Transcript Ready"]
147
+ Ready --> PrepareClick["User Clicks<br/>'Prepare for Search'"]
148
+
149
+ PrepareClick --> Chunk["📝 Chunk Transcript<br/>500 tokens, 50 overlap<br/>RecursiveCharacterTextSplitter"]
150
+ Chunk --> CreateChunks["Create TranscriptChunk<br/>Records in DB"]
151
+ CreateChunks --> Embed["🤖 Generate Embeddings<br/>OpenAI text-embedding-3-small<br/>Batch API"]
152
+ Embed --> Vectors["Get 1536-dim Vectors<br/>for All Chunks"]
153
+ Vectors --> StoreQdrant["💾 Store in Qdrant<br/>Vector DB<br/>Cosine Similarity"]
154
+ StoreQdrant --> UpdateFlags["Update MeetingRoom<br/>chunks_created_at<br/>embeddings_created_at"]
155
+ UpdateFlags --> Complete["✅ Ready for Q&A<br/>Searchable"]
156
+
157
+ style Recording fill:#f3e5f5
158
+ style Transcribe fill:#e1bee7
159
+ style GetURL fill:#ce93d8
160
+ style SendAPI fill:#ba68c8
161
+ style ReceiveID fill:#ab47bc
162
+ style Poll fill:#9575cd
163
+ style Check fill:#fff9c4
164
+ style SaveText fill:#7986cb
165
+ style Ready fill:#64b5f6
166
+ style PrepareClick fill:#42a5f5
167
+ style Chunk fill:#2196f3
168
+ style CreateChunks fill:#1976d2
169
+ style Embed fill:#1565c0
170
+ style Vectors fill:#0d47a1
171
+ style StoreQdrant fill:#1565c0
172
+ style UpdateFlags fill:#1976d2
173
+ style Complete fill:#1b5e20
174
+ style Error fill:#c62828
175
+ ```
176
+
177
+ ### 2.4 Question Answering Flow
178
+
179
+ ```mermaid
180
+ flowchart TD
181
+ User["👤 User"] --> Question["💭 Ask a Question<br/>About Meeting Content"]
182
+ Question --> Input["Type Question in UI"]
183
+ Input --> Submit["Click 'Ask'"]
184
+
185
+ Submit --> Embed["🤖 Embed Question<br/>OpenAI API<br/>text-embedding-3-small"]
186
+ Embed --> QueryVector["Get Query Vector<br/>1536 dimensions"]
187
+
188
+ QueryVector --> Search["🔍 Search Qdrant<br/>Cosine Similarity<br/>Top-5 Chunks"]
189
+ Search --> Results["Get Similar Chunks<br/>+ Relevance Scores<br/>+ Timestamps"]
190
+
191
+ Results --> History["📜 Retrieve Conversation<br/>Last 5 Q&A Turns<br/>from DB"]
192
+ History --> HistoryData["Past Questions &<br/>Answers Loaded"]
193
+
194
+ HistoryData --> BuildPrompt["🛠️ Build LLM Prompt"]
195
+ BuildPrompt --> AddSystem["Add System Message<br/>Analysis Instructions"]
196
+ AddSystem --> AddContext["Add Context<br/>Top-5 Chunks<br/>Transcript Sections"]
197
+ AddContext --> AddHistory["Add Conversation<br/>Past Q&A Exchanges"]
198
+ AddHistory --> AddQuery["Add Current Query"]
199
+ AddQuery --> Prompt["Complete Prompt<br/>Ready for LLM"]
200
+
201
+ Prompt --> CallGPT["📞 Call OpenAI<br/>GPT-4o-mini<br/>Max 1000 tokens"]
202
+ CallGPT --> Generate["Generate Response<br/>with Full Context"]
203
+ Generate --> Response["Get Assistant<br/>Response Text"]
204
+
205
+ Response --> Save["💾 Save to DB<br/>ConversationHistory<br/>Link Chunks"]
206
+ Save --> Display["📺 Display Answer<br/>to User<br/>Show Relevant Chunks<br/>with Timestamps"]
207
+ Display --> ShowSources["Show Sources<br/>Chunk Text<br/>Confidence Scores"]
208
+ ShowSources --> End["✅ User Sees Answer<br/>with Full Context"]
209
+
210
+ style User fill:#e3f2fd
211
+ style Question fill:#bbdefb
212
+ style Input fill:#90caf9
213
+ style Submit fill:#64b5f6
214
+ style Embed fill:#42a5f5
215
+ style QueryVector fill:#2196f3
216
+ style Search fill:#1976d2
217
+ style Results fill:#1565c0
218
+ style History fill:#0d47a1
219
+ style HistoryData fill:#1565c0
220
+ style BuildPrompt fill:#1976d2
221
+ style AddSystem fill:#2196f3
222
+ style AddContext fill:#42a5f5
223
+ style AddHistory fill:#64b5f6
224
+ style AddQuery fill:#90caf9
225
+ style Prompt fill:#bbdefb
226
+ style CallGPT fill:#e3f2fd
227
+ style Generate fill:#bbdefb
228
+ style Response fill:#90caf9
229
+ style Save fill:#64b5f6
230
+ style Display fill:#42a5f5
231
+ style ShowSources fill:#2196f3
232
+ style End fill:#1b5e20
233
+ ```
234
+
235
+ ### 2.5 Meeting Preparation (Sticky Notes) Flow
236
+
237
+ ```mermaid
238
+ flowchart TD
239
+ User["👤 User"] --> CreateNew["Creating New Meeting<br/>for 'Hiring Interview'"]
240
+ CreateNew --> AddTitle["Add Title & Agenda"]
241
+ AddTitle --> System["🤖 System Analyzes<br/>Title Keywords"]
242
+
243
+ System --> Extract["Extract Keywords<br/>- hiring<br/>- interview<br/>- data science"]
244
+ Extract --> SearchQdrant["🔍 Search Past Meetings<br/>in Qdrant<br/>Similar Topics"]
245
+ SearchQdrant --> FindPast["Find Related Past<br/>Meetings<br/>- Jan 10: Team Formation<br/>- Jan 15: Hiring Discussion<br/>- Jan 20: DS Skills"]
246
+
247
+ FindPast --> StickyNotes["📌 Show Sticky Notes<br/>Related Past Discussions"]
248
+ StickyNotes --> Display["Display:<br/>'In your last hiring meeting,<br/>you discussed...'"]
249
+ Display --> Expand["User Can Expand<br/>to Read Full Context"]
250
+ Expand --> Context["See Relevant Chunks<br/>from Past Meetings<br/>- Requirements discussed<br/>- Decisions made<br/>- Concerns raised"]
251
+
252
+ Context --> Prepare["✅ User Prepared<br/>with Full History<br/>Before New Meeting"]
253
+
254
+ style User fill:#fff3e0
255
+ style CreateNew fill:#ffe0b2
256
+ style AddTitle fill:#ffcc80
257
+ style System fill:#ffb74d
258
+ style Extract fill:#ffa726
259
+ style SearchQdrant fill:#ff9800
260
+ style FindPast fill:#f57c00
261
+ style StickyNotes fill:#e65100
262
+ style Display fill:#fff9c4
263
+ style Expand fill:#fff59d
264
+ style Context fill:#fff176
265
+ style Prepare fill:#1b5e20
266
+ ```
267
+
268
+ ---
269
+
270
+ ## 3. System Architecture Diagram
271
+
272
+ ### 3.1 High-Level Architecture
273
+
274
+ ```mermaid
275
+ graph TB
276
+ subgraph Client["🖥️ Client Layer"]
277
+ Web["Web UI<br/>HTML/CSS/JS"]
278
+ Agora_SDK["Agora RTC SDK<br/>Video/Audio"]
279
+ MediaRec["MediaRecorder<br/>Audio Capture"]
280
+ end
281
+
282
+ subgraph API["🌐 API Layer"]
283
+ Django["Django REST<br/>Framework"]
284
+ WebSocket["WebSocket<br/>Pusher"]
285
+ end
286
+
287
+ subgraph Logic["💻 Application Logic"]
288
+ Views["Views<br/>Meeting, Recording<br/>Chat, RAG"]
289
+ Utils["Utilities<br/>Recording, Transcription<br/>Embedding, RAG"]
290
+ Models["Models<br/>Database ORM"]
291
+ end
292
+
293
+ subgraph Storage["💾 Data Layer"]
294
+ DB["PostgreSQL<br/>Relational Data"]
295
+ S3["AWS S3<br/>Files & Media"]
296
+ end
297
+
298
+ subgraph AI["🤖 AI Services"]
299
+ OpenAI["OpenAI API<br/>Embeddings<br/>GPT-4o"]
300
+ AssemblyAI["AssemblyAI<br/>Transcription"]
301
+ Qdrant["Qdrant Cloud<br/>Vector DB"]
302
+ end
303
+
304
+ subgraph External["📡 External"]
305
+ AgoraCloud["Agora Cloud<br/>RTC"]
306
+ end
307
+
308
+ Web --> Django
309
+ Agora_SDK --> Django
310
+ MediaRec --> Django
311
+ WebSocket --> Django
312
+ Django --> Views
313
+ Django --> Utils
314
+ Views --> Models
315
+ Utils --> Models
316
+ Models --> DB
317
+ Models --> S3
318
+ Views --> S3
319
+ Utils --> S3
320
+ Utils --> OpenAI
321
+ Utils --> AssemblyAI
322
+ Utils --> Qdrant
323
+ Agora_SDK --> AgoraCloud
324
+
325
+ style Client fill:#e3f2fd
326
+ style API fill:#f3e5f5
327
+ style Logic fill:#e8f5e9
328
+ style Storage fill:#fff3e0
329
+ style AI fill:#ffebee
330
+ style External fill:#f1f8e9
331
+ ```
332
+
333
+ ### 3.2 AWS Deployment Architecture
334
+
335
+ ```mermaid
336
+ graph TB
337
+ subgraph AWS["☁️ AWS Region ap-south-1"]
338
+ subgraph VPC["VPC"]
339
+ subgraph PublicSubnet["Public Subnet"]
340
+ ALB["ALB<br/>HTTPS"]
341
+ end
342
+
343
+ subgraph PrivateSubnet["Private Subnet"]
344
+ EC2_1["EC2 Instance 1<br/>Django App"]
345
+ EC2_2["EC2 Instance 2<br/>Django App"]
346
+ EC2_N["EC2 Instance N<br/>Auto-Scaling"]
347
+ end
348
+
349
+ subgraph DBSubnet["DB Subnet"]
350
+ RDS["RDS PostgreSQL<br/>Multi-AZ"]
351
+ end
352
+ end
353
+
354
+ S3["S3 Bucket<br/>Recordings, Docs"]
355
+ CloudFront["CloudFront CDN<br/>Static Assets"]
356
+ CloudWatch["CloudWatch<br/>Monitoring"]
357
+ Secrets["Secrets Manager<br/>API Keys"]
358
+ end
359
+
360
+ Users["👥 Users<br/>Internet"]
361
+ OpenAI_Cloud["🤖 OpenAI<br/>Cloud"]
362
+ Qdrant_Cloud["📊 Qdrant<br/>Cloud"]
363
+ AssemblyAI_Cloud["📻 AssemblyAI<br/>Cloud"]
364
+ Agora_Cloud["📱 Agora<br/>Cloud"]
365
+
366
+ Users -->|HTTPS| ALB
367
+ ALB -->|Route| EC2_1
368
+ ALB -->|Route| EC2_2
369
+ ALB -->|Route| EC2_N
370
+ EC2_1 -->|Read/Write| RDS
371
+ EC2_2 -->|Read/Write| RDS
372
+ EC2_N -->|Read/Write| RDS
373
+ EC2_1 -->|Upload/Download| S3
374
+ S3 -->|Serve| CloudFront
375
+ EC2_1 -->|Logs| CloudWatch
376
+ EC2_2 -->|Logs| CloudWatch
377
+ EC2_N -->|Logs| CloudWatch
378
+ EC2_1 -->|Get Keys| Secrets
379
+ EC2_1 -->|API Call| OpenAI_Cloud
380
+ EC2_1 -->|API Call| Qdrant_Cloud
381
+ EC2_1 -->|API Call| AssemblyAI_Cloud
382
+ Users -->|Video| Agora_Cloud
383
+
384
+ style AWS fill:#ede7f6
385
+ style VPC fill:#f3e5f5
386
+ style PublicSubnet fill:#e1bee7
387
+ style PrivateSubnet fill:#ce93d8
388
+ style DBSubnet fill:#ba68c8
389
+ style Users fill:#bbdefb
390
+ style OpenAI_Cloud fill:#ffebee
391
+ style Qdrant_Cloud fill:#f1f8e9
392
+ style AssemblyAI_Cloud fill:#fce4ec
393
+ style Agora_Cloud fill:#e0f2f1
394
+ ```
395
+
396
+ ---
397
+
398
+ ## 4. Data Flow Diagram
399
+
400
+ ### 4.1 Recording & Transcription Flow
401
+
402
+ ```mermaid
403
+ flowchart LR
404
+ Browser["Browser<br/>MediaRecorder"]
405
+ LocalFile["Local WebM<br/>Audio File<br/>5-50 MB"]
406
+ Django["Django<br/>Backend"]
407
+ Presigned["Presigned URL<br/>24-hour expiry"]
408
+ S3["AWS S3<br/>aimeet-s3-bucket"]
409
+ Assembly["AssemblyAI<br/>Service"]
410
+ Polling["Polling Loop<br/>Every 3 sec"]
411
+ Complete["Transcript<br/>Complete"]
412
+ Database["PostgreSQL<br/>transcript_text"]
413
+ Ready["✅ Ready<br/>for RAG"]
414
+
415
+ Browser -->|Capture Audio| LocalFile
416
+ LocalFile -->|User Uploads| Django
417
+ Django -->|Generate URL| Presigned
418
+ Django -->|Upload| S3
419
+ Presigned -->|Submit URL| Assembly
420
+ Assembly -->|Process| Polling
421
+ Polling -->|Check Status| Assembly
422
+ Assembly -->|Return Result| Complete
423
+ Complete -->|Save| Database
424
+ Database --> Ready
425
+
426
+ style Browser fill:#bbdefb
427
+ style LocalFile fill:#90caf9
428
+ style Django fill:#64b5f6
429
+ style Presigned fill:#42a5f5
430
+ style S3 fill:#2196f3
431
+ style Assembly fill:#ff9800
432
+ style Polling fill:#fff9c4
433
+ style Complete fill:#fff176
434
+ style Database fill:#64b5f6
435
+ style Ready fill:#1b5e20
436
+ ```
437
+
438
+ ### 4.2 Embedding & Storage Flow
439
+
440
+ ```mermaid
441
+ flowchart LR
442
+ Transcript["Transcript<br/>Text"]
443
+ Splitter["RecursiveCharacter<br/>TextSplitter<br/>500 tokens<br/>50 overlap"]
444
+ Chunks["Text Chunks<br/>Array[str]"]
445
+ DB_Chunks["Create<br/>TranscriptChunk<br/>Records"]
446
+ OpenAI_API["OpenAI API<br/>Batch Embeddings"]
447
+ Vectors["1536-dim<br/>Vectors<br/>Array[float]"]
448
+ Qdrant["Qdrant Cloud<br/>Collection"]
449
+ Indexed["✅ Indexed<br/>Searchable"]
450
+
451
+ Transcript -->|Split| Splitter
452
+ Splitter -->|Output| Chunks
453
+ Chunks -->|Save| DB_Chunks
454
+ Chunks -->|Send| OpenAI_API
455
+ OpenAI_API -->|Generate| Vectors
456
+ Vectors -->|Upsert| Qdrant
457
+ Qdrant --> Indexed
458
+
459
+ style Transcript fill:#f3e5f5
460
+ style Splitter fill:#e1bee7
461
+ style Chunks fill:#ce93d8
462
+ style DB_Chunks fill:#ba68c8
463
+ style OpenAI_API fill:#ffebee
464
+ style Vectors fill:#ffcdd2
465
+ style Qdrant fill:#f1f8e9
466
+ style Indexed fill:#1b5e20
467
+ ```
468
+
469
+ ### 4.3 Query & Response Flow
470
+
471
+ ```mermaid
472
+ flowchart LR
473
+ UserQ["User Question"]
474
+ Embed_Q["Embed Question<br/>OpenAI API"]
475
+ Vector_Q["Query Vector<br/>1536-dim"]
476
+ Search["Search Qdrant<br/>Cosine Similarity<br/>top-k=5"]
477
+ TopChunks["Top-5 Chunks<br/>+ Scores"]
478
+ History["Fetch Conversation<br/>History<br/>Last 5 turns"]
479
+ Prompt_Build["Build LLM<br/>Prompt<br/>System+Context<br/>+History+Query"]
480
+ GPT["Call GPT-4o<br/>API"]
481
+ Response["Generate<br/>Response"]
482
+ Save_History["Save Q&A<br/>to DB"]
483
+ Display["Display to<br/>User<br/>+ Sources"]
484
+
485
+ UserQ -->|Send| Embed_Q
486
+ Embed_Q -->|Return| Vector_Q
487
+ Vector_Q -->|Query| Search
488
+ Search -->|Return| TopChunks
489
+ TopChunks -->|Include| Prompt_Build
490
+ History -->|Include| Prompt_Build
491
+ Prompt_Build -->|Send| GPT
492
+ GPT -->|Generate| Response
493
+ Response -->|Save| Save_History
494
+ Response -->|Show| Display
495
+
496
+ style UserQ fill:#e3f2fd
497
+ style Embed_Q fill:#bbdefb
498
+ style Vector_Q fill:#90caf9
499
+ style Search fill:#64b5f6
500
+ style TopChunks fill:#42a5f5
501
+ style History fill:#2196f3
502
+ style Prompt_Build fill:#1976d2
503
+ style GPT fill:#ffebee
504
+ style Response fill:#ffcdd2
505
+ style Save_History fill:#64b5f6
506
+ style Display fill:#1b5e20
507
+ ```
508
+
509
+ ---
510
+
511
+ ## 5. Database Schema Diagram
512
+
513
+ ```mermaid
514
+ erDiagram
515
+ AUTH_USER ||--o{ MEETING_ROOM : hosts
516
+ AUTH_USER ||--o{ CHAT_MESSAGE : sends
517
+ AUTH_USER ||--o{ CONVERSATION_HISTORY : asks
518
+
519
+ MEETING_ROOM ||--o{ TRANSCRIPT_CHUNK : contains
520
+ MEETING_ROOM ||--o{ DOCUMENT_UPLOAD : has
521
+ MEETING_ROOM ||--o{ CONVERSATION_HISTORY : discusses
522
+
523
+ DOCUMENT_UPLOAD ||--o{ DOCUMENT_CHUNK : contains
524
+
525
+ AUTH_USER {
526
+ int id PK
527
+ string username UK
528
+ string email
529
+ string password_hash
530
+ string first_name
531
+ string last_name
532
+ datetime created_at
533
+ }
534
+
535
+ MEETING_ROOM {
536
+ int id PK
537
+ string room_id UK
538
+ string room_code UK
539
+ int host_id FK
540
+ string title
541
+ text description
542
+ int max_participants
543
+ string recording_status
544
+ text recording_sid
545
+ string s3_recording_url
546
+ text transcript_text
547
+ string transcript_status
548
+ string transcript_id
549
+ datetime chunks_created_at
550
+ datetime embeddings_created_at
551
+ int embedding_version
552
+ boolean is_active
553
+ datetime created_at
554
+ }
555
+
556
+ TRANSCRIPT_CHUNK {
557
+ int id PK
558
+ int meeting_id FK
559
+ text chunk_text
560
+ int chunk_index
561
+ int start_time
562
+ int end_time
563
+ string embedding_vector_id
564
+ datetime created_at
565
+ }
566
+
567
+ DOCUMENT_UPLOAD {
568
+ int id PK
569
+ int meeting_id FK
570
+ string file_name
571
+ string file_type
572
+ string s3_url
573
+ text raw_text
574
+ datetime chunks_created_at
575
+ datetime embeddings_created_at
576
+ datetime created_at
577
+ }
578
+
579
+ DOCUMENT_CHUNK {
580
+ int id PK
581
+ int document_id FK
582
+ text chunk_text
583
+ int chunk_index
584
+ string embedding_vector_id
585
+ datetime created_at
586
+ }
587
+
588
+ CONVERSATION_HISTORY {
589
+ int id PK
590
+ int meeting_id FK
591
+ int user_id FK
592
+ text user_question
593
+ text assistant_response
594
+ json relevant_chunks
595
+ datetime created_at
596
+ }
597
+
598
+ CHAT_MESSAGE {
599
+ int id PK
600
+ int user_id FK
601
+ text content
602
+ datetime created_at
603
+ }
604
+ ```
605
+
606
+ ---
607
+
608
+ ## 6. Component Interaction Diagram
609
+
610
+ ### 6.1 Meeting & Recording Components
611
+
612
+ ```mermaid
613
+ graph TB
614
+ FrontEnd["🎨 Frontend<br/>HTML/CSS/JS"]
615
+ DjangoView["📝 Django View<br/>meeting()"]
616
+ AgoraSDK["📱 Agora RTC<br/>SDK"]
617
+ MediaRec["🎙️ MediaRecorder<br/>Audio Capture"]
618
+ RecordingUtils["🛠️ RecordingUtils<br/>S3Manager"]
619
+ S3["☁️ AWS S3"]
620
+
621
+ FrontEnd -->|render page| DjangoView
622
+ FrontEnd -->|initialize| AgoraSDK
623
+ FrontEnd -->|start recording| MediaRec
624
+ DjangoView -->|generate token| AgoraSDK
625
+ AgoraSDK -->|connect to| FrontEnd
626
+ MediaRec -->|on meeting end| FrontEnd
627
+ FrontEnd -->|upload recording| DjangoView
628
+ DjangoView -->|call| RecordingUtils
629
+ RecordingUtils -->|upload file| S3
630
+ RecordingUtils -->|save metadata| DjangoView
631
+
632
+ style FrontEnd fill:#e3f2fd
633
+ style DjangoView fill:#c8e6c9
634
+ style AgoraSDK fill:#e0f2f1
635
+ style MediaRec fill:#fff9c4
636
+ style RecordingUtils fill:#f0f4c3
637
+ style S3 fill:#ede7f6
638
+ ```
639
+
640
+ ### 6.2 RAG Pipeline Components
641
+
642
+ ```mermaid
643
+ graph TB
644
+ AssemblyAI["📻 AssemblyAI<br/>Service"]
645
+ AssemblyUtils["🛠️ AssemblyAI<br/>Utils"]
646
+ DjangoView["📝 Django View<br/>RAG Endpoints"]
647
+ RAGUtils["🛠️ RAG Utils<br/>Chunking & Query"]
648
+ EmbeddingUtils["🛠️ Embedding<br/>Utils"]
649
+ OpenAI["🤖 OpenAI<br/>API"]
650
+ Qdrant["📊 Qdrant<br/>Vector DB"]
651
+ Database["💾 PostgreSQL<br/>Models"]
652
+
653
+ AssemblyAI -->|transcribe| AssemblyUtils
654
+ AssemblyUtils -->|save| Database
655
+ DjangoView -->|call| RAGUtils
656
+ RAGUtils -->|chunk| Database
657
+ RAGUtils -->|embed| EmbeddingUtils
658
+ EmbeddingUtils -->|call| OpenAI
659
+ EmbeddingUtils -->|store| Qdrant
660
+ RAGUtils -->|query| Qdrant
661
+ RAGUtils -->|generate response| OpenAI
662
+ RAGUtils -->|save history| Database
663
+
664
+ style AssemblyAI fill:#ff9800
665
+ style AssemblyUtils fill:#fff9c4
666
+ style DjangoView fill:#c8e6c9
667
+ style RAGUtils fill:#a5d6a7
668
+ style EmbeddingUtils fill:#81c784
669
+ style OpenAI fill:#ffcdd2
670
+ style Qdrant fill:#f1f8e9
671
+ style Database fill:#b3e5fc
672
+ ```
673
+
674
+ ---
675
+
676
+ ## 7. Sequence Diagrams
677
+
678
+ ### 7.1 Meeting Creation Sequence
679
+
680
+ ```mermaid
681
+ sequenceDiagram
682
+ actor User
683
+ participant Frontend
684
+ participant Django
685
+ participant Database
686
+ participant Agora_Cloud
687
+
688
+ User->>Frontend: Click 'Create Meeting'
689
+ Frontend->>Frontend: Show form
690
+ User->>Frontend: Enter title, description
691
+ Frontend->>Django: POST /create/
692
+ Django->>Django: Generate room_code
693
+ Django->>Django: Create MeetingRoom object
694
+ Django->>Database: Save to DB
695
+ Database-->>Django: Meeting ID
696
+ Django-->>Frontend: Return room_code, meeting_id
697
+ Frontend->>Frontend: Display room code
698
+ Frontend->>User: "Share this code: abc-def-ghi"
699
+
700
+ User->>Frontend: Click 'Join Meeting'
701
+ Frontend->>Django: GET /meeting/<code>/
702
+ Django->>Database: Fetch meeting
703
+ Database-->>Django: Meeting object
704
+ Django->>Django: Generate Agora token
705
+ Django-->>Frontend: Return token
706
+ Frontend->>Agora_Cloud: Connect with token
707
+ Agora_Cloud-->>Frontend: Connection established
708
+ Frontend->>Frontend: Initialize video/audio
709
+ Frontend->>Frontend: Start MediaRecorder
710
+ Frontend->>User: "Meeting started"
711
+ ```
712
+
713
+ ### 7.2 Question Answering Sequence
714
+
715
+ ```mermaid
716
+ sequenceDiagram
717
+ actor User
718
+ participant Frontend
719
+ participant Django
720
+ participant OpenAI_API
721
+ participant Qdrant_DB
722
+ participant Database
723
+ participant GPT4O_API
724
+
725
+ User->>Frontend: Type question & click 'Ask'
726
+ Frontend->>Django: POST /api/meetings/<id>/query/
727
+ Django->>OpenAI_API: Embed question
728
+ OpenAI_API-->>Django: query_vector (1536-dim)
729
+ Django->>Qdrant_DB: Search with vector
730
+ Qdrant_DB-->>Django: Top-5 chunks + scores
731
+ Django->>Database: Fetch conversation history
732
+ Database-->>Django: Last 5 Q&A turns
733
+ Django->>Django: Build LLM prompt
734
+ Django->>GPT4O_API: Send prompt
735
+ GPT4O_API-->>Django: Generated response
736
+ Django->>Database: Save Q&A to ConversationHistory
737
+ Database-->>Django: Saved
738
+ Django-->>Frontend: Return response + chunks
739
+ Frontend->>Frontend: Display response
740
+ Frontend->>Frontend: Show relevant chunks
741
+ Frontend->>User: Display answer with sources
742
+ ```
743
+
744
+ ---
745
+
746
+ ## 8. State Machine Diagrams
747
+
748
+ ### 8.1 Meeting State Machine
749
+
750
+ ```mermaid
751
+ stateDiagram-v2
752
+ [*] --> Created: create_meeting()
753
+ Created --> Active: host_joins()
754
+ Active --> Active: participants_join()
755
+ Active --> Recording: start_recording()
756
+ Recording --> Recording: chat_messages()
757
+ Recording --> Ended: host_ends_meeting()
758
+ Ended --> Transcribing: upload_recording()
759
+ Transcribing --> Transcribed: transcription_complete()
760
+ Transcribed --> Processing: prepare_for_rag()
761
+ Processing --> Ready: embeddings_stored()
762
+ Ready --> Archived: archive_meeting()
763
+
764
+ note right of Created
765
+ Room code generated
766
+ Max participants set
767
+ end note
768
+
769
+ note right of Active
770
+ Video/audio streaming
771
+ Chat enabled
772
+ end note
773
+
774
+ note right of Recording
775
+ Audio recorded locally
776
+ Chat saved
777
+ end note
778
+
779
+ note right of Ended
780
+ Recording stopped
781
+ Waiting for upload
782
+ end note
783
+
784
+ note right of Transcribing
785
+ AssemblyAI processing
786
+ Status polling
787
+ end note
788
+
789
+ note right of Transcribed
790
+ Transcript saved
791
+ Ready for chunking
792
+ end note
793
+
794
+ note right of Processing
795
+ Chunks created
796
+ Embeddings generated
797
+ end note
798
+
799
+ note right of Ready
800
+ Searchable
801
+ Q&A enabled
802
+ end note
803
+ ```
804
+
805
+ ### 8.2 Transcription State Machine
806
+
807
+ ```mermaid
808
+ stateDiagram-v2
809
+ [*] --> NotStarted
810
+ NotStarted --> Processing: upload_recording()
811
+ Processing --> Processing: poll_status()
812
+ Processing --> Completed: transcription_complete()
813
+ Processing --> Failed: error_occurred()
814
+ Completed --> [*]
815
+ Failed --> NotStarted: retry()
816
+
817
+ note right of NotStarted
818
+ Waiting for upload
819
+ end note
820
+
821
+ note right of Processing
822
+ AssemblyAI job running
823
+ Polling every 3 sec
824
+ end note
825
+
826
+ note right of Completed
827
+ Transcript saved to DB
828
+ Available for RAG
829
+ end note
830
+
831
+ note right of Failed
832
+ Error occurred
833
+ User can retry
834
+ end note
835
+ ```
836
+
837
+ ---
838
+
839
+ ## 9. API Request/Response Flow
840
+
841
+ ### 9.1 Create Meeting Request Flow
842
+
843
+ ```mermaid
844
+ graph LR
845
+ Client["Client"]
846
+ Request["POST /create/<br/>Content-Type: application/json"]
847
+ Body["Body:<br/>title<br/>description<br/>max_participants"]
848
+ Django["Django View<br/>create_room()"]
849
+ Validate["Validate Input<br/>Auth Check"]
850
+ Generate["Generate<br/>room_code"]
851
+ Save["Save to<br/>Database"]
852
+ Response["Response 200<br/>JSON:<br/>room_code<br/>meeting_id"]
853
+
854
+ Client -->|Send| Request
855
+ Request -->|Include| Body
856
+ Body -->|Sent to| Django
857
+ Django -->|Process| Validate
858
+ Validate -->|Generate| Generate
859
+ Generate -->|Save| Save
860
+ Save -->|Return| Response
861
+ Response -->|Receive| Client
862
+
863
+ style Client fill:#e3f2fd
864
+ style Request fill:#c5e1a5
865
+ style Body fill:#aed581
866
+ style Django fill:#c8e6c9
867
+ style Validate fill:#a5d6a7
868
+ style Generate fill:#81c784
869
+ style Save fill:#66bb6a
870
+ style Response fill:#4caf50
871
+ ```
872
+
873
+ ### 9.2 Query Meeting Endpoint Request Flow
874
+
875
+ ```mermaid
876
+ graph LR
877
+ Client["Client<br/>Frontend"]
878
+ Request["POST /api/meetings/&lt;id&gt;/query/<br/>Content-Type: application/json"]
879
+ Body["Body:<br/>question: string"]
880
+ Auth["Check Auth<br/>JWT Token"]
881
+ Parse["Parse<br/>Question"]
882
+ Embed["Embed<br/>Question"]
883
+ Search["Search<br/>Qdrant"]
884
+ GetHistory["Fetch<br/>History"]
885
+ BuildPrompt["Build<br/>Prompt"]
886
+ CallLLM["Call<br/>GPT-4o"]
887
+ Save["Save<br/>Q&A"]
888
+ Response["Response 200<br/>JSON:<br/>response<br/>relevant_chunks"]
889
+
890
+ Client -->|Send| Request
891
+ Request -->|Include| Body
892
+ Body -->|Sent to| Auth
893
+ Auth -->|Validate| Parse
894
+ Parse -->|Call| Embed
895
+ Embed -->|Search| Search
896
+ Parse -->|Fetch| GetHistory
897
+ Search -->|Include| BuildPrompt
898
+ GetHistory -->|Include| BuildPrompt
899
+ BuildPrompt -->|Call| CallLLM
900
+ CallLLM -->|Store| Save
901
+ CallLLM -->|Return| Response
902
+ Response -->|Receive| Client
903
+
904
+ style Client fill:#e3f2fd
905
+ style Request fill:#c5e1a5
906
+ style Body fill:#aed581
907
+ style Auth fill:#c8e6c9
908
+ style Parse fill:#a5d6a7
909
+ style Embed fill:#81c784
910
+ style Search fill:#66bb6a
911
+ style GetHistory fill:#4caf50
912
+ style BuildPrompt fill:#43a047
913
+ style CallLLM fill:#388e3c
914
+ style Save fill:#2e7d32
915
+ style Response fill:#1b5e20
916
+ ```
917
+
918
+ ---
919
+
920
+ ## 10. Error Handling Flow
921
+
922
+ ```mermaid
923
+ flowchart TD
924
+ Request["User Request"]
925
+ Try["Try to Process"]
926
+ Check{Error?}
927
+
928
+ Check -->|No| Success["✅ Success<br/>Return Data"]
929
+ Check -->|Yes| Type{Error Type?}
930
+
931
+ Type -->|404| NotFound["❌ Not Found<br/>Status 404"]
932
+ Type -->|401| Unauthorized["❌ Unauthorized<br/>Status 401"]
933
+ Type -->|403| Forbidden["❌ Forbidden<br/>Status 403"]
934
+ Type -->|400| BadRequest["❌ Bad Request<br/>Status 400"]
935
+ Type -->|500| ServerError["❌ Server Error<br/>Status 500"]
936
+ Type -->|API Error| APIError["❌ External API<br/>Error"]
937
+
938
+ NotFound -->|Log| Log["Log Error<br/>to CloudWatch"]
939
+ Unauthorized -->|Log| Log
940
+ Forbidden -->|Log| Log
941
+ BadRequest -->|Log| Log
942
+ ServerError -->|Log| Log
943
+ APIError -->|Log| Log
944
+
945
+ Log -->|Alert| Alert{Severity?}
946
+ Alert -->|Critical| Page["Page On-Call"]
947
+ Alert -->|Warning| Notify["Send Notification"]
948
+ Alert -->|Info| Store["Store in Log"]
949
+
950
+ Page -->|Resolved| Response["Return Error<br/>to User"]
951
+ Notify -->|Resolved| Response
952
+ Store -->|Timeout| Response
953
+
954
+ Request -->|Send| Try
955
+ Success -->|Send| Response
956
+
957
+ style Request fill:#e3f2fd
958
+ style Try fill:#fff9c4
959
+ style Check fill:#fff59d
960
+ style Type fill:#fff176
961
+ style Success fill:#c8e6c9
962
+ style NotFound fill:#ffcdd2
963
+ style Unauthorized fill:#ef9a9a
964
+ style Forbidden fill:#e57373
965
+ style BadRequest fill:#ef5350
966
+ style ServerError fill:#f44336
967
+ style APIError fill:#e53935
968
+ style Log fill:#fff9c4
969
+ style Alert fill:#fff59d
970
+ style Page fill:#ff6f00
971
+ style Notify fill:#ffa726
972
+ style Store fill:#ffb74d
973
+ style Response fill:#1b5e20
974
+ ```
975
+
976
+ ---
977
+
978
+ ## Summary
979
+
980
+ This design document provides:
981
+
982
+ 1. **Use Cases** - All system actors and interactions
983
+ 2. **User Flows** - Step-by-step journeys for key scenarios
984
+ 3. **System Architecture** - Component relationships and deployment
985
+ 4. **Data Flows** - How data moves through the system
986
+ 5. **Database Schema** - Entity relationships and structure
987
+ 6. **Component Interactions** - How modules communicate
988
+ 7. **Sequences** - Detailed interaction timelines
989
+ 8. **State Machines** - Meeting and transcription state transitions
990
+ 9. **API Flows** - Request/response patterns
991
+ 10. **Error Handling** - Exception management and alerting
992
+
993
+ All diagrams use Mermaid syntax for easy updates and version control.
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ # Set environment variables
4
+ ENV PYTHONUNBUFFERED=1 \
5
+ PYTHONDONTWRITEBYTECODE=1 \
6
+ PIP_NO_CACHE_DIR=1
7
+
8
+ # Install system dependencies
9
+ # gcc is needed for some Python packages; no postgresql-client needed (psycopg2-binary is self-contained)
10
+ RUN apt-get update && apt-get install -y \
11
+ gcc \
12
+ && rm -rf /var/lib/apt/lists/*
13
+
14
+ # Set work directory
15
+ WORKDIR /app
16
+
17
+ # Install Python dependencies
18
+ COPY requirements.txt .
19
+ RUN pip install --upgrade pip && \
20
+ pip install -r requirements.txt
21
+
22
+ # Copy project source
23
+ COPY videocaller/ ./videocaller/
24
+
25
+ # Copy HuggingFace startup script
26
+ COPY start.hf.sh /app/start.hf.sh
27
+ RUN chmod +x /app/start.hf.sh
28
+
29
+ # HuggingFace Spaces requires the app to listen on port 7860
30
+ EXPOSE 7860
31
+
32
+ # Static files are collected at startup (in start.hf.sh) so env vars are available
33
+ # CMD runs migrations, optionally starts Django-Q worker, then starts Daphne on port 7860
34
+ CMD ["/app/start.hf.sh"]
Dockerfile.huggingface ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ # Set environment variables
4
+ ENV PYTHONUNBUFFERED=1 \
5
+ PYTHONDONTWRITEBYTECODE=1 \
6
+ PIP_NO_CACHE_DIR=1
7
+
8
+ # Install system dependencies
9
+ # gcc is needed for some Python packages; no postgresql-client needed (psycopg2-binary is self-contained)
10
+ RUN apt-get update && apt-get install -y \
11
+ gcc \
12
+ && rm -rf /var/lib/apt/lists/*
13
+
14
+ # Set work directory
15
+ WORKDIR /app
16
+
17
+ # Install Python dependencies
18
+ COPY requirements.txt .
19
+ RUN pip install --upgrade pip && \
20
+ pip install -r requirements.txt
21
+
22
+ # Copy project source
23
+ COPY videocaller/ ./videocaller/
24
+
25
+ # Copy HuggingFace startup script
26
+ COPY start.hf.sh /app/start.hf.sh
27
+ RUN chmod +x /app/start.hf.sh
28
+
29
+ # HuggingFace Spaces requires the app to listen on port 7860
30
+ EXPOSE 7860
31
+
32
+ # Static files are collected at startup (in start.hf.sh) so env vars are available
33
+ # CMD runs migrations, optionally starts Django-Q worker, then starts Daphne on port 7860
34
+ CMD ["/app/start.hf.sh"]
Dockerfile.worker ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ # Set environment variables
4
+ ENV PYTHONUNBUFFERED=1 \
5
+ PYTHONDONTWRITEBYTECODE=1 \
6
+ PIP_NO_CACHE_DIR=1
7
+
8
+ # Install system dependencies
9
+ RUN apt-get update && apt-get install -y \
10
+ gcc \
11
+ postgresql-client \
12
+ && rm -rf /var/lib/apt/lists/*
13
+
14
+ # Set work directory
15
+ WORKDIR /app
16
+
17
+ # Install Python dependencies
18
+ COPY requirements.txt .
19
+ RUN pip install --upgrade pip && \
20
+ pip install -r requirements.txt
21
+
22
+ # Copy project
23
+ COPY videocaller/ ./videocaller/
24
+
25
+ # Run qcluster worker
26
+ WORKDIR /app/videocaller
27
+ CMD ["python", "manage.py", "qcluster"]
HF_DEPLOYMENT.md ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Deploying Aimeet to Hugging Face Spaces
2
+
3
+ Hugging Face Spaces with the **Docker SDK** lets you run this full Django app for free.
4
+ The free tier gives you 2 vCPU and 16 GB RAM.
5
+
6
+ ## Constraints to Know
7
+
8
+ | Constraint | Impact | Solution |
9
+ |---|---|---|
10
+ | No persistent disk | SQLite data & uploaded files lost on restart | Use external PostgreSQL + AWS S3 |
11
+ | Single container | Web server + worker must run in one container | Handled — `start.hf.sh` starts both processes |
12
+ | Space hibernation | Free Spaces sleep after 48 h inactivity (~30 s wake) | Acceptable for demos |
13
+ | Port 7860 | Must serve on this port | Already set in `Dockerfile.huggingface` |
14
+
15
+ ---
16
+
17
+ ## Step 0 — Set Up External Services (free tiers)
18
+
19
+ | Service | Provider | What to get |
20
+ |---|---|---|
21
+ | **PostgreSQL** | [supabase.com](https://supabase.com) or [neon.tech](https://neon.tech) | Connection string (`postgresql://...`) |
22
+ | **Redis** | [upstash.com](https://upstash.com) | Redis URL (`rediss://...`) |
23
+ | **Vector DB** | [cloud.qdrant.io](https://cloud.qdrant.io) | Cluster URL + API key |
24
+ | **Video calls** | [console.agora.io](https://console.agora.io) | App ID + Certificate |
25
+ | **Realtime chat** | [pusher.com](https://pusher.com) | App ID, Key, Secret, Cluster |
26
+ | **Google Gemini** | [aistudio.google.com](https://aistudio.google.com) | API key |
27
+ | **Transcription** | [assemblyai.com](https://assemblyai.com) | API key (optional) |
28
+ | **File storage** | [AWS S3](https://aws.amazon.com/s3/) | Bucket + IAM key/secret (optional) |
29
+
30
+ ---
31
+
32
+ ## Step 1 — Create a New Space on Hugging Face
33
+
34
+ 1. Go to **[huggingface.co](https://huggingface.co)** → **New Space**.
35
+ 2. Fill in:
36
+ - **Space name**: `aimeet` (or anything you like)
37
+ - **SDK**: **Docker**
38
+ - **Visibility**: Public or Private
39
+ 3. Click **Create Space**.
40
+
41
+ The Space page shows an empty repo. Copy its git URL (looks like
42
+ `https://huggingface.co/spaces/YOUR_USERNAME/aimeet`).
43
+
44
+ ---
45
+
46
+ ## Step 2 — Clone the Space Repo and Copy the Project
47
+
48
+ ```bash
49
+ # Clone the empty HF Space repo
50
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/aimeet hf-space
51
+ cd hf-space
52
+
53
+ # Copy all project files into it
54
+ cp -r /path/to/Django-VIdeocall-App/. .
55
+
56
+ # HF Spaces requires its own README.md with YAML front matter
57
+ cp hf_space_README.md README.md
58
+ rm hf_space_README.md  # remove the original — README.md (the copy just made) is the one HF uses
59
+
60
+ # Tell Docker to use the HF-specific Dockerfile
61
+ cp Dockerfile.huggingface Dockerfile
62
+
63
+ # Remove the local virtualenv and build artifacts (already in .gitignore)
64
+ # The .gitignore already excludes env/, __pycache__, *.sqlite3, media/, etc.
65
+ ```
66
+
67
+ ---
68
+
69
+ ## Step 3 — Commit and Push
70
+
71
+ ```bash
72
+ git lfs install # HF uses Git LFS; run once
73
+ git add .
74
+ git commit -m "Initial deploy of Aimeet to HF Spaces"
75
+ git push
76
+ ```
77
+
78
+ The Space starts building automatically. Open the **Logs** tab on the Space page to follow the build. First build takes **5–10 minutes** because of large dependencies (`sentence-transformers`, `unstructured`, etc.).
79
+
80
+ ---
81
+
82
+ ## Step 4 — Set Environment Secrets
83
+
84
+ In the Space page go to **Settings → Variables and secrets → New secret** and add:
85
+
86
+ | Key | Value |
87
+ |---|---|
88
+ | `DJANGO_SECRET_KEY` | Run `python -c "import secrets; print(secrets.token_urlsafe(50))"` |
89
+ | `DJANGO_DEBUG` | `false` |
90
+ | `DATABASE_URL` | Your PostgreSQL connection string |
91
+ | `REDIS_URL` | Your Upstash Redis URL |
92
+ | `AGORA_APP_ID` | Agora App ID |
93
+ | `AGORA_APP_CERTIFICATE` | Agora App Certificate |
94
+ | `PUSHER_APP_ID` | Pusher App ID |
95
+ | `PUSHER_KEY` | Pusher Key |
96
+ | `PUSHER_SECRET` | Pusher Secret |
97
+ | `PUSHER_CLUSTER` | Pusher cluster (e.g. `ap2`) |
98
+ | `GOOGLE_API_KEY` | Gemini API key |
99
+ | `QDRANT_URL` | Qdrant cluster URL |
100
+ | `QDRANT_API_KEY` | Qdrant API key |
101
+ | `ASSEMBLYAI_API_KEY` | AssemblyAI key (optional) |
102
+ | `AWS_ACCESS_KEY_ID` | AWS key (optional, for recordings) |
103
+ | `AWS_SECRET_ACCESS_KEY` | AWS secret |
104
+ | `AWS_STORAGE_BUCKET_NAME` | S3 bucket name |
105
+
106
+ > **Important**: Use **Secrets** (not Variables) for all API keys and passwords — they are encrypted and not exposed in build logs.
107
+
108
+ After adding secrets, click **Restart Space** in the Settings tab to apply them.
109
+
110
+ ---
111
+
112
+ ## Step 5 — Access Your App
113
+
114
+ Once the Space is running, your app is live at:
115
+
116
+ ```
117
+ https://YOUR_USERNAME-aimeet.hf.space
118
+ ```
119
+
120
+ HF Spaces automatically sets the `SPACE_HOST` environment variable to this hostname. The `settings.py` already reads it and adds it to `ALLOWED_HOSTS` and `CSRF_TRUSTED_ORIGINS`.
121
+
122
+ ---
123
+
124
+ ## Updating the App
125
+
126
+ Push changes to the HF Space git repo to trigger a rebuild:
127
+
128
+ ```bash
129
+ cd hf-space
130
+ # make your changes
131
+ git add .
132
+ git commit -m "Update: description of change"
133
+ git push
134
+ ```
135
+
136
+ ---
137
+
138
+ ## Troubleshooting
139
+
140
+ | Symptom | Likely cause | Fix |
141
+ |---|---|---|
142
+ | Build fails at `pip install` | Heavy deps hit memory limit | Try Space tier upgrade or pin lighter versions |
143
+ | `DisallowedHost` error | Hostname not in `ALLOWED_HOSTS` | Verify `SPACE_HOST` env var is set automatically by HF |
144
+ | `500 Internal Server Error` | Missing required secret | Check Logs tab for the traceback; add the missing secret |
145
+ | `CSRF verification failed` | CSRF origin mismatch | Verify the `SPACE_HOST` hostname is picked up into `CSRF_TRUSTED_ORIGINS` (see Step 5); restart the Space after changing settings
146
+ | Database errors on first run | Migrations not run yet | Migrations run automatically in `start.hf.sh`; check Logs |
147
+ | Static files returning 404 | `collectstatic` failed | Check build logs; ensure `DJANGO_SECRET_KEY` secret is set |
Procfile ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ web: cd videocaller && daphne -b 0.0.0.0 -p $PORT videocaller.asgi:application
2
+ worker: cd videocaller && python manage.py qcluster
README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Aimeet
3
+ emoji: 🎥
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ ---
10
+
11
+ # Aimeet — AI-Powered Video Meeting Platform
12
+
13
+ Real-time video meetings with AI summaries, RAG document chat, and live transcription.
14
+
15
+ ## Features
16
+
17
+ - **Video/audio calls** via Agora RTC
18
+ - **AI meeting summaries** powered by Google Gemini
19
+ - **Document Q&A** with RAG (LangChain + Qdrant)
20
+ - **Live transcription** via AssemblyAI
21
+ - **Real-time chat** via Pusher
22
+
23
+ ## Required Environment Variables
24
+
25
+ Set these in **Settings → Variables and secrets** of your Space:
26
+
27
+ | Key | Required | Description |
28
+ |---|---|---|
29
+ | `DJANGO_SECRET_KEY` | ✅ | Django secret key (generate a random 50-char string) |
30
+ | `DJANGO_DEBUG` | | `false` for production |
31
+ | `DATABASE_URL` | ✅ | PostgreSQL URL (Supabase / Neon free tier) |
32
+ | `REDIS_URL` | | Redis URL for task queue (Upstash free tier) |
33
+ | `AGORA_APP_ID` | ✅ | Agora App ID |
34
+ | `AGORA_APP_CERTIFICATE` | ✅ | Agora App Certificate |
35
+ | `PUSHER_APP_ID` | ✅ | Pusher App ID |
36
+ | `PUSHER_KEY` | ✅ | Pusher Key |
37
+ | `PUSHER_SECRET` | ✅ | Pusher Secret |
38
+ | `PUSHER_CLUSTER` | ✅ | Pusher Cluster (e.g. `ap2`) |
39
+ | `GOOGLE_API_KEY` | ✅ | Google Gemini API key |
40
+ | `QDRANT_URL` | ✅ | Qdrant Cloud cluster URL |
41
+ | `QDRANT_API_KEY` | ✅ | Qdrant Cloud API key |
42
+ | `ASSEMBLYAI_API_KEY` | | AssemblyAI transcription key |
43
+ | `AWS_ACCESS_KEY_ID` | | AWS S3 for recording storage |
44
+ | `AWS_SECRET_ACCESS_KEY` | | AWS S3 secret |
45
+ | `AWS_STORAGE_BUCKET_NAME` | | S3 bucket name |
46
+ | `AWS_S3_REGION_NAME` | | S3 region (default: `us-east-1`) |
REQUIREMENTS.md ADDED
@@ -0,0 +1,472 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AIMeet - Requirements Document
2
+
3
+ ## 1. Functional Requirements
4
+
5
+ ### 1.1 User Management
6
+ - FR-1.1: Users must be able to register with username, email, and password
7
+ - FR-1.2: Users must be able to log in with credentials
8
+ - FR-1.3: Users must be able to log out
9
+ - FR-1.4: Users must be able to reset password via email
10
+ - FR-1.5: User profiles must store name, email, profile picture
11
+
12
+ ### 1.2 Meeting Management
13
+ - FR-2.1: Users can create a meeting with title, description, and max participants
14
+ - FR-2.2: System generates unique shareable room code for each meeting
15
+ - FR-2.3: Users can join meetings using room code
16
+ - FR-2.4: Meeting host can end the meeting
17
+ - FR-2.5: Meeting state tracks: active, ended, archived
18
+ - FR-2.6: Users can view list of their meetings (hosted and joined)
19
+ - FR-2.7: Users can delete or archive completed meetings
20
+
21
+ ### 1.3 Real-Time Video & Audio
22
+ - FR-3.1: Video streaming using Agora RTC SDK
23
+ - FR-3.2: Audio streaming with VP8 codec
24
+ - FR-3.3: Dynamic bitrate adjustment based on network
25
+ - FR-3.4: Participants can mute/unmute audio and video
26
+ - FR-3.5: Host can kick participants
27
+ - FR-3.6: Screen sharing capability (optional, future)
28
+
29
+ ### 1.4 Recording
30
+ - FR-4.1: Audio is automatically recorded during meeting using MediaRecorder
31
+ - FR-4.2: Recording saved as WebM format locally
32
+ - FR-4.3: Users can upload recording after meeting
33
+ - FR-4.4: Recording uploaded to AWS S3
34
+ - FR-4.5: System stores recording metadata (size, duration, upload time)
35
+ - FR-4.6: Presigned URLs generated for private S3 access
36
+
37
+ ### 1.5 Transcription
38
+ - FR-5.1: Uploaded recordings sent to AssemblyAI for transcription
39
+ - FR-5.2: System polls AssemblyAI for transcription status
40
+ - FR-5.3: Completed transcripts saved to database
41
+ - FR-5.4: Transcript status tracked: not_started, processing, completed, failed
42
+ - FR-5.5: Transcript linked to meeting record
43
+
44
+ ### 1.6 Knowledge Processing (RAG)
45
+ - FR-6.1: Users can trigger "Prepare for Search" to process transcript
46
+ - FR-6.2: System chunks transcript using recursive character splitting (500 tokens, 50 overlap)
47
+ - FR-6.3: Chunks stored in TranscriptChunk model
48
+ - FR-6.4: Chunks embedded using OpenAI text-embedding-3-small
49
+ - FR-6.5: Embeddings stored in Qdrant vector database
50
+ - FR-6.6: Idempotent processing: check timestamps to avoid reprocessing
51
+
52
+ ### 1.7 Question Answering (RAG Query)
53
+ - FR-7.1: Users can ask questions about meeting content
54
+ - FR-7.2: Question embedded using same OpenAI model
55
+ - FR-7.3: System searches Qdrant for top-5 similar chunks
56
+ - FR-7.4: Conversation history retrieved for context
57
+ - FR-7.5: GPT-4o called with context + history + question
58
+ - FR-7.6: Response generated and displayed to user
59
+ - FR-7.7: Q&A turn saved to ConversationHistory
60
+
61
+ ### 1.8 Meeting Preparation (Sticky Notes)
62
+ - FR-8.1: When creating new meeting, system suggests related past meetings
63
+ - FR-8.2: Suggestions based on meeting title/agenda keywords
64
+ - FR-8.3: Shows what was discussed about same topics before
65
+ - FR-8.4: Users can expand sticky notes to see full context
66
+ - FR-8.5: Helps prevent duplicate discussions
67
+
68
+ ### 1.9 Document Management
69
+ - FR-9.1: Users can upload documents (PDF, DOCX, TXT)
70
+ - FR-9.2: Documents stored in S3
71
+ - FR-9.3: Document text extracted and stored
72
+ - FR-9.4: Documents chunked same way as transcripts
73
+ - FR-9.5: Document chunks embedded and stored in Qdrant
74
+ - FR-9.6: Users can view list of documents per meeting
75
+ - FR-9.7: Users can delete documents
76
+
77
+ ### 1.10 Unified Search
78
+ - FR-10.1: Questions search both transcripts and documents
79
+ - FR-10.2: Results include source type (meeting transcript vs document)
80
+ - FR-10.3: Search results show relevance scores
81
+ - FR-10.4: Source metadata (timestamps, document names) included
82
+
83
+ ### 1.11 Chat
84
+ - FR-11.1: Real-time chat during meetings using WebSocket
85
+ - FR-11.2: Chat messages saved to database
86
+ - FR-11.3: Users can view chat history
87
+ - FR-11.4: Message timestamps tracked
88
+ - FR-11.5: Messages linked to user and meeting
89
+
90
+ ### 1.12 Reporting & Analytics (Future)
91
+ - FR-12.1: Meeting duration and participant count
92
+ - FR-12.2: Transcript statistics (word count, duration)
93
+ - FR-12.3: Q&A usage statistics
94
+ - FR-12.4: Most discussed topics across meetings
95
+
96
+ ---
97
+
98
+ ## 2. Non-Functional Requirements
99
+
100
+ ### 2.1 Performance
101
+ - NFR-1.1: Q&A response time: <4 seconds (including LLM latency)
102
+ - NFR-1.2: Vector search latency: <500ms
103
+ - NFR-1.3: API response time: <1 second for non-AI endpoints
104
+ - NFR-1.4: Page load time: <3 seconds
105
+ - NFR-1.5: Concurrent users: 100+ with auto-scaling
106
+ - NFR-1.6: Transcript processing: <1 minute for typical meeting
107
+
108
+ ### 2.2 Scalability
109
+ - NFR-2.1: Horizontal scaling via EC2 Auto Scaling Groups
110
+ - NFR-2.2: Database: RDS with read replicas
111
+ - NFR-2.3: S3 handles unlimited storage
112
+ - NFR-2.4: Qdrant Cloud manages vector scaling
113
+ - NFR-2.5: Support growth from 10 to 10,000 users
114
+
115
+ ### 2.3 Reliability
116
+ - NFR-3.1: 99.5% uptime SLA
117
+ - NFR-3.2: Automated daily database backups
118
+ - NFR-3.3: Multi-AZ RDS for failover
119
+ - NFR-3.4: CloudFront CDN for static assets
120
+ - NFR-3.5: Graceful error handling and user feedback
121
+
122
+ ### 2.4 Security
123
+ - NFR-4.1: HTTPS for all communications
124
+ - NFR-4.2: Password hashing with bcrypt
125
+ - NFR-4.3: JWT tokens for API authentication
126
+ - NFR-4.4: SQL injection protection via ORM
127
+ - NFR-4.5: XSS protection via template escaping
128
+ - NFR-4.6: CSRF protection on forms
129
+ - NFR-4.7: S3 encryption at rest (AES-256)
130
+ - NFR-4.8: Database encryption (KMS)
131
+ - NFR-4.9: API keys in Secrets Manager (no hardcoding)
132
+ - NFR-4.10: Private S3 access via presigned URLs
133
+ - NFR-4.11: Private subnet for RDS (no public IP)
134
+ - NFR-4.12: Rate limiting: 100 requests/minute per user
135
+
136
+ ### 2.5 Usability
137
+ - NFR-5.1: Responsive design for mobile (375px+) and desktop
138
+ - NFR-5.2: Accessibility: WCAG 2.1 Level AA compliance
139
+ - NFR-5.3: Intuitive UI with clear navigation
140
+ - NFR-5.4: Error messages explain what went wrong
141
+ - NFR-5.5: Dark and light mode support (future)
142
+
143
+ ### 2.6 Maintainability
144
+ - NFR-6.1: Code documented with docstrings
145
+ - NFR-6.2: DRY principle: no code duplication
146
+ - NFR-6.3: Clear separation of concerns
147
+ - NFR-6.4: Comprehensive logging with timestamps
148
+ - NFR-6.5: Automated testing (unit + integration)
149
+
150
+ ### 2.7 Compatibility
151
+ - NFR-7.1: Browser support: Chrome, Firefox, Safari, Edge (latest 2 versions)
152
+ - NFR-7.2: Mobile support: iOS Safari, Android Chrome
153
+ - NFR-7.3: Python 3.13+ support
154
+ - NFR-7.4: PostgreSQL 12+ support
155
+
156
+ ---
157
+
158
+ ## 3. System Requirements
159
+
160
+ ### 3.1 Software Requirements
161
+ - **Backend**: Django 4.x, Python 3.13+
162
+ - **Database**: PostgreSQL 12+ (or SQLite for dev)
163
+ - **Web Server**: Gunicorn + Nginx
164
+ - **Vector DB**: Qdrant 1.x
165
+ - **Message Queue** (future): Celery + Redis
166
+
167
+ ### 3.2 Hardware Requirements (Production)
168
+ - **Compute**: EC2 t3.medium (2 vCPU, 4GB RAM) minimum
169
+ - Development: t3.small sufficient
170
+ - Production: t3.large+ with auto-scaling 2-10 instances
171
+ - **Database**: RDS t4g.medium (2 vCPU, 4GB RAM)
172
+ - Storage: 100GB gp3 (auto-scaling)
173
+ - **Bandwidth**: 10 Mbps minimum (up to 1 Gbps for scaling)
174
+
175
+ ### 3.3 Browser Requirements
176
+ - Minimum: Chrome 90+, Firefox 88+, Safari 14+, Edge 90+
177
+ - WebRTC support required for video
178
+ - LocalStorage and SessionStorage support
179
+ - WebSocket support
180
+
181
+ ---
182
+
183
+ ## 4. Dependencies
184
+
185
+ ### 4.1 Backend Dependencies
186
+ ```
187
+ Django==4.2
188
+ djangorestframework==3.14.0
189
+ psycopg2-binary==2.9.0
190
+ python-dotenv==1.0.0
191
+
192
+ # AI & ML
193
+ openai==2.16.0
194
+ qdrant-client==1.16.2
195
+ requests==2.31.0
196
+
197
+ # Transcription
198
+ AssemblyAI (API, no package)
199
+
200
+ # Cloud
201
+ boto3==1.26.137
202
+
203
+ # Real-time
204
+ pusher==3.3.1
205
+
206
+ # Video
207
+ agora-rtm (Agora SDK)
208
+ agora-token-builder (Token generation)
209
+
210
+ # Utilities
211
+ python-dateutil==2.8.2
212
+ pytz==2023.3
213
+ Pillow==10.0.0
214
+ ```
215
+
216
+ ### 4.2 Frontend Dependencies
217
+ ```
218
+ Agora RTC SDK v4.24.2 (JavaScript)
219
+ Bootstrap 5.3
220
+ jQuery 3.6 (optional, for DOM manipulation)
221
+ ```
222
+
223
+ ### 4.3 External Services
224
+ - **OpenAI API**: Embeddings (text-embedding-3-small) + LLM (GPT-4o)
225
+ - **AssemblyAI API**: Speech-to-text transcription
226
+ - **Qdrant Cloud**: Vector database hosting
227
+ - **AWS Services**: EC2, RDS, S3, CloudWatch, Secrets Manager, ALB
228
+ - **Agora**: Video/audio RTC
229
+ - **Pusher**: WebSocket for chat
230
+
231
+ ---
232
+
233
+ ## 5. API Requirements
234
+
235
+ ### 5.1 REST API Specifications
236
+ - **Base URL**: `/api/` or `/` (depending on endpoint)
237
+ - **Content-Type**: `application/json`
238
+ - **Authentication**: Django session + optional JWT for API clients
239
+ - **Response Format**: JSON with status, data, and error fields
240
+ - **Pagination**: Limit + offset for list endpoints
241
+ - **Versioning**: Not required initially (v1 implicit)
242
+
243
+ ### 5.2 WebSocket Requirements
244
+ - **Protocol**: WebSocket (Pusher-managed)
245
+ - **Channels**: Per-meeting chat channels
246
+ - **Message Format**: JSON
247
+ - **Auto-reconnect**: Client-side retry logic
248
+
249
+ ### 5.3 Rate Limiting
250
+ - 100 requests/minute per user
251
+ - 1000 requests/minute per IP
252
+ - Q&A queries: 10 per minute per user
253
+
254
+ ---
255
+
256
+ ## 6. Infrastructure Requirements
257
+
258
+ ### 6.1 AWS Services Required
259
+ - **Compute**: EC2 (application server)
260
+ - **Database**: RDS PostgreSQL (relational data)
261
+ - **Storage**: S3 (recordings, documents)
262
+ - **CDN**: CloudFront (static assets, S3 downloads)
263
+ - **Load Balancer**: Application Load Balancer (ALB)
264
+ - **Monitoring**: CloudWatch (logs, metrics, alarms)
265
+ - **Secrets**: Secrets Manager (API keys, credentials)
266
+ - **Networking**: VPC, Security Groups, NAT Gateway
267
+
268
+ ### 6.2 Third-Party Services Required
269
+ - **Qdrant Cloud**: Vector database (managed)
270
+ - **OpenAI**: API access (embeddings + GPT-4o)
271
+ - **AssemblyAI**: Transcription API
272
+ - **Agora**: RTC infrastructure
273
+ - **Pusher**: WebSocket infrastructure
274
+
275
+ ### 6.3 Monitoring & Logging
276
+ - CloudWatch Logs: All application logs
277
+ - CloudWatch Metrics: CPU, memory, request latency
278
+ - CloudWatch Alarms: Errors, latency spikes, service degradation
279
+ - Application Insights: APM for performance tracking (optional)
280
+
281
+ ---
282
+
283
+ ## 7. Data Requirements
284
+
285
+ ### 7.1 Database Schema
286
+ - **Users**: id, username, email, password_hash, created_at
287
+ - **MeetingRoom**: id, room_code, host_id, title, description, status, recording data, transcript data, embedding metadata
288
+ - **TranscriptChunk**: id, meeting_id, chunk_text, chunk_index, embedding_vector_id
289
+ - **DocumentUpload**: id, meeting_id, file_name, file_type, s3_url, raw_text
290
+ - **DocumentChunk**: id, document_id, chunk_text, chunk_index, embedding_vector_id
291
+ - **ConversationHistory**: id, meeting_id, user_id, user_question, assistant_response, relevant_chunks
292
+ - **ChatMessage**: id, user_id, content, created_at
293
+
294
+ ### 7.2 Vector Database Schema
295
+ - **Collection**: meeting_transcripts
296
+ - Dimension: 1536 (OpenAI text-embedding-3-small)
297
+ - Distance: Cosine Similarity
298
+ - Payload: meeting_id, chunk_index, text, timestamps
299
+
300
+ ### 7.3 Storage (S3) Structure
301
+ ```
302
+ s3://aimeet-s3-bucket/
303
+ ├── recordings/
304
+ │ ├── meeting_123_audio.webm
305
+ │ └── meeting_124_audio.webm
306
+ ├── documents/
307
+ │ ├── document_456.pdf
308
+ │ └── document_457.txt
309
+ └── transcripts/
310
+ ├── transcript_123.txt
311
+ └── transcript_124.txt
312
+ ```
313
+
314
+ ### 7.4 Data Retention Policy
315
+ - Recordings: Keep indefinitely (archive to Glacier after 90 days)
316
+ - Transcripts: Keep indefinitely
317
+ - Chat messages: Keep indefinitely
318
+ - Documents: Keep indefinitely
319
+ - Database backups: 35-day retention
320
+ - Logs: 30-day retention
321
+
322
+ ---
323
+
324
+ ## 8. Integration Requirements
325
+
326
+ ### 8.1 External API Integrations
327
+ - **OpenAI API**: Embeddings (batch and single)
328
+ - **AssemblyAI API**: Transcription (async polling)
329
+ - **Qdrant API**: Vector search and storage
330
+ - **AWS SDK (Boto3)**: S3 operations
331
+ - **Agora SDK**: Token generation and RTC
332
+ - **Pusher API**: WebSocket messaging
333
+
334
+ ### 8.2 Authentication Integrations
335
+ - Django authentication (built-in)
336
+ - Optional: OAuth2 (Google, GitHub) - future
337
+ - Optional: SAML - future
338
+
339
+ ---
340
+
341
+ ## 9. Testing Requirements
342
+
343
+ ### 9.1 Unit Testing
344
+ - Models: Test data validation and relationships
345
+ - Views: Test API endpoints with mocks
346
+ - Utilities: Test embedding, chunking, RAG functions
347
+ - Target: >80% code coverage
348
+
349
+ ### 9.2 Integration Testing
350
+ - End-to-end meeting flow
351
+ - Recording upload and transcription
352
+ - RAG pipeline (chunk → embed → search → query)
353
+ - Document upload and search
354
+
355
+ ### 9.3 Performance Testing
356
+ - Load test: 100 concurrent users
357
+ - Transcription processing time
358
+ - Q&A response latency
359
+ - Vector search speed
360
+
361
+ ### 9.4 Security Testing
362
+ - OWASP Top 10 vulnerability scanning
363
+ - SQL injection attempts
364
+ - XSS payloads
365
+ - CSRF validation
366
+
367
+ ---
368
+
369
+ ## 10. Documentation Requirements
370
+
371
+ ### 10.1 Code Documentation
372
+ - Docstrings for all functions/methods
373
+ - Inline comments for complex logic
374
+ - README.md for setup and usage
375
+ - API documentation (Swagger/OpenAPI)
376
+
377
+ ### 10.2 User Documentation
378
+ - Quick start guide
379
+ - Feature tutorials
380
+ - FAQ
381
+ - Troubleshooting guide
382
+
383
+ ### 10.3 System Documentation
384
+ - ARCHITECTURE.md (system design)
385
+ - DESIGN.md (diagrams and flows)
386
+ - REQUIREMENTS.md (this document)
387
+ - Deployment guide
388
+
389
+ ---
390
+
391
+ ## 11. Future Enhancements
392
+
393
+ ### 11.1 Planned Features
394
+ - Speaker diarization (identify who said what)
395
+ - Automatic action item detection
396
+ - Topic summaries and key moments
397
+ - Calendar integration
398
+ - Role-based access control
399
+ - Multi-language support
400
+ - Slack/Teams integration
401
+ - Custom embedding models
402
+
403
+ ### 11.2 Optimization Opportunities
404
+ - Redis caching layer (conversation history, user sessions)
405
+ - Celery background jobs (transcription polling, document processing)
406
+ - WebRTC data channels (peer-to-peer communication)
407
+ - Progressive Web App (PWA) capabilities
408
+
409
+ ---
410
+
411
+ ## 12. Success Criteria
412
+
413
+ ### 12.1 Functional Success
414
+ - All FR requirements fully implemented
415
+ - All tests passing
416
+ - No critical bugs in production
417
+
418
+ ### 12.2 Performance Success
419
+ - Page load time <3 seconds (95th percentile)
420
+ - Q&A response time <4 seconds (95th percentile)
421
+ - 99.5% uptime maintained
422
+ - <1 second vector search latency
423
+
424
+ ### 12.3 User Success
425
+ - User registration completion rate >90%
426
+ - Meeting creation to Q&A within 5 minutes
427
+ - >80% of users try Q&A feature within first week
428
+
429
+ ### 12.4 Business Success
430
+ - Support 1000+ concurrent users
431
+ - Cost <$1000/month at 1000-user scale
432
+ - Document uploaded for >50% of meetings
433
+ - Sticky notes used in >40% of meetings
434
+
435
+ ---
436
+
437
+ ## 13. Constraints & Assumptions
438
+
439
+ ### 13.1 Constraints
440
+ - OpenAI API rate limits (depends on plan)
441
+ - AssemblyAI transcription queue
442
+ - AWS service quotas
443
+ - Budget limitations for cloud services
444
+
445
+ ### 13.2 Assumptions
446
+ - Users have stable internet connection (>2 Mbps)
447
+ - Meetings typically 30 minutes to 2 hours
448
+ - Transcripts typically 5K-20K tokens
449
+ - Users have modern browsers (2020+)
450
+ - Organizations want to keep data private (not shared)
451
+
452
+ ---
453
+
454
+ ## 14. Compliance & Standards
455
+
456
+ ### 14.1 Security Standards
457
+ - SSL/TLS 1.3 for encryption
458
+ - OWASP Top 10 compliance
459
+ - GDPR compliance (user data protection)
460
+ - HIPAA compliance (if health data involved) - future
461
+
462
+ ### 14.2 Coding Standards
463
+ - PEP 8 for Python code style
464
+ - Django best practices
465
+ - RESTful API design
466
+ - Semantic versioning for releases
467
+
468
+ ### 14.3 Accessibility Standards
469
+ - WCAG 2.1 Level AA compliance
470
+ - Keyboard navigation support
471
+ - Screen reader compatibility
472
+ - Color contrast ratios >4.5:1
build.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ # Exit on error
3
+ set -o errexit
4
+
5
+ # Install dependencies
6
+ pip install --upgrade pip
7
+ pip install -r requirements.txt
8
+
9
+ # Navigate to Django project directory
10
+ cd videocaller
11
+
12
+ # Collect static files
13
+ python manage.py collectstatic --no-input
14
+
15
+ # Run migrations
16
+ python manage.py migrate --no-input
docker-compose.yml ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: "3.8"
2
+
3
+ services:
4
+ web:
5
+ build: .
6
+ working_dir: /app/videocaller
7
+ command: daphne -b 0.0.0.0 -p 8000 videocaller.asgi:application
8
+ volumes:
9
+ - ./videocaller:/app/videocaller
10
+ ports:
11
+ - "8000:8000"
12
+ env_file:
13
+ - .env
14
+ depends_on:
15
+ db:
16
+ condition: service_started
17
+ redis:
18
+ condition: service_healthy
19
+ environment:
20
+ - DATABASE_URL=postgresql://postgres:postgres@db:5432/aimeet
21
+ - REDIS_URL=redis://redis:6379/0
22
+
23
+ worker:
24
+ build: .
25
+ working_dir: /app/videocaller
26
+ command: python manage.py qcluster
27
+ volumes:
28
+ - ./videocaller:/app/videocaller
29
+ env_file:
30
+ - .env
31
+ depends_on:
32
+ db:
33
+ condition: service_started
34
+ redis:
35
+ condition: service_healthy
36
+ environment:
37
+ - DATABASE_URL=postgresql://postgres:postgres@db:5432/aimeet
38
+ - REDIS_URL=redis://redis:6379/0
39
+
40
+ db:
41
+ image: postgres:15
42
+ volumes:
43
+ - postgres_data:/var/lib/postgresql/data
44
+ environment:
45
+ - POSTGRES_DB=aimeet
46
+ - POSTGRES_USER=postgres
47
+ - POSTGRES_PASSWORD=postgres
48
+ ports:
49
+ - "5432:5432"
50
+
51
+ redis:
52
+ image: redis:7-alpine
53
+ ports:
54
+ - "6379:6379"
55
+ healthcheck:
56
+ test: ["CMD", "redis-cli", "ping"]
57
+ interval: 5s
58
+ timeout: 3s
59
+ retries: 5
60
+
61
+ nginx:
62
+ image: nginx:alpine
63
+ ports:
64
+ - "80:80"
65
+ - "443:443"
66
+ volumes:
67
+ - ./nginx.conf:/etc/nginx/nginx.conf:ro
68
+ - ./videocaller/staticfiles:/app/staticfiles:ro
69
+ - ./videocaller/media:/app/media:ro
70
+ depends_on:
71
+ - web
72
+
73
+ volumes:
74
+ postgres_data:
hf_space_README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Aimeet
3
+ emoji: 🎥
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ ---
10
+
11
+ # Aimeet — AI-Powered Video Meeting Platform
12
+
13
+ Real-time video meetings with AI summaries, RAG document chat, and live transcription.
14
+
15
+ ## Features
16
+
17
+ - **Video/audio calls** via Agora RTC
18
+ - **AI meeting summaries** powered by Google Gemini
19
+ - **Document Q&A** with RAG (LangChain + Qdrant)
20
+ - **Live transcription** via AssemblyAI
21
+ - **Real-time chat** via Pusher
22
+
23
+ ## Required Environment Variables
24
+
25
+ Set these in **Settings → Variables and secrets** of your Space:
26
+
27
+ | Key | Required | Description |
28
+ |---|---|---|
29
+ | `DJANGO_SECRET_KEY` | ✅ | Django secret key (generate a random 50-char string) |
30
+ | `DJANGO_DEBUG` | | `false` for production |
31
+ | `DATABASE_URL` | ✅ | PostgreSQL URL (Supabase / Neon free tier) |
32
+ | `REDIS_URL` | | Redis URL for task queue (Upstash free tier) |
33
+ | `AGORA_APP_ID` | ✅ | Agora App ID |
34
+ | `AGORA_APP_CERTIFICATE` | ✅ | Agora App Certificate |
35
+ | `PUSHER_APP_ID` | ✅ | Pusher App ID |
36
+ | `PUSHER_KEY` | ✅ | Pusher Key |
37
+ | `PUSHER_SECRET` | ✅ | Pusher Secret |
38
+ | `PUSHER_CLUSTER` | ✅ | Pusher Cluster (e.g. `ap2`) |
39
+ | `GOOGLE_API_KEY` | ✅ | Google Gemini API key |
40
+ | `QDRANT_URL` | ✅ | Qdrant Cloud cluster URL |
41
+ | `QDRANT_API_KEY` | ✅ | Qdrant Cloud API key |
42
+ | `ASSEMBLYAI_API_KEY` | | AssemblyAI transcription key |
43
+ | `AWS_ACCESS_KEY_ID` | | AWS S3 for recording storage |
44
+ | `AWS_SECRET_ACCESS_KEY` | | AWS S3 secret |
45
+ | `AWS_STORAGE_BUCKET_NAME` | | S3 bucket name |
46
+ | `AWS_S3_REGION_NAME` | | S3 region (default: `us-east-1`) |
nginx.conf ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ events {
2
+ worker_connections 1024;
3
+ }
4
+
5
+ http {
6
+ upstream django {
7
+ server web:8000;
8
+ }
9
+
10
+ server {
11
+ listen 80;
12
+ server_name localhost;
13
+ client_max_body_size 100M;
14
+
15
+ location / {
16
+ proxy_pass http://django;
17
+ proxy_http_version 1.1;
18
+ proxy_set_header Upgrade $http_upgrade;
19
+ proxy_set_header Connection "upgrade";
20
+ proxy_set_header Host $host;
21
+ proxy_set_header X-Real-IP $remote_addr;
22
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
23
+ proxy_set_header X-Forwarded-Proto $scheme;
24
+ proxy_redirect off;
25
+ proxy_buffering off;
26
+ }
27
+
28
+ location /static/ {
29
+ alias /app/staticfiles/;
30
+ expires 30d;
31
+ add_header Cache-Control "public, immutable";
32
+ }
33
+
34
+ location /media/ {
35
+ alias /app/media/;
36
+ expires 7d;
37
+ add_header Cache-Control "public";
38
+ }
39
+ }
40
+ }
render.yaml ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ - type: web
3
+ name: aimeet
4
+ runtime: docker
5
+ region: oregon
6
+ plan: starter
7
+ branch: master
8
+ dockerfilePath: ./Dockerfile
9
+ envVars:
10
+ - key: PYTHON_VERSION
11
+ value: 3.11.4
12
+ - key: DJANGO_SECRET_KEY
13
+ generateValue: true
14
+ - key: DJANGO_DEBUG
15
+ value: "false"
16
+ - key: DJANGO_ALLOWED_HOSTS
17
+ sync: false
18
+ - key: DATABASE_URL
19
+ fromDatabase:
20
+ name: aimeet-db
21
+ property: connectionString
22
+ - key: REDIS_URL
23
+ fromDatabase:
24
+ name: aimeet-redis
25
+ property: connectionString
26
+ - key: AWS_ACCESS_KEY_ID
27
+ sync: false
28
+ - key: AWS_SECRET_ACCESS_KEY
29
+ sync: false
30
+ - key: AWS_STORAGE_BUCKET_NAME
31
+ sync: false
32
+ - key: AWS_S3_REGION_NAME
33
+ value: us-east-1
34
+ - key: AGORA_APP_ID
35
+ sync: false
36
+ - key: AGORA_APP_CERTIFICATE
37
+ sync: false
38
+ - key: AGORA_CUSTOMER_ID
39
+ sync: false
40
+ - key: AGORA_CUSTOMER_SECRET
41
+ sync: false
42
+ - key: AGORA_RECORDING_REGION
43
+ value: NA
44
+ - key: ASSEMBLYAI_API_KEY
45
+ sync: false
46
+ - key: GOOGLE_API_KEY
47
+ sync: false
48
+ - key: GOOGLE_GENERATE_MODEL
49
+ value: gemini-2.5-flash-lite
50
+ - key: GOOGLE_CONNECT_TIMEOUT
51
+ value: 10
52
+ - key: GOOGLE_READ_TIMEOUT
53
+ value: 600
54
+ - key: GOOGLE_MAX_TOKENS
55
+ value: 1000
56
+ - key: HF_EMBEDDING_MODEL
57
+ value: sentence-transformers/all-MiniLM-L6-v2
58
+ - key: HF_EMBEDDING_DIMENSION
59
+ value: 384
60
+ - key: QDRANT_URL
61
+ sync: false
62
+ - key: QDRANT_API_KEY
63
+ sync: false
64
+ - key: QDRANT_COLLECTION_NAME
65
+ value: meeting_transcripts
66
+ - key: PUSHER_APP_ID
67
+ sync: false
68
+ - key: PUSHER_KEY
69
+ sync: false
70
+ - key: PUSHER_SECRET
71
+ sync: false
72
+ - key: PUSHER_CLUSTER
73
+ sync: false
74
+
75
+ - type: worker
76
+ name: aimeet-worker
77
+ runtime: docker
78
+ region: oregon
79
+ plan: starter
80
+ branch: master
81
+ dockerfilePath: ./Dockerfile.worker
82
+ envVars:
83
+ - key: PYTHON_VERSION
84
+ value: 3.11.4
85
+ - key: DJANGO_SECRET_KEY
86
+ sync: false
87
+ - key: DATABASE_URL
88
+ fromDatabase:
89
+ name: aimeet-db
90
+ property: connectionString
91
+ - key: REDIS_URL
92
+ fromDatabase:
93
+ name: aimeet-redis
94
+ property: connectionString
95
+ - key: AWS_ACCESS_KEY_ID
96
+ sync: false
97
+ - key: AWS_SECRET_ACCESS_KEY
98
+ sync: false
99
+ - key: AWS_STORAGE_BUCKET_NAME
100
+ sync: false
101
+ - key: AWS_S3_REGION_NAME
102
+ value: us-east-1
103
+ - key: ASSEMBLYAI_API_KEY
104
+ sync: false
105
+ - key: GOOGLE_API_KEY
106
+ sync: false
107
+ - key: GOOGLE_GENERATE_MODEL
108
+ value: gemini-2.5-flash-lite
109
+ - key: GOOGLE_CONNECT_TIMEOUT
110
+ value: 10
111
+ - key: GOOGLE_READ_TIMEOUT
112
+ value: 600
113
+ - key: GOOGLE_MAX_TOKENS
114
+ value: 1000
115
+ - key: HF_EMBEDDING_MODEL
116
+ value: sentence-transformers/all-MiniLM-L6-v2
117
+ - key: HF_EMBEDDING_DIMENSION
118
+ value: 384
119
+ - key: QDRANT_URL
120
+ sync: false
121
+ - key: QDRANT_API_KEY
122
+ sync: false
123
+ - key: QDRANT_COLLECTION_NAME
124
+ value: meeting_transcripts
125
+
126
+ databases:
127
+ - name: aimeet-db
128
+ databaseName: aimeet
129
+ user: aimeet
130
+ region: oregon
131
+ plan: starter
132
+
133
+ - name: aimeet-redis
134
+ plan: starter
135
+ region: oregon
requirements-windows.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Windows-specific dependencies
2
+ # Install with: pip install -r requirements.txt -r requirements-windows.txt
3
+
4
+ python-magic-bin==0.4.14
requirements.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Django==4.1.5
2
+ setuptools==69.5.1
3
+ packaging==24.2
4
+ python-dotenv==0.21.0
5
+ pusher==3.3.1
6
+ boto3==1.26.137
7
+ requests==2.31.0
8
+ daphne==4.1.2
9
+ unstructured[pdf]==0.18.31
10
+ django-q==1.3.9
11
+ redis==3.5.3
12
+ google-generativeai==0.7.2
13
+ langchain==0.2.16
14
+ langchain-core==0.2.38
15
+ langchain-google-genai==1.0.10
16
+ langchain-qdrant==0.1.4
17
+ qdrant-client==1.7.3
18
+ langchain-text-splitters==0.2.4
19
+ langchain-huggingface==0.0.3
20
+ sentence-transformers==2.7.0
21
+ psycopg2-binary==2.9.9
22
+ gunicorn==21.2.0
23
+ whitenoise==6.6.0
24
+ dj-database-url==2.1.0
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.11.7
start.hf.sh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ cd /app/videocaller
5
+
6
+ echo "=== Collecting static files ==="
7
+ python manage.py collectstatic --noinput
8
+
9
+ echo "=== Running database migrations ==="
10
+ python manage.py migrate --noinput
11
+
12
+ # Start Django-Q worker in background only if Redis is configured.
13
+ # Without REDIS_URL the Q_CLUSTER falls back to the ORM broker (database-backed).
14
+ if [ -n "$REDIS_URL" ]; then
15
+ echo "=== Starting Django-Q worker (Redis broker) ==="
16
+ python manage.py qcluster &
17
+ echo "Worker started with PID $!"
18
+ else
19
+ echo "=== No REDIS_URL set — starting Django-Q worker with ORM broker ==="
20
+ python manage.py qcluster &
21
+ echo "Worker started with PID $!"
22
+ fi
23
+
24
+ echo "=== Starting Daphne ASGI server on 0.0.0.0:7860 ==="
25
+ exec daphne -b 0.0.0.0 -p 7860 videocaller.asgi:application
start.sh ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ cd /app/videocaller
5
+
6
+ # Run migrations
7
+ python manage.py migrate --noinput
8
+
9
+ # Collect static files (if needed)
10
+ python manage.py collectstatic --noinput
11
+
12
+ # Start Daphne web server
13
+ exec daphne -b 0.0.0.0 -p 8000 videocaller.asgi:application
videocaller/agora/__init__.py ADDED
File without changes
videocaller/agora/admin.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from django.contrib import admin
2
+
3
+ # Register your models here.
videocaller/agora/agenda_utils.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import re
3
+ from typing import List
4
+ import requests
5
+ from django.conf import settings
6
+ from .embedding_utils import search_similar_chunks
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ GOOGLE_API_KEY = getattr(settings, 'GOOGLE_API_KEY', '')
11
+ GOOGLE_GENERATE_MODEL = getattr(settings, 'GOOGLE_GENERATE_MODEL', 'gemini-2.5-flash-lite')
12
+ GOOGLE_API_BASE = "https://generativelanguage.googleapis.com/v1beta/models/"
13
+ GOOGLE_CONNECT_TIMEOUT = getattr(settings, 'GOOGLE_CONNECT_TIMEOUT', 10)
14
+ GOOGLE_READ_TIMEOUT = getattr(settings, 'GOOGLE_READ_TIMEOUT', 600)
15
+
16
+
17
+ def _google_generate(prompt: str) -> str:
18
+ if not GOOGLE_API_KEY:
19
+ raise ValueError("GOOGLE_API_KEY is not configured")
20
+
21
+ url = f"{GOOGLE_API_BASE}{GOOGLE_GENERATE_MODEL}:generateContent?key={GOOGLE_API_KEY}"
22
+ payload = {
23
+ "contents": [
24
+ {
25
+ "role": "user",
26
+ "parts": [
27
+ {"text": prompt}
28
+ ]
29
+ }
30
+ ]
31
+ }
32
+ response = requests.post(
33
+ url,
34
+ json=payload,
35
+ timeout=(GOOGLE_CONNECT_TIMEOUT, GOOGLE_READ_TIMEOUT)
36
+ )
37
+ response.raise_for_status()
38
+ data = response.json()
39
+
40
+ text_parts: List[str] = []
41
+ for candidate in data.get("candidates", []):
42
+ for part in candidate.get("content", {}).get("parts", []):
43
+ part_text = part.get("text")
44
+ if part_text:
45
+ text_parts.append(part_text)
46
+ return "".join(text_parts).strip()
47
+
48
+
49
+ def _parse_points(text: str, max_points: int = 8) -> List[str]:
50
+ lines = [line.strip() for line in text.splitlines() if line.strip()]
51
+ points: List[str] = []
52
+ for line in lines:
53
+ cleaned = re.sub(r"^[\-\*•\.\d\)\(]+\s*", "", line).strip()
54
+ if cleaned:
55
+ points.append(cleaned)
56
+ if len(points) >= max_points:
57
+ break
58
+ if not points and text.strip():
59
+ points = [text.strip()]
60
+ return points
61
+
62
+
63
+ def generate_agenda_points(meeting_title: str, meeting_description: str, meeting_id: int | None) -> List[str]:
64
+ agenda_hint = (meeting_title or "").strip()
65
+ query = f"Agenda: {agenda_hint}" if agenda_hint else "meeting agenda"
66
+ relevant_chunks = search_similar_chunks(query, meeting_id, top_k=12)
67
+ if not relevant_chunks:
68
+ return []
69
+
70
+ context = "\n\n".join([
71
+ f"[Source: {chunk.get('source_type', 'meeting_transcript')}] {chunk['text']}"
72
+ for chunk in relevant_chunks
73
+ ])
74
+
75
+ prompt = (
76
+ "You are preparing concise discussion points for a meeting.\n"
77
+ "Use ONLY the PAST NOTES below. Do not add new information or assumptions.\n"
78
+ "If the notes do not support a point, do not include it.\n"
79
+ "Return 5-8 short points (max 14 words each), one per line, no numbering.\n\n"
80
+ f"MEETING TITLE: {agenda_hint or 'N/A'}\n\n"
81
+ f"PAST NOTES:\n{context}"
82
+ )
83
+
84
+ try:
85
+ raw = _google_generate(prompt)
86
+ points = _parse_points(raw)
87
+ return points
88
+ except Exception as e:
89
+ logger.error("Error generating agenda points: %s", str(e))
90
+ return []
videocaller/agora/agora_key/AccessToken.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hmac
2
+ from hashlib import sha256
3
+ import base64
4
+ import struct
5
+ from zlib import crc32
6
+ import secrets
7
+ import time
8
+ from collections import OrderedDict
9
+
10
+ kJoinChannel = 1
11
+ kPublishAudioStream = 2
12
+ kPublishVideoStream = 3
13
+ kPublishDataStream = 4
14
+ kPublishAudiocdn = 5
15
+ kPublishVideoCdn = 6
16
+ kRequestPublishAudioStream = 7
17
+ kRequestPublishVideoStream = 8
18
+ kRequestPublishDataStream = 9
19
+ kInvitePublishAudioStream = 10
20
+ kInvitePublishVideoStream = 11
21
+ kInvitePublishDataStream = 12
22
+ kAdministrateChannel = 101
23
+ kRtmLogin = 1000
24
+
25
+ VERSION_LENGTH = 3
26
+ APP_ID_LENGTH = 32
27
+
28
+
29
+ def getVersion():
30
+ return '006'
31
+
32
+
33
+ def packUint16(x):
34
+ return struct.pack('<H', int(x))
35
+
36
+
37
+ def packUint32(x):
38
+ return struct.pack('<I', int(x))
39
+
40
+
41
+ def packInt32(x):
42
+ return struct.pack('<i', int(x))
43
+
44
+
45
+ def packString(string):
46
+ return packUint16(len(string)) + string
47
+
48
+
49
+ def packMap(m):
50
+ ret = packUint16(len(list(m.items())))
51
+ for k, v in list(m.items()):
52
+ ret += packUint16(k) + packString(v)
53
+ return ret
54
+
55
+
56
+ def packMapUint32(m):
57
+ ret = packUint16(len(list(m.items())))
58
+ for k, v in list(m.items()):
59
+ ret += packUint16(k) + packUint32(v)
60
+ return ret
61
+
62
+
63
+ class ReadByteBuffer:
64
+
65
+ def __init__(self, bytes):
66
+ self.buffer = bytes
67
+ self.position = 0
68
+
69
+ def unPackUint16(self):
70
+ len = struct.calcsize('H')
71
+ buff = self.buffer[self.position: self.position + len]
72
+ ret = struct.unpack('<H', buff)[0]
73
+ self.position += len
74
+ return ret
75
+
76
+ def unPackUint32(self):
77
+ len = struct.calcsize('I')
78
+ buff = self.buffer[self.position: self.position + len]
79
+ ret = struct.unpack('<I', buff)[0]
80
+ self.position += len
81
+ return ret
82
+
83
+ def unPackString(self):
84
+ strlen = self.unPackUint16()
85
+ buff = self.buffer[self.position: self.position + strlen]
86
+ ret = struct.unpack('<' + str(strlen) + 's', buff)[0]
87
+ self.position += strlen
88
+ return ret
89
+
90
+ def unPackMapUint32(self):
91
+ messages = {}
92
+ maplen = self.unPackUint16()
93
+
94
+ for index in range(maplen):
95
+ key = self.unPackUint16()
96
+ value = self.unPackUint32()
97
+ messages[key] = value
98
+ return messages
99
+
100
+
101
+ def unPackContent(buff):
102
+ readbuf = ReadByteBuffer(buff)
103
+ signature = readbuf.unPackString()
104
+ crc_channel_name = readbuf.unPackUint32()
105
+ crc_uid = readbuf.unPackUint32()
106
+ m = readbuf.unPackString()
107
+
108
+ return signature, crc_channel_name, crc_uid, m
109
+
110
+
111
+ def unPackMessages(buff):
112
+ readbuf = ReadByteBuffer(buff)
113
+ salt = readbuf.unPackUint32()
114
+ ts = readbuf.unPackUint32()
115
+ messages = readbuf.unPackMapUint32()
116
+
117
+ return salt, ts, messages
118
+
119
+
120
+ class AccessToken:
121
+
122
+ def __init__(self, appID='', appCertificate='', channelName='', uid=''):
123
+ self.appID = appID
124
+ self.appCertificate = appCertificate
125
+ self.channelName = channelName
126
+ self.ts = int(time.time()) + 24 * 3600
127
+ self.salt = secrets.SystemRandom().randint(1, 99999999)
128
+ self.messages = {}
129
+ if (uid == 0):
130
+ self.uidStr = ""
131
+ else:
132
+ self.uidStr = str(uid)
133
+
134
+ def addPrivilege(self, privilege, expireTimestamp):
135
+ self.messages[privilege] = expireTimestamp
136
+
137
+ def fromString(self, originToken):
138
+ try:
139
+ dk6version = getVersion()
140
+ originVersion = originToken[:VERSION_LENGTH]
141
+ if (originVersion != dk6version):
142
+ return False
143
+
144
+ originAppID = originToken[VERSION_LENGTH:(
145
+ VERSION_LENGTH + APP_ID_LENGTH)]
146
+ originContent = originToken[(VERSION_LENGTH + APP_ID_LENGTH):]
147
+ originContentDecoded = base64.b64decode(originContent)
148
+
149
+ signature, crc_channel_name, crc_uid, m = unPackContent(
150
+ originContentDecoded)
151
+ self.salt, self.ts, self.messages = unPackMessages(m)
152
+
153
+ except Exception as e:
154
+ print("error:", str(e))
155
+ return False
156
+
157
+ return True
158
+
159
+ def build(self):
160
+
161
+ self.messages = OrderedDict(
162
+ sorted(iter(self.messages.items()), key=lambda x: int(x[0])))
163
+
164
+ m = packUint32(self.salt) + packUint32(self.ts) \
165
+ + packMapUint32(self.messages)
166
+
167
+ val = self.appID.encode(
168
+ 'utf-8') + self.channelName.encode('utf-8') + self.uidStr.encode('utf-8') + m
169
+
170
+ signature = hmac.new(self.appCertificate.encode(
171
+ 'utf-8'), val, sha256).digest()
172
+ crc_channel_name = crc32(self.channelName.encode('utf-8')) & 0xffffffff
173
+ crc_uid = crc32(self.uidStr.encode('utf-8')) & 0xffffffff
174
+
175
+ content = packString(signature) \
176
+ + packUint32(crc_channel_name) \
177
+ + packUint32(crc_uid) \
178
+ + packString(m)
179
+
180
+ version = getVersion()
181
+ ret = version + self.appID + base64.b64encode(content).decode('utf-8')
182
+ return ret
videocaller/agora/agora_key/RtcTokenBuilder.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from .AccessToken import *
3
+ import os
4
+ import sys
5
+ from collections import OrderedDict
6
+
7
+
8
# Token roles. Only publisher-equivalent roles receive publish privileges
# (see RtcTokenBuilder.buildTokenWithAccount below).
Role_Attendee = 0  # deprecated, same as publisher
Role_Publisher = 1  # for live broadcaster
Role_Subscriber = 2  # default, for live audience
Role_Admin = 101  # deprecated, same as publisher
12
+
13
+
14
class RtcTokenBuilder:
    """Builds RTC join tokens for Agora channels.

    appId / appCertificate come from the Agora Dashboard.  privilegeExpiredTs
    is an absolute Unix timestamp (seconds since 1970-01-01 UTC) after which
    the granted privileges stop working — e.g. current time + 600 for a
    ten-minute token.
    """

    @staticmethod
    def buildTokenWithUid(appId, appCertificate, channelName, uid, role, privilegeExpiredTs):
        """Build a token for a numeric uid (32-bit unsigned; 0 means "any uid").

        Delegates to buildTokenWithAccount — the uid is serialized into the
        token as a string account either way.
        """
        return RtcTokenBuilder.buildTokenWithAccount(
            appId, appCertificate, channelName, uid, role, privilegeExpiredTs)

    @staticmethod
    def buildTokenWithAccount(appId, appCertificate, channelName, account, role, privilegeExpiredTs):
        """Build a token for a string user account.

        role: Role_Publisher (or the deprecated Role_Attendee / Role_Admin)
        additionally grants audio/video/data publish privileges;
        Role_Subscriber may only join the channel.
        """
        token = AccessToken(appId, appCertificate, channelName, account)
        token.addPrivilege(kJoinChannel, privilegeExpiredTs)
        # Membership test instead of the original bitwise `|` chained over
        # boolean comparisons — same result, idiomatic boolean logic.
        if role in (Role_Attendee, Role_Publisher, Role_Admin):
            token.addPrivilege(kPublishVideoStream, privilegeExpiredTs)
            token.addPrivilege(kPublishAudioStream, privilegeExpiredTs)
            token.addPrivilege(kPublishDataStream, privilegeExpiredTs)
        return token.build()
videocaller/agora/apps.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from django.apps import AppConfig
2
+
3
+
4
class AgoraConfig(AppConfig):
    """Django application configuration for the `agora` app."""

    name = 'agora'
    default_auto_field = 'django.db.models.BigAutoField'
videocaller/agora/assemblyai_utils.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AssemblyAI transcription helpers."""
2
+ import time
3
+ import requests
4
+ from django.conf import settings
5
+
6
+
7
class AssemblyAIClient:
    """Thin wrapper around the AssemblyAI v2 REST API.

    Reads the API key from Django settings at construction time.
    """

    # Per-request HTTP timeout in seconds. The original calls had no timeout,
    # so a stalled connection could hang the caller indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.api_key = settings.ASSEMBLYAI_API_KEY
        self.base_url = "https://api.assemblyai.com/v2"

    def _headers(self):
        """Auth/content headers sent with every API request."""
        return {
            "authorization": self.api_key,
            "content-type": "application/json"
        }

    def start_transcription(self, audio_url):
        """Submit *audio_url* for transcription.

        Returns the created transcript resource as a dict (contains ``id``
        and an initial ``status``). Raises ``requests.HTTPError`` on a
        non-2xx response.
        """
        payload = {
            "audio_url": audio_url,
            "language_detection": True
        }
        response = requests.post(
            f"{self.base_url}/transcript",
            json=payload,
            headers=self._headers(),
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def get_transcription(self, transcript_id):
        """Fetch the transcript resource (status, text, error) for *transcript_id*."""
        response = requests.get(
            f"{self.base_url}/transcript/{transcript_id}",
            headers=self._headers(),
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def wait_for_transcription(self, transcript_id, timeout_seconds=60, poll_interval=3):
        """Poll until the transcript reaches a terminal state or we time out.

        Returns the final transcript dict on ``completed``/``failed``; returns
        ``{"status": "processing"}`` if *timeout_seconds* elapses first.
        """
        start = time.time()
        while time.time() - start < timeout_seconds:
            data = self.get_transcription(transcript_id)
            status = data.get("status")
            if status in ("completed", "failed"):
                return data
            time.sleep(poll_interval)
        return {"status": "processing"}
videocaller/agora/document_processing.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Document processing strategies for external uploads."""
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ from abc import ABC, abstractmethod
6
+ from typing import Dict, List, Optional
7
+
8
+ from django.conf import settings
9
+ from django.utils import timezone
10
+
11
+ from .assemblyai_utils import AssemblyAIClient
12
+ from .embedding_utils import chunk_transcript, store_document_chunks_in_vector_db
13
+ from .models import DocumentUpload, DocumentChunk
14
+ from .recording_utils import S3Manager
15
+
16
+
17
# File extensions accepted for upload; anything else is rejected up front
# by DocumentProcessorFactory.get_strategy.
ALLOWED_EXTENSIONS = {".pdf", ".txt", ".doc", ".docx", ".mp3"}
18
+
19
+
20
class BaseDocumentStrategy(ABC):
    """Base strategy for processing an uploaded file."""

    @abstractmethod
    def process(self, document: DocumentUpload, local_path: str, s3_url: Optional[str], presigned_url: Optional[str]) -> Dict:
        """Process a document and store chunks + embeddings."""
        raise NotImplementedError

    def _store_chunks(self, document: DocumentUpload, chunks: List[str], block_types: Optional[List[str]] = None,
                      metadatas: Optional[List[Dict]] = None) -> int:
        """Persist chunk rows, push embeddings to the vector DB, and stamp the document.

        Returns the number of chunks stored. Missing block_types/metadatas
        entries fall back to "text" / {}.
        """
        stored_rows = []
        for position, text in enumerate(chunks):
            row_block_type = block_types[position] if block_types and position < len(block_types) else "text"
            row_metadata = metadatas[position] if metadatas and position < len(metadatas) else {}
            row = DocumentChunk.objects.create(
                document=document,
                chunk_text=text,
                chunk_index=position,
                block_type=row_block_type,
                metadata=row_metadata,
            )
            stored_rows.append(row)

        store_document_chunks_in_vector_db(document.meeting_id, document, chunks, stored_rows)
        document.embeddings_created_at = timezone.now()
        document.save(update_fields=["embeddings_created_at"])
        return len(chunks)
48
+
49
+
50
class AudioDocumentStrategy(BaseDocumentStrategy):
    """Process audio files by transcribing and chunking text."""

    def process(self, document: DocumentUpload, local_path: str, s3_url: Optional[str], presigned_url: Optional[str]) -> Dict:
        if not settings.ASSEMBLYAI_API_KEY:
            raise RuntimeError("AssemblyAI API key is not configured")

        # AssemblyAI needs a URL it can fetch; prefer the presigned one.
        audio_url = presigned_url or s3_url
        if not audio_url:
            raise RuntimeError("Audio requires S3 upload or presigned URL for transcription")

        client = AssemblyAIClient()
        start_data = client.start_transcription(audio_url)
        transcript_id = start_data.get("id")
        status = start_data.get("status", "processing")

        transcript_text = None
        result = None
        if transcript_id:
            result = client.wait_for_transcription(transcript_id, timeout_seconds=120, poll_interval=4)
            status = result.get("status", status)
            if status == "completed":
                transcript_text = result.get("text")

        if status != "completed" or not transcript_text:
            # `result` is only populated when polling actually ran.
            error_msg = result.get("error") if transcript_id else "Transcription failed"
            raise RuntimeError(error_msg or "Transcription failed")

        document.raw_text = transcript_text
        document.status = "processing"
        document.save(update_fields=["raw_text", "status"])

        text_chunks = chunk_transcript(transcript_text)
        stored = self._store_chunks(document, text_chunks)
        return {"chunk_count": stored, "status": "completed"}
84
+
85
+
86
class TextDocumentStrategy(BaseDocumentStrategy):
    """Process plain text files."""

    def process(self, document: DocumentUpload, local_path: str, s3_url: Optional[str], presigned_url: Optional[str]) -> Dict:
        # Undecodable bytes are dropped rather than failing the upload.
        with open(local_path, "r", encoding="utf-8", errors="ignore") as source:
            contents = source.read()

        if not contents.strip():
            raise RuntimeError("Empty text file")

        document.raw_text = contents
        document.status = "processing"
        document.save(update_fields=["raw_text", "status"])

        pieces = chunk_transcript(contents)
        total = self._store_chunks(document, pieces)
        return {"chunk_count": total, "status": "completed"}
103
+
104
+
105
class UnstructuredDocumentStrategy(BaseDocumentStrategy):
    """Process PDF/DOC/DOCX with unstructured partitioning."""

    def process(self, document: DocumentUpload, local_path: str, s3_url: Optional[str], presigned_url: Optional[str]) -> Dict:
        # Optional dependency: imported lazily so the app can run without it.
        try:
            from unstructured.partition.auto import partition
        except Exception as exc:
            raise RuntimeError("unstructured library is required for PDF/DOC processing") from exc

        blocks: List[str] = []
        block_types: List[str] = []
        metadatas: List[Dict] = []

        for element in partition(filename=local_path):
            element_text = (getattr(element, "text", "") or "").strip()
            if not element_text:
                continue
            category = getattr(element, "category", None) or element.__class__.__name__
            meta_obj = getattr(element, "metadata", None)
            meta = meta_obj.to_dict() if meta_obj and hasattr(meta_obj, "to_dict") else {}
            blocks.append(element_text)
            block_types.append(str(category).lower())
            metadatas.append(meta)

        if not blocks:
            raise RuntimeError("No readable content extracted from document")

        document.raw_text = "\n\n".join(blocks)
        document.status = "processing"
        document.save(update_fields=["raw_text", "status"])

        # Re-chunk per block so every chunk inherits its block's type/metadata.
        chunks: List[str] = []
        chunk_block_types: List[str] = []
        chunk_metadatas: List[Dict] = []
        for block_text, category, meta in zip(blocks, block_types, metadatas):
            for piece in chunk_transcript(block_text):
                chunks.append(piece)
                chunk_block_types.append(category)
                chunk_metadatas.append(meta)

        stored = self._store_chunks(document, chunks, chunk_block_types, chunk_metadatas)
        return {"chunk_count": stored, "status": "completed"}
151
+
152
+
153
class DocumentProcessorFactory:
    """Factory to select processing strategy based on extension."""

    @staticmethod
    def get_strategy(file_path: str) -> BaseDocumentStrategy:
        """Return the strategy matching the file extension; raise for unsupported types."""
        extension = os.path.splitext(file_path.lower())[1]
        if extension not in ALLOWED_EXTENSIONS:
            raise RuntimeError(f"Unsupported file type: {extension}")

        if extension == ".mp3":
            return AudioDocumentStrategy()
        if extension == ".txt":
            return TextDocumentStrategy()
        # Everything else in ALLOWED_EXTENSIONS (.pdf/.doc/.docx) goes through
        # the unstructured-based parser.
        return UnstructuredDocumentStrategy()

    @staticmethod
    def upload_to_s3_if_configured(local_path: str, s3_key: str) -> Dict:
        """Upload to S3 when credentials are configured.

        Never raises: failures are reported in the returned dict's "s3_error"
        key, with "s3_url"/"presigned_url" left as None.
        """
        outcome = {"s3_url": None, "presigned_url": None, "s3_error": None}
        configured = (settings.AWS_ACCESS_KEY_ID
                      and settings.AWS_SECRET_ACCESS_KEY
                      and settings.AWS_STORAGE_BUCKET_NAME)
        if configured:
            try:
                manager = S3Manager()
                if manager.upload_file(local_path, s3_key):
                    outcome["s3_url"] = manager.get_s3_url(s3_key)
                    outcome["presigned_url"] = manager.generate_presigned_url(s3_key)
                else:
                    outcome["s3_error"] = "S3 upload failed"
            except Exception as upload_error:
                outcome["s3_error"] = str(upload_error)
        return outcome
+ }
videocaller/agora/embedding_utils.py ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LangChain embeddings and Qdrant vector DB management"""
2
+ import logging
3
+ import uuid
4
+ from typing import List, Dict
5
+ from django.conf import settings
6
+ from qdrant_client import QdrantClient
7
+ from qdrant_client.models import Distance, VectorParams, Filter, FieldCondition, MatchValue, PayloadSchemaType
8
+ from langchain_huggingface import HuggingFaceEmbeddings
9
+ from langchain_qdrant import QdrantVectorStore
10
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
# Shared Qdrant client for the whole module; QDRANT_API_KEY is optional
# (local/unauthenticated instances need none).
qdrant_client = QdrantClient(url=settings.QDRANT_URL, api_key=getattr(settings, 'QDRANT_API_KEY', None))

# Embedding model and collection configuration, overridable via Django settings.
EMBEDDING_MODEL = getattr(settings, 'HF_EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
EMBEDDING_DIMENSION = getattr(settings, 'HF_EMBEDDING_DIMENSION', None)
COLLECTION_NAME = getattr(settings, 'QDRANT_COLLECTION_NAME', 'meeting_transcripts')
# Lazily-created HuggingFaceEmbeddings singleton; populated by get_embeddings().
_embeddings = None
20
+
21
+
22
def get_embeddings() -> HuggingFaceEmbeddings:
    """Lazily initialize embeddings to reduce startup memory usage."""
    global _embeddings
    if _embeddings is not None:
        return _embeddings
    # First call: load the model on CPU and cache it module-wide.
    _embeddings = HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL,
        model_kwargs={"device": "cpu"},
    )
    return _embeddings
31
+
32
+
33
def get_embedding_dimension() -> int:
    """Get embedding dimension from config or derive it from the model."""
    if EMBEDDING_DIMENSION:
        return int(EMBEDDING_DIMENSION)

    # Fall back to asking the underlying sentence-transformers client.
    model_client = getattr(get_embeddings(), "client", None)
    if model_client and hasattr(model_client, "get_sentence_embedding_dimension"):
        return int(model_client.get_sentence_embedding_dimension())

    raise ValueError("HF_EMBEDDING_DIMENSION is not set and model dimension is unavailable")
44
+
45
+
46
def ensure_collection_exists():
    """Create Qdrant collection if it doesn't exist.

    Also verifies the stored vector size matches the configured embedding
    dimension, and always (re)ensures payload indexes at the end.
    """
    try:
        collection = qdrant_client.get_collection(COLLECTION_NAME)
        existing_size = collection.config.params.vectors.size
        desired_size = get_embedding_dimension()
        if existing_size != desired_size:
            # WARNING: a dimension mismatch (e.g. after switching embedding
            # models) drops the collection — and ALL stored vectors — before
            # recreating it with the new size.
            logger.warning(
                "Qdrant collection size mismatch (%s != %s), recreating: %s",
                existing_size,
                desired_size,
                COLLECTION_NAME
            )
            qdrant_client.delete_collection(COLLECTION_NAME)
            qdrant_client.create_collection(
                collection_name=COLLECTION_NAME,
                vectors_config=VectorParams(size=desired_size, distance=Distance.COSINE),
            )
    except Exception:
        # NOTE(review): any get_collection failure (including transient
        # network errors, not just "collection missing") lands here and
        # triggers a create attempt — confirm this is intended.
        logger.info(f"Creating Qdrant collection: {COLLECTION_NAME}")
        desired_size = get_embedding_dimension()
        qdrant_client.create_collection(
            collection_name=COLLECTION_NAME,
            vectors_config=VectorParams(size=desired_size, distance=Distance.COSINE),
        )

    ensure_payload_indexes()
73
+
74
+
75
def ensure_payload_indexes() -> None:
    """Ensure payload indexes exist for filtered searches."""
    index_kwargs = dict(
        collection_name=COLLECTION_NAME,
        field_name="meeting_id",
        field_schema=PayloadSchemaType.INTEGER,
    )
    try:
        qdrant_client.create_payload_index(**index_kwargs)
    except Exception as creation_error:
        # Non-fatal: the index may already exist or the server may reject it.
        logger.info("Skipping payload index creation for meeting_id: %s", str(creation_error))
85
+
86
+
87
def chunk_transcript(transcript_text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
    """
    Split transcript into overlapping chunks using RecursiveCharacterTextSplitter

    Args:
        transcript_text: Full transcript text
        chunk_size: Target characters per chunk (the splitter measures len(), not tokens)
        overlap: Character overlap between chunks

    Returns:
        List of text chunks
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=overlap,
        separators=["\n\n", "\n", ". ", " ", ""]
    )
    # Split exactly once: the original called split_text twice (once only to
    # log the full chunk list at INFO), doubling the work and spamming logs
    # with complete transcript content.
    chunks = splitter.split_text(transcript_text)
    logger.info("chunk_transcript produced %d chunks", len(chunks))
    return chunks
106
+
107
+
108
def get_vectorstore() -> QdrantVectorStore:
    """Return a LangChain vector store bound to the shared Qdrant collection."""
    ensure_collection_exists()
    store = QdrantVectorStore(
        client=qdrant_client,
        collection_name=COLLECTION_NAME,
        embedding=get_embeddings(),
    )
    return store
115
+
116
+
117
def store_chunks_in_vector_db(
    meeting_id: int,
    chunks: List[str],
    chunk_objects: List = None
) -> List[str]:
    """
    Store chunks and their embeddings in Qdrant

    Args:
        meeting_id: ID of the meeting
        chunks: List of text chunks
        chunk_objects: Optional list of TranscriptChunk model instances

    Returns:
        List of vector IDs stored in Qdrant

    Raises:
        Re-raises any storage error after logging it.
    """
    ensure_collection_exists()

    try:
        # Imported here to avoid a circular import at module load time.
        from .models import MeetingRoom

        vectorstore = get_vectorstore()
        meeting_title = MeetingRoom.objects.filter(id=meeting_id).values_list("title", flat=True).first() or ""
        # Deterministic IDs (uuid5 of meeting + index): re-ingesting the same
        # meeting overwrites old points instead of duplicating them.
        vector_ids = [str(uuid.uuid5(uuid.NAMESPACE_URL, f"meeting:{meeting_id}:{idx}")) for idx in range(len(chunks))]

        metadatas = []
        for idx, chunk in enumerate(chunks):
            payload = {
                "meeting_id": meeting_id,
                "meeting_title": meeting_title,
                "chunk_index": idx,
                "text": chunk[:512],  # truncated preview kept in the payload
                "chunk_length": len(chunk),
                "source_type": "meeting_transcript"
            }

            if chunk_objects and idx < len(chunk_objects):
                payload["chunk_db_id"] = chunk_objects[idx].id
                if chunk_objects[idx].start_time:
                    payload["start_time"] = chunk_objects[idx].start_time
                if chunk_objects[idx].end_time:
                    payload["end_time"] = chunk_objects[idx].end_time

            metadatas.append(payload)

        # Removed leftover debug logging that dumped the full chunk list
        # (entire transcript text) into the logs at INFO level.
        vectorstore.add_texts(texts=chunks, metadatas=metadatas, ids=vector_ids)
        logger.info(f"Stored {len(chunks)} chunks for meeting {meeting_id}")
        return vector_ids
    except Exception as e:
        logger.error(f"Error storing chunks in vector DB: {str(e)}")
        raise
170
+
171
+
172
def store_document_chunks_in_vector_db(
    meeting_id: int,
    document,
    chunks: List[str],
    chunk_objects: List = None
) -> List[str]:
    """Store document chunks and their embeddings in Qdrant.

    Args:
        meeting_id: ID of the meeting the document belongs to
        document: DocumentUpload instance (supplies id/file_name for payloads)
        chunks: List of text chunks
        chunk_objects: Optional list of DocumentChunk model instances

    Returns:
        List of vector IDs stored in Qdrant; re-raises on storage failure.
    """
    ensure_collection_exists()

    try:
        # Imported here to avoid a circular import at module load time.
        from .models import MeetingRoom

        vectorstore = get_vectorstore()
        meeting_title = MeetingRoom.objects.filter(id=meeting_id).values_list("title", flat=True).first() or ""
        # Deterministic IDs keyed by document + index so re-processing a
        # document overwrites its old points.
        vector_ids = [str(uuid.uuid5(uuid.NAMESPACE_URL, f"document:{document.id}:{idx}")) for idx in range(len(chunks))]

        metadatas = []
        for idx, chunk in enumerate(chunks):
            payload = {
                "meeting_id": meeting_id,
                "meeting_title": meeting_title,
                "document_id": document.id,
                "document_name": document.file_name,
                "chunk_index": idx,
                "text": chunk[:512],  # truncated preview kept in the payload
                "chunk_length": len(chunk),
                "source_type": "document"
            }

            if chunk_objects and idx < len(chunk_objects):
                payload["chunk_db_id"] = chunk_objects[idx].id
                payload["block_type"] = chunk_objects[idx].block_type

            metadatas.append(payload)

        # Removed leftover `logger.info(chunks)` that dumped full document
        # text into the logs at INFO level.
        vectorstore.add_texts(texts=chunks, metadatas=metadatas, ids=vector_ids)
        logger.info(f"Stored {len(chunks)} document chunks for meeting {meeting_id}")
        return vector_ids
    except Exception as e:
        logger.error(f"Error storing document chunks in vector DB: {str(e)}")
        raise
214
+
215
+
216
def search_similar_chunks(query: str, meeting_id: int | None = None, top_k: int = 5) -> List[Dict]:
    """
    Search for chunks similar to query using vector similarity

    Args:
        query: User query
        meeting_id: Optional meeting ID; when given, results are restricted
            to that meeting via a payload filter
        top_k: Number of top results to return

    Returns:
        List of dicts with chunk text, score, and metadata. Returns an empty
        list (after logging) on any error.
    """
    try:
        # Removed stray debug print ("trying to search the similiar...") that
        # wrote to stdout on every search; errors below are logged instead.
        vectorstore = get_vectorstore()
        filter_ = None
        if meeting_id is not None:
            filter_ = Filter(
                must=[FieldCondition(key="meeting_id", match=MatchValue(value=meeting_id))]
            )

        results = vectorstore.similarity_search_with_score(query, k=top_k, filter=filter_)
        formatted_results = []
        for doc, score in results:
            metadata = doc.metadata or {}
            formatted_results.append({
                "text": doc.page_content,
                "score": score,
                "chunk_index": metadata.get("chunk_index", 0),
                "start_time": metadata.get("start_time"),
                "end_time": metadata.get("end_time"),
                "source_type": metadata.get("source_type", "meeting_transcript"),
                "meeting_title": metadata.get("meeting_title"),
                "document_id": metadata.get("document_id"),
                "document_name": metadata.get("document_name"),
                "metadata": metadata
            })

        return formatted_results

    except Exception as e:
        logger.error(f"Error searching similar chunks: {str(e)}")
        return []
259
+
260
+
261
def delete_meeting_embeddings(meeting_id: int):
    """Delete all embeddings for a meeting"""
    # Raw filter payload matching every point tagged with this meeting_id.
    selector = {
        "filter": {
            "must": [
                {"key": "meeting_id", "match": {"value": meeting_id}}
            ]
        }
    }
    try:
        qdrant_client.delete(collection_name=COLLECTION_NAME, points_selector=selector)
        logger.info(f"Deleted embeddings for meeting {meeting_id}")
    except Exception as e:
        logger.error(f"Error deleting embeddings: {str(e)}")
        raise
videocaller/agora/migrations/0001_initial.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by Django 4.1.5 on 2026-02-02 12:18
2
+
3
+ from django.conf import settings
4
+ from django.db import migrations, models
5
+ import django.db.models.deletion
6
+ import uuid
7
+
8
+
9
+ class Migration(migrations.Migration):
10
+
11
+ initial = True
12
+
13
+ dependencies = [
14
+ migrations.swappable_dependency(settings.AUTH_USER_MODEL),
15
+ ]
16
+
17
+ operations = [
18
+ migrations.CreateModel(
19
+ name='MeetingRoom',
20
+ fields=[
21
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
22
+ ('room_id', models.CharField(default=uuid.uuid4, max_length=100, unique=True)),
23
+ ('room_code', models.CharField(max_length=10, unique=True)),
24
+ ('title', models.CharField(blank=True, max_length=255)),
25
+ ('description', models.TextField(blank=True)),
26
+ ('created_at', models.DateTimeField(auto_now_add=True)),
27
+ ('is_active', models.BooleanField(default=True)),
28
+ ('max_participants', models.IntegerField(default=10)),
29
+ ('host', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='hosted_meetings', to=settings.AUTH_USER_MODEL)),
30
+ ],
31
+ options={
32
+ 'ordering': ['-created_at'],
33
+ },
34
+ ),
35
+ ]
videocaller/agora/migrations/0002_meetingroom_recording_duration_and_more.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by Django 4.1.5 on 2026-02-02 15:21
2
+
3
+ from django.db import migrations, models
4
+
5
+
6
+ class Migration(migrations.Migration):
7
+
8
+ dependencies = [
9
+ ('agora', '0001_initial'),
10
+ ]
11
+
12
+ operations = [
13
+ migrations.AddField(
14
+ model_name='meetingroom',
15
+ name='recording_duration',
16
+ field=models.IntegerField(default=0, help_text='Duration in seconds'),
17
+ ),
18
+ migrations.AddField(
19
+ model_name='meetingroom',
20
+ name='recording_enabled',
21
+ field=models.BooleanField(default=False),
22
+ ),
23
+ migrations.AddField(
24
+ model_name='meetingroom',
25
+ name='recording_file',
26
+ field=models.FileField(blank=True, null=True, upload_to='recordings/%Y/%m/%d/'),
27
+ ),
28
+ migrations.AddField(
29
+ model_name='meetingroom',
30
+ name='transcript_file',
31
+ field=models.FileField(blank=True, null=True, upload_to='transcripts/%Y/%m/%d/'),
32
+ ),
33
+ ]
videocaller/agora/migrations/0003_remove_meetingroom_recording_file_and_more.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by Django 4.1.5 on 2026-02-02 16:39
2
+
3
+ from django.db import migrations, models
4
+
5
+
6
+ class Migration(migrations.Migration):
7
+
8
+ dependencies = [
9
+ ('agora', '0002_meetingroom_recording_duration_and_more'),
10
+ ]
11
+
12
+ operations = [
13
+ migrations.RemoveField(
14
+ model_name='meetingroom',
15
+ name='recording_file',
16
+ ),
17
+ migrations.RemoveField(
18
+ model_name='meetingroom',
19
+ name='transcript_file',
20
+ ),
21
+ migrations.AddField(
22
+ model_name='meetingroom',
23
+ name='recording_resource_id',
24
+ field=models.CharField(blank=True, help_text='Agora Resource ID', max_length=255, null=True),
25
+ ),
26
+ migrations.AddField(
27
+ model_name='meetingroom',
28
+ name='recording_sid',
29
+ field=models.CharField(blank=True, help_text='Agora Recording SID', max_length=255, null=True),
30
+ ),
31
+ migrations.AddField(
32
+ model_name='meetingroom',
33
+ name='recording_status',
34
+ field=models.CharField(choices=[('not_started', 'Not Started'), ('recording', 'Recording'), ('completed', 'Completed'), ('failed', 'Failed')], default='not_started', max_length=50),
35
+ ),
36
+ migrations.AddField(
37
+ model_name='meetingroom',
38
+ name='recording_uid',
39
+ field=models.IntegerField(blank=True, help_text='Agora Recording Bot UID', null=True),
40
+ ),
41
+ migrations.AddField(
42
+ model_name='meetingroom',
43
+ name='s3_recording_url',
44
+ field=models.URLField(blank=True, help_text='S3 URL for recording', max_length=500, null=True),
45
+ ),
46
+ migrations.AddField(
47
+ model_name='meetingroom',
48
+ name='s3_transcript_url',
49
+ field=models.URLField(blank=True, help_text='S3 URL for transcript', max_length=500, null=True),
50
+ ),
51
+ ]
videocaller/agora/migrations/0004_chatmessage.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by Django 4.1.5 on 2026-02-03 06:12
2
+
3
+ from django.conf import settings
4
+ from django.db import migrations, models
5
+ import django.db.models.deletion
6
+
7
+
8
+ class Migration(migrations.Migration):
9
+
10
+ dependencies = [
11
+ migrations.swappable_dependency(settings.AUTH_USER_MODEL),
12
+ ('agora', '0003_remove_meetingroom_recording_file_and_more'),
13
+ ]
14
+
15
+ operations = [
16
+ migrations.CreateModel(
17
+ name='ChatMessage',
18
+ fields=[
19
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
20
+ ('content', models.TextField()),
21
+ ('created_at', models.DateTimeField(auto_now_add=True)),
22
+ ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='chat_messages', to=settings.AUTH_USER_MODEL)),
23
+ ],
24
+ options={
25
+ 'ordering': ['-created_at'],
26
+ },
27
+ ),
28
+ ]
videocaller/agora/migrations/0005_meetingroom_transcript_id_and_more.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by Django 4.1.5 on 2026-02-03 12:40
2
+
3
+ from django.db import migrations, models
4
+
5
+
6
+ class Migration(migrations.Migration):
7
+
8
+ dependencies = [
9
+ ('agora', '0004_chatmessage'),
10
+ ]
11
+
12
+ operations = [
13
+ migrations.AddField(
14
+ model_name='meetingroom',
15
+ name='transcript_id',
16
+ field=models.CharField(blank=True, help_text='AssemblyAI Transcript ID', max_length=255, null=True),
17
+ ),
18
+ migrations.AddField(
19
+ model_name='meetingroom',
20
+ name='transcript_status',
21
+ field=models.CharField(choices=[('not_started', 'Not Started'), ('processing', 'Processing'), ('completed', 'Completed'), ('failed', 'Failed')], default='not_started', max_length=50),
22
+ ),
23
+ migrations.AddField(
24
+ model_name='meetingroom',
25
+ name='transcript_text',
26
+ field=models.TextField(blank=True, null=True),
27
+ ),
28
+ ]
videocaller/agora/migrations/0006_meetingroom_chunks_created_at_and_more.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by Django 4.1.5 on 2026-02-03 16:58
2
+
3
+ from django.conf import settings
4
+ from django.db import migrations, models
5
+ import django.db.models.deletion
6
+
7
+
8
+ class Migration(migrations.Migration):
9
+
10
+ dependencies = [
11
+ migrations.swappable_dependency(settings.AUTH_USER_MODEL),
12
+ ('agora', '0005_meetingroom_transcript_id_and_more'),
13
+ ]
14
+
15
+ operations = [
16
+ migrations.AddField(
17
+ model_name='meetingroom',
18
+ name='chunks_created_at',
19
+ field=models.DateTimeField(blank=True, help_text='Timestamp when chunks were created', null=True),
20
+ ),
21
+ migrations.AddField(
22
+ model_name='meetingroom',
23
+ name='embedding_version',
24
+ field=models.IntegerField(default=1, help_text='Version of embeddings (for migration)'),
25
+ ),
26
+ migrations.AddField(
27
+ model_name='meetingroom',
28
+ name='embeddings_created_at',
29
+ field=models.DateTimeField(blank=True, help_text='Timestamp when embeddings were generated', null=True),
30
+ ),
31
+ migrations.CreateModel(
32
+ name='TranscriptChunk',
33
+ fields=[
34
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
35
+ ('chunk_text', models.TextField(help_text='Text chunk from transcript')),
36
+ ('chunk_index', models.IntegerField(help_text='Order of chunk in transcript')),
37
+ ('start_time', models.IntegerField(blank=True, help_text='Start time in seconds', null=True)),
38
+ ('end_time', models.IntegerField(blank=True, help_text='End time in seconds', null=True)),
39
+ ('embedding_vector_id', models.CharField(blank=True, help_text='Vector DB ID', max_length=255, null=True)),
40
+ ('created_at', models.DateTimeField(auto_now_add=True)),
41
+ ('meeting', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='transcript_chunks', to='agora.meetingroom')),
42
+ ],
43
+ options={
44
+ 'ordering': ['meeting', 'chunk_index'],
45
+ },
46
+ ),
47
+ migrations.CreateModel(
48
+ name='ConversationHistory',
49
+ fields=[
50
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
51
+ ('user_question', models.TextField()),
52
+ ('assistant_response', models.TextField()),
53
+ ('relevant_chunks', models.JSONField(default=list, help_text='List of chunk IDs used for response')),
54
+ ('created_at', models.DateTimeField(auto_now_add=True)),
55
+ ('meeting', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='conversation_history', to='agora.meetingroom')),
56
+ ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
57
+ ],
58
+ options={
59
+ 'ordering': ['-created_at'],
60
+ },
61
+ ),
62
+ ]
videocaller/agora/migrations/0007_document_uploads.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Generated by Django 4.1.5 on 2026-02-08 00:00
#
# Schema migration: adds the document-RAG tables.
#   * DocumentUpload — an external file (document/audio) attached to a meeting,
#     with processing status and embedding bookkeeping.
#   * DocumentChunk  — text chunks extracted from a DocumentUpload, each
#     pointing at its vector-store entry via embedding_vector_id.

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Create DocumentUpload and DocumentChunk for the document ingestion pipeline."""

    dependencies = [
        # uploaded_by FK below targets the (potentially swapped) user model.
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('agora', '0006_meetingroom_chunks_created_at_and_more'),
    ]

    operations = [
        migrations.CreateModel(
            name='DocumentUpload',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('file_name', models.CharField(max_length=255)),
                ('file_type', models.CharField(help_text='File extension or MIME hint', max_length=50)),
                ('s3_url', models.URLField(blank=True, max_length=500, null=True)),
                ('raw_text', models.TextField(blank=True, null=True)),
                # Processing lifecycle: uploaded -> processing -> completed/failed.
                ('status', models.CharField(choices=[('uploaded', 'Uploaded'), ('processing', 'Processing'), ('completed', 'Completed'), ('failed', 'Failed')], default='uploaded', max_length=50)),
                ('error_message', models.TextField(blank=True, null=True)),
                ('processed_at', models.DateTimeField(blank=True, null=True)),
                ('embeddings_created_at', models.DateTimeField(blank=True, null=True)),
                ('embedding_version', models.IntegerField(default=1, help_text='Version of embeddings (for migration)')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('meeting', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='document_uploads', to='agora.meetingroom')),
                ('uploaded_by', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='document_uploads', to=settings.AUTH_USER_MODEL)),
            ],
            options={
                'ordering': ['-created_at'],
            },
        ),
        migrations.CreateModel(
            name='DocumentChunk',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('chunk_text', models.TextField(help_text='Text chunk from document')),
                ('chunk_index', models.IntegerField(help_text='Order of chunk in document')),
                ('block_type', models.CharField(default='text', help_text='text/table/image/other', max_length=50)),
                ('metadata', models.JSONField(default=dict, help_text='Extractor metadata')),
                ('embedding_vector_id', models.CharField(blank=True, help_text='Vector DB ID', max_length=255, null=True)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('document', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='chunks', to='agora.documentupload')),
            ],
            options={
                'ordering': ['document', 'chunk_index'],
            },
        ),
    ]
videocaller/agora/migrations/0008_document_upload_storage_path.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Generated by Django 4.1.5 on 2026-02-08 00:00
#
# Adds DocumentUpload.storage_path so locally stored files can be processed
# without round-tripping through S3.

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add a non-null storage_path column to DocumentUpload."""

    dependencies = [
        ('agora', '0007_document_uploads'),
    ]

    operations = [
        migrations.AddField(
            model_name='documentupload',
            name='storage_path',
            # default='' backfills existing rows only; preserve_default=False
            # drops the default from the model state afterwards, matching a
            # field declared without a default in models.py.
            field=models.CharField(default='', help_text='Storage path for local processing', max_length=500),
            preserve_default=False,
        ),
    ]
videocaller/agora/migrations/0009_split_meeting_models.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by Django 4.1.5 on 2026-02-10 00:00
2
+
3
+ from django.db import migrations, models
4
+ import django.db.models.deletion
5
+
6
+
7
def migrate_meeting_fields(apps, schema_editor):
    """Forward data migration: copy the recording/transcript/RAG columns from
    each MeetingRoom row into the new one-to-one tables created by this
    migration (MeetingRecording, MeetingTranscript, MeetingRagState).

    Uses the historical model registry (``apps``) so field access matches the
    schema as of this migration, not the current models module.
    """
    MeetingRoom = apps.get_model('agora', 'MeetingRoom')
    MeetingRecording = apps.get_model('agora', 'MeetingRecording')
    MeetingTranscript = apps.get_model('agora', 'MeetingTranscript')
    MeetingRagState = apps.get_model('agora', 'MeetingRagState')

    # Column names copied verbatim from MeetingRoom onto each new table.
    recording_cols = (
        'recording_enabled', 'recording_sid', 'recording_resource_id',
        'recording_uid', 'recording_status', 's3_recording_url',
        'recording_duration',
    )
    transcript_cols = (
        'transcript_text', 'transcript_status', 'transcript_id',
        's3_transcript_url',
    )
    rag_cols = ('chunks_created_at', 'embeddings_created_at', 'embedding_version')

    for room in MeetingRoom.objects.all():
        MeetingRecording.objects.create(
            meeting=room, **{col: getattr(room, col) for col in recording_cols}
        )
        MeetingTranscript.objects.create(
            meeting=room, **{col: getattr(room, col) for col in transcript_cols}
        )
        MeetingRagState.objects.create(
            meeting=room, **{col: getattr(room, col) for col in rag_cols}
        )
37
+
38
+
39
def reverse_migrate_meeting_fields(apps, schema_editor):
    """Reverse data migration: copy values from the split one-to-one tables
    back onto MeetingRoom before the columns are restored there.

    Rooms with no matching related row simply keep their current values;
    every room is saved regardless, mirroring the forward split.
    """
    MeetingRoom = apps.get_model('agora', 'MeetingRoom')

    # (historical model, columns to copy back onto MeetingRoom)
    sources = (
        (
            apps.get_model('agora', 'MeetingRecording'),
            ('recording_enabled', 'recording_sid', 'recording_resource_id',
             'recording_uid', 'recording_status', 's3_recording_url',
             'recording_duration'),
        ),
        (
            apps.get_model('agora', 'MeetingTranscript'),
            ('transcript_text', 'transcript_status', 'transcript_id',
             's3_transcript_url'),
        ),
        (
            apps.get_model('agora', 'MeetingRagState'),
            ('chunks_created_at', 'embeddings_created_at', 'embedding_version'),
        ),
    )

    for room in MeetingRoom.objects.all():
        for model, cols in sources:
            try:
                src = model.objects.get(meeting=room)
            except model.DoesNotExist:
                # No related row for this room — leave its fields untouched.
                continue
            for col in cols:
                setattr(room, col, getattr(src, col))
        room.save()
76
+
77
+
78
class Migration(migrations.Migration):
    """Split MeetingRoom into MeetingRoom + three one-to-one satellite tables.

    Order matters: the new tables are created first, RunPython copies the data
    across, and only then are the duplicated columns removed from MeetingRoom.
    The RunPython pair makes the migration reversible.
    """

    dependencies = [
        ('agora', '0008_document_upload_storage_path'),
    ]

    operations = [
        migrations.CreateModel(
            name='MeetingRecording',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('recording_enabled', models.BooleanField(default=False)),
                ('recording_sid', models.CharField(blank=True, help_text='Agora Recording SID', max_length=255, null=True)),
                ('recording_resource_id', models.CharField(blank=True, help_text='Agora Resource ID', max_length=255, null=True)),
                ('recording_uid', models.IntegerField(blank=True, help_text='Agora Recording Bot UID', null=True)),
                ('recording_status', models.CharField(choices=[('not_started', 'Not Started'), ('recording', 'Recording'), ('completed', 'Completed'), ('failed', 'Failed')], default='not_started', max_length=50)),
                ('s3_recording_url', models.URLField(blank=True, help_text='S3 URL for recording', max_length=500, null=True)),
                ('recording_duration', models.IntegerField(default=0, help_text='Duration in seconds')),
                ('meeting', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='recording', to='agora.meetingroom')),
            ],
        ),
        migrations.CreateModel(
            name='MeetingTranscript',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('transcript_text', models.TextField(blank=True, null=True)),
                ('transcript_status', models.CharField(choices=[('not_started', 'Not Started'), ('processing', 'Processing'), ('completed', 'Completed'), ('failed', 'Failed')], default='not_started', max_length=50)),
                ('transcript_id', models.CharField(blank=True, help_text='AssemblyAI Transcript ID', max_length=255, null=True)),
                ('s3_transcript_url', models.URLField(blank=True, help_text='S3 URL for transcript', max_length=500, null=True)),
                ('meeting', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='transcript', to='agora.meetingroom')),
            ],
        ),
        migrations.CreateModel(
            name='MeetingRagState',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('chunks_created_at', models.DateTimeField(blank=True, help_text='Timestamp when chunks were created', null=True)),
                ('embeddings_created_at', models.DateTimeField(blank=True, help_text='Timestamp when embeddings were generated', null=True)),
                ('embedding_version', models.IntegerField(default=1, help_text='Version of embeddings (for migration)')),
                ('meeting', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='rag_state', to='agora.meetingroom')),
            ],
        ),
        # Copy data into the new tables (reversible) before dropping columns.
        migrations.RunPython(migrate_meeting_fields, reverse_migrate_meeting_fields),
        migrations.RemoveField(model_name='meetingroom', name='recording_enabled'),
        migrations.RemoveField(model_name='meetingroom', name='recording_sid'),
        migrations.RemoveField(model_name='meetingroom', name='recording_resource_id'),
        migrations.RemoveField(model_name='meetingroom', name='recording_uid'),
        migrations.RemoveField(model_name='meetingroom', name='recording_status'),
        migrations.RemoveField(model_name='meetingroom', name='s3_recording_url'),
        migrations.RemoveField(model_name='meetingroom', name='s3_transcript_url'),
        migrations.RemoveField(model_name='meetingroom', name='recording_duration'),
        migrations.RemoveField(model_name='meetingroom', name='transcript_text'),
        migrations.RemoveField(model_name='meetingroom', name='transcript_status'),
        migrations.RemoveField(model_name='meetingroom', name='transcript_id'),
        migrations.RemoveField(model_name='meetingroom', name='chunks_created_at'),
        migrations.RemoveField(model_name='meetingroom', name='embeddings_created_at'),
        migrations.RemoveField(model_name='meetingroom', name='embedding_version'),
    ]
videocaller/agora/migrations/0010_documentupload_chunk_count.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Generated by Django 4.1.5 on 2026-02-10 00:00
#
# Adds a denormalized chunk counter to DocumentUpload so the UI/pipeline can
# report progress without counting DocumentChunk rows.

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add DocumentUpload.chunk_count (defaults to 0 for existing rows)."""

    dependencies = [
        ('agora', '0009_split_meeting_models'),
    ]

    operations = [
        migrations.AddField(
            model_name='documentupload',
            name='chunk_count',
            field=models.IntegerField(default=0, help_text='Number of chunks created'),
        ),
    ]
videocaller/agora/migrations/0011_meeting_agenda_point.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Adds the shared MeetingAgendaPoint table (agenda items per meeting,
# optionally AI-generated, optionally attributed to a user).
#
# Fix: the created_by FK previously used a hard-coded ``to='auth.user'`` with
# no swappable dependency. Sibling migrations (e.g. 0007) correctly target
# ``settings.AUTH_USER_MODEL``; the hard-coded reference breaks projects that
# swap the user model. Using the swappable form resolves to ``auth.User`` by
# default, so the recorded migration state is unchanged for this project.

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Create MeetingAgendaPoint, ordered by (order, created_at)."""

    dependencies = [
        # Required whenever a migration references settings.AUTH_USER_MODEL.
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('agora', '0010_documentupload_chunk_count'),
    ]

    operations = [
        migrations.CreateModel(
            name='MeetingAgendaPoint',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('text', models.TextField()),
                ('order', models.IntegerField(default=0)),
                ('is_ai_generated', models.BooleanField(default=True)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                # SET_NULL keeps agenda points alive when their author is deleted.
                ('created_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL)),
                ('meeting', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='agenda_points', to='agora.meetingroom')),
            ],
            options={
                'ordering': ['order', 'created_at'],
            },
        ),
    ]
videocaller/agora/migrations/0012_alter_meetingroom_room_code.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Re-declares MeetingRoom.room_code as a unique CharField(max_length=15),
# sized for shareable codes like "abc-def-ghi" with a little headroom.

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter MeetingRoom.room_code to max_length=15, unique."""

    dependencies = [
        ('agora', '0011_meeting_agenda_point'),
    ]

    operations = [
        migrations.AlterField(
            model_name='meetingroom',
            name='room_code',
            field=models.CharField(max_length=15, unique=True),
        ),
    ]
videocaller/agora/migrations/__init__.py ADDED
File without changes
videocaller/agora/models.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from django.db import models
2
+ from django.contrib.auth.models import User
3
+ import uuid
4
+
5
class MeetingRoom(models.Model):
    """A video-call room: UUID-backed identifier plus a short shareable code.

    Recording, transcript, and RAG state live in one-to-one satellite models
    (MeetingRecording, MeetingTranscript, MeetingRagState); the ``get_*``
    helpers lazily create those rows on first access.
    """

    # default=uuid.uuid4 is called per-row; the UUID is stored as its string form.
    room_id = models.CharField(max_length=100, unique=True, default=uuid.uuid4)
    room_code = models.CharField(max_length=15, unique=True)  # Shareable code like "abc-def-ghi"
    host = models.ForeignKey(User, on_delete=models.CASCADE, related_name='hosted_meetings')
    title = models.CharField(max_length=255, blank=True)
    description = models.TextField(blank=True)
    created_at = models.DateTimeField(auto_now_add=True)
    is_active = models.BooleanField(default=True)
    max_participants = models.IntegerField(default=10)

    def __str__(self):
        return f"{self.title} - {self.room_code}"

    class Meta:
        ordering = ['-created_at']

    def get_recording(self):
        """Return this room's MeetingRecording, creating it on first access.

        get_or_create handles the concurrent-first-access race on the unique
        OneToOne (it retries the lookup on IntegrityError), unlike the prior
        try/except-DoesNotExist-then-create pattern.
        """
        recording, _ = MeetingRecording.objects.get_or_create(meeting=self)
        return recording

    def get_transcript(self):
        """Return this room's MeetingTranscript, creating it on first access."""
        transcript, _ = MeetingTranscript.objects.get_or_create(meeting=self)
        return transcript

    def get_rag_state(self):
        """Return this room's MeetingRagState, creating it on first access."""
        rag_state, _ = MeetingRagState.objects.get_or_create(meeting=self)
        return rag_state
38
+
39
+
40
class MeetingRecording(models.Model):
    """Agora cloud-recording state for one MeetingRoom (one-to-one)."""

    meeting = models.OneToOneField(MeetingRoom, on_delete=models.CASCADE, related_name='recording')
    recording_enabled = models.BooleanField(default=False)
    # Identifiers returned by the Agora recording API for this session.
    recording_sid = models.CharField(max_length=255, null=True, blank=True, help_text="Agora Recording SID")
    recording_resource_id = models.CharField(max_length=255, null=True, blank=True, help_text="Agora Resource ID")
    recording_uid = models.IntegerField(null=True, blank=True, help_text="Agora Recording Bot UID")
    # Lifecycle: not_started -> recording -> completed/failed.
    recording_status = models.CharField(
        max_length=50,
        default='not_started',
        choices=[('not_started', 'Not Started'), ('recording', 'Recording'), ('completed', 'Completed'), ('failed', 'Failed')]
    )
    s3_recording_url = models.URLField(max_length=500, null=True, blank=True, help_text="S3 URL for recording")
    recording_duration = models.IntegerField(default=0, help_text="Duration in seconds")

    def __str__(self):
        return f"Recording - {self.meeting.title}"
56
+
57
+
58
class MeetingTranscript(models.Model):
    """AssemblyAI transcript state for one MeetingRoom (one-to-one)."""

    meeting = models.OneToOneField(MeetingRoom, on_delete=models.CASCADE, related_name='transcript')
    transcript_text = models.TextField(null=True, blank=True)
    # Lifecycle: not_started -> processing -> completed/failed.
    transcript_status = models.CharField(
        max_length=50,
        default='not_started',
        choices=[('not_started', 'Not Started'), ('processing', 'Processing'), ('completed', 'Completed'), ('failed', 'Failed')]
    )
    transcript_id = models.CharField(max_length=255, null=True, blank=True, help_text="AssemblyAI Transcript ID")
    s3_transcript_url = models.URLField(max_length=500, null=True, blank=True, help_text="S3 URL for transcript")

    def __str__(self):
        return f"Transcript - {self.meeting.title}"
71
+
72
+
73
class MeetingRagState(models.Model):
    """RAG pipeline bookkeeping for one MeetingRoom (one-to-one): when chunks
    and embeddings were built, and which embedding schema version they use."""

    meeting = models.OneToOneField(MeetingRoom, on_delete=models.CASCADE, related_name='rag_state')
    chunks_created_at = models.DateTimeField(null=True, blank=True, help_text="Timestamp when chunks were created")
    embeddings_created_at = models.DateTimeField(null=True, blank=True, help_text="Timestamp when embeddings were generated")
    embedding_version = models.IntegerField(default=1, help_text="Version of embeddings (for migration)")

    def __str__(self):
        return f"RAG State - {self.meeting.title}"
81
+
82
+
83
class ChatMessage(models.Model):
    """A chat message authored by a user.

    NOTE(review): there is no FK to MeetingRoom here, so messages are not
    scoped to a meeting — presumably a global/lobby chat; confirm against the
    views that create these.
    """

    user = models.ForeignKey(User, on_delete=models.CASCADE, related_name='chat_messages')
    content = models.TextField()
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        # Truncate content so admin/debug listings stay readable.
        return f"{self.user.username}: {self.content[:30]}"

    class Meta:
        ordering = ['-created_at']
93
+
94
+
95
class TranscriptChunk(models.Model):
    """Store transcript chunks with their embeddings for RAG.

    Each row is one ordered slice of a meeting transcript; the vector itself
    lives in an external vector store, referenced by ``embedding_vector_id``.
    """

    meeting = models.ForeignKey(MeetingRoom, on_delete=models.CASCADE, related_name='transcript_chunks')
    chunk_text = models.TextField(help_text="Text chunk from transcript")
    chunk_index = models.IntegerField(help_text="Order of chunk in transcript")
    # Optional alignment back into the recording timeline.
    start_time = models.IntegerField(null=True, blank=True, help_text="Start time in seconds")
    end_time = models.IntegerField(null=True, blank=True, help_text="End time in seconds")
    embedding_vector_id = models.CharField(max_length=255, null=True, blank=True, help_text="Vector DB ID")
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        return f"Chunk {self.chunk_index} - {self.meeting.title}"

    class Meta:
        ordering = ['meeting', 'chunk_index']
110
+
111
+
112
class DocumentUpload(models.Model):
    """Store uploaded external documents/audio linked to a meeting.

    Tracks the extraction pipeline (status/error/processed_at) and embedding
    bookkeeping (embeddings_created_at, embedding_version, chunk_count) for
    one uploaded file.
    """

    meeting = models.ForeignKey(MeetingRoom, on_delete=models.CASCADE, related_name='document_uploads')
    uploaded_by = models.ForeignKey(User, on_delete=models.CASCADE, related_name='document_uploads')
    file_name = models.CharField(max_length=255)
    file_type = models.CharField(max_length=50, help_text="File extension or MIME hint")
    # Local filesystem location used during processing; s3_url is the durable copy.
    storage_path = models.CharField(max_length=500, help_text="Storage path for local processing")
    s3_url = models.URLField(max_length=500, null=True, blank=True)
    raw_text = models.TextField(null=True, blank=True)
    # Lifecycle: uploaded -> processing -> completed/failed.
    status = models.CharField(
        max_length=50,
        default='uploaded',
        choices=[
            ('uploaded', 'Uploaded'),
            ('processing', 'Processing'),
            ('completed', 'Completed'),
            ('failed', 'Failed')
        ]
    )
    error_message = models.TextField(null=True, blank=True)
    processed_at = models.DateTimeField(null=True, blank=True)
    embeddings_created_at = models.DateTimeField(null=True, blank=True)
    embedding_version = models.IntegerField(default=1, help_text="Version of embeddings (for migration)")
    chunk_count = models.IntegerField(default=0, help_text="Number of chunks created")
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        return f"{self.file_name} - {self.meeting.title}"

    class Meta:
        ordering = ['-created_at']
143
+
144
+
145
class DocumentChunk(models.Model):
    """Store document chunks with embeddings for RAG.

    One ordered slice of an uploaded document; the vector lives in an external
    vector store, referenced by ``embedding_vector_id``.
    """

    document = models.ForeignKey(DocumentUpload, on_delete=models.CASCADE, related_name='chunks')
    chunk_text = models.TextField(help_text="Text chunk from document")
    chunk_index = models.IntegerField(help_text="Order of chunk in document")
    # Coarse content classification emitted by the extractor.
    block_type = models.CharField(max_length=50, default='text', help_text="text/table/image/other")
    metadata = models.JSONField(default=dict, help_text="Extractor metadata")
    embedding_vector_id = models.CharField(max_length=255, null=True, blank=True, help_text="Vector DB ID")
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        return f"DocChunk {self.chunk_index} - {self.document.file_name}"

    class Meta:
        ordering = ['document', 'chunk_index']
160
+
161
+
162
class MeetingAgendaPoint(models.Model):
    """Shared agenda points for a meeting.

    Points are displayed in ``order`` (ties broken by creation time) and may
    be either AI-generated or user-authored.
    """

    meeting = models.ForeignKey(MeetingRoom, on_delete=models.CASCADE, related_name='agenda_points')
    text = models.TextField()
    order = models.IntegerField(default=0)
    # SET_NULL keeps agenda points alive when their author account is deleted.
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True, blank=True)
    is_ai_generated = models.BooleanField(default=True)
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        return f"Agenda {self.order} - {self.meeting.title}"

    class Meta:
        ordering = ['order', 'created_at']
176
+
177
+
178
class ConversationHistory(models.Model):
    """Store Q&A history for context-aware responses.

    One row per question/answer exchange with the meeting assistant;
    ``relevant_chunks`` records which retrieval chunks backed the answer.
    """

    meeting = models.ForeignKey(MeetingRoom, on_delete=models.CASCADE, related_name='conversation_history')
    user = models.ForeignKey(User, on_delete=models.CASCADE)
    user_question = models.TextField()
    assistant_response = models.TextField()
    relevant_chunks = models.JSONField(default=list, help_text="List of chunk IDs used for response")
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        return f"{self.meeting.title} - {self.user.username}"

    class Meta:
        ordering = ['-created_at']