=Apyhtml20 committed on
Commit
99b596a
·
1 Parent(s): bb44888

Initial deploy

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +3 -0
  2. .gitignore +0 -0
  3. Dockerfile +33 -0
  4. README.md +4 -4
  5. backend/.dockerignore +12 -0
  6. backend/.spyproject/config/backups/codestyle.ini.bak +8 -0
  7. backend/.spyproject/config/backups/encoding.ini.bak +6 -0
  8. backend/.spyproject/config/backups/vcs.ini.bak +7 -0
  9. backend/.spyproject/config/backups/workspace.ini.bak +12 -0
  10. backend/.spyproject/config/codestyle.ini +8 -0
  11. backend/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini +5 -0
  12. backend/.spyproject/config/defaults/defaults-encoding-0.2.0.ini +3 -0
  13. backend/.spyproject/config/defaults/defaults-vcs-0.2.0.ini +4 -0
  14. backend/.spyproject/config/defaults/defaults-workspace-0.2.0.ini +6 -0
  15. backend/.spyproject/config/encoding.ini +6 -0
  16. backend/.spyproject/config/vcs.ini +7 -0
  17. backend/.spyproject/config/workspace.ini +12 -0
  18. backend/Dockerfile +21 -0
  19. backend/app/.spyproject/config/codestyle.ini +8 -0
  20. backend/app/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini +5 -0
  21. backend/app/.spyproject/config/defaults/defaults-encoding-0.2.0.ini +3 -0
  22. backend/app/.spyproject/config/defaults/defaults-vcs-0.2.0.ini +4 -0
  23. backend/app/.spyproject/config/defaults/defaults-workspace-0.2.0.ini +6 -0
  24. backend/app/.spyproject/config/encoding.ini +6 -0
  25. backend/app/.spyproject/config/vcs.ini +7 -0
  26. backend/app/.spyproject/config/workspace.ini +12 -0
  27. backend/app/agent.py +313 -0
  28. backend/app/auth/jwt_handler.py +19 -0
  29. backend/app/auth/middleware.py +33 -0
  30. backend/app/db/crud.py +160 -0
  31. backend/app/db/database.py +26 -0
  32. backend/app/db/models.py +61 -0
  33. backend/app/ingest.py +123 -0
  34. backend/app/main.py +38 -0
  35. backend/app/rag.py +35 -0
  36. backend/app/rag_evaluator.py +210 -0
  37. backend/app/router_service.py +206 -0
  38. backend/app/schemas.py +50 -0
  39. backend/app/schemas_new.py +43 -0
  40. backend/app/tools/tool_flashcards.py +77 -0
  41. backend/app/tools/tool_quiz.py +92 -0
  42. backend/app/tools/tool_rag_qa.py +61 -0
  43. backend/app/tools/tool_resume.py +51 -0
  44. backend/app/tools/tool_simple_explain.py +53 -0
  45. backend/database.py +25 -0
  46. backend/migrate.py +20 -0
  47. backend/requirements.txt +30 -0
  48. docker-compose.yml +39 -0
  49. frontend/.gitignore +24 -0
  50. frontend/Dockerfile +30 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.jpg filter=lfs diff=lfs merge=lfs -text
37
+ *.png filter=lfs diff=lfs merge=lfs -text
38
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
Binary file (243 Bytes). View file
 
Dockerfile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ── Build Frontend ────────────────────────────────────────────────────────────
2
+ FROM node:20-alpine AS frontend-builder
3
+ WORKDIR /app/frontend
4
+ COPY frontend/package*.json ./
5
+ RUN npm ci
6
+ COPY frontend/ ./
7
+ ARG VITE_API_URL=/api
8
+ ENV VITE_API_URL=$VITE_API_URL
9
+ RUN npm run build
10
+
11
+ # ── Backend + Frontend servi par FastAPI ──────────────────────────────────────
12
+ FROM python:3.10-slim
13
+ WORKDIR /app
14
+
15
+ RUN apt-get update && apt-get install -y --no-install-recommends curl \
16
+ && rm -rf /var/lib/apt/lists/*
17
+
18
+ COPY backend/requirements.txt .
19
+ RUN pip install --no-cache-dir -r requirements.txt
20
+
21
+ COPY backend/ .
22
+
23
+ # Copier le build frontend dans le dossier static du backend
24
+ COPY --from=frontend-builder /app/frontend/dist ./static
25
+
26
+ RUN mkdir -p /app/chroma_db /app/documents
27
+
28
+ EXPOSE 7860
29
+
30
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
31
+ CMD curl -f http://localhost:7860/health || exit 1
32
+
33
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: PaperBrainAI
3
- emoji: 🐢
4
- colorFrom: blue
5
- colorTo: yellow
6
  sdk: docker
7
  pinned: false
8
  ---
 
1
  ---
2
+ title: PaperBrain
3
+ emoji: 💻
4
+ colorFrom: green
5
+ colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
  ---
backend/.dockerignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .venv/
6
+ venv/
7
+ env/
8
+ chroma_db/
9
+ smartstudydb
10
+ .git/
11
+ .idea/
12
+ .vscode/
backend/.spyproject/config/backups/codestyle.ini.bak ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [codestyle]
2
+ indentation = True
3
+ edge_line = True
4
+ edge_line_columns = 79
5
+
6
+ [main]
7
+ version = 0.2.0
8
+
backend/.spyproject/config/backups/encoding.ini.bak ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [encoding]
2
+ text_encoding = utf-8
3
+
4
+ [main]
5
+ version = 0.2.0
6
+
backend/.spyproject/config/backups/vcs.ini.bak ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ [vcs]
2
+ use_version_control = False
3
+ version_control_system =
4
+
5
+ [main]
6
+ version = 0.2.0
7
+
backend/.spyproject/config/backups/workspace.ini.bak ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [workspace]
2
+ restore_data_on_startup = True
3
+ save_data_on_exit = True
4
+ save_history = True
5
+ save_non_project_files = False
6
+ project_type = 'empty-project-type'
7
+ recent_files = ['app\\auth\\jwt_handler.py', '.dockerignore', 'app\\main.py', 'app\\tools\\tool_flashcards.py', 'app\\tools\\tool_quiz.py', 'app\\tools\\tool_rag_qa.py', 'app\\tools\\tool_resume.py', 'app\\tools\\tool_simple_explain.py', 'app\\agent.py', 'app\\ingest.py', 'app\\rag_evaluator.py']
8
+
9
+ [main]
10
+ version = 0.2.0
11
+ recent_files = []
12
+
backend/.spyproject/config/codestyle.ini ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [codestyle]
2
+ indentation = True
3
+ edge_line = True
4
+ edge_line_columns = 79
5
+
6
+ [main]
7
+ version = 0.2.0
8
+
backend/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [codestyle]
2
+ indentation = True
3
+ edge_line = True
4
+ edge_line_columns = 79
5
+
backend/.spyproject/config/defaults/defaults-encoding-0.2.0.ini ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [encoding]
2
+ text_encoding = utf-8
3
+
backend/.spyproject/config/defaults/defaults-vcs-0.2.0.ini ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [vcs]
2
+ use_version_control = False
3
+ version_control_system =
4
+
backend/.spyproject/config/defaults/defaults-workspace-0.2.0.ini ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [workspace]
2
+ restore_data_on_startup = True
3
+ save_data_on_exit = True
4
+ save_history = True
5
+ save_non_project_files = False
6
+
backend/.spyproject/config/encoding.ini ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [encoding]
2
+ text_encoding = utf-8
3
+
4
+ [main]
5
+ version = 0.2.0
6
+
backend/.spyproject/config/vcs.ini ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ [vcs]
2
+ use_version_control = False
3
+ version_control_system =
4
+
5
+ [main]
6
+ version = 0.2.0
7
+
backend/.spyproject/config/workspace.ini ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [workspace]
2
+ restore_data_on_startup = True
3
+ save_data_on_exit = True
4
+ save_history = True
5
+ save_non_project_files = False
6
+ project_type = 'empty-project-type'
7
+ recent_files = ['app\\auth\\jwt_handler.py', '.dockerignore', 'app\\main.py', 'app\\tools\\tool_flashcards.py', 'app\\tools\\tool_quiz.py', 'app\\tools\\tool_rag_qa.py', 'app\\tools\\tool_resume.py', 'app\\tools\\tool_simple_explain.py', 'app\\agent.py', 'app\\ingest.py', 'app\\rag_evaluator.py', 'app\\rag.py', 'app\\router_service.py', 'app\\schemas_new.py', 'app\\schemas.py', 'app\\auth\\middleware.py', 'app\\db\\crud.py', 'app\\db\\database.py', 'app\\db\\models.py']
8
+
9
+ [main]
10
+ version = 0.2.0
11
+ recent_files = []
12
+
backend/Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /app
4
+
5
+ RUN apt-get update && apt-get install -y --no-install-recommends curl \
6
+ && rm -rf /var/lib/apt/lists/*
7
+
8
+ COPY requirements.txt .
9
+ RUN pip install --no-cache-dir -r requirements.txt
10
+
11
+ COPY . .
12
+
13
+ RUN mkdir -p /app/chroma_db /app/documents
14
+
15
+ # HuggingFace Spaces impose le port 7860
16
+ EXPOSE 7860
17
+
18
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
19
+ CMD curl -f http://localhost:7860/health || exit 1
20
+
21
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
backend/app/.spyproject/config/codestyle.ini ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [codestyle]
2
+ indentation = True
3
+ edge_line = True
4
+ edge_line_columns = 79
5
+
6
+ [main]
7
+ version = 0.2.0
8
+
backend/app/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [codestyle]
2
+ indentation = True
3
+ edge_line = True
4
+ edge_line_columns = 79
5
+
backend/app/.spyproject/config/defaults/defaults-encoding-0.2.0.ini ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [encoding]
2
+ text_encoding = utf-8
3
+
backend/app/.spyproject/config/defaults/defaults-vcs-0.2.0.ini ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [vcs]
2
+ use_version_control = False
3
+ version_control_system =
4
+
backend/app/.spyproject/config/defaults/defaults-workspace-0.2.0.ini ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [workspace]
2
+ restore_data_on_startup = True
3
+ save_data_on_exit = True
4
+ save_history = True
5
+ save_non_project_files = False
6
+
backend/app/.spyproject/config/encoding.ini ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [encoding]
2
+ text_encoding = utf-8
3
+
4
+ [main]
5
+ version = 0.2.0
6
+
backend/app/.spyproject/config/vcs.ini ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ [vcs]
2
+ use_version_control = False
3
+ version_control_system =
4
+
5
+ [main]
6
+ version = 0.2.0
7
+
backend/app/.spyproject/config/workspace.ini ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [workspace]
2
+ restore_data_on_startup = True
3
+ save_data_on_exit = True
4
+ save_history = True
5
+ save_non_project_files = False
6
+ project_type = 'empty-project-type'
7
+ recent_files = []
8
+
9
+ [main]
10
+ version = 0.2.0
11
+ recent_files = []
12
+
backend/app/agent.py ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import re
4
+ import os
5
+ from huggingface_hub import InferenceClient
6
+
7
+ # ── Config ────────────────────────────────────────────────────────────────────
8
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
9
+ MODEL_NAME = os.getenv("HF_MODEL", "Qwen/Qwen2.5-72B-Instruct")
10
+
11
+ conversation_store: dict[str, list] = {}
12
+
13
+ _client: InferenceClient | None = None
14
+
15
+
16
def _get_client() -> InferenceClient:
    """Return the module-wide ``InferenceClient``, building it on first use.

    The instance is cached in the module-level ``_client`` variable, so later
    calls reuse it. An empty ``HF_TOKEN`` is handed to the client as ``None``.
    """
    global _client
    if _client is not None:
        return _client
    token = HF_TOKEN if HF_TOKEN else None
    _client = InferenceClient(token=token)
    return _client
21
+
22
+
23
+ # ── Core call — utilise chat_completion (compatible tous providers HF) ─────────
24
def _call_hf(
    system: str,
    user: str,
    max_tokens: int = 1024,
    temperature: float = 0.4,
) -> str:
    """Send one system + user exchange to the configured chat model.

    Args:
        system: Text sent with the ``system`` role.
        user: Text sent with the ``user`` role.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``chat_completion`` as ``temperature``.

    Returns:
        The stripped content of the first choice. An empty string is returned
        when the model answers with no content at all.

    Raises:
        Exception: Any failure while calling the model, re-raised with a
            "HuggingFace InferenceClient error" prefix; the original error is
            attached as ``__cause__``.
    """
    try:
        response = _get_client().chat_completion(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user},
            ],
            max_tokens=max_tokens,
            temperature=temperature,
        )
        # The message content can be None; avoid crashing on .strip().
        content = response.choices[0].message.content
        return (content or "").strip()
    except Exception as e:
        raise Exception(f"HuggingFace InferenceClient error: {str(e)}") from e
44
+
45
+
46
+ # ── JSON helpers ──────────────────────────────────────────────────────────────
47
+ def _fix_json(s: str) -> str:
48
+ s = re.sub(r',\s*([}\]])', r'\1', s)
49
+ s = re.sub(r'[\x00-\x1f\x7f]', ' ', s)
50
+ return s
51
+
52
+
53
+ def _extract_json_array(raw: str) -> list:
54
+ cleaned = re.sub(r'```(?:json)?\s*', '', raw)
55
+ cleaned = re.sub(r'```', '', cleaned).strip()
56
+
57
+ try:
58
+ result = json.loads(cleaned)
59
+ if isinstance(result, list):
60
+ return result
61
+ except Exception:
62
+ pass
63
+
64
+ start = cleaned.find('[')
65
+ if start != -1:
66
+ depth = 0
67
+ for i, ch in enumerate(cleaned[start:], start):
68
+ if ch == '[':
69
+ depth += 1
70
+ elif ch == ']':
71
+ depth -= 1
72
+ if depth == 0:
73
+ candidate = cleaned[start:i + 1]
74
+ for attempt in (candidate, _fix_json(candidate)):
75
+ try:
76
+ result = json.loads(attempt)
77
+ if isinstance(result, list):
78
+ return result
79
+ except Exception:
80
+ pass
81
+ break
82
+
83
+ match = re.search(r'\[[\s\S]*\]', cleaned)
84
+ if match:
85
+ for attempt in (match.group(), _fix_json(match.group())):
86
+ try:
87
+ return json.loads(attempt)
88
+ except Exception:
89
+ pass
90
+
91
+ return []
92
+
93
+
94
+ # ── Conversation history ──────────────────────────────────────────────────────
95
def _get_history(user_id: str) -> list:
    """Return the exchanges stored for ``user_id`` in ``conversation_store``.

    A fresh empty list is returned when the user has no stored history.
    """
    try:
        return conversation_store[user_id]
    except KeyError:
        return []
97
+
98
+
99
def _save_history(user_id: str, user_msg: str, ai_msg: str) -> None:
    """Record one user/assistant exchange for ``user_id``.

    Only the five most recent exchanges are kept in ``conversation_store``.
    """
    exchanges = conversation_store.setdefault(user_id, [])
    exchanges.append({"user": user_msg, "assistant": ai_msg})
    # Rebind to a trimmed copy holding the last five entries.
    conversation_store[user_id] = exchanges[-5:]
104
+
105
+
106
+ # ── Async entry point ─────────────────────────────────────────────────────────
107
async def run_agent(action: str, data: dict) -> dict:
    """Run the blocking agent dispatcher without blocking the event loop.

    ``_run_sync(action, data)`` is executed in the loop's default executor and
    its result is awaited and returned.

    Args:
        action: Name of the action to dispatch.
        data: Payload passed to the action handler.
    """
    # Inside a coroutine, get_running_loop() is the supported way to reach the
    # current loop; get_event_loop() is discouraged for this use.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _run_sync, action, data)
110
+
111
+
112
def _run_sync(action: str, data: dict) -> dict:
    """Route ``action`` to its handler and return the handler's result.

    Unknown actions yield a dict carrying an "Unknown action" answer and the
    action name.
    """
    handlers = {
        "chat": _chat,
        "quiz": _quiz,
        "flashcards": _flashcards,
        "explain": _explain,
        "resume": _resume,
        "rag-qa": _rag_qa,
    }
    handler = handlers.get(action)
    if not handler:
        return {"answer": f"Unknown action: {action}", "action": action}
    return handler(data)
125
+
126
+
127
+ # ── Action handlers ───────────────────────────────────────────────────────────
128
+
129
def _chat(data: dict) -> dict:
    """Answer a free-form question, prefixing the prompt with recent history.

    Reads ``query`` and ``user_id`` (default ``"anonymous"``) from ``data``,
    calls the model, stores the exchange in the user's history and returns
    ``{"answer", "user_id"}``.
    """
    query = data.get("query", "")
    user_id = data.get("user_id", "anonymous")

    past = _get_history(user_id)
    if past:
        turns = [
            f"Utilisateur: {turn['user']}\nAssistant: {turn['assistant']}"
            for turn in past
        ]
        history_text = "Conversation récente :\n" + "\n".join(turns) + "\n\n"
    else:
        history_text = ""

    system = (
        "Tu es PaperBrain AI, un assistant pédagogique pour les étudiants. "
        "Aide les étudiants à comprendre leurs cours, préparer leurs examens et apprendre efficacement. "
        "Réponds toujours dans la même langue que la question. "
        "Sois clair, structuré et pédagogique."
    )
    user = f"{history_text}Utilisateur : {query}"

    answer = _call_hf(system, user, max_tokens=1024, temperature=0.5)
    _save_history(user_id, query, answer)
    return {"answer": answer, "user_id": user_id}
152
+
153
+
154
def _quiz(data: dict) -> dict:
    """Generate multiple-choice questions on a topic via the model.

    Reads ``topic``, ``num_questions`` (default 5) and ``difficulty``
    (default ``"medium"``) from ``data``. Returns the cleaned questions with
    the topic and difficulty, or an empty question list plus an error message
    and a 300-character preview of the raw reply when nothing usable parsed.
    """
    topic = data.get("topic", "")
    num_questions = data.get("num_questions", 5)
    difficulty = data.get("difficulty", "medium")

    # Unknown difficulty values fall back to the intermediate wording.
    level_desc = {
        "easy": "simples et directes, pour débutants",
        "medium": "de difficulté intermédiaire",
        "hard": "difficiles et approfondies, pour experts",
    }.get(difficulty, "de difficulté intermédiaire")

    system = (
        "Tu es un générateur de quiz pédagogique. "
        "Tu réponds UNIQUEMENT avec un tableau JSON valide, sans texte avant ni après, sans balises markdown."
    )
    user = (
        f"Génère {num_questions} questions QCM ({level_desc}) sur : \"{topic}\".\n\n"
        "Chaque objet JSON doit contenir : question, options (tableau de 4 chaînes "
        "\"A) ...\", \"B) ...\", \"C) ...\", \"D) ...\"), correct_answer (A/B/C/D), explanation.\n\n"
        "Réponds UNIQUEMENT avec le tableau JSON."
    )

    raw = _call_hf(system, user, max_tokens=1500, temperature=0.3)

    # Keep only dict entries that carry both a question and options.
    cleaned = []
    for item in _extract_json_array(raw):
        if not isinstance(item, dict):
            continue
        if not (item.get("question") and item.get("options")):
            continue
        cleaned.append(
            {
                "question": str(item.get("question", "")),
                "options": list(item.get("options", [])),
                "correct_answer": str(item.get("correct_answer", "A")),
                "explanation": str(item.get("explanation", "")),
            }
        )

    if cleaned:
        return {"questions": cleaned, "topic": topic, "difficulty": difficulty}

    return {"questions": [], "topic": topic, "error": "JSON invalide.", "raw_preview": raw[:300]}
195
+
196
+
197
def _flashcards(data: dict) -> dict:
    """Generate front/back flashcards on a topic via the model.

    Reads ``topic`` and ``num_cards`` (default 8) from ``data``. Returns the
    cleaned cards with the topic, or an empty list plus an error message when
    no usable card could be parsed.
    """
    topic = data.get("topic", "")
    num_cards = data.get("num_cards", 8)

    system = (
        "Tu es un générateur de flashcards pédagogiques. "
        "Tu réponds UNIQUEMENT avec un tableau JSON valide, sans texte avant ni après, sans balises markdown."
    )
    user = (
        f"Génère {num_cards} flashcards sur : \"{topic}\".\n\n"
        "Chaque objet JSON doit contenir : front (question/terme) et back (réponse/définition).\n\n"
        "Réponds UNIQUEMENT avec le tableau JSON."
    )

    raw = _call_hf(system, user, max_tokens=1024, temperature=0.3)

    # Keep only dict entries where both sides are non-empty.
    cleaned = []
    for card in _extract_json_array(raw):
        if not isinstance(card, dict):
            continue
        if card.get("front") and card.get("back"):
            cleaned.append(
                {"front": str(card.get("front", "")), "back": str(card.get("back", ""))}
            )

    if cleaned:
        return {"flashcards": cleaned, "topic": topic}

    return {"flashcards": [], "topic": topic, "error": "Impossible de parser les flashcards."}
224
+
225
+
226
def _explain(data: dict) -> dict:
    """Ask the model for a structured explanation of a concept.

    Reads ``concept`` and ``level`` (default ``"intermediate"``) from ``data``;
    an unrecognised level uses the intermediate wording. Returns
    ``{"explanation", "concept", "level"}``.
    """
    concept = data.get("concept", "")
    level = data.get("level", "intermediate")

    wording = {
        "beginner": "de manière très simple, avec des analogies du quotidien, pour un lycéen",
        "intermediate": "clairement avec les concepts essentiels, pour un étudiant universitaire",
        "advanced": "de manière approfondie et technique, pour un expert du domaine",
    }
    level_desc = wording[level] if level in wording else wording["intermediate"]

    system = (
        "Tu es un professeur pédagogue expert. "
        "Réponds dans la même langue que le concept demandé."
    )
    user = (
        f"Explique le concept suivant {level_desc}.\n\n"
        "Structure ta réponse avec :\n"
        "1. Définition courte et claire\n"
        "2. Points clés à retenir\n"
        "3. Exemple concret\n"
        "4. Applications pratiques\n\n"
        f"Concept : {concept}"
    )

    return {
        "explanation": _call_hf(system, user, max_tokens=1024, temperature=0.5),
        "concept": concept,
        "level": level,
    }
253
+
254
+
255
def _resume(data: dict) -> dict:
    """Summarise the text found under ``data["text"]``.

    Only the first 3000 characters are sent to the model. When no text is
    supplied, a fixed "no text provided" summary is returned without calling
    the model.
    """
    text = data.get("text", "")
    if text:
        system = (
            "Tu es un assistant pédagogique expert en synthèse de documents. "
            "Réponds dans la même langue que le texte fourni."
        )
        user = (
            "Résume le texte suivant de façon claire et structurée.\n"
            "Utilise des titres et des points clés.\n\n"
            f"Texte :\n{text[:3000]}"
        )
        return {"summary": _call_hf(system, user, max_tokens=1024, temperature=0.4)}

    return {"summary": "Aucun texte fourni."}
272
+
273
+
274
def _rag_qa(data: dict) -> dict:
    """Answer ``data["query"]`` using passages retrieved from the document store.

    The four nearest passages are fetched through ``app.rag.query_documents``;
    those whose distance is below 0.8 form the context (truncated to 3000
    characters) given to the model.

    Returns:
        ``{"answer", "sources"}``. ``sources`` lists each distinct ``source``
        metadata value once, in retrieval order. When no passage is close
        enough, or when any step fails, ``sources`` is empty and ``answer``
        carries the explanation or error text.
    """
    query = data.get("query", "")

    try:
        from app.rag import query_documents

        results = query_documents(query, n_results=4)
        documents = results.get("documents", [[]])[0]
        metadatas = results.get("metadatas", [[]])[0]
        distances = results.get("distances", [[]])[0]

        THRESHOLD = 0.8
        relevant = [
            (doc, meta)
            for doc, meta, dist in zip(documents, metadatas, distances)
            if dist < THRESHOLD
        ]

        if not relevant:
            return {
                "answer": "Aucune information pertinente trouvée dans vos documents.",
                "sources": [],
            }

        context = "\n\n---\n\n".join([doc for doc, _ in relevant])
        # dict.fromkeys de-duplicates while keeping retrieval order (a set
        # would give an unstable order); a missing metadata entry is treated
        # as an empty dict instead of failing the whole request.
        sources = list(
            dict.fromkeys((meta or {}).get("source", "inconnu") for _, meta in relevant)
        )

        system = (
            "Tu es un assistant pédagogique RAG. "
            "Réponds à la question en te basant UNIQUEMENT sur le contexte fourni. "
            "Si la réponse n'est pas dans le contexte, dis-le clairement. "
            "Réponds dans la même langue que la question."
        )
        user = f"Contexte :\n{context[:3000]}\n\nQuestion : {query}"

        answer = _call_hf(system, user, max_tokens=1024, temperature=0.4)
        return {"answer": answer, "sources": sources}

    except Exception as e:
        # Best effort: report retrieval/model failures in the answer field.
        return {"answer": f"Erreur RAG : {str(e)}", "sources": []}
backend/app/auth/jwt_handler.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, timedelta
2
+ from jose import JWTError, jwt
3
+
4
import os

# Key used to sign and verify JWTs. It is read from the JWT_SECRET_KEY
# environment variable when that is set and non-empty; otherwise the previous
# hard-coded value is used so existing setups keep working.
# SECURITY: the fallback value is public in the repository - always set
# JWT_SECRET_KEY in production.
SECRET_KEY = os.getenv("JWT_SECRET_KEY") or "smartstudy_secret_key_2024_change_in_production"
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_HOURS = 24
7
+
8
+
9
def create_token(data: dict) -> str:
    """Return a signed JWT holding ``data`` plus an ``exp`` claim.

    ``exp`` is set to the current UTC time plus ``ACCESS_TOKEN_EXPIRE_HOURS``.
    The caller's dict is not modified.
    """
    expires_at = datetime.utcnow() + timedelta(hours=ACCESS_TOKEN_EXPIRE_HOURS)
    claims = {**data, "exp": expires_at}
    return jwt.encode(claims, SECRET_KEY, algorithm=ALGORITHM)
13
+
14
+
15
def decode_token(token: str) -> dict | None:
    """Decode and verify a JWT signed with ``SECRET_KEY``.

    Returns:
        The decoded claims, or ``None`` when decoding raises ``JWTError``.
        Callers must handle the ``None`` case.
    """
    try:
        return jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
    except JWTError:
        return None
backend/app/auth/middleware.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Depends, HTTPException, status
2
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
3
+ from sqlalchemy.orm import Session
4
+ from app.db.database import get_db
5
+ from app.auth.jwt_handler import decode_token
6
+ from app.db.crud import get_user_by_id
7
+
8
+ security = HTTPBearer()
9
+
10
+
11
def get_current_user(
    credentials: HTTPAuthorizationCredentials = Depends(security),
    db: Session = Depends(get_db)
):
    """FastAPI dependency resolving the bearer token to a user record.

    Raises:
        HTTPException: 401 when the token cannot be decoded, when its ``sub``
            claim is not an integer, or when no user has that id.
    """
    payload = decode_token(credentials.credentials)

    if not payload:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Token invalide ou expiré"
        )

    # A validly signed token can still carry a non-numeric or null "sub";
    # answer 401 rather than letting int() raise and produce a 500.
    try:
        user_id = int(payload.get("sub", 0))
    except (TypeError, ValueError):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Token invalide ou expiré"
        ) from None

    user = get_user_by_id(db, user_id)

    if not user:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Utilisateur non trouvé"
        )

    return user
backend/app/db/crud.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hashlib
2
+ import secrets
3
+ from datetime import datetime, timedelta
4
+ from sqlalchemy.orm import Session
5
+ from app.db.models import User, QuizResult, StudySession
6
+
7
+
8
+ # ── Password
9
# Stored format for new hashes: "pbkdf2_sha256$<iterations>$<salt>$<hex digest>".
_PBKDF2_SCHEME = "pbkdf2_sha256"
_PBKDF2_ITERATIONS = 600_000


def hash_password(password: str) -> str:
    """Hash ``password`` with PBKDF2-HMAC-SHA256 and a random 16-byte salt.

    Returns:
        A string of the form ``pbkdf2_sha256$<iterations>$<salt>$<digest>``.
        The iteration count is stored so it can be raised later without
        invalidating existing hashes.
    """
    salt = secrets.token_hex(16)
    digest = hashlib.pbkdf2_hmac(
        "sha256", password.encode(), salt.encode(), _PBKDF2_ITERATIONS
    ).hex()
    return f"{_PBKDF2_SCHEME}${_PBKDF2_ITERATIONS}${salt}${digest}"


def verify_password(plain: str, hashed: str) -> bool:
    """Check ``plain`` against a stored hash.

    Both the current PBKDF2 format and the legacy ``<salt>:<sha256 hex>``
    format are accepted, so previously stored passwords keep working.

    Returns:
        True on a match; False on a mismatch or when ``hashed`` is malformed.
    """
    import hmac

    try:
        if hashed.startswith(_PBKDF2_SCHEME + "$"):
            _, iterations, salt, expected = hashed.split("$")
            actual = hashlib.pbkdf2_hmac(
                "sha256", plain.encode(), salt.encode(), int(iterations)
            ).hex()
        else:
            # Legacy format: single SHA-256 over password + salt.
            salt, expected = hashed.split(":")
            actual = hashlib.sha256((plain + salt).encode()).hexdigest()
        # Constant-time comparison to avoid leaking how many characters match.
        return hmac.compare_digest(actual, expected)
    except (AttributeError, TypeError, ValueError):
        # Malformed or missing stored value: treat as a failed verification.
        return False
21
+
22
+
23
+ # ── User CRUD
24
def get_user_by_email(db: Session, email: str):
    """Return the first ``User`` whose email equals ``email``, or ``None``."""
    matching = db.query(User).filter(User.email == email)
    return matching.first()
26
+
27
+
28
def get_user_by_username(db: Session, username: str):
    """Return the first ``User`` whose username equals ``username``, or ``None``."""
    matching = db.query(User).filter(User.username == username)
    return matching.first()
30
+
31
+
32
def get_user_by_id(db: Session, user_id: int):
    """Return the first ``User`` whose id equals ``user_id``, or ``None``."""
    matching = db.query(User).filter(User.id == user_id)
    return matching.first()
34
+
35
+
36
def create_user(db: Session, username: str, email: str, password: str):
    """Insert a new user with a hashed password and return the stored row.

    The password is passed through ``hash_password`` before storage and
    ``created_at`` is set to the current UTC time. The session is committed
    and the instance refreshed before being returned.
    """
    hashed = hash_password(password)
    new_user = User(
        username=username,
        email=email,
        password=hashed,
        created_at=datetime.utcnow(),
    )
    db.add(new_user)
    db.commit()
    db.refresh(new_user)
    return new_user
47
+
48
+
49
def update_streak(db: Session, user: User):
    """Update the user's login streak and ``last_login``, then commit.

    Compared with the previous login date (UTC): exactly one day later
    increments the streak, more than one day later resets it to 1, and any
    other gap leaves it unchanged. A user with no previous login starts at 1.
    """
    now = datetime.utcnow()
    previous = user.last_login
    if not previous:
        user.streak_days = 1
    else:
        gap_days = (now.date() - previous.date()).days
        if gap_days == 1:
            user.streak_days += 1
        elif gap_days > 1:
            user.streak_days = 1
    user.last_login = now
    db.commit()
61
+
62
+
63
+ # ── Quiz Results
64
def save_quiz_result(db: Session, user_id: int, req):
    """Store a quiz result and recompute the user's level from all results.

    Args:
        db: Open database session.
        user_id: Id of the user who took the quiz.
        req: Object exposing ``topic``, ``score``, ``total_questions``,
            ``correct_answers``, ``difficulty`` and ``duration_sec``.

    Returns:
        The persisted ``QuizResult``, refreshed after commit.
    """
    result = QuizResult(
        user_id=user_id,
        topic=req.topic,
        score=req.score,
        total_questions=req.total_questions,
        correct_answers=req.correct_answers,
        difficulty=req.difficulty,
        duration_sec=req.duration_sec
    )
    db.add(result)
    # The session factory is configured with autoflush=False, so without an
    # explicit flush the query below would not see the result just added and
    # the average would lag one quiz behind.
    db.flush()

    # Update the user's level from the average of all their scores.
    user = get_user_by_id(db, user_id)
    if user:
        results = db.query(QuizResult).filter(QuizResult.user_id == user_id).all()
        if len(results) > 0:
            avg = sum(r.score for r in results) / len(results)
            if avg >= 80:
                user.niveau = "expert"
            elif avg >= 60:
                user.niveau = "intermédiaire"
            else:
                user.niveau = "débutant"

    db.commit()
    db.refresh(result)
    return result
92
+
93
+
94
+ # ── Profile
95
def get_student_profile(db: Session, user_id: int) -> dict:
    """Assemble the profile payload (identity, statistics, recent quizzes).

    Returns an empty dict when the user does not exist. Otherwise returns a
    dict with ``user``, ``stats`` (session/quiz counts, average and best
    score, up to five most frequent session subjects) and ``recent_quiz``
    (the ten newest quiz results).
    """
    user = get_user_by_id(db, user_id)
    if not user:
        return {}

    # Quiz results, newest first.
    quiz_query = db.query(QuizResult).filter(QuizResult.user_id == user_id)
    quiz_results = quiz_query.order_by(QuizResult.created_at.desc()).all()

    sessions = db.query(StudySession).filter(StudySession.user_id == user_id).all()

    scores = [entry.score for entry in quiz_results]
    if scores:
        avg_score = round(sum(scores) / len(scores), 1)
        best_score = max(scores)
    else:
        avg_score = 0
        best_score = 0

    # Most frequent subjects across study sessions.
    subject_counts = {}
    for session in sessions:
        subject_counts[session.subject] = subject_counts.get(session.subject, 0) + 1
    ranked = [{"subject": name, "count": total} for name, total in subject_counts.items()]
    ranked.sort(key=lambda entry: entry["count"], reverse=True)
    top_subjects = ranked[:5]

    recent_quiz = []
    for entry in quiz_results[:10]:
        recent_quiz.append(
            {
                "topic": entry.topic,
                "score": entry.score,
                "date": entry.created_at.strftime("%d/%m/%Y"),
                "difficulty": entry.difficulty,
            }
        )

    if user.created_at:
        member_since = user.created_at.strftime("%d/%m/%Y")
    else:
        member_since = "N/A"

    return {
        "user": {
            "username": user.username,
            "email": user.email,
            "niveau": user.niveau,
            "streak_days": user.streak_days,
            "member_since": member_since,
        },
        "stats": {
            "total_sessions": len(sessions),
            "total_quiz": len(quiz_results),
            "average_score": avg_score,
            "best_score": best_score,
            "top_subjects": top_subjects,
        },
        "recent_quiz": recent_quiz,
    }
148
+
149
+
150
def get_progress(db: Session, user_id: int) -> dict:
    """Return the user's quiz scores in chronological order.

    The result is ``{"progression": [...]}`` where each item holds the date
    (``dd/mm``), the score and the topic of one quiz result, oldest first.
    """
    ordered = (
        db.query(QuizResult)
        .filter(QuizResult.user_id == user_id)
        .order_by(QuizResult.created_at.asc())
        .all()
    )

    points = []
    for entry in ordered:
        points.append(
            {"date": entry.created_at.strftime("%d/%m"), "score": entry.score, "topic": entry.topic}
        )
    return {"progression": points}
backend/app/db/database.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import create_engine
2
+ from sqlalchemy.ext.declarative import declarative_base
3
+ from sqlalchemy.orm import sessionmaker
4
+
5
+ DATABASE_URL = "sqlite:///./smartstudy.db"
6
+
7
+ engine = create_engine(
8
+ DATABASE_URL,
9
+ connect_args={"check_same_thread": False}
10
+ )
11
+
12
+ SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
13
+ Base = declarative_base()
14
+
15
+
16
def get_db():
    """Yield a new ``SessionLocal`` session and close it when the caller is done.

    Written as a generator so it can be used as a FastAPI dependency; the
    ``finally`` clause closes the session even if the consumer raises.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
22
+
23
+
24
def create_tables():
    """Create every table registered on ``Base`` using the module's engine."""
    # Imported here, right before metadata creation; the names themselves are
    # not used. Presumably this ensures the model classes are registered on
    # Base.metadata - confirm before removing.
    from app.db.models import (
        User,
        QuizResult,
        StudySession,
        FlashcardProgress,
    )
    Base.metadata.create_all(bind=engine)
backend/app/db/models.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import Column, Integer, String, Float, DateTime, ForeignKey, Boolean
2
+ from sqlalchemy.orm import relationship
3
+ from datetime import datetime
4
+ from app.db.database import Base
5
+
6
+
7
class User(Base):
    """ORM model for the ``users`` table."""

    __tablename__ = "users"

    # Identity and credentials
    id = Column(Integer, primary_key=True, index=True)
    username = Column(String, unique=True, index=True)
    email = Column(String, unique=True, index=True)
    password = Column(String)

    # Learning state
    niveau = Column(String, default="débutant")
    streak_days = Column(Integer, default=0)

    # Timestamps default to the UTC time at insert
    last_login = Column(DateTime, default=datetime.utcnow)
    created_at = Column(DateTime, default=datetime.utcnow)
    is_active = Column(Boolean, default=True)

    # Reverse sides of QuizResult.user and StudySession.user
    quiz_results = relationship("QuizResult", back_populates="user")
    study_sessions = relationship("StudySession", back_populates="user")
22
+
23
+
24
class QuizResult(Base):
    """ORM model for the ``quiz_results`` table (one row per completed quiz)."""

    __tablename__ = "quiz_results"

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id"))

    # Outcome of the quiz
    topic = Column(String)
    score = Column(Float)
    total_questions = Column(Integer)
    correct_answers = Column(Integer)
    difficulty = Column(String, default="medium")
    duration_sec = Column(Integer, default=0)
    created_at = Column(DateTime, default=datetime.utcnow)

    # Owning user; reverse side is User.quiz_results
    user = relationship("User", back_populates="quiz_results")
38
+
39
+
40
class StudySession(Base):
    """ORM model for the ``study_sessions`` table."""

    __tablename__ = "study_sessions"

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id"))
    # Kind of activity: chat, quiz, flashcards, explain, rag-qa
    action = Column(String)
    subject = Column(String, default="general")
    duration = Column(Integer, default=0)
    created_at = Column(DateTime, default=datetime.utcnow)

    # Owning user; reverse side is User.study_sessions
    user = relationship("User", back_populates="study_sessions")
51
+
52
+
53
class FlashcardProgress(Base):
    """ORM model for the ``flashcard_progress`` table."""

    __tablename__ = "flashcard_progress"

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id"))
    topic = Column(String)
    # Integer counters, both defaulting to 0
    known = Column(Integer, default=0)
    unknown = Column(Integer, default=0)
    created_at = Column(DateTime, default=datetime.utcnow)
backend/app/ingest.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uuid
3
+ from app.rag import add_documents, get_collection
4
+
5
CHUNK_SIZE = 600
CHUNK_OVERLAP = 80


def chunk_text(text: str, chunk_size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP) -> list:
    """Split ``text`` into chunks, following paragraph boundaries where possible.

    Paragraphs (separated by a blank line) are merged greedily while the
    accumulated length stays below ``chunk_size``. Any resulting chunk longer
    than ``2 * chunk_size`` is then cut into character windows of
    ``chunk_size`` characters, consecutive windows sharing ``overlap``
    characters.

    Args:
        text: Raw document text.
        chunk_size: Target chunk length in characters; must be positive.
        overlap: Characters shared by consecutive windows of an oversized
            chunk; must satisfy ``0 <= overlap < chunk_size``.

    Returns:
        List of non-empty, stripped chunk strings (empty list for blank text).

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range. Such
            values previously either crashed inside ``range()`` or silently
            dropped oversized chunks.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    # Pass 1: greedy paragraph merge.
    chunks = []
    current = ""
    for para in text.split("\n\n"):
        para = para.strip()
        if not para:
            continue
        if len(current) + len(para) < chunk_size:
            current = f"{current}\n\n{para}" if current else para
        else:
            if current:
                chunks.append(current.strip())
            current = para
    if current:
        chunks.append(current.strip())

    # Pass 2: window the chunks that are still far too large.
    step = chunk_size - overlap
    final_chunks = []
    for chunk in chunks:
        if len(chunk) <= chunk_size * 2:
            final_chunks.append(chunk)
            continue
        for start in range(0, len(chunk), step):
            part = chunk[start:start + chunk_size].strip()
            if part:
                final_chunks.append(part)
            # This window already reaches the end: a further window would only
            # repeat text that is fully contained in this one.
            if start + chunk_size >= len(chunk):
                break

    return final_chunks
41
+
42
+
43
def read_file(file_path: str) -> str:
    """Return the text content of a .txt, .pdf, .docx or .doc file.

    The format is chosen from the lower-cased file extension. PDF pages and
    DOCX paragraphs without text are skipped; the remaining pieces are joined
    with a blank line.

    Raises:
        ImportError: If the optional reader library (pdfplumber / python-docx)
            is not installed.
        ValueError: If the extension is not supported.
    """
    _, suffix = os.path.splitext(file_path)
    ext = suffix.lower()

    if ext == ".txt":
        # Undecodable bytes are dropped rather than raising.
        with open(file_path, "r", encoding="utf-8", errors="ignore") as handle:
            return handle.read()

    if ext == ".pdf":
        try:
            import pdfplumber
            with pdfplumber.open(file_path) as pdf:
                extracted = (page.extract_text() for page in pdf.pages)
                return "\n\n".join(page_text for page_text in extracted if page_text)
        except ImportError:
            raise ImportError("pdfplumber requis: pip install pdfplumber")

    if ext in (".docx", ".doc"):
        try:
            import docx
            document = docx.Document(file_path)
            paragraphs = [p.text for p in document.paragraphs if p.text.strip()]
            return "\n\n".join(paragraphs)
        except ImportError:
            raise ImportError("python-docx requis: pip install python-docx")

    raise ValueError(f"Format non supporté: {ext}. Acceptés: .pdf, .txt, .docx")
74
+
75
+
76
def check_duplicate(file_name: str) -> bool:
    """Tell whether chunks tagged with ``file_name`` already exist in ChromaDB.

    Best-effort: any error raised by the store is treated as "not a
    duplicate" and ``False`` is returned.
    """
    try:
        collection = get_collection()
        results = collection.get(where={"source": file_name})
        return len(results.get("ids", [])) > 0
    except Exception:
        # `Exception` instead of a bare `except` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return False
84
+
85
+
86
def ingest_document(file_path: str, subject: str = "general") -> int:
    """Ingest a document into ChromaDB and return the number of chunks stored.

    The file is read and chunked *before* any previously stored chunks for the
    same file name are removed, so an unreadable or empty replacement file no
    longer wipes the existing index entries.

    Args:
        file_path: Path of the file to ingest; its base name is used as the
            ``source`` metadata value.
        subject: Value stored in the ``subject`` metadata field of every chunk.

    Raises:
        ValueError: If the document has no text or yields no chunks.
        Exception: Whatever ``read_file`` or ``add_documents`` raise.
    """
    file_name = os.path.basename(file_path)

    # Read and split first: nothing is touched in the store if this fails.
    text = read_file(file_path)
    if not text.strip():
        raise ValueError("Le document est vide ou illisible")

    chunks = chunk_text(text)
    if not chunks:
        raise ValueError("Impossible de découper le document en chunks")

    # Remove the chunks of a previous version of the same file (best effort).
    try:
        collection = get_collection()
        old = collection.get(where={"source": file_name})
        if old.get("ids"):
            collection.delete(ids=old["ids"])
            print(f"🗑️ Anciens chunks supprimés pour '{file_name}'")
    except Exception as e:
        print(f"Warning suppression: {e}")

    # One random id and one metadata record per chunk.
    total = len(chunks)
    ids = [str(uuid.uuid4()) for _ in chunks]
    metadatas = [
        {
            "source": file_name,
            "subject": subject,
            "chunk_index": i,
            "total_chunks": total,
        }
        for i in range(total)
    ]

    add_documents(chunks, metadatas, ids)
    print(f"✅ {total} chunks ingérés depuis '{file_name}' (matière: {subject})")
    return total
backend/app/main.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.staticfiles import StaticFiles
4
+ from fastapi.responses import FileResponse
5
+ from app.router_service import router
6
+ from app.db.database import create_tables
7
+ import os
8
+
9
# Application object; title and description are passed to FastAPI as metadata.
app = FastAPI(
    title="PaperBrain API BY HICHAM",
    description="API d'assistance à l'apprentissage avec auth et profils",
)

# CORS: every origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.on_event("startup")
def startup():
    # Run the database table setup once when the application starts.
    create_tables()
    print("Tables créées avec succès")


# Every route of the service router is exposed under the /api prefix.
app.include_router(router, prefix="/api")


@app.get("/health")
def health():
    # Liveness probe at the application root (outside /api).
    return {"status": "ok"}


# Serve the React frontend — MUST stay last: the mount on "/" is registered
# after the API routes above.
if os.path.exists("static"):
    app.mount("/", StaticFiles(directory="static", html=True), name="static")
else:
    # No built frontend next to the working directory: answer "/" with a banner.
    @app.get("/")
    def root():
        return {"message": "SmartStudyAI v2.0 running"}
backend/app/rag.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+
3
+ CHROMA_PATH = "./chroma_db"
4
+ COLLECTION_NAME = "smartstudy_docs"
5
+
6
+
7
def get_chroma_client():
    """Build a persistent ChromaDB client rooted at ``CHROMA_PATH``."""
    client = chromadb.PersistentClient(path=CHROMA_PATH)
    return client
9
+
10
+
11
def get_collection():
    """Return the ``COLLECTION_NAME`` collection, creating it on first use.

    The collection is requested with cosine distance (``hnsw:space``).
    """
    collection_options = {"hnsw:space": "cosine"}
    return get_chroma_client().get_or_create_collection(
        name=COLLECTION_NAME,
        metadata=collection_options,
    )
17
+
18
+
19
def add_documents(chunks: list, metadatas: list, ids: list):
    """Add text chunks with their metadata and ids to the collection.

    The three lists are passed through to ``collection.add`` unchanged.
    """
    get_collection().add(documents=chunks, metadatas=metadatas, ids=ids)
22
+
23
+
24
def query_documents(query: str, n_results: int = 4) -> dict:
    """Query the collection for the chunks closest to ``query``.

    Returns an empty result (one empty inner list per field) when the
    collection holds nothing; otherwise asks for at most ``n_results`` hits,
    capped at the number of stored items.
    """
    collection = get_collection()
    available = collection.count()
    if available == 0:
        return {"documents": [[]], "metadatas": [[]], "distances": [[]]}
    return collection.query(
        query_texts=[query],
        n_results=min(n_results, available),
    )
31
+
32
+
33
def delete_collection():
    """Delete the whole ``COLLECTION_NAME`` collection from the store."""
    get_chroma_client().delete_collection(COLLECTION_NAME)
backend/app/rag_evaluator.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ import os
4
+ from huggingface_hub import InferenceClient
5
+
6
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
7
+ MODEL_NAME = os.getenv("HF_MODEL", "Qwen/Qwen2.5-72B-Instruct")
8
+
9
+ _client: InferenceClient | None = None
10
+
11
+
12
def _get_client() -> InferenceClient:
    """Return the module-wide InferenceClient, creating it on first call.

    The client is bound to ``MODEL_NAME``; an empty ``HF_TOKEN`` is passed as
    ``None``.
    """
    global _client
    if _client is not None:
        return _client
    _client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN or None)
    return _client
17
+
18
+
19
def _call_hf(prompt: str, max_tokens: int = 256, temperature: float = 0.1) -> str:
    """Send ``prompt`` to the text-generation endpoint and return the stripped reply.

    Sampling is disabled (deterministic for evaluation) and only the newly
    generated text is requested.
    """
    generation_options = {
        "max_new_tokens": max_tokens,
        "temperature": temperature,
        "do_sample": False,  # deterministic for evaluation
        "return_full_text": False,
    }
    generated = _get_client().text_generation(prompt, **generation_options)
    return generated.strip()
29
+
30
+
31
+ def _extract_score(raw: str) -> float:
32
+ try:
33
+ cleaned = re.sub(r'```(?:json)?\s*|```', '', raw).strip()
34
+ data = json.loads(cleaned)
35
+ if isinstance(data, dict):
36
+ for key in ["score", "value", "result", "rating"]:
37
+ if key in data:
38
+ val = float(data[key])
39
+ return max(0.0, min(1.0, val if val <= 1.0 else val / 10.0))
40
+ except Exception:
41
+ pass
42
+
43
+ matches = re.findall(r'\b(0\.\d+|1\.0|[0-9](?:\.[0-9]+)?)\b', raw)
44
+ for m in matches:
45
+ val = float(m)
46
+ if 0.0 <= val <= 1.0:
47
+ return val
48
+ if 1.0 < val <= 10.0:
49
+ return val / 10.0
50
+
51
+ raw_lower = raw.lower()
52
+ if any(w in raw_lower for w in ["excellent", "perfect", "fully", "completely"]):
53
+ return 0.9
54
+ if any(w in raw_lower for w in ["good", "mostly", "largely"]):
55
+ return 0.7
56
+ if any(w in raw_lower for w in ["partial", "somewhat", "moderate"]):
57
+ return 0.5
58
+ if any(w in raw_lower for w in ["poor", "barely", "little"]):
59
+ return 0.3
60
+ if any(w in raw_lower for w in ["no", "none", "not", "fail"]):
61
+ return 0.1
62
+
63
+ return 0.5
64
+
65
+
66
def _parse_result(raw: str) -> tuple[float, str]:
    """Turn a raw model reply into ``(score rounded to 2 decimals, reason)``.

    The reason is read from the JSON ``reason`` field when the reply parses as
    a JSON object, otherwise from a ``"reason": "..."`` pattern in the text;
    a default sentence is used when neither is found.
    """
    explanation = "No reason provided."
    numeric = _extract_score(raw)
    try:
        payload = json.loads(re.sub(r'```(?:json)?\s*|```', '', raw).strip())
        explanation = payload.get("reason", explanation)
    except Exception:
        found = re.search(r'"reason"\s*:\s*"([^"]+)"', raw)
        if found:
            explanation = found.group(1)
    return round(numeric, 2), explanation
78
+
79
+
80
+ # ── Evaluation functions ──────────────────────────────────────────────────────
81
+
82
def evaluate_faithfulness(question: str, context: str, answer: str) -> dict:
    """Ask the model how faithful ``answer`` is to ``context``.

    The context is cut to 2000 characters and the answer to 1000 before being
    placed in the prompt. Returns a dict with ``score``, ``reason`` and the
    first 200 characters of the raw reply under ``raw``.
    """
    context_excerpt = context[:2000]
    answer_excerpt = answer[:1000]
    prompt = f"""<s>[INST] Tu es un évaluateur RAG expert. Évalue la FIDÉLITÉ de la réponse.
La fidélité mesure si la réponse est entièrement fondée sur le contexte fourni.

Question : {question}
Contexte : {context_excerpt}
Réponse : {answer_excerpt}

Note de 0.0 à 1.0 (1.0 = entièrement fondée sur le contexte, 0.0 = totalement hallucinée).
Réponds UNIQUEMENT avec : {{"score": <float 0.0-1.0>, "reason": "<une phrase>"}} [/INST]
"""
    model_output = _call_hf(prompt)
    verdict = dict(zip(("score", "reason"), _parse_result(model_output)))
    verdict["raw"] = model_output[:200]
    return verdict
96
+
97
+
98
def evaluate_answer_relevancy(question: str, answer: str) -> dict:
    """Ask the model how directly ``answer`` addresses ``question``.

    The answer is cut to 1000 characters in the prompt. Returns a dict with
    ``score``, ``reason`` and the first 200 characters of the raw reply
    under ``raw``.
    """
    answer_excerpt = answer[:1000]
    prompt = f"""<s>[INST] Tu es un évaluateur RAG expert. Évalue la PERTINENCE DE LA RÉPONSE.
La pertinence mesure si la réponse répond directement à la question posée.

Question : {question}
Réponse : {answer_excerpt}

Note de 0.0 à 1.0 (1.0 = répond parfaitement, 0.0 = hors sujet).
Réponds UNIQUEMENT avec : {{"score": <float 0.0-1.0>, "reason": "<une phrase>"}} [/INST]
"""
    model_output = _call_hf(prompt)
    verdict = dict(zip(("score", "reason"), _parse_result(model_output)))
    verdict["raw"] = model_output[:200]
    return verdict
111
+
112
+
113
def evaluate_context_recall(question: str, context: str) -> dict:
    """Ask the model whether ``context`` holds what is needed to answer ``question``.

    The context is cut to 2000 characters in the prompt. Returns a dict with
    ``score``, ``reason`` and the first 200 characters of the raw reply
    under ``raw``.
    """
    context_excerpt = context[:2000]
    prompt = f"""<s>[INST] Tu es un évaluateur RAG expert. Évalue le RAPPEL DU CONTEXTE.
Mesure si le contexte récupéré contient les informations nécessaires pour répondre à la question.

Question : {question}
Contexte récupéré : {context_excerpt}

Note de 0.0 à 1.0 (1.0 = contexte idéal, 0.0 = contexte inutile).
Réponds UNIQUEMENT avec : {{"score": <float 0.0-1.0>, "reason": "<une phrase>"}} [/INST]
"""
    model_output = _call_hf(prompt)
    verdict = dict(zip(("score", "reason"), _parse_result(model_output)))
    verdict["raw"] = model_output[:200]
    return verdict
126
+
127
+
128
def evaluate_hallucination(question: str, context: str, answer: str) -> dict:
    """Ask the model to rate the absence of hallucinations in ``answer``.

    Per the prompt, 1.0 means no hallucination and 0.0 fully hallucinated.
    The context is cut to 2000 characters and the answer to 1000. Returns a
    dict with ``score``, ``reason`` and the first 200 characters of the raw
    reply under ``raw``.
    """
    context_excerpt = context[:2000]
    answer_excerpt = answer[:1000]
    prompt = f"""<s>[INST] Tu es un évaluateur RAG expert. Détecte les HALLUCINATIONS dans la réponse.
Une hallucination = information présente dans la réponse mais ABSENTE du contexte et non-connaissance générale.

Question : {question}
Contexte : {context_excerpt}
Réponse : {answer_excerpt}

Note de 0.0 à 1.0 (1.0 = aucune hallucination, 0.0 = totalement hallucinée).
Réponds UNIQUEMENT avec : {{"score": <float 0.0-1.0>, "reason": "<une phrase>"}} [/INST]
"""
    model_output = _call_hf(prompt)
    verdict = dict(zip(("score", "reason"), _parse_result(model_output)))
    verdict["raw"] = model_output[:200]
    return verdict
142
+
143
+
144
def evaluate_rag_response(question: str, context: str, answer: str) -> dict:
    """Run the four RAG metrics and combine them into an overall score and grade.

    Each metric is evaluated independently; a metric whose evaluation raises
    is recorded with ``score`` 0.0, the error text as ``reason`` and
    ``error: True``. The overall score is the weighted mean of the metrics
    that succeeded, with the weights renormalised over those metrics so that a
    failed evaluator call does not count as a bad answer. It is 0.0 when every
    metric failed.

    Returns:
        Dict with ``question``, ``overall_score``, ``grade`` (A-F),
        ``metrics`` (per-metric dicts) and ``summary`` (French text).
    """
    print(f"[RAG EVAL] Démarrage pour : {question[:80]}")

    results: dict[str, dict] = {}

    for key, fn, args in [
        ("faithfulness", evaluate_faithfulness, (question, context, answer)),
        ("answer_relevancy", evaluate_answer_relevancy, (question, answer)),
        ("context_recall", evaluate_context_recall, (question, context)),
        ("hallucination", evaluate_hallucination, (question, context, answer)),
    ]:
        try:
            results[key] = fn(*args)
            print(f"[RAG EVAL] {key}: {results[key]['score']}")
        except Exception as e:
            results[key] = {"score": 0.0, "reason": str(e), "error": True}

    weights = {
        "faithfulness": 0.35,
        "answer_relevancy": 0.30,
        "context_recall": 0.20,
        "hallucination": 0.15,
    }
    # Keep only the metrics that actually produced a score, then renormalise.
    usable = {k: w for k, w in weights.items() if not results[k].get("error")}
    total_weight = sum(usable.values())
    if total_weight:
        weighted = sum(results[k]["score"] * w for k, w in usable.items())
        overall = round(weighted / total_weight, 2)
    else:
        overall = 0.0

    grade = "A" if overall >= 0.85 else "B" if overall >= 0.70 else "C" if overall >= 0.55 else "D" if overall >= 0.40 else "F"
    print(f"[RAG EVAL] Overall: {overall} ({grade})")

    return {
        "question": question,
        "overall_score": overall,
        "grade": grade,
        "metrics": results,
        "summary": _generate_summary(overall, results),
    }
183
+
184
+
185
+ def _generate_summary(overall: float, results: dict) -> str:
186
+ label_map = {
187
+ "faithfulness": "Fidélité",
188
+ "answer_relevancy": "Pertinence",
189
+ "context_recall": "Rappel contexte",
190
+ "hallucination": "Hallucination",
191
+ }
192
+ weak = [label_map[k] for k, v in results.items() if v["score"] < 0.5 and not v.get("error")]
193
+ strong = [label_map[k] for k, v in results.items() if v["score"] >= 0.8 and not v.get("error")]
194
+
195
+ if overall >= 0.85:
196
+ verdict = "Excellente réponse RAG."
197
+ elif overall >= 0.70:
198
+ verdict = "Bonne réponse avec quelques défauts mineurs."
199
+ elif overall >= 0.50:
200
+ verdict = "Réponse acceptable — qualité du contexte à améliorer."
201
+ else:
202
+ verdict = "Réponse insuffisante — uploadez des documents plus pertinents."
203
+
204
+ parts = []
205
+ if strong:
206
+ parts.append(f"Points forts : {', '.join(strong)}.")
207
+ if weak:
208
+ parts.append(f"À améliorer : {', '.join(weak)}.")
209
+
210
+ return verdict + (" " + " ".join(parts) if parts else "")
backend/app/router_service.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from fastapi import APIRouter, HTTPException, Depends, UploadFile, File, Form
4
+ from sqlalchemy.orm import Session
5
+
6
+ from app.db.database import get_db
7
+ from app.db import crud
8
+ from app.auth.jwt_handler import create_token
9
+ from app.auth.middleware import get_current_user
10
+ from app.schemas import (
11
+ RegisterRequest, LoginRequest, ChatRequest, QuizRequest,
12
+ FlashcardRequest, ExplainRequest, ResumeRequest, RAGRequest, QuizResultRequest
13
+ )
14
+
15
+ # ── Router (TOUJOURS en premier)
16
+ router = APIRouter()
17
+ UPLOAD_DIR = "./documents"
18
+ os.makedirs(UPLOAD_DIR, exist_ok=True)
19
+
20
+
21
+ # ══════════════════════════════════════════════════════════
22
+ # AUTH
23
+ # ══════════════════════════════════════════════════════════
24
+
25
@router.post("/auth/register")
def register(req: RegisterRequest, db: Session = Depends(get_db)):
    # Create an account and return a token for it.
    # Rejects with 400 when the e-mail, then the username, is already in use.
    email_owner = crud.get_user_by_email(db, req.email)
    if email_owner:
        raise HTTPException(400, "Email déjà utilisé")

    username_owner = crud.get_user_by_username(db, req.username)
    if username_owner:
        raise HTTPException(400, "Nom d'utilisateur déjà pris")

    account = crud.create_user(db, req.username, req.email, req.password)
    claims = {"sub": str(account.id), "username": account.username}
    return {
        "access_token": create_token(claims),
        "username": account.username,
        "user_id": account.id,
    }
34
+
35
+
36
@router.post("/auth/login")
def login(req: LoginRequest, db: Session = Depends(get_db)):
    # Check e-mail + password, update the login streak and return a token.
    # Unknown e-mail and wrong password give the same 401 message.
    account = crud.get_user_by_email(db, req.email)
    if not (account and crud.verify_password(req.password, account.password)):
        raise HTTPException(401, "Email ou mot de passe incorrect")

    crud.update_streak(db, account)
    claims = {"sub": str(account.id), "username": account.username}
    return {
        "access_token": create_token(claims),
        "username": account.username,
        "user_id": account.id,
    }
44
+
45
+
46
+ # ══════════════════════════════════════════════════════════
47
+ # PROFILE
48
+ # ══════════════════════════════════════════════════════════
49
+
50
@router.get("/profile")
def get_profile(current_user=Depends(get_current_user), db: Session = Depends(get_db)):
    # Profile of the authenticated user, as built by crud.get_student_profile.
    user_id = current_user.id
    return crud.get_student_profile(db, user_id)
53
+
54
+
55
@router.get("/progress")
def get_progress(current_user=Depends(get_current_user), db: Session = Depends(get_db)):
    # Progress data of the authenticated user, as built by crud.get_progress.
    user_id = current_user.id
    return crud.get_progress(db, user_id)
58
+
59
+
60
+ # ══════════════════════════════════════════════════════════
61
+ # QUIZ RESULT
62
+ # ══════════════════════════════════════════════════════════
63
+
64
@router.post("/quiz/result")
def save_quiz_result(req: QuizResultRequest, current_user=Depends(get_current_user), db: Session = Depends(get_db)):
    # Store a finished quiz for the authenticated user and return the new row id.
    saved = crud.save_quiz_result(db, current_user.id, req)
    return {"message": "Résultat sauvegardé", "id": saved.id}
68
+
69
+
70
+ # ══════════════════════════════════════════════════════════
71
+ # UPLOAD & DOCUMENTS
72
+ # ══════════════════════════════════════════════════════════
73
+
74
@router.post("/upload")
async def upload_document(
    file: UploadFile = File(...),
    subject: str = Form(default="general")
):
    # Save an uploaded .pdf/.txt/.docx under UPLOAD_DIR and ingest it.
    #
    # The client-supplied filename is untrusted: only its final path component
    # is kept, so names such as "../../app/main.py" cannot write outside
    # UPLOAD_DIR. A missing filename is rejected with 400 instead of crashing.
    #
    # Responses: 400 for an unsupported or empty name, 500 if ingestion fails
    # (the saved file is removed in that case).
    allowed = [".pdf", ".txt", ".docx"]
    # Normalise Windows separators first so basename() also strips "..\\x".
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
    ext = os.path.splitext(safe_name)[1].lower()
    if not safe_name or ext not in allowed:
        raise HTTPException(400, f"Format non supporté. Acceptés: {allowed}")

    file_path = os.path.join(UPLOAD_DIR, safe_name)
    with open(file_path, "wb") as f:
        shutil.copyfileobj(file.file, f)

    try:
        from app.ingest import ingest_document
        chunks = ingest_document(file_path, subject)
        return {
            "message": f"Fichier '{safe_name}' ingéré avec succès",
            "chunks": chunks,
            "subject": subject,
            "filename": safe_name
        }
    except Exception as e:
        if os.path.exists(file_path):
            os.remove(file_path)
        raise HTTPException(500, f"Erreur ingestion: {str(e)}")
101
+
102
+
103
@router.get("/documents")
def list_documents():
    # List the ingested documents, grouped by the "source" metadata field.
    # Each entry carries the subject of the first chunk seen and a chunk count.
    # Any failure yields an empty list plus the error text (no HTTP error).
    try:
        from app.rag import get_collection
        stored = get_collection().get()
        grouped = {}
        for meta in stored.get("metadatas", []):
            if not meta:
                continue
            name = meta.get("source", "inconnu")
            entry = grouped.setdefault(
                name,
                {"filename": name, "subject": meta.get("subject", "general"), "chunks": 0},
            )
            entry["chunks"] += 1
        return {"documents": list(grouped.values()), "total": len(grouped)}
    except Exception as exc:
        return {"documents": [], "total": 0, "error": str(exc)}
121
+
122
+
123
@router.delete("/documents/{filename}")
def delete_document(filename: str):
    # Remove a document: its chunks in the vector store and its file on disk.
    #
    # The name comes from the URL and is untrusted: only the final path
    # component is used, and "", "." and ".." are rejected with 400, so the
    # request cannot address anything outside UPLOAD_DIR.
    safe_name = os.path.basename(filename.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise HTTPException(400, "Nom de fichier invalide")

    try:
        from app.rag import get_collection
        collection = get_collection()
        results = collection.get(where={"source": safe_name})
        ids = results.get("ids", [])
        if ids:
            collection.delete(ids=ids)
        file_path = os.path.join(UPLOAD_DIR, safe_name)
        if os.path.exists(file_path):
            os.remove(file_path)
        return {"message": f"'{safe_name}' supprimé ({len(ids)} chunks)"}
    except Exception as e:
        raise HTTPException(500, str(e))
138
+
139
+
140
+ # ══════════════════════════════════════════════════════════
141
+ # AI ENDPOINTS
142
+ # ══════════════════════════════════════════════════════════
143
+
144
@router.post("/chat")
async def chat(req: ChatRequest):
    # Forward the request to the agent's "chat" action; any failure becomes a 500.
    try:
        from app.agent import run_agent
        return await run_agent("chat", req.dict())
    except Exception as exc:
        raise HTTPException(500, str(exc))
152
+
153
+
154
@router.post("/quiz")
async def generate_quiz(req: QuizRequest):
    # Forward the request to the agent's "quiz" action; any failure becomes a 500.
    try:
        from app.agent import run_agent
        return await run_agent("quiz", req.dict())
    except Exception as exc:
        raise HTTPException(500, str(exc))
162
+
163
+
164
@router.post("/flashcards")
async def generate_flashcards(req: FlashcardRequest):
    # Forward the request to the agent's "flashcards" action; any failure becomes a 500.
    try:
        from app.agent import run_agent
        return await run_agent("flashcards", req.dict())
    except Exception as exc:
        raise HTTPException(500, str(exc))
172
+
173
+
174
@router.post("/explain")
async def explain(req: ExplainRequest):
    # Forward the request to the agent's "explain" action; any failure becomes a 500.
    try:
        from app.agent import run_agent
        return await run_agent("explain", req.dict())
    except Exception as exc:
        raise HTTPException(500, str(exc))
182
+
183
+
184
@router.post("/resume")
async def resume(req: ResumeRequest):
    # Forward the request to the agent's "resume" action; any failure becomes a 500.
    try:
        from app.agent import run_agent
        return await run_agent("resume", req.dict())
    except Exception as exc:
        raise HTTPException(500, str(exc))
192
+
193
+
194
@router.post("/rag-qa")
async def rag_qa_endpoint(req: RAGRequest):
    # Forward the request to the agent's "rag-qa" action; any failure becomes a 500.
    try:
        from app.agent import run_agent
        return await run_agent("rag-qa", req.dict())
    except Exception as exc:
        raise HTTPException(500, str(exc))
202
+
203
+
204
@router.get("/health")
def health():
    # Liveness probe of the router (served under the router's prefix).
    payload = {"status": "ok", "service": "SmartStudyAI"}
    return payload
backend/app/schemas.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import Optional
3
+
4
+ # ── Auth
5
class RegisterRequest(BaseModel):
    # Body of POST /auth/register. All three fields are required strings.
    username: str
    email: str
    password: str
9
+
10
class LoginRequest(BaseModel):
    # Body of POST /auth/login: the account is looked up by e-mail.
    email: str
    password: str
13
+
14
+ # ── AI Requests
15
class ChatRequest(BaseModel):
    # Body of POST /chat.
    query: str
    # Defaults to "anonymous" when the client sends no user id.
    user_id: Optional[str] = "anonymous"
18
+
19
class QuizRequest(BaseModel):
    # Body of POST /quiz.
    topic: str
    num_questions: Optional[int] = 5
    difficulty: Optional[str] = "medium"
23
+
24
class FlashcardRequest(BaseModel):
    # Body of POST /flashcards.
    topic: str
    num_cards: Optional[int] = 8
27
+
28
class ExplainRequest(BaseModel):
    # Body of POST /explain.
    concept: str
    level: Optional[str] = "intermediate"
31
+
32
class ResumeRequest(BaseModel):
    # Body of POST /resume: the text to summarise.
    text: str
34
+
35
class RAGRequest(BaseModel):
    # Body of POST /rag-qa.
    query: str
    # Defaults to "anonymous" when the client sends no user id.
    user_id: Optional[str] = "anonymous"
38
+
39
+ # ── Quiz Result
40
class QuizResultRequest(BaseModel):
    # Body of POST /quiz/result.
    topic: str
    # float rather than int: the quiz_results.score column is a Float, so
    # fractional scores (e.g. 66.7) must be accepted. Integer values sent by
    # existing clients are still valid.
    score: float
    total_questions: int
    correct_answers: int
    difficulty: Optional[str] = "medium"
    duration_sec: Optional[int] = 0
47
class RAGEvalRequest(BaseModel):
    # Input for a RAG evaluation: the question, the retrieved context and the
    # generated answer.
    question: str
    context: str
    answer: str
backend/app/schemas_new.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import Optional
3
+
4
class RegisterRequest(BaseModel):
    # Registration payload. All three fields are required strings.
    username: str
    email: str
    password: str
8
+
9
class LoginRequest(BaseModel):
    # Login payload: e-mail plus password.
    email: str
    password: str
12
+
13
class ChatRequest(BaseModel):
    # Chat payload.
    query: str
    # Defaults to "anonymous" when the client sends no user id.
    user_id: str = "anonymous"
16
+
17
class QuizRequest(BaseModel):
    # Quiz generation payload.
    topic: str
    num_questions: int = 5
    difficulty: str = "medium"
21
+
22
class FlashcardRequest(BaseModel):
    # Flashcard generation payload.
    topic: str
    num_cards: int = 8
25
+
26
class ExplainRequest(BaseModel):
    # Concept explanation payload.
    concept: str
    level: str = "intermediate"
29
+
30
class ResumeRequest(BaseModel):
    # Summary payload: the text to summarise.
    text: str
32
+
33
class RAGRequest(BaseModel):
    # RAG question payload.
    query: str
    # Defaults to "anonymous" when the client sends no user id.
    user_id: str = "anonymous"
36
+
37
class QuizResultRequest(BaseModel):
    # Quiz result payload.
    topic: str
    # float rather than int: the quiz_results.score column is a Float, so
    # fractional scores must be accepted. Integer values remain valid.
    score: float
    total_questions: int
    correct_answers: int
    difficulty: str = "medium"
    duration_sec: int = 0
backend/app/tools/tool_flashcards.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ import os
4
+ from huggingface_hub import InferenceClient
5
+
6
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
7
+ MODEL_NAME = os.getenv("HF_MODEL", "Qwen/Qwen2.5-72B-Instruct")
8
+
9
+ _client = None
10
+
11
def _get_client() -> InferenceClient:
    """Return the module-wide InferenceClient, creating it on first call.

    An empty ``HF_TOKEN`` is passed as ``None``.
    """
    global _client
    if _client is not None:
        return _client
    _client = InferenceClient(token=HF_TOKEN or None)
    return _client
16
+
17
+
18
def _call_hf(system: str, user: str, max_tokens: int = 1024, temperature: float = 0.3) -> str:
    """Run one system+user chat completion on ``MODEL_NAME``.

    Returns the stripped text of the first choice.
    """
    conversation = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
    completion = _get_client().chat_completion(
        model=MODEL_NAME,
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
30
+
31
+
32
+ def _extract_json_array(raw: str) -> list:
33
+ cleaned = re.sub(r'```(?:json)?\s*|```', '', raw).strip()
34
+ try:
35
+ result = json.loads(cleaned)
36
+ if isinstance(result, list):
37
+ return result
38
+ except Exception:
39
+ pass
40
+ start = cleaned.find('[')
41
+ if start != -1:
42
+ depth = 0
43
+ for i, ch in enumerate(cleaned[start:], start):
44
+ if ch == '[': depth += 1
45
+ elif ch == ']':
46
+ depth -= 1
47
+ if depth == 0:
48
+ candidate = re.sub(r',\s*([}\]])', r'\1', cleaned[start:i+1])
49
+ try:
50
+ return json.loads(candidate)
51
+ except Exception:
52
+ pass
53
+ break
54
+ return []
55
+
56
+
57
def generate_flashcards(topic: str, num_cards: int = 10) -> list[dict]:
    """Generate flashcards about ``topic`` with the chat model.

    Args:
        topic: Subject of the cards.
        num_cards: Number of cards requested in the prompt.

    Returns:
        List of ``{"front": str, "back": str}`` dicts. Items that are not
        dicts or lack a non-empty ``front``/``back`` are dropped. When no
        valid card remains (unparseable reply, or every item invalid) a
        single fallback card is returned with the topic as front and the
        first 300 characters of the raw reply as back, so the result is
        never empty, as in ``generate_quiz``.
    """
    system = (
        "Tu es un générateur de flashcards pédagogiques. "
        "Tu réponds UNIQUEMENT avec un tableau JSON valide, sans texte avant ni après."
    )
    user = (
        f"Génère {num_cards} flashcards sur : \"{topic}\".\n"
        "Chaque objet : front (question/terme) et back (réponse/définition).\n"
        "Réponds UNIQUEMENT avec le tableau JSON."
    )

    raw = _call_hf(system, user)
    cards = [
        {"front": str(c.get("front", "")), "back": str(c.get("back", ""))}
        for c in _extract_json_array(raw)
        if isinstance(c, dict) and c.get("front") and c.get("back")
    ]
    if cards:
        return cards
    return [{"front": topic, "back": raw[:300]}]
backend/app/tools/tool_quiz.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ import os
4
+ from huggingface_hub import InferenceClient
5
+
6
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
7
+ MODEL_NAME = os.getenv("HF_MODEL", "Qwen/Qwen2.5-72B-Instruct")
8
+
9
+ _client = None
10
+
11
def _get_client() -> InferenceClient:
    """Return the module-wide InferenceClient, creating it on first call.

    An empty ``HF_TOKEN`` is passed as ``None``.
    """
    global _client
    if _client is not None:
        return _client
    _client = InferenceClient(token=HF_TOKEN or None)
    return _client
16
+
17
+
18
def _call_hf(system: str, user: str, max_tokens: int = 1500, temperature: float = 0.3) -> str:
    """Run one system+user chat completion on ``MODEL_NAME``.

    Returns the stripped text of the first choice.
    """
    conversation = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
    completion = _get_client().chat_completion(
        model=MODEL_NAME,
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
30
+
31
+
32
+ def _extract_json_array(raw: str) -> list:
33
+ cleaned = re.sub(r'```(?:json)?\s*|```', '', raw).strip()
34
+ try:
35
+ result = json.loads(cleaned)
36
+ if isinstance(result, list):
37
+ return result
38
+ except Exception:
39
+ pass
40
+ start = cleaned.find('[')
41
+ if start != -1:
42
+ depth = 0
43
+ for i, ch in enumerate(cleaned[start:], start):
44
+ if ch == '[': depth += 1
45
+ elif ch == ']':
46
+ depth -= 1
47
+ if depth == 0:
48
+ candidate = re.sub(r',\s*([}\]])', r'\1', cleaned[start:i+1])
49
+ try:
50
+ return json.loads(candidate)
51
+ except Exception:
52
+ pass
53
+ break
54
+ return []
55
+
56
+
57
def generate_quiz(topic: str, num_questions: int = 5, difficulty: str = "medium") -> list[dict]:
    """Generate multiple-choice questions about ``topic`` with the chat model.

    ``difficulty`` ("easy", "medium", "hard") selects the wording used in the
    prompt; any other value is treated like "medium".

    Returns:
        List of dicts with ``question``, ``options``, ``correct_answer`` and
        ``explanation``. Items that are not dicts or lack a question/options
        are dropped. If nothing usable remains, a single placeholder question
        is returned.
    """
    default_level = "de difficulté intermédiaire"
    level_desc = {
        "easy": "simples et directes, pour débutants",
        "medium": "de difficulté intermédiaire",
        "hard": "difficiles et approfondies, pour experts",
    }.get(difficulty, default_level)

    system = (
        "Tu es un générateur de quiz pédagogique. "
        "Tu réponds UNIQUEMENT avec un tableau JSON valide, sans texte avant ni après."
    )
    user = (
        f"Génère {num_questions} questions QCM ({level_desc}) sur : \"{topic}\".\n"
        "Chaque objet : question, options (4 chaînes A/B/C/D), correct_answer (A/B/C/D), explanation.\n"
        "Réponds UNIQUEMENT avec le tableau JSON."
    )

    raw = _call_hf(system, user)

    usable = []
    for item in _extract_json_array(raw):
        if not isinstance(item, dict):
            continue
        if not (item.get("question") and item.get("options")):
            continue
        usable.append({
            "question": str(item.get("question", "")),
            "options": list(item.get("options", [])),
            "correct_answer": str(item.get("correct_answer", "A")),
            "explanation": str(item.get("explanation", "")),
        })
    if usable:
        return usable

    # Nothing parseable: return one placeholder so callers always get a list entry.
    return [{"question": f"Question sur {topic}", "options": ["A) -", "B) -", "C) -", "D) -"], "correct_answer": "A", "explanation": "Erreur de génération."}]
backend/app/tools/tool_rag_qa.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from huggingface_hub import InferenceClient
3
+
4
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
5
+ MODEL_NAME = os.getenv("HF_MODEL", "Qwen/Qwen2.5-72B-Instruct")
6
+ RELEVANCE_THRESHOLD = 0.4
7
+
8
+ _client = None
9
+
10
def _get_client() -> InferenceClient:
    """Return the module-wide InferenceClient, creating it on first call.

    An empty ``HF_TOKEN`` is passed as ``None``.
    """
    global _client
    if _client is not None:
        return _client
    _client = InferenceClient(token=HF_TOKEN or None)
    return _client
15
+
16
+
17
def _call_hf(system: str, user: str, max_tokens: int = 1024, temperature: float = 0.4) -> str:
    """Run one system+user chat completion on ``MODEL_NAME``.

    Returns the stripped text of the first choice.
    """
    conversation = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
    completion = _get_client().chat_completion(
        model=MODEL_NAME,
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
29
+
30
+
31
def rag_qa(query: str, history_text: str = "") -> tuple[str, list[str]]:
    """Answer ``query`` from the ingested documents.

    The three closest chunks are retrieved; those whose distance is below
    ``RELEVANCE_THRESHOLD`` form the context (cut to 3000 characters) given to
    the chat model together with the optional conversation history.

    Args:
        query: The user's question.
        history_text: Optional prior conversation, prepended to the prompt.

    Returns:
        ``(answer, sources)``. ``sources`` lists each source file name once,
        in retrieval order (previously the order was arbitrary because it went
        through a ``set``). When no chunk is relevant enough, a fixed French
        message and an empty source list are returned without calling the
        model.
    """
    from app.rag import query_documents

    results = query_documents(query, n_results=3)
    # `or [[]]` also covers a field that is present but None.
    documents = (results.get("documents") or [[]])[0]
    metadatas = (results.get("metadatas") or [[]])[0]
    distances = (results.get("distances") or [[]])[0]

    relevant_docs = [
        (doc, meta or {})  # a chunk stored without metadata must not crash .get below
        for doc, meta, dist in zip(documents, metadatas, distances)
        if dist < RELEVANCE_THRESHOLD
    ]

    if not relevant_docs:
        return ("Je n'ai pas trouvé d'information pertinente dans vos cours.", [])

    context = "\n\n---\n\n".join(doc for doc, _ in relevant_docs)
    # dict.fromkeys de-duplicates while keeping first-seen order.
    sources = list(dict.fromkeys(meta.get("source", "inconnu") for _, meta in relevant_docs))

    system = (
        "Tu es un assistant pédagogique RAG. "
        "Réponds à la question en te basant UNIQUEMENT sur le contexte fourni. "
        "Si la réponse n'est pas dans le contexte, dis-le clairement. "
        "Réponds dans la même langue que la question."
    )
    history_section = f"Historique:\n{history_text}\n\n" if history_text else ""
    user = f"{history_section}Contexte :\n{context[:3000]}\n\nQuestion : {query}"

    answer = _call_hf(system, user)
    return answer, sources
backend/app/tools/tool_resume.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from huggingface_hub import InferenceClient
3
+
4
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
5
+ MODEL_NAME = os.getenv("HF_MODEL", "Qwen/Qwen2.5-72B-Instruct")
6
+
7
+ _client = None
8
+
9
def _get_client() -> InferenceClient:
    """Return the shared ``InferenceClient``, creating it on the first call.

    The client is stored in the module global ``_client``. ``HF_TOKEN`` is
    forwarded as the token, with an empty value replaced by ``None``.
    """
    global _client
    if _client is not None:
        return _client
    _client = InferenceClient(token=HF_TOKEN if HF_TOKEN else None)
    return _client
14
+
15
+
16
def _call_hf(system: str, user: str, max_tokens: int = 1024, temperature: float = 0.4) -> str:
    """Run a single-turn chat completion and return the reply text.

    Args:
        system: Content of the ``system`` message.
        user: Content of the ``user`` message.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``chat_completion`` as ``temperature``.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or ``""`` when the response carries no content.
    """
    response = _get_client().chat_completion(
        model=MODEL_NAME,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # The message content is optional in the chat-completion response type;
    # calling .strip() on None would raise AttributeError, so fall back to "".
    content = response.choices[0].message.content
    return content.strip() if content else ""
28
+
29
+
30
def generate_resume(text: str = None, file_path: str = None) -> str:
    """Summarise a text, or the contents of a UTF-8 text file, with the chat model.

    Args:
        text: Text to summarise. Used whenever it contains non-whitespace
            characters.
        file_path: Path of a file opened as UTF-8 text. Read only when
            ``text`` is missing or blank.

    Returns:
        The model's summary. If the file cannot be read, a French error
        message containing the exception text. If there is nothing to
        summarise, ``"Aucun texte ou fichier fourni."``.
    """
    # A blank ``text`` (None, "" or whitespace only) must not block the file
    # fallback, otherwise the model would be asked to summarise nothing.
    has_text = bool(text and text.strip())

    if file_path and not has_text:
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                text = f.read()
        except Exception as e:
            # Deliberately broad: any read/decoding problem is reported to the
            # caller as a message instead of propagating.
            return f"Erreur lors de la lecture du fichier : {e}"

    # Covers missing input as well as whitespace-only text or file content.
    if not text or not text.strip():
        return "Aucun texte ou fichier fourni."

    system = (
        "Tu es un assistant pédagogique expert en synthèse de documents. "
        "Réponds dans la même langue que le texte fourni."
    )
    # Only the first 4000 characters of the text are sent to the model.
    user = (
        "Résume le texte suivant de façon claire et structurée. "
        "Utilise des titres et des points clés.\n\n"
        f"Texte :\n{text[:4000]}"
    )

    return _call_hf(system, user, max_tokens=1024)
backend/app/tools/tool_simple_explain.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from huggingface_hub import InferenceClient
3
+
4
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
5
+ MODEL_NAME = os.getenv("HF_MODEL", "Qwen/Qwen2.5-72B-Instruct")
6
+
7
# Maps each supported explanation level to the French phrase that is spliced
# into the prompt to describe the target audience.
LEVEL_DESCRIPTIONS = dict(
    beginner="de manière très simple, comme si tu expliquais à un lycéen, avec des analogies du quotidien",
    intermediate="de manière claire avec les concepts essentiels, pour un étudiant universitaire",
    advanced="de manière approfondie et technique, pour un expert du domaine",
)
12
+
13
+ _client = None
14
+
15
def _get_client() -> InferenceClient:
    """Return the cached ``InferenceClient``, instantiating it if needed.

    The instance lives in the module global ``_client``. ``HF_TOKEN`` is
    used as the token; an empty value is passed as ``None``.
    """
    global _client
    if _client is not None:
        return _client
    _client = InferenceClient(token=HF_TOKEN if HF_TOKEN else None)
    return _client
20
+
21
+
22
def _call_hf(system: str, user: str, max_tokens: int = 1024, temperature: float = 0.5) -> str:
    """Run a single-turn chat completion and return the reply text.

    Args:
        system: Content of the ``system`` message.
        user: Content of the ``user`` message.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``chat_completion`` as ``temperature``.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or ``""`` when the response carries no content.
    """
    response = _get_client().chat_completion(
        model=MODEL_NAME,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # The message content is optional in the chat-completion response type;
    # calling .strip() on None would raise AttributeError, so fall back to "".
    content = response.choices[0].message.content
    return content.strip() if content else ""
34
+
35
+
36
def simple_explain(concept: str, level: str = "intermediate") -> str:
    """Ask the chat model for a structured explanation of ``concept``.

    Args:
        concept: The concept to explain; inserted verbatim into the prompt.
        level: One of the keys of ``LEVEL_DESCRIPTIONS``. Matching ignores
            case and surrounding whitespace; any unrecognised value falls
            back to ``"intermediate"``.

    Returns:
        The model's reply as returned by ``_call_hf``.
    """
    # Normalise so that "Beginner" or " advanced " select the intended level
    # instead of silently falling back to the intermediate wording.
    level_key = level.strip().lower() if isinstance(level, str) else level
    level_desc = LEVEL_DESCRIPTIONS.get(level_key, LEVEL_DESCRIPTIONS["intermediate"])

    system = (
        "Tu es un professeur pédagogue expert. "
        "Réponds dans la même langue que le concept demandé."
    )
    user = (
        f"Explique le concept suivant {level_desc}.\n\n"
        "Structure ta réponse avec :\n"
        "1. Une définition courte et claire\n"
        "2. Les points clés à retenir\n"
        "3. Un exemple concret\n"
        "4. Les applications pratiques\n\n"
        f"Concept : {concept}"
    )

    return _call_hf(system, user, max_tokens=1024)
backend/database.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import create_engine
2
+ from sqlalchemy.ext.declarative import declarative_base
3
+ from sqlalchemy.orm import sessionmaker
4
+
5
# SQLite database URL; "./smartstudy.db" is a relative path, so the file
# location presumably depends on the process working directory -- confirm.
DATABASE_URL = "sqlite:///./smartstudy.db"

# check_same_thread=False is handed to the SQLite driver so a connection may
# be used from a thread other than the one that created it.
engine = create_engine(DATABASE_URL, connect_args=dict(check_same_thread=False))

# Session factory bound to the engine; sessions neither autocommit nor autoflush.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)

# Declarative base whose metadata is used by create_tables().
Base = declarative_base()
14
+
15
def get_db():
    """Yield a fresh ``SessionLocal`` session and close it afterwards.

    Written as a generator: the ``finally`` clause closes the session when
    the generator is resumed after the ``yield`` or is closed, including when
    the consumer raises. Presumably used as a FastAPI dependency -- confirm.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
21
+
22
def create_tables():
    """Create all tables registered on ``Base.metadata`` and print a confirmation."""
    # The names are unused; the import is here so the model module is loaded
    # before create_all runs.
    # NOTE(review): confirm these models are declared on this module's Base;
    # otherwise create_all has nothing to create.
    from app.db.models import (  # noqa: F401
        User,
        QuizResult,
        StudySession,
        FlashcardProgress,
    )

    Base.metadata.create_all(bind=engine)
    print("✅ Tables créées avec succès")
backend/migrate.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3
2
+ import os
3
+
4
# One-off migration: add the ``duration`` column to ``study_sessions`` when it
# is missing. The database file is looked up next to this script.
db_path = os.path.join(os.path.dirname(__file__), "smartstudy.db")

conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()

    # PRAGMA table_info returns one row per column; index 1 is the column name.
    cursor.execute("PRAGMA table_info(study_sessions)")
    columns = [col[1] for col in cursor.fetchall()]

    if not columns:
        # No rows means the table does not exist (for instance when connect()
        # has just created an empty database file). ALTER TABLE would raise
        # sqlite3.OperationalError, so report it and stop.
        print("⚠️ Table 'study_sessions' not found, nothing to do.")
    elif "duration" not in columns:
        cursor.execute("ALTER TABLE study_sessions ADD COLUMN duration INTEGER DEFAULT 0")
        conn.commit()
        print("✅ Column 'duration' added successfully.")
    else:
        print("ℹ️ Column 'duration' already exists, nothing to do.")
finally:
    # Release the connection even if one of the statements above raises.
    conn.close()
backend/requirements.txt ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # API & serveur
2
+ fastapi==0.111.0
3
+ uvicorn[standard]==0.29.0
4
+ python-multipart==0.0.9
5
+
6
+ # Auth & sécurité
7
+ python-jose[cryptography]==3.3.0
8
+ passlib[bcrypt]==1.7.4
9
+
10
+ # Base de données
11
+ sqlalchemy==2.0.30
12
+
13
+ # HuggingFace
14
+ huggingface-hub>=0.31.0
15
+
16
+ # NumPy — forcer 1.x pour compatibilité ChromaDB
17
+ numpy<2.0
18
+
19
+ # RAG / ChromaDB
20
+ chromadb==0.5.0
21
+ onnxruntime==1.18.0
22
+
23
+ # Lecture de documents
24
+ pdfplumber==0.11.1
25
+ python-docx==1.1.2
26
+
27
+ # Utilitaires
28
+ pydantic==2.7.1
29
+ python-dotenv==1.0.1
30
+ requests==2.32.3
docker-compose.yml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+
3
+ backend:
4
+ build:
5
+ context: ./backend # Docker lit les fichiers DEPUIS ./backend
6
+ dockerfile: Dockerfile
7
+ container_name: smartstudy-backend
8
+ ports:
9
+ - "8000:8000"
10
+ environment:
11
+ HF_TOKEN: ${HF_TOKEN}
12
+ HF_MODEL: ${HF_MODEL:-mistralai/Mistral-7B-Instruct-v0.3}
13
+ volumes:
14
+ - chroma_data:/app/chroma_db
15
+ - documents_data:/app/documents
16
+ restart: unless-stopped
17
+ healthcheck:
18
+ test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
19
+ interval: 30s
20
+ timeout: 10s
21
+ retries: 3
22
+
23
+ frontend:
24
+ build:
25
+ context: ./frontend # Docker lit les fichiers DEPUIS ./frontend
26
+ dockerfile: Dockerfile
27
+ args:
28
+ VITE_API_URL: http://localhost:8000/api
29
+ container_name: smartstudy-frontend
30
+ ports:
31
+ - "5173:80"
32
+ depends_on:
33
+ backend:
34
+ condition: service_healthy
35
+ restart: unless-stopped
36
+
37
+ volumes:
38
+ chroma_data:
39
+ documents_data:
frontend/.gitignore ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Logs
2
+ logs
3
+ *.log
4
+ npm-debug.log*
5
+ yarn-debug.log*
6
+ yarn-error.log*
7
+ pnpm-debug.log*
8
+ lerna-debug.log*
9
+
10
+ node_modules
11
+ dist
12
+ dist-ssr
13
+ *.local
14
+
15
+ # Editor directories and files
16
+ .vscode/*
17
+ !.vscode/extensions.json
18
+ .idea
19
+ .DS_Store
20
+ *.suo
21
+ *.ntvs*
22
+ *.njsproj
23
+ *.sln
24
+ *.sw?
frontend/Dockerfile ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM node:20-alpine AS builder
2
+
3
+ WORKDIR /app
4
+
5
+ ARG VITE_API_URL=http://localhost:8000/api
6
+ ENV VITE_API_URL=$VITE_API_URL
7
+
8
+ COPY package*.json ./
9
+ RUN npm ci
10
+
11
+ COPY . .
12
+ RUN npm run build
13
+
14
+ # ── Runtime Nginx ─────────────────────────────────────────────────────────────
15
+ FROM nginx:alpine
16
+
17
+ COPY --from=builder /app/dist /usr/share/nginx/html
18
+
19
+ RUN echo 'server { \
20
+ listen 80; \
21
+ root /usr/share/nginx/html; \
22
+ index index.html; \
23
+ location / { \
24
+ try_files $uri $uri/ /index.html; \
25
+ } \
26
+ }' > /etc/nginx/conf.d/default.conf
27
+
28
+ EXPOSE 80
29
+
30
+ CMD ["nginx", "-g", "daemon off;"]