jwphantom
committed on
Commit
•
99ebde5
1
Parent(s):
1f7103f
remove evaluation route
Browse files- app/api/evaluation.py +0 -243
- app/api/test.py +0 -62
- main.py +1 -3
app/api/evaluation.py
DELETED
@@ -1,243 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
import os
|
3 |
-
from langchain_openai import ChatOpenAI
|
4 |
-
import numpy as np
|
5 |
-
import math
|
6 |
-
import time
|
7 |
-
import csv
|
8 |
-
from fastapi import APIRouter, HTTPException
|
9 |
-
from typing import List
|
10 |
-
from dotenv import load_dotenv
|
11 |
-
import nltk
|
12 |
-
from datetime import datetime
|
13 |
-
|
14 |
-
# nltk.download("punkt")
|
15 |
-
# from nltk.tokenize import word_tokenize
|
16 |
-
|
17 |
-
# Spécifier un répertoire de données pour NLTK
|
18 |
-
nltk_data_path = os.path.join(os.path.expanduser("~"), "nltk_data")
|
19 |
-
os.makedirs(nltk_data_path, exist_ok=True)
|
20 |
-
nltk.data.path.append(nltk_data_path)
|
21 |
-
|
22 |
-
# Télécharger les données de NLTK
|
23 |
-
nltk.download("punkt", download_dir=nltk_data_path)
|
24 |
-
|
25 |
-
from nltk.tokenize import word_tokenize
|
26 |
-
|
27 |
-
|
28 |
-
from langchain_google_genai import ChatGoogleGenerativeAI
|
29 |
-
from langchain_community.embeddings import GPT4AllEmbeddings
|
30 |
-
from langchain.memory import ConversationBufferMemory
|
31 |
-
|
32 |
-
from app.schema.question_eval import Question as SchemaQuestionEval
|
33 |
-
from app.schema.question_eval import ResponseEvaluation as SchemaResponseEval
|
34 |
-
from app.utils.complex_input import (
|
35 |
-
generate_prompt,
|
36 |
-
) # Ensure this is correctly implemented
|
37 |
-
|
38 |
-
load_dotenv(".env")
|
39 |
-
|
40 |
-
router = APIRouter()
|
41 |
-
|
42 |
-
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0.2).bind(logprobs=True)
|
43 |
-
|
44 |
-
|
45 |
-
def init_embedding():
|
46 |
-
return GPT4AllEmbeddings(model_name="all-MiniLM-L6-v2.gguf2.f16.gguf")
|
47 |
-
|
48 |
-
|
49 |
-
def cosine_similarity(vec1, vec2):
|
50 |
-
dot_product = np.dot(vec1, vec2)
|
51 |
-
norm_vec1 = np.linalg.norm(vec1)
|
52 |
-
norm_vec2 = np.linalg.norm(vec2)
|
53 |
-
return dot_product / (norm_vec1 * norm_vec2)
|
54 |
-
|
55 |
-
|
56 |
-
def fraction_to_float(fraction_str):
|
57 |
-
try:
|
58 |
-
return float(fraction_str)
|
59 |
-
except ValueError:
|
60 |
-
num, denom = map(int, fraction_str.split("/"))
|
61 |
-
return num / denom
|
62 |
-
|
63 |
-
|
64 |
-
def calculate_ttr(texts: List[str], question_idx: int) -> float:
|
65 |
-
print(
|
66 |
-
f"Calcul TTR question : {question_idx + 1} .... veuillez patienter ça calcule"
|
67 |
-
)
|
68 |
-
all_tokens = []
|
69 |
-
unique_tokens = set()
|
70 |
-
|
71 |
-
for text in texts:
|
72 |
-
tokens = word_tokenize(text.lower())
|
73 |
-
all_tokens.extend(tokens)
|
74 |
-
unique_tokens.update(tokens)
|
75 |
-
|
76 |
-
if len(all_tokens) == 0:
|
77 |
-
ttr = 0
|
78 |
-
else:
|
79 |
-
ttr = len(unique_tokens) / len(all_tokens)
|
80 |
-
|
81 |
-
print(f"Result TTR question : {question_idx + 1} .... {ttr}")
|
82 |
-
return ttr
|
83 |
-
|
84 |
-
|
85 |
-
def calculate_metrics_from_log_probs(log_probs: List[float], question_idx: int):
|
86 |
-
|
87 |
-
print(
|
88 |
-
f"Calcul Entropie et Perplexité question : {question_idx + 1} .... veuillez patienter ça calcule"
|
89 |
-
)
|
90 |
-
|
91 |
-
total_log_prob = sum(log_probs)
|
92 |
-
total_tokens = len(log_probs)
|
93 |
-
|
94 |
-
moyenne_log_probs = total_log_prob / total_tokens
|
95 |
-
entropy = -moyenne_log_probs
|
96 |
-
perplexity = math.exp(entropy)
|
97 |
-
|
98 |
-
print(f"Result Entropie question : {question_idx + 1} .... {entropy}")
|
99 |
-
print(f"Result Perplexité question : {question_idx + 1} .... {perplexity}")
|
100 |
-
|
101 |
-
return {"entropy": entropy, "perplexity": perplexity}
|
102 |
-
|
103 |
-
|
104 |
-
def evaluate_response(
|
105 |
-
generated_response, reference_responses, embedding, question_idx: int
|
106 |
-
):
|
107 |
-
print(
|
108 |
-
f"Calcul Similarité question : {question_idx + 1} .... veuillez patienter ça calcule"
|
109 |
-
)
|
110 |
-
gen_vec = embedding.embed_query(generated_response)
|
111 |
-
similarities = []
|
112 |
-
for response in reference_responses:
|
113 |
-
ref_vec = embedding.embed_query(response)
|
114 |
-
similarity = cosine_similarity(gen_vec, ref_vec)
|
115 |
-
similarities.append(similarity)
|
116 |
-
|
117 |
-
print(f"Result Similarité question : {question_idx + 1} .... {similarities[0]}")
|
118 |
-
|
119 |
-
return similarities
|
120 |
-
|
121 |
-
|
122 |
-
@router.post("/evaluate-responses")
|
123 |
-
async def evaluate_responses():
|
124 |
-
results = {}
|
125 |
-
embedding = init_embedding()
|
126 |
-
|
127 |
-
with open("data.json", "r") as f:
|
128 |
-
questions = json.load(f)
|
129 |
-
|
130 |
-
# Initialiser le fichier JSON avec une structure de base
|
131 |
-
with open("results.json", "w") as f:
|
132 |
-
json.dump({category: [] for category in questions.keys()}, f, indent=4)
|
133 |
-
|
134 |
-
for category, questions_list in questions.items():
|
135 |
-
start_time_category = time.time()
|
136 |
-
perplexity_total = 0
|
137 |
-
|
138 |
-
for idx, question in enumerate(questions_list):
|
139 |
-
memory = ConversationBufferMemory(
|
140 |
-
memory_key="history", input_key="question"
|
141 |
-
)
|
142 |
-
|
143 |
-
schema_question = SchemaQuestionEval(
|
144 |
-
prompt=question["prompt"], answer_correct=question["answer_correct"]
|
145 |
-
)
|
146 |
-
|
147 |
-
start_time = time.time()
|
148 |
-
generated_response = generate_prompt(
|
149 |
-
schema_question.prompt, "QR3.pdf", "user", memory
|
150 |
-
)
|
151 |
-
end_time = time.time()
|
152 |
-
generation_time = end_time - start_time
|
153 |
-
|
154 |
-
similarity = evaluate_response(
|
155 |
-
generated_response.content,
|
156 |
-
[schema_question.answer_correct],
|
157 |
-
embedding,
|
158 |
-
idx,
|
159 |
-
)
|
160 |
-
|
161 |
-
response_metadata = generated_response.response_metadata["logprobs"][
|
162 |
-
"content"
|
163 |
-
]
|
164 |
-
log_probs = [token_info["logprob"] for token_info in response_metadata]
|
165 |
-
|
166 |
-
metrics = calculate_metrics_from_log_probs(log_probs, idx)
|
167 |
-
|
168 |
-
ttr = calculate_ttr([generated_response.content], idx)
|
169 |
-
|
170 |
-
evaluation = {
|
171 |
-
"prompt": schema_question.prompt,
|
172 |
-
"answer_correct": schema_question.answer_correct,
|
173 |
-
"answer_generated": generated_response.content,
|
174 |
-
"ttr": ttr,
|
175 |
-
"cosine_similarity": similarity[0],
|
176 |
-
"entropy": metrics["entropy"],
|
177 |
-
"perplexity": metrics["perplexity"],
|
178 |
-
"generation_time": generation_time,
|
179 |
-
}
|
180 |
-
|
181 |
-
perplexity_total += metrics["perplexity"]
|
182 |
-
|
183 |
-
# Écrire le résultat individuel dans le fichier JSON
|
184 |
-
with open("results.json", "r+") as f:
|
185 |
-
data = json.load(f)
|
186 |
-
data[category].append(evaluation)
|
187 |
-
f.seek(0)
|
188 |
-
json.dump(data, f, ensure_ascii=False, indent=4)
|
189 |
-
f.truncate()
|
190 |
-
|
191 |
-
time.sleep(3)
|
192 |
-
|
193 |
-
category_perplexity = perplexity_total / len(questions_list)
|
194 |
-
|
195 |
-
# Ajouter la perplexité de la catégorie
|
196 |
-
with open("results.json", "r+") as f:
|
197 |
-
data = json.load(f)
|
198 |
-
data[category].append({"category_perplexity": category_perplexity})
|
199 |
-
f.seek(0)
|
200 |
-
json.dump(data, f, ensure_ascii=False, indent=4)
|
201 |
-
f.truncate()
|
202 |
-
|
203 |
-
end_time_category = time.time()
|
204 |
-
category_duration = end_time_category - start_time_category
|
205 |
-
print(f"Category '{category}' processed in {category_duration:.2f} seconds")
|
206 |
-
|
207 |
-
results[category] = data[category]
|
208 |
-
|
209 |
-
return results
|
210 |
-
|
211 |
-
|
212 |
-
@router.post("/read-and-analyze-results")
|
213 |
-
def read_and_analyze_results():
|
214 |
-
analysis = []
|
215 |
-
with open("results.csv", "r") as csvfile:
|
216 |
-
reader = csv.DictReader(csvfile)
|
217 |
-
for row in reader:
|
218 |
-
perplexity = float(row["perplexity"])
|
219 |
-
if not math.isfinite(perplexity):
|
220 |
-
perplexity = "undefined"
|
221 |
-
|
222 |
-
analysis.append(
|
223 |
-
{
|
224 |
-
"category": row["category"],
|
225 |
-
"prompt": row["prompt"],
|
226 |
-
"correct_answer": row["answer_correct"],
|
227 |
-
"generated_answer": row["answer_generated"],
|
228 |
-
"cosine_similarity": float(row["cosine_similarity"]),
|
229 |
-
"entropy": (
|
230 |
-
float(row["entropy"])
|
231 |
-
if math.isfinite(float(row["entropy"]))
|
232 |
-
else "undefined"
|
233 |
-
),
|
234 |
-
"perplexity": (
|
235 |
-
float(row["perplexity"])
|
236 |
-
if math.isfinite(float(row["perplexity"]))
|
237 |
-
else "undefined"
|
238 |
-
),
|
239 |
-
"generation_time": float(row["generation_time"]),
|
240 |
-
}
|
241 |
-
)
|
242 |
-
|
243 |
-
return analysis
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/api/test.py
DELETED
@@ -1,62 +0,0 @@
|
|
1 |
-
import google.generativeai as genai
|
2 |
-
import os
|
3 |
-
from langchain_google_genai import ChatGoogleGenerativeAI
|
4 |
-
import math
|
5 |
-
import time
|
6 |
-
|
7 |
-
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
|
8 |
-
|
9 |
-
modelGemini = genai.GenerativeModel("gemini-1.5-pro")
|
10 |
-
llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.2, top_p=0.2)
|
11 |
-
|
12 |
-
phrases_test = [
|
13 |
-
"Le chat mange une souris.",
|
14 |
-
"Je vais au cinéma ce soir.",
|
15 |
-
"L'avion décolle à 10h30.",
|
16 |
-
]
|
17 |
-
|
18 |
-
|
19 |
-
def fraction_to_float(fraction_str):
|
20 |
-
try:
|
21 |
-
return float(fraction_str)
|
22 |
-
except ValueError:
|
23 |
-
num, denom = map(int, fraction_str.split("/"))
|
24 |
-
return num / denom
|
25 |
-
|
26 |
-
|
27 |
-
def calculer_perplexite(phrases):
|
28 |
-
log_probs_totales = 0
|
29 |
-
total_tokens = 0
|
30 |
-
|
31 |
-
for phrase in phrases:
|
32 |
-
ids = llm.get_token_ids(phrase)
|
33 |
-
total_tokens += len(ids)
|
34 |
-
|
35 |
-
log_probs_phrase = 0
|
36 |
-
for i in range(len(ids)):
|
37 |
-
contexte = ids[:i]
|
38 |
-
token_actuel = ids[i]
|
39 |
-
|
40 |
-
result = llm.invoke(
|
41 |
-
f"Quelle est la probabilité du token {token_actuel} après la séquence {contexte}? Répondez uniquement par un nombre ou une fraction."
|
42 |
-
)
|
43 |
-
|
44 |
-
proba = fraction_to_float(result.content)
|
45 |
-
|
46 |
-
print(f"Token: {token_actuel}, Probabilité: {proba}")
|
47 |
-
|
48 |
-
log_probs_phrase += math.log(proba) if proba > 0 else float("-inf")
|
49 |
-
|
50 |
-
# Ajouter un délai de 2 secondes
|
51 |
-
time.sleep(4)
|
52 |
-
|
53 |
-
log_probs_totales += log_probs_phrase
|
54 |
-
|
55 |
-
moyenne_log_probs = log_probs_totales / total_tokens
|
56 |
-
perplexite = math.exp(-moyenne_log_probs)
|
57 |
-
|
58 |
-
return perplexite
|
59 |
-
|
60 |
-
|
61 |
-
perplexite = calculer_perplexite(phrases_test)
|
62 |
-
print(f"La perplexité du modèle est : {perplexite}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
from fastapi import FastAPI
|
2 |
-
from app.api import chat
|
3 |
|
4 |
from fastapi.middleware.cors import CORSMiddleware
|
5 |
|
@@ -19,8 +19,6 @@ origins = [
|
|
19 |
app = FastAPI()
|
20 |
|
21 |
app.include_router(chat.router, prefix="/api/chat", tags=["chat"])
|
22 |
-
app.include_router(evaluation.router, prefix="/api/evaluation", tags=["evaluation"])
|
23 |
-
|
24 |
|
25 |
app.add_middleware(
|
26 |
CORSMiddleware,
|
|
|
1 |
from fastapi import FastAPI
|
2 |
+
from app.api import chat
|
3 |
|
4 |
from fastapi.middleware.cors import CORSMiddleware
|
5 |
|
|
|
19 |
app = FastAPI()
|
20 |
|
21 |
app.include_router(chat.router, prefix="/api/chat", tags=["chat"])
|
|
|
|
|
22 |
|
23 |
app.add_middleware(
|
24 |
CORSMiddleware,
|