Ilyas KHIAT committed on
Commit
4c0c6d3
·
1 Parent(s): a336311
Files changed (4) hide show
  1. chunks_ia_signature.pkl +3 -0
  2. main.py +18 -104
  3. prompt.py +28 -9
  4. rag.py +95 -11
chunks_ia_signature.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:240d159d8dedc430a3b6049a60f0805fa423cf9abece82b36c4fb650c8c5d437
3
+ size 145837
main.py CHANGED
@@ -21,29 +21,6 @@ from email.mime.text import MIMEText
21
 
22
  load_dotenv()
23
 
24
- ## setup pinecone index
25
- pinecone_api_key = os.environ.get("PINECONE_API_KEY")
26
-
27
- pc = Pinecone(api_key=pinecone_api_key)
28
-
29
- index_name = os.environ.get("INDEX_NAME") # change if desired
30
-
31
- existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]
32
-
33
- if index_name not in existing_indexes:
34
- pc.create_index(
35
- name=index_name,
36
- dimension=1536,
37
- metric="cosine",
38
- spec=ServerlessSpec(cloud="aws", region="us-east-1"),
39
- )
40
- while not pc.describe_index(index_name).status["ready"]:
41
- time.sleep(1)
42
-
43
- index = pc.Index(index_name)
44
-
45
- vector_store = PineconeVectorStore(index=index, embedding=embedding)
46
-
47
  ## setup authorization
48
  api_keys = [os.environ.get("FASTAPI_API_KEY")]
49
 
@@ -67,104 +44,41 @@ else:
67
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
68
 
69
  # Pydantic model for the form data
70
- class ContactForm(BaseModel):
71
- name: str
72
- email: EmailStr
73
- message: str
74
-
75
- def send_simple_message(to,subject,text):
76
- api_key = os.getenv("MAILGUN_API_KEY")
77
-
78
- return requests.post(
79
- "https://api.mailgun.net/v3/sandboxafc6970ffdab40ee9566a4e180b117fd.mailgun.org/messages",
80
- auth=("api", api_key),
81
- data={"from": "Excited User <mailgun@sandboxafc6970ffdab40ee9566a4e180b117fd.mailgun.org>",
82
- "to": [to],
83
- "subject": subject,
84
- "text": text})
85
-
86
- # Function to send email
87
- def send_email(form_data: ContactForm):
88
- # sender_email = os.getenv("SENDER_EMAIL")
89
- # sender_password = os.getenv("SENDER_PASSWORD")
90
-
91
- receiver_email = os.getenv("RECEIVER_EMAIL") # Your email
92
-
93
- # Setup the message content
94
- text = f"Name: {form_data.name}\nEmail: {form_data.email}\nMessage: {form_data.message}"
95
- title = "New message from your website!"
96
-
97
- # Send the email
98
- try:
99
- send_simple_message(receiver_email,title,text)
100
- except Exception as e:
101
- print(e)
102
- return {"message": "Failed to send email."}
103
-
104
- # Endpoint to handle form submission
105
- @app.post("/send_email")
106
- async def send_contact_form(form_data: ContactForm, background_tasks: BackgroundTasks):
107
- background_tasks.add_task(send_email, form_data)
108
- return {"message": "Email sent successfully!"}
109
 
110
  class UserInput(BaseModel):
111
  query: str
112
  stream: Optional[bool] = False
113
  messages: Optional[list[dict]] = []
114
 
115
- class ChunkToDB(BaseModel):
116
- message: str
117
- title: str
118
 
119
-
120
- @app.post("/add_chunk_to_db")
121
- async def add_chunk_to_db(chunk: ChunkToDB):
122
  try:
123
- title = chunk.title
124
- message = chunk.message
125
- return get_vectorstore(text_chunk=message,index=index,title=title)
126
  except Exception as e:
127
- return {"message": str(e)}
128
 
129
-
130
- @app.get("/list_vectors")
131
- async def list_vectors():
132
  try:
133
- return index.list()
 
134
  except Exception as e:
135
- return {"message": str(e)}
136
 
137
-
138
  @app.post("/generate")
139
  async def generate(user_input: UserInput):
140
  try:
141
  print(user_input.stream,user_input.query)
142
  if user_input.stream:
143
- return StreamingResponse(generate_stream(user_input.query,user_input.messages,index_name=index,stream=True,vector_store=vector_store),media_type="application/json")
144
  else:
145
- return generate_stream(user_input.query,user_input.messages,index_name=index,stream=False,vector_store=vector_store)
146
- except Exception as e:
147
- return {"message": str(e)}
148
-
149
- @app.post("/retreive_context")
150
- async def retreive_context_response(query: str):
151
- try:
152
- return retreive_context(index=index,query=query)
153
  except Exception as e:
154
- return {"message": str(e)}
155
-
156
-
157
- @app.delete("/delete_vector")
158
- async def delete_vector(filename_id: str):
159
- try:
160
- return index.delete(ids=[filename_id])
161
- except Exception as e:
162
- return {"message": str(e)}
163
-
164
- @app.get("/check_server")
165
- async def check_server():
166
- return {"message":"Server is running"}
167
-
168
- @app.get("/")
169
- async def read_root():
170
- return {"message":"Welcome to the AI API"}
 
21
 
22
  load_dotenv()
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  ## setup authorization
25
  api_keys = [os.environ.get("FASTAPI_API_KEY")]
26
 
 
44
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
45
 
46
  # Pydantic models for the request bodies
47
# Request body accepted by POST /verify_sphinx. Its three fields are passed
# straight to verify_response(response, answers, question).
# NOTE(review): rag.py declares a model with the same name whose third field is
# called `initial_question` -- confirm which definition is meant to be canonical.
class verify_response_model(BaseModel):
    response: str = Field(description="The response from the user to the question")
    answers: list[str] = Field(description="The possible answers to the question to test if the user read the entire book")
    question: str = Field(description="The question asked to the user to test if they read the entire book")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
# Request body accepted by POST /generate.
class UserInput(BaseModel):
    query: str  # the user's question, forwarded to generate_stream
    stream: Optional[bool] = False  # when truthy the endpoint wraps the result in a StreamingResponse
    messages: Optional[list[dict]] = []  # forwarded to generate_stream as its second argument
56
 
57
+ # endpoints
 
 
58
 
59
@app.post("/generate_sphinx")
async def generate_sphinx():
    """Return a freshly generated comprehension question and its accepted answers.

    Returns:
        A dict with the keys ``"question"`` and ``"answers"`` taken from the
        ``sphinx_output`` produced by ``generate_sphinx_response``.

    Raises:
        HTTPException: status 500, with the underlying error message as detail,
            when generation fails.
    """
    try:
        sphinx: sphinx_output = generate_sphinx_response()
        return {"question": sphinx.question, "answers": sphinx.answers}
    except Exception as e:
        # Chain the cause so the original traceback is still visible in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
66
 
67
@app.post("/verify_sphinx")
async def verify_sphinx(response: verify_response_model):
    """Check the user's answer to a previously generated question.

    Args:
        response: Request body carrying the user's answer, the accepted answers
            and the question that was asked.

    Returns:
        A dict ``{"score": <result of verify_response>}``.

    Raises:
        HTTPException: status 500, with the underlying error message as detail,
            when verification fails.
    """
    try:
        score: bool = verify_response(response.response, response.answers, response.question)
        return {"score": score}
    except Exception as e:
        # Chain the cause so the original traceback is still visible in server logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
74
 
 
75
@app.post("/generate")
async def generate(user_input: UserInput):
    """Answer the user's query, either as a stream or as a single result.

    NOTE(review): on failure this answers with ``{"message": ...}`` instead of
    raising HTTPException like the sphinx endpoints -- confirm whether clients
    rely on that shape before aligning them.
    """
    try:
        print(user_input.stream,user_input.query)
        # Non-streaming callers get the result of generate_stream directly.
        if not user_input.stream:
            return generate_stream(user_input.query, user_input.messages, stream=False)
        token_stream = generate_stream(user_input.query, user_input.messages, stream=True)
        return StreamingResponse(token_stream, media_type="application/json")
    except Exception as err:
        return {"message": str(err)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
prompt.py CHANGED
@@ -1,5 +1,5 @@
1
  template_sphinx = '''
2
- Voici un résumé et un bout du récit de {writer}, l'auteur de {book_name}. Vous êtes le Grand Sphinx, maître des énigmes et des questions.
3
  Vous devez tester si quelqu'un a lu le récit en lui posant une question qui lui ouvrira la porte vers la réalité de ce récit.
4
  Votre question doit être en français, et vous devez l'associer aux réponses possibles.
5
 
@@ -14,13 +14,32 @@ La sortie doit être une question en français, qui teste la compréhension du r
14
 
15
  '''
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  template = '''
18
- You are an AI assistant for Ilyas Khiat, a future engineer with a major in AI, and software engineering. Your job is to respond to visistors in the most human way . Always provide links if necessary (e.g., LinkedIn: https://www.linkedin.com/in/ilyas-khiat-148a73254/ ) Ensure your tone is pleaseant, and respond precisely to the user's query. if the context is not pertinent or you don't have enough information, **DON'T HALLUCINATE**.
19
- The context retreived from the user is:
20
- {context}
21
- {history}
22
- The user's query is:
23
- {query}
24
-
25
- Please respond to the user's query in a consis way and well formatted markdown with paragraphs and emojis. If the question is about my values , highlights Ilyas' technical expertise **without exageration**, projects and their **links**, and how he adds value to potential employers, plus soft skills. Add life to your answer and emphasize keywords with bold, MAKE IT **SHORT** in no more than **150 WORDS** or 200 tokens. Ensure your tone is pleasant, engaging, and matches the language of the user's query and your response is not bluffing and exaggerating but honest and convincing.
 
 
 
 
 
 
26
  '''
 
1
  template_sphinx = '''
2
+ Voici un résumé et un bout du récit de {book_name}. Vous êtes le Grand Sphinx, maître des énigmes et des questions.
3
  Vous devez tester si quelqu'un a lu le récit en lui posant une question qui lui ouvrira la porte vers la réalité de ce récit.
4
  Votre question doit être en français, et vous devez l'associer aux réponses possibles.
5
 
 
14
 
15
  '''
16
 
17
# Grading prompt: asks the model for a mark out of 10.
# Placeholders: {initial_question}, {answers}, {response}.
template_verify = '''
Vous êtes un expert en correction et comparaison de réponses. Retournez une note sur 10 sur la cohérence de la réponse de l'utilisateur avec la réponse correcte. Voici les détails :

Question : {initial_question}

Réponses correctes : {answers}

Réponse de l'utilisateur : {response}

Évaluez la réponse de l'utilisateur et attribuez une note sur 10 en fonction de sa cohérence avec la réponse correcte.

'''

# Question-answering prompt about the book.
# Placeholders: {name_book}, {writer}, {kg}, {context}, {query}.
template = '''
Vous êtes un assistant IA très intelligent qui connaît tout sur le livre {name_book} de {writer}.
Vous allez répondre à la question de l'utilisateur, qui portera sur ce livre. Vous répondrez rigoureusement dans le style d'écriture de la nouvelle.

**Graphe de connaissances du livre :** {kg}

**Contexte récupéré (si pertinent pour votre réponse) :** {context}

**Question de l'utilisateur :** {query}

**Sortie attendue :** Votre réponse doit être bien formatée, plaisante à lire et inclure des émojis.
'''

# Fixed summary of the story, written in French.
summary_text = '''
Ce récit d'anticipation, se déroulant principalement en 2038, explore les tensions entre l'art, la technologie et les limites planétaires à travers une exposition visionnaire des œuvres de René Magritte. Anne-Hélène, nouvelle directrice des Musées Royaux des Beaux-Arts de Belgique, organise cette exposition avec plusieurs personnages clés comme Tristan, un guide discret mais observateur, Karla Madrigale, PDG de la start-up IA SIGNATURE, et Jad Wahid, un artiste engagé. L'exposition vise à confronter les œuvres de Magritte aux limites planétaires pour inspirer de nouvelles façons de penser la place du vivant et de l'art dans un futur dominé par l'IA. Cependant, un incident dramatique se produit lorsque Karla, en interaction avec une installation immersive, tombe dans le coma à la suite d'une réaction toxique des plantes. Les événements soulèvent des questions sur l'éthique, la responsabilité et les limites de l'IA et des technologies immersives. Le récit invite à réfléchir sur le potentiel et les risques de l'IA dans l'art et la société.
'''
rag.py CHANGED
@@ -8,12 +8,19 @@ from langchain_core.output_parsers import StrOutputParser
8
  from langchain_core.prompts import PromptTemplate
9
  from uuid import uuid4
10
  from prompt import *
 
 
 
 
 
 
11
 
12
  from pydantic import BaseModel, Field
13
  from dotenv import load_dotenv
14
  import os
15
 
16
  from langchain_core.tools import tool
 
17
 
18
  import unicodedata
19
 
@@ -25,38 +32,115 @@ embedding_model = "text-embedding-3-small"
25
  embedding = OpenAIEmbeddings(model=embedding_model)
26
  # vector_store = PineconeVectorStore(index=index_name, embedding=embedding)
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  class sphinx_output(BaseModel):
29
  question: str = Field(description="The question to ask the user to test if they read the entire book")
30
  answers: list[str] = Field(description="The possible answers to the question to test if the user read the entire book")
31
 
32
- llm = ChatOpenAI(model="gpt-4o-mini", max_tokens=300, temperature=0.5)
 
 
 
 
 
 
 
 
 
 
33
 
34
 
35
- def get_random_chunk(chunks: list[str]) -> str:
36
- return chunks[tool.random_int(0, len(chunks) - 1)]
37
 
38
- def get_vectorstore(chunks: list[str]) -> FAISS:
39
- vector_store = FAISS(index=index_name, embedding=embedding)
40
- for chunk in chunks:
41
- document = Document(text=chunk, id=str(uuid4()))
42
- vector_store.index(document)
 
 
 
 
 
 
 
43
  return vector_store
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  def generate_stream(query:str,messages = [], model = "gpt-4o-mini", max_tokens = 300, temperature = 0.5,index_name="",stream=True,vector_store=None):
46
  try:
47
  print("init chat")
48
  print("init template")
49
  prompt = PromptTemplate.from_template(template)
 
 
 
 
 
50
  print("retreiving context")
51
- context = retreive_context(query=query,index=index_name,vector_store=vector_store)
52
  print(f"Context: {context}")
53
  llm_chain = prompt | llm | StrOutputParser()
54
 
55
  print("streaming")
56
  if stream:
57
- return llm_chain.stream({"context":context,"history":messages,"query":query})
58
  else:
59
- return llm.invoke(query)
60
 
61
  except Exception as e:
62
  print(e)
 
8
  from langchain_core.prompts import PromptTemplate
9
  from uuid import uuid4
10
  from prompt import *
11
+ import random
12
+ from itext2kg.models import KnowledgeGraph
13
+
14
+
15
+ import faiss
16
+ from langchain_community.docstore.in_memory import InMemoryDocstore
17
 
18
  from pydantic import BaseModel, Field
19
  from dotenv import load_dotenv
20
  import os
21
 
22
  from langchain_core.tools import tool
23
+ import pickle
24
 
25
  import unicodedata
26
 
 
32
  embedding = OpenAIEmbeddings(model=embedding_model)
33
  # vector_store = PineconeVectorStore(index=index_name, embedding=embedding)
34
 
35
def advanced_graph_to_json(graph: "KnowledgeGraph"):
    """Convert a knowledge graph into a plain dict of nodes and edges.

    Args:
        graph: Object exposing ``entities`` (items with ``name`` and ``label``)
            and ``relationships`` (items with ``startEntity``, ``endEntity``
            and ``name``).

    Returns:
        ``{"noeuds": [...], "relations": [...]}`` where each node is
        ``{"id", "label", "type"}`` and each edge is
        ``{"source", "label", "cible"}``. Ids are the entity name with spaces
        replaced by underscores. The French key names are part of the output
        format and are kept unchanged.
    """

    def _entity_id(entity):
        # Single place that defines how an entity name becomes an identifier.
        return entity.name.replace(" ", "_")

    nodes = [
        {"id": _entity_id(entity), "label": entity.name, "type": entity.label}
        for entity in graph.entities
    ]
    edges = [
        {
            "source": _entity_id(relationship.startEntity),
            "label": relationship.name,
            "cible": _entity_id(relationship.endEntity),
        }
        for relationship in graph.relationships
    ]
    return {"noeuds": nodes, "relations": edges}
51
+
52
# Load the pickled knowledge graph and keep both the raw object
# (`loaded_graph`) and its dict form (`graph`) at module level.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file --
# presumably both .pkl files are trusted artifacts shipped with the repo;
# confirm they can never be user-supplied.
with open("kg_ia_signature.pkl", "rb") as file:
    loaded_graph = pickle.load(file)
    graph = advanced_graph_to_json(loaded_graph)
    print("Graph loaded")

# Load the pickled text chunks used for random excerpts and for the vector store.
with open("chunks_ia_signature.pkl", "rb") as file:
    chunks = pickle.load(file)
    print("Chunks loaded")
60
+
61
# Structured output schema requested from the LLM in generate_sphinx_response.
# Documented with comments on purpose: a class docstring would become part of
# the pydantic schema description.
class sphinx_output(BaseModel):
    question: str = Field(description="The question to ask the user to test if they read the entire book")
    answers: list[str] = Field(description="The possible answers to the question to test if the user read the entire book")
64
 
65
# Bundle of a user's answer, the accepted answers and the question asked.
# NOTE(review): no use of this class is visible in this part of the file --
# confirm it is still needed.
class verify_response_model(BaseModel):
    response: str = Field(description="The response from the user to the question")
    answers: list[str] = Field(description="The possible answers to the question to test if the user read the entire book")
    initial_question: str = Field(description="The question asked to the user to test if they read the entire book")
69
+
70
# Structured output schema requested from the LLM in verify_response.
class verification_score(BaseModel):
    score: float = Field(description="The score of the user's response from 0 to 10 to the question")
72
+
73
+
74
+
75
+ llm = ChatOpenAI(model="gpt-4o", max_tokens=300, temperature=0.5)
76
 
77
 
78
def get_random_chunk() -> str:
    """Return one entry of the module-level ``chunks``, picked uniformly at random.

    Raises:
        IndexError: if ``chunks`` is empty.
    """
    # random.choice states the intent directly instead of hand-rolled index arithmetic.
    return random.choice(chunks)
80
 
81
+
82
def get_vectorstore() -> FAISS:
    """Build an in-memory FAISS store containing every entry of ``chunks``."""
    # Embed a throwaway query once to learn the embedding dimension.
    dimension = len(embedding.embed_query("hello world"))
    store = FAISS(
        embedding_function=embedding,
        index=faiss.IndexFlatL2(dimension),
        docstore=InMemoryDocstore(),
        index_to_docstore_id={},
    )

    # One Document and one fresh UUID per chunk.
    docs = []
    doc_ids = []
    for chunk in chunks:
        docs.append(Document(page_content=chunk))
    for _ in docs:
        doc_ids.append(str(uuid4()))

    store.add_documents(documents=docs, ids=doc_ids)
    return store
94
+
95
+ vectore_store = get_vectorstore()
96
+
97
+
98
def generate_sphinx_response() -> sphinx_output:
    """Ask the LLM for a question (with accepted answers) about a random excerpt."""
    prompt_inputs = {
        "writer": "Laurent Tripied",
        "book_name": "Limites de l'imaginaire ou limites planétaires",
        "summary": summary_text,
        "excerpt": get_random_chunk(),
    }
    # Prompt piped into an LLM constrained to emit a sphinx_output.
    chain = PromptTemplate.from_template(template_sphinx) | llm.with_structured_output(sphinx_output)
    return chain.invoke(prompt_inputs)
109
+
110
def verify_response(response: str, answers: list[str], question: str, pass_mark: float = 5.0) -> bool:
    """Grade the user's answer with the LLM and report whether it passes.

    Args:
        response: The answer given by the user.
        answers: The answers considered correct for the question.
        question: The question that was asked to the user.
        pass_mark: Minimum score, on the 0-10 scale requested from the model,
            that counts as a pass.

    Returns:
        True when the model's score is at least ``pass_mark``, False otherwise.
    """
    prompt = PromptTemplate.from_template(template_verify)
    structured_llm = llm.with_structured_output(verification_score)
    grading_chain = prompt | structured_llm
    result = grading_chain.invoke({"response": response, "answers": answers, "initial_question": question})
    # Comparing against 0 would accept every answer on a 0-10 scale; compare
    # against the pass mark and always return a real bool.
    return result.score >= pass_mark
117
+
118
+
119
def retrieve_context_from_vectorestore(query: str) -> str:
    """Fetch the passages most relevant to ``query`` as one block of text.

    Runs an MMR search (k=3) on the module-level vector store and joins the
    ``page_content`` of the hits with blank lines, so the result is the ``str``
    the signature promises rather than the repr of a list of Documents.

    Args:
        query: The user's question.

    Returns:
        The retrieved passages separated by blank lines; an empty string when
        nothing is retrieved.
    """
    retriever = vectore_store.as_retriever(search_type="mmr", search_kwargs={"k": 3})
    return "\n\n".join(doc.page_content for doc in retriever.invoke(query))
122
+
123
 
124
  def generate_stream(query:str,messages = [], model = "gpt-4o-mini", max_tokens = 300, temperature = 0.5,index_name="",stream=True,vector_store=None):
125
  try:
126
  print("init chat")
127
  print("init template")
128
  prompt = PromptTemplate.from_template(template)
129
+
130
+ writer = "Laurent Tripied"
131
+ name_book = "Limites de l'imaginaire ou limites planétaires"
132
+ name_icon = "Magritte"
133
+ kg = loaded_graph
134
  print("retreiving context")
135
+ context = retrieve_context_from_vectorestore(query)
136
  print(f"Context: {context}")
137
  llm_chain = prompt | llm | StrOutputParser()
138
 
139
  print("streaming")
140
  if stream:
141
+ return llm_chain.stream({"name_book":name_book,"writer":writer,"name_icon":name_icon,"kg":graph,"context":context,"query":query})
142
  else:
143
+ return llm_chain.invoke({"name_book":name_book,"writer":writer,"name_icon":name_icon,"kg":graph,"context":context,"query":query})
144
 
145
  except Exception as e:
146
  print(e)