Samiraxio committed
Commit 35fb63f
1 Parent(s): e7e3a28

Upload folder using huggingface_hub

.gitignore CHANGED
@@ -9,9 +9,15 @@ setAPIKEY.sh
 .AppleDouble
 .LSOverride
 
+# Chatbot conversation history
+*.json
+
 # Icon must end with two \r
 Icon
 
+# files for RAG
+sources/*
+categories.csv
 
 # Thumbnails
 ._*
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: Clara
+title: clara
 app_file: app.py
 sdk: gradio
 sdk_version: 4.19.1
app.py CHANGED
@@ -1,9 +1,7 @@
-#from climateqa.engine.vectorstore import get_pinecone_vectorstore,
-from climateqa.engine.vectorstore import build_vectores_stores
-from climateqa.engine.embeddings import get_embeddings_function
-from climateqa.engine.rag import make_rag_papers_chain
-from climateqa.engine.keywords import make_keywords_chain
-from climateqa.sample_questions import QUESTIONS
+
+
+# , get_pinecone_vectorstore, find_similar_vectors
+from climateqa.engine.vectorstore import build_vectores_stores, get_PDF_Names_from_GCP, get_categories_files
 from climateqa.engine.text_retriever import ClimateQARetriever
 from climateqa.engine.rag import make_rag_chain
 from climateqa.engine.llm import get_llm
@@ -12,11 +10,9 @@ from datetime import datetime
 import json
 import re
 import gradio as gr
-from climateqa.papers.openalex import OpenAlex
 from sentence_transformers import CrossEncoder
 
 reranker = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")
-oa = OpenAlex()
 
 # Load environment variables in local mode
 try:
@@ -26,9 +22,9 @@ except Exception as e:
     pass
 
 # Set up Gradio Theme
-theme = gr.themes.Base(
-    primary_hue="blue",
-    secondary_hue="red",
+theme = gr.themes.Soft(
+    primary_hue="yellow",
+    secondary_hue="orange",
     font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif",
           "system-ui", "sans-serif"],
 )
@@ -43,6 +39,8 @@ system_template = {
 
 user_id = create_user_id()
 
+list_categorie = get_categories_files()
+categories = list_categorie["AllCat"]
 
 def parse_output_llm_with_sources(output):
     # Split the content into a list of text and "[Doc X]" references
@@ -74,21 +72,31 @@ def serialize_docs(docs):
 
 
 # Create vectorstore and retriever
-embeddings_function = get_embeddings_function()
-
-#vectorstore = get_pinecone_vectorstore(embeddings_function)
 vectorstore = build_vectores_stores("./sources")
 llm = get_llm(provider="openai", max_tokens=1024, temperature=0.0)
 
 
-async def chat(query, history):
+async def chat(query, history, categories, src_nb_max, src_pertinence):
     """taking a query and a message history, use a pipeline (reformulation, retriever, answering) to yield a tuple of:
     (messages in gradio format, messages in langchain format, source documents)"""
 
-    print(f">> NEW QUESTION : {query}")
+    print(f">> NEW QUESTION : {query} -> sources max: {src_nb_max} - pertinence: {src_pertinence}")
+
+    filter = None
+    if len(categories):
+        filter = {"$or": []}
+        for cat in categories:
+            for fich in list_categorie[cat]:
+                filter["$or"].append({"ax_name": fich})
+
+    print(">> Filter : " + str(filter))
+    print(">> nb sources : " + str(src_nb_max))
+    print(">> pertinence : " + str(src_pertinence))
 
     retriever = ClimateQARetriever(
-        vectorstore=vectorstore, sources=["Custom"], reports=[])
+        vectorstore=vectorstore, sources=["Custom"], reports=[],
+        threshold=src_pertinence, k_total=src_nb_max, filter=filter
+    )
     rag_chain = make_rag_chain(retriever, llm)
 
     inputs = {"query": query, "audience": None}
@@ -167,7 +175,7 @@ async def chat(query, history):
         "answer": history[-1][1],
         "time": timestamp,
     }
-    log_locally(log_file, logs)
+    # log_locally(log_file, logs)
 
     yield history, docs_html, output_query, output_language, gallery, output_query, output_keywords
 
@@ -185,25 +193,24 @@ def make_html_source(source, i):
      <div class="card-content">
        <div>
          <div style="float:right;width 10%;position:relative;top:0px">
-            <a href='{meta['ax_url']}'><img style="width:20px" src='/file/assets/download.png' /></a>
+            <a href='{meta['ax_url']}' target='_blank'><img style="width:20px" src='/file/assets/download.png' /></a>
          </div>
          <div>
-            <h2>Extrait {i}</h2>
+            <h2>Extrait {i} (Score:{float(meta['similarity_score'])})</h2>
            <h2> {meta['ax_name']} - Page {int(meta['ax_page'])}</h2>
          </div>
        </div>
        <p>{text_content}</p>
 
      </div>
-      <div class="card-footer">
+      <!-- <div class="card-footer">
        <span>{name}</span>
-      </div>
+      </div> -->
    </div>
    """
 
    return card
 
-
 def log_locally(file, logs):
     # Convert the logs to JSON format
     logs_json = json.dumps(logs)
@@ -213,84 +220,10 @@ def log_locally(file, logs):
     f.write(logs_json)
 
 
-def generate_keywords(query):
-    chain = make_keywords_chain(llm)
-    keywords = chain.invoke(query)
-    keywords = " AND ".join(keywords["keywords"])
-    return keywords
-
-
-papers_cols_widths = {
-    "doc": 50,
-    "id": 100,
-    "title": 300,
-    "doi": 100,
-    "publication_year": 100,
-    "abstract": 500,
-    "rerank_score": 100,
-    "is_oa": 50,
-}
-
-papers_cols = list(papers_cols_widths.keys())
-papers_cols_widths = list(papers_cols_widths.values())
-
-
-async def find_papers(query, keywords, after):
-
-    summary = ""
-
-    df_works = oa.search(keywords, after=after)
-    df_works = df_works.dropna(subset=["abstract"])
-    df_works = oa.rerank(query, df_works, reranker)
-    df_works = df_works.sort_values("rerank_score", ascending=False)
-    G = oa.make_network(df_works)
-
-    height = "750px"
-    network = oa.show_network(
-        G, color_by="rerank_score", notebook=False, height=height)
-    network_html = network.generate_html()
-
-    network_html = network_html.replace("'", "\"")
-    css_to_inject = "<style>#mynetwork { border: none !important; } .card { border: none !important; }</style>"
-    network_html = network_html + css_to_inject
-
-    network_html = f"""<iframe style="width: 100%; height: {height};margin:0 auto" name="result" allow="midi; geolocation; microphone; camera;
-    display-capture; encrypted-media;" sandbox="allow-modals allow-forms
-    allow-scripts allow-same-origin allow-popups
-    allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
-    allowpaymentrequest="" frameborder="0" srcdoc='{network_html}'></iframe>"""
-
-    docs = df_works["content"].head(15).tolist()
-
-    df_works = df_works.reset_index(
-        drop=True).reset_index().rename(columns={"index": "doc"})
-    df_works["doc"] = df_works["doc"] + 1
-    df_works = df_works[papers_cols]
-
-    yield df_works, network_html, summary
-
-    chain = make_rag_papers_chain(llm)
-    result = chain.astream_log(
-        {"question": query, "docs": docs, "language": "English"})
-    path_answer = "/logs/StrOutputParser/streamed_output/-"
-
-    async for op in result:
-
-        op = op.ops[0]
-
-        if op['path'] == path_answer:  # reformulated question
-            new_token = op['value']  # str
-            summary += new_token
-        else:
-            continue
-        yield df_works, network_html, summary
-
-
 # --------------------------------------------------------------------
 # Gradio
 # --------------------------------------------------------------------
 
-
 init_prompt = """
 Hello, I am Clara, an AI Assistant created by Axionable. My purpose is to answer your questions using the provided extracted passages, context, and guidelines.
 
@@ -306,8 +239,13 @@ What would you like to know today?
 """
 
 
-with gr.Blocks(title="CLARA", css="style.css", theme=theme, elem_id="main-component") as demo:
+with gr.Blocks(title="CLARA", css="style.css", theme=theme, elem_id="main-component", elem_classes="ax_background") as demo:
+
+    gr.HTML("""
+    <img style="width:100px" src="file/assets/axionable.svg"/>
+    """, elem_classes="logo-axio ")
 
+    # TAB Clara
     with gr.Tab("CLARA"):
 
         with gr.Row(elem_id="chatbot-row"):
@@ -319,59 +257,62 @@ with gr.Blocks(title="CLARA", css="style.css", theme=theme, elem_id="main-compon
 
             with gr.Row(elem_id="input-message"):
                 textbox = gr.Textbox(placeholder="Posez votre question", show_label=False,
-                                     scale=7, lines=1, interactive=True, elem_id="input-textbox")
+                                     scale=7, lines=1, interactive=True, elem_id="input-textbox")
+
 
             with gr.Column(scale=1, variant="panel", elem_id="right-panel"):
 
-                with gr.Tabs() as tabs:
+                # with gr.Column(scale=1, elem_id="tab-citations"):
+
+                #     gr.HTML("<p>Sources</p>")
 
-                    with gr.Tab("Sources", elem_id="tab-citations", id=1):
-                        sources_textbox = gr.HTML(
-                            show_label=False, elem_id="sources-textbox")
-                        docs_textbox = gr.State("")
+                #     slider = gr.Slider(1, 10, value=src_nb_max, step=1, label="nb max", interactive=True, elem_id="source-nb-max")
+                #     slider_p = gr.Slider(0.0, 1.0, value=src_pertinence, step=0.01, label="pertinence", interactive=True, elem_id="source-pertinence")
 
-                # ---------------------------------------------------------------------------------------
-                # OTHER TABS
-                # ---------------------------------------------------------------------------------------
+                # sources_textbox = gr.HTML(
+                #     show_label=False, elem_id="sources-textbox")
+                # docs_textbox = gr.State("")
 
-                with gr.Tab("Figures", elem_id="tab-images", elem_classes="max-height other-tabs"):
-                    gallery_component = gr.Gallery()
 
-                with gr.Tab("Papers (beta)", elem_id="tab-papers", elem_classes="max-height other-tabs"):
+                # the tabs object is currently required
+                # it seems to be used to freeze the tab contents
+                # while the AI generates a response ..
+                with gr.Tabs() as tabs:
+                    # None
 
-                    with gr.Row():
-                        with gr.Column(scale=1):
-                            query_papers = gr.Textbox(
-                                placeholder="Question", show_label=False, lines=1, interactive=True, elem_id="query-papers")
-                            keywords_papers = gr.Textbox(
-                                placeholder="Keywords", show_label=False, lines=1, interactive=True, elem_id="keywords-papers")
-                            after = gr.Slider(minimum=1950, maximum=2023, step=1, value=1960,
-                                              label="Publication date", show_label=True, interactive=True, elem_id="date-papers")
-                            search_papers = gr.Button(
-                                "Search", elem_id="search-papers", interactive=True)
+                    with gr.Tab("sources"):
+                        sources_textbox = gr.HTML(
+                            show_label=False, elem_id="sources-textbox")
+                        docs_textbox = gr.State("")
 
-                        with gr.Column(scale=7):
+                    with gr.Tab("filtres"):
 
-                            with gr.Tab("Summary", elem_id="papers-summary-tab"):
-                                papers_summary = gr.Markdown(
-                                    visible=True, elem_id="papers-summary")
+                        cat_sel = gr.CheckboxGroup(categories, label="Catégories")
 
-                            with gr.Tab("Relevant papers", elem_id="papers-results-tab"):
-                                papers_dataframe = gr.Dataframe(
-                                    visible=True, elem_id="papers-table", headers=papers_cols)
+                        slider = gr.Slider(1, 10, value=7, step=1, label="nb max", interactive=True, elem_id="source-nb-max")
+                        slider_p = gr.Slider(0.0, 1.0, value=0.5, step=0.01, label="pertinence", interactive=True, elem_id="source-pertinence")
 
-                            with gr.Tab("Citations network", elem_id="papers-network-tab"):
-                                citations_network = gr.HTML(
-                                    visible=True, elem_id="papers-citations-network")
 
+    # TAB A propos
     with gr.Tab("À propos", elem_classes="max-height other-tabs"):
         with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown(
-                    "CLARA (Climate LLM for Adaptation & Risks Answers) by [Axionable](https://www.axionable.com/)"
-                    "– Fork de [ClimateQ&A](https://huggingface.co/spaces/Ekimetrics/climate-question-answering/tree/main)")
+                    ("CLARA (Climate LLM for Adaptation & Risks Answers) by [Axionable](https://www.axionable.com/)"
+                     "– Fork de [ClimateQ&A](https://huggingface.co/spaces/Ekimetrics/climate-question-answering/tree/main)"), elem_classes="a-propos")
+
+
+    # # TAB Configuration
+    # with gr.Tab("Configuration"):
+    #
+    #     with gr.Row(elem_id="config-row"):
+    #         with gr.Column(scale=1):
+    #
+    #             for pdfName in get_PDF_Names_from_GCP():
+    #                 gr.Markdown( pdfName, elem_classes="a-propos")
 
     def start_chat(query, history):
+
        history = history + [(query, None)]
        history = [tuple(x) for x in history]
        return (gr.update(interactive=False), gr.update(selected=1), history)
@@ -381,26 +322,15 @@ with gr.Blocks(title="CLARA", css="style.css", theme=theme, elem_id="main-compon
 
    (textbox
     .submit(start_chat, [textbox, chatbot], [textbox, tabs, chatbot], queue=False, api_name="start_chat_textbox")
-     .then(chat, [textbox, chatbot], [chatbot, sources_textbox], concurrency_limit=8, api_name="chat_textbox")
+     .then(chat, [textbox, chatbot, cat_sel, slider, slider_p], [chatbot, sources_textbox], concurrency_limit=8, api_name="chat_textbox")
     .then(finish_chat, None, [textbox], api_name="finish_chat_textbox")
    )
+
 
-
-    def change_sample_questions(key):
-        index = list(QUESTIONS.keys()).index(key)
-        visible_bools = [False] * len(samples)
-        visible_bools[index] = True
-        return [gr.update(visible=visible_bools[i]) for i in range(len(samples))]
-
-    # dropdown_samples.change(change_sample_questions,dropdown_samples,samples)
-
-    query_papers.submit(generate_keywords, [query_papers], [keywords_papers])
-    search_papers.click(find_papers, [query_papers, keywords_papers, after], [
-        papers_dataframe, citations_network, papers_summary])
-
    demo.queue()
 
+
    demo.launch(allowed_paths=["assets/download.png",
-                "assets/logo4.png"],
-                favicon_path="assets/logo4.png")
+                "assets/logo4.png",
+                "assets/axionable.svg"], favicon_path="assets/logo4.png")
climateqa/engine/embeddings.py CHANGED
@@ -8,8 +8,12 @@ def get_embeddings_function(version = "v1.2"):
 
     # https://huggingface.co/BAAI/bge-base-en-v1.5
     # Best embedding model at a reasonable size at the moment (2023-11-22)
-
-    model_name = "BAAI/bge-base-en-v1.5"
+    # model_name = "BAAI/bge-base-en-v1.5"
+
+    # https://huggingface.co/BAAI/bge-m3
+    # A better one from 2024-04
+    model_name = "BAAI/bge-m3"
+
     encode_kwargs = {'normalize_embeddings': True}  # set True to compute cosine similarity
     print("Loading embeddings model: ", model_name)
     embeddings_function = HuggingFaceBgeEmbeddings(
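For context, a small usage sketch of the swapped-in model, assuming langchain_community and sentence-transformers are installed (bge-m3 returns 1024-dimensional vectors):

    from langchain_community.embeddings import HuggingFaceBgeEmbeddings

    # With normalize_embeddings=True the vectors are unit-length,
    # so a plain dot product equals cosine similarity.
    embeddings = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-m3",
        encode_kwargs={"normalize_embeddings": True},
    )
    vec = embeddings.embed_query("climate adaptation")
    print(len(vec))  # 1024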
climateqa/engine/text_retriever.py CHANGED
@@ -8,10 +8,11 @@ class ClimateQARetriever(BaseRetriever):
     vectorstore: VectorStore
     sources: list = []
     reports: list = []
-    threshold: float = 0.6
+    threshold: float = 0.01
     k_summary: int = 3
-    k_total: int = 10
+    k_total: int = 7
     min_size: int = 200
+    filter: dict = None
 
     def _get_relevant_documents(
         self, query: str, *, run_manager: CallbackManagerForRetrieverRun
@@ -19,29 +20,25 @@ class ClimateQARetriever(BaseRetriever):
 
         # Check if all elements in the list are either IPCC or IPBES
         assert isinstance(self.sources, list)
-        assert self.k_total > self.k_summary, "k_total should be greater than k_summary"
+        # assert self.k_total > self.k_summary, "k_total should be greater than k_summary"
 
         # Prepare base search kwargs
         filters = {}
 
         filters["source"] = { "$in": self.sources}
 
-        # Build with pinecone
-        docs_summaries = self.vectorstore.similarity_search_with_score(query=query, k=self.k_summary)
-        docs_summaries = [x for x in docs_summaries if x[1] > self.threshold]
+        docs = self.vectorstore.similarity_search_with_score(query=query, k=self.k_total, filter=self.filter)
 
-        k_full = self.k_total - len(docs_summaries)
-        docs_full = self.vectorstore.similarity_search_with_score(query=query, k=k_full)
-
-        # Concatenate documents
-        docs = docs_summaries + docs_full
-
         # Add score to metadata
         results = []
         for i, (doc, score) in enumerate(docs):
+            # skip sources below the threshold
+            if score < self.threshold:
+                continue
             doc.metadata["similarity_score"] = score
             doc.metadata["content"] = doc.page_content
             doc.metadata["chunk_type"] = "text"
             doc.metadata["page_number"] = 1
             results.append(doc)
         return results
+
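The retrieval change is easiest to see in isolation: instead of a two-pass summary/full search, a single similarity_search_with_score call now returns up to k_total candidates, and scores below threshold are dropped afterwards. A self-contained sketch of that post-filtering step (the documents and scores are made up):

    def keep_above_threshold(docs_with_scores, threshold=0.01):
        results = []
        for doc, score in docs_with_scores:
            # skip sources below the threshold
            if score < threshold:
                continue
            results.append(doc)
        return results

    print(keep_above_threshold([("doc A", 0.80), ("doc B", 0.005)]))  # ['doc A']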
climateqa/engine/vectorstore.py CHANGED
@@ -1,74 +1,166 @@
-# Pinecone
-# More info at https://docs.pinecone.io/docs/langchain
-# And https://python.langchain.com/docs/integrations/vectorstores/pinecone
-# import os
-# from pinecone import Pinecone
-# from langchain_community.vectorstores import Pinecone as PineconeVectorstore
 
-# # LOAD ENVIRONMENT VARIABLES
-# try:
-#     from dotenv import load_dotenv
-#     load_dotenv()
-# except:
-#     pass
+from google.cloud import storage
+storage_client = storage.Client()
+#storage_client = storage.Client.create_anonymous_client()
+bucket_name = "docs-axio-clara"
 
-# def get_pinecone_vectorstore(embeddings,text_key = "content"):
-
-#     pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
-#     index = pc.Index(os.getenv("PINECONE_API_INDEX"))
-
-#     vectorstore = PineconeVectorstore(
-#         index, embeddings, text_key,
-#     )
-#     return vectorstore
-
-
-from langchain_community.vectorstores import Annoy
+from langchain_pinecone import PineconeVectorStore
 
 from langchain_community.document_loaders import TextLoader
 from langchain_text_splitters import CharacterTextSplitter
 from climateqa.engine.embeddings import get_embeddings_function
 embeddings_function = get_embeddings_function()
+
+
+
+index_name = "clara-index"
+namespace = "my-namespace"
+
+
 import os
 import pdfplumber
 
+
+def get_categories_files():
+
+    finale = {}
+    listCat = []
+
+    CAT_DIR = "config_categorie/"
+    FOLDER_PATH = "."
+
+    bucket = storage_client.get_bucket(bucket_name)
+
+    blob = bucket.blob(CAT_DIR+"categories.csv")
+    lines = blob.download_as_text().split("\n")
+
+    blob_label = bucket.blob(CAT_DIR+"libelle.csv")
+    lines_label = blob_label.download_as_text().split("\n")
+
+    labels = {}
+    # fetch the labels
+    first = True
+    for line in lines_label:
+        # skip the header line
+        if first:
+            first = False
+            continue
+        lab = line.split(";")[-1].replace("\n","").replace("\r","").replace("\t","")
+        labels[line.split(";")[0]] = lab
+        print( "label :"+lab )
+
+    # first pass: collect the existing categories
+    first = True
+    for line in lines:
+        # skip the header line
+        if first:
+            first = False
+            continue
+        categories = line.split(";")[-1].split(" ")
+
+        for cat in categories:
+            categ = cat.replace(" ","").replace("\n","").replace("\r","").replace("\t","")
+
+            # if the category has no label, fall back to the technical field
+            try:
+                test = labels[categ]  # raises if the key does not exist
+            except:
+                labels[categ] = categ
+
+            # add the category (its label) to the list if not seen yet
+            if not labels[categ] in listCat:
+                print(" - ["+categ+"] > "+ labels[categ] )
+                listCat.append(labels[categ])
+
+    # initialize the final structure
+    for cat in listCat:
+        finale[cat] = []
+    finale["AllCat"] = listCat
+
+    # second pass: map each file to its categories
+    first = True
+    for line in lines:
+        # skip the header line
+        if first:
+            first = False
+            continue
+        fichier = line.split(";")[0]
+        categories = line.split(";")[-1].split(" ")
+        listCat = []
+
+        # put the file into its associated categories
+        for cat in categories:
+            categ = cat.replace(" ","").replace("\n","").replace("\r","").replace("\t","")
+            print( fichier +" dans "+ labels[categ] +"("+categ+")")
+            finale[labels[categ]].append(fichier)
+
+    return finale
+
+def get_PDF_Names_from_GCP():
+
+    listName = []
+    # fetch the files from GCP storage
+    blobs = storage_client.list_blobs(bucket_name, prefix='sources/')
+    for blob in blobs:
+        listName.append(blob.name)
+    return listName
+
+def get_PDF_from_GCP(folder_path, pdf_folder="./PDF"):
+
+    # fetch the files from GCP storage
+    #blobs = storage_client.list_blobs(bucket_name, prefix='sources/')
+    #for blob in blobs:
+
+    #    print( "\n"+blob.name+":")
+    #    print( " <- Téléchargement Depuis GCP")
+    #    blob.download_to_filename(pdf_folder+"/"+blob.name)
+
+    # extract the text from the PDF files
+    print(" >>> Extraction PDF")
+    for pdf_file in os.listdir(pdf_folder):
+        if pdf_file.startswith("."):
+            continue
+        print(" > "+pdf_folder+"/"+pdf_file)
+        pdf_total_pages = 0
+        with pdfplumber.open(pdf_folder+"/"+pdf_file) as pdf:
+            pdf_total_pages = len(pdf.pages)
+
+        # memory leak with large files:
+        # reopening the file every N pages seems to fix the problem
+        N_page = 300
+        page_number = 0
+        while page_number < pdf_total_pages:
+
+            print(" -- ouverture du fichier pour "+str(N_page)+ " pages --" )
+            with pdfplumber.open(pdf_folder+"/"+pdf_file) as pdf:
+
+                npage = 0
+                while (npage < N_page and page_number < pdf_total_pages):
+
+                    print(" >>> "+str(page_number+1))
+                    f = open(folder_path+"/"+pdf_file+"..:page:.."+str(page_number+1), "w")
+                    for char_pdf in pdf.pages[page_number].chars:
+                        f.write(char_pdf["text"])
+                    f.close()
+
+                    npage = npage + 1
+                    page_number = page_number + 1
+
+
+        print(" X removing: " + blob.name )
+        os.remove(pdf_folder+"/"+blob.name)
+
+
 def build_vectores_stores(folder_path, pdf_folder="./PDF", vectors_path = "./vectors"):
 
-    if os.path.isfile(vectors_path+"/index.annoy"):
-        return Annoy.load_local(vectors_path, embeddings_function, allow_dangerous_deserialization=True)
-
-    # Extract text from PDF files
-    print("Extraction PDF ...")
-    for pdf_file in os.listdir(pdf_folder):
-        if pdf_file.startswith("."):
-            continue
-        print(" > "+pdf_folder+"/"+pdf_file)
-        with pdfplumber.open(pdf_folder+"/"+pdf_file) as pdf:
-            for pdf_page in pdf.pages:
-                f = open(folder_path+"/"+pdf_file+" page "+str(pdf_page.page_number), "w")
-                # f.write(pdf_file+" page "+str(pdf_page.page_number))
-                for char_pdf in pdf_page.chars:
-                    f.write(char_pdf["text"])
-                f.close()
-
-    docs = []
-    vector_store_from_docs = ()  # create a new Annoy object, or reuse the one already initialized in your existing code
-    for filename in os.listdir(folder_path):
-        if filename.startswith("."):
-            continue
-        file_path = os.path.join(folder_path, filename)
-        if os.path.isfile(file_path):
-            loader = TextLoader(file_path)
-            documents = loader.load()
-
-            for doc in documents:
-                if (doc.metadata):
-                    doc.metadata["ax_page"] = doc.metadata['source'].split(" ")[-1]
-                    doc.metadata["ax_name"] = doc.metadata['source'].split(" ")[0].split("/")[-1]
-                    doc.metadata["ax_url"] = "https://storage.googleapis.com/docs-axio-clara/sources/"+doc.metadata["ax_name"]
-
-            text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-            docs += text_splitter.split_documents(documents)
-            vector_store_from_docs = Annoy.from_documents(docs, embeddings_function)
-    vector_store_from_docs.save_local(vectors_path)
-    return vector_store_from_docs
+    vectorstore = PineconeVectorStore(
+        index_name=index_name,
+        embedding=embeddings_function,
+        #namespace=namespace
+    )
+    print(" Vectorisation ...")
+    return vectorstore
+
+
+    print("MISSING VECTORS")
+    exit(0)
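get_categories_files() implies a specific layout for the two CSV files under config_categorie/: a header line that is skipped, semicolon-separated fields, and space-separated category codes in the last field. The concrete rows below are hypothetical, inferred from the parsing code:

    # config_categorie/categories.csv  ->  <file>;<category codes, space-separated>
    categories_csv = "fichier;categories\nrapport_risques.pdf;RISK ADAPT"

    # config_categorie/libelle.csv     ->  <category code>;<human-readable label>
    libelle_csv = "code;libelle\nRISK;Risques"

    line = categories_csv.split("\n")[1]   # skip the header line
    fichier = line.split(";")[0]
    codes = line.split(";")[-1].split(" ")
    print(fichier, codes)  # rapport_risques.pdf ['RISK', 'ADAPT']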
climateqa/engine/vectorstore_annoy.py ADDED
@@ -0,0 +1,187 @@
+
+from google.cloud import storage
+#storage_client = storage.Client()
+storage_client = storage.Client.create_anonymous_client()
+bucket_name = "docs-axio-clara"
+
+
+from langchain_community.vectorstores import Annoy
+
+from langchain_community.document_loaders import TextLoader
+from langchain_text_splitters import CharacterTextSplitter
+from climateqa.engine.embeddings import get_embeddings_function
+embeddings_function = get_embeddings_function()
+
+
+import os
+import pdfplumber
+
+def get_PDF_Names_from_GCP():
+
+    listName = []
+    # fetch the files from GCP storage
+    blobs = storage_client.list_blobs(bucket_name, prefix='sources/')
+    for blob in blobs:
+        listName.append(blob.name)
+    return listName
+
+def get_PDF_from_GCP(folder_path, pdf_folder="./PDF"):
+
+    # fetch the files from GCP storage
+    blobs = storage_client.list_blobs(bucket_name, prefix='sources/')
+    for blob in blobs:
+
+        print( "\n"+blob.name+":")
+        print( " <- Téléchargement Depuis GCP")
+        blob.download_to_filename(pdf_folder+"/"+blob.name)
+
+    # extract the text from the PDF files
+    print(" >>> Extraction PDF")
+    for pdf_file in os.listdir(pdf_folder):
+        if pdf_file.startswith("."):
+            continue
+        print(" > "+pdf_folder+"/"+pdf_file)
+        pdf_total_pages = 0
+        with pdfplumber.open(pdf_folder+"/"+pdf_file) as pdf:
+            pdf_total_pages = len(pdf.pages)
+
+        # memory leak with large files:
+        # reopening the file every N pages seems to fix the problem
+        N_page = 300
+        page_number = 0
+        while page_number < pdf_total_pages:
+
+            print(" -- ouverture du fichier pour "+str(N_page)+ " pages --" )
+            with pdfplumber.open(pdf_folder+"/"+pdf_file) as pdf:
+
+                npage = 0
+                while (npage < N_page and page_number < pdf_total_pages):
+
+                    print(" >>> "+str(page_number+1))
+                    f = open(folder_path+"/"+pdf_file+"..:page:.."+str(page_number+1), "w")
+                    for char_pdf in pdf.pages[page_number].chars:
+                        f.write(char_pdf["text"])
+                    f.close()
+
+                    npage = npage + 1
+                    page_number = page_number + 1
+
+
+        print(" X removing: " + blob.name )
+        os.remove(pdf_folder+"/"+blob.name)
+
+
+def build_vectores_stores(folder_path, pdf_folder="./PDF", vectors_path = "./vectors"):
+
+    if os.path.isfile(vectors_path+"/index.annoy"):
+        return Annoy.load_local(vectors_path, embeddings_function, allow_dangerous_deserialization=True)
+
+    try:
+        os.mkdir(vectors_path)
+    except:
+        pass
+
+    try:
+        # fetch the files from GCP storage
+        blobs = storage_client.list_blobs(bucket_name, prefix='testvectors/')
+        for blob in blobs:
+
+            print( "\n"+blob.name.split("/")[-1]+":")
+            print( " <- Téléchargement Depuis GCP")
+            blob.download_to_filename(vectors_path+"/"+blob.name.split("/")[-1])
+    except:
+        pass
+
+    # TODO A FUNCTION FOR THAT TO AVOID CODE DUPLICATION
+    if os.path.isfile(vectors_path+"/index.annoy"):
+        return Annoy.load_local(vectors_path, embeddings_function, allow_dangerous_deserialization=True)
+
+    print("MISSING VECTORS")
+    exit(0)
+
+    # get_PDF_from_GCP(folder_path, pdf_folder)
+
+    # print(" Vectorisation ...")
+
+    # docs = []
+    # vector_store_from_docs = ()  # create a new Annoy object, or reuse the one already initialized in your existing code
+    # for filename in os.listdir(folder_path):
+    #     if filename.startswith("."):
+    #         continue
+    #     file_path = os.path.join(folder_path, filename)
+    #     if os.path.isfile(file_path):
+    #         loader = TextLoader(file_path)
+    #         documents = loader.load()
+    #
+    #         for doc in documents:
+    #             if (doc.metadata):
+    #                 doc.metadata["ax_page"] = doc.metadata['source'].split("..:page:..")[-1]
+    #                 doc.metadata["ax_name"] = doc.metadata['source'].split("..:page:..")[0].split("/")[-1]
+    #                 doc.metadata["ax_url"] = "https://storage.googleapis.com/docs-axio-clara/sources/"+doc.metadata["ax_name"]
+    #
+    #         text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+    #         docs += text_splitter.split_documents(documents)
+    #         vector_store_from_docs = Annoy.from_documents(docs, embeddings_function)
+    # vector_store_from_docs.save_local(vectors_path)
+    # return vector_store_from_docs
+
+
+
+
+
+
+
+# Pinecone
+# More info at https://docs.pinecone.io/docs/langchain
+# And https://python.langchain.com/docs/integrations/vectorstores/pinecone
+#import os
+#from pinecone import Pinecone
+#from langchain_community.vectorstores import Pinecone as PineconeVectorstore
+
+# LOAD ENVIRONMENT VARIABLES
+#try:
+#    from dotenv import load_dotenv
+#    load_dotenv()
+#except:
+#    pass
+
+
+#def get_pinecone_vectorstore(embeddings, text_key="content"):
+
+#    # initialize pinecone
+#    pinecone.init(
+#        api_key=os.getenv("PINECONE_API_KEY"),  # find at app.pinecone.io
+#        environment=os.getenv("PINECONE_API_ENVIRONMENT"),  # next to api key in console
+#    )
+
+#    index_name = os.getenv("PINECONE_API_INDEX")
+#    vectorstore = Pinecone.from_existing_index(index_name, embeddings, text_key=text_key)
+
+#    return vectorstore
+
+#    pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
+#    index = pc.Index(os.getenv("PINECONE_API_INDEX"))
+
+#    vectorstore = PineconeVectorstore(
+#        index, embeddings, text_key,
+#    )
+#    return vectorstore
+
+
+
+# def get_pinecone_retriever(vectorstore, k=10, namespace="vectors", sources=["IPBES","IPCC"]):
+
+#     assert isinstance(sources, list)
+
+#     # Check if all elements in the list are either IPCC or IPBES
+#     filter = {
+#         "source": { "$in": sources},
+#     }
+
+#     retriever = vectorstore.as_retriever(search_kwargs={
+#         "k": k,
+#         "namespace": "vectors",
+#         "filter": filter
+#     })
+
+#     return retriever
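This archived module keeps the local Annoy path alive; its round-trip is essentially the following sketch, assuming langchain_community and annoy are installed, with a toy text standing in for the PDF extracts:

    from langchain_community.vectorstores import Annoy
    from langchain_community.embeddings import HuggingFaceBgeEmbeddings

    embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-m3",
                                          encode_kwargs={"normalize_embeddings": True})

    store = Annoy.from_texts(["a toy document"], embeddings)
    store.save_local("./vectors")   # writes index.annoy plus metadata
    store = Annoy.load_local("./vectors", embeddings,
                             allow_dangerous_deserialization=True)
    print(store.similarity_search_with_score("toy", k=1))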
requirements.txt CHANGED
@@ -1,5 +1,5 @@
+google-cloud-storage==2.16.0
 gradio==4.19.1
-gunicorn==22.0.0
 python-dotenv==1.0.0
 langchain==0.1.10
 langchain_openai==0.0.6
@@ -10,5 +10,4 @@ msal
 pyalex==0.13
 networkx==3.2.1
 pyvis==0.3.2
-annoy==1.17.3
-pdfplumber
+annoy==1.17.3
style.css CHANGED
@@ -3,6 +3,91 @@
     --user-image: url('https://ih1.redbubble.net/image.4776899543.6215/st,small,507x507-pad,600x600,f8f8f8.jpg');
 } */
 
+.fordataonly {
+    display: none !important
+}
+
+
+label {
+    color: #000000 !important;
+}
+
+strong {
+    color: #888888 !important;
+}
+
+.logo-axio {
+    float: right;
+    position: absolute;
+    right: 0px;
+}
+
+
+/* text color */
+p {
+    color: black !important;
+}
+li {
+    color: black !important;
+}
+
+button.selected {
+    border-radius: 20px !important;
+}
+button:hover {
+    color: #ffc000 !important;
+}
+
+
+/* panel/block background */
+.panel {
+    background-color: #eeeeee !important;
+    border: 0px;
+}
+.block {
+    background-color: #eeeeee !important;
+}
+
+/* bot background */
+.bot {
+    background-color: #eeeeee !important;
+}
+
+/* avatar at the start of a response */
+.avatar-container {
+    align-self: baseline !important;
+    margin-top: 35px;
+}
+
+
+
+/* user background */
+.user {
+    background-color: #d2d2d2 !important;
+}
+textarea {
+    background-color: #d2d2d2 !important;
+    color: black !important;
+}
+
+
+/* app background */
+gradio-app {
+    background-color: #ffffff !important;
+}
+.gradio-container {
+    background-color: #ffffff !important;
+    max-width: 100% !important;
+    width: 100% !important;
+}
+
+
+.a-propos {
+    margin: 20px !important;
+}
+
+
+
 .telecharger {
     border: 1px solid;
     padding: 5px;
@@ -43,7 +128,7 @@ body.dark .warning-box * {
 
 
 body.dark .tip-box * {
-    color: black !important;
+    color: rgb(216, 216, 216) !important;
 }
 
test CHANGED
@@ -19,8 +19,7 @@ ENV HOME=/home/user \
     GRADIO_NUM_PORTS=1 \
     GRADIO_SERVER_NAME=0.0.0.0 \
     GRADIO_THEME=huggingface \
-    SYSTEM=spaces \
-    PORT=7860
+    SYSTEM=spaces
 
 # Set the working directory to the user's home directory
 WORKDIR $HOME/app
@@ -28,8 +27,6 @@ WORKDIR $HOME/app
 # Copy the current directory contents into the container at $HOME/app setting the owner to the user
 COPY --chown=user . $HOME/app
 
-#CMD ["python","setup.py"]
+CMD ["python","setup.py"]
 
-#CMD ["python", "app.py"]
-
-CMD gunicorn -b 0.0.0.0:$PORT app:demo
+CMD ["python", "app.py"]