xavierbarbier committed on
Commit
a3ad85c
·
verified ·
1 Parent(s): c9862ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -25
app.py CHANGED
@@ -52,15 +52,29 @@ chunk_size = 2048
52
 
53
  # creating a pdf reader object
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  print("Finish the model init process")
56
 
57
 
58
- dir_ = Path(__file__).parent
59
 
60
- p = pipeline(
61
- "document-question-answering",
62
- model="impira/layoutlm-document-qa",
63
- )
64
 
65
  def get_text_embedding(text):
66
 
@@ -68,24 +82,7 @@ def get_text_embedding(text):
68
 
69
  def qa(question: str, doc: str) -> str:
70
 
71
- reader = PdfReader(doc)
72
-
73
- text = []
74
- for p in np.arange(0, len(reader.pages), 1):
75
- page = reader.pages[int(p)]
76
-
77
- # extracting text from page
78
- text.append(page.extract_text())
79
-
80
- text = ' '.join(text)
81
-
82
- chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
83
-
84
- text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])
85
-
86
- d = text_embeddings.shape[1]
87
- index = faiss.IndexFlatL2(d)
88
- index.add(text_embeddings)
89
 
90
  question_embeddings = np.array([get_text_embedding(question)])
91
 
@@ -102,8 +99,8 @@ def qa(question: str, doc: str) -> str:
102
  [INST] Requête: {question} [/INST]
103
  Réponse:
104
  """
105
- outputs = model.generate(prompt=prompt, temp=0.5, top_k = 40, top_p = 1, max_tokens = max_new_tokens)
106
- return "".join(outputs)
107
 
108
 
109
  demo = gr.Interface(
 
52
 
53
  # creating a pdf reader object
54
 
55
+ reader = PdfReader("./resource/NGAP 01042024.pdf")
56
+
57
+ text = []
58
+ for p in np.arange(0, len(reader.pages), 1):
59
+ page = reader.pages[int(p)]
60
+
61
+ # extracting text from page
62
+ text.append(page.extract_text())
63
+
64
+ text = ' '.join(text)
65
+
66
+ chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
67
+
68
+ text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])
69
+
70
+ d = text_embeddings.shape[1]
71
+ index = faiss.IndexFlatL2(d)
72
+ index.add(text_embeddings)
73
+
74
  print("Finish the model init process")
75
 
76
 
 
77
 
 
 
 
 
78
 
79
  def get_text_embedding(text):
80
 
 
82
 
83
  def qa(question: str, doc: str) -> str:
84
 
85
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  question_embeddings = np.array([get_text_embedding(question)])
88
 
 
99
  [INST] Requête: {question} [/INST]
100
  Réponse:
101
  """
102
+ #outputs = model.generate(prompt=prompt, temp=0.5, top_k = 40, top_p = 1, max_tokens = max_new_tokens)
103
+ return prompt #"".join(outputs)
104
 
105
 
106
  demo = gr.Interface(