pritish committed on
Commit
0a497cb
1 Parent(s): 4106d45

Added scikit-learn to requirements

Browse files
Files changed (2) hide show
  1. app.py +2 -3
  2. requirements.txt +2 -1
app.py CHANGED
@@ -6,7 +6,6 @@ import tensorflow_hub as hub
6
  import openai
7
  import gradio as gr
8
  import os
9
- from tqdm.auto import tqdm
10
  from sklearn.neighbors import NearestNeighbors
11
 
12
 
@@ -29,7 +28,7 @@ def pdf_to_text(path, start_page=1, end_page=None):
29
 
30
  text_list = []
31
 
32
- for i in tqdm(range(start_page-1, end_page)):
33
  text = doc.load_page(i).get_text("text")
34
  text = preprocess(text)
35
  text_list.append(text)
@@ -84,7 +83,7 @@ class SemanticSearch:
84
 
85
  def get_text_embedding(self, texts, batch=1000):
86
  embeddings = []
87
- for i in tqdm(range(0, len(texts), batch)):
88
  text_batch = texts[i:(i+batch)]
89
  emb_batch = self.use(text_batch)
90
  embeddings.append(emb_batch)
 
6
  import openai
7
  import gradio as gr
8
  import os
 
9
  from sklearn.neighbors import NearestNeighbors
10
 
11
 
 
28
 
29
  text_list = []
30
 
31
+ for i in range(start_page-1, end_page):
32
  text = doc.load_page(i).get_text("text")
33
  text = preprocess(text)
34
  text_list.append(text)
 
83
 
84
  def get_text_embedding(self, texts, batch=1000):
85
  embeddings = []
86
+ for i in range(0, len(texts), batch):
87
  text_batch = texts[i:(i+batch)]
88
  emb_batch = self.use(text_batch)
89
  embeddings.append(emb_batch)
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  PyMuPDF
2
  openai
3
  tensorflow==2.9.2
4
- tensorflow-hub==0.12.0
 
 
1
  PyMuPDF
2
  openai
3
  tensorflow==2.9.2
4
+ tensorflow-hub==0.12.0
5
+ scikit-learn==1.0.2