Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,7 @@ load_dotenv()
|
|
| 7 |
|
| 8 |
from flask import Flask, jsonify, render_template, request
|
| 9 |
import requests, json
|
|
|
|
| 10 |
|
| 11 |
# import nltk
|
| 12 |
# nltk.download("punkt")
|
|
@@ -100,6 +101,27 @@ def clearKBUploadDirectory(uploads_dir):
|
|
| 100 |
except Exception as e:
|
| 101 |
print('Failed to delete %s. Reason: %s' % (file_path, e))
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
def loadKB(fileprovided, urlProvided, uploads_dir, request):
|
| 105 |
documents = []
|
|
|
|
| 7 |
|
| 8 |
from flask import Flask, jsonify, render_template, request
|
| 9 |
import requests, json
|
| 10 |
+
import PyPDF2
|
| 11 |
|
| 12 |
# import nltk
|
| 13 |
# nltk.download("punkt")
|
|
|
|
| 101 |
except Exception as e:
|
| 102 |
print('Failed to delete %s. Reason: %s' % (file_path, e))
|
| 103 |
|
| 104 |
+
def PDFChunkerWithSeparator(filepath, separator):
    """Split the extracted text of a PDF into Document chunks.

    The text of every page is extracted with PyPDF2, concatenated in page
    order (no extra characters are inserted between pages), and then split
    on ``separator``. Each resulting piece becomes one ``Document`` whose
    metadata records ``filepath`` as its ``source``.

    Args:
        filepath: Path (or file-like object) handed to ``PyPDF2.PdfReader``.
        separator: String to split the full text on. ``None`` keeps the
            ``str.split`` default of splitting on runs of whitespace.

    Returns:
        A list of ``Document`` objects, one per chunk, in text order.

    Raises:
        ValueError: If ``separator`` is the empty string (the same error
            ``str.split`` raises, but reported before the PDF is parsed).
    """
    # Fail fast: str.split("") would raise this anyway, but only after the
    # whole PDF had already been read and its text extracted.
    if separator == "":
        raise ValueError("empty separator")

    # Create the PDF reader object.
    reader = PyPDF2.PdfReader(filepath)
    pages = reader.pages

    # Print the number of pages in the PDF file.
    print(len(pages))

    # Join once instead of growing a string with += on every page.
    # The `or ""` is defensive: a page with no extractable text (e.g. a
    # scanned image) must not break the concatenation.
    # NOTE(review): confirm whether the installed PyPDF2 can return None here.
    content = "".join(page.extract_text() or "" for page in pages)

    doclist = [
        Document(page_content=chunk, metadata={"source": filepath})
        for chunk in content.split(separator)
    ]

    # Debug aid kept from the original: show the third-from-last chunk
    # when there are more than three chunks.
    if len(doclist) > 3:
        print(doclist[-3])
    return doclist
|
| 124 |
+
|
| 125 |
|
| 126 |
def loadKB(fileprovided, urlProvided, uploads_dir, request):
|
| 127 |
documents = []
|