Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -27,7 +27,7 @@ import io
|
|
27 |
import requests
|
28 |
import numpy as np
|
29 |
from urllib.parse import quote
|
30 |
-
import PyPDF2
|
31 |
|
32 |
# =============================================================================
|
33 |
# ───────────── EXTERNAL HELP LINKS ─────────────
|
@@ -117,8 +117,10 @@ def preprocess_text(text):
|
|
117 |
return text.strip()
|
118 |
|
119 |
def sanitize_json_text(text):
|
120 |
-
|
121 |
-
|
|
|
|
|
122 |
|
123 |
def extract_pdf_text(pdf_file):
|
124 |
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
@@ -163,7 +165,7 @@ def delete_record(container, record):
|
|
163 |
try:
|
164 |
doc_id = record["id"]
|
165 |
partition_key_value = record.get("pk", doc_id)
|
166 |
-
st.write(f"
|
167 |
container.delete_item(item=doc_id, partition_key=partition_key_value)
|
168 |
return True, f"Record {doc_id} deleted. 🗑️"
|
169 |
except exceptions.CosmosResourceNotFoundError:
|
@@ -392,12 +394,21 @@ def edit_all_documents(container, search_keyword=None):
|
|
392 |
with col_save:
|
393 |
if st.button("💾 Save", key=f"save_{doc['id']}"):
|
394 |
try:
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
updated_doc.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
401 |
success, message = update_record(container, updated_doc)
|
402 |
if success:
|
403 |
st.success(f"Saved {doc['id']}")
|
@@ -446,7 +457,7 @@ def new_item_from_pdf(container, pdf_file):
|
|
446 |
"id": new_id,
|
447 |
"pk": new_id,
|
448 |
"name": pdf_file.name,
|
449 |
-
"content": pdf_text[:1000],
|
450 |
"timestamp": datetime.now().isoformat(),
|
451 |
"type": "pdf_document",
|
452 |
"pdf_data": pdf_base64
|
|
|
27 |
import requests
|
28 |
import numpy as np
|
29 |
from urllib.parse import quote
|
30 |
+
import PyPDF2
|
31 |
|
32 |
# =============================================================================
|
33 |
# ───────────── EXTERNAL HELP LINKS ─────────────
|
|
|
117 |
return text.strip()
|
118 |
|
119 |
def sanitize_json_text(text):
|
120 |
+
# Escape special characters for JSON compatibility
|
121 |
+
text = text.encode().decode('unicode_escape') # Handle existing escapes
|
122 |
+
text = json.dumps(text)[1:-1] # Wrap in quotes and remove them to escape properly
|
123 |
+
return text
|
124 |
|
125 |
def extract_pdf_text(pdf_file):
|
126 |
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
|
|
165 |
try:
|
166 |
doc_id = record["id"]
|
167 |
partition_key_value = record.get("pk", doc_id)
|
168 |
+
st.write(f"Attempting to delete {doc_id} with partition key {partition_key_value}")
|
169 |
container.delete_item(item=doc_id, partition_key=partition_key_value)
|
170 |
return True, f"Record {doc_id} deleted. 🗑️"
|
171 |
except exceptions.CosmosResourceNotFoundError:
|
|
|
394 |
with col_save:
|
395 |
if st.button("💾 Save", key=f"save_{doc['id']}"):
|
396 |
try:
|
397 |
+
# Handle raw text pasted into editor
|
398 |
+
cleaned_content = edited_content.strip()
|
399 |
+
if not (cleaned_content.startswith('{') and cleaned_content.endswith('}')):
|
400 |
+
# If not JSON, assume raw text and wrap it
|
401 |
+
sanitized_text = sanitize_json_text(cleaned_content)
|
402 |
+
updated_doc = doc.copy()
|
403 |
+
updated_doc['content'] = sanitized_text
|
404 |
+
else:
|
405 |
+
# If JSON, parse it directly
|
406 |
+
updated_doc = json.loads(cleaned_content)
|
407 |
+
updated_doc['id'] = doc['id']
|
408 |
+
updated_doc['pk'] = doc.get('pk', doc['id'])
|
409 |
+
for field in ['_ts', '_rid', '_self', '_etag', '_attachments']:
|
410 |
+
updated_doc.pop(field, None)
|
411 |
+
|
412 |
success, message = update_record(container, updated_doc)
|
413 |
if success:
|
414 |
st.success(f"Saved {doc['id']}")
|
|
|
457 |
"id": new_id,
|
458 |
"pk": new_id,
|
459 |
"name": pdf_file.name,
|
460 |
+
"content": pdf_text[:1000],
|
461 |
"timestamp": datetime.now().isoformat(),
|
462 |
"type": "pdf_document",
|
463 |
"pdf_data": pdf_base64
|