Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ from rss_processor import fetch_rss_feeds, process_and_store_articles, vector_db
|
|
| 4 |
import logging
|
| 5 |
import time
|
| 6 |
from threading import Thread
|
|
|
|
| 7 |
|
| 8 |
app = Flask(__name__)
|
| 9 |
|
|
@@ -32,9 +33,10 @@ def loading():
|
|
| 32 |
@app.route('/check_feeds', methods=['GET'])
|
| 33 |
def check_feeds():
|
| 34 |
try:
|
| 35 |
-
# Check if vector DB has documents
|
| 36 |
docs = vector_db.similarity_search("news", k=1)
|
| 37 |
if docs:
|
|
|
|
| 38 |
return jsonify({"status": "loaded"})
|
| 39 |
return jsonify({"status": "loading"}), 202
|
| 40 |
except Exception as e:
|
|
@@ -43,28 +45,20 @@ def check_feeds():
|
|
| 43 |
|
| 44 |
@app.route('/index', methods=['GET'])
|
| 45 |
def index():
|
| 46 |
-
#
|
| 47 |
-
while True:
|
| 48 |
-
response = check_feeds()
|
| 49 |
-
if response.status_code == 200 and response.get_json()["status"] == "loaded":
|
| 50 |
-
break
|
| 51 |
-
time.sleep(1) # Check every second
|
| 52 |
-
|
| 53 |
-
stored_docs = vector_db.similarity_search("news", k=1000) # Increased k for all unique articles
|
| 54 |
# Use a dict keyed by title, link, and description hash to keep only unique articles
|
| 55 |
unique_articles = {}
|
| 56 |
for doc in stored_docs:
|
| 57 |
-
import hashlib
|
| 58 |
title = doc.metadata["title"]
|
| 59 |
link = doc.metadata["link"]
|
| 60 |
-
|
| 61 |
-
desc_hash = hashlib.md5(
|
| 62 |
key = f"{title}|{link}|{desc_hash}"
|
| 63 |
if key not in unique_articles:
|
| 64 |
unique_articles[key] = {
|
| 65 |
"title": title,
|
| 66 |
"link": link,
|
| 67 |
-
"description":
|
| 68 |
"category": doc.metadata["category"],
|
| 69 |
"published": doc.metadata["published"],
|
| 70 |
"image": doc.metadata.get("image", "svg"),
|
|
@@ -81,14 +75,14 @@ def index():
|
|
| 81 |
for doc in results:
|
| 82 |
title = doc.metadata["title"]
|
| 83 |
link = doc.metadata["link"]
|
| 84 |
-
|
| 85 |
-
desc_hash = hashlib.md5(
|
| 86 |
key = f"{title}|{link}|{desc_hash}"
|
| 87 |
if key not in unique_search_articles:
|
| 88 |
unique_search_articles[key] = {
|
| 89 |
"title": title,
|
| 90 |
"link": link,
|
| 91 |
-
"description":
|
| 92 |
"category": doc.metadata["category"],
|
| 93 |
"published": doc.metadata["published"],
|
| 94 |
"image": doc.metadata.get("image", "svg"),
|
|
|
|
| 4 |
import logging
|
| 5 |
import time
|
| 6 |
from threading import Thread
|
| 7 |
+
import hashlib
|
| 8 |
|
| 9 |
app = Flask(__name__)
|
| 10 |
|
|
|
|
| 33 |
@app.route('/check_feeds', methods=['GET'])
|
| 34 |
def check_feeds():
|
| 35 |
try:
|
| 36 |
+
# Check if vector DB has documents
|
| 37 |
docs = vector_db.similarity_search("news", k=1)
|
| 38 |
if docs:
|
| 39 |
+
logger.info("Feeds loaded successfully in vector DB")
|
| 40 |
return jsonify({"status": "loaded"})
|
| 41 |
return jsonify({"status": "loading"}), 202
|
| 42 |
except Exception as e:
|
|
|
|
| 45 |
|
| 46 |
@app.route('/index', methods=['GET'])
|
| 47 |
def index():
|
| 48 |
+
stored_docs = vector_db.similarity_search("news", k=1000) # Ensure all unique articles
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
# Use a dict keyed by title, link, and description hash to keep only unique articles
|
| 50 |
unique_articles = {}
|
| 51 |
for doc in stored_docs:
|
|
|
|
| 52 |
title = doc.metadata["title"]
|
| 53 |
link = doc.metadata["link"]
|
| 54 |
+
description = doc.metadata["original_description"]
|
| 55 |
+
desc_hash = hashlib.md5(description.encode()).hexdigest()[:10]
|
| 56 |
key = f"{title}|{link}|{desc_hash}"
|
| 57 |
if key not in unique_articles:
|
| 58 |
unique_articles[key] = {
|
| 59 |
"title": title,
|
| 60 |
"link": link,
|
| 61 |
+
"description": description,
|
| 62 |
"category": doc.metadata["category"],
|
| 63 |
"published": doc.metadata["published"],
|
| 64 |
"image": doc.metadata.get("image", "svg"),
|
|
|
|
| 75 |
for doc in results:
|
| 76 |
title = doc.metadata["title"]
|
| 77 |
link = doc.metadata["link"]
|
| 78 |
+
description = doc.metadata["original_description"]
|
| 79 |
+
desc_hash = hashlib.md5(description.encode()).hexdigest()[:10]
|
| 80 |
key = f"{title}|{link}|{desc_hash}"
|
| 81 |
if key not in unique_search_articles:
|
| 82 |
unique_search_articles[key] = {
|
| 83 |
"title": title,
|
| 84 |
"link": link,
|
| 85 |
+
"description": description,
|
| 86 |
"category": doc.metadata["category"],
|
| 87 |
"published": doc.metadata["published"],
|
| 88 |
"image": doc.metadata.get("image", "svg"),
|