Apps isolation

- app-memora.py +171 -0
- app-news-content.py +105 -0
app-memora.py
ADDED
@@ -0,0 +1,171 @@
from pathlib import Path

import streamlit as st
from googlesearch import search
import pandas as pd
import os
from rag_sec.document_search_system import DocumentSearchSystem
from chainguard.blockchain_logger import BlockchainLogger
from PIL import Image
from itertools import cycle

# Blockchain Logger
blockchain_logger = BlockchainLogger()

# Directory for storing uploaded files
UPLOAD_DIR = "uploaded_files"
os.makedirs(UPLOAD_DIR, exist_ok=True)

# Initialize DocumentSearchSystem
@st.cache_resource
def initialize_system():
    """Initialize the DocumentSearchSystem and load documents."""
    system = DocumentSearchSystem(
        neo4j_uri="neo4j+s://0ca71b10.databases.neo4j.io",
        neo4j_user="neo4j",
        neo4j_password="HwGDOxyGS1-79nLeTiX5bx5ohoFSpvHCmTv8IRgt-lY"
    )
    system.retriever.load_documents()
    return system

# Initialize the system
system = initialize_system()

st.title("Memora: Secure File Upload and Search with Blockchain & Neo4j")
st.subheader("Personalized news and global updates at your fingertips")
# File Upload Section
uploaded_files = st.file_uploader("Upload your files", accept_multiple_files=True, type=['jpg', 'jpeg', 'png', 'mp4', 'avi'])

if uploaded_files:
    for uploaded_file in uploaded_files:
        # Save file locally
        file_path = os.path.join(UPLOAD_DIR, uploaded_file.name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        st.success(f"File saved locally: {file_path}")

        # Display uploaded file details
        if uploaded_file.type.startswith('image'):
            image = Image.open(uploaded_file)
            st.image(image, caption=uploaded_file.name, use_column_width=True)

        # Metadata Input
        album = st.text_input(f"Album for {uploaded_file.name}", "Default Album")
        tags = st.text_input(f"Tags for {uploaded_file.name} (comma-separated)", "")

        # Log Metadata and Transaction
        if st.button(f"Log Metadata for {uploaded_file.name}"):
            metadata = {"file_name": uploaded_file.name, "tags": tags.split(','), "album": album}
            blockchain_details = blockchain_logger.log_data(metadata)
            blockchain_hash = blockchain_details.get("block_hash", "N/A")

            # Use Neo4jHandler from DocumentSearchSystem to log the transaction
            system.neo4j_handler.log_relationships(uploaded_file.name, tags, blockchain_hash, [album])
            st.write(f"Metadata logged successfully! Blockchain Details: {blockchain_details}")

# Blockchain Integrity Validation
if st.button("Validate Blockchain Integrity"):
    is_valid = blockchain_logger.is_blockchain_valid()
    st.write("Blockchain Integrity:", "Valid ✅" if is_valid else "Invalid ❌")
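Note: chainguard.blockchain_logger is a project-local module, not a PyPI package. For readers who want to run the app without it, a minimal stand-in is sketched below; the method names (log_data, is_blockchain_valid) and the block_hash key are taken from the calls above, while the hash-chain internals are assumptions, not the real implementation.

# Hypothetical stand-in for chainguard.blockchain_logger.BlockchainLogger.
# Only log_data() and is_blockchain_valid() are required by app-memora.py.
import hashlib
import json
import time

class BlockchainLogger:
    def __init__(self):
        self.chain = []  # list of block dicts, oldest first

    def _hash(self, data, prev_hash):
        payload = json.dumps(data, sort_keys=True) + prev_hash
        return hashlib.sha256(payload.encode()).hexdigest()

    def log_data(self, data):
        prev_hash = self.chain[-1]["block_hash"] if self.chain else "0" * 64
        block = {
            "index": len(self.chain),
            "timestamp": time.time(),
            "data": data,
            "prev_hash": prev_hash,
            "block_hash": self._hash(data, prev_hash),  # the app reads this key
        }
        self.chain.append(block)
        return block

    def is_blockchain_valid(self):
        # Recompute every hash and check that each block links to the previous one.
        prev_hash = "0" * 64
        for block in self.chain:
            if block["prev_hash"] != prev_hash:
                return False
            if block["block_hash"] != self._hash(block["data"], prev_hash):
                return False
            prev_hash = block["block_hash"]
        return True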
# Document Search Section
st.subheader("Search Documents")

# Google Search: User-Specific News
st.subheader("1. Latest News About You")
user_name = st.text_input("Enter your name or handle to search for recent news", value="Talex Maxim")

if st.button("Search News About Me"):
    if user_name:
        st.write(f"Searching Google for news about **{user_name}**...")
        try:
            results = list(search(user_name, num_results=5))
            if results:
                st.success(f"Top {len(results)} results for '{user_name}':")
                user_news_data = {"URL": results}
                df_user_news = pd.DataFrame(user_news_data)
                st.dataframe(df_user_news)
            else:
                st.warning("No recent news found about you.")
        except Exception as e:
            st.error(f"An error occurred during the search: {str(e)}")
    else:
        st.warning("Please enter your name or handle to search.")

# Google Search: Global News Categories
categories = ["Technology", "Sports", "Politics", "Entertainment", "Science"]

st.title("Global News Insights")

# News Results Dictionary
news_results = {}

try:
    # Fetch News for Each Category
    for category in categories:
        try:
            news_results[category] = list(search(f"latest {category} news", num_results=3))
        except Exception as e:
            news_results[category] = [f"Error fetching news: {str(e)}"]

    # Display Results with Styled Buttons
    for category, articles in news_results.items():
        st.subheader(f"{category} News")
        cols = st.columns(3)  # Create 3 columns for the layout

        if articles and "Error fetching news" not in articles[0]:
            for idx, article in enumerate(articles):
                with cols[idx % 3]:  # Cycle through columns
                    st.markdown(
                        f"""
                        <div style="padding: 10px; border: 1px solid #ccc; border-radius: 5px; margin: 10px; text-align: center;">
                            <a href="{article}" target="_blank" style="text-decoration: none;">
                                <button style="background-color: #c4ccc8; color: white; border: none; padding: 10px 20px; text-align: center; display: inline-block; font-size: 16px; border-radius: 5px;">
                                    {category}-{idx + 1}
                                </button>
                            </a>
                        </div>
                        """,
                        unsafe_allow_html=True,
                    )
        else:
            st.warning(f"Could not fetch news for **{category}**.")
except Exception as e:
    st.error(f"An unexpected error occurred: {str(e)}")


# # Display results
# for category, articles in news_results.items():
#     st.write(f"### Top News in {category}:")
#     for idx, article in enumerate(articles, start=1):
#         st.write(f"{idx}. [Read here]({article})")
# except Exception as e:
#     st.error(f"An error occurred while fetching global news: {str(e)}")
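Note on the data shape: with the googlesearch package used above, search() yields plain URL strings. That is why the app can store an error message string in the same list and detect it later with a substring check on articles[0]. A quick illustration:

# search() from googlesearch yields URL strings (assuming the
# googlesearch-python package, which the num_results keyword suggests).
from googlesearch import search

urls = list(search("latest Technology news", num_results=3))
# e.g. ["https://example.com/story-1", "https://example.com/story-2", ...]
assert all(isinstance(u, str) for u in urls)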
# Document Search
st.subheader("3. Search Documents")
query = st.text_input("Enter your query (e.g., 'sports news', 'machine learning')")

if st.button("Search Documents"):
    if query:
        result = system.process_query(query)
        if result["status"] == "success":
            st.success(f"Query processed successfully!")
            st.write("### Query Response:")
            st.write(result["response"])
            st.write("### Retrieved Documents:")
            for idx, doc in enumerate(result["retrieved_documents"], start=1):
                st.write(f"**Document {idx}:**")
                st.write(doc[:500])  # Display the first 500 characters
            st.write("### Blockchain Details:")
            st.json(result["blockchain_details"])
        elif result["status"] == "no_results":
            st.warning("No relevant documents found for your query.")
        elif result["status"] == "rejected":
            st.error(result["message"])
    else:
        st.warning("Please enter a query to search.")

# Debugging Section
if st.checkbox("Show Debug Information"):
    st.write(f"Total documents loaded: {len(system.retriever.documents)}")
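Note: rag_sec.document_search_system is also project-local. The surface the app depends on can be read off the calls in this file; a minimal fake that boots the UI without Neo4j might look like the sketch below. Class and method names are inferred from the file, the bodies are placeholders.

# Hypothetical fake of rag_sec.document_search_system.DocumentSearchSystem,
# reduced to what app-memora.py actually touches.
class _Retriever:
    def __init__(self):
        self.documents = []      # len() is shown in the debug section

    def load_documents(self):
        self.documents = []      # the real system presumably loads a corpus here

class _Neo4jHandler:
    def log_relationships(self, file_name, tags, block_hash, albums):
        pass                     # the real handler writes the relationships to Neo4j

class DocumentSearchSystem:
    def __init__(self, neo4j_uri, neo4j_user, neo4j_password):
        self.retriever = _Retriever()
        self.neo4j_handler = _Neo4jHandler()

    def process_query(self, query):
        # Contract expected by the UI: "status" is one of "success",
        # "no_results", "rejected"; "success" also carries "response",
        # "retrieved_documents" (list of str) and "blockchain_details";
        # "rejected" carries "message".
        return {"status": "no_results"}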
app-news-content.py
ADDED
@@ -0,0 +1,105 @@
import streamlit as st
import requests
from transformers import T5Tokenizer, T5ForConditionalGeneration
from collections import OrderedDict

# Load Models
@st.cache_resource
def load_models():
    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    model = T5ForConditionalGeneration.from_pretrained("t5-small")
    return tokenizer, model

t5_tokenizer, t5_model = load_models()

# API Key for NewsAPI
NEWS_API_KEY = "66db8e116ae24c49beef53e8b879600a"

# Fetch news articles based on the user query
@st.cache_data
def fetch_news(user_query):
    NEWS_API_URL = "https://newsapi.org/v2/everything"
    params = {
        'q': user_query,
        'apiKey': NEWS_API_KEY,
        'language': 'en',
        'pageSize': 10,  # Fetch 10 articles
        'sortBy': 'relevance',
    }
    response = requests.get(NEWS_API_URL, params=params)
    if response.status_code == 200:
        articles = response.json().get('articles', [])
        return [
            {
                'title': article.get('title', 'No Title'),
                'description': article.get('description', 'No Description')
            }
            for article in articles if article.get('description')
        ]
    return []
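For reference, the slice of the NewsAPI /v2/everything response that fetch_news() reads looks like the sample below (real responses carry more fields; values here are illustrative). Articles with an empty description are dropped by the comprehension's filter.

# Minimal shape of the NewsAPI payload consumed above (illustrative values).
sample_payload = {
    "status": "ok",
    "articles": [
        {"title": "Example headline", "description": "One-line summary."},
        {"title": "Dropped item", "description": None},  # filtered out
    ],
}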
# Summarize articles
def summarize_articles(articles):
    summaries = []
    for article in articles:
        input_text = f"summarize: Title: {article['title']}. Description: {article['description']}"
        inputs = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
        outputs = t5_model.generate(inputs, max_length=100, min_length=50, length_penalty=2.0, num_beams=4, early_stopping=True)
        summary = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
        summaries.append(remove_redundancy(summary))
    return summaries

# Remove redundancy in summaries
def remove_redundancy(summary):
    sentences = summary.split('. ')
    seen = OrderedDict()
    return '. '.join([seen.setdefault(s, s) for s in sentences if s not in seen])
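remove_redundancy() deduplicates repeated sentences while preserving their order: in the comprehension, the "if s not in seen" guard is evaluated before setdefault(), so the first occurrence is kept and later repeats are skipped. A standalone check, no Streamlit or model download needed:

# Same function as above, exercised outside the app.
from collections import OrderedDict

def remove_redundancy(summary):
    sentences = summary.split('. ')
    seen = OrderedDict()
    return '. '.join([seen.setdefault(s, s) for s in sentences if s not in seen])

print(remove_redundancy("rates rose. markets fell. rates rose. outlook unclear"))
# -> rates rose. markets fell. outlook unclear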
# Generate catchy content based on all 10 summaries
def generate_catchy_content(summarized_content):
    combined_prompt = f"""
    Write a blog post based on these insights:
    {', '.join(summarized_content)}
    """
    inputs = t5_tokenizer.encode(combined_prompt, return_tensors="pt", max_length=512, truncation=True)
    outputs = t5_model.generate(inputs, max_length=300, length_penalty=2.0, num_beams=4, early_stopping=True)
    return t5_tokenizer.decode(outputs[0], skip_special_tokens=True)

# Main App
st.title("Hackathon-Ready News Summarizer & Blog Generator")
st.subheader("Enter a topic to fetch news, summarize and generate engaging content!")

# Query Input
user_query = st.text_input("Enter a query (e.g., 'AI trends', 'Climate change impact'):")

if st.button("Fetch, Summarize and Generate"):
    if user_query:
        st.info(f"Fetching articles related to: {user_query}")
        with st.spinner("Fetching news articles..."):
            articles = fetch_news(user_query)
        if articles:
            st.success(f"Fetched {len(articles)} articles!")

            # Display only the first 4 articles
            st.subheader("Fetched Articles")
            for i, article in enumerate(articles[:4], 1):
                st.write(f"**Article {i}:** {article['title']}")
                st.write(f"*Description:* {article['description']}")

            # Summarize All Articles
            st.info("Summarizing articles...")
            summaries = summarize_articles(articles)  # Summarize all 10 articles
            st.subheader("Summarized Articles")
            for i, summary in enumerate(summaries[:4], 1):  # Display summaries for first 4 articles
                st.write(f"**Summary {i}:** {summary}")

            # Generate Blog Post
            st.info("Generating blog post...")
            generated_content = generate_catchy_content(summaries)  # Use all 10 summaries
            st.subheader("Generated Blog Post")
            st.write(generated_content)

        else:
            st.warning("No articles found. Try a different query.")
    else:
        st.error("Please enter a query to proceed!")
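To exercise the fetch -> summarize -> generate pipeline without the UI, the same functions can be called directly. A sketch, assuming a valid NEWS_API_KEY and that Streamlit's cache decorators fall back to plain execution outside a "streamlit run" session (current Streamlit versions warn and run the function uncached):

# Headless smoke test of the pipeline defined in app-news-content.py.
if __name__ == "__main__":
    articles = fetch_news("AI trends")
    summaries = summarize_articles(articles)
    for s in summaries[:4]:
        print("-", s)
    print(generate_catchy_content(summaries))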