vikee committed on
Commit
9228cad
1 Parent(s): 791e079

Apps isolation

Browse files
Files changed (2) hide show
  1. app-memora.py +171 -0
  2. app-news-content.py +105 -0
app-memora.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ import streamlit as st
4
+ from googlesearch import search
5
+ import pandas as pd
6
+ import os
7
+ from rag_sec.document_search_system import DocumentSearchSystem
8
+ from chainguard.blockchain_logger import BlockchainLogger
9
+ from PIL import Image
10
+ from itertools import cycle
11
+
12
# Single logger instance shared by the metadata and integrity-check actions.
blockchain_logger = BlockchainLogger()

# Uploaded files are written beneath this directory; create it up front.
UPLOAD_DIR = "uploaded_files"
Path(UPLOAD_DIR).mkdir(parents=True, exist_ok=True)
18
+
19
# Initialize DocumentSearchSystem
@st.cache_resource
def initialize_system():
    """Create the DocumentSearchSystem and load its documents.

    Connection settings are read from environment variables so that the
    database credentials are not committed to source control:

    * ``NEO4J_URI``      - optional, defaults to the project's hosted instance.
    * ``NEO4J_USER``     - optional, defaults to ``"neo4j"``.
    * ``NEO4J_PASSWORD`` - required.

    Returns:
        The initialized ``DocumentSearchSystem`` after
        ``retriever.load_documents()`` has been called on it.

    Raises:
        RuntimeError: If ``NEO4J_PASSWORD`` is not set.
    """
    neo4j_password = os.environ.get("NEO4J_PASSWORD")
    if not neo4j_password:
        # Fail loudly with an actionable message instead of attempting a
        # connection with missing credentials.
        raise RuntimeError(
            "NEO4J_PASSWORD is not set; export it before starting the app."
        )

    system = DocumentSearchSystem(
        neo4j_uri=os.environ.get("NEO4J_URI", "neo4j+s://0ca71b10.databases.neo4j.io"),
        neo4j_user=os.environ.get("NEO4J_USER", "neo4j"),
        neo4j_password=neo4j_password,
    )
    system.retriever.load_documents()
    return system
30
+
31
# Initialize the system
system = initialize_system()

st.title("Memora: Secure File Upload and Search with Blockchain & Neo4j")
st.subheader("Personalized news and global updates at your fingertips")

# File Upload Section
uploaded_files = st.file_uploader(
    "Upload your files",
    accept_multiple_files=True,
    type=['jpg', 'jpeg', 'png', 'mp4', 'avi'],
)

for file_idx, uploaded_file in enumerate(uploaded_files or []):
    # The file name is supplied by the client. Keep only its final path
    # component so a name such as "../../app.py" cannot escape UPLOAD_DIR.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        st.error(f"Skipping file with an invalid name: {uploaded_file.name!r}")
        continue

    # Save file locally
    file_path = os.path.join(UPLOAD_DIR, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    st.success(f"File saved locally: {file_path}")

    # Display uploaded file details
    if uploaded_file.type.startswith('image'):
        image = Image.open(uploaded_file)
        st.image(image, caption=safe_name, use_column_width=True)

    # Metadata Input. Explicit keys keep widget IDs unique even when two
    # uploads share the same file name.
    album = st.text_input(
        f"Album for {safe_name}", "Default Album", key=f"album_{file_idx}"
    )
    tags = st.text_input(
        f"Tags for {safe_name} (comma-separated)", "", key=f"tags_{file_idx}"
    )

    # Log Metadata and Transaction
    if st.button(f"Log Metadata for {safe_name}", key=f"log_{file_idx}"):
        # Strip whitespace and drop empty entries so "" yields [] rather
        # than [''] and "a, b" yields ['a', 'b'] rather than ['a', ' b'].
        tag_list = [tag.strip() for tag in tags.split(',') if tag.strip()]
        metadata = {"file_name": safe_name, "tags": tag_list, "album": album}
        blockchain_details = blockchain_logger.log_data(metadata)
        blockchain_hash = blockchain_details.get("block_hash", "N/A")

        # Use Neo4jHandler from DocumentSearchSystem to log the transaction.
        # NOTE(review): the raw comma-separated string is passed here, as in
        # the original call, while the blockchain metadata gets a list.
        # Confirm which form log_relationships expects.
        system.neo4j_handler.log_relationships(safe_name, tags, blockchain_hash, [album])
        st.write(f"Metadata logged successfully! Blockchain Details: {blockchain_details}")
65
+
66
# Blockchain integrity check, run on demand.
if st.button("Validate Blockchain Integrity"):
    integrity_label = "Valid ✅" if blockchain_logger.is_blockchain_valid() else "Invalid ❌"
    st.write("Blockchain Integrity:", integrity_label)

# Document Search Section
st.subheader("Search Documents")

# Google search for news about the user.
st.subheader("1. Latest News About You")
user_name = st.text_input(
    "Enter your name or handle to search for recent news",
    value="Talex Maxim",
)

if st.button("Search News About Me"):
    if not user_name:
        st.warning("Please enter your name or handle to search.")
    else:
        st.write(f"Searching Google for news about **{user_name}**...")
        try:
            found_urls = list(search(user_name, num_results=5))
            if not found_urls:
                st.warning("No recent news found about you.")
            else:
                st.success(f"Top {len(found_urls)} results for '{user_name}':")
                # One-column table of the result URLs.
                st.dataframe(pd.DataFrame({"URL": found_urls}))
        except Exception as e:
            st.error(f"An error occurred during the search: {str(e)}")
95
+ # Google Search: Global News Categories
96
+ categories = ["Technology", "Sports", "Politics", "Entertainment", "Science"]
97
+
98
+ st.title("Global News Insights")
99
+
100
+ # News Results Dictionary
101
+ news_results = {}
102
+
103
+ try:
104
+ # Fetch News for Each Category
105
+ for category in categories:
106
+ try:
107
+ news_results[category] = list(search(f"latest {category} news", num_results=3))
108
+ except Exception as e:
109
+ news_results[category] = [f"Error fetching news: {str(e)}"]
110
+
111
+ # Display Results with Styled Buttons
112
+ for category, articles in news_results.items():
113
+ st.subheader(f"{category} News")
114
+ cols = st.columns(3) # Create 3 columns for the layout
115
+
116
+ if articles and "Error fetching news" not in articles[0]:
117
+ for idx, article in enumerate(articles):
118
+ with cols[idx % 3]: # Cycle through columns
119
+ st.markdown(
120
+ f"""
121
+ <div style="padding: 10px; border: 1px solid #ccc; border-radius: 5px; margin: 10px; text-align: center;">
122
+ <a href="{article}" target="_blank" style="text-decoration: none;">
123
+ <button style="background-color: #c4ccc8; color: white; border: none; padding: 10px 20px; text-align: center; display: inline-block; font-size: 16px; border-radius: 5px;">
124
+ {category}-{idx + 1}
125
+ </button>
126
+ </a>
127
+ </div>
128
+ """,
129
+ unsafe_allow_html=True,
130
+ )
131
+ else:
132
+ st.warning(f"Could not fetch news for **{category}**.")
133
+ except Exception as e:
134
+ st.error(f"An unexpected error occurred: {str(e)}")
135
+
136
+
137
+ # # Display results
138
+ # for category, articles in news_results.items():
139
+ # st.write(f"### Top News in {category}:")
140
+ # for idx, article in enumerate(articles, start=1):
141
+ # st.write(f"{idx}. [Read here]({article})")
142
+ # except Exception as e:
143
+ # st.error(f"An error occurred while fetching global news: {str(e)}")
144
+
145
# Document search against the loaded DocumentSearchSystem.
st.subheader("3. Search Documents")
query = st.text_input("Enter your query (e.g., 'sports news', 'machine learning')")

if st.button("Search Documents"):
    if not query:
        st.warning("Please enter a query to search.")
    else:
        result = system.process_query(query)
        status = result["status"]
        if status == "success":
            st.success("Query processed successfully!")
            st.write("### Query Response:")
            st.write(result["response"])
            st.write("### Retrieved Documents:")
            for doc_number, doc_text in enumerate(result["retrieved_documents"], start=1):
                st.write(f"**Document {doc_number}:**")
                # Show only the first 500 characters of each document.
                st.write(doc_text[:500])
            st.write("### Blockchain Details:")
            st.json(result["blockchain_details"])
        elif status == "no_results":
            st.warning("No relevant documents found for your query.")
        elif status == "rejected":
            st.error(result["message"])

# Optional debug output.
if st.checkbox("Show Debug Information"):
    loaded_count = len(system.retriever.documents)
    st.write(f"Total documents loaded: {loaded_count}")
app-news-content.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from collections import OrderedDict

import requests
import streamlit as st
from transformers import T5Tokenizer, T5ForConditionalGeneration
5
+
6
# Load Models
@st.cache_resource
def load_models():
    """Load the pretrained ``t5-small`` tokenizer and model.

    Returns:
        tuple: ``(tokenizer, model)`` as returned by the respective
        ``from_pretrained`` calls.
    """
    checkpoint = "t5-small"
    return (
        T5Tokenizer.from_pretrained(checkpoint),
        T5ForConditionalGeneration.from_pretrained(checkpoint),
    )
12
+
13
t5_tokenizer, t5_model = load_models()

# API key for NewsAPI. Read from the environment so the secret is not
# committed to source control; it is an empty string when the variable is
# unset, in which case NewsAPI will reject the request.
NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")
17
+
18
# Fetch news articles based on the user query
@st.cache_data(ttl=600)
def fetch_news(user_query):
    """Fetch up to 10 English articles matching ``user_query`` from NewsAPI.

    Results are cached for 10 minutes so repeated queries neither hit the API
    on every rerun nor serve stale news indefinitely.

    Args:
        user_query: Search phrase sent as the ``q`` parameter.

    Returns:
        list[dict]: One ``{'title': ..., 'description': ...}`` dict per
        article that has a non-empty description. An empty list is returned
        when the request fails, the response is not HTTP 200, or the body
        cannot be parsed.
    """
    NEWS_API_URL = "https://newsapi.org/v2/everything"
    params = {
        'q': user_query,
        'apiKey': NEWS_API_KEY,
        'language': 'en',
        'pageSize': 10,  # Fetch 10 articles
        'sortBy': 'relevance',
    }
    try:
        # Without a timeout a stalled connection would hang the app.
        response = requests.get(NEWS_API_URL, params=params, timeout=10)
    except requests.RequestException:
        return []
    if response.status_code != 200:
        return []

    try:
        payload = response.json()
    except ValueError:
        return []

    # 'articles' and 'title' may be present but null, which .get() defaults
    # do not cover, so fall back with `or`.
    articles = payload.get('articles') or []
    return [
        {
            'title': article.get('title') or 'No Title',
            'description': article['description'],
        }
        for article in articles
        if article.get('description')
    ]
40
+
41
# Summarize articles
def summarize_articles(articles):
    """Summarize each article with the T5 model.

    Args:
        articles: Iterable of dicts with ``'title'`` and ``'description'``.

    Returns:
        list[str]: One de-duplicated summary per article, in input order.
    """

    def _summarize_one(article):
        # T5 selects the task from the "summarize:" prefix of the input.
        prompt = f"summarize: Title: {article['title']}. Description: {article['description']}"
        token_ids = t5_tokenizer.encode(
            prompt, return_tensors="pt", max_length=512, truncation=True
        )
        generated = t5_model.generate(
            token_ids,
            max_length=100,
            min_length=50,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True,
        )
        decoded = t5_tokenizer.decode(generated[0], skip_special_tokens=True)
        return remove_redundancy(decoded)

    return [_summarize_one(article) for article in articles]
51
+
52
# Remove redundancy in summaries
def remove_redundancy(summary):
    """Drop repeated sentences from ``summary``, keeping first occurrences.

    Sentences are separated on ``'. '``. The final period is set aside before
    splitting so that a repeated last sentence ("A. B. A.") is recognized as a
    duplicate, and fragments are whitespace-stripped so that "x ." and "x."
    style variants compare equal.

    Args:
        summary: Text to de-duplicate.

    Returns:
        str: The unique sentences joined with ``'. '``, with a trailing period
        if the input had one. Empty or whitespace-only input yields ``""``.
    """
    text = summary.strip()
    ends_with_period = text.endswith('.')
    if ends_with_period:
        text = text[:-1]

    fragments = (fragment.strip() for fragment in text.split('. '))
    # OrderedDict.fromkeys keeps the first occurrence of each sentence in order.
    unique_sentences = list(OrderedDict.fromkeys(f for f in fragments if f))

    result = '. '.join(unique_sentences)
    if result and ends_with_period:
        result += '.'
    return result
57
+
58
# Generate catchy content based on all 10 summaries
def generate_catchy_content(summarized_content):
    """Generate a blog-style text from the article summaries using T5.

    Args:
        summarized_content: Iterable of summary strings.

    Returns:
        str: The decoded model output, or ``""`` when there are no non-empty
        summaries (the model is not invoked in that case).
    """
    insights = [summary for summary in summarized_content if summary]
    if not insights:
        # Nothing to write about; skip generation from an empty prompt.
        return ""

    # Build the prompt explicitly so no source-code indentation leaks into it.
    combined_prompt = (
        "Write a blog post based on these insights:\n" + ', '.join(insights)
    )
    inputs = t5_tokenizer.encode(
        combined_prompt, return_tensors="pt", max_length=512, truncation=True
    )
    outputs = t5_model.generate(
        inputs, max_length=300, length_penalty=2.0, num_beams=4, early_stopping=True
    )
    return t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
67
+
68
# Page header
st.title("Hackathon-Ready News Summarizer & Blog Generator")
st.subheader("Enter a topic to fetch news, summarize and generate engaging content!")

# Topic to search for
user_query = st.text_input("Enter a query (e.g., 'AI trends', 'Climate change impact'):")

if st.button("Fetch, Summarize and Generate"):
    if not user_query:
        st.error("Please enter a query to proceed!")
    else:
        st.info(f"Fetching articles related to: {user_query}")
        with st.spinner("Fetching news articles..."):
            articles = fetch_news(user_query)

        if not articles:
            st.warning("No articles found. Try a different query.")
        else:
            st.success(f"Fetched {len(articles)} articles!")

            # Only the first 4 articles are shown.
            st.subheader("Fetched Articles")
            for number, item in enumerate(articles[:4], start=1):
                st.write(f"**Article {number}:** {item['title']}")
                st.write(f"*Description:* {item['description']}")

            # Every fetched article is summarized; only the first 4
            # summaries are shown.
            st.info("Summarizing articles...")
            summaries = summarize_articles(articles)
            st.subheader("Summarized Articles")
            for number, text in enumerate(summaries[:4], start=1):
                st.write(f"**Summary {number}:** {text}")

            # The blog post is generated from all summaries.
            st.info("Generating blog post...")
            generated_content = generate_catchy_content(summaries)
            st.subheader("Generated Blog Post")
            st.write(generated_content)