smokxy committed
Commit c930c7f · 1 Parent(s): 43ec9c8

Threading implemented, backend configured

.env DELETED
@@ -1,5 +0,0 @@
- # Gemini
- GEMINI_API_KEY=YOUR_API_KEY
-
- # MongoDB
- MONGO_URI=your_mongo_uri
.gitignore ADDED
@@ -0,0 +1,2 @@
+ .env
+ __pycache__/
paperflux/main.py CHANGED
@@ -1,30 +1,24 @@
- import signal
- import sys
+ import logging
  from src.scheduler.jobs import PaperProcessingScheduler
  from src.web.app import PaperFluxUI
- import threading
+ import streamlit as st

- def signal_handler(signum, frame):
-     print("\nShutting down gracefully...")
-     scheduler.stop()
-     sys.exit(0)
+ # logger
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger("paperflux.main")
+ logger.info("Initializing PaperFlux")

- def main():
-     global scheduler
-
-     # Set up signal handlers
-     signal.signal(signal.SIGINT, signal_handler)
-     signal.signal(signal.SIGTERM, signal_handler)
+ # Initialize scheduler
+ logger.info("Creating scheduler")
+ scheduler = PaperProcessingScheduler()

-     # Start the scheduler in a background thread
-     scheduler = PaperProcessingScheduler()
-     scheduler_thread = threading.Thread(target=scheduler.start, daemon=True)
-     scheduler_thread.start()
+ # Start scheduler
+ logger.info("Starting scheduler")
+ scheduler.start()
+ logger.info("Scheduler started")

-     # Create and launch the Gradio interface
-     ui = PaperFluxUI()
-     interface = ui.create_interface()
-     interface.launch(server_name="0.0.0.0", share=True)
-
- if __name__ == "__main__":
-     main()
+ # Create and render UI
+ logger.info("Creating UI")
+ ui = PaperFluxUI(scheduler=scheduler)
+ logger.info("Rendering UI")
+ ui.render_app()
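Note that the new main.py runs at module level, and Streamlit re-executes the whole script on every user interaction, so scheduler.start() as written is liable to fire again on each rerun. A minimal sketch of one way to guard that, assuming the PaperProcessingScheduler and PaperFluxUI interfaces shown above (get_scheduler is a hypothetical helper; st.cache_resource keeps one instance per server process):

    import streamlit as st

    from src.scheduler.jobs import PaperProcessingScheduler
    from src.web.app import PaperFluxUI


    @st.cache_resource  # cached across reruns: created and started exactly once
    def get_scheduler() -> PaperProcessingScheduler:
        scheduler = PaperProcessingScheduler()
        scheduler.start()
        return scheduler


    ui = PaperFluxUI(scheduler=get_scheduler())
    ui.render_app()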
 
 
paperflux/src/config/settings.py CHANGED
@@ -1,9 +1,9 @@
  import os

- MONGODB_URI = "mongodb+srv:"
+ MONGODB_URI = ""
  DB_NAME = "papers_summary_database"
  COLLECTION_NAME = "papers"
  HF_API_URL = "https://huggingface.co/api/daily_papers"
  PDF_BASE_URL = "https://arxiv.org/pdf/{id}.pdf"
  TEMP_DIR = "temp_papers"
- GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+ GEMINI_API_KEY = ""
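With .env deleted from the repo and now git-ignored, the hard-coded empty strings here presumably stand in for values injected at deploy time. A hedged sketch of loading them from the environment instead, using python-dotenv (already in the dependency list); the variable names mirror the deleted .env template:

    import os

    from dotenv import load_dotenv

    load_dotenv()  # reads a local .env if present; harmless no-op otherwise

    MONGODB_URI = os.getenv("MONGO_URI", "")
    GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")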
paperflux/src/models/__init__.py DELETED
File without changes
paperflux/src/models/{paper.py → models.py} RENAMED
File without changes
paperflux/src/scheduler/__init__.py DELETED
File without changes
paperflux/src/scheduler/jobs.py CHANGED
@@ -5,6 +5,8 @@ import os
  from src.services.paper_fetcher import PaperFetcher
  from src.services.paper_analyzer import PaperAnalyzer
  from src.services.database import DatabaseService
+ import threading
+ from concurrent.futures import ThreadPoolExecutor

  class PaperProcessingScheduler:
      def __init__(self):
@@ -13,6 +15,52 @@ class PaperProcessingScheduler:
          self.analyzer = PaperAnalyzer()
          self.db = DatabaseService()
          self._running = False
+         self.paper_processed_event = asyncio.Event()
+         self._lock = threading.Lock()
+         self.refresh_callbacks = []
+
+     def register_refresh_callback(self, callback):
+         """Register a callback to be called when a paper is processed"""
+         with self._lock:
+             self.refresh_callbacks.append(callback)
+
+     def unregister_refresh_callback(self, callback):
+         """Unregister a callback"""
+         with self._lock:
+             if callback in self.refresh_callbacks:
+                 self.refresh_callbacks.remove(callback)
+
+     def _notify_refresh(self):
+         """Notify all registered callbacks that a paper has been processed"""
+         with self._lock:
+             callbacks = list(self.refresh_callbacks)
+
+         for callback in callbacks:
+             try:
+                 callback()
+             except Exception as e:
+                 print(f"Error in refresh callback: {str(e)}")
+
+     def analyze_and_store_paper(self, paper_entry, pdf_path):
+         """Analyze a paper and store it in the database"""
+         try:
+             explanation = self.analyzer.analyze_paper(pdf_path)
+             paper_obj = self.fetcher.parse_paper_data(paper_entry)
+             paper_obj.explanation = explanation
+             self.db.insert_paper(paper_obj)
+
+             self._notify_refresh()
+
+             return True
+         except Exception as e:
+             print(f"Error analyzing paper {paper_entry['paper']['id']}: {str(e)}")
+             return False
+         finally:
+             if os.path.exists(pdf_path):
+                 try:
+                     os.remove(pdf_path)
+                 except OSError:
+                     pass

      async def process_papers(self):
          if self._running:
@@ -24,23 +72,29 @@

          try:
              self.db.clear_collection()
+             # Fetch the list of all papers
              papers = await self.fetcher.fetch_papers()

-             for paper in papers:
-                 if not self._running:  # Check if we should stop
-                     break
-
-                 pdf_path = await self.fetcher.download_paper(paper)
-                 if pdf_path:
-                     try:
-                         explanation = self.analyzer.analyze_paper(pdf_path)
-                         paper_obj = self.fetcher.parse_paper_data(paper)
-                         paper_obj.explanation = explanation
-                         self.db.insert_paper(paper_obj)
-                     finally:
-                         if os.path.exists(pdf_path):
-                             os.remove(pdf_path)
+             # Download all papers in parallel
+             paper_paths = await self.fetcher.download_papers(papers)
+
+             with ThreadPoolExecutor(max_workers=2) as executor:
+                 futures = []
+
+                 for paper in papers:
+                     paper_id = paper["paper"]["id"]
+                     if paper_id in paper_paths:
+                         futures.append(
+                             executor.submit(
+                                 self.analyze_and_store_paper,
+                                 paper,
+                                 paper_paths[paper_id]
+                             )
+                         )

+                 for future in futures:
+                     future.result()
+
          except Exception as e:
              print(f"Error in paper processing: {str(e)}")
          finally:
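The shape of the change above: downloads are awaited in one asyncio batch, analysis fans out to a two-worker thread pool, and each stored paper fires the registered callbacks. A stripped-down, self-contained sketch of that callback-plus-pool pattern (names here are illustrative, not the project's API):

    import threading
    from concurrent.futures import ThreadPoolExecutor


    class CallbackRegistry:
        """Thread-safe listener list, mirroring the scheduler's locking pattern."""

        def __init__(self):
            self._lock = threading.Lock()
            self._callbacks = []

        def register(self, cb):
            with self._lock:
                self._callbacks.append(cb)

        def notify(self):
            with self._lock:
                callbacks = list(self._callbacks)  # snapshot, then call outside the lock
            for cb in callbacks:
                cb()


    registry = CallbackRegistry()
    registry.register(lambda: print("paper processed"))

    with ThreadPoolExecutor(max_workers=2) as pool:
        futures = [pool.submit(registry.notify) for _ in range(3)]
        for f in futures:
            f.result()  # re-raises any exception from the worker thread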
paperflux/src/services/database.py CHANGED
@@ -1,21 +1,70 @@
+ import time
  from pymongo import MongoClient
  from src.config.settings import MONGODB_URI, DB_NAME, COLLECTION_NAME
- from src.models.paper import Paper
+ from src.models.models import Paper
+ import threading

  class DatabaseService:
+     _instance = None
+     _lock = threading.Lock()
+
+     def __new__(cls):
+         with cls._lock:
+             if cls._instance is None:
+                 cls._instance = super(DatabaseService, cls).__new__(cls)
+                 cls._instance._initialized = False
+         return cls._instance
+
      def __init__(self):
+         if self._initialized:
+             return
          self.client = MongoClient(MONGODB_URI)
          self.db = self.client[DB_NAME]
          self.collection = self.db[COLLECTION_NAME]
+         self._cache = {}
+         self._cache_timestamp = 0
+         self._cache_lock = threading.Lock()
+         self._initialized = True

      def clear_collection(self):
          self.collection.delete_many({})
+         with self._cache_lock:
+             self._cache = {}
+             self._cache_timestamp = 0

      def insert_paper(self, paper: Paper):
-         return self.collection.insert_one(paper.to_dict())
+         result = self.collection.insert_one(paper.to_dict())
+         # Invalidate cache
+         with self._cache_lock:
+             self._cache = {}
+             self._cache_timestamp = 0
+         return result

-     def get_all_papers(self):
-         return list(self.collection.find())
+     def get_all_papers(self, max_cache_age_seconds=10):
+         """Get all papers, with caching for better performance"""
+         current_time = time.time()
+
+         # check cache validity
+         with self._cache_lock:
+             if self._cache and current_time - self._cache_timestamp <= max_cache_age_seconds:
+                 return self._cache.get('all_papers', [])
+
+         # cache miss
+         papers = list(self.collection.find())
+
+         # update cache
+         with self._cache_lock:
+             self._cache['all_papers'] = papers
+             self._cache_timestamp = current_time
+
+         return papers

      def get_paper_by_id(self, paper_id: str):
-         return self.collection.find_one({"paper_id": paper_id})
+         """Get a paper by ID with caching"""
+         with self._cache_lock:
+             if 'all_papers' in self._cache:
+                 for paper in self._cache['all_papers']:
+                     if paper['id'] == paper_id:
+                         return paper
+         # cache miss
+         return self.collection.find_one({'id': paper_id})
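For reference, what the singleton-plus-TTL-cache buys, as a quick hypothetical demo (assumes a reachable MongoDB behind MONGODB_URI):

    from src.services.database import DatabaseService

    a = DatabaseService()
    b = DatabaseService()
    assert a is b  # __new__ always hands back the same instance

    papers = a.get_all_papers()        # queries MongoDB and fills the cache
    papers_again = a.get_all_papers()  # served from the cache for up to 10 s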
paperflux/src/services/paper_fetcher.py CHANGED
@@ -4,7 +4,7 @@ import asyncio
  from datetime import datetime
  from typing import List, Tuple, Optional
  from src.config.settings import HF_API_URL, PDF_BASE_URL, TEMP_DIR
- from src.models.paper import Paper
+ from src.models.models import Paper

  class PaperFetcher:
      def __init__(self):
@@ -47,43 +47,20 @@
              print(f"Error downloading {paper_id}: {str(e)}")
              return None

-     async def download_all_papers(self, papers: List[dict]) -> List[Tuple[str, bool]]:
+     async def download_papers(self, papers: List[dict]) -> dict:
          """Download all papers in parallel."""
-         async with aiohttp.ClientSession() as session:
-             tasks = []
-             for paper in papers:
-                 paper_id = paper["paper"]["id"]
-                 pdf_url = PDF_BASE_URL.format(id=paper_id)
-                 clean_id = paper_id.replace("/", "_")
-                 filename = f"{datetime.now().date()}_{clean_id}.pdf"
-                 filepath = os.path.join(TEMP_DIR, filename)
-
-                 tasks.append(self.download_single_paper(session, paper_id, pdf_url, filepath))
-
-             results = await asyncio.gather(*tasks)
-             successful = sum(1 for status in results if status[1])
-             print(f"Downloaded {successful}/{len(papers)} papers successfully")
-             return results
-
-     async def download_single_paper(
-         self,
-         session: aiohttp.ClientSession,
-         paper_id: str,
-         pdf_url: str,
-         filepath: str
-     ) -> Tuple[str, bool]:
-         """Download a single paper with the given session."""
-         try:
-             async with session.get(pdf_url) as response:
-                 if response.status == 200:
-                     content = await response.read()
-                     with open(filepath, "wb") as f:
-                         f.write(content)
-                     return (paper_id, True)
-                 return (paper_id, False)
-         except Exception as e:
-             print(f"Error downloading {paper_id}: {str(e)}")
-             return (paper_id, False)
+         tasks = []
+         for paper in papers:
+             tasks.append(self.download_paper(paper))
+
+         results = await asyncio.gather(*tasks)
+         paper_paths = {}
+         for paper, file_path in zip(papers, results):
+             if file_path:
+                 paper_paths[paper["paper"]["id"]] = file_path
+         successful = len(paper_paths)
+         print(f"Downloaded {successful}/{len(papers)} papers successfully")
+         return paper_paths

      def parse_paper_data(self, paper_entry: dict) -> Paper:
          """Convert raw paper data to Paper model."""
paperflux/src/web/__init__.py DELETED
File without changes
paperflux/src/web/app.py CHANGED
@@ -1,95 +1,237 @@
- import gradio as gr
  from src.services.database import DatabaseService

  class PaperFluxUI:
-     def __init__(self):
          self.db = DatabaseService()
-         self.papers = self.db.get_all_papers()
-         self.current_index = 0
-
-     def get_current_paper(self):
-         if not self.papers:
-             return {
-                 "title": "No papers available",
-                 "explanation": "Please wait for papers to be processed.",
-                 "pdf_url": ""
-             }
-         paper = self.papers[self.current_index]
-         authors = ", ".join([author["name"] for author in paper["authors"]])
-         title = f"# {paper['title']}\n\nAuthors: {authors}"
-         return {
-             "title": title,
-             "explanation": paper["explanation"],
-             "pdf_url": paper["pdf_url"]
          }
-
-     def next_paper(self):
-         if self.current_index < len(self.papers) - 1:
-             self.current_index += 1
-         return self.get_current_paper()
-
-     def previous_paper(self):
-         if self.current_index > 0:
-             self.current_index -= 1
-         return self.get_current_paper()
-
-     def create_interface(self):
-         with gr.Blocks(theme=gr.themes.Base()) as interface:
-             title = gr.Markdown()
-             explanation = gr.Markdown()

-             # Create an HTML component for the download link
-             download_html = gr.HTML()

-             with gr.Row():
-                 prev_btn = gr.Button("Previous Paper")
-                 next_btn = gr.Button("Next Paper")
-
-             def update_ui(paper_data):
-                 download_link = f"""
-                 <div style="text-align: center; margin-top: 10px;">
-                     <a href="{paper_data['pdf_url']}" target="_blank"
-                        style="text-decoration: none;">
-                         <button style="padding: 10px 20px; background-color: #4CAF50;
-                                        color: white; border: none; border-radius: 5px;
-                                        cursor: pointer;">
-                             Download Paper
-                         </button>
-                     </a>
-                 </div>
-                 """
-                 return (
-                     paper_data["title"],
-                     paper_data["explanation"],
-                     download_link
-                 )
-
-             next_btn.click(
-                 fn=lambda: update_ui(self.next_paper()),
-                 outputs=[title, explanation, download_html]
              )

-             prev_btn.click(
-                 fn=lambda: update_ui(self.previous_paper()),
-                 outputs=[title, explanation, download_html]
              )
-
-             # Initialize with first paper
-             paper_data = self.get_current_paper()
-             init_download_link = f"""
-             <div style="text-align: center; margin-top: 10px;">
-                 <a href="{paper_data['pdf_url']}" target="_blank"
-                    style="text-decoration: none;">
-                     <button style="padding: 10px 20px; background-color: #4CAF50;
-                                    color: white; border: none; border-radius: 5px;
-                                    cursor: pointer;">
-                         Download Paper
-                     </button>
-                 </a>
-             </div>
-             """
-             title.value = paper_data["title"]
-             explanation.value = paper_data["explanation"]
-             download_html.value = init_download_link
-
-         return interface

+ import streamlit as st
+ import asyncio
+ import threading
+ import time
+ import logging
+ from src.scheduler.jobs import PaperProcessingScheduler
  from src.services.database import DatabaseService

+ logger = logging.getLogger("paperflux.app")
+
  class PaperFluxUI:
+     def __init__(self, scheduler: PaperProcessingScheduler):
+         logger.info("Initializing PaperFluxUI")
+         self.scheduler = scheduler
          self.db = DatabaseService()
+         self.callback_registered = False
+
+         # Register callback if not already done
+         if not self.callback_registered:
+             logger.info("Registering refresh callback")
+             self.scheduler.register_refresh_callback(self.refresh_callback)
+             self.callback_registered = True
+
+         # Set up the page configuration
+         logger.info("Setting up page config")
+         st.set_page_config(
+             page_title="PaperFlux - Research Paper Summaries",
+             page_icon="📚",
+             layout="wide",
+             initial_sidebar_state="expanded"
+         )
+
+         # Add custom CSS
+         st.markdown("""
+             <style>
+             .main {
+                 padding: 2rem;
              }
+             .paper-title {
+                 font-size: 1.8rem;
+                 font-weight: bold;
+                 margin-bottom: 1rem;
+             }
+             .author-list {
+                 margin-bottom: 1rem;
+                 color: #666;
+             }
+             .summary-header {
+                 font-size: 1.3rem;
+                 font-weight: bold;
+                 margin-top: 1rem;
+                 margin-bottom: 0.5rem;
+             }
+             .explanation-header {
+                 font-size: 1.5rem;
+                 font-weight: bold;
+                 margin-top: 2rem;
+                 margin-bottom: 1rem;
+             }
+             .paper-date {
+                 color: #888;
+                 font-style: italic;
+             }
+             .paper-container {
+                 padding: 1.5rem;
+                 border-radius: 10px;
+                 background-color: #f8f9fa;
+                 margin-bottom: 2rem;
+             }
+             </style>
+         """, unsafe_allow_html=True)
+         logger.info("PaperFluxUI initialization complete")
+
+     def refresh_callback(self):
+         """Callback that will be called when a new paper is processed"""
+         logger.info("Refresh callback triggered")
+         # Use Streamlit's session state to signal a refresh is needed
+         if 'needs_rerun' not in st.session_state:
+             st.session_state['needs_rerun'] = True
+
+     def render_app(self):
+         """Render the main app content"""
+         # App header
+         col1, col2 = st.columns([5, 1])
+         with col1:
+             st.title("📚 PaperFlux")
+             st.subheader("Curated Research Papers with AI-Generated Summaries")
+         with col2:
+             if st.button("Refresh Data", key="refresh_button"):
+                 # Clear session state
+                 if 'current_paper_index' in st.session_state:
+                     del st.session_state['current_paper_index']
+                 # Trigger rerun
+                 st.rerun()
+
+         # Get papers from database
+         papers = self.db.get_all_papers()
+
+         # Show processing status if no papers
+         if not papers:
+             st.info("⏳ Waiting for papers to be processed. Please wait or check back later.")

+             # Add progress indicator
+             if self.scheduler._running:
+                 st.markdown("### 🔄 Paper processing is currently running...")
+                 progress = st.progress(0)
+                 for i in range(100):
+                     # Simulating progress as we don't know the actual progress
+                     time.sleep(0.1)
+                     progress.progress(i + 1)
+                     # Break if papers are available or processing stopped
+                     updated_papers = self.db.get_all_papers(max_cache_age_seconds=1)
+                     if updated_papers or not self.scheduler._running:
+                         if updated_papers:
+                             st.success("✅ Papers have been processed!")
+                             time.sleep(1)
+                             st.rerun()
+                         break
+             else:
+                 st.warning("Paper processing is not currently running. It may be scheduled for midnight.")
+
+                 # Add manual trigger button
+                 if st.button("Process Papers Now", key="process_now"):
+                     st.info("Starting paper processing...")
+                     # Use threading to avoid blocking the Streamlit interface
+                     threading.Thread(
+                         target=lambda: asyncio.run(self.scheduler.process_papers()),
+                         daemon=True
+                     ).start()
+                     st.rerun()
+
+             return
+
+         # Sidebar for navigation
+         with st.sidebar:
+             st.header("Navigation")

+             # Store current paper index in session state
+             if 'current_paper_index' not in st.session_state:
+                 st.session_state['current_paper_index'] = 0
+
+             # Paper selection widget
+             paper_titles = [p['title'] for p in papers]
+             selected_title = st.selectbox(
+                 "Select Paper",
+                 paper_titles,
+                 index=st.session_state['current_paper_index']
              )

+             # Update current paper index when selection changes
+             st.session_state['current_paper_index'] = paper_titles.index(selected_title)
+
+             # Navigation buttons
+             col1, col2 = st.columns(2)
+             with col1:
+                 prev_disabled = st.session_state['current_paper_index'] <= 0
+                 if st.button("Previous", disabled=prev_disabled):
+                     st.session_state['current_paper_index'] -= 1
+                     st.rerun()
+
+             with col2:
+                 next_disabled = st.session_state['current_paper_index'] >= len(papers) - 1
+                 if st.button("Next", disabled=next_disabled):
+                     st.session_state['current_paper_index'] += 1
+                     st.rerun()
+
+             st.markdown(f"Paper {st.session_state['current_paper_index'] + 1} of {len(papers)}")
+
+             # Additional information
+             st.subheader("Information")
+             st.info(
+                 "Papers are automatically refreshed daily at midnight. "
+                 "The database is cleared and new papers are downloaded and processed."
              )
+
+             # Show processing date
+             st.caption("Last Updated:")
+             if papers and 'processed_at' in papers[st.session_state['current_paper_index']]:
+                 processed_time = papers[st.session_state['current_paper_index']]['processed_at']
+                 st.caption(f"{processed_time.strftime('%Y-%m-%d %H:%M:%S')} UTC")
+
+         # Display selected paper
+         if papers:
+             current_paper = papers[st.session_state['current_paper_index']]
+             self.display_paper(current_paper)
+
+     def display_paper(self, paper):
+         """Display a single paper with all its details"""
+         # Paper title
+         st.markdown(f"<h1 class='paper-title'>{paper['title']}</h1>", unsafe_allow_html=True)
+
+         # Publication date and authors
+         col1, col2 = st.columns([1, 3])
+         with col1:
+             published_date = paper.get('published_at', '')
+             if published_date:
+                 try:
+                     if isinstance(published_date, str):
+                         formatted_date = published_date.split('T')[0]
+                     else:
+                         formatted_date = published_date.strftime("%Y-%m-%d")
+                     st.markdown(f"**Published:** {formatted_date}")
+                 except Exception:
+                     st.markdown(f"**Published:** {published_date}")
+
+         with col2:
+             # Format authors
+             authors = paper.get('authors', [])
+             if authors:
+                 if isinstance(authors[0], dict) and 'name' in authors[0]:
+                     author_names = [a.get('name', '') for a in authors]
+                 else:
+                     author_names = [str(a) for a in authors]
+
+                 st.markdown(f"**Authors:** {', '.join(author_names)}")
+
+         # PDF download button
+         if paper.get('pdf_url'):
+             st.markdown("### 📄 Paper Download")
+             st.markdown(f"[Download Original PDF]({paper['pdf_url']})")
+
+         # Paper summary
+         st.markdown("<h2 class='summary-header'>Abstract</h2>", unsafe_allow_html=True)
+         st.markdown(paper.get('summary', 'No summary available.'))
+
+         # Paper explanation
+         if paper.get('explanation'):
+             st.markdown("<h2 class='explanation-header'>AI Analysis</h2>", unsafe_allow_html=True)
+
+             with st.expander("Show Full Analysis", expanded=True):
+                 st.markdown(paper['explanation'])
+         else:
+             st.warning("Detailed analysis not available for this paper.")
+
+         # Footer
+         st.markdown("---")
+         st.caption("PaperFlux - Powered by Gemini")
poetry.lock CHANGED
The diff for this file is too large to render. See raw diff
 
pyproject.toml CHANGED
@@ -10,7 +10,6 @@ license = {text = "MIT"}
  readme = "README.md"
  requires-python = ">=3.10"
  dependencies = [
-     "gradio (>=5.16.0,<6.0.0)",
      "google-generativeai (>=0.8.4,<0.9.0)",
      "redis (>=5.2.1,<6.0.0)",
      "python-dotenv (>=1.0.1,<2.0.0)",
@@ -23,7 +22,9 @@ dependencies = [
      "pymongo (>=4.11.1,<5.0.0)",
      "flask (>=3.1.0,<4.0.0)",
      "tqdm (>=4.67.1,<5.0.0)",
-     "aiohttp (>=3.11.12,<4.0.0)"
+     "aiohttp (>=3.11.12,<4.0.0)",
+     "dotenv (>=0.9.9,<0.10.0)",
+     "streamlit (>=1.42.2,<2.0.0)"
  ]