acecalisto3 commited on
Commit
09095fe
·
verified ·
1 Parent(s): ac588ec

Update app2.py

Browse files
Files changed (1) hide show
  1. app2.py +135 -324
app2.py CHANGED
@@ -1,235 +1,31 @@
1
  import asyncio
2
- import csv
3
- import hashlib
4
- import os
5
- from typing import List, Tuple, Dict, Any, Optional
6
- import datetime
7
- import signal
8
- import feedparser
9
- import aiohttp
10
  import gradio as gr
11
- from huggingface_hub import InferenceClient
12
- from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime
13
  from sqlalchemy.exc import SQLAlchemyError
14
- from sqlalchemy.orm import declarative_base, sessionmaker
15
- from urllib.parse import urljoin
 
 
16
  import logging
17
- from sqlalchemy.orm import Session
18
- from sqlalchemy.future import select
19
- from bs4 import BeautifulSoup
20
- import validators
21
 
22
- Base = declarative_base()
23
-
24
- class Article(Base):
25
- __tablename__ = 'articles'
26
- id = Column(Integer, primary_key=True)
27
- url = Column('url', String(2048), nullable=False, unique=True)
28
- title = Column('title', String(255))
29
- content = Column('content', Text())
30
- hash_value = Column('hash', String(32))
31
- timestamp = Column('timestamp', DateTime(), default=datetime.datetime.utcnow)
32
-
33
- # Configure logging
34
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
35
  logger = logging.getLogger(__name__)
36
 
37
- # Configuration
38
- HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
39
- DEFAULT_MONITORING_INTERVAL = 300
40
- MAX_MONITORING_INTERVAL = 600
41
- CHANGE_FREQUENCY_THRESHOLD = 3
42
-
43
- # Global variables
44
- monitoring_tasks = {}
45
- url_monitoring_intervals = {}
46
- change_counts = {}
47
- history = []
48
- db_session = None # Initialize db_session globally
49
-
50
- async def create_db_engine(db_url):
51
- try:
52
- engine = create_engine(db_url)
53
- Base.metadata.create_all(engine)
54
- return engine, sessionmaker(bind=engine)
55
- except SQLAlchemyError as e:
56
- logger.error(f"Database error: {e}")
57
- raise
58
-
59
-
60
- def sanitize_url(url: str) -> str:
61
- return url if validators.url(url) else None
62
-
63
-
64
- async def fetch_url_content(url: str,
65
- session: aiohttp.ClientSession) -> Tuple[str, str]:
66
- async with session.get(url) as response:
67
- content = await response.text()
68
- soup = BeautifulSoup(content, 'html.parser')
69
- title = soup.title.string if soup.title else "No Title"
70
- return title, content
71
-
72
-
73
- async def save_to_database(session, url: str, title: str, content: str,
74
- hash: str):
75
- try:
76
- article = Article(url=url, title=title, content=content, hash=hash)
77
- session.add(article)
78
- await session.commit()
79
- except SQLAlchemyError as e:
80
- logger.error(f"Database error: {e}")
81
- await session.rollback()
82
-
83
-
84
- async def save_to_csv(storage_location: str, url: str, title: str,
85
- content: str, timestamp: datetime.datetime):
86
- try:
87
- os.makedirs(os.path.dirname(storage_location), exist_ok=True)
88
- with open(storage_location, "a", newline='',
89
- encoding="utf-8") as csvfile:
90
- csv_writer = csv.writer(csvfile)
91
- csv_writer.writerow([
92
- timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content
93
- ])
94
- except IOError as e:
95
- logger.error(f"IOError saving to CSV: {e}")
96
- except Exception as e:
97
- logger.error(f"Unexpected error saving to CSV: {e}")
98
-
99
-
100
- async def monitor_url(url: str, interval: int, storage_location: str,
101
- feed_rss: bool, db_session):
102
- previous_hash = ""
103
- async with aiohttp.ClientSession() as session:
104
- while True:
105
- try:
106
- title, content = await fetch_url_content(url, session)
107
- current_hash = hashlib.md5(content.encode('utf-8')).hexdigest()
108
-
109
- if current_hash != previous_hash:
110
- previous_hash = current_hash
111
- timestamp = datetime.datetime.now()
112
-
113
- if feed_rss:
114
- try:
115
- await save_to_database(db_session, url, title,
116
- content, current_hash)
117
- except SQLAlchemyError as e:
118
- logger.error(
119
- f"Database error while saving {url}: {e}")
120
-
121
- if storage_location:
122
- await save_to_csv(storage_location, url, title,
123
- content, timestamp)
124
-
125
- history.append(
126
- f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}"
127
- )
128
- logger.info(f"Change detected at {url}")
129
-
130
- change_counts[url] = change_counts.get(url, 0) + 1
131
- if change_counts[url] >= CHANGE_FREQUENCY_THRESHOLD:
132
- interval = max(60, interval // 2)
133
- else:
134
- change_counts[url] = 0
135
- interval = min(interval * 2, MAX_MONITORING_INTERVAL)
136
-
137
- url_monitoring_intervals[url] = interval
138
- except aiohttp.ClientError as e:
139
- logger.error(f"Network error monitoring {url}: {e}")
140
- history.append(f"Network error monitoring {url}: {e}")
141
- except Exception as e:
142
- logger.error(f"Unexpected error monitoring {url}: {e}")
143
- history.append(f"Unexpected error monitoring {url}: {e}")
144
-
145
- await asyncio.sleep(interval)
146
-
147
-
148
- async def start_monitoring(urls: List[str], storage_location: str,
149
- feed_rss: bool):
150
- global db_session
151
- for url in urls:
152
- if url not in monitoring_tasks:
153
- sanitized_url = sanitize_url(url)
154
- if sanitized_url:
155
- task = asyncio.create_task(
156
- monitor_url(sanitized_url, DEFAULT_MONITORING_INTERVAL,
157
- storage_location, feed_rss, db_session))
158
- monitoring_tasks[sanitized_url] = task
159
- else:
160
- logger.warning(f"Invalid URL: {url}")
161
- history.append(f"Invalid URL: {url}")
162
- return "Monitoring started"
163
-
164
-
165
- async def cleanup_resources(url: str):
166
- # Add any cleanup logic here, e.g., closing database connections
167
- pass
168
-
169
-
170
- def stop_monitoring(url: str):
171
- if url in monitoring_tasks:
172
- monitoring_tasks[url].cancel()
173
- asyncio.create_task(cleanup_resources(url))
174
- del monitoring_tasks[url]
175
- return "Monitoring stopped"
176
-
177
-
178
- def generate_rss_feed():
179
- session = Session()
180
- try:
181
- articles = session.query(Article).order_by(
182
- Article.timestamp.desc()).limit(20).all()
183
- feed = feedparser.FeedParserDict()
184
- feed['title'] = 'Website Changes Feed'
185
- feed['link'] = 'http://yourwebsite.com/feed' # Replace if needed
186
- feed['description'] = 'Feed of changes detected on monitored websites.'
187
- feed['entries'] = [{
188
- 'title': article.title,
189
- 'link': article.url,
190
- 'description': article.content,
191
- 'published': article.timestamp
192
- } for article in articles]
193
- return feedparser.FeedGenerator().feed_from_dictionary(
194
- feed).writeString('utf-8')
195
- except SQLAlchemyError as e:
196
- logger.error(f"Database error: {e}")
197
- return None
198
- finally:
199
- session.close()
200
-
201
-
202
- async def chatbot_response(message: str, history: List[Tuple[str, str]]):
203
- try:
204
- client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1",
205
- token=HUGGINGFACE_API_KEY)
206
- response = await client.text_generation(message, max_new_tokens=100)
207
-
208
- history.append((message, response[0]['generated_text']))
209
-
210
- return history, history
211
- except Exception as e:
212
- logger.error(f"Chatbot error: {e}")
213
- history.append((message,
214
- "Error: Could not get a response from the chatbot."))
215
- return history, history
216
-
217
-
218
  async def update_db_status(db_status_textbox):
219
  while True:
220
  try:
221
- await db_session.execute("SELECT 1")
222
- db_status = "Connected"
223
  except SQLAlchemyError:
224
- db_status = "Disconnected"
225
- yield db_status # Use a generator to update the textbox
226
- await asyncio.sleep(60) # Check every minute
227
-
228
 
229
- async def update_feed_content(): # Remove db_session parameter
 
230
  try:
231
- articles = await db_session.query(Article).order_by(
232
- Article.timestamp.desc()).limit(20).all()
 
 
233
  feed = {
234
  'title': 'Website Changes Feed',
235
  'link': 'http://yourwebsite.com/feed',
@@ -238,7 +34,7 @@ async def update_feed_content(): # Remove db_session parameter
238
  'title': article.title,
239
  'link': article.url,
240
  'description': article.content,
241
- 'pubDate': str(article.timestamp) # Convert datetime to string
242
  } for article in articles]
243
  }
244
  return feed
@@ -246,119 +42,134 @@ async def update_feed_content(): # Remove db_session parameter
246
  logger.error(f"Database error: {e}")
247
  return None
248
 
 
249
  async def periodic_update_with_error_handling():
250
  while True:
251
  try:
252
  await asyncio.sleep(300) # Wait for 5 minutes
253
- await update_feed_content()
254
  except Exception as e:
255
  logger.error(f"Error in periodic update: {e}")
256
 
257
-
258
- async def main():
259
- global db_session
260
  try:
261
- engine, Session = await create_db_engine("sqlite:///monitoring.db")
 
 
 
 
262
  db_session = Session()
263
-
264
- demo = gr.Blocks()
265
-
266
- with demo:
267
- gr.Markdown("# Website Monitor and Chatbot")
268
-
269
- with gr.Row():
270
- with gr.Column():
271
- db_url = gr.Textbox(
272
- label="Database URL", value="sqlite:///monitoring.db")
273
- db_status_textbox = gr.Textbox(label="Database Status",
274
- interactive=False,
275
- value="Connected")
276
-
277
- with gr.Column():
278
- with gr.Tab("Configuration"):
279
- target_urls = gr.Textbox(
280
- label="Target URLs (comma-separated)",
281
- placeholder=
282
- "https://example.com, https://another-site.com")
283
- storage_location = gr.Textbox(
284
- label="Storage Location (CSV file path)",
285
- placeholder="/path/to/your/file.csv")
286
- feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
287
- start_button = gr.Button("Start Monitoring")
288
- stop_button = gr.Button("Stop Monitoring")
289
- status_text = gr.Textbox(label="Status",
290
- interactive=False)
291
- history_text = gr.Textbox(label="History",
292
- lines=10,
293
- interactive=False)
294
-
295
- with gr.Tab("User-End View"):
296
- feed_content = gr.JSON(label="RSS Feed Content")
297
-
298
- with gr.Tab("Chatbot"):
299
- chatbot_interface = gr.Chatbot(type='messages')
300
- message_input = gr.Textbox(
301
- placeholder="Type your message here...")
302
- send_button = gr.Button("Send")
303
-
304
- async def on_start_click(target_urls_str: str, storage_loc: str,
305
- feed_enabled: bool):
306
- urls = [url.strip() for url in target_urls_str.split(",")]
307
- await start_monitoring(urls,
308
- storage_loc if storage_loc else None,
309
- feed_enabled)
310
- return "Monitoring started for valid URLs."
311
-
312
- async def on_stop_click():
313
- for url in list(monitoring_tasks.keys()):
314
- stop_monitoring(url)
315
- return "Monitoring stopped for all URLs."
316
-
317
- start_button.click(
318
- on_start_click,
319
- inputs=[target_urls, storage_location, feed_rss_checkbox],
320
- outputs=[status_text])
321
- stop_button.click(on_stop_click, outputs=[status_text])
322
- send_button.click(
323
- chatbot_response,
324
- inputs=[message_input, chatbot_interface],
325
- outputs=[chatbot_interface, message_input])
326
-
327
- # Set up the timer
328
- feed_updater = gr.Timer(interval=300)
329
- feed_updater.tick(fn=update_feed_content,
330
- outputs=feed_content)
331
-
332
- # Create background tasks
333
- demo.load(update_db_status, outputs=db_status_textbox)
334
- asyncio.create_task(periodic_update_with_error_handling())
335
-
336
- # Launch the demo
337
- await demo.launch()
338
-
339
  except Exception as e:
340
- logger.error(f"Error in main: {e}")
341
- finally:
342
- if db_session:
343
- await db_session.close()
344
- if engine:
345
- engine.dispose()
346
-
347
- def signal_handler():
348
- for task in asyncio.all_tasks():
349
- task.cancel()
350
-
351
- return f"Processed: {input_text}"
352
 
353
- # Create Gradio interface
354
- with gr.Blocks() as demo:
355
- gr.Markdown("# My Gradio App")
356
- input_text = gr.Textbox(label="Input Text")
357
- output_text = gr.Textbox(label="Output Text")
358
- submit_button = gr.Button("Submit")
359
-
360
- submit_button.click(fn=generate, inputs=input_text, outputs=output_text)
361
-
362
- # Launch the app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  if __name__ == "__main__":
364
- demo.launch(share=True)
 
1
  import asyncio
 
 
 
 
 
 
 
 
2
  import gradio as gr
 
 
3
  from sqlalchemy.exc import SQLAlchemyError
4
+ from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
5
+ from sqlalchemy.future import select # Correct async query API
6
+ from sqlalchemy.orm import sessionmaker
7
+ from models import Article # Assuming the Article model is defined in models.py
8
  import logging
 
 
 
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  logger = logging.getLogger(__name__)
11
 
12
+ # This will constantly check the database status and update the textbox
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  async def update_db_status(db_status_textbox):
14
  while True:
15
  try:
16
+ await db_session.execute("SELECT 1") # Simple query to check connection
17
+ db_status_textbox.update("Connected")
18
  except SQLAlchemyError:
19
+ db_status_textbox.update("Disconnected")
20
+ await asyncio.sleep(60) # Wait for 60 seconds before checking again
 
 
21
 
22
+ # Fetch the latest 20 articles and return them in a feed format
23
+ async def update_feed_content():
24
  try:
25
+ result = await db_session.execute(
26
+ select(Article).order_by(Article.timestamp.desc()).limit(20)
27
+ )
28
+ articles = result.scalars().all() # Fetch latest articles
29
  feed = {
30
  'title': 'Website Changes Feed',
31
  'link': 'http://yourwebsite.com/feed',
 
34
  'title': article.title,
35
  'link': article.url,
36
  'description': article.content,
37
+ 'pubDate': str(article.timestamp)
38
  } for article in articles]
39
  }
40
  return feed
 
42
  logger.error(f"Database error: {e}")
43
  return None
44
 
45
+ # Periodic feed updater with error handling
46
  async def periodic_update_with_error_handling():
47
  while True:
48
  try:
49
  await asyncio.sleep(300) # Wait for 5 minutes
50
+ await update_feed_content() # Update the feed content
51
  except Exception as e:
52
  logger.error(f"Error in periodic update: {e}")
53
 
54
+ # Function for dynamically setting the database connection
55
+ async def set_db_connection(host, port, user, password, db_name):
56
+ global db_session, engine
57
  try:
58
+ engine = create_async_engine(
59
+ f"mysql+aiomysql://{user}:{password}@{host}:{port}/{db_name}",
60
+ echo=False
61
+ )
62
+ Session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
63
  db_session = Session()
64
+ return "Database connection established."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  except Exception as e:
66
+ logger.error(f"Failed to establish database connection: {e}")
67
+ return f"Failed to connect to database: {e}"
 
 
 
 
 
 
 
 
 
 
68
 
69
+ # Main application that runs Gradio UI and background tasks
70
+ async def main():
71
+ global db_session
72
+ engine = None
73
+
74
+ demo = gr.Blocks()
75
+
76
+ # Define the Gradio interface
77
+ with demo:
78
+ gr.Markdown("# Website Monitor and Chatbot")
79
+
80
+ with gr.Row():
81
+ with gr.Column():
82
+ gr.Markdown("## Database Settings")
83
+ db_host = gr.Textbox(label="Database Host", placeholder="localhost", value="localhost")
84
+ db_port = gr.Textbox(label="Database Port", placeholder="3306", value="3306")
85
+ db_user = gr.Textbox(label="Database User", placeholder="username", value="")
86
+ db_pass = gr.Textbox(label="Database Password", placeholder="password", type="password", value="")
87
+ db_name = gr.Textbox(label="Database Name", placeholder="database_name", value="monitoring")
88
+
89
+ db_status_textbox = gr.Textbox(label="Database Status", interactive=False)
90
+ status_text = gr.Textbox(label="Status", interactive=False)
91
+
92
+ gr.Markdown("## RSS Feed Reader Settings")
93
+ feed_target_url = gr.Textbox(label="RSS Feed Target URL", placeholder="http://yourwebsite.com/feed")
94
+ view_button = gr.Button("View Feed")
95
+
96
+ target_urls = gr.Textbox(
97
+ label="Target URLs (comma-separated)",
98
+ placeholder="https://example.com, https://another-site.com"
99
+ )
100
+ storage_location = gr.Textbox(
101
+ label="Storage Location (CSV file path)",
102
+ placeholder="/path/to/your/file.csv"
103
+ )
104
+ feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
105
+ start_button = gr.Button("Start Monitoring")
106
+ stop_button = gr.Button("Stop Monitoring")
107
+
108
+ with gr.Column():
109
+ feed_content = gr.JSON(label="RSS Feed Content")
110
+ chatbot_interface = gr.Chatbot(type='messages')
111
+ message_input = gr.Textbox(placeholder="Type your message here...")
112
+ send_button = gr.Button("Send")
113
+
114
+ # Define button actions
115
+ async def on_start_click(target_urls_str: str, storage_loc: str, feed_enabled: bool,
116
+ host: str, port: str, user: str, password: str, db_name: str):
117
+ urls = [url.strip() for url in target_urls_str.split(",")]
118
+ await set_db_connection(host, port, user, password, db_name) # Connect to the DB
119
+ asyncio.create_task(start_monitoring(urls, storage_loc, feed_enabled)) # Start monitoring
120
+ return "Monitoring started."
121
+
122
+ async def on_view_feed_click(feed_url: str):
123
+ # Logic to fetch and view RSS feed data based on URL
124
+ return await fetch_feed_content(feed_url)
125
+
126
+ stop_button.click(on_stop_click, outputs=[status_text])
127
+ view_button.click(on_view_feed_click, inputs=[feed_target_url], outputs=[feed_content])
128
+ send_button.click(
129
+ chatbot_response,
130
+ inputs=[message_input, chatbot_interface],
131
+ outputs=[chatbot_interface, message_input]
132
+ )
133
+
134
+ # Set up the timer for periodic updates
135
+ feed_updater = gr.Timer(interval=300)
136
+ feed_updater.tick(fn=update_feed_content, outputs=feed_content)
137
+
138
+ # Load and check database status when the UI is loaded
139
+ demo.load(update_db_status, outputs=db_status_textbox)
140
+ asyncio.create_task(periodic_update_with_error_handling()) # Run periodic updates in the background
141
+
142
+ # Launch the Gradio demo
143
+ await demo.launch()
144
+
145
+ async def fetch_feed_content(feed_url: str):
146
+ # Logic to fetch RSS feed content from the provided URL
147
+ # You would replace this with actual RSS fetching and parsing logic
148
+ return {
149
+ 'title': 'Sample Feed',
150
+ 'link': feed_url,
151
+ 'items': [
152
+ {'title': 'Sample Item 1', 'link': feed_url + '/1', 'description': 'This is a sample item.', 'pubDate': '2024-01-01'},
153
+ {'title': 'Sample Item 2', 'link': feed_url + '/2', 'description': 'This is another sample item.', 'pubDate': '2024-01-02'}
154
+ ]
155
+ }
156
+
157
+ async def start_monitoring(urls, storage_location, feed_enabled):
158
+ # Logic to start monitoring URLs and optionally save to CSV or enable RSS
159
+ print(f"Starting monitoring for {urls}, saving to {storage_location}, RSS enabled: {feed_enabled}")
160
+ return
161
+
162
+ def stop_monitoring(url):
163
+ # Logic to stop monitoring a specific URL
164
+ print(f"Stopping monitoring for {url}")
165
+ return
166
+
167
+ async def chatbot_response(message, chat_interface):
168
+ # Example chatbot logic to respond to a user message
169
+ response = f"Echo: {message}"
170
+ chat_interface.append((message, response))
171
+ return chat_interface, ""
172
+
173
+ # Launch the app using asyncio
174
  if __name__ == "__main__":
175
+ asyncio.run(main())