acecalisto3 commited on
Commit
6e6128e
1 Parent(s): 5df9041

Upload improved_script (2).py

Browse files
Files changed (1) hide show
  1. improved_script (2).py +269 -0
improved_script (2).py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import asyncio
import csv
import datetime
import hashlib
import logging
import os
from typing import List, Optional, Tuple

import aiohttp
import feedparser
import gradio as gr
import validators
from bs4 import BeautifulSoup
from huggingface_hub import InferenceClient
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import declarative_base, sessionmaker
# Configure logging once for the whole process; every module-level helper
# below logs through this logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Configuration (intervals are in seconds)
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")  # None when the env var is unset
DEFAULT_MONITORING_INTERVAL = 300  # starting poll interval for each URL
MAX_MONITORING_INTERVAL = 600      # back-off ceiling for URLs that stop changing
CHANGE_FREQUENCY_THRESHOLD = 3     # consecutive changes before polling speeds up

# Global mutable state shared by the async tasks (single event loop, no locks)
monitoring_tasks = {}            # url -> asyncio.Task running monitor_url for it
url_monitoring_intervals = {}    # url -> current adaptive interval in seconds
change_counts = {}               # url -> current consecutive-change streak
history = []                     # human-readable event log surfaced in the UI

# Database setup: declarative base shared by all ORM models (only Article here)
Base = declarative_base()
class Article(Base):
    """ORM row recording one fetched snapshot of a monitored URL."""
    __tablename__ = 'articles'
    id = Column(Integer, primary_key=True)
    url = Column(String(255), nullable=False)   # the monitored page URL
    title = Column(String(255))                 # page <title> text, or "No Title"
    content = Column(Text)                      # full raw body returned by the fetch
    hash = Column(String(32))                   # MD5 hex digest of content, used for change detection
    timestamp = Column(DateTime, default=datetime.datetime.utcnow)  # snapshot time (UTC)
47
+
async def create_db_engine(db_url):
    """Create a SQLAlchemy engine for *db_url*, create all mapped tables, and
    return ``(engine, session_factory)``.

    NOTE(review): declared async, but ``create_engine`` and
    ``Base.metadata.create_all`` are synchronous blocking calls — they run on
    the event loop thread. Callers must still ``await`` this coroutine.

    Raises:
        SQLAlchemyError: re-raised after logging when engine or table creation fails.
    """
    try:
        engine = create_engine(db_url)
        Base.metadata.create_all(engine)
        return engine, sessionmaker(bind=engine)
    except SQLAlchemyError as e:
        logger.error(f"Database error: {e}")
        raise
56
+
def sanitize_url(url: str) -> Optional[str]:
    """Return *url* unchanged if it is a syntactically valid URL, else None.

    Validation is delegated to ``validators.url``. The original annotation
    claimed ``str``, but the invalid branch returns None — callers (e.g.
    start_monitoring) must handle the None case.
    """
    return url if validators.url(url) else None
59
+
async def fetch_url_content(url: str, session: aiohttp.ClientSession) -> Tuple[str, str]:
    """Fetch *url* with *session* and return ``(title, raw_body)``.

    The title is the page's ``<title>`` text, or ``"No Title"`` when absent.

    Raises:
        aiohttp.ClientResponseError: on HTTP 4xx/5xx, so callers treat error
            pages as fetch failures instead of hashing them as page content.
    """
    async with session.get(url) as response:
        # Surface HTTP error statuses as ClientResponseError (a ClientError
        # subclass already handled by monitor_url); otherwise an error page's
        # body would be recorded as a "change".
        response.raise_for_status()
        content = await response.text()
    soup = BeautifulSoup(content, 'html.parser')
    title = soup.title.string if soup.title else "No Title"
    return title, content
66
+
async def save_to_database(session, url: str, title: str, content: str, hash: str):
    """Persist one Article snapshot; on database error, log and roll back.

    Args:
        session: a synchronous SQLAlchemy Session (see create_db_engine).
        url/title/content: the fetched page data.
        hash: MD5 hex digest of *content* (parameter name kept for
            backward compatibility although it shadows the builtin).

    Bug fix: the original did ``await session.commit()`` / ``await
    session.rollback()``, but a sync Session's commit()/rollback() return
    None and ``await None`` raises TypeError — so every save crashed.
    """
    try:
        article = Article(url=url, title=title, content=content, hash=hash)
        session.add(article)
        session.commit()
    except SQLAlchemyError as e:
        logger.error(f"Database error: {e}")
        session.rollback()
75
+
async def save_to_csv(storage_location: str, url: str, title: str, content: str, timestamp: datetime.datetime):
    """Append one change record to the CSV file at *storage_location*.

    Row format: ``[YYYY-MM-DD HH:MM:SS, url, title, content]``. Errors are
    logged rather than raised so a bad CSV path cannot kill the monitoring
    loop that calls this.

    Bug fix: ``os.path.dirname`` returns "" for a bare filename, and
    ``os.makedirs("", exist_ok=True)`` raises FileNotFoundError — which the
    IOError handler silently swallowed, so nothing was ever written for
    relative filenames. Directories are now created only when present.
    """
    try:
        directory = os.path.dirname(storage_location)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(storage_location, "a", newline='', encoding="utf-8") as csvfile:
            csv_writer = csv.writer(csvfile)
            csv_writer.writerow([timestamp.strftime("%Y-%m-%d %H:%M:%S"), url, title, content])
    except IOError as e:
        logger.error(f"IOError saving to CSV: {e}")
    except Exception as e:
        logger.error(f"Unexpected error saving to CSV: {e}")
86
+
async def monitor_url(url: str, interval: int, storage_location: str, feed_rss: bool, db_session):
    """Poll *url* forever, recording an event whenever the page's MD5 changes.

    Adaptive polling: after CHANGE_FREQUENCY_THRESHOLD consecutive changes the
    interval halves (floor 60s); any unchanged poll resets the streak and
    doubles the interval (cap MAX_MONITORING_INTERVAL).

    Side effects: appends messages to the module-level *history* list, updates
    change_counts and url_monitoring_intervals, saves to the database when
    *feed_rss* is set, and appends to CSV when *storage_location* is set.

    Runs until its asyncio.Task is cancelled (see stop_monitoring).
    """
    previous_hash = ""  # empty so the first successful fetch registers as a change
    async with aiohttp.ClientSession() as session:
        while True:
            try:
                title, content = await fetch_url_content(url, session)
                # MD5 is used purely as a cheap change fingerprint, not for security.
                current_hash = hashlib.md5(content.encode('utf-8')).hexdigest()

                if current_hash != previous_hash:
                    previous_hash = current_hash
                    timestamp = datetime.datetime.now()

                    if feed_rss:
                        try:
                            await save_to_database(db_session, url, title, content, current_hash)
                        except SQLAlchemyError as e:
                            # A DB failure must not stop monitoring of this URL.
                            logger.error(f"Database error while saving {url}: {e}")

                    if storage_location:
                        await save_to_csv(storage_location, url, title, content, timestamp)

                    history.append(f"Change detected at {url} on {timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
                    logger.info(f"Change detected at {url}")

                    change_counts[url] = change_counts.get(url, 0) + 1
                    if change_counts[url] >= CHANGE_FREQUENCY_THRESHOLD:
                        # Page changing frequently: poll faster, never below 60s.
                        interval = max(60, interval // 2)
                else:
                    # No change: reset the streak and back off, capped at the max.
                    change_counts[url] = 0
                    interval = min(interval * 2, MAX_MONITORING_INTERVAL)

                url_monitoring_intervals[url] = interval
            except aiohttp.ClientError as e:
                logger.error(f"Network error monitoring {url}: {e}")
                history.append(f"Network error monitoring {url}: {e}")
            except Exception as e:
                logger.error(f"Unexpected error monitoring {url}: {e}")
                history.append(f"Unexpected error monitoring {url}: {e}")

            await asyncio.sleep(interval)
127
+
async def start_monitoring(urls: List[str], storage_location: str, feed_rss: bool):
    """Spawn one monitoring task per URL and return a status string for the UI.

    Args:
        urls: a list of URLs, or a single comma-separated string (the form in
            which the Gradio textbox delivers its value).
        storage_location: CSV path for change records, or "" to disable CSV.
        feed_rss: when True, changes are also saved to the database feed.

    Bug fix: the Gradio click handler passes the raw textbox string; iterating
    a string treats every *character* as a URL. A string input is now split on
    commas and stripped before use (list inputs behave exactly as before).
    """
    global db_session
    if isinstance(urls, str):
        urls = [u.strip() for u in urls.split(",") if u.strip()]
    for url in urls:
        if url not in monitoring_tasks:
            sanitized_url = sanitize_url(url)
            if sanitized_url:
                task = asyncio.create_task(monitor_url(sanitized_url, DEFAULT_MONITORING_INTERVAL, storage_location, feed_rss, db_session))
                monitoring_tasks[sanitized_url] = task
            else:
                logger.warning(f"Invalid URL: {url}")
                history.append(f"Invalid URL: {url}")
    return "Monitoring started"
140
+
async def cleanup_resources(url: str):
    """Release any per-URL resources held by a cancelled monitoring task.

    Currently a deliberate no-op placeholder: the database engine and session
    are managed globally (see main), so there is nothing URL-specific to
    release yet. Kept async so stop_monitoring can schedule it as a task.
    """
144
+
def stop_monitoring(url: str):
    """Cancel the monitoring task for *url* and schedule its resource cleanup.

    Returns a status string for the UI. Previously an unknown URL fell through
    to an implicit ``None`` (rendered as an empty status box); that case now
    reports explicitly.

    NOTE: asyncio.create_task requires a running event loop, so this must be
    invoked from an async context (the Gradio app's loop).
    """
    if url not in monitoring_tasks:
        return f"No active monitoring task for {url}"
    monitoring_tasks[url].cancel()
    # Fire-and-forget: cleanup runs on the event loop after cancellation.
    asyncio.create_task(cleanup_resources(url))
    del monitoring_tasks[url]
    return "Monitoring stopped"
151
+
async def chatbot_response(message: str, history: List[Tuple[str, str]]):
    """Send *message* to the Mixtral instruct model and append the exchange to *history*.

    Returns ``(history, history)`` so the same list can feed two Gradio outputs.
    On any failure the error is logged and a canned apology message is appended
    in place of a model reply.

    NOTE(review): the annotation says List[Tuple[str, str]] but role/content
    dicts are appended — confirm which message format the gr.Chatbot component
    in use expects.
    NOTE(review): huggingface_hub's InferenceClient.text_generation is
    synchronous and returns a plain string in current releases, so both the
    ``await`` and ``response[0]['generated_text']`` below look wrong — verify
    against the installed huggingface_hub version; the broad except branch
    would currently mask such failures as the canned error reply.
    """
    try:
        client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=HUGGINGFACE_API_KEY)
        response = await client.text_generation(message, max_new_tokens=100)

        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": response[0]['generated_text']})

        return history, history
    except Exception as e:
        logger.error(f"Chatbot error: {e}")
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": "Error: Could not get a response from the chatbot."})
        return history, history
166
+
async def update_db_status(db_status):
    """Background task: probe the database once per minute and mirror the
    result ("Connected"/"Disconnected") into the *db_status* Gradio textbox.

    NOTE(review): the global db_session comes from a synchronous sessionmaker
    (see main), whose ``execute`` is not awaitable — ``await db_session.execute``
    will fail as written. Likewise ``db_status.update(...)`` is not a coroutine
    in Gradio. Verify both against the installed library versions.
    NOTE(review): SQLAlchemy 2.x requires ``text("SELECT 1")`` rather than a
    raw string argument to execute().
    """
    while True:
        try:
            await db_session.execute("SELECT 1")
            await db_status.update(value="Connected")
        except SQLAlchemyError:
            await db_status.update(value="Disconnected")
        await asyncio.sleep(60)  # Check every minute
175
+
async def update_feed_content(db_session):
    """Build an RSS-style feed dict from the 20 most recent Article rows.

    Returns the feed dict on success, or None when the query fails (the
    database error is logged).
    """
    try:
        recent_articles = (
            db_session.query(Article)
            .order_by(Article.timestamp.desc())
            .limit(20)
            .all()
        )
        items = []
        for entry in recent_articles:
            items.append({
                'title': entry.title,
                'link': entry.url,
                'description': entry.content,
                'pubDate': entry.timestamp,
            })
        return {
            'title': 'Website Changes Feed',
            'link': 'http://yourwebsite.com/feed',
            'description': 'Feed of changes detected on monitored websites.',
            'items': items,
        }
    except SQLAlchemyError as e:
        logger.error(f"Database error: {e}")
        return None
194
+
async def periodic_update_with_error_handling(db_session):
    """Background task: regenerate the feed every five minutes, forever.

    Any failure is logged and the loop continues — a transient error must not
    terminate the periodic refresh.
    """
    refresh_seconds = 300  # five minutes between refreshes
    while True:
        try:
            await asyncio.sleep(refresh_seconds)
            await update_feed_content(db_session)
        except Exception as exc:
            logger.error(f"Error in periodic update: {exc}")
202
+
async def main():
    """Initialize the database, build the Gradio UI, and launch the app.

    NOTE(review): several awaits below target synchronous APIs —
    ``await db_session.close()`` (a sync Session's close() returns None, so
    this raises TypeError at shutdown) and possibly ``await demo.launch()``
    depending on the installed Gradio version. Confirm before relying on the
    finally block for cleanup.
    """
    global db_session  # shared with start_monitoring / update_db_status
    try:
        engine, Session = await create_db_engine("sqlite:///monitoring.db")
        db_session = Session()
    except SQLAlchemyError as e:
        logger.error(f"Failed to connect to database: {e}")
        return

    demo = gr.Blocks()

    with demo:
        gr.Markdown("# Website Monitor and Chatbot")

        with gr.Row():
            with gr.Column():
                # Connection panel: db_url is display-only, the engine above is fixed.
                db_url = gr.Textbox(label="Database URL", value="sqlite:///monitoring.db")
                db_status = gr.Textbox(label="Database Status", interactive=False, value="Connected")

            with gr.Column():
                with gr.Tab("Configuration"):
                    target_urls = gr.Textbox(label="Target URLs (comma-separated)", placeholder="https://example.com, https://another-site.com")
                    storage_location = gr.Textbox(label="Storage Location (CSV file path)", placeholder="/path/to/your/file.csv")
                    feed_rss_checkbox = gr.Checkbox(label="Enable RSS Feed")
                    start_button = gr.Button("Start Monitoring")
                    stop_button = gr.Button("Stop Monitoring")
                    status_text = gr.Textbox(label="Status", interactive=False)
                    history_text = gr.Textbox(label="History", lines=10, interactive=False)

                with gr.Tab("User-End View"):
                    feed_content = gr.JSON(label="RSS Feed Content")

                with gr.Tab("Chatbot"):
                    chatbot_interface = gr.Chatbot()
                    message_input = gr.Textbox(placeholder="Type your message here...")
                    send_button = gr.Button("Send")

        # NOTE(review): both handlers receive the raw comma-separated textbox
        # string — stop_monitoring in particular gets the whole string, not a
        # single URL; confirm where splitting is supposed to happen.
        start_button.click(
            start_monitoring,
            inputs=[target_urls, storage_location, feed_rss_checkbox],
            outputs=status_text
        )

        stop_button.click(
            lambda url: stop_monitoring(url),
            inputs=target_urls,
            outputs=status_text
        )

        # NOTE(review): chatbot_response returns (history, history), yet the
        # second output is message_input — the input textbox would be replaced
        # with the whole conversation; verify intended behavior.
        send_button.click(
            chatbot_response,
            inputs=[message_input, chatbot_interface],
            outputs=[chatbot_interface, message_input]
        )

    # Background tasks are scheduled here; they run once the loop is free.
    asyncio.create_task(periodic_update_with_error_handling(db_session))
    asyncio.create_task(update_db_status(db_status))

    try:
        await demo.launch()
    finally:
        if db_session:
            await db_session.close()
        engine.dispose()

if __name__ == "__main__":
    asyncio.run(main())