acecalisto3 committed on
Commit
4e15fdc
·
verified ·
1 Parent(s): 104c1af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -7
app.py CHANGED
@@ -1,11 +1,105 @@
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
2
  from huggingface_hub import InferenceClient
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  def respond(
11
  message,
@@ -39,9 +133,7 @@ def respond(
39
  response += token
40
  yield response
41
 
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
  demo = gr.ChatInterface(
46
  respond,
47
  additional_inputs=[
@@ -55,9 +147,15 @@ demo = gr.ChatInterface(
55
  step=0.05,
56
  label="Top-p (nucleus sampling)",
57
  ),
 
 
 
 
 
58
  ],
 
 
59
  )
60
 
61
-
62
  if __name__ == "__main__":
63
  demo.launch()
 
1
+ import datetime
2
+ import os
3
+ import csv
4
+ import time
5
+ import hashlib
6
+ import logging
7
  import gradio as gr
8
+ from selenium import webdriver
9
+ from selenium.webdriver.chrome.service import Service
10
+ from selenium.webdriver.chrome.options import Options
11
+ from webdriver_manager.chrome import ChromeDriverManager
12
  from huggingface_hub import InferenceClient
13
 
14
+ # Configure logging
15
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
16
+
17
+ # Define constants
18
+ PREFIX = "Task started at {date_time_str}. Purpose: {purpose}"
19
+ TASK_PROMPT = "Current task: {task}. History:\n{history}"
20
+
21
+ # Define current date/time
22
+ date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
23
+
24
+ # Define purpose
25
+ purpose = """
26
+ You go to Culvers sites, you continuously seek changes on them since your last observation.
27
+ Anything new that gets logged and dumped into csv, stored in your log folder at user/app/scraped_data.
28
  """
 
 
 
29
 
30
+ # Define history
31
+ history = []
32
+
33
+ # Define current task
34
+ current_task = None
35
+
36
+ # Default file path
37
+ default_file_path = "user/app/scraped_data/culver/culvers_changes.csv"
38
+
39
+ # Ensure the directory exists
40
+ os.makedirs(os.path.dirname(default_file_path), exist_ok=True)
41
+
42
+ # Function to monitor URLs for changes
43
def monitor_urls(storage_location, url1, url2, scrape_interval, content_type):
    """Poll two URLs in headless Chrome and record every detected content change.

    Each polling cycle loads both URLs, fingerprints the selected content with
    an MD5 digest and compares it with the digest from the previous cycle.
    When the digest differs, a line is appended to the module-level ``history``
    list, a row is written to the CSV file and an INFO log entry is emitted.
    The first cycle always records a change because there is no previous
    digest to compare against.

    The function loops until a ``KeyboardInterrupt`` is raised, so it blocks
    the calling thread for as long as monitoring is running.

    Args:
        storage_location: Path of the CSV file to (re)create. Any existing file
            is truncated. Missing parent directories are created.
        url1: First URL to monitor.
        url2: Second URL to monitor.
        scrape_interval: Pause between polling cycles, in minutes.
        content_type: ``"media"`` fingerprints the ``src`` attributes of the
            page's ``<img>`` elements; any other value (including ``"text"``
            and ``"both"``) fingerprints the full page source.
    """
    # Imported locally because the file's top-level imports do not provide
    # ``By``, which the Selenium 4 locator API requires.
    from selenium.webdriver.common.by import By

    urls = [url1, url2]
    previous_hashes = [""] * len(urls)

    # A bare filename has an empty dirname, and os.makedirs("") raises.
    target_dir = os.path.dirname(storage_location)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    options = Options()
    # Pass headless mode as a browser argument; the ``options.headless``
    # setter is deprecated and absent from newer Selenium 4 releases.
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")

    # The CSV file stays open for the whole monitoring session so rows can be
    # appended as changes are detected.
    with open(storage_location, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["date", "time", "url", "change"])
        writer.writeheader()
        csvfile.flush()

        service = Service(ChromeDriverManager().install())
        # The context manager quits the driver on exit, so no explicit
        # ``driver.quit()`` is needed.
        with webdriver.Chrome(service=service, options=options) as driver:
            try:
                while True:
                    for i, url in enumerate(urls):
                        try:
                            driver.get(url)
                            time.sleep(2)  # Wait for the page to load

                            if content_type == "media":
                                # Hash the image URLs rather than the
                                # WebElement objects: their repr embeds
                                # session/element ids that differ on every
                                # page load and would flag a change each cycle.
                                images = driver.find_elements(By.TAG_NAME, "img")
                                current_content = "\n".join(
                                    img.get_attribute("src") or "" for img in images
                                )
                            else:
                                current_content = driver.page_source

                            current_hash = hashlib.md5(
                                current_content.encode("utf-8")
                            ).hexdigest()
                            if current_hash == previous_hashes[i]:
                                continue

                            previous_hashes[i] = current_hash
                            now = datetime.datetime.now()
                            timestamp = now.strftime("%Y-%m-%d %H:%M:%S")
                            history.append(f"Change detected at {url} on {timestamp}")
                            writer.writerow(
                                {
                                    "date": now.strftime("%Y-%m-%d"),
                                    "time": now.strftime("%H:%M:%S"),
                                    "url": url,
                                    "change": "Content changed",
                                }
                            )
                            # Flush so the row reaches disk even though the
                            # loop never finishes on its own.
                            csvfile.flush()
                            logging.info("Change detected at %s on %s", url, timestamp)
                        except Exception as e:
                            # Best effort: a failure on one URL must not stop
                            # monitoring of the other.
                            logging.error("Error accessing %s: %s", url, e)

                    time.sleep(scrape_interval * 60)  # Check every scrape_interval minutes
            except KeyboardInterrupt:
                logging.info("Monitoring stopped by user.")
91
+
92
+ # Define main function to handle user input
93
def handle_input(storage_location, url1, url2, scrape_interval, content_type):
    """Start a monitoring task and return the formatted task summary.

    Records the task description in the module-level ``current_task`` and
    ``history``, then hands control to ``monitor_urls``. That call only
    returns once its polling loop ends, so the summary is produced after
    monitoring has stopped.

    Args:
        storage_location: CSV path forwarded to ``monitor_urls``.
        url1: First URL to monitor.
        url2: Second URL to monitor.
        scrape_interval: Minutes between polling cycles.
        content_type: Content selector forwarded to ``monitor_urls``.

    Returns:
        ``TASK_PROMPT`` filled in with the current task and the
        newline-joined history.
    """
    global current_task

    task_description = f"Monitoring URLs: {url1}, {url2}"
    current_task = task_description
    history.append(f"Task started: {task_description}")

    # Blocks here until the monitoring loop exits.
    monitor_urls(storage_location, url1, url2, scrape_interval, content_type)

    history_text = "\n".join(history)
    return TASK_PROMPT.format(task=current_task, history=history_text)
100
+
101
+ # Create the inference client used by the chat response function
102
+ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
103
 
104
  def respond(
105
  message,
 
133
  response += token
134
  yield response
135
 
136
+ # Create Gradio interface
 
 
137
  demo = gr.ChatInterface(
138
  respond,
139
  additional_inputs=[
 
147
  step=0.05,
148
  label="Top-p (nucleus sampling)",
149
  ),
150
+ gr.Textbox(value=default_file_path, label="Storage Location"),
151
+ gr.Textbox(value="https://www.culver.k12.in.us/", label="URL 1"),
152
+ gr.Textbox(value="https://www.facebook.com/CulverCommunitySchools", label="URL 2"),
153
+ gr.Slider(minimum=1, maximum=60, value=5, step=1, label="Scrape Interval (minutes)"),
154
+ gr.Radio(choices=["text", "media", "both"], value="text", label="Content Type"),
155
  ],
156
+ title="Culvers Site Monitor and Chatbot",
157
+ description="Monitor changes on Culvers' websites and log them into a CSV file. Also, chat with a friendly chatbot."
158
  )
159
 
 
160
  if __name__ == "__main__":
161
  demo.launch()