awacke1 committed on
Commit
6c7b003
β€’
1 Parent(s): 1e7150f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -17
app.py CHANGED
@@ -12,6 +12,14 @@ import zipfile
12
  from PIL import Image
13
 
14
  EXCLUDED_FILES = ['app.py', 'requirements.txt', 'pre-requirements.txt', 'packages.txt', 'README.md','.gitattributes', "backup.py","Dockerfile"]
 
 
 
 
 
 
 
 
15
  URLS = {
16
  "Lumiere": "https://lumiere-video.github.io/",
17
  "National Library of Medicine": "https://www.nlm.nih.gov/",
@@ -83,14 +91,18 @@ def download_html_and_files(url, subdir):
83
  html_content = requests.get(url).text
84
  soup = BeautifulSoup(html_content, 'html.parser')
85
  base_url = urllib.parse.urlunparse(urllib.parse.urlparse(url)._replace(path='', params='', query='', fragment=''))
 
86
  for link in soup.find_all('a'):
87
  file_url = urllib.parse.urljoin(base_url, link.get('href'))
88
  local_filename = os.path.join(subdir, urllib.parse.urlparse(file_url).path.split('/')[-1])
89
  if not local_filename.endswith('/') and local_filename != subdir:
90
  link['href'] = local_filename
91
- download_file(file_url, local_filename)
 
92
  with open(os.path.join(subdir, "index.html"), "w") as file:
93
  file.write(str(soup))
 
 
94
 
95
  def list_files(directory_path='.'):
96
  files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]
@@ -135,25 +147,21 @@ def show_file_operations(file_path, sequence_number):
135
 
136
  file_sequence_numbers = {}
137
 
138
- def show_file_content(file_path):
139
  _, file_extension = os.path.splitext(file_path)
 
140
  try:
 
141
  if file_extension in ['.png', '.jpg', '.jpeg']:
142
  image_url = file_path.replace('File:','').replace('/','')
143
- st.write('Image URL:' + image_url)
144
- markdown_link = f"[![Image]({image_url})]({image_url})" #file_path
145
  st.markdown(markdown_link, unsafe_allow_html=True)
146
- elif file_extension in ['.md', '.markdown']:
147
- with open(file_path, "r") as file:
148
- content = file.read()
149
- edited_content = st.text_area(f"Edit {os.path.basename(file_path)}", value=content, height=250)
150
- if st.button(f"Save {os.path.basename(file_path)}"):
151
- with open(file_path, "w") as file:
152
- file.write(edited_content)
153
- st.success(f"Saved {os.path.basename(file_path)}!")
154
- elif file_extension in ['.html', '.txt']:
155
  with open(file_path, "r") as file:
 
156
  st.markdown(file.read(), unsafe_allow_html=True)
 
 
157
  except Exception as e:
158
  st.error(f"Error reading file {file_path}: {e}")
159
 
@@ -231,14 +239,16 @@ def main():
231
  json.dump(history, f)
232
 
233
  if st.sidebar.button('πŸ“₯ Get All the Content', help="Download content from the selected URL"):
234
- download_html_and_files(url, history[url])
235
- show_download_links(history[url])
236
-
 
 
 
237
  if st.sidebar.button('πŸ“‚ Show Download Links', help="Show all available download links"):
238
  for subdir in history.values():
239
  show_download_links(subdir)
240
 
241
-
242
  if st.sidebar.button("πŸ—‘ Delete All", help="Delete all downloaded content"):
243
  # Clear history file
244
  with open("history.json", "w") as f:
 
12
  from PIL import Image
13
 
14
  EXCLUDED_FILES = ['app.py', 'requirements.txt', 'pre-requirements.txt', 'packages.txt', 'README.md','.gitattributes', "backup.py","Dockerfile"]
15
+ # Emoji mapping for different file types
16
+ FILE_EMOJIS = {
17
+ ".jpg": "πŸ–ΌοΈ", ".jpeg": "πŸ–ΌοΈ", ".png": "πŸ–ΌοΈ",
18
+ ".html": "🌐", ".htm": "🌐", ".txt": "πŸ“„",
19
+ ".pdf": "πŸ“š", ".doc": "πŸ“", ".docx": "πŸ“",
20
+ ".xls": "πŸ“Š", ".xlsx": "πŸ“Š", ".ppt": "πŸ“Š", ".pptx": "πŸ“Š",
21
+ # Add more mappings as needed
22
+ }
23
  URLS = {
24
  "Lumiere": "https://lumiere-video.github.io/",
25
  "National Library of Medicine": "https://www.nlm.nih.gov/",
 
91
  html_content = requests.get(url).text
92
  soup = BeautifulSoup(html_content, 'html.parser')
93
  base_url = urllib.parse.urlunparse(urllib.parse.urlparse(url)._replace(path='', params='', query='', fragment=''))
94
+ file_urls = {} # To store original URLs of files
95
  for link in soup.find_all('a'):
96
  file_url = urllib.parse.urljoin(base_url, link.get('href'))
97
  local_filename = os.path.join(subdir, urllib.parse.urlparse(file_url).path.split('/')[-1])
98
  if not local_filename.endswith('/') and local_filename != subdir:
99
  link['href'] = local_filename
100
+ if download_file(file_url, local_filename):
101
+ file_urls[local_filename] = file_url # Store original URL
102
  with open(os.path.join(subdir, "index.html"), "w") as file:
103
  file.write(str(soup))
104
+ return file_urls
105
+
106
 
107
  def list_files(directory_path='.'):
108
  files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]
 
147
 
148
  file_sequence_numbers = {}
149
 
150
+ def show_file_content(file_path, original_url):
151
  _, file_extension = os.path.splitext(file_path)
152
+ emoji = FILE_EMOJIS.get(file_extension, "πŸ“") # Default emoji for unknown file types
153
  try:
154
+ # Display file content and original URL with emoji
155
  if file_extension in ['.png', '.jpg', '.jpeg']:
156
  image_url = file_path.replace('File:','').replace('/','')
157
+ markdown_link = f"{emoji} [![Image]({image_url})]({original_url})"
 
158
  st.markdown(markdown_link, unsafe_allow_html=True)
159
+ elif file_extension in ['.html', '.htm', '.txt']:
 
 
 
 
 
 
 
 
160
  with open(file_path, "r") as file:
161
+ st.markdown(f"{emoji} [{os.path.basename(file_path)}]({original_url})", unsafe_allow_html=True)
162
  st.markdown(file.read(), unsafe_allow_html=True)
163
+ else:
164
+ st.markdown(f"{emoji} [{os.path.basename(file_path)}]({original_url})", unsafe_allow_html=True)
165
  except Exception as e:
166
  st.error(f"Error reading file {file_path}: {e}")
167
 
 
239
  json.dump(history, f)
240
 
241
  if st.sidebar.button('πŸ“₯ Get All the Content', help="Download content from the selected URL"):
242
+ file_urls = download_html_and_files(url, history[url])
243
+ for file in list_files(history[url]):
244
+ file_path = os.path.join(history[url], file)
245
+ original_url = file_urls.get(file_path, "#")
246
+ show_file_content(file_path, original_url)
247
+
248
  if st.sidebar.button('πŸ“‚ Show Download Links', help="Show all available download links"):
249
  for subdir in history.values():
250
  show_download_links(subdir)
251
 
 
252
  if st.sidebar.button("πŸ—‘ Delete All", help="Delete all downloaded content"):
253
  # Clear history file
254
  with open("history.json", "w") as f: