broadfield-dev committed on
Commit
ba14112
·
verified ·
1 Parent(s): 41d78cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -327
app.py CHANGED
@@ -1,335 +1,49 @@
1
- from flask import Flask, render_template, request, jsonify, send_file
2
- from huggingface_hub import HfApi
3
- import requests
4
- import base64
5
- import markdown
6
- import json
7
- import mimetypes
8
- import os
9
- import io
10
- import uuid
11
- from pathlib import Path
12
-
13
- app = Flask(__name__)
14
-
15
- GITHUB_API = "https://api.github.com/repos/"
16
- #HF_API = "https://huggingface.co/api/spaces/"
17
-
18
def generate_file_tree(paths):
    """Render a flat list of repo paths as a simple indented tree string.

    Each path contributes one line; depth is taken from the number of
    '/'-separated segments. Returns the tree followed by a blank line.
    """
    print("generating file tree")
    lines = ["📁 Root"]
    for entry in sorted(paths):
        segments = entry.split('/')
        depth = len(segments) - 1
        lines.append(f"{' ' * depth}📄 {segments[-1]}")
    print("generating file tree - Complete")
    return "\n".join(lines) + "\n\n"
30
-
31
def get_all_files(owner, repo, path="", is_hf=False):
    """Recursively fetch all file entries from a GitHub repo or HF Space.

    Returns a list of item dicts (each with at least 'path' and 'type'),
    or None if the listing could not be fetched or parsed — callers
    (get_repo_contents) rely on the None sentinel for failure.
    """
    if is_hf:
        # Publicly accessible tree endpoint for a Hugging Face Space.
        api_url = f"https://huggingface.co/spaces/{owner}/{repo}/tree/main/{path}".rstrip('/')
    else:
        api_url = f"{GITHUB_API}{owner}/{repo}/contents/{path}".rstrip('/')

    try:
        # Single request — the original fetched the same URL twice
        # (once plain, once with an Accept header) and printed both.
        response = requests.get(api_url, headers={"Accept": "application/json"})
        response.raise_for_status()
        items = response.json()

        # HF may answer with an HTML page rather than a JSON listing.
        if isinstance(items, str):
            return None

        files = []
        for item in items:
            if item['type'] == 'file':
                files.append(item)
            elif item['type'] == 'dir':
                subtree = get_all_files(owner, repo, item['path'], is_hf)
                if subtree is None:
                    # Preserve original semantics: any failed subtree
                    # fails the whole listing.
                    return None
                files.extend(subtree)
        return files
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Narrowed from a bare `except Exception`: network errors, bad
        # JSON, or unexpected item shapes all signal failure via None.
        return None
62
-
63
def get_hf_files(repo, name, path=""):
    """Download every file of the HF Space `repo`/`name` into ./`name`/.

    `path` is kept for backward compatibility but unused:
    list_repo_files already returns the full recursive listing, so a
    single pass downloads each file exactly once. (The original recursed
    on directory prefixes — but list_repo_files ignores `path`, so the
    recursion re-listed the whole repo, and `f_name` was left unbound or
    stale for nested files, corrupting their download targets.)

    Returns a list of {"path": <repo-relative path>} dicts.
    """
    api = HfApi()
    repo_files = api.list_repo_files(repo_id=f'{repo}/{name}', repo_type="space")
    print(repo_files)

    downloaded = []
    for repo_path in repo_files:
        # Mirror the repo layout locally; creates the base dir too.
        local_path = os.path.join(name, repo_path)
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        r = requests.get(f'https://huggingface.co/spaces/{repo}/{name}/raw/main/{repo_path}')
        print(repo_path)
        with open(local_path, 'wb') as fh:
            fh.write(r.content)

        downloaded.append({"path": repo_path})

    print(downloaded)
    return downloaded
114
-
115
def get_repo_contents(url):
    """Parse a GitHub/HF Space URL and fetch its file listing.

    Returns (owner, repo, files, is_hf) on success, or
    (None, None, error_message, False) on failure — callers detect the
    error case via `owner is None` / `isinstance(files, str)`.
    """
    try:
        # Both URL shapes end in .../<owner>/<repo>.
        parts = url.rstrip('/').split('/')
        owner, repo = parts[-2], parts[-1]

        if "huggingface.co" in url:
            # Downloads the Space files locally and returns their paths.
            files = get_hf_files(owner, repo)
            return owner, repo, files, True

        # Assume GitHub otherwise.
        files = get_all_files(owner, repo, "", False)
        if files is None:
            raise Exception("Failed to fetch GitHub repository contents")
        return owner, repo, files, False
    except Exception as e:
        return None, None, f"Error fetching repo contents: {str(e)}", False
141
-
142
def _render_file_markdown(file_path, content_raw, size):
    """Format raw file bytes as a markdown section.

    Shared by the HF and GitHub paths of process_file_content, which
    previously duplicated this logic line-for-line.
    """
    file_extension = file_path.split('.')[-1] if '.' in file_path else ''
    mime_type, _ = mimetypes.guess_type(file_path)
    is_text = (mime_type and mime_type.startswith('text')) or file_extension in ['py', 'md', 'txt', 'js', 'html', 'css', 'json']

    if not is_text:
        return f"### File: {file_path}\n[Binary file - {size} bytes]\n\n"
    try:
        text_content = content_raw.decode('utf-8')
    except UnicodeDecodeError:
        # Text-looking extension but undecodable bytes: treat as binary.
        return f"### File: {file_path}\n[Binary file - {size} bytes]\n\n"

    if file_extension == 'json':
        try:
            formatted_json = json.dumps(json.loads(text_content), indent=2)
            return f"### File: {file_path}\n```json\n{formatted_json}\n```\n\n"
        except json.JSONDecodeError:
            return f"### File: {file_path}\n```json\n{text_content}\n```\n[Note: Invalid JSON format]\n\n"
    return f"### File: {file_path}\n```{file_extension or 'text'}\n{text_content}\n```\n\n"


def process_file_content(file_info, owner, repo, is_hf=False):
    """Fetch one repository file and return its markdown rendering.

    Any fetch/parse error is rendered into the output as a note rather
    than raised, so a single bad file does not abort the document.
    """
    file_path = file_info['path']
    try:
        if is_hf:
            # HF raw endpoint serves the file bytes directly.
            file_url = f"https://huggingface.co/spaces/{owner}/{repo}/raw/main/{file_path}"
            response = requests.get(file_url)
            response.raise_for_status()
            return _render_file_markdown(file_path, response.content, len(response.content))

        # GitHub contents API returns base64-encoded content in JSON.
        file_url = f"{GITHUB_API}{owner}/{repo}/contents/{file_path}"
        response = requests.get(file_url)
        response.raise_for_status()
        data = response.json()
        if 'content' not in data:
            return f"### File: {file_path}\n[No content available]\n\n"
        return _render_file_markdown(file_path, base64.b64decode(data['content']), data['size'])
    except Exception as e:
        return f"### File: {file_path}\n[Error fetching file content: {str(e)}]\n\n"
210
 
211
def process_uploaded_file(file):
    """Render one uploaded file (werkzeug-style: .filename + .read()) as markdown.

    Fix: the original computed `filename` but every section header was
    hard-coded to the literal "(unknown)"; headers now show the real name.
    """
    filename = file.filename
    file_extension = filename.split('.')[-1] if '.' in filename else ''

    try:
        content_raw = file.read()  # whole file into memory
        size = len(content_raw)

        mime_type, _ = mimetypes.guess_type(filename)
        is_text = (mime_type and mime_type.startswith('text')) or file_extension in ['py', 'md', 'txt', 'js', 'html', 'css', 'json']

        if not is_text:
            return f"### File: {filename}\n[Binary file - {size} bytes]\n\n"
        try:
            text_content = content_raw.decode('utf-8')
        except UnicodeDecodeError:
            # Text-looking extension but undecodable bytes.
            return f"### File: {filename}\n[Binary file - {size} bytes]\n\n"

        if file_extension == 'json':
            try:
                formatted_json = json.dumps(json.loads(text_content), indent=2)
                return f"### File: {filename}\n```json\n{formatted_json}\n```\n\n"
            except json.JSONDecodeError:
                return f"### File: {filename}\n```json\n{text_content}\n```\n[Note: Invalid JSON format]\n\n"
        return f"### File: {filename}\n```{file_extension or 'text'}\n{text_content}\n```\n\n"
    except Exception as e:
        return f"### File: {filename}\n[Error processing file: {str(e)}]\n\n"
244
-
245
def create_markdown_document(url=None, files=None):
    """Build one markdown document from either a repo URL or uploaded files.

    With `url`, the repo/Space listing is fetched; a fetch failure yields
    an "Error: ..." string. With `files`, each uploaded file is rendered.
    """
    if not url:
        # Uploaded-files mode.
        md = "# Uploaded Files\n\n"
        md += "## File Structure\n```\n"
        md += generate_file_tree([f.filename for f in files])
        md += "```\n\n"
        md += "Below are the contents of all uploaded files:\n\n"
        for f in files:
            md += process_uploaded_file(f)
        return md

    owner, repo, contents, is_hf = get_repo_contents(url)
    if isinstance(contents, str):  # error message, not a file list
        return f"Error: {contents}"

    md = f"# {'Space' if is_hf else 'Repository'}: {owner}/{repo}\n\n"
    md += "## File Structure\n```\n"
    md += generate_file_tree([item['path'] for item in contents])
    md += "```\n\n"
    md += f"Below are the contents of all files in the {'space' if is_hf else 'repository'}:\n\n"
    for item in contents:
        md += process_file_content(item, owner, repo, is_hf)
    return md
271
-
272
@app.route('/')
def index():
    # Serve the single-page UI; templates/index.html is written at module
    # load from html_template.html (see the bootstrap code at file bottom).
    return render_template('index.html')
275
-
276
@app.route('/process', methods=['POST'])
def process():
    """Handle uploaded files OR a repo URL; return markdown + html as JSON.

    Response shape is always {'markdown', 'html', 'filename', 'error'};
    errors use HTTP 400 with 'error' set.
    """
    response_data = {'markdown': '', 'html': '', 'filename': '', 'error': None}

    if 'files[]' in request.files:
        files = request.files.getlist('files[]')
        if not files:
            response_data['error'] = 'No files uploaded'
            return jsonify(response_data), 400

        markdown_content = create_markdown_document(files=files)
        response_data['markdown'] = "```markdown\n" + markdown_content + "\n```"
        response_data['html'] = markdown.markdown(markdown_content)
        response_data['filename'] = "uploaded_files_summary.md"
        return jsonify(response_data)

    # JSON branch: tolerate a missing/non-JSON body instead of letting
    # request.json raise a 400 with an opaque message.
    payload = request.get_json(silent=True) or {}
    repo_url = payload.get('repo_url')
    if not repo_url:
        response_data['error'] = 'Please provide a repository URL or upload files'
        return jsonify(response_data), 400

    markdown_content = create_markdown_document(repo_url)
    if markdown_content.startswith("Error:"):
        # create_markdown_document embeds the fetch error message.
        response_data['error'] = markdown_content
        return jsonify(response_data), 400

    # Derive owner/repo from the URL for the filename instead of calling
    # get_repo_contents a second time (the original re-fetched the whole
    # repository just to name the download).
    parts = repo_url.rstrip('/').split('/')
    owner, repo = parts[-2], parts[-1]
    response_data['markdown'] = markdown_content
    response_data['html'] = markdown.markdown(markdown_content)
    response_data['filename'] = f"{owner}_{repo}_summary.md"
    return jsonify(response_data)
308
-
309
@app.route('/download', methods=['POST'])
def download():
    """Stream previously generated markdown back as a downloadable file.

    Fix: the original called .encode() on request.json values without any
    validation, crashing with AttributeError on a missing/None body.
    """
    payload = request.get_json(silent=True) or {}
    markdown_content = payload.get('markdown')
    filename = payload.get('filename') or 'summary.md'

    if not markdown_content:
        return jsonify({'error': 'No markdown content provided'}), 400

    buffer = io.BytesIO(markdown_content.encode('utf-8'))
    buffer.seek(0)

    return send_file(
        buffer,
        as_attachment=True,
        download_name=filename,
        mimetype='text/markdown'
    )
324
# Bootstrap: copy the bundled single-file HTML into Flask's template dir
# at import time so render_template('index.html') works.
with open("html_template.html", "r") as f:
    html_template = f.read()
# (the original also called f.close() after the with-block — redundant)

os.makedirs('templates', exist_ok=True)
with open('templates/index.html', 'w') as f:
    f.write(html_template)

if __name__ == '__main__':
    # Port 7860 is the Hugging Face Spaces convention.
    # NOTE(review): debug=True should be disabled outside local development.
    app.run(host="0.0.0.0", port=7860, debug=True)
 
1
def get_hf_files(repo, name, path=""):
    """Download all files of the HF Space `repo`/`name` into ./`name`/.

    `path` is kept for backward compatibility but is unused:
    list_repo_files already returns the FULL recursive listing, so a
    single pass downloads each file exactly once.

    Fixes over the previous revision:
    - The recursive call passed `dir_part` as `path`, but list_repo_files
      ignores it — so the recursion re-listed the entire repo, recursing
      without bound on any path containing two or more '/' (RecursionError)
      and appending duplicate entries.
    - `continue` in the recursive branch skipped downloading nested files.
    - `pf`/`sf`/`f_name` were computed but never used (dead code).

    Returns a list of {"path": <repo-relative path>} dicts.
    """
    api = HfApi()
    file_list = api.list_repo_files(repo_id=f'{repo}/{name}', repo_type="space")
    print(f"Files in {repo}/{name}: {file_list}")
    processed_files = []

    for file_path in file_list:
        # Mirror the repo layout locally; os.makedirs on the dirname also
        # creates the base `name` directory on the first iteration.
        local_path = os.path.join(name, file_path)
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        # Fetch the raw file bytes from the Space.
        r = requests.get(f'https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}')
        print(f"Downloading: {file_path}")
        with open(local_path, 'wb') as fh:
            fh.write(r.content)

        processed_files.append({"path": file_path})

    print(f"Processed files: {processed_files}")
    return processed_files