dwb2023 committed on
Commit
4006c1a
1 Parent(s): e568c28

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -0
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import zipfile
4
+ import io
5
+ import gradio as gr
6
+
7
# Files above this size are labelled "binary" and their content is not fetched.
_MAX_TEXT_FILE_BYTES = 1024 * 1024

# Seconds to wait for the Hub before giving up on a single HTTP request.
_REQUEST_TIMEOUT_SECONDS = 30

# URL path prefixes that select a repository type on huggingface.co.
_REPO_TYPE_PREFIXES = ("spaces", "datasets", "models")

# Path segments that follow a repository id in browser URLs (not part of the id).
_NON_REPO_SEGMENTS = frozenset(
    {"tree", "blob", "resolve", "raw", "discussions", "commits", "commit"}
)

_INVALID_URL_MESSAGE = "Invalid URL. Please provide a valid Hugging Face URL."
_FILE_FETCH_FAILED_MESSAGE = "Failed to fetch file content."


def get_file_summary(file_info):
    """Return a ``{"name", "type", "size"}`` summary for one repository entry.

    Args:
        file_info: Either a dict describing a file (``path``/``size`` keys, as
            produced by the Hub tree listing; ``filename``/``file_size`` keys
            are accepted as a fallback) or an object exposing ``filename`` and
            ``file_size`` attributes.

    Returns:
        dict: ``name`` and ``size`` copied from the entry, and ``type`` set to
        ``"binary"`` when the size exceeds 1 MiB, otherwise ``"text"``. The
        type is a size-based heuristic only; file content is not inspected.
    """
    if isinstance(file_info, dict):
        name = file_info.get("path", file_info.get("filename", ""))
        # Treat a missing or null size as 0 so the comparison below cannot fail.
        size = file_info.get("size", file_info.get("file_size", 0)) or 0
    else:
        name = file_info.filename
        size = file_info.file_size

    return {
        "name": name,
        "type": "binary" if size > _MAX_TEXT_FILE_BYTES else "text",
        "size": size,
    }


def _parse_repo_url(url):
    """Split a huggingface.co URL into ``(api_type, repo_id, web_path)``.

    Returns ``None`` when the host is not huggingface.co or no repository id
    can be found in the path.
    """
    from urllib.parse import urlparse

    candidate = (url or "").strip()
    if "://" not in candidate:
        # Allow "huggingface.co/user/repo" pasted without a scheme.
        candidate = "https://" + candidate
    try:
        parsed = urlparse(candidate)
        host = (parsed.hostname or "").lower()
    except ValueError:
        return None
    # Compare the real hostname; a substring test would accept any URL that
    # merely mentions "huggingface.co" somewhere in its path or query.
    if host != "huggingface.co" and not host.endswith(".huggingface.co"):
        return None

    segments = [segment for segment in parsed.path.split("/") if segment]
    api_type = "models"  # model URLs carry no type prefix in the browser
    if segments and segments[0] in _REPO_TYPE_PREFIXES:
        api_type = segments.pop(0)

    id_parts = []
    for segment in segments[:2]:
        if segment in _NON_REPO_SEGMENTS:
            break
        id_parts.append(segment)
    if not id_parts:
        return None

    repo_id = "/".join(id_parts)
    web_path = repo_id if api_type == "models" else f"{api_type}/{repo_id}"
    return api_type, repo_id, web_path


def _fetch_file_text(file_url):
    """Download one file and return its text, or a failure message."""
    try:
        file_response = requests.get(file_url, timeout=_REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException:
        return _FILE_FETCH_FAILED_MESSAGE
    if file_response.status_code == 200:
        return file_response.text
    return _FILE_FETCH_FAILED_MESSAGE


def extract_repo_content(url):
    """List the files on a repository's ``main`` branch and fetch small ones.

    Args:
        url: Browser URL of a Hugging Face model, dataset or Space.

    Returns:
        list[dict] on success, one ``{"header": summary, "content": text}``
        item per listed entry, where ``summary`` comes from
        ``get_file_summary``. Entries classified as binary get a placeholder
        message instead of their content.
        str on failure: a human-readable error message (invalid URL, network
        error, non-200 status or an unexpected response body).
    """
    from urllib.parse import quote

    parsed = _parse_repo_url(url)
    if parsed is None:
        return _INVALID_URL_MESSAGE
    api_type, repo_id, web_path = parsed
    api_url = f"https://huggingface.co/api/{api_type}/{repo_id}/tree/main"

    try:
        response = requests.get(api_url, timeout=_REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        return f"Failed to fetch repository content. Error: {exc}"
    if response.status_code != 200:
        return f"Failed to fetch repository content. Status code: {response.status_code}"

    try:
        repo_content = response.json()
    except ValueError:
        return "Failed to fetch repository content. Response was not valid JSON."
    if not isinstance(repo_content, list):
        return "Failed to fetch repository content. Unexpected response format."

    extracted_content = []
    for file_info in repo_content:
        file_summary = get_file_summary(file_info)
        if file_summary["type"] == "text":
            # Percent-encode the path so names with spaces or '#' stay intact.
            file_path = quote(str(file_summary["name"]), safe="/")
            file_url = f"https://huggingface.co/{web_path}/resolve/main/{file_path}"
            content = _fetch_file_text(file_url)
        else:
            content = "File too large or binary, content not captured."
        extracted_content.append({"header": file_summary, "content": content})

    return extracted_content
46
+
47
def format_output(extracted_content):
    """Render extracted files as a Markdown document.

    Args:
        extracted_content: Iterable of ``{"header": {...}, "content": ...}``
            items, where ``header`` has ``name``, ``type`` and ``size`` keys.

    Returns:
        str: One section per file (heading, type, size, fenced content),
        concatenated in input order. Empty input yields an empty string.
    """
    sections = []
    for file_data in extracted_content:
        header = file_data["header"]
        content = str(file_data["content"])

        # A fence must be longer than any backtick run inside the content,
        # otherwise a file that itself contains ``` would close the block early.
        fence = "```"
        while fence in content:
            fence += "`"

        sections.append(
            f"### File: {header['name']}\n"
            f"**Type:** {header['type']}\n"
            f"**Size:** {header['size']} bytes\n"
            "#### Content:\n"
            f"{fence}\n{content}\n{fence}\n\n"
        )
    return "".join(sections)
56
+
57
def extract_and_display(url):
    """Fetch a repository's content and return it as Markdown for the UI.

    Args:
        url: The Hugging Face URL entered by the user.

    Returns:
        str: The formatted file listing, or the error message produced by
        ``extract_repo_content`` when extraction failed.
    """
    extracted_content = extract_repo_content(url)
    # extract_repo_content reports failures as a plain string; show it as-is
    # instead of handing it to format_output, which expects a list of dicts.
    if isinstance(extracted_content, str):
        return extracted_content
    return format_output(extracted_content)
61
+
62
# Build the UI: a URL textbox, a button, and a Markdown pane for the result.
with gr.Blocks() as app:
    gr.Markdown("# Gradio Space/Model Content Extractor")
    url_input = gr.Textbox(label="Hugging Face Space/Model URL")
    output_display = gr.Markdown()
    extract_button = gr.Button("Extract Content")

    # Event listeners must be registered inside the Blocks context.
    extract_button.click(fn=extract_and_display, inputs=url_input, outputs=output_display)

# Only start the server when run as a script, so importing this module
# (e.g. from a test) does not launch the web app as a side effect.
if __name__ == "__main__":
    app.launch()