mic3333 committed on
Commit
86d82de
·
1 Parent(s): 21b70d2
Files changed (4) hide show
  1. .gradio/certificate.pem +31 -0
  2. CLAUDE.md +33 -0
  3. app.py +301 -0
  4. requirements.txt +10 -0
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
CLAUDE.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Repository Overview
6
+
7
+ This is a Hugging Face Spaces repository configured for a text summarization project. The repository currently contains a minimal setup with only configuration files.
8
+
9
+ ### Current Structure
10
+ - `README.md`: Hugging Face Spaces configuration with Docker SDK setup
11
+ - Repository is configured as a Hugging Face Space with:
12
+   - Docker SDK
13
+   - Pink to purple gradient theme
14
+   - MIT license
15
+
16
+ ### Development Setup
17
+
18
+ This appears to be an early-stage Hugging Face Spaces project. Based on the configuration:
19
+ - Uses Docker for deployment
20
+ - Intended for text summarization functionality
21
+ - Currently lacks implementation files
22
+
23
+ ### Next Steps for Development
24
+
25
+ When developing this project, you'll likely need to:
26
+ - Add a Python requirements file (`requirements.txt`) that lists the Gradio library
27
+ - Create the main application file (typically `app.py` for Hugging Face Spaces)
28
+ - Add a text summarization dashboard built with Gradio
29
+ - Configure appropriate Docker setup if not using default
30
+
31
+ ### Hugging Face Spaces Reference
32
+
33
+ Configuration follows Hugging Face Spaces format. See: https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import whisper
3
+ import PyPDF2
4
+ import docx
5
+ from transformers import pipeline
6
+ import io
7
+ import tempfile
8
+ import os
9
+ import numpy as np
10
+
11
class TextSummarizer:
    """Turn uploaded documents, audio files, or live audio into text and summaries.

    Two models are loaded once, at construction time:

    * ``self.summarizer`` - a Hugging Face ``summarization`` pipeline using
      ``facebook/bart-large-cnn``.
    * ``self.whisper_model`` - the Whisper ``base`` speech-to-text model.

    Error convention: the extraction, transcription and summarization methods
    catch exceptions and return a human-readable string beginning with
    ``"Error ..."`` instead of raising, because their results are shown
    directly in Gradio text boxes.
    """

    # Sample rate (Hz) of the waveform handed to Whisper when a NumPy array is
    # passed instead of a file path (Whisper's own loader resamples to 16 kHz).
    _WHISPER_SAMPLE_RATE = 16000

    # Upper bound passed as ``max_length`` for each UI choice; ``min_length``
    # is derived as one third of it.
    _SUMMARY_MAX_LENGTHS = {"Short": 100, "Medium": 150, "Long": 250}
    _DEFAULT_SUMMARY_LENGTH = "Medium"

    # Extension -> name of the method that extracts text from that format.
    _TEXT_EXTRACTORS = {
        ".txt": "process_text_file",
        ".pdf": "extract_text_from_pdf",
        ".docx": "extract_text_from_docx",
    }
    # Extensions routed to ``transcribe_audio``.
    _AUDIO_EXTENSIONS = frozenset({".mp3", ".wav", ".m4a", ".flac"})

    # Exact prefixes produced by the extractors below when they fail. Matching
    # these (rather than any text starting with "Error") keeps a document that
    # merely begins with the word "Error" from being mistaken for a failure.
    _ERROR_PREFIXES = (
        "Error reading PDF:",
        "Error reading DOCX:",
        "Error reading TXT file:",
        "Error transcribing audio:",
    )

    def __init__(self):
        """Load the summarization pipeline and the Whisper model."""
        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        self.whisper_model = whisper.load_model("base")

    def extract_text_from_pdf(self, pdf_file):
        """Return the concatenated text of every page of a PDF.

        Args:
            pdf_file: Anything ``PyPDF2.PdfReader`` accepts.

        Returns:
            The extracted text, or ``"Error reading PDF: ..."`` on failure.
        """
        try:
            reader = PyPDF2.PdfReader(pdf_file)
            # ``extract_text()`` may yield nothing for a page; treat as empty.
            return "".join(page.extract_text() or "" for page in reader.pages)
        except Exception as e:
            return f"Error reading PDF: {str(e)}"

    def extract_text_from_docx(self, docx_file):
        """Return the paragraphs of a DOCX file, one per line.

        Args:
            docx_file: Anything ``docx.Document`` accepts.

        Returns:
            The extracted text, or ``"Error reading DOCX: ..."`` on failure.
        """
        try:
            doc = docx.Document(docx_file)
            return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)
        except Exception as e:
            return f"Error reading DOCX: {str(e)}"

    def process_text_file(self, txt_file):
        """Return the UTF-8 contents of a plain-text file.

        Args:
            txt_file: An object exposing the file path as ``.name``, or the
                path itself.

        Returns:
            The file contents, or ``"Error reading TXT file: ..."`` on failure.
        """
        try:
            path = getattr(txt_file, "name", txt_file)
            with open(path, 'r', encoding='utf-8') as f:
                return f.read()
        except Exception as e:
            return f"Error reading TXT file: {str(e)}"

    def transcribe_audio(self, audio_file):
        """Transcribe an audio file with Whisper.

        Args:
            audio_file: Path of the audio file to transcribe.

        Returns:
            The transcript, or ``"Error transcribing audio: ..."`` on failure.
        """
        try:
            result = self.whisper_model.transcribe(audio_file)
            return result["text"]
        except Exception as e:
            return f"Error transcribing audio: {str(e)}"

    def summarize_text(self, text, max_length=150, min_length=50):
        """Summarize ``text`` with the summarization pipeline.

        Args:
            text: Text to summarize.
            max_length: Upper length bound forwarded to the pipeline.
            min_length: Lower length bound forwarded to the pipeline.

        Returns:
            The summary; ``"Text is too short to summarize."`` when ``text`` is
            empty/``None`` or shorter than 50 characters after stripping; or
            ``"Error summarizing text: ..."`` if the pipeline fails.
        """
        try:
            if not text or len(text.strip()) < 50:
                return "Text is too short to summarize."

            # ``truncation=True`` clips input that exceeds the model's maximum
            # input size instead of letting the model fail on long documents.
            # Only the leading part of a very long text is summarized.
            summary = self.summarizer(
                text,
                max_length=max_length,
                min_length=min_length,
                do_sample=False,
                truncation=True,
            )
            return summary[0]['summary_text']
        except Exception as e:
            return f"Error summarizing text: {str(e)}"

    def _extractor_for(self, file):
        """Pick the text-extraction routine for ``file`` from its extension.

        Returns:
            ``(extension, extract)`` where ``extract()`` returns the text, or
            ``(extension, None)`` when the extension is not supported.
        """
        file_path = getattr(file, "name", file)
        extension = os.path.splitext(file_path)[1].lower()

        method_name = self._TEXT_EXTRACTORS.get(extension)
        if method_name is not None:
            # Document extractors receive the uploaded file object itself.
            return extension, lambda: getattr(self, method_name)(file)
        if extension in self._AUDIO_EXTENSIONS:
            # Whisper receives the path on disk.
            return extension, lambda: self.transcribe_audio(file_path)
        return extension, None

    def process_file(self, file, summary_length):
        """Extract the text of an uploaded file and summarize it.

        Args:
            file: Uploaded file (object with ``.name`` holding its path), or
                ``None``.
            summary_length: ``"Short"``, ``"Medium"`` or ``"Long"``; any other
                value falls back to ``"Medium"``.

        Returns:
            A Markdown-formatted string with the original text length and the
            summary, or a message describing why nothing was summarized.
        """
        if file is None:
            return "No file uploaded."

        extension, extract = self._extractor_for(file)
        if extract is None:
            return f"Unsupported file format: {extension}"

        text = extract()
        if isinstance(text, str) and text.startswith(self._ERROR_PREFIXES):
            return text

        max_length = self._SUMMARY_MAX_LENGTHS.get(
            summary_length,
            self._SUMMARY_MAX_LENGTHS[self._DEFAULT_SUMMARY_LENGTH],
        )
        min_length = max_length // 3

        summary = self.summarize_text(text, max_length, min_length)

        return f"**Original Text Length:** {len(text)} characters\n\n**Summary:**\n{summary}"

    @classmethod
    def _to_whisper_waveform(cls, data, sample_rate):
        """Convert a raw audio chunk to mono float32 at Whisper's sample rate.

        Args:
            data: Array of samples; integer PCM or float. A 2-D array is
                assumed to be ``(samples, channels)`` - TODO confirm against
                the Gradio version in use.
            sample_rate: Sample rate of ``data`` in Hz.

        Returns:
            1-D ``float32`` array sampled at ``_WHISPER_SAMPLE_RATE``.
        """
        data = np.asarray(data)
        if np.issubdtype(data.dtype, np.integer):
            # Scale integer PCM into [-1, 1); for int16 this divides by 32768.
            scale = float(np.iinfo(data.dtype).max) + 1.0
            samples = data.astype(np.float32) / scale
        else:
            samples = data.astype(np.float32)

        if samples.ndim > 1:
            # Down-mix multi-channel audio by averaging the channels.
            samples = samples.mean(axis=1)

        target_rate = cls._WHISPER_SAMPLE_RATE
        if sample_rate and sample_rate != target_rate and samples.size:
            # Linear-interpolation resampling: no anti-aliasing filter, but it
            # keeps speech at the right speed/pitch using NumPy only.
            target_len = max(1, int(round(samples.size / sample_rate * target_rate)))
            src_times = np.arange(samples.size) / sample_rate
            dst_times = np.arange(target_len) / target_rate
            samples = np.interp(dst_times, src_times, samples).astype(np.float32)

        return samples

    def transcribe_stream(self, audio_chunk, current_transcript):
        """Transcribe one streamed audio chunk and append it to the transcript.

        Args:
            audio_chunk: ``(sample_rate, data)`` tuple, or ``None`` when there
                is no new audio.
            current_transcript: Transcript accumulated so far (``None`` is
                treated as empty).

        Returns:
            ``(text_to_display, new_state)``. On success both are the updated
            transcript. On failure the first item is an
            ``"Error during transcription: ..."`` message and the state is left
            unchanged.
        """
        transcript = current_transcript or ""
        if audio_chunk is None:
            return transcript, transcript

        try:
            sample_rate, data = audio_chunk
            samples = self._to_whisper_waveform(data, sample_rate)
            if samples.size == 0:
                # Nothing to transcribe in an empty chunk.
                return transcript, transcript

            # Transcribe the audio chunk
            result = self.whisper_model.transcribe(samples, fp16=False)
            new_text = result['text']

            updated_transcript = transcript + new_text + " "
            return updated_transcript, updated_transcript
        except Exception as e:
            return f"Error during transcription: {str(e)}", transcript

    def convert_file_to_text(self, file):
        """Extract text from any supported file format.

        Args:
            file: Uploaded file (object with ``.name`` holding its path), or
                ``None``.

        Returns:
            The extracted or transcribed text, an extractor's ``"Error ..."``
            message, or a message saying no file was given / the format is
            unsupported.
        """
        if file is None:
            return "No file uploaded for conversion."

        extension, extract = self._extractor_for(file)
        if extract is None:
            return f"Unsupported file format for conversion: {extension}"
        return extract()
150
+
151
def create_interface():
    """Build the Gradio dashboard and wire it to a ``TextSummarizer``.

    The dashboard has three tabs:

    * File Management & Conversion - upload a file and convert it to text.
    * Meeting Summarization - summarize the uploaded file.
    * Live Meeting Recording & Summarization - stream microphone audio into
      a running transcript and summarize it.

    Returns:
        The ``gr.Blocks`` interface, not yet launched.
    """
    summarizer = TextSummarizer()

    with gr.Blocks(title="Text Summarization Dashboard") as interface:
        gr.Markdown("Text Summarization Dashboard")
        gr.Markdown("Manage files, and interact with specialized AI agents for various tasks.")

        # State component to store the uploaded file so that both the
        # conversion and the summarization tab act on the same upload.
        uploaded_file_state = gr.State(None)

        with gr.Tabs():
            with gr.TabItem("📄 File Management & Conversion"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Upload File")
                        file_input = gr.File(
                            label="Select a file",
                            file_types=[".txt", ".pdf", ".docx", ".mp3", ".wav", ".m4a", ".flac"],
                        )
                        uploaded_file_name = gr.Textbox(label="Current File", interactive=False)

                        def store_file(file):
                            """Remember the upload and show its name."""
                            if file:
                                return file, file.name
                            return None, "No file uploaded"

                        file_input.upload(
                            fn=store_file,
                            inputs=[file_input],
                            outputs=[uploaded_file_state, uploaded_file_name],
                        )

                    with gr.Column(scale=1):
                        gr.Markdown("### Convert to TXT")
                        gr.Markdown("Supported formats for conversion to .txt: `.pdf`, `.docx`, `.mp3`, `.wav`, `.m4a`, `.flac`")
                        convert_btn = gr.Button("Convert to TXT", variant="secondary")
                        conversion_output = gr.Textbox(
                            label="Conversion Output",
                            placeholder="Converted text will appear here...",
                            lines=8,
                            interactive=False,
                        )

                        convert_btn.click(
                            fn=summarizer.convert_file_to_text,
                            inputs=[uploaded_file_state],
                            outputs=[conversion_output],
                        )

            with gr.TabItem("✍️ Meeting Summarization"):
                gr.Markdown("### Meeting Summarization")
                gr.Markdown("Generate summaries from your meeting transcripts and other documents.")
                with gr.Row():
                    with gr.Column(scale=1):
                        summary_length = gr.Dropdown(
                            choices=["Short", "Medium", "Long"],
                            value="Medium",
                            label="Summary Length",
                            # The limits are the max_length values used for
                            # each choice (100 / 150 / 250), not word counts.
                            info="Maximum summary length - Short: 100, Medium: 150, Long: 250 tokens",
                        )
                        submit_btn = gr.Button("Generate Summary", variant="primary")

                    with gr.Column(scale=2):
                        output = gr.Textbox(
                            label="Summary Output",
                            lines=10,
                            placeholder="Your summary will appear here...",
                        )

                # NOTE(review): placement of this accordion under the Meeting
                # Summarization tab is inferred - confirm the intended nesting.
                # None of the controls below is connected to an event handler,
                # so changing them has no effect on the generated summaries.
                with gr.Accordion("⚙️ Model Settings", open=False):
                    gr.Markdown("### Model Selection & Fine-Tuning")
                    gr.Markdown("Choose different models and configure their parameters.")
                    with gr.Row():
                        gr.Dropdown(
                            label="Select Summarization Model",
                            choices=["facebook/bart-large-cnn", "t5-small", "google/pegasus-xsum"],
                            value="facebook/bart-large-cnn",
                        )
                    with gr.Accordion("Fine-Tuning Options", open=False):
                        gr.Slider(label="Min Tokens", minimum=10, maximum=200, step=5, value=50)
                        gr.Slider(label="Max Tokens", minimum=50, maximum=500, step=10, value=150)
                        gr.Slider(label="Temperature", minimum=0.1, maximum=1.5, step=0.1, value=0.7)
                        gr.Slider(label="Top-K", minimum=0, maximum=100, step=1, value=50, info="0 to disable")
                        gr.Slider(label="Top-P (Nucleus Sampling)", minimum=0.0, maximum=1.0, step=0.05, value=0.95, info="0 to disable")
                        gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.1, value=1.2)
                        gr.Slider(label="Number of Beams", minimum=1, maximum=8, step=1, value=4)

            with gr.TabItem("🔴 Live Meeting Recording & Summarization"):
                gr.Markdown("### Live Meeting Transcription & Summarization")
                gr.Markdown("Record audio from your microphone, get a live transcript, and generate a summary.")

                # Transcript accumulated across streamed audio chunks.
                live_transcript_state = gr.State("")

                with gr.Row():
                    with gr.Column(scale=1):
                        audio_input = gr.Audio(
                            label="Live Audio",
                            sources="microphone",
                            streaming=True,
                        )
                    with gr.Column(scale=2):
                        live_transcript_output = gr.Textbox(
                            label="Live Transcript",
                            placeholder="Transcript will appear here...",
                            lines=15,
                        )

                with gr.Row():
                    with gr.Column(scale=1):
                        live_summary_length = gr.Dropdown(
                            choices=["Short", "Medium", "Long"],
                            value="Medium",
                            label="Summary Length",
                        )
                        live_summary_btn = gr.Button("Generate Summary", variant="primary")

                    with gr.Column(scale=2):
                        live_summary_output = gr.Textbox(
                            label="Meeting Summary",
                            placeholder="Summary will appear here...",
                            lines=5,
                        )

                # Each streamed chunk updates both the visible transcript and
                # the state that is fed back in with the next chunk.
                audio_input.stream(
                    fn=summarizer.transcribe_stream,
                    inputs=[audio_input, live_transcript_state],
                    outputs=[live_transcript_output, live_transcript_state],
                )

                def generate_live_summary(transcript, length_option):
                    """Summarize the live transcript at the chosen length."""
                    max_len = {"Short": 100, "Medium": 150, "Long": 250}[length_option]
                    min_len = max_len // 3
                    return summarizer.summarize_text(transcript, max_length=max_len, min_length=min_len)

                live_summary_btn.click(
                    fn=generate_live_summary,
                    inputs=[live_transcript_output, live_summary_length],
                    outputs=[live_summary_output],
                )

        # Summarize the file stored by the upload tab.
        submit_btn.click(
            fn=summarizer.process_file,
            inputs=[uploaded_file_state, summary_length],
            outputs=output,
        )

    return interface
298
+
299
if __name__ == "__main__":
    # Script entry point: build the dashboard, then serve it and request a
    # public share link.
    interface = create_interface()
    interface.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==4.44.1
2
+ transformers==4.35.2
3
+ torch==2.1.1
4
+ openai-whisper==20231117
5
+ PyPDF2==3.0.1
6
+ python-docx==1.1.0
7
+ datasets==2.14.6
8
+ accelerate==0.24.1
9
+ sentencepiece==0.1.99
10
+ protobuf==4.25.0