suprimedev committed on
Commit
e5fd7e2
·
verified ·
1 Parent(s): bc8a714

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -349
app.py CHANGED
@@ -2,13 +2,11 @@ import gradio as gr
2
  from pydub import AudioSegment
3
  import json
4
  import uuid
 
5
  import asyncio
6
- import aiofiles
7
  import os
8
  import time
9
- import mimetypes
10
  from typing import List, Dict
11
- import aiohttp # برای درخواست های HTTP به Talkbot.ir
12
 
13
  # Constants
14
  MAX_FILE_SIZE_MB = 20
@@ -16,11 +14,11 @@ MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024 # Convert MB to bytes
16
 
17
  class PodcastGenerator:
18
  def __init__(self):
19
- self.talkbot_tts_url = "https://talkbot.ir/TTS-tkun"
20
- # OpenRouter API URL از Talkbot.ir
21
- self.talkbot_openrouter_api_url = "https://talkbot.ir/api/v1/chat/completions"
22
 
23
- async def generate_script(self, prompt: str, language: str, api_key: str, file_obj=None, progress=None) -> Dict:
24
  example = """
25
  {
26
  "topic": "AGI",
@@ -32,182 +30,6 @@ class PodcastGenerator:
32
  {
33
  "speaker": 1,
34
  "line": "Yeah, it's definitely having a moment, isn't it?"
35
- },
36
- {
37
- "speaker": 2,
38
- "line": "It is and for good reason, right? I mean, you've been digging into this stuff, listening to the podcasts and everything. What really stood out to you? What got you hooked?"
39
- },
40
- {
41
- "speaker": 1,
42
- "line": "Honestly, it's the sheer scale of what AGI could do. We're talking about potentially reshaping well everything."
43
- },
44
- {
45
- "speaker": 2,
46
- "line": "No kidding, but let's be real. Sometimes it feels like every other headline is either hyping AGI up as this technological utopia or painting it as our inevitable robot overlords."
47
- },
48
- {
49
- "speaker": 1,
50
- "line": "It's easy to get lost in the noise, for sure."
51
- },
52
- {
53
- "speaker": 2,
54
- "line": "Exactly. So how about we try to cut through some of that, shall we?"
55
- },
56
- {
57
- "speaker": 1,
58
- "line": "Sounds like a plan."
59
- },
60
- {
61
- "speaker": 2,
62
- "line": "Okay, so first things first, AGI, what is it really? And I don't just mean some dictionary definition, we're talking about something way bigger than just a super smart computer, right?"
63
- },
64
- {
65
- "speaker": 1,
66
- "line": "Right, it's not just about more processing power or better algorithms, it's about a fundamental shift in how we think about intelligence itself."
67
- },
68
- {
69
- "speaker": 2,
70
- "line": "So like, instead of programming a machine for a specific task, we're talking about creating something that can learn and adapt like we do."
71
- },
72
- {
73
- "speaker": 1,
74
- "line": "Exactly, think of it this way: Right now, we've got AI that can beat a grandmaster at chess but ask that same AI to, say, write a poem or compose a symphony. No chance."
75
- },
76
- {
77
- "speaker": 2,
78
- "line": "Okay, I see. So, AGI is about bridging that gap, creating something that can move between those different realms of knowledge seamlessly."
79
- },
80
- {
81
- "speaker": 1,
82
- "line": "Precisely. It's about replicating that uniquely human ability to learn something new and apply that knowledge in completely different contexts and that's a tall order, let me tell you."
83
- },
84
- {
85
- "speaker": 2,
86
- "line": "I bet. I mean, think about how much we still don't even understand about our own brains."
87
- },
88
- {
89
- "speaker": 1,
90
- "line": "That's exactly it. We're essentially trying to reverse-engineer something we don't fully comprehend."
91
- },
92
- {
93
- "speaker": 2,
94
- "line": "And how are researchers even approaching that? What are some of the big ideas out there?"
95
- },
96
- {
97
- "speaker": 1,
98
- "line": "Well, there are a few different schools of thought. One is this idea of neuromorphic computing where they're literally trying to build computer chips that mimic the structure and function of the human brain."
99
- },
100
- {
101
- "speaker": 2,
102
- "line": "Wow, so like actually replicating the physical architecture of the brain. That's wild."
103
- },
104
- {
105
- "speaker": 1,
106
- "line": "It's pretty mind-blowing stuff and then you've got folks working on something called whole brain emulation."
107
- },
108
- {
109
- "speaker": 2,
110
- "line": "Okay, and what's that all about?"
111
- },
112
- {
113
- "speaker": 1,
114
- "line": "The basic idea there is to create a complete digital copy of a human brain down to the last neuron and synapse and run it on a sufficiently powerful computer simulation."
115
- },
116
- {
117
- "speaker": 2,
118
- "line": "Hold on, a digital copy of an entire brain, that sounds like something straight out of science fiction."
119
- },
120
- {
121
- "speaker": 1,
122
- "line": "It does, doesn't it? But it gives you an idea of the kind of ambition we're talking about here and the truth is we're still a long way off from truly achieving AGI, no matter which approach you look at."
123
- },
124
- {
125
- "speaker": 2,
126
- "line": "That makes sense but it's still exciting to think about the possibilities, even if they're a ways off."
127
- },
128
- {
129
- "speaker": 1,
130
- "line": "Absolutely and those possibilities are what really get people fired up about AGI, right? Yeah."
131
- },
132
- {
133
- "speaker": 2,
134
- "line": "For sure. In fact, I remember you mentioning something in that podcast about AGI's potential to revolutionize scientific research. Something about supercharging breakthroughs."
135
- },
136
- {
137
- "speaker": 1,
138
- "line": "Oh, absolutely. Imagine an AI that doesn't just crunch numbers but actually understands scientific data the way a human researcher does. We're talking about potential breakthroughs in everything from medicine and healthcare to material science and climate change."
139
- },
140
- {
141
- "speaker": 2,
142
- "line": "It's like giving scientists this incredibly powerful new tool to tackle some of the biggest challenges we face."
143
- },
144
- {
145
- "speaker": 1,
146
- "line": "Exactly, it could be a total game changer."
147
- },
148
- {
149
- "speaker": 2,
150
- "line": "Okay, but let's be real, every coin has two sides. What about the potential downsides of AGI? Because it can't all be sunshine and roses, right?"
151
- },
152
- {
153
- "speaker": 1,
154
- "line": "Right, there are definitely valid concerns. Probably the biggest one is the impact on the job market. As AGI gets more sophisticated, there're a real chance it could automate a lot of jobs that are currently done by humans."
155
- },
156
- {
157
- "speaker": 2,
158
- "line": "So we're not just talking about robots taking over factories but potentially things like, what, legal work, analysis, even creative fields?"
159
- },
160
- {
161
- "speaker": 1,
162
- "line": "Potentially, yes. And that raises a whole host of questions about what happens to those workers, how we retrain them, how we ensure that the benefits of AGI are shared equitably."
163
- },
164
- {
165
- "speaker": 2,
166
- "line": "Right, because it's not just about the technology itself, but how we choose to integrate it into society."
167
- },
168
- {
169
- "speaker": 1,
170
- "line": "Absolutely. We need to be having these conversations now about ethics, about regulation, about how to make sure AGI is developed and deployed responsibly."
171
- },
172
- {
173
- "speaker": 2,
174
- "line": "So it's less about preventing some kind of sci-fi robot apocalypse and more about making sure we're steering this technology in the right direction from the get-go."
175
- },
176
- {
177
- "speaker": 1,
178
- "line": "Exactly, AGI has the potential to be incredibly beneficial, but it's not going to magically solve all our problems. It's on us to make sure we're using it for good."
179
- },
180
- {
181
- "speaker": 2,
182
- "line": "It's like you said earlier, it's about shaping the future of intelligence."
183
- },
184
- {
185
- "speaker": 1,
186
- "line": "I like that. It really is."
187
- },
188
- {
189
- "speaker": 2,
190
- "line": "And honestly, that's a responsibility that extends beyond just the researchers and the policymakers."
191
- },
192
- {
193
- "speaker": 1,
194
- "line": "100%"
195
- },
196
- {
197
- "speaker": 2,
198
- "line": "So to everyone listening out there I'll leave you with this. As AGI continues to develop, what role do you want to play in shaping its future?"
199
- },
200
- {
201
- "speaker": 1,
202
- "line": "That's a question worth pondering."
203
- },
204
- {
205
- "speaker": 2,
206
- "line": "It certainly is and on that note, we'll wrap up this deep dive. Thanks for listening, everyone."
207
- },
208
- {
209
- "speaker": 1,
210
- "line": "Peace."
211
  }
212
  ]
213
  }
@@ -230,157 +52,134 @@ You are a professional podcast generator. Your task is to generate a professiona
230
  Follow this example structure:
231
  {example}
232
  """
233
- user_prompt_text = ""
 
234
  if prompt and file_obj:
235
- user_prompt_text = f"Please generate a podcast script based on the uploaded file following user input:\n{prompt}"
236
  elif prompt:
237
- user_prompt_text = f"Please generate a podcast script based on the following user input:\n{prompt}"
238
  else:
239
- user_prompt_text = "Please generate a podcast script based on the uploaded file."
 
 
 
 
 
 
 
 
 
 
240
 
241
  messages = [
242
  {"role": "system", "content": system_prompt},
243
- {"role": "user", "content": user_prompt_text}
244
  ]
245
 
246
- # افزودن فایل به درخواست (اگر Talkbot.ir API از این فرمت پشتیبانی کند)
247
- # توجه: API های LLM معمولا فایل ها را به صورت "base64 encoded" یا "multipart/form-data"
248
- # دریافت می کنند. اگر Talkbot.ir API دقیقا همانند Google Gemini API در اینجا عمل نکند،
249
- # این بخش نیاز به تغییر دارد. برای سادگی، فرض می کنیم فقط متن اصلی ارسال می شود
250
- # و فایل باید جداگانه پردازش و به متن تبدیل شود، سپس متن آن به مدل ارسال شود.
251
- # در این مثال، فرض می کنیم که Gemini Pro از این طریق فایل را دریافت می کند.
252
- # اما برای OpenRouter/Deepseek باید فایل را به متن تبدیل کنید.
253
-
254
- # برای سادگی و تطابق با OpenRouter (معمولا فایل را مستقیم دریافت نمی‌کند)،
255
- # اینجا فرض می‌کنیم که اگر فایل بود، محتوایش قبلاً به user_prompt_text اضافه شده.
256
- # اگر Talkbot.ir قابلیت پردازش فایل مستقیم را دارد، این بخش را مطابق مستنداتشان تغییر دهید.
257
- if file_obj:
258
- # این بخش را باید مطابق با نحوه ارسال فایل به Talkbot.ir API تغییر دهید
259
- # مثلاً اگر Talkbot API از آپلود فایل پشتیبانی می‌کند، باید از await self._read_file_bytes(file_obj) استفاده کنید.
260
- # برای ساده‌سازی فعلی، فرض می‌کنیم فایل‌ها صرفاً برای زمینه (context) هستند
261
- # و باید محتوای آن‌ها به صورت متنی به مدل فرستاده شود.
262
- # فعلاً، فایل به صورت مستقیم به API چت Deepseek فرستاده نمی‌شود،
263
- # مگر اینکه Talkbot.ir API برای Deepseek V3 قابلیت ورودی فایل را داشته باشد.
264
- # در صورتی که فایل واقعاً نیاز به پردازش توسط مدل دارد، باید محتوای آن را خوانده و
265
- # به string تبدیل کنید و به prompt اضافه کنید.
266
- pass # نیاز به پیاده‌سازی تبدیل فایل به متن و اضافه کردن به prompt
267
-
268
  headers = {
269
- "Authorization": f"Bearer {api_key}",
270
  "Content-Type": "application/json"
271
  }
272
 
273
- json_payload = {
274
  "model": "deepseek-v3-0324",
275
  "messages": messages,
276
  "temperature": 1,
277
- # "response_mime_type": "application/json" # این پارامتر ممکن است برای Deepseek V3 نباشد
278
- # در حالت عادی باید مدل خودش خروجی JSON دهد.
279
- # اگر Talkbot.ir برای این مدل پارامتری برای تضمین JSON دارد، اضافه کنید.
280
  }
281
 
282
  try:
283
  if progress:
284
  progress(0.3, "Generating podcast script...")
285
-
286
  async with aiohttp.ClientSession() as session:
287
- async with session.post(self.talkbot_openrouter_api_url, headers=headers, json=json_payload, timeout=60) as response:
288
- response.raise_for_status() # برای بررسی خطاهای HTTP
289
- response_data = await response.json()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
- if not response_data or not response_data.get('choices'):
292
- raise Exception("Invalid response from Talkbot.ir API or no choices found.")
293
-
294
- generated_text = response_data['choices'][0]['message']['content']
295
-
296
- # مدل ممکن است پاسخ را داخل یک JSON block برگرداند (مثل "```json\n...\n```")
297
- # باید آن را استخراج کنیم.
298
- if generated_text.startswith("```json"):
299
- generated_text = generated_text[len("```json"):].strip()
300
- if generated_text.endswith("```"):
301
- generated_text = generated_text[:-len("```")].strip()
302
-
303
- print(f"Generated raw text from Deepseek-v3:\n{generated_text}")
304
-
305
- script_json = json.loads(generated_text)
306
-
307
- if progress:
308
- progress(0.4, "Script generated successfully!")
309
-
310
- return script_json
311
  except asyncio.TimeoutError:
312
  raise Exception("The script generation request timed out. Please try again later.")
313
- except aiohttp.ClientResponseError as e:
314
- if e.status == 401:
315
- raise Exception("Unauthorized: Invalid API key or insufficient permissions for Talkbot.ir.")
316
- elif e.status == 429:
317
- raise Exception("Rate limit exceeded for the API key. Please try again later or provide your own API key.")
318
- else:
319
- raise Exception(f"Failed to generate podcast script from Talkbot.ir: HTTP Error {e.status} - {e.message}")
320
- except json.JSONDecodeError as e:
321
- raise Exception(f"Failed to decode JSON from Talkbot.ir response. Raw text: '{generated_text}'. Error: {e}")
322
  except Exception as e:
323
- raise Exception(f"Failed to generate podcast script: {e}")
 
 
 
324
 
325
- async def tts_generate(self, text: str, speaker: int, speaker1_type: str, speaker2_type: str, api_key: str) -> str:
326
- # speaker1_type و speaker2_type اینجا همان "male" یا "female" هستند
327
- voice_type = speaker1_type if speaker == 1 else speaker2_type
328
-
329
- # پارامتر voice_gender در Talkbot.ir TTS API
330
- voice_gender = "male" if voice_type == "male" else "female"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
 
332
  params = {
333
- "text": text,
334
- "voice_gender": voice_gender,
335
- # اگر Talkbot.ir TTS نیاز به Auth Token دارد:
336
- # "api_key": api_key
337
- # یا در هدر Authorization
338
  }
339
 
340
- headers = {
341
- # اگر Talkbot.ir TTS نیاز به تایید هویت با Bearer Token دارد
342
- "Authorization": f"Bearer {api_key}"
343
- }
344
-
345
  temp_filename = f"temp_{uuid.uuid4()}.wav"
346
 
347
  try:
348
  async with aiohttp.ClientSession() as session:
349
- # Talkbot.ir/TTS-tkun URL با متد GET و پارامترها
350
- async with session.get(self.talkbot_tts_url, params=params, headers=headers, timeout=30) as response:
351
- response.raise_for_status() # برای بررسی خطاهای HTTP
352
- # فرض بر این است که پاسخ مستقیم فایل صوتی با فرمت wav است.
353
- # اگر لینک باشد، باید آن لینک را دانلود کنید.
 
354
 
355
- # بررسی Content-Type برای اطمینان از اینکه فایل صوتی است
356
- content_type = response.headers.get('Content-Type', '')
357
- if 'audio' not in content_type:
358
- raise Exception(f"Unexpected content type from TTS API: {content_type}. Expected audio.")
359
-
360
- audio_data = await response.read()
361
 
362
- if not audio_data:
363
- raise Exception("Received empty audio data from TTS API.")
364
-
365
  async with aiofiles.open(temp_filename, 'wb') as f:
366
- await f.write(audio_data)
367
-
368
- return temp_filename
 
369
  except asyncio.TimeoutError:
370
  if os.path.exists(temp_filename):
371
  os.remove(temp_filename)
372
- raise Exception("Text-to-speech generation timed out. Please try with a shorter text.")
373
- except aiohttp.ClientResponseError as e:
374
- if os.path.exists(temp_filename):
375
- os.remove(temp_filename)
376
- if e.status == 401:
377
- raise Exception("Unauthorized access to Talkbot.ir TTS. Invalid API key or token.")
378
- else:
379
- raise Exception(f"Error from Talkbot.ir TTS API: HTTP {e.status} - {e.message}")
380
  except Exception as e:
381
  if os.path.exists(temp_filename):
382
  os.remove(temp_filename)
383
- raise Exception(f"Failed to generate speech with Talkbot.ir TTS: {e}")
384
 
385
  async def combine_audio_files(self, audio_files: List[str], progress=None) -> str:
386
  if progress:
@@ -388,13 +187,8 @@ Follow this example structure:
388
 
389
  combined_audio = AudioSegment.empty()
390
  for audio_file in audio_files:
391
- try:
392
- combined_audio += AudioSegment.from_file(audio_file)
393
- except Exception as e:
394
- print(f"Warning: Could not load audio file {audio_file}: {e}")
395
- finally:
396
- if os.path.exists(audio_file):
397
- os.remove(audio_file) # Clean up temporary files
398
 
399
  output_filename = f"output_{uuid.uuid4()}.wav"
400
  combined_audio.export(output_filename, format="wav")
@@ -404,13 +198,14 @@ Follow this example structure:
404
 
405
  return output_filename
406
 
407
- async def generate_podcast(self, input_text: str, language: str, speaker1_type: str, speaker2_type: str, api_key: str, file_obj=None, progress=None) -> str:
408
  try:
409
  if progress:
410
  progress(0.1, "Starting podcast generation...")
411
 
 
412
  return await asyncio.wait_for(
413
- self._generate_podcast_internal(input_text, language, speaker1_type, speaker2_type, api_key, file_obj, progress),
414
  timeout=600 # 10 minutes total timeout
415
  )
416
  except asyncio.TimeoutError:
@@ -418,26 +213,28 @@ Follow this example structure:
418
  except Exception as e:
419
  raise Exception(f"Error generating podcast: {str(e)}")
420
 
421
- async def _generate_podcast_internal(self, input_text: str, language: str, speaker1_type: str, speaker2_type: str, api_key: str, file_obj=None, progress=None) -> str:
422
  if progress:
423
  progress(0.2, "Generating podcast script...")
424
 
425
- podcast_json = await self.generate_script(input_text, language, api_key, file_obj, progress)
426
 
427
  if progress:
428
  progress(0.5, "Converting text to speech...")
429
 
430
  audio_files = []
431
  total_lines = len(podcast_json['podcast'])
432
- batch_size = 5 # تعداد درخواست های TTS همزمان
433
 
 
 
434
  for batch_start in range(0, total_lines, batch_size):
435
  batch_end = min(batch_start + batch_size, total_lines)
436
  batch = podcast_json['podcast'][batch_start:batch_end]
437
 
 
438
  tts_tasks = []
439
  for item in batch:
440
- tts_task = self.tts_generate(item['line'], item['speaker'], speaker1_type, speaker2_type, api_key)
441
  tts_tasks.append(tts_task)
442
 
443
  try:
@@ -445,20 +242,22 @@ Follow this example structure:
445
 
446
  for i, result in enumerate(batch_results):
447
  if isinstance(result, Exception):
448
- # اگر خطایی در یک TTS بود، همه فایل‌های تولید شده را پاک کنید و خطا دهید
449
  for file in audio_files:
450
  if os.path.exists(file):
451
  os.remove(file)
452
- raise Exception(f"Error generating speech for line '{batch[i]['line']}': {str(result)}")
453
  else:
454
  audio_files.append(result)
455
 
 
456
  if progress:
457
  current_progress = 0.5 + (0.4 * (batch_end / total_lines))
458
  progress(current_progress, f"Processed {batch_end}/{total_lines} speech segments...")
459
 
460
  except Exception as e:
461
- for file in audio_files: # اطمینان از پاک شدن فایل‌های موقت در صورت بروز خطا
 
462
  if os.path.exists(file):
463
  os.remove(file)
464
  raise Exception(f"Error in batch TTS generation: {str(e)}")
@@ -466,24 +265,15 @@ Follow this example structure:
466
  combined_audio = await self.combine_audio_files(audio_files, progress)
467
  return combined_audio
468
 
469
- async def process_input(input_text: str, input_file, language: str, speaker1_type: str, speaker2_type: str, api_key: str = "", progress=None) -> str:
470
  start_time = time.time()
471
 
472
- # Talkbot.ir TTS فقط male/female دارد. پس mapping لازم نیست.
473
- # speaker1_type و speaker2_type مستقیماً به API فرستاده می‌شوند.
474
-
475
  try:
476
  if progress:
477
  progress(0.05, "Processing input...")
478
 
479
- if not api_key:
480
- # سعی می‌کنیم API key را از متغیر محیطی بخوانیم
481
- api_key = os.getenv("TALKBOT_API_KEY")
482
- if not api_key:
483
- raise Exception("No API key provided. Please provide your Talkbot.ir/OpenRouter API key.")
484
-
485
  podcast_generator = PodcastGenerator()
486
- podcast = await podcast_generator.generate_podcast(input_text, language, speaker1_type, speaker2_type, api_key, input_file, progress)
487
 
488
  end_time = time.time()
489
  print(f"Total podcast generation time: {end_time - start_time:.2f} seconds")
@@ -492,41 +282,35 @@ async def process_input(input_text: str, input_file, language: str, speaker1_typ
492
  except Exception as e:
493
  error_msg = str(e)
494
  if "rate limit" in error_msg.lower():
495
- raise Exception("Rate limit exceeded. Please try again later or use your own API key.")
496
  elif "timeout" in error_msg.lower():
497
- raise Exception("The request timed out. This could be due to server load or the length of your input. Please try again with shorter text.")
498
- elif "unauthorized" in error_msg.lower() or "api key" in error_msg.lower():
499
- raise Exception("Invalid API key. Please check your Talkbot.ir/OpenRouter API key.")
500
  else:
501
  raise Exception(f"Error: {error_msg}")
502
 
503
  # Gradio UI
504
- def generate_podcast_gradio(input_text, input_file, language, speaker1, speaker2, api_key, progress=gr.Progress()):
505
  # Handle the file if uploaded
506
  file_obj = None
507
  if input_file is not None:
508
  file_obj = input_file
509
 
 
510
  def progress_callback(value, text):
511
  progress(value, text)
512
 
513
- # Note: Gradio's event handling may automatically manage the asyncio loop.
514
- # If not, you might need to run it explicitly using asyncio.run() or similar.
515
- # For now, let's assume Gradio handles it for async functions.
516
-
517
- # برای Gradio، تابعی که به .run() یا .click() متصل می‌شود، می‌تواند async باشد.
518
- # Gradio به طور خودکار آن را در یک event loop اجرا می‌کند.
519
- return asyncio.run(process_input(
520
  input_text,
521
  file_obj,
522
- language,
523
- speaker1,
524
- speaker2,
525
- api_key,
526
  progress_callback
527
  ))
 
 
528
 
529
  def main():
 
530
  language_options = [
531
  "Auto Detect",
532
  "Afrikaans", "Albanian", "Amharic", "Arabic", "Armenian", "Azerbaijani",
@@ -544,10 +328,7 @@ def main():
544
  "Uzbek", "Vietnamese", "Welsh", "Zulu"
545
  ]
546
 
547
- # Talkbot.ir TTS فقط gender را تعیین می‌کند، نه اسم خاص.
548
- # پس اینجا male/female را انتخاب می‌کنیم.
549
- voice_options = ["male", "female"]
550
-
551
  with gr.Blocks(title="PodcastGen 🎙️") as demo:
552
  gr.Markdown("# PodcastGen 🎙️")
553
  gr.Markdown("Generate a 2-speaker podcast from text input or documents!")
@@ -559,14 +340,7 @@ def main():
559
  with gr.Column(scale=1):
560
  input_file = gr.File(label="Or Upload a PDF or TXT file", file_types=[".pdf", ".txt"])
561
 
562
- with gr.Row():
563
- with gr.Column():
564
- api_key = gr.Textbox(label="Your Talkbot.ir/OpenRouter API Key (Required)", placeholder="Enter your Talkbot.ir API key here for Gemini Deepseek and TTS.", type="password")
565
- language = gr.Dropdown(label="Language", choices=language_options, value="Auto Detect")
566
-
567
- with gr.Column():
568
- speaker1 = gr.Dropdown(label="Speaker 1 Voice (Gender)", choices=voice_options, value="male")
569
- speaker2 = gr.Dropdown(label="Speaker 2 Voice (Gender)", choices=voice_options, value="female")
570
 
571
  generate_btn = gr.Button("Generate Podcast", variant="primary")
572
 
@@ -575,12 +349,11 @@ def main():
575
 
576
  generate_btn.click(
577
  fn=generate_podcast_gradio,
578
- inputs=[input_text, input_file, language, speaker1, speaker2, api_key],
579
  outputs=[output_audio]
580
  )
581
 
582
- demo.launch(share=False) # share=True برای اشتراک گذاری لینک عمومی
583
 
584
  if __name__ == "__main__":
585
  main()
586
-
 
2
  from pydub import AudioSegment
3
  import json
4
  import uuid
5
+ import aiohttp
6
  import asyncio
 
7
  import os
8
  import time
 
9
  from typing import List, Dict
 
10
 
11
  # Constants
12
  MAX_FILE_SIZE_MB = 20
 
14
 
15
  class PodcastGenerator:
16
  def __init__(self):
17
+ self.api_key = "sk-4fb613f56acfccf731e801b904cd89f5"
18
+ self.api_url = "https://talkbot.ir/api/v1/chat/completions"
19
+ self.tts_url = "https://talkbot.ir/TTS-tkun"
20
 
21
+ async def generate_script(self, prompt: str, language: str, file_obj=None, progress=None) -> Dict:
22
  example = """
23
  {
24
  "topic": "AGI",
 
30
  {
31
  "speaker": 1,
32
  "line": "Yeah, it's definitely having a moment, isn't it?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  }
34
  ]
35
  }
 
52
  Follow this example structure:
53
  {example}
54
  """
55
+
56
+ user_prompt = ""
57
  if prompt and file_obj:
58
+ user_prompt = f"Please generate a podcast script based on the uploaded file following user input:\n{prompt}"
59
  elif prompt:
60
+ user_prompt = f"Please generate a podcast script based on the following user input:\n{prompt}"
61
  else:
62
+ user_prompt = "Please generate a podcast script based on the uploaded file."
63
+
64
+ # If file is provided, read its content
65
+ file_content = ""
66
+ if file_obj:
67
+ try:
68
+ file_bytes = await self._read_file_bytes(file_obj)
69
+ file_content = file_bytes.decode('utf-8', errors='ignore')
70
+ user_prompt = f"{user_prompt}\n\nFile content:\n{file_content}"
71
+ except Exception as e:
72
+ raise Exception(f"Failed to read file: {str(e)}")
73
 
74
  messages = [
75
  {"role": "system", "content": system_prompt},
76
+ {"role": "user", "content": user_prompt}
77
  ]
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  headers = {
80
+ "Authorization": f"Bearer {self.api_key}",
81
  "Content-Type": "application/json"
82
  }
83
 
84
+ payload = {
85
  "model": "deepseek-v3-0324",
86
  "messages": messages,
87
  "temperature": 1,
88
+ "response_format": { "type": "json_object" }
 
 
89
  }
90
 
91
  try:
92
  if progress:
93
  progress(0.3, "Generating podcast script...")
94
+
95
  async with aiohttp.ClientSession() as session:
96
+ async with session.post(
97
+ self.api_url,
98
+ headers=headers,
99
+ json=payload,
100
+ timeout=60
101
+ ) as response:
102
+
103
+ if response.status != 200:
104
+ error_msg = await response.text()
105
+ raise Exception(f"API request failed: {error_msg}")
106
+
107
+ data = await response.json()
108
+ response_text = data.get('choices', [{}])[0].get('message', {}).get('content', '')
109
+
110
+ if not response_text:
111
+ raise Exception("Empty response from API")
112
+
113
+ if progress:
114
+ progress(0.4, "Script generated successfully!")
115
+
116
+ return json.loads(response_text)
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  except asyncio.TimeoutError:
119
  raise Exception("The script generation request timed out. Please try again later.")
120
+ except json.JSONDecodeError:
121
+ raise Exception("Invalid JSON response from API")
 
 
 
 
 
 
 
122
  except Exception as e:
123
+ if "rate limit" in str(e).lower():
124
+ raise Exception("Rate limit exceeded. Please try again later.")
125
+ else:
126
+ raise Exception(f"Failed to generate podcast script: {e}")
127
 
128
async def _read_file_bytes(self, file_obj) -> bytes:
    """Return the raw bytes of an uploaded file, enforcing the size limit.

    Args:
        file_obj: Either a filesystem path (``str`` / ``os.PathLike``) or a
            file-like object. A file-like object may expose ``size``,
            ``name`` (its path on disk) and ``read()``.

    Returns:
        The file content as ``bytes``.

    Raises:
        Exception: If the file is larger than ``MAX_FILE_SIZE_MB`` megabytes.
        OSError: If the file cannot be stat'ed or opened.
    """
    # The upload may arrive as a bare path instead of a file wrapper.
    if isinstance(file_obj, (str, os.PathLike)):
        path = os.fspath(file_obj)
    else:
        path = getattr(file_obj, 'name', None)

    # Check the size before reading so oversized uploads are never loaded.
    if hasattr(file_obj, 'size'):
        file_size = file_obj.size
    else:
        file_size = os.path.getsize(path)

    if file_size > MAX_FILE_SIZE_BYTES:
        raise Exception(f"File size exceeds the {MAX_FILE_SIZE_MB}MB limit. Please upload a smaller file.")

    if hasattr(file_obj, 'read'):
        data = file_obj.read()
        # A handle opened in text mode yields str; callers expect bytes.
        if isinstance(data, str):
            data = data.encode('utf-8')
        return data

    def _load() -> bytes:
        with open(path, 'rb') as fh:
            return fh.read()

    # Blocking disk read is pushed to the default executor so the event loop
    # stays responsive; this only needs the standard library.
    return await asyncio.get_running_loop().run_in_executor(None, _load)
144
+
145
async def tts_generate(self, text: str) -> str:
    """Synthesize ``text`` via the TTS endpoint and save it to a temp WAV file.

    Sends a GET request to ``self.tts_url`` with ``text`` as the only query
    parameter and writes the response body to a uniquely named
    ``temp_<uuid>.wav`` file in the current working directory.

    Args:
        text: The line of dialogue to convert to speech.

    Returns:
        Path of the temporary audio file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        Exception: If the request times out, the endpoint answers with a
            non-200 status, the response body is empty, or the file cannot
            be written. Any partially written file is removed first.
    """
    headers = {'accept': 'application/json'}
    params = {'text': text}
    temp_filename = f"temp_{uuid.uuid4()}.wav"

    def _write_audio(data: bytes) -> None:
        with open(temp_filename, 'wb') as audio_file:
            audio_file.write(data)

    def _discard_partial() -> None:
        # Never leave a half-written temp file behind on failure.
        if os.path.exists(temp_filename):
            os.remove(temp_filename)

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(
                self.tts_url,
                params=params,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=30),
            ) as response:
                if response.status != 200:
                    error_msg = await response.text()
                    raise Exception(f"TTS API error: {error_msg}")

                audio_data = await response.read()

        # An empty body would produce an unreadable WAV and fail later, far
        # from the real cause, when the segments are combined.
        if not audio_data:
            raise Exception("Received empty audio data from TTS API.")

        # Blocking disk write is pushed to the default executor so the other
        # concurrent TTS requests are not stalled.
        await asyncio.get_running_loop().run_in_executor(None, _write_audio, audio_data)
        return temp_filename

    except asyncio.TimeoutError:
        _discard_partial()
        raise Exception("Text-to-speech generation timed out.")
    except Exception:
        _discard_partial()
        raise
183
 
184
  async def combine_audio_files(self, audio_files: List[str], progress=None) -> str:
185
  if progress:
 
187
 
188
  combined_audio = AudioSegment.empty()
189
  for audio_file in audio_files:
190
+ combined_audio += AudioSegment.from_file(audio_file)
191
+ os.remove(audio_file) # Clean up temporary files
 
 
 
 
 
192
 
193
  output_filename = f"output_{uuid.uuid4()}.wav"
194
  combined_audio.export(output_filename, format="wav")
 
198
 
199
  return output_filename
200
 
201
+ async def generate_podcast(self, input_text: str, language: str, file_obj=None, progress=None) -> str:
202
  try:
203
  if progress:
204
  progress(0.1, "Starting podcast generation...")
205
 
206
+ # Set overall timeout for the entire process
207
  return await asyncio.wait_for(
208
+ self._generate_podcast_internal(input_text, language, file_obj, progress),
209
  timeout=600 # 10 minutes total timeout
210
  )
211
  except asyncio.TimeoutError:
 
213
  except Exception as e:
214
  raise Exception(f"Error generating podcast: {str(e)}")
215
 
216
+ async def _generate_podcast_internal(self, input_text: str, language: str, file_obj=None, progress=None) -> str:
217
  if progress:
218
  progress(0.2, "Generating podcast script...")
219
 
220
+ podcast_json = await self.generate_script(input_text, language, file_obj, progress)
221
 
222
  if progress:
223
  progress(0.5, "Converting text to speech...")
224
 
225
  audio_files = []
226
  total_lines = len(podcast_json['podcast'])
 
227
 
228
+ # Process in batches
229
+ batch_size = 5 # Conservative batch size
230
  for batch_start in range(0, total_lines, batch_size):
231
  batch_end = min(batch_start + batch_size, total_lines)
232
  batch = podcast_json['podcast'][batch_start:batch_end]
233
 
234
+ # Create tasks for concurrent processing
235
  tts_tasks = []
236
  for item in batch:
237
+ tts_task = self.tts_generate(item['line'])
238
  tts_tasks.append(tts_task)
239
 
240
  try:
 
242
 
243
  for i, result in enumerate(batch_results):
244
  if isinstance(result, Exception):
245
+ # Clean up any files already created
246
  for file in audio_files:
247
  if os.path.exists(file):
248
  os.remove(file)
249
+ raise Exception(f"Error generating speech: {str(result)}")
250
  else:
251
  audio_files.append(result)
252
 
253
+ # Update progress
254
  if progress:
255
  current_progress = 0.5 + (0.4 * (batch_end / total_lines))
256
  progress(current_progress, f"Processed {batch_end}/{total_lines} speech segments...")
257
 
258
  except Exception as e:
259
+ # Clean up any files already created
260
+ for file in audio_files:
261
  if os.path.exists(file):
262
  os.remove(file)
263
  raise Exception(f"Error in batch TTS generation: {str(e)}")
 
265
  combined_audio = await self.combine_audio_files(audio_files, progress)
266
  return combined_audio
267
 
268
+ async def process_input(input_text: str, input_file, language: str, progress=None) -> str:
269
  start_time = time.time()
270
 
 
 
 
271
  try:
272
  if progress:
273
  progress(0.05, "Processing input...")
274
 
 
 
 
 
 
 
275
  podcast_generator = PodcastGenerator()
276
+ podcast = await podcast_generator.generate_podcast(input_text, language, input_file, progress)
277
 
278
  end_time = time.time()
279
  print(f"Total podcast generation time: {end_time - start_time:.2f} seconds")
 
282
  except Exception as e:
283
  error_msg = str(e)
284
  if "rate limit" in error_msg.lower():
285
+ raise Exception("Rate limit exceeded. Please try again later.")
286
  elif "timeout" in error_msg.lower():
287
+ raise Exception("The request timed out. Please try again with shorter text.")
 
 
288
  else:
289
  raise Exception(f"Error: {error_msg}")
290
 
291
  # Gradio UI
292
def generate_podcast_gradio(input_text, input_file, language, progress=gr.Progress()):
    """Synchronous Gradio click handler that runs the async podcast pipeline.

    Forwards the text, the optional uploaded file and the selected language to
    ``process_input`` and returns whatever that coroutine returns. Progress
    updates are relayed to the Gradio ``progress`` tracker.
    """
    def progress_callback(value, text):
        # Adapter with the (value, text) signature the pipeline calls.
        progress(value, text)

    # No upload is represented as None; otherwise pass the upload through as-is.
    file_obj = input_file if input_file is not None else None

    # Drive the coroutine to completion on a fresh event loop.
    return asyncio.run(
        process_input(input_text, file_obj, language, progress_callback)
    )
311
 
312
  def main():
313
+ # Define language options
314
  language_options = [
315
  "Auto Detect",
316
  "Afrikaans", "Albanian", "Amharic", "Arabic", "Armenian", "Azerbaijani",
 
328
  "Uzbek", "Vietnamese", "Welsh", "Zulu"
329
  ]
330
 
331
+ # Create Gradio interface
 
 
 
332
  with gr.Blocks(title="PodcastGen 🎙️") as demo:
333
  gr.Markdown("# PodcastGen 🎙️")
334
  gr.Markdown("Generate a 2-speaker podcast from text input or documents!")
 
340
  with gr.Column(scale=1):
341
  input_file = gr.File(label="Or Upload a PDF or TXT file", file_types=[".pdf", ".txt"])
342
 
343
+ language = gr.Dropdown(label="Language", choices=language_options, value="Auto Detect")
 
 
 
 
 
 
 
344
 
345
  generate_btn = gr.Button("Generate Podcast", variant="primary")
346
 
 
349
 
350
  generate_btn.click(
351
  fn=generate_podcast_gradio,
352
+ inputs=[input_text, input_file, language],
353
  outputs=[output_audio]
354
  )
355
 
356
+ demo.launch()
357
 
358
  if __name__ == "__main__":
359
  main()