Opera8 committed on
Commit
7910e40
·
verified ·
1 Parent(s): 1966f5f

Delete app (26).py

Browse files
Files changed (1) hide show
  1. app (26).py +0 -422
app (26).py DELETED
@@ -1,422 +0,0 @@
1
- import os
2
- import google.generativeai as genai
3
- import json
4
- import time
5
- import io
6
- import threading
7
- import uuid
8
- import requests
9
- import re
10
- import logging
11
- import random
12
- import base64
13
- import atexit
14
- from datetime import datetime, timedelta
15
- from itertools import cycle
16
- from flask import Flask, request, jsonify, render_template, send_file
17
- from flask_cors import CORS
18
- from pydub import AudioSegment
19
- from huggingface_hub import HfApi, hf_hub_download
20
- from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError
21
-
22
# --- CONFIGURATION & LOGGING ---
# Local directory used as the Hugging Face download cache and for scratch files.
CACHE_DIRECTORY = "/tmp/huggingface_cache_ezmary"
os.makedirs(CACHE_DIRECTORY, exist_ok=True)

# Root logger: INFO level, timestamped "time - level - message" records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

# Flask application with CORS enabled through flask_cors defaults.
app = Flask(__name__)
CORS(app)
29
-
30
# --- WORKER POOL SETUP ---
# Endpoints of the worker Spaces that generate speech (TTS).
WORKER_URLS = [
    "https://hamed744-ttspro.hf.space/generate",
    "https://hamed744-ttspro2.hf.space/generate",
    "https://hamed744-ttspro3.hf.space/generate",
    "https://hamed744-ttspro4.hf.space/generate",
    "https://hamed744-ttspro5.hf.space/generate",
    "https://hamed744-ttspro6.hf.space/generate",
    "https://hamed744-ttspro7.hf.space/generate",
    "https://hamed744-ttspro8.hf.space/generate",
    "https://hamed744-ttspro9.hf.space/generate",
]

# Endless iterator over the endpoints, in list order.
worker_pool = cycle(WORKER_URLS)
44
-
45
def get_next_worker_url():
    """Return the next worker endpoint taken from ``worker_pool``."""
    next_url = next(worker_pool)
    return next_url
47
-
48
# --- Voice conversion (VC) service address ---
# NOTE(review): the original comment on this line was truncated; it appears to
# say this is the address of the newer VC Space — confirm.
VC_SPACE_URL = "https://ezmarynoori-sada.hf.space"

# --- GLOBAL VARIABLES ---
# Background podcast-script tasks keyed by task id; guarded by tasks_lock.
tasks = {}
tasks_lock = threading.Lock()
request_counter = 0
request_counter_lock = threading.Lock()

# Hub dataset that stores the per-user usage records.
DATASET_REPO = "opera8/Karbaran-rayegan-tedad"
DATASET_FILENAME = "usage_data.json"
USAGE_LIMIT = 5
HF_TOKEN = os.environ.get("HF_TOKEN")

# Roughly six months (6 x 30 days), expressed in seconds.
CLEANUP_INTERVAL_SECONDS = 6 * 30 * 24 * 60 * 60
last_cleanup_time = time.time()

# In-memory copy of the usage records; guarded by cache_lock.
usage_data_cache = []
cache_lock = threading.Lock()
# Set whenever the cache holds changes that have not been uploaded yet.
data_changed = threading.Event()
api = None

# --- DATABASE LOGIC ---
if HF_TOKEN:
    api = HfApi(token=HF_TOKEN)
    logging.info("HfApi initialized.")
else:
    logging.error("CRITICAL: Secret 'HF_TOKEN' not found.")
74
-
75
def load_initial_data():
    """Load the usage records from the Hub dataset into ``usage_data_cache``.

    Does nothing when ``api`` is unset (no ``HF_TOKEN``). A missing
    repository or file is logged as a warning; any other failure is logged as
    an error. In every failure case the cache keeps its current value.

    Only a JSON *list* is accepted: the credit routes iterate the cache and
    call ``.get`` on each entry, so any other top-level JSON value would break
    them on every request.
    """
    global usage_data_cache
    with cache_lock:
        if not api:
            return
        try:
            local_path = hf_hub_download(
                repo_id=DATASET_REPO,
                filename=DATASET_FILENAME,
                repo_type="dataset",
                token=HF_TOKEN,
                force_download=True,
                cache_dir=CACHE_DIRECTORY,
            )
            with open(local_path, 'r', encoding='utf-8') as f:
                content = f.read()
            if not content:
                # Empty file: keep the current (empty) cache.
                return
            loaded = json.loads(content)
            if isinstance(loaded, list):
                usage_data_cache = loaded
            else:
                logging.error(
                    f"Failed to load data: expected a JSON list, got {type(loaded).__name__}"
                )
        except (RepositoryNotFoundError, EntryNotFoundError):
            logging.warning("Dataset not found, creating new.")
        except Exception as e:
            logging.error(f"Failed to load data: {e}")
90
-
91
def persist_data_to_hub():
    """Upload the usage cache to the Hub dataset when it has pending changes.

    Once per ``CLEANUP_INTERVAL_SECONDS`` the cache is first pruned of records
    whose ``week_start`` is older than that interval. The upload is skipped
    when nothing changed or when ``api`` is unset.

    The cache is serialised while ``cache_lock`` is held, but the network
    upload runs after the lock is released so request handlers are not blocked
    for the duration of the upload. The serialised bytes are passed straight
    to ``upload_file``, so no temporary file is left behind on failure and
    concurrent callers cannot clobber each other's scratch file.
    """
    global last_cleanup_time, usage_data_cache
    with cache_lock:
        now = time.time()
        if (now - last_cleanup_time) > CLEANUP_INTERVAL_SECONDS:
            cutoff = now - CLEANUP_INTERVAL_SECONDS
            usage_data_cache = [u for u in usage_data_cache if u.get('week_start', 0) > cutoff]
            last_cleanup_time = now
            data_changed.set()

        if not data_changed.is_set() or not api:
            return

        try:
            payload = json.dumps(usage_data_cache, indent=2, ensure_ascii=False).encode('utf-8')
        except Exception as e:
            # Leave the flag set so the next cycle retries.
            logging.error(f"Persist failed: {e}")
            return

        # Clear before uploading: a change made during the upload sets the
        # flag again and is picked up by the next cycle.
        data_changed.clear()

    try:
        api.upload_file(
            path_or_fileobj=payload,
            path_in_repo=DATASET_FILENAME,
            repo_id=DATASET_REPO,
            repo_type="dataset",
            commit_message="Update",
        )
    except Exception as e:
        # The snapshot never reached the Hub; mark the cache dirty again.
        data_changed.set()
        logging.error(f"Persist failed: {e}")
112
-
113
def background_persister():
    """Call ``persist_data_to_hub`` forever, sleeping 10 seconds before each call."""
    interval_seconds = 10
    while True:
        time.sleep(interval_seconds)
        persist_data_to_hub()
117
-
118
def get_user_ip():
    """Return the client IP for the current request.

    Uses the first comma-separated entry of the first ``X-Forwarded-For``
    header when that header is present, otherwise ``request.remote_addr``.
    """
    forwarded = request.headers.getlist("X-Forwarded-For")
    if not forwarded:
        return request.remote_addr
    # NOTE(review): this value is client-supplied unless a trusted proxy
    # overwrites the header — confirm the deployment does so.
    first_entry = forwarded[0].partition(',')[0]
    return first_entry.strip()
122
-
123
- # --- TTS HELPER FUNCTIONS ---
124
def merge_audio_segments(audio_segments):
    """Concatenate audio segments in order and return them as a WAV buffer.

    Returns ``None`` when ``audio_segments`` is empty or ``None``; otherwise
    an ``io.BytesIO`` holding the WAV export, rewound to position 0.
    """
    if not audio_segments:
        return None

    merged = AudioSegment.empty()
    for piece in audio_segments:
        merged += piece

    wav_buffer = io.BytesIO()
    merged.export(wav_buffer, format="wav")
    wav_buffer.seek(0)
    return wav_buffer
133
-
134
def call_worker(index, chunk_payload):
    """Send one text chunk to the TTS workers and return ``(index, audio)``.

    Workers are tried in the order given by ``get_next_worker_url`` for up to
    twice the number of configured workers. The first HTTP 200 response is
    decoded into an ``AudioSegment``. If every attempt fails the result is
    ``(index, None)``.
    """
    text_length = len(chunk_payload.get("text", ""))
    # Chunks of at most 500 characters are flagged for the live model.
    use_live = text_length <= 500

    # A custom voice is first generated with the "Charon" voice and changed
    # afterwards.
    if chunk_payload.get("is_custom", False):
        speaker_to_request = "Charon"
    else:
        speaker_to_request = chunk_payload.get("speaker")

    worker_payload = {
        "text": chunk_payload.get("text"),
        "speaker": speaker_to_request,
        "temperature": chunk_payload.get("temperature", 0.9),
        "use_live_model": use_live,
        "retry_limit": 50,
        "fallback_to_live": True,
    }

    max_attempts = len(WORKER_URLS) * 2
    for _ in range(max_attempts):
        worker_url = get_next_worker_url()
        try:
            logging.info(f"Chunk {index} (Len: {text_length}) -> Sending to {worker_url} (LiveMode: {use_live})")
            response = requests.post(worker_url, json=worker_payload, timeout=300)
            if response.status_code != 200:
                logging.warning(f"Worker Error {worker_url}: {response.status_code}")
                continue
            decoded = AudioSegment.from_file(io.BytesIO(response.content))
            return index, decoded
        except Exception as e:
            logging.warning(f"Worker Connection Fail {worker_url}: {e}")

    return index, None
168
-
169
- # --- AI PODCAST SCRIPT LOGIC ---
170
def generate_podcast_in_background(task_id, system_prompt, safety_settings):
    """Generate a podcast script with Gemini and record the result in ``tasks``.

    API keys come from the comma-separated ``ALL_GEMINI_API_KEYS`` environment
    variable. Up to 50 attempts are made, each with a randomly chosen key and
    a one-second pause after a failure. On success the task entry becomes
    ``completed`` with the parsed JSON under ``data``; otherwise it becomes
    ``failed`` with an ``error`` message.
    """
    try:
        raw_keys = os.environ.get("ALL_GEMINI_API_KEYS")
        api_keys = []
        if raw_keys:
            api_keys = [part.strip() for part in raw_keys.split(',') if part.strip()]
        if not api_keys:
            raise ValueError("No AI Keys")

        max_attempts = 50
        for attempt in range(max_attempts):
            api_key = random.choice(api_keys)
            try:
                genai.configure(api_key=api_key)
                model = genai.GenerativeModel('gemini-2.5-flash')
                response = model.generate_content(system_prompt, safety_settings=safety_settings)

                raw_text = response.text
                # Prefer a fenced json block; otherwise take everything from
                # the first '{' to the last '}'.
                fenced = re.search(r"```json\s*(\{.*?\})\s*```", raw_text, re.DOTALL)
                if fenced:
                    json_string = fenced.group(1)
                else:
                    json_string = None
                    first_brace = raw_text.find('{')
                    last_brace = raw_text.rfind('}')
                    if first_brace != -1 and last_brace != -1:
                        json_string = raw_text[first_brace:last_brace + 1]

                if not json_string:
                    raise ValueError("No JSON found")

                data = json.loads(json_string)
                if "script" in data:
                    for turn in data["script"]:
                        if "dialogue" in turn:
                            # Remove bracketed and parenthesised spans from the dialogue.
                            turn["dialogue"] = re.sub(r'\[.*?\]|\(.*?\)', '', turn["dialogue"]).strip()

                with tasks_lock:
                    tasks[task_id].update(status='completed', data=data)
                return

            except Exception as e:
                logging.warning(f"AI Attempt {attempt} failed: {e}")
                time.sleep(1)

        with tasks_lock:
            tasks[task_id].update(status='failed', error='Max retries reached')

    except Exception as e:
        with tasks_lock:
            tasks[task_id].update(status='failed', error=str(e))
213
-
214
# --- VC LOGIC (adjusted to work with the new Space) ---
def process_voice_conversion(tts_audio_io, ref_audio_base64):
    """Convert TTS audio to the reference voice through the remote VC service.

    Args:
        tts_audio_io: seekable binary file object holding the source WAV.
        ref_audio_base64: base64 reference audio, optionally prefixed with a
            data-URL header (everything up to the first comma is dropped).

    Returns:
        ``io.BytesIO`` with the converted audio, or ``None`` on any failure
        (the error is logged).
    """
    try:
        tts_audio_io.seek(0)

        # Decode the base64 reference audio, dropping any "data:...;base64," prefix.
        if "," in ref_audio_base64:
            ref_audio_base64 = ref_audio_base64.split(",")[1]
        ref_bytes = base64.b64decode(ref_audio_base64)

        files = {
            'source_audio': ('source.wav', tts_audio_io, 'audio/wav'),
            'ref_audio': ('ref.wav', io.BytesIO(ref_bytes), 'audio/wav')
        }

        # 1. Upload both files to the VC service.
        logging.info(f"VC: Uploading to {VC_SPACE_URL}/upload")
        res = requests.post(f"{VC_SPACE_URL}/upload", files=files, timeout=120)

        if res.status_code != 200:
            raise Exception(f"VC Upload Failed: {res.text}")

        # The upload response is the job description; it is sent back as-is
        # when polling.
        job_data = res.json()

        # 2. Poll for completion: 120 polls with a 4 second sleep each
        #    (8 minutes of sleeping, plus request time).
        for _ in range(120):
            time.sleep(4)

            try:
                # The whole job_data object is posted to check_status.
                chk = requests.post(f"{VC_SPACE_URL}/check_status", json=job_data, timeout=30)
            except requests.RequestException as poll_err:
                # One transient network error should not discard a job that
                # may still finish; try again on the next poll.
                logging.warning(f"VC status poll failed, retrying: {poll_err}")
                continue

            if chk.status_code != 200:
                continue

            stat = chk.json()
            status = stat.get("status")

            if status == "completed":
                filename = stat.get("filename")
                if not filename:
                    raise Exception("VC Download Failed: no filename in status response")
                # 3. Download the final file. The timeout keeps a stalled
                #    server from hanging this request forever.
                dl = requests.get(f"{VC_SPACE_URL}/download/{filename}", timeout=120)
                if dl.status_code == 200:
                    return io.BytesIO(dl.content)
                raise Exception("VC Download Failed")

            if status == "failed":
                detail = stat.get("detail", "Unknown error")
                raise Exception(f"VC Remote Failed: {detail}")

            # Any other status (e.g. still processing): keep polling.

        raise Exception("VC Timeout (Processing took too long)")

    except Exception as e:
        logging.error(f"VC Error: {e}")
        return None
270
-
271
- # --- ROUTES ---
272
@app.route('/')
def index():
    """Render and return the ``index.html`` template."""
    page = render_template('index.html')
    return page
275
-
276
@app.route('/api/check-credit', methods=['POST'])
def check_credit():
    """Report how many credits the caller has left in the current 7-day window.

    The caller is matched by ``fingerprint`` first, then by IP address. A
    record whose ``week_start`` is more than 7 days old is reset to zero.

    Returns JSON with ``credits_remaining``, ``limit_reached`` and
    ``reset_timestamp`` (0 unless the limit is reached). Responds 400 with
    ``{"status": "error"}`` when the body is not a JSON object or carries no
    fingerprint.
    """
    # silent=True: a malformed or non-JSON body yields None instead of raising.
    # The isinstance check also rejects bodies such as `null` or a list.
    data = request.get_json(silent=True)
    fingerprint = data.get('fingerprint') if isinstance(data, dict) else None
    if not fingerprint:
        return jsonify({"status": "error"}), 400

    week_seconds = 7 * 24 * 60 * 60
    with cache_lock:
        ip = get_user_ip()
        now = time.time()
        week_ago = now - week_seconds

        user = next((u for u in usage_data_cache if u.get('fingerprint') == fingerprint), None)
        user = user or next((u for u in usage_data_cache if ip in u.get('ips', [])), None)

        limit_reached = False
        remaining = USAGE_LIMIT
        reset_ts = 0

        if user:
            if user.get('week_start', 0) < week_ago:
                # The window expired: start a fresh one and mark the cache dirty.
                user['count'] = 0
                user['week_start'] = now
                data_changed.set()

            remaining = USAGE_LIMIT - user.get('count', 0)
            if remaining <= 0:
                limit_reached = True
                remaining = 0
                reset_ts = user.get('week_start', now) + week_seconds

        return jsonify({"credits_remaining": remaining, "limit_reached": limit_reached, "reset_timestamp": reset_ts})
306
-
307
@app.route('/api/use-credit', methods=['POST'])
def use_credit():
    """Consume one credit for the caller and report what is left.

    The caller is matched by ``fingerprint`` first, then by IP address; an
    unknown caller gets a new record with one credit used. A record whose
    ``week_start`` is more than 7 days old is reset before counting.

    Responds 400 with ``{"status": "error"}`` when the body is not a JSON
    object or carries no fingerprint (same contract as ``check_credit``), and
    429 with ``{"status": "limit"}`` when the weekly limit is already used up.
    """
    data = request.get_json(silent=True)
    fingerprint = data.get('fingerprint') if isinstance(data, dict) else None
    if not fingerprint:
        # Without this check a missing fingerprint compares equal to every
        # record that lacks one, so unrelated callers would share a record.
        return jsonify({"status": "error"}), 400

    with cache_lock:
        ip = get_user_ip()
        now = time.time()
        week_ago = now - (7 * 24 * 60 * 60)

        user = next((u for u in usage_data_cache if u.get('fingerprint') == fingerprint), None)
        user = user or next((u for u in usage_data_cache if ip in u.get('ips', [])), None)

        if user:
            if user.get('week_start', 0) < week_ago:
                user['count'] = 0
                user['week_start'] = now
            # Read with defaults: records may lack 'count' or 'ips', as the
            # lookups above already allow for.
            used = user.get('count', 0)
            if used >= USAGE_LIMIT:
                return jsonify({"status": "limit"}), 429
            user['count'] = used + 1
            known_ips = user.setdefault('ips', [])
            if ip not in known_ips:
                known_ips.append(ip)
        else:
            user = {"fingerprint": fingerprint, "ips": [ip], "count": 1, "week_start": now}
            usage_data_cache.append(user)

        data_changed.set()
        return jsonify({"status": "success", "credits_remaining": USAGE_LIMIT - user['count']})
333
-
334
@app.route('/api/create-full-podcast', methods=['POST'])
def create_full_podcast():
    """Start background generation of a podcast script and return its task id.

    Expects a JSON object with ``prompt`` and ``available_speakers`` (a list
    of objects with ``id`` and ``name``). Responds 202 with ``{"task_id": ...}``;
    the result is stored in ``tasks`` under that id by the worker thread.
    Responds 400 with ``{"error": "Bad request"}`` for a missing or malformed
    payload, and 500 for unexpected failures.
    """
    try:
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Bad request"}), 400

        prompt = data.get('prompt')
        speakers = data.get('available_speakers')
        if not prompt or not speakers:
            return jsonify({"error": "Bad request"}), 400

        try:
            spk_text = "\n".join(f"- {s['id']}: {s['name']}" for s in speakers)
        except (KeyError, TypeError, IndexError):
            # Speaker entries without id/name, or not a list of objects, are
            # a client error rather than a server failure.
            return jsonify({"error": "Bad request"}), 400

        # Built line by line so the prompt's whitespace is explicit.
        sys_prompt = (
            'Act as a Podcast Producer.\n'
            f'Topic: "{prompt}"\n'
            'Speakers Available:\n'
            f'{spk_text}\n'
            '\n'
            'Output ONLY valid JSON.\n'
            'Format: {"selected_speakers": ["id1", "id2"], "script": [{"speaker_id": "id1", "dialogue": "..."}]}\n'
            'Dialogue rules: No stage directions like [laugh], (sigh). Just spoken words.'
        )

        task_id = str(uuid.uuid4())
        with tasks_lock:
            tasks[task_id] = {'status': 'pending'}

        safety = [
            {"category": c, "threshold": "BLOCK_NONE"}
            for c in [
                "HARM_CATEGORY_HARASSMENT",
                "HARM_CATEGORY_HATE_SPEECH",
                "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                "HARM_CATEGORY_DANGEROUS_CONTENT",
            ]
        ]

        threading.Thread(target=generate_podcast_in_background, args=(task_id, sys_prompt, safety)).start()
        return jsonify({"task_id": task_id}), 202
    except Exception as e:
        return jsonify({"error": str(e)}), 500
362
-
363
@app.route('/api/podcast-status/<task_id>', methods=['GET'])
def podcast_status(task_id):
    """Return the stored state of a task, or ``{'status': 'not_found'}``.

    The HTTP status is 200 in both cases.
    """
    with tasks_lock:
        if task_id in tasks:
            task_state = tasks[task_id]
        else:
            task_state = {'status': 'not_found'}
        return jsonify(task_state), 200
367
-
368
@app.route('/api/generate', methods=['POST'])
def generate_audio_route():
    """Generate speech for the posted text and return it as a WAV attachment.

    Expects a JSON object with ``text`` and optionally ``speaker``,
    ``temperature`` (default 0.9) and ``ref_audio_base64``. When the speaker
    id starts with ``custom_`` and reference audio is supplied, the TTS output
    is additionally passed through voice conversion.

    Responds 400 for a missing body or empty text, 503 when no worker produced
    audio, and 500 when voice conversion fails or on unexpected errors.
    """
    try:
        data = request.get_json(silent=True)
        if not data or not isinstance(data, dict):
            return jsonify({"error": "No data"}), 400

        text = data.get("text", "")
        speaker = data.get("speaker")
        temperature = data.get("temperature", 0.9)
        ref_base64 = data.get("ref_audio_base64")

        if not text or not isinstance(text, str):
            return jsonify({"error": "Text empty"}), 400

        # The isinstance guard keeps a missing or non-string speaker from
        # raising on startswith; such a request is simply not a custom voice.
        is_custom = bool(
            isinstance(speaker, str) and speaker.startswith("custom_") and ref_base64
        )

        payload = {
            "text": text,
            "speaker": speaker,
            "temperature": temperature,
            "is_custom": is_custom
        }

        # Generate the base audio (TTS).
        idx, audio_seg = call_worker(0, payload)

        if not audio_seg:
            return jsonify({"error": "Worker generation failed"}), 503

        final_buffer = io.BytesIO()
        audio_seg.export(final_buffer, format="wav")
        final_buffer.seek(0)

        # For a custom voice, run voice conversion (VC) on the TTS output.
        if is_custom:
            logging.info("Starting Custom VC...")
            vc_out = process_voice_conversion(final_buffer, ref_base64)
            if vc_out:
                return send_file(vc_out, mimetype="audio/wav", as_attachment=True, download_name=f"vc_{uuid.uuid4()}.wav")
            return jsonify({"error": "Voice Conversion failed"}), 500

        return send_file(final_buffer, mimetype="audio/wav", as_attachment=True, download_name=f"gen_{uuid.uuid4()}.wav")

    except Exception as e:
        logging.error(f"Generate route error: {e}")
        return jsonify({"error": str(e)}), 500
414
-
415
# --- STARTUP ---
# Fill the usage cache, then keep uploading it from a daemon thread and once
# more at interpreter exit.
load_initial_data()
threading.Thread(
    target=background_persister,
    daemon=True,
).start()
atexit.register(persist_data_to_hub)

if __name__ == '__main__':
    # Listen on all interfaces; the port comes from PORT, defaulting to 7860.
    port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=port)