File size: 2,329 Bytes
3a89103
c9030d6
 
 
 
b0b4114
c9030d6
fad83e4
c9030d6
a9a000b
6643aa1
5d8bb2b
a15157a
cb8ee6b
5796090
a9a000b
 
7fee81b
 
40ff02c
 
c9030d6
a9a000b
7fee81b
a9a000b
40ff02c
b0b4114
 
 
 
 
 
 
 
 
 
 
 
 
 
7d340ee
b0b4114
defc453
 
62c0db4
 
 
 
 
ee90b65
62c0db4
 
 
 
 
 
 
 
 
 
 
b3b964d
62c0db4
6ad5936
 
 
 
 
 
62c0db4
 
9b9929d
 
 
 
 
58ced3b
9b9929d
 
5d8bb2b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from moviepy.editor import *
from PIL import Image
import pytesseract
import numpy as np
from gtts import gTTS
import edge_tts
from mutagen.mp3 import MP3
import uuid
import os
from pathlib import Path
import rust_highlight
import rust_combiner
import shutil
import asyncio

# Generated audio and video are written beneath /app/data, which is expected
# to be writable already (no chmod is performed here). Slide images are read
# from a scratch directory under /tmp.
BASE_DIR = "/app/data"
IMAGE_DIR = "/tmp/images"
Path(IMAGE_DIR).mkdir(parents=True, exist_ok=True)
AUDIO_DIR = os.path.join(BASE_DIR, "sound")
CLIPS_DIR = os.path.join(BASE_DIR, "video")

# Make sure every output directory exists before any function below runs.
for path in (BASE_DIR, AUDIO_DIR, CLIPS_DIR):
    os.makedirs(path, exist_ok=True)

async def generate_tts(id, lines):
    """Synthesize speech for ``lines[id]`` and save it as an MP3 file.

    The text is sent to edge-tts using the "en-US-GuyNeural" voice at the
    default rate ("+0%"), and the result is saved as ``audio{id}.mp3``
    inside ``AUDIO_DIR``.

    Args:
        id: Index into *lines*; also used to name the output file.
        lines: Indexable collection of the texts to narrate.

    Returns:
        ``(duration, audio_path)`` where ``duration`` is the MP3 length as
        reported by mutagen's ``info.length``, or ``(None, None)`` when no
        file exists at the target path after saving.
    """
    target = os.path.join(AUDIO_DIR, f"audio{id}.mp3")

    speech = edge_tts.Communicate(text=lines[id], voice="en-US-GuyNeural", rate="+0%")
    await speech.save(target)

    # Guard against a save that finished without leaving a file behind.
    if not os.path.exists(target):
        return None, None

    return MP3(target).info.length, target

def audio_func(id, lines):
    """Blocking wrapper around ``generate_tts``.

    Runs the coroutine to completion with ``asyncio.run`` and returns its
    ``(duration, audio_path)`` result unchanged.
    """
    tts_job = generate_tts(id, lines)
    return asyncio.run(tts_job)
    
# --- VIDEO RENDERING ---
def _extract_words(image_path, min_conf=60):
    """Run OCR on an image and return its confidently recognised words.

    Args:
        image_path: Path of the image to read.
        min_conf: Entries are kept only when their confidence is strictly
            greater than this value.

    Returns:
        A list of ``(text, (left, top, width, height))`` tuples, one per
        non-blank OCR entry that passes the confidence threshold.
    """
    # Close the image file as soon as OCR is done instead of leaking the
    # handle until garbage collection.
    with Image.open(image_path) as img:
        data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)

    columns = zip(
        data['text'],
        data['conf'],
        data['left'],
        data['top'],
        data['width'],
        data['height'],
    )

    words = []
    for text, conf, left, top, width, height in columns:
        txt = text.strip()
        # The confidence may arrive as an int or as a decimal string such as
        # "96.06" depending on the Tesseract/pytesseract versions in use;
        # float() accepts both, whereas int() raises on the latter.
        if txt and float(conf) > min_conf:
            words.append((txt, (left, top, width, height)))
    return words


def video_func(id, lines):
    """Build the clip for slide *id* from its narration and its image.

    Generates the narration audio for ``lines[id]`` via ``audio_func``,
    OCRs ``slide{id}.png`` from ``IMAGE_DIR`` to collect word boxes, and
    hands everything to ``rust_highlight.render_video`` with ``CLIPS_DIR``
    as the output directory. The value returned by the renderer is printed;
    this function itself returns ``None``.

    Args:
        id: Slide index; selects both the text in *lines* and the image file.
        lines: Indexable collection of narration texts.
    """
    # NOTE(review): audio_func can yield (None, None) when no audio file was
    # produced; those values are forwarded to the renderer as-is.
    duration, audio_path = audio_func(id, lines)
    image_path = os.path.join(IMAGE_DIR, f"slide{id}.png")
    words = _extract_words(image_path)

    clip_file = rust_highlight.render_video(
        id=id,
        image_path=image_path,
        audio_path=audio_path,
        duration=duration,
        words=words,
        output_dir=CLIPS_DIR,
    )
    print(f"Created {clip_file}")

def video_com(lines):
    """Combine the per-line clips into a single video and return its path.

    One clip path is built for every index of *lines* (the contents of
    *lines* are not read, only its length) and the list is passed to
    ``rust_combiner.combine_clips``, whose return value is returned
    unchanged.

    Args:
        lines: Sized collection with one entry per rendered clip.

    Returns:
        Whatever ``rust_combiner.combine_clips`` returns for the clip list.
    """
    # NOTE(review): this literal mirrors the module's CLIPS_DIR value and
    # presumably matches where the renderer writes clip{N}.mp4 - confirm.
    clips = [f"/app/data/video/clip{index}.mp4" for index in range(len(lines))]
    return rust_combiner.combine_clips(clips)