File size: 3,661 Bytes
3a09141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from tqdm import tqdm
import logging

from .agent import BloggerAgent, WriterAgent, StructureAgent, Conversation
from .fetcher import AutoFetcher
from .voicevox import VoiceVoxClient, SpeakerId, Audio


class PodcastStudio:
    """Turn a paper URL into a structured conversation and then into audio.

    The studio chains three agents (blogger -> writer -> structure agent) on
    top of a fetcher to build a ``Conversation``, and uses a caller-supplied
    ``VoiceVoxClient`` to synthesize and join the audio for each line.
    """

    def __init__(self, api_key: str, logging_level: int = logging.INFO):
        """Create the agents, the fetcher and the logger.

        Args:
            api_key: Key handed unchanged to each of the three agents.
            logging_level: Level applied to this module's logger.
        """
        self.blogger = BloggerAgent(api_key=api_key)
        self.writer = WriterAgent(api_key=api_key)
        self.structure_agent = StructureAgent(api_key=api_key)

        # NOTE: this is the module-level logger, so the level set here is
        # shared by every PodcastStudio instance in the process.
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging_level)

        self.fetcher = AutoFetcher()

    async def create_conversation(self, url: str) -> tuple[str, str, Conversation]:
        """Fetch the paper at *url* and derive blog, dialogue and conversation.

        Args:
            url: Location passed to the fetcher.

        Returns:
            A ``(blog, dialogue, conversation)`` tuple: the blogger agent's
            output, the writer agent's output, and the structure agent's
            output, in that order.
        """
        self.logger.info(f"Fetching paper from {url}...")
        paper = await self.fetcher.fetch(url)
        self.logger.info("Paper fetched successfully.")
        self.logger.debug(
            f"Paper content: {paper[:100]}..."
        )  # Log first 100 characters

        self.logger.info("Creating blog from paper...")
        blog = await self.blogger.task(paper)
        self.logger.info("Blog created successfully.")
        self.logger.debug(f"{blog[:100]}...")  # Log first 100 characters

        self.logger.info("Creating dialogue from blog...")
        # The writer receives both the original paper and the derived blog.
        dialogue = await self.writer.task(paper, blog)
        self.logger.info("Dialogue created successfully.")
        self.logger.debug(f"{dialogue[:100]}...")  # Log first 100 characters

        self.logger.info("Structuring conversation from dialogue...")
        conversation = await self.structure_agent.task(dialogue)
        self.logger.info("Conversation structured successfully.")
        for _d in conversation.conversation:
            self.logger.debug(f"{_d.role}: {_d.content[:100]}...")

        return blog, dialogue, conversation

    async def record_podcast(
        self,
        conversation: Conversation,
        voicevox_client: VoiceVoxClient,
        speaker_id: SpeakerId,
        supporter_id: SpeakerId,
        *,
        speed_scale: float = 1.1,
    ) -> Audio:
        """Synthesize every line of *conversation* and join them into one audio.

        Lines are synthesized one at a time, in conversation order, so the
        joined audio follows the order of ``conversation.conversation``.

        Args:
            conversation: Conversation whose ``conversation`` entries expose
                ``role`` and ``content``.
            voicevox_client: Client used for the audio query, synthesis and
                wave-connection calls.
            speaker_id: Voice used for entries whose role is ``"speaker"``.
            supporter_id: Voice used for every other entry.
            speed_scale: Value written to the audio query's
                ``tempoDynamicsScale`` when that field is set, otherwise to
                ``speedScale``. Defaults to the previously hard-coded ``1.1``.

        Returns:
            The result of ``voicevox_client.post_connect_waves`` over all
            synthesized lines.
        """

        async def _synthesize(voice: SpeakerId, text: str, progress: tqdm) -> Audio:
            """Synthesize a single line and advance the progress bar."""
            audio_query = await voicevox_client.post_audio_query(
                text=text,
                speaker=voice,
            )
            # Adjust whichever speed field the returned query actually uses.
            if audio_query.tempoDynamicsScale is not None:
                audio_query.tempoDynamicsScale = speed_scale
            else:
                audio_query.speedScale = speed_scale

            audio = await voicevox_client.post_synthesis(
                speaker=voice,
                audio_query=audio_query,
            )
            progress.update(1)
            progress.set_postfix({"text": text[:20] + "..."})
            return audio

        audios = []
        # The context manager closes the bar even if a synthesis call raises.
        with tqdm(
            total=len(conversation.conversation),
            desc="Synthesizing audio",
            ncols=100,
        ) as progress_bar:
            for line in conversation.conversation:
                voice = speaker_id if line.role == "speaker" else supporter_id
                audios.append(await _synthesize(voice, line.content, progress_bar))

        # connect audio files
        return await voicevox_client.post_connect_waves(
            audio_list=audios,
        )