Spaces:
Sleeping
Sleeping
from __future__ import annotations | |
import argparse | |
import os | |
import tempfile | |
from typing import Callable | |
from gradio_client import Client | |
# import loguru | |
from groq import Groq | |
from loguru import logger | |
from hf import hf_transcript, get_whisper_hf_client | |
from logs import configure_logging | |
from rate_limit import rate_limit_bypass | |
# from remote_whisper import hf_transcribe_audio | |
from settings import app_settings | |
from transcribe import get_full_transcript, parse_audio | |
def summarize_groq(client: Groq, text: str):
    """Summarize one transcript chunk with a Groq chat completion.

    Sends the configured system prompt together with a Russian-language user
    prompt that embeds ``text`` and asks for a short, bulleted retelling.

    Args:
        client: Groq client used to issue the chat completion request.
        text: Transcript chunk to summarize; interpolated into the user prompt.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # Adjacent literals are concatenated verbatim, so every sentence must end
    # with its own ". " separator; otherwise two instructions run together.
    user_prompt = (
        "Кратко перескажи видео по транскрипции, "
        "как будто это только часть видео. "
        "Используй оформление и ненумерованные пункты. "
        "Оформи название блока через **Название**. "
        "Не пиши о том, что это краткое изложение. "
        f"Вот транскрипция: {text}"
    )
    completion = client.chat.completions.create(
        model=app_settings.model,
        messages=[
            {"role": "system", "content": app_settings.system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=app_settings.temperature,
        max_tokens=app_settings.max_tokens,
        top_p=1,
        stream=False,  # the full response is needed before reading choices[0]
        stop=None,
    )
    return completion.choices[0].message.content
def summarize(
    texts: list[str],
    client: Client | Groq,
    summarizer: Callable[[Client | Groq, str], str] = summarize_groq,
) -> str:
    """Summarize every transcript chunk and concatenate the results.

    Args:
        texts: Transcript chunks, summarized one by one in the given order.
        client: Client object passed unchanged to ``summarizer``.
        summarizer: Callable invoked as ``summarizer(client, chunk)`` for each
            chunk; defaults to ``summarize_groq``.

    Returns:
        All chunk summaries joined into one string, each followed by a
        newline. An empty ``texts`` yields an empty string.
    """
    logger.info("Summarizing transcript...")
    summaries = []
    # Chunks are numbered from 1 in the log output.
    for index, chunk in enumerate(texts, start=1):
        logger.info(f"Summarizing chunk #{index}")
        summaries.append(summarizer(client, chunk) + "\n")
    return "".join(summaries)
def main() -> None:
    """Command-line entry point: transcribe a video file and print its summary.

    Reads the ``video_path`` positional argument, writes an ``audio.mp3`` file
    into a temporary directory via ``parse_audio``, obtains the transcript via
    ``get_full_transcript`` and prints the output of ``summarize``.
    """
    configure_logging()

    # The text is passed as ``description``: the first positional parameter of
    # ArgumentParser is ``prog``, which would replace the program name in the
    # usage line instead of describing the tool.
    parser = argparse.ArgumentParser(description="Video transcript summarizer")
    parser.add_argument("video_path", help="Path to video file", type=str)
    args = parser.parse_args()

    groq_client = Groq(api_key=app_settings.groq_api_key)
    hf_client = get_whisper_hf_client()

    with tempfile.TemporaryDirectory() as tmpdirname:
        parse_audio(args.video_path, os.path.join(tmpdirname, "audio.mp3"))
        transcript = get_full_transcript(
            tmpdirname, hf_client, one_file_transcript_func=hf_transcript
        )
        # Summarize while the temporary directory still exists.
        # NOTE(review): needed only if the transcript is produced lazily from
        # files in that directory - confirm against get_full_transcript.
        print(summarize(transcript, groq_client))


if __name__ == "__main__":
    main()