"""Celery worker tasks for downloading media, transcribing it and embedding the text.

Tasks registered here (by Celery task name):

* ``embbeding``     -- chunk and encode transcribed text.
* ``download``      -- fetch a direct link with aria2c, then transcribe it.
* ``transcription`` -- transcribe a file that already exists on disk.
* ``downloadx``     -- fetch media with yt-dlp, then transcribe it.
"""

import cgi
import os
import subprocess
import tempfile
import time

import yt_dlp
from celery import Celery, chain

from App import celery_config
from App.Embedding.utils.Initialize import encode, generateChunks
from App.Transcription.Utils.audio_transcription import transcribe_file

celery = Celery()
celery.config_from_object(celery_config)
celery.conf.update(
    # Other Celery configuration settings
    CELERYD_LOG_LEVEL='INFO',  # Worker log level (INFO)
)


# NOTE: the task name is spelled "embbeding"; it is the registered name that
# clients use to address the task, so it is intentionally left unchanged.
@celery.task(name="embbeding", bind=True)
def generate_store(self, data, task_id):
    """Split ``data`` into chunks and encode them.

    Args:
        data: Content handed to ``generateChunks``; the tasks in this module
            pass the ``"content"`` entry of a transcription result.
        task_id: Identifier forwarded to ``generateChunks``; callers in this
            module pass the id of the task that produced ``data``.
    """
    print('generating chunks')
    chunks = generateChunks(data, task_id)
    encode(chunks)
    print("hellooo")


def download_with_wget(link, download_dir, filename):
    """Download ``link`` to ``download_dir/filename`` using the ``aria2c`` CLI.

    Despite its name, this helper runs ``aria2c``, not ``wget``.

    Args:
        link: URL to download.
        download_dir: Directory passed to aria2c's ``-d`` option.
        filename: Output file name passed to aria2c's ``-o`` option.

    Raises:
        subprocess.CalledProcessError: If aria2c exits with a non-zero status.
    """
    # check=True makes a failed download raise instead of being silently
    # ignored, so callers never go on to process a missing or partial file.
    subprocess.run(
        ["aria2c", link, "-d", download_dir, "-o", filename],
        check=True,
    )


@celery.task(name="download", bind=True)
def downloadUrl(self, link, download_dir, filename, model_size="base"):
    """Download a file from ``link`` and transcribe it.

    Args:
        link: URL to download.
        download_dir: Directory the file is saved into.
        filename: Name of the saved file inside ``download_dir``.
        model_size: Model size forwarded to ``transcribe_file``.

    Returns:
        Whatever ``transcribe_file`` returns for the downloaded file.

    Raises:
        subprocess.CalledProcessError: If the aria2c download fails.
    """
    file_path = os.path.join(download_dir, filename)
    download_with_wget(link=link, download_dir=download_dir, filename=filename)
    data = transcribe_file(state=self, file_path=file_path, model_size=model_size)
    # NOTE: unlike the other transcription tasks in this module, the embedding
    # hand-off is currently disabled for this task:
    # generate_store.delay(data["content"], self.request.id)
    return data


@celery.task(name="transcription", bind=True)
def transcription_task(self, file_path, model_size="tiny"):
    """Transcribe the file at ``file_path`` and queue its content for embedding.

    Args:
        file_path: Path of the file to transcribe.
        model_size: Model size forwarded to ``transcribe_file``.

    Returns:
        Whatever ``transcribe_file`` returns; its ``"content"`` entry is also
        sent to the ``generate_store`` task together with this task's id.
    """
    data = transcribe_file(state=self, file_path=file_path, model_size=model_size)
    generate_store.delay(data["content"], self.request.id)
    return data


@celery.task(name="downloadx", bind=True)
def downloadfile(self, url, ydl_opts, model_size="base"):
    """Download ``url`` with yt-dlp, transcribe it and queue it for embedding.

    Args:
        url: Media URL handed to ``yt_dlp.YoutubeDL.download``.
        ydl_opts: yt-dlp options. Must contain ``"outtmpl"``, either as a
            plain string or as a dict with a ``"default"`` key; that value is
            used as the path of the file to transcribe.
        model_size: Model size forwarded to ``transcribe_file``.

    Returns:
        Whatever ``transcribe_file`` returns; its ``"content"`` entry is also
        sent to the ``generate_store`` task together with this task's id.
    """
    self.update_state(state="Downloading File..", meta={})
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    # updated
    self.update_state(state="Downloading complete", meta={})

    # yt-dlp accepts "outtmpl" either as a single template string or as a
    # dict of templates keyed by type; support both instead of assuming the
    # dict form (indexing a string with "default" would raise TypeError).
    outtmpl = ydl_opts["outtmpl"]
    audio_path = outtmpl["default"] if isinstance(outtmpl, dict) else outtmpl

    print(model_size, "worker after")
    data = transcribe_file(
        state=self, file_path=audio_path, model_size=model_size
    )
    generate_store.delay(data["content"], self.request.id)
    return data