Removed Unneeded Files
Files changed:
- .gitignore +3 -0
- audiobook.py +52 -0
- main.py +196 -0
- media_download.py +395 -0
- requirements.txt +13 -0
- summarizer.py +429 -0
- transcription.py +221 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
Output/*
*.pyc
*.sh
audiobook.py
ADDED
@@ -0,0 +1,52 @@
import os
from IPython.display import Audio
import nltk  # we'll use this to split into sentences
import numpy as np

from bark.generation import (
    generate_text_semantic,
    preload_models,
)
from bark.api import semantic_to_waveform
from bark import generate_audio, SAMPLE_RATE
import soundfile as sf

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Loads the model, should be run one time
preload_models()


class AudioBook:
    def __init__(self, output_folder="output"):
        self.output_folder = output_folder

        # Create the output folder if it doesn't exist
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

    def generate_audio_from_text(self, text, speaker="male", filename="output_audio"):
        # Preprocess text
        text = text.replace("\n", " ").strip()
        sentences = nltk.sent_tokenize(text)

        # Choose the speaker based on the input
        if speaker == "male":
            SPEAKER = "v2/en_speaker_6"
        elif speaker == "female":
            SPEAKER = "v2/en_speaker_9"
        else:
            raise ValueError("Invalid speaker selection. Use 'male' or 'female'.")

        silence = np.zeros(int(0.25 * SAMPLE_RATE))  # quarter-second of silence

        pieces = []
        for sentence in sentences:
            audio_array = generate_audio(sentence, history_prompt=SPEAKER, text_temp=0.7, waveform_temp=0.7)
            pieces += [audio_array, silence.copy()]

        audio_data = np.concatenate(pieces)

        # Save the audio to a WAV file in the output folder
        output_path = os.path.join(self.output_folder, f"{filename}.wav")
        sf.write(output_path, audio_data, SAMPLE_RATE)

        return output_path
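
For reference, a minimal usage sketch of the AudioBook class defined above. It assumes the Bark weights can be fetched on first run and that NLTK's 'punkt' tokenizer data is available; the nltk.download call, sample text, and file names below are illustrative assumptions, not part of the repository.

# Minimal local test of audiobook.py; the text, speaker and filename are placeholders.
import nltk

from audiobook import AudioBook

nltk.download("punkt")  # sent_tokenize relies on the punkt tokenizer data

narrator = AudioBook(output_folder="Output")
wav_path = narrator.generate_audio_from_text(
    "Hello there. This is a short narration test.",
    speaker="female",
    filename="demo_audio",
)
print(f"Audio written to {wav_path}")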
main.py
ADDED
@@ -0,0 +1,196 @@
import uvicorn
from fastapi import FastAPI, Request

from media_download import YoutubeDownloader
from transcription import StableWhisper
from summarizer import Extract_Summary, AudioBookNarration
from audiobook import AudioBook

app = FastAPI()
output_folder = 'Output'

# Create a context variable to store the contexts for each user
users_context = dict()


@app.get("/get_media_info")
async def get_media_info(request: Request, url: str):

    # Getting User's IP
    user_ip = request.client.host

    # Getting User's Youtube Downloader
    youtube_downloader = YoutubeDownloader(url, output_folder)

    # Getting Youtube Media Info
    media_info = youtube_downloader.get_media_info()

    # Storing Info in the context for this user's session
    users_context[user_ip] = dict()
    users_context[user_ip]['downloader'] = youtube_downloader
    # users_context[user_ip]['media_info'] = media_info
    users_context[user_ip]['url'] = url

    return media_info


@app.get("/download_media")
async def download_media(request: Request, media_type: str, media_format: str, media_quality: str):

    # Getting User's IP
    user_ip = request.client.host

    # Downloading Media for User
    media_path = users_context[user_ip]['downloader'].download(media_type, media_format, media_quality)

    # Getting Status
    status = 1 if media_path else 0

    if status:
        # Storing Media Info in the context for this user's session
        users_context[user_ip]['media_path'] = media_path
        users_context[user_ip]['media_type'] = media_type

    return {"status": status}


@app.get("/get_transcript")
async def get_transcript(request: Request, subtitle_format: str = 'srt', word_level: bool = True):

    # Getting User's IP
    user_ip = request.client.host

    # Retrieving the media_path from the context for this user's session
    media_path = users_context[user_ip]['media_path']

    # Checking if the media_type is Video, then extracting its audio
    media_type = users_context[user_ip]['media_type']
    if media_type == 'video':
        media_path = users_context[user_ip]['downloader'].extract_audio(media_path)

    # Whisper based transcription
    stable_whisper_transcript = StableWhisper(media_path, output_folder, subtitle_format=subtitle_format, word_level=word_level)
    transcript = stable_whisper_transcript.generate_transcript()
    transcript_path = stable_whisper_transcript.save_transcript()

    # Getting Status
    status = 1 if transcript else 0

    if status:
        # Storing Transcript Info in the context for this user's session
        users_context[user_ip]['transcript'] = transcript
        users_context[user_ip]['transcript_path'] = transcript_path

    return {"status": status, "transcript": transcript}


@app.get("/get_summary")
async def get_summary(request: Request, Summary_type: str, Summary_strategy: str, Target_Person_type: str,
                      Response_length: str, Writing_style: str, text_input: str = None):

    # Getting User's IP
    user_ip = request.client.host

    # Getting Transcript if not provided
    if not text_input:
        text_input = users_context[user_ip]['transcript']

    # Extracting Summary
    summary_extractor = Extract_Summary(text_input=text_input)
    output = summary_extractor.define_chain(Summary_type=Summary_type,
                                            Summary_strategy=Summary_strategy,
                                            Target_Person_type=Target_Person_type,
                                            Response_length=Response_length,
                                            Writing_style=Writing_style,
                                            key_information=False)

    # Getting Status
    status = 1 if output else 0

    if status:
        # Storing Summary Info in the context for this user's session
        users_context[user_ip]['summary'] = output

    return {"status": status, "summary": output}


@app.get("/get_key_info")
async def get_key_info(request: Request, Summary_type: str, Summary_strategy: str, Target_Person_type: str,
                       Response_length: str, Writing_style: str, text_input: str = None):

    # Getting User's IP
    user_ip = request.client.host

    # Getting Transcript if not provided
    if not text_input:
        text_input = users_context[user_ip]['transcript']

    # Extracting Key Information
    summary_extractor = Extract_Summary(text_input=text_input)
    output = summary_extractor.define_chain(Summary_type=Summary_type,
                                            Summary_strategy=Summary_strategy,
                                            Target_Person_type=Target_Person_type,
                                            Response_length=Response_length,
                                            Writing_style=Writing_style,
                                            key_information=True)

    # Getting Status
    status = 1 if output else 0

    if status:
        # Storing Key Info in the context for this user's session
        users_context[user_ip]['key_info'] = output

    return {"status": status, "key_info": output}


@app.get("/get_narration")
async def get_narration(request: Request, Narration_style: str, text_input: str = None):

    # Getting User's IP
    user_ip = request.client.host

    # Getting Transcript if not provided
    if not text_input:
        text_input = users_context[user_ip]['transcript']

    # Extracting Narration
    narrator = AudioBookNarration(text_input=text_input)
    output = narrator.define_chain(Narration_style=Narration_style)

    # Getting Status
    status = 1 if output else 0

    if status:
        # Storing Narration Info in the context for this user's session
        users_context[user_ip]['narration'] = output

    return {"status": status, "narration": output}


@app.get("/get_audiobook")
async def get_audiobook(request: Request, speaker: str = "male", text_input: str = None):

    # Getting User's IP
    user_ip = request.client.host

    # Getting Narration if not provided
    if not text_input:
        text_input = users_context[user_ip]['narration']

    # Generating Audiobook
    audiobook = AudioBook(output_folder=output_folder)
    audio_path = audiobook.generate_audio_from_text(text_input, speaker=speaker, filename="output_audio")

    # Getting Status
    status = 1 if audio_path else 0

    if status:
        # Storing Audiobook path in the context for this user's session
        users_context[user_ip]['audiobook_path'] = audio_path

    return {"status": status, "audiobook_path": audio_path}


if __name__ == "__main__":
    uvicorn.run(app, host="127.0.0.1", port=8000)
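
A hedged client-side sketch of the intended call order against a locally running server (media info, download, transcript, narration, audiobook). The `requests` package, the example video URL, and the parameter values are assumptions for illustration and are not part of this repository; because the server keys its context by client IP, the calls must come from the same machine in this order.

# Hypothetical client for main.py, assuming the API runs locally on 127.0.0.1:8000
# and that the `requests` package is installed (it is not listed in requirements.txt).
import requests

BASE = "http://127.0.0.1:8000"
video_url = "https://www.youtube.com/watch?v=EXAMPLE"  # placeholder URL

info = requests.get(f"{BASE}/get_media_info", params={"url": video_url}).json()
print(info["formats"])  # pick a media_type / media_format / media_quality from here

requests.get(f"{BASE}/download_media",
             params={"media_type": "audio", "media_format": "mp3", "media_quality": "128kbps"})
transcript = requests.get(f"{BASE}/get_transcript", params={"subtitle_format": "srt"}).json()
narration = requests.get(f"{BASE}/get_narration", params={"Narration_style": "calm"}).json()
audiobook = requests.get(f"{BASE}/get_audiobook", params={"speaker": "male"}).json()
print(audiobook["audiobook_path"])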
media_download.py
ADDED
@@ -0,0 +1,395 @@
import os
import re
import sys
import json
import time
import subprocess

import numpy as np
import pandas as pd

from abc import ABC, abstractmethod

from pytube import YouTube


class MediaDownloader(ABC):

    def __init__(self, url, output_path, start_time=None, end_time=None):
        self.url = url
        self.output_path = os.path.join(os.getcwd(), output_path)
        self.start_time = start_time
        self.end_time = end_time
        self.__create_output_dir()

    def __create_output_dir(self):
        if not os.path.exists(self.output_path):
            os.makedirs(self.output_path)

    @abstractmethod
    def _get_supported_media_formats(self):
        pass

    @abstractmethod
    def download(self, media_type, media_format, media_quality):
        pass

    @abstractmethod
    def _download_media(self, media_type, media_format, media_quality):
        pass

    @abstractmethod
    def _download_audio(self, audio_format, audio_quality):
        pass

    @abstractmethod
    def _download_video(self, video_format, video_quality):
        pass

    @abstractmethod
    def _download_audio_and_video(self, media_format, media_quality):
        pass

    @abstractmethod
    def _download_media_chunk(self, media_type, media_format, media_quality):
        pass

    @abstractmethod
    def _download_audio_chunk(self, audio_format, audio_quality):
        pass

    @abstractmethod
    def _download_video_chunk(self, video_format, video_quality):
        pass


class YoutubeDownloader(MediaDownloader):

    def __init__(self, url, output_path, start_time=None, end_time=None):
        super().__init__(url, output_path, start_time, end_time)
        self.youtube = YouTube(url)
        self.title = self.youtube.title
        self.media_length = self.youtube.length
        self.thumbnail_url = self.youtube.thumbnail_url
        self.streams = self.youtube.streams
        self.streams_df, self.media_formats_dict = self._get_supported_media_formats()

    def __get_quality_int(self, media_quality):
        '''
        Returns the quality as an integer.
        E.g.: given the input 1080p, it returns 1080
        '''
        match = re.search(r'^\d+', media_quality)
        if match:
            return int(match.group())
        else:
            return None

    def _get_supported_media_formats(self):
        '''
        Returns all supported media formats for both audio & video
        '''

        # Creating Pandas Dataframe for Video Streams' Details
        streams_details = []
        for stream in self.streams.filter(only_video=True):
            media_type = stream.type
            media_format = stream.mime_type.split('/')[1]
            quality = stream.resolution
            progressive = stream.is_progressive
            stream_details = [media_type, media_format, quality, progressive]
            streams_details.append(stream_details)
        cols = ['media_type', 'media_format', 'media_quality', 'progressive']
        streams_df = pd.DataFrame(streams_details, columns=cols)

        # Adding Custom Audio Streams
        streams_df.loc[len(streams_df)] = ['audio', 'mp3', '128kbps', False]
        streams_df.loc[len(streams_df)] = ['audio', 'mp3', '256kbps', False]
        streams_df.loc[len(streams_df)] = ['audio', 'wav', '1411kbps', False]

        # Converting to Dictionary for Unique User Options
        media_formats_dict = dict()
        for media_type in sorted(streams_df['media_type'].unique()):
            media_formats_dict[media_type] = dict()
            media_type_df = streams_df[streams_df['media_type'] == media_type]
            for media_format in sorted(media_type_df['media_format'].unique()):
                media_format_df = media_type_df[media_type_df['media_format'] == media_format]
                media_qualities = sorted(media_format_df['media_quality'].unique(), key=self.__get_quality_int)
                media_formats_dict[media_type][media_format] = media_qualities

        return streams_df, media_formats_dict

    def get_media_formats(self):
        '''
        Returns a dictionary of supported media formats
        '''
        return self.media_formats_dict

    def _select_media_format(self):
        '''
        For selecting the media format to download
        '''
        print(json.dumps(self.media_formats_dict, indent=12))

        # Getting Media Type
        media_types = list(self.media_formats_dict.keys())
        media_type = input(f'Select a Media Type from {media_types}: ')
        assert(media_type in media_types)

        # Getting Media Format
        media_formats = list(self.media_formats_dict[media_type].keys())
        media_format = input(f'Select a Media Format from {media_formats}: ')
        assert(media_format in media_formats)

        # Getting Media Quality
        media_qualities = self.media_formats_dict[media_type][media_format]
        media_quality = input(f'Select a Media Quality from {media_qualities}: ')
        assert(media_quality in media_qualities)

        return media_type, media_format, media_quality

    def download(self, media_type, media_format, media_quality):
        '''
        Download Handler Function:
        Handles all types of media download
        '''
        if (self.start_time) or (self.end_time):
            output_path = self._download_media_chunk(media_type, media_format, media_quality)
        else:
            output_path = self._download_media(media_type, media_format, media_quality)
        return output_path

    def _download_media(self, media_type, media_format, media_quality):
        '''
        Media Download Handler Function:
        Checks which type of media download is required & passes it onto the relevant method
        '''

        # Checking for the Media in Dataframe
        media_mask = (self.streams_df['media_type'] == media_type) & \
                     (self.streams_df['media_format'] == media_format) & \
                     (self.streams_df['media_quality'] == media_quality)
        media_df = self.streams_df[media_mask]

        # Downloading Media according to the Arguments
        if media_type == 'audio':
            output_path = self._download_audio(media_format, media_quality)

        elif media_type == 'video':

            # Checking if Progressive Video is Available
            is_progressive = True if True in media_df['progressive'].unique() else False

            if is_progressive:
                output_path = self._download_video(media_format, media_quality)

            else:
                output_path = self._download_audio_and_video(media_format, media_quality)

        return output_path

    def _download_audio(self, audio_format, audio_quality):
        '''
        Filters the required audio stream & downloads it
        '''

        # Getting Quality Command String
        quality = str(self.__get_quality_int(audio_quality)) + 'K'

        # Getting Output Path
        output_path = os.path.join(self.output_path, f"{self.title}.{audio_format}")

        # Download Command
        command = [
            "yt-dlp",
            "-x", "--audio-format", audio_format,
            "--audio-quality", quality,
            "-o", output_path,
            self.url, "-q"
        ]

        # Running the command using Subprocess
        subprocess.run(command)

        return output_path

    def _download_video(self, video_format, video_quality):
        '''
        Filters the required video stream & downloads it
        Only for progressive media, i.e. containing both audio & video streams
        '''
        stream = self.streams.filter(progressive=True, file_extension=video_format, resolution=video_quality).first()
        print(stream)
        video_path = stream.download(output_path=self.output_path, filename=f"{self.title}.{video_format}")
        return video_path

    def _download_audio_and_video(self, media_format, media_quality):
        '''
        Filters the required video stream & downloads it
        Filters the best quality audio stream of the same format & downloads it
        '''

        # Downloading Audio
        stream = self.streams.filter(file_extension=media_format, only_audio=True).order_by('abr').desc().first()
        print(stream)
        audio_filename = f"{self.title} - Audio.{media_format}"
        audio_path = stream.download(output_path=self.output_path, filename=audio_filename)

        # Downloading Video
        stream = self.streams.filter(file_extension=media_format, resolution=media_quality).first()
        print(stream)
        video_filename = f"{self.title} - Video.{media_format}"
        video_path = stream.download(output_path=self.output_path, filename=video_filename)

        # Combining the Audio & Video Files using FFMPEG Command
        output_path = os.path.join(self.output_path, f"{self.title}.{media_format}")
        command = ['ffmpeg', '-i', video_path, '-i', audio_path,
                   '-c:v', 'copy', '-c:a', 'copy', output_path,
                   '-loglevel', 'quiet']
        subprocess.run(command)

        os.remove(audio_path)
        os.remove(video_path)

        return output_path

    def _download_media_chunk(self, media_type, media_format, media_quality):
        '''
        Media Download Handler Function:
        Checks which type of media download is required for a particular chunk & passes it onto the relevant method
        '''
        # Downloading Media according to the Arguments
        if media_type == 'audio':
            output_path = self._download_audio_chunk(media_format, media_quality)

        elif media_type == 'video':
            output_path = self._download_video_chunk(media_format, media_quality)

        return output_path

    def _download_audio_chunk(self, audio_format, audio_quality):
        '''
        Filters the required audio stream & downloads it for a particular chunk
        '''

        # Getting Chunk Command String
        if (self.start_time) and (self.end_time):
            chunk_string = f"-ss {self.start_time} -to {self.end_time}"

        elif (self.start_time) and (not self.end_time):
            chunk_string = f"-ss {self.start_time}"

        elif (not self.start_time) and (self.end_time):
            chunk_string = f"-to {self.end_time}"

        # Getting Quality Command String
        quality = str(self.__get_quality_int(audio_quality)) + 'K'

        # Getting Output Path
        output_path = os.path.join(self.output_path, f"{self.title}.{audio_format}")

        # Download Command
        command = [
            "yt-dlp",
            "-x", "--audio-format", audio_format,
            "--audio-quality", quality,
            "--external-downloader", "ffmpeg",
            "--external-downloader-args", chunk_string,
            "-o", output_path,
            self.url, "-q"
        ]

        # Running the command using Subprocess
        subprocess.run(command)

        return output_path

    def _download_video_chunk(self, video_format, video_quality):
        '''
        Filters the required video stream & downloads it for a particular chunk
        '''

        # Getting Chunk Command String
        if (self.start_time) and (self.end_time):
            chunk_string = f"-ss {self.start_time} -to {self.end_time}"

        elif (self.start_time) and (not self.end_time):
            chunk_string = f"-ss {self.start_time}"

        elif (not self.start_time) and (self.end_time):
            chunk_string = f"-to {self.end_time}"

        # Getting Output Path
        output_path = os.path.join(self.output_path, f"{self.title}.{video_format}")

        # Getting Video Quality Integer
        video_quality = self.__get_quality_int(video_quality)

        # Download Command
        if video_format == 'mp4':
            video_codec = "h264"
            audio_codec = "m4a"

        elif video_format == 'webm':
            video_codec = "vp9"
            audio_codec = "opus"

        else:
            print('Unexpected Video Format Encountered:', video_format)
            sys.exit(0)

        command = [
            "yt-dlp",
            self.url,
            "-S", f"res:{video_quality},vcodec:{video_codec},acodec:{audio_codec}",
            "--merge-output-format", video_format,
            "--download-sections", f"*{self.start_time}-{self.end_time}",
            "-o", f"{output_path}",
            # "-q"
        ]

        print(' '.join(command))

        # Running the command using Subprocess
        subprocess.run(command)

        return output_path

    def get_media_info(self):
        media_info = {
            'title': self.title,
            'media_length': self.media_length,
            'thumbnail_url': self.thumbnail_url,
            'formats': self.media_formats_dict
        }
        return media_info

    @staticmethod
    def extract_audio(video_path):
        """
        Extract audio from a video file (MP4 or WebM) and save it as an MP3 file using ffmpeg.

        Args:
            video_path (str): Path to the input video file.

        Returns:
            str: Path to the extracted audio file if extraction is successful, None otherwise.
        """
        try:
            # Determine the file format (MP4 or WebM) based on the file extension
            filename, extension = os.path.splitext(video_path)

            # Extracted audio path
            audio_path = filename + '.mp3'

            # Choose the appropriate codec for the output audio format (MP3)
            audio_codec = "libmp3lame" if extension.lower() in (".mp4", ".webm") else "mp3"

            # Run the ffmpeg command to extract audio
            subprocess.run(["ffmpeg", "-i", video_path, "-vn", "-acodec",
                            audio_codec, audio_path, '-loglevel', 'quiet'], check=True)

            return audio_path

        except subprocess.CalledProcessError as e:
            print(f"Error: {e}")
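
A minimal sketch of using YoutubeDownloader directly, assuming yt-dlp and ffmpeg are installed and on PATH; the video URL and file names below are placeholders.

# Hypothetical direct use of YoutubeDownloader; requires yt-dlp and ffmpeg on PATH,
# and the video URL below is a placeholder.
from media_download import YoutubeDownloader

downloader = YoutubeDownloader("https://www.youtube.com/watch?v=EXAMPLE", "Output")
print(downloader.get_media_info()["formats"])   # supported media_type -> media_format -> qualities

# Download the audio track as a 128 kbps MP3
audio_path = downloader.download("audio", "mp3", "128kbps")
print(audio_path)

# Extract the audio from an already-downloaded video file
# mp3_path = YoutubeDownloader.extract_audio("Output/some_video.mp4")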
requirements.txt
ADDED
@@ -0,0 +1,13 @@
fastapi
faster-whisper
langchain
openai
pandas
pytube
scikit-learn
soundfile
stable-ts
uvicorn
wordcloud
youtube-transcript-api
git+https://github.com/suno-ai/bark.git
summarizer.py
ADDED
@@ -0,0 +1,429 @@
from langchain import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import DirectoryLoader
from wordcloud import WordCloud, STOPWORDS
import numpy as np
from langchain.embeddings import OpenAIEmbeddings
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import os
from langchain.docstore.document import Document

# Provide the OpenAI API key via the OPENAI_API_KEY environment variable instead of hardcoding it.
os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY", "")


class Extract_Summary:

    def __init__(self, text_input, file_path=None, chunks=2000, chunking_strategy=None, LLM_Model="gpt-3.5-turbo", temperature=1, top_p=None, top_k=None):
        self.chunks = chunks
        self.file_path = file_path
        self.text_input = text_input
        self.chunking_strategy = chunking_strategy
        self.LLM_Model = LLM_Model
        self.temperature = temperature
        self.top_p = top_p
        self.top_k = top_k

    def doc_summary(self, docs):
        # print(f'You have {len(docs)} documents')
        num_words = sum([len(doc.page_content.split(" ")) for doc in docs])
        # print(f"You have {num_words} words in documents")
        return num_words, len(docs)

    def load_docs(self):

        if self.file_path is not None:
            docs = DirectoryLoader(self.file_path, glob="**/*.txt").load()
        else:
            docs = Document(page_content=f"{self.text_input}", metadata={"source": "local"})
            docs = [docs]
            # docs = self.text_input
        tokens, documents_count = self.doc_summary(docs)

        if documents_count > 8 or tokens > 6000:  ## Add token checks as well. Add model availability checks
            docs = self.chunk_docs(docs)  ## Handling large documents with more than 6000 tokens
            docs = self.summarise_large_documents(docs)
            tokens, documents_count = self.doc_summary(docs)

        if tokens > 2000:
            docs = self.chunk_docs(docs)
            chain_type = 'map_reduce'
        else:
            chain_type = 'stuff'

        print("==" * 20)
        print(tokens)
        print(chain_type)
        return docs, chain_type

    ## Add ensemble retriever for this as well.

    def summarise_large_documents(self, docs):
        print("==" * 20)
        print('Original Docs size :', len(docs))
        embeddings = OpenAIEmbeddings()
        vectors = embeddings.embed_documents([x.page_content for x in docs])

        # Silhouette Score
        n_clusters_range = range(2, 11)
        silhouette_scores = []
        for i in n_clusters_range:
            kmeans = KMeans(n_clusters=i, init='k-means++',
                            max_iter=300, n_init=10, random_state=0)
            kmeans.fit(vectors)
            score = silhouette_score(vectors, kmeans.labels_)
            silhouette_scores.append(score)

        optimal_n_clusters = n_clusters_range[np.argmax(silhouette_scores)]
        # n_clusters = 5
        kmeans = KMeans(n_clusters=optimal_n_clusters,
                        random_state=42).fit(vectors)

        # Getting the documents closest to each centroid
        closest_indices = []
        # Loop through the number of clusters you have
        for i in range(optimal_n_clusters):
            # Get the list of distances from that particular cluster center
            distances = np.linalg.norm(
                vectors - kmeans.cluster_centers_[i], axis=1)
            # Find the list position of the closest one (using argmin to find the smallest distance)
            closest_index = np.argmin(distances)
            # Append that position to your closest indices list
            closest_indices.append(closest_index)

        sorted_indices = sorted(closest_indices)
        selected_docs = [docs[doc] for doc in sorted_indices]

        print('Selected Docs size :', len(selected_docs))

        return selected_docs

    def chunk_docs(self, docs):

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunks,
            chunk_overlap=50,
            length_function=len,
            is_separator_regex=False,
        )
        splitted_document = text_splitter.split_documents(docs)

        return splitted_document

    def get_key_information_stuff(self):

        prompt_template = """
        Extract key information from the text below. This key information can include people's names & their role/rank, locations, organizations, nationalities, religions,
        events such as historical, social, sporting and naturally occurring events, products, addresses & emails, URLs, dates & times. Provide the list of key information; each item
        should be labeled with its corresponding category. If key information related to a category is not present, don't add that category to the response.
        {text}

        """
        prompt = PromptTemplate(
            template=prompt_template, input_variables=['text'])

        return prompt

    def get_key_information_map_reduce(self):

        map_prompts = """
        Extract key information from the text below. This key information can include people's names & their role/rank, locations, organizations, nationalities, religions,
        events such as historical, social, sporting and naturally occurring events, products, addresses & emails, URLs, dates & times. Provide the list of key information; each item
        should be labeled with its corresponding category. If key information related to a category is not present, don't add that category to the response.
        {text}

        """
        combine_prompt = """
        The text below contains key information that was extracted from a text. Your job is to combine the key information and return the results. This key information can include people's names & their role/rank,
        locations, organizations, nationalities, religions, events such as historical, social, sporting and naturally occurring events, products,
        addresses & emails, URLs, dates & times. Provide the list of key information; each item should be labeled with its corresponding category.
        If key information related to a category is not present, don't add that category to the response.
        {text}

        """
        map_template = PromptTemplate(template=map_prompts, input_variables=['text'])
        # combine_template = PromptTemplate(template=combine_prompt, input_variables=['Summary_type','Summary_strategy','Target_Person_type','Response_length','Writing_style','text'])
        combine_template = PromptTemplate(template=combine_prompt, input_variables=['text'])

        return map_template, combine_template

    def get_stuff_prompt(self):
        prompt_template = """

        Write a {Summary_type} and {Summary_strategy} for {Target_Person_type}. The length of the summary should be {Response_length} words and the writing style should be {Writing_style}.
        Work from the text below by identifying the most important topics based on their importance in the text corpus; the summary should be based on these important topics.

        {text}

        """

        # prompt = PromptTemplate.from_template(prompt_template, input_variables=['Summary_type','Summary_strategy','Target_Person_type','Response_length','Writing_style','text'])

        prompt = PromptTemplate(
            template=prompt_template, input_variables=['Summary_type', 'Summary_strategy', 'Target_Person_type', 'Response_length', 'Writing_style', 'text'])

        return prompt

    def define_prompts(self):

        map_prompts = """
        Identify the key topics in the following text. In your response only add the most relevant and most important topics, together with a concise yet elaborate summary of the text below.
        Don't add all the topics that you find. If you didn't find any important topic, don't return anything in the response. Also provide an importance score out of 1 for each identified topic.
        Your response should look like this, e.g.: a summary of the text, a list of comma-separated topic names `Topic 1, Topic 2, Topic 3`,
        and a list of comma-separated importance scores for these topics `1, 0.5, 0.2`, so the response should be formatted like this.

        Summary:
        blah blah blah
        Topic Names : Topic 1, Topic 2, Topic 3
        Importance Score: 1, 0.4, 0.3

        {text}
        """

        combine_prompt = """
        Here is a list of summaries, topic names and their respective importance scores that were extracted from a text.
        Your job is to provide the best possible summary based on the list of summaries below, using the most important topics according to their importance score.
        Write a {Summary_type} and {Summary_strategy} for {Target_Person_type}. The length of the summary should be {Response_length} words and the writing style should be {Writing_style}.

        {text}

        The output format should look like this. Don't try to return multiple summaries; only return one combined summary for the above-mentioned summaries.

        Summary:
        blah blah blah

        """

        map_template = PromptTemplate(template=map_prompts, input_variables=['text'])
        # combine_template = PromptTemplate(template=combine_prompt, input_variables=['Summary_type','Summary_strategy','Target_Person_type','Response_length','Writing_style','text'])
        combine_template = PromptTemplate(
            template=combine_prompt, input_variables=['Summary_type', 'Summary_strategy', 'Target_Person_type', 'Response_length', 'Writing_style', 'text'])

        return map_template, combine_template

    def define_chain(self, Summary_type, Summary_strategy,
                     Target_Person_type, Response_length, Writing_style, chain_type=None, key_information=False):

        docs, chain_type = self.load_docs()
        llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0)

        if chain_type == 'stuff':
            if key_information:
                prompt = self.get_key_information_stuff()
            else:
                prompt = self.get_stuff_prompt()
            chain = load_summarize_chain(
                llm=llm, chain_type='stuff', verbose=False, prompt=prompt)

        elif chain_type == 'map_reduce':

            if key_information:
                map_prompts, combine_prompt = self.get_key_information_map_reduce()
            else:
                map_prompts, combine_prompt = self.define_prompts()

            chain = load_summarize_chain(
                llm=llm, map_prompt=map_prompts, combine_prompt=combine_prompt, chain_type='map_reduce', verbose=False)

        # elif chain_type == 'refine':
        #     chain = load_summarize_chain(llm=llm, question_prompt=map_prompts,
        #                                  refine_prompt=combine_prompt, chain_type='refine', verbose=False)

        if not key_information:
            output = chain.run(Summary_type=Summary_type, Summary_strategy=Summary_strategy,
                               Target_Person_type=Target_Person_type, Response_length=Response_length, Writing_style=Writing_style, input_documents=docs)
        else:
            output = chain.run(input_documents=docs)

        # self.create_wordcloud(output=output)
        # display(Markdown(f"Text: {docs}"))
        # display(Markdown(f"Summary Response: {output}"))
        return output

    def create_wordcloud(self, output):
        wc = WordCloud(stopwords=STOPWORDS, height=500, width=300)
        wc.generate(output)
        wc.to_file('WordCloud.png')


class AudioBookNarration:

    def __init__(self, text_input, file_path=None, chunks=2000, chunking_strategy=None, LLM_Model="gpt-3.5-turbo", temperature=1, top_p=None, top_k=None):
        self.chunks = chunks
        self.file_path = file_path
        self.text_input = text_input
        self.chunking_strategy = chunking_strategy
        self.LLM_Model = LLM_Model
        self.temperature = temperature
        self.top_p = top_p
        self.top_k = top_k

    def doc_summary(self, docs):
        # print(f'You have {len(docs)} documents')
        num_words = sum([len(doc.page_content.split(" ")) for doc in docs])
        # print(f"You have {num_words} words in documents")
        return num_words, len(docs)

    def load_docs(self):

        if self.file_path is not None:
            docs = DirectoryLoader(self.file_path, glob="**/*.txt").load()
        else:
            docs = Document(page_content=f"{self.text_input}", metadata={"source": "local"})
            docs = [docs]
            # docs = self.text_input
        tokens, documents_count = self.doc_summary(docs)

        if documents_count > 8 or tokens > 6000:  ## Add token checks as well. Add model availability checks
            docs = self.chunk_docs(docs)  ## Handling large documents with more than 6000 tokens
            docs = self.summarise_large_documents(docs)
            tokens, documents_count = self.doc_summary(docs)

        if tokens > 2000:
            docs = self.chunk_docs(docs)
            chain_type = 'map_reduce'
        else:
            chain_type = 'stuff'

        print("==" * 20)
        print(tokens)
        print(chain_type)
        return docs, chain_type

    ## Add ensemble retriever for this as well.

    def summarise_large_documents(self, docs):
        print("==" * 20)
        print('Original Docs size :', len(docs))
        embeddings = OpenAIEmbeddings()
        vectors = embeddings.embed_documents([x.page_content for x in docs])

        # Silhouette Score
        n_clusters_range = range(2, 11)
        silhouette_scores = []
        for i in n_clusters_range:
            kmeans = KMeans(n_clusters=i, init='k-means++',
                            max_iter=300, n_init=10, random_state=0)
            kmeans.fit(vectors)
            score = silhouette_score(vectors, kmeans.labels_)
            silhouette_scores.append(score)

        optimal_n_clusters = n_clusters_range[np.argmax(silhouette_scores)]
        # n_clusters = 5
        kmeans = KMeans(n_clusters=optimal_n_clusters,
                        random_state=42).fit(vectors)

        # Getting the documents closest to each centroid
        closest_indices = []
        # Loop through the number of clusters you have
        for i in range(optimal_n_clusters):
            # Get the list of distances from that particular cluster center
            distances = np.linalg.norm(
                vectors - kmeans.cluster_centers_[i], axis=1)
            # Find the list position of the closest one (using argmin to find the smallest distance)
            closest_index = np.argmin(distances)
            # Append that position to your closest indices list
            closest_indices.append(closest_index)

        sorted_indices = sorted(closest_indices)
        selected_docs = [docs[doc] for doc in sorted_indices]

        print('Selected Docs size :', len(selected_docs))

        return selected_docs

    def chunk_docs(self, docs):

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunks,
            chunk_overlap=50,
            length_function=len,
            is_separator_regex=False,
        )
        splitted_document = text_splitter.split_documents(docs)

        return splitted_document

    def get_stuff_prompt(self):

        prompt_template = """
        Create a {Narration_style} narration for the text below. This narration will be used for audiobook generation,
        so provide output that is verbose, easy to understand and full of expression.
        {text}

        """
        prompt = PromptTemplate(
            template=prompt_template, input_variables=['Narration_style', 'text'])

        return prompt

    def define_prompts(self):

        map_prompts = """
        Create a {Narration_style} narration for the text below. This narration will be used for audiobook generation,
        so provide output that is verbose, easy to understand and full of expression.
        {text}
        """

        combine_prompt = """
        Below is a list of texts that represent narrations of the text.
        Your job is to combine these narrations and create one verbose, easy to understand and expressive {Narration_style} narration.
        {text}

        """

        map_template = PromptTemplate(template=map_prompts, input_variables=['Narration_style', 'text'])
        combine_template = PromptTemplate(
            template=combine_prompt, input_variables=['Narration_style', 'text'])

        return map_template, combine_template

    def define_chain(self, Narration_style=None, chain_type=None):

        docs, chain_type = self.load_docs()
        llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0)

        if chain_type == 'stuff':

            prompt = self.get_stuff_prompt()
            chain = load_summarize_chain(
                llm=llm, chain_type='stuff', verbose=False, prompt=prompt)

        elif chain_type == 'map_reduce':

            map_prompts, combine_prompt = self.define_prompts()
            chain = load_summarize_chain(
                llm=llm, map_prompt=map_prompts, combine_prompt=combine_prompt, chain_type='map_reduce', verbose=False)

        output = chain.run(Narration_style=Narration_style, input_documents=docs)

        # self.create_wordcloud(output=output)
        # display(Markdown(f"Text: {docs}"))
        # display(Markdown(f"Summary Response: {output}"))
        return output
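
A rough usage sketch of the two chains above, assuming OPENAI_API_KEY is set in the environment; the argument values (summary type, strategy, audience, length, style, narration style) and the sample text are illustrative placeholders, since the classes interpolate free-form strings into their prompts.

# Rough usage sketch; OPENAI_API_KEY must be set, and all argument values below are placeholders.
from summarizer import Extract_Summary, AudioBookNarration

text = "Your transcript text goes here."

summary = Extract_Summary(text_input=text).define_chain(
    Summary_type="informative summary",
    Summary_strategy="abstractive",
    Target_Person_type="a general audience",
    Response_length="200",
    Writing_style="conversational",
    key_information=False,
)
print(summary)

narration = AudioBookNarration(text_input=text).define_chain(Narration_style="calm")
print(narration)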
transcription.py
ADDED
@@ -0,0 +1,221 @@
import os
from abc import ABC, abstractmethod

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import SRTFormatter, WebVTTFormatter

# import whisperx
import stable_whisper
from faster_whisper import WhisperModel


class Transcription(ABC):

    def __init__(self, media_path, output_path, subtitle_format):
        self.media_path = media_path
        self.output_path = os.path.join(os.getcwd(), output_path)
        self.filename = os.path.splitext(media_path)[0]
        self.subtitle_format = subtitle_format

    @abstractmethod
    def generate_transcript(self):
        pass

    @abstractmethod
    def save_transcript(self):
        pass


class YouTubeTranscriptAPI(Transcription):
    def __init__(self, url, media_path, output_path, subtitle_format='srt', transcript_language='en'):
        super().__init__(media_path, output_path, subtitle_format)
        self.url = url
        self.video_id = url.split('v=')[1]
        self.transcript_language = transcript_language
        self.supported_subtitle_formats = ['srt', 'vtt']
        assert(self.subtitle_format.lower() in self.supported_subtitle_formats)

    def get_available_transcripts(self):
        '''
        Returns a dictionary of available transcripts & their info
        '''

        # Getting List of all Available Transcripts
        transcript_list = YouTubeTranscriptApi.list_transcripts(self.video_id)

        # Converting the Available Transcripts to a Dictionary
        transcripts_info = dict()
        for transcript in transcript_list:
            transcript_info = {
                'language': transcript.language,
                'is_generated': transcript.is_generated,
                'is_translatable': transcript.is_translatable
            }
            transcripts_info[transcript.language_code] = transcript_info
        return transcripts_info

    def generate_transcript(self):
        '''
        Generates the transcript for the media file
        '''
        self.transcript = YouTubeTranscriptApi.get_transcript(self.video_id, languages=[self.transcript_language])

    def save_transcript(self):
        '''
        Writes the transcript into a file
        '''

        # Getting the Formatter
        if self.subtitle_format == 'srt':
            formatter = SRTFormatter()
        elif self.subtitle_format == 'vtt':
            formatter = WebVTTFormatter()

        # Getting the Formatted Transcript
        formatted_transcript = formatter.format_transcript(self.transcript)

        # Writing the Formatted Transcript
        file_path = f'{self.filename}.{self.subtitle_format}'
        with open(file_path, 'w', encoding='utf-8') as transcript_file:
            transcript_file.write(formatted_transcript)
        return file_path


class Whisper(Transcription):
    def __init__(self, media_path, output_path, subtitle_format, word_level):
        super().__init__(media_path, output_path, subtitle_format)
        self.word_level = word_level
        self.supported_subtitle_formats = ['ass', 'srt', 'vtt']
        assert(self.subtitle_format.lower() in self.supported_subtitle_formats)


class FasterWhisper(Whisper):
    def __init__(self, media_path, output_path, subtitle_format='srt', word_level=True):
        super().__init__(media_path, output_path, subtitle_format, word_level)
        self.model = WhisperModel("large-v2", device="cuda", compute_type="float16")

    def generate_transcript(self):
        '''
        Generates the transcript for the media file
        '''

        all_text = []
        all_segments = []

        if self.word_level:

            # Generating Word Level Transcript
            segments, info = self.model.transcribe(self.media_path, word_timestamps=True)

            # Converting to Dictionary
            all_segments = []
            for segment in segments:
                for word in segment.words:
                    all_text.append(word.word)
                    segment_info = {
                        'text': word.word,
                        'start': round(word.start, 2),
                        'end': round(word.end, 2)
                    }
                    all_segments.append(segment_info)

        else:

            # Generating Segment Level Transcript
            segments, info = self.model.transcribe(self.media_path, beam_size=5)

            # Converting to Dictionary
            for segment in segments:
                all_text.append(segment.text)
                segment_info = {
                    'text': segment.text,
                    'start': round(segment.start, 2),
                    'end': round(segment.end, 2)
                }
                all_segments.append(segment_info)

        # Setting Transcript Properties
        self.text = ' '.join(all_text)
        self.language = info.language
        self.segments = all_segments

        # Returning Transcript Properties as Dictionary
        transcript_dict = {
            'language': self.language,
            'text': self.text,
            'segments': self.segments
        }
        return transcript_dict

    def save_transcript(self, transcript, output_file):
        '''
        Writes the transcript into a file
        '''
        # TODO: Can't seem to find any built-in methods for writing the transcript
        pass


class StableWhisper(Whisper):
    def __init__(self, media_path, output_path, subtitle_format='srt', word_level=True):
        super().__init__(media_path, output_path, subtitle_format, word_level)
        self.model = stable_whisper.load_model('large-v2')

    def generate_transcript(self):
        '''
        Generates the transcript for the media file
        '''

        # Generating Word Level Transcript
        self.result = self.model.transcribe(self.media_path, word_timestamps=self.word_level)

        # Converting to Dictionary
        self.resultdict = self.result.to_dict()

        # Formatting Dictionary
        all_segments = []
        if self.word_level:

            all_segments = []
            for segment in self.resultdict['segments']:
                for word in segment['words']:
                    segment_info = {
                        'text': word['word'],
                        'start': round(word['start'], 2),
                        'end': round(word['end'], 2)
                    }
                    all_segments.append(segment_info)

        else:

            for segment in self.resultdict['segments']:
                segment_info = {
                    'text': segment['text'],
                    'start': round(segment['start'], 2),
                    'end': round(segment['end'], 2)
                }
                all_segments.append(segment_info)

        # Setting Transcript Properties
        self.text = self.resultdict['text']
        self.language = self.resultdict['language']
        self.segments = all_segments

        # Returning Transcript Properties as Dictionary
        transcript_dict = {
            'language': self.language,
            'text': self.text,
            'segments': self.segments
        }
        return transcript_dict

    def save_transcript(self):
        '''
        Writes the transcript into a file
        '''
        # Writing according to the Format
        file_path = f'{self.filename}.{self.subtitle_format}'
        if self.subtitle_format == 'ass':
            self.result.to_ass(file_path, segment_level=True, word_level=self.word_level)
        elif self.subtitle_format in ['srt', 'vtt']:
            self.result.to_srt_vtt(file_path, segment_level=True, word_level=self.word_level)
        return file_path
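
A minimal sketch of running StableWhisper on a local audio file; the media path is a placeholder, and the large-v2 weights are fetched on first load, so a GPU and a sizeable download should be expected.

# Minimal sketch for StableWhisper; the media path below is a placeholder.
from transcription import StableWhisper

transcriber = StableWhisper("Output/sample_audio.mp3", "Output",
                            subtitle_format="srt", word_level=False)
transcript = transcriber.generate_transcript()
subtitle_path = transcriber.save_transcript()

print(transcript["language"])
print(transcript["text"][:200])
print(f"Subtitles written to {subtitle_path}")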