Spaces:

AlbertoFH98
/

PodCastena

Running

App Files Files Community

AlbertoFH98 committed on Jan 4

Commit

e73a11f

•

1 Parent(s): d3f801e

Update utils.py

Browse files

Files changed (1) hide show

utils.py +125 -52

utils.py CHANGED Viewed

@@ -223,62 +223,135 @@ def get_gpt_response(transcription_path, query, logger):
     return llm_output
 # -- Text summarisation with OpenAI (map-reduce technique)
-def summarise_doc(transcription_path):  # Summarise the transcription file at transcription_path with a map-reduce LLM chain; returns the chain's output
-    llm = ChatOpenAI(temperature=0, max_tokens=1024)  # single LLM instance shared by the map and reduce chains
-    # -- Map: load the transcription and build the chain that is applied to each chunk
-    loader = TextLoader(transcription_path)
-    docs   = loader.load()
-    map_template = """Lo siguiente es listado de fragmentos de una conversacion:
-    {docs}
-    En base a este listado, por favor identifica los temas/topics principales.
-    Respuesta:"""
-    map_prompt = PromptTemplate.from_template(map_template)
-    map_chain = LLMChain(llm=llm, prompt=map_prompt)
-    # -- Reduce: build the chain that merges the per-chunk outputs into one consolidated summary
-    reduce_template = """A continuacion se muestra un conjunto de resumenes:
-    {docs}
-    Usalos para crear un unico resumen consolidado de todos los temas/topics principales.
-    Respuesta:"""
-    reduce_prompt = PromptTemplate.from_template(reduce_template)
-    # Build the reduce LLM chain (nothing is executed until map_reduce_chain.run at the end)
-    reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)
-    # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
-    combine_documents_chain = StuffDocumentsChain(
-        llm_chain=reduce_chain, document_variable_name="docs"
-    )
-    # Combines and iteratively reduces the mapped documents
-    reduce_documents_chain = ReduceDocumentsChain(
-        # This is the final chain that is called.
-        combine_documents_chain=combine_documents_chain,
-        # Used if documents exceed the context for `StuffDocumentsChain` (the same chain is reused here)
-        collapse_documents_chain=combine_documents_chain,
-        # The maximum number of tokens to group documents into.
-        token_max=3000,
-    )
-    # Combining documents by mapping a chain over them, then combining results
-    map_reduce_chain = MapReduceDocumentsChain(
-        # Map chain
-        llm_chain=map_chain,
-        # Reduce chain
-        reduce_documents_chain=reduce_documents_chain,
-        # The variable name in the llm_chain to put the documents in
-        document_variable_name="docs",
-        # Return the results of the map steps in the output
-        return_intermediate_steps=False,
-    )
-    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
-        chunk_size=3000, chunk_overlap=0  # same value as token_max above; chunks do not overlap
-    )
-    split_docs = text_splitter.split_documents(docs)
-    return map_reduce_chain.run(split_docs)  # map over every chunk, then reduce to a single result
 # -- Python function to setup basic features: SpaCy pipeline and LLM model
 @st.cache_resource

     return llm_output
 # -- Text summarisation with OpenAI (map-reduce technique)
+def summarise_doc(transcription_path, model_name, model=None):
+    if model_name == 'gpt':
+        llm = ChatOpenAI(temperature=0, max_tokens=1024)
+        # -- Map
+        loader = TextLoader(transcription_path)
+        docs   = loader.load()
+        map_template = """Lo siguiente es listado de fragmentos de una conversacion:
+        {docs}
+        En base a este listado, por favor identifica los temas/topics principales.
+        Respuesta:"""
+        map_prompt = PromptTemplate.from_template(map_template)
+        map_chain = LLMChain(llm=llm, prompt=map_prompt)
+        # -- Reduce
+        reduce_template = """A continuacion se muestra un conjunto de resumenes:
+        {docs}
+        Usalos para crear un unico resumen consolidado de todos los temas/topics principales.
+        Respuesta:"""
+        reduce_prompt = PromptTemplate.from_template(reduce_template)
+        # Run chain
+        reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)
+        # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
+        combine_documents_chain = StuffDocumentsChain(
+            llm_chain=reduce_chain, document_variable_name="docs"
+        )
+        # Combines and iteravely reduces the mapped documents
+        reduce_documents_chain = ReduceDocumentsChain(
+            # This is final chain that is called.
+            combine_documents_chain=combine_documents_chain,
+            # If documents exceed context for `StuffDocumentsChain`
+            collapse_documents_chain=combine_documents_chain,
+            # The maximum number of tokens to group documents into.
+            token_max=3000,
+        )
+        # Combining documents by mapping a chain over them, then combining results
+        map_reduce_chain = MapReduceDocumentsChain(
+            # Map chain
+            llm_chain=map_chain,
+            # Reduce chain
+            reduce_documents_chain=reduce_documents_chain,
+            # The variable name in the llm_chain to put the documents in
+            document_variable_name="docs",
+            # Return the results of the map steps in the output
+            return_intermediate_steps=False,
+        )
+        text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
+            chunk_size=3000, chunk_overlap=0
+        )
+        split_docs  = text_splitter.split_documents(docs)
+        doc_summary = map_reduce_chain.run(split_docs)
+    else:
+        loader = TextLoader(transcription_path)
+        docs   = loader.load()
+        # -- Keep original transcription
+        with open(transcription_path, 'r') as f:
+            formatted_transcription = f.read()
+        llm = TogetherLLM(
+            model= model,
+            temperature = 0.0,
+            max_tokens = 1024,
+            original_transcription = formatted_transcription
+        )
+        # Map
+        map_template = """Lo siguiente es un extracto de una conversación entre dos hablantes en español.
+{docs}
+Por favor resuma la conversación en español.
+Resumen:"""
+        map_prompt = PromptTemplate(template=map_template, input_variables=["docs"])
+        map_chain  = LLMChain(llm=llm, prompt=map_prompt)
+        # Reduce
+        reduce_template = """Lo siguiente es una lista de resumenes en español:
+{doc_summaries}
+Tómelos y descríbalos en un resumen final consolidado en español. Además, enumera los temas principales de la conversación en español.
+Resumen:"""
+        reduce_prompt   = PromptTemplate(template=reduce_template, input_variables=["doc_summaries"])
+        # Run chain
+        reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)
+        # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
+        combine_documents_chain = StuffDocumentsChain(
+            llm_chain=reduce_chain, document_variable_name="doc_summaries"
+        )
+        # Combines and iteravely reduces the mapped documents
+        reduce_documents_chain = ReduceDocumentsChain(
+            # This is final chain that is called.
+            combine_documents_chain=combine_documents_chain,
+            # If documents exceed context for `StuffDocumentsChain`
+            collapse_documents_chain=combine_documents_chain,
+            # The maximum number of tokens to group documents into.
+            verbose=True,
+            token_max=1024
+        )
+        # Combining documents by mapping a chain over them, then combining results
+        map_reduce_chain = MapReduceDocumentsChain(
+            # Map chain
+            llm_chain=map_chain,
+            # Reduce chain
+            reduce_documents_chain=reduce_documents_chain,
+            # The variable name in the llm_chain to put the documents in
+            document_variable_name="docs",
+            # Return the results of the map steps in the output
+            return_intermediate_steps=False,
+            verbose=True
+        )
+        text_splitter = CharacterTextSplitter(
+            separator = "\n\n",
+            chunk_size = 2000,
+            chunk_overlap  = 50,
+            length_function = len,
+            is_separator_regex = True,
+        )
+        split_docs = text_splitter.create_documents([docs])
+    return doc_summary
 # -- Python function to setup basic features: SpaCy pipeline and LLM model
 @st.cache_resource