Spaces:

AlbertoFH98
/

PodCastena

Running

App Files Files Community

AlbertoFH98 committed on Jan 3

Commit

4ec86c0

•

1 Parent(s): 887ecbd

Update utils.py

Browse files

Files changed (1) hide show

utils.py +75 -17

utils.py CHANGED Viewed

@@ -27,7 +27,7 @@ import os
 import re
 #os.environ["TOGETHER_API_KEY"] = "6101599d6e33e3bda336b8d007ca22e35a64c72cfd52c2d8197f663389fc50c5"
-os.environ["OPENAI_API_KEY"]   = "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"
 os.environ["LANGCHAIN_TRACING_V2"] = "true"
 client = Client()
@@ -198,28 +198,86 @@ def get_gpt_response(transcription_path, query, logger):
         | StrOutputParser()
     )
     llm_output = rag_chain.invoke(query)
-    dataset    = client.create_dataset(dataset_name="Sample LLM dataset", description="A dataset with LLM inputs and outputs", data_type="llm")
-    client.create_example(
-      inputs={"input": query},
-      outputs={"output": llm_output},
-      dataset_id=dataset.id,
-    )
     # -- Run custom evaluator
-    evaluation_config = RunEvalConfig(
-        custom_evaluators = [RelevanceEvaluator()],
     )
-    eval_output = run_on_dataset(
-       dataset_name="Sample LLM dataset",
-       llm_or_chain_factory=rag_chain,
-       evaluation=evaluation_config,
-       client=client,
     )
-    logger.info("Eval output!!!!")
-    logger.info(eval_output)
-    return llm_output
 # -- Python function to setup basic features: SpaCy pipeline and LLM model
 @st.cache_resource

 import re
 #os.environ["TOGETHER_API_KEY"] = "6101599d6e33e3bda336b8d007ca22e35a64c72cfd52c2d8197f663389fc50c5"
+#os.environ["OPENAI_API_KEY"]   = "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"
 os.environ["LANGCHAIN_TRACING_V2"] = "true"
 client = Client()
         | StrOutputParser()
     )
     llm_output = rag_chain.invoke(query)
+    # dataset    = client.create_dataset(dataset_name="Sample LLM dataset", description="A dataset with LLM inputs and outputs", data_type="llm")
+    # client.create_example(
+    #   inputs={"input": query},
+    #   outputs={"output": llm_output},
+    #   dataset_id=dataset.id,
+    # )
     # -- Run custom evaluator
+    # evaluation_config = RunEvalConfig(
+    #     custom_evaluators = [RelevanceEvaluator()],
+    # )
+    # eval_output = run_on_dataset(
+    #    dataset_name="Sample LLM dataset",
+    #    llm_or_chain_factory=rag_chain,
+    #    evaluation=evaluation_config,
+    #    client=client,
+    # )
+    # logger.info("Eval output!!!!")
+    # logger.info(eval_output)
+    return llm_output
+# -- Text summarisation with OpenAI (map-reduce technique): loads transcription_path via TextLoader, splits it into chunks and returns the output of a map-reduce summarisation chain
+def summarise_doc(transcription_path):
+    llm = ChatOpenAI(temperature=0)  # one chat model instance shared by the map and reduce chains
+    # -- Map: load the source document and build the per-chunk chain that asks for the main topics
+    loader = TextLoader(transcription_path)
+    docs   = loader.load()
+    map_template = """Lo siguiente es listado de fragmentos de una conversacion:
+    {docs}
+    En base a este listado, por favor identifica los temas/topics principales.
+    Respuesta:"""
+    map_prompt = PromptTemplate.from_template(map_template)
+    map_chain = LLMChain(llm=llm, prompt=map_prompt)
+    # -- Reduce: build the chain that consolidates the mapped answers into a single summary
+    reduce_template = """A continuacion se muestra un conjunto de resumenes:
+    {docs}
+    Usalos para crear un unico resumen consolidado de todos los temas/topics principales.
+    Respuesta:"""
+    reduce_prompt = PromptTemplate.from_template(reduce_template)
+    # Build the reduce LLM chain (only constructed here; it is executed later through map_reduce_chain)
+    reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)
+    # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
+    combine_documents_chain = StuffDocumentsChain(
+        llm_chain=reduce_chain, document_variable_name="docs"
+    )
+    # Combines and iteratively reduces the mapped documents
+    reduce_documents_chain = ReduceDocumentsChain(
+        # This is the final chain that is called.
+        combine_documents_chain=combine_documents_chain,
+        # Used if documents exceed context for `StuffDocumentsChain`; note the same stuff chain is reused for collapsing
+        collapse_documents_chain=combine_documents_chain,
+        # The maximum number of tokens to group documents into.
+        token_max=4000,
     )
+    # Combining documents by mapping a chain over them, then combining results
+    map_reduce_chain = MapReduceDocumentsChain(
+        # Map chain
+        llm_chain=map_chain,
+        # Reduce chain
+        reduce_documents_chain=reduce_documents_chain,
+        # The variable name in the llm_chain to put the documents in
+        document_variable_name="docs",
+        # Do not return the results of the map steps in the output
+        return_intermediate_steps=False,
     )
+    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(  # NOTE(review): chunk_size presumably counts tiktoken tokens, not characters; confirm
+        chunk_size=1000, chunk_overlap=0
+    )
+    split_docs = text_splitter.split_documents(docs)  # chunk the loaded document before running the chain
+    return map_reduce_chain.run(split_docs)
 # -- Python function to setup basic features: SpaCy pipeline and LLM model
 @st.cache_resource