Spaces:
Runtime error
Runtime error
| from pydantic import NoneStr | |
| import os | |
| from langchain.chains.question_answering import load_qa_chain | |
| from langchain.document_loaders import UnstructuredFileLoader | |
| from langchain.embeddings.openai import OpenAIEmbeddings | |
| from langchain.llms import OpenAI | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.vectorstores import FAISS | |
| import gradio as gr | |
| import openai | |
class ChemicalIdentifier:
    """Gradio app that answers a fixed chemistry question about an uploaded file.

    An uploaded document is loaded, split into chunks, embedded with OpenAI
    embeddings and indexed in a FAISS vector store (the "knowledge base").
    Pressing "Analyse" runs a similarity search against that store and feeds
    the hits to an OpenAI LLM through a "stuff" question-answering chain.
    """

    # Question asked when the caller does not supply one.
    DEFAULT_QUESTION = "Identify the chemical capabilities"
    # Text shown in the answer box whenever no answer can be produced.
    ERROR_MESSAGE = "Please upload Proper Document"

    def __init__(self):
        """Set the OpenAI API key from the ``OPENAI_API_KEY`` environment variable."""
        openai.api_key = os.getenv("OPENAI_API_KEY")

    def get_empty_state(self):
        """Return a fresh session state that holds no knowledge base."""
        return {"knowledge_base": None}

    def create_knowledge_base(self, docs, chunk_size=500, chunk_overlap=0):
        """Create a FAISS knowledge base from the given documents.

        Args:
            docs: Documents accepted by ``CharacterTextSplitter.split_documents``
                (``upload_file`` passes the list returned by
                ``UnstructuredFileLoader.load()``).
            chunk_size (int): Maximum chunk length in characters.
            chunk_overlap (int): Number of characters shared by adjacent chunks.

        Returns:
            FAISS: Vector store built from the chunks and their embeddings.
        """
        # Split on newlines; chunk length is measured in characters (len).
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
        )
        chunks = text_splitter.split_documents(docs)
        embeddings = OpenAIEmbeddings()
        return FAISS.from_documents(chunks, embeddings)

    def upload_file(self, file_obj):
        """Load an uploaded file and build a knowledge base from its contents.

        Args:
            file_obj (file-like object): The uploaded file; only its ``name``
                attribute (the path on disk) is read.

        Returns:
            tuple: ``(file name, state dict)``. When loading or indexing fails
            the state is the empty state, so the state keeps the same dict
            shape on every path.
        """
        if file_obj is None:
            # Nothing was uploaded; there is no name to report.
            return None, self.get_empty_state()
        try:
            # The loader uses the "fast" strategy.
            loader = UnstructuredFileLoader(file_obj.name, strategy="fast")
            docs = loader.load()
            knowledge_base = self.create_knowledge_base(docs)
        except Exception:
            # Best-effort by design: keep the UI alive, but record why it failed.
            import logging

            logging.getLogger(__name__).exception(
                "Could not build a knowledge base from %r", file_obj.name
            )
            return file_obj.name, self.get_empty_state()
        return file_obj.name, {"knowledge_base": knowledge_base}

    def answer_question(self, state, question=None):
        """Answer a question using the knowledge base stored in ``state``.

        Args:
            state (dict): Session state; its ``"knowledge_base"`` entry is used.
            question (str, optional): Question to ask. Defaults to
                ``DEFAULT_QUESTION``.

        Returns:
            str: The chain's response, or ``ERROR_MESSAGE`` when there is no
            knowledge base or the lookup/LLM call fails.
        """
        knowledge_base = (
            state.get("knowledge_base") if isinstance(state, dict) else None
        )
        if knowledge_base is None:
            # No document has been indexed yet (or the upload failed).
            return self.ERROR_MESSAGE
        if question is None:
            question = self.DEFAULT_QUESTION
        try:
            # Retrieve the chunks most similar to the question.
            docs = knowledge_base.similarity_search(question)
            llm = OpenAI(temperature=0.4)
            chain = load_qa_chain(llm, chain_type="stuff")
            return chain.run(input_documents=docs, question=question)
        except Exception:
            # Best-effort by design: show the generic message, log the cause.
            import logging

            logging.getLogger(__name__).exception("Question answering failed")
            return self.ERROR_MESSAGE

    def gradio_interface(self):
        """Build the Gradio interface for the Chemical Identifier and launch it."""
        with gr.Blocks(css="style.css", theme=gr.themes.Soft()) as demo:
            # Per-session state carrying the knowledge base between events.
            state = gr.State(self.get_empty_state())
            gr.HTML("""<img class="leftimage" align="left" src="https://templates.images.credential.net/1612472097627370951721412474196.png" alt="Image" width="210" height="210">
            <img class="rightimage" align="right" src="https://logos-download.com/wp-content/uploads/2016/06/Syngenta_logo.png" alt="Image" width="150" height="140">""")
            with gr.Column(elem_id="col-container"):
                gr.HTML(
                    """<hr style="border-top: 5px solid white;">"""
                )
                gr.HTML(
                    """<br>
                    <h1 style="text-align:center;">
                    Syngenta Chemical Identifier
                    </h1> """
                )
                gr.HTML(
                    """<hr style="border-top: 5px solid white;">"""
                )
                gr.Markdown("**Upload your file**")
                with gr.Row(elem_id="row-flex"):
                    # Gradio documents `scale` as an integer, so the original
                    # 0.90 / 0.10 split is expressed as 9 : 1.
                    with gr.Column(scale=9, min_width=160):
                        file_output = gr.File(elem_classes="heightfit")
                    with gr.Column(scale=1, min_width=160):
                        upload_button = gr.UploadButton(
                            "Browse File",
                            file_types=[".txt", ".pdf", ".doc", ".docx"],
                            elem_classes="heightfit",
                        )
                with gr.Row():
                    with gr.Column(scale=1, min_width=0):
                        analyse_btn = gr.Button(value="Analyse")
                with gr.Row():
                    with gr.Column(scale=1, min_width=0):
                        answer = gr.Textbox(
                            value="",
                            label='Chemicals :',
                            show_label=True,
                            placeholder="",
                            lines=5,
                        )
            # Uploading fills the file widget and the session state;
            # "Analyse" reads the state and writes the answer box.
            upload_button.upload(
                self.upload_file, upload_button, [file_output, state]
            )
            analyse_btn.click(self.answer_question, [state], [answer])
        demo.queue().launch()
if __name__ == "__main__":
    # Script entry point: build the app object and start the Gradio UI.
    ChemicalIdentifier().gradio_interface()