files cleaned up
Browse files
- app.py +6 -4
- helper/utils.py +16 -60
app.py
CHANGED
@@ -49,7 +49,9 @@ with st.sidebar:
 
     # Chunk size
     chunk_size_input = st.number_input(
-        "Insert an integer (for size of chunks, i.e. 2 means 2 sentences a chunk):",
+        "Insert an integer (for size of chunks, i.e. 2 means 2 sentences a chunk):",
+        value=2,
+        step=1,
     )
 
     # Quantization
@@ -68,8 +70,8 @@ with st.sidebar:
 
     # Select FM
     option = st.selectbox(
-        "Which foundational model would you like?",
-
+        "Which foundational model would you like?", ("GPT4", "LLAMA3")
+    )
 
     # Clear button
     clear_button = st.sidebar.button("Clear Conversation", key="clear")
@@ -135,7 +137,7 @@ elif uploaded_files:
     result = refs_tab
 
     # Call FM
-    content =
+    content = " ".join(list(result.sentences))
     if option == "GPT4":
         response = call_gpt(prompt, content)
     else:
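For reference, both sidebar widgets touched above return plain Python values, so the chosen chunk size and model name can be handed straight to the helpers in helper/utils.py. Below is a minimal sketch of that wiring, assuming the same widget labels as in app.py and that read_and_textify is imported from helper.utils; the file-upload code is omitted and the final call is illustrative only, not the exact call site in this commit.

import streamlit as st

from helper.utils import read_and_textify  # chunking helper updated in this commit

with st.sidebar:
    # st.number_input returns a number; value=2 pre-fills the field, step=1 keeps it on integers
    chunk_size_input = st.number_input(
        "Insert an integer (for size of chunks, i.e. 2 means 2 sentences a chunk):",
        value=2,
        step=1,
    )

    # st.selectbox returns whichever option the user picked ("GPT4" is the default, index 0)
    option = st.selectbox("Which foundational model would you like?", ("GPT4", "LLAMA3"))

# Illustrative use: chunk the uploaded PDFs with the chosen size
# (uploaded_files would come from a st.file_uploader call elsewhere in app.py)
# texts, sources = read_and_textify(uploaded_files, chunk_size=int(chunk_size_input))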
helper/utils.py
CHANGED
@@ -15,44 +15,6 @@ def current_year():
     return now.year
 
 
-# def read_and_textify(
-#     files: List[str],
-# ) -> Tuple[List[str], List[str]]:
-#     """
-#     Reads PDF files and extracts text from each page.
-
-#     This function iterates over a list of uploaded PDF files, extracts text from each page,
-#     and compiles a list of texts and corresponding source information.
-
-#     Args:
-#     files (List[st.uploaded_file_manager.UploadedFile]): A list of uploaded PDF files.
-
-#     Returns:
-#     Tuple[List[str], List[str]]: A tuple containing two lists:
-#         1. A list of strings, where each string is the text extracted from a PDF page.
-#         2. A list of strings indicating the source of each text (file name and page number).
-#     """
-
-#     # Initialize lists to store extracted texts and their sources
-#     text_list = []  # List to store extracted text
-#     sources_list = []  # List to store source information
-
-#     # Iterate over each file
-#     for file in files:
-#         pdfReader = PyPDF2.PdfReader(file)  # Create a PDF reader object
-#         # Iterate over each page in the PDF
-#         for i in range(len(pdfReader.pages)):
-#             pageObj = pdfReader.pages[i]  # Get the page object
-#             text = pageObj.extract_text()  # Extract text from the page
-#             pageObj.clear()  # Clear the page object (optional, for memory management)
-#             text_list.append(text)  # Add extracted text to the list
-#             # Create a source identifier and add it to the list
-#             sources_list.append(file.name + "_page_" + str(i))
-
-#     # Return the lists of texts and sources
-#     return [text_list, sources_list]
-
-
 def read_and_textify(
     files: List[str], chunk_size: int = 2  # Default chunk size set to 50
 ) -> Tuple[List[str], List[str]]:
@@ -85,9 +47,9 @@ def read_and_textify(
             text = pageObj.extract_text()  # Extract text from the page
             if text:
                 # Split text into chunks of approximately 'chunk_size' words
-                words = text.split(
+                words = text.split(". ")
                 for j in range(0, len(words), chunk_size):
-                    chunk = ". ".join(words[j : j + chunk_size]) +
+                    chunk = ". ".join(words[j : j + chunk_size]) + "."
                     text_list.append(chunk)
                     # Create a source identifier for each chunk and add it to the list
                     sources_list.append(f"{file.name}_page_{i}_chunk_{j // chunk_size}")
@@ -134,22 +96,22 @@ def call_gpt(prompt: str, content: str) -> str:
     """
     Sends a structured conversation context including a system prompt, user prompt,
     and additional background content to the GPT-3.5-turbo model for a response.
-
+
     This function is responsible for generating an AI-powered response by interacting
     with the OpenAI API. It puts together a preset system message, a formatted user query,
     and additional background information before requesting the completion from the model.
-
+
     Args:
         prompt (str): The main question or topic that the user wants to address.
         content (str): Additional background information or details relevant to the prompt.
-
+
     Returns:
         str: The generated response from the GPT model based on the given prompts and content.
-
+
     Note: 'openai_client' is assumed to be an already created and authenticated instance of the OpenAI
     openai_client, which should be set up prior to calling this function.
     """
-
+
     # Generates a response from the model based on the interactive messages provided
     response = openai_client.chat.completions.create(
         model="gpt-3.5-turbo",  # The AI model being queried for a response
@@ -162,7 +124,7 @@ def call_gpt(prompt: str, content: str) -> str:
             {"role": "assistant", "content": "What is the background content?"},
             # User providing the background content
             {"role": "user", "content": content},
-        ]
+        ],
     )
 
     # Extracts and returns the response content from the model's completion
@@ -171,28 +133,22 @@ def call_gpt(prompt: str, content: str) -> str:
 
 together_client = Together(api_key=os.environ["TOGETHER_API_KEY"])
 
+
 def call_llama(prompt: str) -> str:
     """
-
-
-
-
-
+    Send a prompt to the Llama model and return the response.
+    Args:
+        prompt (str): The input prompt to send to the Llama model.
+    Returns:
+        str: The response from the Llama model.
     """
 
     # Create a completion request with the prompt
     response = together_client.chat.completions.create(
-
         # Use the Llama-3-8b-chat-hf model
         model="meta-llama/Llama-3-8b-chat-hf",
-
         # Define the prompt as a user message
-        messages=[
-            {
-                "role": "user",
-                "content": prompt  # Use the input prompt
-            }
-        ],
+        messages=[{"role": "user", "content": prompt}],  # Use the input prompt
     )
 
     # Return the content of the first response message
@@ -321,4 +277,4 @@ def query_search(
     # Sort the DataFrame based on the 'qim' score in descending order
     refs = refs.sort_values(by="qim", ascending=False)
 
-    return refs
+    return refs
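The substantive change in read_and_textify is the move to sentence-based chunking: each page's text is split on ". ", every chunk_size sentences are re-joined into one chunk with the trailing period restored, and each chunk gets a file_page_chunk source label, matching the sidebar help text in app.py ("2 means 2 sentences a chunk"). Below is a self-contained sketch of just that loop; the function name chunk_sentences and the file name report.pdf are hypothetical and used only for illustration.

from typing import List, Tuple


def chunk_sentences(
    text: str, name: str, page: int, chunk_size: int = 2
) -> Tuple[List[str], List[str]]:
    """Mirror the chunking loop added in this commit (assumes '. ' separates sentences)."""
    chunks: List[str] = []
    sources: List[str] = []
    sentences = text.split(". ")  # same split as the new read_and_textify
    for j in range(0, len(sentences), chunk_size):
        chunk = ". ".join(sentences[j : j + chunk_size]) + "."  # restore the trailing period
        chunks.append(chunk)
        sources.append(f"{name}_page_{page}_chunk_{j // chunk_size}")
    return chunks, sources


# Example with two sentences per chunk:
texts, sources = chunk_sentences("One. Two. Three. Four", "report.pdf", page=0)
# texts   -> ['One. Two.', 'Three. Four.']
# sources -> ['report.pdf_page_0_chunk_0', 'report.pdf_page_0_chunk_1']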