files cleaned up
Browse files
- app.py +6 -4
- helper/utils.py +16 -60
app.py
CHANGED
@@ -49,7 +49,9 @@ with st.sidebar:
 
     # Chunk size
     chunk_size_input = st.number_input(
-        "Insert an integer (for size of chunks, i.e. 2 means 2 sentences a chunk):",
+        "Insert an integer (for size of chunks, i.e. 2 means 2 sentences a chunk):",
+        value=2,
+        step=1,
     )
 
     # Quantization
@@ -68,8 +70,8 @@ with st.sidebar:
 
     # Select FM
     option = st.selectbox(
-        "Which foundational model would you like?",
-
+        "Which foundational model would you like?", ("GPT4", "LLAMA3")
+    )
 
     # Clear button
     clear_button = st.sidebar.button("Clear Conversation", key="clear")
@@ -135,7 +137,7 @@ elif uploaded_files:
     result = refs_tab
 
     # Call FM
-    content =
+    content = " ".join(list(result.sentences))
     if option == "GPT4":
         response = call_gpt(prompt, content)
     else:
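For reference, both sidebar widgets touched above return plain Python values, so the chosen chunk size and model name can be handed straight to the helpers in helper/utils.py. Below is a minimal sketch of that wiring, assuming the same widget labels as in app.py and that read_and_textify is imported from helper.utils; the file-upload code is omitted and the final call is illustrative only, not the exact call site in this commit.

import streamlit as st

from helper.utils import read_and_textify  # chunking helper updated in this commit

with st.sidebar:
    # st.number_input returns a number; value=2 pre-fills the field, step=1 keeps it on integers
    chunk_size_input = st.number_input(
        "Insert an integer (for size of chunks, i.e. 2 means 2 sentences a chunk):",
        value=2,
        step=1,
    )

    # st.selectbox returns whichever option the user picked ("GPT4" is the default, index 0)
    option = st.selectbox("Which foundational model would you like?", ("GPT4", "LLAMA3"))

# Illustrative use: chunk the uploaded PDFs with the chosen size
# (uploaded_files would come from a st.file_uploader call elsewhere in app.py)
# texts, sources = read_and_textify(uploaded_files, chunk_size=int(chunk_size_input))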
helper/utils.py
CHANGED
@@ -15,44 +15,6 @@ def current_year():
     return now.year
 
 
-# def read_and_textify(
-#     files: List[str],
-# ) -> Tuple[List[str], List[str]]:
-#     """
-#     Reads PDF files and extracts text from each page.
-
-#     This function iterates over a list of uploaded PDF files, extracts text from each page,
-#     and compiles a list of texts and corresponding source information.
-
-#     Args:
-#     files (List[st.uploaded_file_manager.UploadedFile]): A list of uploaded PDF files.
-
-#     Returns:
-#     Tuple[List[str], List[str]]: A tuple containing two lists:
-#         1. A list of strings, where each string is the text extracted from a PDF page.
-#         2. A list of strings indicating the source of each text (file name and page number).
-#     """
-
-#     # Initialize lists to store extracted texts and their sources
-#     text_list = []  # List to store extracted text
-#     sources_list = []  # List to store source information
-
-#     # Iterate over each file
-#     for file in files:
-#         pdfReader = PyPDF2.PdfReader(file)  # Create a PDF reader object
-#         # Iterate over each page in the PDF
-#         for i in range(len(pdfReader.pages)):
-#             pageObj = pdfReader.pages[i]  # Get the page object
-#             text = pageObj.extract_text()  # Extract text from the page
-#             pageObj.clear()  # Clear the page object (optional, for memory management)
-#             text_list.append(text)  # Add extracted text to the list
-#             # Create a source identifier and add it to the list
-#             sources_list.append(file.name + "_page_" + str(i))
-
-#     # Return the lists of texts and sources
-#     return [text_list, sources_list]
-
-
 def read_and_textify(
     files: List[str], chunk_size: int = 2  # Default chunk size set to 50
 ) -> Tuple[List[str], List[str]]:
@@ -85,9 +47,9 @@ def read_and_textify(
             text = pageObj.extract_text()  # Extract text from the page
             if text:
                 # Split text into chunks of approximately 'chunk_size' words
-                words = text.split(
+                words = text.split(". ")
                 for j in range(0, len(words), chunk_size):
-                    chunk = ". ".join(words[j : j + chunk_size]) +
+                    chunk = ". ".join(words[j : j + chunk_size]) + "."
                     text_list.append(chunk)
                     # Create a source identifier for each chunk and add it to the list
                     sources_list.append(f"{file.name}_page_{i}_chunk_{j // chunk_size}")
@@ -134,22 +96,22 @@ def call_gpt(prompt: str, content: str) -> str:
     """
     Sends a structured conversation context including a system prompt, user prompt,
     and additional background content to the GPT-3.5-turbo model for a response.
-
+
     This function is responsible for generating an AI-powered response by interacting
     with the OpenAI API. It puts together a preset system message, a formatted user query,
     and additional background information before requesting the completion from the model.
-
+
     Args:
         prompt (str): The main question or topic that the user wants to address.
         content (str): Additional background information or details relevant to the prompt.
-
+
     Returns:
         str: The generated response from the GPT model based on the given prompts and content.
-
+
     Note: 'openai_client' is assumed to be an already created and authenticated instance of the OpenAI
     openai_client, which should be set up prior to calling this function.
     """
-
+
     # Generates a response from the model based on the interactive messages provided
     response = openai_client.chat.completions.create(
         model="gpt-3.5-turbo",  # The AI model being queried for a response
@@ -162,7 +124,7 @@ def call_gpt(prompt: str, content: str) -> str:
             {"role": "assistant", "content": "What is the background content?"},
             # User providing the background content
             {"role": "user", "content": content},
-        ]
+        ],
     )
 
     # Extracts and returns the response content from the model's completion
@@ -171,28 +133,22 @@ def call_gpt(prompt: str, content: str) -> str:
 
 together_client = Together(api_key=os.environ["TOGETHER_API_KEY"])
 
+
 def call_llama(prompt: str) -> str:
     """
-
-
-
-
-
+    Send a prompt to the Llama model and return the response.
+    Args:
+        prompt (str): The input prompt to send to the Llama model.
+    Returns:
+        str: The response from the Llama model.
     """
 
     # Create a completion request with the prompt
     response = together_client.chat.completions.create(
-
         # Use the Llama-3-8b-chat-hf model
         model="meta-llama/Llama-3-8b-chat-hf",
-
         # Define the prompt as a user message
-        messages=[
-            {
-                "role": "user",
-                "content": prompt  # Use the input prompt
-            }
-        ],
+        messages=[{"role": "user", "content": prompt}],  # Use the input prompt
     )
 
     # Return the content of the first response message
@@ -321,4 +277,4 @@ def query_search(
     # Sort the DataFrame based on the 'qim' score in descending order
     refs = refs.sort_values(by="qim", ascending=False)
 
-    return refs
+    return refs
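The substantive change in read_and_textify is the move to sentence-based chunking: each page's text is split on ". ", every chunk_size sentences are re-joined into one chunk with the trailing period restored, and each chunk gets a file_page_chunk source label, matching the sidebar help text in app.py ("2 means 2 sentences a chunk"). Below is a self-contained sketch of just that loop; the function name chunk_sentences and the file name report.pdf are hypothetical and used only for illustration.

from typing import List, Tuple


def chunk_sentences(
    text: str, name: str, page: int, chunk_size: int = 2
) -> Tuple[List[str], List[str]]:
    """Mirror the chunking loop added in this commit (assumes '. ' separates sentences)."""
    chunks: List[str] = []
    sources: List[str] = []
    sentences = text.split(". ")  # same split as the new read_and_textify
    for j in range(0, len(sentences), chunk_size):
        chunk = ". ".join(sentences[j : j + chunk_size]) + "."  # restore the trailing period
        chunks.append(chunk)
        sources.append(f"{name}_page_{page}_chunk_{j // chunk_size}")
    return chunks, sources


# Example with two sentences per chunk:
texts, sources = chunk_sentences("One. Two. Three. Four", "report.pdf", page=0)
# texts   -> ['One. Two.', 'Three. Four.']
# sources -> ['report.pdf_page_0_chunk_0', 'report.pdf_page_0_chunk_1']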