poemsforaphrodite committed on
Commit
f1d6ab9
1 Parent(s): 302324f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -3
app.py CHANGED
@@ -143,7 +143,9 @@ def get_serp_results(query):
143
  def fetch_content(url):
144
  logger.info(f"Fetching content from URL: {url}")
145
  try:
146
- response = requests.get(url, timeout=10)
 
 
147
  response.raise_for_status()
148
  soup = BeautifulSoup(response.text, 'html.parser')
149
  content = soup.get_text(separator=' ', strip=True)
@@ -157,6 +159,10 @@ def fetch_content(url):
157
  def calculate_relevance_score(page_content, query, co):
158
  logger.info(f"Calculating relevance score for query: {query}")
159
  try:
 
 
 
 
160
  page_embedding = co.embed(texts=[page_content], model='embed-english-v3.0', input_type='search_document').embeddings[0]
161
  query_embedding = co.embed(texts=[query], model='embed-english-v3.0', input_type='search_query').embeddings[0]
162
  score = cosine_similarity([query_embedding], [page_embedding])[0][0]
@@ -177,13 +183,12 @@ def analyze_competitors(row, co):
177
  results = []
178
  for url in [our_url] + competitor_urls:
179
  try:
180
- logger.debug(f"Fetching content for URL: {url}")
181
  content = fetch_content(url)
182
  if not content:
183
  logger.warning(f"No content fetched for URL: {url}")
184
  continue
185
 
186
- logger.debug(f"Calculating relevance score for URL: {url}")
187
  score = calculate_relevance_score(content, query, co)
188
 
189
  logger.info(f"URL: {url}, Score: {score}")
@@ -209,6 +214,15 @@ def show_competitor_analysis(row, co):
209
  if our_data.empty:
210
  st.error(f"Our page '{row['page']}' is not in the results. This indicates an error in fetching or processing the page.")
211
  logger.error(f"Our page '{row['page']}' is missing from the results.")
 
 
 
 
 
 
 
 
 
212
  else:
213
  our_rank = our_data.index[0] + 1
214
  total_results = len(results_df)
@@ -220,6 +234,14 @@ def show_competitor_analysis(row, co):
220
 
221
  if our_score == 0:
222
  st.warning("Our page's relevancy score is 0. This might indicate an issue with content fetching or score calculation.")
 
 
 
 
 
 
 
 
223
  elif our_rank == 1:
224
  st.success("Your page has the highest relevancy score!")
225
  elif our_rank <= 3:
 
143
  def fetch_content(url):
144
  logger.info(f"Fetching content from URL: {url}")
145
  try:
146
+ # Decode URL-encoded characters
147
+ decoded_url = urllib.parse.unquote(url)
148
+ response = requests.get(decoded_url, timeout=10)
149
  response.raise_for_status()
150
  soup = BeautifulSoup(response.text, 'html.parser')
151
  content = soup.get_text(separator=' ', strip=True)
 
159
  def calculate_relevance_score(page_content, query, co):
160
  logger.info(f"Calculating relevance score for query: {query}")
161
  try:
162
+ if not page_content:
163
+ logger.warning("Empty page content. Returning score 0.")
164
+ return 0
165
+
166
  page_embedding = co.embed(texts=[page_content], model='embed-english-v3.0', input_type='search_document').embeddings[0]
167
  query_embedding = co.embed(texts=[query], model='embed-english-v3.0', input_type='search_query').embeddings[0]
168
  score = cosine_similarity([query_embedding], [page_embedding])[0][0]
 
183
  results = []
184
  for url in [our_url] + competitor_urls:
185
  try:
186
+ logger.debug(f"Processing URL: {url}")
187
  content = fetch_content(url)
188
  if not content:
189
  logger.warning(f"No content fetched for URL: {url}")
190
  continue
191
 
 
192
  score = calculate_relevance_score(content, query, co)
193
 
194
  logger.info(f"URL: {url}, Score: {score}")
 
214
  if our_data.empty:
215
  st.error(f"Our page '{row['page']}' is not in the results. This indicates an error in fetching or processing the page.")
216
  logger.error(f"Our page '{row['page']}' is missing from the results.")
217
+
218
+ # Additional debugging information
219
+ st.write("Debugging Information:")
220
+ st.json({
221
+ "our_url": row['page'],
222
+ "query": row['query'],
223
+ "content_fetched": fetch_content(row['page']),
224
+ "urls_processed": results_df['url'].tolist()
225
+ })
226
  else:
227
  our_rank = our_data.index[0] + 1
228
  total_results = len(results_df)
 
234
 
235
  if our_score == 0:
236
  st.warning("Our page's relevancy score is 0. This might indicate an issue with content fetching or score calculation.")
237
+ # Additional debugging information
238
+ st.write("Debugging Information:")
239
+ content = fetch_content(row['page'])
240
+ st.json({
241
+ "content_length": len(content),
242
+ "content_preview": content[:500] if content else "No content fetched",
243
+ "query": row['query']
244
+ })
245
  elif our_rank == 1:
246
  st.success("Your page has the highest relevancy score!")
247
  elif our_rank <= 3: