Spaces:
Running
Running
poemsforaphrodite
committed on
Commit
•
4068829
1
Parent(s):
8dc0c9a
Update app.py
Browse files
app.py
CHANGED
@@ -113,13 +113,14 @@ def get_serp_results(query):
|
|
113 |
}
|
114 |
|
115 |
try:
|
116 |
-
logger.debug(f"Calling Apify Actor with input: {run_input}")
|
117 |
# Run the Actor and wait for it to finish
|
118 |
run = client.actor("nFJndFXA5zjCTuudP").call(run_input=run_input)
|
119 |
logger.info(f"Apify Actor run completed. Run ID: {run.get('id')}")
|
120 |
|
121 |
# Fetch results from the run's dataset
|
122 |
-
|
|
|
123 |
results = list(client.dataset(run["defaultDatasetId"]).iterate_items())
|
124 |
logger.info(f"Fetched {len(results)} results from Apify dataset")
|
125 |
|
@@ -183,6 +184,8 @@ def analyze_competitors(row, co):
|
|
183 |
|
184 |
# Calculate score for our page first
|
185 |
our_content = fetch_content(our_url)
|
|
|
|
|
186 |
if our_content:
|
187 |
our_score = calculate_relevance_score(our_content, query, co)
|
188 |
results.append({'url': our_url, 'relevancy_score': our_score})
|
@@ -193,7 +196,7 @@ def analyze_competitors(row, co):
|
|
193 |
# Calculate scores for competitor pages
|
194 |
for url in competitor_urls:
|
195 |
try:
|
196 |
-
|
197 |
content = fetch_content(url)
|
198 |
if not content:
|
199 |
logger.warning(f"No content fetched for competitor URL: {url}")
|
@@ -245,13 +248,13 @@ def show_competitor_analysis(row, co):
|
|
245 |
if our_score == 0:
|
246 |
st.warning("Our page's relevancy score is 0. This might indicate an issue with content fetching or score calculation.")
|
247 |
# Additional debugging information
|
248 |
-
st.write("Debugging Information:")
|
249 |
-
content = fetch_content(row['page'])
|
250 |
-
st.json({
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
})
|
255 |
elif our_rank == 1:
|
256 |
st.success("Your page has the highest relevancy score!")
|
257 |
elif our_rank <= 3:
|
@@ -366,22 +369,6 @@ def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, d
|
|
366 |
show_error(e)
|
367 |
return pd.DataFrame()
|
368 |
|
369 |
-
def calculate_relevance_score(page_content, query, co):
|
370 |
-
logger.info(f"Calculating relevance score for query: {query}")
|
371 |
-
try:
|
372 |
-
if not page_content:
|
373 |
-
logger.warning("Empty page content. Returning score 0.")
|
374 |
-
return 0
|
375 |
-
|
376 |
-
page_embedding = co.embed(texts=[page_content], model='embed-english-v3.0', input_type='search_document').embeddings[0]
|
377 |
-
query_embedding = co.embed(texts=[query], model='embed-english-v3.0', input_type='search_query').embeddings[0]
|
378 |
-
score = cosine_similarity([query_embedding], [page_embedding])[0][0]
|
379 |
-
logger.debug(f"Relevance score calculated: {score}")
|
380 |
-
return score
|
381 |
-
except Exception as e:
|
382 |
-
logger.exception(f"Error calculating relevance score: {str(e)}")
|
383 |
-
st.error(f"Error calculating relevance score: {str(e)}")
|
384 |
-
return 0
|
385 |
|
386 |
def calculate_relevancy_scores(df, model_type):
|
387 |
logging.info("Calculating relevancy scores")
|
@@ -420,10 +407,10 @@ def calc_date_range(selection, custom_start=None, custom_end=None):
|
|
420 |
today = datetime.date.today()
|
421 |
if selection == 'Custom Range':
|
422 |
if custom_start and custom_end:
|
423 |
-
logging.debug(f"Custom date range: {custom_start} to {custom_end}")
|
424 |
return custom_start, custom_end
|
425 |
else:
|
426 |
-
logging.debug("Defaulting custom date range to last 7 days")
|
427 |
return today - datetime.timedelta(days=7), today
|
428 |
date_range = today - datetime.timedelta(days=range_map.get(selection, 0)), today
|
429 |
#logging.debug(f"Date range calculated: {date_range}")
|
|
|
113 |
}
|
114 |
|
115 |
try:
|
116 |
+
#logger.debug(f"Calling Apify Actor with input: {run_input}")
|
117 |
# Run the Actor and wait for it to finish
|
118 |
run = client.actor("nFJndFXA5zjCTuudP").call(run_input=run_input)
|
119 |
logger.info(f"Apify Actor run completed. Run ID: {run.get('id')}")
|
120 |
|
121 |
# Fetch results from the run's dataset
|
122 |
+
|
123 |
+
#logger.debug(f"Fetching results from dataset ID: {run.get('defaultDatasetId')}")
|
124 |
results = list(client.dataset(run["defaultDatasetId"]).iterate_items())
|
125 |
logger.info(f"Fetched {len(results)} results from Apify dataset")
|
126 |
|
|
|
184 |
|
185 |
# Calculate score for our page first
|
186 |
our_content = fetch_content(our_url)
|
187 |
+
print(our_url)
|
188 |
+
print(our_content)
|
189 |
if our_content:
|
190 |
our_score = calculate_relevance_score(our_content, query, co)
|
191 |
results.append({'url': our_url, 'relevancy_score': our_score})
|
|
|
196 |
# Calculate scores for competitor pages
|
197 |
for url in competitor_urls:
|
198 |
try:
|
199 |
+
# logger.debug(f"Processing competitor URL: {url}")
|
200 |
content = fetch_content(url)
|
201 |
if not content:
|
202 |
logger.warning(f"No content fetched for competitor URL: {url}")
|
|
|
248 |
if our_score == 0:
|
249 |
st.warning("Our page's relevancy score is 0. This might indicate an issue with content fetching or score calculation.")
|
250 |
# Additional debugging information
|
251 |
+
# st.write("Debugging Information:")
|
252 |
+
# content = fetch_content(row['page'])
|
253 |
+
# st.json({
|
254 |
+
# "content_length": len(content),
|
255 |
+
# "content_preview": content[:500] if content else "No content fetched",
|
256 |
+
# "query": row['query']
|
257 |
+
# })
|
258 |
elif our_rank == 1:
|
259 |
st.success("Your page has the highest relevancy score!")
|
260 |
elif our_rank <= 3:
|
|
|
369 |
show_error(e)
|
370 |
return pd.DataFrame()
|
371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
372 |
|
373 |
def calculate_relevancy_scores(df, model_type):
|
374 |
logging.info("Calculating relevancy scores")
|
|
|
407 |
today = datetime.date.today()
|
408 |
if selection == 'Custom Range':
|
409 |
if custom_start and custom_end:
|
410 |
+
#logging.debug(f"Custom date range: {custom_start} to {custom_end}")
|
411 |
return custom_start, custom_end
|
412 |
else:
|
413 |
+
#logging.debug("Defaulting custom date range to last 7 days")
|
414 |
return today - datetime.timedelta(days=7), today
|
415 |
date_range = today - datetime.timedelta(days=range_map.get(selection, 0)), today
|
416 |
#logging.debug(f"Date range calculated: {date_range}")
|