poemsforaphrodite committed on
Commit
24aff0c
·
verified ·
1 Parent(s): 9288be8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -19
app.py CHANGED
@@ -24,6 +24,7 @@ logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %
24
 
25
  load_dotenv()
26
  logging.info("Environment variables loaded")
 
27
 
28
  # Initialize Cohere client
29
  APIFY_API_TOKEN = os.environ.get('APIFY_API_TOKEN')
@@ -31,10 +32,13 @@ COHERE_API_KEY = os.environ["COHERE_API_KEY"]
31
  co = cohere.Client(COHERE_API_KEY)
32
  logging.info("Cohere client initialized")
33
  if not APIFY_API_TOKEN:
 
34
  st.error("APIFY_API_TOKEN is not set in the environment variables. Please set it and restart the application.")
35
 
36
  # Initialize the ApifyClient with the API token
37
  client = ApifyClient(APIFY_API_TOKEN)
 
 
38
 
39
  # Configuration: Set to True if running locally, False if running on Streamlit Cloud
40
  IS_LOCAL = False
@@ -91,7 +95,9 @@ def init_session_state():
91
  # -------------
92
 
93
  def get_serp_results(query):
 
94
  if not APIFY_API_TOKEN:
 
95
  st.error("Apify API token is not set. Unable to fetch SERP results.")
96
  return []
97
 
@@ -108,56 +114,89 @@ def get_serp_results(query):
108
  }
109
 
110
  try:
 
111
  # Run the Actor and wait for it to finish
112
  run = client.actor("nFJndFXA5zjCTuudP").call(run_input=run_input)
 
113
 
114
  # Fetch results from the run's dataset
 
115
  results = list(client.dataset(run["defaultDatasetId"]).iterate_items())
 
116
 
117
  if results and 'organicResults' in results[0]:
118
- return [item['url'] for item in results[0]['organicResults']]
 
 
119
  else:
 
120
  st.warning("No organic results found in the SERP data.")
121
  return []
122
  except Exception as e:
 
123
  st.error(f"Error fetching SERP results: {str(e)}")
124
  return []
125
 
 
 
 
126
  def fetch_content(url):
 
127
  try:
128
  response = requests.get(url)
129
  response.raise_for_status()
130
  soup = BeautifulSoup(response.text, 'html.parser')
131
- return soup.get_text(separator=' ', strip=True)
 
 
132
  except requests.RequestException as e:
 
133
  st.warning(f"Error fetching content from {url}: {e}")
134
  return ""
135
 
136
- def generate_embeddings(text_list, model_type):
137
- logging.debug(f"Generating embeddings for model type: {model_type}")
138
- if not text_list:
139
- logging.warning("Text list is empty, returning empty embeddings")
140
- return []
141
- model = 'embed-english-v3.0' if model_type == 'english' else 'embed-multilingual-v3.0'
142
- input_type = 'search_document'
143
- response = co.embed(model=model, texts=text_list, input_type=input_type)
144
- embeddings = response.embeddings
145
- logging.debug(f"Embeddings generated successfully for model type: {model_type}")
146
- return embeddings
147
-
148
  def calculate_relevance_score(page_content, query, co):
149
- page_embedding = co.embed(texts=[page_content], model='embed-english-v3.0', input_type='search_document').embeddings[0]
150
- query_embedding = co.embed(texts=[query], model='embed-english-v3.0', input_type='search_query').embeddings[0]
151
- return cosine_similarity([query_embedding], [page_embedding])[0][0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
  def show_competitor_analysis(row, co):
154
  if st.button("Check Competitors", key=f"comp_{row['page']}"):
 
155
  with st.spinner('Analyzing competitors...'):
156
  results_df = analyze_competitors(row, co)
157
  st.write("Relevancy Score Comparison:")
158
  st.dataframe(results_df)
159
 
160
  our_rank = results_df.index[results_df['url'] == row['page']].tolist()[0] + 1
 
161
  st.write(f"Our page ranks {our_rank} out of {len(results_df)} in terms of relevancy score.")
162
 
163
 
@@ -523,9 +562,11 @@ def main():
523
  st.write("Data fetched successfully. Click the button below to calculate relevancy scores.")
524
 
525
  if st.button("Calculate Relevancy Scores"):
 
526
  st.session_state.report_data = calculate_relevancy_scores(st.session_state.report_data, model_type)
527
 
528
  for index, row in st.session_state.report_data.iterrows():
 
529
  st.write(f"Query: {row['query']}")
530
  st.write(f"Page: {row['page']}")
531
  st.write(f"Relevancy Score: {row['relevancy_score']:.4f}")
@@ -534,10 +575,11 @@ def main():
534
 
535
  download_csv_link(st.session_state.report_data)
536
  elif st.session_state.report_data is not None:
 
537
  st.warning("No data found for the selected criteria.")
538
- logging.warning("No data found for the selected criteria")
539
 
540
 
541
  if __name__ == "__main__":
542
  logging.info("Running main function")
543
- main()
 
 
24
 
25
  load_dotenv()
26
  logging.info("Environment variables loaded")
27
+ logger = logging.getLogger(__name__)
28
 
29
  # Initialize Cohere client
30
  APIFY_API_TOKEN = os.environ.get('APIFY_API_TOKEN')
 
32
  co = cohere.Client(COHERE_API_KEY)
33
  logging.info("Cohere client initialized")
34
  if not APIFY_API_TOKEN:
35
+ logger.error("APIFY_API_TOKEN is not set in the environment variables.")
36
  st.error("APIFY_API_TOKEN is not set in the environment variables. Please set it and restart the application.")
37
 
38
  # Initialize the ApifyClient with the API token
39
  client = ApifyClient(APIFY_API_TOKEN)
40
+ # Record that the Apify client has been constructed
41
+ logger.info("ApifyClient initialized")
42
 
43
  # Configuration: Set to True if running locally, False if running on Streamlit Cloud
44
  IS_LOCAL = False
 
95
  # -------------
96
 
97
  def get_serp_results(query):
98
+ logger.info(f"Getting SERP results for query: {query}")
99
  if not APIFY_API_TOKEN:
100
+ logger.error("Apify API token is not set. Unable to fetch SERP results.")
101
  st.error("Apify API token is not set. Unable to fetch SERP results.")
102
  return []
103
 
 
114
  }
115
 
116
  try:
117
+ logger.debug(f"Calling Apify Actor with input: {run_input}")
118
  # Run the Actor and wait for it to finish
119
  run = client.actor("nFJndFXA5zjCTuudP").call(run_input=run_input)
120
+ logger.info(f"Apify Actor run completed. Run ID: {run.get('id')}")
121
 
122
  # Fetch results from the run's dataset
123
+ logger.debug(f"Fetching results from dataset ID: {run.get('defaultDatasetId')}")
124
  results = list(client.dataset(run["defaultDatasetId"]).iterate_items())
125
+ logger.info(f"Fetched {len(results)} results from Apify dataset")
126
 
127
  if results and 'organicResults' in results[0]:
128
+ urls = [item['url'] for item in results[0]['organicResults']]
129
+ logger.info(f"Extracted {len(urls)} URLs from organic results")
130
+ return urls
131
  else:
132
+ logger.warning("No organic results found in the SERP data.")
133
  st.warning("No organic results found in the SERP data.")
134
  return []
135
  except Exception as e:
136
+ logger.exception(f"Error fetching SERP results: {str(e)}")
137
  st.error(f"Error fetching SERP results: {str(e)}")
138
  return []
139
 
140
+
141
+
142
+
def fetch_content(url, timeout=10):
    """Download a web page and return its visible text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on an
            unresponsive host, stalling the whole competitor analysis.

    Returns:
        The page text with markup removed and fragments joined by single
        spaces, or an empty string when the request fails. Failures are
        logged and shown to the user as a Streamlit warning.
    """
    logger.info(f"Fetching content from URL: {url}")
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        # Timeouts, connection errors and HTTP error statuses all land here.
        logger.error(f"Error fetching content from {url}: {e}")
        st.warning(f"Error fetching content from {url}: {e}")
        return ""

    soup = BeautifulSoup(response.text, 'html.parser')
    content = soup.get_text(separator=' ', strip=True)
    logger.debug(f"Fetched {len(content)} characters from {url}")
    return content
156
 
 
 
 
 
 
 
 
 
 
 
 
 
def calculate_relevance_score(page_content, query, co):
    """Score how relevant a page is to a query using embedding similarity.

    The page is embedded as a ``search_document`` and the query as a
    ``search_query`` with the ``embed-english-v3.0`` model; the score is the
    cosine similarity between the two vectors.

    Args:
        page_content: Text of the page to score.
        query: Search query the page is scored against.
        co: Client object exposing an ``embed`` method (the Cohere client).

    Returns:
        The similarity as a float. Returns 0.0 when either text is empty or
        when the embedding call fails; failures are logged and shown to the
        user as a Streamlit error.
    """
    logger.info(f"Calculating relevance score for query: {query}")

    # fetch_content returns "" when a page cannot be downloaded; there is
    # nothing to embed in that case, so skip the API round-trips entirely.
    if not page_content or not query:
        logger.warning("Empty page content or query; relevance score set to 0")
        return 0.0

    try:
        page_embedding = co.embed(
            texts=[page_content],
            model='embed-english-v3.0',
            input_type='search_document',
        ).embeddings[0]
        query_embedding = co.embed(
            texts=[query],
            model='embed-english-v3.0',
            input_type='search_query',
        ).embeddings[0]
        # Convert to a plain float so every return path has the same type.
        score = float(cosine_similarity([query_embedding], [page_embedding])[0][0])
        logger.debug(f"Relevance score calculated: {score}")
        return score
    except Exception as e:
        logger.exception(f"Error calculating relevance score: {str(e)}")
        st.error(f"Error calculating relevance score: {str(e)}")
        return 0.0
169
+
def analyze_competitors(row, co):
    """Compare our page's relevancy score with pages ranking for the same query.

    Args:
        row: Mapping (e.g. a DataFrame row) providing the keys ``'query'``,
            ``'page'`` and ``'relevancy_score'``.
        co: Client object forwarded to ``calculate_relevance_score``.

    Returns:
        A DataFrame with columns ``'url'`` and ``'relevancy_score'``, sorted
        by score in descending order. The index is renumbered after sorting,
        so the row at index ``i`` holds rank ``i + 1``.
    """
    query = row['query']
    our_url = row['page']
    our_score = row['relevancy_score']
    logger.info(f"Analyzing competitors for query: {query}")

    competitor_urls = get_serp_results(query)

    results = []
    for url in competitor_urls:
        content = fetch_content(url)
        score = calculate_relevance_score(content, query, co)
        results.append({'url': url, 'relevancy_score': score})

    results.append({'url': our_url, 'relevancy_score': our_score})

    # sort_values keeps the original (insertion-order) index labels. The
    # caller derives our rank from the index, so without renumbering our page
    # -- appended last -- would always be reported as ranked last.
    results_df = (
        pd.DataFrame(results)
        .sort_values('relevancy_score', ascending=False)
        .reset_index(drop=True)
    )

    logger.info(f"Competitor analysis completed. {len(results)} results obtained.")
    return results_df
189
 
190
  def show_competitor_analysis(row, co):
191
  if st.button("Check Competitors", key=f"comp_{row['page']}"):
192
+ logger.info(f"Competitor analysis requested for page: {row['page']}")
193
  with st.spinner('Analyzing competitors...'):
194
  results_df = analyze_competitors(row, co)
195
  st.write("Relevancy Score Comparison:")
196
  st.dataframe(results_df)
197
 
198
  our_rank = results_df.index[results_df['url'] == row['page']].tolist()[0] + 1
199
+ logger.info(f"Our page ranks {our_rank} out of {len(results_df)} in terms of relevancy score.")
200
  st.write(f"Our page ranks {our_rank} out of {len(results_df)} in terms of relevancy score.")
201
 
202
 
 
562
  st.write("Data fetched successfully. Click the button below to calculate relevancy scores.")
563
 
564
  if st.button("Calculate Relevancy Scores"):
565
+ logger.info("Calculating relevancy scores for all rows")
566
  st.session_state.report_data = calculate_relevancy_scores(st.session_state.report_data, model_type)
567
 
568
  for index, row in st.session_state.report_data.iterrows():
569
+ logger.debug(f"Processing row {index}: Query: {row['query']}, Page: {row['page']}")
570
  st.write(f"Query: {row['query']}")
571
  st.write(f"Page: {row['page']}")
572
  st.write(f"Relevancy Score: {row['relevancy_score']:.4f}")
 
575
 
576
  download_csv_link(st.session_state.report_data)
577
  elif st.session_state.report_data is not None:
578
+ logger.warning("No data found for the selected criteria.")
579
  st.warning("No data found for the selected criteria.")
 
580
 
581
 
if __name__ == "__main__":
    # Log start and end through the same module-level logger so both
    # messages carry the same logger name.
    logger.info("Running main function")
    main()
    logger.info("Script completed")