Spaces:
Sleeping
Sleeping
poemsforaphrodite
committed on
Commit
•
f52f788
1
Parent(s):
bb2fff1
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,11 @@
|
|
|
|
1 |
import datetime
|
2 |
import base64
|
3 |
import os
|
|
|
|
|
4 |
import streamlit as st
|
|
|
5 |
from google_auth_oauthlib.flow import Flow
|
6 |
from googleapiclient.discovery import build
|
7 |
from dotenv import load_dotenv
|
@@ -18,25 +22,38 @@ load_dotenv()
|
|
18 |
COHERE_API_KEY = os.environ["COHERE_API_KEY"]
|
19 |
co = cohere.Client(COHERE_API_KEY)
|
20 |
|
|
|
|
|
|
|
21 |
# Constants
|
22 |
SEARCH_TYPES = ["web", "image", "video", "news", "discover", "googleNews"]
|
23 |
DATE_RANGE_OPTIONS = [
|
24 |
-
"Last 7 Days",
|
25 |
-
"Last
|
|
|
|
|
|
|
|
|
|
|
26 |
]
|
27 |
DEVICE_OPTIONS = ["All Devices", "desktop", "mobile", "tablet"]
|
28 |
BASE_DIMENSIONS = ["page", "query", "country", "date"]
|
29 |
MAX_ROWS = 250_000
|
30 |
DF_PREVIEW_ROWS = 100
|
31 |
|
|
|
32 |
# Streamlit App Configuration
|
|
|
|
|
33 |
def setup_streamlit():
|
34 |
-
st.set_page_config(page_title="✨ Simple Google Search Console Data", layout="wide")
|
35 |
-
st.title("✨ Simple Google Search Console Data")
|
|
|
36 |
st.markdown(
|
37 |
"""
|
38 |
-
|
39 |
-
|
|
|
40 |
""",
|
41 |
unsafe_allow_html=True
|
42 |
)
|
@@ -62,7 +79,10 @@ def init_session_state():
|
|
62 |
if 'custom_end_date' not in st.session_state:
|
63 |
st.session_state.custom_end_date = datetime.date.today()
|
64 |
|
|
|
65 |
# Data Processing Functions
|
|
|
|
|
66 |
def fetch_content(url):
|
67 |
try:
|
68 |
response = requests.get(url)
|
@@ -104,7 +124,10 @@ def process_gsc_data(df):
|
|
104 |
result = df_unique[['page', 'query', 'clicks', 'impressions', 'ctr', 'position', 'relevancy_score']]
|
105 |
return result
|
106 |
|
|
|
107 |
# Google Authentication Functions
|
|
|
|
|
108 |
def load_config():
|
109 |
client_config = {
|
110 |
"web": {
|
@@ -112,7 +135,7 @@ def load_config():
|
|
112 |
"client_secret": os.environ["CLIENT_SECRET"],
|
113 |
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
114 |
"token_uri": "https://oauth2.googleapis.com/token",
|
115 |
-
"redirect_uris": ["https://
|
116 |
}
|
117 |
}
|
118 |
return client_config
|
@@ -143,7 +166,10 @@ def auth_search_console(client_config, credentials):
|
|
143 |
}
|
144 |
return searchconsole.authenticate(client_config=client_config, credentials=token)
|
145 |
|
|
|
146 |
# Data Fetching Functions
|
|
|
|
|
147 |
def list_gsc_properties(credentials):
|
148 |
service = build('webmasters', 'v3', credentials=credentials)
|
149 |
site_list = service.sites().list().execute()
|
@@ -168,7 +194,10 @@ def fetch_data_loading(webproperty, search_type, start_date, end_date, dimension
|
|
168 |
processed_df = process_gsc_data(df)
|
169 |
return processed_df
|
170 |
|
|
|
171 |
# Utility Functions
|
|
|
|
|
172 |
def update_dimensions(selected_search_type):
|
173 |
return BASE_DIMENSIONS + ['device'] if selected_search_type in SEARCH_TYPES else BASE_DIMENSIONS
|
174 |
|
@@ -195,10 +224,14 @@ def show_error(e):
|
|
195 |
def property_change():
|
196 |
st.session_state.selected_property = st.session_state['selected_property_selector']
|
197 |
|
198 |
-
|
199 |
-
return f'<a href="{val}" target="_blank">{val}</a>'
|
200 |
-
|
201 |
# File & Download Operations
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
def download_csv_link(report):
|
203 |
def to_csv(df):
|
204 |
return df.to_csv(index=False, encoding='utf-8-sig')
|
@@ -207,44 +240,55 @@ def download_csv_link(report):
|
|
207 |
href = f'<a href="data:file/csv;base64,{b64_csv}" download="search_console_data.csv">Download CSV File</a>'
|
208 |
st.markdown(href, unsafe_allow_html=True)
|
209 |
|
|
|
210 |
# Streamlit UI Components
|
|
|
|
|
211 |
def show_google_sign_in(auth_url):
|
212 |
-
|
213 |
-
|
|
|
|
|
214 |
|
215 |
def show_property_selector(properties, account):
|
216 |
-
|
217 |
-
|
218 |
-
default = properties[0]
|
219 |
-
webproperty = st.selectbox(
|
220 |
-
"Select Web Property",
|
221 |
properties,
|
222 |
-
index=properties.index(
|
|
|
223 |
key='selected_property_selector',
|
224 |
on_change=property_change
|
225 |
)
|
226 |
-
return account[
|
227 |
|
228 |
def show_search_type_selector():
|
229 |
-
|
230 |
-
|
|
|
|
|
|
|
|
|
231 |
|
232 |
def show_date_range_selector():
|
233 |
-
|
234 |
-
|
|
|
|
|
|
|
|
|
235 |
|
236 |
def show_custom_date_inputs():
|
237 |
st.session_state.custom_start_date = st.date_input("Start Date", st.session_state.custom_start_date)
|
238 |
st.session_state.custom_end_date = st.date_input("End Date", st.session_state.custom_end_date)
|
239 |
|
240 |
-
def show_dimensions_selector(
|
241 |
-
available_dimensions = update_dimensions(
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
|
249 |
def show_paginated_dataframe(report, rows_per_page=20):
|
250 |
total_rows = len(report)
|
@@ -253,7 +297,7 @@ def show_paginated_dataframe(report, rows_per_page=20):
|
|
253 |
if 'current_page' not in st.session_state:
|
254 |
st.session_state.current_page = 1
|
255 |
|
256 |
-
col1, col2, col3 = st.columns([1,
|
257 |
with col1:
|
258 |
if st.button("Previous", disabled=st.session_state.current_page == 1):
|
259 |
st.session_state.current_page -= 1
|
@@ -265,18 +309,16 @@ def show_paginated_dataframe(report, rows_per_page=20):
|
|
265 |
|
266 |
start_idx = (st.session_state.current_page - 1) * rows_per_page
|
267 |
end_idx = start_idx + rows_per_page
|
|
|
268 |
|
269 |
-
|
270 |
-
paginated_df['position'] = paginated_df['position'].round(0).astype(int)
|
271 |
-
paginated_df['page'] = paginated_df['page'].apply(make_clickable)
|
272 |
-
|
273 |
-
st.write(paginated_df.to_html(escape=False, index=False), unsafe_allow_html=True)
|
274 |
-
|
275 |
# Main Streamlit App Function
|
|
|
|
|
276 |
def main():
|
277 |
setup_streamlit()
|
278 |
client_config = load_config()
|
279 |
-
|
280 |
if 'auth_flow' not in st.session_state or 'auth_url' not in st.session_state:
|
281 |
st.session_state.auth_flow, st.session_state.auth_url = google_auth(client_config)
|
282 |
|
@@ -320,5 +362,6 @@ def main():
|
|
320 |
elif st.session_state.report_data is not None:
|
321 |
st.warning("No data found for the selected criteria.")
|
322 |
|
|
|
323 |
if __name__ == "__main__":
|
324 |
-
main()
|
|
|
1 |
+
# Standard library imports
|
2 |
import datetime
|
3 |
import base64
|
4 |
import os
|
5 |
+
|
6 |
+
# Related third-party imports
|
7 |
import streamlit as st
|
8 |
+
from streamlit_elements import elements
|
9 |
from google_auth_oauthlib.flow import Flow
|
10 |
from googleapiclient.discovery import build
|
11 |
from dotenv import load_dotenv
|
|
|
22 |
COHERE_API_KEY = os.environ["COHERE_API_KEY"]
|
23 |
co = cohere.Client(COHERE_API_KEY)
|
24 |
|
25 |
+
# Configuration: Set to True if running locally, False if running on Streamlit Cloud
|
26 |
+
IS_LOCAL = False
|
27 |
+
|
28 |
# Constants
|
29 |
SEARCH_TYPES = ["web", "image", "video", "news", "discover", "googleNews"]
|
30 |
DATE_RANGE_OPTIONS = [
|
31 |
+
"Last 7 Days",
|
32 |
+
"Last 30 Days",
|
33 |
+
"Last 3 Months",
|
34 |
+
"Last 6 Months",
|
35 |
+
"Last 12 Months",
|
36 |
+
"Last 16 Months",
|
37 |
+
"Custom Range"
|
38 |
]
|
39 |
DEVICE_OPTIONS = ["All Devices", "desktop", "mobile", "tablet"]
|
40 |
BASE_DIMENSIONS = ["page", "query", "country", "date"]
|
41 |
MAX_ROWS = 250_000
|
42 |
DF_PREVIEW_ROWS = 100
|
43 |
|
44 |
+
# -------------
|
45 |
# Streamlit App Configuration
|
46 |
+
# -------------
|
47 |
+
|
48 |
def setup_streamlit():
|
49 |
+
st.set_page_config(page_title="✨ Simple Google Search Console Data | LeeFoot.co.uk", layout="wide")
|
50 |
+
st.title("✨ Simple Google Search Console Data | June 2024")
|
51 |
+
st.markdown(f"### Lightweight GSC Data Extractor. (Max {MAX_ROWS:,} Rows)")
|
52 |
st.markdown(
|
53 |
"""
|
54 |
+
<p>
|
55 |
+
Created by <a href="https://twitter.com/LeeFootSEO" target="_blank">LeeFootSEO</a> |
|
56 |
+
<a href="https://leefoot.co.uk" target="_blank">More Apps & Scripts on my Website</a>
|
57 |
""",
|
58 |
unsafe_allow_html=True
|
59 |
)
|
|
|
79 |
if 'custom_end_date' not in st.session_state:
|
80 |
st.session_state.custom_end_date = datetime.date.today()
|
81 |
|
82 |
+
# -------------
|
83 |
# Data Processing Functions
|
84 |
+
# -------------
|
85 |
+
|
86 |
def fetch_content(url):
|
87 |
try:
|
88 |
response = requests.get(url)
|
|
|
124 |
result = df_unique[['page', 'query', 'clicks', 'impressions', 'ctr', 'position', 'relevancy_score']]
|
125 |
return result
|
126 |
|
127 |
+
# -------------
|
128 |
# Google Authentication Functions
|
129 |
+
# -------------
|
130 |
+
|
131 |
def load_config():
|
132 |
client_config = {
|
133 |
"web": {
|
|
|
135 |
"client_secret": os.environ["CLIENT_SECRET"],
|
136 |
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
137 |
"token_uri": "https://oauth2.googleapis.com/token",
|
138 |
+
"redirect_uris": ["https://poemsforaphrodite-gscpro.hf.space/"],
|
139 |
}
|
140 |
}
|
141 |
return client_config
|
|
|
166 |
}
|
167 |
return searchconsole.authenticate(client_config=client_config, credentials=token)
|
168 |
|
169 |
+
# -------------
|
170 |
# Data Fetching Functions
|
171 |
+
# -------------
|
172 |
+
|
173 |
def list_gsc_properties(credentials):
|
174 |
service = build('webmasters', 'v3', credentials=credentials)
|
175 |
site_list = service.sites().list().execute()
|
|
|
194 |
processed_df = process_gsc_data(df)
|
195 |
return processed_df
|
196 |
|
197 |
+
# -------------
|
198 |
# Utility Functions
|
199 |
+
# -------------
|
200 |
+
|
201 |
def update_dimensions(selected_search_type):
    """Return the dimension names that may be offered for a search type.

    When ``selected_search_type`` is one of ``SEARCH_TYPES`` the result is a
    new list holding ``BASE_DIMENSIONS`` followed by ``'device'``; otherwise
    ``BASE_DIMENSIONS`` itself is returned.
    """
    if selected_search_type not in SEARCH_TYPES:
        return BASE_DIMENSIONS
    return [*BASE_DIMENSIONS, 'device']
|
203 |
|
|
|
224 |
def property_change():
    """Copy the property selectbox's current value into ``selected_property``.

    Reads the session-state entry stored under the widget key
    ``'selected_property_selector'`` and writes it to
    ``st.session_state.selected_property``.
    """
    chosen = st.session_state['selected_property_selector']
    st.session_state.selected_property = chosen
|
226 |
|
227 |
+
# -------------
|
|
|
|
|
228 |
# File & Download Operations
|
229 |
+
# -------------
|
230 |
+
|
231 |
+
def show_dataframe(report):
    """Show a collapsible preview of the first rows of ``report``.

    Args:
        report: Object exposing ``head(n)``; presumably a pandas DataFrame,
            but its type is not visible from this function - confirm
            against the caller.
    """
    # Derive the row count in the label from DF_PREVIEW_ROWS so the text can
    # never disagree with the number of rows actually displayed below.
    label = f"Preview the First {DF_PREVIEW_ROWS} Rows (Unique Pages with Top Query)"
    with st.expander(label):
        st.dataframe(report.head(DF_PREVIEW_ROWS))
|
234 |
+
|
235 |
def download_csv_link(report):
|
236 |
def to_csv(df):
|
237 |
return df.to_csv(index=False, encoding='utf-8-sig')
|
|
|
240 |
href = f'<a href="data:file/csv;base64,{b64_csv}" download="search_console_data.csv">Download CSV File</a>'
|
241 |
st.markdown(href, unsafe_allow_html=True)
|
242 |
|
243 |
+
# -------------
|
244 |
# Streamlit UI Components
|
245 |
+
# -------------
|
246 |
+
|
247 |
def show_google_sign_in(auth_url):
    """Render the sidebar sign-in button and, once pressed, the sign-in link.

    Args:
        auth_url: URL placed behind the "Google Sign-In" markdown link.
    """
    with st.sidebar:
        pressed = st.button("Sign in with Google")
        if not pressed:
            return
        st.write('Please click the link below to sign in:')
        sign_in_link = f'[Google Sign-In]({auth_url})'
        st.markdown(sign_in_link, unsafe_allow_html=True)
|
252 |
|
253 |
def show_property_selector(properties, account):
    """Render the property selectbox and return the chosen account entry.

    The selectbox is preselected on ``st.session_state.selected_property``
    when that value is present in ``properties``; otherwise the first option
    is used. Changes are reported through ``property_change``.

    Args:
        properties: Sequence of selectable property identifiers.
        account: Object indexed by the chosen identifier to obtain the
            return value.

    Returns:
        ``account[selected_property]`` for the option currently selected.
    """
    if st.session_state.selected_property in properties:
        preselected = properties.index(st.session_state.selected_property)
    else:
        preselected = 0

    selected_property = st.selectbox(
        "Select a Search Console Property:",
        properties,
        index=preselected,
        key='selected_property_selector',
        on_change=property_change,
    )
    return account[selected_property]
|
263 |
|
264 |
def show_search_type_selector():
    """Render the search-type selectbox and return the selected value.

    The initial option is the position of
    ``st.session_state.selected_search_type`` within ``SEARCH_TYPES``.
    """
    current = st.session_state.selected_search_type
    position = SEARCH_TYPES.index(current)
    choice = st.selectbox(
        "Select Search Type:",
        SEARCH_TYPES,
        index=position,
        key='search_type_selector',
    )
    return choice
|
271 |
|
272 |
def show_date_range_selector():
    """Render the date-range selectbox and return the selected value.

    The initial option is the position of
    ``st.session_state.selected_date_range`` within ``DATE_RANGE_OPTIONS``.
    """
    current = st.session_state.selected_date_range
    position = DATE_RANGE_OPTIONS.index(current)
    choice = st.selectbox(
        "Select Date Range:",
        DATE_RANGE_OPTIONS,
        index=position,
        key='date_range_selector',
    )
    return choice
|
279 |
|
280 |
def show_custom_date_inputs():
    """Render the start/end date inputs and store their values in session state.

    Each input is seeded with the value currently held in
    ``custom_start_date`` / ``custom_end_date`` and its result is written
    back to the same session-state attribute.
    """
    state = st.session_state
    state.custom_start_date = st.date_input("Start Date", state.custom_start_date)
    state.custom_end_date = st.date_input("End Date", state.custom_end_date)
|
283 |
|
284 |
+
def show_dimensions_selector(search_type):
    """Render the dimensions multiselect and return the chosen dimensions.

    Args:
        search_type: Search type passed to ``update_dimensions`` to obtain
            the selectable options.

    Returns:
        The value returned by ``st.multiselect``; the widget is preselected
        with ``st.session_state.selected_dimensions``.
    """
    options = update_dimensions(search_type)
    preselected = st.session_state.selected_dimensions
    return st.multiselect(
        "Select Dimensions:",
        options,
        default=preselected,
        key='dimensions_selector',
    )
|
292 |
|
293 |
def show_paginated_dataframe(report, rows_per_page=20):
|
294 |
total_rows = len(report)
|
|
|
297 |
if 'current_page' not in st.session_state:
|
298 |
st.session_state.current_page = 1
|
299 |
|
300 |
+
col1, col2, col3 = st.columns([1,3,1])
|
301 |
with col1:
|
302 |
if st.button("Previous", disabled=st.session_state.current_page == 1):
|
303 |
st.session_state.current_page -= 1
|
|
|
309 |
|
310 |
start_idx = (st.session_state.current_page - 1) * rows_per_page
|
311 |
end_idx = start_idx + rows_per_page
|
312 |
+
st.dataframe(report.iloc[start_idx:end_idx])
|
313 |
|
314 |
+
# -------------
|
|
|
|
|
|
|
|
|
|
|
315 |
# Main Streamlit App Function
|
316 |
+
# -------------
|
317 |
+
|
318 |
def main():
|
319 |
setup_streamlit()
|
320 |
client_config = load_config()
|
321 |
+
|
322 |
if 'auth_flow' not in st.session_state or 'auth_url' not in st.session_state:
|
323 |
st.session_state.auth_flow, st.session_state.auth_url = google_auth(client_config)
|
324 |
|
|
|
362 |
elif st.session_state.report_data is not None:
|
363 |
st.warning("No data found for the selected criteria.")
|
364 |
|
365 |
+
|
366 |
if __name__ == "__main__":
|
367 |
+
main()
|