Spaces:
Runtime error
Runtime error
import io
import re
import tempfile

import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
def get_scroll_depth_text_mapping(url, timeout=10):
    """Fetch a page and estimate where each text block sits in the scroll range.

    Every ``<p>`` and ``<table>`` element is taken in document order, and its
    position in that sequence (as a percentage of the element count) stands in
    for its scroll depth; no real layout is computed.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(data, total_height)`` tuple. ``data`` is a list of
        ``(scroll_percent, text)`` pairs for the elements whose text is not
        empty, and ``total_height`` is the character count of the page text,
        used as a rough stand-in for the page height.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout a stalled server would block the caller indefinitely.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing the body of an error page as content.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    elements = soup.select('p, table')
    element_count = len(elements)
    total_height = len(soup.text)  # rough estimate of the overall page height

    data = []
    for position, element in enumerate(elements):
        # Line breaks and commas are removed from the text; presumably so each
        # entry stays on one line / in one field of the CSV output (confirm).
        text = element.text.strip().replace('\n', '').replace(',', '')
        if text:
            data.append((position / element_count * 100, text))
    return data, total_height
def load_click_data(csv_content):
    """Parse click-report CSV text into a DataFrame.

    The first nine lines of ``csv_content`` are skipped, so the tenth line is
    read as the column header.
    """
    buffer = io.StringIO(csv_content)
    return pd.read_csv(buffer, skiprows=9)
def load_exit_rate_data(csv_content):
    """Parse scroll-report CSV text into a scroll-depth / exit-rate table.

    The first nine lines of ``csv_content`` are skipped, so the tenth line is
    read as the column header. The report must contain the columns
    ``スクロールの奥行き`` and ``% 人以上の訪問者``.

    Args:
        csv_content: The CSV report as a text string.

    Returns:
        A DataFrame with two columns: ``スクロールの奥行き`` converted to int and
        ``離脱率`` computed as ``100 - % 人以上の訪問者``.

    Raises:
        KeyError: If either required column is missing.
    """
    df = pd.read_csv(io.StringIO(csv_content), skiprows=9)
    # Build a new frame instead of assigning columns onto a slice of ``df``;
    # writing into a slice is chained assignment and triggers pandas'
    # SettingWithCopyWarning.
    return pd.DataFrame({
        'スクロールの奥行き': df['スクロールの奥行き'].astype(int),
        '離脱率': 100 - df['% 人以上の訪問者'].astype(float),
    })
def integrate_data(scroll_depth_text, click_data, exit_rate_data, total_height):
    """Combine page text, click statistics and exit rates into one table.

    Args:
        scroll_depth_text: Iterable of ``(scroll_percent, text)`` pairs.
        click_data: DataFrame with the columns ``クリック:`` (click count) and
            `` % のクリック`` (click share, with or without a ``%`` sign).
        exit_rate_data: DataFrame with the columns ``スクロールの奥行き`` and
            ``離脱率``.
        total_height: Accepted for interface compatibility; not used here.

    Returns:
        A DataFrame with one row per text entry and the columns
        ``スクロール深度``, ``テキスト``, ``クリック数``, ``クリック率`` and ``離脱率``.
        The columns are present even when there are no rows.
    """
    columns = ['スクロール深度', 'テキスト', 'クリック数', 'クリック率', '離脱率']

    # Parse every click row once up front rather than once per text entry.
    # Going through ``str`` lets the share column be either text such as
    # "12.5%" or a value pandas has already parsed as a number.
    row_count = len(click_data)
    click_rows = [
        (
            index / row_count * 100,
            int(row['クリック:']),
            float(str(row[' % のクリック']).strip().strip('%')),
        )
        for index, row in click_data.iterrows()
    ]

    integrated_rows = []
    for scroll_percent, text in scroll_depth_text:
        # Simplified matching (real on-page positions are not available): a
        # click row counts towards a text entry when its relative position in
        # the click table is within 5 percentage points of the scroll depth.
        nearby = [
            (count, rate)
            for position, count, rate in click_rows
            if abs(scroll_percent - position) < 5
        ]
        clicks = sum(count for count, _ in nearby)
        click_rate = sum(rate for _, rate in nearby)

        # Exit rates are looked up by the scroll depth rounded down to a
        # multiple of 5; the rate stays 0 when no row matches.
        scroll_depth = int(scroll_percent // 5 * 5)
        matches = exit_rate_data.loc[
            exit_rate_data['スクロールの奥行き'] == scroll_depth, '離脱率'
        ]
        exit_rate = 0 if matches.empty else matches.values[0]

        integrated_rows.append({
            'スクロール深度': f"{scroll_percent:.2f}%",
            'テキスト': text,
            'クリック数': clicks,
            'クリック率': click_rate,
            '離脱率': exit_rate,
        })
    return pd.DataFrame(integrated_rows, columns=columns)
def process_data(url, click_csv, exit_rate_csv):
    """Run the whole pipeline for one page and return the result as CSV text.

    ``url`` is handed to ``get_scroll_depth_text_mapping``; ``click_csv`` and
    ``exit_rate_csv`` are handed to ``load_click_data`` and
    ``load_exit_rate_data`` respectively. The merged table produced by
    ``integrate_data`` is serialised without its index.
    """
    page_sections, page_height = get_scroll_depth_text_mapping(url)
    merged = integrate_data(
        page_sections,
        load_click_data(click_csv),
        load_exit_rate_data(exit_rate_csv),
        page_height,
    )
    return merged.to_csv(index=False)
def _read_uploaded_csv(upload):
    """Return the text content of a file received from a ``gr.File`` input.

    Raises:
        ValueError: If no file was uploaded.
    """
    if upload is None:
        raise ValueError("Both CSV files must be uploaded.")
    # Depending on the Gradio version the component hands over either a path
    # string or a temp-file object that exposes the path as ``.name``.
    path = getattr(upload, "name", upload)
    # NOTE(review): assumes the reports are UTF-8 (optionally with a BOM) —
    # confirm against a real export.
    with open(path, encoding="utf-8-sig") as handle:
        return handle.read()


def _process_uploads(url, click_csv, exit_rate_csv):
    """Adapt ``process_data`` to Gradio's file-based inputs and output.

    ``process_data`` works on CSV text and returns CSV text, while ``gr.File``
    delivers uploaded files and expects a file path back. This wrapper reads
    the uploads, runs the pipeline, and writes the result to a temporary
    ``.csv`` file whose path is returned.
    """
    csv_text = process_data(
        url,
        _read_uploaded_csv(click_csv),
        _read_uploaded_csv(exit_rate_csv),
    )
    # delete=False: the file has to outlive this function so Gradio can serve
    # it. newline="" keeps the line endings exactly as pandas produced them.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".csv", delete=False, encoding="utf-8", newline=""
    ) as output:
        output.write(csv_text)
    return output.name


iface = gr.Interface(
    fn=_process_uploads,
    inputs=[
        gr.Textbox(label="URL"),
        gr.File(label="Click CSV"),
        gr.File(label="Exit Rate CSV"),
    ],
    outputs=gr.File(label="Integrated Data CSV"),
    title="Web Page Data Integration",
    description="Integrate scroll depth, text, click, and exit rate data from a web page.",
)
iface.launch()