# app.py — Gradio app that integrates scroll-depth, text, click, and exit-rate
# data for a web page. (Hugging Face Spaces upload by nekonekokawaii,
# revision d6851a9 "Update app.py".)
import io
import re
import tempfile

import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
def get_scroll_depth_text_mapping(url):
    """Fetch a page and map each text-bearing <p>/<table> element to an
    approximate scroll-depth percentage.

    Args:
        url: URL of the page to fetch.

    Returns:
        (data, total_height) where ``data`` is a list of
        ``(scroll_percent, text)`` tuples in document order, and
        ``total_height`` is a crude proxy for page height (the total
        character count of the page text).

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-2xx HTTP status.
    """
    # Timeout so the app cannot hang forever on a dead host, and
    # raise_for_status so we fail loudly instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    elements = soup.select('p, table')
    total_height = len(soup.text)  # simple estimate of overall page "height"

    data = []
    for i, element in enumerate(elements):
        text = element.text.strip().replace('\n', '').replace(',', '')
        if text:
            # Document-order position as a stand-in for scroll depth
            # (real rendered positions need a browser, which we don't have).
            scroll_percent = (i / len(elements)) * 100
            data.append((scroll_percent, text))
    return data, total_height
def load_click_data(csv_content, skiprows=9):
    """Parse a click-report CSV export into a DataFrame.

    Args:
        csv_content: Raw CSV text of the export.
        skiprows: Number of metadata preamble lines before the header row
            (default 9, matching the analytics tool's export format).

    Returns:
        pandas.DataFrame with the click data rows.
    """
    return pd.read_csv(io.StringIO(csv_content), skiprows=skiprows)
def load_exit_rate_data(csv_content, skiprows=9):
    """Parse an exit-rate CSV export into (scroll depth, exit rate).

    The export reports '% 人以上の訪問者' — the percentage of visitors who
    reached each scroll depth; exit rate is its complement (100 - reach%).

    Args:
        csv_content: Raw CSV text of the export.
        skiprows: Number of metadata preamble lines before the header row
            (default 9, matching the analytics tool's export format).

    Returns:
        pandas.DataFrame with columns 'スクロールの奥行き' (int) and
        '離脱率' (float).
    """
    df = pd.read_csv(io.StringIO(csv_content), skiprows=skiprows)
    # .copy() so the assignments below write to an independent frame,
    # not a view of `df` (avoids pandas SettingWithCopyWarning and the
    # possibility of the writes silently not sticking).
    exit_rate_data = df[['スクロールの奥行き', '% 人以上の訪問者']].copy()
    exit_rate_data['スクロールの奥行き'] = exit_rate_data['スクロールの奥行き'].astype(int)
    exit_rate_data['離脱率'] = 100 - exit_rate_data['% 人以上の訪問者'].astype(float)
    return exit_rate_data[['スクロールの奥行き', '離脱率']]
def integrate_data(scroll_depth_text, click_data, exit_rate_data, total_height):
    """Merge scroll-depth text positions with click and exit-rate metrics.

    Args:
        scroll_depth_text: List of (scroll_percent, text) tuples.
        click_data: DataFrame with columns 'クリック:' (click counts) and
            ' % のクリック' (click rates as percent strings, e.g. '5%').
        exit_rate_data: DataFrame with columns 'スクロールの奥行き' and '離脱率'.
        total_height: Page-height proxy (currently unused; kept for the
            caller's interface).

    Returns:
        pandas.DataFrame with one row per text element, combining depth,
        text, accumulated clicks/click rate, and exit rate.
    """
    n_click_rows = len(click_data)
    rows = []
    for scroll_percent, text in scroll_depth_text:
        total_clicks = 0
        total_click_rate = 0
        # Attribute a click-report row to this element when their relative
        # document positions are within 5 percentage points — a heuristic,
        # since real rendered positions are unavailable without a browser.
        for idx, click_row in click_data.iterrows():
            n = int(click_row['クリック:'])
            rate = float(click_row[' % のクリック'].strip('%'))
            if abs(scroll_percent - idx / n_click_rows * 100) < 5:
                total_clicks += n
                total_click_rate += rate
        # Snap the element's depth down to its 5% bucket for the lookup.
        bucket = int(scroll_percent // 5 * 5)
        match = exit_rate_data.loc[exit_rate_data['スクロールの奥行き'] == bucket, '離脱率']
        exit_rate = match.values[0] if len(match) else 0
        rows.append({
            'スクロール深度': f"{scroll_percent:.2f}%",
            'テキスト': text,
            'クリック数': total_clicks,
            'クリック率': total_click_rate,
            '離脱率': exit_rate,
        })
    return pd.DataFrame(rows)
def process_data(url, click_csv, exit_rate_csv):
    """Gradio handler: fetch the page, merge the two CSV reports, and
    return the integrated result as a CSV file.

    Args:
        url: Page URL to analyze.
        click_csv: Upload from a gr.File input (a filepath string or a
            tempfile-like object with a .name attribute — NOT raw CSV text).
        exit_rate_csv: Upload from a gr.File input, same shape as click_csv.

    Returns:
        Path to a temporary CSV file, as expected by the gr.File output
        component (returning the raw CSV string would not render as a
        downloadable file).
    """
    def _read_upload(upload):
        # gr.File delivers a path or a file-like wrapper, so read the text
        # here; the load_* helpers expect raw CSV content.
        path = getattr(upload, 'name', upload)
        with open(path, encoding='utf-8') as f:
            return f.read()

    scroll_depth_text, total_height = get_scroll_depth_text_mapping(url)
    click_data = load_click_data(_read_upload(click_csv))
    exit_rate_data = load_exit_rate_data(_read_upload(exit_rate_csv))
    integrated_data = integrate_data(scroll_depth_text, click_data, exit_rate_data, total_height)

    # Write to disk and hand Gradio the path; utf-8-sig keeps the Japanese
    # headers readable when the CSV is opened in Excel.
    out = tempfile.NamedTemporaryFile(
        mode='w', suffix='.csv', delete=False, encoding='utf-8-sig'
    )
    try:
        integrated_data.to_csv(out, index=False)
    finally:
        out.close()
    return out.name
# Gradio front end: a URL plus two CSV uploads in, one integrated CSV out.
_inputs = [
    gr.Textbox(label="URL"),
    gr.File(label="Click CSV"),
    gr.File(label="Exit Rate CSV"),
]
iface = gr.Interface(
    fn=process_data,
    inputs=_inputs,
    outputs=gr.File(label="Integrated Data CSV"),
    title="Web Page Data Integration",
    description="Integrate scroll depth, text, click, and exit rate data from a web page.",
)

iface.launch()