import io
import os
import re
import tempfile

import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup


def get_scroll_depth_text_mapping(url):
    """Fetch *url* and map each <p>/<table> element to an estimated scroll depth.

    Returns ``(data, total_height)`` where *data* is a list of
    ``(scroll_percent, text)`` tuples and *total_height* is a crude estimate
    of the page height based on the length of its visible text (the real
    rendered height cannot be known without a browser).
    """
    # Timeout keeps the UI from hanging forever on an unresponsive host.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')
    data = []
    elements = soup.select('p, table')
    total_height = len(soup.text)  # crude proxy for the overall page height
    count = len(elements)  # hoisted: loop-invariant
    for i, element in enumerate(elements):
        # Strip newlines/commas so the text stays safe inside a CSV cell.
        text = element.text.strip().replace('\n', '').replace(',', '')
        if text:
            # Element position expressed as a percentage of the page.
            scroll_percent = (i / count) * 100
            data.append((scroll_percent, text))
    return data, total_height


def _read_text(source):
    """Return CSV text from a Gradio file object, a file path, or a raw string.

    gr.File hands the handler a tempfile-like object exposing ``.name`` (a
    path) or, in some versions, a plain path string; accepting a raw CSV
    string as well keeps the old call signature working.
    """
    name = getattr(source, 'name', None)
    if name is not None and os.path.exists(name):
        with open(name, encoding='utf-8') as fh:
            return fh.read()
    if isinstance(source, str) and os.path.exists(source):
        with open(source, encoding='utf-8') as fh:
            return fh.read()
    return source


def load_click_data(csv_content):
    """Parse the click-report CSV; the first 9 rows are report boilerplate."""
    return pd.read_csv(io.StringIO(_read_text(csv_content)), skiprows=9)


def load_exit_rate_data(csv_content):
    """Parse the exit-rate CSV and derive an exit-rate column.

    The report lists, per scroll depth, the share of visitors who reached at
    least that depth; the exit rate is its complement (100 - share).
    """
    df = pd.read_csv(io.StringIO(_read_text(csv_content)), skiprows=9)
    # .copy() fixes the chained-indexing bug: the original assigned into a
    # slice of df (SettingWithCopyWarning; writes may silently not stick).
    exit_rate_data = df[['スクロールの奥行き', '% 人以上の訪問者']].copy()
    exit_rate_data['スクロールの奥行き'] = exit_rate_data['スクロールの奥行き'].astype(int)
    exit_rate_data['離脱率'] = 100 - exit_rate_data['% 人以上の訪問者'].astype(float)
    return exit_rate_data[['スクロールの奥行き', '離脱率']]


def integrate_data(scroll_depth_text, click_data, exit_rate_data, total_height):
    """Join scroll-depth text with click counts and exit rates.

    Matching is approximate (true in-browser element positions are not
    available): a click row is attributed to a text element when their
    estimated scroll positions differ by less than 5 percentage points.
    Returns a DataFrame with one row per text element.
    """
    integrated_data = []
    n_click_rows = len(click_data)  # hoisted: loop-invariant
    for scroll_percent, text in scroll_depth_text:
        clicks = 0
        click_rate = 0
        exit_rate = 0
        for index, row in click_data.iterrows():
            element_clicks = int(row['クリック:'])
            element_click_rate = float(row[' % のクリック'].strip('%'))
            # Approximate position match — see docstring.
            if abs(scroll_percent - index / n_click_rows * 100) < 5:
                clicks += element_clicks
                click_rate += element_click_rate
        # Snap the depth down to the report's 5%-wide buckets.
        scroll_depth = int(scroll_percent // 5 * 5)
        exit_rate_row = exit_rate_data[exit_rate_data['スクロールの奥行き'] == scroll_depth]
        if not exit_rate_row.empty:
            exit_rate = exit_rate_row['離脱率'].values[0]
        integrated_data.append({
            'スクロール深度': f"{scroll_percent:.2f}%",
            'テキスト': text,
            'クリック数': clicks,
            'クリック率': click_rate,
            '離脱率': exit_rate,
        })
    return pd.DataFrame(integrated_data)


def process_data(url, click_csv, exit_rate_csv):
    """Gradio handler: fetch the page, join the reports, emit a CSV file.

    *click_csv* / *exit_rate_csv* may be gr.File upload objects, file paths,
    or raw CSV strings. Returns the path of a temporary CSV file so the
    gr.File output component can serve it for download (the original
    returned the CSV text itself, which a File output cannot serve).
    """
    scroll_depth_text, total_height = get_scroll_depth_text_mapping(url)
    click_data = load_click_data(click_csv)
    exit_rate_data = load_exit_rate_data(exit_rate_csv)
    integrated_data = integrate_data(scroll_depth_text, click_data, exit_rate_data, total_height)
    # delete=False: the file must outlive this function so Gradio can read it.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False, encoding='utf-8') as fh:
        integrated_data.to_csv(fh, index=False)
        return fh.name


iface = gr.Interface(
    fn=process_data,
    inputs=[
        gr.Textbox(label="URL"),
        gr.File(label="Click CSV"),
        gr.File(label="Exit Rate CSV"),
    ],
    outputs=gr.File(label="Integrated Data CSV"),
    title="Web Page Data Integration",
    description="Integrate scroll depth, text, click, and exit rate data from a web page.",
)

# Guard the server launch so importing this module (e.g. for testing or
# deployment tooling) does not start a web server as a side effect.
if __name__ == "__main__":
    iface.launch()