Spaces:
Sleeping
Sleeping
Refactor to simple Gradio app for HF Space
Browse files- README.md +16 -54
- app.py +156 -0
- app/api/source-breakdown/route.ts +0 -122
- app/api/stats/route.ts +0 -150
- app/globals.css +0 -27
- app/layout.tsx +0 -19
- app/page.module.css +0 -68
- app/page.tsx +0 -96
- components/Charts.module.css +0 -40
- components/Charts.tsx +0 -116
- next.config.js +0 -10
- package.json +0 -25
- requirements.txt +4 -0
- tsconfig.json +0 -27
- vercel.json +0 -10
README.md
CHANGED
|
@@ -1,54 +1,16 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
-
|
| 14 |
-
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
1. Install dependencies:
|
| 19 |
-
```bash
|
| 20 |
-
npm install
|
| 21 |
-
```
|
| 22 |
-
|
| 23 |
-
2. Run the development server:
|
| 24 |
-
```bash
|
| 25 |
-
npm run dev
|
| 26 |
-
```
|
| 27 |
-
|
| 28 |
-
3. Open [http://localhost:3000](http://localhost:3000) in your browser
|
| 29 |
-
|
| 30 |
-
## Deployment to Vercel
|
| 31 |
-
|
| 32 |
-
1. Push your code to GitHub
|
| 33 |
-
2. Import the project in Vercel
|
| 34 |
-
3. Deploy (no environment variables needed for basic functionality)
|
| 35 |
-
|
| 36 |
-
The app will automatically:
|
| 37 |
-
- Cache aggregated statistics to avoid reprocessing
|
| 38 |
-
- Handle timeouts gracefully by processing smaller samples
|
| 39 |
-
- Display loading states and error messages
|
| 40 |
-
|
| 41 |
-
## Technical Details
|
| 42 |
-
|
| 43 |
-
- **Framework**: Next.js 14 with TypeScript
|
| 44 |
-
- **Charts**: Recharts
|
| 45 |
-
- **Data Source**: Hugging Face Datasets Server API
|
| 46 |
-
- **Caching**: In-memory cache (1 hour TTL)
|
| 47 |
-
- **Timeout Management**: 25-second timeout with fallback to smaller samples
|
| 48 |
-
|
| 49 |
-
## Notes
|
| 50 |
-
|
| 51 |
-
- Due to Vercel's serverless function timeout limits (10s free, 50s pro), the app processes a sample of the dataset (10,000-50,000 rows) rather than the full 3.5M rows
|
| 52 |
-
- Results are cached for 1 hour to improve performance
|
| 53 |
-
- The dataset is accessed via Hugging Face's Datasets Server API
|
| 54 |
-
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: ShareLM Dataset Analysis
|
| 3 |
+
emoji: 📊
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: pink
|
| 6 |
+
sdk: gradio
|
| 7 |
+
pinned: false
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
# ShareLM Dataset Analysis
|
| 11 |
+
|
| 12 |
+
Interactive dashboard for analyzing the ShareLM Hugging Face dataset with visualizations of:
|
| 13 |
+
- Source breakdown (pie chart)
|
| 14 |
+
- Conversation counts per day (line chart)
|
| 15 |
+
|
| 16 |
+
The app fetches a sample of rows (up to 500) from the ShareLM dataset through the Hugging Face Datasets Server API and displays interactive charts using Plotly.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import plotly.graph_objects as go
|
| 3 |
+
import plotly.express as px
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
import requests
|
| 6 |
+
from collections import defaultdict
|
| 7 |
+
|
| 8 |
+
HF_DATASET_API = 'https://datasets-server.huggingface.co/rows'
|
| 9 |
+
DATASET_NAME = 'shachardon/ShareLM'
|
| 10 |
+
|
| 11 |
+
def fetch_dataset_sample(max_rows=500):
    """Fetch a sample of rows from the Hugging Face dataset.

    Rows are requested from ``HF_DATASET_API`` page by page, at most
    ``MAX_BATCH_SIZE`` (100) rows per request and at most ``MAX_BATCHES``
    (10) requests, so no more than 1,000 rows are ever returned regardless
    of ``max_rows``.

    Args:
        max_rows: Upper bound on the number of rows to request.

    Returns:
        A list with the entries found under the ``rows`` key of each
        successful response, in request order. May be shorter than
        ``max_rows`` when later pages fail or come back empty.

    Raises:
        requests.RequestException: If the first page cannot be fetched.
        ValueError: If the first page's body is not valid JSON.
        Failures on later pages are printed and skipped so that a partial
        sample is still returned.
    """
    # Function-scope import: only needed for the pause between pages.
    import time

    MAX_BATCH_SIZE = 100
    MAX_BATCHES = 10
    batches = min((max_rows + MAX_BATCH_SIZE - 1) // MAX_BATCH_SIZE, MAX_BATCHES)
    all_rows = []

    for i in range(batches):
        offset = i * MAX_BATCH_SIZE
        length = min(MAX_BATCH_SIZE, max_rows - offset)

        if length <= 0:
            break

        # Let requests build and percent-encode the query string
        # (the '/' in the dataset name becomes %2F).
        params = {
            'dataset': DATASET_NAME,
            'config': 'default',
            'split': 'train',
            'offset': offset,
            'length': length,
        }

        try:
            response = requests.get(
                HF_DATASET_API,
                params=params,
                headers={'Accept': 'application/json'},
                timeout=25,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"Error fetching batch {i}: {e}")
            if i == 0:
                # Nothing has been fetched yet: let the caller report it.
                raise
            continue

        # Guard against payloads that are not a JSON object with a row list.
        page = data.get('rows') if isinstance(data, dict) else None
        if page and isinstance(page, list):
            all_rows.extend(page)

        # Small delay to avoid rate limiting
        if i < batches - 1:
            time.sleep(0.1)

    return all_rows
|
| 44 |
+
|
| 45 |
+
def process_data():
    """Fetch a dataset sample and build the dashboard outputs.

    Returns:
        A 3-tuple ``(pie_figure, line_figure, info_text)`` matching the
        three Gradio output components this function is bound to. Either
        figure is ``None`` when there is nothing to plot. On failure both
        figures are ``None`` and ``info_text`` carries the error message.
    """
    try:
        rows = fetch_dataset_sample(500)

        if not rows:
            # Must be three values, one per bound output component.
            return (None, None, "No data fetched. Please try again.")

        source_counts = defaultdict(int)
        time_series = defaultdict(int)

        for row in rows:
            # Entries may wrap the record under a 'row' key or be the
            # record itself.
            row_data = row.get('row', row) if isinstance(row, dict) else row
            if not isinstance(row_data, dict):
                # Malformed entry: count it as 'unknown' instead of
                # aborting the whole run.
                row_data = {}

            # Count by source (missing, None or empty -> 'unknown')
            source = row_data.get('source') or 'unknown'
            source_counts[source] += 1

            # Count by date
            timestamp = row_data.get('timestamp')
            if timestamp:
                try:
                    # Older fromisoformat() versions reject a trailing 'Z'.
                    date = datetime.fromisoformat(str(timestamp).replace('Z', '+00:00'))
                    date_key = date.strftime('%Y-%m-%d')
                except ValueError:
                    # Unparseable timestamp: leave it out of the time series.
                    pass
                else:
                    time_series[date_key] += 1

        # Create source breakdown pie chart
        if source_counts:
            fig_pie = go.Figure(data=[go.Pie(
                labels=list(source_counts.keys()),
                values=list(source_counts.values()),
                hole=0.4,
                textinfo='label+percent',
                textposition='outside',
            )])
            fig_pie.update_layout(
                title="Source Breakdown",
                height=500,
                showlegend=True,
            )
        else:
            fig_pie = None

        # Create time series chart
        if time_series:
            # 'YYYY-MM-DD' keys sort chronologically as plain strings.
            sorted_dates = sorted(time_series)
            counts = [time_series[date] for date in sorted_dates]

            fig_line = go.Figure()
            fig_line.add_trace(go.Scatter(
                x=sorted_dates,
                y=counts,
                mode='lines+markers',
                name='Conversations',
                line=dict(width=2),
            ))
            fig_line.update_layout(
                title="Total Count Over Time",
                xaxis_title="Date",
                yaxis_title="Count",
                height=500,
                hovermode='x unified',
            )
        else:
            fig_line = None

        total = sum(source_counts.values())
        info = f"Processed {len(rows)} rows\nTotal conversations: {total:,}\nSources: {len(source_counts)}\nTime points: {len(time_series)}"

        return (fig_pie, fig_line, info)

    except Exception as e:
        # UI boundary: show any failure in the statistics box rather than
        # raising into Gradio.
        return (None, None, f"Error: {str(e)}")
|
| 122 |
+
|
| 123 |
+
def create_interface():
    """Assemble and return the Gradio Blocks app.

    The page has a title, a trigger button, two plots side by side and a
    read-only statistics box. ``process_data`` is run both when the button
    is clicked and when the page loads.
    """
    with gr.Blocks(title="ShareLM Dataset Analysis", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# ShareLM Dataset Analysis")
        gr.Markdown("Analyzing conversations from the ShareLM Hugging Face dataset")

        with gr.Row():
            load_button = gr.Button("Load & Analyze Data", variant="primary")

        with gr.Row():
            with gr.Column():
                source_plot = gr.Plot(label="Source Breakdown")
            with gr.Column():
                timeline_plot = gr.Plot(label="Time Series")

        stats_box = gr.Textbox(label="Statistics", lines=4, interactive=False)

        # Same order as the tuple returned by process_data.
        targets = [source_plot, timeline_plot, stats_box]

        # Refresh on demand.
        load_button.click(fn=process_data, outputs=targets)

        # Load data on startup
        demo.load(fn=process_data, outputs=targets)

    return demo
|
| 152 |
+
|
| 153 |
+
if __name__ == "__main__":
    # Build the Blocks app, then start serving it.
    demo = create_interface()
    demo.launch()
|
| 156 |
+
|
app/api/source-breakdown/route.ts
DELETED
|
@@ -1,122 +0,0 @@
|
|
| 1 |
-
import { NextResponse } from 'next/server';
|
| 2 |
-
|
| 3 |
-
// In-memory cache
|
| 4 |
-
let cachedBreakdown: {
|
| 5 |
-
data: Array<{ name: string; value: number }>;
|
| 6 |
-
lastUpdated: number;
|
| 7 |
-
} | null = null;
|
| 8 |
-
|
| 9 |
-
const CACHE_TTL = 60 * 60 * 1000; // 1 hour
|
| 10 |
-
|
| 11 |
-
const HF_DATASET_API = 'https://datasets-server.huggingface.co/parquet';
|
| 12 |
-
|
| 13 |
-
async function fetchDatasetSample(maxRows: number = 50000) {
|
| 14 |
-
const url = `${HF_DATASET_API}?dataset=shachardon%2FShareLM&config=default&split=train&offset=0&length=${maxRows}`;
|
| 15 |
-
|
| 16 |
-
try {
|
| 17 |
-
const controller = new AbortController();
|
| 18 |
-
const timeoutId = setTimeout(() => controller.abort(), 25000); // 25 second timeout
|
| 19 |
-
|
| 20 |
-
const response = await fetch(url, {
|
| 21 |
-
headers: {
|
| 22 |
-
'Accept': 'application/json',
|
| 23 |
-
},
|
| 24 |
-
signal: controller.signal,
|
| 25 |
-
});
|
| 26 |
-
|
| 27 |
-
clearTimeout(timeoutId);
|
| 28 |
-
|
| 29 |
-
if (!response.ok) {
|
| 30 |
-
throw new Error(`HTTP error! status: ${response.status}`);
|
| 31 |
-
}
|
| 32 |
-
|
| 33 |
-
const data = await response.json();
|
| 34 |
-
return data;
|
| 35 |
-
} catch (error) {
|
| 36 |
-
if (error instanceof Error && error.name === 'AbortError') {
|
| 37 |
-
throw new Error('Request timeout - dataset is too large to process in time limit');
|
| 38 |
-
}
|
| 39 |
-
console.error('Error fetching from HF API:', error);
|
| 40 |
-
throw error;
|
| 41 |
-
}
|
| 42 |
-
}
|
| 43 |
-
|
| 44 |
-
export async function GET() {
|
| 45 |
-
try {
|
| 46 |
-
// Check cache first
|
| 47 |
-
if (cachedBreakdown && Date.now() - cachedBreakdown.lastUpdated < CACHE_TTL) {
|
| 48 |
-
return NextResponse.json({
|
| 49 |
-
data: cachedBreakdown.data,
|
| 50 |
-
cached: true,
|
| 51 |
-
});
|
| 52 |
-
}
|
| 53 |
-
|
| 54 |
-
// Fetch dataset sample - start with smaller sample
|
| 55 |
-
let datasetData;
|
| 56 |
-
let maxRows = 10000;
|
| 57 |
-
|
| 58 |
-
try {
|
| 59 |
-
datasetData = await fetchDatasetSample(maxRows);
|
| 60 |
-
} catch (error) {
|
| 61 |
-
// If timeout, try with even smaller sample
|
| 62 |
-
if (error instanceof Error && error.message.includes('timeout')) {
|
| 63 |
-
maxRows = 5000;
|
| 64 |
-
datasetData = await fetchDatasetSample(maxRows);
|
| 65 |
-
} else {
|
| 66 |
-
throw error;
|
| 67 |
-
}
|
| 68 |
-
}
|
| 69 |
-
|
| 70 |
-
const sourceCounts: Record<string, number> = {};
|
| 71 |
-
|
| 72 |
-
// Handle different response formats
|
| 73 |
-
let rows: any[] = [];
|
| 74 |
-
if (datasetData.rows) {
|
| 75 |
-
rows = datasetData.rows;
|
| 76 |
-
} else if (Array.isArray(datasetData)) {
|
| 77 |
-
rows = datasetData;
|
| 78 |
-
} else if (datasetData.data) {
|
| 79 |
-
rows = datasetData.data;
|
| 80 |
-
}
|
| 81 |
-
|
| 82 |
-
let processedCount = 0;
|
| 83 |
-
|
| 84 |
-
for (const row of rows) {
|
| 85 |
-
processedCount++;
|
| 86 |
-
|
| 87 |
-
// Get row data - handle different formats
|
| 88 |
-
let rowData: any = {};
|
| 89 |
-
if (row.row) {
|
| 90 |
-
rowData = row.row;
|
| 91 |
-
} else if (typeof row === 'object') {
|
| 92 |
-
rowData = row;
|
| 93 |
-
}
|
| 94 |
-
|
| 95 |
-
const source = rowData.source || 'unknown';
|
| 96 |
-
sourceCounts[source] = (sourceCounts[source] || 0) + 1;
|
| 97 |
-
}
|
| 98 |
-
|
| 99 |
-
// Convert to array format for chart
|
| 100 |
-
const data = Object.entries(sourceCounts)
|
| 101 |
-
.map(([name, value]) => ({ name, value }))
|
| 102 |
-
.sort((a, b) => b.value - a.value); // Sort by value descending
|
| 103 |
-
|
| 104 |
-
// Update cache
|
| 105 |
-
cachedBreakdown = {
|
| 106 |
-
data,
|
| 107 |
-
lastUpdated: Date.now(),
|
| 108 |
-
};
|
| 109 |
-
|
| 110 |
-
return NextResponse.json({
|
| 111 |
-
data,
|
| 112 |
-
processedCount,
|
| 113 |
-
cached: false,
|
| 114 |
-
});
|
| 115 |
-
} catch (error) {
|
| 116 |
-
console.error('Error processing source breakdown:', error);
|
| 117 |
-
return NextResponse.json(
|
| 118 |
-
{ error: 'Failed to process dataset', details: error instanceof Error ? error.message : 'Unknown error' },
|
| 119 |
-
{ status: 500 }
|
| 120 |
-
);
|
| 121 |
-
}
|
| 122 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/api/stats/route.ts
DELETED
|
@@ -1,150 +0,0 @@
|
|
| 1 |
-
import { NextResponse } from 'next/server';
|
| 2 |
-
|
| 3 |
-
// In-memory cache (will be reset on serverless function restart)
|
| 4 |
-
let cachedStats: {
|
| 5 |
-
sourceBreakdown: Record<string, number>;
|
| 6 |
-
timeSeries: Record<string, number>;
|
| 7 |
-
lastUpdated: number;
|
| 8 |
-
} | null = null;
|
| 9 |
-
|
| 10 |
-
const CACHE_TTL = 60 * 60 * 1000; // 1 hour in milliseconds
|
| 11 |
-
|
| 12 |
-
// Hugging Face API endpoint for dataset
|
| 13 |
-
const HF_DATASET_API = 'https://datasets-server.huggingface.co/parquet';
|
| 14 |
-
|
| 15 |
-
async function fetchDatasetSample(maxRows: number = 50000) {
|
| 16 |
-
// Use Hugging Face Datasets Server API to get a sample
|
| 17 |
-
// For large datasets, we'll process a sample to avoid timeout
|
| 18 |
-
const url = `${HF_DATASET_API}?dataset=shachardon%2FShareLM&config=default&split=train&offset=0&length=${maxRows}`;
|
| 19 |
-
|
| 20 |
-
try {
|
| 21 |
-
const controller = new AbortController();
|
| 22 |
-
const timeoutId = setTimeout(() => controller.abort(), 25000); // 25 second timeout
|
| 23 |
-
|
| 24 |
-
const response = await fetch(url, {
|
| 25 |
-
headers: {
|
| 26 |
-
'Accept': 'application/json',
|
| 27 |
-
},
|
| 28 |
-
signal: controller.signal,
|
| 29 |
-
});
|
| 30 |
-
|
| 31 |
-
clearTimeout(timeoutId);
|
| 32 |
-
|
| 33 |
-
if (!response.ok) {
|
| 34 |
-
throw new Error(`HTTP error! status: ${response.status}`);
|
| 35 |
-
}
|
| 36 |
-
|
| 37 |
-
const data = await response.json();
|
| 38 |
-
return data;
|
| 39 |
-
} catch (error) {
|
| 40 |
-
if (error instanceof Error && error.name === 'AbortError') {
|
| 41 |
-
throw new Error('Request timeout - dataset is too large to process in time limit');
|
| 42 |
-
}
|
| 43 |
-
console.error('Error fetching from HF API:', error);
|
| 44 |
-
throw error;
|
| 45 |
-
}
|
| 46 |
-
}
|
| 47 |
-
|
| 48 |
-
export async function GET() {
|
| 49 |
-
try {
|
| 50 |
-
// Check cache first
|
| 51 |
-
if (cachedStats && Date.now() - cachedStats.lastUpdated < CACHE_TTL) {
|
| 52 |
-
return NextResponse.json({
|
| 53 |
-
sourceBreakdown: cachedStats.sourceBreakdown,
|
| 54 |
-
timeSeries: cachedStats.timeSeries,
|
| 55 |
-
cached: true,
|
| 56 |
-
});
|
| 57 |
-
}
|
| 58 |
-
|
| 59 |
-
// Fetch dataset sample from Hugging Face API
|
| 60 |
-
// Start with smaller sample to avoid timeout
|
| 61 |
-
let datasetData;
|
| 62 |
-
let maxRows = 10000;
|
| 63 |
-
|
| 64 |
-
try {
|
| 65 |
-
datasetData = await fetchDatasetSample(maxRows);
|
| 66 |
-
} catch (error) {
|
| 67 |
-
// If timeout, try with even smaller sample
|
| 68 |
-
if (error instanceof Error && error.message.includes('timeout')) {
|
| 69 |
-
maxRows = 5000;
|
| 70 |
-
datasetData = await fetchDatasetSample(maxRows);
|
| 71 |
-
} else {
|
| 72 |
-
throw error;
|
| 73 |
-
}
|
| 74 |
-
}
|
| 75 |
-
|
| 76 |
-
const sourceBreakdown: Record<string, number> = {};
|
| 77 |
-
const timeSeries: Record<string, number> = {};
|
| 78 |
-
|
| 79 |
-
// Process the data - handle different response formats
|
| 80 |
-
let rows: any[] = [];
|
| 81 |
-
if (datasetData.rows) {
|
| 82 |
-
rows = datasetData.rows;
|
| 83 |
-
} else if (Array.isArray(datasetData)) {
|
| 84 |
-
rows = datasetData;
|
| 85 |
-
} else if (datasetData.data) {
|
| 86 |
-
rows = datasetData.data;
|
| 87 |
-
}
|
| 88 |
-
|
| 89 |
-
let processedCount = 0;
|
| 90 |
-
|
| 91 |
-
for (const row of rows) {
|
| 92 |
-
processedCount++;
|
| 93 |
-
|
| 94 |
-
// Get row data - handle different formats
|
| 95 |
-
let rowData: any = {};
|
| 96 |
-
if (row.row) {
|
| 97 |
-
rowData = row.row;
|
| 98 |
-
} else if (typeof row === 'object') {
|
| 99 |
-
rowData = row;
|
| 100 |
-
}
|
| 101 |
-
|
| 102 |
-
// Aggregate by source
|
| 103 |
-
const source = rowData.source || 'unknown';
|
| 104 |
-
sourceBreakdown[source] = (sourceBreakdown[source] || 0) + 1;
|
| 105 |
-
|
| 106 |
-
// Aggregate by timestamp (group by date)
|
| 107 |
-
if (rowData.timestamp) {
|
| 108 |
-
try {
|
| 109 |
-
const date = new Date(rowData.timestamp);
|
| 110 |
-
if (!isNaN(date.getTime())) {
|
| 111 |
-
const dateKey = date.toISOString().split('T')[0]; // YYYY-MM-DD
|
| 112 |
-
timeSeries[dateKey] = (timeSeries[dateKey] || 0) + 1;
|
| 113 |
-
}
|
| 114 |
-
} catch (e) {
|
| 115 |
-
// Skip invalid timestamps
|
| 116 |
-
}
|
| 117 |
-
}
|
| 118 |
-
}
|
| 119 |
-
|
| 120 |
-
// Sort time series by date
|
| 121 |
-
const sortedTimeSeries: Record<string, number> = {};
|
| 122 |
-
Object.keys(timeSeries)
|
| 123 |
-
.sort()
|
| 124 |
-
.forEach((key) => {
|
| 125 |
-
sortedTimeSeries[key] = timeSeries[key];
|
| 126 |
-
});
|
| 127 |
-
|
| 128 |
-
// Update cache
|
| 129 |
-
cachedStats = {
|
| 130 |
-
sourceBreakdown,
|
| 131 |
-
timeSeries: sortedTimeSeries,
|
| 132 |
-
lastUpdated: Date.now(),
|
| 133 |
-
};
|
| 134 |
-
|
| 135 |
-
return NextResponse.json({
|
| 136 |
-
sourceBreakdown,
|
| 137 |
-
timeSeries: sortedTimeSeries,
|
| 138 |
-
processedCount,
|
| 139 |
-
cached: false,
|
| 140 |
-
sampleSize: maxRows,
|
| 141 |
-
note: processedCount < maxRows ? 'Full sample processed' : `Processed ${processedCount} rows (limited to avoid timeout)`,
|
| 142 |
-
});
|
| 143 |
-
} catch (error) {
|
| 144 |
-
console.error('Error processing dataset:', error);
|
| 145 |
-
return NextResponse.json(
|
| 146 |
-
{ error: 'Failed to process dataset', details: error instanceof Error ? error.message : 'Unknown error' },
|
| 147 |
-
{ status: 500 }
|
| 148 |
-
);
|
| 149 |
-
}
|
| 150 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/globals.css
DELETED
|
@@ -1,27 +0,0 @@
|
|
| 1 |
-
* {
|
| 2 |
-
box-sizing: border-box;
|
| 3 |
-
padding: 0;
|
| 4 |
-
margin: 0;
|
| 5 |
-
}
|
| 6 |
-
|
| 7 |
-
html,
|
| 8 |
-
body {
|
| 9 |
-
max-width: 100vw;
|
| 10 |
-
overflow-x: hidden;
|
| 11 |
-
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
|
| 12 |
-
'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
|
| 13 |
-
sans-serif;
|
| 14 |
-
-webkit-font-smoothing: antialiased;
|
| 15 |
-
-moz-osx-font-smoothing: grayscale;
|
| 16 |
-
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 17 |
-
min-height: 100vh;
|
| 18 |
-
}
|
| 19 |
-
|
| 20 |
-
body {
|
| 21 |
-
color: #333;
|
| 22 |
-
}
|
| 23 |
-
|
| 24 |
-
a {
|
| 25 |
-
color: inherit;
|
| 26 |
-
text-decoration: none;
|
| 27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/layout.tsx
DELETED
|
@@ -1,19 +0,0 @@
|
|
| 1 |
-
import type { Metadata } from 'next'
|
| 2 |
-
import './globals.css'
|
| 3 |
-
|
| 4 |
-
export const metadata: Metadata = {
|
| 5 |
-
title: 'ShareLM Dataset Analysis',
|
| 6 |
-
description: 'Analysis dashboard for the ShareLM Hugging Face dataset',
|
| 7 |
-
}
|
| 8 |
-
|
| 9 |
-
export default function RootLayout({
|
| 10 |
-
children,
|
| 11 |
-
}: {
|
| 12 |
-
children: React.ReactNode
|
| 13 |
-
}) {
|
| 14 |
-
return (
|
| 15 |
-
<html lang="en">
|
| 16 |
-
<body>{children}</body>
|
| 17 |
-
</html>
|
| 18 |
-
)
|
| 19 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/page.module.css
DELETED
|
@@ -1,68 +0,0 @@
|
|
| 1 |
-
.mainContainer {
|
| 2 |
-
padding: 2rem;
|
| 3 |
-
max-width: 1400px;
|
| 4 |
-
margin: 0 auto;
|
| 5 |
-
}
|
| 6 |
-
|
| 7 |
-
.header {
|
| 8 |
-
text-align: center;
|
| 9 |
-
margin-bottom: 3rem;
|
| 10 |
-
color: white;
|
| 11 |
-
}
|
| 12 |
-
|
| 13 |
-
.header h1 {
|
| 14 |
-
font-size: 3rem;
|
| 15 |
-
font-weight: 700;
|
| 16 |
-
margin-bottom: 0.5rem;
|
| 17 |
-
text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.2);
|
| 18 |
-
}
|
| 19 |
-
|
| 20 |
-
.subtitle {
|
| 21 |
-
font-size: 1.2rem;
|
| 22 |
-
opacity: 0.9;
|
| 23 |
-
}
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
.errorBanner {
|
| 27 |
-
background: #fee;
|
| 28 |
-
border: 1px solid #fcc;
|
| 29 |
-
border-radius: 8px;
|
| 30 |
-
padding: 1rem;
|
| 31 |
-
margin-bottom: 2rem;
|
| 32 |
-
display: flex;
|
| 33 |
-
justify-content: space-between;
|
| 34 |
-
align-items: center;
|
| 35 |
-
}
|
| 36 |
-
|
| 37 |
-
.errorBanner button {
|
| 38 |
-
background: #667eea;
|
| 39 |
-
color: white;
|
| 40 |
-
border: none;
|
| 41 |
-
padding: 0.5rem 1rem;
|
| 42 |
-
border-radius: 4px;
|
| 43 |
-
cursor: pointer;
|
| 44 |
-
font-size: 0.9rem;
|
| 45 |
-
}
|
| 46 |
-
|
| 47 |
-
.errorBanner button:hover {
|
| 48 |
-
background: #5568d3;
|
| 49 |
-
}
|
| 50 |
-
|
| 51 |
-
.statsInfo {
|
| 52 |
-
background: white;
|
| 53 |
-
border-radius: 12px;
|
| 54 |
-
padding: 1.5rem;
|
| 55 |
-
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.2);
|
| 56 |
-
display: flex;
|
| 57 |
-
justify-content: space-around;
|
| 58 |
-
flex-wrap: wrap;
|
| 59 |
-
gap: 1rem;
|
| 60 |
-
text-align: center;
|
| 61 |
-
}
|
| 62 |
-
|
| 63 |
-
.statsInfo p {
|
| 64 |
-
font-size: 1rem;
|
| 65 |
-
color: #333;
|
| 66 |
-
font-weight: 500;
|
| 67 |
-
}
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/page.tsx
DELETED
|
@@ -1,96 +0,0 @@
|
|
| 1 |
-
'use client';
|
| 2 |
-
|
| 3 |
-
import { useEffect, useState } from 'react';
|
| 4 |
-
import { Charts } from '@/components/Charts';
|
| 5 |
-
import styles from './page.module.css';
|
| 6 |
-
|
| 7 |
-
interface SourceData {
|
| 8 |
-
name: string;
|
| 9 |
-
value: number;
|
| 10 |
-
}
|
| 11 |
-
|
| 12 |
-
interface TimeSeriesData {
|
| 13 |
-
date: string;
|
| 14 |
-
count: number;
|
| 15 |
-
}
|
| 16 |
-
|
| 17 |
-
export default function Home() {
|
| 18 |
-
const [sourceData, setSourceData] = useState<SourceData[]>([]);
|
| 19 |
-
const [timeSeriesData, setTimeSeriesData] = useState<TimeSeriesData[]>([]);
|
| 20 |
-
const [loading, setLoading] = useState(true);
|
| 21 |
-
const [error, setError] = useState<string | null>(null);
|
| 22 |
-
|
| 23 |
-
useEffect(() => {
|
| 24 |
-
async function fetchData() {
|
| 25 |
-
try {
|
| 26 |
-
setLoading(true);
|
| 27 |
-
setError(null);
|
| 28 |
-
|
| 29 |
-
// Fetch stats from API
|
| 30 |
-
const response = await fetch('/api/stats');
|
| 31 |
-
|
| 32 |
-
if (!response.ok) {
|
| 33 |
-
throw new Error(`Failed to fetch data: ${response.statusText}`);
|
| 34 |
-
}
|
| 35 |
-
|
| 36 |
-
const data = await response.json();
|
| 37 |
-
|
| 38 |
-
if (data.error) {
|
| 39 |
-
throw new Error(data.error);
|
| 40 |
-
}
|
| 41 |
-
|
| 42 |
-
// Format source breakdown data
|
| 43 |
-
const sourceBreakdown = Object.entries(data.sourceBreakdown || {}).map(([name, value]) => ({
|
| 44 |
-
name,
|
| 45 |
-
value: value as number,
|
| 46 |
-
})).sort((a, b) => b.value - a.value);
|
| 47 |
-
|
| 48 |
-
// Format time series data
|
| 49 |
-
const timeSeries = Object.entries(data.timeSeries || {}).map(([date, count]) => ({
|
| 50 |
-
date,
|
| 51 |
-
count: count as number,
|
| 52 |
-
})).sort((a, b) => a.date.localeCompare(b.date));
|
| 53 |
-
|
| 54 |
-
setSourceData(sourceBreakdown);
|
| 55 |
-
setTimeSeriesData(timeSeries);
|
| 56 |
-
} catch (err) {
|
| 57 |
-
console.error('Error fetching data:', err);
|
| 58 |
-
setError(err instanceof Error ? err.message : 'An unknown error occurred');
|
| 59 |
-
} finally {
|
| 60 |
-
setLoading(false);
|
| 61 |
-
}
|
| 62 |
-
}
|
| 63 |
-
|
| 64 |
-
fetchData();
|
| 65 |
-
}, []);
|
| 66 |
-
|
| 67 |
-
return (
|
| 68 |
-
<main className={styles.mainContainer}>
|
| 69 |
-
<div className={styles.header}>
|
| 70 |
-
<h1>ShareLM Dataset Analysis</h1>
|
| 71 |
-
<p className={styles.subtitle}>Analyzing conversations from the ShareLM Hugging Face dataset</p>
|
| 72 |
-
</div>
|
| 73 |
-
|
| 74 |
-
{error && (
|
| 75 |
-
<div className={styles.errorBanner}>
|
| 76 |
-
<p>Error: {error}</p>
|
| 77 |
-
<button onClick={() => window.location.reload()}>Retry</button>
|
| 78 |
-
</div>
|
| 79 |
-
)}
|
| 80 |
-
|
| 81 |
-
<Charts
|
| 82 |
-
sourceData={sourceData}
|
| 83 |
-
timeSeriesData={timeSeriesData}
|
| 84 |
-
loading={loading}
|
| 85 |
-
/>
|
| 86 |
-
|
| 87 |
-
{!loading && !error && (
|
| 88 |
-
<div className={styles.statsInfo}>
|
| 89 |
-
<p>Total sources: {sourceData.length}</p>
|
| 90 |
-
<p>Total time points: {timeSeriesData.length}</p>
|
| 91 |
-
<p>Total conversations analyzed: {sourceData.reduce((sum, item) => sum + item.value, 0).toLocaleString()}</p>
|
| 92 |
-
</div>
|
| 93 |
-
)}
|
| 94 |
-
</main>
|
| 95 |
-
);
|
| 96 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
components/Charts.module.css
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
.chartContainer {
|
| 2 |
-
background: white;
|
| 3 |
-
border-radius: 12px;
|
| 4 |
-
padding: 2rem;
|
| 5 |
-
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.2);
|
| 6 |
-
}
|
| 7 |
-
|
| 8 |
-
.chartContainer h2 {
|
| 9 |
-
margin-bottom: 1.5rem;
|
| 10 |
-
color: #333;
|
| 11 |
-
font-size: 1.5rem;
|
| 12 |
-
text-align: center;
|
| 13 |
-
}
|
| 14 |
-
|
| 15 |
-
.loadingState,
|
| 16 |
-
.errorState {
|
| 17 |
-
text-align: center;
|
| 18 |
-
padding: 3rem;
|
| 19 |
-
color: #666;
|
| 20 |
-
font-size: 1.1rem;
|
| 21 |
-
}
|
| 22 |
-
|
| 23 |
-
.chartsWrapper {
|
| 24 |
-
display: grid;
|
| 25 |
-
grid-template-columns: 1fr;
|
| 26 |
-
gap: 2rem;
|
| 27 |
-
margin-bottom: 2rem;
|
| 28 |
-
}
|
| 29 |
-
|
| 30 |
-
@media (min-width: 768px) {
|
| 31 |
-
.chartsWrapper {
|
| 32 |
-
grid-template-columns: 1fr 1fr;
|
| 33 |
-
}
|
| 34 |
-
}
|
| 35 |
-
|
| 36 |
-
@media (min-width: 1200px) {
|
| 37 |
-
.chartsWrapper {
|
| 38 |
-
grid-template-columns: 1fr 1fr;
|
| 39 |
-
}
|
| 40 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
components/Charts.tsx
DELETED
|
@@ -1,116 +0,0 @@
|
|
| 1 |
-
'use client';
|
| 2 |
-
|
| 3 |
-
import { PieChart, Pie, Cell, ResponsiveContainer, Tooltip, Legend } from 'recharts';
|
| 4 |
-
import { LineChart, Line, XAxis, YAxis, CartesianGrid, ResponsiveContainer as LineResponsiveContainer } from 'recharts';
|
| 5 |
-
import styles from './Charts.module.css';
|
| 6 |
-
|
| 7 |
-
interface SourceData {
|
| 8 |
-
name: string;
|
| 9 |
-
value: number;
|
| 10 |
-
}
|
| 11 |
-
|
| 12 |
-
interface ChartsProps {
|
| 13 |
-
sourceData: SourceData[];
|
| 14 |
-
timeSeriesData: Array<{ date: string; count: number }>;
|
| 15 |
-
loading: boolean;
|
| 16 |
-
}
|
| 17 |
-
|
| 18 |
-
const COLORS = ['#0088FE', '#00C49F', '#FFBB28', '#FF8042', '#8884d8', '#82ca9d', '#ffc658', '#ff7300'];
|
| 19 |
-
|
| 20 |
-
export function SourceDoughnutChart({ data, loading }: { data: SourceData[]; loading: boolean }) {
|
| 21 |
-
if (loading) {
|
| 22 |
-
return (
|
| 23 |
-
<div className={styles.chartContainer}>
|
| 24 |
-
<div className={styles.loadingState}>Loading source breakdown...</div>
|
| 25 |
-
</div>
|
| 26 |
-
);
|
| 27 |
-
}
|
| 28 |
-
|
| 29 |
-
if (!data || data.length === 0) {
|
| 30 |
-
return (
|
| 31 |
-
<div className={styles.chartContainer}>
|
| 32 |
-
<div className={styles.errorState}>No data available</div>
|
| 33 |
-
</div>
|
| 34 |
-
);
|
| 35 |
-
}
|
| 36 |
-
|
| 37 |
-
return (
|
| 38 |
-
<div className={styles.chartContainer}>
|
| 39 |
-
<h2>Source Breakdown</h2>
|
| 40 |
-
<ResponsiveContainer width="100%" height={400}>
|
| 41 |
-
<PieChart>
|
| 42 |
-
<Pie
|
| 43 |
-
data={data}
|
| 44 |
-
cx="50%"
|
| 45 |
-
cy="50%"
|
| 46 |
-
labelLine={false}
|
| 47 |
-
label={({ name, percent }) => `${name}: ${(percent * 100).toFixed(1)}%`}
|
| 48 |
-
outerRadius={120}
|
| 49 |
-
innerRadius={60}
|
| 50 |
-
fill="#8884d8"
|
| 51 |
-
dataKey="value"
|
| 52 |
-
>
|
| 53 |
-
{data.map((entry, index) => (
|
| 54 |
-
<Cell key={`cell-${index}`} fill={COLORS[index % COLORS.length]} />
|
| 55 |
-
))}
|
| 56 |
-
</Pie>
|
| 57 |
-
<Tooltip />
|
| 58 |
-
<Legend />
|
| 59 |
-
</PieChart>
|
| 60 |
-
</ResponsiveContainer>
|
| 61 |
-
</div>
|
| 62 |
-
);
|
| 63 |
-
}
|
| 64 |
-
|
| 65 |
-
export function TimeSeriesChart({ data, loading }: { data: Array<{ date: string; count: number }>; loading: boolean }) {
|
| 66 |
-
if (loading) {
|
| 67 |
-
return (
|
| 68 |
-
<div className={styles.chartContainer}>
|
| 69 |
-
<div className={styles.loadingState}>Loading time series data...</div>
|
| 70 |
-
</div>
|
| 71 |
-
);
|
| 72 |
-
}
|
| 73 |
-
|
| 74 |
-
if (!data || data.length === 0) {
|
| 75 |
-
return (
|
| 76 |
-
<div className={styles.chartContainer}>
|
| 77 |
-
<div className={styles.errorState}>No data available</div>
|
| 78 |
-
</div>
|
| 79 |
-
);
|
| 80 |
-
}
|
| 81 |
-
|
| 82 |
-
// Format data for chart (sample if too many points)
|
| 83 |
-
const chartData = data.length > 100
|
| 84 |
-
? data.filter((_, i) => i % Math.ceil(data.length / 100) === 0)
|
| 85 |
-
: data;
|
| 86 |
-
|
| 87 |
-
return (
|
| 88 |
-
<div className={styles.chartContainer}>
|
| 89 |
-
<h2>Total Count Over Time</h2>
|
| 90 |
-
<LineResponsiveContainer width="100%" height={400}>
|
| 91 |
-
<LineChart data={chartData}>
|
| 92 |
-
<CartesianGrid strokeDasharray="3 3" />
|
| 93 |
-
<XAxis
|
| 94 |
-
dataKey="date"
|
| 95 |
-
angle={-45}
|
| 96 |
-
textAnchor="end"
|
| 97 |
-
height={100}
|
| 98 |
-
interval="preserveStartEnd"
|
| 99 |
-
/>
|
| 100 |
-
<YAxis />
|
| 101 |
-
<Tooltip />
|
| 102 |
-
<Line type="monotone" dataKey="count" stroke="#8884d8" strokeWidth={2} dot={{ r: 3 }} />
|
| 103 |
-
</LineChart>
|
| 104 |
-
</LineResponsiveContainer>
|
| 105 |
-
</div>
|
| 106 |
-
);
|
| 107 |
-
}
|
| 108 |
-
|
| 109 |
-
export function Charts({ sourceData, timeSeriesData, loading }: ChartsProps) {
|
| 110 |
-
return (
|
| 111 |
-
<div className={styles.chartsWrapper}>
|
| 112 |
-
<SourceDoughnutChart data={sourceData} loading={loading} />
|
| 113 |
-
<TimeSeriesChart data={timeSeriesData} loading={loading} />
|
| 114 |
-
</div>
|
| 115 |
-
);
|
| 116 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
next.config.js
DELETED
|
@@ -1,10 +0,0 @@
|
|
| 1 |
-
/** @type {import('next').NextConfig} */
|
| 2 |
-
const nextConfig = {
|
| 3 |
-
experimental: {
|
| 4 |
-
serverActions: {
|
| 5 |
-
bodySizeLimit: '10mb',
|
| 6 |
-
},
|
| 7 |
-
},
|
| 8 |
-
}
|
| 9 |
-
|
| 10 |
-
module.exports = nextConfig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
package.json
DELETED
|
@@ -1,25 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"name": "sharelm-analysis",
|
| 3 |
-
"version": "0.1.0",
|
| 4 |
-
"private": true,
|
| 5 |
-
"scripts": {
|
| 6 |
-
"dev": "next dev",
|
| 7 |
-
"build": "next build",
|
| 8 |
-
"start": "next start",
|
| 9 |
-
"lint": "next lint"
|
| 10 |
-
},
|
| 11 |
-
"dependencies": {
|
| 12 |
-
"next": "^14.2.0",
|
| 13 |
-
"react": "^18.3.0",
|
| 14 |
-
"react-dom": "^18.3.0",
|
| 15 |
-
"recharts": "^2.12.0"
|
| 16 |
-
},
|
| 17 |
-
"devDependencies": {
|
| 18 |
-
"@types/node": "^20.11.0",
|
| 19 |
-
"@types/react": "^18.2.0",
|
| 20 |
-
"@types/react-dom": "^18.2.0",
|
| 21 |
-
"typescript": "^5.3.0",
|
| 22 |
-
"eslint": "^8.56.0",
|
| 23 |
-
"eslint-config-next": "^14.2.0"
|
| 24 |
-
}
|
| 25 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0
|
| 2 |
+
plotly>=5.0.0
|
| 3 |
+
requests>=2.31.0
|
| 4 |
+
|
tsconfig.json
DELETED
|
@@ -1,27 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"compilerOptions": {
|
| 3 |
-
"target": "ES2020",
|
| 4 |
-
"lib": ["dom", "dom.iterable", "esnext"],
|
| 5 |
-
"allowJs": true,
|
| 6 |
-
"skipLibCheck": true,
|
| 7 |
-
"strict": true,
|
| 8 |
-
"noEmit": true,
|
| 9 |
-
"esModuleInterop": true,
|
| 10 |
-
"module": "esnext",
|
| 11 |
-
"moduleResolution": "bundler",
|
| 12 |
-
"resolveJsonModule": true,
|
| 13 |
-
"isolatedModules": true,
|
| 14 |
-
"jsx": "preserve",
|
| 15 |
-
"incremental": true,
|
| 16 |
-
"plugins": [
|
| 17 |
-
{
|
| 18 |
-
"name": "next"
|
| 19 |
-
}
|
| 20 |
-
],
|
| 21 |
-
"paths": {
|
| 22 |
-
"@/*": ["./*"]
|
| 23 |
-
}
|
| 24 |
-
},
|
| 25 |
-
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
|
| 26 |
-
"exclude": ["node_modules"]
|
| 27 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vercel.json
DELETED
|
@@ -1,10 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"functions": {
|
| 3 |
-
"app/api/stats/route.ts": {
|
| 4 |
-
"maxDuration": 30
|
| 5 |
-
},
|
| 6 |
-
"app/api/source-breakdown/route.ts": {
|
| 7 |
-
"maxDuration": 30
|
| 8 |
-
}
|
| 9 |
-
}
|
| 10 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|