import gradio as gr
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
import json
import time
import os
from functools import partial
import datetime
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Global variables to store models
tokenizer = None
ner_pipeline = None
pos_pipeline = None
intent_classifier = None
semantic_model = None
stt_model = None  # Speech-to-text model
models_loaded = False

# In-memory store for keyword ranking history (a real app would use a proper database)
ranking_history = {}
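# For reference, update_ranking_history() below populates ranking_history as:
#   {keyword: [{"timestamp": "YYYY-MM-DD HH:MM:SS", "results": [top-5 SERP dicts]}, ...]}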
def load_models(progress=gr.Progress()):
    """Lazy-load models only when needed"""
    global tokenizer, ner_pipeline, pos_pipeline, intent_classifier, semantic_model, stt_model, models_loaded
    if models_loaded:
        return True
    try:
        progress(0.1, desc="Loading models...")
        # Use smaller models and load them sequentially to reduce memory pressure
        from transformers import AutoTokenizer, pipeline

        progress(0.2, desc="Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

        progress(0.3, desc="Loading NER model...")
        ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")

        progress(0.4, desc="Loading POS model...")
        # Use a smaller POS model
        from transformers import AutoModelForTokenClassification, BertTokenizerFast
        pos_model = AutoModelForTokenClassification.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
        pos_tokenizer = BertTokenizerFast.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
        pos_pipeline = pipeline("token-classification", model=pos_model, tokenizer=pos_tokenizer)

        progress(0.6, desc="Loading intent classifier...")
        # Use a smaller model for zero-shot classification
        intent_classifier = pipeline(
            "zero-shot-classification",
            model="typeform/distilbert-base-uncased-mnli",  # Smaller than BART
            device=0 if torch.cuda.is_available() else -1  # Use GPU if available
        )

        progress(0.7, desc="Loading speech-to-text model...")
        try:
            # Load automatic speech recognition model; store as a (processor, model) tuple
            from transformers import WhisperProcessor, WhisperForConditionalGeneration
            processor = WhisperProcessor.from_pretrained("openai/whisper-small.en")
            whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small.en")
            stt_model = (processor, whisper_model)
        except Exception as e:
            print(f"Warning: Could not load speech-to-text model: {str(e)}")
            stt_model = None  # Set to None so we can check if it's available

        progress(0.8, desc="Loading semantic model...")
        try:
            from sentence_transformers import SentenceTransformer
            semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
        except Exception as e:
            print(f"Warning: Could not load semantic model: {str(e)}")
            semantic_model = None  # Set to None so we can check if it's available

        progress(1.0, desc="Models loaded successfully!")
        models_loaded = True
        return True
    except Exception as e:
        print(f"Error loading models: {str(e)}")
        return f"Error: {str(e)}"
def speech_to_text(audio_path):
    """Convert speech to text using the loaded speech-to-text model"""
    if stt_model is None:
        return "Speech-to-text model not loaded. Please try text input instead."
    try:
        import librosa

        # Load audio file, resampled to the 16 kHz rate Whisper expects
        audio, sr = librosa.load(audio_path, sr=16000)

        # Process audio with Whisper
        processor, model = stt_model
        input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features

        # Generate token ids
        predicted_ids = model.generate(input_features)

        # Decode token ids to text
        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        return transcription
    except Exception as e:
        print(f"Error in speech_to_text: {str(e)}")
        return f"Error processing speech: {str(e)}"
def handle_voice_input(audio):
    """Handle voice input and convert to text"""
    if audio is None:
        return "No audio detected. Please try again."
    try:
        # Convert speech to text
        text = speech_to_text(audio)
        return text
    except Exception as e:
        print(f"Error in handle_voice_input: {str(e)}")
        return f"Error: {str(e)}"
def simulate_google_serp(keyword, num_results=10):
    """Simulate Google SERP results for a keyword"""
    try:
        # In a real implementation, this would call the Google API.
        # For now, generate fake SERP data with a deterministic seed so the
        # same keyword always produces the same results.
        np.random.seed(sum(ord(c) for c in keyword))

        serp_results = []
        domains = [
            "example.com", "wikipedia.org", "medium.com", "github.com",
            "stackoverflow.com", "amazon.com", "youtube.com", "reddit.com",
            "linkedin.com", "twitter.com", "facebook.com", "instagram.com"
        ]

        for i in range(1, num_results + 1):
            domain = domains[i % len(domains)]
            title = f"{keyword.title()} - {domain.split('.')[0].title()} Resource #{i}"
            snippet = f"This is a simulated SERP result for '{keyword}'. Result #{i} would provide relevant information about this topic."
            url = f"https://www.{domain}/{keyword.replace(' ', '-')}-resource-{i}"
            ctr = round(0.3 * (0.85 ** (i - 1)), 4)  # Simulated click-through rate decay

            serp_results.append({
                "position": i,
                "title": title,
                "url": url,
                "domain": domain,
                "snippet": snippet,
                "ctr_estimate": ctr,
                "impressions_estimate": np.random.randint(1000, 10000)
            })
        return serp_results
    except Exception as e:
        print(f"Error in simulate_google_serp: {str(e)}")
        return []
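# Note on the simulated CTR: it decays geometrically with position, starting
# at 30% for position 1 and multiplying by 0.85 per rank, so position 2 gets
# 0.3 * 0.85 = 0.255 and lower ranks taper off smoothly.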
def update_ranking_history(keyword, serp_results):
    """Update the ranking history for a keyword"""
    try:
        # Get current timestamp
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Initialize if keyword not in history
        if keyword not in ranking_history:
            ranking_history[keyword] = []

        # Add new entry, storing only the top 5 results
        ranking_history[keyword].append({
            "timestamp": timestamp,
            "results": serp_results[:5]
        })

        # Keep only the last 10 entries for each keyword
        if len(ranking_history[keyword]) > 10:
            ranking_history[keyword] = ranking_history[keyword][-10:]
        return True
    except Exception as e:
        print(f"Error in update_ranking_history: {str(e)}")
        return False
def get_semantic_similarity(token, comparison_terms):
    """Calculate semantic similarity between a token and comparison terms"""
    try:
        from sklearn.metrics.pairwise import cosine_similarity

        # Guard against the semantic model not being loaded; this funnels
        # into the dummy-data fallback below
        if semantic_model is None:
            raise RuntimeError("Semantic model not loaded")

        token_embedding = semantic_model.encode([token])[0]
        comparison_embeddings = semantic_model.encode(comparison_terms)
        similarities = []
        for i, emb in enumerate(comparison_embeddings):
            similarity = cosine_similarity([token_embedding], [emb])[0][0]
            similarities.append((comparison_terms[i], float(similarity)))
        return sorted(similarities, key=lambda x: x[1], reverse=True)
    except Exception as e:
        print(f"Error in semantic similarity: {str(e)}")
        # Return dummy data on error
        return [(term, 0.5) for term in comparison_terms]
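# Illustrative usage (scores shown are placeholders, not actual model output):
#   get_semantic_similarity("marketing", ["business", "software"])
#   -> [("business", 0.55), ("software", 0.21)]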
def get_token_colors(token_type):
    colors = {
        "prefix": "#D8BFD8",           # Light purple
        "suffix": "#AEDAA4",           # Light green
        "stem": "#A4C2F4",             # Light blue
        "compound_first": "#FFCC80",   # Light orange
        "compound_second": "#FFCC80",  # Light orange
        "word": "#E5E5E5"              # Light gray
    }
    return colors.get(token_type, "#E5E5E5")
def simulate_historical_data(token):
    """Generate simulated historical usage data for a token"""
    eras = ["1900s", "1950s", "1980s", "2000s", "2010s", "Present"]

    # Different patterns based on token characteristics
    if len(token) > 8:
        # Possibly a technical term - recent growth
        values = [10, 20, 30, 60, 85, 95]
    elif token.startswith(("un", "re", "de", "pre")):
        # Prefixed words tend to be older
        values = [45, 50, 60, 70, 75, 80]
    else:
        # Standard pattern for common words: sum character codes rather than
        # using hash(), which varies across runs
        base = 50 + (sum(ord(c) for c in token) % 30)
        # Use a fixed seed for reproducibility
        np.random.seed(sum(ord(c) for c in token))
        noise = np.random.normal(0, 5, 6)
        values = [max(5, min(95, base + i * 5 + n)) for i, n in enumerate(noise)]
    return list(zip(eras, values))
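# Example (deterministic per token): "preprocessing" is longer than 8
# characters, so it takes the "technical term" growth pattern:
#   simulate_historical_data("preprocessing")
#   -> [("1900s", 10), ("1950s", 20), ("1980s", 30),
#       ("2000s", 60), ("2010s", 85), ("Present", 95)]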
def generate_origin_data(token):
    """Generate simulated origin/etymology data for a token"""
    origins = [
        {"era": "Ancient", "language": "Latin"},
        {"era": "Ancient", "language": "Greek"},
        {"era": "Medieval", "language": "Old English"},
        {"era": "16th century", "language": "French"},
        {"era": "18th century", "language": "Germanic"},
        {"era": "19th century", "language": "Anglo-Saxon"},
        {"era": "20th century", "language": "Modern English"}
    ]

    # Deterministic selection based on the token
    index = sum(ord(c) for c in token) % len(origins)
    origin = origins[index]
    origin["note"] = f"First appeared in {origin['era']} texts derived from {origin['language']}."
    return origin
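# Example: generate_origin_data("ai") selects index (97 + 105) % 7 == 6,
# i.e. the "20th century" / "Modern English" entry, on every call.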
def analyze_token_types(tokens):
    """Identify token types (prefix, suffix, compound, etc.)"""
    processed_tokens = []
    prefixes = ["un", "re", "de", "pre", "post", "anti", "pro", "inter", "sub", "super"]
    suffixes = ["ing", "ed", "ly", "ment", "tion", "able", "ible", "ness", "ful", "less"]

    for token in tokens:
        token_text = token.lower()
        token_type = "word"

        # Check for prefixes (the length check also ensures the word isn't just the prefix)
        for prefix in prefixes:
            if token_text.startswith(prefix) and len(token_text) > len(prefix) + 2:
                token_type = "prefix"
                break

        # Check for suffixes
        if token_type == "word":
            for suffix in suffixes:
                if token_text.endswith(suffix) and len(token_text) > len(suffix) + 2:
                    token_type = "suffix"
                    break

        # Check for compound words (simplified)
        if token_type == "word" and len(token_text) > 8:
            token_type = "compound_first"  # Simplified - in reality this would need more analysis

        processed_tokens.append({
            "text": token_text,
            "type": token_type
        })
    return processed_tokens
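# Illustrative usage:
#   analyze_token_types(["preprocessing", "breakdown"])
#   -> [{"text": "preprocessing", "type": "prefix"},
#       {"text": "breakdown", "type": "compound_first"}]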
def plot_historical_data(historical_data):
    """Create a plot of historical usage data, with error handling"""
    try:
        eras = [item[0] for item in historical_data]
        values = [item[1] for item in historical_data]

        plt.figure(figsize=(8, 3))
        plt.bar(eras, values, color='skyblue')
        plt.title('Historical Usage')
        plt.xlabel('Era')
        plt.ylabel('Usage Level')
        plt.ylim(0, 100)
        plt.xticks(rotation=45)
        plt.tight_layout()
        return plt
    except Exception as e:
        print(f"Error in plot_historical_data: {str(e)}")
        # Return a simple error plot
        plt.figure(figsize=(8, 3))
        plt.text(0.5, 0.5, f"Error creating plot: {str(e)}",
                 horizontalalignment='center', verticalalignment='center')
        plt.axis('off')
        return plt
| def create_evolution_chart(data, forecast_months=6, growth_scenario="Moderate"): | |
| """Create a simpler chart that's more compatible with Gradio""" | |
| try: | |
| import plotly.graph_objects as go | |
| # Create a basic figure without subplots | |
| fig = go.Figure() | |
| # Add main trace for search volume | |
| fig.add_trace( | |
| go.Scatter( | |
| x=[item["month"] for item in data], | |
| y=[item["searchVolume"] for item in data], | |
| name="Search Volume", | |
| line=dict(color="#8884d8", width=3), | |
| mode="lines+markers" | |
| ) | |
| ) | |
| # Scale the other metrics to be visible on the same chart | |
| max_volume = max([item["searchVolume"] for item in data]) | |
| scale_factor = max_volume / 100 | |
| # Add competition score (scaled) | |
| fig.add_trace( | |
| go.Scatter( | |
| x=[item["month"] for item in data], | |
| y=[item["competitionScore"] * scale_factor for item in data], | |
| name="Competition Score", | |
| line=dict(color="#82ca9d", width=2, dash="dot"), | |
| mode="lines+markers" | |
| ) | |
| ) | |
| # Add intent clarity (scaled) | |
| fig.add_trace( | |
| go.Scatter( | |
| x=[item["month"] for item in data], | |
| y=[item["intentClarity"] * scale_factor for item in data], | |
| name="Intent Clarity", | |
| line=dict(color="#ffc658", width=2, dash="dash"), | |
| mode="lines+markers" | |
| ) | |
| ) | |
| # Simple layout | |
| fig.update_layout( | |
| title=f"Keyword Evolution Forecast ({growth_scenario} Growth)", | |
| xaxis_title="Month", | |
| yaxis_title="Value", | |
| legend=dict(orientation="h", y=1.1), | |
| height=500 | |
| ) | |
| return fig | |
| except Exception as e: | |
| print(f"Error in chart creation: {str(e)}") | |
| # Fallback to an even simpler chart | |
| fig = go.Figure(data=go.Scatter(x=[1, 2, 3], y=[4, 1, 2])) | |
| fig.update_layout(title="Fallback Chart (Error occurred)") | |
| return fig | |
def create_ranking_history_chart(keyword_history):
    """Create a chart showing keyword ranking history over time"""
    try:
        if not keyword_history or len(keyword_history) < 2:
            # Not enough data for a meaningful chart
            fig = go.Figure()
            fig.update_layout(
                title="Insufficient Ranking Data",
                annotations=[{
                    "text": "Need at least 2 data points for ranking history",
                    "showarrow": False,
                    "font": {"size": 16},
                    "xref": "paper",
                    "yref": "paper",
                    "x": 0.5,
                    "y": 0.5
                }]
            )
            return fig

        # Create a figure
        fig = go.Figure()

        # Extract timestamps and convert to datetime objects
        timestamps = [entry["timestamp"] for entry in keyword_history]
        dates = [datetime.datetime.strptime(ts, "%Y-%m-%d %H:%M:%S") for ts in timestamps]

        # Get unique domains from all results
        all_domains = set()
        for entry in keyword_history:
            for result in entry["results"]:
                all_domains.add(result["domain"])

        # Assign a color to each domain
        color_palette = [
            "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
            "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"
        ]
        domain_colors = {}
        for i, domain in enumerate(all_domains):
            domain_colors[domain] = color_palette[i % len(color_palette)]

        # Track domains and their positions over time
        domain_tracking = {domain: {"x": [], "y": [], "text": []} for domain in all_domains}
        for i, entry in enumerate(keyword_history):
            for result in entry["results"]:
                domain = result["domain"]
                domain_tracking[domain]["x"].append(dates[i])
                domain_tracking[domain]["y"].append(result["position"])
                domain_tracking[domain]["text"].append(result["title"])

        # Add traces for each domain
        for domain, data in domain_tracking.items():
            if len(data["x"]) > 0:  # Only add domains that have data
                fig.add_trace(
                    go.Scatter(
                        x=data["x"],
                        y=data["y"],
                        mode="lines+markers",
                        name=domain,
                        line=dict(color=domain_colors[domain]),
                        hovertemplate="%{text}<br>Position: %{y}<br>Date: %{x}<extra></extra>",
                        text=data["text"],
                        marker=dict(size=8)
                    )
                )

        # Update layout
        fig.update_layout(
            title="Keyword Ranking History",
            xaxis_title="Date",
            yaxis_title="Position",
            yaxis=dict(autorange="reversed"),  # Invert y-axis so position 1 is on top
            hovermode="closest",
            height=500
        )
        return fig
    except Exception as e:
        print(f"Error in create_ranking_history_chart: {str(e)}")
        # Return fallback chart
        fig = go.Figure()
        fig.update_layout(
            title="Error Creating Ranking Chart",
            annotations=[{
                "text": f"Error: {str(e)}",
                "showarrow": False,
                "font": {"size": 14},
                "xref": "paper",
                "yref": "paper",
                "x": 0.5,
                "y": 0.5
            }]
        )
        return fig
def generate_serp_html(keyword, serp_results):
    """Generate HTML for SERP results"""
    if not serp_results:
        return "<div>No SERP results available</div>"

    html = f"""
    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
        <h2 style="margin-top: 0;">SERP Results for "{keyword}"</h2>
        <div style="background-color: #f5f5f5; padding: 10px; border-radius: 4px; margin-bottom: 20px;">
            <div style="color: #666; font-size: 12px;">This is a simulated SERP. In a real application, this would use the Google API.</div>
        </div>
        <div class="serp-results" style="display: flex; flex-direction: column; gap: 16px;">
    """

    for result in serp_results:
        position = result["position"]
        title = result["title"]
        url = result["url"]
        snippet = result["snippet"]
        ctr = result["ctr_estimate"]
        impressions = result["impressions_estimate"]

        html += f"""
        <div class="serp-result" style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; position: relative;">
            <div style="position: absolute; top: -10px; left: -10px; background-color: #4299e1; color: white; width: 24px; height: 24px; border-radius: 50%; display: flex; align-items: center; justify-content: center; font-size: 12px;">
                {position}
            </div>
            <div style="margin-bottom: 5px;">
                <a href="#" style="font-size: 18px; color: #1a73e8; text-decoration: none; font-weight: 500;">{title}</a>
            </div>
            <div style="margin-bottom: 8px; color: #006621; font-size: 14px;">{url}</div>
            <div style="color: #4d5156; font-size: 14px;">{snippet}</div>
            <div style="display: flex; margin-top: 10px; font-size: 12px; color: #666;">
                <div style="margin-right: 15px;"><span style="font-weight: 500;">CTR:</span> {ctr:.2%}</div>
                <div><span style="font-weight: 500;">Est. Impressions:</span> {impressions:,}</div>
            </div>
        </div>
        """

    html += """
        </div>
    </div>
    """
    return html
def generate_token_visualization_html(token_analysis, full_analysis):
    """Generate HTML for token visualization"""
    html = """
    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
        <h2 style="margin-top: 0;">Token Visualization</h2>
        <div style="margin-bottom: 20px; padding: 15px; background-color: #f8f9fa; border-radius: 6px;">
            <div style="margin-bottom: 8px; font-weight: bold; color: #4a5568;">Human View:</div>
            <div style="display: flex; flex-wrap: wrap; gap: 8px;">
    """

    # Add human view tokens
    for token in token_analysis:
        html += f"""
        <div style="padding: 6px 12px; background-color: white; border: 1px solid #cbd5e0; border-radius: 4px;">
            {token['text']}
        </div>
        """

    html += """
            </div>
        </div>
        <div style="text-align: center; margin: 15px 0;">
            <span style="font-size: 20px;">↓</span>
        </div>
        <div style="padding: 15px; background-color: #f0fff4; border-radius: 6px;">
            <div style="margin-bottom: 8px; font-weight: bold; color: #2f855a;">Machine View:</div>
            <div style="display: flex; flex-wrap: wrap; gap: 8px;">
    """

    # Add machine view tokens, colored by token type
    for token in full_analysis:
        bg_color = get_token_colors(token["type"])
        html += f"""
        <div style="padding: 6px 12px; background-color: {bg_color}; border: 1px solid #a0aec0; border-radius: 4px; font-family: monospace;">
            {token['token']}
            <span style="font-size: 10px; opacity: 0.7; display: block;">{token['type']}</span>
        </div>
        """

    html += """
            </div>
        </div>
        <div style="margin-top: 20px; display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; text-align: center;">
    """

    # Add stats
    word_count = len(token_analysis)
    token_count = len(full_analysis)
    ratio = round(token_count / max(1, word_count), 2)

    html += f"""
        <div style="background-color: #ebf8ff; padding: 10px; border-radius: 6px;">
            <div style="font-size: 24px; font-weight: bold; color: #3182ce;">{word_count}</div>
            <div style="font-size: 14px; color: #4299e1;">Words</div>
        </div>
        <div style="background-color: #f0fff4; padding: 10px; border-radius: 6px;">
            <div style="font-size: 24px; font-weight: bold; color: #38a169;">{token_count}</div>
            <div style="font-size: 14px; color: #48bb78;">Tokens</div>
        </div>
        <div style="background-color: #faf5ff; padding: 10px; border-radius: 6px;">
            <div style="font-size: 24px; font-weight: bold; color: #805ad5;">{ratio}</div>
            <div style="font-size: 14px; color: #9f7aea;">Tokens per Word</div>
        </div>
    """

    html += """
        </div>
    </div>
    """
    return html
def generate_full_analysis_html(keyword, token_analysis, intent_analysis, evolution_potential, trends):
    """Generate HTML for full keyword analysis"""
    html = f"""
    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
        <h2 style="margin-top: 0;">Keyword DNA Analysis for: {keyword}</h2>
        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 15px; margin-bottom: 20px;">
            <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px;">
                <h3 style="margin-top: 0; font-size: 16px;">Intent Gene</h3>
                <div style="display: flex; justify-content: space-between; margin-bottom: 10px;">
                    <span>Type:</span>
                    <span>{intent_analysis['type']}</span>
                </div>
                <div style="display: flex; justify-content: space-between; align-items: center;">
                    <span>Strength:</span>
                    <div style="width: 120px; height: 8px; background-color: #edf2f7; border-radius: 4px; overflow: hidden;">
                        <div style="height: 100%; background-color: #48bb78; width: {intent_analysis['strength']}%;"></div>
                    </div>
                </div>
            </div>
            <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px;">
                <h3 style="margin-top: 0; font-size: 16px;">Evolution Potential</h3>
                <div style="display: flex; justify-content: center; align-items: center; height: 100px;">
                    <div style="position: relative; width: 100px; height: 100px;">
                        <div style="position: absolute; inset: 0; display: flex; align-items: center; justify-content: center;">
                            <span style="font-size: 24px; font-weight: bold;">{evolution_potential}</span>
                        </div>
                        <svg width="100" height="100" viewBox="0 0 36 36">
                            <path
                                d="M18 2.0845 a 15.9155 15.9155 0 0 1 0 31.831 a 15.9155 15.9155 0 0 1 0 -31.831"
                                fill="none"
                                stroke="#4CAF50"
                                stroke-width="3"
                                stroke-dasharray="{evolution_potential}, 100"
                            />
                        </svg>
                    </div>
                </div>
            </div>
        </div>
        <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; margin-bottom: 20px;">
            <h3 style="margin-top: 0; font-size: 16px;">Future Mutations</h3>
            <div style="display: flex; flex-direction: column; gap: 8px;">
    """

    # Add trends
    for trend in trends:
        html += f"""
        <div style="display: flex; align-items: center; gap: 8px;">
            <span style="color: #48bb78;">↗</span>
            <span>{trend}</span>
        </div>
        """

    html += """
            </div>
        </div>
        <h3 style="margin-bottom: 10px;">Token Details & Historical Analysis</h3>
    """

    # Add token details
    for token in token_analysis:
        html += f"""
        <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; margin-bottom: 15px;">
            <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px;">
                <div style="display: flex; align-items: center; gap: 8px;">
                    <span style="font-size: 18px; font-weight: medium;">{token['token']}</span>
                    <span style="padding: 2px 8px; background-color: #edf2f7; border-radius: 4px; font-size: 12px;">{token['posTag']}</span>
        """

        if token['entityType']:
            html += f"""
                    <span style="padding: 2px 8px; background-color: #ebf8ff; color: #3182ce; border-radius: 4px; font-size: 12px; display: flex; align-items: center;">
                        ⓘ {token['entityType']}
                    </span>
            """

        html += f"""
                </div>
                <div style="display: flex; align-items: center; gap: 4px;">
                    <span style="font-size: 12px; color: #718096;">Importance:</span>
                    <div style="width: 64px; height: 8px; background-color: #edf2f7; border-radius: 4px; overflow: hidden;">
                        <div style="height: 100%; background-color: #4299e1; width: {token['importance']}%;"></div>
                    </div>
                </div>
            </div>
            <div style="margin-top: 15px;">
                <div style="font-size: 12px; color: #718096; margin-bottom: 4px;">Historical Relevance:</div>
                <div style="border: 1px solid #e2e8f0; border-radius: 4px; padding: 10px; background-color: #f7fafc;">
                    <div style="font-size: 12px; margin-bottom: 8px;">
                        <span style="font-weight: 500;">Origin: </span>
                        <span>{token['origin']['era']}, </span>
                        <span style="font-style: italic;">{token['origin']['language']}</span>
                    </div>
                    <div style="font-size: 12px; margin-bottom: 12px;">{token['origin']['note']}</div>
                    <div style="display: flex; align-items: flex-end; height: 50px; gap: 4px; margin-top: 8px;">
        """
        # Add historical data bars; enumerate avoids list.index(), which
        # would pick the wrong bar when two eras share the same value
        for idx, (period, value) in enumerate(token['historicalData']):
            opacity = 0.3 + (idx * 0.1)
            html += f"""
                    <div style="display: flex; flex-direction: column; align-items: center; flex: 1;">
                        <div style="width: 100%; background-color: rgba(66, 153, 225, {opacity}); border-radius: 2px 2px 0 0; height: {max(4, value)}%;"></div>
                        <div style="font-size: 9px; margin-top: 4px; color: #718096; transform: rotate(45deg); transform-origin: top left; white-space: nowrap;">
                            {period}
                        </div>
                    </div>
            """
| html += """ | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| """ | |
| html += """ | |
| </div> | |
| """ | |
| return html | |
def analyze_keyword(keyword, forecast_months=6, growth_scenario="Moderate", get_serp=False, progress=gr.Progress()):
    """Main function to analyze a keyword"""
    if not keyword or not keyword.strip():
        message = "<div>Please enter a keyword to analyze</div>"
        return (message, message, None, None, None, None, None)

    progress(0.1, desc="Starting analysis...")

    # Load models if not already loaded
    model_status = load_models(progress)
    if isinstance(model_status, str) and model_status.startswith("Error"):
        error_html = f"<div style='color:red;'>{model_status}</div>"
        return (error_html, error_html, None, None, None, None, None)

    try:
        # Basic tokenization - just split on spaces for simplicity
        words = keyword.strip().lower().split()

        progress(0.2, desc="Analyzing tokens...")
        # Get token types
        token_analysis = analyze_token_types(words)

        progress(0.3, desc="Running NER...")
        # Get NER tags - handle potential errors
        try:
            ner_results = ner_pipeline(keyword)
        except Exception as e:
            print(f"NER error: {str(e)}")
            ner_results = []

        progress(0.4, desc="Running POS tagging...")
        # Get POS tags - handle potential errors
        try:
            pos_results = pos_pipeline(keyword)
        except Exception as e:
            print(f"POS error: {str(e)}")
            pos_results = []

        # Process and organize results
        full_token_analysis = []
        for token in token_analysis:
            # Find POS tag for this token
            pos_tag = "NOUN"  # Default
            for pos_result in pos_results:
                if pos_result["word"].lower() == token["text"]:
                    pos_tag = pos_result["entity"]
                    break

            # Find entity type if any
            entity_type = None
            for ner_result in ner_results:
                if ner_result["word"].lower() == token["text"]:
                    entity_type = ner_result["entity"]
                    break

            # Generate historical and origin data
            historical_data = simulate_historical_data(token["text"])
            origin = generate_origin_data(token["text"])

            # Calculate importance (simplified algorithm, capped at 95)
            importance = min(95, 60 + (len(token["text"]) * 2))

            # Generate more meaningful related terms using semantic similarity
            if semantic_model is not None:
                try:
                    # Generate some potential related terms
                    prefix_related = [f"about {token['text']}", f"what is {token['text']}", f"how to {token['text']}"]
                    synonym_candidates = ["similar", "equivalent", "comparable", "like", "related", "alternative"]
                    domain_terms = ["software", "marketing", "business", "science", "education", "technology"]
                    comparison_terms = prefix_related + synonym_candidates + domain_terms

                    # Get similarities and keep the top 3 most similar terms
                    similarities = get_semantic_similarity(token['text'], comparison_terms)
                    related_terms = [term for term, score in similarities[:3]]
                except Exception as e:
                    print(f"Error generating semantic related terms: {str(e)}")
                    related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
            else:
                # Fallback if semantic model isn't loaded
                related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]

            full_token_analysis.append({
                "token": token["text"],
                "type": token["type"],
                "posTag": pos_tag,
                "entityType": entity_type,
                "importance": importance,
                "historicalData": historical_data,
                "origin": origin,
                "relatedTerms": related_terms
            })

        progress(0.5, desc="Analyzing intent...")
        # Intent analysis - handle potential errors
        try:
            intent_result = intent_classifier(
                keyword,
                candidate_labels=["informational", "navigational", "transactional"]
            )
            intent_analysis = {
                "type": intent_result["labels"][0].capitalize(),
                "strength": round(intent_result["scores"][0] * 100),
                "mutations": [
                    f"{intent_result['labels'][0]}-variation-1",
                    f"{intent_result['labels'][0]}-variation-2"
                ]
            }
        except Exception as e:
            print(f"Intent classification error: {str(e)}")
            intent_analysis = {
                "type": "Informational",  # Default fallback
                "strength": 70,
                "mutations": ["fallback-variation-1", "fallback-variation-2"]
            }

        # Evolution potential (simplified calculation)
        evolution_potential = min(95, 65 + (len(keyword) % 30))

        # Predicted trends (simplified)
        trends = [
            "Voice search adaptation",
            "Visual search integration"
        ]

        # Generate more realistic, keyword-specific evolution data
        base_volume = 1000 + (len(keyword) * 100)

        # Adjust growth factor based on scenario
        if growth_scenario == "Conservative":
            growth_factor = 1.05 + (0.02 * (sum(ord(c) for c in keyword) % 5))
        elif growth_scenario == "Aggressive":
            growth_factor = 1.15 + (0.05 * (sum(ord(c) for c in keyword) % 5))
        else:  # Moderate
            growth_factor = 1.1 + (0.03 * (sum(ord(c) for c in keyword) % 5))

        evolution_data = []
        months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][:int(forecast_months)]
        current_volume = base_volume

        for i, month in enumerate(months):
            # Add deterministic noise so the curve looks more realistic
            np.random.seed(sum(ord(c) for c in month + keyword))
            random_factor = 0.9 + (0.2 * np.random.random())
            current_volume *= growth_factor * random_factor
            evolution_data.append({
                "month": month,
                "searchVolume": int(current_volume),
                "competitionScore": min(95, 45 + (i * 3) + (sum(ord(c) for c in keyword) % 10)),
                "intentClarity": min(95, 80 + (i * 2) + (sum(ord(c) for c in keyword) % 5))
            })

        progress(0.6, desc="Creating visualizations...")
        # Create interactive evolution chart
        evolution_chart = create_evolution_chart(evolution_data, forecast_months, growth_scenario)

        # SERP results and ranking history (new feature)
        serp_results = None
        ranking_chart = None
        serp_html = None
        if get_serp:
            progress(0.7, desc="Fetching SERP data...")
            # Get SERP results and update ranking history
            serp_results = simulate_google_serp(keyword)
            update_ranking_history(keyword, serp_results)

            progress(0.8, desc="Creating ranking charts...")
            # Create ranking history chart
            if keyword in ranking_history and len(ranking_history[keyword]) > 0:
                ranking_chart = create_ranking_history_chart(ranking_history[keyword])

            # Generate SERP HTML
            serp_html = generate_serp_html(keyword, serp_results)

        # Generate HTML for token visualization
        token_viz_html = generate_token_visualization_html(token_analysis, full_token_analysis)

        # Generate HTML for full analysis
        analysis_html = generate_full_analysis_html(
            keyword,
            full_token_analysis,
            intent_analysis,
            evolution_potential,
            trends
        )

        # Generate JSON results
        json_results = {
            "keyword": keyword,
            "tokenAnalysis": full_token_analysis,
            "intentAnalysis": intent_analysis,
            "evolutionPotential": evolution_potential,
            "predictedTrends": trends,
            "forecast": {
                "months": forecast_months,
                "scenario": growth_scenario,
                "data": evolution_data
            },
            "serpResults": serp_results
        }

        progress(1.0, desc="Analysis complete!")
        return token_viz_html, analysis_html, json_results, evolution_chart, serp_html, ranking_chart, keyword
    except Exception as e:
        error_message = f"<div style='color:red;padding:20px;'>Error analyzing keyword: {str(e)}</div>"
        print(f"Error in analyze_keyword: {str(e)}")
        return error_message, error_message, None, None, None, None, None
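# Note: the 7-tuple returned by analyze_keyword must stay in sync with the
# `outputs` list wired to it in the Gradio event handlers below.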
# Create the Gradio interface
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("# Keyword DNA Analyzer")
    gr.Markdown("Analyze the linguistic DNA of your keywords to understand their structure, intent, and potential.")

    with gr.Row():
        with gr.Column(scale=1):
            # Voice search capabilities
            with gr.Group():
                gr.Markdown("### Enter Keyword")
                with gr.Row():
                    input_text = gr.Textbox(label="Enter keyword to analyze", placeholder="e.g. artificial intelligence")
                with gr.Row():
                    audio_input = gr.Audio(type="filepath", label="Or use voice search")
                    voice_submit_btn = gr.Button("Convert Voice to Text", variant="secondary")

            # SERP settings
            with gr.Accordion("Analysis Settings", open=False):
                with gr.Row():
                    forecast_months = gr.Slider(minimum=3, maximum=12, value=6, step=1, label="Forecast Months")
                    include_serp = gr.Checkbox(label="Include SERP Analysis", value=True)
                growth_scenario = gr.Radio(
                    ["Conservative", "Moderate", "Aggressive"],
                    value="Moderate",
                    label="Growth Scenario"
                )

            # Loading indicator
            status_html = gr.HTML('<div style="color:gray;text-align:center;">Enter a keyword and click "Analyze DNA"</div>')

            analyze_btn = gr.Button("Analyze DNA", variant="primary")

            with gr.Row():
                example_btns = []
                for example in ["preprocessing", "breakdown", "artificial intelligence", "transformer model", "machine learning"]:
                    example_btns.append(gr.Button(example))

        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Token Visualization"):
                    token_viz_html = gr.HTML()
                with gr.Tab("Full Analysis"):
                    analysis_html = gr.HTML()
                with gr.Tab("Evolution Chart"):
                    evolution_chart = gr.Plot(label="Keyword Evolution Forecast")
                with gr.Tab("SERP Results"):
                    serp_html = gr.HTML()
                with gr.Tab("Ranking History"):
                    ranking_chart = gr.Plot(label="Keyword Ranking History")
                with gr.Tab("Raw Data"):
                    json_output = gr.JSON()

    # Voice to text conversion handler
    voice_submit_btn.click(
        handle_voice_input,
        inputs=[audio_input],
        outputs=[input_text]
    )

    # Set up event handlers
    analyze_btn.click(
        lambda: '<div style="color:blue;text-align:center;">Loading models and analyzing... This may take a moment.</div>',
        outputs=status_html
    ).then(
        analyze_keyword,
        inputs=[input_text, forecast_months, growth_scenario, include_serp],
        outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
    ).then(
        lambda: '<div style="color:green;text-align:center;">Analysis complete!</div>',
        outputs=status_html
    )

    # Example buttons: clicking one copies its label into the textbox, then runs the analysis
    def set_example(btn_label):
        return btn_label

    for btn in example_btns:
        btn.click(
            set_example,
            inputs=[btn],
            outputs=[input_text]
        ).then(
            lambda: '<div style="color:blue;text-align:center;">Loading models and analyzing... This may take a moment.</div>',
            outputs=status_html
        ).then(
            analyze_keyword,
            inputs=[input_text, forecast_months, growth_scenario, include_serp],
            outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
        ).then(
            lambda: '<div style="color:green;text-align:center;">Analysis complete!</div>',
            outputs=status_html
        )
# Launch the app
if __name__ == "__main__":
    demo.launch()