Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import requests | |
| from transformers import pipeline | |
| import plotly.express as px | |
| import pandas as pd | |
| from collections import Counter | |
| import re | |
def get_markdown_from_github(url):
    """Download the document at *url* and return its body as text.

    GitHub ``/blob/`` links are rewritten to the matching
    ``raw.githubusercontent.com`` address before the request, so the file's
    own content is fetched instead of the HTML page that wraps it. Any other
    URL is requested unchanged.

    Args:
        url: Address of the document to fetch.

    Returns:
        The decoded response body.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    raw_url = re.sub(
        r'^https?://(?:www\.)?github\.com/([^/]+)/([^/]+)/blob/(.+)$',
        r'https://raw.githubusercontent.com/\1/\2/\3',
        url.strip(),
    )
    # Without a timeout an unresponsive host would block the app indefinitely.
    response = requests.get(raw_url, timeout=10)
    # Fail loudly rather than analysing the text of a 404/500 error page.
    response.raise_for_status()
    return response.text
def preprocess_text(text):
    """Normalise *text* for word counting.

    The text is lower-cased, then every run of characters other than ASCII
    letters and digits is replaced by a single space. Leading and trailing
    runs also become a space; the result is not trimmed.
    """
    return re.compile('[^A-Za-z0-9]+').sub(' ', text.lower())
def get_most_frequent_words(text, n, min_length=5):
    """Return the *n* most frequent words in *text*.

    A word is a maximal run of ``\\w`` characters that is at least
    *min_length* characters long; shorter runs are ignored.

    Args:
        text: The text to scan.
        n: How many ``(word, count)`` pairs to return.
        min_length: Minimum number of characters a word needs in order to be
            counted. Defaults to 5.

    Returns:
        A list of ``(word, count)`` tuples, most frequent first.

    Raises:
        ValueError: If *min_length* is smaller than 1.
    """
    if min_length < 1:
        # A zero-length quantifier would make the pattern match empty strings.
        raise ValueError("min_length must be at least 1")
    word_pattern = r'\b\w{%d,}\b' % min_length
    return Counter(re.findall(word_pattern, text)).most_common(n)
def get_sentences_with_common_words(text, common_words):
    """Return the sentences of *text* that contain any of *common_words*.

    *text* is split on ``.``, ``?`` and ``!``. A sentence is selected when at
    least one of the words occurs in it as a whole word, so ``"apple"`` does
    not select a sentence that only mentions ``"pineapple"``. Matching is
    case-sensitive.

    Args:
        text: The text to split into sentences.
        common_words: Iterable of words to look for. Empty strings are
            ignored.

    Returns:
        The matching sentences, in their original order, each stripped of
        surrounding whitespace and listed once. An empty list when there are
        no usable words.
    """
    wanted = [word for word in common_words if word]
    if not wanted:
        return []
    # One pattern built up front replaces a per-sentence, per-word scan; the
    # lookarounds require that no word character touches either side.
    matcher = re.compile(
        r'(?<!\w)(?:' + '|'.join(re.escape(word) for word in wanted) + r')(?!\w)'
    )
    return [
        sentence.strip()
        for sentence in re.split('[.?!]', text)
        if matcher.search(sentence)
    ]
def render_heatmap(words, sentences):
    """Draw the word frequencies as a Plotly treemap in the Streamlit page.

    Args:
        words: ``(word, frequency)`` pairs; they become the two columns of
            the data frame behind the chart.
        sentences: Accepted for the caller's convenience but not used here.
    """
    frame = pd.DataFrame(words, columns=['word', 'frequency'])
    # Tile size and tile colour are both driven by the frequency column.
    treemap = px.treemap(
        frame,
        path=['word'],
        values='frequency',
        color='frequency',
        hover_data=['frequency'],
        color_continuous_scale='reds',
    )
    st.plotly_chart(treemap, use_container_width=True)
def main():
    """Run the Streamlit app: fetch a document, count its words, chart them.

    The sidebar supplies the URL and the number of words to show. A failed
    download or a document without countable words is reported in the page
    instead of raising.
    """
    st.title('Markdown Analyzer')

    # Create both sidebar controls first so they stay visible even when the
    # run stops early below.
    default_markdown_url = 'https://github.com/AaronCWacker/Yggdrasil/blob/main/README.md'
    markdown_url = st.sidebar.text_input("Enter a URL to analyze (default is provided):", default_markdown_url)
    n_most_frequent_words = st.sidebar.slider('Number of most frequent words to display', 1, 20, 10)

    # Get markdown from GitHub
    try:
        markdown = get_markdown_from_github(markdown_url)
    except requests.RequestException as exc:
        st.error(f"Could not fetch {markdown_url!r}: {exc}")
        return

    # Preprocess text
    text = preprocess_text(markdown)

    # Get most frequent words
    most_frequent_words = get_most_frequent_words(text, n_most_frequent_words)
    if not most_frequent_words:
        # Nothing to chart.
        st.warning('No words long enough to count were found at that URL.')
        return

    # Get sentences containing common words. The preprocessed text has had
    # '.', '?' and '!' replaced by spaces, so it can no longer be split into
    # sentences; use the lower-cased original instead.
    common_words = [word for word, _ in most_frequent_words]
    sentences = get_sentences_with_common_words(markdown.lower(), common_words)

    # Render heatmap
    render_heatmap(most_frequent_words, sentences)
# Entry point: build the page only when this module runs as the main script.
if __name__ == '__main__':
    main()