Spaces:
Running
Running
File size: 2,108 Bytes
43216af dc4c2ba 43216af 1f37f4b 43216af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import streamlit as st
import requests
from transformers import pipeline
import plotly.express as px
import pandas as pd
from collections import Counter
import re
def get_markdown_from_github(url):
    """Download the document at ``url`` and return its body as text.

    GitHub ``/blob/`` page URLs are rewritten to their
    ``raw.githubusercontent.com`` equivalent before the request is made,
    because the ``/blob/`` address serves the HTML viewer page rather than
    the file's own contents. Any other URL is requested unchanged.

    Args:
        url: Address of the document to download.

    Returns:
        The response body decoded as text.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    url = url.strip()
    blob_match = re.match(
        r'https?://(?:www\.)?github\.com/([^/]+)/([^/]+)/blob/([^?#]+)',
        url,
    )
    if blob_match:
        # The third group is "<ref>/<path>", which is exactly what the raw
        # host expects after "<owner>/<repo>/".
        url = 'https://raw.githubusercontent.com/' + '/'.join(blob_match.groups())

    # Without a timeout a stalled server would hang the app indefinitely.
    response = requests.get(url, timeout=10)
    # Do not hand an error page back to the caller as if it were content.
    response.raise_for_status()
    return response.text
def preprocess_text(text):
    """Return ``text`` lowercased with punctuation and whitespace flattened.

    Every run of characters other than ASCII letters and digits is replaced
    by a single space. Leading and trailing runs become a space too; the
    result is not stripped.
    """
    lowered = text.lower()
    return re.sub('[^A-Za-z0-9]+', ' ', lowered)
def get_most_frequent_words(text, n):
    """Return the ``n`` most common words of five or more characters.

    A word is a run of at least five ``\\w`` characters delimited by word
    boundaries. The result is a list of ``(word, count)`` tuples ordered
    from most to least frequent, as produced by ``Counter.most_common``.
    """
    long_word = re.compile(r'\b\w{5,}\b')
    tally = Counter(match.group() for match in long_word.finditer(text))
    return tally.most_common(n)
def get_sentences_with_common_words(text, common_words):
    """Return the sentences of ``text`` that contain any of ``common_words``.

    ``text`` is split on ``.``, ``?`` and ``!``. A sentence is selected when
    at least one of the words occurs in it as a whole word, so ``"apple"``
    does not select a sentence that only contains ``"pineapple"``. Matching
    is case-sensitive.

    Args:
        text: The text to split into sentences.
        common_words: Iterable of words to look for. Empty strings are
            ignored.

    Returns:
        The matching sentences in their original order, each stripped of
        surrounding whitespace and listed once per occurrence in ``text``.
    """
    # Drop empty entries: an empty alternative would match every sentence.
    needles = [re.escape(word) for word in common_words if word]
    if not needles:
        return []

    # Lookarounds are used instead of \b so that words starting or ending
    # with a non-word character are still matched as whole tokens.
    whole_word = re.compile(r'(?<!\w)(?:' + '|'.join(needles) + r')(?!\w)')

    return [
        sentence.strip()
        for sentence in re.split('[.?!]', text)
        if whole_word.search(sentence)
    ]
def render_heatmap(words, sentences):
    """Draw a treemap of word frequencies in the Streamlit page.

    ``words`` is a sequence of ``(word, frequency)`` pairs; each word becomes
    one tile, sized and coloured by its frequency. ``sentences`` is accepted
    but not used by this function.
    """
    frequency_table = pd.DataFrame(words, columns=['word', 'frequency'])
    treemap_options = dict(
        path=['word'],
        values='frequency',
        color='frequency',
        hover_data=['frequency'],
        color_continuous_scale='reds',
    )
    figure = px.treemap(frequency_table, **treemap_options)
    st.plotly_chart(figure, use_container_width=True)
def main():
    """Build the Streamlit page: fetch a document, count words, plot them.

    Reads a URL and a word count from the sidebar, downloads the document,
    finds its most frequent words of five or more characters, and renders
    them as a treemap. A download failure or a document with no qualifying
    words is reported on the page instead of being plotted.
    """
    st.title('Markdown Analyzer')

    # Get markdown from GitHub
    default_markdown_url = 'https://github.com/AaronCWacker/Yggdrasil/blob/main/README.md'
    markdown_url = st.sidebar.text_input("Enter a URL to analyze (default is provided):", default_markdown_url)
    try:
        markdown = get_markdown_from_github(markdown_url)
    except requests.RequestException as err:
        # An empty or unreachable URL should show a message, not a traceback.
        st.error('Could not fetch {}: {}'.format(markdown_url, err))
        return

    # Preprocess text
    text = preprocess_text(markdown)

    # Get most frequent words
    n_most_frequent_words = st.sidebar.slider('Number of most frequent words to display', 1, 20, 10)
    most_frequent_words = get_most_frequent_words(text, n_most_frequent_words)
    if not most_frequent_words:
        st.info('No words of five or more characters were found.')
        return

    # Get sentences containing common words. The split must run on the raw
    # document: preprocessing replaces '.', '?' and '!' with spaces, so the
    # preprocessed text can never be divided into sentences. Lowercasing
    # keeps the sentences comparable with the lowercased common words.
    common_words = [word for word, _ in most_frequent_words]
    sentences = get_sentences_with_common_words(markdown.lower(), common_words)

    # Render heatmap
    render_heatmap(most_frequent_words, sentences)
# Run the app only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|