Spaces:

awacke1
/

Markdown-Analyzer

Running

App Files Files Community

Markdown-Analyzer / app.py

awacke1

Update app.py

43216af over 1 year ago

raw

history blame

No virus

2.11 kB

	import streamlit as st
	import requests
	from transformers import pipeline
	import plotly.express as px
	import pandas as pd
	from collections import Counter
	import re

	def get_markdown_from_github(url):
	    """Download the document at *url* and return its body as text.

	    GitHub "blob" page links (``https://github.com/<owner>/<repo>/blob/...``)
	    serve an HTML page rather than the file itself, so they are rewritten to
	    the matching ``raw.githubusercontent.com`` address before the request is
	    made.  Any other URL is fetched unchanged.

	    Args:
	        url: Address of the markdown document.

	    Returns:
	        The response body decoded as text.

	    Raises:
	        requests.RequestException: If the request fails, times out, or the
	            server answers with an HTTP error status.
	    """
	    raw_url = re.sub(
	        r'^https?://github\.com/([^/]+)/([^/]+)/blob/(.+)$',
	        r'https://raw.githubusercontent.com/\1/\2/\3',
	        url.strip(),
	    )
	    # A timeout keeps the app from hanging forever on an unresponsive host.
	    response = requests.get(raw_url, timeout=10)
	    # Fail loudly instead of analysing the text of a 404/500 error page.
	    response.raise_for_status()
	    return response.text

	def preprocess_text(text):
	    """Lower-case *text* and replace each non-alphanumeric run with one space.

	    Everything outside ``A-Z``, ``a-z`` and ``0-9`` is treated as a separator,
	    which means punctuation (including ``.``, ``?`` and ``!``) and non-ASCII
	    letters are removed from the result.

	    Args:
	        text: The raw text to normalise.

	    Returns:
	        The normalised string; leading/trailing separators become a single
	        space and are not stripped.
	    """
	    return re.sub(r'[^A-Za-z0-9]+', ' ', text.lower())

	def get_most_frequent_words(text, n):
	    """Return the *n* most frequent words of five or more characters.

	    Args:
	        text: Text to scan; words are runs of ``\\w`` characters delimited by
	            word boundaries.
	        n: Maximum number of entries to return.

	    Returns:
	        A list of ``(word, count)`` tuples ordered from most to least common.
	    """
	    # Words shorter than five characters are skipped by the {5,} quantifier.
	    long_words = re.findall(r'\b\w{5,}\b', text)
	    return Counter(long_words).most_common(n)

	def get_sentences_with_common_words(text, common_words):
	    """Return the sentences of *text* that contain any of *common_words*.

	    Sentences are obtained by splitting on ``.``, ``?`` and ``!``.  A word
	    matches when it occurs anywhere in the sentence as a substring.

	    Args:
	        text: Text to split into sentences.
	        common_words: Iterable of words to look for.

	    Returns:
	        The matching sentences, stripped of surrounding whitespace, in their
	        original order.  Each sentence appears at most once per occurrence
	        in the text, however many words it contains.
	    """
	    # Materialise once so a one-shot iterable can be reused for every sentence.
	    wanted = tuple(common_words)
	    return [
	        sentence.strip()
	        for sentence in re.split(r'[.?!]', text)
	        if any(word in sentence for word in wanted)
	    ]

	def render_heatmap(words, sentences):
	    """Draw a treemap of word frequencies in the Streamlit page.

	    Despite the name, the chart produced is a Plotly treemap in which each
	    tile is one word, sized and coloured by its frequency.

	    Args:
	        words: Sequence of ``(word, frequency)`` pairs.
	        sentences: Accepted for interface compatibility; not used by the
	            chart.
	    """
	    frequency_table = pd.DataFrame(words, columns=['word', 'frequency'])
	    figure = px.treemap(
	        frequency_table,
	        path=['word'],
	        values='frequency',
	        color='frequency',
	        hover_data=['frequency'],
	        color_continuous_scale='reds',
	    )
	    st.plotly_chart(figure, use_container_width=True)

	def main():
	    """Build the Streamlit page: fetch a document, count words, draw the chart.

	    The URL and the number of words to display are read from sidebar
	    widgets.  If the document cannot be downloaded, an error message is shown
	    and the rest of the page is skipped.
	    """
	    st.title('Markdown Analyzer')

	    # Get markdown from GitHub
	    default_markdown_url = 'https://github.com/AaronCWacker/Yggdrasil/blob/main/README.md'
	    markdown_url = st.sidebar.text_input("Enter a URL to analyze (default is provided):", default_markdown_url)
	    try:
	        markdown = get_markdown_from_github(markdown_url)
	    except requests.RequestException as exc:
	        # The URL is free-form user input, so a bad or unreachable address
	        # is reported in the page rather than raised as a traceback.
	        st.error(f'Could not download the document: {exc}')
	        return

	    # Preprocess text
	    text = preprocess_text(markdown)

	    # Get most frequent words
	    n_most_frequent_words = st.sidebar.slider('Number of most frequent words to display', 1, 20, 10)
	    most_frequent_words = get_most_frequent_words(text, n_most_frequent_words)

	    # Get sentences containing common words
	    common_words = [word for word, _ in most_frequent_words]
	    sentences = get_sentences_with_common_words(text, common_words)

	    # Render heatmap
	    render_heatmap(most_frequent_words, sentences)

	# Run the app only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	    main()