Spaces:

awacke1
/

Markdown-Analyzer

Running

File size: 2,108 Bytes

43216af
 
 
 
 
 
 
dc4c2ba
43216af
 
 
 
1f37f4b
43216af

import streamlit as st
import requests
from transformers import pipeline
import plotly.express as px
import pandas as pd
from collections import Counter
import re

def get_markdown_from_github(url):
    """Download the document at ``url`` and return its body as text.

    GitHub "blob" page URLs
    (``https://github.com/<owner>/<repo>/blob/<ref>/<path>``) serve an HTML
    page rather than the file itself, so they are rewritten to the matching
    ``raw.githubusercontent.com`` address before the request is made. Any
    other URL is fetched unchanged.

    Args:
        url: Address of the document to download.

    Returns:
        The response body decoded as text.

    Raises:
        requests.RequestException: On connection failure, timeout, an
            invalid URL, or an HTTP error status (4xx/5xx).
    """
    # Point at the raw file so the analysis sees Markdown, not page markup.
    raw_url = re.sub(
        r'^https?://(?:www\.)?github\.com/([^/]+)/([^/]+)/blob/(.+)$',
        r'https://raw.githubusercontent.com/\1/\2/\3',
        url.strip(),
    )
    # Without a timeout a stalled server would hang the app indefinitely.
    response = requests.get(raw_url, timeout=10)
    # Fail loudly instead of analysing the text of an error page.
    response.raise_for_status()
    return response.text

def preprocess_text(text):
    """Normalise ``text`` for word counting.

    The input is lower-cased, then every run of characters outside
    ``A-Z``, ``a-z`` and ``0-9`` (punctuation, whitespace, non-ASCII
    letters, ...) is collapsed into a single space.
    """
    lowered = text.lower()
    return re.sub('[^A-Za-z0-9]+', ' ', lowered)
    
def get_most_frequent_words(text, n):
    """Return the ``n`` most common words of five or more characters.

    Words are found with the regex ``\\b\\w{5,}\\b``; the result is a list of
    ``(word, count)`` tuples ordered from most to least frequent.
    """
    long_words = re.findall(r'\b\w{5,}\b', text)
    return Counter(long_words).most_common(n)

def get_sentences_with_common_words(text, common_words):
    """Return the sentences of ``text`` that contain any of ``common_words``.

    ``text`` is split on ``.``, ``?`` and ``!``. A sentence is kept (with
    surrounding whitespace stripped) when at least one entry of
    ``common_words`` occurs in it as a substring; each sentence appears at
    most once per occurrence in the text, in original order.
    """
    return [
        fragment.strip()
        for fragment in re.split('[.?!]', text)
        if any(candidate in fragment for candidate in common_words)
    ]

def render_heatmap(words, sentences):
    """Draw a Plotly treemap of word frequencies in the Streamlit page.

    Args:
        words: Iterable of ``(word, frequency)`` pairs; each word becomes one
            tile sized and coloured by its frequency.
        sentences: Accepted for interface compatibility; not used here.
    """
    frequency_table = pd.DataFrame(words, columns=['word', 'frequency'])
    treemap_options = dict(
        path=['word'],
        values='frequency',
        color='frequency',
        hover_data=['frequency'],
        color_continuous_scale='reds',
    )
    figure = px.treemap(frequency_table, **treemap_options)
    st.plotly_chart(figure, use_container_width=True)

def main():
    """Build the Streamlit page.

    Reads a URL and a word count from the sidebar, downloads the document,
    finds its most frequent long words and renders them as a treemap.
    A blank URL, a failed download, or a document with no qualifying words
    is reported on the page instead of surfacing as an unhandled exception.
    """
    st.title('Markdown Analyzer')

    # Sidebar inputs are created up front so they remain visible even when
    # the page returns early below.
    default_markdown_url = 'https://github.com/AaronCWacker/Yggdrasil/blob/main/README.md'
    markdown_url = st.sidebar.text_input("Enter a URL to analyze (default is provided):", default_markdown_url)
    n_most_frequent_words = st.sidebar.slider('Number of most frequent words to display', 1, 20, 10)

    markdown_url = markdown_url.strip()
    if not markdown_url:
        st.warning('Enter a URL to analyze.')
        return

    # Get markdown from GitHub
    try:
        markdown = get_markdown_from_github(markdown_url)
    except requests.RequestException as error:
        # Covers malformed URLs, connection problems and timeouts.
        st.error(f'Could not download {markdown_url}: {error}')
        return

    # Preprocess text
    text = preprocess_text(markdown)

    # Get most frequent words
    most_frequent_words = get_most_frequent_words(text, n_most_frequent_words)
    if not most_frequent_words:
        # Nothing to chart; say so rather than drawing an empty figure.
        st.warning('No words of five or more characters were found in the document.')
        return

    # Get sentences containing common words
    common_words = [word for word, _ in most_frequent_words]
    sentences = get_sentences_with_common_words(text, common_words)

    # Render heatmap
    render_heatmap(most_frequent_words, sentences)

# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()