# Keywords / app.py
# abdulllah01's picture
# Update app.py
# 0f5c3da verified
import streamlit as st
import pandas as pd
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import string
from nltk.corpus import stopwords
import nltk
import io
# clean_text_fuzzy reads NLTK's English stop-word list. Look for the corpus
# locally first and only invoke the downloader when it is missing, so an
# already-provisioned environment does not run the downloader each time this
# script is executed.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
# Helper functions
def clean_text(text):
    """Return ``text`` lower-cased with every ASCII punctuation character removed.

    Whitespace is left exactly as it was, so punctuation that stood between
    two spaces leaves both spaces behind.
    """
    # Translation table that maps each character of string.punctuation to None.
    punctuation_remover = str.maketrans('', '', string.punctuation)
    lowered = text.lower()
    return lowered.translate(punctuation_remover)
def clean_text_fuzzy(text):
    """Normalize ``text`` for fuzzy matching.

    The text is lower-cased, stripped of ASCII punctuation and split on
    whitespace; every token that appears in NLTK's English stop-word list is
    dropped. The remaining tokens are returned joined by single spaces.
    """
    english_stop_words = set(stopwords.words('english'))
    depunctuated = text.lower().translate(str.maketrans('', '', string.punctuation))
    kept_tokens = filter(
        lambda token: token not in english_stop_words,
        depunctuated.split(),
    )
    return " ".join(kept_tokens)
def _ngrams(tokens, n):
    """Return every run of ``n`` consecutive tokens, each joined by single spaces."""
    return [" ".join(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]


def process_matching(keywords, article, fuzzy, mode):
    """Count how often each keyword phrase occurs in an article.

    Args:
        keywords: Iterable of keyword phrases. Each item is converted to
            ``str``, normalized with ``clean_text`` and has its whitespace
            collapsed. Keywords that normalize to the same text are counted
            once; keywords that normalize to an empty string are ignored.
        article: The article text to search.
        fuzzy: When true, keyword and article are both normalized with
            ``clean_text_fuzzy`` and an n-gram counts as a match when its
            ``fuzz.partial_ratio`` score against the keyword is above 90.
            When false, an n-gram must equal the normalized keyword.
        mode: ``"filter"`` drops keywords with zero matches from the result;
            any other value keeps every keyword.

    Returns:
        A ``(results, total_count)`` tuple: ``results`` maps each normalized
        keyword to its match count (in first-seen order) and ``total_count``
        is the sum of the counts in ``results``.
    """
    # Normalize and de-duplicate up front. Keywords that differ only in case,
    # punctuation or spacing would otherwise be counted more than once.
    results = {}
    for raw_keyword in keywords:
        keyword = " ".join(clean_text(str(raw_keyword)).split())
        if keyword:
            results.setdefault(keyword, 0)

    # Only the normalization needed by the selected mode is computed.
    article_tokens = (clean_text_fuzzy(article) if fuzzy else clean_text(article)).split()

    # N-grams depend only on the article and the size, so build each size once
    # and share it between all keywords of that length.
    ngrams_by_size = {}

    for keyword in results:
        # In fuzzy mode the article has no stop words, so strip them from the
        # keyword as well to compare like with like.
        needle = clean_text_fuzzy(keyword) if fuzzy else keyword
        size = len(needle.split())
        if size == 0:
            # The keyword consisted solely of stop words; nothing to look for.
            continue

        # Compare a keyword only with n-grams of its own word count. Scanning
        # every size up to the longest keyword makes partial_ratio count one
        # occurrence several times and ties the count to unrelated keywords.
        if size not in ngrams_by_size:
            ngrams_by_size[size] = _ngrams(article_tokens, size)
        candidates = ngrams_by_size[size]
        if not candidates:
            # The article is shorter than the keyword.
            continue

        if fuzzy:
            matches = process.extract(needle, candidates, scorer=fuzz.partial_ratio, limit=None)
            results[keyword] = sum(1 for _, score in matches if score > 90)
        else:
            results[keyword] = candidates.count(needle)

    if mode == "filter":
        results = {k: v for k, v in results.items() if v > 0}

    return results, sum(results.values())
# Streamlit app
st.title("Keyword Matcher")

# Mode selection: the radio shows display labels; the rest of the script uses
# the short identifiers "frequency" and "filter".
selected_mode_label = st.radio(
    "Select Mode:",
    ["Keyword Frequency", "Keyword Filter"],
    horizontal=True,
)
if selected_mode_label == "Keyword Frequency":
    mode = "frequency"
else:
    mode = "filter"
# Keyword input: either typed as a comma-separated list or read from the first
# column of an uploaded CSV/Excel file. An uploaded file takes precedence.
st.subheader("Keywords")
keywords_input = st.text_area("Enter keywords (comma separated):")
uploaded_file = st.file_uploader("Or upload a CSV/Excel file with keywords (first column):", type=["csv", "xlsx"])

keywords = []
if uploaded_file:
    # Compare the extension case-insensitively so a name such as
    # "KEYWORDS.CSV" is still parsed as CSV rather than handed to read_excel.
    if uploaded_file.name.lower().endswith(".csv"):
        df = pd.read_csv(uploaded_file)
    else:
        df = pd.read_excel(uploaded_file)
    if not df.empty:
        # Cells can be parsed as numbers; convert them to text so the string
        # helpers downstream can process them, and drop blank cells.
        first_column = df.iloc[:, 0].dropna().astype(str).str.strip()
        keywords = [k for k in first_column if k]
else:
    keywords = [k.strip() for k in keywords_input.split(",") if k.strip()]
# Article input
st.subheader("Article")
article = st.text_area("Paste the article text here:")

# Fuzzy matching checkbox
fuzzy = st.checkbox("Enable Fuzzy Matching")

# Process button: run the matching only when both inputs are present,
# otherwise report whichever one is missing (keywords are checked first).
if st.button("Process"):
    if keywords and article:
        results, total_count = process_matching(keywords, article, fuzzy, mode)

        # Show one line per keyword followed by the grand total.
        st.subheader("Results")
        for keyword, count in results.items():
            st.write(f"{keyword}: {count}")
        st.write(f"**Total Count:** {total_count}")

        # Save to Excel: write the table into an in-memory buffer and offer
        # that buffer as a download.
        st.subheader("Download Results")
        results_df = pd.DataFrame(
            {
                "Keyword": list(results.keys()),
                "Count": list(results.values()),
            }
        )
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
            results_df.to_excel(writer, index=False, sheet_name="Results")
        # Rewind so the download starts at the first byte of the workbook.
        excel_buffer.seek(0)
        st.download_button(
            label="Download Results as Excel",
            data=excel_buffer,
            file_name="results.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )
    elif not keywords:
        st.error("Please provide keywords.")
    else:
        st.error("Please provide an article.")