Spaces:

abdulllah01
/

Keywords

Running

App Files Files Community

Keywords / app.py

abdulllah01

Update app.py

0f5c3da verified 8 months ago

raw

history blame contribute delete

3.76 kB

	import streamlit as st
	import pandas as pd
	from fuzzywuzzy import fuzz
	from fuzzywuzzy import process
	import string
	from nltk.corpus import stopwords
	import nltk
	import io

	nltk.download('stopwords')

	# Helper functions
	def clean_text(text):
	text = text.lower()
	text = text.translate(str.maketrans('', '', string.punctuation))
	return text

	def clean_text_fuzzy(text):
	stop_words = set(stopwords.words('english'))
	text = text.lower()
	text = text.translate(str.maketrans('', '', string.punctuation))
	words = text.split()
	words = [word for word in words if word not in stop_words]
	return " ".join(words)

	def process_matching(keywords, article, fuzzy, mode):
	keywords = [clean_text(k) for k in keywords]

	article_exact = clean_text(article)
	article_fuzzy = clean_text_fuzzy(article)

	results = {}
	max_keyword_length = max(len(k.split()) for k in keywords)

	for keyword in keywords:
	if keyword not in results:
	results[keyword] = 0

	if fuzzy:
	for n in range(1, max_keyword_length + 1):
	n_grams_fuzzy = [" ".join(article_fuzzy.split()[i:i + n]) for i in range(len(article_fuzzy.split()) - n + 1)]
	matches = process.extract(keyword, n_grams_fuzzy, scorer=fuzz.partial_ratio, limit=None)
	results[keyword] += sum(1 for match, score in matches if score > 90)
	else:
	for n in range(1, max_keyword_length + 1):
	n_grams_exact = [" ".join(article_exact.split()[i:i + n]) for i in range(len(article_exact.split()) - n + 1)]
	results[keyword] += n_grams_exact.count(keyword)

	if mode == "filter":
	results = {k: v for k, v in results.items() if v > 0}

	total_count = sum(results.values())
	return results, total_count

	# Streamlit app
	st.title("Keyword Matcher")

	# Mode selection
	mode = st.radio("Select Mode:", ["Keyword Frequency", "Keyword Filter"], horizontal=True)
	mode = "frequency" if mode == "Keyword Frequency" else "filter"

	# Keyword input
	st.subheader("Keywords")
	keywords_input = st.text_area("Enter keywords (comma separated):")
	uploaded_file = st.file_uploader("Or upload a CSV/Excel file with keywords (first column):", type=["csv", "xlsx"])

	keywords = []
	if uploaded_file:
	if uploaded_file.name.endswith(".csv"):
	df = pd.read_csv(uploaded_file)
	else:
	df = pd.read_excel(uploaded_file)
	if not df.empty:
	keywords = df.iloc[:, 0].dropna().tolist()
	else:
	keywords = [k.strip() for k in keywords_input.split(",") if k.strip()]

	# Article input
	st.subheader("Article")
	article = st.text_area("Paste the article text here:")

	# Fuzzy matching checkbox
	fuzzy = st.checkbox("Enable Fuzzy Matching")

	# Process button
	if st.button("Process"):
	if not keywords:
	st.error("Please provide keywords.")
	elif not article:
	st.error("Please provide an article.")
	else:
	results, total_count = process_matching(keywords, article, fuzzy, mode)

	st.subheader("Results")
	for keyword, count in results.items():
	st.write(f"{keyword}: {count}")
	st.write(f"Total Count: {total_count}")

	# Save to Excel
	st.subheader("Download Results")
	df = pd.DataFrame(list(results.items()), columns=["Keyword", "Count"])
	output = io.BytesIO()
	with pd.ExcelWriter(output, engine='openpyxl') as writer:
	df.to_excel(writer, index=False, sheet_name="Results")
	output.seek(0)

	st.download_button(
	label="Download Results as Excel",
	data=output,
	file_name="results.xlsx",
	mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
	)