Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import mailparser | |
| from email_reply_parser import EmailReplyParser | |
| from bs4 import BeautifulSoup | |
# Function to extract the latest email message from raw email content
def extract_latest_message(raw_email):
    """Return the newest reply contained in a raw MIME email.

    The message is parsed with mail-parser. The first plain-text part that
    contains non-whitespace text is preferred; otherwise the first non-blank
    HTML part is converted to plain text. The chosen body is then passed to
    ``EmailReplyParser.parse_reply`` to drop the quoted thread. Intermediate
    results are written to the Streamlit page as debugging output.

    Args:
        raw_email: The complete raw email as a string.

    Returns:
        The latest reply text, or a string of the form ``"Error: <details>"``
        if any step raised an exception.
    """
    try:
        # Parse the email using mail-parser
        mail = mailparser.parse_from_string(raw_email)

        # Debugging: display the entire parsed mail object to inspect its content
        st.write("Parsed Email Object:")
        st.json(mail.mail_json)

        # Inspect text parts of the email (plain text and HTML)
        text_parts = mail.text_plain
        html_parts = mail.text_html

        # Debugging: output all parts to check what is available
        st.write("Text Parts:", text_parts)
        st.write("HTML Parts:", html_parts)

        # Skip blank parts so that an empty plain-text alternative does not
        # hide a usable HTML part.
        plain_body = _first_non_blank(text_parts)
        html_body = None if plain_body is not None else _first_non_blank(html_parts)

        if plain_body is not None:
            body = plain_body
            st.write("Extracted plain text body from email.")
        elif html_body is not None:
            # If no plain text is available, fall back to the HTML body
            st.write("Extracted HTML body from email. Converting to plain text...")
            body = _html_to_text(html_body)
        else:
            body = "No body content found in email."

        # Debugging: output the cleaned-up email body before using EmailReplyParser
        st.write("Cleaned-up email body before parsing:")
        st.text_area("Parsed Body", value=body, height=200)

        # Use email-reply-parser to extract only the latest reply (remove quoted thread)
        return EmailReplyParser.parse_reply(body)
    except Exception as e:
        # Deliberate catch-all: the caller shows the returned string to the
        # user, so failures are reported as text instead of propagating.
        return f"Error: {e}"


# Tags after which a line break is inserted when converting HTML to text.
_BLOCK_TAGS = [
    "p", "div", "blockquote", "li", "tr", "pre",
    "h1", "h2", "h3", "h4", "h5", "h6",
]


def _first_non_blank(parts):
    """Return the first entry of *parts* containing non-whitespace text, else None."""
    return next((part for part in parts or () if part and part.strip()), None)


def _html_to_text(html):
    """Convert an HTML fragment to plain text while keeping its line structure."""
    soup = BeautifulSoup(html, "html.parser")

    # Script and stylesheet content is not part of the visible message.
    for hidden in soup(["script", "style"]):
        hidden.decompose()

    # get_text() alone emits no line break for <br> or between block
    # elements, so visually separate lines would be glued together before
    # the reply parser sees them.
    for line_break in soup.find_all("br"):
        line_break.replace_with("\n")
    for block in soup.find_all(_BLOCK_TAGS):
        block.append("\n")

    return soup.get_text()
# Streamlit app
def main():
    """Render the page: intro text, raw-email input, and the extraction result."""
    st.title("Email Latest Message Extractor")
    st.write("""
    This tool extracts the latest message from a raw MIME email and removes any quoted thread or previous messages.
    Paste the raw email in MIME format in the text area below, and the tool will display the latest message.
    """)

    # Text box that receives the raw email content
    raw_email = st.text_area("Paste the raw MIME email content here", height=300)

    # Nothing more to do until the button is pressed
    if not st.button("Extract Latest Message"):
        return

    # Blank or whitespace-only input: ask the user for content instead
    if not raw_email.strip():
        st.warning("Please paste the raw MIME email content.")
        return

    # Run the extraction and show its result
    latest_message = extract_latest_message(raw_email)
    st.subheader("Extracted Latest Message:")
    st.text_area("Latest Message", value=latest_message, height=200)
# Build the page only when this module is the program being run.
if __name__ == "__main__":
    main()