Spaces:
Build error
Build error
Upload streamlit_app.py
Browse files- streamlit_app.py +111 -0
streamlit_app.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Import necessary libraries
|
2 |
+
import streamlit as st
|
3 |
+
from transformers import GPT2Tokenizer, GPT2LMHeadModel
|
4 |
+
from wordcloud import WordCloud
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import pandas as pd
|
7 |
+
import nltk
|
8 |
+
from nltk.corpus import stopwords
|
9 |
+
from nltk.tokenize import word_tokenize
|
10 |
+
|
11 |
+
# Make sure the NLTK resources this script relies on are available locally.
# Each resource is probed on its own, so one missing package no longer forces
# a re-download of packages that are already installed.
_NLTK_RESOURCES = (
    ('tokenizers/punkt', 'punkt'),
    # Newer NLTK releases load the tokenizer tables used by word_tokenize from
    # 'punkt_tab'. NOTE(review): on older releases this package does not exist
    # and the download attempt is expected to fail without raising - confirm
    # against the NLTK version pinned for this Space.
    ('tokenizers/punkt_tab', 'punkt_tab'),
    ('corpora/stopwords', 'stopwords'),
    # Kept from the original setup; nothing in the visible script reads it.
    ('sentiment/vader_lexicon', 'vader_lexicon'),
)

for _resource_path, _package_name in _NLTK_RESOURCES:
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package_name)
|
20 |
+
|
21 |
+
# Load GPT-2 tokenizer and model
def _load_gpt2():
    """Return the pretrained ``(tokenizer, model)`` pair for the 'gpt2' checkpoint."""
    return (
        GPT2Tokenizer.from_pretrained('gpt2'),
        GPT2LMHeadModel.from_pretrained('gpt2'),
    )


# Streamlit re-executes the whole script on every widget interaction, so the
# loader is wrapped in the resource cache to avoid re-instantiating the model
# on each rerun. The attribute lookup keeps the script working on Streamlit
# releases that predate ``st.cache_resource`` (the load is then uncached, as
# in the original code).
_cache_resource = getattr(st, 'cache_resource', None)
if _cache_resource is not None:
    _load_gpt2 = _cache_resource(_load_gpt2)

tokenizer, model = _load_gpt2()
|
24 |
+
|
25 |
+
# Load the source documents into ``df``.
# Every path below is opened relative to the current working directory; each
# file that can be read contributes one row to the 'text' column of ``df``.
txt_files = [
    "Cognition in Pregnancy- Perceptions and Performance, 2005-2006 - Dataset - B2FIND.txt",
    "Frontiers | Cognitive disorder and associated factors among pregnant women attending antenatal servi.txt",
    "Frustrated By Brain Fog? How Pregnancy Actually Alters Yo....txt",
    "Is Pregnancy Brain Real?.txt",
    "Is ‘pregnancy brain’ real or just a myth? | Your Pregnancy Matters | UT Southwestern Medical Center.txt",
    "Memory and affective changes during the antepartum- A narrative review and integrative hypothesis- J.txt",
    "Pregnancy 'does cause memory loss' | Medical research | The Guardian.txt",
    "Pregnancy Brain — Forgetfulness During Pregnancy.txt",
    "Pregnancy brain- When it starts and what causes pregnancy brain fog | BabyCenter.txt",
    "Pregnancy does cause memory loss, study says - CNN.txt",
    "Textbook J.A. Russell, A.J. Douglas, R.J. Windle, C.D. Ingram - The Maternal Brain_ Neurobiological and Neuroendocrine Adaptation and Disorders in Pregnancy & Post Partum-Elsevier Science (2001).txt",
    "The effect of pregnancy on maternal cognition - PMC.txt",
    "This Is Your Brain on Motherhood - The New York Times.txt",
    "Working memory from pregnancy to postpartum.txt",
    "What Is Mom Brain and Is It Real?.txt",
    "Memory loss in Pregnancy- Myth or Fact? - International Forum for Wellbeing in Pregnancy.txt",
    "Memory and mood changes in pregnancy- a qualitative content analysis of women’s first-hand accounts.txt",
    "Is Mom Brain real? Understanding and coping with postpartum brain fog.txt",
    "Everyday Life Memory Deficits in Pregnant Women.txt",
    "Cognitive Function Decline in the Third Trimester.txt",
    "'Mommy brain' might be a good thing, new research suggests | CBC Radio.txt"
]

data = []
for file_path in txt_files:
    try:
        # Decode explicitly as UTF-8 instead of the platform default;
        # undecodable bytes are replaced rather than aborting the app.
        with open(file_path, "r", encoding="utf-8", errors="replace") as file:
            data.append({"text": file.read()})
    except FileNotFoundError:
        # One missing document should not take the whole page down.
        st.warning(f"Source document not found, skipping: {file_path}")

# Naming the column explicitly keeps df['text'] available even when no file
# could be read.
df = pd.DataFrame(data, columns=["text"])
|
60 |
+
|
61 |
+
# Cleaning the text
# The 'punkt' and 'stopwords' resources are already ensured by the NLTK setup
# earlier in this script, so they are not downloaded again here (the repeated
# nltk.download calls ran on every Streamlit rerun).
df['tokens'] = df['text'].apply(word_tokenize)  # one list of word tokens per document

stop_words = set(stopwords.words('english'))  # set gives O(1) membership tests


def _clean_tokens(tokens):
    """Return the lower-cased alphanumeric, non-stopword tokens joined by spaces."""
    lowered = (token.lower() for token in tokens if token.isalnum())
    return ' '.join(token for token in lowered if token not in stop_words)


# Single pass per document: filter, lower-case and join in one step.
df['cleaned_text'] = df['tokens'].apply(_clean_tokens)
|
71 |
+
|
72 |
+
# Streamlit app: free-text chat box answered by GPT-2.
st.title("Memory Support Chatbox for Pregnant Women")
# The hint is passed as ``placeholder`` rather than as the widget's default
# value; as a default value it was treated as a real message and the model
# replied to it on first page load.
user_input = st.text_input("You:", placeholder="Enter your message here...")
if user_input and user_input.strip():
    # Calling the tokenizer returns input ids together with the matching
    # attention mask, which is handed to generate() explicitly.
    encoded = tokenizer(user_input, return_tensors='pt')
    reply_ids = model.generate(
        encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_length=100,
        pad_token_id=tokenizer.eos_token_id,
    )
    reply_text = tokenizer.decode(reply_ids[0], skip_special_tokens=True)
    st.text_area("Chatbot:", value=reply_text, height=200)
|
80 |
+
|
81 |
+
# Text Analysis
st.subheader("Text Analysis")

# Word Cloud built from the cleaned text of all documents.
st.subheader("Word Cloud")
all_text = " ".join(df["cleaned_text"])
if all_text.strip():
    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(all_text)
    # Draw on an explicit figure instead of pyplot's global state, and close
    # it afterwards so figures do not accumulate across Streamlit reruns.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    st.pyplot(fig)
    plt.close(fig)
else:
    # WordCloud.generate needs at least one word; show a notice instead of failing.
    st.info("No text available to build a word cloud.")
|
92 |
+
|
93 |
+
# Sample Prompts: fixed questions with their GPT-2 continuations.
st.subheader("Sample Prompts")
sample_prompts = [
    "What causes pregnancy brain fog?",
    "How does pregnancy affect the brain?",
    "How can I improve my memory during pregnancy?",
    "Can pregnancy brain fog affect my ability to work or perform daily tasks?",
]

# generate() is called without sampling options here, and the prompts never
# change, so each response is computed once per session and reused on later
# reruns of the script.
if "sample_responses" not in st.session_state:
    st.session_state["sample_responses"] = {}
_sample_responses = st.session_state["sample_responses"]

for prompt in sample_prompts:
    if prompt not in _sample_responses:
        # Use the tokenizer's own attention mask rather than deriving one
        # from a comparison with the EOS token id.
        encoded = tokenizer(prompt, return_tensors='pt')
        output = model.generate(
            encoded['input_ids'],
            attention_mask=encoded['attention_mask'],
            max_length=100,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
        )
        _sample_responses[prompt] = tokenizer.decode(output[0], skip_special_tokens=True)
    response = _sample_responses[prompt]
    # A blank line is needed for Markdown to start a new paragraph; a single
    # newline kept Prompt and Response on the same rendered line.
    st.write(f"**Prompt:** {prompt}\n\n**Response:** {response}\n")
|
107 |
+
|
108 |
+
# Render the corpus table built above under its own heading.
st.subheader("DataFrame")
st.dataframe(df)
|
111 |
+
|