nneka committed on
Commit
d091a7a
1 Parent(s): bf8b87a

Upload streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +111 -0
streamlit_app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import necessary libraries
2
+ import streamlit as st
3
+ from transformers import GPT2Tokenizer, GPT2LMHeadModel
4
+ from wordcloud import WordCloud
5
+ import matplotlib.pyplot as plt
6
+ import pandas as pd
7
+ import nltk
8
+ from nltk.corpus import stopwords
9
+ from nltk.tokenize import word_tokenize
10
+
11
# Download NLTK resources if not already downloaded.
# Each resource is looked up on its own so that one missing package does not
# trigger a download attempt for packages that are already installed.
_NLTK_RESOURCES = {
    'punkt': 'tokenizers/punkt',
    'stopwords': 'corpora/stopwords',
    'vader_lexicon': 'sentiment/vader_lexicon',
}
for _package, _resource_path in _NLTK_RESOURCES.items():
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        # nltk.data.find raises LookupError when the resource is not installed.
        nltk.download(_package)
20
+
21
# Load the pretrained GPT-2 tokenizer and language-model head from the
# 'gpt2' checkpoint; both names are used by the generation code below.
# NOTE(review): this runs at module top level, so it presumably repeats on
# every Streamlit rerun -- consider caching the loaded objects; confirm
# against the Streamlit version deployed.
tokenizer, model = (
    GPT2Tokenizer.from_pretrained('gpt2'),
    GPT2LMHeadModel.from_pretrained('gpt2'),
)
24
+
25
# Source documents for the text analysis.
# Each entry is a path to a plain-text file, opened relative to the current
# working directory when the corpus is read below.
txt_files = [
    "Cognition in Pregnancy- Perceptions and Performance, 2005-2006 - Dataset - B2FIND.txt",
    "Frontiers | Cognitive disorder and associated factors among pregnant women attending antenatal servi.txt",
    "Frustrated By Brain Fog? How Pregnancy Actually Alters Yo....txt",
    "Is Pregnancy Brain Real?.txt",
    "Is ‘pregnancy brain’ real or just a myth? | Your Pregnancy Matters | UT Southwestern Medical Center.txt",
    "Memory and affective changes during the antepartum- A narrative review and integrative hypothesis- J.txt",
    "Pregnancy 'does cause memory loss' | Medical research | The Guardian.txt",
    "Pregnancy Brain — Forgetfulness During Pregnancy.txt",
    "Pregnancy brain- When it starts and what causes pregnancy brain fog | BabyCenter.txt",
    "Pregnancy does cause memory loss, study says - CNN.txt",
    "Textbook J.A. Russell, A.J. Douglas, R.J. Windle, C.D. Ingram - The Maternal Brain_ Neurobiological and Neuroendocrine Adaptation and Disorders in Pregnancy & Post Partum-Elsevier Science (2001).txt",
    "The effect of pregnancy on maternal cognition - PMC.txt",
    "This Is Your Brain on Motherhood - The New York Times.txt",
    "Working memory from pregnancy to postpartum.txt",
    "What Is Mom Brain and Is It Real?.txt",
    "Memory loss in Pregnancy- Myth or Fact? - International Forum for Wellbeing in Pregnancy.txt",
    "Memory and mood changes in pregnancy- a qualitative content analysis of women’s first-hand accounts.txt",
    "Is Mom Brain real? Understanding and coping with postpartum brain fog.txt",
    "Everyday Life Memory Deficits in Pregnant Women.txt",
    "Cognitive Function Decline in the Third Trimester.txt",
    "'Mommy brain' might be a good thing, new research suggests | CBC Radio.txt"
]
52
+
53
# Read every source document into a one-column DataFrame ('text').
data = []
for file_path in txt_files:
    # Decode explicitly as UTF-8 instead of relying on the platform's
    # default encoding, so the documents read the same on every host.
    with open(file_path, "r", encoding="utf-8") as file:
        data.append({"text": file.read()})

# Naming the column keeps 'text' present even if no documents were read.
df = pd.DataFrame(data, columns=["text"])
60
+
61
# Cleaning the text.
# The 'punkt' and 'stopwords' resources are already checked (and downloaded
# if missing) near the top of this script, so they are not fetched again here.
df['tokens'] = df['text'].apply(word_tokenize)  # one list of word tokens per document

stop_words = set(stopwords.words('english'))  # set gives O(1) membership tests


def _clean_tokens(tokens):
    """Return the kept tokens of one document as a single space-joined string.

    A token is kept when it is purely alphanumeric and its lowercase form is
    not an English stopword; kept tokens are lowercased.
    """
    kept = []
    for token in tokens:
        lowered = token.lower()
        if token.isalnum() and lowered not in stop_words:
            kept.append(lowered)
    return ' '.join(kept)


df['cleaned_text'] = df['tokens'].apply(_clean_tokens)
71
+
72
# Streamlit app: free-text chat box answered by GPT-2.
st.title("Memory Support Chatbox for Pregnant Women")
# The hint is passed as a placeholder, not as the initial value. As an initial
# value it made `user_input` non-empty on first load, so the model generated a
# reply to the hint text itself.
user_input = st.text_input("You:", placeholder="Enter your message here...")
if user_input and user_input.strip():  # ignore empty / whitespace-only input
    input_ids = tokenizer.encode(user_input, return_tensors='pt')
    reply_ids = model.generate(
        input_ids,
        max_length=100,
        pad_token_id=tokenizer.eos_token_id,
    )
    reply_text = tokenizer.decode(reply_ids[0], skip_special_tokens=True)
    st.text_area("Chatbot:", value=reply_text, height=200)
80
+
81
# Text Analysis
st.subheader("Text Analysis")

# Word Cloud built from the cleaned text of all documents.
st.subheader("Word Cloud")
all_text = " ".join(df["cleaned_text"])
if all_text.strip():
    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(all_text)
    # Draw on an explicit Figure and hand that to Streamlit instead of the
    # pyplot module, so rendering does not depend on global pyplot state.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    st.pyplot(fig)
    plt.close(fig)  # release the figure so reruns do not accumulate open figures
else:
    # WordCloud.generate raises when there are no words to draw.
    st.info("No text available to build a word cloud.")
92
+
93
# Sample Prompts: show GPT-2's output for a fixed set of example questions.
st.subheader("Sample Prompts")
sample_prompts = [
    "What causes pregnancy brain fog?",
    "How does pregnancy affect the brain?",
    "How can I improve my memory during pregnancy?",
    "Can pregnancy brain fog affect my ability to work or perform daily tasks?",
]

for prompt in sample_prompts:
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    output = model.generate(
        input_ids,
        max_length=100,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
        # Mask is true for every token that is not the EOS id.
        attention_mask=input_ids.ne(tokenizer.eos_token_id),
    )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    # A blank line separates prompt and response: in Markdown a single
    # newline does not start a new line, so the two ran together.
    st.write(f"**Prompt:** {prompt}\n\n**Response:** {response}\n")
107
+
108
# Show the full DataFrame (raw text, tokens and cleaned text) under its own heading.
st.subheader("DataFrame")
st.write(df)
111
+