Spaces:

Mohit-321
/

WhatsappchatAnalyzer

Runtime error

App Files Files Community

Mohit-321 committed on Mar 6, 2023

Commit

8c46649

•

1 Parent(s): fb406a8

Upload 5 files

Browse files

Files changed (5) hide show

app.py +209 -0
helper.py +68 -0
language.xlsx +0 -0
preprocessor.py +127 -0
stop_hinglish.txt +1055 -0

app.py ADDED Viewed

	@@ -0,0 +1,209 @@

+import streamlit as st
+from transformers import pipeline
+from transformers import AutoTokenizer
+from transformers import AutoModelForSequenceClassification
+import warnings
+warnings.filterwarnings("ignore")
+import nltk
+nltk.download('all')
+import matplotlib.pyplot as plt
+import helper
+import preprocessor
+from mtranslate import translate
+import pandas as pd
+import os
+from gtts import gTTS
+import base64
+import torch
# --- WhatsApp chat analysis ---------------------------------------------------
# Sidebar flow: upload an exported chat, pick one participant (or the whole
# group) and render the statistics once "Show Analysis" is pressed.
st.sidebar.title("Whatsapp Chat analyzer")
uploaded_file = st.sidebar.file_uploader("Choose a file")
if uploaded_file is not None:
    # The uploader returns raw bytes; the chat text is decoded as UTF-8.
    bytes_data = uploaded_file.getvalue()
    data = bytes_data.decode("utf-8")
    df_new = preprocessor.preprocess(data)
    st.dataframe(df_new)

    # "Group analysis" is the sentinel the helper functions treat as
    # "do not filter by user".
    user_list = df_new['users'].unique().tolist()
    user_list.sort()
    user_list.insert(0, "Group analysis")
    selected_user = st.sidebar.selectbox("show analysis wrt", user_list)

    if st.sidebar.button("Show Analysis"):
        # Headline counters.
        num_messages, words, num_links = helper.fetch_stats(selected_user, df_new)
        col1, col2, col3 = st.columns(3)
        with col1:
            st.header("Total Messages")
            st.title(num_messages)
        with col2:
            st.header("Total Words")
            st.title(words)
        with col3:
            st.header("Links Shared")
            st.title(num_links)

        # Per-user activity only makes sense for the whole group.
        if selected_user == "Group analysis":
            st.title("Most busy users")
            x, new_df = helper.most_busy_users(df_new)
            fig, ax = plt.subplots()
            col1, col2 = st.columns(2)
            with col1:
                ax.bar(x.index, x.values)
                plt.xticks(rotation='vertical')
                st.pyplot(fig)
            with col2:
                st.dataframe(new_df)
            # Streamlit re-runs this script on every interaction; close the
            # figure so pyplot does not keep accumulating them.
            plt.close(fig)

        st.title("Positive Word cloud")
        # helper.py defines positive_word_cloud(); create_word_cloud() is only
        # used if a helper module actually provides it.
        build_word_cloud = (getattr(helper, "create_word_cloud", None)
                            or helper.positive_word_cloud)
        df_wc = build_word_cloud(selected_user, df_new)
        fig, ax = plt.subplots()
        ax.imshow(df_wc)
        ax.axis('off')
        st.pyplot(fig)
        plt.close(fig)

        st.title("Most Common Words")
        most_common_df = helper.most_common_words(selected_user, df_new)
        if most_common_df.empty:
            # Nothing to plot; indexing columns 0/1 of an empty frame can fail.
            st.info("No words left to count after removing stop words.")
        else:
            fig, ax = plt.subplots()
            ax.barh(most_common_df[0], most_common_df[1])
            st.pyplot(fig)
            plt.close(fig)
            st.dataframe(most_common_df)

        if selected_user == "Group analysis":
            st.title("Sentiment Analysis")
            sentiment_fn = getattr(helper, "sentiment_analysis", None)
            if sentiment_fn is not None:
                x = sentiment_fn(df_new)
                labels, values = x[0], x[1]
            else:
                # NOTE(review): helper.py has no sentiment_analysis(); fall back
                # to counting the 'Analysis' labels produced by
                # preprocessor.preprocess() - confirm this is the intended chart.
                counts = df_new['Analysis'].value_counts()
                labels, values = counts.index, counts.values
            fig, ax = plt.subplots()
            ax.bar(labels, values)
            st.pyplot(fig)
            plt.close(fig)
# --- Free-text sentiment analysis ----------------------------------------------
st.title("Sentiment Analysis")


@st.cache(allow_output_mutation=True)
def get_model():
    """Load the tokenizer and classifier for the Cardiff NLP RoBERTa checkpoint.

    Cached by Streamlit so the weights are loaded once rather than on every
    script re-run.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)
    return tokenizer, model


@st.cache(allow_output_mutation=True)
def get_sentiment_pipeline():
    """Build the default ``sentiment-analysis`` pipeline once and reuse it."""
    return pipeline("sentiment-analysis")


tokenizer, model = get_model()
user_input = st.text_area('Enter Text to Analyze')
button = st.button("Analyze")
# Previously the pipeline was rebuilt on every re-run of the script.
sent_pipeline = get_sentiment_pipeline()
if user_input and button:
    # The displayed prediction comes from the pipeline. The extra RoBERTa
    # forward pass whose output was never used has been dropped.
    # truncation=True keeps over-long input within the model's length limit.
    st.write("Prediction: ", sent_pipeline(user_input, truncation=True))
# --- Language table --------------------------------------------------------------
# Sheet "wiki" of language.xlsx supplies the display names ('name') and the
# language codes ('iso'); rows with missing cells are discarded.
df = pd.read_excel(os.path.join('language.xlsx'), sheet_name='wiki')
df = df.dropna()
lang = df['name'].to_list()
langcode = df['iso'].to_list()
langlist = tuple(lang)
# Display name -> language code lookup.
lang_array = dict(zip(lang, langcode))

# --- Page layout -----------------------------------------------------------------
st.title("Language-Translation + Text-To-Speech")
st.markdown("In Python 🐍 with Streamlit ! (https://www.streamlit.io/)")
st.markdown("Languages are pulled from language.xlsx dynamically. If translation is available it will be displayed in TRANSLATED TEXT window.\n In addition if text-to-Speech is supported it will display audio file to play and download." )
inputtext = st.text_area("INPUT", height=200)
choice = st.sidebar.radio('SELECT LANGUAGE', langlist)
# Language code -> display name. The translation section checks the selected
# language's display name against the values of this mapping before offering
# text-to-speech.
speech_langs = {
    "af": "Afrikaans", "ar": "Arabic",
    "bg": "Bulgarian", "bn": "Bengali", "bs": "Bosnian",
    "ca": "Catalan", "cs": "Czech", "cy": "Welsh",
    "da": "Danish", "de": "German",
    "el": "Greek", "en": "English", "eo": "Esperanto", "es": "Spanish",
    "et": "Estonian",
    "fi": "Finnish", "fr": "French",
    "gu": "Gujarati",
    "hi": "Hindi", "hr": "Croatian", "hu": "Hungarian", "hy": "Armenian",
    "id": "Indonesian", "is": "Icelandic", "it": "Italian",
    "ja": "Japanese", "jw": "Javanese",
    "km": "Khmer", "kn": "Kannada", "ko": "Korean",
    "la": "Latin", "lv": "Latvian",
    "mk": "Macedonian", "ml": "Malayalam", "mr": "Marathi",
    "my": "Myanmar (Burmese)",
    "ne": "Nepali", "nl": "Dutch", "no": "Norwegian",
    "pl": "Polish", "pt": "Portuguese",
    "ro": "Romanian", "ru": "Russian",
    "si": "Sinhala", "sk": "Slovak", "sq": "Albanian", "sr": "Serbian",
    "su": "Sundanese", "sv": "Swedish", "sw": "Swahili",
    "ta": "Tamil", "te": "Telugu", "th": "Thai", "tl": "Filipino",
    "tr": "Turkish",
    "uk": "Ukrainian", "ur": "Urdu",
    "vi": "Vietnamese",
    "zh-CN": "Chinese",
}
# Build an HTML download link for a file on disk (used for the generated audio).
def get_binary_file_downloader_html(bin_file, file_label='File'):
    """Return an ``<a>`` tag that downloads ``bin_file`` from an inline data URI.

    The file is read in binary mode and base64-encoded into the ``href``, so
    the link does not depend on the file staying on disk.

    Args:
        bin_file: Path of the file to embed.
        file_label: Text shown after "Download " in the link.

    Returns:
        The HTML string for the link.

    Raises:
        OSError: If ``bin_file`` cannot be opened or read.
    """
    import html  # local import: only needed to escape the interpolated text

    with open(bin_file, 'rb') as f:
        data = f.read()
    bin_str = base64.b64encode(data).decode()
    # Escape the interpolated values so a quote or angle bracket in the file
    # name or label cannot break out of the attribute or tag.
    file_name = html.escape(os.path.basename(bin_file), quote=True)
    label = html.escape(str(file_label))
    href = (f'<a href="data:application/octet-stream;base64,{bin_str}" '
            f'download="{file_name}">Download {label}</a>')
    return href
# --- Translation + text-to-speech output ------------------------------------------
c1, c2 = st.columns([4, 3])
# I/O
if inputtext:
    try:
        output = translate(inputtext, lang_array[choice])
        with c1:
            st.text_area("TRANSLATED TEXT", output, height=200)
        # If speech support is available, render an audio player and a
        # download link for the synthesized file.
        if choice in speech_langs.values():
            with c2:
                aud_file = gTTS(text=output, lang=lang_array[choice], slow=False)
                aud_file.save("lang.mp3")
                # Read the file back through a context manager so the handle
                # is closed.
                with open('lang.mp3', 'rb') as audio_file_read:
                    audio_bytes = audio_file_read.read()
                st.audio(audio_bytes, format='audio/mp3')
                st.markdown(get_binary_file_downloader_html("lang.mp3", 'Audio File'), unsafe_allow_html=True)
    except Exception as e:
        # Top-level UI boundary: show the failure on the page instead of
        # crashing the app.
        st.error(e)

helper.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import matplotlib.pyplot as plt
+from urlextract import URLExtract
+from collections import Counter
+from wordcloud import WordCloud, STOPWORDS ,ImageColorGenerator
+import pandas as pd
+import matplotlib.pylab as plt
+import PIL.Image
+import numpy as np
+extract=URLExtract()
def fetch_stats(selected_user, df):
    """Count messages, words and shared links for one user or the whole group.

    Args:
        selected_user: A value from the ``users`` column, or
            ``"Group analysis"`` to use every row.
        df: Chat frame with ``users`` and ``message`` columns.

    Returns:
        ``(number of messages, number of whitespace-separated words,
        number of URLs found by the module-level extractor)``.
    """
    if selected_user != "Group analysis":
        df = df[df['users'] == selected_user]

    word_total = 0
    link_total = 0
    for text in df['message']:
        word_total += len(text.split())
        link_total += len(extract.find_urls(text))

    return df.shape[0], word_total, link_total
def most_busy_users(df):
    """Rank participants by how many messages they sent.

    Args:
        df: Chat frame with a ``users`` column.

    Returns:
        A ``(top, table)`` tuple. ``top`` is the ``value_counts()`` Series of
        the five most active users. ``table`` is a DataFrame with one row per
        user and the columns ``name`` and ``percent`` (share of all rows,
        rounded to two decimals).
    """
    counts = df['users'].value_counts()
    top = counts.head()
    percent = ((counts / df.shape[0]) * 100).round(2)
    # Build the table explicitly. The old reset_index().rename(...) used the
    # key 'user', which never matched a column, and the auto-generated column
    # names differ between pandas versions.
    table = pd.DataFrame({'name': percent.index, 'percent': percent.to_numpy()})
    return top, table
def most_common_words(selected_user, df, stop_words_path='stop_hinglish.txt'):
    """Return the 30 most frequent non-stop-words as a two-column frame.

    Rows from ``group_notification`` and ``<Media omitted>`` placeholders are
    ignored. Words are lower-cased and split on whitespace before counting.

    Args:
        selected_user: A value from the ``users`` column, or
            ``"Group analysis"`` to use every row.
        df: Chat frame with ``users`` and ``message`` columns.
        stop_words_path: Text file holding whitespace-separated stop words.

    Returns:
        DataFrame with column ``0`` (word) and column ``1`` (count), most
        frequent first. It is empty, but still has both columns, when nothing
        is left to count.

    Raises:
        OSError: If the stop-word file cannot be read.
    """
    # Load the stop words into a set. Keeping the raw file text made
    # `word not in stop_words` a substring test, which dropped any word that
    # merely appears inside a longer stop word.
    with open(stop_words_path, 'r', encoding='utf-8') as f:
        stop_words = set(f.read().split())

    if selected_user != "Group analysis":
        df = df[df['users'] == selected_user]
    temp = df[df['users'] != 'group_notification']
    temp = temp[temp['message'] != '<Media omitted>\n']

    words = [
        word
        for message in temp['message']
        for word in message.lower().split()
        if word not in stop_words
    ]
    # Explicit column labels keep columns 0 and 1 present for an empty result.
    most_common_df = pd.DataFrame(Counter(words).most_common(30), columns=[0, 1])
    return most_common_df
def positive_word_cloud(selected_user, df):
    """Build a word cloud from the messages scored as positive.

    A message counts as positive when its ``roberta_pos`` value is above 0.5.

    Args:
        selected_user: A value from the ``users`` column, or
            ``"Group analysis"`` to use every row.
        df: Chat frame with ``users``, ``message`` and ``roberta_pos`` columns.

    Returns:
        The generated ``WordCloud`` object. It is also drawn on a new pyplot
        figure.
    """
    if selected_user != "Group analysis":
        df = df[df['users'] == selected_user]

    # Select the positive messages directly instead of pop()-ing a column off
    # a filtered slice.
    positive_messages = df.loc[df['roberta_pos'] > 0.5, 'message']

    stopwords = set(STOPWORDS)
    # NOTE(review): 'wcc.png' is read from the working directory as the cloud
    # mask - confirm the image ships with the app.
    mask = np.array(PIL.Image.open('wcc.png'))

    # Join with a space. With '' the last word of one message was fused with
    # the first word of the next.
    text = ' '.join(positive_messages)

    # wordcloud
    wordcloud = WordCloud(stopwords=stopwords, mask=mask, background_color="White").generate(text)
    plt.figure(figsize=(20, 10), facecolor='k')
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.show()
    return wordcloud

language.xlsx ADDED Viewed

Binary file (28.4 kB). View file

preprocessor.py ADDED Viewed

	@@ -0,0 +1,127 @@

+import pandas as pd
+import re
+from textblob import TextBlob
+import numpy as np
+import nltk
+import nltk.data
+from nltk.sentiment.vader import SentimentIntensityAnalyzer
+from transformers import AutoTokenizer
+from transformers import AutoModelForSequenceClassification
+from scipy.special import softmax
+from tqdm.notebook import tqdm
# Fetch the VADER lexicon before constructing the analyzer. Creating the
# analyzer first fails on a machine where the lexicon has not been downloaded.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()
def preprocess(data):
    """Parse an exported WhatsApp chat and attach sentiment scores per message.

    Steps:
      1. Split the raw text on the ``date, time - `` prefix of each entry.
      2. Separate the sender from the message body. Entries without a
         ``sender: `` prefix are attributed to ``group_notification``.
      3. Derive calendar columns from the timestamp.
      4. Drop group notifications, media placeholders and empty rows, then
         strip ``@mentions``, ``#`` and newlines.
      5. Score every remaining message with VADER, the Cardiff NLP RoBERTa
         sentiment model and TextBlob.

    Args:
        data: Full text of the exported chat.

    Returns:
        DataFrame with one row per kept message and the columns ``users``,
        ``neg``, ``neu``, ``pos``, ``compound``, ``roberta_neg``,
        ``roberta_neu``, ``roberta_pos``, ``date``, ``message``, ``year``,
        ``day``, ``hour``, ``minute``, ``Day_name``, ``Month_name``,
        ``Subjectivity``, ``Polarity`` and ``Analysis``.
    """
    # Raw strings: '\d' and '\s' are regex escapes, not Python string escapes.
    pattern = r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}\s-\s'
    messages = re.split(pattern, data)[1:]
    dates = re.findall(pattern, data)
    df = pd.DataFrame({'user_message': messages, 'message_date': dates})
    # NOTE(review): this format assumes month/day order and a two-digit year,
    # although the pattern above also accepts four-digit years - confirm
    # against the export locale.
    df['message_date'] = pd.to_datetime(df['message_date'], format='%m/%d/%y, %H:%M - ')
    df.rename(columns={'message_date': 'date'}, inplace=True)

    users = []
    messages = []
    for message in df['user_message']:
        # maxsplit=1: split only at the first "sender: " boundary. Without it
        # every later ": " inside the body was split as well, truncating or
        # emptying the stored message.
        entry = re.split(r'([\w\W]+?):\s', message, maxsplit=1)
        if entry[1:]:
            users.append(entry[1])
            messages.append(entry[2])
        else:
            users.append('group_notification')
            messages.append(entry[0])
    df['users'] = users
    df['message'] = messages
    df.drop(columns=['user_message'], inplace=True)

    df['year'] = df['date'].dt.year
    df['day'] = df['date'].dt.day
    df['hour'] = df['date'].dt.hour
    df['minute'] = df['date'].dt.minute
    df['Day_name'] = df['date'].dt.day_name()
    df['Month_name'] = df['date'].dt.month_name()

    temp = df[df['users'] != 'group_notification']
    temp = temp[temp['message'] != '<Media omitted>\n']
    temp = temp.replace("", np.nan).dropna()

    def cleanTxt(text):
        """Remove @mentions, '#' characters and newlines."""
        text = re.sub(r'@[A-Za-z0-9]+', '', text)
        text = re.sub(r'#', '', text)
        text = text.replace('\n', "")
        return text

    temp['message'] = temp['message'].apply(cleanTxt)
    temp['users'] = temp['users'].apply(cleanTxt)
    # Fresh 0..n-1 index so the per-row scores line up with the messages.
    temp = temp.reset_index(drop=True)

    MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)

    def polarity_scores_roberts(example):
        """Return softmax-normalised negative/neutral/positive scores."""
        # Truncate so long messages do not exceed the model's input length.
        encoded_text = tokenizer(example, return_tensors="pt",
                                 truncation=True, max_length=512)
        output = model(**encoded_text)
        scores = softmax(output[0][0].detach().numpy())
        return {
            'roberta_neg': scores[0],
            'roberta_neu': scores[1],
            'roberta_pos': scores[2]
        }

    # Score each message on its own row. The previous code stored results in a
    # dict keyed by user name, so every message of a user ended up with the
    # scores of that user's last message.
    score_rows = []
    for myid, text in zip(temp['users'], temp['message']):
        row_scores = {}
        try:
            row_scores.update(sia.polarity_scores(text))
            row_scores.update(polarity_scores_roberts(text))
        except RuntimeError:
            # Best effort: scores that could not be computed stay missing.
            print(f"Broke for id {myid}")
        score_rows.append(row_scores)

    score_columns = ['neg', 'neu', 'pos', 'compound',
                     'roberta_neg', 'roberta_neu', 'roberta_pos']
    scores_df = pd.DataFrame(score_rows, columns=score_columns, index=temp.index)
    results_df = pd.concat(
        [temp[['users']], scores_df, temp.drop(columns=['users'])], axis=1)

    def getSubjectivity(text):
        return TextBlob(text).sentiment.subjectivity

    def getPolarity(text):
        return TextBlob(text).sentiment.polarity

    results_df['Subjectivity'] = results_df['message'].apply(getSubjectivity)
    results_df['Polarity'] = results_df['message'].apply(getPolarity)

    def getAnalysis(score):
        """Map a TextBlob polarity score to a label."""
        if score < 0:
            return 'Negative'
        if score == 0:
            return 'Neutral'
        return 'Positive'

    results_df['Analysis'] = results_df['Polarity'].apply(getAnalysis)
    return results_df

stop_hinglish.txt ADDED Viewed

	@@ -0,0 +1,1055 @@

+.
+..
+...
+?
+-
+--
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+a
+aadi
+aaj
+aap
+aapne
+aata
+aati
+aaya
+aaye
+ab
+abbe
+abbey
+abe
+abhi
+able
+about
+above
+accha
+according
+accordingly
+acha
+achcha
+across
+actually
+after
+afterwards
+again
+against
+agar
+ain
+aint
+ain't
+aisa
+aise
+aisi
+alag
+all
+allow
+allows
+almost
+alone
+along
+already
+also
+although
+always
+am
+among
+amongst
+an
+and
+andar
+another
+any
+anybody
+anyhow
+anyone
+anything
+anyway
+anyways
+anywhere
+ap
+apan
+apart
+apna
+apnaa
+apne
+apni
+appear
+are
+aren
+arent
+aren't
+around
+arre
+as
+aside
+ask
+asking
+at
+aur
+avum
+aya
+aye
+baad
+baar
+bad
+bahut
+bana
+banae
+banai
+banao
+banaya
+banaye
+banayi
+banda
+bande
+bandi
+bane
+bani
+bas
+bata
+batao
+bc
+be
+became
+because
+become
+becomes
+becoming
+been
+before
+beforehand
+behind
+being
+below
+beside
+besides
+best
+better
+between
+beyond
+bhai
+bheetar
+bhi
+bhitar
+bht
+bilkul
+bohot
+bol
+bola
+bole
+boli
+bolo
+bolta
+bolte
+bolti
+both
+brief
+bro
+btw
+but
+by
+came
+can
+cannot
+cant
+can't
+cause
+causes
+certain
+certainly
+chahiye
+chaiye
+chal
+chalega
+chhaiye
+clearly
+c'mon
+com
+come
+comes
+could
+couldn
+couldnt
+couldn't
+d
+de
+dede
+dega
+degi
+dekh
+dekha
+dekhe
+dekhi
+dekho
+denge
+dhang
+di
+did
+didn
+didnt
+didn't
+dijiye
+diya
+diyaa
+diye
+diyo
+do
+does
+doesn
+doesnt
+doesn't
+doing
+done
+dono
+dont
+don't
+doosra
+doosre
+down
+downwards
+dude
+dunga
+dungi
+during
+dusra
+dusre
+dusri
+dvaara
+dvara
+dwaara
+dwara
+each
+edu
+eg
+eight
+either
+ek
+else
+elsewhere
+enough
+etc
+even
+ever
+every
+everybody
+everyone
+everything
+everywhere
+ex
+exactly
+example
+except
+far
+few
+fifth
+fir
+first
+five
+followed
+following
+follows
+for
+forth
+four
+from
+further
+furthermore
+gaya
+gaye
+gayi
+get
+gets
+getting
+ghar
+given
+gives
+go
+goes
+going
+gone
+good
+got
+gotten
+greetings
+guys
+haan
+had
+hadd
+hadn
+hadnt
+hadn't
+hai
+hain
+hamara
+hamare
+hamari
+hamne
+han
+happens
+har
+hardly
+has
+hasn
+hasnt
+hasn't
+have
+haven
+havent
+haven't
+having
+he
+hello
+help
+hence
+her
+here
+hereafter
+hereby
+herein
+here's
+hereupon
+hers
+herself
+he's
+hi
+him
+himself
+his
+hither
+hm
+hmm
+ho
+hoga
+hoge
+hogi
+hona
+honaa
+hone
+honge
+hongi
+honi
+hopefully
+hota
+hotaa
+hote
+hoti
+how
+howbeit
+however
+hoyenge
+hoyengi
+hu
+hua
+hue
+huh
+hui
+hum
+humein
+humne
+hun
+huye
+huyi
+i
+i'd
+idk
+ie
+if
+i'll
+i'm
+imo
+in
+inasmuch
+inc
+inhe
+inhi
+inho
+inka
+inkaa
+inke
+inki
+inn
+inner
+inse
+insofar
+into
+inward
+is
+ise
+isi
+iska
+iskaa
+iske
+iski
+isme
+isn
+isne
+isnt
+isn't
+iss
+isse
+issi
+isski
+it
+it'd
+it'll
+itna
+itne
+itni
+itno
+its
+it's
+itself
+ityaadi
+ityadi
+i've
+ja
+jaa
+jab
+jabh
+jaha
+jahaan
+jahan
+jaisa
+jaise
+jaisi
+jata
+jayega
+jidhar
+jin
+jinhe
+jinhi
+jinho
+jinhone
+jinka
+jinke
+jinki
+jinn
+jis
+jise
+jiska
+jiske
+jiski
+jisme
+jiss
+jisse
+jitna
+jitne
+jitni
+jo
+just
+jyaada
+jyada
+k
+ka
+kaafi
+kab
+kabhi
+kafi
+kaha
+kahaa
+kahaan
+kahan
+kahi
+kahin
+kahte
+kaisa
+kaise
+kaisi
+kal
+kam
+kar
+kara
+kare
+karega
+karegi
+karen
+karenge
+kari
+karke
+karna
+karne
+karni
+karo
+karta
+karte
+karti
+karu
+karun
+karunga
+karungi
+kaun
+kaunsa
+kayi
+kch
+ke
+keep
+keeps
+keh
+kehte
+kept
+khud
+ki
+kin
+kine
+kinhe
+kinho
+kinka
+kinke
+kinki
+kinko
+kinn
+kino
+kis
+kise
+kisi
+kiska
+kiske
+kiski
+kisko
+kisliye
+kisne
+kitna
+kitne
+kitni
+kitno
+kiya
+kiye
+know
+known
+knows
+ko
+koi
+kon
+konsa
+koyi
+krna
+krne
+kuch
+kuchch
+kuchh
+kul
+kull
+kya
+kyaa
+kyu
+kyuki
+kyun
+kyunki
+lagta
+lagte
+lagti
+last
+lately
+later
+le
+least
+lekar
+lekin
+less
+lest
+let
+let's
+li
+like
+liked
+likely
+little
+liya
+liye
+ll
+lo
+log
+logon
+lol
+look
+looking
+looks
+ltd
+lunga
+m
+maan
+maana
+maane
+maani
+maano
+magar
+mai
+main
+maine
+mainly
+mana
+mane
+mani
+mano
+many
+mat
+may
+maybe
+me
+mean
+meanwhile
+mein
+mera
+mere
+merely
+meri
+might
+mightn
+mightnt
+mightn't
+mil
+mjhe
+more
+moreover
+most
+mostly
+much
+mujhe
+must
+mustn
+mustnt
+mustn't
+my
+myself
+na
+naa
+naah
+nahi
+nahin
+nai
+name
+namely
+nd
+ne
+near
+nearly
+necessary
+neeche
+need
+needn
+neednt
+needn't
+needs
+neither
+never
+nevertheless
+new
+next
+nhi
+nine
+no
+nobody
+non
+none
+noone
+nope
+nor
+normally
+not
+nothing
+novel
+now
+nowhere
+o
+obviously
+of
+off
+often
+oh
+ok
+okay
+old
+on
+once
+one
+ones
+only
+onto
+or
+other
+others
+otherwise
+ought
+our
+ours
+ourselves
+out
+outside
+over
+overall
+own
+par
+pata
+pe
+pehla
+pehle
+pehli
+people
+per
+perhaps
+phla
+phle
+phli
+placed
+please
+plus
+poora
+poori
+provides
+pura
+puri
+q
+que
+quite
+raha
+rahaa
+rahe
+rahi
+rakh
+rakha
+rakhe
+rakhen
+rakhi
+rakho
+rather
+re
+really
+reasonably
+regarding
+regardless
+regards
+rehte
+rha
+rhaa
+rhe
+rhi
+ri
+right
+s
+sa
+saara
+saare
+saath
+sab
+sabhi
+sabse
+sahi
+said
+sakta
+saktaa
+sakte
+sakti
+same
+sang
+sara
+sath
+saw
+say
+saying
+says
+se
+second
+secondly
+see
+seeing
+seem
+seemed
+seeming
+seems
+seen
+self
+selves
+sensible
+sent
+serious
+seriously
+seven
+several
+shall
+shan
+shant
+shan't
+she
+she's
+should
+shouldn
+shouldnt
+shouldn't
+should've
+si
+sir
+sir.
+since
+six
+so
+soch
+some
+somebody
+somehow
+someone
+something
+sometime
+sometimes
+somewhat
+somewhere
+soon
+still
+sub
+such
+sup
+sure
+t
+tab
+tabh
+tak
+take
+taken
+tarah
+teen
+teeno
+teesra
+teesre
+teesri
+tell
+tends
+tera
+tere
+teri
+th
+tha
+than
+thank
+thanks
+thanx
+that
+that'll
+thats
+that's
+the
+theek
+their
+theirs
+them
+themselves
+then
+thence
+there
+thereafter
+thereby
+therefore
+therein
+theres
+there's
+thereupon
+these
+they
+they'd
+they'll
+they're
+they've
+thi
+thik
+thing
+think
+thinking
+third
+this
+tho
+thoda
+thodi
+thorough
+thoroughly
+those
+though
+thought
+three
+through
+throughout
+thru
+thus
+tjhe
+to
+together
+toh
+too
+took
+toward
+towards
+tried
+tries
+true
+truly
+try
+trying
+tu
+tujhe
+tum
+tumhara
+tumhare
+tumhari
+tune
+twice
+two
+um
+umm
+un
+under
+unhe
+unhi
+unho
+unhone
+unka
+unkaa
+unke
+unki
+unko
+unless
+unlikely
+unn
+unse
+until
+unto
+up
+upar
+upon
+us
+use
+used
+useful
+uses
+usi
+using
+uska
+uske
+usne
+uss
+usse
+ussi
+usually
+vaala
+vaale
+vaali
+vahaan
+vahan
+vahi
+vahin
+vaisa
+vaise
+vaisi
+vala
+vale
+vali
+various
+ve
+very
+via
+viz
+vo
+waala
+waale
+waali
+wagaira
+wagairah
+wagerah
+waha
+wahaan
+wahan
+wahi
+wahin
+waisa
+waise
+waisi
+wala
+wale
+wali
+want
+wants
+was
+wasn
+wasnt
+wasn't
+way
+we
+we'd
+well
+we'll
+went
+were
+we're
+weren
+werent
+weren't
+we've
+what
+whatever
+what's
+when
+whence
+whenever
+where
+whereafter
+whereas
+whereby
+wherein
+where's
+whereupon
+wherever
+whether
+which
+while
+who
+whoever
+whole
+whom
+who's
+whose
+why
+will
+willing
+with
+within
+without
+wo
+woh
+wohi
+won
+wont
+won't
+would
+wouldn
+wouldnt
+wouldn't
+y
+ya
+yadi
+yah
+yaha
+yahaan
+yahan
+yahi
+yahin
+ye
+yeah
+yeh
+yehi
+yes
+yet
+you
+you'd
+you'll
+your
+you're
+yours
+yourself
+yourselves
+you've
+yup