Mohit-321 committed on
Commit
8c46649
1 Parent(s): fb406a8

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +209 -0
  2. helper.py +68 -0
  3. language.xlsx +0 -0
  4. preprocessor.py +127 -0
  5. stop_hinglish.txt +1055 -0
app.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ from transformers import AutoTokenizer
4
+ from transformers import AutoModelForSequenceClassification
5
+ import warnings
6
+ warnings.filterwarnings("ignore")
7
+ import nltk
8
+ nltk.download('all')
9
+ import matplotlib.pyplot as plt
10
+ import helper
11
+ import preprocessor
12
+ from mtranslate import translate
13
+ import pandas as pd
14
+ import os
15
+ from gtts import gTTS
16
+ import base64
17
+ import torch
18
# --- Chat upload and per-user / group statistics -----------------------------
st.sidebar.title("Whatsapp Chat analyzer")

uploaded_file = st.sidebar.file_uploader("Choose a file")

if uploaded_file is not None:
    # The uploader returns raw bytes; the export is decoded as UTF-8 text.
    data = uploaded_file.getvalue().decode("utf-8")
    df_new = preprocessor.preprocess(data)
    st.dataframe(df_new)

    # "Group analysis" is the sentinel the helper functions treat as "all users".
    user_list = sorted(df_new['users'].unique().tolist())
    user_list.insert(0, "Group analysis")
    selected_user = st.sidebar.selectbox("show analysis wrt", user_list)

    if st.sidebar.button("Show Analysis"):
        num_messages, words, num_links = helper.fetch_stats(selected_user, df_new)
        col1, col2, col3 = st.columns(3)

        with col1:
            st.header("Total Messages")
            st.title(num_messages)
        with col2:
            st.header("Total Words")
            st.title(words)
        with col3:
            st.header("Links Shared")
            st.title(num_links)

        if selected_user == "Group analysis":
            st.title("Most busy users")
            x, new_df = helper.most_busy_users(df_new)
            fig, ax = plt.subplots()
            col1, col2 = st.columns(2)

            with col1:
                ax.bar(x.index, x.values)
                plt.xticks(rotation='vertical')
                st.pyplot(fig)
            with col2:
                st.dataframe(new_df)
            # Figures are closed once rendered so they do not pile up across reruns.
            plt.close(fig)

        st.title("Positive Word cloud")
        try:
            # helper.py defines positive_word_cloud; there is no create_word_cloud.
            df_wc = helper.positive_word_cloud(selected_user, df_new)
        except (ValueError, OSError) as err:
            # e.g. no positive words to draw, or the mask image is missing.
            st.warning(f"Could not build the word cloud: {err}")
        else:
            fig, ax = plt.subplots()
            ax.imshow(df_wc)
            ax.axis('off')
            st.pyplot(fig)
            plt.close(fig)

        st.title("Most Common Words")
        most_common_df = helper.most_common_words(selected_user, df_new)
        if most_common_df.empty:
            # An empty frame has no columns 0/1, so plotting it would raise KeyError.
            st.info("No words left to rank after removing stop words.")
        else:
            fig, ax = plt.subplots()
            ax.barh(most_common_df[0], most_common_df[1])
            st.pyplot(fig)
            plt.close(fig)
            st.dataframe(most_common_df)

        if selected_user == "Group analysis":
            st.title("Sentiment Analysis")
            # helper.py in this commit has no sentiment_analysis function, so the
            # label counts of the 'Analysis' column produced by
            # preprocessor.preprocess are charted directly.
            # NOTE(review): confirm this is the chart the missing helper was meant to draw.
            sentiment_counts = df_new['Analysis'].value_counts()
            fig, ax = plt.subplots()
            ax.bar(sentiment_counts.index, sentiment_counts.values)
            st.pyplot(fig)
            plt.close(fig)

st.title("Sentiment Analysis")
82
# st.cache is deprecated; st.cache_resource is its replacement for objects such
# as models. Fall back to the legacy decorator on Streamlit releases that
# predate cache_resource so older deployments keep working.
_cache_model = (
    st.cache_resource
    if hasattr(st, "cache_resource")
    else st.cache(allow_output_mutation=True)
)


@_cache_model
def get_model():
    """Load the sentiment tokenizer and model, cached by Streamlit.

    Returns:
        tuple: ``(tokenizer, model)`` for
        ``cardiffnlp/twitter-roberta-base-sentiment``.
    """
    MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)
    return tokenizer, model
88
+
89
+
90
# --- Free-text sentiment analysis --------------------------------------------
tokenizer, model = get_model()

user_input = st.text_area('Enter Text to Analyze')
button = st.button("Analyze")

if user_input and button:
    test_sample = tokenizer([user_input], padding=True, truncation=True, max_length=512, return_tensors='pt')
    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(**test_sample)
    # The prediction now comes from the cached model loaded above. Previously
    # the forward pass was discarded and a separate default pipeline was rebuilt
    # on every rerun just to produce the displayed label.
    probabilities = torch.softmax(output[0][0], dim=-1)
    best = int(torch.argmax(probabilities))
    # Class order follows preprocessor.py, which reads indices 0/1/2 as
    # negative / neutral / positive for this model.
    sentiment_labels = ("negative", "neutral", "positive")
    st.write("Prediction: ", [{"label": sentiment_labels[best], "score": float(probabilities[best])}])
# NOTE(review): plain assignment that nothing in this script reads; presumably
# meant as a Streamlit config option. Kept so module globals are unchanged.
showWarningOnDirectExecution = False
102
+
103
# --- Language table and page layout for translation / text-to-speech ---------
# Rows with any missing cell are discarded before the name/code columns are read.
df = pd.read_excel('language.xlsx', sheet_name='wiki').dropna()
lang = df['name'].tolist()
langcode = df['iso'].tolist()
langlist = tuple(lang)

# Maps each language display name to its language code.
lang_array = dict(zip(lang, langcode))

# layout
st.title("Language-Translation + Text-To-Speech")
st.markdown("In Python 🐍 with Streamlit ! (https://www.streamlit.io/)")
st.markdown(
    "Languages are pulled from language.xlsx dynamically. If translation is available it will be displayed in TRANSLATED TEXT window.\n In addition if text-to-Speech is supported it will display audio file to play and download."
)
inputtext = st.text_area("INPUT", height=200)

choice = st.sidebar.radio('SELECT LANGUAGE', langlist)

# Language code -> display name for languages with text-to-speech support.
speech_langs = {
    "af": "Afrikaans",
    "ar": "Arabic",
    "bg": "Bulgarian",
    "bn": "Bengali",
    "bs": "Bosnian",
    "ca": "Catalan",
    "cs": "Czech",
    "cy": "Welsh",
    "da": "Danish",
    "de": "German",
    "el": "Greek",
    "en": "English",
    "eo": "Esperanto",
    "es": "Spanish",
    "et": "Estonian",
    "fi": "Finnish",
    "fr": "French",
    "gu": "Gujarati",
    "hi": "Hindi",
    "hr": "Croatian",
    "hu": "Hungarian",
    "hy": "Armenian",
    "id": "Indonesian",
    "is": "Icelandic",
    "it": "Italian",
    "ja": "Japanese",
    "jw": "Javanese",
    "km": "Khmer",
    "kn": "Kannada",
    "ko": "Korean",
    "la": "Latin",
    "lv": "Latvian",
    "mk": "Macedonian",
    "ml": "Malayalam",
    "mr": "Marathi",
    "my": "Myanmar (Burmese)",
    "ne": "Nepali",
    "nl": "Dutch",
    "no": "Norwegian",
    "pl": "Polish",
    "pt": "Portuguese",
    "ro": "Romanian",
    "ru": "Russian",
    "si": "Sinhala",
    "sk": "Slovak",
    "sq": "Albanian",
    "sr": "Serbian",
    "su": "Sundanese",
    "sv": "Swedish",
    "sw": "Swahili",
    "ta": "Tamil",
    "te": "Telugu",
    "th": "Thai",
    "tl": "Filipino",
    "tr": "Turkish",
    "uk": "Ukrainian",
    "ur": "Urdu",
    "vi": "Vietnamese",
    "zh-CN": "Chinese",
}
181
+
182
+ # function to base64-encode a file into an HTML download link
183
def get_binary_file_downloader_html(bin_file, file_label='File'):
    """Return an HTML anchor that downloads *bin_file* through a data URI.

    Args:
        bin_file: Path of the file to embed.
        file_label: Text shown after "Download" in the link.

    Returns:
        str: ``<a>`` element whose ``href`` holds the base64-encoded file
        contents and whose ``download`` attribute is the file's base name.
    """
    with open(bin_file, 'rb') as handle:
        encoded = base64.b64encode(handle.read()).decode()
    filename = os.path.basename(bin_file)
    return (
        f'<a href="data:application/octet-stream;base64,{encoded}" '
        f'download="{filename}">Download {file_label}</a>'
    )
189
+
190
c1, c2 = st.columns([4, 3])

# I/O: translate the input text and, when supported, offer it as speech.
if len(inputtext) > 0:
    try:
        target_code = lang_array[choice]
        output = translate(inputtext, target_code)
        with c1:
            st.text_area("TRANSLATED TEXT", output, height=200)
        # Render the audio file only if speech support is available. The match
        # is made on either the display name or the language code, because the
        # names in language.xlsx need not be spelled like those in speech_langs.
        if choice in speech_langs.values() or target_code in speech_langs:
            with c2:
                aud_file = gTTS(text=output, lang=target_code, slow=False)
                aud_file.save("lang.mp3")
                # Context manager closes the handle; it was previously left open.
                with open('lang.mp3', 'rb') as audio_file_read:
                    audio_bytes = audio_file_read.read()
                st.audio(audio_bytes, format='audio/mp3')
                st.markdown(get_binary_file_downloader_html("lang.mp3", 'Audio File'), unsafe_allow_html=True)
    except Exception as e:
        # UI boundary: show any translation or speech failure instead of crashing.
        st.error(e)
helper.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ from urlextract import URLExtract
3
+ from collections import Counter
4
+ from wordcloud import WordCloud, STOPWORDS ,ImageColorGenerator
5
+ import pandas as pd
6
+ import matplotlib.pylab as plt
7
+ import PIL.Image
8
+ import numpy as np
9
+
10
+ extract=URLExtract()
11
def fetch_stats(selected_user, df):
    """Count messages, words and links for one user or the whole group.

    Args:
        selected_user: A value of the ``users`` column, or "Group analysis"
            to keep every row.
        df: Chat frame with ``users`` and ``message`` columns.

    Returns:
        tuple: ``(number of messages, number of whitespace-separated words,
        number of URLs found by the module-level extractor)``.
    """
    if selected_user != "Group analysis":
        df = df[df['users'] == selected_user]

    texts = df['message']
    word_total = sum(len(text.split()) for text in texts)
    link_total = sum(len(extract.find_urls(text)) for text in texts)
    return df.shape[0], word_total, link_total
26
+
27
def most_busy_users(df):
    """Rank users by message count and by share of all messages.

    Args:
        df: Chat frame with a ``users`` column.

    Returns:
        tuple: ``(top, share)`` where ``top`` is a Series with the message
        counts of the five most active users, and ``share`` is a DataFrame
        with columns ``name`` and ``percent`` (percentage of all rows,
        rounded to two decimals) for every user.
    """
    counts = df['users'].value_counts()
    top = counts.head()
    percent = (counts / df.shape[0] * 100).round(2)
    # The frame is built explicitly. The previous reset_index().rename(...)
    # used the key 'user', which never matched a column, and the key 'index',
    # which depends on how the installed pandas names reset_index columns.
    share = pd.DataFrame({
        'name': percent.index.tolist(),
        'percent': percent.to_numpy(),
    })
    return top, share
32
+
33
def most_common_words(selected_user, df, *, stop_words_path='stop_hinglish.txt', top_n=30):
    """Return the most frequent non-stop-words in the selected messages.

    Args:
        selected_user: A value of the ``users`` column, or "Group analysis"
            to keep every row.
        df: Chat frame with ``users`` and ``message`` columns.
        stop_words_path: Text file with one stop word per line.
        top_n: Maximum number of words to return.

    Returns:
        pandas.DataFrame: One row per word, column ``0`` holding the word and
        column ``1`` its count, most frequent first. The frame is empty (no
        columns) when no word survives filtering.
    """
    # One stop word per line. A set gives exact-word membership; the previous
    # test against the raw file text was a substring match, which also dropped
    # any word that merely occurred inside a longer stop word.
    with open(stop_words_path, 'r', encoding='utf-8') as stop_file:
        stop_words = {line.strip().lower() for line in stop_file if line.strip()}

    if selected_user != "Group analysis":
        df = df[df['users'] == selected_user]
    temp = df[df['users'] != 'group_notification']
    # Media placeholders are dropped with or without the trailing newline,
    # because preprocessor.preprocess strips newlines from messages.
    temp = temp[~temp['message'].isin(['<Media omitted>\n', '<Media omitted>'])]

    counts = Counter(
        word
        for message in temp['message']
        for word in message.lower().split()
        if word not in stop_words
    )
    return pd.DataFrame(counts.most_common(top_n))
50
+
51
def positive_word_cloud(selected_user, df):
    """Build a word cloud from messages scored as positive.

    Args:
        selected_user: A value of the ``users`` column, or "Group analysis"
            to keep every row.
        df: Chat frame with ``users``, ``message`` and ``roberta_pos`` columns.

    Returns:
        WordCloud: Cloud generated from messages whose ``roberta_pos`` score
        exceeds 0.5. The caller is expected to draw it (app.py passes it to
        ``ax.imshow``).

    Raises:
        ValueError: Propagated from ``WordCloud.generate`` when there are no
            words to draw.
    """
    if selected_user != "Group analysis":
        df = df[df['users'] == selected_user]

    positive_messages = df.loc[df['roberta_pos'] > 0.5, 'message'].dropna().astype(str)

    # The mask image is optional: fall back to the default rectangular cloud
    # when 'wcc.png' is not present next to the app.
    try:
        with PIL.Image.open('wcc.png') as image:
            mask = np.array(image)
    except FileNotFoundError:
        mask = None

    wordcloud = WordCloud(stopwords=set(STOPWORDS), mask=mask, background_color="White")
    # Messages are joined with a space so the last word of one message and the
    # first word of the next are not fused into a single token. No pyplot
    # figure is opened here any more; rendering is left to the caller.
    return wordcloud.generate(' '.join(positive_messages))


# app.py refers to this function as create_word_cloud; keep that name available.
create_word_cloud = positive_word_cloud
language.xlsx ADDED
Binary file (28.4 kB). View file
 
preprocessor.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import re
3
+ from textblob import TextBlob
4
+ import numpy as np
5
+ import nltk
6
+ import nltk.data
7
+ from nltk.sentiment.vader import SentimentIntensityAnalyzer
8
+ from transformers import AutoTokenizer
9
+ from transformers import AutoModelForSequenceClassification
10
+ from scipy.special import softmax
11
+ from tqdm.notebook import tqdm
12
+ sia=SentimentIntensityAnalyzer()
13
+ nltk.download('vader_lexicon')
14
+
15
# Timestamp layouts tried in order; the first one is the layout this module
# originally required, so chats that parsed before still parse the same way.
_DATE_FORMATS = (
    '%m/%d/%y, %H:%M - ',
    '%d/%m/%y, %H:%M - ',
    '%m/%d/%Y, %H:%M - ',
    '%d/%m/%Y, %H:%M - ',
)
_ROBERTA_MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
_VADER_COLUMNS = ['neg', 'neu', 'pos', 'compound']
_ROBERTA_COLUMNS = ['roberta_neg', 'roberta_neu', 'roberta_pos']
# Holds the (tokenizer, model) pair so it is loaded once, not on every call.
_roberta_cache = {}


def _parse_dates(raw_dates):
    """Convert the matched timestamp prefixes to datetimes, trying each known layout."""
    last_error = None
    for fmt in _DATE_FORMATS:
        try:
            return pd.to_datetime(raw_dates, format=fmt)
        except ValueError as err:
            last_error = err
    raise ValueError("Unrecognised timestamp format in chat export") from last_error


def _split_author(entry):
    """Split one raw entry into (user, message); entries without 'name: ' are group notifications."""
    # maxsplit=1 keeps everything after the first "name: " as the message.
    # Without it, a later ": " inside the text split the entry again and the
    # message came back empty or truncated.
    parts = re.split(r'([\w\W]+?):\s', entry, maxsplit=1)
    if parts[1:]:
        return parts[1], parts[2]
    return 'group_notification', parts[0]


def _clean_text(text):
    """Remove @mentions, '#' characters and newlines from *text*."""
    text = re.sub(r'@[A-Za-z0-9]+', '', text)
    text = re.sub(r'#', '', text)
    return text.replace('\n', "")


def _load_roberta():
    """Return the cached (tokenizer, model) pair, loading it on first use."""
    if 'loaded' not in _roberta_cache:
        _roberta_cache['loaded'] = (
            AutoTokenizer.from_pretrained(_ROBERTA_MODEL),
            AutoModelForSequenceClassification.from_pretrained(_ROBERTA_MODEL),
        )
    return _roberta_cache['loaded']


def _score_roberta(messages):
    """Score each message with the RoBERTa model; failed rows are left as NaN."""
    rows = []
    if len(messages):
        tokenizer, model = _load_roberta()
    for row_id, text in messages.items():
        try:
            # Truncate so long messages do not exceed the model's input size.
            encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
            logits = model(**encoded)[0][0].detach().numpy()
            probs = softmax(logits)
            rows.append(dict(zip(_ROBERTA_COLUMNS, probs)))
        except RuntimeError:
            print(f"Broke for id {row_id}")
            rows.append({})
    return pd.DataFrame(rows, index=messages.index, columns=_ROBERTA_COLUMNS).astype(float)


def _polarity_label(score):
    """Map a polarity score to 'Negative', 'Neutral' or 'Positive'."""
    if score < 0:
        return 'Negative'
    if score == 0:
        return 'Neutral'
    return 'Positive'


def preprocess(data):
    """Parse a WhatsApp chat export and attach sentiment scores to each message.

    Entries are recognised by a ``d/m/y, h:mm - `` style timestamp prefix.
    Group notifications, ``<Media omitted>`` placeholders and rows with an
    empty field are dropped before scoring.

    Sentiment is computed per message. Earlier the scores were stored per
    user, so every message of a user carried the score of that user's last
    message.

    Args:
        data: Full text of the exported chat.

    Returns:
        pandas.DataFrame: One row per remaining message with columns
        ``users``, ``neg``, ``neu``, ``pos``, ``compound`` (VADER),
        ``roberta_neg``, ``roberta_neu``, ``roberta_pos``, ``date``,
        ``message``, ``year``, ``day``, ``hour``, ``minute``, ``Day_name``,
        ``Month_name``, ``Subjectivity``, ``Polarity`` and ``Analysis``.

    Raises:
        ValueError: If the timestamps match none of the supported layouts.
    """
    pattern = r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}\s-\s'

    raw_messages = re.split(pattern, data)[1:]
    raw_dates = re.findall(pattern, data)
    df = pd.DataFrame({'user_message': raw_messages, 'date': raw_dates})
    df['date'] = _parse_dates(df['date'])

    authors = [_split_author(entry) for entry in df['user_message']]
    df['users'] = [user for user, _ in authors]
    df['message'] = [text for _, text in authors]
    df = df.drop(columns=['user_message'])

    df['year'] = df['date'].dt.year
    df['day'] = df['date'].dt.day
    df['hour'] = df['date'].dt.hour
    df['minute'] = df['date'].dt.minute
    df['Day_name'] = df['date'].dt.day_name()
    df['Month_name'] = df['date'].dt.month_name()

    temp = df[df['users'] != 'group_notification']
    temp = temp[temp['message'] != '<Media omitted>\n']
    # replace() returns a new frame, so the cleaning below does not write into
    # a slice of df.
    temp = temp.replace("", np.nan).dropna()
    temp['message'] = temp['message'].apply(_clean_text)
    temp['users'] = temp['users'].apply(_clean_text)

    # NOTE: the tqdm.notebook progress bars were dropped; that variant targets
    # notebook front-ends and this function is called from the Streamlit script.
    vader_scores = pd.DataFrame(
        [sia.polarity_scores(text) for text in temp['message']],
        index=temp.index,
        columns=_VADER_COLUMNS,
    )
    roberta_scores = _score_roberta(temp['message'])

    # All pieces share temp's index, so a column-wise concat lines them up row
    # by row; the column order matches what the merge-based version produced.
    results_df = pd.concat(
        [temp[['users']], vader_scores, roberta_scores, temp.drop(columns=['users'])],
        axis=1,
    ).reset_index(drop=True)

    results_df['Subjectivity'] = results_df['message'].apply(
        lambda text: TextBlob(text).sentiment.subjectivity
    )
    results_df['Polarity'] = results_df['message'].apply(
        lambda text: TextBlob(text).sentiment.polarity
    )
    results_df['Analysis'] = results_df['Polarity'].apply(_polarity_label)

    return results_df
stop_hinglish.txt ADDED
@@ -0,0 +1,1055 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .
2
+ ..
3
+ ...
4
+ ?
5
+ -
6
+ --
7
+ 1
8
+ 2
9
+ 3
10
+ 4
11
+ 5
12
+ 6
13
+ 7
14
+ 8
15
+ 9
16
+ 0
17
+ a
18
+ aadi
19
+ aaj
20
+ aap
21
+ aapne
22
+ aata
23
+ aati
24
+ aaya
25
+ aaye
26
+ ab
27
+ abbe
28
+ abbey
29
+ abe
30
+ abhi
31
+ able
32
+ about
33
+ above
34
+ accha
35
+ according
36
+ accordingly
37
+ acha
38
+ achcha
39
+ across
40
+ actually
41
+ after
42
+ afterwards
43
+ again
44
+ against
45
+ agar
46
+ ain
47
+ aint
48
+ ain't
49
+ aisa
50
+ aise
51
+ aisi
52
+ alag
53
+ all
54
+ allow
55
+ allows
56
+ almost
57
+ alone
58
+ along
59
+ already
60
+ also
61
+ although
62
+ always
63
+ am
64
+ among
65
+ amongst
66
+ an
67
+ and
68
+ andar
69
+ another
70
+ any
71
+ anybody
72
+ anyhow
73
+ anyone
74
+ anything
75
+ anyway
76
+ anyways
77
+ anywhere
78
+ ap
79
+ apan
80
+ apart
81
+ apna
82
+ apnaa
83
+ apne
84
+ apni
85
+ appear
86
+ are
87
+ aren
88
+ arent
89
+ aren't
90
+ around
91
+ arre
92
+ as
93
+ aside
94
+ ask
95
+ asking
96
+ at
97
+ aur
98
+ avum
99
+ aya
100
+ aye
101
+ baad
102
+ baar
103
+ bad
104
+ bahut
105
+ bana
106
+ banae
107
+ banai
108
+ banao
109
+ banaya
110
+ banaye
111
+ banayi
112
+ banda
113
+ bande
114
+ bandi
115
+ bane
116
+ bani
117
+ bas
118
+ bata
119
+ batao
120
+ bc
121
+ be
122
+ became
123
+ because
124
+ become
125
+ becomes
126
+ becoming
127
+ been
128
+ before
129
+ beforehand
130
+ behind
131
+ being
132
+ below
133
+ beside
134
+ besides
135
+ best
136
+ better
137
+ between
138
+ beyond
139
+ bhai
140
+ bheetar
141
+ bhi
142
+ bhitar
143
+ bht
144
+ bilkul
145
+ bohot
146
+ bol
147
+ bola
148
+ bole
149
+ boli
150
+ bolo
151
+ bolta
152
+ bolte
153
+ bolti
154
+ both
155
+ brief
156
+ bro
157
+ btw
158
+ but
159
+ by
160
+ came
161
+ can
162
+ cannot
163
+ cant
164
+ can't
165
+ cause
166
+ causes
167
+ certain
168
+ certainly
169
+ chahiye
170
+ chaiye
171
+ chal
172
+ chalega
173
+ chhaiye
174
+ clearly
175
+ c'mon
176
+ com
177
+ come
178
+ comes
179
+ could
180
+ couldn
181
+ couldnt
182
+ couldn't
183
+ d
184
+ de
185
+ dede
186
+ dega
187
+ degi
188
+ dekh
189
+ dekha
190
+ dekhe
191
+ dekhi
192
+ dekho
193
+ denge
194
+ dhang
195
+ di
196
+ did
197
+ didn
198
+ didnt
199
+ didn't
200
+ dijiye
201
+ diya
202
+ diyaa
203
+ diye
204
+ diyo
205
+ do
206
+ does
207
+ doesn
208
+ doesnt
209
+ doesn't
210
+ doing
211
+ done
212
+ dono
213
+ dont
214
+ don't
215
+ doosra
216
+ doosre
217
+ down
218
+ downwards
219
+ dude
220
+ dunga
221
+ dungi
222
+ during
223
+ dusra
224
+ dusre
225
+ dusri
226
+ dvaara
227
+ dvara
228
+ dwaara
229
+ dwara
230
+ each
231
+ edu
232
+ eg
233
+ eight
234
+ either
235
+ ek
236
+ else
237
+ elsewhere
238
+ enough
239
+ etc
240
+ even
241
+ ever
242
+ every
243
+ everybody
244
+ everyone
245
+ everything
246
+ everywhere
247
+ ex
248
+ exactly
249
+ example
250
+ except
251
+ far
252
+ few
253
+ fifth
254
+ fir
255
+ first
256
+ five
257
+ followed
258
+ following
259
+ follows
260
+ for
261
+ forth
262
+ four
263
+ from
264
+ further
265
+ furthermore
266
+ gaya
267
+ gaye
268
+ gayi
269
+ get
270
+ gets
271
+ getting
272
+ ghar
273
+ given
274
+ gives
275
+ go
276
+ goes
277
+ going
278
+ gone
279
+ good
280
+ got
281
+ gotten
282
+ greetings
283
+ guys
284
+ haan
285
+ had
286
+ hadd
287
+ hadn
288
+ hadnt
289
+ hadn't
290
+ hai
291
+ hain
292
+ hamara
293
+ hamare
294
+ hamari
295
+ hamne
296
+ han
297
+ happens
298
+ har
299
+ hardly
300
+ has
301
+ hasn
302
+ hasnt
303
+ hasn't
304
+ have
305
+ haven
306
+ havent
307
+ haven't
308
+ having
309
+ he
310
+ hello
311
+ help
312
+ hence
313
+ her
314
+ here
315
+ hereafter
316
+ hereby
317
+ herein
318
+ here's
319
+ hereupon
320
+ hers
321
+ herself
322
+ he's
323
+ hi
324
+ him
325
+ himself
326
+ his
327
+ hither
328
+ hm
329
+ hmm
330
+ ho
331
+ hoga
332
+ hoge
333
+ hogi
334
+ hona
335
+ honaa
336
+ hone
337
+ honge
338
+ hongi
339
+ honi
340
+ hopefully
341
+ hota
342
+ hotaa
343
+ hote
344
+ hoti
345
+ how
346
+ howbeit
347
+ however
348
+ hoyenge
349
+ hoyengi
350
+ hu
351
+ hua
352
+ hue
353
+ huh
354
+ hui
355
+ hum
356
+ humein
357
+ humne
358
+ hun
359
+ huye
360
+ huyi
361
+ i
362
+ i'd
363
+ idk
364
+ ie
365
+ if
366
+ i'll
367
+ i'm
368
+ imo
369
+ in
370
+ inasmuch
371
+ inc
372
+ inhe
373
+ inhi
374
+ inho
375
+ inka
376
+ inkaa
377
+ inke
378
+ inki
379
+ inn
380
+ inner
381
+ inse
382
+ insofar
383
+ into
384
+ inward
385
+ is
386
+ ise
387
+ isi
388
+ iska
389
+ iskaa
390
+ iske
391
+ iski
392
+ isme
393
+ isn
394
+ isne
395
+ isnt
396
+ isn't
397
+ iss
398
+ isse
399
+ issi
400
+ isski
401
+ it
402
+ it'd
403
+ it'll
404
+ itna
405
+ itne
406
+ itni
407
+ itno
408
+ its
409
+ it's
410
+ itself
411
+ ityaadi
412
+ ityadi
413
+ i've
414
+ ja
415
+ jaa
416
+ jab
417
+ jabh
418
+ jaha
419
+ jahaan
420
+ jahan
421
+ jaisa
422
+ jaise
423
+ jaisi
424
+ jata
425
+ jayega
426
+ jidhar
427
+ jin
428
+ jinhe
429
+ jinhi
430
+ jinho
431
+ jinhone
432
+ jinka
433
+ jinke
434
+ jinki
435
+ jinn
436
+ jis
437
+ jise
438
+ jiska
439
+ jiske
440
+ jiski
441
+ jisme
442
+ jiss
443
+ jisse
444
+ jitna
445
+ jitne
446
+ jitni
447
+ jo
448
+ just
449
+ jyaada
450
+ jyada
451
+ k
452
+ ka
453
+ kaafi
454
+ kab
455
+ kabhi
456
+ kafi
457
+ kaha
458
+ kahaa
459
+ kahaan
460
+ kahan
461
+ kahi
462
+ kahin
463
+ kahte
464
+ kaisa
465
+ kaise
466
+ kaisi
467
+ kal
468
+ kam
469
+ kar
470
+ kara
471
+ kare
472
+ karega
473
+ karegi
474
+ karen
475
+ karenge
476
+ kari
477
+ karke
478
+ karna
479
+ karne
480
+ karni
481
+ karo
482
+ karta
483
+ karte
484
+ karti
485
+ karu
486
+ karun
487
+ karunga
488
+ karungi
489
+ kaun
490
+ kaunsa
491
+ kayi
492
+ kch
493
+ ke
494
+ keep
495
+ keeps
496
+ keh
497
+ kehte
498
+ kept
499
+ khud
500
+ ki
501
+ kin
502
+ kine
503
+ kinhe
504
+ kinho
505
+ kinka
506
+ kinke
507
+ kinki
508
+ kinko
509
+ kinn
510
+ kino
511
+ kis
512
+ kise
513
+ kisi
514
+ kiska
515
+ kiske
516
+ kiski
517
+ kisko
518
+ kisliye
519
+ kisne
520
+ kitna
521
+ kitne
522
+ kitni
523
+ kitno
524
+ kiya
525
+ kiye
526
+ know
527
+ known
528
+ knows
529
+ ko
530
+ koi
531
+ kon
532
+ konsa
533
+ koyi
534
+ krna
535
+ krne
536
+ kuch
537
+ kuchch
538
+ kuchh
539
+ kul
540
+ kull
541
+ kya
542
+ kyaa
543
+ kyu
544
+ kyuki
545
+ kyun
546
+ kyunki
547
+ lagta
548
+ lagte
549
+ lagti
550
+ last
551
+ lately
552
+ later
553
+ le
554
+ least
555
+ lekar
556
+ lekin
557
+ less
558
+ lest
559
+ let
560
+ let's
561
+ li
562
+ like
563
+ liked
564
+ likely
565
+ little
566
+ liya
567
+ liye
568
+ ll
569
+ lo
570
+ log
571
+ logon
572
+ lol
573
+ look
574
+ looking
575
+ looks
576
+ ltd
577
+ lunga
578
+ m
579
+ maan
580
+ maana
581
+ maane
582
+ maani
583
+ maano
584
+ magar
585
+ mai
586
+ main
587
+ maine
588
+ mainly
589
+ mana
590
+ mane
591
+ mani
592
+ mano
593
+ many
594
+ mat
595
+ may
596
+ maybe
597
+ me
598
+ mean
599
+ meanwhile
600
+ mein
601
+ mera
602
+ mere
603
+ merely
604
+ meri
605
+ might
606
+ mightn
607
+ mightnt
608
+ mightn't
609
+ mil
610
+ mjhe
611
+ more
612
+ moreover
613
+ most
614
+ mostly
615
+ much
616
+ mujhe
617
+ must
618
+ mustn
619
+ mustnt
620
+ mustn't
621
+ my
622
+ myself
623
+ na
624
+ naa
625
+ naah
626
+ nahi
627
+ nahin
628
+ nai
629
+ name
630
+ namely
631
+ nd
632
+ ne
633
+ near
634
+ nearly
635
+ necessary
636
+ neeche
637
+ need
638
+ needn
639
+ neednt
640
+ needn't
641
+ needs
642
+ neither
643
+ never
644
+ nevertheless
645
+ new
646
+ next
647
+ nhi
648
+ nine
649
+ no
650
+ nobody
651
+ non
652
+ none
653
+ noone
654
+ nope
655
+ nor
656
+ normally
657
+ not
658
+ nothing
659
+ novel
660
+ now
661
+ nowhere
662
+ o
663
+ obviously
664
+ of
665
+ off
666
+ often
667
+ oh
668
+ ok
669
+ okay
670
+ old
671
+ on
672
+ once
673
+ one
674
+ ones
675
+ only
676
+ onto
677
+ or
678
+ other
679
+ others
680
+ otherwise
681
+ ought
682
+ our
683
+ ours
684
+ ourselves
685
+ out
686
+ outside
687
+ over
688
+ overall
689
+ own
690
+ par
691
+ pata
692
+ pe
693
+ pehla
694
+ pehle
695
+ pehli
696
+ people
697
+ per
698
+ perhaps
699
+ phla
700
+ phle
701
+ phli
702
+ placed
703
+ please
704
+ plus
705
+ poora
706
+ poori
707
+ provides
708
+ pura
709
+ puri
710
+ q
711
+ que
712
+ quite
713
+ raha
714
+ rahaa
715
+ rahe
716
+ rahi
717
+ rakh
718
+ rakha
719
+ rakhe
720
+ rakhen
721
+ rakhi
722
+ rakho
723
+ rather
724
+ re
725
+ really
726
+ reasonably
727
+ regarding
728
+ regardless
729
+ regards
730
+ rehte
731
+ rha
732
+ rhaa
733
+ rhe
734
+ rhi
735
+ ri
736
+ right
737
+ s
738
+ sa
739
+ saara
740
+ saare
741
+ saath
742
+ sab
743
+ sabhi
744
+ sabse
745
+ sahi
746
+ said
747
+ sakta
748
+ saktaa
749
+ sakte
750
+ sakti
751
+ same
752
+ sang
753
+ sara
754
+ sath
755
+ saw
756
+ say
757
+ saying
758
+ says
759
+ se
760
+ second
761
+ secondly
762
+ see
763
+ seeing
764
+ seem
765
+ seemed
766
+ seeming
767
+ seems
768
+ seen
769
+ self
770
+ selves
771
+ sensible
772
+ sent
773
+ serious
774
+ seriously
775
+ seven
776
+ several
777
+ shall
778
+ shan
779
+ shant
780
+ shan't
781
+ she
782
+ she's
783
+ should
784
+ shouldn
785
+ shouldnt
786
+ shouldn't
787
+ should've
788
+ si
789
+ sir
790
+ sir.
791
+ since
792
+ six
793
+ so
794
+ soch
795
+ some
796
+ somebody
797
+ somehow
798
+ someone
799
+ something
800
+ sometime
801
+ sometimes
802
+ somewhat
803
+ somewhere
804
+ soon
805
+ still
806
+ sub
807
+ such
808
+ sup
809
+ sure
810
+ t
811
+ tab
812
+ tabh
813
+ tak
814
+ take
815
+ taken
816
+ tarah
817
+ teen
818
+ teeno
819
+ teesra
820
+ teesre
821
+ teesri
822
+ tell
823
+ tends
824
+ tera
825
+ tere
826
+ teri
827
+ th
828
+ tha
829
+ than
830
+ thank
831
+ thanks
832
+ thanx
833
+ that
834
+ that'll
835
+ thats
836
+ that's
837
+ the
838
+ theek
839
+ their
840
+ theirs
841
+ them
842
+ themselves
843
+ then
844
+ thence
845
+ there
846
+ thereafter
847
+ thereby
848
+ therefore
849
+ therein
850
+ theres
851
+ there's
852
+ thereupon
853
+ these
854
+ they
855
+ they'd
856
+ they'll
857
+ they're
858
+ they've
859
+ thi
860
+ thik
861
+ thing
862
+ think
863
+ thinking
864
+ third
865
+ this
866
+ tho
867
+ thoda
868
+ thodi
869
+ thorough
870
+ thoroughly
871
+ those
872
+ though
873
+ thought
874
+ three
875
+ through
876
+ throughout
877
+ thru
878
+ thus
879
+ tjhe
880
+ to
881
+ together
882
+ toh
883
+ too
884
+ took
885
+ toward
886
+ towards
887
+ tried
888
+ tries
889
+ true
890
+ truly
891
+ try
892
+ trying
893
+ tu
894
+ tujhe
895
+ tum
896
+ tumhara
897
+ tumhare
898
+ tumhari
899
+ tune
900
+ twice
901
+ two
902
+ um
903
+ umm
904
+ un
905
+ under
906
+ unhe
907
+ unhi
908
+ unho
909
+ unhone
910
+ unka
911
+ unkaa
912
+ unke
913
+ unki
914
+ unko
915
+ unless
916
+ unlikely
917
+ unn
918
+ unse
919
+ until
920
+ unto
921
+ up
922
+ upar
923
+ upon
924
+ us
925
+ use
926
+ used
927
+ useful
928
+ uses
929
+ usi
930
+ using
931
+ uska
932
+ uske
933
+ usne
934
+ uss
935
+ usse
936
+ ussi
937
+ usually
938
+ vaala
939
+ vaale
940
+ vaali
941
+ vahaan
942
+ vahan
943
+ vahi
944
+ vahin
945
+ vaisa
946
+ vaise
947
+ vaisi
948
+ vala
949
+ vale
950
+ vali
951
+ various
952
+ ve
953
+ very
954
+ via
955
+ viz
956
+ vo
957
+ waala
958
+ waale
959
+ waali
960
+ wagaira
961
+ wagairah
962
+ wagerah
963
+ waha
964
+ wahaan
965
+ wahan
966
+ wahi
967
+ wahin
968
+ waisa
969
+ waise
970
+ waisi
971
+ wala
972
+ wale
973
+ wali
974
+ want
975
+ wants
976
+ was
977
+ wasn
978
+ wasnt
979
+ wasn't
980
+ way
981
+ we
982
+ we'd
983
+ well
984
+ we'll
985
+ went
986
+ were
987
+ we're
988
+ weren
989
+ werent
990
+ weren't
991
+ we've
992
+ what
993
+ whatever
994
+ what's
995
+ when
996
+ whence
997
+ whenever
998
+ where
999
+ whereafter
1000
+ whereas
1001
+ whereby
1002
+ wherein
1003
+ where's
1004
+ whereupon
1005
+ wherever
1006
+ whether
1007
+ which
1008
+ while
1009
+ who
1010
+ whoever
1011
+ whole
1012
+ whom
1013
+ who's
1014
+ whose
1015
+ why
1016
+ will
1017
+ willing
1018
+ with
1019
+ within
1020
+ without
1021
+ wo
1022
+ woh
1023
+ wohi
1024
+ won
1025
+ wont
1026
+ won't
1027
+ would
1028
+ wouldn
1029
+ wouldnt
1030
+ wouldn't
1031
+ y
1032
+ ya
1033
+ yadi
1034
+ yah
1035
+ yaha
1036
+ yahaan
1037
+ yahan
1038
+ yahi
1039
+ yahin
1040
+ ye
1041
+ yeah
1042
+ yeh
1043
+ yehi
1044
+ yes
1045
+ yet
1046
+ you
1047
+ you'd
1048
+ you'll
1049
+ your
1050
+ you're
1051
+ yours
1052
+ yourself
1053
+ yourselves
1054
+ you've
1055
+ yup