0xEmir committed on
Commit
c9652bd
·
verified ·
1 Parent(s): 0f7b5a7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -0
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Exploratory VADER sentiment analysis of TripAdvisor hotel reviews.

The script loads the review dataset, plots how many reviews each star
rating received, walks one sample review through NLTK tokenization,
part-of-speech tagging and named-entity chunking, scores every review with
VADER, and finally plots the mean positive / neutral / negative score per
star rating.
"""

# Importing the required packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm import tqdm

# Set the style sheet for plots
plt.style.use('ggplot')

# Fetch the NLTK resources used below. A fresh environment ships none of
# them, and the tokenizer / tagger / chunker / VADER calls fail with a
# LookupError when their data is missing. Both the classic and the newer
# ("_tab" / "_eng") resource names are requested because the name that is
# looked up depends on the installed NLTK version; an unknown or already
# present resource is simply skipped by the downloader.
for resource in (
    'punkt', 'punkt_tab',
    'averaged_perceptron_tagger', 'averaged_perceptron_tagger_eng',
    'maxent_ne_chunker', 'maxent_ne_chunker_tab',
    'words',
    'vader_lexicon',
):
    nltk.download(resource, quiet=True)

# Read the data
df = pd.read_csv("hf://datasets/patrickbdevaney/tripadvisor_hotel_reviews/data/tripadvisor_hotel_reviews.csv")

# Turn the positional index into an explicit 'Id' column so the sentiment
# scores can be joined back to their review later on.
df = df.reset_index().rename(columns={'index': 'Id'})

# A bare `df.head()` only renders in a notebook; print it so the preview is
# also visible when this file runs as a script.
print(df.head())

# Check the shape of the DataFrame
print(df.shape)

# Count the number of reviews for each rating and plot a bar chart
ax = (
    df['Rating']
    .value_counts()
    .sort_index()
    .plot(kind='bar', title='Count of Reviews by Stars', figsize=(10, 5))
)
ax.set_xlabel('Review Stars')
ax.set_ylabel('No. of Reviews')  # the bar height is a review count
plt.show()

# Select a review for sentiment analysis
SAMPLE_REVIEW_INDEX = 200
sample_review = df['Review'][SAMPLE_REVIEW_INDEX]
print(sample_review)

# Preprocess the review text
tokens = nltk.word_tokenize(sample_review)  # Tokenization
tagged = nltk.pos_tag(tokens)               # Part-of-speech tagging
entities = nltk.chunk.ne_chunk(tagged)      # Entity recognition

entities.pprint()

# Perform sentiment analysis using VADER
sia = SentimentIntensityAnalyzer()

# Analyze sentiment for a positive sentence
print(sia.polarity_scores('I am so happy!'))
#>> {'neg': 0.0, 'neu': 0.318, 'pos': 0.682, 'compound': 0.6468}

# Analyze sentiment for a negative sentence
print(sia.polarity_scores('I hate sweet aroma!'))
#>> {'neg': 0.499, 'neu': 0.125, 'pos': 0.375, 'compound': -0.2481}

# Analyze sentiment for the selected review
print(sia.polarity_scores(sample_review))
#>> {'neg': 0.1, 'neu': 0.612, 'pos': 0.288, 'compound': 0.9556}

# Perform sentiment analysis on the entire dataset.
# `res` maps each review's Id to its VADER score dict. Iterating the two
# needed columns directly avoids building a Series per row via iterrows().
res = {
    review_id: sia.polarity_scores(text)
    for review_id, text in tqdm(zip(df['Id'], df['Review']), total=len(df))
}

# Create a DataFrame from the sentiment scores and merge it with the original DataFrame
vaders = pd.DataFrame(res).T  # transpose: one row per review, one column per score
vaders = vaders.reset_index().rename(columns={'index': 'Id'})
# Name the join key explicitly instead of relying on shared column names.
vaders = vaders.merge(df, how='left', on='Id')

print(vaders.head())

# Visualize the sentiment scores
fig, axs = plt.subplots(1, 3, figsize=(12, 3))
sns.barplot(data=vaders, x='Rating', y='pos', ax=axs[0])
sns.barplot(data=vaders, x='Rating', y='neu', ax=axs[1])
sns.barplot(data=vaders, x='Rating', y='neg', ax=axs[2])

# Set titles for the subplots
axs[0].set_title('Positive')
axs[1].set_title('Neutral')
axs[2].set_title('Negative')

# Add spacing between the subplots
plt.tight_layout()
plt.show()