Spaces:

JThosantad
/

workshop13-senti

Sleeping

App Files Files Community

JThosantad commited on Oct 29, 2023

Commit

fe4dee2

•

1 Parent(s): 852bf17

Upload 2 files

Browse files

Files changed (3) hide show

.gitattributes +1 -0
WorkshopSentimentsAna-std.ipynb +847 -0
imdb_reviews.csv +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+imdb_reviews.csv filter=lfs diff=lfs merge=lfs -text

WorkshopSentimentsAna-std.ipynb ADDED Viewed

	@@ -0,0 +1,847 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c454c018-02b7-4c3d-a21f-411748963a3f",
+   "metadata": {},
+   "source": [
+    "# Workshop: Sentiment Analysis"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2eda2e01-dfc4-42a6-9b6a-5cdf39fbce78",
+   "metadata": {},
+   "source": [
+    "<div>\n",
+    "<img src=\"https://lh3.googleusercontent.com/pw/ADCreHdzakFbNdHwBE1ZrwOiNCQibViWOir9DF9Dv4fbZEdWpx4mzFOT_RxkUGLTyDW7fQ0OwEyNQwqllupbvm0WiU0RNuFs-kWx1fTIvjiSkPGE5m64PilOIeApxQLwX_rl-JU7uYT-ROxdppIsJimCeos=w406-h451-s-no-gm?authuser=0\" width=\"390\"/> \n",
+    "</div>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7ef9db65-1fda-4fc6-8bb9-bc52bdbb9529",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# !pip install nltk\n",
+    "# !pip install transformers "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1a0b8ed9-f240-47b4-aa62-0cf48bdd7868",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "## Rule-Based Approaches\n",
+    "\n",
+    "- **Lexicon-Based Methods**: Use sentiment lexicons or dictionaries that contain words annotated with their sentiment polarity (positive, negative, neutral).\n",
+    "- **Pattern Matching**: Identify sentiment based on predefined patterns or rules in the text.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9f7f14b4-60ba-4a92-a9d0-a124e62fe03b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import nltk\n",
+    "from nltk.tokenize import word_tokenize\n",
+    "from nltk.corpus import stopwords\n",
+    "\n",
+    "# nltk.download('stopwords')\n",
+    "# nltk.download('punkt')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8a25f60f-f202-49cd-b965-e3ebb1676786",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "print(stopwords.words('english'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7652d6d2-ba4c-4d02-bfe3-313b6e0f24a5",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "text = \"I had a good experience with the product. Highly recommended!\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "53fc7d50-59fa-4bec-9ae4-b93a1a3847f1",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "tokens = word_tokenize(text.lower())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "faac761f-912e-44f7-b7b0-626baaea6a56",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "print(tokens)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9f6543a2-76f4-4993-b535-f90e50bada72",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "stop_words = set(stopwords.words('english'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4d7f529d-f006-48db-a092-2262f17cb3cd",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "tokens = [word for word in tokens if word.isalnum() and word not in stop_words]  #alnum = alphanumeric"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4acfb41c-615d-4e8b-92dc-3f73a4188402",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "print(tokens)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c3cfd1cc-3f30-43de-a469-dec0b3816313",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aed2ad01-27e5-45e3-a55c-63084966a482",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Sample positive and negative words\n",
+    "positive_words = set(['good', 'awesome', 'excellent', 'happy', 'positive'])\n",
+    "negative_words = set(['bad', 'terrible', 'poor', 'unhappy', 'negative'])\n",
+    "\n",
+    "def rule_based_sentiment_analysis(text):\n",
+    "    # Tokenize the text\n",
+    "    tokens = word_tokenize(text.lower())\n",
+    "\n",
+    "    # Remove stopwords\n",
+    "    stop_words = set(stopwords.words('english'))\n",
+    "    tokens = [word for word in tokens if word.isalnum() and word not in stop_words]  #alnum = alphanumeric\n",
+    "\n",
+    "    # Calculate sentiment score\n",
+    "    sentiment_score = sum(1 for word in tokens if word in positive_words) - sum(1 for word in tokens if word in negative_words)\n",
+    "\n",
+    "    # Classify sentiment\n",
+    "    if sentiment_score > 0:\n",
+    "        return 'Positive'\n",
+    "    elif sentiment_score < 0:\n",
+    "        return 'Negative'\n",
+    "    else:\n",
+    "        return 'Neutral'\n",
+    "\n",
+    "# Example usage\n",
+    "text_to_analyze = \"I had a good experience with the product. Highly recommended!\"\n",
+    "sentiment_result = rule_based_sentiment_analysis(text_to_analyze)\n",
+    "print(f\"Sentiment: {sentiment_result}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "21764069-0b07-4b3e-8103-b2ab464a9182",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Machine Learning Approaches"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dc739c8a-a453-43d1-bdc5-ad10d823d748",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "### Import packages"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7e030b97-e111-45ea-b00f-09a360f3400e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from sklearn.pipeline import Pipeline\n",
+    "from sklearn.utils import shuffle\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "# from sklearn.svm import SVC\n",
+    "from sklearn.naive_bayes import MultinomialNB\n",
+    "from sklearn.metrics import classification_report, confusion_matrix\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "54c4fe66-f52f-487f-bfd5-0ea6e05206ce",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### TF-IDF vectorizer"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3f5b7e92-5de4-4894-b2be-47dac1cf2482",
+   "metadata": {},
+   "source": [
+    "\n",
+    "<div>\n",
+    "<img src=\"https://www.kdnuggets.com/wp-content/uploads/awan_convert_text_documents_tfidf_matrix_tfidfvectorizer_3.png\" width=\"590\"/> \n",
+    "</div>\n",
+    "\n",
+    "\n",
+    "Image sources: https://www.kdnuggets.com/2022/09/convert-text-documents-tfidf-matrix-tfidfvectorizer.html\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9bd125fc-11fd-414a-b8f0-ff7ef628fb94",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "##### Example on Small data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8a61fdce-6544-4774-bc29-265bf4afaa90",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "\n",
+    "# Sample data\n",
+    "documents = [\n",
+    "    \"This is the first document.\",\n",
+    "    \"This document is the second document.\",\n",
+    "    \"And this is the third one.\",\n",
+    "    \"Is this the first document?\"\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5794027b-2bee-46d9-9b4d-9cbaa7c4120f",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Create a DataFrame for better visualization\n",
+    "df = pd.DataFrame({'Text': documents})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b49d5272-0383-4e39-910b-87276c4ffca2",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# TF-IDF vectorization\n",
+    "vectorizer = TfidfVectorizer()\n",
+    "tfidf_matrix = vectorizer.fit_transform(df['Text'].tolist())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "46c0b47d-80ab-498b-91a2-7202f1c429fd",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Convert the TF-IDF matrix to a DataFrame\n",
+    "tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "91c2bee0-5bb6-44b9-a609-1f3d0e891ad4",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Print the original data\n",
+    "print(\"Original Data:\")\n",
+    "print(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "24c4a522-8ef4-4001-ada6-031a043b9a54",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "print(tfidf_matrix)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6feb5892-284f-43d1-ab7b-5b13dbfadd0b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Print the TF-IDF matrix\n",
+    "print(\"\\nTF-IDF Matrix:\")\n",
+    "print(tfidf_df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6802c239-edfa-462e-99ea-31386fd7aed4",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Naive Bayes classifier trained on the TF-IDF features."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3accf6f8-6cae-4265-8d5d-fb5d40a07a2d",
+   "metadata": {},
+   "source": [
+    "<div>\n",
+    "<img src=\"fig_bayes-nw.png\" width=\"800\"/> \n",
+    "</div>\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9062063a-557b-4971-ad84-e3601b1a520e",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "### Read data/Preparation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8d2eab09-03c7-441e-9c78-0c2e069f4d25",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# df = pd.read_csv(\"Womens_Clothing_E_Commerce_Reviews.csv\")\n",
+    "df = pd.read_csv(\"imdb_reviews.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aca597f3-c8da-4314-990e-253d5ed719da",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "df.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7d8131e4-4a69-45af-aa12-335c926e308f",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "df.head(3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "43a27caf-779b-4bd1-a3cf-fa641021172e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "df['label'].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c72dd5ec-59b2-4c7f-a8fb-fdade866984d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "df['label'].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ba556f9b-da1c-4d13-8d70-563e0bd528a1",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "df.isna().sum()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "819c31c3-873d-4d31-a21a-759059bd4c6d",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "### Split the dataset into training and testing sets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6ca318a2-26d7-446e-8324-6660171f239d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "train_data, test_data, train_labels, test_labels = train_test_split(df['text'], df['label'], test_size=0.3, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f0cfc8fc-49e5-4c88-bb33-8084dcf00100",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "print(train_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "51d0a415-4982-43dd-8864-c189ba6826f4",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "print(train_labels)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "42987cdb-4cdf-46df-95d8-7c2b2824c1ee",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "### Create a pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "06ffd548-c333-4c1a-87ce-9699ddd116ee",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "sentiment_pipeline = Pipeline([\n",
+    "    ('tfidf', TfidfVectorizer()),\n",
+    "    ('nb', MultinomialNB())\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6bafa7cd-8d0b-4725-bd40-4a3b04634fab",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "### Train the model using the pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "712dea09-52c2-4a9f-8bf9-3cbb273fe4b5",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "sentiment_pipeline.fit(train_data, train_labels)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4c95c599-ae0d-433f-9ed5-856fd9fa35e0",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "### Make predictions on the test set"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "37ae9eda-4a02-4f40-bdeb-ecb8ea67f9d3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "predictions = sentiment_pipeline.predict(test_data)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a33458e2-90cb-4c94-b977-8cc8ea5a273e",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "### Evaluate the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9ad90567-93d2-4090-81be-5c77f41e379a",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "report = classification_report(test_labels, predictions)\n",
+    "\n",
+    "print(\"Classification Report:\\n\", report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ef002e29-d065-4825-a076-3d23fdfa7b59",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "cm = confusion_matrix(test_labels, predictions)\n",
+    "cm"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6e7729bb-a833-4feb-bd2a-b04a2741bd70",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "## Huggingface: Pre-trained sentiment analysis model\n",
+    "\n",
+    "https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9afad444-c2cc-4f3d-b49d-07a723be6154",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "from transformers import pipeline\n",
+    "sentiment_analyzer = pipeline('sentiment-analysis', model =\"distilbert-base-uncased-finetuned-sst-2-english\") #, revision =\"af0f99b\")\n",
+    "data = [\"I love you\", \"I hate you\"]\n",
+    "sentiment_analyzer(data)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4987efd9-8ca8-40b1-90cc-ff361207fb8f",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "result = sentiment_analyzer(\"I love using this model!\")\n",
+    "print(result)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "68436dda-e3c3-499d-b390-60443f9a1796",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "## Huggingface: Thai "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "72a9f8e0-12bf-403b-8b78-e381a65e9eaa",
+   "metadata": {},
+   "source": [
+    "### model=\"poom-sci/WangchanBERTa-finetuned-sentiment\"\n",
+    "\n",
+    "https://huggingface.co/poom-sci/WangchanBERTa-finetuned-sentiment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d698825b-3bd7-4370-871f-ac6e5fe5fe47",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from transformers import pipeline\n",
+    "\n",
+    "sentiment_analyzer = pipeline('sentiment-analysis', model=\"poom-sci/WangchanBERTa-finetuned-sentiment\")#, revision=\"b78d071\")\n",
+    "\n",
+    "data = [\"อร่อยจัดๆ\", \"รอนานแท้\"]\n",
+    "sentiment_analyzer(data)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "87d815d4-135c-471e-93ee-cacc93653d4e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "sentiment_analyzer(\"ข้าวบูด\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "60f5c43a-6cb7-47f1-85c5-751e91599ad9",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f894a4bd-1f04-4126-aa8d-e0211b41687e",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "## Deploy on Streamlit Sharing\n",
+    "\n",
+    "https://share.streamlit.io/  or https://huggingface.co/spaces\n",
+    "\n",
+    "https://docs.streamlit.io/library/api-reference\n",
+    "\n",
+    "https://github.com/\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dfd5baee-dc74-4f6d-84be-52a2b89d0f28",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "%%writefile app_senti.py\n",
+    "\n",
+    "\n",
+    "import streamlit as st\n",
+    "from transformers import pipeline\n",
+    "\n",
+    "# Load the sentiment analysis model\n",
+    "model_name = \"poom-sci/WangchanBERTa-finetuned-sentiment\"\n",
+    "sentiment_analyzer = pipeline('sentiment-analysis', model=model_name)\n",
+    "\n",
+    "# Streamlit app\n",
+    "st.title(\"Thai Sentiment Analysis App\")\n",
+    "\n",
+    "# Input text\n",
+    "text_input = st.text_area(\"Enter Thai text for sentiment analysis\", \"ขอความเห็นหน่อย... \")\n",
+    "\n",
+    "# Button to trigger analysis\n",
+    "if st.button(\"Analyze Sentiment\"):\n",
+    "    # Analyze sentiment using the model\n",
+    "    results = sentiment_analyzer([text_input])\n",
+    "\n",
+    "    # Extract sentiment and score\n",
+    "    sentiment = results[0]['label']\n",
+    "    score = results[0]['score']\n",
+    "    \n",
+    "\n",
+    "    # Display result as progress bars\n",
+    "    st.subheader(\"Sentiment Analysis Result:\")\n",
+    "\n",
+    "    if sentiment == 'pos':\n",
+    "        st.success(f\"Positive Sentiment (Score: {score:.2f})\")\n",
+    "        st.progress(score)\n",
+    "    elif sentiment == 'neg':\n",
+    "        st.error(f\"Negative Sentiment (Score: {score:.2f})\")\n",
+    "        st.progress(score)\n",
+    "    else:\n",
+    "        st.warning(f\"Neutral Sentiment (Score: {score:.2f})\")\n",
+    "        st.progress(score)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "70111967-b904-4f18-a8d0-0c8701ec35ab",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%writefile requirements.txt\n",
+    "transformers\n",
+    "torch\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "88001002-587d-403d-ab65-d060bde9d42d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

imdb_reviews.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:94752f1d468e32222c9190c99dc1758a2e81ec6ad5e76528fe4ce31d3edd495c
+size 66262304