Bedirhan committed on
Commit
a1e75ef
1 Parent(s): 37ed8b3

Upload sentimentAnalysis.py

Files changed (1)
  1. sentimentAnalysis.py +91 -0
sentimentAnalysis.py ADDED
@@ -0,0 +1,91 @@
+ import nltk
+ import pandas as pd
+ from nltk.corpus import stopwords
+ import re
+ import numpy as np
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
+ from scipy.special import softmax
+
+
+ class sentimentAnalysis():
+     def __init__(self, lang, text2analysePath):
+         self.lang = lang
+         self.text2analysePath = text2analysePath
+         self.engLabels = ["negative", "neutral", "positive"]
+         nltk.download("stopwords")
+
+     def downloadModels(self):
+         # Each line of the input file is treated as one text to analyse.
+         txtt = open(self.text2analysePath, 'r', encoding="utf-8")
+         if self.lang in ("English", "english", "İngilizce", "ingilizce"):
+             # Local directory expected to hold the cardiffnlp/twitter-roberta-base-sentiment files.
+             MODEL = "sentimentModels/cardiffnlp/twitter-roberta-base-sentiment"
+             self.tokenizer = AutoTokenizer.from_pretrained(MODEL)
+             self.model = AutoModelForSequenceClassification.from_pretrained(MODEL)
+             self.model.save_pretrained(MODEL)
+             self.tokenizer.save_pretrained(MODEL)
+             self.engPrepareText(txtt)
+
+         elif self.lang in ("Turkish", "turkish", "Türkçe", "türkçe"):
+             self.model = AutoModelForSequenceClassification.from_pretrained("savasy/bert-base-turkish-sentiment-cased")
+             self.tokenizer = AutoTokenizer.from_pretrained("savasy/bert-base-turkish-sentiment-cased")
+             self.sa = pipeline("sentiment-analysis", tokenizer=self.tokenizer, model=self.model)
+             self.trPrepareText(txtt)
+
+         else:
+             print("Dil bulunamadı!------The language has not been found!")
+
+     def engPrepareText(self, txtt):
+         # Lowercase each line, strip punctuation and drop English stopwords.
+         a = []
+         for i in txtt.readlines():
+             i = i.lower()
+             i = re.sub("[^a-zA-Z0-9ğüşöçıİĞÜŞÖÇ]", ' ', i)
+             spl = i.split(' ')
+             new_word = [word for word in spl if word not in set(stopwords.words("english"))]
+             a.append(' '.join(new_word))
+         dFen = pd.DataFrame(a, columns=["texts"])
+         self.engAnalyse(dFen)
+
+     def trPrepareText(self, txtt):
+         # Same preprocessing, but with the Turkish stopword list.
+         a = []
+         for i in txtt.readlines():
+             i = i.lower()
+             i = re.sub("[^a-zA-Z0-9ğüşöçıİĞÜŞÖÇ]", ' ', i)
+             spl = i.split(' ')
+             new_word = [word for word in spl if word not in set(stopwords.words("turkish"))]
+             a.append(' '.join(new_word))
+         dFtr = pd.DataFrame(a, columns=["metinler"])
+         self.trAnalyse(dFtr)
+
+     def engAnalyse(self, dFen):
+         # Score each text and print the three labels ranked by probability.
+         for i in range(len(dFen)):
+             text = dFen["texts"][i]
+             encoded_input = self.tokenizer(text, return_tensors='pt')
+             output = self.model(**encoded_input)
+             scores = output[0][0].detach().numpy()
+             scores = softmax(scores)
+             ranking = np.argsort(scores)
+             ranking = ranking[::-1]
+             print(f"text: {text}")
+             for j in range(scores.shape[0]):
+                 l = self.engLabels[ranking[j]]
+                 s = scores[ranking[j]]
+                 print(f"{j + 1}) {l}: {np.round(float(s), 4)}")
+
+     def trAnalyse(self, dFtr):
+         # The Turkish pipeline returns one label/score pair; the other class is reported as 1 - score.
+         for i in range(len(dFtr)):
+             text = dFtr["metinler"][i]
+             p = self.sa(text)[0]
+             if p["label"] == "positive":
+                 print(f"text: {text}")
+                 print(f"1-) positive: {np.round(float(p['score']), 4)}")
+                 print(f"2-) negative: {np.round(float(1 - p['score']), 4)}")
+             else:
+                 print(f"text: {text}")
+                 print(f"1-) positive: {np.round(float(1 - p['score']), 4)}")
+                 print(f"2-) negative: {np.round(float(p['score']), 4)}")
+
+
+
+ lang = "ingilizce"
+ path = "texts/denemeler/text.txt"
+
+ sA = sentimentAnalysis(lang, path).downloadModels()
+
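
Note: the example at the bottom of the file only exercises the English branch. A minimal sketch of the Turkish path, which loads savasy/bert-base-turkish-sentiment-cased through the sentiment-analysis pipeline, could look like the following; the input path here is a hypothetical placeholder, not a file from this commit:

    # Hypothetical usage of the Turkish branch; "texts/denemeler/metin.txt" is a placeholder path.
    sentimentAnalysis("Türkçe", "texts/denemeler/metin.txt").downloadModels()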