Demea9000 commited on
Commit
5c4ad0b
1 Parent(s): a74ddbf

some more minor changes

Browse files
Files changed (2) hide show
  1. app.py +1 -0
  2. textclassifier/TextClassifier.py +17 -12
app.py CHANGED
@@ -79,6 +79,7 @@ def main(from_date,
79
 
80
  ):
81
  save_file_bool = s1, s2, s3, s4, s5, s6, s7, s8
 
82
 
83
  def add_pie_chart(df, leaders, plot_choices):
84
  df_list = []
79
 
80
  ):
81
  save_file_bool = s1, s2, s3, s4, s5, s6, s7, s8
82
+ # save_file_bool: tuple of booleans (s1..s8) controlling whether each dataframe is saved to a file
83
 
84
  def add_pie_chart(df, leaders, plot_choices):
85
  df_list = []
textclassifier/TextClassifier.py CHANGED
@@ -116,16 +116,16 @@ class TextClassifier:
116
  diff = 4 - len(item_control)
117
  if diff < 0: # If response gave more than four predictions
118
  cutout = item_control[diff - 1:] # Cut out the superfluous predictions
119
- item_control = item_control[:diff - 1] # Save the rest
120
  new_s = ""
121
  for i in range(len(cutout)):
122
  new_s += cutout[i]
123
  if i < -diff:
124
  new_s += " and " # Merge superfluous predictions. E.g. target = 's', 'mp', 'v' -> target = 's and mp and v'
125
  item_control.append(new_s)
126
- elif diff > 0: # If response gave less than four predictions
127
  for i in range(diff):
128
- item_control.append("none") # Fill out tuple with nones
129
  new_item = str(tuple(item_control))
130
  new_item = new_item.replace("'", "")
131
  return new_item
@@ -180,16 +180,19 @@ class TextClassifier:
180
  # Manually add columns to self.df
181
  self.df['main_topic'] = df_topic_split['main_topic'].tolist()
182
  self.df['main_topic'] = self.df['main_topic'].replace(["n/a", "", " "], "none", regex=True)
183
- self.df['main_topic'] = self.df['main_topic'].apply(lambda x: x.strip() if not (len(x) == 1 and x == "-") else "none")
 
184
 
185
  self.df['sub_topic'] = df_topic_split['sub_topic'].tolist()
186
  # In a few of the outputs from GPT-3 the sub_topic = "sentiment"
187
  self.df['sub_topic'] = self.df['sub_topic'].replace(["n/a", "sentiment", "", " "], "none", regex=True)
188
- self.df['sub_topic'] = self.df['sub_topic'].apply(lambda x: x.strip() if not (len(x) == 1 and x == "-") else "none")
 
189
 
190
  self.df['sentiment'] = df_topic_split['sentiment'].tolist()
191
  self.df['sentiment'] = self.df['sentiment'].replace(["n/a", "sentiment", "", " "], "none", regex=True)
192
- self.df['sentiment'] = self.df['sentiment'].apply(lambda x: x.strip() if not (len(x) == 1 and x == "-") else "none")
 
193
 
194
  self.df['target'] = df_topic_split['target'].tolist()
195
  self.df['target'] = self.df['target'].replace(["n/a", "", " "], "none", regex=True)
@@ -246,8 +249,9 @@ class TextClassifier:
246
  main_topic_list = self.clean_party_names(main_topic_list)
247
  sub_topic_list = self.clean_party_names(sub_topic_list)
248
  for i in range(len(main_topic_list)):
249
- if main_topic_list[i].lower() == "none" and sub_topic_list[i].lower() == "none": # If the predictions are faulty
250
- new_list.append("ERROR_496") # Label as ERROR_496 (faulty prediction)
 
251
  elif main_topic_list[i].lower() == "none":
252
  new_list.append(sub_topic_list[i])
253
  elif sub_topic_list[i].lower() == "none":
@@ -276,7 +280,7 @@ class TextClassifier:
276
  if not topic.endswith("####"):
277
  temp_list.append(topic)
278
  else:
279
- temp_list.append(topic[:-4]) # Remove the marker (####)
280
  topic_list.append(temp_list)
281
  temp_list = []
282
 
@@ -441,7 +445,7 @@ class TextClassifier:
441
  model = SentenceTransformer(model_name)
442
  # Encode the topics/targets with the sentence transformer model
443
  old_list_embeddings = model.encode(old_list, batch_size=64, show_progress_bar=True,
444
- convert_to_tensor=True)
445
  # Encode the synonyms with the sentence transformer model
446
  synonym_list_embeddings = model.encode(synonym_list, batch_size=64, show_progress_bar=True,
447
  convert_to_tensor=True)
@@ -532,7 +536,8 @@ class TextClassifier:
532
  """
533
  df_topic = df.copy()
534
  df_topic_split = pd.DataFrame(df_topic['merged_tuple'].tolist(),
535
- columns=['merged_topic', 'cos_sim_topic', 'synonym_topic', 'merged_target', 'cos_sim_target', 'synonym_target'])
 
536
  self.df['merged_tuple'] = df_topic['merged_tuple'].tolist()
537
  # Manually add columns to self.df
538
  self.df['merged_topic'] = df_topic_split['merged_topic'].tolist()
@@ -609,4 +614,4 @@ if __name__ == "__main__":
609
  to_date = start_date.strftime("%Y-%m-%d")
610
  print("curr_date: ", from_date)
611
  tc = TextClassifier(from_date=from_date, to_date=to_date, user_list=USER_LIST, num_tweets=6000)
612
- tc.run_main_pipeline()
116
  diff = 4 - len(item_control)
117
  if diff < 0: # If response gave more than four predictions
118
  cutout = item_control[diff - 1:] # Cut out the superfluous predictions
119
+ item_control = item_control[:diff - 1] # Save the rest
120
  new_s = ""
121
  for i in range(len(cutout)):
122
  new_s += cutout[i]
123
  if i < -diff:
124
  new_s += " and " # Merge superfluous predictions. E.g. target = 's', 'mp', 'v' -> target = 's and mp and v'
125
  item_control.append(new_s)
126
+ elif diff > 0: # If response gave less than four predictions
127
  for i in range(diff):
128
+ item_control.append("none") # Fill out tuple with nones
129
  new_item = str(tuple(item_control))
130
  new_item = new_item.replace("'", "")
131
  return new_item
180
  # Manually add columns to self.df
181
  self.df['main_topic'] = df_topic_split['main_topic'].tolist()
182
  self.df['main_topic'] = self.df['main_topic'].replace(["n/a", "", " "], "none", regex=True)
183
+ self.df['main_topic'] = self.df['main_topic'].apply(
184
+ lambda x: x.strip() if not (len(x) == 1 and x == "-") else "none")
185
 
186
  self.df['sub_topic'] = df_topic_split['sub_topic'].tolist()
187
  # In a few of the outputs from GPT-3 the sub_topic = "sentiment"
188
  self.df['sub_topic'] = self.df['sub_topic'].replace(["n/a", "sentiment", "", " "], "none", regex=True)
189
+ self.df['sub_topic'] = self.df['sub_topic'].apply(
190
+ lambda x: x.strip() if not (len(x) == 1 and x == "-") else "none")
191
 
192
  self.df['sentiment'] = df_topic_split['sentiment'].tolist()
193
  self.df['sentiment'] = self.df['sentiment'].replace(["n/a", "sentiment", "", " "], "none", regex=True)
194
+ self.df['sentiment'] = self.df['sentiment'].apply(
195
+ lambda x: x.strip() if not (len(x) == 1 and x == "-") else "none")
196
 
197
  self.df['target'] = df_topic_split['target'].tolist()
198
  self.df['target'] = self.df['target'].replace(["n/a", "", " "], "none", regex=True)
249
  main_topic_list = self.clean_party_names(main_topic_list)
250
  sub_topic_list = self.clean_party_names(sub_topic_list)
251
  for i in range(len(main_topic_list)):
252
+ if main_topic_list[i].lower() == "none" and sub_topic_list[
253
+ i].lower() == "none": # If the predictions are faulty
254
+ new_list.append("ERROR_496") # Label as ERROR_496 (faulty prediction)
255
  elif main_topic_list[i].lower() == "none":
256
  new_list.append(sub_topic_list[i])
257
  elif sub_topic_list[i].lower() == "none":
280
  if not topic.endswith("####"):
281
  temp_list.append(topic)
282
  else:
283
+ temp_list.append(topic[:-4]) # Remove the marker (####)
284
  topic_list.append(temp_list)
285
  temp_list = []
286
 
445
  model = SentenceTransformer(model_name)
446
  # Encode the topics/targets with the sentence transformer model
447
  old_list_embeddings = model.encode(old_list, batch_size=64, show_progress_bar=True,
448
+ convert_to_tensor=True)
449
  # Encode the synonyms with the sentence transformer model
450
  synonym_list_embeddings = model.encode(synonym_list, batch_size=64, show_progress_bar=True,
451
  convert_to_tensor=True)
536
  """
537
  df_topic = df.copy()
538
  df_topic_split = pd.DataFrame(df_topic['merged_tuple'].tolist(),
539
+ columns=['merged_topic', 'cos_sim_topic', 'synonym_topic', 'merged_target',
540
+ 'cos_sim_target', 'synonym_target'])
541
  self.df['merged_tuple'] = df_topic['merged_tuple'].tolist()
542
  # Manually add columns to self.df
543
  self.df['merged_topic'] = df_topic_split['merged_topic'].tolist()
614
  to_date = start_date.strftime("%Y-%m-%d")
615
  print("curr_date: ", from_date)
616
  tc = TextClassifier(from_date=from_date, to_date=to_date, user_list=USER_LIST, num_tweets=6000)
617
+ tc.run_main_pipeline()