Spaces:
Sleeping
Sleeping
fruitpicker01
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -21,6 +21,8 @@ from utils import best_text_choice
|
|
21 |
|
22 |
tokenizer = AutoTokenizer.from_pretrained("ai-forever/ru-en-RoSBERTa")
|
23 |
model = AutoModel.from_pretrained("ai-forever/ru-en-RoSBERTa")
|
|
|
|
|
24 |
|
25 |
MISTRAL_API_KEY = os.getenv('MISTRAL_API_KEY')
|
26 |
token = os.getenv('GITHUB_TOKEN')
|
@@ -789,29 +791,60 @@ def generate_all_messages(desc, benefits, key_message, gender, generation, psych
|
|
789 |
save_statistics_to_github(approach_stats)
|
790 |
|
791 |
def rank_messages(non_personalized_messages, personalized_messages):
|
792 |
-
# Загружаем DataFrame unique_sms_df, используемый в функции best_text_choice
|
793 |
-
unique_sms_df = pd.read_parquet('unique_texts.parquet')
|
794 |
|
795 |
-
#
|
796 |
-
|
797 |
-
|
|
|
|
|
|
|
798 |
|
799 |
-
#
|
800 |
-
|
801 |
-
|
802 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
803 |
|
804 |
-
#
|
805 |
-
|
|
|
|
|
|
|
|
|
806 |
ranked_non_personalized_messages_list = ranked_non_personalized_df['text'].tolist()
|
807 |
ranked_personalized_messages_list = ranked_personalized_df['text'].tolist()
|
808 |
|
809 |
-
#
|
810 |
-
|
811 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
812 |
|
813 |
return ranked_non_personalized_messages, ranked_personalized_messages
|
814 |
|
|
|
815 |
# ФУНКЦИИ ПРОВЕРОК (НАЧАЛО)
|
816 |
|
817 |
# 1. Запрещенные слова
|
|
|
21 |
|
22 |
tokenizer = AutoTokenizer.from_pretrained("ai-forever/ru-en-RoSBERTa")
|
23 |
model = AutoModel.from_pretrained("ai-forever/ru-en-RoSBERTa")
|
24 |
+
# Load the DataFrame used in the best_text_choice function
|
25 |
+
unique_sms_df = pd.read_parquet('unique_texts.parquet')
|
26 |
|
27 |
MISTRAL_API_KEY = os.getenv('MISTRAL_API_KEY')
|
28 |
token = os.getenv('GITHUB_TOKEN')
|
|
|
791 |
save_statistics_to_github(approach_stats)
|
792 |
|
793 |
def _split_and_clean_messages(messages_text):
    """Split a display string into a list of bare message texts.

    The input is split on blank lines ('\n\n'). Within each chunk, everything
    from the first '------' marker onward is treated as metadata and dropped.
    Chunks that are empty *after* cleaning are discarded, so a chunk that
    holds only metadata never reaches the ranking step.

    Args:
        messages_text: Blank-line separated messages, each optionally
            followed by a '------' metadata footer. May be empty or None.

    Returns:
        A list of stripped, non-empty message strings (possibly empty).
    """
    if not messages_text:
        return []
    # NOTE(review): a message whose own body contains a blank line is split
    # into several chunks here — confirm the generator never emits those.
    cleaned_chunks = (
        chunk.partition('------')[0].strip()
        for chunk in messages_text.strip().split('\n\n')
    )
    return [text for text in cleaned_chunks if text]


def _rank_and_format(messages_list):
    """Rank messages with best_text_choice and rebuild the display string.

    Args:
        messages_list: Bare message texts to rank.

    Returns:
        The ranked messages, each followed by a '------' footer with its
        recomputed character count, joined by blank lines. Returns '' when
        there is nothing to rank, without invoking the ranking model.
    """
    if not messages_list:
        return ''
    # best_text_choice is expected to return a table with a 'text' column
    # holding the messages in ranked order.
    ranked_df = best_text_choice(messages_list, unique_sms_df, tokenizer, model)
    return '\n\n'.join(
        f"{msg}\n------\nКоличество знаков: {len(msg)}"
        for msg in ranked_df['text'].tolist()
    )


def rank_messages(non_personalized_messages, personalized_messages):
    """Rank two sets of generated messages and return them as display strings.

    Each input is a single string of messages separated by blank lines, where
    a message may carry a '------' metadata footer. The footer is stripped
    before ranking and regenerated afterwards with the length of the ranked
    text.

    Args:
        non_personalized_messages: Display string of non-personalized messages.
        personalized_messages: Display string of personalized messages.

    Returns:
        A tuple ``(ranked_non_personalized_messages,
        ranked_personalized_messages)`` of display strings; an input with no
        usable messages yields ''.
    """
    ranked_non_personalized_messages = _rank_and_format(
        _split_and_clean_messages(non_personalized_messages)
    )
    ranked_personalized_messages = _rank_and_format(
        _split_and_clean_messages(personalized_messages)
    )
    return ranked_non_personalized_messages, ranked_personalized_messages
|
846 |
|
847 |
+
|
848 |
# ФУНКЦИИ ПРОВЕРОК (НАЧАЛО)
|
849 |
|
850 |
# 1. Запрещенные слова
|