Upload 6 files

Browse files

Updated an option to generate images on direct call

Files changed (5) hide show

WarBot.py +43 -4
WarOnline_Chat.py +33 -12
WarServer.py +18 -2
config.py +6 -3
requirements.txt +4 -1

WarBot.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # Main library for WarBot
-from transformers import AutoTokenizer ,AutoModelForCausalLM
 import re
 # Speller and punctuation:
 import os
@@ -10,6 +10,8 @@ from torch import package
 # not very necessary
 #import textwrap
 from textwrap3 import wrap
 # util function to get expected len after tokenizing
 def get_length_param(text: str, tokenizer) -> str:
@@ -41,8 +43,9 @@ def removeSigns(S):
 def prepare_punct():
     # Prepare the Punctuation Model
-    # Important! Enable for Unix version (python related)
-    # torch.backends.quantized.engine = 'qnnpack'
     torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-models/master/models.yml',
                                    'latest_silero_models.yml',
                                    progress=False)
@@ -69,12 +72,47 @@ def prepare_punct():
     return model_punct
 def initialize():
     """ Loading the model """
     fit_checkpoint = "WarBot"
     tokenizer = AutoTokenizer.from_pretrained(fit_checkpoint)
     model = AutoModelForCausalLM.from_pretrained(fit_checkpoint)
     model_punсt = prepare_punct()
-    return (model,tokenizer,model_punсt)
 def split_string(string,n=256):
     return [string[i:i+n] for i in range(0, len(string), n)]
@@ -84,6 +122,7 @@ def get_response(quote:str,model,tokenizer,model_punct,temperature=0.2):
     try:
         user_inpit_ids = tokenizer.encode(f"|0|{get_length_param(quote, tokenizer)}|" \
                                                       + quote + tokenizer.eos_token, return_tensors="pt")
     except:
         return "Exception in tokenization" # Exception in tokenization

 # Main library for WarBot
+from transformers import AutoTokenizer ,AutoModelForCausalLM, AutoModelForSeq2SeqLM
 import re
 # Speller and punctuation:
 import os
 # not very necessary
 #import textwrap
 from textwrap3 import wrap
+import replicate #imaging
 # util function to get expected len after tokenizing
 def get_length_param(text: str, tokenizer) -> str:
 def prepare_punct():
     # Prepare the Punctuation Model
+    # Important! Enable the next line for the Unix version (Python-related):
+    torch.backends.quantized.engine = 'qnnpack'
     torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-models/master/models.yml',
                                    'latest_silero_models.yml',
                                    progress=False)
     return model_punct
 def initialize():
+    # Initializes all the settings
     """ Loading the model """
     fit_checkpoint = "WarBot"
     tokenizer = AutoTokenizer.from_pretrained(fit_checkpoint)
     model = AutoModelForCausalLM.from_pretrained(fit_checkpoint)
     model_punсt = prepare_punct()
+    """ Initialize the translational model """
+    os.environ['REPLICATE_API_TOKEN'] = '2254e586b1380c49a948fd00d6802d45962492e4'
+    translation_model_name = "Helsinki-NLP/opus-mt-ru-en"
+    translation_tokenizer = AutoTokenizer.from_pretrained(translation_model_name)
+    translation_model = AutoModelForSeq2SeqLM.from_pretrained(translation_model_name)
+    """ Initialize the image model """
+    imageModel = replicate.models.get("stability-ai/stable-diffusion")
+    imgModel_version = imageModel.versions.get("27b93a2413e7f36cd83da926f3656280b2931564ff050bf9575f1fdf9bcd7478")
+    return (model, tokenizer, model_punсt, translation_model, translation_tokenizer, imgModel_version)
+def translate(text:str,translation_model,translation_tokenizer):
+    # Translates from Russian to English
+    src = "ru"  # source language
+    trg = "en"  # target language
+    try:
+        batch = translation_tokenizer([text], return_tensors="pt")
+        generated_ids = translation_model.generate(**batch)
+        translated = translation_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    except:
+        translated = ""
+    return translated
+def generate_image(prompt:str, imgModel_version):
+    # Generates an image from prompt and returns a url
+    prompt = prompt.replace("?","")
+    try:
+        output_url = imgModel_version.predict(prompt=prompt)[0]
+    except:
+        output_url = ""
+    return output_url
 def split_string(string,n=256):
     return [string[i:i+n] for i in range(0, len(string), n)]
     try:
         user_inpit_ids = tokenizer.encode(f"|0|{get_length_param(quote, tokenizer)}|" \
                                                       + quote + tokenizer.eos_token, return_tensors="pt")
+        # Better to force the length parameter to be = {2}
     except:
         return "Exception in tokenization" # Exception in tokenization

WarOnline_Chat.py CHANGED Viewed

@@ -53,7 +53,7 @@ def remove_non_english_russian_chars(s):
 def remove_extra_spaces(s):
     s = re.sub(r"\s+", " ", s)  # replace all sequences of whitespace with a single space
-    s = re.sub(r"\s+([.,])", r"\1", s)  # remove spaces before period or comma
     return(s)
 def getLastPage(thread_url=config.thread_url):
@@ -68,7 +68,6 @@ def getLastPage(thread_url=config.thread_url):
             lastPage = True
     return page
 def login(username=config.username, password=config.password, thread_url=config.thread_url):
     # Log-In to the forum and redirect to thread
@@ -92,12 +91,14 @@ def login(username=config.username, password=config.password, thread_url=config.
         print('Login failed!')
         exit()
-def post(message="", thread_url=config.thread_url, post_url=config.post_url, quoted_by="",quote_text="",quote_source=""):
     #Post a message to the forum (with or without the quote)
     #quote_source is in format 'post-3920992'
     quote_source = quote_source.split('-')[-1] # Take the numbers only
     if quoted_by:
         message = f'[QUOTE="{quoted_by}, post: {quote_source}"]{quote_text}[/QUOTE]{message}'
     # Retrieve the thread page HTML
@@ -315,16 +316,27 @@ def WarOnlineBot():
             message = fixString(message)
             print('Reply: ', message)
-            # Add the new conversation pair to the database
-            db.setmessages(username=msg['messengerName'], message_text=originalQuote, bot_reply=message)
-            # Clean up the excessive records, leaving only the remaining messages
-            db.cleanup(username=msg['messengerName'], remaining_messages=config.remaining_messages)
-            # Delete the duplicate records
-            db.deleteDuplicates()
-            login(username=config.username, password=config.password, thread_url=config.thread_url)
-            time.sleep(1)
-            post(message=message, thread_url=config.thread_url, post_url=config.post_url, quoted_by=msg['messengerName'], quote_text=originalQuote, quote_source=msg['messageID'])
             time.sleep(10)  # Standby time for server load release
@@ -335,6 +347,15 @@ if __name__ == '__main__':
     while True:
         print('Starting Session')
         WarOnlineBot()
         print('Session finished. Timeout...')
         timer = range(60 * config.timeout)

 def remove_extra_spaces(s):
     s = re.sub(r"\s+", " ", s)  # replace all sequences of whitespace with a single space
+    s = re.sub(r"\s+([.,-])", r"\1", s)  # remove spaces before period, dash or comma
     return(s)
 def getLastPage(thread_url=config.thread_url):
             lastPage = True
     return page
 def login(username=config.username, password=config.password, thread_url=config.thread_url):
     # Log-In to the forum and redirect to thread
         print('Login failed!')
         exit()
+def post(message="", thread_url=config.thread_url, post_url=config.post_url, quoted_by="",quote_text="",quote_source="",img_url=""):
     #Post a message to the forum (with or without the quote)
     #quote_source is in format 'post-3920992'
     quote_source = quote_source.split('-')[-1] # Take the numbers only
     if quoted_by:
+        if img_url: # It is an image
+            message = f'Примерно вот так: \n[IMG]{img_url}[/IMG]' # Set the image block
         message = f'[QUOTE="{quoted_by}, post: {quote_source}"]{quote_text}[/QUOTE]{message}'
     # Retrieve the thread page HTML
             message = fixString(message)
             print('Reply: ', message)
+            if message.endswith('.png'): # It is an image reply:
+                # Post an image reply:
+                login(username=config.username, password=config.password, thread_url=config.thread_url)
+                time.sleep(1)
+                post(message="", thread_url=config.thread_url, post_url=config.post_url, quoted_by=msg['messengerName'],
+                     quote_text=originalQuote, quote_source=msg['messageID'],
+                     img_url=message)
+                # An image reply is not added to the database
+            else:
+                # Add the new conversation pair to the database
+                db.setmessages(username=msg['messengerName'], message_text=originalQuote, bot_reply=message)
+                # Clean up the excessive records, leaving only the remaining messages
+                db.cleanup(username=msg['messengerName'], remaining_messages=config.remaining_messages)
+                # Delete the duplicate records
+                db.deleteDuplicates()
+                login(username=config.username, password=config.password, thread_url=config.thread_url)
+                time.sleep(1)
+                post(message=message, thread_url=config.thread_url, post_url=config.post_url, quoted_by=msg['messengerName'], quote_text=originalQuote, quote_source=msg['messageID'])
             time.sleep(10)  # Standby time for server load release
     while True:
         print('Starting Session')
         WarOnlineBot()
+        # Debug Only:
+        #imgWord = 'как выглядит'
+        """
+        login(username=config.username, password=config.password, thread_url=config.thread_url)
+        print("logged in")
+        post(message="", thread_url=config.thread_url, post_url=config.post_url, quoted_by='Test',
+             quote_text="posting an image",img_url='https://replicate.delivery/pbxt/knKBiJt8DPZ0B1o25PaLJSZjgv3D5HcwLoBIn0JESbe3nISIA/out-0.png')
+        """
         print('Session finished. Timeout...')
         timer = range(60 * config.timeout)

WarServer.py CHANGED Viewed

@@ -2,15 +2,20 @@
 import socket
 import WarBot
 import warnings
 warnings.filterwarnings("ignore")
-model,tokenizer,model_punct = WarBot.initialize()
 HOST = '10.0.0.125'
 PORT = 5000
 with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server_socket:
     server_socket.bind((HOST, PORT))
     server_socket.listen()
     print(f'Server is listening on port {PORT}')
@@ -20,11 +25,22 @@ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server_socket:
             print(f'Connected by {addr}')
             data = conn.recv(1024)
             received_string = data.decode()
             print(f'Received string from client: {received_string}')
             response = ""
             while not response:
-                response = WarBot.get_response(received_string, model, tokenizer, model_punct, temperature=0.6)
             response_string = response

 import socket
 import WarBot
+# Kill all warnings
 import warnings
 warnings.filterwarnings("ignore")
+imgWord = 'как выглядит'
+# Initialize the base models
+model,tokenizer,model_punct,translation_model,translation_tokenizer,imgModel_version = WarBot.initialize()
 HOST = '10.0.0.125'
 PORT = 5000
 with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server_socket:
+    # Server sockets operation
     server_socket.bind((HOST, PORT))
     server_socket.listen()
     print(f'Server is listening on port {PORT}')
             print(f'Connected by {addr}')
             data = conn.recv(1024)
             received_string = data.decode()
+            #received_string = data.decode('utf-8')
             print(f'Received string from client: {received_string}')
             response = ""
             while not response:
+                if received_string.lower().startswith(imgWord): # check if that is a call for an image:
+                    received_string = received_string.lower().split(imgWord)[1:][0].strip()  # cut the code word for image
+                    # Translate it to english:
+                    translated_string = WarBot.translate(received_string, translation_model=translation_model,
+                                                         translation_tokenizer=translation_tokenizer)
+                    # Generate the image URL
+                    response = WarBot.generate_image(prompt=translated_string, imgModel_version=imgModel_version)
+                else:
+                    response = WarBot.get_response(received_string, model, tokenizer, model_punct, temperature=0.6)
             response_string = response

config.py CHANGED Viewed

@@ -8,8 +8,8 @@ post_url = "https://waronline.org/fora/index.php?threads/warbot-playground.17636
 # SSH settings
 ssh_host = '129.159.146.88'
 ssh_user = 'ubuntu'
-ssh_key_path = 'C:/Users/kerts/OneDrive/Documents/Keys/Ubuntu_Oracle/ssh-key-2023-02-12.key'
-#ssh_key_path = 'ssh-key-2023-02-12.key'
 # MySQL settings:
 mysql_host = 'localhost'  # because we will connect through the SSH tunnel
@@ -35,4 +35,7 @@ MaxWords = 50 # The server is relatively weak to fast-process the long messages
 remaining_messages = 2
 # Time between the reply sessions:
-timeout = 5 # min

 # SSH settings
 ssh_host = '129.159.146.88'
 ssh_user = 'ubuntu'
+#ssh_key_path = 'C:/Users/kerts/OneDrive/Documents/Keys/Ubuntu_Oracle/ssh-key-2023-02-12.key'
+ssh_key_path = 'ssh-key-2023-02-12.key' #Important to change this on the target machine!
 # MySQL settings:
 mysql_host = 'localhost'  # because we will connect through the SSH tunnel
 remaining_messages = 2
 # Time between the reply sessions:
+timeout = 5 # min
+# Call for image generation:
+imgWord = 'как выглядит'

requirements.txt CHANGED Viewed

@@ -3,7 +3,7 @@ requests
 bs4
 transformers
 scikit-learn
-tensorboardX
 #gradio
 schedule
 tqdm
@@ -11,5 +11,8 @@ pyspellchecker
 paramiko
 pymysql
 sshtunnel
 #pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116
 #pip install torch==1.13.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117

 bs4
 transformers
 scikit-learn
+#tensorboardX
 #gradio
 schedule
 tqdm
 paramiko
 pymysql
 sshtunnel
+# The following ones are for translation and image generation (Replicate API)
+sentencepiece
+replicate
 #pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116
 #pip install torch==1.13.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117