Spaces:

Beav3r
/

Dark_humor_generator

Running

App Files Files Community

Beav3r committed on Dec 1, 2024

Commit

d8dca23

verified ·

1 Parent(s): 70f323f

Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

__pycache__/LLM_usage.cpython-310.pyc +0 -0
__pycache__/prompt.cpython-310.pyc +0 -0
__pycache__/retriever.cpython-310.pyc +0 -0
__pycache__/tokenizing.cpython-310.pyc +0 -0
app.py +11 -7
requirements.txt +3 -3
retriever.py +3 -4
test_docs.py +9 -7

__pycache__/LLM_usage.cpython-310.pyc ADDED Viewed

Binary file (1.78 kB). View file

__pycache__/prompt.cpython-310.pyc ADDED Viewed

Binary file (653 Bytes). View file

__pycache__/retriever.cpython-310.pyc ADDED Viewed

Binary file (4.88 kB). View file

__pycache__/tokenizing.cpython-310.pyc ADDED Viewed

Binary file (1.28 kB). View file

app.py CHANGED Viewed

@@ -19,7 +19,9 @@ def initialize_bot(api_key):
     # Set the API key
     os.environ['GROQ_API_KEY'] = api_key
-    pathes = ["./Data/hate_speech_processed.json", "./Data/reddit_jokes2_processed.json", "./Data/stupidstuff_processed.json", "./Data/wocka_processed.json", "./Data/reddit_jokes1_processed.json"]
     # Load documents (done once)
     if not docs:  # Only load if docs are not already loaded
@@ -70,7 +72,11 @@ setup_demo = gr.Interface(
     inputs=[gr.Textbox(label="Enter your GROQ API Key")],
     outputs=[gr.Textbox(label="Setup Status")],
     title="Setup Joke Generator",
-    description="Initialize the Joke Generator Bot by providing the GROQ API key. (If there is a connection error just submit the key again. It will work.)",
 )
 regime_options = ["BM25 Only", "Semantic Only", "Scores Combination"]
@@ -88,9 +94,8 @@ joke_demo = gr.Interface(
     title="Joke Generator",
     description="Generate jokes based on your input message(Only in English :( )). Select a retrieval regime and view the context used.\
                 Be careful, the jokes can be offensive! Try to write a message that is related to the joke you want to hear.\
-                (tell me a joke and its title about... or tell me a one liner about...). Sometimes bot works bad :(\
-                In this case, try to rewrite a message and send again. Or close the window and enter\
-                the link again, after reinitialize joke generator with API KEY.\
                 Or try to change the regime or BM25 Coefficient.\
                 BM25 Coefficient is used to balance the BM25 and semantic scores(It is active only in Scores Combination mode). Semantic scores are multiplied by (1 - BM25 Coefficient).\
                 If you want to use only BM25 or semantic scores, select the corresponding regime or set it to 0.0 or 1.0. respectively.",
@@ -105,6 +110,5 @@ demo = gr.TabbedInterface(
 )
 # Launch the interface
-# demo.launch()
-# demo.launch(share=True)
 demo.launch()

     # Set the API key
     os.environ['GROQ_API_KEY'] = api_key
+    pathes = ["./Data/hate_speech_processed.json", "./Data/reddit_jokes2_processed.json",
+              "./Data/stupidstuff_processed.json", "./Data/wocka_processed.json",
+              "./Data/reddit_jokes1_processed.json"]
     # Load documents (done once)
     if not docs:  # Only load if docs are not already loaded
     inputs=[gr.Textbox(label="Enter your GROQ API Key")],
     outputs=[gr.Textbox(label="Setup Status")],
     title="Setup Joke Generator",
+    description="Initialize the Joke Generator Bot by providing the GROQ API key. \
+        (If there is a connection error(on this or next tab) reload the page, wait 5-10 imnutes, \
+        reload the page again and reinitialize the joke generator with the API KEY)\
+        If you see some runtime error like memory limit exceeded, tell me on mail: vasyarusynb@gmail.com(I can see your email not so fast)\
+        or tg: @Beav3rrr and I will redeploy or turn on new instance",
 )
 regime_options = ["BM25 Only", "Semantic Only", "Scores Combination"]
     title="Joke Generator",
     description="Generate jokes based on your input message(Only in English :( )). Select a retrieval regime and view the context used.\
                 Be careful, the jokes can be offensive! Try to write a message that is related to the joke you want to hear.\
+                (tell me a joke and its title about... or tell me a joke and its title about... it should be a oneliner, dark, pervy, etc.). Sometimes bot works bad :(\
+                In this case, try to rewrite a message and send again.\
                 Or try to change the regime or BM25 Coefficient.\
                 BM25 Coefficient is used to balance the BM25 and semantic scores(It is active only in Scores Combination mode). Semantic scores are multiplied by (1 - BM25 Coefficient).\
                 If you want to use only BM25 or semantic scores, select the corresponding regime or set it to 0.0 or 1.0. respectively.",
 )
 # Launch the interface
 demo.launch()
+# demo.launch(share=True)

requirements.txt CHANGED Viewed

@@ -18,7 +18,7 @@ distro==1.9.0
 docopt==0.6.2
 exceptiongroup==1.2.2
 executing==2.1.0
-fastapi==0.115.5
 ffmpy==0.4.0
 filelock==3.16.1
 frozenlist==1.5.0
@@ -108,7 +108,7 @@ sentence-transformers==3.3.1
 shellingham==1.5.4
 six==1.16.0
 sniffio==1.3.1
-stack-data==0.6.
 starlette==0.41.3
 sympy==1.13.3
 threadpoolctl==3.5.0
@@ -130,5 +130,5 @@ uvicorn==0.32.0
 watchdog==5.0.3
 wcwidth==0.2.13
 websockets==12.0
-yarl==1.17.2
 zipp==3.21.0

 docopt==0.6.2
 exceptiongroup==1.2.2
 executing==2.1.0
+fastapi==0.115.5
 ffmpy==0.4.0
 filelock==3.16.1
 frozenlist==1.5.0
 shellingham==1.5.4
 six==1.16.0
 sniffio==1.3.1
+stack-data==0.6.3
 starlette==0.41.3
 sympy==1.13.3
 threadpoolctl==3.5.0
 watchdog==5.0.3
 wcwidth==0.2.13
 websockets==12.0
+yarl==1.17.2
 zipp==3.21.0

retriever.py CHANGED Viewed

@@ -92,6 +92,9 @@ class Retriever:
         # In case of BM25 only, return the top n documents based on BM25 scores, if somebody sets a couple
         # of flags to True, the func will return the top n documents based on the first flag set to True
         if bm25_only:
             semantic_only = False
             scores_combination = False
@@ -112,10 +115,6 @@ class Retriever:
         # Sort the documents by their BM25 scores in descending order
         sorted_doc_indices = np.argsort(scores)
-        print("Score:", scores[sorted_doc_indices[-1]] )
-        print(self.docs[sorted_doc_indices[-1]])
-        print("Doc number:", sorted_doc_indices[-1])
         result_docs = [self.docs[i] for i in sorted_doc_indices[-n:] if scores[i] > 0]
         return result_docs[::-1] # Return the top n documents in descending order which means the most relevant documents are first

         # In case of BM25 only, return the top n documents based on BM25 scores, if somebody sets a couple
         # of flags to True, the func will return the top n documents based on the first flag set to True
+        # remove "tell me a joke about" or "tell me a joke and its title about" from the user message
+        user_message = user_message.replace("tell me a joke about", "").replace("tell me a joke and its title about", "")
         if bm25_only:
             semantic_only = False
             scores_combination = False
         # Sort the documents by their BM25 scores in descending order
         sorted_doc_indices = np.argsort(scores)
         result_docs = [self.docs[i] for i in sorted_doc_indices[-n:] if scores[i] > 0]
         return result_docs[::-1] # Return the top n documents in descending order which means the most relevant documents are first

test_docs.py CHANGED Viewed

@@ -15,17 +15,18 @@ tokenized_docs_path = os.path.join(base_path, "tokenized_docs.pkl")
 # Take all json files with names that end '_processed'
 for path in glob.glob(f"{base_path}/*_processed.json"):
     with open(path, 'r') as f:
         docs.extend(json.load(f))
 index = 0
-for i, doc in enumerate(docs):
-    if 'body' in doc:
-        if doc['body'] == "I don't fuck the sandwich before eating it":
-            tokenized_doc = tokenize_doc(doc)
-            print(tokenized_doc)
-            index = i
 with open(bm25_path, 'rb') as f:
     bm25 = pickle.load(f)
@@ -39,7 +40,7 @@ with open(bm25_path, 'rb') as f:
 # with open(bm25_path, 'wb') as f:
 #     pickle.dump(bm25, f)
-message = "tell me a joke about I don't fuck the sandwich before eating it"
 tokenized_message = tokenize_text(message)
 print(tokenized_message)
 scores = torch.tensor(bm25.get_scores(tokenized_message))
@@ -48,6 +49,7 @@ sorted_doc_indices = np.argsort(scores)
 for i in range(1, 2):
     print("Score:", scores[sorted_doc_indices[-i]] )
     print(docs[sorted_doc_indices[-i]])
 # result_docs = [docs[i] for i in sorted_doc_indices[-30:] if scores[i] > 0]

 # Take all json files with names that end '_processed'
 for path in glob.glob(f"{base_path}/*_processed.json"):
+    print(path)
     with open(path, 'r') as f:
         docs.extend(json.load(f))
 index = 0
+# for i, doc in enumerate(docs):
+#     if 'body' in doc:
+#         if doc['body'] == "I don't fuck the sandwich before eating it":
+#             tokenized_doc = tokenize_doc(doc)
+#             print(tokenized_doc)
+#             index = i
 with open(bm25_path, 'rb') as f:
     bm25 = pickle.load(f)
 # with open(bm25_path, 'wb') as f:
 #     pickle.dump(bm25, f)
+message = "tell me a joke about sandwich before eating it"
 tokenized_message = tokenize_text(message)
 print(tokenized_message)
 scores = torch.tensor(bm25.get_scores(tokenized_message))
 for i in range(1, 2):
     print("Score:", scores[sorted_doc_indices[-i]] )
     print(docs[sorted_doc_indices[-i]])
+    print("Doc number:", sorted_doc_indices[-i])
 # result_docs = [docs[i] for i in sorted_doc_indices[-30:] if scores[i] > 0]