Beav3r committed on
Commit
d8dca23
·
verified ·
1 Parent(s): 70f323f

Upload folder using huggingface_hub

Browse files
__pycache__/LLM_usage.cpython-310.pyc ADDED
Binary file (1.78 kB). View file
 
__pycache__/prompt.cpython-310.pyc ADDED
Binary file (653 Bytes). View file
 
__pycache__/retriever.cpython-310.pyc ADDED
Binary file (4.88 kB). View file
 
__pycache__/tokenizing.cpython-310.pyc ADDED
Binary file (1.28 kB). View file
 
app.py CHANGED
@@ -19,7 +19,9 @@ def initialize_bot(api_key):
19
  # Set the API key
20
  os.environ['GROQ_API_KEY'] = api_key
21
 
22
- pathes = ["./Data/hate_speech_processed.json", "./Data/reddit_jokes2_processed.json", "./Data/stupidstuff_processed.json", "./Data/wocka_processed.json", "./Data/reddit_jokes1_processed.json"]
 
 
23
 
24
  # Load documents (done once)
25
  if not docs: # Only load if docs are not already loaded
@@ -70,7 +72,11 @@ setup_demo = gr.Interface(
70
  inputs=[gr.Textbox(label="Enter your GROQ API Key")],
71
  outputs=[gr.Textbox(label="Setup Status")],
72
  title="Setup Joke Generator",
73
- description="Initialize the Joke Generator Bot by providing the GROQ API key. (If there is a connection error just submit the key again. It will work.)",
 
 
 
 
74
  )
75
 
76
  regime_options = ["BM25 Only", "Semantic Only", "Scores Combination"]
@@ -88,9 +94,8 @@ joke_demo = gr.Interface(
88
  title="Joke Generator",
89
  description="Generate jokes based on your input message(Only in English :( )). Select a retrieval regime and view the context used.\
90
  Be careful, the jokes can be offensive! Try to write a message that is related to the joke you want to hear.\
91
- (tell me a joke and its title about... or tell me a one liner about...). Sometimes bot works bad :(\
92
- In this case, try to rewrite a message and send again. Or close the window and enter\
93
- the link again, after reinitialize joke generator with API KEY.\
94
  Or try to change the regime or BM25 Coefficient.\
95
  BM25 Coefficient is used to balance the BM25 and semantic scores(It is active only in Scores Combination mode). Semantic scores are multiplied by (1 - BM25 Coefficient).\
96
  If you want to use only BM25 or semantic scores, select the corresponding regime or set it to 0.0 or 1.0. respectively.",
@@ -105,6 +110,5 @@ demo = gr.TabbedInterface(
105
  )
106
 
107
  # Launch the interface
108
- # demo.launch()
109
- # demo.launch(share=True)
110
  demo.launch()
 
 
19
  # Set the API key
20
  os.environ['GROQ_API_KEY'] = api_key
21
 
22
+ pathes = ["./Data/hate_speech_processed.json", "./Data/reddit_jokes2_processed.json",
23
+ "./Data/stupidstuff_processed.json", "./Data/wocka_processed.json",
24
+ "./Data/reddit_jokes1_processed.json"]
25
 
26
  # Load documents (done once)
27
  if not docs: # Only load if docs are not already loaded
 
72
  inputs=[gr.Textbox(label="Enter your GROQ API Key")],
73
  outputs=[gr.Textbox(label="Setup Status")],
74
  title="Setup Joke Generator",
75
+ description="Initialize the Joke Generator Bot by providing the GROQ API key. \
76
+ (If there is a connection error(on this or next tab) reload the page, wait 5-10 imnutes, \
77
+ reload the page again and reinitialize the joke generator with the API KEY)\
78
+ If you see some runtime error like memory limit exceeded, tell me on mail: vasyarusynb@gmail.com(I can see your email not so fast)\
79
+ or tg: @Beav3rrr and I will redeploy or turn on new instance",
80
  )
81
 
82
  regime_options = ["BM25 Only", "Semantic Only", "Scores Combination"]
 
94
  title="Joke Generator",
95
  description="Generate jokes based on your input message(Only in English :( )). Select a retrieval regime and view the context used.\
96
  Be careful, the jokes can be offensive! Try to write a message that is related to the joke you want to hear.\
97
+ (tell me a joke and its title about... or tell me a joke and its title about... it should be a oneliner, dark, pervy, etc.). Sometimes bot works bad :(\
98
+ In this case, try to rewrite a message and send again.\
 
99
  Or try to change the regime or BM25 Coefficient.\
100
  BM25 Coefficient is used to balance the BM25 and semantic scores(It is active only in Scores Combination mode). Semantic scores are multiplied by (1 - BM25 Coefficient).\
101
  If you want to use only BM25 or semantic scores, select the corresponding regime or set it to 0.0 or 1.0. respectively.",
 
110
  )
111
 
112
  # Launch the interface
 
 
113
  demo.launch()
114
+ # demo.launch(share=True)
requirements.txt CHANGED
@@ -18,7 +18,7 @@ distro==1.9.0
18
  docopt==0.6.2
19
  exceptiongroup==1.2.2
20
  executing==2.1.0
21
- fastapi==0.115.5
22
  ffmpy==0.4.0
23
  filelock==3.16.1
24
  frozenlist==1.5.0
@@ -108,7 +108,7 @@ sentence-transformers==3.3.1
108
  shellingham==1.5.4
109
  six==1.16.0
110
  sniffio==1.3.1
111
- stack-data==0.6.
112
  starlette==0.41.3
113
  sympy==1.13.3
114
  threadpoolctl==3.5.0
@@ -130,5 +130,5 @@ uvicorn==0.32.0
130
  watchdog==5.0.3
131
  wcwidth==0.2.13
132
  websockets==12.0
133
- yarl==1.17.2
134
  zipp==3.21.0
 
18
  docopt==0.6.2
19
  exceptiongroup==1.2.2
20
  executing==2.1.0
21
+ fastapi==0.115.5
22
  ffmpy==0.4.0
23
  filelock==3.16.1
24
  frozenlist==1.5.0
 
108
  shellingham==1.5.4
109
  six==1.16.0
110
  sniffio==1.3.1
111
+ stack-data==0.6.3
112
  starlette==0.41.3
113
  sympy==1.13.3
114
  threadpoolctl==3.5.0
 
130
  watchdog==5.0.3
131
  wcwidth==0.2.13
132
  websockets==12.0
133
+ yarl==1.17.2
134
  zipp==3.21.0
retriever.py CHANGED
@@ -92,6 +92,9 @@ class Retriever:
92
  # In case of BM25 only, return the top n documents based on BM25 scores, if somebody sets a couple
93
  # of flags to True, the func will return the top n documents based on the first flag set to True
94
 
 
 
 
95
  if bm25_only:
96
  semantic_only = False
97
  scores_combination = False
@@ -112,10 +115,6 @@ class Retriever:
112
  # Sort the documents by their BM25 scores in descending order
113
  sorted_doc_indices = np.argsort(scores)
114
 
115
- print("Score:", scores[sorted_doc_indices[-1]] )
116
- print(self.docs[sorted_doc_indices[-1]])
117
- print("Doc number:", sorted_doc_indices[-1])
118
-
119
  result_docs = [self.docs[i] for i in sorted_doc_indices[-n:] if scores[i] > 0]
120
 
121
  return result_docs[::-1] # Return the top n documents in descending order which means the most relevant documents are first
 
92
  # In case of BM25 only, return the top n documents based on BM25 scores, if somebody sets a couple
93
  # of flags to True, the func will return the top n documents based on the first flag set to True
94
 
95
+ # remove "tell me a joke about" or "tell me a joke and its title about" from the user message
96
+ user_message = user_message.replace("tell me a joke about", "").replace("tell me a joke and its title about", "")
97
+
98
  if bm25_only:
99
  semantic_only = False
100
  scores_combination = False
 
115
  # Sort the documents by their BM25 scores in descending order
116
  sorted_doc_indices = np.argsort(scores)
117
 
 
 
 
 
118
  result_docs = [self.docs[i] for i in sorted_doc_indices[-n:] if scores[i] > 0]
119
 
120
  return result_docs[::-1] # Return the top n documents in descending order which means the most relevant documents are first
test_docs.py CHANGED
@@ -15,17 +15,18 @@ tokenized_docs_path = os.path.join(base_path, "tokenized_docs.pkl")
15
 
16
  # Take all json files with names that end '_processed'
17
  for path in glob.glob(f"{base_path}/*_processed.json"):
 
18
  with open(path, 'r') as f:
19
  docs.extend(json.load(f))
20
 
21
  index = 0
22
 
23
- for i, doc in enumerate(docs):
24
- if 'body' in doc:
25
- if doc['body'] == "I don't fuck the sandwich before eating it":
26
- tokenized_doc = tokenize_doc(doc)
27
- print(tokenized_doc)
28
- index = i
29
 
30
  with open(bm25_path, 'rb') as f:
31
  bm25 = pickle.load(f)
@@ -39,7 +40,7 @@ with open(bm25_path, 'rb') as f:
39
  # with open(bm25_path, 'wb') as f:
40
  # pickle.dump(bm25, f)
41
 
42
- message = "tell me a joke about I don't fuck the sandwich before eating it"
43
  tokenized_message = tokenize_text(message)
44
  print(tokenized_message)
45
  scores = torch.tensor(bm25.get_scores(tokenized_message))
@@ -48,6 +49,7 @@ sorted_doc_indices = np.argsort(scores)
48
  for i in range(1, 2):
49
  print("Score:", scores[sorted_doc_indices[-i]] )
50
  print(docs[sorted_doc_indices[-i]])
 
51
 
52
  # result_docs = [docs[i] for i in sorted_doc_indices[-30:] if scores[i] > 0]
53
 
 
15
 
16
  # Take all json files with names that end '_processed'
17
  for path in glob.glob(f"{base_path}/*_processed.json"):
18
+ print(path)
19
  with open(path, 'r') as f:
20
  docs.extend(json.load(f))
21
 
22
  index = 0
23
 
24
+ # for i, doc in enumerate(docs):
25
+ # if 'body' in doc:
26
+ # if doc['body'] == "I don't fuck the sandwich before eating it":
27
+ # tokenized_doc = tokenize_doc(doc)
28
+ # print(tokenized_doc)
29
+ # index = i
30
 
31
  with open(bm25_path, 'rb') as f:
32
  bm25 = pickle.load(f)
 
40
  # with open(bm25_path, 'wb') as f:
41
  # pickle.dump(bm25, f)
42
 
43
+ message = "tell me a joke about sandwich before eating it"
44
  tokenized_message = tokenize_text(message)
45
  print(tokenized_message)
46
  scores = torch.tensor(bm25.get_scores(tokenized_message))
 
49
  for i in range(1, 2):
50
  print("Score:", scores[sorted_doc_indices[-i]] )
51
  print(docs[sorted_doc_indices[-i]])
52
+ print("Doc number:", sorted_doc_indices[-i])
53
 
54
  # result_docs = [docs[i] for i in sorted_doc_indices[-30:] if scores[i] > 0]
55