aliasgerovs commited on
Commit
9fc992f
1 Parent(s): 9c71743
Files changed (3) hide show
  1. app.py +6 -24
  2. nohup.out +98 -291
  3. predictors.py +27 -108
app.py CHANGED
@@ -26,13 +26,13 @@ analyze_and_highlight_quillbot = partial(
26
  )
27
 
28
 
29
- def ai_generated_test(option, bias_buster_selected, input, models):
30
  if bias_buster_selected:
31
  input = update(input)
32
  if option == "Human vs AI":
33
  return predict_bc_scores(input), None
34
  elif option == "Human vs AI Source Models":
35
- return predict_bc_scores(input), predict_mc_scores(input, models)
36
  return None, None
37
 
38
 
@@ -41,7 +41,6 @@ def main(
41
  ai_option,
42
  plag_option,
43
  input,
44
- models,
45
  year_from,
46
  month_from,
47
  day_from,
@@ -52,17 +51,6 @@ def main(
52
  source_block_size,
53
  ):
54
 
55
- # formatted_tokens = plagiarism_check(
56
- # plag_option,
57
- # input,
58
- # year_from,
59
- # month_from,
60
- # day_from,
61
- # year_to,
62
- # month_to,
63
- # day_to,
64
- # domains_to_skip,
65
- # )
66
  formatted_tokens = html_highlight(
67
  plag_option,
68
  input,
@@ -75,9 +63,9 @@ def main(
75
  domains_to_skip,
76
  source_block_size,
77
  )
78
- depth_analysis_plot = depth_analysis(input)
79
  bc_score = predict_bc_scores(input)
80
- mc_score = predict_mc_scores(input, models)
81
  quilscore = predict_quillbot(input)
82
 
83
  return (
@@ -143,13 +131,13 @@ with gr.Blocks() as demo:
143
  multiselect=True,
144
  label="Models to test against",
145
  )
 
146
  with gr.Row():
147
  with gr.Column():
148
  ai_option = gr.Radio(
149
  [
150
  "Human vs AI",
151
  "Human vs AI Source Models",
152
- # "Human vs AI Source Models (1 on 1)",
153
  ],
154
  label="Choose an option please.",
155
  )
@@ -205,9 +193,6 @@ with gr.Blocks() as demo:
205
  with gr.Column():
206
  bc_highlighter_output = gr.HTML(label="Human vs. AI Highlighter")
207
 
208
- # with gr.Column():
209
- # mc1on1Label = gr.Label(label="Creator(1 on 1 Approach)")
210
-
211
  with gr.Row():
212
  with gr.Column():
213
  QLabel = gr.Label(label="Humanized")
@@ -314,7 +299,6 @@ with gr.Blocks() as demo:
314
  ai_option,
315
  plag_option,
316
  input_text,
317
- models,
318
  year_from,
319
  month_from,
320
  day_from,
@@ -327,7 +311,6 @@ with gr.Blocks() as demo:
327
  outputs=[
328
  bcLabel,
329
  mcLabel,
330
- # mc1on1Label,
331
  sentenceBreakdown,
332
  writing_analysis_plot,
333
  QLabel,
@@ -337,8 +320,7 @@ with gr.Blocks() as demo:
337
 
338
  only_ai_btn.click(
339
  fn=ai_generated_test,
340
- inputs=[ai_option, bias_buster_selected, input_text, models],
341
- # outputs=[bcLabel, mcLabel, mc1on1Label],
342
  outputs=[bcLabel, mcLabel],
343
  api_name="ai_check",
344
  )
 
26
  )
27
 
28
 
29
+ def ai_generated_test(option, bias_buster_selected, input):
30
  if bias_buster_selected:
31
  input = update(input)
32
  if option == "Human vs AI":
33
  return predict_bc_scores(input), None
34
  elif option == "Human vs AI Source Models":
35
+ return predict_bc_scores(input), predict_mc_scores(input)
36
  return None, None
37
 
38
 
 
41
  ai_option,
42
  plag_option,
43
  input,
 
44
  year_from,
45
  month_from,
46
  day_from,
 
51
  source_block_size,
52
  ):
53
 
 
 
 
 
 
 
 
 
 
 
 
54
  formatted_tokens = html_highlight(
55
  plag_option,
56
  input,
 
63
  domains_to_skip,
64
  source_block_size,
65
  )
66
+ depth_analysis_plot = depth_analysis(bias_buster_selected, input)
67
  bc_score = predict_bc_scores(input)
68
+ mc_score = predict_mc_scores(input)
69
  quilscore = predict_quillbot(input)
70
 
71
  return (
 
131
  multiselect=True,
132
  label="Models to test against",
133
  )
134
+
135
  with gr.Row():
136
  with gr.Column():
137
  ai_option = gr.Radio(
138
  [
139
  "Human vs AI",
140
  "Human vs AI Source Models",
 
141
  ],
142
  label="Choose an option please.",
143
  )
 
193
  with gr.Column():
194
  bc_highlighter_output = gr.HTML(label="Human vs. AI Highlighter")
195
 
 
 
 
196
  with gr.Row():
197
  with gr.Column():
198
  QLabel = gr.Label(label="Humanized")
 
299
  ai_option,
300
  plag_option,
301
  input_text,
 
302
  year_from,
303
  month_from,
304
  day_from,
 
311
  outputs=[
312
  bcLabel,
313
  mcLabel,
 
314
  sentenceBreakdown,
315
  writing_analysis_plot,
316
  QLabel,
 
320
 
321
  only_ai_btn.click(
322
  fn=ai_generated_test,
323
+ inputs=[ai_option, bias_buster_selected, input_text],
 
324
  outputs=[bcLabel, mcLabel],
325
  api_name="ai_check",
326
  )
nohup.out CHANGED
@@ -1,166 +1,15 @@
1
- Original BC scores: AI: 0.983885645866394, HUMAN: 0.01611432246863842
2
- Calibration BC scores: AI: 0.5142857142857142, HUMAN: 0.48571428571428577
3
- Input Text: sOperation Title was an unsuccessful 1942 Allied attack on the German battleship Tirpitz during World War II. The Allies considered Tirpitz to be a major threat to their shipping and after several Royal Air Force heavy bomber raids failed to inflict any damage it was decided to use Royal Navy midget submarines instead. /s
4
-
5
- correcting text..: 0%| | 0/2 [00:00<?, ?it/s]
6
- correcting text..: 100%|██████████| 2/2 [00:00<00:00, 29.39it/s]
7
- Traceback (most recent call last):
8
- File "/usr/local/lib/python3.9/dist-packages/gradio/queueing.py", line 527, in process_events
9
- response = await route_utils.call_process_api(
10
- File "/usr/local/lib/python3.9/dist-packages/gradio/route_utils.py", line 270, in call_process_api
11
- output = await app.get_blocks().process_api(
12
- File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1856, in process_api
13
- data = await self.postprocess_data(fn_index, result["prediction"], state)
14
- File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1634, in postprocess_data
15
- self.validate_outputs(fn_index, predictions) # type: ignore
16
- File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1610, in validate_outputs
17
- raise ValueError(
18
- ValueError: An event handler (update) didn't receive enough output values (needed: 2, received: 1).
19
- Wanted outputs:
20
- [<gradio.components.textbox.Textbox object at 0x7f79abf202b0>, <gradio.components.textbox.Textbox object at 0x7f79abf20a60>]
21
- Received outputs:
22
- ["Operation Title was an unsuccessful 1942 Allied attack on the German battleship Tirpitz during World War II. The Allies considered Tirpitz to be a major threat to their shipping and after several Royal Air Force heavy bomber raids failed to inflict any damage it was decided to use Royal Navy midget submarines instead."]
23
- /usr/lib/python3/dist-packages/requests/__init__.py:87: RequestsDependencyWarning: urllib3 (2.2.1) or chardet (4.0.0) doesn't match a supported version!
24
- warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
25
- 2024-05-15 18:41:05.953508: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
26
- To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
27
- 2024-05-15 18:41:11.449382: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
28
- [nltk_data] Downloading package punkt to /root/nltk_data...
29
- [nltk_data] Package punkt is already up-to-date!
30
- [nltk_data] Downloading package stopwords to /root/nltk_data...
31
- [nltk_data] Package stopwords is already up-to-date!
32
- The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
33
- The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
34
- The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
35
- The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
36
- The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
37
- Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
38
- - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
39
- - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
40
- The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
41
- Framework not specified. Using pt to export the model.
42
- Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
43
- - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
44
- - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
45
- Using the export variant default. Available variants are:
46
- - default: The default ONNX variant.
47
-
48
- ***** Exporting submodel 1/1: RobertaForSequenceClassification *****
49
- Using framework PyTorch: 2.3.0+cu121
50
- Overriding 1 configuration item(s)
51
- - use_cache -> False
52
- Framework not specified. Using pt to export the model.
53
- Using the export variant default. Available variants are:
54
- - default: The default ONNX variant.
55
- Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
56
- Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
57
-
58
- ***** Exporting submodel 1/3: T5Stack *****
59
- Using framework PyTorch: 2.3.0+cu121
60
- Overriding 1 configuration item(s)
61
- - use_cache -> False
62
-
63
- ***** Exporting submodel 2/3: T5ForConditionalGeneration *****
64
- Using framework PyTorch: 2.3.0+cu121
65
- Overriding 1 configuration item(s)
66
- - use_cache -> True
67
- /usr/local/lib/python3.9/dist-packages/transformers/modeling_utils.py:1017: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
68
- if causal_mask.shape[1] < attention_mask.shape[1]:
69
-
70
- ***** Exporting submodel 3/3: T5ForConditionalGeneration *****
71
- Using framework PyTorch: 2.3.0+cu121
72
- Overriding 1 configuration item(s)
73
- - use_cache -> True
74
- /usr/local/lib/python3.9/dist-packages/transformers/models/t5/modeling_t5.py:503: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
75
- elif past_key_value.shape[2] != key_value_states.shape[1]:
76
- In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
77
- In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
78
- Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
79
- Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
80
- [nltk_data] Downloading package cmudict to /root/nltk_data...
81
- [nltk_data] Package cmudict is already up-to-date!
82
- [nltk_data] Downloading package punkt to /root/nltk_data...
83
- [nltk_data] Package punkt is already up-to-date!
84
- [nltk_data] Downloading package stopwords to /root/nltk_data...
85
- [nltk_data] Package stopwords is already up-to-date!
86
- [nltk_data] Downloading package wordnet to /root/nltk_data...
87
- [nltk_data] Package wordnet is already up-to-date!
88
- /usr/lib/python3/dist-packages/requests/__init__.py:87: RequestsDependencyWarning: urllib3 (2.2.1) or chardet (4.0.0) doesn't match a supported version!
89
- warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
90
- Collecting en_core_web_sm==2.3.1
91
- Using cached en_core_web_sm-2.3.1-py3-none-any.whl
92
- Requirement already satisfied: spacy<2.4.0,>=2.3.0 in /usr/local/lib/python3.9/dist-packages (from en_core_web_sm==2.3.1) (2.3.9)
93
- Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.0.9)
94
- Requirement already satisfied: blis<0.8.0,>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.7.11)
95
- Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (4.66.2)
96
- Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.7)
97
- Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/lib/python3/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.25.1)
98
- Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.1.3)
99
- Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (52.0.0)
100
- Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.0.8)
101
- Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.10)
102
- Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.10.1)
103
- Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.26.4)
104
- Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.2)
105
- Requirement already satisfied: thinc<7.5.0,>=7.4.1 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (7.4.6)
106
- ✔ Download and installation successful
107
- You can now load the model via spacy.load('en_core_web_sm')
108
- /usr/local/lib/python3.9/dist-packages/gradio/utils.py:953: UserWarning: Expected 1 arguments for function <function depth_analysis at 0x7f6df970eee0>, received 2.
109
  warnings.warn(
110
- /usr/local/lib/python3.9/dist-packages/gradio/utils.py:961: UserWarning: Expected maximum 1 arguments for function <function depth_analysis at 0x7f6df970eee0>, received 2.
111
  warnings.warn(
112
- IMPORTANT: You are using gradio version 4.28.3, however version 4.29.0 is available, please upgrade.
113
- --------
114
- Running on local URL: http://0.0.0.0:80
115
- Running on public URL: https://1f9431205fb743687b.gradio.live
116
-
117
- This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
118
-
119
-
120
- huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
121
- To disable this warning, you can either:
122
- - Avoid using `tokenizers` before the fork if possible
123
- - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
124
- /usr/local/lib/python3.9/dist-packages/torch/cuda/__init__.py:619: UserWarning: Can't initialize NVML
125
- warnings.warn("Can't initialize NVML")
126
- /usr/local/lib/python3.9/dist-packages/optimum/bettertransformer/models/encoder_models.py:301: UserWarning: The PyTorch API of nested tensors is in prototype stage and will change in the near future. (Triggered internally at ../aten/src/ATen/NestedTensorImpl.cpp:178.)
127
- hidden_states = torch._nested_tensor_from_mask(hidden_states, ~attention_mask)
128
- Original BC scores: AI: 0.0012912281090393662, HUMAN: 0.9987087249755859
129
- Calibration BC scores: AI: 0.09973753280839895, HUMAN: 0.9002624671916011
130
- Input Text: sOperation Title was an unsuccessful 1942 Allied attack on the German battleship Tirpitz during World War II. The Allies considered Tirpitz to be a major threat to their shipping and after several Royal Air Force heavy bomber raids failed to inflict any damdage it was decided to use Royal Navy midget submarines instead. /s
131
-
132
-
133
- Original BC scores: AI: 1.946412595543734e-07, HUMAN: 0.9999997615814209
134
- Calibration BC scores: AI: 0.0013484877672895396, HUMAN: 0.9986515122327104
135
- Input Text: sThe Allies considered Trotsky to be a major threat to their shipping and after several heavy bombs failed to inflict any damage it was decided to use smaller Royal Navy submarines instead. /s
136
- Original BC scores: AI: 7.88536635809578e-06, HUMAN: 0.9999921321868896
137
- Calibration BC scores: AI: 0.008818342151675485, HUMAN: 0.9911816578483246
138
- Input Text: sAlireza Masrour, Generall Partner at Plug Play, has led over 200 investmens in startups sence 2008. Notable unicorn investmens include CloudWalk, Flyr, FiscalNote, Shippo, Owkin, and Trulioo. He has also been involvd in sucsessful exits such as FiscalNote's IPO, HealthPocket's acqusition by Health Insurans Innovations, and Kustomer's acqusition by FaceBook. Alireza has receeved recognition for his acheivements, includng beeing named a Silicon Valley 40 under 40 in 2018 and a rising-star VC by BusinessInsider. He has had 13 unicorn portfollio companys and manages a B Portfollio Club with investmens in companys like N26, BigID, Shippo, and TrueBill, wich was acquried by RocketCo for 1. 3B. Other investmens include Flexiv, Owkin, VisbyMedikal, Animoca, and AutoX. /s
139
- Models to Test: ['OpenAI GPT', 'Mistral', 'CLAUDE', 'Gemini', 'Grammar Enhancer']
140
- Original BC scores: AI: 7.88536635809578e-06, HUMAN: 0.9999921321868896
141
- Calibration BC scores: AI: 0.008818342151675485, HUMAN: 0.9911816578483246
142
- Starting MC
143
- MC Score: {'OpenAI GPT': 1.1978447330533474e-12, 'Mistral': 2.7469434957703303e-13, 'CLAUDE': 8.578213092883691e-13, 'Gemini': 6.304846046418989e-13, 'Grammar Enhancer': 0.008818342148714584}
144
-
145
- Original BC scores: AI: 0.9980764389038086, HUMAN: 0.001923577394336462
146
- Calibration BC scores: AI: 0.7272727272727273, HUMAN: 0.2727272727272727
147
- Input Text: sAlireza Marmar, general partner at Plug Play, has led over 200 investments in startups since 2008. Notable unicorns include CloudWatch, Flyer, FiscalNote, Shippo, Owkin, and Trulio. He has also been involved in successful exits such as Microsoft's IPO, HealthPocket's acquisition by HealthInsuranceInc. , and Salesforce's acquisition of Facebook. Alireza has received praise for his achievements, including being named a Silicon Valley 40 under 40 in 2018 and a Rising Star by Business Insider. He has had 13 unicorn companies and manages a Billion Ponzi scheme with investments in companies like N26, BigID, Shippo, and TruBill, which was acquired by RocketCoop for 1. 3B. Other investments include Xerox, Owatu, Microsoft, Amazon, and AutoX. /s
148
- Models to Test: ['OpenAI GPT', 'Mistral', 'CLAUDE', 'Gemini', 'Grammar Enhancer']
149
- Original BC scores: AI: 0.9980764389038086, HUMAN: 0.001923577394336462
150
- Calibration BC scores: AI: 0.7272727272727273, HUMAN: 0.2727272727272727
151
- Starting MC
152
- MC Score: {'OpenAI GPT': 1.7068867157614812e-06, 'Mistral': 6.292188498138414e-10, 'CLAUDE': 8.175567903345952e-09, 'Gemini': 2.868823230740637e-08, 'Grammar Enhancer': 0.7272709828929925}
153
-
154
-
155
- /usr/lib/python3/dist-packages/requests/__init__.py:87: RequestsDependencyWarning: urllib3 (2.2.1) or chardet (4.0.0) doesn't match a supported version!
156
- warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
157
- 2024-05-15 19:31:58.934498: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
158
  To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
159
- 2024-05-15 19:32:05.107700: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
160
  [nltk_data] Downloading package punkt to /root/nltk_data...
161
  [nltk_data] Package punkt is already up-to-date!
162
  [nltk_data] Downloading package stopwords to /root/nltk_data...
163
  [nltk_data] Package stopwords is already up-to-date!
 
 
164
  The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
165
  The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
166
  The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
@@ -186,6 +35,8 @@ Using the export variant default. Available variants are:
186
  - default: The default ONNX variant.
187
  Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
188
  Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
 
 
189
 
190
  ***** Exporting submodel 1/3: T5Stack *****
191
  Using framework PyTorch: 2.3.0+cu121
@@ -209,104 +60,8 @@ In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.
209
  In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
210
  Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
211
  Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
212
- [nltk_data] Downloading package cmudict to /root/nltk_data...
213
- [nltk_data] Package cmudict is already up-to-date!
214
- [nltk_data] Downloading package punkt to /root/nltk_data...
215
- [nltk_data] Package punkt is already up-to-date!
216
- [nltk_data] Downloading package stopwords to /root/nltk_data...
217
- [nltk_data] Package stopwords is already up-to-date!
218
- [nltk_data] Downloading package wordnet to /root/nltk_data...
219
- [nltk_data] Package wordnet is already up-to-date!
220
- /usr/lib/python3/dist-packages/requests/__init__.py:87: RequestsDependencyWarning: urllib3 (2.2.1) or chardet (4.0.0) doesn't match a supported version!
221
- warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
222
- Collecting en_core_web_sm==2.3.1
223
- Using cached en_core_web_sm-2.3.1-py3-none-any.whl
224
- Requirement already satisfied: spacy<2.4.0,>=2.3.0 in /usr/local/lib/python3.9/dist-packages (from en_core_web_sm==2.3.1) (2.3.9)
225
- Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.26.4)
226
- Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.0.9)
227
- Requirement already satisfied: thinc<7.5.0,>=7.4.1 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (7.4.6)
228
- Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.2)
229
- Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.1.3)
230
- Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/lib/python3/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.25.1)
231
- Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.10.1)
232
- Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.7)
233
- Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (4.66.2)
234
- Requirement already satisfied: blis<0.8.0,>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.7.11)
235
- Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (52.0.0)
236
- Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.10)
237
- Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.0.8)
238
- ✔ Download and installation successful
239
- You can now load the model via spacy.load('en_core_web_sm')
240
- /usr/local/lib/python3.9/dist-packages/gradio/utils.py:953: UserWarning: Expected 1 arguments for function <function depth_analysis at 0x7f137170dee0>, received 2.
241
- warnings.warn(
242
- /usr/local/lib/python3.9/dist-packages/gradio/utils.py:961: UserWarning: Expected maximum 1 arguments for function <function depth_analysis at 0x7f137170dee0>, received 2.
243
- warnings.warn(
244
- WARNING: Invalid HTTP request received.
245
- huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
246
- To disable this warning, you can either:
247
- - Avoid using `tokenizers` before the fork if possible
248
- - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
249
  /usr/local/lib/python3.9/dist-packages/torch/cuda/__init__.py:619: UserWarning: Can't initialize NVML
250
  warnings.warn("Can't initialize NVML")
251
- /usr/local/lib/python3.9/dist-packages/optimum/bettertransformer/models/encoder_models.py:301: UserWarning: The PyTorch API of nested tensors is in prototype stage and will change in the near future. (Triggered internally at ../aten/src/ATen/NestedTensorImpl.cpp:178.)
252
- hidden_states = torch._nested_tensor_from_mask(hidden_states, ~attention_mask)
253
- /usr/lib/python3/dist-packages/requests/__init__.py:87: RequestsDependencyWarning: urllib3 (2.2.1) or chardet (4.0.0) doesn't match a supported version!
254
- warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
255
- 2024-05-15 22:08:54.473739: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
256
- To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
257
- 2024-05-15 22:09:00.121158: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
258
- [nltk_data] Downloading package punkt to /root/nltk_data...
259
- [nltk_data] Package punkt is already up-to-date!
260
- [nltk_data] Downloading package stopwords to /root/nltk_data...
261
- [nltk_data] Package stopwords is already up-to-date!
262
- The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
263
- The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
264
- The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
265
- The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
266
- The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
267
- Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
268
- - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
269
- - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
270
- The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
271
- Framework not specified. Using pt to export the model.
272
- Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
273
- - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
274
- - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
275
- Using the export variant default. Available variants are:
276
- - default: The default ONNX variant.
277
-
278
- ***** Exporting submodel 1/1: RobertaForSequenceClassification *****
279
- Using framework PyTorch: 2.3.0+cu121
280
- Overriding 1 configuration item(s)
281
- - use_cache -> False
282
- Framework not specified. Using pt to export the model.
283
- Using the export variant default. Available variants are:
284
- - default: The default ONNX variant.
285
- Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
286
- Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
287
-
288
- ***** Exporting submodel 1/3: T5Stack *****
289
- Using framework PyTorch: 2.3.0+cu121
290
- Overriding 1 configuration item(s)
291
- - use_cache -> False
292
-
293
- ***** Exporting submodel 2/3: T5ForConditionalGeneration *****
294
- Using framework PyTorch: 2.3.0+cu121
295
- Overriding 1 configuration item(s)
296
- - use_cache -> True
297
- /usr/local/lib/python3.9/dist-packages/transformers/modeling_utils.py:1017: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
298
- if causal_mask.shape[1] < attention_mask.shape[1]:
299
-
300
- ***** Exporting submodel 3/3: T5ForConditionalGeneration *****
301
- Using framework PyTorch: 2.3.0+cu121
302
- Overriding 1 configuration item(s)
303
- - use_cache -> True
304
- /usr/local/lib/python3.9/dist-packages/transformers/models/t5/modeling_t5.py:503: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
305
- elif past_key_value.shape[2] != key_value_states.shape[1]:
306
- In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
307
- In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
308
- Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
309
- Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
310
  [nltk_data] Downloading package cmudict to /root/nltk_data...
311
  [nltk_data] Package cmudict is already up-to-date!
312
  [nltk_data] Downloading package punkt to /root/nltk_data...
@@ -315,47 +70,99 @@ Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_bea
315
  [nltk_data] Package stopwords is already up-to-date!
316
  [nltk_data] Downloading package wordnet to /root/nltk_data...
317
  [nltk_data] Package wordnet is already up-to-date!
318
- /usr/lib/python3/dist-packages/requests/__init__.py:87: RequestsDependencyWarning: urllib3 (2.2.1) or chardet (4.0.0) doesn't match a supported version!
319
- warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
320
- Collecting en_core_web_sm==2.3.1
321
- Using cached en_core_web_sm-2.3.1-py3-none-any.whl
322
- Requirement already satisfied: spacy<2.4.0,>=2.3.0 in /usr/local/lib/python3.9/dist-packages (from en_core_web_sm==2.3.1) (2.3.9)
323
- Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.1.3)
324
- Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.10)
325
- Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.2)
326
- Requirement already satisfied: blis<0.8.0,>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.7.11)
327
- Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (52.0.0)
328
- Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.26.4)
329
- Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/lib/python3/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.25.1)
330
- Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (4.66.2)
331
- Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.10.1)
332
- Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.0.9)
333
- Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.0.8)
334
- Requirement already satisfied: thinc<7.5.0,>=7.4.1 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (7.4.6)
335
- Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.7)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  ✔ Download and installation successful
337
- You can now load the model via spacy.load('en_core_web_sm')
338
- /usr/local/lib/python3.9/dist-packages/gradio/utils.py:953: UserWarning: Expected 1 arguments for function <function depth_analysis at 0x7f149d70dee0>, received 2.
339
  warnings.warn(
340
- /usr/local/lib/python3.9/dist-packages/gradio/utils.py:961: UserWarning: Expected maximum 1 arguments for function <function depth_analysis at 0x7f149d70dee0>, received 2.
341
  warnings.warn(
342
- huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
343
- To disable this warning, you can either:
344
- - Avoid using `tokenizers` before the fork if possible
345
- - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
346
- /usr/local/lib/python3.9/dist-packages/torch/cuda/__init__.py:619: UserWarning: Can't initialize NVML
347
- warnings.warn("Can't initialize NVML")
 
 
 
348
  /usr/local/lib/python3.9/dist-packages/optimum/bettertransformer/models/encoder_models.py:301: UserWarning: The PyTorch API of nested tensors is in prototype stage and will change in the near future. (Triggered internally at ../aten/src/ATen/NestedTensorImpl.cpp:178.)
349
  hidden_states = torch._nested_tensor_from_mask(hidden_states, ~attention_mask)
350
- WARNING: Invalid HTTP request received.
351
- WARNING: Invalid HTTP request received.
352
- WARNING: Invalid HTTP request received.
353
- WARNING: Invalid HTTP request received.
354
- WARNING: Invalid HTTP request received.
355
- WARNING: Invalid HTTP request received.
356
- WARNING: Invalid HTTP request received.
357
- WARNING: Invalid HTTP request received.
358
- WARNING: Invalid HTTP request received.
359
- WARNING: Invalid HTTP request received.
360
- WARNING: Invalid HTTP request received.
361
- WARNING: Invalid HTTP request received.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  warnings.warn(
3
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
4
  warnings.warn(
5
+ 2024-05-16 12:11:24.934695: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
 
7
  [nltk_data] Downloading package punkt to /root/nltk_data...
8
  [nltk_data] Package punkt is already up-to-date!
9
  [nltk_data] Downloading package stopwords to /root/nltk_data...
10
  [nltk_data] Package stopwords is already up-to-date!
11
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
12
+ warnings.warn(
13
  The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
14
  The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
15
  The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
 
35
  - default: The default ONNX variant.
36
  Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
37
  Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
38
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
39
+ warnings.warn(
40
 
41
  ***** Exporting submodel 1/3: T5Stack *****
42
  Using framework PyTorch: 2.3.0+cu121
 
60
  In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
61
  Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
62
  Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  /usr/local/lib/python3.9/dist-packages/torch/cuda/__init__.py:619: UserWarning: Can't initialize NVML
64
  warnings.warn("Can't initialize NVML")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  [nltk_data] Downloading package cmudict to /root/nltk_data...
66
  [nltk_data] Package cmudict is already up-to-date!
67
  [nltk_data] Downloading package punkt to /root/nltk_data...
 
70
  [nltk_data] Package stopwords is already up-to-date!
71
  [nltk_data] Downloading package wordnet to /root/nltk_data...
72
  [nltk_data] Package wordnet is already up-to-date!
73
+ /usr/local/lib/python3.9/dist-packages/torch/cuda/__init__.py:619: UserWarning: Can't initialize NVML
74
+ warnings.warn("Can't initialize NVML")
75
+ Collecting en-core-web-sm==3.7.1
76
+ Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
77
+ Requirement already satisfied: spacy<3.8.0,>=3.7.2 in /usr/local/lib/python3.9/dist-packages (from en-core-web-sm==3.7.1) (3.7.2)
78
+ Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.4.0)
79
+ Requirement already satisfied: typer<0.10.0,>=0.3.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.9.4)
80
+ Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (24.0)
81
+ Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (6.4.0)
82
+ Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.2)
83
+ Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.10)
84
+ Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.5)
85
+ Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/lib/python3/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.25.1)
86
+ Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.4.8)
87
+ Requirement already satisfied: thinc<8.3.0,>=8.1.8 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.2.3)
88
+ Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (52.0.0)
89
+ Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.26.4)
90
+ Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.8)
91
+ Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.1.4)
92
+ Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.66.4)
93
+ Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.10)
94
+ Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.3.4)
95
+ Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.12)
96
+ Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.7.1)
97
+ Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.9)
98
+ Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.9/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.2.0)
99
+ Requirement already satisfied: marisa-trie>=0.7.7 in /usr/local/lib/python3.9/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.1)
100
+ Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.18.2)
101
+ Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.11.0)
102
+ Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.6.0)
103
+ Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.9/dist-packages (from thinc<8.3.0,>=8.1.8->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.7.11)
104
+ Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.9/dist-packages (from thinc<8.3.0,>=8.1.8->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.1.4)
105
+ Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.9/dist-packages (from typer<0.10.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.1.7)
106
+ Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in /usr/local/lib/python3.9/dist-packages (from weasel<0.4.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.16.0)
107
+ Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.1.5)
108
  ✔ Download and installation successful
109
+ You can now load the package via spacy.load('en_core_web_sm')
110
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
111
  warnings.warn(
112
+ /usr/local/lib/python3.9/dist-packages/gradio/utils.py:924: UserWarning: Expected 1 arguments for function <function depth_analysis at 0x7f402afeff70>, received 2.
113
  warnings.warn(
114
+ /usr/local/lib/python3.9/dist-packages/gradio/utils.py:932: UserWarning: Expected maximum 1 arguments for function <function depth_analysis at 0x7f402afeff70>, received 2.
115
+ warnings.warn(
116
+ IMPORTANT: You are using gradio version 4.26.0, however version 4.29.0 is available, please upgrade.
117
+ --------
118
+ Running on local URL: http://0.0.0.0:80
119
+ Running on public URL: https://3122a891c774a52363.gradio.live
120
+
121
+ This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
122
+
123
  /usr/local/lib/python3.9/dist-packages/optimum/bettertransformer/models/encoder_models.py:301: UserWarning: The PyTorch API of nested tensors is in prototype stage and will change in the near future. (Triggered internally at ../aten/src/ATen/NestedTensorImpl.cpp:178.)
124
  hidden_states = torch._nested_tensor_from_mask(hidden_states, ~attention_mask)
125
+ Traceback (most recent call last):
126
+ File "/usr/local/lib/python3.9/dist-packages/gradio/queueing.py", line 527, in process_events
127
+ response = await route_utils.call_process_api(
128
+ File "/usr/local/lib/python3.9/dist-packages/gradio/route_utils.py", line 261, in call_process_api
129
+ output = await app.get_blocks().process_api(
130
+ File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1786, in process_api
131
+ result = await self.call_function(
132
+ File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1338, in call_function
133
+ prediction = await anyio.to_thread.run_sync(
134
+ File "/usr/local/lib/python3.9/dist-packages/anyio/to_thread.py", line 56, in run_sync
135
+ return await get_async_backend().run_sync_in_worker_thread(
136
+ File "/usr/local/lib/python3.9/dist-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
137
+ return await future
138
+ File "/usr/local/lib/python3.9/dist-packages/anyio/_backends/_asyncio.py", line 851, in run
139
+ result = context.run(func, *args)
140
+ File "/usr/local/lib/python3.9/dist-packages/gradio/utils.py", line 759, in wrapper
141
+ response = f(*args, **kwargs)
142
+ File "/home/aliasgarov/copyright_checker/app.py", line 35, in ai_generated_test
143
+ return predict_bc_scores(input), predict_mc_scores(input)
144
+ File "/home/aliasgarov/copyright_checker/predictors.py", line 390, in predict_mc_scores
145
+ for key in models
146
+ NameError: name 'models' is not defined
147
+ Traceback (most recent call last):
148
+ File "/usr/local/lib/python3.9/dist-packages/gradio/queueing.py", line 527, in process_events
149
+ response = await route_utils.call_process_api(
150
+ File "/usr/local/lib/python3.9/dist-packages/gradio/route_utils.py", line 261, in call_process_api
151
+ output = await app.get_blocks().process_api(
152
+ File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1786, in process_api
153
+ result = await self.call_function(
154
+ File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1338, in call_function
155
+ prediction = await anyio.to_thread.run_sync(
156
+ File "/usr/local/lib/python3.9/dist-packages/anyio/to_thread.py", line 56, in run_sync
157
+ return await get_async_backend().run_sync_in_worker_thread(
158
+ File "/usr/local/lib/python3.9/dist-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
159
+ return await future
160
+ File "/usr/local/lib/python3.9/dist-packages/anyio/_backends/_asyncio.py", line 851, in run
161
+ result = context.run(func, *args)
162
+ File "/usr/local/lib/python3.9/dist-packages/gradio/utils.py", line 759, in wrapper
163
+ response = f(*args, **kwargs)
164
+ File "/home/aliasgarov/copyright_checker/app.py", line 35, in ai_generated_test
165
+ return predict_bc_scores(input), predict_mc_scores(input)
166
+ File "/home/aliasgarov/copyright_checker/predictors.py", line 390, in predict_mc_scores
167
+ for key in models
168
+ NameError: name 'models' is not defined
predictors.py CHANGED
@@ -28,10 +28,8 @@ device = 'cpu'
28
  text_bc_model_path = params["TEXT_BC_MODEL_PATH"]
29
  text_mc_model_path = params["TEXT_MC_MODEL_PATH"]
30
  text_quillbot_model_path = params["TEXT_QUILLBOT_MODEL_PATH"]
31
- text_1on1_models = params["TEXT_1ON1_MODEL"]
32
  quillbot_labels = params["QUILLBOT_LABELS"]
33
  mc_label_map = params["MC_OUTPUT_LABELS"]
34
- text_1on1_label_map = params["1ON1_OUTPUT_LABELS"]
35
  mc_token_size = int(params["MC_TOKEN_SIZE"])
36
  bc_token_size = int(params["BC_TOKEN_SIZE"])
37
  bias_checker_model_name = params['BIAS_CHECKER_MODEL_PATH']
@@ -48,13 +46,6 @@ quillbot_tokenizer = AutoTokenizer.from_pretrained(text_quillbot_model_path)
48
  quillbot_model = AutoModelForSequenceClassification.from_pretrained(
49
  text_quillbot_model_path
50
  ).to(device)
51
- tokenizers_1on1 = {}
52
- models_1on1 = {}
53
- for model_name, model in zip(mc_label_map, text_1on1_models):
54
- tokenizers_1on1[model_name] = AutoTokenizer.from_pretrained(model)
55
- models_1on1[model_name] = (
56
- AutoModelForSequenceClassification.from_pretrained(model).to(device)
57
- )
58
 
59
 
60
 
@@ -352,43 +343,7 @@ def predict_bc_scores(input):
352
  return bc_score
353
 
354
 
355
- def predict_1on1(model, tokenizer, text):
356
- with torch.no_grad():
357
- model.eval()
358
- tokens = tokenizer(
359
- text,
360
- padding="max_length",
361
- truncation=True,
362
- return_tensors="pt",
363
- max_length=mc_token_size,
364
- ).to(device)
365
- output = model(**tokens)
366
- output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
367
- return output_norm
368
-
369
-
370
- def predict_1on1_combined(input):
371
- predictions = []
372
- for i, model in enumerate(text_1on1_models):
373
- predictions.append(
374
- predict_1on1(models_1on1[model], tokenizers_1on1[model], input)[1]
375
- )
376
- return predictions
377
-
378
-
379
- def predict_1on1_single(input, model):
380
- predictions = predict_1on1(
381
- models_1on1[model], tokenizers_1on1[model], input
382
- )[1]
383
- return predictions
384
-
385
-
386
- def predict_mc_scores(input, models):
387
-
388
- if len(models) == 0:
389
- return {}
390
-
391
- print(f"Models to Test: {models}")
392
  # BC SCORE
393
  bc_scores = []
394
  samples_len_bc = len(
@@ -410,70 +365,34 @@ def predict_mc_scores(input, models):
410
  human_score = 1 - ai_score
411
  bc_score = {"AI": ai_score, "HUMAN": human_score}
412
  print(f"Calibration BC scores: AI: {ai_score}, HUMAN: {human_score}")
413
-
414
- # MC SCORE
415
- if len(models) > 1:
416
- print("Starting MC")
417
- mc_scores = []
418
- segments_mc = split_text_allow_complete_sentences_nltk(
419
- input, type_det="mc"
420
- )
421
- samples_len_mc = len(
422
- split_text_allow_complete_sentences_nltk(input, type_det="mc")
 
423
  )
424
- for i in range(samples_len_mc):
425
- cleaned_text_mc = remove_special_characters(segments_mc[i])
426
- mc_score = predict_mc(
427
- text_mc_model, text_mc_tokenizer, cleaned_text_mc
428
- )
429
- mc_scores.append(mc_score)
430
- mc_scores_array = np.array(mc_scores)
431
- average_mc_scores = np.mean(mc_scores_array, axis=0)
432
- mc_score_list = average_mc_scores.tolist()
433
- mc_score = {}
434
- for score, label in zip(mc_score_list, mc_label_map):
435
- mc_score[label.upper()] = score
436
 
437
- mc_score = {
438
- key: mc_score[key.upper()]
439
- for key in models
440
- if key.upper() in mc_score
441
- }
442
- total = sum(mc_score.values())
443
- # Normalize each value by dividing it by the total
444
- mc_score = {key: value / total for key, value in mc_score.items()}
445
- sum_prob = 1 - bc_score["HUMAN"]
446
- for key, value in mc_score.items():
447
- mc_score[key] = value * sum_prob
448
- print("MC Score:", mc_score)
449
- if sum_prob < 0.01:
450
- mc_score = {}
451
-
452
- elif len(models) == 1:
453
- print("Starting 1on1")
454
- mc_scores = []
455
- segments_mc = split_text_allow_complete_sentences_nltk(
456
- input, type_det="mc"
457
- )
458
- samples_len_mc = len(
459
- split_text_allow_complete_sentences_nltk(input, type_det="mc")
460
- )
461
- for i in range(samples_len_mc):
462
- cleaned_text_mc = remove_special_characters(segments_mc[i])
463
- mc_score = predict_1on1_single(cleaned_text_mc, models[0])
464
- mc_scores.append(mc_score)
465
- mc_scores_array = np.array(mc_scores)
466
- average_mc_scores = np.mean(mc_scores_array, axis=0)
467
- print(average_mc_scores)
468
- mc_score_list = average_mc_scores.tolist()
469
  mc_score = {}
470
- mc_score[models[0].upper()] = mc_score_list
471
- mc_score["OTHER"] = 1 - mc_score_list
472
-
473
- sum_prob = 1 - bc_score["HUMAN"]
474
- for key, value in mc_score.items():
475
- mc_score[key] = value * sum_prob
476
- if sum_prob < 0.01:
477
- mc_score = {}
478
 
479
  return mc_score
 
28
  text_bc_model_path = params["TEXT_BC_MODEL_PATH"]
29
  text_mc_model_path = params["TEXT_MC_MODEL_PATH"]
30
  text_quillbot_model_path = params["TEXT_QUILLBOT_MODEL_PATH"]
 
31
  quillbot_labels = params["QUILLBOT_LABELS"]
32
  mc_label_map = params["MC_OUTPUT_LABELS"]
 
33
  mc_token_size = int(params["MC_TOKEN_SIZE"])
34
  bc_token_size = int(params["BC_TOKEN_SIZE"])
35
  bias_checker_model_name = params['BIAS_CHECKER_MODEL_PATH']
 
46
  quillbot_model = AutoModelForSequenceClassification.from_pretrained(
47
  text_quillbot_model_path
48
  ).to(device)
 
 
 
 
 
 
 
49
 
50
 
51
 
 
343
  return bc_score
344
 
345
 
346
+ def predict_mc_scores(input):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  # BC SCORE
348
  bc_scores = []
349
  samples_len_bc = len(
 
365
  human_score = 1 - ai_score
366
  bc_score = {"AI": ai_score, "HUMAN": human_score}
367
  print(f"Calibration BC scores: AI: {ai_score}, HUMAN: {human_score}")
368
+ mc_scores = []
369
+ segments_mc = split_text_allow_complete_sentences_nltk(
370
+ input, type_det="mc"
371
+ )
372
+ samples_len_mc = len(
373
+ split_text_allow_complete_sentences_nltk(input, type_det="mc")
374
+ )
375
+ for i in range(samples_len_mc):
376
+ cleaned_text_mc = remove_special_characters(segments_mc[i])
377
+ mc_score = predict_mc(
378
+ text_mc_model, text_mc_tokenizer, cleaned_text_mc
379
  )
380
+ mc_scores.append(mc_score)
381
+ mc_scores_array = np.array(mc_scores)
382
+ average_mc_scores = np.mean(mc_scores_array, axis=0)
383
+ mc_score_list = average_mc_scores.tolist()
384
+ mc_score = {}
385
+ for score, label in zip(mc_score_list, mc_label_map):
386
+ mc_score[label.upper()] = score
 
 
 
 
 
387
 
388
+ total = sum(mc_score.values())
389
+ # Normalize each value by dividing it by the total
390
+ mc_score = {key: value / total for key, value in mc_score.items()}
391
+ sum_prob = 1 - bc_score["HUMAN"]
392
+ for key, value in mc_score.items():
393
+ mc_score[key] = value * sum_prob
394
+ print("MC Score:", mc_score)
395
+ if sum_prob < 0.01:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
  mc_score = {}
 
 
 
 
 
 
 
 
397
 
398
  return mc_score