aliasgerovs committed on
Commit
173f4a0
1 Parent(s): 2019311

Updated audio.py

Browse files
Files changed (5) hide show
  1. app.py +2 -2
  2. audio.py +4 -1
  3. nohup.out +587 -0
  4. predictors.py +12 -23
  5. utils.py +7 -2
app.py CHANGED
@@ -12,7 +12,7 @@ import yaml
12
  from functools import partial
13
  from audio import assemblyai_transcribe
14
  import yt_dlp
15
-
16
 
17
  np.set_printoptions(suppress=True)
18
 
@@ -369,7 +369,7 @@ with gr.Blocks() as demo:
369
 
370
  depth_analysis_btn.click(
371
  fn=depth_analysis,
372
- inputs=[bias_buster_selected, input_text],
373
  outputs=[writing_analysis_plot],
374
  api_name="depth_analysis",
375
  )
 
12
  from functools import partial
13
  from audio import assemblyai_transcribe
14
  import yt_dlp
15
+ import os
16
 
17
  np.set_printoptions(suppress=True)
18
 
 
369
 
370
  depth_analysis_btn.click(
371
  fn=depth_analysis,
372
+ inputs=[input_text, bias_buster_selected],
373
  outputs=[writing_analysis_plot],
374
  api_name="depth_analysis",
375
  )
audio.py CHANGED
@@ -4,12 +4,15 @@ import time
4
  import yaml
5
  import yt_dlp
6
  import assemblyai as aai
 
 
7
 
 
8
  with open("config.yaml", "r") as file:
9
  params = yaml.safe_load(file)
10
 
11
  transcriber = aai.Transcriber()
12
- aai.settings.api_key = params["ASSEMBLY_AI_TOKEN"]
13
 
14
  def assemblyai_transcribe(audio_url):
15
  if audio_url is None:
 
4
  import yaml
5
  import yt_dlp
6
  import assemblyai as aai
7
+ from dotenv import load_dotenv
8
+ import os
9
 
10
+ load_dotenv()
11
  with open("config.yaml", "r") as file:
12
  params = yaml.safe_load(file)
13
 
14
  transcriber = aai.Transcriber()
15
+ aai.settings.api_key = os.environ['ASSEMBLYAI_API_KEY']
16
 
17
  def assemblyai_transcribe(audio_url):
18
  if audio_url is None:
nohup.out CHANGED
@@ -0,0 +1,587 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
2
+ warnings.warn(
3
+ 2024-06-05 13:33:46.838996: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
4
+ To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
5
+ [nltk_data] Downloading package punkt to /home/aliasgarov/nltk_data...
6
+ [nltk_data] Package punkt is already up-to-date!
7
+ [nltk_data] Downloading package stopwords to
8
+ [nltk_data] /home/aliasgarov/nltk_data...
9
+ [nltk_data] Package stopwords is already up-to-date!
10
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
11
+ warnings.warn(
12
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
13
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
14
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
15
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
16
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
17
+ Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
18
+ - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
19
+ - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
20
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
21
+ Framework not specified. Using pt to export the model.
22
+ Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
23
+ - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
24
+ - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
25
+ Using the export variant default. Available variants are:
26
+ - default: The default ONNX variant.
27
+
28
+ ***** Exporting submodel 1/1: RobertaForSequenceClassification *****
29
+ Using framework PyTorch: 2.3.0+cu121
30
+ Overriding 1 configuration item(s)
31
+ - use_cache -> False
32
+ Framework not specified. Using pt to export the model.
33
+ Using the export variant default. Available variants are:
34
+ - default: The default ONNX variant.
35
+ Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
36
+ Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
37
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
38
+ warnings.warn(
39
+
40
+ ***** Exporting submodel 1/3: T5Stack *****
41
+ Using framework PyTorch: 2.3.0+cu121
42
+ Overriding 1 configuration item(s)
43
+ - use_cache -> False
44
+
45
+ ***** Exporting submodel 2/3: T5ForConditionalGeneration *****
46
+ Using framework PyTorch: 2.3.0+cu121
47
+ Overriding 1 configuration item(s)
48
+ - use_cache -> True
49
+ /usr/local/lib/python3.9/dist-packages/transformers/modeling_utils.py:1017: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
50
+ if causal_mask.shape[1] < attention_mask.shape[1]:
51
+
52
+ ***** Exporting submodel 3/3: T5ForConditionalGeneration *****
53
+ Using framework PyTorch: 2.3.0+cu121
54
+ Overriding 1 configuration item(s)
55
+ - use_cache -> True
56
+ /usr/local/lib/python3.9/dist-packages/transformers/models/t5/modeling_t5.py:503: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
57
+ elif past_key_value.shape[2] != key_value_states.shape[1]:
58
+ In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
59
+ In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
60
+ Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
61
+ Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
62
+ [nltk_data] Downloading package cmudict to
63
+ [nltk_data] /home/aliasgarov/nltk_data...
64
+ [nltk_data] Package cmudict is already up-to-date!
65
+ [nltk_data] Downloading package punkt to /home/aliasgarov/nltk_data...
66
+ [nltk_data] Package punkt is already up-to-date!
67
+ [nltk_data] Downloading package stopwords to
68
+ [nltk_data] /home/aliasgarov/nltk_data...
69
+ [nltk_data] Package stopwords is already up-to-date!
70
+ [nltk_data] Downloading package wordnet to
71
+ [nltk_data] /home/aliasgarov/nltk_data...
72
+ [nltk_data] Package wordnet is already up-to-date!
73
+ WARNING: The directory '/home/aliasgarov/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you may want sudo's -H flag.
74
+ Collecting en-core-web-sm==3.7.1
75
+ Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
76
+ Requirement already satisfied: spacy<3.8.0,>=3.7.2 in /usr/local/lib/python3.9/dist-packages (from en-core-web-sm==3.7.1) (3.7.2)
77
+ Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (24.0)
78
+ Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.2)
79
+ Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.26.4)
80
+ Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (52.0.0)
81
+ Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.4.8)
82
+ Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (6.4.0)
83
+ Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.3.4)
84
+ Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.10)
85
+ Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.7.1)
86
+ Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.12)
87
+ Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.10)
88
+ Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.8)
89
+ Requirement already satisfied: thinc<8.3.0,>=8.1.8 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.2.3)
90
+ Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.32.3)
91
+ Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.5)
92
+ Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.9)
93
+ Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.4.0)
94
+ Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.1.4)
95
+ Requirement already satisfied: typer<0.10.0,>=0.3.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.9.4)
96
+ Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.66.4)
97
+ Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.9/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.2.0)
98
+ Requirement already satisfied: marisa-trie>=0.7.7 in /usr/local/lib/python3.9/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.1)
99
+ Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.11.0)
100
+ Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.18.2)
101
+ Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.6.0)
102
+ Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.10)
103
+ Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.2.1)
104
+ Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.9/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.3.2)
105
+ Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2020.6.20)
106
+ Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.9/dist-packages (from thinc<8.3.0,>=8.1.8->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.1.4)
107
+ Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.9/dist-packages (from thinc<8.3.0,>=8.1.8->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.7.11)
108
+ Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.9/dist-packages (from typer<0.10.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.1.7)
109
+ Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in /usr/local/lib/python3.9/dist-packages (from weasel<0.4.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.16.0)
110
+ Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.1.5)
111
+ ✔ Download and installation successful
112
+ You can now load the package via spacy.load('en_core_web_sm')
113
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
114
+ warnings.warn(
115
+ [youtube] Extracting URL: https://www.youtube.com/watch?v=rXGqKJoQ4qM
116
+ IMPORTANT: You are using gradio version 4.26.0, however version 4.29.0 is available, please upgrade.
117
+ --------
118
+ Running on local URL: http://0.0.0.0:80
119
+ Running on public URL: https://881ad0461434819142.gradio.live
120
+
121
+ This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
122
+ [youtube] rXGqKJoQ4qM: Downloading webpage
123
+ [youtube] rXGqKJoQ4qM: Downloading ios player API JSON
124
+ [youtube] rXGqKJoQ4qM: Downloading m3u8 information
125
+ /usr/local/lib/python3.9/dist-packages/optimum/bettertransformer/models/encoder_models.py:301: UserWarning: The PyTorch API of nested tensors is in prototype stage and will change in the near future. (Triggered internally at ../aten/src/ATen/NestedTensorImpl.cpp:178.)
126
+ hidden_states = torch._nested_tensor_from_mask(hidden_states, ~attention_mask)
127
+ [generic] Extracting URL:
128
+ Original BC scores: AI: 6.379470141837373e-05, HUMAN: 0.9999362230300903
129
+ Calibration BC scores: AI: 0.02666666666666667, HUMAN: 0.9733333333333334
130
+ Input Text: You've asked about machine learning, and we have a watermelon here. You know, you used to go to the store, pick up a watermelon. Maybe your family told you, you push on the end to see if it's soft, and that means it's a good watermelon or if it smells a certain way. That's how you tell if it's a good watermelon. Well, with machine learning, you don't do any of that. You basically try to determine all of the attributes about this watermelon that you can, and you take those attributes and you feed them into a baby machine model that knows nothing, how fat the stripes are, how thin they are, and you feed all these attributes into that model. You go home, you eat the watermelon, come back in the next day, and you tell that model that was a good watermelon, and it remembers all of those attributes and the fact that it was good. And you're going to do that every day for the next ten years. After ten years, that model is going to be able to tell you based on attributes that you give it. If the watermelon you picked up is good or bad, and you may not know why that model is telling you it's good or bad, but you can trust that it has done enough analysis, and it can tell you a percentage, a surety of whether it's good or bad, that when you pick up a watermelon, give it the attributes. If it says it's good, you can take it home and it will be good.
131
+ Original BC scores: AI: 6.379470141837373e-05, HUMAN: 0.9999362230300903
132
+ Calibration BC scores: AI: 0.02666666666666667, HUMAN: 0.9733333333333334
133
+ MC Score: {'OPENAI GPT': 2.165152131657536e-12, 'MISTRAL': 5.77177379964173e-13, 'CLAUDE': 9.21127433587778e-13, 'GEMINI': 1.2182041486674655e-12, 'GRAMMAR ENHANCER': 0.026666666666666616}
134
+ ERROR: [generic] '' is not a valid URL. Set --default-search "ytsearch" (or run yt-dlp "ytsearch:" ) to search YouTube
135
+ Traceback (most recent call last):
136
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/YoutubeDL.py", line 1606, in wrapper
137
+ return func(self, *args, **kwargs)
138
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/YoutubeDL.py", line 1741, in __extract_info
139
+ ie_result = ie.extract(url)
140
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/extractor/common.py", line 734, in extract
141
+ ie_result = self._real_extract(url)
142
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/extractor/generic.py", line 2349, in _real_extract
143
+ raise ExtractorError(
144
+ yt_dlp.utils.ExtractorError: [generic] '' is not a valid URL. Set --default-search "ytsearch" (or run yt-dlp "ytsearch:" ) to search YouTube
145
+
146
+ During handling of the above exception, another exception occurred:
147
+
148
+ Traceback (most recent call last):
149
+ File "/usr/local/lib/python3.9/dist-packages/gradio/queueing.py", line 527, in process_events
150
+ response = await route_utils.call_process_api(
151
+ File "/usr/local/lib/python3.9/dist-packages/gradio/route_utils.py", line 261, in call_process_api
152
+ output = await app.get_blocks().process_api(
153
+ File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1786, in process_api
154
+ result = await self.call_function(
155
+ File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1338, in call_function
156
+ prediction = await anyio.to_thread.run_sync(
157
+ File "/usr/local/lib/python3.9/dist-packages/anyio/to_thread.py", line 56, in run_sync
158
+ return await get_async_backend().run_sync_in_worker_thread(
159
+ File "/usr/local/lib/python3.9/dist-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
160
+ return await future
161
+ File "/usr/local/lib/python3.9/dist-packages/anyio/_backends/_asyncio.py", line 851, in run
162
+ result = context.run(func, *args)
163
+ File "/usr/local/lib/python3.9/dist-packages/gradio/utils.py", line 759, in wrapper
164
+ response = f(*args, **kwargs)
165
+ File "/home/aliasgarov/copyright_checker/audio.py", line 21, in assemblyai_transcribe
166
+ info = ydl.extract_info(audio_url, download=False)
167
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/YoutubeDL.py", line 1595, in extract_info
168
+ return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
169
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/YoutubeDL.py", line 1624, in wrapper
170
+ self.report_error(str(e), e.format_traceback())
171
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/YoutubeDL.py", line 1073, in report_error
172
+ self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
173
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/YoutubeDL.py", line 1012, in trouble
174
+ raise DownloadError(message, exc_info)
175
+ yt_dlp.utils.DownloadError: ERROR: [generic] '' is not a valid URL. Set --default-search "ytsearch" (or run yt-dlp "ytsearch:" ) to search YouTube
176
+ [youtube] Extracting URL: https://www.youtube.com/watch?v=zhWDdy_5v2w
177
+ [youtube] zhWDdy_5v2w: Downloading webpage
178
+ [youtube] zhWDdy_5v2w: Downloading ios player API JSON
179
+ [youtube] zhWDdy_5v2w: Downloading m3u8 information
180
+ Original BC scores: AI: 0.0008556331158615649, HUMAN: 0.9991443157196045
181
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
182
+ Input Text: In a single minute, your body produces 120 to 180 million red blood cells. People ask Google 2. 4 million questions and 25 million Coca Cola products are consumed. Many of those bottles will end up in a landfill where the World bank estimates we produce 5 million pounds of garbage. 108 human lives will be lost in this minute, and an adult male will lose 96 million cells. Fortunately, 96 million cells divide, replacing those lost. Speaking of divisions, in the USA, 1. 5 people get divorced, while worldwide 116 people will get married, 83, 300 people have sex, but only 258 babies will be born. And a fetus is developing neurons at a rate of 250, 000 /minute so it's no wonder that a computer simulator simulation takes 60 quadrillion bytes to simulate a minute. An average of 1. 38 rain fall around the world, which is 4. 7 billion bathtubs of water every minute. And with the storms comes approximately 6000 bolts of cloud to ground lightning hitting the earth. A 150 pound person expends 1. 1 calories of energy per minute while sleeping. While the sun provides us with 83. 33 terawatts of energy. The earth will complete 1800 its 940 million around the sun, moving 1034 times faster than a cheetah. 70, 000 hours of Netflix are watched, 300 hours are uploaded to YouTube and you can watch this video and subscribe.
183
+ Original BC scores: AI: 0.0008556331158615649, HUMAN: 0.9991443157196045
184
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
185
+ MC Score: {'OPENAI GPT': 0.041650297741095244, 'MISTRAL': 2.1457372915515795e-10, 'CLAUDE': 2.8301516389698626e-08, 'GEMINI': 5.853652282894475e-07, 'GRAMMAR ENHANCER': 0.041682422161102316}
186
+ Original BC scores: AI: 0.0008556331158615649, HUMAN: 0.9991443157196045
187
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
188
+ Input Text: In a single minute, your body produces 120 to 180 million red blood cells. People ask Google 2. 4 million questions and 25 million Coca Cola products are consumed. Many of those bottles will end up in a landfill where the World bank estimates we produce 5 million pounds of garbage. 108 human lives will be lost in this minute, and an adult male will lose 96 million cells. Fortunately, 96 million cells divide, replacing those lost. Speaking of divisions, in the USA, 1. 5 people get divorced, while worldwide 116 people will get married, 83, 300 people have sex, but only 258 babies will be born. And a fetus is developing neurons at a rate of 250, 000 /minute so it's no wonder that a computer simulator simulation takes 60 quadrillion bytes to simulate a minute. An average of 1. 38 rain fall around the world, which is 4. 7 billion bathtubs of water every minute. And with the storms comes approximately 6000 bolts of cloud to ground lightning hitting the earth. A 150 pound person expends 1. 1 calories of energy per minute while sleeping. While the sun provides us with 83. 33 terawatts of energy. The earth will complete 1800 its 940 million around the sun, moving 1034 times faster than a cheetah. 70, 000 hours of Netflix are watched, 300 hours are uploaded to YouTube and you can watch this video and subscribe.
189
+ Original BC scores: AI: 0.0008556331158615649, HUMAN: 0.9991443157196045
190
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
191
+ MC Score: {'OPENAI GPT': 0.041650297741095244, 'MISTRAL': 2.1457372915515795e-10, 'CLAUDE': 2.8301516389698626e-08, 'GEMINI': 5.853652282894475e-07, 'GRAMMAR ENHANCER': 0.041682422161102316}
192
+ Original BC scores: AI: 0.0007931223954074085, HUMAN: 0.9992069602012634
193
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
194
+ Input Text: The double sovereign is a gold coin of the United Kingdom with a nominal value of two pounds sterling (2). It features the reigning monarch on its obverse and, most often, Benedetto Pistrucci's depiction of Saint George and the Dragon on the reverse (pictured). It was rarely issued in the first century and a half after its debut in 1820, usually in a new monarch's coronation year or to mark the institution of a new coinage portrait of the monarch. In addition to the usual coinage in Britain, specimens were struck at Australia's Sydney Mint in 1887 and 1902. Most often struck as a proof coin, the double sovereign has been issued for circulation in only four years, and few examples worn from commercial use are known. It is now a collector and bullion coin, and has been struck by the Royal Mint most years since 1980. In some years, it has not been issued and the Royal
195
+ ['The double sovereign is a gold coin of the United Kingdom with a nominal value of two pounds sterling (£2).', "It features the reigning monarch on its obverse and, most often, Benedetto Pistrucci's depiction of Saint George and the Dragon on the reverse (pictured).", "It was rarely issued in the first century and a half after its debut in 1820, usually in a new monarch's coronation year or to mark the institution of a new coinage portrait of the monarch.", "In addition to the usual coinage in Britain, specimens were struck at Australia's Sydney Mint in 1887 and 1902.", 'Most often struck as a proof coin, the double sovereign has been issued for circulation in only four years, and few examples worn from commercial use are known.', 'It is now a collector and bullion coin, and has been struck by the Royal Mint most years since 1980.', 'In some years, it has not been issued and the Royal']
196
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
197
+ To disable this warning, you can either:
198
+ - Avoid using `tokenizers` before the fork if possible
199
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
200
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
201
+ To disable this warning, you can either:
202
+ - Avoid using `tokenizers` before the fork if possible
203
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
204
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
205
+ To disable this warning, you can either:
206
+ - Avoid using `tokenizers` before the fork if possible
207
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
208
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
209
+ To disable this warning, you can either:
210
+ - Avoid using `tokenizers` before the fork if possible
211
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
212
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
213
+ To disable this warning, you can either:
214
+ - Avoid using `tokenizers` before the fork if possible
215
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
216
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
217
+ To disable this warning, you can either:
218
+ - Avoid using `tokenizers` before the fork if possible
219
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
220
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
221
+ To disable this warning, you can either:
222
+ - Avoid using `tokenizers` before the fork if possible
223
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
224
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
225
+ To disable this warning, you can either:
226
+ - Avoid using `tokenizers` before the fork if possible
227
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
228
+ PLAGIARISM PROCESSING TIME: 10.284763590898365
229
+
230
+ Original BC scores: AI: 0.0007931223954074085, HUMAN: 0.9992069602012634
231
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
232
+ Input Text: The double sovereign is a gold coin of the United Kingdom with a nominal value of two pounds sterling (2). It features the reigning monarch on its obverse and, most often, Benedetto Pistrucci's depiction of Saint George and the Dragon on the reverse (pictured). It was rarely issued in the first century and a half after its debut in 1820, usually in a new monarch's coronation year or to mark the institution of a new coinage portrait of the monarch. In addition to the usual coinage in Britain, specimens were struck at Australia's Sydney Mint in 1887 and 1902. Most often struck as a proof coin, the double sovereign has been issued for circulation in only four years, and few examples worn from commercial use are known. It is now a collector and bullion coin, and has been struck by the Royal Mint most years since 1980. In some years, it has not been issued and the Royal
233
+ Original BC scores: AI: 0.0007931223954074085, HUMAN: 0.9992069602012634
234
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
235
+ MC Score: {'OPENAI GPT': 3.014583190482276e-08, 'MISTRAL': 5.927566886406354e-12, 'CLAUDE': 8.79120894599813e-08, 'GEMINI': 0.08333175381024682, 'GRAMMAR ENHANCER': 1.4605501140370815e-06}
236
+
237
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
238
+ warnings.warn(
239
+ 2024-06-05 14:11:09.267769: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
240
+ To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
241
+ [nltk_data] Downloading package punkt to /home/aliasgarov/nltk_data...
242
+ [nltk_data] Package punkt is already up-to-date!
243
+ [nltk_data] Downloading package stopwords to
244
+ [nltk_data] /home/aliasgarov/nltk_data...
245
+ [nltk_data] Package stopwords is already up-to-date!
246
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
247
+ warnings.warn(
248
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
249
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
250
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
251
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
252
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
253
+ Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
254
+ - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
255
+ - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
256
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
257
+ Framework not specified. Using pt to export the model.
258
+ Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
259
+ - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
260
+ - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
261
+ Using the export variant default. Available variants are:
262
+ - default: The default ONNX variant.
263
+
264
+ ***** Exporting submodel 1/1: RobertaForSequenceClassification *****
265
+ Using framework PyTorch: 2.3.0+cu121
266
+ Overriding 1 configuration item(s)
267
+ - use_cache -> False
268
+ Framework not specified. Using pt to export the model.
269
+ Using the export variant default. Available variants are:
270
+ - default: The default ONNX variant.
271
+ Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
272
+ Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
273
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
274
+ warnings.warn(
275
+
276
+ ***** Exporting submodel 1/3: T5Stack *****
277
+ Using framework PyTorch: 2.3.0+cu121
278
+ Overriding 1 configuration item(s)
279
+ - use_cache -> False
280
+
281
+ ***** Exporting submodel 2/3: T5ForConditionalGeneration *****
282
+ Using framework PyTorch: 2.3.0+cu121
283
+ Overriding 1 configuration item(s)
284
+ - use_cache -> True
285
+ /usr/local/lib/python3.9/dist-packages/transformers/modeling_utils.py:1017: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
286
+ if causal_mask.shape[1] < attention_mask.shape[1]:
287
+
288
+ ***** Exporting submodel 3/3: T5ForConditionalGeneration *****
289
+ Using framework PyTorch: 2.3.0+cu121
290
+ Overriding 1 configuration item(s)
291
+ - use_cache -> True
292
+ /usr/local/lib/python3.9/dist-packages/transformers/models/t5/modeling_t5.py:503: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
293
+ elif past_key_value.shape[2] != key_value_states.shape[1]:
294
+ In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
295
+ In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
296
+ Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
297
+ Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
298
+ [nltk_data] Downloading package cmudict to
299
+ [nltk_data] /home/aliasgarov/nltk_data...
300
+ [nltk_data] Package cmudict is already up-to-date!
301
+ [nltk_data] Downloading package punkt to /home/aliasgarov/nltk_data...
302
+ [nltk_data] Package punkt is already up-to-date!
303
+ [nltk_data] Downloading package stopwords to
304
+ [nltk_data] /home/aliasgarov/nltk_data...
305
+ [nltk_data] Package stopwords is already up-to-date!
306
+ [nltk_data] Downloading package wordnet to
307
+ [nltk_data] /home/aliasgarov/nltk_data...
308
+ [nltk_data] Package wordnet is already up-to-date!
309
+ WARNING: The directory '/home/aliasgarov/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you may want sudo's -H flag.
310
+ Collecting en-core-web-sm==3.7.1
311
+ Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
312
+ Requirement already satisfied: spacy<3.8.0,>=3.7.2 in /usr/local/lib/python3.9/dist-packages (from en-core-web-sm==3.7.1) (3.7.2)
313
+ Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.4.8)
314
+ Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (6.4.0)
315
+ Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.32.3)
316
+ Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (52.0.0)
317
+ Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.7.1)
318
+ Requirement already satisfied: typer<0.10.0,>=0.3.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.9.4)
319
+ Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.1.4)
320
+ Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.8)
321
+ Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.66.4)
322
+ Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (24.0)
323
+ Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.5)
324
+ Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.12)
325
+ Requirement already satisfied: thinc<8.3.0,>=8.1.8 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.2.3)
326
+ Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.26.4)
327
+ Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.3.4)
328
+ Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.10)
329
+ Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.9)
330
+ Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.10)
331
+ Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.2)
332
+ Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.4.0)
333
+ Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.9/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.2.0)
334
+ Requirement already satisfied: marisa-trie>=0.7.7 in /usr/local/lib/python3.9/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.1)
335
+ Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.11.0)
336
+ Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.6.0)
337
+ Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.18.2)
338
+ Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2020.6.20)
339
+ Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.10)
340
+ Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.2.1)
341
+ Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.9/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.3.2)
342
+ Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.9/dist-packages (from thinc<8.3.0,>=8.1.8->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.7.11)
343
+ Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.9/dist-packages (from thinc<8.3.0,>=8.1.8->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.1.4)
344
+ Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.9/dist-packages (from typer<0.10.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.1.7)
345
+ Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in /usr/local/lib/python3.9/dist-packages (from weasel<0.4.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.16.0)
346
+ Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.1.5)
347
+ ✔ Download and installation successful
348
+ You can now load the package via spacy.load('en_core_web_sm')
349
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
350
+ warnings.warn(
351
+ Token indices sequence length is longer than the specified maximum sequence length for this model (2138 > 512). Running this sequence through the model will result in indexing errors
352
+ /usr/local/lib/python3.9/dist-packages/optimum/bettertransformer/models/encoder_models.py:301: UserWarning: The PyTorch API of nested tensors is in prototype stage and will change in the near future. (Triggered internally at ../aten/src/ATen/NestedTensorImpl.cpp:178.)
353
+ hidden_states = torch._nested_tensor_from_mask(hidden_states, ~attention_mask)
354
+ [youtube] Extracting URL: https://www.youtube.com/watch?v=1aA1WGON49E
355
+ IMPORTANT: You are using gradio version 4.26.0, however version 4.29.0 is available, please upgrade.
356
+ --------
357
+ Running on local URL: http://0.0.0.0:80
358
+ Running on public URL: https://9882bb485d656697af.gradio.live
359
+
360
+ This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
361
+ Original BC scores: AI: 0.0009290315210819244, HUMAN: 0.9990710020065308
362
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
363
+ Input Text: Reece Rogers Google is set to start mixing ads into its new AI-generated search answers. Its a test of how the companys biggest revenue stream can adapt to the age of generative AI. Paresh Dave WIRED is where tomorrow is realized. It is the essential source of information and ideas that make sense of a world in constant transformation. The WIRED conversation illuminates how technology is changing every aspect of our livesfrom culture to business, science to design. The breakthroughs and innovations that we uncover lead to new ways of thinking, new connections, and new industries. More From WIRED Reviews and Guides 2024 Condé Nast. All rights reserved. WIRED may earn a portion of sales from products that are purchased through our site as part of our Affiliate Partnerships with retailers. The material on this site may not be reproduced, distributed, transmitted, cached or otherwise used, except with the prior written permission of Condé Nast. Select international site United States Large Chevron
364
+ Original BC scores: AI: 0.0009290315210819244, HUMAN: 0.9990710020065308
365
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
366
+ MC Score: {'OPENAI GPT': 0.009792306770881021, 'MISTRAL': 3.4086881465592965e-10, 'CLAUDE': 4.831611022382278e-08, 'GEMINI': 0.027845658361911788, 'GRAMMAR ENHANCER': 0.04569531977176668}
367
+ Original BC scores: AI: 0.0010414546122774482, HUMAN: 0.9989585876464844
368
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
369
+ Input Text: Reece Rogers Google is set to start mixing ads into its new AI-generated search answers. Its a test of how the companys biggest revenue stream can adapt to the age of generative AI. Paresh Dave WIRED is where tomorrow is realized. It is the essential source of information and ideas that make sense of a world in constant transformation. The WIRED conversation illuminates how technology is changing every aspect of our livesfrom culture to business, science to design. The breakthroughs and innovations that we uncover lead to new ways of thinking, new connections, and new industries. More From WIRED Reviews and Guides 2024 Condé Nast. All rights reserved. WIRED may earn a portion of sales from products that are purchased through our site as part of our Affiliate Partnerships with retailers. The material on this site may not be reproduced, distributed, transmitted, cached or otherwise used, except with the prior written permission of Condé Nast. Select international site United States Large Chevron
370
+ Original BC scores: AI: 0.0010414546122774482, HUMAN: 0.9989585876464844
371
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
372
+ MC Score: {'OPENAI GPT': 0.011016534020503366, 'MISTRAL': 3.769994686801209e-10, 'CLAUDE': 5.417933834905855e-08, 'GEMINI': 0.031325822075208044, 'GRAMMAR ENHANCER': 0.04099092384179435}
373
+ Original BC scores: AI: 0.0013807499781250954, HUMAN: 0.9986192584037781
374
+ Calibration BC scores: AI: 0.09973753280839895, HUMAN: 0.9002624671916011
375
+ Input Text: Also, even if Google developers did not intend for this feature to be a replacement of the original work, AI Overviews provide direct answers to questions in a manner that buries attribution and reduces the incentive for users to click through to the source material. We see that links included in AI Overviews get more clicks than if the page had appeared as a traditional web listing for that query, " said the Google spokesperson. No data to support this claim was offered to WIRED, so it's impossible to independently verify the impact of the AI feature on click-through rates. Also, its worth noting that the company compared AI Overview referral traffic to more traditional blue-link traffic from Google, not to articles chosen for a featured snippet, where the rates are likely much higher. After I reached out to Google about the AI Overview result that pulled from my work, the experimental AI search result for this query stopped showing up, but Google still attempted to generate an answer above the featured snippet. Reece Rogers via Google While many AI lawsuits remain unresolved, one legal expert I spoke with whfeel certain that if the company decides to expand the prevalence AI Overviews, then thnited States Large Chevron
376
+ Original BC scores: AI: 0.0013807499781250954, HUMAN: 0.9986192584037781
377
+ Calibration BC scores: AI: 0.09973753280839895, HUMAN: 0.9002624671916011
378
+ MC Score: {'OPENAI GPT': 0.0014353521324674597, 'MISTRAL': 8.853771236138064e-10, 'CLAUDE': 1.2876423798196547e-07, 'GEMINI': 0.02511704077557941, 'GRAMMAR ENHANCER': 0.07318501115783929}
379
+ Original BC scores: AI: 0.9780901670455933, HUMAN: 0.021909818053245544
380
+ Calibration BC scores: AI: 0.5142857142857142, HUMAN: 0.48571428571428577
381
+ Input Text: Googles AI Overview Search Results Copied My Original Work WIRED Open Navigation Menu Menu Story Saved To revisit this article, visit My Profile, then. Close Alert Googles AI Overview Search Results Copied My Original Work More Chevron Jun 5, 2024 6: 30 AM Googles AI Overview Search Results Copied My Original Work Googles AI feature bumped my article down on the results page, but the new AI Overview at the top still referenced it. What gives? Photo-illustration: Jacqui Van Liew; Getty Images Save this str is not directly attributed to me. Instead, my original article was one of six footnotes hyperlinked near the bottom of the result. With source links located so far down, its hard to imagine any publisher receiving significant traffic in this situation. AI Overviews will conceptually match information that appears in top web results, including those linked in the overview, wrote a Google spon that if the company decides to expand the prevalence AI Overviews, then thnited States Large Chevron
382
+ Original BC scores: AI: 0.9780901670455933, HUMAN: 0.021909818053245544
383
+ Calibration BC scores: AI: 0.5142857142857142, HUMAN: 0.48571428571428577
384
+ MC Score: {'OPENAI GPT': 2.5201448071245276e-11, 'MISTRAL': 9.422158589059545e-12, 'CLAUDE': 1.7335425951868287e-11, 'GEMINI': 2.408824389954489e-11, 'GRAMMAR ENHANCER': 0.5142857142857142}
385
+ [youtube] 1aA1WGON49E: Downloading webpage
386
+ [youtube] 1aA1WGON49E: Downloading ios player API JSON
387
+ [youtube] 1aA1WGON49E: Downloading m3u8 information
388
+
389
+ WARNING: Invalid HTTP request received.
390
+ WARNING: Invalid HTTP request received.
391
+ Original BC scores: AI: 0.00025223050033673644, HUMAN: 0.9997478127479553
392
+ Calibration BC scores: AI: 0.02666666666666667, HUMAN: 0.9733333333333334
393
+ Input Text: Wow, what an audience. But if I'm being honest, I don't care what you think of my talk. I don't. I care what the Internet thinks of my talk because they're the ones who get it seen and get it shared. And I think that's where most people get it wrong. They're talking to you here instead of talking to you. Random person scrolling Facebook. Thanks for the click. You see, back in 2009, we all had these weird little things called attention spans. Yeah, they're gone. They're gone. We killed them. They're dead. I'm trying to think of the last time I watched an 18 minutes TED talk. It's been years. Literally years. So if you're giving a TED talk, keep it quick. I'm doing mine in under a minute. I'm at 44 seconds right now. That means we've got time for one final joke. Why are balloons so expensive? Inflation.
394
+ Original BC scores: AI: 0.00025223050033673644, HUMAN: 0.9997478127479553
395
+ Calibration BC scores: AI: 0.02666666666666667, HUMAN: 0.9733333333333334
396
+ MC Score: {'OPENAI GPT': 3.122121820335142e-11, 'MISTRAL': 4.327827355747134e-12, 'CLAUDE': 4.987585455751277e-11, 'GEMINI': 0.026666666666666616, 'GRAMMAR ENHANCER': 6.573377694015398e-10}
397
+ Original BC scores: AI: 0.998515784740448, HUMAN: 0.0014842685777693987
398
+ Calibration BC scores: AI: 0.7272727272727273, HUMAN: 0.2727272727272727
399
+ Input Text: Cool extension with great content! It seamlessly verifies the authenticity of online content, making it a must-have for anyone concerned about trust and reliability online. Highly recommended!
400
+ Original BC scores: AI: 0.998515784740448, HUMAN: 0.0014842685777693987
401
+ Calibration BC scores: AI: 0.7272727272727273, HUMAN: 0.2727272727272727
402
+ MC Score: {'OPENAI GPT': 0.7272727272727273, 'MISTRAL': 3.3261002538057226e-11, 'CLAUDE': 6.143898521251211e-11, 'GEMINI': 1.713123784244627e-10, 'GRAMMAR ENHANCER': 8.93011624243782e-11}
403
+
404
+ probas = F.softmax(tensor_logits).detach().cpu().numpy()
405
+
406
+ probas = F.softmax(tensor_logits).detach().cpu().numpy()
407
+
408
+ probas = F.softmax(tensor_logits).detach().cpu().numpy()
409
+ ['Ricky West\nrw12west@gmail.com\n262-665-7816\n3 June 2024\nMaryna Burushkina\nGrowth Channel\n305 East Huntland Drive\nAustin, TX 78752\nDear Maryna,\nAdvertising has always been an interest of mine because of how it exists in our\neveryday lives.', 'Knowing how to advertise is such an important skill to have, and you will\nalmost always come across it in some way no matter what role you are in.', 'To be with a\ngreat company such as Growth Channel would be a great advantage.', 'Being passionate\nabout sales and having gained extensive experience in the field, I possess the\nenthusiasm to personally contribute to the realization of Growth Channel’s vision.', "I plan\nto optimize Growth Channel's vision by achieving higher conversion rates with future\nprospects by fully educating and creating awareness among consumers of the value\nGrowth Channel can bring to their company.", 'At SoftwareONE, I have been a top three seller of twenty-five SDRs for the last two\nfiscal quarters, and I have never been out of the top ten.', 'This can be attributed to my\nability to understand client needs and identify these needs to offer solutions that will\nimprove their yields.', 'Moreover, during my current and past sales/advertising experience,\nI have worked on several campaigns.', 'I had to use data to ensure that the advertisement\nteam and I achieved a campaign that fits the client’s goals.', 'For example, when I was in\nreal estate, I would work with my advertising team to target a certain audience for the\nluxury real estate properties that I was posting for my broker.', 'Although I have never\nofficially held an advertising title, much of my experience in the workforce has allowed\nme to become experienced and more knowledgeable than most in the advertising\nsector.', 'To continue, If I were to choose a company that Growth Channel should expand its\nnetwork on, it would be Thrive Market.', 'Thrive Market is an already successful company\nthat could really expand on its 
popularity especially because of its business model\nwhich sells sustainable groceries.', 'Being aware of the environmental friendliness and\nsustainability of the products it creates, Thrive Market is bound to increase its market\nshare and make a bigger, positive impact.', 'To create the campaign representing Thrive Market, I would use a targeting tool for ad\nnetworks, to aim at the audience interested in the protection of the environment.', 'The\ncampaign would include:\nAudience Segmentation: Using the technologies that Growth Channel possesses by\ntargeting an audience who has interest in eco-friendly institutions, environmentally\nfriendly goods and services, and other related options.', 'I would also aim it towards an\naudience who is health conscious as Thrive Market mainly sells whole, organic foods.', 'Multi-Channel Approach: Social media and search engine networks provide options to\nbuy ad space where consumers can be targeted effectively.', 'There are many unique ad\noptions in the social media networks for all types of advertisements.', 'Social media ads\nalso allow for very creative approaches.', 'Performance\nOptimization:\nReviewing\nand\nevaluating\nthe\neffectiveness\nof\nthe\ncampaigns would be a continuous process with the help of analytics tools offered by\nGrowth Channel.', "These tools will allow Growth Channel to optimize campaigns and\nmake adjustments they see fit to Thrive Market's audience which will increase ROI.", 'Finally, I am confident that my sales experience, combined with my knowledge of digital\nmarketing makes me a great fit for the SDR opportunity at Growth Channel.', "I will bring\nan enthusiastic and positive energy to the team, and I am looking forward to possibly\ncontributing to Growth Channel's continued success.", 'Thank you for considering my application.', 'I look forward to learning more about Growth\nChannel during this ongoing process.', 'Best Regards,\nRicky West\nrw12west@gmail.com\n262-665-7816']
410
+ {'Ricky West\nrw12west@gmail.com\n262-665-7816\n3 June 2024\nMaryna Burushkina\nGrowth Channel\n305 East Huntland Drive\nAustin, TX 78752\nDear Maryna,\nAdvertising has always been an interest of mine because of how it exists in our\neveryday lives.': -0.4892860322252093, 'Knowing how to advertise is such an important skill to have, and you will\nalmost always come across it in some way no matter what role you are in.': -0.19567786339047316, 'To be with a\ngreat company such as Growth Channel would be a great advantage.': -0.021392659362429345, 'Being passionate\nabout sales and having gained extensive experience in the field, I possess the\nenthusiasm to personally contribute to the realization of Growth Channel’s vision.': -0.5170004958369422, "I plan\nto optimize Growth Channel's vision by achieving higher conversion rates with future\nprospects by fully educating and creating awareness among consumers of the value\nGrowth Channel can bring to their company.": -0.05491668142680101, 'At SoftwareONE, I have been a top three seller of twenty-five SDRs for the last two\nfiscal quarters, and I have never been out of the top ten.': 0.012097714481722944, 'This can be attributed to my\nability to understand client needs and identify these needs to offer solutions that will\nimprove their yields.': 0.37451399354344495, 'Moreover, during my current and past sales/advertising experience,\nI have worked on several campaigns.': -0.04695787195830139, 'I had to use data to ensure that the advertisement\nteam and I achieved a campaign that fits the client’s goals.': -0.12585292717762425} bc
411
+ {'For example, when I was in\nreal estate, I would work with my advertising team to target a certain audience for the\nluxury real estate properties that I was posting for my broker.': -0.1608428972249821, 'Although I have never\nofficially held an advertising title, much of my experience in the workforce has allowed\nme to become experienced and more knowledgeable than most in the advertising\nsector.': -0.1676857138786792, 'To continue, If I were to choose a company that Growth Channel should expand its\nnetwork on, it would be Thrive Market.': -0.030235768266264267, 'Thrive Market is an already successful company\nthat could really expand on its popularity especially because of its business model\nwhich sells sustainable groceries.': -0.37725885761075584, 'Being aware of the environmental friendliness and\nsustainability of the products it creates, Thrive Market is bound to increase its market\nshare and make a bigger, positive impact.': -0.16059165851828455, 'To create the campaign representing Thrive Market, I would use a targeting tool for ad\nnetworks, to aim at the audience interested in the protection of the environment.': -0.2580689230746736, 'The\ncampaign would include:\nAudience Segmentation: Using the technologies that Growth Channel possesses by\ntargeting an audience who has interest in eco-friendly institutions, environmentally\nfriendly goods and services, and other related options.': 0.010524143055479326, 'I would also aim it towards an\naudience who is health conscious as Thrive Market mainly sells whole, organic foods.': -0.22973100808013053, 'Multi-Channel Approach: Social media and search engine networks provide options to\nbuy ad space where consumers can be targeted effectively.': -0.004408479538479063, 'There are many unique ad\noptions in the social media networks for all types of advertisements.': -0.11047277720491727, 'Social media ads\nalso allow for very creative approaches.': -0.0023210321339796106} bc
412
+ {'Performance\nOptimization:\nReviewing\nand\nevaluating\nthe\neffectiveness\nof\nthe\ncampaigns would be a continuous process with the help of analytics tools offered by\nGrowth Channel.': 0.003683657918039559, "These tools will allow Growth Channel to optimize campaigns and\nmake adjustments they see fit to Thrive Market's audience which will increase ROI.": 0.07425998772503634, 'Finally, I am confident that my sales experience, combined with my knowledge of digital\nmarketing makes me a great fit for the SDR opportunity at Growth Channel.': 0.2235278874397872, "I will bring\nan enthusiastic and positive energy to the team, and I am looking forward to possibly\ncontributing to Growth Channel's continued success.": -0.09174872553632324, 'Thank you for considering my application.': 0.03028501558908839, 'I look forward to learning more about Growth\nChannel during this ongoing process.': 0.06096195592591846, 'Best Regards,\nRicky West\nrw12west@gmail.com\n262-665-7816': 0.07553230323968681}
413
+ Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
414
+ /home/aliasgarov/copyright_checker/predictors.py:212: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
415
+ probas = F.softmax(tensor_logits).detach().cpu().numpy()
416
+ bc
417
+ <span style="background-color: rgb(14, 255, 14); color: black;">Ricky West
418
+ rw12west@gmail.com
419
+ 262-665-7816
420
+ 3 June 2024
421
+ Maryna Burushkina
422
+ Growth Channel
423
+ 305 East Huntland Drive
424
+ Austin, TX 78752
425
+ Dear Maryna,
426
+ Advertising has always been an interest of mine because of how it exists in our
427
+ everyday lives.</span> <span style="background-color: rgb(164, 255, 164); color: black;">Knowing how to advertise is such an important skill to have, and you will
428
+ almost always come across it in some way no matter what role you are in.</span> <span style="background-color: rgb(254, 255, 254); color: black;">To be with a
429
+ great company such as Growth Channel would be a great advantage.</span> <span style="background-color: rgb(0, 255, 0); color: black;">Being passionate
430
+ about sales and having gained extensive experience in the field, I possess the
431
+ enthusiasm to personally contribute to the realization of Growth Channel’s vision.</span> <span style="background-color: rgb(237, 255, 237); color: black;">I plan
432
+ to optimize Growth Channel's vision by achieving higher conversion rates with future
433
+ prospects by fully educating and creating awareness among consumers of the value
434
+ Growth Channel can bring to their company.</span> <span style="background-color: rgb(255, 254, 254); color: black;">At SoftwareONE, I have been a top three seller of twenty-five SDRs for the last two
435
+ fiscal quarters, and I have never been out of the top ten.</span> <span style="background-color: rgb(255, 0, 0); color: black;">This can be attributed to my
436
+ ability to understand client needs and identify these needs to offer solutions that will
437
+ improve their yields.</span> <span style="background-color: rgb(241, 255, 241); color: black;">Moreover, during my current and past sales/advertising experience,
438
+ I have worked on several campaigns.</span> <span style="background-color: rgb(200, 255, 200); color: black;">I had to use data to ensure that the advertisement
439
+ team and I achieved a campaign that fits the client’s goals.</span> <span style="background-color: rgb(146, 255, 146); color: black;">For example, when I was in
440
+ real estate, I would work with my advertising team to target a certain audience for the
441
+ luxury real estate properties that I was posting for my broker.</span> <span style="background-color: rgb(142, 255, 142); color: black;">Although I have never
442
+ officially held an advertising title, much of my experience in the workforce has allowed
443
+ me to become experienced and more knowledgeable than most in the advertising
444
+ sector.</span> <span style="background-color: rgb(235, 255, 235); color: black;">To continue, If I were to choose a company that Growth Channel should expand its
445
+ network on, it would be Thrive Market.</span> <span style="background-color: rgb(0, 255, 0); color: black;">Thrive Market is an already successful company
446
+ that could really expand on its popularity especially because of its business model
447
+ which sells sustainable groceries.</span> <span style="background-color: rgb(146, 255, 146); color: black;">Being aware of the environmental friendliness and
448
+ sustainability of the products it creates, Thrive Market is bound to increase its market
449
+ share and make a bigger, positive impact.</span> <span style="background-color: rgb(80, 255, 80); color: black;">To create the campaign representing Thrive Market, I would use a targeting tool for ad
450
+ networks, to aim at the audience interested in the protection of the environment.</span> <span style="background-color: rgb(255, 0, 0); color: black;">The
451
+ campaign would include:
452
+ Audience Segmentation: Using the technologies that Growth Channel possesses by
453
+ targeting an audience who has interest in eco-friendly institutions, environmentally
454
+ friendly goods and services, and other related options.</span> <span style="background-color: rgb(100, 255, 100); color: black;">I would also aim it towards an
455
+ audience who is health conscious as Thrive Market mainly sells whole, organic foods.</span> <span style="background-color: rgb(252, 255, 252); color: black;">Multi-Channel Approach: Social media and search engine networks provide options to
456
+ buy ad space where consumers can be targeted effectively.</span> <span style="background-color: rgb(180, 255, 180); color: black;">There are many unique ad
457
+ options in the social media networks for all types of advertisements.</span> <span style="background-color: rgb(254, 255, 254); color: black;">Social media ads
458
+ also allow for very creative approaches.</span> <span style="background-color: rgb(255, 253, 253); color: black;">Performance
459
+ Optimization:
460
+ Reviewing
461
+ and
462
+ evaluating
463
+ the
464
+ effectiveness
465
+ of
466
+ the
467
+ campaigns would be a continuous process with the help of analytics tools offered by
468
+ Growth Channel.</span> <span style="background-color: rgb(255, 172, 172); color: black;">These tools will allow Growth Channel to optimize campaigns and
469
+ make adjustments they see fit to Thrive Market's audience which will increase ROI.</span> <span style="background-color: rgb(255, 0, 0); color: black;">Finally, I am confident that my sales experience, combined with my knowledge of digital
470
+ marketing makes me a great fit for the SDR opportunity at Growth Channel.</span> <span style="background-color: rgb(0, 255, 0); color: black;">I will bring
471
+ an enthusiastic and positive energy to the team, and I am looking forward to possibly
472
+ contributing to Growth Channel's continued success.</span> <span style="background-color: rgb(255, 223, 223); color: black;">Thank you for considering my application.</span> <span style="background-color: rgb(255, 187, 187); color: black;">I look forward to learning more about Growth
473
+ Channel during this ongoing process.</span> <span style="background-color: rgb(255, 170, 170); color: black;">Best Regards,
474
+ Ricky West
475
+ rw12west@gmail.com
476
+ 262-665-7816</span>
477
+ Original BC scores: AI: 0.34673771262168884, HUMAN: 0.6532623171806335
478
+ Calibration BC scores: AI: 0.40939597315436244, HUMAN: 0.5906040268456376
479
+ Input Text: Performance Optimization: Reviewing and evaluating the effectiveness of the campaigns would be a continuous process with the help of analytics tools offered by Growth Channel. These tools will allow Growth Channel to optimize campaigns and make adjustments they see fit to Thrive Market's audience which will increase ROI. Finally, I am confident that my sales experience, combined with my knowledge of digital marketing makes me a great fit for the SDR opportunity at Growth Channel. I will bring an enthusiastic and positive energy to the team, and I am looking forward to possibly contributing to Growth Channel's continued success. Thank you for considering my application. I look forward to learning more about Growth Channel during this ongoing process. Best Regards, Ricky West rw12westgmail. com 262-665-7816
480
+ Original BC scores: AI: 0.34673771262168884, HUMAN: 0.6532623171806335
481
+ Calibration BC scores: AI: 0.40939597315436244, HUMAN: 0.5906040268456376
482
+ MC Score: {'OPENAI GPT': 0.3178691988023335, 'MISTRAL': 2.1444096669889683e-09, 'CLAUDE': 1.1681333364700646e-06, 'GEMINI': 0.07488741660678146, 'GRAMMAR ENHANCER': 0.016638195604685966}
483
+ {'Ricky West\nrw12west@gmail.com\n262-665-7816\n3 June 2024\nMaryna Burushkina\nGrowth Channel\n305 East Huntland Drive\nAustin, TX 78752\nDear Maryna,\nAdvertising has always been an interest of mine because of how it exists in our\neveryday lives.': -0.5306495551721879, 'Knowing how to advertise is such an important skill to have, and you will\nalmost always come across it in some way no matter what role you are in.': -0.19667031727765713, 'To be with a\ngreat company such as Growth Channel would be a great advantage.': -0.041189784573334345, 'Being passionate\nabout sales and having gained extensive experience in the field, I possess the\nenthusiasm to personally contribute to the realization of Growth Channel’s vision.': -0.24918810706161526, "I plan\nto optimize Growth Channel's vision by achieving higher conversion rates with future\nprospects by fully educating and creating awareness among consumers of the value\nGrowth Channel can bring to their company.": -0.06580943427496835, 'At SoftwareONE, I have been a top three seller of twenty-five SDRs for the last two\nfiscal quarters, and I have never been out of the top ten.': -0.13508458234735787, 'This can be attributed to my\nability to understand client needs and identify these needs to offer solutions that will\nimprove their yields.': 0.27866133085282396, 'Moreover, during my current and past sales/advertising experience,\nI have worked on several campaigns.': 0.017630278801475125, 'I had to use data to ensure that the advertisement\nteam and I achieved a campaign that fits the client’s goals.': -0.045258109662774965}
484
+ probas = F.softmax(tensor_logits).detach().cpu().numpy()
485
+ /home/aliasgarov/copyright_checker/predictors.py:212: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
486
+ probas = F.softmax(tensor_logits).detach().cpu().numpy()
487
+ bc
488
+ {'For example, when I was in\nreal estate, I would work with my advertising team to target a certain audience for the\nluxury real estate properties that I was posting for my broker.': -0.11700274179295864, 'Although I have never\nofficially held an advertising title, much of my experience in the workforce has allowed\nme to become experienced and more knowledgeable than most in the advertising\nsector.': -0.15774178293541702, 'To continue, If I were to choose a company that Growth Channel should expand its\nnetwork on, it would be Thrive Market.': -0.09720898432998293, 'Thrive Market is an already successful company\nthat could really expand on its popularity especially because of its business model\nwhich sells sustainable groceries.': -0.15585376721747624, 'Being aware of the environmental friendliness and\nsustainability of the products it creates, Thrive Market is bound to increase its market\nshare and make a bigger, positive impact.': -0.18900911119385885, 'To create the campaign representing Thrive Market, I would use a targeting tool for ad\nnetworks, to aim at the audience interested in the protection of the environment.': -0.23145917771310473, 'The\ncampaign would include:\nAudience Segmentation: Using the technologies that Growth Channel possesses by\ntargeting an audience who has interest in eco-friendly institutions, environmentally\nfriendly goods and services, and other related options.': -0.050303103673449596, 'I would also aim it towards an\naudience who is health conscious as Thrive Market mainly sells whole, organic foods.': -0.330775313595558, 'Multi-Channel Approach: Social media and search engine networks provide options to\nbuy ad space where consumers can be targeted effectively.': -0.14748824344664127, 'There are many unique ad\noptions in the social media networks for all types of advertisements.': -0.0924204993475862, 'Social media ads\nalso allow for very creative approaches.': -0.03324840268599188} bc
489
+ {'Performance\nOptimization:\nReviewing\nand\nevaluating\nthe\neffectiveness\nof\nthe\ncampaigns would be a continuous process with the help of analytics tools offered by\nGrowth Channel.': 0.1788831345248405, "These tools will allow Growth Channel to optimize campaigns and\nmake adjustments they see fit to Thrive Market's audience which will increase ROI.": -0.003715107276243355, 'Finally, I am confident that my sales experience, combined with my knowledge of digital\nmarketing makes me a great fit for the SDR opportunity at Growth Channel.': 0.24272772732024744, "I will bring\nan enthusiastic and positive energy to the team, and I am looking forward to possibly\ncontributing to Growth Channel's continued success.": -0.11980814206086883, 'Thank you for considering my application.': 0.05269689352576316, 'I look forward to learning more about Growth\nChannel during this ongoing process.': 0.03812065293420048, 'Best Regards,\nRicky West\nrw12west@gmail.com\n262-665-7816': 0.012098829368504904} bc
490
+ <span style="background-color: rgb(0, 255, 0); color: black;">Ricky West
491
+ rw12west@gmail.com
492
+ 262-665-7816
493
+ 3 June 2024
494
+ Maryna Burushkina
495
+ Growth Channel
496
+ 305 East Huntland Drive
497
+ Austin, TX 78752
498
+ Dear Maryna,
499
+ Advertising has always been an interest of mine because of how it exists in our
500
+ everyday lives.</span> <span style="background-color: rgb(173, 255, 173); color: black;">Knowing how to advertise is such an important skill to have, and you will
501
+ almost always come across it in some way no matter what role you are in.</span> <span style="background-color: rgb(254, 255, 254); color: black;">To be with a
502
+ great company such as Growth Channel would be a great advantage.</span> <span style="background-color: rgb(146, 255, 146); color: black;">Being passionate
503
+ about sales and having gained extensive experience in the field, I possess the
504
+ enthusiasm to personally contribute to the realization of Growth Channel’s vision.</span> <span style="background-color: rgb(241, 255, 241); color: black;">I plan
505
+ to optimize Growth Channel's vision by achieving higher conversion rates with future
506
+ prospects by fully educating and creating awareness among consumers of the value
507
+ Growth Channel can bring to their company.</span> <span style="background-color: rgb(205, 255, 205); color: black;">At SoftwareONE, I have been a top three seller of twenty-five SDRs for the last two
508
+ fiscal quarters, and I have never been out of the top ten.</span> <span style="background-color: rgb(255, 0, 0); color: black;">This can be attributed to my
509
+ ability to understand client needs and identify these needs to offer solutions that will
510
+ improve their yields.</span> <span style="background-color: rgb(255, 254, 254); color: black;">Moreover, during my current and past sales/advertising experience,
511
+ I have worked on several campaigns.</span> <span style="background-color: rgb(252, 255, 252); color: black;">I had to use data to ensure that the advertisement
512
+ team and I achieved a campaign that fits the client’s goals.</span> <span style="background-color: rgb(182, 255, 182); color: black;">For example, when I was in
513
+ real estate, I would work with my advertising team to target a certain audience for the
514
+ luxury real estate properties that I was posting for my broker.</span> <span style="background-color: rgb(147, 255, 147); color: black;">Although I have never
515
+ officially held an advertising title, much of my experience in the workforce has allowed
516
+ me to become experienced and more knowledgeable than most in the advertising
517
+ sector.</span> <span style="background-color: rgb(199, 255, 199); color: black;">To continue, If I were to choose a company that Growth Channel should expand its
518
+ network on, it would be Thrive Market.</span> <span style="background-color: rgb(149, 255, 149); color: black;">Thrive Market is an already successful company
519
+ that could really expand on its popularity especially because of its business model
520
+ which sells sustainable groceries.</span> <span style="background-color: rgb(121, 255, 121); color: black;">Being aware of the environmental friendliness and
521
+ sustainability of the products it creates, Thrive Market is bound to increase its market
522
+ share and make a bigger, positive impact.</span> <span style="background-color: rgb(84, 255, 84); color: black;">To create the campaign representing Thrive Market, I would use a targeting tool for ad
523
+ networks, to aim at the audience interested in the protection of the environment.</span> <span style="background-color: rgb(239, 255, 239); color: black;">The
524
+ campaign would include:
525
+ Audience Segmentation: Using the technologies that Growth Channel possesses by
526
+ targeting an audience who has interest in eco-friendly institutions, environmentally
527
+ friendly goods and services, and other related options.</span> <span style="background-color: rgb(0, 255, 0); color: black;">I would also aim it towards an
528
+ audience who is health conscious as Thrive Market mainly sells whole, organic foods.</span> <span style="background-color: rgb(156, 255, 156); color: black;">Multi-Channel Approach: Social media and search engine networks provide options to
529
+ buy ad space where consumers can be targeted effectively.</span> <span style="background-color: rgb(203, 255, 203); color: black;">There are many unique ad
530
+ options in the social media networks for all types of advertisements.</span> <span style="background-color: rgb(254, 255, 254); color: black;">Social media ads
531
+ also allow for very creative approaches.</span> <span style="background-color: rgb(255, 70, 70); color: black;">Performance
532
+ Optimization:
533
+ Reviewing
534
+ and
535
+ evaluating
536
+ the
537
+ effectiveness
538
+ of
539
+ the
540
+ campaigns would be a continuous process with the help of analytics tools offered by
541
+ Growth Channel.</span> <span style="background-color: rgb(252, 255, 252); color: black;">These tools will allow Growth Channel to optimize campaigns and
542
+ make adjustments they see fit to Thrive Market's audience which will increase ROI.</span> <span style="background-color: rgb(255, 0, 0); color: black;">Finally, I am confident that my sales experience, combined with my knowledge of digital
543
+ marketing makes me a great fit for the SDR opportunity at Growth Channel.</span> <span style="background-color: rgb(0, 255, 0); color: black;">I will bring
544
+ an enthusiastic and positive energy to the team, and I am looking forward to possibly
545
+ contributing to Growth Channel's continued success.</span> <span style="background-color: rgb(255, 209, 209); color: black;">Thank you for considering my application.</span> <span style="background-color: rgb(255, 225, 225); color: black;">I look forward to learning more about Growth
546
+ Channel during this ongoing process.</span> <span style="background-color: rgb(255, 253, 253); color: black;">Best Regards,
547
+ Ricky West
548
+ rw12west@gmail.com
549
+ 262-665-7816</span> Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
550
+
551
+ Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
552
+ Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
553
+
554
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
555
+ To disable this warning, you can either:
556
+ - Avoid using `tokenizers` before the fork if possible
557
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
558
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
559
+ To disable this warning, you can either:
560
+ - Avoid using `tokenizers` before the fork if possible
561
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
562
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
563
+ To disable this warning, you can either:
564
+ - Avoid using `tokenizers` before the fork if possible
565
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
566
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
567
+ To disable this warning, you can either:
568
+ - Avoid using `tokenizers` before the fork if possible
569
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
570
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
571
+ To disable this warning, you can either:
572
+ - Avoid using `tokenizers` before the fork if possible
573
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
574
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
575
+ To disable this warning, you can either:
576
+ - Avoid using `tokenizers` before the fork if possible
577
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
578
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
579
+ To disable this warning, you can either:
580
+ - Avoid using `tokenizers` before the fork if possible
581
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
582
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
583
+ To disable this warning, you can either:
584
+ - Avoid using `tokenizers` before the fork if possible
585
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
586
+ WARNING: Invalid HTTP request received.
587
+ WARNING: Invalid HTTP request received.
predictors.py CHANGED
@@ -21,6 +21,7 @@ from optimum.pipelines import pipeline
21
 
22
  with open("config.yaml", "r") as file:
23
  params = yaml.safe_load(file)
 
24
  nltk.download("punkt")
25
  nltk.download("stopwords")
26
  device_needed = "cuda" if torch.cuda.is_available() else "cpu"
@@ -35,34 +36,22 @@ mc_token_size = int(params["MC_TOKEN_SIZE"])
35
  bc_token_size = int(params["BC_TOKEN_SIZE"])
36
  bias_checker_model_name = params['BIAS_CHECKER_MODEL_PATH']
37
  bias_corrector_model_name = params['BIAS_CORRECTOR_MODEL_PATH']
38
- text_bc_tokenizer = AutoTokenizer.from_pretrained(text_bc_model_path)
39
- text_bc_model = AutoModelForSequenceClassification.from_pretrained(
40
- text_bc_model_path
41
- ).to(device)
42
- text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path)
43
- text_mc_model = AutoModelForSequenceClassification.from_pretrained(
44
- text_mc_model_path
45
- ).to(device)
46
- quillbot_tokenizer = AutoTokenizer.from_pretrained(text_quillbot_model_path)
47
- quillbot_model = AutoModelForSequenceClassification.from_pretrained(
48
- text_quillbot_model_path
49
- ).to(device)
50
-
51
 
 
 
 
 
 
 
52
 
53
  # proxy models for explainability
54
  mini_bc_model_name = "polygraf-ai/bc-model"
55
- bc_tokenizer_mini = AutoTokenizer.from_pretrained(mini_bc_model_name)
56
- bc_model_mini = AutoModelForSequenceClassification.from_pretrained(
57
- mini_bc_model_name
58
- ).to(device_needed)
59
  mini_humanizer_model_name = "polygraf-ai/humanizer-model"
60
- humanizer_tokenizer_mini = AutoTokenizer.from_pretrained(
61
- mini_humanizer_model_name
62
- )
63
- humanizer_model_mini = AutoModelForSequenceClassification.from_pretrained(
64
- mini_humanizer_model_name
65
- ).to(device_needed)
66
 
67
  bc_model_mini = BetterTransformer.transform(bc_model_mini)
68
  humanizer_model_mini = BetterTransformer.transform(humanizer_model_mini)
 
21
 
22
  with open("config.yaml", "r") as file:
23
  params = yaml.safe_load(file)
24
+
25
  nltk.download("punkt")
26
  nltk.download("stopwords")
27
  device_needed = "cuda" if torch.cuda.is_available() else "cpu"
 
36
  bc_token_size = int(params["BC_TOKEN_SIZE"])
37
  bias_checker_model_name = params['BIAS_CHECKER_MODEL_PATH']
38
  bias_corrector_model_name = params['BIAS_CORRECTOR_MODEL_PATH']
39
+ access_token = params['HF_TOKEN']
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ text_bc_tokenizer = AutoTokenizer.from_pretrained(text_bc_model_path, token=access_token)
42
+ text_bc_model = AutoModelForSequenceClassification.from_pretrained(text_bc_model_path, token=access_token).to(device)
43
+ text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path, token=access_token)
44
+ text_mc_model = AutoModelForSequenceClassification.from_pretrained(text_mc_model_path, token=access_token).to(device)
45
+ quillbot_tokenizer = AutoTokenizer.from_pretrained(text_quillbot_model_path, token=access_token)
46
+ quillbot_model = AutoModelForSequenceClassification.from_pretrained(text_quillbot_model_path, token=access_token).to(device)
47
 
48
  # proxy models for explainability
49
  mini_bc_model_name = "polygraf-ai/bc-model"
50
+ bc_tokenizer_mini = AutoTokenizer.from_pretrained(mini_bc_model_name, token=access_token)
51
+ bc_model_mini = AutoModelForSequenceClassification.from_pretrained(mini_bc_model_name, token=access_token).to(device_needed)
 
 
52
  mini_humanizer_model_name = "polygraf-ai/humanizer-model"
53
+ humanizer_tokenizer_mini = AutoTokenizer.from_pretrained(mini_humanizer_model_name, token=access_token)
54
+ humanizer_model_mini = AutoModelForSequenceClassification.from_pretrained(mini_humanizer_model_name, token=access_token).to(device_needed)
 
 
 
 
55
 
56
  bc_model_mini = BetterTransformer.transform(bc_model_mini)
57
  humanizer_model_mini = BetterTransformer.transform(humanizer_model_mini)
utils.py CHANGED
@@ -6,13 +6,18 @@ from unidecode import unidecode
6
  from transformers import AutoTokenizer
7
  import yaml
8
  import fitz
 
 
9
 
 
 
 
 
10
 
11
  def remove_accents(input_str):
12
  text_no_accents = unidecode(input_str)
13
  return text_no_accents
14
 
15
-
16
  def remove_special_characters(text):
17
  text = re.sub(r'https?://\S+|www\.\S+', '', text)
18
  emoji_pattern = re.compile("["
@@ -50,7 +55,7 @@ with open("config.yaml", "r") as file:
50
  params = yaml.safe_load(file)
51
 
52
  text_bc_model_path = params["TEXT_BC_MODEL_PATH"]
53
- text_bc_tokenizer = AutoTokenizer.from_pretrained(text_bc_model_path)
54
 
55
 
56
  def len_validator(text):
 
6
  from transformers import AutoTokenizer
7
  import yaml
8
  import fitz
9
+ import requests
10
+ from bs4 import BeautifulSoup
11
 
12
+ with open("config.yaml", "r") as file:
13
+ params = yaml.safe_load(file)
14
+
15
+ access_token = params['HF_TOKEN']
16
 
17
  def remove_accents(input_str):
18
  text_no_accents = unidecode(input_str)
19
  return text_no_accents
20
 
 
21
  def remove_special_characters(text):
22
  text = re.sub(r'https?://\S+|www\.\S+', '', text)
23
  emoji_pattern = re.compile("["
 
55
  params = yaml.safe_load(file)
56
 
57
  text_bc_model_path = params["TEXT_BC_MODEL_PATH"]
58
+ text_bc_tokenizer = AutoTokenizer.from_pretrained(text_bc_model_path, token=access_token)
59
 
60
 
61
  def len_validator(text):