aliasgerovs committed on
Commit
173f4a0
1 Parent(s): 2019311

Updated audio.py

Browse files
Files changed (5) hide show
  1. app.py +2 -2
  2. audio.py +4 -1
  3. nohup.out +587 -0
  4. predictors.py +12 -23
  5. utils.py +7 -2
app.py CHANGED
@@ -12,7 +12,7 @@ import yaml
12
  from functools import partial
13
  from audio import assemblyai_transcribe
14
  import yt_dlp
15
-
16
 
17
  np.set_printoptions(suppress=True)
18
 
@@ -369,7 +369,7 @@ with gr.Blocks() as demo:
369
 
370
  depth_analysis_btn.click(
371
  fn=depth_analysis,
372
- inputs=[bias_buster_selected, input_text],
373
  outputs=[writing_analysis_plot],
374
  api_name="depth_analysis",
375
  )
 
12
  from functools import partial
13
  from audio import assemblyai_transcribe
14
  import yt_dlp
15
+ import os
16
 
17
  np.set_printoptions(suppress=True)
18
 
 
369
 
370
  depth_analysis_btn.click(
371
  fn=depth_analysis,
372
+ inputs=[input_text, bias_buster_selected],
373
  outputs=[writing_analysis_plot],
374
  api_name="depth_analysis",
375
  )
audio.py CHANGED
@@ -4,12 +4,15 @@ import time
4
  import yaml
5
  import yt_dlp
6
  import assemblyai as aai
 
 
7
 
 
8
  with open("config.yaml", "r") as file:
9
  params = yaml.safe_load(file)
10
 
11
  transcriber = aai.Transcriber()
12
- aai.settings.api_key = params["ASSEMBLY_AI_TOKEN"]
13
 
14
  def assemblyai_transcribe(audio_url):
15
  if audio_url is None:
 
4
  import yaml
5
  import yt_dlp
6
  import assemblyai as aai
7
+ from dotenv import load_dotenv
8
+ import os
9
 
10
+ load_dotenv()
11
  with open("config.yaml", "r") as file:
12
  params = yaml.safe_load(file)
13
 
14
  transcriber = aai.Transcriber()
15
+ aai.settings.api_key = os.environ['ASSEMBLYAI_API_KEY']
16
 
17
  def assemblyai_transcribe(audio_url):
18
  if audio_url is None:
nohup.out CHANGED
@@ -0,0 +1,587 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
2
+ warnings.warn(
3
+ 2024-06-05 13:33:46.838996: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
4
+ To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
5
+ [nltk_data] Downloading package punkt to /home/aliasgarov/nltk_data...
6
+ [nltk_data] Package punkt is already up-to-date!
7
+ [nltk_data] Downloading package stopwords to
8
+ [nltk_data] /home/aliasgarov/nltk_data...
9
+ [nltk_data] Package stopwords is already up-to-date!
10
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
11
+ warnings.warn(
12
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
13
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
14
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
15
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
16
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
17
+ Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
18
+ - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
19
+ - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
20
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
21
+ Framework not specified. Using pt to export the model.
22
+ Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
23
+ - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
24
+ - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
25
+ Using the export variant default. Available variants are:
26
+ - default: The default ONNX variant.
27
+
28
+ ***** Exporting submodel 1/1: RobertaForSequenceClassification *****
29
+ Using framework PyTorch: 2.3.0+cu121
30
+ Overriding 1 configuration item(s)
31
+ - use_cache -> False
32
+ Framework not specified. Using pt to export the model.
33
+ Using the export variant default. Available variants are:
34
+ - default: The default ONNX variant.
35
+ Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
36
+ Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
37
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
38
+ warnings.warn(
39
+
40
+ ***** Exporting submodel 1/3: T5Stack *****
41
+ Using framework PyTorch: 2.3.0+cu121
42
+ Overriding 1 configuration item(s)
43
+ - use_cache -> False
44
+
45
+ ***** Exporting submodel 2/3: T5ForConditionalGeneration *****
46
+ Using framework PyTorch: 2.3.0+cu121
47
+ Overriding 1 configuration item(s)
48
+ - use_cache -> True
49
+ /usr/local/lib/python3.9/dist-packages/transformers/modeling_utils.py:1017: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
50
+ if causal_mask.shape[1] < attention_mask.shape[1]:
51
+
52
+ ***** Exporting submodel 3/3: T5ForConditionalGeneration *****
53
+ Using framework PyTorch: 2.3.0+cu121
54
+ Overriding 1 configuration item(s)
55
+ - use_cache -> True
56
+ /usr/local/lib/python3.9/dist-packages/transformers/models/t5/modeling_t5.py:503: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
57
+ elif past_key_value.shape[2] != key_value_states.shape[1]:
58
+ In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
59
+ In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
60
+ Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
61
+ Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
62
+ [nltk_data] Downloading package cmudict to
63
+ [nltk_data] /home/aliasgarov/nltk_data...
64
+ [nltk_data] Package cmudict is already up-to-date!
65
+ [nltk_data] Downloading package punkt to /home/aliasgarov/nltk_data...
66
+ [nltk_data] Package punkt is already up-to-date!
67
+ [nltk_data] Downloading package stopwords to
68
+ [nltk_data] /home/aliasgarov/nltk_data...
69
+ [nltk_data] Package stopwords is already up-to-date!
70
+ [nltk_data] Downloading package wordnet to
71
+ [nltk_data] /home/aliasgarov/nltk_data...
72
+ [nltk_data] Package wordnet is already up-to-date!
73
+ WARNING: The directory '/home/aliasgarov/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you may want sudo's -H flag.
74
+ Collecting en-core-web-sm==3.7.1
75
+ Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
76
+ Requirement already satisfied: spacy<3.8.0,>=3.7.2 in /usr/local/lib/python3.9/dist-packages (from en-core-web-sm==3.7.1) (3.7.2)
77
+ Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (24.0)
78
+ Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.2)
79
+ Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.26.4)
80
+ Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (52.0.0)
81
+ Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.4.8)
82
+ Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (6.4.0)
83
+ Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.3.4)
84
+ Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.10)
85
+ Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.7.1)
86
+ Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.12)
87
+ Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.10)
88
+ Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.8)
89
+ Requirement already satisfied: thinc<8.3.0,>=8.1.8 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.2.3)
90
+ Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.32.3)
91
+ Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.5)
92
+ Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.9)
93
+ Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.4.0)
94
+ Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.1.4)
95
+ Requirement already satisfied: typer<0.10.0,>=0.3.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.9.4)
96
+ Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.66.4)
97
+ Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.9/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.2.0)
98
+ Requirement already satisfied: marisa-trie>=0.7.7 in /usr/local/lib/python3.9/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.1)
99
+ Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.11.0)
100
+ Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.18.2)
101
+ Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.6.0)
102
+ Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.10)
103
+ Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.2.1)
104
+ Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.9/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.3.2)
105
+ Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2020.6.20)
106
+ Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.9/dist-packages (from thinc<8.3.0,>=8.1.8->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.1.4)
107
+ Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.9/dist-packages (from thinc<8.3.0,>=8.1.8->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.7.11)
108
+ Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.9/dist-packages (from typer<0.10.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.1.7)
109
+ Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in /usr/local/lib/python3.9/dist-packages (from weasel<0.4.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.16.0)
110
+ Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.1.5)
111
+ ✔ Download and installation successful
112
+ You can now load the package via spacy.load('en_core_web_sm')
113
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
114
+ warnings.warn(
115
+ [youtube] Extracting URL: https://www.youtube.com/watch?v=rXGqKJoQ4qM
116
+ IMPORTANT: You are using gradio version 4.26.0, however version 4.29.0 is available, please upgrade.
117
+ --------
118
+ Running on local URL: http://0.0.0.0:80
119
+ Running on public URL: https://881ad0461434819142.gradio.live
120
+
121
+ This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
122
+ [youtube] rXGqKJoQ4qM: Downloading webpage
123
+ [youtube] rXGqKJoQ4qM: Downloading ios player API JSON
124
+ [youtube] rXGqKJoQ4qM: Downloading m3u8 information
125
+ /usr/local/lib/python3.9/dist-packages/optimum/bettertransformer/models/encoder_models.py:301: UserWarning: The PyTorch API of nested tensors is in prototype stage and will change in the near future. (Triggered internally at ../aten/src/ATen/NestedTensorImpl.cpp:178.)
126
+ hidden_states = torch._nested_tensor_from_mask(hidden_states, ~attention_mask)
127
+ [generic] Extracting URL:
128
+ Original BC scores: AI: 6.379470141837373e-05, HUMAN: 0.9999362230300903
129
+ Calibration BC scores: AI: 0.02666666666666667, HUMAN: 0.9733333333333334
130
+ Input Text: You've asked about machine learning, and we have a watermelon here. You know, you used to go to the store, pick up a watermelon. Maybe your family told you, you push on the end to see if it's soft, and that means it's a good watermelon or if it smells a certain way. That's how you tell if it's a good watermelon. Well, with machine learning, you don't do any of that. You basically try to determine all of the attributes about this watermelon that you can, and you take those attributes and you feed them into a baby machine model that knows nothing, how fat the stripes are, how thin they are, and you feed all these attributes into that model. You go home, you eat the watermelon, come back in the next day, and you tell that model that was a good watermelon, and it remembers all of those attributes and the fact that it was good. And you're going to do that every day for the next ten years. After ten years, that model is going to be able to tell you based on attributes that you give it. If the watermelon you picked up is good or bad, and you may not know why that model is telling you it's good or bad, but you can trust that it has done enough analysis, and it can tell you a percentage, a surety of whether it's good or bad, that when you pick up a watermelon, give it the attributes. If it says it's good, you can take it home and it will be good.
131
+ Original BC scores: AI: 6.379470141837373e-05, HUMAN: 0.9999362230300903
132
+ Calibration BC scores: AI: 0.02666666666666667, HUMAN: 0.9733333333333334
133
+ MC Score: {'OPENAI GPT': 2.165152131657536e-12, 'MISTRAL': 5.77177379964173e-13, 'CLAUDE': 9.21127433587778e-13, 'GEMINI': 1.2182041486674655e-12, 'GRAMMAR ENHANCER': 0.026666666666666616}
134
+ ERROR: [generic] '' is not a valid URL. Set --default-search "ytsearch" (or run yt-dlp "ytsearch:" ) to search YouTube
135
+ Traceback (most recent call last):
136
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/YoutubeDL.py", line 1606, in wrapper
137
+ return func(self, *args, **kwargs)
138
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/YoutubeDL.py", line 1741, in __extract_info
139
+ ie_result = ie.extract(url)
140
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/extractor/common.py", line 734, in extract
141
+ ie_result = self._real_extract(url)
142
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/extractor/generic.py", line 2349, in _real_extract
143
+ raise ExtractorError(
144
+ yt_dlp.utils.ExtractorError: [generic] '' is not a valid URL. Set --default-search "ytsearch" (or run yt-dlp "ytsearch:" ) to search YouTube
145
+
146
+ During handling of the above exception, another exception occurred:
147
+
148
+ Traceback (most recent call last):
149
+ File "/usr/local/lib/python3.9/dist-packages/gradio/queueing.py", line 527, in process_events
150
+ response = await route_utils.call_process_api(
151
+ File "/usr/local/lib/python3.9/dist-packages/gradio/route_utils.py", line 261, in call_process_api
152
+ output = await app.get_blocks().process_api(
153
+ File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1786, in process_api
154
+ result = await self.call_function(
155
+ File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1338, in call_function
156
+ prediction = await anyio.to_thread.run_sync(
157
+ File "/usr/local/lib/python3.9/dist-packages/anyio/to_thread.py", line 56, in run_sync
158
+ return await get_async_backend().run_sync_in_worker_thread(
159
+ File "/usr/local/lib/python3.9/dist-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
160
+ return await future
161
+ File "/usr/local/lib/python3.9/dist-packages/anyio/_backends/_asyncio.py", line 851, in run
162
+ result = context.run(func, *args)
163
+ File "/usr/local/lib/python3.9/dist-packages/gradio/utils.py", line 759, in wrapper
164
+ response = f(*args, **kwargs)
165
+ File "/home/aliasgarov/copyright_checker/audio.py", line 21, in assemblyai_transcribe
166
+ info = ydl.extract_info(audio_url, download=False)
167
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/YoutubeDL.py", line 1595, in extract_info
168
+ return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
169
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/YoutubeDL.py", line 1624, in wrapper
170
+ self.report_error(str(e), e.format_traceback())
171
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/YoutubeDL.py", line 1073, in report_error
172
+ self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
173
+ File "/usr/local/lib/python3.9/dist-packages/yt_dlp/YoutubeDL.py", line 1012, in trouble
174
+ raise DownloadError(message, exc_info)
175
+ yt_dlp.utils.DownloadError: ERROR: [generic] '' is not a valid URL. Set --default-search "ytsearch" (or run yt-dlp "ytsearch:" ) to search YouTube
176
+ [youtube] Extracting URL: https://www.youtube.com/watch?v=zhWDdy_5v2w
177
+ [youtube] zhWDdy_5v2w: Downloading webpage
178
+ [youtube] zhWDdy_5v2w: Downloading ios player API JSON
179
+ [youtube] zhWDdy_5v2w: Downloading m3u8 information
180
+ Original BC scores: AI: 0.0008556331158615649, HUMAN: 0.9991443157196045
181
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
182
+ Input Text: In a single minute, your body produces 120 to 180 million red blood cells. People ask Google 2. 4 million questions and 25 million Coca Cola products are consumed. Many of those bottles will end up in a landfill where the World bank estimates we produce 5 million pounds of garbage. 108 human lives will be lost in this minute, and an adult male will lose 96 million cells. Fortunately, 96 million cells divide, replacing those lost. Speaking of divisions, in the USA, 1. 5 people get divorced, while worldwide 116 people will get married, 83, 300 people have sex, but only 258 babies will be born. And a fetus is developing neurons at a rate of 250, 000 /minute so it's no wonder that a computer simulator simulation takes 60 quadrillion bytes to simulate a minute. An average of 1. 38 rain fall around the world, which is 4. 7 billion bathtubs of water every minute. And with the storms comes approximately 6000 bolts of cloud to ground lightning hitting the earth. A 150 pound person expends 1. 1 calories of energy per minute while sleeping. While the sun provides us with 83. 33 terawatts of energy. The earth will complete 1800 its 940 million around the sun, moving 1034 times faster than a cheetah. 70, 000 hours of Netflix are watched, 300 hours are uploaded to YouTube and you can watch this video and subscribe.
183
+ Original BC scores: AI: 0.0008556331158615649, HUMAN: 0.9991443157196045
184
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
185
+ MC Score: {'OPENAI GPT': 0.041650297741095244, 'MISTRAL': 2.1457372915515795e-10, 'CLAUDE': 2.8301516389698626e-08, 'GEMINI': 5.853652282894475e-07, 'GRAMMAR ENHANCER': 0.041682422161102316}
186
+ Original BC scores: AI: 0.0008556331158615649, HUMAN: 0.9991443157196045
187
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
188
+ Input Text: In a single minute, your body produces 120 to 180 million red blood cells. People ask Google 2. 4 million questions and 25 million Coca Cola products are consumed. Many of those bottles will end up in a landfill where the World bank estimates we produce 5 million pounds of garbage. 108 human lives will be lost in this minute, and an adult male will lose 96 million cells. Fortunately, 96 million cells divide, replacing those lost. Speaking of divisions, in the USA, 1. 5 people get divorced, while worldwide 116 people will get married, 83, 300 people have sex, but only 258 babies will be born. And a fetus is developing neurons at a rate of 250, 000 /minute so it's no wonder that a computer simulator simulation takes 60 quadrillion bytes to simulate a minute. An average of 1. 38 rain fall around the world, which is 4. 7 billion bathtubs of water every minute. And with the storms comes approximately 6000 bolts of cloud to ground lightning hitting the earth. A 150 pound person expends 1. 1 calories of energy per minute while sleeping. While the sun provides us with 83. 33 terawatts of energy. The earth will complete 1800 its 940 million around the sun, moving 1034 times faster than a cheetah. 70, 000 hours of Netflix are watched, 300 hours are uploaded to YouTube and you can watch this video and subscribe.
189
+ Original BC scores: AI: 0.0008556331158615649, HUMAN: 0.9991443157196045
190
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
191
+ MC Score: {'OPENAI GPT': 0.041650297741095244, 'MISTRAL': 2.1457372915515795e-10, 'CLAUDE': 2.8301516389698626e-08, 'GEMINI': 5.853652282894475e-07, 'GRAMMAR ENHANCER': 0.041682422161102316}
192
+ Original BC scores: AI: 0.0007931223954074085, HUMAN: 0.9992069602012634
193
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
194
+ Input Text: The double sovereign is a gold coin of the United Kingdom with a nominal value of two pounds sterling (2). It features the reigning monarch on its obverse and, most often, Benedetto Pistrucci's depiction of Saint George and the Dragon on the reverse (pictured). It was rarely issued in the first century and a half after its debut in 1820, usually in a new monarch's coronation year or to mark the institution of a new coinage portrait of the monarch. In addition to the usual coinage in Britain, specimens were struck at Australia's Sydney Mint in 1887 and 1902. Most often struck as a proof coin, the double sovereign has been issued for circulation in only four years, and few examples worn from commercial use are known. It is now a collector and bullion coin, and has been struck by the Royal Mint most years since 1980. In some years, it has not been issued and the Royal
195
+ ['The double sovereign is a gold coin of the United Kingdom with a nominal value of two pounds sterling (£2).', "It features the reigning monarch on its obverse and, most often, Benedetto Pistrucci's depiction of Saint George and the Dragon on the reverse (pictured).", "It was rarely issued in the first century and a half after its debut in 1820, usually in a new monarch's coronation year or to mark the institution of a new coinage portrait of the monarch.", "In addition to the usual coinage in Britain, specimens were struck at Australia's Sydney Mint in 1887 and 1902.", 'Most often struck as a proof coin, the double sovereign has been issued for circulation in only four years, and few examples worn from commercial use are known.', 'It is now a collector and bullion coin, and has been struck by the Royal Mint most years since 1980.', 'In some years, it has not been issued and the Royal']
196
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
197
+ To disable this warning, you can either:
198
+ - Avoid using `tokenizers` before the fork if possible
199
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
200
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
201
+ To disable this warning, you can either:
202
+ - Avoid using `tokenizers` before the fork if possible
203
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
204
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
205
+ To disable this warning, you can either:
206
+ - Avoid using `tokenizers` before the fork if possible
207
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
208
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
209
+ To disable this warning, you can either:
210
+ - Avoid using `tokenizers` before the fork if possible
211
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
212
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
213
+ To disable this warning, you can either:
214
+ - Avoid using `tokenizers` before the fork if possible
215
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
216
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
217
+ To disable this warning, you can either:
218
+ - Avoid using `tokenizers` before the fork if possible
219
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
220
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
221
+ To disable this warning, you can either:
222
+ - Avoid using `tokenizers` before the fork if possible
223
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
224
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
225
+ To disable this warning, you can either:
226
+ - Avoid using `tokenizers` before the fork if possible
227
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
228
+ PLAGIARISM PROCESSING TIME: 10.284763590898365
229
+
230
+ Original BC scores: AI: 0.0007931223954074085, HUMAN: 0.9992069602012634
231
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
232
+ Input Text: The double sovereign is a gold coin of the United Kingdom with a nominal value of two pounds sterling (2). It features the reigning monarch on its obverse and, most often, Benedetto Pistrucci's depiction of Saint George and the Dragon on the reverse (pictured). It was rarely issued in the first century and a half after its debut in 1820, usually in a new monarch's coronation year or to mark the institution of a new coinage portrait of the monarch. In addition to the usual coinage in Britain, specimens were struck at Australia's Sydney Mint in 1887 and 1902. Most often struck as a proof coin, the double sovereign has been issued for circulation in only four years, and few examples worn from commercial use are known. It is now a collector and bullion coin, and has been struck by the Royal Mint most years since 1980. In some years, it has not been issued and the Royal
233
+ Original BC scores: AI: 0.0007931223954074085, HUMAN: 0.9992069602012634
234
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
235
+ MC Score: {'OPENAI GPT': 3.014583190482276e-08, 'MISTRAL': 5.927566886406354e-12, 'CLAUDE': 8.79120894599813e-08, 'GEMINI': 0.08333175381024682, 'GRAMMAR ENHANCER': 1.4605501140370815e-06}
236
+
237
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
238
+ warnings.warn(
239
+ 2024-06-05 14:11:09.267769: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
240
+ To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
241
+ [nltk_data] Downloading package punkt to /home/aliasgarov/nltk_data...
242
+ [nltk_data] Package punkt is already up-to-date!
243
+ [nltk_data] Downloading package stopwords to
244
+ [nltk_data] /home/aliasgarov/nltk_data...
245
+ [nltk_data] Package stopwords is already up-to-date!
246
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
247
+ warnings.warn(
248
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
249
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
250
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
251
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
252
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
253
+ Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
254
+ - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
255
+ - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
256
+ The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
257
+ Framework not specified. Using pt to export the model.
258
+ Some weights of the model checkpoint at textattack/roberta-base-CoLA were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
259
+ - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
260
+ - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
261
+ Using the export variant default. Available variants are:
262
+ - default: The default ONNX variant.
263
+
264
+ ***** Exporting submodel 1/1: RobertaForSequenceClassification *****
265
+ Using framework PyTorch: 2.3.0+cu121
266
+ Overriding 1 configuration item(s)
267
+ - use_cache -> False
268
+ Framework not specified. Using pt to export the model.
269
+ Using the export variant default. Available variants are:
270
+ - default: The default ONNX variant.
271
+ Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
272
+ Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
273
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
274
+ warnings.warn(
275
+
276
+ ***** Exporting submodel 1/3: T5Stack *****
277
+ Using framework PyTorch: 2.3.0+cu121
278
+ Overriding 1 configuration item(s)
279
+ - use_cache -> False
280
+
281
+ ***** Exporting submodel 2/3: T5ForConditionalGeneration *****
282
+ Using framework PyTorch: 2.3.0+cu121
283
+ Overriding 1 configuration item(s)
284
+ - use_cache -> True
285
+ /usr/local/lib/python3.9/dist-packages/transformers/modeling_utils.py:1017: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
286
+ if causal_mask.shape[1] < attention_mask.shape[1]:
287
+
288
+ ***** Exporting submodel 3/3: T5ForConditionalGeneration *****
289
+ Using framework PyTorch: 2.3.0+cu121
290
+ Overriding 1 configuration item(s)
291
+ - use_cache -> True
292
+ /usr/local/lib/python3.9/dist-packages/transformers/models/t5/modeling_t5.py:503: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
293
+ elif past_key_value.shape[2] != key_value_states.shape[1]:
294
+ In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
295
+ In-place op on output of tensor.shape. See https://pytorch.org/docs/master/onnx.html#avoid-inplace-operations-when-using-tensor-shape-in-tracing-mode
296
+ Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.
297
+ Non-default generation parameters: {'max_length': 512, 'min_length': 8, 'num_beams': 2, 'no_repeat_ngram_size': 4}
298
+ [nltk_data] Downloading package cmudict to
299
+ [nltk_data] /home/aliasgarov/nltk_data...
300
+ [nltk_data] Package cmudict is already up-to-date!
301
+ [nltk_data] Downloading package punkt to /home/aliasgarov/nltk_data...
302
+ [nltk_data] Package punkt is already up-to-date!
303
+ [nltk_data] Downloading package stopwords to
304
+ [nltk_data] /home/aliasgarov/nltk_data...
305
+ [nltk_data] Package stopwords is already up-to-date!
306
+ [nltk_data] Downloading package wordnet to
307
+ [nltk_data] /home/aliasgarov/nltk_data...
308
+ [nltk_data] Package wordnet is already up-to-date!
309
+ WARNING: The directory '/home/aliasgarov/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you may want sudo's -H flag.
310
+ Collecting en-core-web-sm==3.7.1
311
+ Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
312
+ Requirement already satisfied: spacy<3.8.0,>=3.7.2 in /usr/local/lib/python3.9/dist-packages (from en-core-web-sm==3.7.1) (3.7.2)
313
+ Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.4.8)
314
+ Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (6.4.0)
315
+ Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.32.3)
316
+ Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (52.0.0)
317
+ Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.7.1)
318
+ Requirement already satisfied: typer<0.10.0,>=0.3.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.9.4)
319
+ Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.1.4)
320
+ Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.8)
321
+ Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.66.4)
322
+ Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (24.0)
323
+ Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.5)
324
+ Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.12)
325
+ Requirement already satisfied: thinc<8.3.0,>=8.1.8 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.2.3)
326
+ Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.26.4)
327
+ Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.3.4)
328
+ Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.10)
329
+ Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.9)
330
+ Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.10)
331
+ Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.2)
332
+ Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.9/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.4.0)
333
+ Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.9/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.2.0)
334
+ Requirement already satisfied: marisa-trie>=0.7.7 in /usr/local/lib/python3.9/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.1)
335
+ Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.11.0)
336
+ Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.6.0)
337
+ Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.9/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.18.2)
338
+ Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2020.6.20)
339
+ Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.10)
340
+ Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.2.1)
341
+ Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.9/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.3.2)
342
+ Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.9/dist-packages (from thinc<8.3.0,>=8.1.8->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.7.11)
343
+ Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.9/dist-packages (from thinc<8.3.0,>=8.1.8->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.1.4)
344
+ Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.9/dist-packages (from typer<0.10.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.1.7)
345
+ Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in /usr/local/lib/python3.9/dist-packages (from weasel<0.4.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.16.0)
346
+ Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.1.5)
347
+ ✔ Download and installation successful
348
+ You can now load the package via spacy.load('en_core_web_sm')
349
+ /usr/local/lib/python3.9/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
350
+ warnings.warn(
351
+ Token indices sequence length is longer than the specified maximum sequence length for this model (2138 > 512). Running this sequence through the model will result in indexing errors
352
+ /usr/local/lib/python3.9/dist-packages/optimum/bettertransformer/models/encoder_models.py:301: UserWarning: The PyTorch API of nested tensors is in prototype stage and will change in the near future. (Triggered internally at ../aten/src/ATen/NestedTensorImpl.cpp:178.)
353
+ hidden_states = torch._nested_tensor_from_mask(hidden_states, ~attention_mask)
354
+ [youtube] Extracting URL: https://www.youtube.com/watch?v=1aA1WGON49E
355
+ IMPORTANT: You are using gradio version 4.26.0, however version 4.29.0 is available, please upgrade.
356
+ --------
357
+ Running on local URL: http://0.0.0.0:80
358
+ Running on public URL: https://9882bb485d656697af.gradio.live
359
+
360
+ This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
361
+ Original BC scores: AI: 0.0009290315210819244, HUMAN: 0.9990710020065308
362
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
363
+ Input Text: Reece Rogers Google is set to start mixing ads into its new AI-generated search answers. Its a test of how the companys biggest revenue stream can adapt to the age of generative AI. Paresh Dave WIRED is where tomorrow is realized. It is the essential source of information and ideas that make sense of a world in constant transformation. The WIRED conversation illuminates how technology is changing every aspect of our livesfrom culture to business, science to design. The breakthroughs and innovations that we uncover lead to new ways of thinking, new connections, and new industries. More From WIRED Reviews and Guides 2024 Condé Nast. All rights reserved. WIRED may earn a portion of sales from products that are purchased through our site as part of our Affiliate Partnerships with retailers. The material on this site may not be reproduced, distributed, transmitted, cached or otherwise used, except with the prior written permission of Condé Nast. Select international site United States Large Chevron
364
+ Original BC scores: AI: 0.0009290315210819244, HUMAN: 0.9990710020065308
365
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
366
+ MC Score: {'OPENAI GPT': 0.009792306770881021, 'MISTRAL': 3.4086881465592965e-10, 'CLAUDE': 4.831611022382278e-08, 'GEMINI': 0.027845658361911788, 'GRAMMAR ENHANCER': 0.04569531977176668}
367
+ Original BC scores: AI: 0.0010414546122774482, HUMAN: 0.9989585876464844
368
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
369
+ Input Text: Reece Rogers Google is set to start mixing ads into its new AI-generated search answers. Its a test of how the companys biggest revenue stream can adapt to the age of generative AI. Paresh Dave WIRED is where tomorrow is realized. It is the essential source of information and ideas that make sense of a world in constant transformation. The WIRED conversation illuminates how technology is changing every aspect of our livesfrom culture to business, science to design. The breakthroughs and innovations that we uncover lead to new ways of thinking, new connections, and new industries. More From WIRED Reviews and Guides 2024 Condé Nast. All rights reserved. WIRED may earn a portion of sales from products that are purchased through our site as part of our Affiliate Partnerships with retailers. The material on this site may not be reproduced, distributed, transmitted, cached or otherwise used, except with the prior written permission of Condé Nast. Select international site United States Large Chevron
370
+ Original BC scores: AI: 0.0010414546122774482, HUMAN: 0.9989585876464844
371
+ Calibration BC scores: AI: 0.08333333333333333, HUMAN: 0.9166666666666666
372
+ MC Score: {'OPENAI GPT': 0.011016534020503366, 'MISTRAL': 3.769994686801209e-10, 'CLAUDE': 5.417933834905855e-08, 'GEMINI': 0.031325822075208044, 'GRAMMAR ENHANCER': 0.04099092384179435}
373
+ Original BC scores: AI: 0.0013807499781250954, HUMAN: 0.9986192584037781
374
+ Calibration BC scores: AI: 0.09973753280839895, HUMAN: 0.9002624671916011
375
+ Input Text: Also, even if Google developers did not intend for this feature to be a replacement of the original work, AI Overviews provide direct answers to questions in a manner that buries attribution and reduces the incentive for users to click through to the source material. We see that links included in AI Overviews get more clicks than if the page had appeared as a traditional web listing for that query, " said the Google spokesperson. No data to support this claim was offered to WIRED, so it's impossible to independently verify the impact of the AI feature on click-through rates. Also, its worth noting that the company compared AI Overview referral traffic to more traditional blue-link traffic from Google, not to articles chosen for a featured snippet, where the rates are likely much higher. After I reached out to Google about the AI Overview result that pulled from my work, the experimental AI search result for this query stopped showing up, but Google still attempted to generate an answer above the featured snippet. Reece Rogers via Google While many AI lawsuits remain unresolved, one legal expert I spoke with whfeel certain that if the company decides to expand the prevalence AI Overviews, then thnited States Large Chevron
376
+ Original BC scores: AI: 0.0013807499781250954, HUMAN: 0.9986192584037781
377
+ Calibration BC scores: AI: 0.09973753280839895, HUMAN: 0.9002624671916011
378
+ MC Score: {'OPENAI GPT': 0.0014353521324674597, 'MISTRAL': 8.853771236138064e-10, 'CLAUDE': 1.2876423798196547e-07, 'GEMINI': 0.02511704077557941, 'GRAMMAR ENHANCER': 0.07318501115783929}
379
+ Original BC scores: AI: 0.9780901670455933, HUMAN: 0.021909818053245544
380
+ Calibration BC scores: AI: 0.5142857142857142, HUMAN: 0.48571428571428577
381
+ Input Text: Googles AI Overview Search Results Copied My Original Work WIRED Open Navigation Menu Menu Story Saved To revisit this article, visit My Profile, then. Close Alert Googles AI Overview Search Results Copied My Original Work More Chevron Jun 5, 2024 6: 30 AM Googles AI Overview Search Results Copied My Original Work Googles AI feature bumped my article down on the results page, but the new AI Overview at the top still referenced it. What gives? Photo-illustration: Jacqui Van Liew; Getty Images Save this str is not directly attributed to me. Instead, my original article was one of six footnotes hyperlinked near the bottom of the result. With source links located so far down, its hard to imagine any publisher receiving significant traffic in this situation. AI Overviews will conceptually match information that appears in top web results, including those linked in the overview, wrote a Google spon that if the company decides to expand the prevalence AI Overviews, then thnited States Large Chevron
382
+ Original BC scores: AI: 0.9780901670455933, HUMAN: 0.021909818053245544
383
+ Calibration BC scores: AI: 0.5142857142857142, HUMAN: 0.48571428571428577
384
+ MC Score: {'OPENAI GPT': 2.5201448071245276e-11, 'MISTRAL': 9.422158589059545e-12, 'CLAUDE': 1.7335425951868287e-11, 'GEMINI': 2.408824389954489e-11, 'GRAMMAR ENHANCER': 0.5142857142857142}
385
+ [youtube] 1aA1WGON49E: Downloading webpage
386
+ [youtube] 1aA1WGON49E: Downloading ios player API JSON
387
+ [youtube] 1aA1WGON49E: Downloading m3u8 information
388
+
389
+ WARNING: Invalid HTTP request received.
390
+ WARNING: Invalid HTTP request received.
391
+ Original BC scores: AI: 0.00025223050033673644, HUMAN: 0.9997478127479553
392
+ Calibration BC scores: AI: 0.02666666666666667, HUMAN: 0.9733333333333334
393
+ Input Text: Wow, what an audience. But if I'm being honest, I don't care what you think of my talk. I don't. I care what the Internet thinks of my talk because they're the ones who get it seen and get it shared. And I think that's where most people get it wrong. They're talking to you here instead of talking to you. Random person scrolling Facebook. Thanks for the click. You see, back in 2009, we all had these weird little things called attention spans. Yeah, they're gone. They're gone. We killed them. They're dead. I'm trying to think of the last time I watched an 18 minutes TED talk. It's been years. Literally years. So if you're giving a TED talk, keep it quick. I'm doing mine in under a minute. I'm at 44 seconds right now. That means we've got time for one final joke. Why are balloons so expensive? Inflation.
394
+ Original BC scores: AI: 0.00025223050033673644, HUMAN: 0.9997478127479553
395
+ Calibration BC scores: AI: 0.02666666666666667, HUMAN: 0.9733333333333334
396
+ MC Score: {'OPENAI GPT': 3.122121820335142e-11, 'MISTRAL': 4.327827355747134e-12, 'CLAUDE': 4.987585455751277e-11, 'GEMINI': 0.026666666666666616, 'GRAMMAR ENHANCER': 6.573377694015398e-10}
397
+ Original BC scores: AI: 0.998515784740448, HUMAN: 0.0014842685777693987
398
+ Calibration BC scores: AI: 0.7272727272727273, HUMAN: 0.2727272727272727
399
+ Input Text: Cool extension with great content! It seamlessly verifies the authenticity of online content, making it a must-have for anyone concerned about trust and reliability online. Highly recommended!
400
+ Original BC scores: AI: 0.998515784740448, HUMAN: 0.0014842685777693987
401
+ Calibration BC scores: AI: 0.7272727272727273, HUMAN: 0.2727272727272727
402
+ MC Score: {'OPENAI GPT': 0.7272727272727273, 'MISTRAL': 3.3261002538057226e-11, 'CLAUDE': 6.143898521251211e-11, 'GEMINI': 1.713123784244627e-10, 'GRAMMAR ENHANCER': 8.93011624243782e-11}
403
+
404
+ probas = F.softmax(tensor_logits).detach().cpu().numpy()
405
+
406
+ probas = F.softmax(tensor_logits).detach().cpu().numpy()
407
+
408
+ probas = F.softmax(tensor_logits).detach().cpu().numpy()
409
+ ['Ricky West\nrw12west@gmail.com\n262-665-7816\n3 June 2024\nMaryna Burushkina\nGrowth Channel\n305 East Huntland Drive\nAustin, TX 78752\nDear Maryna,\nAdvertising has always been an interest of mine because of how it exists in our\neveryday lives.', 'Knowing how to advertise is such an important skill to have, and you will\nalmost always come across it in some way no matter what role you are in.', 'To be with a\ngreat company such as Growth Channel would be a great advantage.', 'Being passionate\nabout sales and having gained extensive experience in the field, I possess the\nenthusiasm to personally contribute to the realization of Growth Channel’s vision.', "I plan\nto optimize Growth Channel's vision by achieving higher conversion rates with future\nprospects by fully educating and creating awareness among consumers of the value\nGrowth Channel can bring to their company.", 'At SoftwareONE, I have been a top three seller of twenty-five SDRs for the last two\nfiscal quarters, and I have never been out of the top ten.', 'This can be attributed to my\nability to understand client needs and identify these needs to offer solutions that will\nimprove their yields.', 'Moreover, during my current and past sales/advertising experience,\nI have worked on several campaigns.', 'I had to use data to ensure that the advertisement\nteam and I achieved a campaign that fits the client’s goals.', 'For example, when I was in\nreal estate, I would work with my advertising team to target a certain audience for the\nluxury real estate properties that I was posting for my broker.', 'Although I have never\nofficially held an advertising title, much of my experience in the workforce has allowed\nme to become experienced and more knowledgeable than most in the advertising\nsector.', 'To continue, If I were to choose a company that Growth Channel should expand its\nnetwork on, it would be Thrive Market.', 'Thrive Market is an already successful company\nthat could really expand on its 
popularity especially because of its business model\nwhich sells sustainable groceries.', 'Being aware of the environmental friendliness and\nsustainability of the products it creates, Thrive Market is bound to increase its market\nshare and make a bigger, positive impact.', 'To create the campaign representing Thrive Market, I would use a targeting tool for ad\nnetworks, to aim at the audience interested in the protection of the environment.', 'The\ncampaign would include:\nAudience Segmentation: Using the technologies that Growth Channel possesses by\ntargeting an audience who has interest in eco-friendly institutions, environmentally\nfriendly goods and services, and other related options.', 'I would also aim it towards an\naudience who is health conscious as Thrive Market mainly sells whole, organic foods.', 'Multi-Channel Approach: Social media and search engine networks provide options to\nbuy ad space where consumers can be targeted effectively.', 'There are many unique ad\noptions in the social media networks for all types of advertisements.', 'Social media ads\nalso allow for very creative approaches.', 'Performance\nOptimization:\nReviewing\nand\nevaluating\nthe\neffectiveness\nof\nthe\ncampaigns would be a continuous process with the help of analytics tools offered by\nGrowth Channel.', "These tools will allow Growth Channel to optimize campaigns and\nmake adjustments they see fit to Thrive Market's audience which will increase ROI.", 'Finally, I am confident that my sales experience, combined with my knowledge of digital\nmarketing makes me a great fit for the SDR opportunity at Growth Channel.', "I will bring\nan enthusiastic and positive energy to the team, and I am looking forward to possibly\ncontributing to Growth Channel's continued success.", 'Thank you for considering my application.', 'I look forward to learning more about Growth\nChannel during this ongoing process.', 'Best Regards,\nRicky West\nrw12west@gmail.com\n262-665-7816']
410
+ {'Ricky West\nrw12west@gmail.com\n262-665-7816\n3 June 2024\nMaryna Burushkina\nGrowth Channel\n305 East Huntland Drive\nAustin, TX 78752\nDear Maryna,\nAdvertising has always been an interest of mine because of how it exists in our\neveryday lives.': -0.4892860322252093, 'Knowing how to advertise is such an important skill to have, and you will\nalmost always come across it in some way no matter what role you are in.': -0.19567786339047316, 'To be with a\ngreat company such as Growth Channel would be a great advantage.': -0.021392659362429345, 'Being passionate\nabout sales and having gained extensive experience in the field, I possess the\nenthusiasm to personally contribute to the realization of Growth Channel’s vision.': -0.5170004958369422, "I plan\nto optimize Growth Channel's vision by achieving higher conversion rates with future\nprospects by fully educating and creating awareness among consumers of the value\nGrowth Channel can bring to their company.": -0.05491668142680101, 'At SoftwareONE, I have been a top three seller of twenty-five SDRs for the last two\nfiscal quarters, and I have never been out of the top ten.': 0.012097714481722944, 'This can be attributed to my\nability to understand client needs and identify these needs to offer solutions that will\nimprove their yields.': 0.37451399354344495, 'Moreover, during my current and past sales/advertising experience,\nI have worked on several campaigns.': -0.04695787195830139, 'I had to use data to ensure that the advertisement\nteam and I achieved a campaign that fits the client’s goals.': -0.12585292717762425} bc
411
+ {'For example, when I was in\nreal estate, I would work with my advertising team to target a certain audience for the\nluxury real estate properties that I was posting for my broker.': -0.1608428972249821, 'Although I have never\nofficially held an advertising title, much of my experience in the workforce has allowed\nme to become experienced and more knowledgeable than most in the advertising\nsector.': -0.1676857138786792, 'To continue, If I were to choose a company that Growth Channel should expand its\nnetwork on, it would be Thrive Market.': -0.030235768266264267, 'Thrive Market is an already successful company\nthat could really expand on its popularity especially because of its business model\nwhich sells sustainable groceries.': -0.37725885761075584, 'Being aware of the environmental friendliness and\nsustainability of the products it creates, Thrive Market is bound to increase its market\nshare and make a bigger, positive impact.': -0.16059165851828455, 'To create the campaign representing Thrive Market, I would use a targeting tool for ad\nnetworks, to aim at the audience interested in the protection of the environment.': -0.2580689230746736, 'The\ncampaign would include:\nAudience Segmentation: Using the technologies that Growth Channel possesses by\ntargeting an audience who has interest in eco-friendly institutions, environmentally\nfriendly goods and services, and other related options.': 0.010524143055479326, 'I would also aim it towards an\naudience who is health conscious as Thrive Market mainly sells whole, organic foods.': -0.22973100808013053, 'Multi-Channel Approach: Social media and search engine networks provide options to\nbuy ad space where consumers can be targeted effectively.': -0.004408479538479063, 'There are many unique ad\noptions in the social media networks for all types of advertisements.': -0.11047277720491727, 'Social media ads\nalso allow for very creative approaches.': -0.0023210321339796106} bc
412
+ {'Performance\nOptimization:\nReviewing\nand\nevaluating\nthe\neffectiveness\nof\nthe\ncampaigns would be a continuous process with the help of analytics tools offered by\nGrowth Channel.': 0.003683657918039559, "These tools will allow Growth Channel to optimize campaigns and\nmake adjustments they see fit to Thrive Market's audience which will increase ROI.": 0.07425998772503634, 'Finally, I am confident that my sales experience, combined with my knowledge of digital\nmarketing makes me a great fit for the SDR opportunity at Growth Channel.': 0.2235278874397872, "I will bring\nan enthusiastic and positive energy to the team, and I am looking forward to possibly\ncontributing to Growth Channel's continued success.": -0.09174872553632324, 'Thank you for considering my application.': 0.03028501558908839, 'I look forward to learning more about Growth\nChannel during this ongoing process.': 0.06096195592591846, 'Best Regards,\nRicky West\nrw12west@gmail.com\n262-665-7816': 0.07553230323968681}
413
+ Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
414
+ /home/aliasgarov/copyright_checker/predictors.py:212: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
415
+ probas = F.softmax(tensor_logits).detach().cpu().numpy()
416
+ bc
417
+ <span style="background-color: rgb(14, 255, 14); color: black;">Ricky West
418
+ rw12west@gmail.com
419
+ 262-665-7816
420
+ 3 June 2024
421
+ Maryna Burushkina
422
+ Growth Channel
423
+ 305 East Huntland Drive
424
+ Austin, TX 78752
425
+ Dear Maryna,
426
+ Advertising has always been an interest of mine because of how it exists in our
427
+ everyday lives.</span> <span style="background-color: rgb(164, 255, 164); color: black;">Knowing how to advertise is such an important skill to have, and you will
428
+ almost always come across it in some way no matter what role you are in.</span> <span style="background-color: rgb(254, 255, 254); color: black;">To be with a
429
+ great company such as Growth Channel would be a great advantage.</span> <span style="background-color: rgb(0, 255, 0); color: black;">Being passionate
430
+ about sales and having gained extensive experience in the field, I possess the
431
+ enthusiasm to personally contribute to the realization of Growth Channel’s vision.</span> <span style="background-color: rgb(237, 255, 237); color: black;">I plan
432
+ to optimize Growth Channel's vision by achieving higher conversion rates with future
433
+ prospects by fully educating and creating awareness among consumers of the value
434
+ Growth Channel can bring to their company.</span> <span style="background-color: rgb(255, 254, 254); color: black;">At SoftwareONE, I have been a top three seller of twenty-five SDRs for the last two
435
+ fiscal quarters, and I have never been out of the top ten.</span> <span style="background-color: rgb(255, 0, 0); color: black;">This can be attributed to my
436
+ ability to understand client needs and identify these needs to offer solutions that will
437
+ improve their yields.</span> <span style="background-color: rgb(241, 255, 241); color: black;">Moreover, during my current and past sales/advertising experience,
438
+ I have worked on several campaigns.</span> <span style="background-color: rgb(200, 255, 200); color: black;">I had to use data to ensure that the advertisement
439
+ team and I achieved a campaign that fits the client’s goals.</span> <span style="background-color: rgb(146, 255, 146); color: black;">For example, when I was in
440
+ real estate, I would work with my advertising team to target a certain audience for the
441
+ luxury real estate properties that I was posting for my broker.</span> <span style="background-color: rgb(142, 255, 142); color: black;">Although I have never
442
+ officially held an advertising title, much of my experience in the workforce has allowed
443
+ me to become experienced and more knowledgeable than most in the advertising
444
+ sector.</span> <span style="background-color: rgb(235, 255, 235); color: black;">To continue, If I were to choose a company that Growth Channel should expand its
445
+ network on, it would be Thrive Market.</span> <span style="background-color: rgb(0, 255, 0); color: black;">Thrive Market is an already successful company
446
+ that could really expand on its popularity especially because of its business model
447
+ which sells sustainable groceries.</span> <span style="background-color: rgb(146, 255, 146); color: black;">Being aware of the environmental friendliness and
448
+ sustainability of the products it creates, Thrive Market is bound to increase its market
449
+ share and make a bigger, positive impact.</span> <span style="background-color: rgb(80, 255, 80); color: black;">To create the campaign representing Thrive Market, I would use a targeting tool for ad
450
+ networks, to aim at the audience interested in the protection of the environment.</span> <span style="background-color: rgb(255, 0, 0); color: black;">The
451
+ campaign would include:
452
+ Audience Segmentation: Using the technologies that Growth Channel possesses by
453
+ targeting an audience who has interest in eco-friendly institutions, environmentally
454
+ friendly goods and services, and other related options.</span> <span style="background-color: rgb(100, 255, 100); color: black;">I would also aim it towards an
455
+ audience who is health conscious as Thrive Market mainly sells whole, organic foods.</span> <span style="background-color: rgb(252, 255, 252); color: black;">Multi-Channel Approach: Social media and search engine networks provide options to
456
+ buy ad space where consumers can be targeted effectively.</span> <span style="background-color: rgb(180, 255, 180); color: black;">There are many unique ad
457
+ options in the social media networks for all types of advertisements.</span> <span style="background-color: rgb(254, 255, 254); color: black;">Social media ads
458
+ also allow for very creative approaches.</span> <span style="background-color: rgb(255, 253, 253); color: black;">Performance
459
+ Optimization:
460
+ Reviewing
461
+ and
462
+ evaluating
463
+ the
464
+ effectiveness
465
+ of
466
+ the
467
+ campaigns would be a continuous process with the help of analytics tools offered by
468
+ Growth Channel.</span> <span style="background-color: rgb(255, 172, 172); color: black;">These tools will allow Growth Channel to optimize campaigns and
469
+ make adjustments they see fit to Thrive Market's audience which will increase ROI.</span> <span style="background-color: rgb(255, 0, 0); color: black;">Finally, I am confident that my sales experience, combined with my knowledge of digital
470
+ marketing makes me a great fit for the SDR opportunity at Growth Channel.</span> <span style="background-color: rgb(0, 255, 0); color: black;">I will bring
471
+ an enthusiastic and positive energy to the team, and I am looking forward to possibly
472
+ contributing to Growth Channel's continued success.</span> <span style="background-color: rgb(255, 223, 223); color: black;">Thank you for considering my application.</span> <span style="background-color: rgb(255, 187, 187); color: black;">I look forward to learning more about Growth
473
+ Channel during this ongoing process.</span> <span style="background-color: rgb(255, 170, 170); color: black;">Best Regards,
474
+ Ricky West
475
+ rw12west@gmail.com
476
+ 262-665-7816</span>
477
+ Original BC scores: AI: 0.34673771262168884, HUMAN: 0.6532623171806335
478
+ Calibration BC scores: AI: 0.40939597315436244, HUMAN: 0.5906040268456376
479
+ Input Text: Performance Optimization: Reviewing and evaluating the effectiveness of the campaigns would be a continuous process with the help of analytics tools offered by Growth Channel. These tools will allow Growth Channel to optimize campaigns and make adjustments they see fit to Thrive Market's audience which will increase ROI. Finally, I am confident that my sales experience, combined with my knowledge of digital marketing makes me a great fit for the SDR opportunity at Growth Channel. I will bring an enthusiastic and positive energy to the team, and I am looking forward to possibly contributing to Growth Channel's continued success. Thank you for considering my application. I look forward to learning more about Growth Channel during this ongoing process. Best Regards, Ricky West rw12westgmail. com 262-665-7816
480
+ Original BC scores: AI: 0.34673771262168884, HUMAN: 0.6532623171806335
481
+ Calibration BC scores: AI: 0.40939597315436244, HUMAN: 0.5906040268456376
482
+ MC Score: {'OPENAI GPT': 0.3178691988023335, 'MISTRAL': 2.1444096669889683e-09, 'CLAUDE': 1.1681333364700646e-06, 'GEMINI': 0.07488741660678146, 'GRAMMAR ENHANCER': 0.016638195604685966}
483
+ {'Ricky West\nrw12west@gmail.com\n262-665-7816\n3 June 2024\nMaryna Burushkina\nGrowth Channel\n305 East Huntland Drive\nAustin, TX 78752\nDear Maryna,\nAdvertising has always been an interest of mine because of how it exists in our\neveryday lives.': -0.5306495551721879, 'Knowing how to advertise is such an important skill to have, and you will\nalmost always come across it in some way no matter what role you are in.': -0.19667031727765713, 'To be with a\ngreat company such as Growth Channel would be a great advantage.': -0.041189784573334345, 'Being passionate\nabout sales and having gained extensive experience in the field, I possess the\nenthusiasm to personally contribute to the realization of Growth Channel’s vision.': -0.24918810706161526, "I plan\nto optimize Growth Channel's vision by achieving higher conversion rates with future\nprospects by fully educating and creating awareness among consumers of the value\nGrowth Channel can bring to their company.": -0.06580943427496835, 'At SoftwareONE, I have been a top three seller of twenty-five SDRs for the last two\nfiscal quarters, and I have never been out of the top ten.': -0.13508458234735787, 'This can be attributed to my\nability to understand client needs and identify these needs to offer solutions that will\nimprove their yields.': 0.27866133085282396, 'Moreover, during my current and past sales/advertising experience,\nI have worked on several campaigns.': 0.017630278801475125, 'I had to use data to ensure that the advertisement\nteam and I achieved a campaign that fits the client’s goals.': -0.045258109662774965}
484
+ probas = F.softmax(tensor_logits).detach().cpu().numpy()
485
+ /home/aliasgarov/copyright_checker/predictors.py:212: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
486
+ probas = F.softmax(tensor_logits).detach().cpu().numpy()
487
+ bc
488
+ {'For example, when I was in\nreal estate, I would work with my advertising team to target a certain audience for the\nluxury real estate properties that I was posting for my broker.': -0.11700274179295864, 'Although I have never\nofficially held an advertising title, much of my experience in the workforce has allowed\nme to become experienced and more knowledgeable than most in the advertising\nsector.': -0.15774178293541702, 'To continue, If I were to choose a company that Growth Channel should expand its\nnetwork on, it would be Thrive Market.': -0.09720898432998293, 'Thrive Market is an already successful company\nthat could really expand on its popularity especially because of its business model\nwhich sells sustainable groceries.': -0.15585376721747624, 'Being aware of the environmental friendliness and\nsustainability of the products it creates, Thrive Market is bound to increase its market\nshare and make a bigger, positive impact.': -0.18900911119385885, 'To create the campaign representing Thrive Market, I would use a targeting tool for ad\nnetworks, to aim at the audience interested in the protection of the environment.': -0.23145917771310473, 'The\ncampaign would include:\nAudience Segmentation: Using the technologies that Growth Channel possesses by\ntargeting an audience who has interest in eco-friendly institutions, environmentally\nfriendly goods and services, and other related options.': -0.050303103673449596, 'I would also aim it towards an\naudience who is health conscious as Thrive Market mainly sells whole, organic foods.': -0.330775313595558, 'Multi-Channel Approach: Social media and search engine networks provide options to\nbuy ad space where consumers can be targeted effectively.': -0.14748824344664127, 'There are many unique ad\noptions in the social media networks for all types of advertisements.': -0.0924204993475862, 'Social media ads\nalso allow for very creative approaches.': -0.03324840268599188} bc
489
+ {'Performance\nOptimization:\nReviewing\nand\nevaluating\nthe\neffectiveness\nof\nthe\ncampaigns would be a continuous process with the help of analytics tools offered by\nGrowth Channel.': 0.1788831345248405, "These tools will allow Growth Channel to optimize campaigns and\nmake adjustments they see fit to Thrive Market's audience which will increase ROI.": -0.003715107276243355, 'Finally, I am confident that my sales experience, combined with my knowledge of digital\nmarketing makes me a great fit for the SDR opportunity at Growth Channel.': 0.24272772732024744, "I will bring\nan enthusiastic and positive energy to the team, and I am looking forward to possibly\ncontributing to Growth Channel's continued success.": -0.11980814206086883, 'Thank you for considering my application.': 0.05269689352576316, 'I look forward to learning more about Growth\nChannel during this ongoing process.': 0.03812065293420048, 'Best Regards,\nRicky West\nrw12west@gmail.com\n262-665-7816': 0.012098829368504904} bc
490
+ <span style="background-color: rgb(0, 255, 0); color: black;">Ricky West
491
+ rw12west@gmail.com
492
+ 262-665-7816
493
+ 3 June 2024
494
+ Maryna Burushkina
495
+ Growth Channel
496
+ 305 East Huntland Drive
497
+ Austin, TX 78752
498
+ Dear Maryna,
499
+ Advertising has always been an interest of mine because of how it exists in our
500
+ everyday lives.</span> <span style="background-color: rgb(173, 255, 173); color: black;">Knowing how to advertise is such an important skill to have, and you will
501
+ almost always come across it in some way no matter what role you are in.</span> <span style="background-color: rgb(254, 255, 254); color: black;">To be with a
502
+ great company such as Growth Channel would be a great advantage.</span> <span style="background-color: rgb(146, 255, 146); color: black;">Being passionate
503
+ about sales and having gained extensive experience in the field, I possess the
504
+ enthusiasm to personally contribute to the realization of Growth Channel’s vision.</span> <span style="background-color: rgb(241, 255, 241); color: black;">I plan
505
+ to optimize Growth Channel's vision by achieving higher conversion rates with future
506
+ prospects by fully educating and creating awareness among consumers of the value
507
+ Growth Channel can bring to their company.</span> <span style="background-color: rgb(205, 255, 205); color: black;">At SoftwareONE, I have been a top three seller of twenty-five SDRs for the last two
508
+ fiscal quarters, and I have never been out of the top ten.</span> <span style="background-color: rgb(255, 0, 0); color: black;">This can be attributed to my
509
+ ability to understand client needs and identify these needs to offer solutions that will
510
+ improve their yields.</span> <span style="background-color: rgb(255, 254, 254); color: black;">Moreover, during my current and past sales/advertising experience,
511
+ I have worked on several campaigns.</span> <span style="background-color: rgb(252, 255, 252); color: black;">I had to use data to ensure that the advertisement
512
+ team and I achieved a campaign that fits the client’s goals.</span> <span style="background-color: rgb(182, 255, 182); color: black;">For example, when I was in
513
+ real estate, I would work with my advertising team to target a certain audience for the
514
+ luxury real estate properties that I was posting for my broker.</span> <span style="background-color: rgb(147, 255, 147); color: black;">Although I have never
515
+ officially held an advertising title, much of my experience in the workforce has allowed
516
+ me to become experienced and more knowledgeable than most in the advertising
517
+ sector.</span> <span style="background-color: rgb(199, 255, 199); color: black;">To continue, If I were to choose a company that Growth Channel should expand its
518
+ network on, it would be Thrive Market.</span> <span style="background-color: rgb(149, 255, 149); color: black;">Thrive Market is an already successful company
519
+ that could really expand on its popularity especially because of its business model
520
+ which sells sustainable groceries.</span> <span style="background-color: rgb(121, 255, 121); color: black;">Being aware of the environmental friendliness and
521
+ sustainability of the products it creates, Thrive Market is bound to increase its market
522
+ share and make a bigger, positive impact.</span> <span style="background-color: rgb(84, 255, 84); color: black;">To create the campaign representing Thrive Market, I would use a targeting tool for ad
523
+ networks, to aim at the audience interested in the protection of the environment.</span> <span style="background-color: rgb(239, 255, 239); color: black;">The
524
+ campaign would include:
525
+ Audience Segmentation: Using the technologies that Growth Channel possesses by
526
+ targeting an audience who has interest in eco-friendly institutions, environmentally
527
+ friendly goods and services, and other related options.</span> <span style="background-color: rgb(0, 255, 0); color: black;">I would also aim it towards an
528
+ audience who is health conscious as Thrive Market mainly sells whole, organic foods.</span> <span style="background-color: rgb(156, 255, 156); color: black;">Multi-Channel Approach: Social media and search engine networks provide options to
529
+ buy ad space where consumers can be targeted effectively.</span> <span style="background-color: rgb(203, 255, 203); color: black;">There are many unique ad
530
+ options in the social media networks for all types of advertisements.</span> <span style="background-color: rgb(254, 255, 254); color: black;">Social media ads
531
+ also allow for very creative approaches.</span> <span style="background-color: rgb(255, 70, 70); color: black;">Performance
532
+ Optimization:
533
+ Reviewing
534
+ and
535
+ evaluating
536
+ the
537
+ effectiveness
538
+ of
539
+ the
540
+ campaigns would be a continuous process with the help of analytics tools offered by
541
+ Growth Channel.</span> <span style="background-color: rgb(252, 255, 252); color: black;">These tools will allow Growth Channel to optimize campaigns and
542
+ make adjustments they see fit to Thrive Market's audience which will increase ROI.</span> <span style="background-color: rgb(255, 0, 0); color: black;">Finally, I am confident that my sales experience, combined with my knowledge of digital
543
+ marketing makes me a great fit for the SDR opportunity at Growth Channel.</span> <span style="background-color: rgb(0, 255, 0); color: black;">I will bring
544
+ an enthusiastic and positive energy to the team, and I am looking forward to possibly
545
+ contributing to Growth Channel's continued success.</span> <span style="background-color: rgb(255, 209, 209); color: black;">Thank you for considering my application.</span> <span style="background-color: rgb(255, 225, 225); color: black;">I look forward to learning more about Growth
546
+ Channel during this ongoing process.</span> <span style="background-color: rgb(255, 253, 253); color: black;">Best Regards,
547
+ Ricky West
548
+ rw12west@gmail.com
549
+ 262-665-7816</span> Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
550
+
551
+ Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
552
+ Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
553
+
554
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
555
+ To disable this warning, you can either:
556
+ - Avoid using `tokenizers` before the fork if possible
557
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
558
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
559
+ To disable this warning, you can either:
560
+ - Avoid using `tokenizers` before the fork if possible
561
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
562
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
563
+ To disable this warning, you can either:
564
+ - Avoid using `tokenizers` before the fork if possible
565
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
566
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
567
+ To disable this warning, you can either:
568
+ - Avoid using `tokenizers` before the fork if possible
569
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
570
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
571
+ To disable this warning, you can either:
572
+ - Avoid using `tokenizers` before the fork if possible
573
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
574
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
575
+ To disable this warning, you can either:
576
+ - Avoid using `tokenizers` before the fork if possible
577
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
578
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
579
+ To disable this warning, you can either:
580
+ - Avoid using `tokenizers` before the fork if possible
581
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
582
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
583
+ To disable this warning, you can either:
584
+ - Avoid using `tokenizers` before the fork if possible
585
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
586
+ WARNING: Invalid HTTP request received.
587
+ WARNING: Invalid HTTP request received.
predictors.py CHANGED
@@ -21,6 +21,7 @@ from optimum.pipelines import pipeline
21
 
22
  with open("config.yaml", "r") as file:
23
  params = yaml.safe_load(file)
 
24
  nltk.download("punkt")
25
  nltk.download("stopwords")
26
  device_needed = "cuda" if torch.cuda.is_available() else "cpu"
@@ -35,34 +36,22 @@ mc_token_size = int(params["MC_TOKEN_SIZE"])
35
  bc_token_size = int(params["BC_TOKEN_SIZE"])
36
  bias_checker_model_name = params['BIAS_CHECKER_MODEL_PATH']
37
  bias_corrector_model_name = params['BIAS_CORRECTOR_MODEL_PATH']
38
- text_bc_tokenizer = AutoTokenizer.from_pretrained(text_bc_model_path)
39
- text_bc_model = AutoModelForSequenceClassification.from_pretrained(
40
- text_bc_model_path
41
- ).to(device)
42
- text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path)
43
- text_mc_model = AutoModelForSequenceClassification.from_pretrained(
44
- text_mc_model_path
45
- ).to(device)
46
- quillbot_tokenizer = AutoTokenizer.from_pretrained(text_quillbot_model_path)
47
- quillbot_model = AutoModelForSequenceClassification.from_pretrained(
48
- text_quillbot_model_path
49
- ).to(device)
50
-
51
 
 
 
 
 
 
 
52
 
53
  # proxy models for explainability
54
  mini_bc_model_name = "polygraf-ai/bc-model"
55
- bc_tokenizer_mini = AutoTokenizer.from_pretrained(mini_bc_model_name)
56
- bc_model_mini = AutoModelForSequenceClassification.from_pretrained(
57
- mini_bc_model_name
58
- ).to(device_needed)
59
  mini_humanizer_model_name = "polygraf-ai/humanizer-model"
60
- humanizer_tokenizer_mini = AutoTokenizer.from_pretrained(
61
- mini_humanizer_model_name
62
- )
63
- humanizer_model_mini = AutoModelForSequenceClassification.from_pretrained(
64
- mini_humanizer_model_name
65
- ).to(device_needed)
66
 
67
  bc_model_mini = BetterTransformer.transform(bc_model_mini)
68
  humanizer_model_mini = BetterTransformer.transform(humanizer_model_mini)
 
21
 
22
  with open("config.yaml", "r") as file:
23
  params = yaml.safe_load(file)
24
+
25
  nltk.download("punkt")
26
  nltk.download("stopwords")
27
  device_needed = "cuda" if torch.cuda.is_available() else "cpu"
 
36
  bc_token_size = int(params["BC_TOKEN_SIZE"])
37
  bias_checker_model_name = params['BIAS_CHECKER_MODEL_PATH']
38
  bias_corrector_model_name = params['BIAS_CORRECTOR_MODEL_PATH']
39
+ access_token = params['HF_TOKEN']
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ text_bc_tokenizer = AutoTokenizer.from_pretrained(text_bc_model_path, token=access_token)
42
+ text_bc_model = AutoModelForSequenceClassification.from_pretrained(text_bc_model_path, token=access_token).to(device)
43
+ text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path, token=access_token)
44
+ text_mc_model = AutoModelForSequenceClassification.from_pretrained(text_mc_model_path, token=access_token).to(device)
45
+ quillbot_tokenizer = AutoTokenizer.from_pretrained(text_quillbot_model_path, token=access_token)
46
+ quillbot_model = AutoModelForSequenceClassification.from_pretrained(text_quillbot_model_path, token=access_token).to(device)
47
 
48
  # proxy models for explainability
49
  mini_bc_model_name = "polygraf-ai/bc-model"
50
+ bc_tokenizer_mini = AutoTokenizer.from_pretrained(mini_bc_model_name, token=access_token)
51
+ bc_model_mini = AutoModelForSequenceClassification.from_pretrained(mini_bc_model_name, token=access_token).to(device_needed)
 
 
52
  mini_humanizer_model_name = "polygraf-ai/humanizer-model"
53
+ humanizer_tokenizer_mini = AutoTokenizer.from_pretrained(mini_humanizer_model_name, token=access_token)
54
+ humanizer_model_mini = AutoModelForSequenceClassification.from_pretrained(mini_humanizer_model_name, token=access_token).to(device_needed)
 
 
 
 
55
 
56
  bc_model_mini = BetterTransformer.transform(bc_model_mini)
57
  humanizer_model_mini = BetterTransformer.transform(humanizer_model_mini)
utils.py CHANGED
@@ -6,13 +6,18 @@ from unidecode import unidecode
6
  from transformers import AutoTokenizer
7
  import yaml
8
  import fitz
 
 
9
 
 
 
 
 
10
 
11
  def remove_accents(input_str):
12
  text_no_accents = unidecode(input_str)
13
  return text_no_accents
14
 
15
-
16
  def remove_special_characters(text):
17
  text = re.sub(r'https?://\S+|www\.\S+', '', text)
18
  emoji_pattern = re.compile("["
@@ -50,7 +55,7 @@ with open("config.yaml", "r") as file:
50
  params = yaml.safe_load(file)
51
 
52
  text_bc_model_path = params["TEXT_BC_MODEL_PATH"]
53
- text_bc_tokenizer = AutoTokenizer.from_pretrained(text_bc_model_path)
54
 
55
 
56
  def len_validator(text):
 
6
  from transformers import AutoTokenizer
7
  import yaml
8
  import fitz
9
+ import requests
10
+ from bs4 import BeautifulSoup
11
 
12
+ with open("config.yaml", "r") as file:
13
+ params = yaml.safe_load(file)
14
+
15
+ access_token = params['HF_TOKEN']
16
 
17
  def remove_accents(input_str):
18
  text_no_accents = unidecode(input_str)
19
  return text_no_accents
20
 
 
21
  def remove_special_characters(text):
22
  text = re.sub(r'https?://\S+|www\.\S+', '', text)
23
  emoji_pattern = re.compile("["
 
55
  params = yaml.safe_load(file)
56
 
57
  text_bc_model_path = params["TEXT_BC_MODEL_PATH"]
58
+ text_bc_tokenizer = AutoTokenizer.from_pretrained(text_bc_model_path, token=access_token)
59
 
60
 
61
  def len_validator(text):