SoybeanMilk committed on
Commit
90d4f1f
1 Parent(s): 238ccfc

Delete config.py

Browse files
Files changed (1) hide show
  1. config.py +0 -177
config.py DELETED
@@ -1,177 +0,0 @@
1
- from enum import Enum
2
-
3
- import os
4
- from typing import List, Dict, Literal
5
-
6
-
7
class ModelConfig:
    """Description of a single model entry: its name, source URL and type."""

    def __init__(self, name: str, url: str, path: str = None, type: str = "whisper", tokenizer_url: str = None):
        """
        Initialize a model configuration.

        name: Name of the model
        url: URL to download the model from
        path: Path to the model file. If not set, the model will be downloaded from the URL.
        type: Type of model. Can be whisper or huggingface.
        tokenizer_url: URL associated with the model's tokenizer. Stored as-is;
                       this class does not interpret it.
        """
        self.name = name
        self.url = url
        self.path = path
        self.type = type
        self.tokenizer_url = tokenizer_url

    def __repr__(self):
        # Show every stored field so log output and debugger views are
        # readable instead of the default "<ModelConfig object at 0x...>".
        return (
            f"{self.__class__.__name__}("
            f"name={self.name!r}, url={self.url!r}, path={self.path!r}, "
            f"type={self.type!r}, tokenizer_url={self.tokenizer_url!r})"
        )
22
-
23
# String spellings of the VAD initial-prompt modes, in declaration order.
VAD_INITIAL_PROMPT_MODE_VALUES = [
    "prepend_all_segments",
    "prepend_first_segment",
    "json_prompt_mode",
]
24
-
25
class VadInitialPromptMode(Enum):
    """Enumerates the supported VAD initial-prompt modes."""

    PREPEND_ALL_SEGMENTS = 1
    # NOTE: the misspelled name is kept as the canonical member so existing
    # callers (and `.name` lookups) keep working.
    PREPREND_FIRST_SEGMENT = 2
    # Correctly spelled alias; same value, so it resolves to the member above.
    PREPEND_FIRST_SEGMENT = 2
    JSON_PROMPT_MODE = 3

    @staticmethod
    def from_string(s: str):
        """
        Convert a mode string into the matching enum member.

        The comparison ignores case and surrounding whitespace.

        s: Mode name, e.g. "prepend_first_segment". May be None or empty.

        Returns the matching member, or None when `s` is None, empty or
        consists only of whitespace.

        Raises ValueError when `s` is non-empty but not a known mode.
        """
        if s is None:
            return None

        # Strip before comparing: a stray trailing space (as in
        # "prepend_first_segment ") must not turn a valid mode into an error.
        normalized = s.strip().lower()
        if not normalized:
            return None

        by_name = {
            "prepend_all_segments": VadInitialPromptMode.PREPEND_ALL_SEGMENTS,
            "prepend_first_segment": VadInitialPromptMode.PREPREND_FIRST_SEGMENT,
            "json_prompt_mode": VadInitialPromptMode.JSON_PROMPT_MODE,
        }
        if normalized not in by_name:
            raise ValueError(f"Invalid value for VadInitialPromptMode: {s}")
        return by_name[normalized]
44
-
45
class ApplicationConfig:
    """
    Application settings held as plain instance attributes.

    Instances can be built directly, loaded from a JSON5 file with
    `parse_file`, or obtained through `create_default`.
    """

    def __init__(self, models: Dict[Literal["whisper", "m2m100", "nllb", "mt5", "ALMA"], List["ModelConfig"]],
                 input_audio_max_duration: int = 600, share: bool = False, server_name: str = None, server_port: int = 7860,
                 queue_concurrency_count: int = 1, delete_uploaded_files: bool = True,
                 whisper_implementation: str = "whisper", default_model_name: str = "medium",
                 default_nllb_model_name: str = "distilled-600M", default_vad: str = "silero-vad",
                 vad_parallel_devices: str = "", vad_cpu_cores: int = 1, vad_process_timeout: int = 1800,
                 auto_parallel: bool = False, output_dir: str = None,
                 model_dir: str = None, device: str = None,
                 verbose: bool = True, task: str = "transcribe", language: str = None,
                 vad_initial_prompt_mode: str = "prepend_first_segment",
                 vad_merge_window: float = 5, vad_max_merge_size: float = 30,
                 vad_padding: float = 1, vad_prompt_window: float = 3,
                 temperature: float = 0, best_of: int = 5, beam_size: int = 5,
                 patience: float = None, length_penalty: float = None,
                 suppress_tokens: str = "-1", initial_prompt: str = None,
                 condition_on_previous_text: bool = True, fp16: bool = True,
                 compute_type: str = "float16",
                 temperature_increment_on_fallback: float = 0.2, compression_ratio_threshold: float = 2.4,
                 logprob_threshold: float = -1.0, no_speech_threshold: float = 0.6,
                 repetition_penalty: float = 1.0, no_repeat_ngram_size: int = 0,
                 # Word timestamp settings
                 word_timestamps: bool = False, prepend_punctuations: str = "\"\'“¿([{-",
                 append_punctuations: str = "\"\'.。,,!!??::”)]}、",
                 highlight_words: bool = False,
                 # Diarization
                 auth_token: str = None, diarization: bool = False, diarization_speakers: int = 2,
                 diarization_min_speakers: int = 1, diarization_max_speakers: int = 5,
                 diarization_process_timeout: int = 60,
                 # Translation
                 translation_batch_size: int = 2,
                 translation_no_repeat_ngram_size: int = 3,
                 translation_num_beams: int = 2,
                 ):
        """
        Store every argument on the instance under the same name.

        models: Mapping from a model family key to the list of model
                configurations registered for it.

        All remaining parameters are optional settings; each is copied
        verbatim to an attribute of the same name without validation.
        """

        self.models = models

        # WebUI settings
        self.input_audio_max_duration = input_audio_max_duration
        self.share = share
        self.server_name = server_name
        self.server_port = server_port
        self.queue_concurrency_count = queue_concurrency_count
        self.delete_uploaded_files = delete_uploaded_files

        self.whisper_implementation = whisper_implementation
        self.default_model_name = default_model_name
        self.default_nllb_model_name = default_nllb_model_name
        self.default_vad = default_vad
        self.vad_parallel_devices = vad_parallel_devices
        self.vad_cpu_cores = vad_cpu_cores
        self.vad_process_timeout = vad_process_timeout
        self.auto_parallel = auto_parallel
        self.output_dir = output_dir

        self.model_dir = model_dir
        self.device = device
        self.verbose = verbose
        self.task = task
        self.language = language
        self.vad_initial_prompt_mode = vad_initial_prompt_mode
        self.vad_merge_window = vad_merge_window
        self.vad_max_merge_size = vad_max_merge_size
        self.vad_padding = vad_padding
        self.vad_prompt_window = vad_prompt_window
        self.temperature = temperature
        self.best_of = best_of
        self.beam_size = beam_size
        self.patience = patience
        self.length_penalty = length_penalty
        self.suppress_tokens = suppress_tokens
        self.initial_prompt = initial_prompt
        self.condition_on_previous_text = condition_on_previous_text
        self.fp16 = fp16
        self.compute_type = compute_type
        self.temperature_increment_on_fallback = temperature_increment_on_fallback
        self.compression_ratio_threshold = compression_ratio_threshold
        self.logprob_threshold = logprob_threshold
        self.no_speech_threshold = no_speech_threshold
        self.repetition_penalty = repetition_penalty
        self.no_repeat_ngram_size = no_repeat_ngram_size

        # Word timestamp settings
        self.word_timestamps = word_timestamps
        self.prepend_punctuations = prepend_punctuations
        self.append_punctuations = append_punctuations
        self.highlight_words = highlight_words

        # Diarization settings
        self.auth_token = auth_token
        self.diarization = diarization
        self.diarization_speakers = diarization_speakers
        self.diarization_min_speakers = diarization_min_speakers
        self.diarization_max_speakers = diarization_max_speakers
        self.diarization_process_timeout = diarization_process_timeout
        # Translation
        self.translation_batch_size = translation_batch_size
        self.translation_no_repeat_ngram_size = translation_no_repeat_ngram_size
        self.translation_num_beams = translation_num_beams

    def get_model_names(self, name: str):
        """
        Return the names of all models registered under the key `name`.

        Raises KeyError when `name` is not a key of `self.models`.
        """
        return [model.name for model in self.models[name]]

    def update(self, **new_values):
        """
        Return a shallow copy of this configuration with `new_values` applied.

        The receiver is left unchanged. Each keyword becomes (or overrides)
        an attribute of the same name on the returned copy.
        """
        import copy

        # Copy the instance instead of re-running __init__ with __dict__:
        # attributes that are not constructor parameters (e.g. added by an
        # earlier update()) would otherwise raise TypeError here.
        result = copy.copy(self)

        for key, value in new_values.items():
            setattr(result, key, value)
        return result

    @staticmethod
    def create_default(**kwargs):
        """
        Load the configuration file and apply `kwargs` as overrides.

        The file path is read from the WHISPER_WEBUI_CONFIG environment
        variable and falls back to "config.json5" when it is not set.
        """
        app_config = ApplicationConfig.parse_file(os.environ.get("WHISPER_WEBUI_CONFIG", "config.json5"))

        # Update with kwargs
        if kwargs:
            app_config = app_config.update(**kwargs)
        return app_config

    @staticmethod
    def parse_file(config_path: str):
        """
        Build an ApplicationConfig from the JSON5 file at `config_path`.

        The "models" entry is converted into ModelConfig objects per key;
        every other top-level entry is passed to the constructor as a
        keyword argument.
        """
        import json5

        with open(config_path, "r", encoding="utf-8") as f:
            # Load using json5
            data = json5.load(f)

        # Fall back to an empty mapping: a missing (or null) "models" entry
        # must not fail on the .items() call below.
        data_models = data.pop("models", None) or {}
        models: Dict[Literal["whisper", "m2m100", "nllb", "mt5", "ALMA"], List[ModelConfig]] = {
            key: [ModelConfig(**item) for item in value]
            for key, value in data_models.items()
        }

        return ApplicationConfig(models, **data)