Spaces: Build error
Kartikeyssj2 committed • Commit 5d9ed6e • 1 Parent(s): d1648d8
Update download_models.py

download_models.py CHANGED (+104 -13)

@@ -1,19 +1,110 @@
 import os
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
-
-# Create the models directory if it doesn't exist
-os.makedirs("./models", exist_ok=True)
-os.makedirs("./models/tokenizer", exist_ok=True)
-os.makedirs("./models/model", exist_ok=True)
-
-print("Downloading and saving tokenizer...")
-tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
-tokenizer.save_pretrained("./models/tokenizer")
-print("Tokenizer saved successfully.")
-
-print("Downloading and saving model...")
-model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
-model.save_pretrained("./models/model")
-print("Model saved successfully.")
-
-print("Download and save process completed.")
+import nltk
+import os
+
+# Define the directory to save the data
+data_dir = 'nltk_data'
+
+# Create the directory if it does not exist
+if not os.path.exists(data_dir):
+    os.makedirs(data_dir)
+
+# Set the NLTK data path to the local directory
+nltk.data.path.append(data_dir)
+
+# Download the required NLTK data
+nltk.download('punkt', download_dir=data_dir)
+nltk.download('words', download_dir=data_dir)
+
+
+
+
+
+from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForCTC, DistilBertTokenizer, DistilBertForSequenceClassification
+import os
+
+# Define directories to save the models and tokenizers
+pronunciation_model_dir = 'pronunciation_model'
+fluency_model_dir = 'fluency_model'
+
+# Create the directories if they don't exist
+os.makedirs(pronunciation_model_dir, exist_ok=True)
+os.makedirs(fluency_model_dir, exist_ok=True)
+
+# Download and save the Pronunciation model and tokenizer
+print("Downloading pronunciation tokenizer...")
+pronunciation_tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
+pronunciation_tokenizer.save_pretrained(pronunciation_model_dir)
+
+print("Downloading pronunciation model...")
+pronunciation_model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
+pronunciation_model.save_pretrained(pronunciation_model_dir)
+
+# Download and save the Fluency model and tokenizer
+print("Downloading fluency tokenizer...")
+fluency_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
+fluency_tokenizer.save_pretrained(fluency_model_dir)
+
+print("Downloading fluency model...")
+fluency_model = DistilBertForSequenceClassification.from_pretrained("Kartikeyssj2/Fluency_Scoring_V2")
+fluency_model.save_pretrained(fluency_model_dir)
+
+print("Download and save completed.")
+
+
+
+
+
+
+
+from sentence_transformers import SentenceTransformer
+import os
+
+# Define the directory to save the model
+model_dir = 'content_relevance_model'
+
+# Create the directory if it does not exist
+os.makedirs(model_dir, exist_ok=True)
+
+# Download and save the SentenceTransformer model
+print("Downloading SentenceTransformer model...")
+model = SentenceTransformer('sentence-transformers/msmarco-distilbert-cos-v5')
+model.save(model_dir)
+
+print("Model downloaded and saved successfully.")
+
+
+
+
+
+
+from transformers import BlipProcessor, BlipForConditionalGeneration
+import os
+
+# Define directories to save the models and processors
+processor_dir = 'blip_processor'
+model_dir = 'blip_model'
+
+# Create the directories if they don't exist
+os.makedirs(processor_dir, exist_ok=True)
+os.makedirs(model_dir, exist_ok=True)
+
+# Download and save the BlipProcessor
+print("Downloading BlipProcessor...")
+image_captioning_processor = BlipProcessor.from_pretrained("noamrot/FuseCap")
+image_captioning_processor.save_pretrained(processor_dir)
+print("BlipProcessor downloaded and saved.")
+
+# Download and save the BlipForConditionalGeneration model
+print("Downloading BlipForConditionalGeneration model...")
+image_captioning_model = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap")
+image_captioning_model.save_pretrained(model_dir)
+print("BlipForConditionalGeneration model downloaded and saved.")
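
For context: the serving code that consumes these artifacts is not part of this commit, so the following is only a minimal sketch, assuming the app runs from the same working directory and loads each model from the local directories the script creates (nltk_data, pronunciation_model, fluency_model, content_relevance_model, blip_processor, blip_model). Directory names are taken from the script; the variable names here are illustrative.

# Sketch only: loading the artifacts saved by download_models.py at app startup.
import nltk
from sentence_transformers import SentenceTransformer
from transformers import (
    BlipForConditionalGeneration,
    BlipProcessor,
    DistilBertForSequenceClassification,
    DistilBertTokenizer,
    Wav2Vec2ForCTC,
    Wav2Vec2Tokenizer,
)

# Use the locally downloaded NLTK corpora instead of fetching them at runtime.
nltk.data.path.append("nltk_data")

# Pronunciation scoring: Wav2Vec2 CTC model and tokenizer saved in pronunciation_model/.
pronunciation_tokenizer = Wav2Vec2Tokenizer.from_pretrained("pronunciation_model")
pronunciation_model = Wav2Vec2ForCTC.from_pretrained("pronunciation_model")

# Fluency scoring: DistilBERT classifier and tokenizer saved in fluency_model/.
fluency_tokenizer = DistilBertTokenizer.from_pretrained("fluency_model")
fluency_model = DistilBertForSequenceClassification.from_pretrained("fluency_model")

# Content relevance: SentenceTransformer saved in content_relevance_model/.
relevance_model = SentenceTransformer("content_relevance_model")

# Image captioning: BLIP processor and model saved in blip_processor/ and blip_model/.
image_captioning_processor = BlipProcessor.from_pretrained("blip_processor")
image_captioning_model = BlipForConditionalGeneration.from_pretrained("blip_model")

Saving the tokenizer and model into the same directory, as the script does for the pronunciation and fluency scorers, works because from_pretrained only reads the files relevant to the class being loaded, so one local path can serve both calls.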