medmac01 committed on
Commit 9b0b32e · verified · 1 Parent(s): ff09d94

Upload 6 files

Files changed (2)
  1. model.py +16 -10
  2. requirements.txt +3 -1
model.py CHANGED
@@ -1,8 +1,4 @@
-import os
-from PIL import Image, ImageDraw
-import cv2
-import numpy as np
-from IPython.display import HTML
+from PIL import Image
 from base64 import b64encode
 
 import torch
@@ -14,8 +10,7 @@ from diffusers.schedulers.scheduling_ddim import DDIMScheduler
 #from transformers import CLIPTextModel, CLIPTokenizer
 from tqdm.auto import tqdm
 from huggingface_hub import notebook_login
-
-import weights
+import torch.nn as nn
 
 device = 'cpu'
 
@@ -47,9 +42,6 @@ class MultilingualCLIP(transformers.PreTrainedModel):
         return model, [], [], []
 
 
-import torch
-import torch.nn as nn
-
 # Define the adaptation layer, 'checkpoint_9.pth'
 class AdaptationLayer(nn.Module):
     def __init__(self, input_dim, output_dim):
@@ -87,6 +79,20 @@ adapt_model.to(device)
 state_dict = torch.load('weights/checkpoint_9.pth')
 adapt_model.load_state_dict(state_dict)
 
+from Multilingual_CLIP.multilingual_clip import pt_multilingual_clip
+
+texts = [
+    'قطة تقرأ كتابا'
+]
+
+model_name = 'M-CLIP/LABSE-Vit-L-14'
+
+# Load Model & Tokenizer
+text_model = pt_multilingual_clip.MultilingualCLIP.from_pretrained(model_name)
+text_model = text_model.to(device)
+text_tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
+
+embeddings = text_model.forward(texts, text_tokenizer, device)
 # 1. Load the autoencoder model which will be used to decode the latents into image space.
 vae = AutoencoderKL.from_pretrained(
     'CompVis/stable-diffusion-v1-4', subfolder='vae', use_auth_token=True)
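For context on the added block: it embeds a multilingual prompt (the Arabic example translates to "a cat reading a book") with M-CLIP's LABSE-ViT-L-14 text encoder, and the AdaptationLayer loaded from checkpoint_9.pth presumably maps those embeddings into the conditioning space of the Stable Diffusion UNet. Below is a minimal sketch of how the pieces could be wired together; the build_conditioning helper, the assumed [batch, 77, 768] output shape, and the denoising call refer to parts of model.py not shown in this diff and are assumptions, not part of the commit.

# Hypothetical glue code (not part of this commit): pass M-CLIP embeddings
# through the adaptation layer and hand them to the Stable Diffusion UNet.
import torch

@torch.no_grad()
def build_conditioning(texts, text_model, text_tokenizer, adapt_model, device='cpu'):
    # M-CLIP returns one pooled embedding per prompt; the extra `device`
    # argument matches the vendored forward() used in model.py above.
    pooled = text_model.forward(texts, text_tokenizer, device)
    # Assumption: AdaptationLayer projects/reshapes the pooled embedding into
    # the token-sequence hidden states (e.g. [batch, 77, 768]) that the UNet's
    # cross-attention expects in place of the CLIP text-encoder output.
    return adapt_model(pooled.to(device))

# Typical use inside the denoising loop defined elsewhere in model.py:
#   cond = build_conditioning(texts, text_model, text_tokenizer, adapt_model, device)
#   noise_pred = unet(latent_input, t, encoder_hidden_states=cond).sample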
requirements.txt CHANGED
@@ -2,4 +2,6 @@ transformers
 diffusers
 torch
 accelerate
-gradio
+gradio
+opencv-python-headless
+tqdm
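As a quick sanity check (not part of the commit), the snippet below assumes a standard Python runtime and only verifies that the packages in the updated requirements.txt resolve under their import names (opencv-python-headless installs the cv2 module):

# Minimal import check for the updated requirements (sketch, not from the repo).
import importlib

for module in ('transformers', 'diffusers', 'torch', 'accelerate',
               'gradio', 'cv2', 'tqdm'):
    importlib.import_module(module)  # raises ImportError if a dependency is missing
print('all requirements importable')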