Spaces:
Sleeping
Sleeping
PascalZhan
commited on
Commit
•
40b03a6
0
Parent(s):
Synchronisation Pascal
Browse files- .gitignore +1 -0
- README.md +37 -0
- app.py +88 -0
- importHuggingFaceHubModel.py +164 -0
- requirements.txt +1 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
*.keras
|
README.md
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: SAE-GPT2
|
3 |
+
emoji: ❤️
|
4 |
+
colorFrom: indigo
|
5 |
+
colorTo: red
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.50.2
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
---
|
11 |
+
|
12 |
+
<hr/>
|
13 |
+
|
14 |
+
<h4> Environnement de développement commun </h4>
|
15 |
+
|
16 |
+
<br/>
|
17 |
+
|
18 |
+
| Nom | Lien |
|
19 |
+
|------------|-------------------------------------------------------|
|
20 |
+
| Production | https://huggingface.co/spaces/FFatih/SAE-GPT2-PROD |
|
21 |
+
| Recette | https://huggingface.co/spaces/FFatih/SAE-GPT2-RECETTE |
|
22 |
+
|
23 |
+
<hr/>
|
24 |
+
|
25 |
+
<h4> Environnement de développement personnel </h4>
|
26 |
+
|
27 |
+
<br/>
|
28 |
+
|
29 |
+
| Prenom | Lien |
|
30 |
+
|-------------------|------------------------------------------------------------|
|
31 |
+
| Fatih | https://huggingface.co/spaces/FFatih/SAE-GPT2-FATIH |
|
32 |
+
| Bastien | https://huggingface.co/spaces/BastienHot/SAE-GPT2-BASTIEN |
|
33 |
+
| Pascal | https://huggingface.co/spaces/PascalZhan/SAE-GPT2-PASCAL |
|
34 |
+
| Tamij | https://huggingface.co/spaces/Tamij/SAE-GPT2-TAMIJ |
|
35 |
+
| Kevin | https://huggingface.co/spaces/Kemasu/SAE-GPT2-KEVIN |
|
36 |
+
| Lilian | https://huggingface.co/spaces/Solialiranes/SAE-GPT2-LILIAN |
|
37 |
+
| Evan | https://huggingface.co/spaces/Evanparis240/SAE-GPT2-EVAN |
|
app.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Author: Bastien & Pascal
|
2 |
+
# Date: 2/25/2024
|
3 |
+
# Project: SAE-GPT2 | BUT 3 Informatique - Semester 5
|
4 |
+
|
5 |
+
# Import of required libraries
|
6 |
+
import os

# Keras picks its backend from KERAS_BACKEND when the package is first imported,
# so the variable must be set before `import keras` below, not after it.
os.environ["KERAS_BACKEND"] = "tensorflow"

# NOTE(review): these dependencies are installed at start-up through the shell.
# Declaring them in requirements.txt would be the usual approach; the calls are
# kept unchanged here so the deployed environment stays the same.
os.system("pip install --upgrade pip")
os.system("pip install googletrans-py")
os.system("pip install tensorflow==2.15.0")
os.system("pip install keras-nlp")
os.system("pip install -q --upgrade keras")  # Upgrade Keras to version 3

# These imports must stay below the installs above, which provide the packages.
import time
import keras
import keras_nlp
import pandas as pd
import gradio as gr
from googletrans import Translator
from importHuggingFaceHubModel import from_pretrained_keras
|
24 |
+
|
25 |
+
# Load the fine-tuned model
|
26 |
+
#model = keras.models.load_model("LoRA_Model_V2.keras")
|
27 |
+
model = from_pretrained_keras('DracolIA/GPT-2-LoRA-HealthCare')
|
28 |
+
|
29 |
+
translator = Translator() # Create Translator Instance
|
30 |
+
|
31 |
+
# Function to generate responses from the model
|
32 |
+
def generate_responses(question):
    """Answer a user question with the fine-tuned model, in the user's language.

    The question's language is detected with the module-level ``translator``.
    Anything not detected as English is translated to English before being
    wrapped in the ``[QUESTION] ... [ANSWER]`` prompt, and the cleaned model
    output is translated back to the detected language.

    Args:
        question: Text typed by the user.

    Returns:
        The cleaned answer text, or an empty string when ``question`` is
        empty or contains only whitespace.
    """
    # Blank input gives nothing to detect, translate or answer, so skip the
    # translation service and the model entirely.
    if not question or not question.strip():
        return ""

    # Detect the language of the prompt
    language = translator.detect(question).lang.upper()
    needs_translation = language != "EN"

    if needs_translation:
        # Translate the user's text to English for the model
        question = translator.translate(question, src=language, dest="en").text

    prompt = f"[QUESTION] {question} [ANSWER]"

    # Generate the answer, then extract the model's real response from the
    # prompt-engineered string
    output = clean_answer_text(model.generate(prompt, max_length=1024))

    if needs_translation:
        # Translate the model's answer back to the user's language, reusing the
        # shared translator instead of creating a new instance on every call
        output = translator.translate(output, src="en", dest=language).text

    return output
|
46 |
+
|
47 |
+
# Function to clean the model output by removing the prompt engineering added in the "generate_responses" function
|
48 |
+
def clean_answer_text(text: str) -> str:
    """Extract the model's answer from a prompt-engineered generation.

    Keeps what follows the first ``[ANSWER]`` marker, then cuts the text after
    its last ``.`` so a trailing unfinished sentence is dropped.

    Args:
        text: Raw generated string, normally ``"[QUESTION] ... [ANSWER] ..."``.

    Returns:
        The stripped answer. If the marker is missing, the whole input is
        treated as the answer. If the answer contains no ``.``, it is
        returned untruncated.
    """
    marker = "[ANSWER]"

    # partition() reports whether the marker was found. The previous
    # `find() + len(marker)` arithmetic turned a missing marker (-1) into
    # index 7 and silently discarded the first seven characters.
    _, found, answer = text.partition(marker)
    if not found:
        answer = text

    answer = answer.strip()

    # Drop anything after the last full stop (usually a sentence cut off by
    # the generation length limit).
    last_dot_index = answer.rfind(".")
    if last_dot_index != -1:
        answer = answer[:last_dot_index + 1]

    return answer.strip()
|
62 |
+
|
63 |
+
|
64 |
+
# Define a Gradio interface
|
65 |
+
def chat_interface(question, history_df):
    """Gradio callback: answer ``question`` and put the exchange first in the history.

    Args:
        question: Text submitted by the user.
        history_df: Table of earlier exchanges, concatenated below the new row.

    Returns:
        A ``(response, history)`` tuple: the generated answer and the history
        with the new question/answer pair as its first row.
    """
    answer = generate_responses(question)

    # Newest exchange first, followed by the existing history
    newest_row = pd.DataFrame({"Question": [question], "Réponse": [answer]})
    updated_history = pd.concat([newest_row, history_df], ignore_index=True)

    return answer, updated_history
|
70 |
+
|
71 |
+
# NOTE(review): the nesting of the widgets below (what sits inside the Row) was
# reconstructed from statement order; confirm it against the intended layout.
with gr.Blocks() as demo:
    # Page banner. `width: 100%` replaces `width=100%`, which is not valid CSS
    # inside a style attribute and was ignored by browsers.
    gr.HTML("""
    <div style='width: 100%; height: 200px; background: url("https://github.com/BastienHot/SAE-GPT2/raw/70fb88500a2cc168d71e8ed635fc54492beb6241/image/logo.png") no-repeat center center; background-size: contain;'>
        <h1 style='text-align:center; width: 100%'>DracolIA - AI Question Answering for Healthcare</h1>
    </div>
    """)
    with gr.Row():
        question = gr.Textbox(label="Votre Question", placeholder="Saisissez ici...")
        submit_btn = gr.Button("Envoyer")
    response = gr.Textbox(label="Réponse", interactive=False)

    # Table that keeps track of the question-answer history. The former
    # `values=[]` argument is dropped: `values` is not a gr.Dataframe parameter,
    # so the table starts from the component's default empty value.
    history_display = gr.Dataframe(headers=["Question", "Réponse"], interactive=False)

    # The current history is passed in and the updated one written back
    submit_btn.click(fn=chat_interface, inputs=[question, history_display], outputs=[response, history_display])

if __name__ == "__main__":
    demo.launch()
|
importHuggingFaceHubModel.py
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Author : ZHAN Pascal
|
2 |
+
# Date: 09/03/2025
|
3 |
+
# Project: SAE-GPT2 | BUT 3 Informatique - Semester 5
|
4 |
+
|
5 |
+
"""
|
6 |
+
https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/keras_mixin.py#L397
|
7 |
+
It seems the function 'from_pretrained_keras' from Hugging Face's 'huggingface_hub' is not working.
|
8 |
+
Let's rewrite the code to fix it locally.
|
9 |
+
|
10 |
+
To load the model, it's using 'tf.keras.models.load_model', but it's providing a folder instead of the path to the model file
|
11 |
+
So, we'll search for the first file with the .keras extension in the folder. If None is found then it will raise an error.
|
12 |
+
"""
|
13 |
+
|
14 |
+
from huggingface_hub import ModelHubMixin, snapshot_download
|
15 |
+
import os
|
16 |
+
from huggingface_hub.utils import (
|
17 |
+
get_tf_version,
|
18 |
+
is_tf_available,
|
19 |
+
)
|
20 |
+
|
21 |
+
def from_pretrained_keras(*args, **kwargs) -> "KerasModelHubMixin":
    r"""
    Instantiate a pretrained Keras model from the Hub or from a local folder.

    Thin functional wrapper: every argument is forwarded unchanged to
    `KerasModelHubMixin.from_pretrained`.

    Unlike the upstream `huggingface_hub` helper this replaces, the model is
    NOT expected in `SavedModel` format. The repository or folder must
    contain exactly one file ending in `.keras`, which is the file loaded.

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            Can be either:
                - A string, the `model id` of a model repo hosted on
                  huggingface.co, e.g. `DracolIA/GPT-2-LoRA-HealthCare`.
                - A path to a local `directory` containing the `.keras`
                  model file, e.g. `./my_model_directory/`.
        revision (`str`, *optional*):
            Branch name, tag name or commit id of the repo to download.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether to force the (re-)download of the files, overriding the
            cached versions if they exist.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g.,
            `{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`.
        token (`str` or `bool`, *optional*):
            The token to use as HTTP bearer authorization for remote files.
        cache_dir (`Union[str, os.PathLike]`, *optional*):
            Path to a directory in which the downloaded files should be cached
            if the standard cache should not be used.
        local_files_only (`bool`, *optional*, defaults to `False`):
            Whether to only look at local files (i.e., do not try to download
            the model).
        model_kwargs (`Dict`, *optional*):
            Extra keyword arguments. `config` is stored on the returned model;
            the rest are passed to `tf.keras.models.load_model`.

    Returns:
        The loaded Keras model, with the optional `config` attached as
        `model.config`.

    Raises:
        ImportError: If TensorFlow is not available.
        ValueError: If the folder contains no `.keras` file, or more than one.

    NOTE(review): the exact set of keyword arguments accepted is defined by
    `ModelHubMixin.from_pretrained` in the installed `huggingface_hub`
    version; confirm against that version's documentation.
    """
    return KerasModelHubMixin.from_pretrained(*args, **kwargs)
|
74 |
+
|
75 |
+
|
76 |
+
class KerasModelHubMixin(ModelHubMixin):
    """
    Local replacement for `huggingface_hub.KerasModelHubMixin` that loads a
    model stored as a single `.keras` file.

    Only the download/load path (`_from_pretrained`) is implemented in this
    class; no saving or uploading logic is defined here.

    ```python
    >>> from importHuggingFaceHubModel import from_pretrained_keras

    >>> # Download (or reuse the cached snapshot of) the repo and load its .keras file
    >>> model = from_pretrained_keras("username/super-cool-model")
    ```
    """

    @classmethod
    def _from_pretrained(
        cls,
        model_id,
        revision,
        cache_dir,
        force_download,
        proxies,
        resume_download,
        local_files_only,
        token,
        **model_kwargs,
    ):
        """Download the repo snapshot if needed and load its single `.keras` file.

        Args:
            model_id: Hub repo id, or path to an existing local directory.
            revision: Branch, tag or commit id passed to `snapshot_download`.
            cache_dir: Cache directory passed to `snapshot_download`.
            force_download: Passed to `snapshot_download`.
            proxies: Passed to `snapshot_download`.
            resume_download: Accepted for interface compatibility; not used.
            local_files_only: Passed to `snapshot_download`.
            token: Passed to `snapshot_download` (needed for private repos).
            **model_kwargs: `config` is popped and stored on the model; the
                remaining entries go to `tf.keras.models.load_model`.

        Returns:
            The loaded Keras model with a `config` attribute.

        Raises:
            ImportError: If TensorFlow cannot be imported.
            ValueError: If the folder holds no `.keras` file, or more than one.
        """
        if is_tf_available():
            import tensorflow as tf
        else:
            raise ImportError("Called a TensorFlow-specific function but could not import it.")

        # TODO - Figure out what to do about these config values. Config is not going to be needed to load model
        cfg = model_kwargs.pop("config", None)

        # Root is either a local filepath matching model_id or a cached snapshot
        if not os.path.isdir(model_id):
            storage_folder = snapshot_download(
                repo_id=model_id,
                revision=revision,
                cache_dir=cache_dir,
                # Forward the download options instead of silently dropping
                # them; without `token`, private repositories cannot be read.
                force_download=force_download,
                proxies=proxies,
                local_files_only=local_files_only,
                token=token,
                library_name="keras",
                library_version=get_tf_version(),
            )
        else:
            storage_folder = model_id

        # Exactly one .keras file must be present so the choice is unambiguous
        keras_files = [name for name in os.listdir(storage_folder) if name.endswith(".keras")]

        if not keras_files:
            raise ValueError("Repository does not have model that ends with .keras!!!")

        if len(keras_files) > 1:
            raise ValueError("Too many models!!!")

        model_path = os.path.join(storage_folder, keras_files[0])

        model = tf.keras.models.load_model(model_path, **model_kwargs)

        # For now, we add a new attribute, config, to store the config loaded from the hub/a local dir.
        model.config = cfg

        return model
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
gradio
|