AdamNovotnyCom committed
Commit 15a1b0b • Parent(s): f0a60ae

multiple models

Files changed:
- Dockerfile +0 -2
- Dockerfile_dev +0 -2
- app.py +3 -8
Dockerfile CHANGED

@@ -20,8 +20,6 @@ RUN pip install -r requirements.txt
 
 EXPOSE 7860
 
-ENV MODEL=llama
-
 RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true
 
 CMD ["python", "app.py"]
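Note: with ENV MODEL=llama gone, the image no longer bakes the model choice in at build time; the variable has to be injected by the runtime instead (for example a Space variable, or docker run -e MODEL=llama). A minimal sketch of a startup guard app.py could add, assuming only the two MODEL values this repo checks; the SUPPORTED_MODELS name is made up here:

import os
import sys

# Hypothetical guard: MODEL is no longer set in the Dockerfile, so fail fast
# with a clear message when the runtime forgot to inject it.
SUPPORTED_MODELS = ("googleflan", "llama")  # the two branches app.py tests

model_choice = os.environ.get("MODEL")
if model_choice not in SUPPORTED_MODELS:
    sys.exit(f"Set MODEL to one of {SUPPORTED_MODELS}; got {model_choice!r}")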
Dockerfile_dev CHANGED

@@ -20,7 +20,5 @@ RUN pip install -r requirements.txt
 
 EXPOSE 7860
 
-ENV MODEL=googleflan
-
 # with reload
 CMD ["gradio", "app.py"]
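Note: the dev image loses its default the same way, so a development run presumably needs the variable passed explicitly as well (e.g. docker run -e MODEL=googleflan). The gradio app.py entrypoint is what provides the auto-reload mentioned in the comment.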
app.py CHANGED

@@ -6,20 +6,15 @@ import transformers
 from transformers import AutoTokenizer
 
 logging.basicConfig(level=logging.INFO)
+logging.info(f"APP startup")
 
 if "googleflan" == os.environ.get("MODEL"):
     model = "google/flan-t5-small"
-    logging.info(f"APP startup. Model {model}")
     pipe_flan = transformers.pipeline("text2text-generation", model=model)
     def model_func(input_text, request: gr.Request):
-        print(f"Input request: {input_text}")
-        print(request.query_params)
-        print(os.environ.get("HF_TOKEN")[:5])
-        logging.info(os.environ.get("HF_TOKEN")[:5])
         return pipe_flan(input_text)
 elif "llama" == os.environ.get("MODEL"):
     model = "meta-llama/Llama-2-7b-chat-hf"
-    logging.info(f"APP startup. Model {model}")
     tokenizer = AutoTokenizer.from_pretrained(
         model,
         token=os.environ.get("HF_TOKEN"),
@@ -27,7 +22,7 @@ elif "llama" == os.environ.get("MODEL"):
     pipeline = transformers.pipeline(
         "text-generation",
         model=model,
-        torch_dtype=torch.
+        torch_dtype=torch.float32,
         device_map="auto",
         token=os.environ.get("HF_TOKEN"),
     )
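Note: the added torch_dtype=torch.float32 replaces a line that is truncated in this view (torch_dtype=torch.). float32 is the safe choice when the container may land on CPU; half-precision dtypes such as torch.float16 generally require a GPU. If the dtype ever needs to vary per environment, one illustrative option (the TORCH_DTYPE variable is hypothetical):

import os

import torch

# Hypothetical: choose the pipeline dtype from an env var, defaulting to the
# CPU-safe float32 that this commit hard-codes.
_DTYPES = {"float32": torch.float32, "float16": torch.float16, "bfloat16": torch.bfloat16}
torch_dtype = _DTYPES.get(os.environ.get("TORCH_DTYPE", "float32"), torch.float32)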
@@ -57,7 +52,7 @@ demo = gr.Interface(
         value="",
     ),
     outputs=gr.Textbox(
-        label="
+        label=f"Model: {os.environ.get('MODEL')}",
         lines=5,
         value="",
     ),
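Note: the output label is now an f-string over os.environ.get('MODEL'), so the UI itself advertises which backend is serving. A self-contained sketch of the same idea (the echo function is a placeholder for the real pipelines):

import os

import gradio as gr

# Placeholder standing in for pipe_flan / the Llama pipeline.
def model_func(input_text):
    return f"echo: {input_text}"

demo = gr.Interface(
    fn=model_func,
    inputs=gr.Textbox(lines=5, value=""),
    # The label advertises which model the container was started with.
    outputs=gr.Textbox(label=f"Model: {os.environ.get('MODEL')}", lines=5, value=""),
)

if __name__ == "__main__":
    demo.launch()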
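Note: taken together, app.py now emits one generic startup log line, picks the pipeline from MODEL, and no longer prints the first five characters of HF_TOKEN (even a prefix of a secret does not belong in logs). The if/elif chain could also be written as a lookup table; a purely illustrative sketch:

import logging
import os

import transformers

logging.basicConfig(level=logging.INFO)
logging.info("APP startup")

# Illustrative alternative to the if/elif dispatch: map MODEL values to
# (task, checkpoint) pairs and build the chosen pipeline once.
PIPELINES = {
    "googleflan": ("text2text-generation", "google/flan-t5-small"),
    "llama": ("text-generation", "meta-llama/Llama-2-7b-chat-hf"),
}

task, checkpoint = PIPELINES[os.environ["MODEL"]]
pipe = transformers.pipeline(
    task,
    model=checkpoint,
    token=os.environ.get("HF_TOKEN"),  # required for gated checkpoints such as Llama-2
)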