AdamNovotnyCom committed
Commit f0a60ae
1 Parent(s): 102f247

multiple models

Files changed (4)
  1. Dockerfile +2 -0
  2. Dockerfile_dev +2 -0
  3. app.py +46 -49
  4. docker-compose.yml +1 -0
Dockerfile CHANGED
@@ -20,6 +20,8 @@ RUN pip install -r requirements.txt
 
 EXPOSE 7860
 
+ENV MODEL=llama
+
 RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true
 
 CMD ["python", "app.py"]
Dockerfile_dev CHANGED
@@ -20,5 +20,7 @@ RUN pip install -r requirements.txt
 
 EXPOSE 7860
 
+ENV MODEL=googleflan
+
 # with reload
 CMD ["gradio", "app.py"]
app.py CHANGED
@@ -7,64 +7,61 @@ from transformers import AutoTokenizer
 
 logging.basicConfig(level=logging.INFO)
 
-print("APP startup")
-
-# pipe_flan = transformers.pipeline("text2text-generation", model="google/flan-t5-small")
-# def google_flan(input_text, request: gr.Request):
-#     print(f"Input request: {input_text}")
-#     print(request.query_params)
-#     print(os.environ.get("HF_TOKEN")[:5])
-#     logging.info(os.environ.get("HF_TOKEN")[:5])
-
-#     # with open("/home/user/app/.secrets/HF_TOKEN", "r") as f:
-#     #     var = f.readlines()[0].strip()
-#     #     print(var[:5])
-#     #     logging.info(var[:5])
-#     return pipe_flan(input_text)
-
-model = "meta-llama/Llama-2-7b-chat-hf"
-tokenizer = AutoTokenizer.from_pretrained(
-    model,
-    token=os.environ.get("HF_TOKEN"),
-)
-pipeline = transformers.pipeline(
-    "text-generation",
-    model=model,
-    torch_dtype=torch.float16,
-    device_map="auto",
-    token=os.environ.get("HF_TOKEN"),
-    low_cpu_mem_usage=True,
-)
-
-def llama2(input_text, request: gr.Request):
-    sequences = pipeline(
-        input_text,
-        do_sample=True,
-        top_k=10,
-        num_return_sequences=1,
-        eos_token_id=tokenizer.eos_token_id,
-        max_length=200,
+if "googleflan" == os.environ.get("MODEL"):
+    model = "google/flan-t5-small"
+    logging.info(f"APP startup. Model {model}")
+    pipe_flan = transformers.pipeline("text2text-generation", model=model)
+    def model_func(input_text, request: gr.Request):
+        print(f"Input request: {input_text}")
+        print(request.query_params)
+        print(os.environ.get("HF_TOKEN")[:5])
+        logging.info(os.environ.get("HF_TOKEN")[:5])
+        return pipe_flan(input_text)
+elif "llama" == os.environ.get("MODEL"):
+    model = "meta-llama/Llama-2-7b-chat-hf"
+    logging.info(f"APP startup. Model {model}")
+    tokenizer = AutoTokenizer.from_pretrained(
+        model,
+        token=os.environ.get("HF_TOKEN"),
+    )
+    pipeline = transformers.pipeline(
+        "text-generation",
+        model=model,
+        torch_dtype=torch.float16,
+        device_map="auto",
+        token=os.environ.get("HF_TOKEN"),
     )
-    if "name" in list(request.query_params):
-        output_text = f"{request.query_params['name']}:\n"
-    else:
-        output_text = ""
-    for seq in sequences:
-        output_text += seq["generated_text"] + "\n"
-    return output_text
+
+    def model_func(input_text, request: gr.Request):
+        sequences = pipeline(
+            input_text,
+            do_sample=True,
+            top_k=10,
+            num_return_sequences=1,
+            eos_token_id=tokenizer.eos_token_id,
+            max_length=200,
+        )
+        if "name" in list(request.query_params):
+            output_text = f"{request.query_params['name']}:\n"
+        else:
+            output_text = ""
+        for seq in sequences:
+            output_text += seq["generated_text"] + "\n"
+        return output_text
 
 demo = gr.Interface(
-    # fn=google_flan,
-    fn=llama2,
-    inputs="text",
-    # outputs="text",
+    fn=model_func,
+    inputs=gr.Textbox(
+        label="How can I help?",
+        lines=3,
+        value="",
+    ),
     outputs=gr.Textbox(
         label="LLM",
         lines=5,
         value="",
     ),
     allow_flagging=False,
-    title="How can I help?",
     theme=gr.themes.Default(primary_hue="blue", secondary_hue="pink")
 )
 
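As a sanity check of the new MODEL dispatch, a minimal sketch that exercises only the lighter googleflan branch outside Gradio; the prompt and the placeholder token are illustrative, and the packages from requirements.txt (transformers, torch) are assumed to be installed.

# Reproduces what the "googleflan" branch builds at startup, without the web UI.
import os
os.environ["MODEL"] = "googleflan"
os.environ.setdefault("HF_TOKEN", "hf_placeholder")  # model_func only logs its first 5 chars

import transformers

pipe_flan = transformers.pipeline("text2text-generation", model="google/flan-t5-small")
print(pipe_flan("Translate English to German: How old are you?"))
# Expected shape: [{'generated_text': '...'}]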
docker-compose.yml CHANGED
@@ -11,6 +11,7 @@ services:
     working_dir: /home/user/app
     environment:
       - HF_TOKEN=${HF_TOKEN}
+      - MODEL=googleflan
     stdin_open: true
     tty: true
     restart: always
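
A hedged client-side check against the dev container this compose file starts, assuming the Gradio port 7860 is published to the host and the gradio_client package is installed; the prompt is illustrative.

# Call the gr.Interface defined in app.py over HTTP (its default endpoint is "/predict").
from gradio_client import Client

client = Client("http://localhost:7860")
result = client.predict("What is the capital of France?", api_name="/predict")
print(result)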