AdamNovotnyCom committed
Commit
102f247
1 Parent(s): a2ac7df
Files changed (2)
  1. Dockerfile +0 -1
  2. app.py +44 -38
Dockerfile CHANGED
@@ -20,7 +20,6 @@ RUN pip install -r requirements.txt
 
 EXPOSE 7860
 
-RUN mkdir -p /home/user/app/.secrets
 RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true
 
 CMD ["python", "app.py"]
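
For context, a hedged sketch of how a BuildKit build-time secret such as HF_TOKEN is typically consumed (the command below is an assumed example, not part of this commit): the secret is mounted as a read-only file under /run/secrets/<id> only for the duration of that single RUN instruction, so it is never baked into the image layers.

# Sketch only: verifying the mounted secret exists; assumed usage, not from this repo
RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true \
    test -s /run/secrets/HF_TOKEN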
app.py CHANGED
@@ -9,49 +9,55 @@ logging.basicConfig(level=logging.INFO)
 
 print("APP startup")
 
-pipe_flan = transformers.pipeline("text2text-generation", model="google/flan-t5-small")
-def google_flan(input_text, request: gr.Request):
-    print(f"Input request: {input_text}")
-    print(request.query_params)
-    print(os.environ.get("HF_TOKEN")[:5])
-    logging.info(os.environ.get("HF_TOKEN")[:5])
-    # with open("/home/user/app/.secrets/HF_TOKEN", "r") as f:
-    #     var = f.readlines()[0].strip()
-    # print(var[:5])
-    # logging.info(var[:5])
-    return pipe_flan(input_text)
-
-# model = "meta-llama/Llama-2-7b-chat-hf"
-# tokenizer = AutoTokenizer.from_pretrained(
-#     model,
-#     token=os.environ.get("HF_TOKEN"),
-# )
-# pipeline = transformers.pipeline(
-#     "text-generation",
-#     model=model,
-#     torch_dtype=torch.float16,
-#     device_map="auto",
-#     token=os.environ.get("HF_TOKEN"),
-#     low_cpu_mem_usage=True,
-# )
-
-# def llama2(input_text):
-#     sequences = pipeline(
-#         input_text,
-#         do_sample=True,
-#         top_k=10,
-#         num_return_sequences=1,
-#         eos_token_id=tokenizer.eos_token_id,
-#         max_length=200,
-#     )
-#     output_text = ""
-#     for seq in sequences:
-#         output_text += seq["generated_text"] + "\n"
-#     return output_text
+# pipe_flan = transformers.pipeline("text2text-generation", model="google/flan-t5-small")
+# def google_flan(input_text, request: gr.Request):
+#     print(f"Input request: {input_text}")
+#     print(request.query_params)
+#     print(os.environ.get("HF_TOKEN")[:5])
+#     logging.info(os.environ.get("HF_TOKEN")[:5])
+
+#     # with open("/home/user/app/.secrets/HF_TOKEN", "r") as f:
+#     #     var = f.readlines()[0].strip()
+#     # print(var[:5])
+#     # logging.info(var[:5])
+#     return pipe_flan(input_text)
+
+model = "meta-llama/Llama-2-7b-chat-hf"
+tokenizer = AutoTokenizer.from_pretrained(
+    model,
+    token=os.environ.get("HF_TOKEN"),
+)
+pipeline = transformers.pipeline(
+    "text-generation",
+    model=model,
+    torch_dtype=torch.float16,
+    device_map="auto",
+    token=os.environ.get("HF_TOKEN"),
+    low_cpu_mem_usage=True,
+)
+
+def llama2(input_text, request: gr.Request):
+    sequences = pipeline(
+        input_text,
+        do_sample=True,
+        top_k=10,
+        num_return_sequences=1,
+        eos_token_id=tokenizer.eos_token_id,
+        max_length=200,
+    )
+    if "name" in list(request.query_params):
+        output_text = f"{request.query_params['name']}:\n"
+    else:
+        output_text = ""
+    for seq in sequences:
+        output_text += seq["generated_text"] + "\n"
+    return output_text
 
 demo = gr.Interface(
-    fn=google_flan,
+    # fn=google_flan,
+    fn=llama2,
     inputs="text",
+    # outputs="text",
     outputs=gr.Textbox(
         label="LLM",
         lines=5,
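
As a hedged illustration of the gr.Request pattern the new llama2() handler relies on (not part of the commit): when a function parameter is annotated with gr.Request, Gradio injects the incoming HTTP request, and request.query_params exposes URL query parameters. The minimal self-contained sketch below uses greet() as an assumed stand-in for the heavier Llama-2 pipeline, and the launch settings are assumptions chosen to match the Dockerfile's EXPOSE 7860.

import gradio as gr

def greet(input_text, request: gr.Request):
    # Mirrors the commit's pattern: prefix the output with the "name" query
    # parameter when the app is opened as e.g. https://<space-url>/?name=Alice
    if "name" in list(request.query_params):
        output_text = f"{request.query_params['name']}:\n"
    else:
        output_text = ""
    return output_text + input_text

demo = gr.Interface(fn=greet, inputs="text", outputs=gr.Textbox(label="LLM", lines=5))

if __name__ == "__main__":
    # Assumed launch settings; 7860 matches the port exposed in the Dockerfile
    demo.launch(server_name="0.0.0.0", server_port=7860)

Opening the running app with ?name=Alice appended to the URL should then prefix the generated text with "Alice:", which is the behavior the commit adds to llama2().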