habdine committed on
Commit
1e4e98d
1 Parent(s): 73d2dca

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +25 -0
  2. README.md +7 -6
  3. app.py +125 -0
  4. style.css +11 -0
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Container image for the Prot2Text Gradio demo.
# Single base image: the original stacked `FROM ubuntu:22.04` before
# `FROM python:3.9`, which silently discarded the entire Ubuntu stage.
FROM python:3.9

# System packages must be installed as root, *before* dropping privileges
# (the original ran apt-get after `USER user`, which fails without root,
# and `apt-get install wget` lacked -y, breaking non-interactive builds).
RUN apt-get update && \
    apt-get install -y --no-install-recommends wget git-all dssp && \
    rm -rf /var/lib/apt/lists/*

# Run the application as an unprivileged user.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"
# Bind Gradio to all interfaces so the server is reachable from outside
# the container (Gradio reads this environment variable at launch).
ENV GRADIO_SERVER_NAME="0.0.0.0"

WORKDIR /app

RUN pip install --upgrade pip
RUN pip install git+https://github.com/a-r-j/graphein.git
RUN pip install numpy scipy torch==2.2 transformers==4.44.2
RUN pip install torch-geometric
RUN pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.2.0+cpu.html
RUN pip install gradio

COPY --chown=user . /app
# app.py starts a Gradio Blocks app via demo.launch(); it is not an ASGI
# application, so the original `uvicorn app:app` CMD could never start it.
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
  title: Prot2Text
3
- emoji: 🦀
4
- colorFrom: yellow
5
- colorTo: gray
6
- sdk: docker
 
 
7
  pinned: false
8
- license: cc-by-nc-4.0
9
- short_description: Protein function prediction using its structure and sequence
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Prot2Text
3
+ emoji: 🧬
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 5.1.0
8
+ app_file: app.py
9
  pinned: false
10
+ short_description: Protein function prediction chatbot
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
from threading import Thread
from typing import Iterator

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Markdown banner rendered above the chat UI (typo fixed: "funtion").
DESCRIPTION = """\
# Prot2Text Demo

A demo to generate a protein's function with its amino acid sequence and its structure using [Prot2Text Base v1.1](https://huggingface.co/habdine/Prot2Text-Base-v1-1). To test this model, only enter below, the AlphaFoldDB ID of the protein.
"""

# Hard upper bound and default value for the "Max new tokens" slider.
MAX_MAX_NEW_TOKENS = 256
DEFAULT_MAX_NEW_TOKENS = 100


# Prefer the first CUDA device when available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


# The Prot2Text checkpoint ships custom modeling code on the Hub, hence
# trust_remote_code=True for both tokenizer and model.
tokenizer = AutoTokenizer.from_pretrained('habdine/Prot2Text-Base-v1-1',
                                          trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained('habdine/Prot2Text-Base-v1-1',
                                             trust_remote_code=True).to(device)
model.eval()  # inference only: disables dropout / training-mode layers
@spaces.GPU(duration=90)
def generate(
    message: str,
    chat_history: list[dict],
    max_new_tokens: int = 1024,
    do_sample: bool = False,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[str]:
    """Stream a generated protein description for ``message``.

    Parameters
    ----------
    message:
        The user input (an AlphaFoldDB ID per the demo description);
        forwarded to the model as ``protein_sequence``.
    chat_history:
        Prior conversation supplied by ``gr.ChatInterface`` (unused).
    max_new_tokens, do_sample, temperature, top_p, top_k, repetition_penalty:
        Decoding knobs forwarded to the model's generation call.

    Yields
    ------
    str
        The description generated so far, growing one streamed chunk
        at a time.
    """
    # Enforce the UI's declared hard cap even if a caller passes more
    # (the signature default of 1024 exceeds MAX_MAX_NEW_TOKENS = 256).
    max_new_tokens = min(max_new_tokens, MAX_MAX_NEW_TOKENS)

    # skip_prompt/skip_special_tokens: only newly generated text is streamed.
    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        protein_sequence=message,
        tokenizer=tokenizer,
        device=device,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        num_beams=1,
        repetition_penalty=repetition_penalty,
    )
    # Run generation on a worker thread so this generator can consume the
    # streamer incrementally and yield partial output to the UI.
    t = Thread(target=model.generate_protein_description, kwargs=generate_kwargs)
    t.start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)
# Chat UI: `generate` streams text; the widgets below are mapped, in order,
# onto its keyword parameters after (message, chat_history).
chat_interface = gr.ChatInterface(
    fn=generate,
    additional_inputs=[
        gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS),
        gr.Checkbox(label="Do Sample"),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6),
        gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9),
        gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50),
        gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.0),
    ],
    stop_btn=None,
    # Example AlphaFoldDB IDs the user can click to try the model.
    examples=[["P0A0V1"], ["Q10MK9"], ["A0A0P0W604"], ["Q6K5W5"], ["Q65WY8"]],
    cache_examples=False,
    type="messages",
)

# Page layout: description banner, duplicate button, then the chat widget.
with gr.Blocks(css_paths="style.css", fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
    chat_interface.render()

if __name__ == "__main__":
    demo.queue(max_size=20).launch()
style.css ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
/* Center the page title rendered from the Markdown banner. */
h1 {
  display: block;
  text-align: center;
}

/* Centered, pill-shaped "Duplicate Space" button. */
#duplicate-button {
  margin: auto;
  color: #fff;
  background: #1565c0;
  border-radius: 100vh;
}