Spaces:
Running
Running
Commit
·
446ee68
1
Parent(s):
055887b
Add app
Browse files- Dockerfile +28 -0
- LICENSE +21 -0
- README.md +1 -0
- app.py +74 -0
- requirements.txt +3 -0
Dockerfile
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.11

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user

# Switch to the "user" user so that everything below runs without root
USER user

# Set home to the user's home directory and expose user-level pip scripts
# (e.g. the `solara` executable) on PATH
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the app directory inside the user's home
WORKDIR $HOME/app

# Run pip after `USER user` to avoid permission issues with Python
RUN pip install --no-cache-dir --upgrade pip

# Copy only the dependency manifest first: the install layer below is then
# rebuilt only when requirements.txt changes, not on every source edit
COPY --chown=user requirements.txt .

RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the rest of the build context (including app.py) into $HOME/app,
# setting the owner to the user
COPY --chown=user . $HOME/app

ENTRYPOINT ["solara", "run", "app.py", "--host=0.0.0.0", "--port", "7860"]
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 Alonso Silva Allende
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
CHANGED
@@ -6,6 +6,7 @@ colorTo: pink
|
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
license: mit
|
|
|
9 |
---
|
10 |
|
11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
license: mit
|
9 |
+
app_port: 7860
|
10 |
---
|
11 |
|
12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
import random
|
4 |
+
import solara
|
5 |
+
import torch
|
6 |
+
import torch.nn.functional as F
|
7 |
+
|
8 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
9 |
+
|
10 |
+
# GPT-2 tokenizer and language model, loaded once at import time and used by
# the Page component below.
tokenizer = AutoTokenizer.from_pretrained("gpt2", padding_side="left")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Reactive state holding the passage to analyse; it is bound to the text input
# rendered by Page.
text1 = solara.reactive("One, two, three, four, mango")
|
14 |
+
# Upper bounds (in percentage points) on the gap between the probability of
# the model's top prediction and the probability of the token that actually
# occurred, paired with the CSS class used to highlight that token.  Ordered
# from least to most surprising: green -> yellow -> orange (-> red fallback).
_SURPRISE_LEVELS = (
    (1, "mystronggreen"),
    (10, "mygreen"),
    (20, "myyellow"),
    (30, "myorange"),
)


def _highlight_class(gap):
    """Return the CSS class for a probability gap given in percentage points."""
    for upper_bound, css_class in _SURPRISE_LEVELS:
        if gap < upper_bound:
            return css_class
    return "myred"


@solara.component
def Page():
    """Render the perplexity explorer.

    Shows a text input bound to ``text1``.  For every token of the entered
    text, the token is displayed with a background colour reflecting how far
    its probability is from the model's top prediction, and a tooltip lists
    the probability of the actual token followed by the model's ten most
    likely next tokens at that position.
    """
    with solara.Column(margin="10"):
        solara.Markdown("#Perplexity")
        solara.Markdown("This is an educational tool. For any given passage of text, this tool augments the original text with highlights and annotations that indicate how 'surprising' each token is to the model, as well as which other tokens the model deemed most likely to occur in its place.")
        css = """
        .mystronggreen{
        background-color:#99ff99;
        color:black!important;
        padding:0px;
        }
        .mygreen{
        background-color:#ccffcc;
        color:black!important;
        }
        .myyellow{
        background-color: #ffff99;
        color:black!important;
        }
        .myorange{
        background-color: #ffcc99;
        color:black!important;
        }
        .myred{
        background-color:#ffcab0;
        color:black!important;
        }
        """
        # The stylesheet does not depend on the token, so emit it once rather
        # than once per highlighted token.
        solara.Style(css)
        solara.InputText("Enter text and press enter when you're done:", value=text1, continuous_update=False)
        if text1.value != "":
            with solara.VBox():
                with solara.HBox(align_items="stretch"):
                    # Prepend the EOS token so that the first real token also
                    # has a preceding context to be predicted from.
                    token_ids = torch.cat((
                        torch.tensor([tokenizer.eos_token_id]),
                        tokenizer.encode(text1.value, return_tensors="pt")[0],
                    ))
                    # One forward pass yields the next-token distribution for
                    # every position at once: row i predicts token i + 1.
                    with torch.no_grad():
                        logits = model(token_ids.reshape(1, -1)).logits[0]
                    probs = F.softmax(logits, dim=-1)

                    for position in range(len(token_ids) - 1):
                        actual_id = token_ids[position + 1]
                        actual_text = tokenizer.decode(actual_id)
                        actual_prob = probs[position][actual_id].item()

                        top_10 = torch.topk(probs[position], 10)
                        top_probs = top_10.values.tolist()

                        # Gap (percentage points) between the actual token and
                        # the model's single most likely token.
                        gap = 100 * abs(actual_prob - top_probs[0])
                        color = _highlight_class(gap)

                        # First row is the actual token, then the top ten.
                        df = pd.DataFrame({
                            "probs": [f"{p:.2%}" for p in [actual_prob, *top_probs]],
                            "predicted next token": [actual_text] + [tokenizer.decode(token_id) for token_id in top_10.indices],
                        })
                        solara_df = solara.DataFrame(df, items_per_page=11)
                        with solara.Tooltip(solara_df, color="white"):
                            solara.Text(f"{actual_text}|", classes=[color])


# NOTE(review): kept from the original file; presumably only needed to display
# the component when the module is run in a notebook -- confirm.
Page()
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
solara
|
2 |
+
pandas
|
3 |
+
transformers[torch]
|