sergiomar73
committed on
Commit
•
7cec07d
1
Parent(s):
e63d545
Update app.py
Browse files
app.py
CHANGED
@@ -1,154 +1,51 @@
|
|
1 |
-
# https://huggingface.co/tasks/token-classification
|
2 |
-
# https://huggingface.co/spacy/en_core_web_sm
|
3 |
-
# pip install https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
|
4 |
-
|
5 |
import gradio as gr
|
6 |
import os
|
7 |
-
import time
|
8 |
import openai
|
9 |
-
import numpy as np
|
10 |
-
import pandas as pd
|
11 |
-
import spacy
|
12 |
-
import en_core_web_sm
|
13 |
-
import plotly.express as px
|
14 |
|
15 |
openai.organization = os.environ.get('ORGANIZATION')
|
16 |
openai.api_key = os.environ.get('API_KEY')
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
# The following text inside one of this categories: Entertainment, Business, Politics
|
21 |
-
# This dull recreation of the animated film doesn’t strive for anything more than what was contained in the original version of this film and actually delivers less.
|
22 |
-
# Category: Entertainment
|
23 |
|
24 |
-
|
25 |
-
doc = nlp(text)
|
26 |
-
sentences = [ sentence.text for sentence in list(doc.sents) ]
|
27 |
-
# print(sentences[:3])
|
28 |
-
return sentences
|
29 |
-
|
30 |
-
def calculate_embeddings_with_gpt3(text, engine="text-similarity-davinci-001", interval = 1.5, verbose=True):
    """Request an embedding vector for ``text`` from the OpenAI API.

    Args:
        text: The string to embed.
        engine: Name of the OpenAI engine passed to ``Embedding.create``.
        interval: Seconds to sleep before issuing the request.
        verbose: When true, print a progress line before the request.

    Returns:
        The ``embedding`` field of the first item in the response's
        ``data`` list.
    """
    if verbose:
        print(f'Calculating embedding for {text}...')

    # Pause before every request (presumably to stay under the API rate
    # limit -- confirm before removing).
    time.sleep(interval)

    result = openai.Embedding.create(engine=engine, input=text)
    first_item = result['data'][0]
    return first_item['embedding']
|
40 |
-
|
41 |
-
def gpt3_zero_shot_classification(text, labels):
|
42 |
-
|
43 |
-
df_sentences = pd.DataFrame(columns=['line', 'sentence', 'embedding'])
|
44 |
-
for idx, sentence in enumerate(text_to_sentences(text)):
|
45 |
-
embedding = calculate_embeddings_with_gpt3(sentence)
|
46 |
-
# Create new row
|
47 |
-
new_row = {
|
48 |
-
'line': idx + 1,
|
49 |
-
'sentence': sentence,
|
50 |
-
'embedding': embedding
|
51 |
-
}
|
52 |
-
df_sentences = df_sentences.append(new_row, ignore_index=True)
|
53 |
-
# print(df_sentences.shape)
|
54 |
-
# df_sentences.head()
|
55 |
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
source = np.array(row["embedding"])
|
66 |
-
cosine = np.dot(targets,source)/(np.linalg.norm(targets, axis=1)*np.linalg.norm(source))
|
67 |
-
# Create new row
|
68 |
-
new_row = dict([(f"Cosine{f'{key:02}'}", value) for key, value in enumerate(cosine.flatten(), 1)])
|
69 |
-
new_row["line"] = row["line"]
|
70 |
-
df_cosines = df_cosines.append(new_row, ignore_index=True)
|
71 |
-
|
72 |
-
df_cosines['line'] = df_cosines['line'].astype('int')
|
73 |
-
# print(df_cosines.shape)
|
74 |
-
# df_cosines.head(3)
|
75 |
-
|
76 |
-
df_comparison = df_cosines #[(df_cosines.filter(regex='Cosine') > threshold).any(axis=1)]
|
77 |
-
# print(df_comparison.shape)
|
78 |
-
# df_comparison.head(3)
|
79 |
-
|
80 |
-
threshold = threshold / 100
|
81 |
-
|
82 |
-
df_results = pd.DataFrame(columns=['line', 'sentence', 'phrase', 'category', 'tag', 'similarity'])
|
83 |
-
|
84 |
-
for i, row in df_comparison.iterrows():
|
85 |
-
for n in range(1,64+1):
|
86 |
-
col = f"Cosine{f'{n:02}'}"
|
87 |
-
# if row[col] > threshold:
|
88 |
-
phrase = df_phrases.loc[[ n - 1 ]]
|
89 |
-
new_row = {
|
90 |
-
'line': row["line"],
|
91 |
-
'sentence': df_sentences.at[int(row["line"])-1,"sentence"],
|
92 |
-
'phrase': df_phrases.at[n-1,"example"],
|
93 |
-
'category': df_phrases.at[n-1,"category"],
|
94 |
-
'tag': df_phrases.at[n-1,"label"],
|
95 |
-
'similarity': row[col]
|
96 |
-
}
|
97 |
-
df_results = df_results.append(new_row, ignore_index=True)
|
98 |
-
|
99 |
-
df_results['line'] = df_cosines['line'].astype('int')
|
100 |
-
# print(df_results.shape)
|
101 |
-
# df_results.head(3)
|
102 |
-
|
103 |
-
df_summary = df_results.groupby(['tag'])['similarity'].agg('max').to_frame()
|
104 |
-
df_summary['ok'] = np.where(df_summary['similarity'] > threshold, True, False)
|
105 |
-
# df_summary
|
106 |
-
|
107 |
-
fig = px.bar(
|
108 |
-
df_summary,
|
109 |
-
y='similarity',
|
110 |
-
color='ok',
|
111 |
-
color_discrete_map={ True: px.colors.qualitative.Plotly[2], False: px.colors.qualitative.Set2[7] },
|
112 |
-
text='similarity',
|
113 |
-
text_auto='.3f',
|
114 |
-
labels={'tag': 'Category', 'similarity': 'Similarity'},
|
115 |
-
title = f"{text[:200]}..."
|
116 |
-
)
|
117 |
-
fig.add_shape( # add a horizontal "target" line
|
118 |
-
type="line", line_color="salmon", line_width=3, opacity=1, line_dash="dot",
|
119 |
-
x0=0, x1=1, xref="paper", y0=threshold, y1=threshold, yref="y"
|
120 |
)
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
return res, fig, details
|
130 |
|
131 |
# Gradio UI
|
132 |
|
133 |
with gr.Blocks(css=".gradio-container { background-color: white; }") as demo:
|
134 |
gr.Markdown(f"# GPT-3 Zero shot classification app")
|
135 |
with gr.Row():
|
136 |
-
context = gr.Textbox(lines=3, label="Context", placeholder="Context
|
137 |
with gr.Row():
|
138 |
-
|
139 |
btn = gr.Button(value="Analyze!", variant="primary")
|
140 |
with gr.Row():
|
141 |
-
|
142 |
-
|
143 |
-
with gr.Row():
|
144 |
-
grid = gr.Dataframe(wrap=True)
|
145 |
-
btn.click(fn=gpt3_zero_shot_classification, inputs=[context,threshold], outputs=[label,plot,grid])
|
146 |
gr.Examples(
|
147 |
[
|
148 |
-
[ "", "Entertainment, Business, Politics" ],
|
149 |
-
[ "", "Entertainment, Business, Politics" ],
|
150 |
-
[ "", "Entertainment, Business, Politics" ],
|
151 |
-
[ "", "Entertainment, Business, Politics" ]
|
152 |
],
|
153 |
[context, threshold],
|
154 |
fn=gpt3_zero_shot_classification
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
|
|
3 |
import openai
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
openai.organization = os.environ.get('ORGANIZATION')
|
6 |
openai.api_key = os.environ.get('API_KEY')
|
7 |
|
8 |
+
def classificate_with_gpt3(text, labels, engine="text-similarity-davinci-001"):
    """Ask an OpenAI completion model which category ``text`` belongs to.

    Args:
        text: The passage to classify.
        labels: The candidate categories; interpolated verbatim into the
            prompt.
        engine: Name of the OpenAI model passed to ``Completion.create``.

    Returns:
        The text of the first completion choice with surrounding
        whitespace removed.
    """
    # NOTE(review): the default engine name looks like an embeddings model;
    # the completions endpoint most likely needs a text-generation model
    # instead -- confirm and update the default.
    prompt = (
        f"The following text inside one of this categories: {labels}:\n"
        "\n"
        f"{text}\n"
        "\n"
        "Category:"
    )
    response = openai.Completion.create(
        model=engine,
        prompt=prompt,
        temperature=0.7,
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    # Completion responses carry their output under 'choices'; the 'data'
    # key only exists on embedding responses.
    completion = response['choices'][0]['text'].strip()
    print(completion)
    return completion
|
26 |
+
|
27 |
+
def gpt3_zero_shot_classification(text, labels):
    """Classify ``text`` against ``labels``.

    Delegates to ``classificate_with_gpt3`` with its default engine and
    returns that function's result unchanged.
    """
    return classificate_with_gpt3(text, labels)
|
|
|
|
|
30 |
|
31 |
# Gradio UI
|
32 |
|
33 |
with gr.Blocks(css=".gradio-container { background-color: white; }") as demo:
|
34 |
gr.Markdown(f"# GPT-3 Zero shot classification app")
|
35 |
with gr.Row():
|
36 |
+
context = gr.Textbox(lines=3, label="Context", placeholder="Context here...")
|
37 |
with gr.Row():
|
38 |
+
labels = gr.Textbox(lines=3, label="Labels (Comma separated)", placeholder="Labels here...")
|
39 |
btn = gr.Button(value="Analyze!", variant="primary")
|
40 |
with gr.Row():
|
41 |
+
out = gr.Textbox()
|
42 |
+
btn.click(fn=gpt3_zero_shot_classification, inputs=[context,labels], outputs=[out])
|
|
|
|
|
|
|
43 |
gr.Examples(
|
44 |
[
|
45 |
+
[ "On 22 February 2014, Ukrainian president Viktor Yanukovych was ousted from office as a result of the Euromaidan and the Revolution of Dignity, which broke out after his decision to reject the European Union–Ukraine Association Agreement and instead pursue closer ties with Russia and the Eurasian Economic Union. Shortly after Yanukovych's overthrow and exile to Russia, Ukraine's eastern and southern regions erupted with pro-Russia unrest.", "Entertainment, Business, Politics" ],
|
46 |
+
[ "This dull recreation of the animated film doesn’t strive for anything more than what was contained in the original version of this film and actually delivers less.", "Entertainment, Business, Politics" ],
|
47 |
+
[ "The third beta of iOS 16.1 that was released earlier this week expands the Adaptive Transparency feature introduced with the second-generation AirPods Pro to the original AirPods Pro.", "Entertainment, Business, Politics" ],
|
48 |
+
[ "The chefs even taught the users how to cook the meal through pictures, videos, and other WhatsApp features.", "Entertainment, Business, Politics" ]
|
49 |
],
|
50 |
[context, threshold],
|
51 |
fn=gpt3_zero_shot_classification
|