sergiomar73 committed on
Commit
7cec07d
1 Parent(s): e63d545

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -130
app.py CHANGED
@@ -1,154 +1,51 @@
1
- # https://huggingface.co/tasks/token-classification
2
- # https://huggingface.co/spacy/en_core_web_sm
3
- # pip install https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
4
-
5
  import gradio as gr
6
  import os
7
- import time
8
  import openai
9
- import numpy as np
10
- import pandas as pd
11
- import spacy
12
- import en_core_web_sm
13
- import plotly.express as px
14
 
15
  openai.organization = os.environ.get('ORGANIZATION')
16
  openai.api_key = os.environ.get('API_KEY')
17
 
18
- nlp = spacy.load("en_core_web_sm")
19
-
20
- # The following text inside one of this categories: Entertainment, Business, Politics
21
- # This dull recreation of the animated film doesn’t strive for anything more than what was contained in the original version of this film and actually delivers less.
22
- # Category: Entertainment
23
 
24
- def text_to_sentences(text):
25
- doc = nlp(text)
26
- sentences = [ sentence.text for sentence in list(doc.sents) ]
27
- # print(sentences[:3])
28
- return sentences
29
-
30
- def calculate_embeddings_with_gpt3(text, engine="text-similarity-davinci-001", interval = 1.5, verbose=True):
31
- if verbose:
32
- print(f'Calculating embedding for {text}...')
33
- time.sleep(interval)
34
- response = openai.Embedding.create(
35
- input=text,
36
- engine=engine
37
- )
38
- embedding = response['data'][0]['embedding']
39
- return embedding
40
-
41
- def gpt3_zero_shot_classification(text, labels):
42
-
43
- df_sentences = pd.DataFrame(columns=['line', 'sentence', 'embedding'])
44
- for idx, sentence in enumerate(text_to_sentences(text)):
45
- embedding = calculate_embeddings_with_gpt3(sentence)
46
- # Create new row
47
- new_row = {
48
- 'line': idx + 1,
49
- 'sentence': sentence,
50
- 'embedding': embedding
51
- }
52
- df_sentences = df_sentences.append(new_row, ignore_index=True)
53
- # print(df_sentences.shape)
54
- # df_sentences.head()
55
 
56
-
57
-
58
- targets = np.array([ np.array(value[0]) for value in df_phrases[["embedding"]].values ])
59
- # print(f"targets:{targets.shape}")
60
- df_cosines = pd.DataFrame(columns=['line'])
61
-
62
- for i, row in df_sentences.iterrows():
63
- line = f'{row["line"]:03}'
64
- # print(f'Calculating cosines for [ {line} ] {row["sentence"][:50]}...')
65
- source = np.array(row["embedding"])
66
- cosine = np.dot(targets,source)/(np.linalg.norm(targets, axis=1)*np.linalg.norm(source))
67
- # Create new row
68
- new_row = dict([(f"Cosine{f'{key:02}'}", value) for key, value in enumerate(cosine.flatten(), 1)])
69
- new_row["line"] = row["line"]
70
- df_cosines = df_cosines.append(new_row, ignore_index=True)
71
-
72
- df_cosines['line'] = df_cosines['line'].astype('int')
73
- # print(df_cosines.shape)
74
- # df_cosines.head(3)
75
-
76
- df_comparison = df_cosines #[(df_cosines.filter(regex='Cosine') > threshold).any(axis=1)]
77
- # print(df_comparison.shape)
78
- # df_comparison.head(3)
79
-
80
- threshold = threshold / 100
81
-
82
- df_results = pd.DataFrame(columns=['line', 'sentence', 'phrase', 'category', 'tag', 'similarity'])
83
-
84
- for i, row in df_comparison.iterrows():
85
- for n in range(1,64+1):
86
- col = f"Cosine{f'{n:02}'}"
87
- # if row[col] > threshold:
88
- phrase = df_phrases.loc[[ n - 1 ]]
89
- new_row = {
90
- 'line': row["line"],
91
- 'sentence': df_sentences.at[int(row["line"])-1,"sentence"],
92
- 'phrase': df_phrases.at[n-1,"example"],
93
- 'category': df_phrases.at[n-1,"category"],
94
- 'tag': df_phrases.at[n-1,"label"],
95
- 'similarity': row[col]
96
- }
97
- df_results = df_results.append(new_row, ignore_index=True)
98
-
99
- df_results['line'] = df_cosines['line'].astype('int')
100
- # print(df_results.shape)
101
- # df_results.head(3)
102
-
103
- df_summary = df_results.groupby(['tag'])['similarity'].agg('max').to_frame()
104
- df_summary['ok'] = np.where(df_summary['similarity'] > threshold, True, False)
105
- # df_summary
106
-
107
- fig = px.bar(
108
- df_summary,
109
- y='similarity',
110
- color='ok',
111
- color_discrete_map={ True: px.colors.qualitative.Plotly[2], False: px.colors.qualitative.Set2[7] },
112
- text='similarity',
113
- text_auto='.3f',
114
- labels={'tag': 'Category', 'similarity': 'Similarity'},
115
- title = f"{text[:200]}..."
116
- )
117
- fig.add_shape( # add a horizontal "target" line
118
- type="line", line_color="salmon", line_width=3, opacity=1, line_dash="dot",
119
- x0=0, x1=1, xref="paper", y0=threshold, y1=threshold, yref="y"
120
  )
121
- fig.update_traces(textfont_size=24, textangle=0, textposition="inside", cliponaxis=False)
122
- fig.update_yaxes(range=[0, 1])
123
- # fig.show()
124
-
125
- details = df_results.drop(labels='line',axis=1).sort_values(['tag','similarity'],ascending=[True,False]).groupby('tag').head(3).reset_index() .drop(labels='index',axis=1)
126
-
127
- res = df_summary['similarity'].to_dict()
128
-
129
- return res, fig, details
130
 
131
  # Gradio UI
132
 
133
  with gr.Blocks(css=".gradio-container { background-color: white; }") as demo:
134
  gr.Markdown(f"# GPT-3 Zero shot classification app")
135
  with gr.Row():
136
- context = gr.Textbox(lines=3, label="Context", placeholder="Context Here...")
137
  with gr.Row():
138
- threshold = gr.Slider(0, 100, 80)
139
  btn = gr.Button(value="Analyze!", variant="primary")
140
  with gr.Row():
141
- label = gr.Label()
142
- plot = gr.Plot()
143
- with gr.Row():
144
- grid = gr.Dataframe(wrap=True)
145
- btn.click(fn=gpt3_zero_shot_classification, inputs=[context,threshold], outputs=[label,plot,grid])
146
  gr.Examples(
147
  [
148
- [ "", "Entertainment, Business, Politics" ],
149
- [ "", "Entertainment, Business, Politics" ],
150
- [ "", "Entertainment, Business, Politics" ],
151
- [ "", "Entertainment, Business, Politics" ]
152
  ],
153
  [context, threshold],
154
  fn=gpt3_zero_shot_classification
 
 
 
 
 
1
  import gradio as gr
2
  import os
 
3
  import openai
 
 
 
 
 
4
 
5
  openai.organization = os.environ.get('ORGANIZATION')
6
  openai.api_key = os.environ.get('API_KEY')
7
 
8
def classificate_with_gpt3(text, labels, engine="text-similarity-davinci-001"):
    """Ask an OpenAI completion model which of `labels` best fits `text`.

    Builds a zero-shot prompt that lists the candidate categories, appends
    the text, and ends with ``Category:`` so the model's continuation is the
    chosen label.

    Args:
        text: The passage to classify.
        labels: Candidate categories; interpolated verbatim into the prompt
            (the UI in this file collects them as one comma-separated string).
        engine: Model name sent as ``model`` in the completion request.
            NOTE(review): the default is kept for interface compatibility,
            but "text-similarity-davinci-001" looks like an embeddings model;
            the completions endpoint will presumably reject it. Confirm and
            pass a completion-capable model.

    Returns:
        The text of the first completion choice, with surrounding whitespace
        stripped.
    """
    prompt = f"""The following text inside one of this categories: {labels}:

{text}

Category:"""
    response = openai.Completion.create(
        model=engine,  # the original was missing this comma (SyntaxError)
        prompt=prompt,
        temperature=0.7,
        max_tokens=1000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    # Completion responses carry results under 'choices' (each with a 'text'
    # field); 'data' is the key used by embedding responses.
    completion = response['choices'][0]['text'].strip()
    print(completion)
    return completion
26
+
27
def gpt3_zero_shot_classification(text, labels):
    """Classify `text` into one of `labels`; entry point wired to the UI.

    Delegates to `classificate_with_gpt3` using that function's default
    engine and returns its result unchanged.
    """
    return classificate_with_gpt3(text, labels)
 
 
30
 
31
  # Gradio UI
32
 
33
  with gr.Blocks(css=".gradio-container { background-color: white; }") as demo:
34
  gr.Markdown(f"# GPT-3 Zero shot classification app")
35
  with gr.Row():
36
+ context = gr.Textbox(lines=3, label="Context", placeholder="Context here...")
37
  with gr.Row():
38
+ labels = gr.Textbox(lines=3, label="Labels (Comma separated)", placeholder="Labels here...")
39
  btn = gr.Button(value="Analyze!", variant="primary")
40
  with gr.Row():
41
+ out = gr.Textbox()
42
+ btn.click(fn=gpt3_zero_shot_classification, inputs=[context,labels], outputs=[out])
 
 
 
43
  gr.Examples(
44
  [
45
+ [ "On 22 February 2014, Ukrainian president Viktor Yanukovych was ousted from office as a result of the Euromaidan and the Revolution of Dignity, which broke out after his decision to reject the European Union–Ukraine Association Agreement and instead pursue closer ties with Russia and the Eurasian Economic Union. Shortly after Yanukovych's overthrow and exile to Russia, Ukraine's eastern and southern regions erupted with pro-Russia unrest.", "Entertainment, Business, Politics" ],
46
+ [ "This dull recreation of the animated film doesn’t strive for anything more than what was contained in the original version of this film and actually delivers less.", "Entertainment, Business, Politics" ],
47
+ [ "The third beta of iOS 16.1 that was released earlier this week expands the Adaptive Transparency feature introduced with the second-generation AirPods Pro to the original ‌AirPods Pro‌.", "Entertainment, Business, Politics" ],
48
+ [ "The chefs even taught the users how to cook the meal through pictures, videos, and other WhatsApp features.", "Entertainment, Business, Politics" ]
49
  ],
50
  [context, threshold],
51
  fn=gpt3_zero_shot_classification