Spaces:

sergiomar73
/

nlp-gpt3-zero-shot-classification-app

Sleeping

App Files Files Community

sergiomar73 committed on Sep 30, 2022

Commit

7cec07d

•

1 Parent(s): e63d545

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -130

app.py CHANGED Viewed

@@ -1,154 +1,51 @@
-# https://huggingface.co/tasks/token-classification
-# https://huggingface.co/spacy/en_core_web_sm
-# pip install https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
 import gradio as gr
 import os
-import time
 import openai
-import numpy as np
-import pandas as pd
-import spacy
-import en_core_web_sm
-import plotly.express as px
 openai.organization = os.environ.get('ORGANIZATION')
 openai.api_key = os.environ.get('API_KEY')
-nlp = spacy.load("en_core_web_sm")
-# The following text inside one of this categories: Entertainment, Business, Politics
-# This dull recreation of the animated film doesn’t strive for anything more than what was contained in the original version of this film and actually delivers less.
-# Category: Entertainment
-def text_to_sentences(text):
-  doc = nlp(text)
-  sentences = [ sentence.text for sentence in list(doc.sents) ]
-  # print(sentences[:3])
-  return sentences
-def calculate_embeddings_with_gpt3(text, engine="text-similarity-davinci-001", interval = 1.5, verbose=True):
-  if verbose:
-    print(f'Calculating embedding for {text}...')
-  time.sleep(interval)
-  response = openai.Embedding.create(
-    input=text,
-    engine=engine
-  )
-  embedding = response['data'][0]['embedding']
-  return embedding
-def gpt3_zero_shot_classification(text, labels):
-  df_sentences = pd.DataFrame(columns=['line', 'sentence', 'embedding'])
-  for idx, sentence in enumerate(text_to_sentences(text)):
-    embedding = calculate_embeddings_with_gpt3(sentence)
-    # Create new row
-    new_row = {
-      'line': idx + 1,
-      'sentence': sentence,
-      'embedding': embedding
-    }
-    df_sentences = df_sentences.append(new_row, ignore_index=True)
-  # print(df_sentences.shape)
-  # df_sentences.head()
-  targets = np.array([ np.array(value[0]) for value in df_phrases[["embedding"]].values ])
-  # print(f"targets:{targets.shape}")
-  df_cosines = pd.DataFrame(columns=['line'])
-  for i, row in df_sentences.iterrows():
-    line = f'{row["line"]:03}'
-    # print(f'Calculating cosines for [ {line} ] {row["sentence"][:50]}...')
-    source = np.array(row["embedding"])
-    cosine = np.dot(targets,source)/(np.linalg.norm(targets, axis=1)*np.linalg.norm(source))
-    # Create new row
-    new_row = dict([(f"Cosine{f'{key:02}'}", value) for key, value in enumerate(cosine.flatten(), 1)])
-    new_row["line"] = row["line"]
-    df_cosines = df_cosines.append(new_row, ignore_index=True)
-  df_cosines['line'] = df_cosines['line'].astype('int')
-  # print(df_cosines.shape)
-  # df_cosines.head(3)
-  df_comparison = df_cosines #[(df_cosines.filter(regex='Cosine') > threshold).any(axis=1)]
-  # print(df_comparison.shape)
-  # df_comparison.head(3)
-  threshold = threshold / 100
-  df_results = pd.DataFrame(columns=['line', 'sentence', 'phrase', 'category', 'tag', 'similarity'])
-  for i, row in df_comparison.iterrows():
-    for n in range(1,64+1):
-      col = f"Cosine{f'{n:02}'}"
-      # if row[col] > threshold:
-      phrase = df_phrases.loc[[ n - 1 ]]
-      new_row = {
-        'line': row["line"],
-        'sentence': df_sentences.at[int(row["line"])-1,"sentence"],
-        'phrase': df_phrases.at[n-1,"example"],
-        'category': df_phrases.at[n-1,"category"],
-        'tag': df_phrases.at[n-1,"label"],
-        'similarity': row[col]
-      }
-      df_results = df_results.append(new_row, ignore_index=True)
-  df_results['line'] = df_cosines['line'].astype('int')
-  # print(df_results.shape)
-  # df_results.head(3)
-  df_summary = df_results.groupby(['tag'])['similarity'].agg('max').to_frame()
-  df_summary['ok'] = np.where(df_summary['similarity'] > threshold, True, False)
-  # df_summary
-  fig = px.bar(
-    df_summary,
-    y='similarity',
-    color='ok',
-    color_discrete_map={ True: px.colors.qualitative.Plotly[2], False: px.colors.qualitative.Set2[7] },
-    text='similarity',
-    text_auto='.3f',
-    labels={'tag': 'Category', 'similarity': 'Similarity'},
-    title = f"{text[:200]}..."
-  )
-  fig.add_shape( # add a horizontal "target" line
-    type="line", line_color="salmon", line_width=3, opacity=1, line_dash="dot",
-    x0=0, x1=1, xref="paper", y0=threshold, y1=threshold, yref="y"
   )
-  fig.update_traces(textfont_size=24, textangle=0, textposition="inside", cliponaxis=False)
-  fig.update_yaxes(range=[0, 1])
-  # fig.show()
-  details = df_results.drop(labels='line',axis=1).sort_values(['tag','similarity'],ascending=[True,False]).groupby('tag').head(3).reset_index()    .drop(labels='index',axis=1)
-  res = df_summary['similarity'].to_dict()
-  return res, fig, details
 # Gradio UI
 with gr.Blocks(css=".gradio-container { background-color: white; }") as demo:
   gr.Markdown(f"# GPT-3 Zero shot classification app")
   with gr.Row():
-    context = gr.Textbox(lines=3, label="Context", placeholder="Context Here...")
   with gr.Row():
-    threshold = gr.Slider(0, 100, 80)
   btn = gr.Button(value="Analyze!", variant="primary")
   with gr.Row():
-    label = gr.Label()
-    plot = gr.Plot()
-  with gr.Row():
-    grid = gr.Dataframe(wrap=True)
-  btn.click(fn=gpt3_zero_shot_classification, inputs=[context,threshold], outputs=[label,plot,grid])
   gr.Examples(
     [
-      [ "", "Entertainment, Business, Politics" ],
-      [ "", "Entertainment, Business, Politics" ],
-      [ "", "Entertainment, Business, Politics" ],
-      [ "", "Entertainment, Business, Politics" ]
     ],
     [context, threshold],
     fn=gpt3_zero_shot_classification

 import gradio as gr
 import os
 import openai
 openai.organization = os.environ.get('ORGANIZATION')
 openai.api_key = os.environ.get('API_KEY')
+def classificate_with_gpt3(text, labels, engine="text-similarity-davinci-001"):
+  prompt = f"""The following text inside one of this categories: {labels}:
+  {text}
+  Category:"""
+  response = openai.Completion.create(
+    model=engine
+    prompt=prompt,
+    temperature=0.7,
+    max_tokens=1000,
+    top_p=1,
+    frequency_penalty=0,
+    presence_penalty=0
   )
+  completion = response['data'][0]
+  print(completion)
+  return completion
+def gpt3_zero_shot_classification(text, labels):
+  completion = classificate_with_gpt3(text, labels)
+  return completion
 # Gradio UI
 with gr.Blocks(css=".gradio-container { background-color: white; }") as demo:
   gr.Markdown(f"# GPT-3 Zero shot classification app")
   with gr.Row():
+    context = gr.Textbox(lines=3, label="Context", placeholder="Context here...")
   with gr.Row():
+    labels = gr.Textbox(lines=3, label="Labels (Comma separated)", placeholder="Labels here...")
   btn = gr.Button(value="Analyze!", variant="primary")
   with gr.Row():
+    out = gr.Textbox()
+  btn.click(fn=gpt3_zero_shot_classification, inputs=[context,labels], outputs=[out])
   gr.Examples(
     [
+      [ "On 22 February 2014, Ukrainian president Viktor Yanukovych was ousted from office as a result of the Euromaidan and the Revolution of Dignity, which broke out after his decision to reject the European Union–Ukraine Association Agreement and instead pursue closer ties with Russia and the Eurasian Economic Union. Shortly after Yanukovych's overthrow and exile to Russia, Ukraine's eastern and southern regions erupted with pro-Russia unrest.", "Entertainment, Business, Politics" ],
+      [ "This dull recreation of the animated film doesn’t strive for anything more than what was contained in the original version of this film and actually delivers less.", "Entertainment, Business, Politics" ],
+      [ "The third beta of iOS 16.1 that was released earlier this week expands the Adaptive Transparency feature introduced with the second-generation AirPods Pro to the original ‌AirPods Pro‌.", "Entertainment, Business, Politics" ],
+      [ "The chefs even taught the users how to cook the meal through pictures, videos, and other WhatsApp features.", "Entertainment, Business, Politics" ]
     ],
     [context, threshold],
     fn=gpt3_zero_shot_classification