plomty committed on
Commit
53332ed
•
1 Parent(s): 00621f5

Shift to url classifier

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +26 -21
  3. requirements.txt +71 -70
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ env/
app.py CHANGED
@@ -2,41 +2,46 @@ import gradio as gr
2
  from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
3
 
4
 
5
- class EmotionClassifier:
6
- def __init__(self, model_name: str):
7
- self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
8
- self.tokenizer = AutoTokenizer.from_pretrained(model_name)
9
  self.pipeline = pipeline(
10
- "text-classification",
11
- model=self.model,
12
- tokenizer=self.tokenizer,
13
- return_all_scores=True,
14
  )
 
 
15
 
16
  def predict(self, input_text: str):
17
- pred = self.pipeline(input_text)[0]
18
- result = {
19
- "Sadness 😭": pred[0]["score"],
20
- "Joy 😂": pred[1]["score"],
21
- "Love 😍": pred[2]["score"],
22
- "Anger 😠": pred[3]["score"],
23
- "Fear 😨": pred[4]["score"],
24
- "Surprise 😲": pred[5]["score"],
25
- }
 
 
 
 
 
 
 
26
  return result
27
 
28
 
29
  def main():
30
- model = EmotionClassifier("bhadresh-savani/bert-base-uncased-emotion")
31
  iface = gr.Interface(
32
  fn=model.predict,
33
  inputs=gr.inputs.Textbox(
34
  lines=3,
35
- placeholder="Type a phrase that has some emotion",
36
- label="Input Text",
37
  ),
38
  outputs="label",
39
- title="Emotion Classification",
40
  examples=[
41
  "I get so down when I'm alone",
42
  "I believe that today everything will work out",
 
2
  from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
3
 
4
 
5
+ class UrlClassifier:
6
+ def __init__(self):
 
 
7
  self.pipeline = pipeline(
8
+ "zero-shot-classification",
9
+ model="MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli"
 
 
10
  )
11
+ self.candidate_labels = ["Apps", "Arts and Entertainment", "Automotive Ownership", "Automotive Products", "Beauty Services", "Business and Industrial", "Clothing and Accessories", "Collectables and Antiques", "Consumer Electronics", "Edible Consumer Packaged Goods", "Education and Careers", "Family and Parenting", "Finance and Insurance", "Food and Beverage Services", "Furniture", "Gifts and Holiday Items", "Hardware Supplies", "Health and Medical Services", "Hobbies and Interests", "Home and Garden Services", "Legal Services", "Life Events", "Logistics and Delivery", "Non-Profits", "Office Equipment and Supplies", "Pet Services", "Pharmaceuticals", "Real Estate", "Recreation and Fitness Activities", "Software", "Sporting Goods", "Travel and Tourism", "Web Services"]
12
+
13
 
14
  def predict(self, input_text: str):
15
+ pred = self.pipeline(input_text, self.candidate_labels)
16
+ # pred = self.pipeline(input_text)[0]
17
+
18
+ sorted_preds = sorted(zip(pred['scores'], pred['labels']))
19
+ result = {}
20
+ for i in range(len(sorted_preds)):
21
+ result[sorted_preds[i][1]] = sorted_preds[i][0]
22
+
23
+ # result = {
24
+ # "Sadness 😭": pred[0]["score"],
25
+ # "Joy 😂": pred[1]["score"],
26
+ # "Love 😍": pred[2]["score"],
27
+ # "Anger 😠": pred[3]["score"],
28
+ # "Fear 😨": pred[4]["score"],
29
+ # "Surprise 😲": pred[5]["score"],
30
+ # }
31
  return result
32
 
33
 
34
  def main():
35
+ model = UrlClassifier()
36
  iface = gr.Interface(
37
  fn=model.predict,
38
  inputs=gr.inputs.Textbox(
39
  lines=3,
40
+ placeholder="Input a shopping website URL",
41
+ label="Input URL",
42
  ),
43
  outputs="label",
44
+ title="Ecommerce URL Classification",
45
  examples=[
46
  "I get so down when I'm alone",
47
  "I believe that today everything will work out",
requirements.txt CHANGED
@@ -1,74 +1,75 @@
1
- analytics-python==1.4.0
2
- backcall==0.2.0
3
- backoff==1.10.0
4
- bcrypt==3.2.0
5
- certifi==2021.10.8
6
- cffi==1.15.0
7
- charset-normalizer==2.0.8
8
- click==8.0.3
9
- cryptography==36.0.0
 
 
 
10
  cycler==0.11.0
11
- debugpy==1.5.1
12
- decorator==5.1.0
13
- entrypoints==0.3
14
- ffmpy==0.3.0
15
- filelock==3.4.0
16
- Flask==2.0.2
17
- Flask-CacheBuster==1.0.0
18
- Flask-Cors==3.0.10
19
- Flask-Login==0.5.0
20
- fonttools==4.28.2
21
- gradio==2.4.6
22
- huggingface-hub==0.1.2
23
- idna==3.3
24
- ipykernel==6.5.1
25
- ipython==7.30.0
26
- itsdangerous==2.0.1
27
- jedi==0.18.1
28
- Jinja2==3.0.3
29
- joblib==1.1.0
30
- jupyter-client==7.1.0
31
- jupyter-core==4.9.1
32
- kiwisolver==1.3.2
33
- markdown2==2.4.1
34
- MarkupSafe==2.0.1
35
- matplotlib==3.5.0
36
- matplotlib-inline==0.1.3
37
- monotonic==1.6
38
- nest-asyncio==1.5.1
39
- numpy==1.21.4
40
- packaging==21.3
41
- pandas==1.3.4
42
- paramiko==2.8.1
43
- parso==0.8.2
44
- pexpect==4.8.0
45
- pickleshare==0.7.5
46
- Pillow==8.4.0
47
- prompt-toolkit==3.0.23
48
- ptyprocess==0.7.0
49
- pycparser==2.21
50
- pycryptodome==3.11.0
51
  pydub==0.25.1
52
- Pygments==2.10.0
53
- PyNaCl==1.4.0
54
- pyparsing==3.0.6
55
  python-dateutil==2.8.2
56
- pytz==2021.3
57
- PyYAML==6.0
58
- pyzmq==22.3.0
59
- regex==2021.11.10
60
- requests==2.26.0
61
- sacremoses==0.0.46
62
- setuptools-scm==6.3.2
 
 
63
  six==1.16.0
64
- tokenizers==0.10.3
65
- tomli==1.2.2
66
- torch==1.10.0
67
- tornado==6.1
68
- tqdm==4.62.3
69
- traitlets==5.1.1
70
- transformers==4.12.5
71
- typing-extensions==4.0.0
72
- urllib3==1.26.7
73
- wcwidth==0.2.5
74
- Werkzeug==2.0.2
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ aiohttp==3.8.5
3
+ aiosignal==1.3.1
4
+ altair==5.1.0
5
+ annotated-types==0.5.0
6
+ anyio==3.7.1
7
+ async-timeout==4.0.3
8
+ attrs==23.1.0
9
+ certifi==2023.7.22
10
+ charset-normalizer==3.2.0
11
+ click==8.1.7
12
+ contourpy==1.1.0
13
  cycler==0.11.0
14
+ datasets==2.14.4
15
+ dill==0.3.7
16
+ fastapi==0.103.0
17
+ ffmpy==0.3.1
18
+ filelock==3.12.3
19
+ fonttools==4.42.1
20
+ frozenlist==1.4.0
21
+ fsspec==2023.6.0
22
+ gradio==3.41.2
23
+ gradio_client==0.5.0
24
+ h11==0.14.0
25
+ httpcore==0.17.3
26
+ httpx==0.24.1
27
+ huggingface-hub==0.16.4
28
+ idna==3.4
29
+ importlib-resources==6.0.1
30
+ Jinja2==3.1.2
31
+ jsonschema==4.19.0
32
+ jsonschema-specifications==2023.7.1
33
+ kiwisolver==1.4.5
34
+ MarkupSafe==2.1.3
35
+ matplotlib==3.7.2
36
+ mpmath==1.3.0
37
+ multidict==6.0.4
38
+ multiprocess==0.70.15
39
+ networkx==3.1
40
+ numpy==1.25.2
41
+ orjson==3.9.5
42
+ packaging==23.1
43
+ pandas==2.0.3
44
+ Pillow==10.0.0
45
+ pyarrow==13.0.0
46
+ pydantic==2.3.0
47
+ pydantic_core==2.6.3
 
 
 
 
 
 
48
  pydub==0.25.1
49
+ pyparsing==3.0.9
 
 
50
  python-dateutil==2.8.2
51
+ python-multipart==0.0.6
52
+ pytz==2023.3
53
+ PyYAML==6.0.1
54
+ referencing==0.30.2
55
+ regex==2023.8.8
56
+ requests==2.31.0
57
+ rpds-py==0.10.0
58
+ safetensors==0.3.3
59
+ semantic-version==2.10.0
60
  six==1.16.0
61
+ sniffio==1.3.0
62
+ starlette==0.27.0
63
+ sympy==1.12
64
+ tokenizers==0.13.3
65
+ toolz==0.12.0
66
+ torch==2.0.1
67
+ tqdm==4.66.1
68
+ transformers==4.32.1
69
+ typing_extensions==4.7.1
70
+ tzdata==2023.3
71
+ urllib3==2.0.4
72
+ uvicorn==0.23.2
73
+ websockets==11.0.3
74
+ xxhash==3.3.0
75
+ yarl==1.9.2