simonschoe committed on
Commit
64ab470
•
1 Parent(s): 28f49a6

create app

Browse files
Files changed (3) hide show
  1. README.md +6 -4
  2. app.py +86 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,12 +1,14 @@
1
  ---
2
- title: Call2Vec
3
  emoji: 🌖
4
- colorFrom: pink
5
- colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 3.0.24
8
  app_file: app.py
9
- pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
1
  ---
2
+ title: Call2vec
3
  emoji: 🌖
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 3.0.24
8
  app_file: app.py
9
+ models: call2vec
10
+ pinned: true
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
+
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import pandas as pd
4
+ from datetime import datetime
5
+ import os
6
+ import re
7
+
8
+ from huggingface_hub import hf_hub_url, cached_download
9
+ from gensim.models.fasttext import load_facebook_model
10
+
11
# Optional admin key read from the environment (None when unset).
# process() compares the first prompt against this value.
ACCESS_KEY = os.environ.get('ACCESS_KEY')

# Resolve the model file on the Hugging Face hub and fetch it exactly once;
# the returned local path is reused below instead of calling
# cached_download() a second time.
url = hf_hub_url(repo_id="simonschoe/call2vec", filename="model.bin")
model_path = cached_download(url)

# Load the fastText binary via gensim; `model.wv` is queried by process().
model = load_facebook_model(model_path)
19
+
20
def process(_input, topn):
    """Look up nearest neighbours for the seed prompts (or return the prompt log).

    Args:
        _input: Raw text from the text box. Prompts are separated by comma,
            semicolon or newline; each prompt is stripped, lower-cased and
            its spaces are replaced by underscores.
        topn: Number of neighbours to return (coerced to ``int``).

    Returns:
        A 3-tuple ``(result, 'result.csv', echoed)`` where ``result`` is a
        DataFrame, ``'result.csv'`` is the file the frame was written to and
        ``echoed`` is the normalised prompts joined by newlines.

        * No usable prompt: empty frame with the neighbour columns.
        * First prompt equals ``ACCESS_KEY``: frame with columns
          ``Time``/``Prompt`` read from ``log.txt`` (empty if no log exists).
        * Otherwise: frame with columns ``Token``/``Cosine Similarity``/
          ``Frequency``; the prompts are appended to ``log.txt`` first.
    """
    # Normalise first, THEN drop empties, so whitespace-only segments such as
    # the middle one in "a, ,b" never reach the model as an empty token.
    tokens = [s.strip().lower().replace(' ', '_')
              for s in re.split('[,;\n]', _input or '')]
    tokens = [t for t in tokens if t]
    echoed = '\n'.join(tokens)

    if not tokens:
        # Nothing to query: return an empty table instead of raising IndexError.
        result = pd.DataFrame(columns=['Token', 'Cosine Similarity', 'Frequency'])
    elif tokens[0] == ACCESS_KEY:
        # Admin request: return the prompt log; the model is not queried.
        try:
            with open('log.txt', 'r', encoding='utf-8') as f:
                # partition() splits on the first '+++' only, so a prompt that
                # itself contains '+++' still yields exactly two fields.
                prompts = [(stamp, prompt)
                           for stamp, _, prompt in
                           (line.strip().partition('+++') for line in f if line.strip())]
        except FileNotFoundError:
            # No prompt has been logged yet.
            prompts = []
        result = pd.DataFrame(prompts, columns=['Time', 'Prompt'])
    else:
        # Record the prompt before querying the model.
        with open('log.txt', 'a', encoding='utf-8') as f:
            f.write(str(datetime.now()) + '+++' + '___'.join(tokens) + '\n')

        if len(tokens) > 1:
            # Several seeds: query with the average seed embedding.
            query = np.stack([model.wv[w] for w in tokens], axis=0).mean(axis=0)
        else:
            query = tokens[0]

        # int(): the UI slider may deliver the value as a float.
        nearest_neighbors = model.wv.most_similar(positive=query, topn=int(topn))
        frequencies = [model.wv.get_vecattr(token, 'count')
                       for token, _ in nearest_neighbors]
        result = pd.DataFrame(
            [(token, similarity, count)
             for (token, similarity), count in zip(nearest_neighbors, frequencies)],
            columns=['Token', 'Cosine Similarity', 'Frequency'],
        )

    result.to_csv('result.csv')
    return result, 'result.csv', echoed
52
+
53
def save(df):
    """Write *df* to ``result.csv`` in the working directory and return that file name."""
    target = 'result.csv'
    df.to_csv(target)
    return target
56
+
57
# Build the Gradio UI: description on the left, prompt controls and results
# on the right.
# NOTE(review): widget nesting below is reconstructed from a de-indented
# listing — confirm the Row/Column placement against the deployed app.
with gr.Blocks() as demo:
    gr.Markdown("# Call2Vec")
    gr.Markdown("## Earnings call transformation project")
    with gr.Row():
        # Left column: static project description and usage notes.
        with gr.Column():
            gr.Markdown("""
#### Project Description
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.""")
            gr.Markdown(
                """#### App usage:
Add your input prompts to the text field on the right. To use multiple input prompts at once separate
them by comma, semicolon or a new line
##### Examples
- Climate change
- Financial risk, energy dependency, climate neutrality
"""
            )
        # Right column: prompt box, neighbour-count slider, trigger and outputs.
        with gr.Column():
            text_input = gr.Textbox(lines=1)
            with gr.Row():
                n_output = gr.Slider(minimum=5, maximum=50, step=1)
                compute_button = gr.Button("Compute")
            df_output = gr.Dataframe(interactive=False)
            file_out = gr.File(interactive=False)

    # process() returns (table, csv path, normalised prompts); the third value
    # is written back into the text box.
    compute_button.click(
        process,
        inputs=[text_input, n_output],
        outputs=[df_output, file_out, text_input],
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gensim==4.2.0
2
+ huggingface_hub==0.8.1
3
+ numpy==1.23.0
4
+ pandas==1.4.3