os1187 DrGabrielLopez committed on
Commit
200ac02
0 Parent(s):

Duplicate from DrGabrielLopez/gpt2-chatbot

Browse files

Co-authored-by: dr Gabriel Lopez <DrGabrielLopez@users.noreply.huggingface.co>

Files changed (7) hide show
  1. .gitattributes +34 -0
  2. .gitignore +2 -0
  3. Pipfile +178 -0
  4. Pipfile.lock +0 -0
  5. README.md +14 -0
  6. app.py +139 -0
  7. requirements.txt +9 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ .git*
Pipfile ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [[source]]
2
+ url = "https://pypi.org/simple"
3
+ verify_ssl = true
4
+ name = "pypi"
5
+
6
+ [packages]
7
+ gradio = "==3.10.1"
8
+ tensorflow = "==2.11.0"
9
+ transformers = "==4.24.0"
10
+ absl-py = "==1.3.0"
11
+ aiohttp = "==3.8.3"
12
+ aiosignal = "==1.3.1"
13
+ antlr4-python3-runtime = "==4.8"
14
+ anyio = "==3.6.2"
15
+ appdirs = "==1.4.4"
16
+ astunparse = "==1.6.3"
17
+ async-timeout = "==4.0.2"
18
+ attrs = "==22.1.0"
19
+ audioread = "==3.0.0"
20
+ autoflake = "==2.0.0"
21
+ bcrypt = "==4.0.1"
22
+ bitarray = "==2.6.0"
23
+ blis = "==0.7.9"
24
+ cachetools = "==5.2.0"
25
+ catalogue = "==2.0.8"
26
+ certifi = "==2022.9.24"
27
+ cffi = "==1.15.1"
28
+ charset-normalizer = "==2.1.1"
29
+ ci-sdr = "==0.0.2"
30
+ click = "==8.1.3"
31
+ colorama = "==0.4.6"
32
+ confection = "==0.0.3"
33
+ configargparse = "==1.5.3"
34
+ contourpy = "==1.0.6"
35
+ cryptography = "==38.0.3"
36
+ ctc-segmentation = "==1.7.4"
37
+ cycler = "==0.11.0"
38
+ cymem = "==2.0.7"
39
+ cython = "==0.29.32"
40
+ decorator = "==5.1.1"
41
+ distance = "==0.1.3"
42
+ einops = "==0.6.0"
43
+ en-core-web-sm = {file = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1-py3-none-any.whl"}
44
+ espnet = "==202209"
45
+ espnet-tts-frontend = "==0.0.3"
46
+ fairseq = "==0.12.2"
47
+ fast-bss-eval = "==0.1.3"
48
+ fastapi = "==0.76.0"
49
+ ffmpy = "==0.3.0"
50
+ filelock = "==3.8.0"
51
+ flatbuffers = "==22.10.26"
52
+ fonttools = "==4.38.0"
53
+ frozenlist = "==1.3.3"
54
+ fsspec = "==2022.11.0"
55
+ g2p-en = "==2.1.0"
56
+ gast = "==0.4.0"
57
+ google-auth = "==2.14.1"
58
+ google-auth-oauthlib = "==0.4.6"
59
+ google-pasta = "==0.2.0"
60
+ grpcio = "==1.34.1"
61
+ h11 = "==0.12.0"
62
+ h5py = "==3.1.0"
63
+ httpcore = "==0.15.0"
64
+ httpx = "==0.23.1"
65
+ huggingface-hub = "==0.11.0"
66
+ humanfriendly = "==10.0"
67
+ hydra-core = "==1.0.7"
68
+ idna = "==3.4"
69
+ importlib-metadata = "==4.13.0"
70
+ inflect = "==6.0.2"
71
+ jaconv = "==0.3"
72
+ jamo = "==0.4.1"
73
+ jinja2 = "==3.1.2"
74
+ joblib = "==1.2.0"
75
+ kaldiio = "==2.17.2"
76
+ keras = "==2.11.0"
77
+ keras-nightly = "==2.5.0.dev2021032900"
78
+ keras-preprocessing = "==1.1.2"
79
+ kiwisolver = "==1.4.4"
80
+ langcodes = "==3.3.0"
81
+ libclang = "==14.0.6"
82
+ librosa = "==0.9.2"
83
+ linkify-it-py = "==1.0.3"
84
+ llvmlite = "==0.39.1"
85
+ lxml = "==4.9.1"
86
+ markdown = "==3.4.1"
87
+ markdown-it-py = "==2.1.0"
88
+ markupsafe = "==2.1.1"
89
+ matplotlib = "==3.6.2"
90
+ mdit-py-plugins = "==0.3.1"
91
+ mdurl = "==0.1.2"
92
+ multidict = "==6.0.2"
93
+ murmurhash = "==1.0.9"
94
+ nltk = "==3.7"
95
+ numba = "==0.56.4"
96
+ numpy = "==1.23.5"
97
+ oauthlib = "==3.2.2"
98
+ omegaconf = "==2.0.6"
99
+ opt-einsum = "==3.3.0"
100
+ orjson = "==3.8.2"
101
+ pandas = "==1.4.4"
102
+ paramiko = "==2.12.0"
103
+ pathy = "==0.10.0"
104
+ pillow = "==9.3.0"
105
+ plotly = "==5.11.0"
106
+ pooch = "==1.6.0"
107
+ portalocker = "==2.6.0"
108
+ preshed = "==3.0.8"
109
+ protobuf = "==3.19.6"
110
+ pyasn1 = "==0.4.8"
111
+ pyasn1-modules = "==0.2.8"
112
+ pycparser = "==2.21"
113
+ pycryptodome = "==3.15.0"
114
+ pydantic = "==1.9.2"
115
+ pydub = "==0.25.1"
116
+ pyflakes = "==3.0.1"
117
+ pynacl = "==1.5.0"
118
+ pyparsing = "==3.0.9"
119
+ pypinyin = "==0.44.0"
120
+ python-dateutil = "==2.8.2"
121
+ python-multipart = "==0.0.5"
122
+ pytorch-wpe = "==0.0.1"
123
+ pytz = "==2022.6"
124
+ pyworld = "==0.3.2"
125
+ pyyaml = "==6.0"
126
+ regex = "==2022.10.31"
127
+ requests = "==2.28.1"
128
+ requests-oauthlib = "==1.3.1"
129
+ resampy = "==0.4.2"
130
+ rfc3986 = "==1.5.0"
131
+ rsa = "==4.9"
132
+ sacrebleu = "==2.3.1"
133
+ scikit-learn = "==1.1.3"
134
+ scipy = "==1.9.3"
135
+ sentencepiece = "==0.1.97"
136
+ six = "==1.15.0"
137
+ smart-open = "==5.2.1"
138
+ sniffio = "==1.3.0"
139
+ soundfile = "==0.11.0"
140
+ spacy = "==3.4.3"
141
+ spacy-legacy = "==3.0.10"
142
+ spacy-loggers = "==1.0.3"
143
+ srsly = "==2.4.5"
144
+ starlette = "==0.18.0"
145
+ tabulate = "==0.9.0"
146
+ tenacity = "==8.1.0"
147
+ tensorboard = "==2.11.0"
148
+ tensorboard-data-server = "==0.6.1"
149
+ tensorboard-plugin-wit = "==1.8.1"
150
+ tensorflow-estimator = "==2.11.0"
151
+ tensorflow-io-gcs-filesystem = "==0.28.0"
152
+ termcolor = "==1.1.0"
153
+ thinc = "==8.1.5"
154
+ threadpoolctl = "==3.1.0"
155
+ tokenizers = "==0.13.2"
156
+ tomli = "==2.0.1"
157
+ torch = "==1.13.0"
158
+ torch-complex = "==0.4.3"
159
+ torchaudio = "==0.13.0"
160
+ tqdm = "==4.64.1"
161
+ typeguard = "==2.13.3"
162
+ typer = "==0.7.0"
163
+ typing-extensions = "==4.4.0"
164
+ uc-micro-py = "==1.0.1"
165
+ unidecode = "==1.3.6"
166
+ urllib3 = "==1.26.12"
167
+ uvicorn = "==0.20.0"
168
+ wasabi = "==0.10.1"
169
+ websockets = "==10.4"
170
+ werkzeug = "==2.2.2"
171
+ wrapt = "==1.12.1"
172
+ yarl = "==1.8.1"
173
+ zipp = "==3.10.0"
174
+
175
+ [dev-packages]
176
+
177
+ [requires]
178
+ python_version = "3.9"
Pipfile.lock ADDED
The diff for this file is too large to render. See raw diff
 
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Funny Chatbot
3
+ emoji: 🌖
4
+ colorFrom: yellow
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 3.9.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: cc-by-nc-sa-4.0
11
+ duplicated_from: DrGabrielLopez/gpt2-chatbot
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import TFAutoModelForCausalLM, AutoTokenizer
import tensorflow as tf
import gradio as gr
import spacy
from spacy import displacy
from transformers import TFAutoModelForSequenceClassification
from scipy.special import softmax
import plotly.express as px
import plotly.io as pio

# configuration params
pio.templates.default = "plotly_dark"

# setting up the text in the page
TITLE = "<center><h1>Talk with an AI</h1></center>"
DESCRIPTION = r"""<center>This application allows you to talk with a machine/robot with state-of-the-art technology!!<br>
In the back-end is using the GPT2 model from OpenAI. One of the best models in text generation and comprehension.<br>
Language processing is done using RoBERTa for sentiment-analysis and spaCy for named-entity recognition and dependency plotting.<br>
The AI thinks he is a human, so please treat him as such, else he might get angry!<br>
"""
EXAMPLES = [
    ["What is your favorite videogame?"],
    ["What gets you really sad?"],
    ["How can I make you really angry? "],
    ["What do you do for work?"],
    ["What are your hobbies?"],
    ["What is your favorite food?"],
]
ARTICLE = r"""<center>
Done by dr. Gabriel Lopez<br>
For more please visit: <a href='https://sites.google.com/view/dr-gabriel-lopez/home'>My Page</a><br>
For info about the chat-bot model can also see the <a href="https://arxiv.org/abs/1911.00536">ArXiv paper</a><br>
</center>"""

# Loading necessary NLP models
# dialog: DialoGPT (GPT2-architecture) via the TensorFlow classes
checkpoint = "microsoft/DialoGPT-medium"  # tf
model_gtp2 = TFAutoModelForCausalLM.from_pretrained(checkpoint)
tokenizer_gtp2 = AutoTokenizer.from_pretrained(checkpoint)
# sentiment: 4-class emotion head (anger/joy/optimism/sadness)
checkpoint = "cardiffnlp/twitter-roberta-base-emotion"
model_roberta = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
tokenizer_roberta = AutoTokenizer.from_pretrained(checkpoint)
# NER & Dependency
nlp = spacy.load("en_core_web_sm")
47
+
48
# text-to-text : chatting function -- GPT2 (DialoGPT)
def chat_with_bot(user_input, chat_history_and_input=None):
    """Generate the bot's next reply with DialoGPT.

    Parameters:
        user_input (str): the user's new message.
        chat_history_and_input: token ids of the conversation so far, as
            returned by a previous call (a numpy array), or None/empty on
            the first turn.

    Returns:
        tuple: (bot_response str, updated token-id history as numpy array).
    """
    emb_user_input = tokenizer_gtp2.encode(
        user_input + tokenizer_gtp2.eos_token, return_tensors="tf"
    )
    # The history is [] on the first turn (gr.State default) and a 2-D numpy
    # array afterwards; a None/len guard handles both safely, unlike the
    # original `== []` which relies on numpy's scalar-False fallback.
    if chat_history_and_input is None or len(chat_history_and_input) == 0:
        bot_input_ids = emb_user_input  # first iteration
    else:
        bot_input_ids = tf.concat(
            [chat_history_and_input, emb_user_input], axis=-1
        )  # other iterations: append the new message to the history
    chat_history_and_input = model_gtp2.generate(
        bot_input_ids, max_length=1000, pad_token_id=tokenizer_gtp2.eos_token_id
    ).numpy()
    # Decode only the newly generated tokens (everything after the prompt).
    bot_response = tokenizer_gtp2.decode(
        chat_history_and_input[:, bot_input_ids.shape[-1] :][0],
        skip_special_tokens=True,
    )
    return bot_response, chat_history_and_input
69
+
70
+
71
# text-to-sentiment
def text_to_sentiment(text_input):
    """Sentiment analysis using RoBERTa"""
    # Class order matches the cardiffnlp/twitter-roberta-base-emotion head.
    emotion_labels = ["anger", "joy", "optimism", "sadness"]
    tokens = tokenizer_roberta(text_input, return_tensors="tf")
    # First (and only) item of the batch -> raw logits as a numpy vector.
    logits = model_roberta(tokens)[0][0].numpy()
    probabilities = softmax(logits)
    # Bar chart of emotion probabilities for the gr.Plot component.
    return px.histogram(x=emotion_labels, y=probabilities, height=200)
80
+
81
+
82
# text_to_semantics
def text_to_semantics(text_input):
    """NER and Dependency plot using spaCy.

    Parameters:
        text_input (str): text to analyse.

    Returns:
        tuple: (pos_tokens, html_dep) where pos_tokens is a list of
        (token_text, pos_tag) pairs for gr.Highlight, and html_dep is the
        dependency-parse markup for gr.HTML.
    """
    processed_text = nlp(text_input)
    # Dependency parse rendered as markup for the gr.HTML component.
    html_dep = displacy.render(
        processed_text,
        style="dep",
        options={"compact": True, "color": "white", "bg": "light-black"},
        page=False,
    )
    # POS tag per token; the (" ", None) spacer keeps tokens visually
    # separated in the gr.Highlight widget.
    pos_tokens = []
    for token in processed_text:
        pos_tokens.extend([(token.text, token.pos_), (" ", None)])
    return pos_tokens, html_dep
100
+
101
+
102
# gradio interface
blocks = gr.Blocks()
with blocks:
    # physical elements
    # session_state holds the DialoGPT token history between submits
    session_state = gr.State([])
    gr.Markdown(TITLE)
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column():
            # left column: user input
            in_text = gr.Textbox(value="How was the class?", label="Start chatting!")
            submit_button = gr.Button("Submit")
            gr.Examples(inputs=in_text, examples=EXAMPLES)
        with gr.Column():
            # right column: bot response and its NLP analyses
            response_text = gr.Textbox(value="", label="GPT2 response:")
            sentiment_plot = gr.Plot(
                label="How is GPT2 feeling about your conversation?:", visible=True
            )
            ner_response = gr.Highlight(
                label="Named Entity Recognition (NER) over response"
            )
            dependency_plot = gr.HTML(label="Dependency plot of response")
    gr.Markdown(ARTICLE)
    # event listeners
    # submit -> generate reply and update the conversation state
    submit_button.click(
        inputs=[in_text, session_state],
        outputs=[response_text, session_state],
        fn=chat_with_bot,
    )
    # any new response triggers the sentiment plot refresh
    response_text.change(
        inputs=response_text, outputs=sentiment_plot, fn=text_to_sentiment
    )
    # ...and the NER highlight + dependency plot refresh
    response_text.change(
        inputs=response_text,
        outputs=[ner_response, dependency_plot],
        fn=text_to_semantics,
    )

blocks.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio==3.12.0
2
+ plotly==5.11.0
3
+ scipy==1.5.4
4
+ spacy==3.4.3
5
+ tensorflow==2.5.0
6
+ transformers==4.24.0
7
+
8
+ # spacy internal nlp model
9
+ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1.tar.gz