Rajiv Shah committed on
Commit
bca06e3
1 Parent(s): 5dc2702

updated notebook

Browse files
Files changed (1) hide show
  1. Prep_FINBert.ipynb +167 -0
Prep_FINBert.ipynb ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 5,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "import gradio as gr\n",
11
+ "from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 10,
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "tokenizer = AutoTokenizer.from_pretrained(\"yiyanghkust/finbert-fls\")\n",
21
+ "\n",
22
+ "finbert = AutoModelForSequenceClassification.from_pretrained(\"yiyanghkust/finbert-fls\")"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": 11,
28
+ "metadata": {},
29
+ "outputs": [],
30
+ "source": [
31
+ "nlp = pipeline(\"text-classification\", model=finbert, tokenizer=tokenizer)\n",
32
+ "results = nlp(['we expect the age of our fleet to enhance availability and reliability due to reduced downtime for repairs.',\n",
33
+ " 'on an equivalent unit of production basis, general and administrative expenses declined 24 percent from 1994 to $.67 per boe.',\n",
34
+ " 'we will continue to assess the need for a valuation allowance against deferred tax assets considering all available evidence obtained in future reporting periods.'])"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 15,
40
+ "metadata": {},
41
+ "outputs": [
42
+ {
43
+ "data": {
44
+ "text/plain": [
45
+ "<transformers.pipelines.text_classification.TextClassificationPipeline at 0x144572f40>"
46
+ ]
47
+ },
48
+ "execution_count": 15,
49
+ "metadata": {},
50
+ "output_type": "execute_result"
51
+ }
52
+ ],
53
+ "source": []
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": 12,
58
+ "metadata": {},
59
+ "outputs": [
60
+ {
61
+ "data": {
62
+ "text/plain": [
63
+ "[{'label': 'Specific FLS', 'score': 0.77278733253479},\n",
64
+ " {'label': 'Not FLS', 'score': 0.9905241131782532},\n",
65
+ " {'label': 'Non-specific FLS', 'score': 0.975904107093811}]"
66
+ ]
67
+ },
68
+ "execution_count": 12,
69
+ "metadata": {},
70
+ "output_type": "execute_result"
71
+ }
72
+ ],
73
+ "source": [
74
+ "results"
75
+ ]
76
+ },
77
+ {
78
+ "cell_type": "code",
79
+ "execution_count": null,
80
+ "metadata": {},
81
+ "outputs": [],
82
+ "source": [
83
+ "['we expect the age of our fleet to enhance availability and reliability due to reduced downtime for repairs.',\n",
84
+ " 'on an equivalent unit of production basis, general and administrative expenses declined 24 percent from 1994 to $.67 per boe.',\n",
85
+ " 'we will continue to assess the need for a valuation allowance against deferred tax assets considering all available evidence obtained in future reporting periods.']"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": 19,
91
+ "metadata": {},
92
+ "outputs": [
93
+ {
94
+ "name": "stdout",
95
+ "output_type": "stream",
96
+ "text": [
97
+ "Running on local URL: http://127.0.0.1:7860/\n",
98
+ "\n",
99
+ "To create a public link, set `share=True` in `launch()`.\n"
100
+ ]
101
+ },
102
+ {
103
+ "data": {
104
+ "text/html": [
105
+ "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"900\" height=\"500\" allow=\"autoplay; camera; microphone;\" frameborder=\"0\" allowfullscreen></iframe></div>"
106
+ ],
107
+ "text/plain": [
108
+ "<IPython.core.display.HTML object>"
109
+ ]
110
+ },
111
+ "metadata": {},
112
+ "output_type": "display_data"
113
+ }
114
+ ],
115
+ "source": [
116
+ "title = \"Forward Looking Statement Classification with FinBERT\"\n",
117
+ "description = \"This model classifies a sentence into one of the three categories: Specific FLS, Non-specific FLS, and Not FLS. We label a sentence as Specific FLS if it is about the future of the company, as Non-specific FLS if it is future-oriented but could be said of any company (e.g., cautionary language or risk disclosure), and as Not FLS if it is not about the future.\"\n",
118
+ "examples =[['we expect the age of our fleet to enhance availability and reliability due to reduced downtime for repairs.'],\n",
119
+ " ['on an equivalent unit of production basis, general and administrative expenses declined 24 percent from 1994 to $.67 per boe.'],\n",
120
+ " ['we will continue to assess the need for a valuation allowance against deferred tax assets considering all available evidence obtained in future reporting periods.']]\n",
121
+ "\n",
122
+ "def get_sentiment(input_text):\n",
123
+ " return nlp(input_text)\n",
124
+ "\n",
125
+ "iface = gr.Interface(fn=get_sentiment, \n",
126
+ " inputs=\"text\", \n",
127
+ " outputs=[\"text\"],\n",
128
+ " title=title,\n",
129
+ " description=description,\n",
130
+ " examples=examples)\n",
131
+ "iface.launch(debug=True)"
132
+ ]
133
+ },
134
+ {
135
+ "cell_type": "code",
136
+ "execution_count": null,
137
+ "metadata": {},
138
+ "outputs": [],
139
+ "source": []
140
+ }
141
+ ],
142
+ "metadata": {
143
+ "interpreter": {
144
+ "hash": "325bbc5f2b77b6a5675ad3f6ec2d9cde3e7a8993fd48d3c331b30741632a2dac"
145
+ },
146
+ "kernelspec": {
147
+ "display_name": "Python 3.8.13 ('hf_public')",
148
+ "language": "python",
149
+ "name": "python3"
150
+ },
151
+ "language_info": {
152
+ "codemirror_mode": {
153
+ "name": "ipython",
154
+ "version": 3
155
+ },
156
+ "file_extension": ".py",
157
+ "mimetype": "text/x-python",
158
+ "name": "python",
159
+ "nbconvert_exporter": "python",
160
+ "pygments_lexer": "ipython3",
161
+ "version": "3.8.13"
162
+ },
163
+ "orig_nbformat": 4
164
+ },
165
+ "nbformat": 4,
166
+ "nbformat_minor": 2
167
+ }