Heisenberg08 committed on
Commit
df767ad
1 Parent(s): 1359fe4

added model

Browse files
Gpt_neo_Epoch_10_Loss_031_data_5000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:941e5b0ba78e59be1cf44737b967c6f1b03953330615f6af73bf8099beb6aa4d
3
+ size 551214929
app.py CHANGED
@@ -1,4 +1,69 @@
1
  import streamlit as st
 
 
 
2
 
3
- x = st.slider('Select a value')
4
- st.write(x, 'squared is', x * x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import torch
3
+ import transformers
4
+ from transformers import AutoTokenizer, AutoModelWithLMHead
5
 
6
# Inference is pinned to the CPU: the checkpoint below is mapped onto the CPU
# at load time, so inputs must live there as well.
device = torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125M")

# The .pth file is a pickle of the whole model object, not a state dict.
# SECURITY: torch.load unpickles arbitrary Python objects, so only load
# checkpoints that ship with this repository or come from a trusted source.
# Unpickling also needs a transformers release that defines every class the
# checkpoint references (an older install fails with
# "Can't get attribute 'NewGELUActivation'"), so keep the version pinned in
# requirements.txt.
# NOTE(review): Streamlit re-executes this script on every interaction, so the
# checkpoint is re-read each time - consider wrapping the load in Streamlit's
# caching decorator.
model = torch.load("Gpt_neo_Epoch_10_Loss_031_data_5000.pth", map_location=device)
# Put the module in inference mode so train-only layers such as dropout are
# disabled, whatever mode the model was in when it was saved.
model.eval()
11
+
12
+
13
def predict_query(input_sentence, max_len=40, temp=0.7):
    """Generate five sampled continuations for ``input_sentence``.

    The prompt is tokenized with the module-level ``tokenizer`` and passed to
    the module-level ``model`` using nucleus sampling (``top_p=0.95``) with
    repeated 3-grams disallowed.  Each returned sequence is decoded to text;
    when the decoded text contains the ``<|sep|>`` marker, only the part after
    the first marker is kept, otherwise the full decoded text is kept.

    Args:
        input_sentence: Prompt text to continue.
        max_len: Forwarded to ``generate`` as ``max_length``.  Per the
            transformers documentation this bounds the total sequence length,
            prompt tokens included.
        temp: Sampling temperature forwarded to ``generate``.

    Returns:
        A list with one decoded suggestion string per returned sequence
        (five, as requested through ``num_return_sequences``).
    """
    separator = "<|sep|>"
    seq = tokenizer(input_sentence, return_tensors='pt')['input_ids'].to(device)
    outputs = model.generate(
        seq,
        max_length=max_len,
        do_sample=True,
        top_p=0.95,
        temperature=temp,
        no_repeat_ngram_size=3,
        num_return_sequences=5,
    )

    pred = []
    for i, token_ids in enumerate(outputs):
        text = tokenizer.decode(token_ids, skip_special_tokens=True)
        # Strip everything up to and including the marker only when it is
        # actually present; a blind ``find(...) + 7`` would chop the first six
        # characters off any output that lacks the marker.
        _, marker, tail = text.partition(separator)
        if marker:
            text = tail
        print(f"Suggestion{i} :{text}")
        # ``text`` is already a decoded string - decoding it a second time
        # would fail, so append it as is.
        pred.append(text)
    return pred
32
+ # option = st.selectbox(
33
+ # 'Please Select option',
34
+ # ('Predictive writing',"None"),index=1)
35
+
36
+
37
# ---------------------------------------------------------------------------
# Streamlit page: a prompt box plus sidebar controls that feed predict_query.
# ---------------------------------------------------------------------------
st.title("Predictive scientific writing")
st.write('### Using AI to Generate scientific literature')
# The tokenizer and checkpoint used by this app are GPT-Neo, so say so.
st.sidebar.markdown("## This is a demo of a text generation model trained with GPT-Neo")

# Forwarded to generate() as max_length by predict_query.
max_len = st.sidebar.slider(label='Output Size', min_value=1, max_value=150, value=10, step=1)
# Sampling needs a strictly positive temperature (transformers rejects 0),
# so the slider starts at 0.1 instead of 0.0.
temp = st.sidebar.slider(label='Temperature', min_value=0.1, max_value=2.0, value=0.8, step=0.1)

sentence = st.text_area('Input your sentence here:')
clear = st.button("Clear")
Enter = st.button("Generate")

if clear:
    # Any button press re-runs the script; because "Generate" is not pressed
    # on that run, the previous suggestions are simply not rendered again.
    st.markdown(' ')

if Enter:
    if not sentence.strip():
        # An empty prompt gives the model nothing to continue.
        st.warning("Please enter a sentence first.")
    else:
        st.header("Output-")
        print("Generating predictions......\n\n")
        suggestions = predict_query(sentence, max_len, temp)
        # A distinct loop variable avoids shadowing the list being iterated.
        for i, suggestion in enumerate(suggestions):
            st.markdown(f"Suggestion {i} :{suggestion}")
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch==1.11.0
2
+ transformers==4.19.2
3
+ streamlit
test.ipynb ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import torch\n",
10
+ "import transformers\n",
11
+ "from transformers import AutoTokenizer, AutoModelWithLMHead\n"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 2,
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "tokenizer = AutoTokenizer.from_pretrained(\"EleutherAI/gpt-neo-125M\")\n"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 3,
26
+ "metadata": {},
27
+ "outputs": [
28
+ {
29
+ "name": "stderr",
30
+ "output_type": "stream",
31
+ "text": [
32
+ "c:\\Users\\ADMIN\\textifyai\\lib\\site-packages\\transformers\\models\\auto\\modeling_auto.py:664: FutureWarning: The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.\n",
33
+ " FutureWarning,\n"
34
+ ]
35
+ }
36
+ ],
37
+ "source": [
38
+ "# model= AutoModelWithLMHead.from_pretrained(\"EleutherAI/gpt-neo-125M\")\n"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 4,
44
+ "metadata": {},
45
+ "outputs": [
46
+ {
47
+ "ename": "AttributeError",
48
+ "evalue": "Can't get attribute 'NewGELUActivation' on <module 'transformers.activations' from 'c:\\\\Users\\\\ADMIN\\\\textifyai\\\\lib\\\\site-packages\\\\transformers\\\\activations.py'>",
49
+ "output_type": "error",
50
+ "traceback": [
51
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
52
+ "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
53
+ "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_9168/1093822913.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Gpt_neo_Epoch_10_Loss_031_data_5000.pth\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mmap_location\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdevice\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'cpu'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
54
+ "\u001b[1;32mc:\\Users\\ADMIN\\textifyai\\lib\\site-packages\\torch\\serialization.py\u001b[0m in \u001b[0;36mload\u001b[1;34m(f, map_location, pickle_module, **pickle_load_args)\u001b[0m\n\u001b[0;32m 605\u001b[0m \u001b[0mopened_file\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mseek\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0morig_position\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 606\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 607\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_load\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mopened_zipfile\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpickle_module\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mpickle_load_args\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 608\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0m_legacy_load\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpickle_module\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mpickle_load_args\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 609\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
55
+ "\u001b[1;32mc:\\Users\\ADMIN\\textifyai\\lib\\site-packages\\torch\\serialization.py\u001b[0m in \u001b[0;36m_load\u001b[1;34m(zip_file, map_location, pickle_module, pickle_file, **pickle_load_args)\u001b[0m\n\u001b[0;32m 880\u001b[0m \u001b[0munpickler\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mUnpicklerWrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata_file\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mpickle_load_args\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 881\u001b[0m \u001b[0munpickler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpersistent_load\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpersistent_load\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 882\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0munpickler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 883\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 884\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_utils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_validate_loaded_sparse_tensors\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
56
+ "\u001b[1;32mc:\\Users\\ADMIN\\textifyai\\lib\\site-packages\\torch\\serialization.py\u001b[0m in \u001b[0;36mfind_class\u001b[1;34m(self, mod_name, name)\u001b[0m\n\u001b[0;32m 873\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mfind_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmod_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 874\u001b[0m \u001b[0mmod_name\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mload_module_mapping\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmod_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmod_name\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 875\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmod_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 876\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 877\u001b[0m \u001b[1;31m# Load the data (which may in turn use `persistent_load` to load tensors)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
57
+ "\u001b[1;31mAttributeError\u001b[0m: Can't get attribute 'NewGELUActivation' on <module 'transformers.activations' from 'c:\\\\Users\\\\ADMIN\\\\textifyai\\\\lib\\\\site-packages\\\\transformers\\\\activations.py'>"
58
+ ]
59
+ }
60
+ ],
61
+ "source": [
62
+ "model=torch.load(\"Gpt_neo_Epoch_10_Loss_031_data_5000.pth\",map_location=torch.device('cpu'))\n"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": null,
68
+ "metadata": {},
69
+ "outputs": [],
70
+ "source": [
71
+ "def predict_query(input_sentence,max_len=40,temp=0.7):\n",
72
+ " pred=[]\n",
73
+ " seq=tokenizer(input_sentence,return_tensors='pt')['input_ids'].to(device)\n",
74
+ " outputs=model.generate(seq,\n",
75
+ " max_length=max_len,\n",
76
+ " do_sample=True,\n",
77
+ " top_p=0.95,\n",
78
+ " #num_beams=5,\n",
79
+ " temperature=temp,\n",
80
+ " no_repeat_ngram_size=3,\n",
81
+ " num_return_sequences=5\n",
82
+ " ).to(device)\n",
83
+ " for i,out in enumerate(outputs):\n",
84
+ " out=tokenizer.decode(out, skip_special_tokens=True)\n",
85
+ " idx=out.find(\"<|sep|>\")+7\n",
86
+ " out=out[idx:]\n",
87
+ " print(f\"Sugestion{i} :{out}\")\n",
88
+ " pred.append(tokenizer.decode(out, skip_special_tokens=True))\n",
89
+ " return pred\n"
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": 2,
95
+ "metadata": {},
96
+ "outputs": [
97
+ {
98
+ "name": "stdout",
99
+ "output_type": "stream",
100
+ "text": [
101
+ "4.11.3\n"
102
+ ]
103
+ }
104
+ ],
105
+ "source": [
106
+ "import transformers\n",
107
+ "\n",
108
+ "print(transformers.__version__)"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": 4,
114
+ "metadata": {},
115
+ "outputs": [
116
+ {
117
+ "name": "stdout",
118
+ "output_type": "stream",
119
+ "text": [
120
+ "1.8.2+cu111\n"
121
+ ]
122
+ }
123
+ ],
124
+ "source": [
125
+ "import torch\n",
126
+ "\n",
127
+ "print(torch.__version__)"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": 5,
133
+ "metadata": {},
134
+ "outputs": [
135
+ {
136
+ "ename": "ModuleNotFoundError",
137
+ "evalue": "No module named 'tensorflow'",
138
+ "output_type": "error",
139
+ "traceback": [
140
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
141
+ "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
142
+ "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_3544/763479854.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mtf\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"tf.__version__\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__version__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
143
+ "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'tensorflow'"
144
+ ]
145
+ }
146
+ ],
147
+ "source": [
148
+ "import tensorflow as tf\n",
149
+ "\n",
150
+ "print(\"tf.__version__\", tf.__version__)"
151
+ ]
152
+ }
153
+ ],
154
+ "metadata": {
155
+ "interpreter": {
156
+ "hash": "c2aedac36a10eb432fae2be3be58ef9112ca6433e61b95db4d37b1628d19a18a"
157
+ },
158
+ "kernelspec": {
159
+ "display_name": "Python 3.7.7 64-bit ('textifyai': venv)",
160
+ "language": "python",
161
+ "name": "python3"
162
+ },
163
+ "language_info": {
164
+ "codemirror_mode": {
165
+ "name": "ipython",
166
+ "version": 3
167
+ },
168
+ "file_extension": ".py",
169
+ "mimetype": "text/x-python",
170
+ "name": "python",
171
+ "nbconvert_exporter": "python",
172
+ "pygments_lexer": "ipython3",
173
+ "version": "3.7.7"
174
+ },
175
+ "orig_nbformat": 4
176
+ },
177
+ "nbformat": 4,
178
+ "nbformat_minor": 2
179
+ }