aus10powell committed on
Commit
8158335
·
1 Parent(s): e39674d

Upload 74 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Dockerfile +19 -0
  2. app.py +187 -0
  3. models/BarackObama/added_tokens.json +3 -0
  4. models/BarackObama/config.json +39 -0
  5. models/BarackObama/merges.txt +0 -0
  6. models/BarackObama/pytorch_model.bin +3 -0
  7. models/BarackObama/special_tokens_map.json +24 -0
  8. models/BarackObama/tokenizer_config.json +34 -0
  9. models/BarackObama/training_args.bin +3 -0
  10. models/BarackObama/vocab.json +0 -0
  11. models/alikarimi_ak8/added_tokens.json +3 -0
  12. models/alikarimi_ak8/config.json +39 -0
  13. models/alikarimi_ak8/merges.txt +0 -0
  14. models/alikarimi_ak8/pytorch_model.bin +3 -0
  15. models/alikarimi_ak8/special_tokens_map.json +24 -0
  16. models/alikarimi_ak8/tokenizer_config.json +34 -0
  17. models/alikarimi_ak8/training_args.bin +3 -0
  18. models/alikarimi_ak8/vocab.json +0 -0
  19. models/cathiedwood/added_tokens.json +3 -0
  20. models/cathiedwood/config.json +39 -0
  21. models/cathiedwood/merges.txt +0 -0
  22. models/cathiedwood/pytorch_model.bin +3 -0
  23. models/cathiedwood/special_tokens_map.json +24 -0
  24. models/cathiedwood/tokenizer_config.json +34 -0
  25. models/cathiedwood/training_args.bin +3 -0
  26. models/cathiedwood/vocab.json +0 -0
  27. models/elonmusk/added_tokens.json +3 -0
  28. models/elonmusk/config.json +39 -0
  29. models/elonmusk/merges.txt +0 -0
  30. models/elonmusk/pytorch_model.bin +3 -0
  31. models/elonmusk/special_tokens_map.json +24 -0
  32. models/elonmusk/tokenizer_config.json +34 -0
  33. models/elonmusk/training_args.bin +3 -0
  34. models/elonmusk/vocab.json +0 -0
  35. models/taylorlorenz/added_tokens.json +3 -0
  36. models/taylorlorenz/config.json +39 -0
  37. models/taylorlorenz/merges.txt +0 -0
  38. models/taylorlorenz/pytorch_model.bin +3 -0
  39. models/taylorlorenz/special_tokens_map.json +24 -0
  40. models/taylorlorenz/tokenizer_config.json +34 -0
  41. models/taylorlorenz/training_args.bin +3 -0
  42. models/taylorlorenz/vocab.json +0 -0
  43. models/ylecun/added_tokens.json +3 -0
  44. models/ylecun/config.json +39 -0
  45. models/ylecun/merges.txt +0 -0
  46. models/ylecun/pytorch_model.bin +3 -0
  47. models/ylecun/special_tokens_map.json +24 -0
  48. models/ylecun/tokenizer_config.json +34 -0
  49. models/ylecun/training_args.bin +3 -0
  50. models/ylecun/vocab.json +0 -0
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ FROM python:3.10
3
+
4
+ WORKDIR /code
5
+
6
+ COPY ./requirements.txt /code/requirements.txt
7
+
8
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
9
+
10
+ RUN useradd -m -u 1000 user
11
+ USER user
12
+ ENV HOME=/home/user \
13
+ PATH=/home/user/.local/bin:$PATH
14
+
15
+ WORKDIR $HOME/app
16
+
17
+ COPY --chown=user . $HOME/app
18
+
19
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FastAPI endpoint
2
+ To run locally use 'uvicorn app:app --host localhost --port 7860'
3
+ or
4
+ `python -m uvicorn app:app --reload --host localhost --port 7860`
5
+ """
6
+ import datetime as dt
7
+ import json
8
+ import logging
9
+ import numpy as np
10
+ import os
11
+ import random
12
+ from typing import Dict, List
13
+
14
+ import uvicorn
15
+ from fastapi import FastAPI, HTTPException, Request, Response
16
+ from fastapi.responses import HTMLResponse
17
+ from fastapi.staticfiles import StaticFiles
18
+ from fastapi.templating import Jinja2Templates
19
+
20
+ import scripts.sentiment as sentiment
21
+ import scripts.twitter_scraper as ts
22
+ import scripts.utils as utils
23
+ from scripts import generative
24
+
25
# Root logger emits INFO and above.
logging.basicConfig(level=logging.INFO)

# Template engine and ASGI application. Both directory arguments are relative
# paths, so they resolve against the process's working directory.
templates = Jinja2Templates(directory="templates")
app = FastAPI()
app.mount("/static", StaticFiles(directory="static"), name="static")

# Absolute location of the per-account model directories.
models_path = os.path.abspath("models")

# Twitter handles this service is willing to answer for.
username_list = [
    "alikarimi_ak8",
    "elonmusk",
    "BarackObama",
    "taylorlorenz",
    "cathiedwood",
    "ylecun",
]

# Fixed date window interpolated into every tweet search query.
start_date = dt.date(2023, 2, 1)
end_date = dt.date(2023, 3, 22)
45
+
46
+
47
@app.get("/", response_class=HTMLResponse)
async def webpage(request: Request):
    """Render the ``index.html`` template for the site root.

    Args:
        request: Incoming request, passed to the template context under the
            ``"request"`` key.

    Returns:
        The template response produced by ``templates.TemplateResponse``.
    """
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
50
+
51
+
52
@app.get("/accounts", response_model=List[dict])
def get_accounts() -> List[dict]:
    """Look up account information for every handle in ``username_list``.

    Returns:
        One entry per handle, in ``username_list`` order, each being whatever
        ``ts.get_twitter_account_info`` returns for that handle.
    """
    logging.info(f"Pulling account information on {username_list}")
    collected = []
    for handle in username_list:
        collected.append(ts.get_twitter_account_info(twitter_handle=handle))
    return collected
59
+
60
+
61
@app.get("/tweets/{username}", response_model=dict)
def get_tweets(username: str) -> dict:
    """Return a tracked account's tweets for the configured date window.

    Args:
        username: Twitter handle; only handles in ``username_list`` are served.

    Returns:
        The result of ``ts.get_tweets`` for the account's search query, or a
        ``{"detail": ...}`` payload when the handle is not tracked.
    """
    if username not in username_list:
        return {"detail": "Account not in scope of project."}

    # This assignment had been commented out while ``query`` was still passed
    # below, so every in-scope request failed with NameError. It uses the same
    # query format as the audience and sentiment endpoints.
    query = f"from:{username} since:{start_date} until:{end_date}"
    # NOTE(review): the other handlers sample and index ts.get_tweets() like a
    # sequence of dicts; confirm that result is compatible with
    # response_model=dict on this route.
    return ts.get_tweets(query=query)
68
+
69
+
70
@app.get("/audience/{username}", response_model=dict)
def get_audience(username: str) -> dict:
    """Summarise the accounts replying to a sample of a tracked user's tweets.

    Up to five of the user's tweets from the configured date window are
    sampled at random, the replies to each are fetched, and account
    information is looked up for every reply author.

    Args:
        username: Twitter handle; only handles in ``username_list`` are served.

    Returns:
        A dict with ``sample_size`` (number of reply authors looked up) and the
        rounded means ``mean_follower_count``, ``mean_friends_count`` and
        ``mean_verified``. When no reply authors are found the means are
        ``None`` and ``sample_size`` is 0. For an untracked handle a plain-text
        404 ``Response`` is returned instead.
    """
    if username not in username_list:
        return Response(content="Account not in scope of project.", status_code=404)

    query = f"from:{username} since:{start_date} until:{end_date}"
    tweets = ts.get_tweets(query=query)

    # Randomly sample at most n_samples tweets. random.sample raises
    # ValueError when asked for more items than exist, so clamp to what the
    # query actually returned.
    n_samples = 5
    tweets_sampled = random.sample(tweets, min(n_samples, len(tweets)))

    # Collect every reply to the sampled tweets.
    tweet_threads = []
    for tweet in tweets_sampled:
        tweet_threads += ts.get_replies(
            username=tweet["username"],
            conversation_id=tweet["conversation_id"],
            max_tweets=100,
        )

    # One lookup per reply; an author who replied more than once is counted
    # once per reply, as before.
    info_accounts = [
        ts.get_twitter_account_info(twitter_handle=reply["username"])
        for reply in tweet_threads
    ]

    if not info_accounts:
        # np.mean([]) is NaN, which cannot be serialised as JSON; report an
        # explicit empty sample instead.
        return {
            "sample_size": 0,
            "mean_follower_count": None,
            "mean_friends_count": None,
            "mean_verified": None,
        }

    follower_counts = [info["follower_count"] for info in info_accounts]
    friends_counts = [info["friends_count"] for info in info_accounts]
    # assumes "verified" is a boolean (the sample payload noted in the original
    # code shows `"verified":false`) -- TODO confirm
    verified_flags = [1 if info["verified"] else 0 for info in info_accounts]

    return {
        "sample_size": len(info_accounts),
        "mean_follower_count": round(np.mean(follower_counts), 3),
        "mean_friends_count": round(np.mean(friends_counts), 3),
        "mean_verified": round(np.mean(verified_flags), 3),
    }
118
+
119
+
120
@app.get("/sentiment/{username}")
async def get_sentiment(username: str) -> Dict[str, Dict[str, float]]:
    """Score sentiment of a tracked user's tweets and of replies to a sample.

    All of the user's tweets in the configured date window are scored. Up to
    five of them are sampled at random and the replies to those are scored
    separately.

    Args:
        username: Twitter handle; only handles in ``username_list`` are served.

    Returns:
        ``{"thread_level": {...}, "audience_level": {...}}``. ``thread_level``
        holds the stats for the replies and ``audience_level`` the stats for
        the user's own tweets; each has a rounded ``mean`` and a
        ``confidence_interval`` from ``utils.wilson_score_interval``.

    Raises:
        HTTPException: 404 when the handle is not tracked or no tweets were
            found for it.
    """
    if username not in username_list:
        raise HTTPException(status_code=404, detail="Account not in scope of project.")

    query = f"from:{username} since:{start_date} until:{end_date}"
    tweets = ts.get_tweets(query=query)
    if not tweets:
        # Nothing to sample or score; fail clearly instead of computing NaN
        # means further down.
        raise HTTPException(
            status_code=404, detail="No tweets found for account in date range."
        )

    # random.sample raises ValueError when asked for more items than exist,
    # so clamp the sample size to the number of tweets returned.
    n_samples = 5
    tweets_sampled = random.sample(tweets, min(n_samples, len(tweets)))

    tweet_threads = []
    for tweet in tweets_sampled:
        tweet_threads += ts.get_replies(
            username=tweet["username"],
            conversation_id=tweet["conversation_id"],
            max_tweets=100,
        )

    logging.info(
        f"Total replies to {n_samples} sampled tweets from username: {username}, {len(tweet_threads)}"
    )

    # Sentiment of the user's own tweets.
    logging.info(f"Running tweet sentiment scoring on username: {username} tweets")
    tweets_scores = sentiment.get_tweets_sentiment(tweets=tweets)
    mean_tweets_score = round(np.mean(tweets_scores), 2)
    ci_tweets = utils.wilson_score_interval(tweets_scores)

    # Sentiment of the replies to the sampled tweets.
    # NOTE(review): if no replies were found, the mean below is NaN -- confirm
    # how the client should be told about an empty thread sample.
    logging.info(
        f"Running tweet thread sentiment scoring on username: {username} tweets"
    )
    threads_scores = sentiment.get_tweets_sentiment(tweets=tweet_threads)
    mean_threads_score = round(np.mean(threads_scores), 2)
    ci_threads = utils.wilson_score_interval(threads_scores)

    return {
        "thread_level": {
            "mean": mean_threads_score,
            "confidence_interval": ci_threads,
            # Misspelled key kept so existing clients keep working; prefer
            # "confidence_interval", which matches "audience_level".
            "confidence_interal": ci_threads,
        },
        "audience_level": {
            "mean": mean_tweets_score,
            "confidence_interval": ci_tweets,
        },
    }
166
+
167
+
168
@app.post("/api/generate")
async def generate_text(request: Request):
    """Generate text in the style of a tracked account from a prompt.

    Expects a JSON object body with ``account`` (a handle from
    ``username_list``) and ``text`` (the prompt).

    Args:
        request: Incoming request whose JSON body carries the two fields.

    Returns:
        ``{"generated_text": ...}`` holding the first generated sample.

    Raises:
        HTTPException: 400 when the body is not valid JSON or lacks the
            required fields; 404 when ``account`` is not a tracked handle.
    """
    try:
        data = await request.json()
        account = data["account"]
        prompt = data["text"]
    except (ValueError, KeyError, TypeError) as err:
        # ValueError covers a body that is not valid JSON; KeyError/TypeError
        # cover a missing field or a non-object payload.
        raise HTTPException(
            status_code=400,
            detail="Request body must be a JSON object with 'account' and 'text'.",
        ) from err

    # ``account`` comes from the client and is joined into a filesystem path
    # below. Accepting only known handles stops values such as "../.." from
    # selecting an arbitrary directory.
    if account not in username_list:
        raise HTTPException(status_code=404, detail="Account not in scope of project.")

    logging.info("POST /api/generate account=%s text=%s", account, prompt)

    samples = generative.generate_account_text(
        prompt=prompt, model_dir=os.path.join(models_path, account)
    )
    # Return only the first generated example.
    return {"generated_text": samples[0]["generated_text"]}
182
+
183
+
184
+
185
+ # if __name__ == "__main__":
186
+ # # uvicorn.run(app, host="0.0.0.0", port=8000)
187
+ # uvicorn.run("app:app", host="127.0.0.1", port=5049, reload=True)
models/BarackObama/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[PAD]": 50257
3
+ }
models/BarackObama/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.24.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
models/BarackObama/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/BarackObama/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18c6382bf4651ffdfb464fcafe980119de8bf39d52e7e1ba5678130fcd1e9469
3
+ size 510395581
models/BarackObama/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "[PAD]",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
models/BarackObama/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "model_max_length": 1024,
22
+ "name_or_path": "gpt2",
23
+ "pad_token": null,
24
+ "special_tokens_map_file": null,
25
+ "tokenizer_class": "GPT2Tokenizer",
26
+ "unk_token": {
27
+ "__type": "AddedToken",
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
models/BarackObama/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71684cf5cce747c17720cae37368baf794483b4018c616e0d26487886590e338
3
+ size 3387
models/BarackObama/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
models/alikarimi_ak8/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[PAD]": 50257
3
+ }
models/alikarimi_ak8/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.24.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
models/alikarimi_ak8/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/alikarimi_ak8/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfffa9a2034ca45dd1be4946de2c080045ed4c221e652275f13024078cd8e604
3
+ size 510395581
models/alikarimi_ak8/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "[PAD]",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
models/alikarimi_ak8/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "model_max_length": 1024,
22
+ "name_or_path": "gpt2",
23
+ "pad_token": null,
24
+ "special_tokens_map_file": null,
25
+ "tokenizer_class": "GPT2Tokenizer",
26
+ "unk_token": {
27
+ "__type": "AddedToken",
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
models/alikarimi_ak8/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20869c5642f6be95f3a342a6133749dd150bfdecffedab7860d7e767a521ed74
3
+ size 3387
models/alikarimi_ak8/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
models/cathiedwood/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[PAD]": 50257
3
+ }
models/cathiedwood/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.24.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
models/cathiedwood/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/cathiedwood/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecac4ae5b95aa181da1616db839314ca186f138d59bd2322960623bc537497e9
3
+ size 510395581
models/cathiedwood/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "[PAD]",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
models/cathiedwood/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "model_max_length": 1024,
22
+ "name_or_path": "gpt2",
23
+ "pad_token": null,
24
+ "special_tokens_map_file": null,
25
+ "tokenizer_class": "GPT2Tokenizer",
26
+ "unk_token": {
27
+ "__type": "AddedToken",
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
models/cathiedwood/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90e74b31b37904075ebd8760cd82fe42a6f239f874c9989a7fc199126050a4a5
3
+ size 3387
models/cathiedwood/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
models/elonmusk/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[PAD]": 50257
3
+ }
models/elonmusk/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.24.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
models/elonmusk/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/elonmusk/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0305a3b73d7f97d5fd0ac0409407fe5fa80e32de4f5ae8b73305bbab896aff5
3
+ size 510395581
models/elonmusk/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "[PAD]",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
models/elonmusk/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "model_max_length": 1024,
22
+ "name_or_path": "gpt2",
23
+ "pad_token": null,
24
+ "special_tokens_map_file": null,
25
+ "tokenizer_class": "GPT2Tokenizer",
26
+ "unk_token": {
27
+ "__type": "AddedToken",
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
models/elonmusk/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c50926e7c005b420f813373fa239e70fac310bdbcafef95f95dfb1e3145c544
3
+ size 3387
models/elonmusk/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
models/taylorlorenz/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[PAD]": 50257
3
+ }
models/taylorlorenz/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.24.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
models/taylorlorenz/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/taylorlorenz/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5075410d32a65bc98c38d578df472ecb270124c0f053b73477e6b7f1b3377003
3
+ size 510395581
models/taylorlorenz/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "[PAD]",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
models/taylorlorenz/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "model_max_length": 1024,
22
+ "name_or_path": "gpt2",
23
+ "pad_token": null,
24
+ "special_tokens_map_file": null,
25
+ "tokenizer_class": "GPT2Tokenizer",
26
+ "unk_token": {
27
+ "__type": "AddedToken",
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
models/taylorlorenz/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad406657c6f7bdf6c5a07979f9b57f380adf0376b6c3d99b27a1ce2f9903e9f6
3
+ size 3387
models/taylorlorenz/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
models/ylecun/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[PAD]": 50257
3
+ }
models/ylecun/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.24.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
models/ylecun/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/ylecun/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfffa9a2034ca45dd1be4946de2c080045ed4c221e652275f13024078cd8e604
3
+ size 510395581
models/ylecun/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "[PAD]",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
models/ylecun/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "model_max_length": 1024,
22
+ "name_or_path": "gpt2",
23
+ "pad_token": null,
24
+ "special_tokens_map_file": null,
25
+ "tokenizer_class": "GPT2Tokenizer",
26
+ "unk_token": {
27
+ "__type": "AddedToken",
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
models/ylecun/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20869c5642f6be95f3a342a6133749dd150bfdecffedab7860d7e767a521ed74
3
+ size 3387
models/ylecun/vocab.json ADDED
The diff for this file is too large to render. See raw diff