Update README.md
Browse files
README.md
CHANGED
@@ -4,149 +4,150 @@ license:
|
|
4 |
- cc-by-nc-4.0
|
5 |
datasets: pszemraj/fleece2instructions-codealpaca
|
6 |
tags:
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
|
|
11 |
metrics:
|
12 |
-
|
13 |
language:
|
14 |
-
|
15 |
widget:
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
-
|
33 |
-
|
34 |
-
*/
|
35 |
-
export class Document implements DocumentParams {
|
36 |
-
pageContent: string;
|
37 |
|
38 |
-
|
39 |
-
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
def merge(left, right):
|
49 |
-
if len(left) == 0:
|
50 |
-
return right
|
51 |
-
|
52 |
-
if len(right) == 0:
|
53 |
-
return left
|
54 |
-
|
55 |
-
result = []
|
56 |
-
index_left = index_right = 0
|
57 |
-
|
58 |
-
while len(result) < len(left) + len(right):
|
59 |
-
if left[index_left] <= right[index_right]:
|
60 |
-
result.append(left[index_left])
|
61 |
-
index_left += 1
|
62 |
-
else:
|
63 |
-
result.append(right[index_right])
|
64 |
-
index_right += 1
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
|
79 |
-
|
80 |
|
81 |
|
82 |
-
|
83 |
-
|
84 |
|
85 |
|
86 |
-
|
87 |
-
|
88 |
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
|
98 |
-
|
99 |
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
|
132 |
-
|
133 |
|
134 |
|
135 |
-
|
136 |
|
137 |
-
|
138 |
|
139 |
-
|
140 |
|
141 |
-
|
142 |
-
|
143 |
|
144 |
|
145 |
-
|
146 |
-
|
147 |
|
148 |
-
|
149 |
-
|
150 |
inference:
|
151 |
parameters:
|
152 |
max_length: 96
|
@@ -202,4 +203,4 @@ The following hyperparameters were used during training:
|
|
202 |
|:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|:-------:|
|
203 |
| 1.1165 | 1.0 | 281 | 1.1090 | 57.9239 | 31.9259 | 53.8737 | 54.9811 | 28.2924 |
|
204 |
| 1.0763 | 2.0 | 563 | 1.0267 | 59.9605 | 34.0298 | 55.7523 | 56.8021 | 29.6966 |
|
205 |
-
| 0.9595 | 2.99 | 843 | 1.0136 | 59.9513 | 33.9118 | 55.7815 | 56.9064 | 29.7146 |
|
|
|
4 |
- cc-by-nc-4.0
|
5 |
datasets: pszemraj/fleece2instructions-codealpaca
|
6 |
tags:
|
7 |
+
- generated_from_trainer
|
8 |
+
- instruct
|
9 |
+
- instructions
|
10 |
+
- code
|
11 |
+
- instructiongen
|
12 |
metrics:
|
13 |
+
- rouge
|
14 |
language:
|
15 |
+
- en
|
16 |
widget:
|
17 |
+
- text: |
|
18 |
+
git lfs install
|
19 |
+
huggingface-cli lfs-enable-largefiles .
|
20 |
+
git lfs track "*.bin"
|
21 |
+
git add .
|
22 |
+
git commit -a -m "add fp32 chkpt"
|
23 |
+
git push
|
24 |
+
example_title: bash
|
25 |
+
- text: |
|
26 |
+
export interface DocumentParams {
|
27 |
+
pageContent: string;
|
28 |
+
|
29 |
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
30 |
+
metadata: Record<string, any>;
|
31 |
+
}
|
32 |
+
|
33 |
+
/**
|
34 |
+
* Interface for interacting with a document.
|
35 |
+
*/
|
36 |
+
export class Document implements DocumentParams {
|
37 |
+
pageContent: string;
|
38 |
+
|
39 |
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
40 |
+
metadata: Record<string, any>;
|
41 |
+
|
42 |
+
constructor(fields?: Partial<DocumentParams>) {
|
43 |
+
this.pageContent = fields?.pageContent ?? this.pageContent;
|
44 |
+
this.metadata = fields?.metadata ?? {};
|
45 |
}
|
46 |
+
}
|
47 |
+
example_title: js
|
48 |
+
- text: |
|
49 |
+
def merge(left, right):
|
50 |
+
if len(left) == 0:
|
51 |
+
return right
|
52 |
|
53 |
+
if len(right) == 0:
|
54 |
+
return left
|
|
|
|
|
|
|
55 |
|
56 |
+
result = []
|
57 |
+
index_left = index_right = 0
|
58 |
|
59 |
+
while len(result) < len(left) + len(right):
|
60 |
+
if left[index_left] <= right[index_right]:
|
61 |
+
result.append(left[index_left])
|
62 |
+
index_left += 1
|
63 |
+
else:
|
64 |
+
result.append(right[index_right])
|
65 |
+
index_right += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
+
if index_right == len(right):
|
68 |
+
result += left[index_left:]
|
69 |
+
break
|
70 |
|
71 |
+
if index_left == len(left):
|
72 |
+
result += right[index_right:]
|
73 |
+
break
|
74 |
|
75 |
+
return result
|
76 |
+
example_title: merge
|
77 |
+
- text: >
|
78 |
+
import pandas as pd
|
79 |
|
80 |
+
import plotly.graph_objects as go
|
81 |
|
82 |
|
83 |
+
df =
|
84 |
+
pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_apple_stock.csv')
|
85 |
|
86 |
|
87 |
+
fig = go.Figure(go.Scatter(x = df['AAPL_x'], y = df['AAPL_y'],
|
88 |
+
name='Share Prices (in USD)'))
|
89 |
|
90 |
+
fig.update_layout(title='Apple Share Prices over time (2014)',
|
91 |
+
plot_bgcolor='rgb(230, 230,230)',
|
92 |
+
showlegend=True)
|
93 |
|
94 |
+
fig.show()
|
95 |
+
example_title: plot
|
96 |
+
- text: |
|
97 |
+
from spellchecker import SpellChecker
|
98 |
|
99 |
+
spell = SpellChecker()
|
100 |
|
101 |
+
def check_word_spelling(word: str):
|
102 |
+
misspelled = spell.unknown([word])
|
103 |
+
return len(misspelled) == 0
|
104 |
|
105 |
+
def eval_and_replace(text: str, match_token: str = "- "):
|
106 |
+
if match_token not in text:
|
107 |
+
return text
|
108 |
+
else:
|
109 |
+
while True:
|
110 |
+
full_before_text = text.split(match_token, maxsplit=1)[0]
|
111 |
+
before_text = [
|
112 |
+
char for char in full_before_text.split()[-1] if char.isalpha()
|
113 |
+
]
|
114 |
+
before_text = "".join(before_text)
|
115 |
+
full_after_text = text.split(match_token, maxsplit=1)[-1]
|
116 |
+
after_text = [char for char in full_after_text.split()[0] if char.isalpha()]
|
117 |
+
after_text = "".join(after_text)
|
118 |
+
full_text = before_text + after_text
|
119 |
+
if check_word_spelling(full_text):
|
120 |
+
text = full_before_text + full_after_text
|
121 |
+
else:
|
122 |
+
text = full_before_text + " " + full_after_text
|
123 |
+
if match_token not in text:
|
124 |
+
break
|
125 |
+
return text
|
126 |
|
127 |
+
text = "I- am- a go- od- boy"
|
128 |
+
eval_and_replace(text)
|
129 |
+
example_title: spell check
|
130 |
+
- text: >
|
131 |
+
import torch
|
132 |
|
133 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
134 |
|
135 |
|
136 |
+
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
|
137 |
|
138 |
+
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
|
139 |
|
140 |
+
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
|
141 |
|
142 |
+
sequences = ["I've been waiting for a HuggingFace course my whole life.",
|
143 |
+
"So have I!"]
|
144 |
|
145 |
|
146 |
+
tokens = tokenizer(sequences, padding=True, truncation=True,
|
147 |
+
return_tensors="pt")
|
148 |
|
149 |
+
output = model(**tokens)
|
150 |
+
example_title: model inference
|
151 |
inference:
|
152 |
parameters:
|
153 |
max_length: 96
|
|
|
203 |
|:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|:-------:|
|
204 |
| 1.1165 | 1.0 | 281 | 1.1090 | 57.9239 | 31.9259 | 53.8737 | 54.9811 | 28.2924 |
|
205 |
| 1.0763 | 2.0 | 563 | 1.0267 | 59.9605 | 34.0298 | 55.7523 | 56.8021 | 29.6966 |
|
206 |
+
| 0.9595 | 2.99 | 843 | 1.0136 | 59.9513 | 33.9118 | 55.7815 | 56.9064 | 29.7146 |
|