strickvl committed on
Commit
c9036ae
1 Parent(s): d1671f2

add data folder to gitignore

Browse files
Files changed (2) hide show
  1. .gitignore +3 -0
  2. src/train_tokenizer.ipynb +53 -0
.gitignore CHANGED
@@ -158,3 +158,6 @@ cython_debug/
158
  # and can be added to the global gitignore or merged into this file. For a more nuclear
159
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
  #.idea/
 
 
 
 
158
  # and can be added to the global gitignore or merged into this file. For a more nuclear
159
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
  #.idea/
161
+
162
+ # Project-specific ignores
163
+ data/
src/train_tokenizer.ipynb ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 5,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# !pip install datasets\n",
10
+ "# !huggingface-cli login"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 8,
16
+ "metadata": {},
17
+ "outputs": [],
18
+ "source": [
19
+ "# from datasets import load_dataset\n",
20
+ "# load_dataset(\"balochiml/balochi-language-data\", data_dir=\"data\", cache_dir=\"../data\")"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": []
29
+ }
30
+ ],
31
+ "metadata": {
32
+ "kernelspec": {
33
+ "display_name": "balochi",
34
+ "language": "python",
35
+ "name": "python3"
36
+ },
37
+ "language_info": {
38
+ "codemirror_mode": {
39
+ "name": "ipython",
40
+ "version": 3
41
+ },
42
+ "file_extension": ".py",
43
+ "mimetype": "text/x-python",
44
+ "name": "python",
45
+ "nbconvert_exporter": "python",
46
+ "pygments_lexer": "ipython3",
47
+ "version": "3.10.11"
48
+ },
49
+ "orig_nbformat": 4
50
+ },
51
+ "nbformat": 4,
52
+ "nbformat_minor": 2
53
+ }