m3hrdadfi commited on
Commit
6851088
1 Parent(s): a459bd8

Hello everyone

Browse files
.gitignore ADDED
@@ -0,0 +1,886 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### VirtualEnv template
2
+ # Virtualenv
3
+ # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
4
+ .Python
5
+ [Bb]in
6
+ [Ii]nclude
7
+ [Ll]ib
8
+ [Ll]ib64
9
+ [Ll]ocal
10
+ [Ss]cripts
11
+ pyvenv.cfg
12
+ .venv
13
+ pip-selfcheck.json
14
+
15
+ ### Python template
16
+ # Byte-compiled / optimized / DLL files
17
+ __pycache__/
18
+ *.py[cod]
19
+ *$py.class
20
+
21
+ # C extensions
22
+ *.so
23
+
24
+ # Distribution / packaging
25
+ .Python
26
+ build/
27
+ develop-eggs/
28
+ dist/
29
+ downloads/
30
+ eggs/
31
+ .eggs/
32
+ lib/
33
+ lib64/
34
+ parts/
35
+ sdist/
36
+ var/
37
+ wheels/
38
+ share/python-wheels/
39
+ *.egg-info/
40
+ .installed.cfg
41
+ *.egg
42
+ MANIFEST
43
+
44
+ # PyInstaller
45
+ # Usually these files are written by a python script from a template
46
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
47
+ *.manifest
48
+ *.spec
49
+
50
+ # Installer logs
51
+ pip-log.txt
52
+ pip-delete-this-directory.txt
53
+
54
+ # Unit test / coverage reports
55
+ htmlcov/
56
+ .tox/
57
+ .nox/
58
+ .coverage
59
+ .coverage.*
60
+ .cache
61
+ nosetests.xml
62
+ coverage.xml
63
+ *.cover
64
+ *.py,cover
65
+ .hypothesis/
66
+ .pytest_cache/
67
+ cover/
68
+
69
+ # Translations
70
+ *.mo
71
+ *.pot
72
+
73
+ # Django stuff:
74
+ *.log
75
+ local_settings.py
76
+ db.sqlite3
77
+ db.sqlite3-journal
78
+
79
+ # Flask stuff:
80
+ instance/
81
+ .webassets-cache
82
+
83
+ # Scrapy stuff:
84
+ .scrapy
85
+
86
+ # Sphinx documentation
87
+ docs/_build/
88
+
89
+ # PyBuilder
90
+ .pybuilder/
91
+ target/
92
+
93
+ # Jupyter Notebook
94
+ .ipynb_checkpoints
95
+
96
+ # IPython
97
+ profile_default/
98
+ ipython_config.py
99
+
100
+ # pyenv
101
+ # For a library or package, you might want to ignore these files since the code is
102
+ # intended to run in multiple environments; otherwise, check them in:
103
+ # .python-version
104
+
105
+ # pipenv
106
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
107
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
108
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
109
+ # install all needed dependencies.
110
+ #Pipfile.lock
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ ### JupyterNotebooks template
156
+ # gitignore template for Jupyter Notebooks
157
+ # website: http://jupyter.org/
158
+
159
+ .ipynb_checkpoints
160
+ */.ipynb_checkpoints/*
161
+
162
+ # IPython
163
+ profile_default/
164
+ ipython_config.py
165
+
166
+ # Remove previous ipynb_checkpoints
167
+ # git rm -r .ipynb_checkpoints/
168
+
169
+ ### Eclipse template
170
+ .metadata
171
+ bin/
172
+ tmp/
173
+ *.tmp
174
+ *.bak
175
+ *.swp
176
+ *~.nib
177
+ local.properties
178
+ .settings/
179
+ .loadpath
180
+ .recommenders
181
+
182
+ # External tool builders
183
+ .externalToolBuilders/
184
+
185
+ # Locally stored "Eclipse launch configurations"
186
+ *.launch
187
+
188
+ # PyDev specific (Python IDE for Eclipse)
189
+ *.pydevproject
190
+
191
+ # CDT-specific (C/C++ Development Tooling)
192
+ .cproject
193
+
194
+ # CDT- autotools
195
+ .autotools
196
+
197
+ # Java annotation processor (APT)
198
+ .factorypath
199
+
200
+ # PDT-specific (PHP Development Tools)
201
+ .buildpath
202
+
203
+ # sbteclipse plugin
204
+ .target
205
+
206
+ # Tern plugin
207
+ .tern-project
208
+
209
+ # TeXlipse plugin
210
+ .texlipse
211
+
212
+ # STS (Spring Tool Suite)
213
+ .springBeans
214
+
215
+ # Code Recommenders
216
+ .recommenders/
217
+
218
+ # Annotation Processing
219
+ .apt_generated/
220
+ .apt_generated_test/
221
+
222
+ # Scala IDE specific (Scala & Java development for Eclipse)
223
+ .cache-main
224
+ .scala_dependencies
225
+ .worksheet
226
+
227
+ # Uncomment this line if you wish to ignore the project description file.
228
+ # Typically, this file would be tracked if it contains build/dependency configurations:
229
+ #.project
230
+
231
+ ### VisualStudio template
232
+ ## Ignore Visual Studio temporary files, build results, and
233
+ ## files generated by popular Visual Studio add-ons.
234
+ ##
235
+ ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
236
+
237
+ # User-specific files
238
+ *.rsuser
239
+ *.suo
240
+ *.user
241
+ *.userosscache
242
+ *.sln.docstates
243
+
244
+ # User-specific files (MonoDevelop/Xamarin Studio)
245
+ *.userprefs
246
+
247
+ # Mono auto generated files
248
+ mono_crash.*
249
+
250
+ # Build results
251
+ [Dd]ebug/
252
+ [Dd]ebugPublic/
253
+ [Rr]elease/
254
+ [Rr]eleases/
255
+ x64/
256
+ x86/
257
+ [Ww][Ii][Nn]32/
258
+ [Aa][Rr][Mm]/
259
+ [Aa][Rr][Mm]64/
260
+ bld/
261
+ [Bb]in/
262
+ [Oo]bj/
263
+ [Ll]og/
264
+ [Ll]ogs/
265
+
266
+ # Visual Studio 2015/2017 cache/options directory
267
+ .vs/
268
+ # Uncomment if you have tasks that create the project's static files in wwwroot
269
+ #wwwroot/
270
+
271
+ # Visual Studio 2017 auto generated files
272
+ Generated\ Files/
273
+
274
+ # MSTest test Results
275
+ [Tt]est[Rr]esult*/
276
+ [Bb]uild[Ll]og.*
277
+
278
+ # NUnit
279
+ *.VisualState.xml
280
+ TestResult.xml
281
+ nunit-*.xml
282
+
283
+ # Build Results of an ATL Project
284
+ [Dd]ebugPS/
285
+ [Rr]eleasePS/
286
+ dlldata.c
287
+
288
+ # Benchmark Results
289
+ BenchmarkDotNet.Artifacts/
290
+
291
+ # .NET Core
292
+ project.lock.json
293
+ project.fragment.lock.json
294
+ artifacts/
295
+
296
+ # ASP.NET Scaffolding
297
+ ScaffoldingReadMe.txt
298
+
299
+ # StyleCop
300
+ StyleCopReport.xml
301
+
302
+ # Files built by Visual Studio
303
+ *_i.c
304
+ *_p.c
305
+ *_h.h
306
+ *.ilk
307
+ *.meta
308
+ *.obj
309
+ *.iobj
310
+ *.pch
311
+ *.pdb
312
+ *.ipdb
313
+ *.pgc
314
+ *.pgd
315
+ *.rsp
316
+ *.sbr
317
+ *.tlb
318
+ *.tli
319
+ *.tlh
320
+ *.tmp
321
+ *.tmp_proj
322
+ *_wpftmp.csproj
323
+ *.log
324
+ *.vspscc
325
+ *.vssscc
326
+ .builds
327
+ *.pidb
328
+ *.svclog
329
+ *.scc
330
+
331
+ # Chutzpah Test files
332
+ _Chutzpah*
333
+
334
+ # Visual C++ cache files
335
+ ipch/
336
+ *.aps
337
+ *.ncb
338
+ *.opendb
339
+ *.opensdf
340
+ *.sdf
341
+ *.cachefile
342
+ *.VC.db
343
+ *.VC.VC.opendb
344
+
345
+ # Visual Studio profiler
346
+ *.psess
347
+ *.vsp
348
+ *.vspx
349
+ *.sap
350
+
351
+ # Visual Studio Trace Files
352
+ *.e2e
353
+
354
+ # TFS 2012 Local Workspace
355
+ $tf/
356
+
357
+ # Guidance Automation Toolkit
358
+ *.gpState
359
+
360
+ # ReSharper is a .NET coding add-in
361
+ _ReSharper*/
362
+ *.[Rr]e[Ss]harper
363
+ *.DotSettings.user
364
+
365
+ # TeamCity is a build add-in
366
+ _TeamCity*
367
+
368
+ # DotCover is a Code Coverage Tool
369
+ *.dotCover
370
+
371
+ # AxoCover is a Code Coverage Tool
372
+ .axoCover/*
373
+ !.axoCover/settings.json
374
+
375
+ # Coverlet is a free, cross platform Code Coverage Tool
376
+ coverage*.json
377
+ coverage*.xml
378
+ coverage*.info
379
+
380
+ # Visual Studio code coverage results
381
+ *.coverage
382
+ *.coveragexml
383
+
384
+ # NCrunch
385
+ _NCrunch_*
386
+ .*crunch*.local.xml
387
+ nCrunchTemp_*
388
+
389
+ # MightyMoose
390
+ *.mm.*
391
+ AutoTest.Net/
392
+
393
+ # Web workbench (sass)
394
+ .sass-cache/
395
+
396
+ # Installshield output folder
397
+ [Ee]xpress/
398
+
399
+ # DocProject is a documentation generator add-in
400
+ DocProject/buildhelp/
401
+ DocProject/Help/*.HxT
402
+ DocProject/Help/*.HxC
403
+ DocProject/Help/*.hhc
404
+ DocProject/Help/*.hhk
405
+ DocProject/Help/*.hhp
406
+ DocProject/Help/Html2
407
+ DocProject/Help/html
408
+
409
+ # Click-Once directory
410
+ publish/
411
+
412
+ # Publish Web Output
413
+ *.[Pp]ublish.xml
414
+ *.azurePubxml
415
+ # Note: Comment the next line if you want to checkin your web deploy settings,
416
+ # but database connection strings (with potential passwords) will be unencrypted
417
+ *.pubxml
418
+ *.publishproj
419
+
420
+ # Microsoft Azure Web App publish settings. Comment the next line if you want to
421
+ # checkin your Azure Web App publish settings, but sensitive information contained
422
+ # in these scripts will be unencrypted
423
+ PublishScripts/
424
+
425
+ # NuGet Packages
426
+ *.nupkg
427
+ # NuGet Symbol Packages
428
+ *.snupkg
429
+ # The packages folder can be ignored because of Package Restore
430
+ **/[Pp]ackages/*
431
+ # except build/, which is used as an MSBuild target.
432
+ !**/[Pp]ackages/build/
433
+ # Uncomment if necessary however generally it will be regenerated when needed
434
+ #!**/[Pp]ackages/repositories.config
435
+ # NuGet v3's project.json files produce more ignorable files
436
+ *.nuget.props
437
+ *.nuget.targets
438
+
439
+ # Microsoft Azure Build Output
440
+ csx/
441
+ *.build.csdef
442
+
443
+ # Microsoft Azure Emulator
444
+ ecf/
445
+ rcf/
446
+
447
+ # Windows Store app package directories and files
448
+ AppPackages/
449
+ BundleArtifacts/
450
+ Package.StoreAssociation.xml
451
+ _pkginfo.txt
452
+ *.appx
453
+ *.appxbundle
454
+ *.appxupload
455
+
456
+ # Visual Studio cache files
457
+ # files ending in .cache can be ignored
458
+ *.[Cc]ache
459
+ # but keep track of directories ending in .cache
460
+ !?*.[Cc]ache/
461
+
462
+ # Others
463
+ ClientBin/
464
+ ~$*
465
+ *~
466
+ *.dbmdl
467
+ *.dbproj.schemaview
468
+ *.jfm
469
+ *.pfx
470
+ *.publishsettings
471
+ orleans.codegen.cs
472
+
473
+ # Including strong name files can present a security risk
474
+ # (https://github.com/github/gitignore/pull/2483#issue-259490424)
475
+ #*.snk
476
+
477
+ # Since there are multiple workflows, uncomment next line to ignore bower_components
478
+ # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
479
+ #bower_components/
480
+
481
+ # RIA/Silverlight projects
482
+ Generated_Code/
483
+
484
+ # Backup & report files from converting an old project file
485
+ # to a newer Visual Studio version. Backup files are not needed,
486
+ # because we have git ;-)
487
+ _UpgradeReport_Files/
488
+ Backup*/
489
+ UpgradeLog*.XML
490
+ UpgradeLog*.htm
491
+ ServiceFabricBackup/
492
+ *.rptproj.bak
493
+
494
+ # SQL Server files
495
+ *.mdf
496
+ *.ldf
497
+ *.ndf
498
+
499
+ # Business Intelligence projects
500
+ *.rdl.data
501
+ *.bim.layout
502
+ *.bim_*.settings
503
+ *.rptproj.rsuser
504
+ *- [Bb]ackup.rdl
505
+ *- [Bb]ackup ([0-9]).rdl
506
+ *- [Bb]ackup ([0-9][0-9]).rdl
507
+
508
+ # Microsoft Fakes
509
+ FakesAssemblies/
510
+
511
+ # GhostDoc plugin setting file
512
+ *.GhostDoc.xml
513
+
514
+ # Node.js Tools for Visual Studio
515
+ .ntvs_analysis.dat
516
+ node_modules/
517
+
518
+ # Visual Studio 6 build log
519
+ *.plg
520
+
521
+ # Visual Studio 6 workspace options file
522
+ *.opt
523
+
524
+ # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
525
+ *.vbw
526
+
527
+ # Visual Studio LightSwitch build output
528
+ **/*.HTMLClient/GeneratedArtifacts
529
+ **/*.DesktopClient/GeneratedArtifacts
530
+ **/*.DesktopClient/ModelManifest.xml
531
+ **/*.Server/GeneratedArtifacts
532
+ **/*.Server/ModelManifest.xml
533
+ _Pvt_Extensions
534
+
535
+ # Paket dependency manager
536
+ .paket/paket.exe
537
+ paket-files/
538
+
539
+ # FAKE - F# Make
540
+ .fake/
541
+
542
+ # CodeRush personal settings
543
+ .cr/personal
544
+
545
+ # Python Tools for Visual Studio (PTVS)
546
+ __pycache__/
547
+ *.pyc
548
+
549
+ # Cake - Uncomment if you are using it
550
+ # tools/**
551
+ # !tools/packages.config
552
+
553
+ # Tabs Studio
554
+ *.tss
555
+
556
+ # Telerik's JustMock configuration file
557
+ *.jmconfig
558
+
559
+ # BizTalk build output
560
+ *.btp.cs
561
+ *.btm.cs
562
+ *.odx.cs
563
+ *.xsd.cs
564
+
565
+ # OpenCover UI analysis results
566
+ OpenCover/
567
+
568
+ # Azure Stream Analytics local run output
569
+ ASALocalRun/
570
+
571
+ # MSBuild Binary and Structured Log
572
+ *.binlog
573
+
574
+ # NVidia Nsight GPU debugger configuration file
575
+ *.nvuser
576
+
577
+ # MFractors (Xamarin productivity tool) working folder
578
+ .mfractor/
579
+
580
+ # Local History for Visual Studio
581
+ .localhistory/
582
+
583
+ # BeatPulse healthcheck temp database
584
+ healthchecksdb
585
+
586
+ # Backup folder for Package Reference Convert tool in Visual Studio 2017
587
+ MigrationBackup/
588
+
589
+ # Ionide (cross platform F# VS Code tools) working folder
590
+ .ionide/
591
+
592
+ # Fody - auto-generated XML schema
593
+ FodyWeavers.xsd
594
+
595
+ ### TeX template
596
+ ## Core latex/pdflatex auxiliary files:
597
+ *.aux
598
+ *.lof
599
+ *.log
600
+ *.lot
601
+ *.fls
602
+ *.out
603
+ *.toc
604
+ *.fmt
605
+ *.fot
606
+ *.cb
607
+ *.cb2
608
+ .*.lb
609
+
610
+ ## Intermediate documents:
611
+ *.dvi
612
+ *.xdv
613
+ *-converted-to.*
614
+ # these rules might exclude image files for figures etc.
615
+ # *.ps
616
+ # *.eps
617
+ # *.pdf
618
+
619
+ ## Generated if empty string is given at "Please type another file name for output:"
620
+ .pdf
621
+
622
+ ## Bibliography auxiliary files (bibtex/biblatex/biber):
623
+ *.bbl
624
+ *.bcf
625
+ *.blg
626
+ *-blx.aux
627
+ *-blx.bib
628
+ *.run.xml
629
+
630
+ ## Build tool auxiliary files:
631
+ *.fdb_latexmk
632
+ *.synctex
633
+ *.synctex(busy)
634
+ *.synctex.gz
635
+ *.synctex.gz(busy)
636
+ *.pdfsync
637
+
638
+ ## Build tool directories for auxiliary files
639
+ # latexrun
640
+ latex.out/
641
+
642
+ ## Auxiliary and intermediate files from other packages:
643
+ # algorithms
644
+ *.alg
645
+ *.loa
646
+
647
+ # achemso
648
+ acs-*.bib
649
+
650
+ # amsthm
651
+ *.thm
652
+
653
+ # beamer
654
+ *.nav
655
+ *.pre
656
+ *.snm
657
+ *.vrb
658
+
659
+ # changes
660
+ *.soc
661
+
662
+ # comment
663
+ *.cut
664
+
665
+ # cprotect
666
+ *.cpt
667
+
668
+ # elsarticle (documentclass of Elsevier journals)
669
+ *.spl
670
+
671
+ # endnotes
672
+ *.ent
673
+
674
+ # fixme
675
+ *.lox
676
+
677
+ # feynmf/feynmp
678
+ *.mf
679
+ *.mp
680
+ *.t[1-9]
681
+ *.t[1-9][0-9]
682
+ *.tfm
683
+
684
+ #(r)(e)ledmac/(r)(e)ledpar
685
+ *.end
686
+ *.?end
687
+ *.[1-9]
688
+ *.[1-9][0-9]
689
+ *.[1-9][0-9][0-9]
690
+ *.[1-9]R
691
+ *.[1-9][0-9]R
692
+ *.[1-9][0-9][0-9]R
693
+ *.eledsec[1-9]
694
+ *.eledsec[1-9]R
695
+ *.eledsec[1-9][0-9]
696
+ *.eledsec[1-9][0-9]R
697
+ *.eledsec[1-9][0-9][0-9]
698
+ *.eledsec[1-9][0-9][0-9]R
699
+
700
+ # glossaries
701
+ *.acn
702
+ *.acr
703
+ *.glg
704
+ *.glo
705
+ *.gls
706
+ *.glsdefs
707
+ *.lzo
708
+ *.lzs
709
+
710
+ # uncomment this for glossaries-extra (will ignore makeindex's style files!)
711
+ # *.ist
712
+
713
+ # gnuplottex
714
+ *-gnuplottex-*
715
+
716
+ # gregoriotex
717
+ *.gaux
718
+ *.gtex
719
+
720
+ # htlatex
721
+ *.4ct
722
+ *.4tc
723
+ *.idv
724
+ *.lg
725
+ *.trc
726
+ *.xref
727
+
728
+ # hyperref
729
+ *.brf
730
+
731
+ # knitr
732
+ *-concordance.tex
733
+ # TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
734
+ # *.tikz
735
+ *-tikzDictionary
736
+
737
+ # listings
738
+ *.lol
739
+
740
+ # luatexja-ruby
741
+ *.ltjruby
742
+
743
+ # makeidx
744
+ *.idx
745
+ *.ilg
746
+ *.ind
747
+
748
+ # minitoc
749
+ *.maf
750
+ *.mlf
751
+ *.mlt
752
+ *.mtc[0-9]*
753
+ *.slf[0-9]*
754
+ *.slt[0-9]*
755
+ *.stc[0-9]*
756
+
757
+ # minted
758
+ _minted*
759
+ *.pyg
760
+
761
+ # morewrites
762
+ *.mw
763
+
764
+ # nomencl
765
+ *.nlg
766
+ *.nlo
767
+ *.nls
768
+
769
+ # pax
770
+ *.pax
771
+
772
+ # pdfpcnotes
773
+ *.pdfpc
774
+
775
+ # sagetex
776
+ *.sagetex.sage
777
+ *.sagetex.py
778
+ *.sagetex.scmd
779
+
780
+ # scrwfile
781
+ *.wrt
782
+
783
+ # sympy
784
+ *.sout
785
+ *.sympy
786
+ sympy-plots-for-*.tex/
787
+
788
+ # pdfcomment
789
+ *.upa
790
+ *.upb
791
+
792
+ # pythontex
793
+ *.pytxcode
794
+ pythontex-files-*/
795
+
796
+ # tcolorbox
797
+ *.listing
798
+
799
+ # thmtools
800
+ *.loe
801
+
802
+ # TikZ & PGF
803
+ *.dpth
804
+ *.md5
805
+ *.auxlock
806
+
807
+ # todonotes
808
+ *.tdo
809
+
810
+ # vhistory
811
+ *.hst
812
+ *.ver
813
+
814
+ # easy-todo
815
+ *.lod
816
+
817
+ # xcolor
818
+ *.xcp
819
+
820
+ # xmpincl
821
+ *.xmpi
822
+
823
+ # xindy
824
+ *.xdy
825
+
826
+ # xypic precompiled matrices and outlines
827
+ *.xyc
828
+ *.xyd
829
+
830
+ # endfloat
831
+ *.ttt
832
+ *.fff
833
+
834
+ # Latexian
835
+ TSWLatexianTemp*
836
+
837
+ ## Editors:
838
+ # WinEdt
839
+ *.bak
840
+ *.sav
841
+
842
+ # Texpad
843
+ .texpadtmp
844
+
845
+ # LyX
846
+ *.lyx~
847
+
848
+ # Kile
849
+ *.backup
850
+
851
+ # gummi
852
+ .*.swp
853
+
854
+ # KBibTeX
855
+ *~[0-9]*
856
+
857
+ # TeXnicCenter
858
+ *.tps
859
+
860
+ # auto folder when using emacs and auctex
861
+ ./auto/*
862
+ *.el
863
+
864
+ # expex forward references with \gathertags
865
+ *-tags.tex
866
+
867
+ # standalone packages
868
+ *.sta
869
+
870
+ # Makeindex log files
871
+ *.lpz
872
+
873
+ # xwatermark package
874
+ *.xwm
875
+
876
+ # REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
877
+ # option is specified. Footnotes are then stored in a file with suffix Notes.bib.
878
+ # Uncomment the next line to have this generated file ignored.
879
+ #*Notes.bib
880
+
881
+ # CUSTOM
882
+ .idea
883
+ .DS_store
884
+ .DS_Store
885
+ *.tmp.py
886
+ tmp
README.md CHANGED
@@ -1,33 +1,17 @@
1
  ---
2
- title: Gpt2 Persian
3
- emoji: 🔥
4
- colorFrom: blue
5
- colorTo: green
6
  sdk: streamlit
7
  app_file: app.py
8
  pinned: false
9
  ---
10
 
11
- # Configuration
12
 
13
- `title`: _string_
14
- Display title for the Space
15
 
16
- `emoji`: _string_
17
- Space emoji (emoji-only character allowed)
18
-
19
- `colorFrom`: _string_
20
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
21
-
22
- `colorTo`: _string_
23
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
24
-
25
- `sdk`: _string_
26
- Can be either `gradio` or `streamlit`
27
-
28
- `app_file`: _string_
29
- Path to your main application file (which contains either `gradio` or `streamlit` Python code).
30
- Path is relative to the root of the repository.
31
-
32
- `pinned`: _boolean_
33
- Whether the Space stays on top of your list.
1
  ---
2
+ title: GPT2 Persian
3
+ emoji: 🤘
4
+ colorFrom: orange
5
+ colorTo: red
6
  sdk: streamlit
7
  app_file: app.py
8
  pinned: false
9
  ---
10
 
11
+ # Streamlit demo for GPT2 Persian
12
 
13
+ ### Launch demo:
 
14
 
15
+ ```
16
+ streamlit run app.py
17
+ ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline, set_seed
3
+ from transformers import AutoTokenizer
4
+ from mtranslate import translate
5
+ import random
6
+
7
+ import meta
8
+ import examples
9
+ from normalizer import normalize
10
+ from utils import (
11
+ remote_css,
12
+ local_css
13
+ )
14
+
15
+
16
class TextGeneration:
    """Wrap a Hugging Face text-generation pipeline for the Persian GPT-2 demo.

    While ``debug`` is true no model is loaded and :meth:`generate` returns
    the canned ``dummy_output`` paragraph instead of calling the pipeline.
    """

    def __init__(self):
        self.debug = False
        self.dummy_output = (
            "ناف جایی قرار گرفته که در واقع بندناف در داخل رحم در آنجا به شکم جنین وصل بوده‌است. "
            "بندناف که جفت را به جنین متصل کرده بعد از تولد از نوزاد جدا می‌شود. برای جدا کردن بند ناف از دو پنس استفاده می‌کنند و بین آن دو را میبرند. پنس دیگری نزدیک شکم نوزاد قرار داده می‌شود که بعد از دو روز برداشته خواهد شد. بندناف باقی‌مانده طی ۱۵ روز خشک شده و می‌افتد و به جای آن اسکاری طبیعی به جای میماند. البته بر خلاف تصور عامه مردم شکل ناف در اثر بریدن بند ناف به وجود نمی‌آید و پیش از این در شکم مادر حالت ناف شکل گرفته‌است. شکل ناف در میان مردم مختلف متفاوت است و اندازه آن بین ۱.۵ تا ۲ سانتی‌متر است. تمام پستانداران جفت‌زیست ناف دارند. ناف در انسان‌ها به سادگی قابل مشاهده‌است."
        )
        self.tokenizer = None
        self.generator = None
        self.task = "text-generation"
        self.model_name_or_path = "flax-community/gpt2-medium-persian"
        set_seed(42)

    def load(self):
        """Load the tokenizer and the generation pipeline (skipped in debug mode)."""
        if not self.debug:
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path)
            self.generator = pipeline(self.task, model=self.model_name_or_path, tokenizer=self.model_name_or_path)

    def generate(self, prompt, **generation_kwargs):
        """Return the generated text for *prompt*.

        Args:
            prompt: The text to continue.
            **generation_kwargs: Options forwarded to the pipeline call. When
                ``max_length`` is given it is treated as the number of tokens
                to add, so the prompt's token count is added to it before the
                call. ``num_return_sequences`` is always forced to 1.

        Returns:
            The ``generated_text`` of the single returned sequence, or
            ``self.dummy_output`` when ``self.debug`` is true.
        """
        if self.debug:
            return self.dummy_output

        generation_kwargs["num_return_sequences"] = 1

        # Only extend max_length when the caller supplied one; otherwise the
        # pipeline's own default applies instead of raising KeyError.
        if "max_length" in generation_kwargs:
            input_ids = self.tokenizer(prompt)["input_ids"]
            # A single string is expected to encode to a flat list of ids;
            # unwrap one level in case a batched (nested) encoding comes back.
            if input_ids and isinstance(input_ids[0], (list, tuple)):
                input_ids = input_ids[0]
            generation_kwargs["max_length"] += len(input_ids)

        return self.generator(
            prompt,
            **generation_kwargs,
        )[0]["generated_text"]
45
+
46
+
47
@st.cache(allow_output_mutation=True)
def load_text_generator():
    """Build a ``TextGeneration`` wrapper, load its model, and return it.

    Decorated with ``st.cache`` so Streamlit can reuse the returned object.
    """
    text_generation = TextGeneration()
    text_generation.load()
    return text_generation
52
+
53
+
54
def main():
    """Render the Streamlit demo page and run generation on request.

    Builds the sidebar controls, lets the user pick an example prompt or type
    a custom one, and, when "Generate !" is pressed, normalizes the prompt,
    generates a continuation with the sidebar settings, and optionally shows
    an English translation of both parts.
    """
    import html  # function-scope import: only needed for escaping below

    st.set_page_config(
        page_title="X",
        page_icon="🧊",
        layout="wide",
        initial_sidebar_state="expanded"
    )
    remote_css("https://cdn.jsdelivr.net/gh/rastikerdar/vazir-font/dist/font-face.css")
    local_css("assets/rtl.css")
    generator = load_text_generator()

    st.sidebar.markdown(meta.SIDEBAR_INFO)

    max_length = st.sidebar.slider(
        label='Max Length',
        help="The maximum length of the sequence to be generated.",
        min_value=1,
        max_value=128,
        value=50,
        step=1
    )
    top_k = st.sidebar.slider(
        label='Top-k',
        help="The number of highest probability vocabulary tokens to keep for top-k-filtering",
        min_value=40,
        max_value=80,
        value=50,
        step=1
    )
    top_p = st.sidebar.slider(
        label='Top-p',
        help="Only the most probable tokens with probabilities that add up to `top_p` or higher are kept for "
             "generation.",
        min_value=0.0,
        max_value=1.0,
        value=0.95,
        step=0.01
    )
    temperature = st.sidebar.slider(
        label='Temperature',
        help="The value used to module the next token probabilities",
        min_value=0.1,
        max_value=10.0,
        value=1.0,
        step=0.05
    )
    do_sample = st.sidebar.selectbox(
        label='Sampling ?',
        options=(True, False),
        help="Whether or not to use sampling; use greedy decoding otherwise.",
    )
    translated = st.sidebar.selectbox(
        label='Translation ?',
        options=(True, False),
        help="Will translate the result in English",
    )
    generation_kwargs = {
        "max_length": max_length,
        "top_k": top_k,
        "top_p": top_p,
        "temperature": temperature,
        "do_sample": do_sample,
    }

    st.markdown(meta.HEADER_INFO)
    prompts = list(examples.EXAMPLES.keys()) + ["Custom"]
    # "Custom" is appended last and pre-selected by default.
    prompt = st.selectbox('Examples', prompts, index=len(prompts) - 1)

    if prompt == "Custom":
        prompt_box = meta.PROMPT_BOX
    else:
        prompt_box = random.choice(examples.EXAMPLES[prompt])

    text = st.text_area("Enter text", prompt_box)
    generation_kwargs_ph = st.empty()

    if st.button("Generate !"):
        with st.spinner(text="Generating ..."):
            generation_kwargs_ph.markdown(", ".join([f"`{k}`: {v}" for k, v in generation_kwargs.items()]))
            text = normalize(text)
            if text:
                # Forward the sidebar settings; without them the sliders have
                # no effect on generation.
                generated_text = generator.generate(text, **generation_kwargs)
                # NOTE(review): text-generation pipelines return the prompt
                # followed by the continuation by default; drop the echoed
                # prompt so it is not rendered twice. Confirm against the
                # pipeline's return_full_text setting.
                if generated_text.startswith(text):
                    generated_text = generated_text[len(text):]

                # The markup is rendered with unsafe_allow_html, so user and
                # model text must be escaped before interpolation.
                st.markdown(
                    f'<p class="rtl rtl-box">'
                    f'<span class="result-text">{html.escape(text)} </span>'
                    f'<span class="result-text generated-text">{html.escape(generated_text)}</span>'
                    f'</p>',
                    unsafe_allow_html=True
                )

                if translated:
                    translated_text = translate(text, "en", "fa")
                    translated_generated_text = translate(generated_text, "en", "fa")

                    st.markdown(
                        f'<p class="ltr ltr-box">'
                        f'<span class="result-text">{html.escape(translated_text)} </span>'
                        f'<span class="result-text generated-text">{html.escape(translated_generated_text)}</span>'
                        f'</p>',
                        unsafe_allow_html=True
                    )
155
+
156
+
157
# Script entry point: build the page only when executed directly.
if __name__ == "__main__":
    main()
assets/rtl.css ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .rtl,
2
+ textarea {
3
+ font-family: Vazir !important;
4
+ text-align: right;
5
+ direction: rtl !important;
6
+ }
7
+ .rtl-box {
8
+ border-bottom: 1px solid #ddd;
9
+ padding-bottom: 20px;
10
+ }
11
+ .ltr {
12
+ text-align: left;
13
+ direction: ltr !important;
14
+ }
15
+
16
+ span.result-text {
17
+ padding: 3px 3px;
18
+ line-height: 32px;
19
+ }
20
+ span.generated-text {
21
+ background-color: rgb(118 200 147 / 13%);
22
+ }
dictionary.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ characters = {
2
+ "ك": "ک",
3
+ "دِ": "د",
4
+ "بِ": "ب",
5
+ "زِ": "ز",
6
+ "ذِ": "ذ",
7
+ "شِ": "ش",
8
+ "سِ": "س",
9
+ "ى": "ی",
10
+ "ي": "ی",
11
+ "ؤ": "و",
12
+ "ے": "ی",
13
+ "ۀ": "ه",
14
+ "ﭘ": "پ",
15
+ "ﮐ": "ک",
16
+ "ﯽ": "ی",
17
+ "ﺎ": "ا",
18
+ "ﺑ": "ب",
19
+ "ﺘ": "ت",
20
+ "ﺧ": "خ",
21
+ "ﺩ": "د",
22
+ "ﺱ": "س",
23
+ "ﻀ": "ض",
24
+ "ﻌ": "ع",
25
+ "ﻟ": "ل",
26
+ "ﻡ": "م",
27
+ "ﻢ": "م",
28
+ "ﻪ": "ه",
29
+ "ﻮ": "و",
30
+ "ﺍ": "ا",
31
+ "ة": "ه",
32
+ "ﯾ": "ی",
33
+ "ﯿ": "ی",
34
+ "ﺒ": "ب",
35
+ "ﺖ": "ت",
36
+ "ﺪ": "د",
37
+ "ﺮ": "ر",
38
+ "ﺴ": "س",
39
+ "ﺷ": "ش",
40
+ "ﺸ": "ش",
41
+ "ﻋ": "ع",
42
+ "ﻤ": "م",
43
+ "ﻥ": "ن",
44
+ "ﻧ": "ن",
45
+ "ﻭ": "و",
46
+ "ﺭ": "ر",
47
+ "ﮔ": "گ",
48
+ "إ": "ا",
49
+ "ٕ": " ",
50
+ "ھ": "ه",
51
+ "...": ".",
52
+ "…": ".",
53
+ "-": " - ",
54
+ "هٔ": "ه",
55
+ "ﻯ": "ی",
56
+ "ﻛ": "ک",
57
+ "ﭼ": "چ",
58
+ "ﺓ": "ه",
59
+ "ﻴ": "ی",
60
+ "ﻊ": "ع",
61
+ "ﮬ": "ه",
62
+ "ﺟ": "ج",
63
+ "ﺳ": "س",
64
+ "ﻦ": "ن",
65
+ "ﺬ": "ذ",
66
+ "ﺋ": "ئ",
67
+ "ﷲ": "لله",
68
+ "ﺞ": "ج",
69
+ "ﺙ": "ث",
70
+ "ﻗ": "ق",
71
+ "ﮪ": "ه",
72
+ "ﺰ": "ز",
73
+ "ﯼ": "ی",
74
+ "ٺ": "ت",
75
+ "ﺻ": "ص",
76
+ "ﻂ": "ط",
77
+ "ﻣ": "م",
78
+ "ﻈ": "ظ",
79
+ "ﺐ": "ب",
80
+ "ﻍ": "غ",
81
+ "ݸ": "و",
82
+ "ﻨ": "ن",
83
+ "ﻝ": "ل",
84
+ "ﻩ": "ه",
85
+ "ﻲ": "ی",
86
+ "ﻐ": "غ",
87
+ "ﺲ": "س",
88
+ "ﺁ": "آ",
89
+ "ڔ": "ر",
90
+ "ﺫ": "ذ",
91
+ "ﭻ": "چ",
92
+ "ﺠ": "ج",
93
+ "ﯙ": "و",
94
+ "ﮏ": "ک",
95
+ "ﺣ": "ح",
96
+ "ﺝ": "ج",
97
+ "ﺼ": "ص",
98
+ "ﻳ": "ی",
99
+ "ﻘ": "ق",
100
+ "ﺨ": "خ",
101
+ "ﻔ": "ف",
102
+ "ﻎ": "غ",
103
+ "ئ": "ی",
104
+ "ﻓ": "ف",
105
+ "ﻕ": "ق",
106
+ "ﮋ": "ژ",
107
+ "ﺗ": "ت",
108
+ "ﻁ": "ط",
109
+ "ﺯ": "ز",
110
+ "ﮕ": "گ",
111
+ "ﺌ": "ئ",
112
+ "ﺵ": "ش",
113
+ "ۮ": "د",
114
+ "ﻫ": "ه",
115
+ "ﻬ": "ه",
116
+ "ﻏ": "غ",
117
+ "ﻰ": "ی",
118
+ "﷼": "ریال",
119
+ "ﺿ": "ض",
120
+ "ﺛ": "ث",
121
+ "ݐ": "پ",
122
+ "ﺏ": "ب",
123
+ "ﭙ": "پ",
124
+ "ﭽ": "چ",
125
+ "ﺜ": "ث",
126
+ "ﻃ": "ط",
127
+ "ۂ": "ه",
128
+ "ﻑ": "ف",
129
+ "ﺕ": "ت",
130
+ "ﻞ": "ل",
131
+ }
132
+
133
+ special_tokens = {}
134
+
135
+ words_map = {
136
+ "Leave a comment": "",
137
+ "[…]": "",
138
+ "[.]": "",
139
+ }
examples.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
1
+ EXAMPLES = {
2
+ "Scientific ...": [
3
+ "در یک اتفاق شگفت انگیز، پژوهشگران",
4
+ "عینک‌های هوشمند از جمله گجت‌هایی هستند که می‌توانند",
5
+ "وقتی ستاره‌ای بسیار بزرگ به پایان عمر خود می‌رسد، منفجر می‌شود و",
6
+ ],
7
+ }
meta.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ HEADER_INFO = """
2
+ # GPT2 - Persian
3
+
4
+ Persian GPT-2 demo. Part of the [Huggingface JAX/Flax event](
5
+ https://discuss.huggingface.co/t/pretrain-gpt2-from-scratch-in-persian/7560).
6
+
7
+ """.strip()
8
+ SIDEBAR_INFO = """
9
+ # Configuration
10
+ """.strip()
11
+ PROMPT_BOX = "متن خود را وارد کنید ..."
normalizer.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hazm
2
+ import re
3
+
4
+ from regexes.currency import CURRENCY_REGEX
5
+ from regexes.email import EMAIL_REGEX
6
+ from regexes.latin import LATIN_REGEX
7
+ from regexes.number import NUMBERS_REGEX
8
+ from regexes.phone import PHONE_REGEX
9
+ from regexes.quote import DOUBLE_QUOTE_REGEX, SINGLE_QUOTE_REGEX
10
+ from regexes.url import URL_REGEX
11
+ from regexes.persian import PERSIAN_REGEX
12
+ import dictionary
13
+
14
+
15
def make_trans(list_a, list_b):
    """Build a ``str.translate`` table from two parallel sequences.

    Each character of *list_a* is mapped (by code point) to the item at the
    same position in *list_b*; pairing stops at the shorter input.
    """
    return {ord(source): target for source, target in zip(list_a, list_b)}
17
+
18
+
19
def multiple_replace(text, chars_to_mapping):
    """Replace every occurrence of each mapping key in *text* in one pass.

    Args:
        text: Value to process; it is converted with ``str()`` first.
        chars_to_mapping: Dict of literal substring -> replacement. Keys are
            tried in the dict's iteration order, so when two keys could match
            at the same position the earlier one wins.

    Returns:
        The text with all replacements applied. An empty mapping returns the
        text unchanged.
    """
    text = str(text)
    if not chars_to_mapping:
        # An empty alternation would match the empty string everywhere and
        # the lookup below would then fail with KeyError('').
        return text

    pattern = "|".join(map(re.escape, chars_to_mapping))
    return re.sub(pattern, lambda match: chars_to_mapping[match.group()], text)
22
+
23
+
24
# Translation tables for str.translate: Arabic-Indic digits -> Persian digits,
# then Persian digits (and the Arabic percent sign) -> ASCII.
ar2fa_digits = make_trans(list_a="٠١٢٣٤٥٦٧٨٩٪", list_b="۰۱۲۳۴۵۶۷۸۹٪")
fa2en_digits = make_trans(list_a="۰۱۲۳۴۵۶۷۸۹٪", list_b="0123456789%")

# Shared hazm normalizer instance used by normalize().
normalizer = hazm.Normalizer(
    persian_numbers=True,
    punctuation_spacing=False,
)
27
+
28
+
29
def normalize(text, zwnj="\u200c", tokenized=False):
    """Clean and normalize Persian *text* for the generation model.

    Steps, in order: flatten newlines/tabs to spaces, collapse runs of ZWNJ,
    drop the tatweel character, run the hazm normalizer, apply the character
    and word maps from ``dictionary``, convert Arabic/Persian digits to ASCII,
    unify quotes, pad currency symbols, phone numbers, numbers and Latin runs
    with spaces, blank out URLs and e-mail addresses, replace every character
    outside the allowed Persian/English set with a space, and finally tidy
    ZWNJ placement token by token.

    Args:
        text: The input string.
        zwnj: The zero-width non-joiner character to tidy around.
        tokenized: When true, return the list of tokens instead of a string.

    Returns:
        The normalized tokens joined by single spaces, or the token list when
        *tokenized* is true.
    """
    text = text.replace("\n", " ").replace("\t", " ")
    text = re.sub(r"\u200c+", "\u200c", text)
    text = text.replace('ـ', '')
    text = normalizer.normalize(text)

    if dictionary.characters:
        text = multiple_replace(text, dictionary.characters)

    if dictionary.words_map:
        text = multiple_replace(text, dictionary.words_map)

    text = text.translate(ar2fa_digits)
    text = text.translate(fa2en_digits)

    text = SINGLE_QUOTE_REGEX.sub("'", text)
    text = DOUBLE_QUOTE_REGEX.sub('"', text)
    text = CURRENCY_REGEX.sub(r" \1 ", text)
    text = URL_REGEX.sub(" ", text)
    text = EMAIL_REGEX.sub(" ", text)
    text = PHONE_REGEX.sub(r" \1 ", text)
    text = NUMBERS_REGEX.sub(r" \1 ", text)
    text = LATIN_REGEX.sub(r" \1 ", text)

    # Allow only english and persian characters
    text = re.sub(PERSIAN_REGEX, " ", text)

    # Glue ZWNJ back onto its neighbours after the space padding above.
    text = text.replace(f" {zwnj} ", f"{zwnj}")
    text = text.replace(f"{zwnj} ", f"{zwnj}")
    text = text.replace(f" {zwnj}", f"{zwnj}")

    if dictionary.special_tokens:
        text = multiple_replace(text, dictionary.special_tokens)

    tokens = []
    for token in text.split():
        # A ZWNJ at a token edge joins nothing; strip it, and skip tokens
        # that consisted of ZWNJ only so no empty token is emitted.
        token = token.strip(zwnj)
        if token:
            tokens.append(token)

    if tokenized:
        return tokens

    return " ".join(tokens)
regexes/__init__.py ADDED
File without changes
regexes/currency.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
# Currency symbols mapped to three-letter currency codes.
CURRENCIES = {
    "$": "USD", "zł": "PLN", "£": "GBP", "¥": "JPY",
    "฿": "THB", "₡": "CRC", "₦": "NGN", "₩": "KRW",
    "₪": "ILS", "₫": "VND", "€": "EUR", "₱": "PHP",
    "₲": "PYG", "₴": "UAH", "₹": "INR", "﷼": "IRR",
}

# One or more consecutive currency symbols; group 1 holds the last symbol of
# the run.
CURRENCY_REGEX = re.compile(
    "(" + "|".join(map(re.escape, CURRENCIES)) + ")+"
)
regexes/email.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
1
+ import re
2
+
3
# Matches e-mail addresses, including obfuscated separators such as "(at)"
# or "[at]" in place of "@".
#
# The host/domain/TLD character classes use the range \u00a1-\uffff so that
# internationalized domain names match. The earlier spelling with doubled
# backslashes inside a raw string was parsed by `re` as a literal backslash
# plus the accidental range "1-\", which rejected non-ASCII domains and
# accepted characters such as ":", ";", "<", "=", ">", "?" and "[".
EMAIL_REGEX = re.compile(
    r"(?:^|(?<=[^\w@.)]))([\w+-](\.(?!\.))?)*?[\w+-](@|[(<{\[]at[)>}\]])(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))",
    flags=re.IGNORECASE | re.UNICODE,
)
regexes/latin.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
# Run of ASCII letters/digits starting at a word boundary, unless the run
# begins with one of the placeholder words listed in the negative lookahead.
LATIN_WITH_SPECIAL_REGEX = re.compile(r"(\b(?!URL|EMAIL|PHONE|NUMBER|CUR|LATIN\b)[0-9a-zA-Z]+)")

# Any maximal run of ASCII letters/digits.
LATIN_REGEX = re.compile(r"([0-9a-zA-Z]+)")

# A single ASCII letter or digit.
LATIN_SPACES_REGEX = re.compile(r"([0-9a-zA-Z])")
regexes/number.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
1
+ import re
2
+
3
# Numeric literal matcher. Group 1 captures the number itself; an optional
# leading sign is matched but sits outside the group.
NUMBERS_REGEX = re.compile(
    # must start the string or follow a non-word, non-separator character
    r"(?:^|(?<=[^\w,.]))"
    # optional sign (plus, en dash or hyphen)
    r"[+–-]?"
    r"("
    # 1,234,567.89 style
    r"([1-9]\d{0,2}(,\d{3})+(\.\d*)?)"
    # 1.234.567,89 or 1 234 567,89 style
    r"|([1-9]\d{0,2}([ .]\d{3})+(,\d*)?)"
    # plain decimal with either separator
    r"|(\d*?[.,]\d+)"
    # plain integer
    r"|\d+"
    r")"
    # must end the string or stop at a word boundary
    r"(?:$|(?=\b))"
)
regexes/persian.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+
4
# Letters and digits that count as Persian for the purpose of filtering.
PERSIAN_ALPHA = "ءآئابتثجحخدذرزسشصضطظعغفقلمنهوپچژکگیە"  # noqa: E501
PERSIAN_DIGIT = "۰۱۲۳۴۵۶۷۸۹"


ZWNJ = "\u200c"
# Punctuation allowed to survive filtering, written as regex escapes for use
# inside a character class. A raw literal is used so the backslashes are not
# invalid string escapes (SyntaxWarning on recent Python versions); the
# resulting string value is unchanged.
PUNK = r'''\!"\#\$\%\&'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\]\^\_\`\{\|\}\~\«\»\؟\:\×\٬\٫\﷼\٪\،'''

# Body of the character class listing every allowed character.
PERSIAN = (
    "a-zA-Z0-9" +
    PERSIAN_ALPHA +
    PERSIAN_DIGIT +
    ZWNJ +
    PUNK
)

# Pattern (as a string) matching any single character that is NOT allowed.
# The trailing "+" sits inside the class and is therefore a literal plus.
PERSIAN_REGEX = r"[^" + PERSIAN + "+]"
regexes/phone.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
1
+ import re
2
+
3
+
4
# Phone-number matcher with two alternatives:
#   1. (captured as group 1) optional country prefix, optional area code,
#      a 3+4 digit subscriber number and an optional extension;
#   2. (not captured) 4-5 digits, one separator, then 6-9 digits.
# In the second alternative the separator class is written "[ ./-]" with the
# hyphen last: the previous "[ .-/]" was parsed as the range "." to "/" and
# therefore never matched a hyphen.
PHONE_REGEX = re.compile(
    r"((?:^|(?<=[^\w)]))(((\+?[01])|(\+\d{2}))[ .-]?)?(\(?\d{3,4}\)?/?[ .-]?)?(\d{3}[ .-]?\d{4})(\s?(?:ext\.?|[#x-])\s?\d{2,6})?(?:$|(?=\W)))|\+?\d{4,5}[ ./-]\d{6,9}"
)
regexes/punk.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
1
+ import re
2
+
3
# Single punctuation character (ASCII plus Persian/Arabic marks), captured as
# group 1.
PUNK_REGEX = re.compile(
    r"(["
    # ASCII punctuation and guillemets
    r"\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\=\?\@\[\\\]\^\_\`\{\|\}\~\«\»"
    # reversed/Arabic question marks, comma, separators and semicolon
    r"\⸮\؟\،\٬\٫\؛"
    r"])"
)
regexes/quote.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+
4
# Typographic double-quote variants that are unified to a plain '"'.
strange_double_quotes = [
    "«",
    "‹",
    "»",
    "›",
    "„",
    "“",
    "‟",
    "”",
    "❝",
    "❞",
    "❮",
    "❯",
    "〝",
    "〞",
    "〟",
    # Written as an escape so the entry cannot be confused with the start of
    # a triple-quoted string.
    # NOTE(review): assumed to be FULLWIDTH QUOTATION MARK (U+FF02) - confirm
    # against the upstream quote table.
    "\uff02",
]
# Typographic single-quote variants that are unified to a plain "'".
strange_single_quotes = ["‘", "‛", "’", "❛", "❜", "`", "´", "‘", "’"]

# Alternations over the tables above; none of the entries contains a regex
# metacharacter, so they are joined without escaping.
DOUBLE_QUOTE_REGEX = re.compile("|".join(strange_double_quotes))
SINGLE_QUOTE_REGEX = re.compile("|".join(strange_single_quotes))
regexes/url.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
# URL matcher: scheme (or "www."), optional credentials, then either a public
# IPv4 address, a host name with TLD, or "localhost", followed by an optional
# port and path.
#
# The host/domain/TLD classes use the range \u00a1-\uffff so that
# internationalized host names match. The earlier spelling with doubled
# backslashes inside raw strings was parsed by `re` as a literal backslash
# plus the accidental range "1-\", which rejected non-ASCII hosts.
URL_REGEX = re.compile(
    r"(?:^|(?<![\w\/\.]))"
    # protocol identifier
    r"(?:(?:https?:\/\/|ftp:\/\/|www\d{0,3}\.))"
    # user:pass authentication
    r"(?:\S+(?::\S*)?@)?"
    r"(?:"
    # IP address exclusion
    # private & local networks
    r"(?!(?:10|127)(?:\.\d{1,3}){3})"
    r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})"
    r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})"
    # IP address dotted notation octets
    # excludes loopback network 0.0.0.0
    # excludes reserved space >= 224.0.0.0
    # excludes network & broadcast addresses
    # (first & last IP address of each class)
    r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])"
    r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}"
    r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
    r"|"
    # host name
    r"(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)"
    # domain name
    r"(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)*"
    # TLD identifier
    r"(?:\.(?:[a-z\u00a1-\uffff]{2,}))"
    r"|"
    r"(?:(localhost))"
    r")"
    # port number
    r"(?::\d{2,5})?"
    # resource path; stops at whitespace and at ")", "]" or "}" so closing
    # brackets that follow a URL are not swallowed
    r"(?:\/[^\)\]\}\s]*)?",
    flags=re.UNICODE | re.IGNORECASE,
)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
1
+ streamlit
2
+ hazm
3
+ Pillow
4
+ mtranslate
5
+ torch
6
+ transformers
utils.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import json
3
+ from PIL import Image
4
+
5
+
6
def load_image(image_path, image_resize=None):
    """Open an image and optionally resize it.

    Args:
        image_path: Path (or file object) passed to ``Image.open``.
        image_resize: Target size as a tuple; any non-tuple value (including
            the default ``None``) leaves the image at its original size.

    Returns:
        The opened image, resized when ``image_resize`` is a tuple.
    """
    image = Image.open(image_path)
    if isinstance(image_resize, tuple):
        # ``resize`` returns a new image rather than modifying in place, so
        # the result must be kept.
        image = image.resize(image_resize)
    return image
11
+
12
+
13
def load_text(text_path):
    """Return the full contents of a text file.

    The file is decoded as UTF-8 explicitly so that non-ASCII (e.g. Persian)
    content is read the same way regardless of the platform's default
    encoding.

    Args:
        text_path: Path of the file to read.

    Returns:
        The file contents as a string.
    """
    with open(text_path, encoding="utf-8") as f:
        return f.read()
19
+
20
+
21
def load_json(json_path):
    """Parse a JSON file and return the decoded object.

    The file is decoded as UTF-8 explicitly so that non-ASCII content is read
    the same way regardless of the platform's default encoding.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the document (dict, list, ...).
    """
    with open(json_path, encoding="utf-8") as f:
        return json.load(f)
27
+
28
+
29
def local_css(css_path):
    """Inject the contents of a local CSS file into the Streamlit page.

    The stylesheet is read as UTF-8 and emitted inside a ``<style>`` tag via
    ``st.markdown`` with HTML rendering enabled.

    Args:
        css_path: Path of the CSS file to embed.
    """
    with open(css_path, encoding="utf-8") as f:
        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
32
+
33
+
34
def remote_css(css_url):
    """Link an external stylesheet into the Streamlit page.

    Args:
        css_url: URL placed verbatim in the ``href`` of a ``<link>`` tag; it
            is not escaped, so it must come from a trusted source.
    """
    link_tag = f'<link href="{css_url}" rel="stylesheet">'
    st.markdown(link_tag, unsafe_allow_html=True)