Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse files- README.md +2 -8
- __pycache__/app.cpython-310.pyc +0 -0
- app.py +61 -0
- langs_enum_r2.txt +29 -0
- requirements.txt +3 -0
- tglang.pt +3 -0
README.md
CHANGED
@@ -1,12 +1,6 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
|
4 |
-
colorFrom: gray
|
5 |
-
colorTo: yellow
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.5.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: Tglang_programming_language_detection
|
3 |
+
app_file: app.py
|
|
|
|
|
4 |
sdk: gradio
|
5 |
sdk_version: 4.5.0
|
|
|
|
|
6 |
---
|
|
|
|
__pycache__/app.cpython-310.pyc
ADDED
Binary file (2.65 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
|
3 |
+
import gradio as gr
|
4 |
+
import pyonmttok
|
5 |
+
import torch.jit
|
6 |
+
|
7 |
+
# Module-level state; assigned by init_model() and read by predict().
model = None
tokenizer = None
lang_enum = None

TITLE = "Tglang: Programming Language Detection"
# The heading is closed with </h5>; it was previously terminated by a second
# opening <h5>, which left the tag unbalanced.
DESCRIPTION = ("<h5 style=\"text-align:center\">"
               "Enter a code snippet and the model will predict the programming language it is written in.\n\n"
               "Alternatively, it's possible to select one example from the dropdown menu to see how the model works.</h5>")
# TODO(review): both Markdown links in the last sentence have empty targets —
# fill in the article and repository URLs.
FOOTER = ("This is a solution for the "
          "[Telegram ML competition 2023, Round 2](https://contest.com/docs/ML-Competition-2023-r2).\n\n"
          "For more details, read [this article]() or check out [this repo]()")
# Each entry is [code snippet, expected label]. The labels use the spelling
# found in langs_enum_r2.txt (e.g. TGLANG_LANGUAGE_CPLUSPLUS, not ..._CPP).
# NOTE(review): the interface declares a single input component, so the label
# column is presumably ignored by Gradio — confirm.
EXAMPLES = [
    ["def foo():\n print('Hello, world!')", "TGLANG_LANGUAGE_PYTHON"],
    ["int main() {\n printf(\"Hello, world!\");\n return 0;\n}", "TGLANG_LANGUAGE_C"],
    ["function foo() {\n console.log('Hello, world!');\n}", "TGLANG_LANGUAGE_JAVASCRIPT"],
    ["public class HelloWorld {\n public static void main(String[] args) {\n System.out.println(\"Hello, world!\");\n }\n}", "TGLANG_LANGUAGE_JAVA"],
    ["#include <iostream>\n\nint main() {\n std::cout << \"Hello, world!\" << std::endl;\n}", "TGLANG_LANGUAGE_CPLUSPLUS"],
    ["using System;\n\npublic class Program\n{\n public static void Main()\n {\n Console.WriteLine(\"Hello, world!\");\n }\n}", "TGLANG_LANGUAGE_CSHARP"],
]
|
26 |
+
|
27 |
+
|
28 |
+
def init_model():
    """Load the tokenizer, the TorchScript model and the label list.

    Populates the module globals ``tokenizer``, ``model`` and ``lang_enum``.
    Both ``tglang.pt`` and ``langs_enum_r2.txt`` are resolved next to this
    source file, so the current working directory does not matter.
    """
    global model, tokenizer, lang_enum
    tokenizer = pyonmttok.Tokenizer("conservative")
    model = torch.jit.load(Path(__file__).with_name("tglang.pt"))

    labels_text = Path(__file__).with_name("langs_enum_r2.txt").read_text(encoding="utf-8")
    # Strip *before* filtering: a whitespace-only (or "\r"-only) line must be
    # dropped rather than kept as an empty label, otherwise every following
    # label would sit at the wrong index.
    lang_enum = [line.strip() for line in labels_text.splitlines() if line.strip()]
|
34 |
+
|
35 |
+
|
36 |
+
def predict(text):
    """Return the language label predicted for a code snippet.

    Args:
        text: The snippet to classify.

    Returns:
        The entry of ``lang_enum`` selected by the first value the model
        returns for this snippet.
    """
    snippet_tokens = tokenizer(text)
    # The model is called with a one-element batch; only the first returned
    # value is used as the label index.
    predicted_index, *_ignored = model([snippet_tokens])
    return lang_enum[predicted_index]
|
41 |
+
|
42 |
+
|
43 |
+
def create_demo():
    """Initialise the model globals and build the Gradio interface.

    Returns:
        The configured ``gr.Interface`` wrapping ``predict``; it is not
        launched here.
    """
    init_model()

    snippet_box = gr.Textbox(label="Code snippet", placeholder="Enter code here...")
    prediction_box = gr.Textbox(label="Model prediction")
    return gr.Interface(
        fn=predict,
        inputs=snippet_box,
        outputs=prediction_box,
        title=TITLE,
        description=DESCRIPTION,
        examples=EXAMPLES,
        theme=gr.themes.Monochrome(),
        article=FOOTER,
    )
|
55 |
+
|
56 |
+
|
57 |
+
# Built at import time so the interface is available as a module-level
# ``demo`` object, not only when the file is run as a script.
demo = create_demo()


if __name__ == "__main__":
    # Direct execution: start the web UI.
    demo.launch(show_api=False)
|
langs_enum_r2.txt
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
TGLANG_LANGUAGE_OTHER
|
2 |
+
TGLANG_LANGUAGE_C
|
3 |
+
TGLANG_LANGUAGE_CPLUSPLUS
|
4 |
+
TGLANG_LANGUAGE_CSHARP
|
5 |
+
TGLANG_LANGUAGE_CSS
|
6 |
+
TGLANG_LANGUAGE_DART
|
7 |
+
TGLANG_LANGUAGE_DOCKER
|
8 |
+
TGLANG_LANGUAGE_FUNC
|
9 |
+
TGLANG_LANGUAGE_GO
|
10 |
+
TGLANG_LANGUAGE_HTML
|
11 |
+
TGLANG_LANGUAGE_JAVA
|
12 |
+
TGLANG_LANGUAGE_JAVASCRIPT
|
13 |
+
TGLANG_LANGUAGE_JSON
|
14 |
+
TGLANG_LANGUAGE_KOTLIN
|
15 |
+
TGLANG_LANGUAGE_LUA
|
16 |
+
TGLANG_LANGUAGE_NGINX
|
17 |
+
TGLANG_LANGUAGE_OBJECTIVE_C
|
18 |
+
TGLANG_LANGUAGE_PHP
|
19 |
+
TGLANG_LANGUAGE_POWERSHELL
|
20 |
+
TGLANG_LANGUAGE_PYTHON
|
21 |
+
TGLANG_LANGUAGE_RUBY
|
22 |
+
TGLANG_LANGUAGE_RUST
|
23 |
+
TGLANG_LANGUAGE_SHELL
|
24 |
+
TGLANG_LANGUAGE_SOLIDITY
|
25 |
+
TGLANG_LANGUAGE_SQL
|
26 |
+
TGLANG_LANGUAGE_SWIFT
|
27 |
+
TGLANG_LANGUAGE_TL
|
28 |
+
TGLANG_LANGUAGE_TYPESCRIPT
|
29 |
+
TGLANG_LANGUAGE_XML
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
torch
|
3 |
+
pyonmttok
|
tglang.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60d7452b5fcf381deee08bf9dcf5387cf20236d809d0a044dbde827bf18ab07c
|
3 |
+
size 630285
|