hynky HF staff committed on
Commit
e787318
1 Parent(s): b402b61

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +50 -0
README.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - hynky/czech_news_dataset_v2
4
+ language:
5
+ - cs
6
+ library_name: transformers
7
+ tags:
8
+ - news
9
+ - nlp
10
+ - czech
11
+ ---
12
+
13
+ - A model for predicting the source of news articles
14
+ ## Usage:
15
+
16
+ ```python
17
+ import re
18
+ from transformers import pipeline
19
+ from html import unescape
20
+ from unicodedata import normalize
21
+
22
# Matches any run of whitespace (spaces, tabs, newlines, carriage returns, ...).
re_multispace = re.compile(r"\s+")


def normalize_text(text):
    """Clean up raw article text before it is passed to the classifier.

    Steps, in order: trim leading/trailing whitespace, collapse every
    whitespace run to a single space, decode HTML entities, and apply
    Unicode NFKC normalization.

    Args:
        text: The raw article text, or ``None``.

    Returns:
        The normalized string, or ``None`` when ``text`` is ``None``.
    """
    if text is None:  # identity check; `== None` can be fooled by a custom __eq__
        return None

    # `\s+` already covers "\n", "\t" and "\r", so no separate per-character
    # replacement is needed before collapsing.
    text = re_multispace.sub(" ", text.strip())
    text = unescape(text)
    # NOTE(review): entity decoding and NFKC run after whitespace collapsing,
    # so e.g. "&nbsp;" can reintroduce extra spaces. The order is left
    # untouched in case it mirrors the preprocessing the model was trained
    # with -- confirm before changing it.
    return normalize("NFKC", text)
36
+
37
+
38
# Text-classification pipeline: the "hynky/Server" checkpoint paired with the
# "ufal/robeczech-base" tokenizer. Inputs are truncated to at most 512 tokens,
# and top_k=5 asks for the five highest-scoring labels per input.
model = pipeline(
    task="text-classification",
    model="hynky/Server",  # plain literal; there is nothing to interpolate
    tokenizer="ufal/robeczech-base",
    truncation=True,
    max_length=512,
    top_k=5,
)
43
+
44
+
45
def predict(article):
    """Classify a news article with the module-level ``model`` pipeline.

    Args:
        article: Raw article text; it is cleaned with ``normalize_text``
            before being handed to the pipeline.

    Returns:
        The pipeline's output for the normalized text (with ``top_k=5`` this
        is presumably the five best label/score pairs -- confirm against the
        transformers documentation).
    """
    return model(normalize_text(article))


# Show the predictions for a sample sentence ("It will rain tonight.").
print(predict("Dnes v noci bude pršet."))
50
+ ```