File size: 1,766 Bytes
58d4ef5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e48fc12
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from sys import argv
import nltk
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib

# Restore the persisted artifacts in one pass: first the trained classifier,
# then the TF-IDF vectorizer that was used for training.
loaded_classifier, vectorizer = map(
    joblib.load,
    ("text_identification_model.pkl", "text_identification_vectorizer.pkl"),
)

# Human-readable names for the numeric class labels (0, 1, 2).
categories = dict(enumerate(('Other', 'Bible', 'Talmud')))

def parse_text(new_text):
    """Classify a single text and print the category with its confidence.

    The text is vectorized with the module-level ``vectorizer``, classified
    with the module-level ``loaded_classifier``, and the numeric label is
    translated to a name through ``categories``.

    Args:
        new_text: The raw text string to classify.

    Returns:
        None. The result is written to standard output.

    Raises:
        KeyError: If the classifier predicts a label missing from
            ``categories``.
    """
    # Transform the new text using the TF-IDF vectorizer (expects an iterable
    # of documents, hence the one-element list).
    new_text_tfidf = vectorizer.transform([new_text])

    # Predicted label for the single input row.
    predicted_label = loaded_classifier.predict(new_text_tfidf)[0]

    # Per-class probabilities; columns are ordered like ``classes_``.
    probabilities = loaded_classifier.predict_proba(new_text_tfidf)

    # Report the probability of the class that was actually predicted.
    # Previously column 1 ("Bible") was always used, so the printed score was
    # unrelated to the prediction whenever the result was "Other" or "Talmud".
    class_index = list(loaded_classifier.classes_).index(predicted_label)
    confidence_score = probabilities[0, class_index]

    # Determine the predicted category label
    predicted_category = categories[predicted_label]

    # Print the prediction and the confidence score
    print(f"Text: {new_text} | Prediction: {predicted_category} | Confidence Score: {confidence_score:.4f}")


# Built-in sample texts; the script classifies each of them when no
# command-line argument is supplied.
# NOTE(review): these literals look like Hebrew text that was decoded with the
# wrong character encoding (mojibake) — confirm against the original file and
# restore the intended UTF-8 strings if so.
text_list = [
'讻诪讛 讬驻讛 讜谞讗讛 讻砖砖讜诪注讬诐 讛砖讬专讛 砖诇讛诐',
'讞讚砖讜转 讛注专讘: 砖诇讜砖讛 讗谞砖讬诐 谞爪诐讜 讟讜讘注讬诐 讘讻讬谞专转',
'讜讛讬讛 讘注转 讛讛讬讗 讗讞驻砖 讗转 讬专讜砖诇讬诐 讘谞专讜转 讜讛讜讚注转讬讛 讗转 讻诇 转讜注讘讜转讬讛',
'讜讬讗诪专 诪砖讛 讗诇 讘谞讬 讬砖专讗诇',
'讚讗诪专 谞砖讬讗 诪讘讬讗 砖注讬专 转讜 讛讗 讚转谞谉',
'讗诪专 诇讬讛 讗讘讬讬 诇专讘 讝注讬专讗',
'讜讗讬讛讜 诇讗 拽讗 讬讛讬讘 砖注讜专讗 讘诪砖讻讗',]


# Classify the first command-line argument when one is given; otherwise run
# every entry of text_list through the classifier.
for new_text in ([argv[1]] if len(argv) > 1 else text_list):
    parse_text(new_text)