{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-04-08 22:18:10,848 - simalign.simalign - INFO - Initialized the EmbeddingLoader with model: distilbert-base-multilingual-cased\n",
"Initialized the EmbeddingLoader with model: distilbert-base-multilingual-cased\n"
]
}
],
"source": [
"from categories.fluency import *\n",
"from categories.accuracy import *"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sentence: The cat sat the quickly up apples banana.\n"
]
}
],
"source": [
"src_sent = \"Das ist ein Test.\" # Example source sentence\n",
"trg_sent = input(f\"{src_sent}: \") # Prompt the user to enter a sentence\n",
"\n",
"if trg_sent == \"\":\n",
" trg_sent = \"The cat sat the quickly up apples banana.\"\n",
"\n",
"print(\"Sentence:\", trg_sent) # Print the input sentence\n",
"\n",
"err = grammar_errors(trg_sent) # Call the function to execute the grammar error checking\n",
"flu = pseudo_perplexity(trg_sent, threshold=3.1) # Call the function to execute the fluency checking\n",
"acc = accuracy(src_sent, trg_sent) # Call the function to execute the accuracy checking"
]
},
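{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch, not executed (assumption): judging by how the results are used\n",
"# in the next cells, each checker appears to return a dict with an \"errors\" list and\n",
"# a numeric \"score\", where every error carries token-index \"start\"/\"end\" bounds and\n",
"# a human-readable \"message\". The literal below only mirrors that inferred shape;\n",
"# the values are hypothetical.\n",
"example_result = {\n",
"    \"errors\": [\n",
"        {\"start\": 7, \"end\": 8, \"message\": \"An apostrophe may be missing.\"}\n",
"    ],\n",
"    \"score\": 80.0,\n",
"}\n",
"print(example_result[\"errors\"][0][\"message\"])"
]
},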
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"An apostrophe may be missing.: apples banana.\n",
"Adjusted liklihood 4.8056646935577145 over threshold 3.1: sat\n",
"Adjusted liklihood 4.473408069089179 over threshold 3.1: the\n",
"Adjusted liklihood 4.732453441503642 over threshold 3.1: quickly\n",
"Adjusted liklihood 5.1115574262487735 over threshold 3.1: apples\n",
"Word ist possibly mistranslated or omitted: cat\n",
"Word ein possibly mistranslated or omitted: sat\n",
"Word sat possibly mistranslated or added erroneously: sat\n",
"Word the possibly mistranslated or added erroneously: the\n",
"Word quickly possibly mistranslated or added erroneously: quickly\n",
"Word up possibly mistranslated or added erroneously: up\n",
"Word apples possibly mistranslated or added erroneously: apples\n",
"Word banana possibly mistranslated or added erroneously: banana.\n"
]
}
],
"source": [
"combined_err = err[\"errors\"] + flu[\"errors\"] + acc[\"errors\"] # Combine the error counts from both functions\n",
"\n",
"for e in combined_err:\n",
" substr = \" \".join(trg_sent.split(\" \")[e[\"start\"]:e[\"end\"]+1])\n",
" print(f\"{e['message']}: {substr}\") # Print the error messages\n"
]
},
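{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional illustration, not part of the original pipeline (assumption): reuse the same\n",
"# token-index convention as the loop above to mark every flagged word inline, which can\n",
"# make overlapping error spans easier to see at a glance.\n",
"tokens = trg_sent.split(\" \")\n",
"flagged = set()\n",
"for e in combined_err:\n",
"    flagged.update(range(e[\"start\"], e[\"end\"] + 1)) # Collect the indices of all flagged tokens\n",
"highlighted = \" \".join(f\"[[{tok}]]\" if i in flagged else tok for i, tok in enumerate(tokens))\n",
"print(highlighted)"
]
},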
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fluency Score: 76.62\n",
"Accuracy Score: 24.45\n"
]
}
],
"source": [
"fluency_score = 0.5 * err[\"score\"] + 0.5 * flu[\"score\"] # Calculate the fluency score\n",
"print(\"Fluency Score:\", round(fluency_score, 2)) # Print the fluency score\n",
"\n",
"print(\"Accuracy Score:\", acc[\"score\"]) # Print the accuracy score"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "teach-bs",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}