Supermichi100 committed on
Commit
9e04abb
1 Parent(s): 9e5fce2

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +9 -0
  2. .github/workflows/update_space.yml +28 -0
  3. 01_module_handbooks/bachelor_business_management_and_economics.pdf +3 -0
  4. 01_module_handbooks/bachelor_economathematics.pdf +3 -0
  5. 01_module_handbooks/bachelor_information_systems.html +0 -0
  6. 01_module_handbooks/bachelor_information_systems.pdf +3 -0
  7. 01_module_handbooks/master_information_systems.pdf +3 -0
  8. 01_module_handbooks/master_international_economic_policy.pdf +3 -0
  9. 01_module_handbooks/master_management.pdf +3 -0
  10. 02_data_extraction/BA_MM_all_modules.xlsx +0 -0
  11. 02_data_extraction/__pycache__/helper_methods.cpython-38.pyc +0 -0
  12. 02_data_extraction/data_extraction.ipynb +539 -0
  13. 02_data_extraction/extract_keywords.ipynb +246 -0
  14. 02_data_extraction/helper_methods.py +31 -0
  15. 02_data_extraction/module_catalogues/BA_IS_all_modules.pdf +3 -0
  16. 02_data_extraction/module_catalogues/BA_MM_all_modules.pdf +0 -0
  17. 02_data_extraction/module_catalogues/MS_IS_all_modules.pdf +3 -0
  18. 02_data_extraction/module_catalogues/MS_MM_all_modules.pdf +3 -0
  19. 03_extracted_final_modules/BA_IS_all_modules.xlsx +0 -0
  20. 03_extracted_final_modules/BA_IS_all_modules_15.xlsx +0 -0
  21. 03_extracted_final_modules/BA_MM_all_modules.xlsx +0 -0
  22. 03_extracted_final_modules/BA_MM_all_modules_15.xlsx +0 -0
  23. 03_extracted_final_modules/MS_IS_all_modules.csv +0 -0
  24. 03_extracted_final_modules/MS_IS_all_modules.xlsx +0 -0
  25. 03_extracted_final_modules/MS_IS_all_modules_cleaned.xlsx +0 -0
  26. 03_extracted_final_modules/MS_IS_all_modules_orginal_15_rows_cleaned.csv +16 -0
  27. 03_extracted_final_modules/MS_IS_all_modules_orginal_15_rows_cleaned.xlsx +0 -0
  28. 03_extracted_final_modules/MS_IS_all_modules_orginal_to_clean_cleaned.csv +0 -0
  29. 03_extracted_final_modules/MS_IS_all_modules_orginal_to_clean_cleaned.xlsx +0 -0
  30. 03_extracted_final_modules/MS_MM_all_modules.xlsx +0 -0
  31. 04_finetuning_approaches/FT_Tapas.ipynb +0 -0
  32. 04_finetuning_approaches/Fine_tuning_TapasForQuestionAnswering_on_SQA.ipynb +0 -0
  33. 04_finetuning_approaches/MS_IS_50_modules_cleaned.xlsx +0 -0
  34. 04_finetuning_approaches/MS_IS_all_modules.xlsx +0 -0
  35. 04_finetuning_approaches/MS_IS_all_modules_orginal_15_rows.xlsx +0 -0
  36. 04_finetuning_approaches/MS_IS_all_modules_orginal_15_rows_cleaned.csv +16 -0
  37. 04_finetuning_approaches/MS_IS_all_modules_orginal_15_rows_cleaned.xlsx +0 -0
  38. 04_finetuning_approaches/MS_IS_all_modules_orginal_to_clean.xlsx +0 -0
  39. 04_finetuning_approaches/MS_IS_all_modules_orginal_to_clean_cleaned.csv +0 -0
  40. 04_finetuning_approaches/MS_IS_all_modules_orginal_to_clean_cleaned.xlsx +0 -0
  41. 04_finetuning_approaches/finetuned_sqa_tryout.ipynb +63 -0
  42. 04_finetuning_approaches/generate_training_question.ipynb +786 -0
  43. 04_finetuning_approaches/module_guide_sq_30_questions.xlsx +0 -0
  44. 04_finetuning_approaches/module_guide_sqa.xlsx +0 -0
  45. 04_finetuning_approaches/module_guide_sqa_contents.xlsx +0 -0
  46. 04_finetuning_approaches/module_guide_sqa_etcs.xlsx +0 -0
  47. 04_finetuning_approaches/qa_catalog.xlsx +0 -0
  48. 04_finetuning_approaches/sqa_train_set_28_examples.xlsx +0 -0
  49. 0915NC_Studienplaetze.jpg +0 -0
  50. 09_archive_and_discarded_approaches/MS_IS_all_modules.csv +0 -0
.gitattributes CHANGED
@@ -33,3 +33,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ 01_module_handbooks/bachelor_business_management_and_economics.pdf filter=lfs diff=lfs merge=lfs -text
37
+ 01_module_handbooks/bachelor_economathematics.pdf filter=lfs diff=lfs merge=lfs -text
38
+ 01_module_handbooks/bachelor_information_systems.pdf filter=lfs diff=lfs merge=lfs -text
39
+ 01_module_handbooks/master_information_systems.pdf filter=lfs diff=lfs merge=lfs -text
40
+ 01_module_handbooks/master_international_economic_policy.pdf filter=lfs diff=lfs merge=lfs -text
41
+ 01_module_handbooks/master_management.pdf filter=lfs diff=lfs merge=lfs -text
42
+ 02_data_extraction/module_catalogues/BA_IS_all_modules.pdf filter=lfs diff=lfs merge=lfs -text
43
+ 02_data_extraction/module_catalogues/MS_IS_all_modules.pdf filter=lfs diff=lfs merge=lfs -text
44
+ 02_data_extraction/module_catalogues/MS_MM_all_modules.pdf filter=lfs diff=lfs merge=lfs -text
.github/workflows/update_space.yml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Run Python script
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ jobs:
9
+ build:
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - name: Checkout
14
+ uses: actions/checkout@v2
15
+
16
+ - name: Set up Python
17
+ uses: actions/setup-python@v2
18
+ with:
19
+ python-version: '3.9'
20
+
21
+ - name: Install Gradio
22
+ run: python -m pip install gradio
23
+
24
+ - name: Log in to Hugging Face
25
+ run: python -c 'import huggingface_hub; huggingface_hub.login(token="${{ secrets.hf_token }}")'
26
+
27
+ - name: Deploy to Spaces
28
+ run: gradio deploy
01_module_handbooks/bachelor_business_management_and_economics.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:007c627f1a2ade281ba674145f313c575a11b0b3705d93b8e718f8fe454be4fc
3
+ size 8143666
01_module_handbooks/bachelor_economathematics.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0477865991cd9eeacc560e020367821e8d7bd9bf741f6dec64143c81ede3d98f
3
+ size 14227856
01_module_handbooks/bachelor_information_systems.html ADDED
The diff for this file is too large to render. See raw diff
 
01_module_handbooks/bachelor_information_systems.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb186b0a0d75d1f82ef34a9cc395e17a77eea4a2dd6c0c5b88b43347ce8bd471
3
+ size 15806261
01_module_handbooks/master_information_systems.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd431677afbf1efbe1d533be5f2357705e93992fba97ca1d95277562a40440b3
3
+ size 12414315
01_module_handbooks/master_international_economic_policy.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c402c321942c3aad16d7ffcd325981d1bc9e4129a80b1b96036da0ba6359e3eb
3
+ size 17147808
01_module_handbooks/master_management.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1e72fec495a8fcad34c63b58a5d2f7435cc4c0511857870d75cde04ee98aaa3
3
+ size 46696350
02_data_extraction/BA_MM_all_modules.xlsx ADDED
Binary file (50.8 kB). View file
 
02_data_extraction/__pycache__/helper_methods.cpython-38.pyc ADDED
Binary file (908 Bytes). View file
 
02_data_extraction/data_extraction.ipynb ADDED
@@ -0,0 +1,539 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# Necessary pip installs: \n",
10
+ "# pip install pandas\n",
11
+ "# pip install pdfminer.six\n",
12
+ "# pip install xlsxwriter"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": 16,
18
+ "metadata": {},
19
+ "outputs": [],
20
+ "source": [
21
+ "# moduleCatalogue paths \n",
22
+ "\n",
23
+ "# Masters\n",
24
+ "MS_IS_all_modules = \"./module_catalogues/MS_IS_all_modules.pdf\"\n",
25
+ "\n",
26
+ "MS_MM_all_modules = \"./module_catalogues/MS_MM_all_modules.pdf\"\n",
27
+ "\n",
28
+ "\n",
29
+ "# Bachelors\n",
30
+ "BA_IS_all_modules = \"./module_catalogues/BA_IS_all_modules.pdf\"\n",
31
+ "\n",
32
+ "BA_MM_all_modules = \"./module_catalogues/BA_MM_all_modules.pdf\"\n"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 11,
38
+ "metadata": {},
39
+ "outputs": [],
40
+ "source": [
41
+ "import re\n",
42
+ "from pdfminer.high_level import extract_text\n",
43
+ "import pandas as pd\n",
44
+ "\n",
45
+ "# Read PDF file\n",
46
+ "text_Module_Catalogue = extract_text(BA_MM_all_modules)\n"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": 48,
52
+ "metadata": {},
53
+ "outputs": [],
54
+ "source": [
55
+ "# Pattern to remove Master Information Systems\n",
56
+ "removal_patterns_MS_IS = [\n",
57
+ " r\"Module Catalogue for the Subject\\nInformation Systems\\nMaster’s with 1 major, 120 ECTS credits\",\n",
58
+ " r\"JMU\\sWürzburg\\s•\\sgenerated\\s\\d{1,2}-[A-Za-z]+-\\d{4}\\s•\\sexam\\.\\sreg\\.\\sda-\\nta\\srecord\\sMaster\\s\\(120\\sECTS\\)\\sInformation\\sSystems\\s-\\s\\d{4}\",\n",
59
+ " r\"Master’s\\s+with\\s+1\\s+major\\s+Information\\s+Systems\\s+\\(\\d{4}\\)\",\n",
60
+ " r\"page\\s+\\d+\\s+/\\s+\\d+\",\n",
61
+ " r'^\\s*$'\n",
62
+ " ]\n",
63
+ "\n",
64
+ "# Pattern to remove Bachelor Information Systems\n",
65
+ "\n",
66
+ "removal_patterns_BA_IS = [\n",
67
+ " r\"Module Catalogue for the Subject\\nBusiness Information Systems\\nBachelor’s with 1 major, 180 ECTS credits\",\n",
68
+ " r\"JMU\\sWürzburg\\s•\\sgenerated\\s\\d{1,2}-[A-Za-z]+-\\d{4}\\s•\\sexam\\.\\sreg\\.\\sda-\\nta\\srecord\\sBachelor\\s\\(180\\sECTS\\)\\sWirtschaftsinformatik\\s-\\s\\d{4}\",\n",
69
+ " r\"Bachelor’s\\s+with\\s+1\\s+major\\s+Business\\s+Information\\s+Sy-\\n+stems\\s+\\(\\d{4}\\)\",\n",
70
+ " r\"page\\s+\\d+\\s+/\\s+\\d+\",\n",
71
+ " r'^\\s*$'\n",
72
+ " ]\n",
73
+ "\n",
74
+ "# Pattern to remove Master Management\n",
75
+ "removal_patterns_MS_MM = [\n",
76
+ " r\"Module Catalogue for the Subject\\nManagement\\nMaster’s with 1 major, 120 ECTS credits\",\n",
77
+ " r\"JMU\\sWürzburg\\s•\\sgenerated\\s11-Mai-2023\\s•\\sexam\\.\\sreg\\.\\s\\ndata\\srecord\\sMaster\\s\\(120\\sECTS\\)\\sManagement\\s-\\s2018\",\n",
78
+ " r\"Master’s\\s+with\\s+1\\s+major\\s+Management\\s+\\(\\d{4}\\)\",\n",
79
+ " r\"page\\s+\\d+\\s+/\\s+\\d+\",\n",
80
+ " r'^\\s*$'\n",
81
+ " ]\n",
82
+ "\n",
83
+ "removal_patterns_BA_MM = [\n",
84
+ " r\"Module Catalogue for the Subject\\nBusiness Management and Economics\\nBachelor’s with 1 major, 180 ECTS credits\",\n",
85
+ " r\"JMU Würzburg • generated \\d{2}-[A-Za-z]{3}-\\d{4} • exam\\. reg\\. data re-[\\s\\S]*?Bachelor \\(180 ECTS\\) Wirtschaftswissenschaft - 2008\",\n",
86
+ " r\"Bachelor’s\\s+with\\s+1\\s+major\\s+Business\\s+Management\\s+and\\s+Economics\\s+\\(\\d{4}\\)\",\n",
87
+ " r\"page\\s+\\d+\\s+/\\s+\\d+\",\n",
88
+ " r'^\\s*$'\n",
89
+ " ]\n"
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": 50,
95
+ "metadata": {},
96
+ "outputs": [],
97
+ "source": [
98
+ "# regex patterns to get attributes of Master Information Systems\n",
99
+ "from enum import Enum\n",
100
+ "\n",
101
+ "class Patterns_MS_IS(Enum):\n",
102
+ " PATTERN_ENTIRE_MODULE = r\"Module title[\\s\\S]*?(?=Module title|$)\"\n",
103
+ " MODULE_TITLE = r'Module title\\s*\\n*\\s*(.*)'\n",
104
+ " ABBREVIATION = r'Abbreviation\\s*\\n*\\s*(.*)'\n",
105
+ " MODULE_OFFERED_BY = r\"^(Faculty|Institute).*\"\n",
106
+ " MODULE_COORDINATOR = r\"^(Holder|holder|Dean).*\"\n",
107
+ " ETCS = r\"^\\d{1,2}$\"\n",
108
+ " METHOD_GRADING = r\".*(not\\s)?successfully completed|numerical grade.*\"\n",
109
+ " DURATION = r\"^\\d\\ssemester$\"\n",
110
+ " MODULE_LEVEL = r\"^(?:graduate|undergraduate)$\"\n",
111
+ " CONTENTS = r'Contents([\\s\\S]*?)Intended learning outcomes'\n",
112
+ " INTENDED_LEARNING_OUTCOMES = r'Intended learning outcomes\\n\\n([\\s\\S]*?)\\n\\nCourses \\(type'\n",
113
+ " COURSES = r'if other than German\\)([\\s\\S]*?)Method of assessment'\n",
114
+ " ASSESSMENT = r'whether\\s*\\nmodule is creditable for bonus\\)([\\s\\S]*?)Allocation of places'\n",
115
+ " ALLOCATION = r'Allocation of places([\\s\\S]*?)Additional information'\n",
116
+ " ADDITIONAL_INFORMATION = r'Additional information([\\s\\S]*?)Workload'\n",
117
+ " WORKLOAD = r'Workload([\\s\\S]*?)Teaching cycle'\n",
118
+ " TEACHING_CYCLE = r'Teaching cycle([\\s\\S]*?)Referred to in LPO I'\n",
119
+ " REFERRED_LPO = r'regulations for teaching-degree programmes\\)([\\s\\S]*?)Module appears in'\n"
120
+ ]
121
+ },
122
+ {
123
+ "cell_type": "code",
124
+ "execution_count": 51,
125
+ "metadata": {},
126
+ "outputs": [],
127
+ "source": [
128
+ "from helper_methods import extract_first_match, extract_LineMatch, clean_entries\n",
129
+ "import xlsxwriter\n",
130
+ "\n",
131
+ "# Extract modules to xlsx -> Method shall be used generically for all modules later\n",
132
+ "\n",
133
+ "def extract_modules_to_xlsx (text, patternsToRemove, file_path):\n",
134
+ "\n",
135
+ " modules = re.findall(Patterns_MS_IS.PATTERN_ENTIRE_MODULE.value, text)\n",
136
+ " modules = clean_entries(modules, patternsToRemove)\n",
137
+ "\n",
138
+ " workbook = xlsxwriter.Workbook(file_path)\n",
139
+ " worksheet = workbook.add_worksheet()\n",
140
+ "\n",
141
+ " # set columns\n",
142
+ " column_names = ['Module title', 'Abbreviation', 'Module coordinator', 'Module offered by', 'ETCS', 'Method of grading',\n",
143
+ " 'Duration', 'Module level', 'Contents', 'Intended learning outcomes', 'Courses', 'Method of assessment',\n",
144
+ " 'Allocation of places', 'Additional information', 'Workload', 'Teaching cycle', 'Referred to in LPO I']\n",
145
+ " \n",
146
+ " for i in range(len(column_names)):\n",
147
+ " worksheet.write(0, i, column_names[i])\n",
148
+ "\n",
149
+ " counter = 1\n",
150
+ " # Extract module attributes\n",
151
+ " for i in range(len(modules)):\n",
152
+ " module_attributes = []\n",
153
+ " module_attributes.append(extract_first_match(modules[i], Patterns_MS_IS.MODULE_TITLE.value))\n",
154
+ " module_attributes.append(extract_first_match(modules[i], Patterns_MS_IS.ABBREVIATION.value))\n",
155
+ " module_attributes.append(extract_LineMatch(modules[i], Patterns_MS_IS.MODULE_OFFERED_BY.value))\n",
156
+ " module_attributes.append(extract_LineMatch(modules[i], Patterns_MS_IS.MODULE_COORDINATOR.value))\n",
157
+ " module_attributes.append(extract_LineMatch(modules[i], Patterns_MS_IS.ETCS.value))\n",
158
+ " module_attributes.append(extract_LineMatch(modules[i], Patterns_MS_IS.METHOD_GRADING.value))\n",
159
+ " module_attributes.append(extract_LineMatch(modules[i], Patterns_MS_IS.DURATION.value))\n",
160
+ " module_attributes.append(extract_LineMatch(modules[i], Patterns_MS_IS.MODULE_LEVEL.value))\n",
161
+ " module_attributes.append(extract_first_match(modules[i], Patterns_MS_IS.CONTENTS.value))\n",
162
+ " module_attributes.append(extract_first_match(modules[i], Patterns_MS_IS.INTENDED_LEARNING_OUTCOMES.value))\n",
163
+ " module_attributes.append(extract_first_match(modules[i], Patterns_MS_IS.COURSES.value))\n",
164
+ " module_attributes.append(extract_first_match(modules[i], Patterns_MS_IS.ASSESSMENT.value))\n",
165
+ " module_attributes.append(extract_first_match(modules[i], Patterns_MS_IS.ALLOCATION.value))\n",
166
+ " module_attributes.append(extract_first_match(modules[i], Patterns_MS_IS.ADDITIONAL_INFORMATION.value))\n",
167
+ " module_attributes.append(extract_first_match(modules[i], Patterns_MS_IS.WORKLOAD.value))\n",
168
+ " module_attributes.append(extract_first_match(modules[i], Patterns_MS_IS.TEACHING_CYCLE.value))\n",
169
+ " module_attributes.append(extract_first_match(modules[i], Patterns_MS_IS.REFERRED_LPO.value))\n",
170
+ " \n",
171
+ " # Write to xlsx file\n",
172
+ " for j in range(len(module_attributes)):\n",
173
+ " worksheet.write(counter, j, module_attributes[j])\n",
174
+ " \n",
175
+ " counter += 1\n",
176
+ " workbook.close()\n",
177
+ "\n",
178
+ "\n"
179
+ ]
180
+ },
181
+ {
182
+ "cell_type": "code",
183
+ "execution_count": 52,
184
+ "metadata": {},
185
+ "outputs": [],
186
+ "source": [
187
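+ "# Write the modules of the loaded catalogue (BA_MM) to xlsx. NOTE(review): removal_patterns_MS_MM is passed below - confirm removal_patterns_BA_MM was not intended\n",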
188
+ "\n",
189
+ "extract_modules_to_xlsx(text_Module_Catalogue, removal_patterns_MS_MM, \"BA_MM_all_modules.xlsx\")\n"
190
+ ]
191
+ },
192
+ {
193
+ "cell_type": "code",
194
+ "execution_count": 49,
195
+ "metadata": {},
196
+ "outputs": [
197
+ {
198
+ "name": "stdout",
199
+ "output_type": "stream",
200
+ "text": [
201
+ "Module title\n",
202
+ "\n",
203
+ "Introduction to Market-Oriented Management\n",
204
+ "\n",
205
+ "Abbreviation\n",
206
+ "\n",
207
+ "12-Mark-G-082-m01\n",
208
+ "\n",
209
+ "Module coordinator\n",
210
+ "\n",
211
+ "Module offered by\n",
212
+ "\n",
213
+ "holder of the Chair of Business Management and Marke-\n",
214
+ "ting\n",
215
+ "\n",
216
+ "Faculty of Business Management and Economics\n",
217
+ "\n",
218
+ "ECTS Method of grading\n",
219
+ "\n",
220
+ "Only after succ. compl. of module(s)\n",
221
+ "\n",
222
+ "5\n",
223
+ "\n",
224
+ "numerical grade\n",
225
+ "\n",
226
+ "--\n",
227
+ "\n",
228
+ "Duration\n",
229
+ "\n",
230
+ "Module level\n",
231
+ "\n",
232
+ "Other prerequisites\n",
233
+ "\n",
234
+ "1 semester\n",
235
+ "\n",
236
+ "undergraduate\n",
237
+ "\n",
238
+ "--\n",
239
+ "\n",
240
+ "Contents\n",
241
+ "\n",
242
+ "Description\n",
243
+ "In this module, students will acquire the theoretical foundations of market-oriented management.\n",
244
+ "\n",
245
+ "Content:\n",
246
+ "With the stakeholder approach as a starting point, the basic design of market-oriented management will be ex-\n",
247
+ "plained and exemplified in the 5 classical steps: situation analysis, objectives, strategies, tools and control-\n",
248
+ "ling. The course will focus not only on the behavioural approaches of consumer behaviour but also on industri-\n",
249
+ "al purchasing behaviour. A case study introducing students to the fundamental principles of market research ba-\n",
250
+ "sed on a conjoint analysis will provide students with deeper insights into the topic.\n",
251
+ "\n",
252
+ "Outline of syllabus:\n",
253
+ "1. Marketing, entrepreneurship and business management\n",
254
+ "2. Explanations of consumer behaviour\n",
255
+ "3. Fundamentals of market research\n",
256
+ "4. Strategic marketing; marketing tools\n",
257
+ "5. Corporate social responsibility versus creating shared value\n",
258
+ "\n",
259
+ "Reading:\n",
260
+ "Foscht, T. / Swoboda, B.: Käuferverhalten: Grundlagen -- Perspektiven -- Anwendungen, 4th revised and exp. ed.,\n",
261
+ "Wiesbaden 2011.\n",
262
+ "Homburg, Ch.: Grundlagen des Marketingmanagements: Einführung in Strategie, Instrumente, Umsetzung und\n",
263
+ "Unternehmensführung, 4th revised and exp. ed., Wiesbaden 2012.\n",
264
+ "Homburg, Ch.: Grundlagen des Marketingmanagements: Einführung in Strategie, Instrumente, Umsetzung und\n",
265
+ "Unternehmensführung, 3rd ed., Wiesbaden, 2012a.\n",
266
+ "Kroeber-Riel, W. /Weinberg, P.: Konsumentenverhalten, 9th ed., Munich 2009.\n",
267
+ "Meffert, H. / Burman, Ch / Kirchgeorg, M.: Marketing -- Grundlagen marktorientierter Unternehmensführung: Kon-\n",
268
+ "zepte -- Instrumente -- Praxisbeispiele, 11th revised and exp. ed., Wiesbaden 2012.\n",
269
+ "Meffert, H. / Burman, Ch / Becker, Ch.: Internationales Marketing-Management -- Ein markenorientierter Ansatz,\n",
270
+ "4th ed., Stuttgart 2010.\n",
271
+ "Meyer, M.: Ökonomische Organisation der Industrie: Netzwerkarrangements zwischen Markt und Unternehmung,\n",
272
+ "Wiesbaden 1995.\n",
273
+ "Porter, M. E.: Wettbewerbsvorteile -- Spitzenleistungen erreichen und behaupten, 8th ed., Campus Frankfurt /\n",
274
+ "New York 2014. (Original: Porter, M.: Competitive Advantage, New York 1985.)\n",
275
+ "Simon, H. / Fassnacht, M.: Preismanagement, Strategie -- Analyse -- Entscheidung -- Umsetzung, 3rd ed., Wies-\n",
276
+ "baden 2009.\n",
277
+ "\n",
278
+ "Intended learning outcomes\n",
279
+ "\n",
280
+ "The students have a basic understanding of business management and are able to classify the knowledge syste-\n",
281
+ "matically. In addition, they can use the acquired knowledge solve and identify the conventional problem fields of\n",
282
+ "business management.\n",
283
+ "\n",
284
+ "Courses (type, number of weekly contact hours, language — if other than German)\n",
285
+ "\n",
286
+ "V + Ü (no information on SWS (weekly contact hours) and course language available)\n",
287
+ "\n",
288
+ "Method of assessment (type, scope, language — if other than German, examination offered — if not every semester, information on whether\n",
289
+ "module is creditable for bonus)\n",
290
+ "\n",
291
+ "written examination (approx. 60 minutes)\n",
292
+ "\n",
293
+ "Allocation of places\n",
294
+ "\n",
295
+ "Number of places: 405. No restrictions with regard to available places for Bachelor's students of Wirtschaftswis-\n",
296
+ "senschaft (Business Management and Economics), Wirtschaftsmathematik (Mathematics for Economics) and\n",
297
+ "Wirtschaftsinformatik (Business Information Systems). The remaining places will be allocated to students of\n",
298
+ "other subjects. Should the number of applications exceed the number of available places, places will be allo-\n",
299
+ "cated in a standardised procedure among all applicants irrespective of their subjects according to the following\n",
300
+ "quotas: Quota 1 (50% of places): total number of ECTS credits already achieved in the respective degree subject;\n",
301
+ "among applicants with the same number of ECTS credits achieved, places will be allocated by lot. Quota 2 (25%\n",
302
+ "of places): number of subject semesters of the respective applicant; among applicants with the same number of\n",
303
+ "subject semesters, places will be allocated by lot. Quota 3 (25% of places): allocation by lot. Applicants who al-\n",
304
+ "ready have successfully completed at least one module component of the respective module will be given prefe-\n",
305
+ "rential consideration. Places on all courses of the module component with a restricted number of places will be\n",
306
+ "allocated in the same procedure. A waiting list will be maintained and places re-allocated as they become availa-\n",
307
+ "ble.\n",
308
+ "\n",
309
+ "Additional information\n",
310
+ "\n",
311
+ "--\n",
312
+ "\n",
313
+ "Referred to in LPO I (examination regulations for teaching-degree programmes)\n",
314
+ "\n",
315
+ "--\n",
316
+ "\n",
317
+ "Module title\n",
318
+ "\n",
319
+ "Abbreviation\n",
320
+ "\n",
321
+ "Supply, Production and Operations Management. An Introduction\n",
322
+ "\n",
323
+ "12-BPL-G-082-m01\n",
324
+ "\n",
325
+ "Module coordinator\n",
326
+ "\n",
327
+ "Module offered by\n",
328
+ "\n",
329
+ "holder of the Chair of Business Management and Industrial\n",
330
+ "Management\n",
331
+ "\n",
332
+ "Faculty of Business Management and Economics\n",
333
+ "\n",
334
+ "ECTS Method of grading\n",
335
+ "\n",
336
+ "Only after succ. compl. of module(s)\n",
337
+ "\n",
338
+ "5\n",
339
+ "\n",
340
+ "numerical grade\n",
341
+ "\n",
342
+ "--\n",
343
+ "\n",
344
+ "Duration\n",
345
+ "\n",
346
+ "Module level\n",
347
+ "\n",
348
+ "Other prerequisites\n",
349
+ "\n",
350
+ "1 semester\n",
351
+ "\n",
352
+ "undergraduate\n",
353
+ "\n",
354
+ "--\n",
355
+ "\n",
356
+ "Contents\n",
357
+ "\n",
358
+ "This course will provide students with an overview of fundamental processes in procurement, production and lo-\n",
359
+ "gistics and the related corporate functions as well as a model-based introduction to related planning procedu-\n",
360
+ "res.\n",
361
+ "\n",
362
+ "Intended learning outcomes\n",
363
+ "\n",
364
+ "The students will be able to describe and discuss the objectives and major processes in the domains of corpo-\n",
365
+ "rate procurement, production and logistics as well as their interdependencies. Furthermore, they are capable of\n",
366
+ "developing and applying basic planning models in these fields.\n",
367
+ "\n",
368
+ "Courses (type, number of weekly contact hours, language — if other than German)\n",
369
+ "\n",
370
+ "V + Ü (no information on SWS (weekly contact hours) and course language available)\n",
371
+ "\n",
372
+ "Method of assessment (type, scope, language — if other than German, examination offered — if not every semester, information on whether\n",
373
+ "module is creditable for bonus)\n",
374
+ "\n",
375
+ "written examination (approx. 60 minutes)\n",
376
+ "\n",
377
+ "Allocation of places\n",
378
+ "\n",
379
+ "Number of places: 405. No restrictions with regard to available places for Bachelor's students of Wirtschaftswis-\n",
380
+ "senschaft (Business Management and Economics), Wirtschaftsmathematik (Mathematics for Economics) and\n",
381
+ "Wirtschaftsinformatik (Business Information Systems). The remaining places will be allocated to students of\n",
382
+ "other subjects. Should the number of applications exceed the number of available places, places will be allo-\n",
383
+ "cated in a standardised procedure among all applicants irrespective of their subjects according to the following\n",
384
+ "quotas: Quota 1 (50% of places): total number of ECTS credits already achieved in the respective degree subject;\n",
385
+ "among applicants with the same number of ECTS credits achieved, places will be allocated by lot. Quota 2 (25%\n",
386
+ "of places): number of subject semesters of the respective applicant; among applicants with the same number of\n",
387
+ "subject semesters, places will be allocated by lot. Quota 3 (25% of places): allocation by lot. Applicants who al-\n",
388
+ "ready have successfully completed at least one module component of the respective module will be given prefe-\n",
389
+ "rential consideration. Places on all courses of the module component with a restricted number of places will be\n",
390
+ "allocated in the same procedure. A waiting list will be maintained and places re-allocated as they become availa-\n",
391
+ "ble.\n",
392
+ "\n",
393
+ "Additional information\n",
394
+ "\n",
395
+ "--\n",
396
+ "\n",
397
+ "Referred to in LPO I (examination regulations for teaching-degree programmes)\n",
398
+ "\n",
399
+ "--\n",
400
+ "\n",
401
+ "Module title\n",
402
+ "\n",
403
+ "Managerial Accounting\n",
404
+ "\n",
405
+ "Module coordinator\n",
406
+ "\n",
407
+ "holder of the Chair of Business Management and Accoun-\n",
408
+ "ting\n",
409
+ "\n",
410
+ "Abbreviation\n",
411
+ "\n",
412
+ "12-IntUR-G-082-m01\n",
413
+ "\n",
414
+ "Module offered by\n",
415
+ "\n",
416
+ "Faculty of Business Management and Economics\n",
417
+ "\n",
418
+ "ECTS Method of grading\n",
419
+ "\n",
420
+ "Only after succ. compl. of module(s)\n",
421
+ "\n",
422
+ "5\n",
423
+ "\n",
424
+ "numerical grade\n",
425
+ "\n",
426
+ "--\n",
427
+ "\n",
428
+ "Duration\n",
429
+ "\n",
430
+ "Module level\n",
431
+ "\n",
432
+ "Other prerequisites\n",
433
+ "\n",
434
+ "1 semester\n",
435
+ "\n",
436
+ "undergraduate\n",
437
+ "\n",
438
+ "--\n",
439
+ "\n",
440
+ "Contents\n",
441
+ "\n",
442
+ "Content:\n",
443
+ "This course offers an introduction to aims and methods of managerial accounting (cost accounting).\n",
444
+ "\n",
445
+ "Outline of syllabus:\n",
446
+ "1. Managerial accounting and financial accounting\n",
447
+ "2. Managerial accounting: basic terms\n",
448
+ "3. Different types of costs\n",
449
+ "4. Cost centre accounting based on total costs\n",
450
+ "5. Job costing based on total costs\n",
451
+ "6. Cost centre accounting and job costing based on direct/variable costs\n",
452
+ "7. Budgeting and cost-variance analysis\n",
453
+ "8. Cost-volume-profit analysis\n",
454
+ "9. Cost information and operating decisions\n",
455
+ "\n",
456
+ "Reading:\n",
457
+ "Coenenberg/Fischer/Günther: Kostenrechnung und Kostenanalyse, Stuttgart.\n",
458
+ "Friedl/Hofmann/Pedell: Kostenrechnung. Eine entscheidungsorientierte Einführung.\n",
459
+ "(most recent editions)\n",
460
+ "\n",
461
+ "Intended learning outcomes\n",
462
+ "\n",
463
+ "After completing the course \"Management Accounting and Control\", the students will be able to\n",
464
+ "(i) set out the responsibilities of the company's internal accounting and control;\n",
465
+ "(ii) define the central concepts of internal enterprise computing restriction and control and assign case studies\n",
466
+ "the terms;\n",
467
+ "(iii) apply the basic methods of internal corporate accounting and control on a full and cost base to idealized ca-\n",
468
+ "se studies of medium difficulty that calculate relevant costs and benefits and take on this basis a reasoned deci-\n",
469
+ "sion.\n",
470
+ "\n",
471
+ "Courses (type, number of weekly contact hours, language — if other than German)\n",
472
+ "\n",
473
+ "V + Ü (no information on SWS (weekly contact hours) and course language available)\n",
474
+ "\n",
475
+ "Method of assessment (type, scope, language — if other than German, examination offered — if not every semester, information on whether\n",
476
+ "module is creditable for bonus)\n",
477
+ "\n",
478
+ "written examination (approx. 60 minutes)\n",
479
+ "\n",
480
+ "Allocation of places\n",
481
+ "\n",
482
+ "Number of places: 640. No restrictions with regard to available places for Bachelor's students of Wirtschafts-\n",
483
+ "wissenschaft (Business Management and Economics), Wirtschaftsmathematik (Mathematics for Economics)\n",
484
+ "and Wirtschaftsinformatik (Business Information Systems). The remaining places will be allocated to students\n",
485
+ "of other subjects. Should the number of applications exceed the number of available places, places will be allo-\n",
486
+ "cated in a standardised procedure among all applicants irrespective of their subjects according to the following\n",
487
+ "quotas: Quota 1 (50% of places): total number of ECTS credits already achieved in the respective degree subject;\n",
488
+ "among applicants with the same number of ECTS credits achieved, places will be allocated by lot. Quota 2 (25%\n",
489
+ "of places): number of subject semesters of the respective applicant; among applicants with the same number of\n",
490
+ "\n",
491
+ "subject semesters, places will be allocated by lot. Quota 3 (25% of places): allocation by lot. Applicants who al-\n",
492
+ "ready have successfully completed at least one module component of the respective module will be given prefe-\n",
493
+ "rential consideration. Places on all courses of the module component with a restricted number of places will be\n",
494
+ "allocated in the same procedure. A waiting list will be maintained and places re-allocated as they become availa-\n",
495
+ "ble.\n",
496
+ "\n",
497
+ "Additional information\n",
498
+ "\n",
499
+ "--\n",
500
+ "\n",
501
+ "Referred to in LPO I (examination regulations for teaching-degree programmes)\n",
502
+ "\n",
503
+ "--\n",
504
+ "\n"
505
+ ]
506
+ }
507
+ ],
508
+ "source": [
509
+ "modules = re.findall(Patterns_MS_IS.PATTERN_ENTIRE_MODULE.value, text_Module_Catalogue)\n",
510
+ "modules = clean_entries(modules, removal_patterns_BA_MM)\n",
511
+ "\n",
512
+ "for i in range (3):\n",
513
+ " print(modules[i])"
514
+ ]
515
+ }
516
+ ],
517
+ "metadata": {
518
+ "kernelspec": {
519
+ "display_name": "py38",
520
+ "language": "python",
521
+ "name": "python3"
522
+ },
523
+ "language_info": {
524
+ "codemirror_mode": {
525
+ "name": "ipython",
526
+ "version": 3
527
+ },
528
+ "file_extension": ".py",
529
+ "mimetype": "text/x-python",
530
+ "name": "python",
531
+ "nbconvert_exporter": "python",
532
+ "pygments_lexer": "ipython3",
533
+ "version": "3.8.16"
534
+ },
535
+ "orig_nbformat": 4
536
+ },
537
+ "nbformat": 4,
538
+ "nbformat_minor": 2
539
+ }
02_data_extraction/extract_keywords.ipynb ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "from keybert import KeyBERT\n",
11
+ "\n",
12
+ "pd.set_option('display.max_colwidth', None)"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": null,
18
+ "metadata": {},
19
+ "outputs": [],
20
+ "source": [
21
+ "def remove_newlines_at_start_end(df):\n",
22
+ " \"\"\"\n",
23
+ " Removes leading and trailing newlines from all string (object-dtype) columns of a pandas dataframe.\n",
24
+ " \"\"\"\n",
25
+ " # for all string columns, remove leading and trailing newlines\n",
26
+ " for col in df.select_dtypes(include=['object']).columns:\n",
27
+ " df[col] = df[col].str.strip('\\n')\n",
28
+ " return df\n",
29
+ "\n",
30
+ "df = pd.read_csv('MS_IS_all_modules.csv')\n",
31
+ "\n",
32
+ "def xlsx_to_csv(xlsx_file_path, csv_file_path):\n",
33
+ " df = pd.read_excel(xlsx_file_path)\n",
34
+ " print(df.head())\n",
35
+ " df.shape\n",
36
+ " df.to_csv(csv_file_path)\n",
37
+ "\n",
38
+ "\n",
39
+ "def extract_keywords_from_content(df):\n",
40
+ " df = remove_newlines_at_start_end(df)\n",
41
+ " kw_model = KeyBERT()\n",
42
+ " df['keywords'] = df['Contents'].apply(lambda x: ', '.join(kw[0] for kw in kw_model.extract_keywords(x, keyphrase_ngram_range=(1,2),\n",
43
+ " stop_words='english', \n",
44
+ " highlight=False,\n",
45
+ " top_n=4)))\n",
46
+ " return df\n",
47
+ "\n",
48
+ "\n",
49
+ "def extract_keywords_from_intended_learning_outcomes(df):\n",
50
+ " df = remove_newlines_at_start_end(df)\n",
51
+ " kw_model = KeyBERT()\n",
52
+ " df['keywords_learning'] = df['Intended learning outcomes'].apply(lambda x: ', '.join(kw[0] for kw in kw_model.extract_keywords(x, keyphrase_ngram_range=(1,2),\n",
53
+ " stop_words='english', \n",
54
+ " highlight=False,\n",
55
+ " top_n=4)))\n",
56
+ " return df\n"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": null,
62
+ "metadata": {},
63
+ "outputs": [],
64
+ "source": [
65
+ "df = pd.read_csv('MS_IS_all_modules.csv') \n",
66
+ "\n",
67
+ "df = df.head(30)\n",
68
+ "df = extract_keywords_from_content(df) \n",
69
+ "df = extract_keywords_from_intended_learning_outcomes(df)\n",
70
+ "df[['Module title', 'keywords', 'Contents', ]].head(30)\n",
71
+ "\n",
72
+ "print(\"---------------SAME FOR INTENDED LEARNING OUTCOMES-------------------\")\n",
73
+ "\n",
74
+ "df = extract_keywords_from_intended_learning_outcomes(df) # Pass the dataframe to the function\n",
75
+ "df[['Module title', 'keywords', 'Contents', 'keywords_learning', 'Intended learning outcomes']].head(30)"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "markdown",
80
+ "metadata": {},
81
+ "source": [
82
+ "# Ansatz mit zugeordneten Schlagwörtern (Ausblick --> erstmal verworfen) "
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "code",
87
+ "execution_count": null,
88
+ "metadata": {},
89
+ "outputs": [],
90
+ "source": [
91
+ "import pandas as pd\n",
92
+ "import spacy\n",
93
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
94
+ "from sklearn.metrics.pairwise import cosine_similarity\n",
95
+ "\n",
96
+ "def assign_categories(dataframe):\n",
97
+ " # Define the predefined categories\n",
98
+ " categories = {\n",
99
+ " \"Artificial Intelligence\": [\"AI\", \"Machine Learning\", \"Deep Learning\", \"Neuronal Nets\"],\n",
100
+ " \"Strategy\": [\"Business Strategy\", \"Market Analysis\", \"Competitive Intelligence\"],\n",
101
+ " \"Marketing\": [\"Digital Marketing\", \"Social Media Marketing\", \"Market Research\"],\n",
102
+ " \"Optimization\": [\"Operations Optimization\", \"Process Improvement\", \"Supply Chain Optimization\"],\n",
103
+ " \"Data Science\": [\"Data Analysis\", \"Data Mining\", \"Statistical Modeling\"],\n",
104
+ " \"Software Engineering\": [\"Software Development\", \"Programming\", \"Web Development\", \"\"],\n",
105
+ " \"Society and Ethics\": [\"Ethical Issues\", \"Social Impact\", \"Sustainability\", \"Privacy\"],\n",
106
+ " \"Finance\": [\"Financial Analysis\", \"Financial Markets\", \"Accounting\", \"Financial Reporting\"],\n",
107
+ " \"Management\": [\"Leadership\", \"Project Management\", \"Team Management\", \"Change Management\"],\n",
108
+ " \"Communication\": [\"Presentation Skills\", \"Negotiation\", \"Stakeholder Management\", \"Conflict Management\"],\n",
109
+ " \"Entrepreneurship\": [\"Business Development\", \"Innovation\", \"Startups\", \"Venture Capital\"],\n",
110
+ " \"Blockchain\": [\"Distributed Ledger\", \"Smart Contracts\", \"Cryptocurrencies\", \"Decentralized Applications\"],\n",
111
+ " \"Internet of Things\": [\"IoT Devices\", \"IoT Platforms\", \"IoT Security\", \"IoT Data Management\"],\n",
112
+ " }\n",
113
+ "\n",
114
+ " # Initialize the NLP model (spacy)\n",
115
+ " nlp = spacy.load(\"en_core_web_sm\")\n",
116
+ "\n",
117
+ " # Extract the names of predefined categories\n",
118
+ " category_names = list(categories.keys())\n",
119
+ "\n",
120
+ " # Create a TF-IDF vectorizer\n",
121
+ " vectorizer = TfidfVectorizer()\n",
122
+ "\n",
123
+ " # Fit and transform the predefined category names\n",
124
+ " category_vectors = vectorizer.fit_transform(category_names)\n",
125
+ "\n",
126
+ " # Iterate over the rows in the DataFrame\n",
127
+ " assigned_categories = []\n",
128
+ " for index, row in dataframe.iterrows():\n",
129
+ " # Extract the row name\n",
130
+ " row_name = row[\"Module title\"]\n",
131
+ "\n",
132
+ " # Calculate the similarity between the row name and predefined categories\n",
133
+ " row_vector = vectorizer.transform([row_name])\n",
134
+ " similarities = cosine_similarity(row_vector, category_vectors)[0]\n",
135
+ "\n",
136
+ " # Find the index of the most similar category\n",
137
+ " max_index = similarities.argmax()\n",
138
+ "\n",
139
+ " # Assign the most similar category; if the best similarity score is below the 0.000008 threshold, assign \"Other\"\n",
140
+ " if similarities[max_index] < 0.000008:\n",
141
+ " assigned_category = \"Other\"\n",
142
+ " else:\n",
143
+ " assigned_category = category_names[max_index]\n",
144
+ " \n",
145
+ "\n",
146
+ " # Add the assigned category to the list\n",
147
+ " assigned_categories.append(assigned_category)\n",
148
+ "\n",
149
+ " # Add the assigned categories to the DataFrame\n",
150
+ " dataframe[\"Assigned Category\"] = assigned_categories\n",
151
+ "\n",
152
+ " return dataframe\n",
153
+ "\n",
154
+ "# Example usage\n",
155
+ "\n",
156
+ "df = pd.read_csv('MS_IS_all_modules.csv')\n",
157
+ "\n",
158
+ "df = pd.DataFrame(df)\n",
159
+ "df_with_categories = assign_categories(df)\n",
160
+ "#print only the columns we need\n",
161
+ "df_with_categories = df_with_categories[['Module title', 'Assigned Category']]\n",
162
+ "print(df_with_categories)"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "markdown",
167
+ "metadata": {},
168
+ "source": [
169
+ "# Ansatz über Zero-Shot-Klassifikator\n"
170
+ ]
171
+ },
172
+ {
173
+ "cell_type": "code",
174
+ "execution_count": null,
175
+ "metadata": {},
176
+ "outputs": [],
177
+ "source": [
178
+ "from transformers import pipeline\n",
179
+ "classifier = pipeline(\"zero-shot-classification\", model=\"facebook/bart-large-mnli\")"
180
+ ]
181
+ },
182
+ {
183
+ "cell_type": "code",
184
+ "execution_count": null,
185
+ "metadata": {},
186
+ "outputs": [],
187
+ "source": [
188
+ "candidate_labels = [\n",
189
+ " \"Artificial Intelligence\",\n",
190
+ " \"Strategy\",\n",
191
+ " \"Marketing\",\n",
192
+ " \"Optimization\",\n",
193
+ " \"Data Science\",\n",
194
+ " \"Software Engineering\",\n",
195
+ " \"Society and Ethics\",\n",
196
+ " \"Finance\",\n",
197
+ " \"Management\",\n",
198
+ " \"Communication\",\n",
199
+ " \"Entrepreneurship\",\n",
200
+ " \"Internet of Things\",\n",
201
+ "]"
202
+ ]
203
+ },
204
+ {
205
+ "cell_type": "code",
206
+ "execution_count": null,
207
+ "metadata": {},
208
+ "outputs": [],
209
+ "source": [
210
+ "df = pd.read_csv('MS_IS_all_modules.csv')\n",
211
+ "\n",
212
+ "print(df['Module title'][5])\n",
213
+ "text = df['Module title'][5]\n",
214
+ "print(text)\n",
215
+ "\n",
216
+ "output = classifier(text, candidate_labels, device=0)\n",
217
+ "df = pd.DataFrame({'label': output['labels'], 'score': output['scores']})\n",
218
+ "nr_of_results = 3\n",
219
+ "df = df.sort_values(by=['score'], ascending=False).head(nr_of_results)\n",
220
+ "print(df)"
221
+ ]
222
+ }
223
+ ],
224
+ "metadata": {
225
+ "kernelspec": {
226
+ "display_name": "enterpriseai2",
227
+ "language": "python",
228
+ "name": "python3"
229
+ },
230
+ "language_info": {
231
+ "codemirror_mode": {
232
+ "name": "ipython",
233
+ "version": 3
234
+ },
235
+ "file_extension": ".py",
236
+ "mimetype": "text/x-python",
237
+ "name": "python",
238
+ "nbconvert_exporter": "python",
239
+ "pygments_lexer": "ipython3",
240
+ "version": "3.8.16"
241
+ },
242
+ "orig_nbformat": 4
243
+ },
244
+ "nbformat": 4,
245
+ "nbformat_minor": 2
246
+ }
02_data_extraction/helper_methods.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generic methods to apply regex and clean text
2
+ import re
3
+ import csv
4
+ import os
5
+
6
def extract_first_match(text, regex_pattern):
    """Return the first capture group of the first match of a pattern in text.

    Args:
        text: String to search.
        regex_pattern: Regular expression, given as a pattern string or as
            an already compiled pattern.

    Returns:
        The text captured by group 1 of the first match. If the pattern
        defines no capture group at all, the whole matched text is returned
        instead. ``None`` when the pattern does not match, or when group 1
        did not take part in the match.
    """
    pattern = re.compile(regex_pattern)
    match = pattern.search(text)
    if match is None:
        return None
    # A pattern without any capture group has no group 1; requesting it
    # would raise IndexError, so fall back to the entire match.
    return match.group(1) if pattern.groups else match.group()
13
+
14
def extract_LineMatch(text, regex_pattern):
    """Return the first match of a pattern in text, searched in MULTILINE mode.

    With ``re.MULTILINE``, ``^`` and ``$`` in the pattern anchor at the
    start and end of every line of ``text``, not only at the start and end
    of the whole string.

    Args:
        text: String to search.
        regex_pattern: Regular expression, given as a pattern string or as
            an already compiled pattern.

    Returns:
        The complete matched text of the first match, or ``None`` when the
        pattern does not match.
    """
    if isinstance(regex_pattern, re.Pattern):
        # re.compile() raises ValueError when flags are passed together
        # with a compiled pattern, so rebuild it from its source and add
        # MULTILINE to the flags it already carries.
        pattern = re.compile(
            regex_pattern.pattern, regex_pattern.flags | re.MULTILINE
        )
    else:
        pattern = re.compile(regex_pattern, re.MULTILINE)
    match = pattern.search(text)
    if match is None:
        return None
    return match.group()
21
+
22
def clean_entries(matches, patternsToRemove):
    """Strip every occurrence of the given patterns from each entry.

    Args:
        matches: Iterable of strings to clean.
        patternsToRemove: Iterable of regular-expression pattern strings.
            Each is applied with ``re.MULTILINE`` and every occurrence is
            replaced by the empty string, in the order given.

    Returns:
        A new list with one cleaned string per entry of ``matches``, in the
        same order. The input entries are not modified.
    """
    # Materialize the patterns once: a one-shot iterable (e.g. a generator)
    # would otherwise be exhausted after the first entry, leaving all
    # remaining entries silently uncleaned.
    patterns = list(patternsToRemove)

    cleaned_entries = []
    for entry in matches:
        cleaned_text = entry
        for pattern in patterns:
            cleaned_text = re.sub(pattern, "", cleaned_text, flags=re.MULTILINE)
        cleaned_entries.append(cleaned_text)

    return cleaned_entries
31
+
02_data_extraction/module_catalogues/BA_IS_all_modules.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:304162843305df6de0297f6797edc8ed9fa7d287c57fbc36135f74d80c96cfcd
3
+ size 1365797
02_data_extraction/module_catalogues/BA_MM_all_modules.pdf ADDED
Binary file (711 kB). View file
 
02_data_extraction/module_catalogues/MS_IS_all_modules.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c544ebb17f06ccda44c284de278ca757a3687bd4b95de64cb3523fe50d20e115
3
+ size 1071080
02_data_extraction/module_catalogues/MS_MM_all_modules.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc8b0bcf1e8b885a3f557d3e638d75674a977d72d563551795fe1dd5c6c5e455
3
+ size 3998937
03_extracted_final_modules/BA_IS_all_modules.xlsx ADDED
Binary file (63 kB). View file
 
03_extracted_final_modules/BA_IS_all_modules_15.xlsx ADDED
Binary file (13.5 kB). View file
 
03_extracted_final_modules/BA_MM_all_modules.xlsx ADDED
Binary file (50.8 kB). View file
 
03_extracted_final_modules/BA_MM_all_modules_15.xlsx ADDED
Binary file (14.6 kB). View file
 
03_extracted_final_modules/MS_IS_all_modules.csv ADDED
The diff for this file is too large to render. See raw diff
 
03_extracted_final_modules/MS_IS_all_modules.xlsx ADDED
Binary file (62.5 kB). View file
 
03_extracted_final_modules/MS_IS_all_modules_cleaned.xlsx ADDED
Binary file (51.8 kB). View file
 
03_extracted_final_modules/MS_IS_all_modules_orginal_15_rows_cleaned.csv ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Module title,Abbreviation,Module coordinator,Module offered by,ETCS,Method of grading,Duration,Module level,Contents,Intended learning outcomes,Courses,Method of assessment,Allocation of places,Additional information,Workload,Teaching cycle,Referred to in LPO I
2
+ Information Processing within Organizations,12-IV-161-m01,Faculty of Business Management and Economics,holder of the Chair of Business Management and Business,5,numerical grade,1 semester,graduate,"Content:This course provides students with an in-depth overview of the structure and the application areas of business management information systems in enterprises and public institutions.Outline of syllabus:1. What is software: concepts, categories, application2. Software life cycle: duration, phases, steps3. As-is analysis: tasks, problems4. To-be concept: system design, data design, dialog design, function design5. Object orientation: paradigm shift6. Change management: meaning, methodologies, project management7. Office automation: tasks, areas of application","After completing the course ""Integrated Information Processing"", students will be able to(i) understand the importance of integration in enterprises, especially in information systems;(ii) assess the progress of development of a software project, estimate cycle costs, know and consider require-ments, which brings a software implementation with;(iii) select the correct procedures or practices in an as-is analysis and target conception and practically apply (with participation in the exercise);(iv) understand the importance of change management and project management and know the appropriate me-thods for specific applications.",V (2) + Ü (2),written examination (approx. 60 minutes)Language of assessment: German and/or Englishcreditable for bonus,--,--,150 h,--,--
3
+ IT-Management,12-M-ITM-161-m01,Faculty of Business Management and Economics,Holder of the Chair of Information Systems Engineering,5,numerical grade,1 semester,graduate,"Content:This course provides students with an in-depth overview of aims, tasks and appropriate methods of IT manage-ment.Outline of syllabus:1. Organisation and distinction2. IT strategy3. IT organisation4. Management of IT systems5. Enterprise Architecture Management6. IT project management7. IT security8. IT law9. IT controllingReading:• Hofmann/Schmidt: Masterkurs IT-Management, Wiesbaden.• Tiemeyer: Handbuch IT-Management, Munich.• Hanschke: Strategisches Management der IT-Landschaft, Munich.","After completing the course ""IT Management"", students will be able to1. overview the different aspects to be considered regarding a purposeful IT management;2. understand and apply appropriate methods and tools;3. independently perform system search and selection in a team project (only after participation in the practice lessons).",V (2) + Ü (2),a) written examination (approx. 60 minutes) or b) oral examination (one candidate each: approx. 15 to 20 minu-tes; groups of 2: approx. 20 minutes; groups of 3: approx. 30 minutes)Language of assessment: German and/or Englishcreditable for bonus,--,--,150 h,--,--
4
+ Project Seminar,12-PS-192-m01,Faculty of Business Management and Economics,Holder of the Chair of Business Management and Business,15,numerical grade,1 semester,graduate,"Content:In small project teams of 4 to 10 members, students will spend several months actively working on a specific and realistic problem with practical relevance. They will progress through several project stages including as-is analy-sis, to-be conception and implementation of an IS solution. The project teams will be required to work indepen-dently and will only receive advice and minor support from research assistants.Reading:will vary according to topic","After completing the course ""Projektseminar"", students will be able to1. analyze business tasks and requirements and generate fitting IS solutions;2. apply project management methods;3. internalize stress, time and conflict management by means of practical teamwork.",S (2),"project: preparing a conceptual design (approx. 150 hours), designing and implementing an approach to solution (approx. 300 hours) as well as presentation (approx. 20 minutes), weighted 1:2:1Language of assessment: German, EnglishCreditable for bonus",--,--,450 h,--,--
5
+ Information Retrieval,10-I=IR-161-m01,Institute of Computer Science,Dean of Studies Informatik (Computer Science),5,numerical grade,1 semester,graduate,"IR models (e. g. Boolean and vector space model, evaluation), processing of text (tokenising, text properties), data structures (e. g. inverted index), query elements (e. g. query operations, relevance feedback, query langua-ges and paradigms, structured queries), search engine (e. g. architecture, crawling, interfaces, link analysis), me-thods to support IR (e. g. recommendation systems, text clustering and classification, information extraction).",The students possess theoretical and practical knowledge in the area of information retrieval and have acquired the technical know-how to create a search engine.,V (2) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: German and/or Englishcreditable for bonus",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits):IT,IS,HCI,GE",150 h,--,--
6
+ Analysis and Design of Programs,10-I=PA-161-m01,Institute of Computer Science,holder of the Chair of Computer Science II,5,numerical grade,1 semester,graduate,"Program analysis, model creation in software engineering, program quality, test of programs, process models.","The students are able to analyse programs, to use testing frameworks and metrics as well as to judge program quality.",V (2) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: German and/or Englishcreditable for bonus",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits):SE,IS,ES,GE",150 h,--,--
7
+ Security of Software Systems,10-I=SSS-172-m01,Institute of Computer Science,holder of the Chair of Computer Science II,5,numerical grade,1 semester,graduate,"The lecture provides an overview of common software vulnerabilities, state-of-the-art attack techniques on mo-dern computer systems, as well as the measures implemented to protect against these attacks. In the course, the following topics are discussed:• x86-64 instruction set architecture and assembly language• Runtime attacks (code injection, code reuse, defenses)• Web security• Blockchains and smart contracts• Side-channel attacks• Hardware security","Students gain a deep understanding of software security, from hardware and low-level attacks to modern con-cepts such as blockchains. The lecture prepares for research in the area of security and privacy, while the exerci-ses allow students to gain hands-on experience with attacks and analysis of systems from an attackers perspec-tive.",V (2) + Ü (2)Module taught in: English,"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: Englishcreditable for bonus",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits): SE, IS, LR, HCI, ES.Basic programming knowledge in C is required.",150 h,--,--
8
+ Software Architecture,10-I=SAR-161-m01,Institute of Computer Science,holder of the Chair of Computer Science II,5,numerical grade,1 semester,graduate,Current topics in the area of aerospace.,The students possess a fundamental and applicable knowledge about advanced topics in software engineering with a focus on modern software architectures and fundamental approaches to model-driven software enginee-ring.,V (2) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: German and/or Englishcreditable for bonus",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits):SE,IT,ES",150 h,--,--
9
+ Artificial Intelligence 1,10-I=KI1-161-m01,Institute of Computer Science,holder of the Chair of Computer Science VI,5,numerical grade,1 semester,graduate,"Intelligent agents, uninformed and heuristic search, constraint problem solving, search with partial information, propositional and predicate logic and inference, knowledge representation.","The students possess theoretical and practical knowledge about artificial intelligence in the area of agents, search and logic and are able to assess possible applications.",V (2) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: German and/or Englishcreditable for bonus",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits):AT,SE,IS,HCI",150 h,--,--
10
+ Discrete Event Simulation,10-I=ST-161-m01,Institute of Computer Science,holder of the Chair of Computer Science III,8,numerical grade,1 semester,graduate,"Introduction to simulation techniques, statistical groundwork, creation of random numbers and random varia-bles, random sample theory and estimation techniques, statistical analysis of simulation values, inspection of measured data, planning and evaluation of simulation experiments, special random processes, possibilities and limits of model creation and simulation, advanced concepts and techniques, practical execution of simulation projects.","The students possess the methodic knowledge and the practical skills necessary for the stochastic simulation of (technical) systems, the evaluation of results and the correct assessment of the possibilities and limits of simu-lation methods.",V (4) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: German and/or Englishcreditable for bonus",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits):IT,IS,ES,GE",240 h,--,--
11
+ Advanced Programming,10-I=APR-182-m01,Institute of Computer Science,holder of the Chair of Computer Science II,5,numerical grade,1 semester,graduate,"With the knowledge of basic programming, taught in introductory lectures, it is possible to realize simpler pro-grams. If more complex problems are to be tackled, suboptimal results like long, incomprehensible functions and code duplicates occur. In this lecture, further knowledge is to be conveyed on how to give programs and co-de a sensible structure. Also, further topics in the areas of software security and parallel programming are dis-cussed.","Students learn advanced programming paradigms especially suited for space applications. Different patterns are then implemented in multiple languages and their efficiency measured using standard metrics. In addition, par-allel processing concepts are introduced culminating in the use of GPU architectures for extremely quick proces-sing.",V (2) + Ü (2)Module taught in: English,written examination (90 to 120 minutes)Language of assessment: Englishcreditable for bonus,--,--,150 h,--,--
12
+ Programming with neural nets,10-I=PNN-212-m01,Institute of Computer Science,holder of the Chair of Computer Science IX,5,numerical grade,1 semester,graduate,"Overview over NN, implementation of important NN-architectures like FCN, CNN and LSTMs, practical example for NN-architectures, among others in the area of image and language processing.","Knowledge about possible applications and limitations of NN, for important architectures (eg. FCN, CNN, LSTM) and how they are implemented in NN-tools like Tensorflow/Keras, ability to program network structures from lite-rature, to prepare data and solve concrete tasks for NN.",V (2) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).creditable for bonusLanguage of assessment: German and/or English",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits): IT,KI,HCI,GE",150 h,--,--
13
+ NLP and Text Mining,10-I=STM-162-m01,Institute of Computer Science,holder of the Chair of Computer Science VI,5,numerical grade,1 semester,graduate,"Foundations in the following areas: definition of NLP and text mining, properties of text, sentence boundary de-tection, tokenisation, collocation, N-gram models, morphology, hidden Markov models for tagging, probabili-stic parsing, word sense disambiguation, term extraction methods, information extraction, sentiment analysis. The students possess theoretical and practical knowledge about typical methods and algorithms in the area of text mining and language processing mostly for English. They are able to solve problems through the methods taught. They have gained experience in the application of text mining algorithms.",The students possess theoretical and practical knowledge about typical methods and algorithms in the area of text mining and language processing. They are able to solve practical problems with the methods acquired in class. They have gained experience in the application of text mining algorithms.,V (2) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: German and/or English",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits): AT, IT, HCI.",150 h,--,--
14
+ Systems Benchmarking,10-I=SB-212-m01,Institute of Computer Science,holder of the Chair of Computer Science IX,5,numerical grade,1 semester,nan,--,--,V (2) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).creditable for bonusLanguage of assessment: German and/or English",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits):SE,IT,ES,HCI,GE",150 h,--,--
15
+ Computer Vision,10-xtAI=CV-202-m01,Institute of Computer Science,Dean of Studies Informatik (Computer Science),5,numerical grade,1 semester,graduate,"The lecture provides knowledge about current methods and algorithms in the field of computer vision. Important basics as well as the most recent approaches to image representation, image processing and image analysis are taught. Actual models and methods of machine learning as well as their technical backgrounds are presented and their respective applications in image processing are shown.",Students have fundamental knowledge of problems and techniques in the field of computer vision and are able to independently identify and apply suitable methods for concrete problems.,V (2) + Ü (2)Module taught in: English,"Written examination (approx. 60 to 120 minutes)If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: EnglishCreditable for bonus",--,--,150 h,--,--
16
+ Image Processing and Computational Photography,10-I=IP-222-m01,Institute of Computer Science,nan,5,numerical grade,1 semester,nan,--,--,V (2) + Ü (2)Module taught in: English,"written examination (approx. 60 to 120 minutes)If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: EnglishCreditable for bonus",--,--,150 h,--,--
03_extracted_final_modules/MS_IS_all_modules_orginal_15_rows_cleaned.xlsx ADDED
Binary file (10.9 kB). View file
 
03_extracted_final_modules/MS_IS_all_modules_orginal_to_clean_cleaned.csv ADDED
The diff for this file is too large to render. See raw diff
 
03_extracted_final_modules/MS_IS_all_modules_orginal_to_clean_cleaned.xlsx ADDED
Binary file (48.4 kB). View file
 
03_extracted_final_modules/MS_MM_all_modules.xlsx ADDED
Binary file (113 kB). View file
 
04_finetuning_approaches/FT_Tapas.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
04_finetuning_approaches/Fine_tuning_TapasForQuestionAnswering_on_SQA.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
04_finetuning_approaches/MS_IS_50_modules_cleaned.xlsx ADDED
Binary file (27.6 kB). View file
 
04_finetuning_approaches/MS_IS_all_modules.xlsx ADDED
Binary file (62.5 kB). View file
 
04_finetuning_approaches/MS_IS_all_modules_orginal_15_rows.xlsx ADDED
Binary file (15.2 kB). View file
 
04_finetuning_approaches/MS_IS_all_modules_orginal_15_rows_cleaned.csv ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Module title,Abbreviation,Module coordinator,Module offered by,ETCS,Method of grading,Duration,Module level,Contents,Intended learning outcomes,Courses,Method of assessment,Allocation of places,Additional information,Workload,Teaching cycle,Referred to in LPO I
2
+ Information Processing within Organizations,12-IV-161-m01,Faculty of Business Management and Economics,holder of the Chair of Business Management and Business,5,numerical grade,1 semester,graduate,"Content:This course provides students with an in-depth overview of the structure and the application areas of business management information systems in enterprises and public institutions.Outline of syllabus:1. What is software: concepts, categories, application2. Software life cycle: duration, phases, steps3. As-is analysis: tasks, problems4. To-be concept: system design, data design, dialog design, function design5. Object orientation: paradigm shift6. Change management: meaning, methodologies, project management7. Office automation: tasks, areas of application","After completing the course ""Integrated Information Processing"", students will be able to(i) understand the importance of integration in enterprises, especially in information systems;(ii) assess the progress of development of a software project, estimate cycle costs, know and consider require-ments, which brings a software implementation with;(iii) select the correct procedures or practices in an as-is analysis and target conception and practically apply (with participation in the exercise);(iv) understand the importance of change management and project management and know the appropriate me-thods for specific applications.",V (2) + Ü (2),written examination (approx. 60 minutes)Language of assessment: German and/or Englishcreditable for bonus,--,--,150 h,--,--
3
+ IT-Management,12-M-ITM-161-m01,Faculty of Business Management and Economics,Holder of the Chair of Information Systems Engineering,5,numerical grade,1 semester,graduate,"Content:This course provides students with an in-depth overview of aims, tasks and appropriate methods of IT manage-ment.Outline of syllabus:1. Organisation and distinction2. IT strategy3. IT organisation4. Management of IT systems5. Enterprise Architecture Management6. IT project management7. IT security8. IT law9. IT controllingReading:• Hofmann/Schmidt: Masterkurs IT-Management, Wiesbaden.• Tiemeyer: Handbuch IT-Management, Munich.• Hanschke: Strategisches Management der IT-Landschaft, Munich.","After completing the course ""IT Management"", students will be able to1. overview the different aspects to be considered regarding a purposeful IT management;2. understand and apply appropriate methods and tools;3. independently perform system search and selection in a team project (only after participation in the practice lessons).",V (2) + Ü (2),a) written examination (approx. 60 minutes) or b) oral examination (one candidate each: approx. 15 to 20 minu-tes; groups of 2: approx. 20 minutes; groups of 3: approx. 30 minutes)Language of assessment: German and/or Englishcreditable for bonus,--,--,150 h,--,--
4
+ Project Seminar,12-PS-192-m01,Faculty of Business Management and Economics,Holder of the Chair of Business Management and Business,15,numerical grade,1 semester,graduate,"Content:In small project teams of 4 to 10 members, students will spend several months actively working on a specific and realistic problem with practical relevance. They will progress through several project stages including as-is analy-sis, to-be conception and implementation of an IS solution. The project teams will be required to work indepen-dently and will only receive advice and minor support from research assistants.Reading:will vary according to topic","After completing the course ""Projektseminar"", students will be able to1. analyze business tasks and requirements and generate fitting IS solutions;2. apply project management methods;3. internalize stress, time and conflict management by means of practical teamwork.",S (2),"project: preparing a conceptual design (approx. 150 hours), designing and implementing an approach to solution (approx. 300 hours) as well as presentation (approx. 20 minutes), weighted 1:2:1Language of assessment: German, EnglishCreditable for bonus",--,--,450 h,--,--
5
+ Information Retrieval,10-I=IR-161-m01,Institute of Computer Science,Dean of Studies Informatik (Computer Science),5,numerical grade,1 semester,graduate,"IR models (e. g. Boolean and vector space model, evaluation), processing of text (tokenising, text properties), data structures (e. g. inverted index), query elements (e. g. query operations, relevance feedback, query langua-ges and paradigms, structured queries), search engine (e. g. architecture, crawling, interfaces, link analysis), me-thods to support IR (e. g. recommendation systems, text clustering and classification, information extraction).",The students possess theoretical and practical knowledge in the area of information retrieval and have acquired the technical know-how to create a search engine.,V (2) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: German and/or Englishcreditable for bonus",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits):IT,IS,HCI,GE",150 h,--,--
6
+ Analysis and Design of Programs,10-I=PA-161-m01,Institute of Computer Science,holder of the Chair of Computer Science II,5,numerical grade,1 semester,graduate,"Program analysis, model creation in software engineering, program quality, test of programs, process models.","The students are able to analyse programs, to use testing frameworks and metrics as well as to judge program quality.",V (2) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: German and/or Englishcreditable for bonus",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits):SE,IS,ES,GE",150 h,--,--
7
+ Security of Software Systems,10-I=SSS-172-m01,Institute of Computer Science,holder of the Chair of Computer Science II,5,numerical grade,1 semester,graduate,"The lecture provides an overview of common software vulnerabilities, state-of-the-art attack techniques on mo-dern computer systems, as well as the measures implemented to protect against these attacks. In the course, the following topics are discussed:• x86-64 instruction set architecture and assembly language• Runtime attacks (code injection, code reuse, defenses)• Web security• Blockchains and smart contracts• Side-channel attacks• Hardware security","Students gain a deep understanding of software security, from hardware and low-level attacks to modern con-cepts such as blockchains. The lecture prepares for research in the area of security and privacy, while the exerci-ses allow students to gain hands-on experience with attacks and analysis of systems from an attackers perspec-tive.",V (2) + Ü (2)Module taught in: English,"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: Englishcreditable for bonus",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits): SE, IS, LR, HCI, ES.Basic programming knowledge in C is required.",150 h,--,--
8
+ Software Architecture,10-I=SAR-161-m01,Institute of Computer Science,holder of the Chair of Computer Science II,5,numerical grade,1 semester,graduate,Current topics in the area of aerospace.,The students possess a fundamental and applicable knowledge about advanced topics in software engineering with a focus on modern software architectures and fundamental approaches to model-driven software enginee-ring.,V (2) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: German and/or Englishcreditable for bonus",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits):SE,IT,ES",150 h,--,--
9
+ Artificial Intelligence 1,10-I=KI1-161-m01,Institute of Computer Science,holder of the Chair of Computer Science VI,5,numerical grade,1 semester,graduate,"Intelligent agents, uninformed and heuristic search, constraint problem solving, search with partial information, propositional and predicate logic and inference, knowledge representation.","The students possess theoretical and practical knowledge about artificial intelligence in the area of agents, search and logic and are able to assess possible applications.",V (2) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: German and/or Englishcreditable for bonus",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits):AT,SE,IS,HCI",150 h,--,--
10
+ Discrete Event Simulation,10-I=ST-161-m01,Institute of Computer Science,holder of the Chair of Computer Science III,8,numerical grade,1 semester,graduate,"Introduction to simulation techniques, statistical groundwork, creation of random numbers and random varia-bles, random sample theory and estimation techniques, statistical analysis of simulation values, inspection of measured data, planning and evaluation of simulation experiments, special random processes, possibilities and limits of model creation and simulation, advanced concepts and techniques, practical execution of simulation projects.","The students possess the methodic knowledge and the practical skills necessary for the stochastic simulation of (technical) systems, the evaluation of results and the correct assessment of the possibilities and limits of simu-lation methods.",V (4) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: German and/or Englishcreditable for bonus",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits):IT,IS,ES,GE",240 h,--,--
11
+ Advanced Programming,10-I=APR-182-m01,Institute of Computer Science,holder of the Chair of Computer Science II,5,numerical grade,1 semester,graduate,"With the knowledge of basic programming, taught in introductory lectures, it is possible to realize simpler pro-grams. If more complex problems are to be tackled, suboptimal results like long, incomprehensible functions and code duplicates occur. In this lecture, further knowledge is to be conveyed on how to give programs and co-de a sensible structure. Also, further topics in the areas of software security and parallel programming are dis-cussed.","Students learn advanced programming paradigms especially suited for space applications. Different patterns are then implemented in multiple languages and their efficiency measured using standard metrics. In addition, par-allel processing concepts are introduced culminating in the use of GPU architectures for extremely quick proces-sing.",V (2) + Ü (2)Module taught in: English,written examination (90 to 120 minutes)Language of assessment: Englishcreditable for bonus,--,--,150 h,--,--
12
+ Programming with neural nets,10-I=PNN-212-m01,Institute of Computer Science,holder of the Chair of Computer Science IX,5,numerical grade,1 semester,graduate,"Overview over NN, implementation of important NN-architectures like FCN, CNN and LSTMs, practical example for NN-architectures, among others in the area of image and language processing.","Knowledge about possible applications and limitations of NN, for important architectures (eg. FCN, CNN, LSTM) and how they are implemented in NN-tools like Tensorflow/Keras, ability to program network structures from lite-rature, to prepare data and solve concrete tasks for NN.",V (2) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).creditable for bonusLanguage of assessment: German and/or English",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits): IT,KI,HCI,GE",150 h,--,--
13
+ NLP and Text Mining,10-I=STM-162-m01,Institute of Computer Science,holder of the Chair of Computer Science VI,5,numerical grade,1 semester,graduate,"Foundations in the following areas: definition of NLP and text mining, properties of text, sentence boundary de-tection, tokenisation, collocation, N-gram models, morphology, hidden Markov models for tagging, probabili-stic parsing, word sense disambiguation, term extraction methods, information extraction, sentiment analysis. The students possess theoretical and practical knowledge about typical methods and algorithms in the area of text mining and language processing mostly for English. They are able to solve problems through the methods taught. They have gained experience in the application of text mining algorithms.",The students possess theoretical and practical knowledge about typical methods and algorithms in the area of text mining and language processing. They are able to solve practical problems with the methods acquired in class. They have gained experience in the application of text mining algorithms.,V (2) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: German and/or English",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits): AT, IT, HCI.",150 h,--,--
14
+ Systems Benchmarking,10-I=SB-212-m01,Institute of Computer Science,holder of the Chair of Computer Science IX,5,numerical grade,1 semester,nan,--,--,V (2) + Ü (2),"written examination (approx. 60 to 120 minutes).If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).creditable for bonusLanguage of assessment: German and/or English",--,"Focuses available for students of the Masters programme Informatik (Computer Science, 120 ECTS credits):SE,IT,ES,HCI,GE",150 h,--,--
15
+ Computer Vision,10-xtAI=CV-202-m01,Institute of Computer Science,Dean of Studies Informatik (Computer Science),5,numerical grade,1 semester,graduate,"The lecture provides knowledge about current methods and algorithms in the field of computer vision. Important basics as well as the most recent approaches to image representation, image processing and image analysis are taught. Actual models and methods of machine learning as well as their technical backgrounds are presented and their respective applications in image processing are shown.",Students have fundamental knowledge of problems and techniques in the field of computer vision and are able to independently identify and apply suitable methods for concrete problems.,V (2) + Ü (2)Module taught in: English,"Written examination (approx. 60 to 120 minutes)If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: EnglishCreditable for bonus",--,--,150 h,--,--
16
+ Image Processing and Computational Photography,10-I=IP-222-m01,Institute of Computer Science,nan,5,numerical grade,1 semester,nan,--,--,V (2) + Ü (2)Module taught in: English,"written examination (approx. 60 to 120 minutes)If announced by the lecturer at the beginning of the course, the written examination may be replaced by an oral examination of one candidate each (approx. 20 minutes) or an oral examination in groups of 2 candidates (ap-prox. 15 minutes per candidate).Language of assessment: EnglishCreditable for bonus",--,--,150 h,--,--
04_finetuning_approaches/MS_IS_all_modules_orginal_15_rows_cleaned.xlsx ADDED
Binary file (10.9 kB). View file
 
04_finetuning_approaches/MS_IS_all_modules_orginal_to_clean.xlsx ADDED
Binary file (62.5 kB). View file
 
04_finetuning_approaches/MS_IS_all_modules_orginal_to_clean_cleaned.csv ADDED
The diff for this file is too large to render. See raw diff
 
04_finetuning_approaches/MS_IS_all_modules_orginal_to_clean_cleaned.xlsx ADDED
Binary file (48.4 kB). View file
 
04_finetuning_approaches/finetuned_sqa_tryout.ipynb ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 31,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from transformers import pipeline\n",
10
+ "import pandas as pd\n",
11
+ "import torch\n",
12
+ "\n",
13
+ "\n",
14
+ "table = pd.read_excel(\"MS_IS_50_modules_cleaned.xlsx\")\n",
15
+ "table = table.astype(str)\n",
16
+ "\n",
17
+ "tqa = pipeline(task=\"table-question-answering\", model=\"google/tapas-large-finetuned-wtq\")"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": 33,
23
+ "metadata": {},
24
+ "outputs": [
25
+ {
26
+ "name": "stdout",
27
+ "output_type": "stream",
28
+ "text": [
29
+ "['Programming with neural nets']\n"
30
+ ]
31
+ }
32
+ ],
33
+ "source": [
34
+ "question = \"Which modules titles are about programming?\"\n",
35
+ "c = tqa(table, query=question)['cells']\n",
36
+ "\n",
37
+ "print(c)"
38
+ ]
39
+ }
40
+ ],
41
+ "metadata": {
42
+ "kernelspec": {
43
+ "display_name": "py38",
44
+ "language": "python",
45
+ "name": "python3"
46
+ },
47
+ "language_info": {
48
+ "codemirror_mode": {
49
+ "name": "ipython",
50
+ "version": 3
51
+ },
52
+ "file_extension": ".py",
53
+ "mimetype": "text/x-python",
54
+ "name": "python",
55
+ "nbconvert_exporter": "python",
56
+ "pygments_lexer": "ipython3",
57
+ "version": "3.8.16"
58
+ },
59
+ "orig_nbformat": 4
60
+ },
61
+ "nbformat": 4,
62
+ "nbformat_minor": 2
63
+ }
04_finetuning_approaches/generate_training_question.ipynb ADDED
@@ -0,0 +1,786 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 18,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "There are 0 duplicated abbreviations.\n"
13
+ ]
14
+ },
15
+ {
16
+ "data": {
17
+ "text/plain": [
18
+ "'CSV and Excel files saved at felix_playground_SQA_Training/MS_IS_all_modules_cleaned.csv and felix_playground_SQA_Training/MS_IS_all_modules_cleaned.xlsx respectively.'"
19
+ ]
20
+ },
21
+ "execution_count": 18,
22
+ "metadata": {},
23
+ "output_type": "execute_result"
24
+ }
25
+ ],
26
+ "source": [
27
+ "import pandas as pd\n",
28
+ "\n",
29
+ "def clean_and_save(file_path):\n",
30
+ " df = pd.read_excel(file_path)\n",
31
+ " df = df.astype(str)\n",
32
+ " df = df.replace('\\n', '', regex=True)\n",
33
+ " df = df.replace(\"'\", '\"')\n",
34
+ " df = df.replace(\"'\", \"\", regex=True)\n",
35
+ " df = df.apply(lambda x: x.str.strip() if x.dtype == \"object\" else x)\n",
36
+ " df = df.drop_duplicates()\n",
37
+ " csv_file_path = file_path.replace(\".xlsx\", \"_cleaned.csv\")\n",
38
+ " df.to_csv(csv_file_path, index=False)\n",
39
+ " excel_file_path = file_path.replace(\".xlsx\", \"_cleaned.xlsx\")\n",
40
+ " print(f\"There are {df.duplicated(subset=['Abbreviation']).sum()} duplicated abbreviations.\")\n",
41
+ " df = df.drop_duplicates(subset=['Abbreviation'], keep='first')\n",
42
+ " df.to_excel(excel_file_path, index=False)\n",
43
+ " return f\"CSV and Excel files saved at {csv_file_path} and {excel_file_path} respectively.\"\n",
44
+ "\n",
45
+ "clean_and_save(\"felix_playground_SQA_Training/MS_IS_all_modules.xlsx\")"
46
+ ]
47
+ },
48
+ {
49
+ "cell_type": "code",
50
+ "execution_count": 1,
51
+ "metadata": {},
52
+ "outputs": [
53
+ {
54
+ "ename": "NameError",
55
+ "evalue": "name 'pd' is not defined",
56
+ "output_type": "error",
57
+ "traceback": [
58
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
59
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
60
+ "Cell \u001b[1;32mIn[1], line 29\u001b[0m\n\u001b[0;32m 25\u001b[0m training_data\u001b[39m.\u001b[39mto_excel(\u001b[39m'\u001b[39m\u001b[39mfelix_playground_SQA_Training/module_guide_sq_abbreviation.xlsx\u001b[39m\u001b[39m'\u001b[39m, index\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m)\n\u001b[0;32m 27\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mTraining data created and saved as \u001b[39m\u001b[39m'\u001b[39m\u001b[39mtraining_data.xlsx\u001b[39m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m---> 29\u001b[0m create_training_data_abbreviation(\u001b[39m\"\u001b[39;49m\u001b[39mfelix_playground_SQA_Training/MS_IS_all_modules_cleaned.xlsx\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n",
61
+ "Cell \u001b[1;32mIn[1], line 5\u001b[0m, in \u001b[0;36mcreate_training_data_abbreviation\u001b[1;34m(file_path)\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcreate_training_data_abbreviation\u001b[39m(file_path):\n\u001b[0;32m 4\u001b[0m \u001b[39m# Read the cleaned excel file\u001b[39;00m\n\u001b[1;32m----> 5\u001b[0m df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mread_excel(file_path)\n\u001b[0;32m 7\u001b[0m \u001b[39m# Create a new dataframe for training data\u001b[39;00m\n\u001b[0;32m 8\u001b[0m training_data \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame(columns\u001b[39m=\u001b[39m[\u001b[39m'\u001b[39m\u001b[39mid\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mannotator\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mposition\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mquestion\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mtable_file\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39manswer_coordinates\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39manswer_text\u001b[39m\u001b[39m'\u001b[39m])\n",
62
+ "\u001b[1;31mNameError\u001b[0m: name 'pd' is not defined"
63
+ ]
64
+ }
65
+ ],
66
+ "source": [
67
+ "import random\n",
68
+ "\n",
69
+ "def create_training_data_abbreviation(file_path):\n",
70
+ " # Read the cleaned excel file\n",
71
+ " df = pd.read_excel(file_path)\n",
72
+ " \n",
73
+ " # Create a new dataframe for training data\n",
74
+ " training_data = pd.DataFrame(columns=['id', 'annotator', 'position', 'question', 'table_file', 'answer_coordinates', 'answer_text'])\n",
75
+ " \n",
76
+ " # Define a list of possible question formulations\n",
77
+ " \n",
78
+ " \n",
79
+ " for i, row in df.iterrows():\n",
80
+ " new_row = {'id': f'ms-is-01', 'annotator': 0, 'position': 0, 'question': '', 'table_file': '', 'answer_coordinates': '', 'answer_text': ''}\n",
81
+ " question_formulations = [ f\"What is the abbreviation of {row['Module title']}?\",f\"What is the code for {row['Module title']}?\",f\"What is the ID of {row['Module title']}?\",f\"What is the abbreviation of the module {row['Module title']}?\"]\n",
82
+ " question = random.choice(question_formulations).format(row=row)\n",
83
+ " new_row['question'] = question\n",
84
+ " table_file = f\"felix_playground_SQA_Training/MS_IS_all_modules_cleaned.csv\"\n",
85
+ " new_row['table_file'] = table_file\n",
86
+ " answer_coordinates = f\"['({i}, {df.columns.get_loc('Abbreviation')})']\"\n",
87
+ " new_row['answer_coordinates'] = answer_coordinates\n",
88
+ " answer_text = f\"['{row['Abbreviation']}']\"\n",
89
+ " new_row['answer_text'] = answer_text\n",
90
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
91
+ " training_data.to_excel('felix_playground_SQA_Training/module_guide_sq_abbreviation.xlsx', index=False)\n",
92
+ " \n",
93
+ " return \"Training data created and saved as 'training_data.xlsx'.\"\n",
94
+ "\n",
95
+ "create_training_data_abbreviation(\"felix_playground_SQA_Training/MS_IS_all_modules_cleaned.xlsx\")\n"
96
+ ]
97
+ },
98
+ {
99
+ "cell_type": "code",
100
+ "execution_count": 15,
101
+ "metadata": {},
102
+ "outputs": [
103
+ {
104
+ "name": "stderr",
105
+ "output_type": "stream",
106
+ "text": [
107
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
108
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
109
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
110
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
111
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
112
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
113
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
114
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
115
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
116
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
117
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
118
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
119
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
120
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
121
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
122
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
123
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
124
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
125
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
126
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
127
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
128
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
129
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
130
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
131
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
132
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
133
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
134
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
135
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
136
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
137
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
138
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
139
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
140
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
141
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
142
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
143
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
144
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
145
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
146
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
147
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
148
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
149
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
150
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
151
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
152
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
153
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
154
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
155
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
156
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
157
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
158
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
159
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
160
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
161
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
162
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
163
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
164
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
165
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
166
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
167
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
168
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
169
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
170
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
171
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
172
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
173
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
174
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
175
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
176
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
177
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
178
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
179
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
180
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
181
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
182
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
183
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
184
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
185
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
186
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
187
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
188
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
189
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
190
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
191
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
192
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
193
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
194
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
195
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
196
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
197
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
198
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
199
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
200
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
201
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
202
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
203
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
204
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
205
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
206
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
207
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
208
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
209
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
210
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
211
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
212
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
213
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
214
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
215
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
216
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
217
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
218
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
219
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
220
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
221
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
222
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
223
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
224
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
225
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
226
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
227
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
228
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
229
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
230
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
231
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
232
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
233
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
234
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
235
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
236
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
237
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
238
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
239
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
240
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
241
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
242
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
243
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
244
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
245
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
246
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
247
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
248
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
249
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
250
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
251
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
252
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
253
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
254
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
255
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
256
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
257
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
258
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
259
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
260
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
261
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
262
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
263
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
264
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
265
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
266
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
267
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
268
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
269
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
270
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
271
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
272
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
273
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
274
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
275
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
276
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
277
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
278
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
279
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
280
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
281
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
282
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
283
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
284
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
285
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
286
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
287
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
288
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
289
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
290
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
291
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
292
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
293
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
294
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
295
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
296
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
297
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
298
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
299
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
300
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
301
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
302
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
303
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
304
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
305
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
306
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
307
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
308
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
309
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
310
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
311
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
312
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
313
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
314
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
315
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
316
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
317
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
318
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
319
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
320
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
321
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
322
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
323
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
324
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
325
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
326
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
327
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
328
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
329
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
330
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
331
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
332
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
333
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
334
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
335
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
336
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
337
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
338
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
339
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
340
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
341
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
342
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
343
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
344
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
345
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\1598320022.py:55: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
346
+ " training_data = training_data.append(new_row, ignore_index=True)\n"
347
+ ]
348
+ },
349
+ {
350
+ "data": {
351
+ "text/plain": [
352
+ "\"Training data created and saved as 'questions_content.xlsx'.\""
353
+ ]
354
+ },
355
+ "execution_count": 15,
356
+ "metadata": {},
357
+ "output_type": "execute_result"
358
+ }
359
+ ],
360
+ "source": [
361
+ "def create_training_data_content(file_path):\n",
362
+ " # Read the cleaned excel file\n",
363
+ " df = pd.read_excel(file_path)\n",
364
+ "\n",
365
+ " # Create a new dataframe for training data\n",
366
+ " training_data = pd.DataFrame(\n",
367
+ " columns=[\n",
368
+ " \"id\",\n",
369
+ " \"annotator\",\n",
370
+ " \"position\",\n",
371
+ " \"question\",\n",
372
+ " \"table_file\",\n",
373
+ " \"answer_coordinates\",\n",
374
+ " \"answer_text\",\n",
375
+ " ]\n",
376
+ " )\n",
377
+ "\n",
378
+ " # Build one training example (question, table file, answer cell, answer text) per module row\n",
379
+ " for i, row in df.iterrows():\n",
380
+ " # Create a new row for the training data\n",
381
+ " new_row = {\n",
382
+ " \"id\": f\"ms-is-01\",\n",
383
+ " \"annotator\": 0,\n",
384
+ " \"position\": 0,\n",
385
+ " \"question\": \"\",\n",
386
+ " \"table_file\": \"\",\n",
387
+ " \"answer_coordinates\": \"\",\n",
388
+ " \"answer_text\": \"\",\n",
389
+ " }\n",
390
+ "\n",
391
+ " question_formulations = [\n",
392
+ " f\"What is the content of {row['Module title']}?\",\n",
393
+ " f\"What is the description of {row['Module title']}?\",\n",
394
+ " f\"What is {row['Module title']} about?\",\n",
395
+ " f\"Give me the content of {row['Module title']}?\",\n",
396
+ " f\"Give me the content of the module {row['Module title']}? \",\n",
397
+ " ]\n",
398
+ "\n",
399
+ " question = random.choice(question_formulations).format(row=row)\n",
400
+ " new_row[\"question\"] = question\n",
401
+ "\n",
402
+ " # Set the table file\n",
403
+ " table_file = f\"felix_playground_SQA_Training/MS_IS_all_modules_cleaned.csv\"\n",
404
+ " new_row[\"table_file\"] = table_file\n",
405
+ "\n",
406
+ " # Set the answer coordinates\n",
407
+ " answer_coordinates = f\"['({i}, {df.columns.get_loc('Contents')})']\"\n",
408
+ " new_row[\"answer_coordinates\"] = answer_coordinates\n",
409
+ "\n",
410
+ " # Set the answer text\n",
411
+ " answer_text = f\"['{row['Contents']}']\"\n",
412
+ " new_row[\"answer_text\"] = answer_text\n",
413
+ "\n",
414
+ " # Append the new row to the training data\n",
415
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
416
+ "\n",
417
+ " # Save the training data as an excel file\n",
418
+ " training_data.to_excel(\"felix_playground_SQA_Training/module_guide_sqa_contents.xlsx\", index=False)\n",
419
+ "\n",
420
+ " return \"Training data created and saved as 'felix_playground_SQA_Training/module_guide_sqa_contents.xlsx'.\"\n",
421
+ "\n",
422
+ "create_training_data_content(\"felix_playground_SQA_Training/MS_IS_all_modules_cleaned.xlsx\")"
423
+ ]
424
+ },
425
+ {
426
+ "cell_type": "code",
427
+ "execution_count": 17,
428
+ "metadata": {},
429
+ "outputs": [
430
+ {
431
+ "name": "stderr",
432
+ "output_type": "stream",
433
+ "text": [
434
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
435
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
436
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
437
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
438
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
439
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
440
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
441
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
442
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
443
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
444
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
445
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
446
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
447
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
448
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
449
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
450
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
451
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
452
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
453
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
454
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
455
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
456
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
457
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
458
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
459
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
460
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
461
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
462
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
463
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
464
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
465
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
466
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
467
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
468
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
469
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
470
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
471
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
472
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
473
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
474
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
475
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
476
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
477
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
478
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
479
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
480
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
481
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
482
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
483
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
484
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
485
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
486
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
487
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
488
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
489
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
490
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
491
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
492
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
493
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
494
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
495
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
496
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
497
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
498
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
499
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
500
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
501
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
502
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
503
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
504
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
505
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
506
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
507
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
508
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
509
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
510
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
511
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
512
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
513
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
514
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
515
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
516
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
517
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
518
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
519
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
520
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
521
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
522
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
523
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
524
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
525
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
526
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
527
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
528
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
529
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
530
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
531
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
532
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
533
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
534
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
535
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
536
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
537
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
538
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
539
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
540
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
541
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
542
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
543
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
544
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
545
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
546
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
547
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
548
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
549
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
550
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
551
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
552
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
553
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
554
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
555
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
556
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
557
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
558
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
559
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
560
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
561
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
562
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
563
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
564
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
565
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
566
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
567
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
568
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
569
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
570
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
571
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
572
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
573
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
574
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
575
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
576
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
577
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
578
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
579
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
580
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
581
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
582
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
583
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
584
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
585
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
586
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
587
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
588
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
589
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
590
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
591
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
592
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
593
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
594
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
595
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
596
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
597
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
598
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
599
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
600
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
601
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
602
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
603
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
604
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
605
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
606
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
607
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
608
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
609
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
610
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
611
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
612
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
613
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
614
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
615
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
616
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
617
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
618
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
619
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
620
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
621
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
622
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
623
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
624
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
625
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
626
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
627
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
628
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
629
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
630
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
631
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
632
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
633
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
634
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
635
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
636
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
637
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
638
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
639
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
640
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
641
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
642
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
643
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
644
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
645
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
646
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
647
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
648
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
649
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
650
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
651
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
652
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
653
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
654
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
655
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
656
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
657
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
658
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
659
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
660
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
661
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
662
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
663
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
664
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
665
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
666
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
667
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
668
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
669
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
670
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
671
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
672
+ "C:\\Users\\FelixNeubauer\\AppData\\Local\\Temp\\ipykernel_19396\\3422203238.py:58: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
673
+ " training_data = training_data.append(new_row, ignore_index=True)\n"
674
+ ]
675
+ },
676
+ {
677
+ "data": {
678
+ "text/plain": [
679
+ "\"Training data created and saved as 'questions_ETCS.xlsx'.\""
680
+ ]
681
+ },
682
+ "execution_count": 17,
683
+ "metadata": {},
684
+ "output_type": "execute_result"
685
+ }
686
+ ],
687
+ "source": [
688
+ "def create_training_data_ETCS(file_path):\n",
689
+ " # Read the cleaned excel file\n",
690
+ " df = pd.read_excel(file_path)\n",
691
+ "\n",
692
+ " # Create a new dataframe for training data\n",
693
+ " training_data = pd.DataFrame(\n",
694
+ " columns=[\n",
695
+ " \"id\",\n",
696
+ " \"annotator\",\n",
697
+ " \"position\",\n",
698
+ " \"question\",\n",
699
+ " \"table_file\",\n",
700
+ " \"answer_coordinates\",\n",
701
+ " \"answer_text\",\n",
702
+ " ]\n",
703
+ " )\n",
704
+ "\n",
705
+ "    # Build one training example (question, table file, answer cell) per module row\n",
706
+ " for i, row in df.iterrows():\n",
707
+ " # Create a new row for the training data\n",
708
+ " new_row = {\n",
709
+ " \"id\": f\"ms-is-01\",\n",
710
+ " \"annotator\": 0,\n",
711
+ " \"position\": 0,\n",
712
+ " \"question\": \"\",\n",
713
+ " \"table_file\": \"\",\n",
714
+ " \"answer_coordinates\": \"\",\n",
715
+ " \"answer_text\": \"\",\n",
716
+ " }\n",
717
+ " \n",
718
+ " question_formulations = [\n",
719
+ " f\"How many credits does {row['Module title']} have?\",\n",
720
+ " f\"What is the credit value of {row['Module title']}?\",\n",
721
+ " f\"how many credits do I get for {row['Module title']}?\",\n",
722
+ " f\"How many etcs has {row['Module title']}?\",\n",
723
+ " f\"how many credits has {row['Module title']} have?\",\n",
724
+ " f\"hw many etcs does {row['Module title']} have?\",\n",
725
+ " f\"Give me the amount of ects for {row['Module title']}?\",\n",
726
+ " f\"give me the ects points of module {row['Module title']}? \",\n",
727
+ " ]\n",
728
+ "\n",
729
+ " question = random.choice(question_formulations).format(row=row)\n",
730
+ " new_row[\"question\"] = question\n",
731
+ "\n",
732
+ " # Set the table file\n",
733
+ " table_file = f\"felix_playground_SQA_Training/MS_IS_all_modules_cleaned.csv\"\n",
734
+ " new_row[\"table_file\"] = table_file\n",
735
+ "\n",
736
+ " # Set the answer coordinates\n",
737
+ " answer_coordinates = f\"['({i}, {df.columns.get_loc('ETCS')})']\"\n",
738
+ " new_row[\"answer_coordinates\"] = answer_coordinates\n",
739
+ "\n",
740
+ " # Set the answer text\n",
741
+ " answer_text = f\"['{row['ETCS']}']\"\n",
742
+ " new_row[\"answer_text\"] = answer_text\n",
743
+ "\n",
744
+ " # Append the new row to the training data\n",
745
+ " training_data = training_data.append(new_row, ignore_index=True)\n",
746
+ "\n",
747
+ " # Save the training data as an excel file\n",
748
+ " training_data.to_excel(\"felix_playground_SQA_Training/module_guide_sqa_etcs.xlsx\", index=False)\n",
749
+ "\n",
750
+ " return \"Training data created and saved as 'questions_ETCS.xlsx'.\"\n",
751
+ "\n",
752
+ "\n",
753
+ "create_training_data_ETCS(\"felix_playground_SQA_Training/MS_IS_all_modules_cleaned.xlsx\")\n"
754
+ ]
755
+ },
756
+ {
757
+ "cell_type": "code",
758
+ "execution_count": null,
759
+ "metadata": {},
760
+ "outputs": [],
761
+ "source": []
762
+ }
763
+ ],
764
+ "metadata": {
765
+ "kernelspec": {
766
+ "display_name": "enterpriseai2",
767
+ "language": "python",
768
+ "name": "python3"
769
+ },
770
+ "language_info": {
771
+ "codemirror_mode": {
772
+ "name": "ipython",
773
+ "version": 3
774
+ },
775
+ "file_extension": ".py",
776
+ "mimetype": "text/x-python",
777
+ "name": "python",
778
+ "nbconvert_exporter": "python",
779
+ "pygments_lexer": "ipython3",
780
+ "version": "3.8.16"
781
+ },
782
+ "orig_nbformat": 4
783
+ },
784
+ "nbformat": 4,
785
+ "nbformat_minor": 2
786
+ }
04_finetuning_approaches/module_guide_sq_30_questions.xlsx ADDED
Binary file (12.7 kB). View file
 
04_finetuning_approaches/module_guide_sqa.xlsx ADDED
Binary file (49.3 kB). View file
 
04_finetuning_approaches/module_guide_sqa_contents.xlsx ADDED
Binary file (30.7 kB). View file
 
04_finetuning_approaches/module_guide_sqa_etcs.xlsx ADDED
Binary file (11 kB). View file
 
04_finetuning_approaches/qa_catalog.xlsx ADDED
Binary file (13.2 kB). View file
 
04_finetuning_approaches/sqa_train_set_28_examples.xlsx ADDED
Binary file (11.8 kB). View file
 
0915NC_Studienplaetze.jpg ADDED
09_archive_and_discarded_approaches/MS_IS_all_modules.csv ADDED
The diff for this file is too large to render. See raw diff