JavedA commited on
Commit
a67ae61
1 Parent(s): 4543d6a
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. .gitignore +11 -0
  3. Code/Visuals/Plotly/plt_0.py +47 -0
  4. Code/Visuals/Plotly/plt_1.py +106 -0
  5. Code/file_Extensions.py +16 -0
  6. Code/gls/gls_Main.lua +229 -0
  7. Code/math_Dollar_White_Space.py +51 -0
  8. Data/0_Latex_True/0_Deco/0_Frontpage.tex +81 -0
  9. Data/0_Latex_True/0_Deco/10_Appendix.tex +216 -0
  10. Data/0_Latex_True/0_Deco/1_erkl.tex +23 -0
  11. Data/0_Latex_True/0_Deco/2_1_Abstract.tex +16 -0
  12. Data/0_Latex_True/0_Deco/2_Thanks.tex +10 -0
  13. Data/0_Latex_True/0_Deco/3_Used_Abbrev.log +326 -0
  14. Data/0_Latex_True/0_Deco/3_Used_Abbrev.tex +36 -0
  15. Data/0_Latex_True/1_Task/1_Introduction.tex +97 -0
  16. Data/0_Latex_True/1_Task/2_State_Of_Art.tex +98 -0
  17. Data/0_Latex_True/1_Task/3_CNM.tex +198 -0
  18. Data/0_Latex_True/1_Task/4_CNMc.tex +114 -0
  19. Data/0_Latex_True/2_Task/0_Methodlogy.tex +189 -0
  20. Data/0_Latex_True/2_Task/1_Data_Gen.tex +263 -0
  21. Data/0_Latex_True/2_Task/2_Clustering.tex +273 -0
  22. Data/0_Latex_True/2_Task/3_Tracking.tex +107 -0
  23. Data/0_Latex_True/2_Task/4_Track_Workflow.tex +131 -0
  24. Data/0_Latex_True/2_Task/5_Track_Validity.tex +177 -0
  25. Data/0_Latex_True/2_Task/6_Modeling.tex +157 -0
  26. Data/0_Latex_True/2_Task/7_QT.tex +149 -0
  27. Data/0_Latex_True/3_Task/0_Results.tex +40 -0
  28. Data/0_Latex_True/3_Task/1_Track_Results.tex +248 -0
  29. Data/0_Latex_True/3_Task/2_Mod_CPE.tex +228 -0
  30. Data/0_Latex_True/3_Task/3_SVD_NMF.tex +314 -0
  31. Data/0_Latex_True/3_Task/4_SVD_Regression.tex +220 -0
  32. Data/0_Latex_True/3_Task/5_Pred.tex +27 -0
  33. Data/0_Latex_True/3_Task/6_SLS.tex +107 -0
  34. Data/0_Latex_True/3_Task/7_Models.tex +320 -0
  35. Data/0_Latex_True/4_Task/1_Concl.tex +37 -0
  36. Data/0_Latex_True/4_Task/2_Zusammen_Deutsch.tex +37 -0
  37. Data/10_Law/license_Hippocratic +51 -0
  38. Data/1_Writing/0_Deco/0_Frontpage.qmd +80 -0
  39. Data/1_Writing/0_Deco/1_Erkl.qmd +22 -0
  40. Data/1_Writing/0_Deco/2_1_Abstract.qmd +17 -0
  41. Data/1_Writing/0_Deco/2_Thanks.qmd +10 -0
  42. Data/1_Writing/0_Deco/3_Used_Abbrev.log +326 -0
  43. Data/1_Writing/0_Deco/3_Used_Abbrev.qmd +89 -0
  44. Data/1_Writing/1_Task/1_Introduction.qmd +20 -0
  45. Data/1_Writing/1_Task/2_0_Motivation.qmd +71 -0
  46. Data/1_Writing/1_Task/2_State_Of_Art.qmd +104 -0
  47. Data/1_Writing/1_Task/3_CNM.qmd +203 -0
  48. Data/1_Writing/1_Task/4_CNMc.qmd +108 -0
  49. Data/1_Writing/2_Task/0_Methodlogy.qmd +60 -0
  50. Data/1_Writing/2_Task/1_0_CNMC_Data.qmd +122 -0
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ *.pdf filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _book/
2
+ .quarto/
3
+ .vscode/
4
+ _extensions/
5
+ /.quarto/
6
+ _.site/
7
+
8
+ /.luarc.json
9
+ *.pdf
10
+ *.svg
11
+ *.npz
Code/Visuals/Plotly/plt_0.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Render a 3-D helix as a Plotly scatter plot with all axes hidden,
then export it to HTML (plotly.js from CDN) and capture the div markup.
"""
import plotly.graph_objects as go
import numpy as np

# Helix equation: unit circle in x/y, rising linearly along z
t = np.linspace(0, 10, 50)
x, y, z = np.cos(t), np.sin(t), t

fig = go.Figure(data=[go.Scatter3d(x=x, y=y, z=z,
                                   mode='markers')])


def _hidden_axis():
    """Return axis settings that hide background, ticks, grid,
    zero line and title (was three duplicated dicts)."""
    return dict(
        showbackground=False,
        showticklabels=False,
        title='',
        showgrid=False,
        zeroline=False,
    )


fig.update_layout(
    title="title",
    title_x=0.5,        # center the title horizontally

    # plotlyexpress 3d axes: hide all three axes via the shared helper
    scene=dict(
        xaxis=_hidden_axis(),
        yaxis=_hidden_axis(),
        zaxis=_hidden_axis(),
    ),
    template='plotly_dark'
    # template= 'plotly'
)

# interactive export; plotly.js is loaded from the CDN to keep the file small
fig.write_html("./test.html",
               div_id="plt_Div",
               include_plotlyjs="cdn")

# same figure as an embeddable HTML div string
div_Rep = fig.to_html(div_id="plt_Div",
                      include_plotlyjs="cdn")
print("debug-stop")
Code/Visuals/Plotly/plt_1.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Build the "tornado" figure: a 3-D trajectory (green line) combined with
# cones marking the local direction, exported as interactive HTML and SVG.
import numpy as np
import plotly.io as pio
# render inline via the plotly mimetype when run from a notebook
pio.renderers.default = "plotly_mimetype+notebook_connected"

import plotly.graph_objects as go  # to combine figures

# load data from the numpy npz file
data = np.load('Data/6_Html_Data/0_Viz/plt_Dat_16.78.npz')

# extract the arrays: trajectory coordinates ...
x_Traj = data["x"]
y_Traj = data["y"]
z_Traj = data["z"]
# ... cone anchor positions ...
x_Cone = data["x_Cone"]
y_Cone = data["y_Cone"]
z_Cone = data["z_Cone"]
# ... and cone direction vectors (u, v, w components)
u_Cone = data["u_Cone"]
v_Cone = data["v_Cone"]
w_Cone = data["w_Cone"]

# The trajectory
fig = go.Figure(data=[go.Scatter3d(
    x= x_Traj,
    y= y_Traj,
    z= z_Traj,
    name = "Trajectory",
    showlegend = False,
    )])

# draw the trajectory as a thin green line instead of markers
fig.update_traces(marker_size = 2,
                  mode = "lines",
                  marker_color ="green")

# Cones: direction indicators placed along the trajectory
fig_Cones = go.Figure(data=go.Cone(x = x_Cone,
                                   y = y_Cone,
                                   z = z_Cone,
                                   u = u_Cone,
                                   v = v_Cone,
                                   w = w_Cone,
                                   name = "Direction",
                                   showlegend = False,
                                   )
                      )

# hiding color-bar of the cone trace
fig_Cones.update_traces(showscale=False)

# combine cone and trajectory into one figure
fig.add_traces(data = fig_Cones.data)

# style the figure
fig.update_layout(
    # plotlyexpress 3d axes: hide background, ticks, grid and titles
    scene = dict(
        xaxis = dict(
            showbackground = False,
            showticklabels = False,
            title='',
            showgrid = False,
            zeroline = False,),
        yaxis = dict(
            showbackground = False,
            showticklabels = False,
            title='',
            showgrid = False,
            zeroline = False,),
        zaxis = dict(
            showbackground = False,
            showticklabels = False,
            title='',
            showgrid = False,
            zeroline = False,
            ),
        ),
    # template= 'plotly_dark'
    # template= 'plotly'
    # fully transparent canvas so the figure blends into any page background
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    modebar = dict(bgcolor='rgba(0, 0, 0, 0)'),
    # zero outer margins: the scene fills the whole export area
    margin=dict(
        l=0,
        r=0,
        b=0,
        t=0,
        pad=0
        ),
    # camera placed on the positive x-axis, looking at the origin
    scene_camera_eye=dict(x=1,
                          y=0,
                          z=0),
    )

# interactive export; plotly.js loaded from CDN to keep the file small
fig.write_html("./tornado.html",
               div_id= "plt_Div",
               include_plotlyjs = "cdn")

# static vector export of the same figure
fig.write_image("Data/6_Html_Data/1_Logo_Img/2_Tornado.svg")

print("test")
Code/file_Extensions.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
'''
Rename every .md file under Data/1_Writing (recursively) to the Quarto
.qmd extension.

Note: the original docstring claimed ".tex -> .md"; the code below
actually renames *.md files to *.qmd.
'''

from pathlib import Path

cwd = Path.cwd()

# folder that holds the writing sources
writ_Fold = cwd / "Data/1_Writing"

# rglob recurses into all subfolders; writ_Fold is already a Path,
# no re-wrapping needed
for ct_File in writ_Fold.rglob('*.md'):
    # swap only the extension; name and location stay the same
    ct_File.rename(ct_File.with_suffix('.qmd'))
Code/gls/gls_Main.lua ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -------------------------------------------------------------------------- --
2
+ -- Does work searches for Hello and replaces it with foundyou
3
+ -- function Para(elem)
4
+ -- for i, item in ipairs(elem.content) do
5
+ -- if item.text == "Hello" then
6
+ -- elem.content[i].text = "foundyou"
7
+ -- end
8
+ -- end
9
+ -- return elem
10
+ -- end
11
+
12
+ -- ------------------------- read the header content ------------------------ --
13
-- Pandoc filter: print each Header element's flattened text to stdout
-- (green ANSI color, debug aid), then pass the element through unchanged.
-- NOTE(review): an identical `headerfilter` is defined again further down
-- in this file; that later definition is the one the exported filter
-- table refers to, so this first copy is effectively shadowed.
local function headerfilter(elem)
    if elem.t == 'Header' then
        -- flatten all inline content of the header into a plain string
        local text = pandoc.utils.stringify(elem.content)
        io.write('\27[32mCurrent header:', text ,'\n--------\27[0m\n')
    end
    return elem
end
21
+
22
-- Define the filter function
-- NOTE(review): dead code — glsfilter_1 is NOT part of the filter table
-- returned at the bottom of this file.  It also looks broken: a Para
-- element has no `.text` field (so `text` below is presumably nil and
-- `text:match` would error — confirm), and returning the plain string
-- "replaced" is not a valid pandoc filter return value.  Kept as-is
-- for reference only; the working implementation is `glsfilter` below.
local function glsfilter_1(elem)
    if elem.t == 'Para' then
        -- local text = pandoc.utils.stringify(elem)
        local text = elem.text

        -- look for a literal "\gls" anywhere in the text
        local pattern = '\\gls'
        local match = text:match(pattern)
        if match then
            print("\27[32mMatch found:", match, '\27[0m\n')

            print(text, '\n--------\n')

            local link = pandoc.Link(match, '#' .. match)
            -- return pandoc.Para{link}
            return "replaced"
        else
            print("\27[31mNo match found\27[0m\n")
            print(text, '\n--------\n')
        end
    end
    return elem
end
45
+
46
-- Glossary table: maps each \gls key to a pair of
--   [1] long form as an HTML string (abbreviation letters bolded)
--   [2] short abbreviation
-- Intentionally global: `glsfilter` below reads it.
-- Content fixes versus the previous version: "Stockes" -> "Stokes",
-- CPU = "Central" (not "Computer") Processing Unit,
-- "Dynamical" -> "Dynamic" Time Warping, "KNearest" -> "K-Nearest".
gls_Dict = {
    ode = {"<b>O</b>rdinary <b>D</b>ifferential <b>E</b>quation",
           "ODE"},

    cnm = {"<b>C</b>luster-based <b>N</b>etwork <b>M</b>odeling",
           "CNM"},

    cnmc = {"<b>c</b>ontrol-oriented <b>C</b>luster-based <b>N</b>etwork <b>M</b>odeling",
            "CNMc"},

    cmm = {"<b>C</b>luster <b>M</b>arkov-based <b>M</b>odeling",
           "CMM"},

    cfd = {"<b>C</b>omputational <b>F</b>luid <b>D</b>ynamics",
           "CFD"},

    rans = {"<b>R</b>eynolds <b>A</b>veraged <b>N</b>avier <b>S</b>tokes",
            "RANS"},

    dlr = {"German Aerospace Center",
           "DLR"},

    gpu = {"<b>G</b>raphics <b>P</b>rocessing <b>U</b>nit",
           "GPU"},

    cpu = {"<b>C</b>entral <b>P</b>rocessing <b>U</b>nit",
           "CPU"},

    sdic = {"<b>S</b>ensitive <b>D</b>ependence on <b>I</b>nitial <b>C</b>onditions",
            "SDIC"},

    nmf = {"<b>N</b>on-negative <b>M</b>atrix <b>F</b>actorization",
           "NMF"},

    svd = {"<b>S</b>ingular <b>V</b>alue <b>D</b>ecomposition",
           "SVD"},

    rf = {"<b>R</b>andom <b>F</b>orest",
          "RF"},

    cpd = {"<b>C</b>luster <b>P</b>robability <b>D</b>istribution",
           "CPD"},

    cpevol = {"<b>C</b>entroid <b>P</b>osition <b>E</b>volution",
              "CPE"},

    dtw = {"<b>D</b>ynamic <b>T</b>ime <b>W</b>arping",
           "DTW"},

    knn = {"<b>K</b>-<b>N</b>earest <b>N</b>eighbor",
           "KNN"},
}
98
+
99
+
100
-- -------------------------------------------------------------------------- --
-- Debug filter: write every header's plain text to stdout (green ANSI)
-- and return the element unmodified.
local function headerfilter(elem)
    if elem.t ~= 'Header' then
        return elem
    end
    local header_text = pandoc.utils.stringify(elem.content)
    io.write('\27[32mCurrent header:' .. header_text .. '\n--------\27[0m\n')
    return elem
end
108
+
109
+
110
-- Pandoc filter for Para elements.
-- Replaces the raw-LaTeX glossary commands \gls{key}, \glsfirst{key} and
-- \glspl{key} inside a paragraph with links into the abbreviation page
-- (../0_Deco/3_Used_Abbrev.qmd#key).  All other inlines pass through.
-- Returns the original element untouched when nothing matched.
local function glsfilter(elem)
    if elem.t == 'Para' then
        local has_match = false
        -- Rebuilt inline list; matched commands become pandoc.Link objects
        local new_content = {}
        for _, item in ipairs(elem.content) do

            -- LaTeX commands arrive as RawInline elements
            if item.t == 'RawInline' then
                local gls_Pat = '\\gls{(%w+)}'
                local gls_First_Pat = '\\glsfirst{(%w+)}'
                local gls_Pl_Pat = '\\glspl{(%w+)}'

                local text = item.text

                local gls_Match = string.match(text, gls_Pat)
                local gls_First_Match = string.match(text, gls_First_Pat)
                local gls_Pl_Match = string.match(text, gls_Pl_Pat)

                -- -------------------------------- gls ------------------------------- --
                if gls_Match then
                    has_match = true
                    -- FIX: declared local (previously these leaked into the
                    -- global environment)
                    local long_Term = gls_Dict[gls_Match][1]
                    local bold_Abbrev = gls_Dict[gls_Match][2]

                    -- RawInline('html', ...) so the <b> tags render as HTML
                    local html_String = pandoc.RawInline('html', long_Term)

                    local span_Var = pandoc.Span(html_String,
                                                 {class = 'gls_Content'})

                    -- see: https://pandoc.org/lua-filters.html#pandoc.link
                    local link = pandoc.Link(
                        {bold_Abbrev, span_Var},
                        '../0_Deco/3_Used_Abbrev.qmd' .. '#' .. gls_Match,
                        nil,
                        -- add id and class
                        {id = gls_Match .. "gls", class = 'gls'})

                    table.insert(new_content, link)
                end

                -- ------------------------ gls_First_Match ----------------------- --
                if gls_First_Match then
                    has_match = true
                    local long_Term = gls_Dict[gls_First_Match][1]
                    local bold_Abbrev = gls_Dict[gls_First_Match][2]

                    -- first use: long form followed by "(ABBREV)"
                    local html_String = pandoc.RawInline(
                        'html', long_Term .. " (" .. bold_Abbrev .. ")")

                    local link = pandoc.Link(
                        html_String,
                        '../0_Deco/3_Used_Abbrev.qmd' .. '#' .. gls_First_Match,
                        nil,
                        -- add id and class
                        {id = gls_First_Match .. "gls", class = 'gls'})

                    table.insert(new_content, link)
                end

                -- ------------------------- gls_Pl_Match ------------------------- --
                if gls_Pl_Match then
                    has_match = true
                    local long_Term = gls_Dict[gls_Pl_Match][1]
                    local bold_Abbrev = gls_Dict[gls_Pl_Match][2]

                    -- plural: append "s" to the long form and the abbreviation
                    local html_String = pandoc.RawInline('html', long_Term .. "s")

                    local span_Var = pandoc.Span(html_String,
                                                 {class = 'gls_Content'})

                    local link = pandoc.Link(
                        {bold_Abbrev .. "s", span_Var},
                        '../0_Deco/3_Used_Abbrev.qmd' .. '#' .. gls_Pl_Match,
                        nil,
                        -- add id and class
                        {id = gls_Pl_Match .. "gls", class = 'gls'})

                    table.insert(new_content, link)
                end

                -- BUGFIX: keep the original inline only when NO glossary
                -- command matched.  Previously this was the `else` branch of
                -- the \glspl check alone, so an inline matched by \gls or
                -- \glsfirst was inserted twice: once as the generated link
                -- and once as the original raw LaTeX.
                if not (gls_Match or gls_First_Match or gls_Pl_Match) then
                    table.insert(new_content, item)
                end
            else
                -- plain text and other inlines pass through unchanged
                table.insert(new_content, item)
            end
        end

        -- If no matches were found, return the original element
        if not has_match then
            return elem
        end

        return pandoc.Para(new_content)
    end
    return elem
end
223
+
224
+
225
-- Export the filter function as a table
-- Pandoc applies the entries in order: first log all headers, then
-- replace glossary commands inside paragraphs.
return {
    {Header = headerfilter},
    {Para = glsfilter}
}
Code/math_Dollar_White_Space.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
r'''
Report inline-math spans ("$ ... $") in .qmd files whose content starts
or ends with whitespace, e.g. "$ \beta_i $": the inner whitespace breaks
proper math rendering and must be tightened to "$\beta_i$".

Walks Data/1_Writing recursively and prints every offending span.
NOTE: this script only *reports* the spans; it does not rewrite files.
'''

import pathlib
import re

# get current working directory
cwd = pathlib.Path().cwd()

# root folder of the Quarto sources
inp_Fold = cwd / "Data/1_Writing"

# one $...$ span; [^\$]+ keeps each match within a single span
pat = re.compile(r"\$[^\$]+\$")
# whitespace just after an opening "$" -> leading whitespace in the span
# (the original named these pat_End/pat_Start, i.e. swapped)
pat_Lead = re.compile(r"\$(\s)")
# whitespace just before a closing "$" -> trailing whitespace in the span
pat_Trail = re.compile(r"(\s)\$")

# recursively looking into all folders --> subfolders are searched as well
for ct_File in inp_Fold.rglob('*.qmd'):
    with open(ct_File, "r", encoding="utf-8") as file:
        file_Content = file.readlines()

    for line in file_Content:
        # all spans on this line, e.g. ['$\\beta_i$', '$ x $']
        for elem in pat.findall(line):
            # flag the span if it has leading or trailing whitespace
            if pat_Lead.search(elem) or pat_Trail.search(elem):
                # print(f"found results in file: {ct_File}")
                print(f"elem: {elem}")

print("done")
Data/0_Latex_True/0_Deco/0_Frontpage.tex ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ % \includegraphics[width=0.42\textwidth]{./2_Figures/TUBraunschweig_4C.pdf} &
3
+
4
+ \begin{center}
5
+ \begin{tabular}{p{\textwidth}}
6
+
7
+ \begin{minipage}{\textwidth}
8
+ % \centering
9
+ \includegraphics[width=0.4\textwidth]{./2_Figures/TUBraunschweig_4C.pdf}
10
+ \end{minipage}
11
+ % \begin{minipage}{0.5\textwidth}
12
+ % \centering
13
+ % \includegraphics[width=0.5\textwidth]{./2_Figures/0_Deco/dlr_Logo.jpeg}
14
+ % \end{minipage}
15
+
16
+
17
+ \vspace{1cm}
18
+
19
+ \\
20
+
21
+ \begin{center}
22
+ \large{\textsc{
23
+ Master thesis number: 486\\
24
+ }}
25
+ \end{center}
26
+
27
+ \begin{center}
28
+ \LARGE{\textsc{
29
+ Flow predictions using control-oriented cluster-based network modeling\\
30
+ }}
31
+ \end{center}
32
+
33
+ \\
34
+
35
+
36
+ \begin{center}
37
+ \large{Technische Universität Braunschweig \\
38
+ Institute of Fluid Mechanics
39
+ }
40
+ \end{center}
41
+
42
+
43
+ \begin{center}
44
+ \textbf{\Large{Master Thesis}}
45
+ \end{center}
46
+
47
+
48
+ \begin{center}
49
+ written by
50
+ \end{center}
51
+
52
+ \begin{center}
53
+ \large{\textbf{Javed Arshad Butt}} \\
54
+
55
+ \large{5027847} \\
56
+ \end{center}
57
+
58
+ \begin{center}
59
+ \large{born on 20.05.1996 in Gujrat}
60
+ \end{center}
61
+
62
+ \vspace{3cm}
63
+ \begin{center}
64
+ \begin{tabular}{lll}
65
+ \textbf{Submission date:} & & 29.04.2022\\
66
+ \textbf{Supervisor :} & & Dr. Richard Semaan \\
67
+ \textbf{Examiner :} & & Prof. Dr.-Ing. R. Radespiel\\
68
+
69
+
70
+ \end{tabular}
71
+ \end{center}
72
+
73
+ \end{tabular}
74
+ \end{center}
75
+ %Damit die erste Seite = Deckblatt nicht nummeriert wird.
76
+ \thispagestyle{empty}
77
+
78
+
79
+
80
+
81
+
Data/0_Latex_True/0_Deco/10_Appendix.tex ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \appendix
2
+ \chapter{Further implemented dynamical systems}
3
+ \label{ch_Ap_Dyna}
4
+ \begin{enumerate}
5
+ \item \textbf{Chen} \cite{Chen1999}:
6
+ \begin{equation}
7
+ \label{eq_8_Chen}
8
+ \begin{aligned}
9
+ \dot x &= a\, (y - x) \\
10
+ \dot y &= x \,(\beta - a) - xz + \beta y \\
11
+ \dot z &= x y -b z
12
+ \end{aligned}
13
+ \end{equation}
14
+
15
+ \item \textbf{Lu} \cite{Lu2002}:
16
+ \begin{equation}
17
+ \label{eq_9_Lu}
18
+ \begin{aligned}
19
+ \dot x &= a \, (y -x) \\
20
+ \dot y &= \beta y -x z \\
21
+ \dot z &= x y - b z
22
+ \end{aligned}
23
+ \end{equation}
24
+
25
+ \item \textbf{Van der Pol} \cite{VanderPol}:
26
+ \begin{equation}
27
+ \label{eq_14_VDP}
28
+ \begin{aligned}
29
+ \dot x &= y \\
30
+ \dot y &= y \beta\,(1-x^2) -x
31
+ \end{aligned}
32
+ \end{equation}
33
+
34
+ \end{enumerate}
35
+
36
+ \chapter{Some basics about chaotic systems}
37
+ \label{ch_Ap_Chaotic}
38
+ Since
39
+ Chaotic systems are the height
40
+ of intricacy when considering dynamical systems.
41
+ The reason why the term intricacy was chosen
42
+ instead of complexity is that chaotic systems can be, but are not necessarily
43
+ complex. For the relation between complex and
44
+ chaotic the reader is referred to \cite{Rickles2007}.
45
+ The mentioned intricacy of chaotic systems shall be explained by
46
+ reviewing two reasons. First,
47
+ chaotic systems are sensitive to their initial conditions.
48
+ To understand this, imagine we want to solve an \gls{ode}. In order to solve any
49
+ differential
50
+ equation, the initial condition or starting state must be known. Meaning, that the
51
+ solution to the \gls{ode} at the very first initial step, from where the
52
+ remaining interval is solved, must be identified beforehand.
53
+ One might believe, a starting point, which is not guessed unreasonably off,
54
+ should suffice to infer the system's future dynamics.\newline
55
+
56
+ This is
57
+ an educated attempt, however, it is not true for systems that exhibit
58
+ sensitivity to initial conditions. These systems amplify any
59
+ perturbation or deviation exponentially
60
+ as time increases. From this it can be concluded
61
+ that even in case the initial value would be accurate to, e.g., 10 decimal places,
62
+ still after some time, the outcome can not be trusted anymore.
63
+ Visually
64
+ this can be comprehended by thinking of initial conditions
65
+ as locations in space. Let us picture two points with two initial conditions
66
+ that are selected to be next to each other. Only by zooming in multiple times,
67
+ a small spatial deviation should be perceivable.
68
+ As the time changes, the points will leave the location defined through the initial condition. \newline
69
+
70
+
71
+ With
72
+ chaotic systems in mind, both initially neighboring
73
+ points will diverge exponentially fast from each other.
74
+ As a consequence of the initial condition not being
75
+ known with infinite precision, the initial microscopic
76
+ errors become macroscopic with increasing time. Microscopic mistakes
77
+ might be considered to be imperceptible and thus have no impact
78
+ on the outcome, which would be worth to be mentioned.
79
+ Macroscopic mistakes on the other hand are visible. Depending on
80
+ accuracy demands solutions might be or might not be accepted.
81
+ However, as time continues further, the results eventually
82
+ will become completely unusable and diverge from the actual output on a macroscopic scale.\newline
83
+
84
+
85
+ The second reason, why chaotic systems are very difficult
86
+ to cope with, is the lack of a clear definition. It can be
87
+ argued that even visually, it is not always possible to
88
+ unambiguously identify a chaotic system. The idea
89
+ is that at some time step, a chaotic system appears to
90
+ be evolving randomly over time. The question then arises,
91
+ how is someone supposed to distinguish between something which
92
+ is indeed evolving randomly and something which only appears
93
+ to be random. The follow-up question most likely is going to be,
94
+ what is the difference between chaos and randomness, or
95
+ even if there is a difference. \newline
96
+
97
+ Maybe randomness itself is only
98
+ a lack of knowledge, e.g., the movement of gas particles
99
+ can be considered to be chaotic or random. If the
100
+ velocity and spatial position of each molecule are
101
+ trackable, the concept of temperature is made
102
+ redundant. Gibbs only invented the concept of temperature
103
+ in order to be able to make some qualitative statements
104
+ about a system \cite{Argyris2017}.
105
+ A system that can not be described microscopically.
106
+ Here the question arises if the movement of the molecules
107
+ would be random, how is it possible that every time
108
+ some amount of heat is introduced into a system, the temperature
109
+ changes in one direction. If a random microscale system
110
+ always tends to go in one direction within a macroscale view,
111
+ a clear definition of randomness is required. \newline
112
+
113
+ Laplace once said if the initial condition
114
+ (space and velocity) of each atom would be known,
115
+ the entire future
116
+ could be calculated. In other words, if a system is
117
+ built on equations, which is a deterministic way
118
+ to describe an event, the outcome should just
119
+ depend on the values of the variables.
120
+ Thus, the future, for as long as it is desired could be predicted
121
+ or computed exactly. To briefly summarize this conversion,
122
+ Albert Einstein once remarked that God would not play dice. Nils
123
+ Bohr replied that it
124
+ would be presumptuous of us human beings to prescribe to the Almighty
125
+ how he is to take his decisions. A more in-depth introduction to
126
+ this subject is provided by \cite{Argyris2017}.
127
+ Nevertheless, by doing literature research, one way to
128
+ visually distinguish between
129
+ randomness and chaos was found \cite{Boeing2016}.
130
+ Yet, in \cite{Boeing2016} the method was only
131
+ deployed on a logistic map. Hence, further research
132
+ is required here. \newline
133
+
134
+ As explained, a clear definition of chaos does not exist.
135
+ However, some parts of definitions do occur regularly, e.g.,
136
+ the already mentioned \glsfirst{sdic}. Other definition parts are the following: Chaotic
137
+ motion is \textbf{aperiodic} and based on a \textbf{deterministic} system.
138
+ An aperiodic system is not repeating any
139
+ previous \textbf{trajectory} and a deterministic system is
140
+ described by governing equations. A trajectory is the evolution
141
+ of a dynamical system over time. For instance, a dynamical system
142
+ consisting of 3 variables is denoted as a 3-dimensional dynamical system.
143
+ Each of the variables has its own representation axis.
144
+ Assuming these
145
+ 3 variables capture space, motion in the x-,y- and z-direction
146
+ is possible. For each point in a defined time range, there is one set of x, y and z values, which fully describes the output of the dynamical system or the position at a chosen time point.
147
+ Simply put, the trajectory is the movement
148
+ or change of the variables of the differential equation over time. Usually, the
149
+ trajectory is displayed in the phase space, i.e., the axis represents the state or values of the variables of a dynamical system. An example can be observed in section \ref{subsec_1_1_3_first_CNMc}. \newline
150
+
151
+
152
+ One misconception which is often believed \cite{Taylor2010}
153
+ and found, e.g., in
154
+ Wikipedia \cite{Wiki_Chaos} is that
155
+ strange attractors would only appear as a consequence of
156
+ chaos. Yet, Grebogi et al. \cite{Grebogi1984} proved
157
+ otherwise. According to
158
+ \cite{Boeing2016,Taylor2010} strange attractors exhibit
159
+ self-similarity. This can be understood visually by imaging any shape
160
+ of a trajectory. Now by zooming in or out, the exact same shape
161
+ is found again. The amount of zooming in or out and consequently
162
+ changing the view scale, will not change the perceived
163
+ shape of the trajectory. Self-similarity happens to be
164
+ one of the fundamental properties of a geometry
165
+ in order to be called a fractal \cite{Taylor2010}.
166
+ In case one believes,
167
+ strange attractors would always be chaotic and knows that by definition strange attractors phase
168
+ space is self-similar, then
169
+ something further misleading is concluded.
170
+ Namely, if a geometry is turned out not only
171
+ to be self-similar but also to be a fractal, this
172
+ would demand interpreting every fractal to be
173
+ chaotic. \newline
174
+
175
+ To refute this, consider the Gophy
176
+ attractor \cite{Grebogi1984}.
177
+ It exhibits the described self-similarity,
178
+ moreover, it is a fractal, and it is also a
179
+ strange attractor. However, the Gophy
180
+ attractor is not chaotic. The reason is found, when
181
+ calculating the Lyapunov exponent, which is negative
182
+ \cite{Taylor2010}. Latter tells us that two neighboring
183
+ trajectories are not separating exponentially fast
184
+ from each other. Thus, it does not obey the
185
+ sensitive dependence
186
+ of initial conditions requirement and is
187
+ regarded to be non-chaotic. The key messages are
188
+ that a chaotic attractor surely is a strange
189
+ attractor and a strange attractor is not necessarily
190
+ chaotic. A strange attractor refers to a fractal
191
+ geometry in which chaotic behavior may
192
+ or may not exist \cite{Taylor2010}.
193
+ Having acquired the knowledge that strange attractors
194
+ can occur in chaotic systems and form a fractal,
195
+ one might infer another question. If a chaotic
196
+ strange attractor always generates a geometry, which
197
+ stays constant when scaled, can chaos be
198
+ regarded to be random?\newline
199
+
200
+
201
+ This question will not be discussed in detail here, but for the sake of completeness, the 3 known types of nonstrange attractors
202
+ shall be mentioned. These are
203
+ the fixed point attractor, the limit cycle attractor, and the
204
+ torus attractor \cite{Taylor2010}.
205
+ A fixed point attractor is one point in the phase space, which attracts or pulls nearby trajectories to itself.
206
+ Inside the fix-point attractor, there is no motion, meaning
207
+ the derivative of the differential equation is zero.
208
+ In simpler words,
209
+ once the trajectory runs into a fix-point, the trajectory ends there.
210
+ This is because no change over time can be found here.
211
+ A limit cycle can be expressed as an endlessly repeating loop, e.g. in the shape of a circle.
212
+ The trajectory can start at
213
+ any given initial condition, still, it can go through a place in the phase space, from where the trajectory is continued as an infinitely
214
+ repeating loop.
215
+ For a visualization of the latter and the torus, as well as more
216
+ detail the reader is referred to \cite{Argyris2017, Kutz2022, Strogatz2019, Taylor2010}.
Data/0_Latex_True/0_Deco/1_erkl.tex ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ \chapter*{Declaration of independent authorship}
4
+
5
+ I hereby declare that the present work, the master thesis, is solely and independently done by myself in all aspects, such as developments, code implementations, and writing of the thesis.
6
+ In addition, I confirm that I did not use any tools, materials or sources other than those explicitly specified.\newline \break
7
+
8
+ \vspace{1cm}
9
+ \noindent Full name: Javed Arshad Butt \newline \break
10
+ \noindent Date and place: 29.04.2022, Braunschweig\newline \break
11
+
12
+ \vspace{1cm}
13
+ \noindent Signature:
14
+
15
+ \begin{figure}[!h]
16
+ \centering
17
+ \includegraphics[width =0.2\textwidth]
18
+ % In order to insert an eps file - Only_File_Name (Without file extension)
19
+ {2_Figures/0_Deco/signature_1.jpg}
20
+ % \caption{Adapted coordinates by using cosine function and initial CST modes}
21
+ \label{fig_0_signature}
22
+ \end{figure}
23
+
Data/0_Latex_True/0_Deco/2_1_Abstract.tex ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \chapter*{Abstract}
2
+ In this master thesis, a data-driven modeling technique is proposed.
3
+ It enables making predictions for general dynamic systems for unknown model parameter values or operating conditions.
4
+ The tool is denoted as \gls{cnmc}.
5
+ The most recent developed version delivered promising results for the chaotic Lorenz system \cite{lorenz1963deterministic}.
6
+ Since the earlier work was restricted to the application of only one dynamical system, the first major improvement of this contribution was to allow \gls{cnmc} to be utilized for any general dynamical system.
7
+ For this, \gls{cnmc} was written from scratch in a modular manner.
8
+ The limitations on the number of dimensions and on the shape of the trajectory of the dynamical systems are removed.
9
+ Adding a new dynamic system was designed such that it should be as straightforward as possible.
10
+ To affirm this point, 10 dynamic systems, most of which are chaotic systems, are included by default.
11
+ To be able to run \gls{cnmc} on arbitrary dynamic systems in an automated way, a parameter study for the modal decomposition method \gls{nmf} was implemented.
12
+ However, since a single \gls{nmf} solution took up to hours, a second option was added, i.e., \gls{svd}.
13
+ With \gls{svd} the most time-consuming task could be brought to a level of seconds.
14
+ The improvements introduced, allow \gls{cnmc} to be executed on a general dynamic system on a normal computer in a reasonable time.
15
+ Furthermore, \gls{cnmc} comes with its integrated post-processor in form of HTML files to inspect the generated plots in detail.
16
+ All the parameters used in \gls{cnmc} and some additional beneficial features can be controlled via one settings file.
Data/0_Latex_True/0_Deco/2_Thanks.tex ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ \chapter*{Acknowledgments}
3
+
4
+ All praise and thanks to the \textbf{ONE}, Who needs neither my praise nor my thanks.
5
+ To the \textbf{ONE}, Who is independent of everything and everyone, but on Whom everything and everyone depends.
6
+
7
+ \vspace{1cm}
8
+ Thank you, Dr. Semaan - you provided me with the possibility to work on such a compelling and challenging topic. Even though the difficult tasks were not always pleasant, I very much appreciate the opportunity to have worked on these captivating tasks.
9
+ Thank you for the time and effort you invested in this work.
10
+ Also, thank you for the weekly English exercises and for explaining to me how to pronounce methodology correctly :D
Data/0_Latex_True/0_Deco/3_Used_Abbrev.log ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex 2020.12.21) 25 APR 2022 13:20
2
+ entering extended mode
3
+ restricted \write18 enabled.
4
+ file:line:error style messages enabled.
5
+ %&-line parsing enabled.
6
+ **/home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev
7
+ (/home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex
8
+ LaTeX2e <2020-02-02> patch level 2
9
+ L3 programming layer <2020-02-14>
10
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:9: Undefined control sequence.
11
+ l.9 \DeclareAcronym
12
+ {usa}{
13
+ The control sequence at the end of the top line
14
+ of your error message was never \def'ed. If you have
15
+ misspelled it (e.g., `\hobx'), type `I' and the correct
16
+ spelling (e.g., `I\hbox'). Otherwise just continue,
17
+ and I'll forget about whatever was undefined.
18
+
19
+
20
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:9: LaTeX Error: Missing \begin{document}.
21
+
22
+ See the LaTeX manual or LaTeX Companion for explanation.
23
+ Type H <return> for immediate help.
24
+ ...
25
+
26
+ l.9 \DeclareAcronym{u
27
+ sa}{
28
+ You're in trouble here. Try typing <return> to proceed.
29
+ If that doesn't work, type X <return> to quit.
30
+
31
+ Missing character: There is no u in font nullfont!
32
+ Missing character: There is no s in font nullfont!
33
+ Missing character: There is no a in font nullfont!
34
+ Missing character: There is no s in font nullfont!
35
+ Missing character: There is no h in font nullfont!
36
+ Missing character: There is no o in font nullfont!
37
+ Missing character: There is no r in font nullfont!
38
+ Missing character: There is no t in font nullfont!
39
+ Missing character: There is no = in font nullfont!
40
+ Missing character: There is no U in font nullfont!
41
+ Missing character: There is no S in font nullfont!
42
+ Missing character: There is no A in font nullfont!
43
+ Missing character: There is no , in font nullfont!
44
+ Missing character: There is no l in font nullfont!
45
+ Missing character: There is no o in font nullfont!
46
+ Missing character: There is no n in font nullfont!
47
+ Missing character: There is no g in font nullfont!
48
+ Missing character: There is no = in font nullfont!
49
+ Missing character: There is no U in font nullfont!
50
+ Missing character: There is no n in font nullfont!
51
+ Missing character: There is no i in font nullfont!
52
+ Missing character: There is no t in font nullfont!
53
+ Missing character: There is no e in font nullfont!
54
+ Missing character: There is no d in font nullfont!
55
+ Missing character: There is no S in font nullfont!
56
+ Missing character: There is no t in font nullfont!
57
+ Missing character: There is no a in font nullfont!
58
+ Missing character: There is no t in font nullfont!
59
+ Missing character: There is no e in font nullfont!
60
+ Missing character: There is no s in font nullfont!
61
+ Missing character: There is no o in font nullfont!
62
+ Missing character: There is no f in font nullfont!
63
+ Missing character: There is no A in font nullfont!
64
+ Missing character: There is no m in font nullfont!
65
+ Missing character: There is no e in font nullfont!
66
+ Missing character: There is no r in font nullfont!
67
+ Missing character: There is no i in font nullfont!
68
+ Missing character: There is no c in font nullfont!
69
+ Missing character: There is no a in font nullfont!
70
+ Missing character: There is no , in font nullfont!
71
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:13: Undefined control sequence.
72
+ l.13 \DeclareAcronym
73
+ {eu}{
74
+ The control sequence at the end of the top line
75
+ of your error message was never \def'ed. If you have
76
+ misspelled it (e.g., `\hobx'), type `I' and the correct
77
+ spelling (e.g., `I\hbox'). Otherwise just continue,
78
+ and I'll forget about whatever was undefined.
79
+
80
+ Missing character: There is no e in font nullfont!
81
+ Missing character: There is no u in font nullfont!
82
+ Missing character: There is no s in font nullfont!
83
+ Missing character: There is no h in font nullfont!
84
+ Missing character: There is no o in font nullfont!
85
+ Missing character: There is no r in font nullfont!
86
+ Missing character: There is no t in font nullfont!
87
+ Missing character: There is no = in font nullfont!
88
+ Missing character: There is no E in font nullfont!
89
+ Missing character: There is no U in font nullfont!
90
+ Missing character: There is no , in font nullfont!
91
+ Missing character: There is no l in font nullfont!
92
+ Missing character: There is no o in font nullfont!
93
+ Missing character: There is no n in font nullfont!
94
+ Missing character: There is no g in font nullfont!
95
+ Missing character: There is no = in font nullfont!
96
+ Missing character: There is no E in font nullfont!
97
+ Missing character: There is no u in font nullfont!
98
+ Missing character: There is no r in font nullfont!
99
+ Missing character: There is no o in font nullfont!
100
+ Missing character: There is no p in font nullfont!
101
+ Missing character: There is no e in font nullfont!
102
+ Missing character: There is no a in font nullfont!
103
+ Missing character: There is no n in font nullfont!
104
+ Missing character: There is no U in font nullfont!
105
+ Missing character: There is no n in font nullfont!
106
+ Missing character: There is no i in font nullfont!
107
+ Missing character: There is no o in font nullfont!
108
+ Missing character: There is no n in font nullfont!
109
+ Missing character: There is no , in font nullfont!
110
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:17: Undefined control sequence.
111
+ l.17 \DeclareAcronym
112
+ {ussr}{
113
+ The control sequence at the end of the top line
114
+ of your error message was never \def'ed. If you have
115
+ misspelled it (e.g., `\hobx'), type `I' and the correct
116
+ spelling (e.g., `I\hbox'). Otherwise just continue,
117
+ and I'll forget about whatever was undefined.
118
+
119
+ Missing character: There is no u in font nullfont!
120
+ Missing character: There is no s in font nullfont!
121
+ Missing character: There is no s in font nullfont!
122
+ Missing character: There is no r in font nullfont!
123
+ Missing character: There is no s in font nullfont!
124
+ Missing character: There is no h in font nullfont!
125
+ Missing character: There is no o in font nullfont!
126
+ Missing character: There is no r in font nullfont!
127
+ Missing character: There is no t in font nullfont!
128
+ Missing character: There is no = in font nullfont!
129
+ Missing character: There is no U in font nullfont!
130
+ Missing character: There is no S in font nullfont!
131
+ Missing character: There is no S in font nullfont!
132
+ Missing character: There is no R in font nullfont!
133
+ Missing character: There is no , in font nullfont!
134
+ Missing character: There is no l in font nullfont!
135
+ Missing character: There is no o in font nullfont!
136
+ Missing character: There is no n in font nullfont!
137
+ Missing character: There is no g in font nullfont!
138
+ Missing character: There is no = in font nullfont!
139
+ Missing character: There is no U in font nullfont!
140
+ Missing character: There is no n in font nullfont!
141
+ Missing character: There is no i in font nullfont!
142
+ Missing character: There is no o in font nullfont!
143
+ Missing character: There is no n in font nullfont!
144
+ Missing character: There is no o in font nullfont!
145
+ Missing character: There is no f in font nullfont!
146
+ Missing character: There is no S in font nullfont!
147
+ Missing character: There is no o in font nullfont!
148
+ Missing character: There is no v in font nullfont!
149
+ Missing character: There is no i in font nullfont!
150
+ Missing character: There is no e in font nullfont!
151
+ Missing character: There is no t in font nullfont!
152
+ Missing character: There is no S in font nullfont!
153
+ Missing character: There is no o in font nullfont!
154
+ Missing character: There is no c in font nullfont!
155
+ Missing character: There is no i in font nullfont!
156
+ Missing character: There is no a in font nullfont!
157
+ Missing character: There is no l in font nullfont!
158
+ Missing character: There is no i in font nullfont!
159
+ Missing character: There is no s in font nullfont!
160
+ Missing character: There is no t in font nullfont!
161
+ Missing character: There is no R in font nullfont!
162
+ Missing character: There is no e in font nullfont!
163
+ Missing character: There is no p in font nullfont!
164
+ Missing character: There is no u in font nullfont!
165
+ Missing character: There is no b in font nullfont!
166
+ Missing character: There is no l in font nullfont!
167
+ Missing character: There is no i in font nullfont!
168
+ Missing character: There is no c in font nullfont!
169
+ Missing character: There is no s in font nullfont!
170
+ Missing character: There is no , in font nullfont!
171
+
172
+ Overfull \hbox (20.0pt too wide) in paragraph at lines 9--21
173
+ []
174
+ []
175
+
176
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:23: Undefined control sequence.
177
+ l.23 \ac
178
+ {usa}, \ac{usa}
179
+ The control sequence at the end of the top line
180
+ of your error message was never \def'ed. If you have
181
+ misspelled it (e.g., `\hobx'), type `I' and the correct
182
+ spelling (e.g., `I\hbox'). Otherwise just continue,
183
+ and I'll forget about whatever was undefined.
184
+
185
+
186
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:23: LaTeX Error: Missing \begin{document}.
187
+
188
+ See the LaTeX manual or LaTeX Companion for explanation.
189
+ Type H <return> for immediate help.
190
+ ...
191
+
192
+ l.23 \ac{u
193
+ sa}, \ac{usa}
194
+ You're in trouble here. Try typing <return> to proceed.
195
+ If that doesn't work, type X <return> to quit.
196
+
197
+ Missing character: There is no u in font nullfont!
198
+ Missing character: There is no s in font nullfont!
199
+ Missing character: There is no a in font nullfont!
200
+ Missing character: There is no , in font nullfont!
201
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:23: Undefined control sequence.
202
+ l.23 \ac{usa}, \ac
203
+ {usa}
204
+ The control sequence at the end of the top line
205
+ of your error message was never \def'ed. If you have
206
+ misspelled it (e.g., `\hobx'), type `I' and the correct
207
+ spelling (e.g., `I\hbox'). Otherwise just continue,
208
+ and I'll forget about whatever was undefined.
209
+
210
+ Missing character: There is no u in font nullfont!
211
+ Missing character: There is no s in font nullfont!
212
+ Missing character: There is no a in font nullfont!
213
+
214
+ Overfull \hbox (20.0pt too wide) in paragraph at lines 23--24
215
+ []
216
+ []
217
+
218
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:25: Undefined control sequence.
219
+ l.25 \ac
220
+ {eu}, \ac{eu}
221
+ The control sequence at the end of the top line
222
+ of your error message was never \def'ed. If you have
223
+ misspelled it (e.g., `\hobx'), type `I' and the correct
224
+ spelling (e.g., `I\hbox'). Otherwise just continue,
225
+ and I'll forget about whatever was undefined.
226
+
227
+
228
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:25: LaTeX Error: Missing \begin{document}.
229
+
230
+ See the LaTeX manual or LaTeX Companion for explanation.
231
+ Type H <return> for immediate help.
232
+ ...
233
+
234
+ l.25 \ac{e
235
+ u}, \ac{eu}
236
+ You're in trouble here. Try typing <return> to proceed.
237
+ If that doesn't work, type X <return> to quit.
238
+
239
+ Missing character: There is no e in font nullfont!
240
+ Missing character: There is no u in font nullfont!
241
+ Missing character: There is no , in font nullfont!
242
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:25: Undefined control sequence.
243
+ l.25 \ac{eu}, \ac
244
+ {eu}
245
+ The control sequence at the end of the top line
246
+ of your error message was never \def'ed. If you have
247
+ misspelled it (e.g., `\hobx'), type `I' and the correct
248
+ spelling (e.g., `I\hbox'). Otherwise just continue,
249
+ and I'll forget about whatever was undefined.
250
+
251
+ Missing character: There is no e in font nullfont!
252
+ Missing character: There is no u in font nullfont!
253
+
254
+ Overfull \hbox (20.0pt too wide) in paragraph at lines 25--26
255
+ []
256
+ []
257
+
258
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:27: Undefined control sequence.
259
+ l.27 \ac
260
+ {ussr}, \ac{ussr}
261
+ The control sequence at the end of the top line
262
+ of your error message was never \def'ed. If you have
263
+ misspelled it (e.g., `\hobx'), type `I' and the correct
264
+ spelling (e.g., `I\hbox'). Otherwise just continue,
265
+ and I'll forget about whatever was undefined.
266
+
267
+
268
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:27: LaTeX Error: Missing \begin{document}.
269
+
270
+ See the LaTeX manual or LaTeX Companion for explanation.
271
+ Type H <return> for immediate help.
272
+ ...
273
+
274
+ l.27 \ac{u
275
+ ssr}, \ac{ussr}
276
+ You're in trouble here. Try typing <return> to proceed.
277
+ If that doesn't work, type X <return> to quit.
278
+
279
+ Missing character: There is no u in font nullfont!
280
+ Missing character: There is no s in font nullfont!
281
+ Missing character: There is no s in font nullfont!
282
+ Missing character: There is no r in font nullfont!
283
+ Missing character: There is no , in font nullfont!
284
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:27: Undefined control sequence.
285
+ l.27 \ac{ussr}, \ac
286
+ {ussr}
287
+ The control sequence at the end of the top line
288
+ of your error message was never \def'ed. If you have
289
+ misspelled it (e.g., `\hobx'), type `I' and the correct
290
+ spelling (e.g., `I\hbox'). Otherwise just continue,
291
+ and I'll forget about whatever was undefined.
292
+
293
+ Missing character: There is no u in font nullfont!
294
+ Missing character: There is no s in font nullfont!
295
+ Missing character: There is no s in font nullfont!
296
+ Missing character: There is no r in font nullfont!
297
+
298
+ Overfull \hbox (20.0pt too wide) in paragraph at lines 27--28
299
+ []
300
+ []
301
+
302
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:29: Undefined control sequence.
303
+ l.29 \printacronyms
304
+
305
+ The control sequence at the end of the top line
306
+ of your error message was never \def'ed. If you have
307
+ misspelled it (e.g., `\hobx'), type `I' and the correct
308
+ spelling (e.g., `I\hbox'). Otherwise just continue,
309
+ and I'll forget about whatever was undefined.
310
+
311
+ )
312
+ ! Emergency stop.
313
+ <*> ..._Writing/1_Latex_Files/0_Deco/3_Used_Abbrev
314
+
315
+ *** (job aborted, no legal \end found)
316
+
317
+
318
+ Here is how much of TeX's memory you used:
319
+ 17 strings out of 481239
320
+ 639 string characters out of 5920376
321
+ 236564 words of memory out of 5000000
322
+ 15384 multiletter control sequences out of 15000+600000
323
+ 532338 words of font info for 24 fonts, out of 8000000 for 9000
324
+ 1141 hyphenation exceptions out of 8191
325
+ 12i,0n,15p,161b,16s stack positions out of 5000i,500n,10000p,200000b,80000s
326
+ ! ==> Fatal error occurred, no output PDF file produced!
Data/0_Latex_True/0_Deco/3_Used_Abbrev.tex ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ % abbreviations:
2
+ \newacronym{ode}{ODE}{\glstextformat{\textbf{O}}rdinary \glstextformat{\textbf{D}}ifferential \glstextformat{\textbf{E}}quation}
3
+
4
+ \newacronym{cnm}{CNM}{\glstextformat{\textbf{C}}luster-based \glstextformat{\textbf{N}}etwork \glstextformat{\textbf{M}}odeling}
5
+
6
+ \newacronym{cnmc}{\glstextformat{\emph{CNMc}}}{\glstextformat{\textbf{c}}ontrol-oriented \glstextformat{\textbf{C}}luster-based \glstextformat{\textbf{N}}etwork \glstextformat{\textbf{M}}odeling}
7
+
8
+ \newacronym[]{cmm}{CMM}{\glstextformat{\textbf{C}}luster \glstextformat{\textbf{M}}arkov-based \glstextformat{\textbf{M}}odeling}
9
+
10
+ \newacronym{cfd}{CFD}{\glstextformat{\textbf{C}}omputational \glstextformat{\textbf{F}}luid \glstextformat{\textbf{D}}ynamics}
11
+
12
+ \newacronym{rans}{RANS}{\glstextformat{\textbf{R}}eynolds \glstextformat{\textbf{A}}veraged \glstextformat{\textbf{N}}avier \glstextformat{\textbf{S}}tokes}
13
+
14
+ \newacronym{dlr}{DLR}{German Aerospace Center}
15
+
16
+ \newacronym{gpu}{GPU}{\glstextformat{\textbf{G}}raphics \glstextformat{\textbf{P}}rocessing \glstextformat{\textbf{U}}nit}
17
+
18
+ \newacronym{cpu}{CPU}{\glstextformat{\textbf{C}}entral \glstextformat{\textbf{P}}rocessing \glstextformat{\textbf{U}}nit}
19
+
20
+ \newacronym[]{sdic}{SDIC}{\glstextformat{\textbf{S}}ensitive \glstextformat{\textbf{D}}ependence on \glstextformat{\textbf{I}}nitial \glstextformat{\textbf{C}}onditions}
21
+
22
+ \newacronym[]{nmf}{NMF}{\glstextformat{\textbf{N}}on-negative \glstextformat{\textbf{M}}atrix \glstextformat{\textbf{F}}actorization}
23
+
24
+ \newacronym[]{svd}{SVD}{\glstextformat{\textbf{S}}ingular \glstextformat{\textbf{V}}alue \glstextformat{\textbf{D}}ecomposition}
25
+
26
+ \newacronym[]{rf}{RF}{\glstextformat{\textbf{R}}andom \glstextformat{\textbf{F}}orest}
27
+
28
+ \newacronym[]{cpd}{CPD}{\glstextformat{\textbf{C}}luster \glstextformat{\textbf{P}}robability \glstextformat{\textbf{D}}istribution}
29
+
30
+ \newacronym[]{cpevol}{CPE}{\glstextformat{\textbf{C}}entroid \glstextformat{\textbf{P}}osition \glstextformat{\textbf{E}}volution}
31
+
32
+
33
+ \newacronym[]{dtw}{DTW}{\glstextformat{\textbf{D}}ynamic \glstextformat{\textbf{T}}ime \glstextformat{\textbf{W}}arping}
34
+
35
+ \newacronym[]{knn}{KNN}{\glstextformat{\textbf{K}-\textbf{N}}earest \glstextformat{\textbf{N}}eighbor}
36
+
Data/0_Latex_True/1_Task/1_Introduction.tex ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ \chapter{Introduction}
4
+ \label{chap_1_Intro}
5
+ In this work, a tool called \glsfirst{cnmc} is further developed.
6
+ The overall goal, in very brief terms, is to generate a model, which is able to
7
+ predict the trajectories of general dynamical systems. The model
8
+ shall be capable of predicting the trajectories when a model parameter
9
+ value is changed.
10
+ Some basics about dynamical systems are covered in
11
+ subsection \ref{subsec_1_1_1_Principles} and in-depth explanations about \gls{cnmc} are given in
12
+ chapter \ref{chap_2_Methodlogy}.\newline
13
+
14
+ However, for a short and broad introduction to \gls{cnmc} the workflow depicted in figure \ref{fig_1_CNMC_Workflow} shall be highlighted.
15
+ The input it receives is data of a dynamical system or space state vectors for a range of model parameter values. The two main important outcomes are some accuracy measurements and the predicted trajectory for each desired model parameter value.
16
+ Any inexperienced user may only have a look at the predicted trajectories to
17
+ quickly decide visually whether the prediction matches the trained data. Since \gls{cnmc} is written in a modular manner, meaning it can be regarded as
18
+ a black-box function, it can easily be integrated into other existing codes or
19
+ workflows. \newline
20
+
21
+ \begin{figure}[!h]
22
+ \def\svgwidth{\linewidth}
23
+ \input{2_Figures/1_Task/1_CNMc.pdf_tex}
24
+ \caption{Broad overview: Workflow of \gls{cnmc}}
25
+ \label{fig_1_CNMC_Workflow}
26
+ \end{figure}
27
+
28
+ % ==============================================================================
29
+ % ==================== Motivation ==============================================
30
+ % ==============================================================================
31
+ \section{Motivation}
32
+ \label{sec_Motivation}
33
+ \gls{cfd} is an
34
+ indispensable technique, when aimed to obtain information about aerodynamic properties, such
35
+ as drag and lift distributions. Modern \gls{cfd} solvers, such as \gls{dlr}'s \emph{TAU}
36
+ \cite{Langer2014} often solve
37
+ the \gls{rans} equations to obtain one flow-field. Advanced solvers like \emph{TAU} apply advanced
38
+ mathematical knowledge to speed up calculations and
39
+ heavily exploit multiple \glspl{cpu} in an optimized manner. Nevertheless,
40
+ depending on the size of the object and accuracy demands or in other terms mesh grid size, the computation often is not economically
41
+ efficient enough. If the object for which a flow field is desired is a full aircraft, then even with a big cluster and making use of symmetry properties of the shape of the airplane, if such exists, the computation of one single
42
+ flow field can still easily cost one or even multiple months in computation time. \newline
43
+
44
+ In modern science, there is a trend towards relying on \glspl{gpu} instead of \glspl{cpu}. Graphic cards possess much
45
+ more cores than a CPU. However, even with the utilization of \glspl{gpu} and GPU-optimized \gls{cfd} solvers, the computation is still very expensive. Not only in time but also
46
+ in electricity costs.
47
+ Running calculations on a cluster for multiple months is so expensive that wind tunnel measurements can be considered to be the economically more
48
+ efficient choice to make.
49
+ Regarding accuracy, wind tunnel measurements and \gls{cfd} simulations with state-of-the-art solvers can be considered to be
50
+ equally useful. When using \gls{cfd} solvers, there is one more thing to keep
51
+ in mind.
52
+ Each outcome is only valid for one single set of input parameters.
53
+ Within the set of input parameters, the user often is only interested
54
+ in the impact of one parameter, e.g., the angle of attack. Consequently,
55
+ wanting to capture the effect of the change of the angle of attack on the flow field,
56
+ multiple \gls{cfd} calculations need to be performed, i.e., for each desired
57
+ angle of attack.
58
+ Based on the chosen angle of attack the solver might be able to converge faster to a solution. However, the calculation time
59
+ needs to be added up for each desired angle of attack.
60
+ In terms of time and energy costs, this could again be more expensive than wind-tunnel
61
+ measurements. Wind tunnel measurements are difficult to set up, but once a
62
+ configuration is available, measuring flow field properties with it, in general, is known to be faster and easier than running \gls{cfd} simulations.\newline
63
+
64
+ % ------------------------------------------------------------------------------
65
+ Within the scope of this work, a data-driven tool was developed that allows predictions for dynamic systems.
66
+ In \cite{Max2021} the first version of it showed promising results.
67
+ However, it was dedicated to the solution of one single dynamical system, i.e., the Lorenz system \cite{lorenz1963deterministic}.
68
+ Due to the focus on one singular dynamical system, the proposed \glsfirst{cnmc} was not verified for other dynamical systems.
69
+ Hence, one of the major goals of this thesis is to enable \gls{cnmc} to be applied to any general dynamical system.
70
+ For this, it is important to state that, for two main reasons, \gls{cnmc} was not built upon the first version of \gls{cnmc}, but written from scratch.
71
+ First, since the initial version of \gls{cnmc} was designed for only a single dynamic system, extending it to a general \gls{cnmc} was considered more time-consuming than starting fresh.
72
+ Second, not all parts of the initial version of \gls{cnmc} could be executed without errors.
73
+ The current \gls{cnmc} is therefore developed in a modular manner, i.e., on the one hand, the implementation of any other dynamical system is straightforward.
74
+ To exemplify this, 10 different dynamic systems are available by default, so new dynamic systems can be added analogously.\newline
75
+
76
+ The second important aspect for allowing \gls{cnmc} to be utilized in any general dynamical system is the removal of the two limitations.
77
+ In the first version of \gls{cnmc} the behavior of the dynamical systems had to be circular as, e.g., the ears of the Lorenz system \cite{lorenz1963deterministic} are.
78
+ Next, its dimensionality must be strictly 3-dimensional.
79
+ A general dynamical system is neither bound to exhibit a circular motion nor to be 3-dimensional.
80
+ By removing these two limitations \gls{cnmc} can be leveraged on any dynamical system.
81
+ However, the first version of \gls{cnmc} employed \glsfirst{nmf} as the modal decomposition method.
82
+ The exploited \gls{nmf} algorithm is highly computationally intensive, which makes a universal \gls{cnmc} application economically inefficient.
83
+ Therefore, the current \gls{cnmc} has been extended by the option to choose between the \gls{nmf} and the newly implemented \glsfirst{svd}.
84
+ The aim is not only that \gls{cnmc} is returning results within an acceptable timescale, but also to ensure that the quality of the modal decomposition remains at least at an equal level.
85
+ Proofs for the latter can be found in section \ref{sec_3_3_SVD_NMF}.\newline
86
+
87
+ With these modifications, the current \gls{cnmc} is now able to be used in any dynamical system within a feasible time frame.
88
+ The next addressed issue is the B-spline interpolation.
89
+ It is used in the propagation step of \glsfirst{cnm} \cite{Fernex2021} to smooth the predicted trajectory.
90
+ However, as already noted in \cite{Max2021}, when the number of the clustering centroids $K$ is $K \gtrapprox 15$, the B-spline interpolation embeds oscillations with unacceptably high deviations from the original trajectories.
91
+ To resolve this problem, the B-spline interpolation is replaced with linear interpolation.
92
+ By preventing the occurrence of outliers caused by the B-spline interpolation, neither the autocorrelation defined in subsection \ref{subsec_1_1_3_first_CNMc} nor the predicted trajectories are made impractical.
93
+ Apart from the main ability of \gls{cnmc} a high number of additional features are available, e.g., the entire pipeline of \gls{cnmc} with all its parameters can be adjusted via one file (\emph{settings.py}), an incorporated log file, storing results at desired steps, the ability to execute multiple dynamical models consequentially and activating and disabling each step of \gls{cnmc}.
94
+ The latter is particularly designed for saving computational time.
95
+ Also, \gls{cnmc} comes with its own post-processor.
96
+ It is optional to generate and save the plots.
97
+ However, in the case of utilizing this feature, the plots are available as HTML files which, e.g., allow extracting further information about the outcome or rotating and zooming in 3d plots.
Data/0_Latex_True/1_Task/2_State_Of_Art.tex ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ % ===================================================
3
+ % ==================== STATE OF THE ART =============
4
+ % ===================================================
5
+ \section{State of the art}
6
+ \label{sec_1_1_State}
7
+
8
+ The desire to get fast \gls{cfd} output is not new and also
9
+ a data-driven approach is found in the literature.
10
+ This section aims to describe some evolutionary steps of \glsfirst{cnmc}. Given that this work is built upon the most recent advancements,
11
+ they will be explained in particular detail.
12
+ Whereas the remaining development stages are briefly
13
+ summarized to mainly clarify the differences and
14
+ mention the reasons why improvements were desired. Since this topic
15
+ demands some prior knowledge to follow \gls{cnmc}'s workflow and goal, some basic principles about important topics shall be given in their respective subsections.\newline
16
+
17
+ The first data-driven approach, which is known to the author,
18
+ is by \cite[]{Kaiser2014} and shall be called \gls{cmm}.
19
+ \gls{cnmc} is not directly built upon \gls{cmm} but on the latest version
20
+ of \gls{cnm} and is described in \cite[]{Fernex2021}.
21
+ \gls{cnmc} invokes \gls{cnm} many times in order to use
22
+ its outcome for further progress. Therefore, it's evident that only if \gls{cnm} is understood, CNMc's
23
+ progress can be followed. \gls{cmm} on the other hand has only a historical link to \gls{cnmc}, but no line of code of \gls{cmm} is invoked in \gls{cnmc}'s workflow. Consequently, \gls{cnm} will be explained in more detail than \gls{cmm}.
24
+
25
+ \subsection{Principles}
26
+ \label{subsec_1_1_1_Principles}
27
+ CNM \cite[]{Fernex2021} is a method that uses some machine learning
28
+ techniques, graphs, and probability theory to mirror the behavior of
29
+ complex systems. These complex systems are described often by dynamical systems, which themselves are simply a set of
30
+ differential equations. Differential equations are useful to
31
+ capture motion. Thus, a dynamical system can be seen as a synonym for motion
32
+ over time. Some differential equations can be
33
+ solved in closed form, meaning analytically. However, for most of them
34
+ either it is too difficult to obtain an analytical solution or the
35
+ analytical solution is very unhandy or unknown. Unhandy in terms of the solution
36
+ being expressed in too many terms. Therefore, in most
37
+ cases, differential equations are solved numerically. Since
38
+ the purpose of \gls{cnm} is not to be only used for analytically
39
+ solvable equations, a numerical ordinary differential integrator
40
+ is used. \newline
41
+
42
+ The default solver is \emph{SciPy}'s \emph{RK45} solver.
43
+ It is a widely deployed solver and can also be applied to
44
+ chaotic systems for integration
45
+ over a certain amount of time.
46
+ Another option for solving chaotic \gls{ode}s is
47
+ \emph{LSODA}. The developers of \emph{pySindy} \cite{Silva2020, Kaptanoglu2022}
48
+ state on their homepage \cite{pysindy_Home} that
49
+ \emph{LSODA} even outperforms the default \emph{RK45} when it comes to chaotic dynamical systems. The reasons why for \gls{cnmc} still \emph{RK45} was chosen will be given in
50
+ section
51
+ \ref{sec_2_2_Data_Gen}.
52
+ It is important to remember that turbulent flows are chaotic.
53
+ This is the main reason why in this work \gls{cnmc}, has been designed to handle not only general dynamical systems but also general chaotic attractors.
54
+ Other well-known instances where chaos is found are, e.g., the weather, the
55
+ motion of planets, and the financial market, which is believed to be chaotic.
56
+ For more places, where chaos is found the reader is referred to \cite{Argyris2017}.\newline
57
+
58
+ Note that \gls{cnmc} is designed for all kinds of dynamical systems, it is not restricted to linear, nonlinear or chaotic systems.
59
+ Therefore, chaotic systems shall be recorded to be only one application example of \gls{cnmc}.
60
+ However, because chaotic attractors were primarily exploited in the context of the performed investigations in this work, a slightly lengthier introduction to chaotic systems is provided in the appendix \ref{ch_Ap_Chaotic}.
61
+ Two terms that will be used extensively over this entire thesis are called model parameter value $\beta$ and a range of model parameter values $\vec{\beta}$. A regular differential equation can be expressed as
62
+ in equation \eqref{eq_1_0_DGL}, where $F$ is denoted as the function which describes the dynamical system.
63
+ The vector $\vec{x}(t)$ is the state vector.
64
+ The form in which differential equations are viewed in this work is given in equation \eqref{eq_1_1_MPV}.
65
+
66
+ \begin{equation}
67
+ F = \dot{\vec{x}}(t) = \frac{d\vec{x}(t)}{dt} = f(\vec{x}(t))
68
+ \label{eq_1_0_DGL}
69
+ \end{equation}
70
+ \begin{equation}
71
+ F_{\gls{cnmc}} = \left(\dot{\vec{x}}(t), \, \vec{\beta} \right) =
72
+ \left( \frac{d\vec{x}(t)}{dt}, \, \vec{\beta} \right) =
73
+ f(\vec{x}(t), \, \vec{\beta} )
74
+ \label{eq_1_1_MPV}
75
+ \end{equation}
76
+
77
+ Note the vector $\vec{\beta}$ indicates a range of model parameter values, i.e., the differential equation is solved for each model parameter value $\beta$ separately.
78
+ The model parameter value $\beta$ is a constant and does not depend on the time, but rather it is a user-defined value.
79
+ In other terms, it remains unchanged over the entire timeline for which the dynamical system is solved.
80
+ The difference between $F$ and $F_{\gls{cnmc}}$ is that $F$ is the differential equation for only one $\beta$, while $F_{\gls{cnmc}}$ can be considered as the same differential equation, however, solved for a range of individual $\beta$ values.
81
+ The subscript \gls{cnmc} stresses the fact that \gls{cnmc} is performed for a range of model parameter values $\vec{\beta}$.
82
+ Some dynamical systems, which will be used for \gls{cnmc}'s validation can be found in section \ref{sec_2_2_Data_Gen}. They are written as a set of differential equations in the $\beta$ dependent form.
83
+ Even a tiny change in $\beta$ can result in the emergence of an entirely different trajectory. \newline
84
+ % The behavior could exhibit such strong alterations, such
85
+ % that one might believe to require new underlying differential equations.
86
+ % These heavy transitions are called bifurcations.\newline
87
+
88
+ % Although bifurcations
89
+ % cause the trajectory to vary seemingly arbitrary,
90
+ % there exist canonical bifurcation types. Explanations for
91
+ % deriving their equations and visualization are well covered in literature
92
+ % and can be found,
93
+ % e.g., in \cite{Argyris2017,Kutz2022,Strogatz2019}. Although a detailed coverage of bifurcations is not feasible within the scope of this thesis, the method of how the trajectory is changed in such a significant way shall be outlined.
94
+ % Namely, bifurcations can replace, remove and generate new attractors, e.g., the above introduced fix-point, limit cycle and torus attractor.
95
+ % Bifurcations were mentioned here only for the sake of completeness. Indeed, one of the final goals for \gls{cnmc} is the extension to handle bifurcations. However, the latter is not part of this thesis.\newline
96
+
97
+ In summary, the following key aspects can be concluded. The reason why \gls{cnmc} in future releases is believed to be able to manage real \gls{cfd} fluid flow data and make predictions for unknown model parameter values $\beta$ is that turbulent flows are chaotic. Thus, allowing \gls{cnmc} to work with chaotic attractors in the course of this thesis is considered to be the first step toward predicting entire flow fields.
98
+ % The second point is that there is no real unified definition of chaos, but there are some aspects that are more prevalent in the literature.
Data/0_Latex_True/1_Task/3_CNM.tex ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ % =================================================
2
+ % ================ Meet \gls{cnm} =======================
3
+ % =================================================
4
+ \section{Cluster-based Network Modeling (CNM)}
5
+ \label{sec_1_1_2_CNM}
6
+ In this section, the workflow of \gls{cnm} \cite{Fernex2021} will be elaborated, as well as the previous attempt to expand the algorithm to accommodate a range of model parameter values $\vec{\beta}$.
7
+ \gls{cnm} \cite{Fernex2021} is the basis on which \gls{cnmc} is built or rather
8
+ \gls{cnmc} invokes \gls{cnm} multiple times for one of its preprocessing steps.
9
+ CNM can be split up into 4 main tasks, which are
10
+ data collection, clustering, calculating
11
+ transition properties and propagation.
12
+ The first step is to collect the data, which can be provided from any dynamic system or numerical simulations.
13
+ In this study, only dynamical systems are investigated.
14
+ Once the data for the dynamical system is passed to \gls{cnm}, the data is clustered, e.g., with k-means++ algorithm \cite{Arthur2006}.
15
+ A detailed elaboration about this step is given in section \ref{sec_2_3_Clustering}. \gls{cnm} exploits graph theory for approximating the trajectory as a movement on nodes.
16
+ These nodes are equivalent to the centroids, which are acquired through clustering.
17
+ Next, the motion, i.e., movement from one centroid to another, shall be clarified.\newline
18
+
19
+ In order to fully describe the motion on the centroids, the time at which
20
+ one centroid is visited is exited, and also the order of movement must be known.
21
+ Note, when saying the motion is on the centroids, that
22
+ means the centroids or characteristic nodes do not move
23
+ at all. The entire approximated motion of the original trajectory
24
+ on the nodes is described with the transition
25
+ property matrices $\bm Q$ and $\bm T$.
26
+ The matrices $\bm Q$ and $\bm T$ are the transition probability and transition time matrices, respectively.
27
+ $\bm Q$ is used to apply probability theory for predicting the next following most likely centroid. In other words, if
28
+ the current location is at any node $c_i$,
29
+ $\bm Q$ will provide all possible successor centroids
30
+ with their corresponding transition probabilities.
31
+ Thus, the motion on the centroids
32
+ through $\bm Q$ is probability-based.
33
+ In more detail, the propagation of the motion on the centroids can be described as equation \eqref{eq_34}.
34
+ The variables are denoted as the propagated $\vec{x}(t)$ trajectory, time $t$, centroid positions $\vec{c}_k,\, \vec{c}_j$, the time $t_j$ where centroid $\vec{c}_j$ is left and the transition time $T_{k,j}$ from $\vec{c}_j$ to $\vec{c}_k$ \cite{Fernex2021}.
35
+ Furthermore, for the sake of a smooth trajectory, the motion between the centroids is interpolated through a spline interpolation.\newline
36
+
37
+ \begin{equation}
38
+ \vec{x}(t) = \alpha_{kj} (t) \, \vec{c}_k + [\, 1 - \alpha_{kj} (t)\,] \, \vec{c}_j, \quad \alpha_{kj} (t) = \frac{t-t_j}{T_{k,j}}
39
+ \label{eq_34}
40
+ \end{equation}
41
+
42
+
43
+ The $\bm Q$ matrix only contains non-trivial transitions, i.e.,
44
+ if after a transition the centroid remains on the same centroid, the transition is not considered to be a real transition in \gls{cnm}.
45
+ This idea
46
+ is an advancement to the original work of Kaiser et al. \cite{Kaiser2014}.
47
+ In Kaiser et al. \cite{Kaiser2014} the transition is modeled
48
+ as a Markov model. Markov models enable trivial transitions. Consequently,
49
+ the diagonals of the resulting non-direct transition matrix $\bm{Q_n}$
50
+ exhibit the highest values. The diagonal elements stand for trivial
51
+ transitions which lead to idling on the same centroid
52
+ many times. Such behavior is encountered and described by Kaiser et al. \cite{Kaiser2014}.\newline
53
+
54
+
55
+ There are 3 more important aspects that come along when
56
+ adhering to Markov models. First, the propagation of motion is done
57
+ by matrix-vector multiplication. In the case of the existence of a
58
+ stationary state, the solution
59
+ will converge to the stationary state, with an increasing number of iterations, where no change with time happens.
60
+ A dynamical system can only survive as long as change with time exists.
61
+ In cases where no change with respect to time is encountered, equilibrium
62
+ or fixed points are found.
63
+ Now, if a stationary state or fixed point
64
+ exists in the considered dynamical system, the propagation
65
+ will tend to converge to this fixed point. However, the nature of
66
+ Markov models must not necessarily be valid for general dynamical systems.
67
+ Another way to see that is by applying some linear algebra. The
68
+ long-term behavior of the Markov transition matrix can be obtained
69
+ with equation \eqref{eq_3_Infinite}. Here, $l$ is the number
70
+ of iterations to get from one stage to another. Kaiser et al.
71
+ \cite{Kaiser2014} depict in a figure, how the values of
72
+ $\bm{Q_n}$ evolves after $1 \mathrm{e}{+3}$ steps. $\bm{Q_n}$ has
73
+ become more uniform.
74
+
75
+ \begin{equation}
76
+ \label{eq_3_Infinite}
77
+ \lim\limits_{l \to \infty} \bm{Q_n}^l
78
+ \end{equation}
79
+
80
+ If the number of steps is increased even further
81
+ and all the rows would have the same probability value,
82
+ $\bm{Q_n}$ would converge to a stationary point. What
83
+ also can be concluded from rows being equal is that it does not matter
84
+ from where the dynamical system was started or what its
85
+ initial conditions were. The probability
86
+ to end at one specific state or centroid is constant as
87
+ the number of steps approaches infinity. Following that,
88
+ it would violate the sensitive dependency on initial conditions,
89
+ which often is considered to be mandatory for modeling chaotic systems. Moreover, chaotic
90
+ systems amplify any perturbation exponentially, whether at time
91
+ $t = 0$ or at time $t \gg 0$. \newline
92
+
93
+ Thus, a stationary transition matrix $\bm{Q_n}$ is prohibited by chaos at any time step.
94
+ This can be found to be one of the main reasons, why
95
+ the \textbf{C}luster \textbf{M}arkov based \textbf{M}odeling (\gls{cmm})
96
+ often fails to
97
+ predict the trajectory.
98
+ Li et al. \cite{Li2021} summarize this observation
99
+ compactly as after some time the initial condition
100
+ would be forgotten and the asymptotic distribution would be reached.
101
+ Further, they stated, that due to this fact, \gls{cmm} would
102
+ not be suited for modeling dynamical systems.
103
+ The second problem which is involved, when deploying
104
+ regular Markov modeling is that the future only depends
105
+ on the current state. However, \cite{Fernex2021} has shown
106
+ with the latest \gls{cnm} version that incorporating also past
107
+ centroid positions for predicting the next centroid position
108
+ increases the prediction quality. The latter effect is especially
109
+ true when systems are complex.\newline
110
+
111
+
112
+ However, for multiple consecutive time steps
113
+ the trajectory's position could still be assigned to the same
114
+ centroid position (trivial transitions).
115
+ Thus, past centroids are those centroids that are found when going
116
+ back in time through only non-trivial transitions. The number of incorporated
117
+ past centroids is given as equation \eqref{eq_5_B_Past}, where $L$ is denoted
118
+ as the model order number. It represents the number of all
119
+ considered centroids, where the current and all the past centroids are included, with which the prediction of the successor centroid
120
+ is made.
121
+
122
+ \begin{equation}
123
+ B_{past} = L -1
124
+ \label{eq_5_B_Past}
125
+ \end{equation}
126
+
127
+ Furthermore, in \cite{Fernex2021} it is not simply believed that an
128
+ increasing model
129
+ order $L$ would increase the outcome quality in every case.
130
+ Therefore, a study on the number of $L$ and the clusters $K$
131
+ was conducted. The results proved that the choice of
132
+ $L$ and $K$ depend on the considered dynamical system.
133
+ \newline
134
+
135
+ The third problem encountered when Markov models are used is
136
+ that the time step must be provided. This time step is used
137
+ to define when a transition is expected. In case
138
+ the time step is too small, some amount of iterations is
139
+ required to transit to the next centroid. Thus, non-trivial
140
+ transitions would occur. In case the time step is too high,
141
+ the intermediate centroids would be missed. Such behavior
142
+ would be a coarse approximation of the real dynamics. Visually this can
143
+ be thought of as jumping from one centroid to another while
144
+ having skipped one or multiple centroids. The reconstructed
145
+ trajectory could lead to an entirely wrong representation of the
146
+ state-space.
147
+ CNM generates the transition time matrix $\bm T$ from data
148
+ and therefore no input from the user is required.\newline
149
+
150
+ A brief review of how the $\bm Q$ is built shall be provided.
151
+ Since the concept of
152
+ model order, $L$ has been explained, it can be clarified that
153
+ it is not always right to call $\bm Q$ and $\bm T$ matrices.
154
+ The latter is only correct, if $L = 1$, otherwise it must be
155
+ denoted as a tensor. $\bm Q$ and $\bm T$ can always be
156
+ referred to as tensors since a tensor incorporates matrices, i.e., a matrix is a tensor of rank 2.
157
+ In order to generate $\bm Q$,
158
+ $L$ must be defined, such that the shape of $\bm Q$ is
159
+ known. The next step is to gather all sequences of clusters
160
+ $c_i$. To understand that, we imagine the following scenario,
161
+ $L = 3$, which means 2 centroids from the past and the
162
+ current one are
163
+ incorporated to predict the next centroid.
164
+ Furthermore, imagining that two cluster sequence scenarios were found,
165
+ $c_0 \rightarrow c_1 \rightarrow c_2 $ and $c_5 \rightarrow c_1 \rightarrow c_2 $.
166
+ These cluster sequences tell us that the current centroid is $c_2$ and the remaining centroids belong to the past.
167
+ In order to complete the sequence for $L = 3$, the successor cluster also needs
168
+ to be added, $c_0 \rightarrow c_1 \rightarrow c_2 \rightarrow c_5 $ and $c_5 \rightarrow c_1 \rightarrow c_2 \rightarrow c_4$.
169
+ The following step is to calculate the likelihood
170
+ of a transition to a specific successor cluster. This is done with equation \eqref{eq_4_Poss}, where $n_{k, \bm{j}}$
171
+ is the amount of complete sequences, where also the successor
172
+ is found. The index $j$ is written as a vector in order
173
+ to generalize the equation for $L \ge 1$. It then contains
174
+ all incorporated centroids from the past and the current centroid.
175
+ The index $k$ represents the successor centroid ($\bm{j} \rightarrow k$).
176
+ Finally, $n_{\bm{j}}$ counts all the matching incomplete sequences.
177
+
178
+ \begin{equation}
179
+ \label{eq_4_Poss}
180
+ P_{k, \bm j} = \frac{n_{k,\bm{j}}}{n_{\bm{j}}}
181
+ \end{equation}
182
+
183
+ After having collected all the possible complete cluster sequences with their corresponding probabilities $\bm Q$, the transition time tensors $\bm T$ can be inferred from the data.
184
+ With that, the residence time on each cluster is known and can be
185
+ used for computing the transition times for every
186
+ single transition. At this stage, it shall be highlighted again,
187
+ CNM approximates its data fully with only two
188
+ matrices or when $L \ge 2$ tensors, $\bm Q$ and $\bm T$. The
189
+ final step is the prorogation following equation \eqref{eq_34}.
190
+ For smoothing the propagation between two centroids the B-spline interpolation
191
+ is applied.
192
+
193
+ % It can be concluded that one of the major differences between \gls{cnm} and \gls{cmm} is that {cnm} dismissed Markov modeling.
194
+ % Hence, only direct or non-trivial transition are possible.
195
+ % Fernex et al. \cite{Fernex2021} improved \cite{Li2021} by
196
+ % rejecting one more property of Markov chains, namely
197
+ % that the future state could be inferred exclusively from the current state.
198
+ % Through the upgrade of \cite{Fernex2021}, incorporating past states for the prediction of future states could be exploited.
Data/0_Latex_True/1_Task/4_CNMc.tex ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \subsection{First version of CNMc}
2
+ \label{subsec_1_1_3_first_CNMc}
3
+ Apart from this thesis, there already has been an
4
+ attempt to build \glsfirst{cnmc}.
5
+ The procedure, progress and results of the most recent effort are described in \cite{Max2021}.
6
+ Also, in the latter, the main idea was to predict the trajectories
7
+ for dynamical systems with a control term or a model parameter value $\beta$.
8
+ In this subsection, a review of
9
+ \cite{Max2021} shall be given with pointing out which parts need to be improved. In addition, some distinctions between the previous version of \gls{cnmc} and the most recent version are named.
10
+ Further applied modifications are provided in chapter \ref{chap_2_Methodlogy}.\newline
11
+
12
+ To avoid confusion between the \gls{cnmc} version described in this thesis and the prior \gls{cnmc} version, the old version will be referred to as \emph{first CNMc}.
13
+ \emph{First CNMc} starts by defining a range of model parameter values
14
+ $\vec{\beta}$.
15
+ It was specifically designed to only be able to make predictions for the Lorenz attractor \cite{lorenz1963deterministic}, which is described with the set of equations \eqref{eq_6_Lorenz} given in section \ref{sec_2_2_Data_Gen}.
16
+ An illustrative trajectory of the Lorenz system \cite{lorenz1963deterministic} with $\beta = 28$ is depicted in figure \ref{fig_2_Lorenz_Example}.\newline
17
+ %
18
+ % ==============================================================================
19
+ % ============================ PLTS ============================================
20
+ % ==============================================================================
21
+ \begin{figure}[!h]
22
+ \centering
23
+ \includegraphics[width =\textwidth]
24
+ % In order to insert an eps file - Only_File_Name (Without file extension)
25
+ {2_Figures/1_Task/2_Lorenz.pdf}
26
+ \caption{Illustrative trajectory of the Lorenz attractor \cite{lorenz1963deterministic}, $\beta = 28$}
27
+ \label{fig_2_Lorenz_Example}
28
+ \end{figure}
29
+ %
30
+
31
+ Having chosen a range of model parameter values $\vec{\beta}$, the Lorenz system was solved numerically and its solution was supplied to \gls{cnm} in order to run k-means++ on all received trajectories.
32
+ % It assigns each data point to a cluster and
33
+ % calculates all the $K$ cluster centroids for all provided trajectories.
34
+ % Each cluster has an identity that in literature is known as a label, with which it can be accessed.
35
+ The centroid label allocation by the k-means++ algorithm is conducted randomly.
36
+ Thus, linking or matching centroid labels from one model parameter value $\beta_i$ to another model parameter value $\beta_j$, where $i \neq j$, is performed in 3 steps.
37
+ The first two steps are ordering the $\vec{\beta}$ in ascending
38
+ order and transforming the Cartesian coordinate system into a spherical coordinate system.
39
+ With the now available azimuth angle, each centroid is labeled in increasing order of the azimuth angle.
40
+ The third step is to match the centroids across $\vec{\beta}$, i.e., $\beta_i$ with $\beta_j$.
41
+ For this purpose, the centroid label from the prior model parameter value
42
+ is used as a reference to match its corresponding nearest centroid in the next model parameter value.
43
+ As a result, one label can be assigned to one centroid across the available $\vec{\beta}$.\newline
44
+
45
+
46
+ Firstly, \cite{Max2021} showed that ambiguous regions can
47
+ occur. Here the matching of the centroids across the $\vec{\beta}$ can
48
+ not be trusted anymore.
49
+ Secondly, the deployed coordinate transformation is assumed to only work properly in 3 dimensions. There is the possibility to set one
50
+ or two variables to zero in order to use it in two or one dimension, respectively.
51
+ However, it is not known, whether such an artificial decrease of dimensions yields a successful outcome for lower-dimensional (2- and 1-dimensional) dynamical systems. In the event of a 4-dimensional or even higher dimensional case, the proposed coordinate transformation cannot be used anymore.
52
+ In conclusion, the transformation is only secure to be utilized in 3 dimensions.
53
+ Thirdly, which is also acknowledged by \cite[]{Max2021} is that the
54
+ coordinate transformation forces the dynamical system to have
55
+ a circular-like trajectory, e.g., as the Lorenz system depicted in figure \ref{fig_2_Lorenz_Example} does.
56
+ Since not every dynamical system is forced to have a circular-like trajectory, it is one of the major parts which needs to be improved, when \emph{first CNMc} is meant to be leveraged for all kinds of dynamical systems.
57
+ Neither the number of dimensions nor the shape of the trajectory should matter for a generalized \gls{cnmc}.\newline
58
+
59
+
60
+ Once the centroids are matched across all the available $\vec{\beta}$, pySINDy \cite{Brunton2016,Silva2020, Kaptanoglu2022} is used
61
+ to build a regression model. This regression model serves the purpose
62
+ of capturing all centroid positions of the calculated model parameter
63
+ values $\vec{\beta }$ and making predictions for unseen $\vec{\beta}_{unseen}$.
64
+ Next, a preprocessing step is performed on the
65
+ transition property tensors $\bm Q$ and $\bm T$. Both are
66
+ scaled, such that the risk of a bias is assumed to be reduced.
67
+ Then, on both \glsfirst{nmf} \cite{Lee1999} is
68
+ applied.
69
+ Following equation \eqref{eq_5_NMF} \gls{nmf} \cite{Lee1999} returns
70
+ two matrices, i.e., $\bm W$ and $\bm H$.
71
+ The matrices exhibit a physically
72
+ relevant meaning. $\bm W$ corresponds to a mode collection and $\bm H$ contains
73
+ the weighting factor for each corresponding mode.\newline
74
+
75
+ \begin{equation}
76
+ \label{eq_5_NMF}
77
+ \bm {A_{i \mu}} \approx \bm A^{\prime}_{i \mu} = (\bm W \bm H)_{i \mu} = \sum_{a = 1}^{r}
78
+ \bm W_{ia} \bm H_{a \mu}
79
+ \end{equation}
80
+
81
+ The number of modes $r$ depends on the underlying dynamical system.
82
+ Firstly, the \gls{nmf} is utilized by deploying optimization.
83
+ The goal is to satisfy the condition that the deviation between the original matrix and the approximated matrix shall be below a chosen threshold.
84
+ For this purpose, the number of required optimization iterations easily can be
85
+ in the order of $\mathcal{O} (1 \mathrm{e}+7)$. The major drawback here is that such a high number of iterations is computationally very expensive.
86
+ Secondly, for \emph{first CNMc} the number of modes $r$ must be known beforehand.
87
+ Since in most cases this demand cannot be fulfilled two issues arise.
88
+ On the one hand, running \gls{nmf} on a single known $r$ can already be considered to be computationally expensive.
89
+ On the other hand, conducting a study to find the appropriate $r$ involves even more computational effort.
90
+ Pierzyna \cite[]{Max2021} acknowledges this issue and defined it to be one of the major limitations. \newline
91
+
92
+
93
+ The next step is to generate a regression model with \glsfirst{rf}.
94
+ Some introductory words about \gls{rf} are given in subsection \ref{subsec_2_4_2_QT}.
95
+ As illustrated in \cite{Max2021}, \gls{rf} was able to reproduce the training data reasonably well.
96
+ However, it faced difficulties to approximate spike-like curves.
97
+ Once the centroid positions and the two transitions property tensors $\bm Q$ and $\bm T$ are known, they are passed to \gls{cnm} to calculate the predicted trajectories.
98
+ For assessing the prediction quality two methods are used, i.e., the autocorrelation and the \glsfirst{cpd}.
99
+ \gls{cpd} outlines the probability of being on one of the $K$ clusters.
100
+ The autocorrelation given in equation \eqref{eq_35} allows comparing two trajectories with a phase-mismatch \cite{protas2015optimal} and it measures how well a point in the trajectory correlates with a point that is some time steps ahead.
101
+ The variables in equation \eqref{eq_35} are denoted as time lag $\tau$, state space vector $\bm x$, time $t$ and the inner product $(\bm x, \bm y) = \bm x \cdot \bm{y}^T$. \newline
102
+
103
+ \begin{equation}
104
+ R(\tau) = \frac{1}{T - \tau} \int\limits_{0}^{T-\tau}\, (\bm{x} (t), \bm{x}(t+ \tau)) dt, \quad \tau \in [\, 0, \, T\,]
105
+ \label{eq_35}
106
+ \end{equation}
107
+
108
+ \emph{First CNMc} proved to work well for the Lorenz system only for the number of centroids up to $K=10$ and small $\beta$.
109
+ Among the points which need to be improved is the method to match the centroids across the chosen $\vec{\beta}$.
110
+ Because of this, two of the major problems occur, i.e., the limitation to 3 dimensions and the behavior of the trajectory must be circular, similar to the Lorenz system \cite{lorenz1963deterministic}.
111
+ These demands are the main obstacles to the application of \emph{first CNMc} to all kinds of dynamical systems.
112
+ The modal decomposition with \gls{nmf} is the most computationally intensive part and should be replaced by a faster alternative.
113
+
114
+
Data/0_Latex_True/2_Task/0_Methodlogy.tex ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \chapter{Methodology}
2
+ \label{chap_2_Methodlogy}
3
+ In this chapter, the entire pipeline for designing the proposed
4
+ \gls{cnmc} is elaborated. For this purpose, the ideas behind
5
+ the individual processes are explained.
6
+ Results from the step tracking onwards will be presented in chapter \ref{ch_3}.
7
+ Having said that, \gls{cnmc} consists of multiple main process steps or stages.
8
+ First, a broad overview of the \gls{cnmc}'s workflow shall be given.
9
+ Followed by a detailed explanation for each major operational step. The
10
+ implemented process stages are presented in the same order as they are
11
+ executed in \gls{cnmc}. However, \gls{cnmc} is not forced
12
+ to go through each stage. If the output of some steps is
13
+ already available, the execution of the respective steps can be skipped. \newline
14
+
15
+ The main idea behind such an implementation is to prevent computing the same task multiple times.
16
+ Computational time can be reduced if the output of some \gls{cnmc} steps are available.
17
+ Consequently, it allows users to be flexible in their explorations.
18
+ It could be the case that only one step of \textsc{CNMc} is desired to be examined with different settings or even with newly implemented functions without running the full \gls{cnmc} pipeline.
19
+ Let the one \gls{cnmc} step be denoted as C, then it is possible to skip steps A and B if their output is already calculated and thus available.
20
+ Also, the upcoming steps can be skipped or activated depending on the need for their respective outcomes.
21
+ Simply put, the mentioned flexibility enables to load data for A and B and execute only C. Executing follow-up steps or loading their data is also made selectable.
22
+ %
23
+ %------------------------------- SHIFT FROM INTRODUCTION ----------------------
24
+ %
25
+ Since the tasks of this thesis required much coding,
26
+ it is important to
27
+ mention the used programming language and the dependencies.
28
+ As for the programming language,
29
+ \emph{Python 3} \cite{VanRossum2009} was chosen. For the libraries, only a few important libraries will be mentioned, because the number of used libraries is high. Note, each used module is
30
+ freely available on the net and no licenses are required to be purchased.
31
+ \newline
32
+
33
+ The important libraries in terms of performing actual calculations are
34
+ \emph{NumPy} \cite{harris2020array}, \emph{SciPy} \cite{2020SciPy-NMeth}, \emph{Scikit-learn} \cite{scikit-learn}, \emph{pySindy} \cite{Silva2020, Kaptanoglu2022}, for multi-dimensional sparse matrix management \emph{sparse} and for plotting only \emph{plotly} \cite{plotly} was deployed. One of the reasons why \emph{plotly} is preferred over \emph{Matplotlib} \cite{Hunter:2007} is its post-processing capabilities, which are now available. Note, the previous \emph{\gls{cmm}c} version used \emph{Matplotlib} \cite{Hunter:2007}, which in this work has been fully replaced by \emph{plotly} \cite{plotly}. More reasons why this modification is useful and the newly implemented post-processing capabilities will be given in the upcoming sections.\newline
35
+
36
+ For local coding, the author's Linux-Mint-based laptop with the following hardware was deployed: CPU: Intel Core i7-4702MQ \gls{cpu}@ 2.20GHz × 4, RAM: 16GB.
37
+ The Institute of fluid dynamics of the Technische Universität Braunschweig
38
+ also supported this work by providing two more powerful computation resources.
39
+ The hardware specification will not be mentioned, due to the fact that all computations and results elaborated in this thesis can be obtained by
40
+ the hardware described above (author's laptop). However, the two provided
41
+ resources shall be mentioned and explained if \gls{cnmc} benefits from
42
+ faster computers. The first bigger machine is called \emph{Buran}, it is a
43
+ powerful Linux-based working station and access to it is directly provided by
44
+ the chair of fluid dynamics. \newline
45
+
46
+ The second resource is the high-performance
47
+ computer or cluster available across the Technische Universität Braunschweig
48
+ \emph{Phoenix}. The first step, where the dynamical systems are solved through an \gls{ode} solver
49
+ is written in a parallel manner. This step can, if specified in the \emph{settings.py} file, be performed in parallel and thus benefits from
50
+ multiple available cores. However, most implemented \gls{ode}s are solved within
51
+ a few seconds. There are also some dynamical systems implemented whose
52
+ ODE solution can take a few minutes. Applying \gls{cnmc} on latter dynamical
53
+ systems results in solving their \gls{ode}s for multiple different model parameter values. Thus, deploying the parallelization can be advised in the latter mentioned time-consuming \gls{ode}s.\newline
54
+
55
+ By far the most time-intensive part of the improved \gls{cnmc} is the clustering step. The main computation for this step is done with
56
+ {Scikit-learn} \cite{scikit-learn}. It is heavily parallelized and the
57
+ computation time can be reduced drastically when multiple threads are available.
58
+ Other than that, \emph{NumPy} and \emph{SciPy} are well-optimized libraries and
59
+ are assumed to benefit from powerful computers. In summary, it shall be stated that a powerful machine is for sure advised when multiple dynamical
60
+ systems with a range of different settings shall be investigated since parallelization is available. Yet when executing \gls{cnmc} on a single dynamical system, a regular laptop can be regarded as
61
+ a sufficient tool.
62
+
63
+ %------------------------------- SHIFT FROM INTRODUCTION ----------------------
64
+
65
+ % =====================================================================
66
+ % ============= Workflow ==============================================
67
+ % =====================================================================
68
+ \section{CNMc's data and workflow}
69
+ \label{sec_2_1_Workflow}
70
+ In this section, the 5 main points that characterize \gls{cnmc} will be discussed.
71
+ Before diving directly into \gls{cnmc}'s workflow some remarks
72
+ are important to be made.
73
+ First, \gls{cnmc} is written from scratch, it is not simply an updated version of the described \emph{first CNMc} in subsection
74
+ \ref{subsec_1_1_3_first_CNMc}.
75
+ Therefore, the workflow described in this section for \gls{cnmc} will not match that of \emph{first CNMc}, e.g., \emph{first CNMc} had no concept of \emph{settings.py} and it was not utilizing \emph{Plotly} \cite{plotly} to facilitate post-processing capabilities.
76
+ The reasons for a fresh start were given in subsection \ref{subsec_1_1_3_first_CNMc}.
77
+ Moreover, the difficulty of running \emph{first CNMc} and the time required to adjust \emph{first CNMc} such that a generic dynamic system could be utilized were considered more time-consuming than starting from zero. \newline
78
+
79
+ Second, the reader is reminded to have the following in mind.
80
+ Although it is called pipeline or workflow, \gls{cnmc} is not obliged to run the whole workflow. With \emph{settings.py} file, which will be explained below, it is possible to run only specific selected tasks.
81
+ The very broad concept of \gls{cnmc} was already provided at the beginning of chapter \ref{chap_1_Intro}.
82
+ However, instead of providing data of dynamical systems for different model parameter values, the user defines a so-called \emph{settings.py} file and executes \gls{cnmc}.
83
+ The outcome of \gls{cnmc} consists, very broadly, of the predicted trajectories and some accuracy measurements as depicted in figure
84
+ \ref{fig_1_CNMC_Workflow}.
85
+ In the following, a more in-depth view shall be given.\newline
86
+
87
+
88
+ The extension of \emph{settings.py} is a regular \emph{Python} file. However, it is a dictionary, thus there is no need to acquire and have specific knowledge about \emph{Python}.
89
+ The syntax of \emph{Python's} dictionary is quite similar to that of the \emph{JSON} dictionary, in that the setting name is supplied within a quote mark
90
+ and the argument is stated after a colon. In order to understand the main points of \gls{cnmc}, its main data and workflow are depicted in figure \ref{fig_3_Workflow} as an XDSM diagram \cite{Lambe2012}.
91
+ \newline
92
+
93
+ % ============================================
94
+ % ================ 2nd Workflow ==============
95
+ % ============================================
96
+ \begin{sidewaysfigure} [!]
97
+ \hspace*{-2cm}
98
+ \resizebox{\textwidth}{!}{
99
+ \input{2_Figures/2_Task/0_Workflow.tikz}
100
+ }
101
+ \caption{\gls{cnmc} general workflow overview}
102
+ \label{fig_3_Workflow}
103
+ \end{sidewaysfigure}
104
+
105
+ The first action for executing \gls{cnmc} is to define \emph{settings.py}. It contains descriptive information about the entire pipeline, e.g., which dynamical system to use, which model parameters to select for training, which for testing, which method to use for modal decomposition and mode regression.
106
+ To be precise, it contains all the configuration attributes of all the 5 main \gls{cnmc} steps and some other handy extra functions. It is written in
107
+ a very clear way such that settings to the corresponding stages of \gls{cnmc}
108
+ and the extra features can be distinguished at first glance.
109
+ First, there are separate dictionaries for each of the 5 steps to ensure that the desired settings are made where they are needed.
110
+ Second, instead of regular line breaks, multiline comment blocks with the stage names in the center are used.
111
+ Third, almost every \emph{settings.py} attribute is explained with comments.
112
+ Fourth, there are some cases, where
113
+ a specific attribute needs to be reused in other steps.
114
+ The user is not required to adapt it manually for all its occurrences, but rather to change it only on the first occasion, where the considered function is defined.
115
+ \emph{Python} will automatically ensure that all remaining steps receive the change correctly.
116
+ Other capabilities implemented in \emph{settings.py} are mentioned when they are actively exploited.
117
+ In figure \ref{fig_3_Workflow} it can be observed that after passing \emph{settings.py} a so-called \emph{Informer} and a log file are obtained.
118
+ The \emph{Informer} is a file, which is designed to save all user-defined settings in \emph{settings.py} for each execution of \gls{cnmc}.
119
+ Also, here the usability and readability of the output are important and have been formatted accordingly. It proves to be particularly useful when a dynamic system with different settings is to be calculated, e.g., to observe the influence of one or multiple parameters. \newline
120
+
121
+ One of the important attributes which
122
+ can be arbitrarily defined by the user in \emph{settings.py} and thus re-found in the \emph{Informer} is the name of the model.
123
+ In \gls{cnmc} multiple dynamical systems are implemented, which can be chosen by simply changing one attribute in \emph{settings.py}.
124
+ Different models could be calculated with the same settings, thus this clear and fast possibility to distinguish between multiple calculations is required.
125
+ The name of the model is not only saved in the \emph{Informer} but it will
126
+ be used to generate a folder, where all of \gls{cnmc} output for this single
127
+ \gls{cnmc} workflow will be stored.
128
+ The latter should contribute, on the one hand, to the \gls{cnmc} models being easily distinguishable from each other and, on the other hand, to all results of one model being obtained in a structured way.
129
+ \newline
130
+
131
+ When executing \gls{cnmc} many terminal outputs are displayed. This allows the user to be kept up to date on the current progress on the one hand and to see important results directly on the other.
132
+ In case of unsatisfying results, \gls{cnmc} could be aborted immediately, instead of having to compute the entire workflow. In other words, if a computation expensive \gls{cnmc} task shall be performed, knowing about possible issues in the first steps can
133
+ be regarded as a time-saving mechanism.
134
+ The terminal outputs are formatted to include the date, time, type of message, the message itself and the place in the code where the message can be found.
135
+ The terminal outputs are colored depending on the type of the message, e.g., green is used for successful computations.
136
+ Colored terminal outputs are applied for the sake of readability.
137
+ More relevant outputs can easily be distinguished from others.
138
+ The log file can be considered as a memory since, in it, the terminal outputs are saved.\newline
139
+
140
+ The stored terminal outputs are in the same format as the terminal output described above, except that no coloring is utilized.
141
+ An instance, where the log file can be very helpful is the following. Some implemented quality measurements give very significant information about prediction reliability. Comparing different settings in terms of prediction capability would become very challenging if the terminal outputs would be lost whenever the \gls{cnmc} terminal is closed. The described \emph{Informer} and the log file can be beneficial as explained, nevertheless, they are optional.
142
+ That is, both come as two of the extra features mentioned above and can be turned off in \emph{settings.py}.\newline
143
+
144
+ Once \emph{settings.py} is defined, \gls{cnmc} will filter the provided input, adapt the settings if required and send the corresponding parts to their respective steps.
145
+ The sending of the correct settings is depicted in figure \ref{fig_3_Workflow}, where the abbreviation \emph{st} stands for settings.
146
+ The second abbreviation \emph{SOP} is found for all 5 stages and denotes storing output and plots. All the outcome is stored in a compressed form such that memory can be saved. All the plots are saved as HTML files. There are many reasons to do so, however, to state the most crucial ones. First, the HTML file can be opened on any operating system.
147
+ In other words, it does not matter if Windows, Linux or Mac is used.
148
+ Second, the big difference to an image is that HTML files can be upgraded with, e.g., CSS, JavaScript and PHP functions.
149
+ Each received HTML plot is equipped with some post-processing features, e.g., zooming, panning and taking screenshots of the modified view. When zooming in or out the axes labels are adapted accordingly. Depending on the position of
150
+ the cursor, a panel with the exact coordinates of one point and other information such as the $\beta $ is made visible. \newline
151
+
152
+ In the same way that data is stored in a compressed format, all HTML files are generated in such a way that additional resources are not written directly into the HTML file, but a link is used so that the required content is obtained via the Internet.
153
+ Other features associated with HTML plots and which data are saved will be explained in their respective section in this chapter.
154
+ The purpose of \gls{cnmc} is to generate a surrogate model with which predictions can be made for unknown model parameter values ${\beta}$.
155
+ For a revision on important terminology as model parameter value $\beta$
156
+ the reader is referred to subsection \ref{subsec_1_1_1_Principles}.
157
+ Usually, in order to obtain a sound predictive model, machine learning methods require a considerable amount of data. Therefore, the \gls{ode} is solved for a set of $\vec{\beta }$. An in-depth explanation for the first is provided in
158
+ section \ref{sec_2_2_Data_Gen}.
159
+ The next step is to cluster all the received trajectories deploying kmeans++ \cite{Arthur2006}. Once this has been done, tracking can be performed.
160
+ Here the objective is to keep track of the positions of all the centroids when $\beta$ is changed over the whole range of $\vec{\beta }$.
161
+ A more detailed description is given in section \ref{sec_2_3_Tracking}.\newline
162
+
163
+
164
+ The modeling step is divided into two subtasks, which are not displayed as such in figure \ref{fig_3_Workflow}. The first subtask aims to get a model that yields all positions of all the $K$ centroids for an unseen $\beta_{unseen}$, where an unseen $\beta_{unseen}$ is any $\beta$ that was not used to train the model. In the second subtask, multiple tasks are performed.
165
+ First, the regular \gls{cnm} \cite{Fernex2021} shall be applied to all the tracked clusters from the tracking step. For this purpose, the format of the tracked results is adapted in a way such that \gls{cnm} can be executed without having to modify \gls{cnm} itself. By running \gls{cnm} on the tracked data of all $\vec{\beta }$, the transition property tensors $\bm Q$ and $\bm T$ for all $\vec{\beta }$ are received. \newline
166
+
167
+ Second, all the $\bm Q$ and the $\bm T$ tensors are stacked to form $\bm {Q_{stacked}}$ and $\bm {T_{stacked}}$ matrices.
168
+ These stacked matrices are subsequently supplied to one of the two possible implemented modal decomposition methods.
169
+ Third, a regression model for the obtained modes is constructed.
170
+ Clarifications on the modeling stage can be found in section \ref{sec_2_4_Modeling}.\newline
171
+
172
+ The final step is to make the actual predictions for all provided $\beta_{unseen}$ and allow the operator to draw conclusions about the trustworthiness of the predictions.
173
+ For the trustworthiness, among others, the three quality measurement concepts explained in subsection
174
+ \ref{subsec_1_1_3_first_CNMc}
175
+ are leveraged. Namely, comparing the \gls{cnmc} and \gls{cnm} predicted trajectories by overlaying them directly. The two remaining techniques, which were already applied in regular \gls{cnm} \cite{Fernex2021}, are the \glsfirst{cpd} and the autocorrelation.\newline
176
+
177
+ The data and workflow in figure \ref{fig_3_Workflow} do not reveal one additional feature of the implementation of \gls{cnmc}. That is, inside the folder \emph{Inputs} multiple subfolders containing a \emph{settings.py} file, e.g., different dynamical systems, can be inserted to allow a sequential run. In the case of an empty subfolder, \gls{cnmc} will inform the user about that and continue its execution without an error.
178
+ As explained above, each model will have its own folder where the entire output will be stored.
179
+ To switch between the multiple and a single \emph{settings.py} version, the {settings.py} file outside the \emph{Inputs} folder needs to be modified. The argument for that is \emph{multiple\_Settings}.\newline
180
+
181
+ Finally, one more extra feature shall be mentioned. After having computed expensive models, it is not desired to overwrite the log file or any other output.
182
+ To prevent such unwanted events, it is possible to leverage the overwriting attribute in \emph{settings.py}. If overwriting is disabled, \gls{cnmc} would verify whether a folder with the specified model name already exists.
183
+ In the positive case, \gls{cnmc} would initially only propose an alternative model name. Only if the suggested model name would not overwrite any existing folders, the suggestion will be accepted as the new model name.
184
+ Both the model name chosen in \emph{settings.py} as well as the new final model name are going to be printed out in the terminal.\newline
185
+
186
+ In summary, the data and workflow of \gls{cnmc} are shown in Figure \ref{fig_3_Workflow} and are sufficient for a broad understanding of the main steps.
187
+ However, each of the 5 steps can be invoked individually, without having to run the full pipeline. Through the implementation of \emph{settings.py} \gls{cnmc} is highly flexible. All settings for the steps and the extra features can be managed with \emph{settings.py}.
188
+ A log file containing all terminal outputs as well as an \emph{Informer} file storing a summary of the chosen settings are part of \gls{cnmc}'s tools.
189
+
Data/0_Latex_True/2_Task/1_Data_Gen.tex ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ % =====================================================================
2
+ % ============= Data generation =======================================
3
+ % =====================================================================
4
+ \section{Data generation}
5
+ \label{sec_2_2_Data_Gen}
6
+ In this section, the first main step of the 5 steps shall be explained.
7
+ The idea of \gls{cnmc} is to create a surrogate model such that predictions for unseen $\beta_{unseen}$ can be made.
8
+ An unseen model parameter value $\beta_{unseen}$ is defined to be not incorporated in the training data. Generally in machine learning, the more linearly independent data is available the higher the trustworthiness of the surrogate model is assumed to be.
9
+ Linearly independent data is to be described as data which provides new information.
10
+ Imagine any million times a million data matrix $\bm{A}_{n \times n}$, where $n = 1 \mathrm{e}{+6}$.
11
+ On this big data matrix $\bm A$ a modal decomposition method, e.g., the \glsfirst{svd} \cite{Brunton2019,gerbrands1981relationships}, shall be applied.\newline
12
+
13
+ To reconstruct the original matrix $\bm A$ fully with the decomposed matrices only the non-zero modes are required. The number of the non-zero modes $r$ is often much smaller than the dimension of the original matrix, i.e., $r \ll n$.
14
+ If $r \ll n$, the measurement matrix $\bm A$ contains a high number of linearly dependent data. This has the advantage of allowing the original size to be reduced. The disadvantage, however, is that $\bm A$ contains duplicated entries (rows, or columns). For this reason, $\bm A$ includes data parts which do not provide any new information.
15
+ In the case of $r = n$ only meaningful observations are comprised and $\bm A$ has full rank.
16
+ Part of feature engineering is to supply the regression model with beneficial training data and filter out redundant copies.
17
+ The drawback of $r = n$ is observed when the number of representative modes is chosen to be smaller than the full dimension $r < n$.
18
+ Consequently, valuable measurements could be lost. \newline
19
+
20
+
21
+ Moreover, if the dimension $n$ is very large, accuracy demands may make working with matrices unavoidable.
22
+ As a result, more powerful computers are required and the computational time is expected to be increased.
23
+ For this work, an attempt is made to represent non-linear differential equations by a surrogate model.
24
+ In addition, trajectories of many $\vec{\beta }$ can be handled quite efficiently.
25
+ Therefore, it is attempted to provide sufficient trajectories as training data.
26
+ Having said that the data and workflow of this step, i.e., data generation, shall be described.
27
+ The general overview is depicted in figure \ref{fig_4_Data_Gen}.
28
+ Data generation corresponding settings are passed to its step, which invokes the \gls{ode} solver for the range of selected $\vec{\beta}$.
29
+ The trajectories are plotted and, both, all the obtained trajectories $F(\vec{\beta})$ and their plots are saved. Note that $\vec{\beta}$ indicates that one differential equation is solved for selected $\beta$ values within a range of model parameter values $\vec{\beta}$.\newline
30
+ %
31
+ % ==============================================
32
+ % ========== Constraint Viol Workflow =========
33
+ % ==============================================
34
+ \begin{figure} [!h]
35
+ \hspace*{-4cm}
36
+ \resizebox{1.2\textwidth}{!}{
37
+ \input{2_Figures/2_Task/1_Data_Gen.tikz}
38
+ }
39
+ \caption{Data and workflow of the first step: Data generation}
40
+ \label{fig_4_Data_Gen}
41
+ \end{figure}
42
+ %
43
+ %
44
+ A detailed description will be given in the following.
45
+ First, in order to run this task, it should be activated in \emph{settings.py}.
46
+ Next, the user may change local output paths, define which kind of plots shall be generated, which dynamical model should be employed and provide the range $\vec{\beta}$.
47
+ As for the first point, the operator can select the path where the output of this specific task shall be stored. Note, that this is an optional attribute. Also, although it was only tested on Linux, the library \emph{pathlib} was applied.
48
+ Therefore, if the output is stored on a Windows or Mac-based operating system, which uses a different path system, no errors are expected.
49
+ \newline
50
+
51
+ Regarding the types of plots, first, for each type of plot, the user is enabled to define if these plots are desired or not. Second, all the plots are saved as HTML files. Some reasons for that were provided at the beginning of this chapter and others which are important for trajectory are the following.
52
+ With in-depth explorations in mind, the user might want to highlight specific regions in order to get detailed and correct information.
53
+ For trajectories, this can be encountered when e.g., coordinates of some points within a specified region shall be obtained. Here zooming, panning, rotation and a panel that writes out additional information about the current location of the cursor can be helpful tools. The first type of plot is the trajectory itself with the initial condition as a dot in the state-space.\newline
54
+
55
+ If desired, arrows pointing in the direction of motion of the trajectory can be included in the plots.
56
+ The trajectory, the initial state sphere and the arrows can be made invisible by one click on the legend if desired. The second type of representation is an animated plot, i.e., each trajectory $ F(\beta)$ is available as the animated motion. The final type of display is one plot that contains all $F(\vec{\beta})$ as a sub-figure.
57
+ The latter type of visualization is a very valuable method to see the impact of $\beta$ across the available $\vec{\beta }$ on the trajectories $ F(\vec{\beta})$.
58
+ Also, it can be leveraged as a fast sanity check technique, i.e., if any $F(\beta )$ deviates from expectation, this can be determined quickly by looking at the stacked trajectory plots.
59
+ \newline
60
+
61
+ If for presentation HTML files are not desired, clicking on a button will provide a \emph{png} image of the current view state of the trajectory. Note, that the button will not be on the picture.
62
+ Finally, modern software, especially coding environments, understood that staring at white monitors is eye-straining. Consequently, dark working environments are set as default. For this reason, all the mentioned types of plots have a toggle implemented.
63
+ It allows switching between a dark default and a white representation mode.\newline
64
+
65
+ For choosing a dynamical system, two possibilities are given.
66
+ On the one hand, one of the 10 incorporated models can be selected by simply selecting a number, which corresponds to an integrated dynamical system.
67
+ On the other hand, a new dynamical system can be implemented.
68
+ This can be achieved without much effort by following the syntax of one of the 10 available models. The main adjustment is done by replacing the \gls{ode}.
69
+ The differential equations of all 10 dynamic systems that can be selected by default are given in equations \eqref{eq_6_Lorenz} to \eqref{eq_13_Insect} and the 3 sets of equations \eqref{eq_8_Chen} to \eqref{eq_14_VDP} are found in the Appendix.
70
+ The latter 3 sets of equations are provided in the Appendix because they are not used for validating \gls{cnmc} prediction performance.
71
+ Next to the model's name, the reference to the dynamical system can be seen.
72
+ The variables $a$ and $b$ are constants.
73
+ Except for the Van der Pol, which is given in the Appendix \ref{ch_Ap_Dyna} as equation \eqref{eq_14_VDP}, all dynamical systems are 3-dimensional.\newline
74
+
75
+ % ==============================================================================
76
+ % ============================ EQUATIONS =======================================
77
+ % ==============================================================================
78
+ \begin{enumerate}
79
+ \item \textbf{Lorenz} \cite{lorenz1963deterministic}:
80
+ \begin{equation}
81
+ \label{eq_6_Lorenz}
82
+ \begin{aligned}
83
+ \dot x &= a\, (y - x) \\
84
+ \dot y &= x\, (\beta - z -y) \\
85
+ \dot z &= x y -\beta z
86
+ \end{aligned}
87
+ \end{equation}
88
+
89
+ \item \textbf{Rössler} \cite{Roessler1976}:
90
+ \begin{equation}
91
+ \label{eq_7_Ross}
92
+ \begin{aligned}
93
+ \dot x &= -y -z \\
94
+ \dot y &= x + ay \\
95
+ \dot z &= b +z \, (x-\beta)\\
96
+ \end{aligned}
97
+ \end{equation}
98
+
99
+
100
+
101
+ \item \textbf{Two Scroll} \cite{TwoScroll}:
102
+ \begin{equation}
103
+ \label{eq_9_2_Scroll}
104
+ \begin{aligned}
105
+ \dot x &= \beta \, (y-x) \\
106
+ \dot y &= x z \\
107
+ \dot z &= a - by^4
108
+ \end{aligned}
109
+ \end{equation}
110
+
111
+ \item \textbf{Four Wing} \cite{FourWing}:
112
+ \begin{equation}
113
+ \label{eq_10_4_Wing}
114
+ \begin{aligned}
115
+ \dot x &= \beta x +y +yz\\
116
+ \dot y &= yz - xz \\
117
+ \dot z &= a + bxy -z
118
+ \end{aligned}
119
+ \end{equation}
120
+
121
+ \item \textbf{Sprott\_V\_1} \cite{sprott2020we}:
122
+ \begin{equation}
123
+ \label{eq_11_Sprott_V_1}
124
+ \begin{aligned}
125
+ \dot x &= y \\
126
+ \dot y &= -x - sign(z)\,y\\
127
+ \dot z &= y^2 - exp(-x^2) \, \beta
128
+ \end{aligned}
129
+ \end{equation}
130
+
131
+
132
+ \item \textbf{Tornado} \cite{sprott2020we}:
133
+ \begin{equation}
134
+ \label{eq_12_Tornado}
135
+ \begin{aligned}
136
+ \dot x &= y \, \beta \\
137
+ \dot y &= -x - sign(z)\,y\\
138
+ \dot z &= y^2 - exp(-x^2)
139
+ \end{aligned}
140
+ \end{equation}
141
+
142
+
143
+ \item \textbf{Insect} \cite{sprott2020we}:
144
+ \begin{equation}
145
+ \label{eq_13_Insect}
146
+ \begin{aligned}
147
+ \dot x &= y \\
148
+ \dot y &= -x - sign(z)\,y \, \beta\\
149
+ \dot z &= y^2 - exp(-x^2)
150
+ \end{aligned}
151
+ \end{equation}
152
+
153
+
154
+ \end{enumerate}
155
+ % ==============================================================================
156
+ % ============================ EQUATIONS =======================================
157
+ % ==============================================================================
158
+
159
+ Sprott\_V\_1, Tornado and Insect in equations \eqref{eq_11_Sprott_V_1} to \eqref{eq_13_Insect} are not present in the cited reference \cite{sprott2020we} in this expressed form.
160
+ The reason is that the introduced equations are a modification of the chaotic attractor proposed in \cite{sprott2020we}. The curious reader is invited to read \cite{sprott2020we} and to be convinced about the unique properties.
161
+ The given names are made up and serve to distinguish them.
162
+ Upon closer inspection, it becomes clear that they differ only in the place where $\beta$ is added.
163
+ All 3 models are highly sensitive to $\beta $, i.e., a small change in $\beta $ results in bifurcations.
164
+ For follow-up improvements of \gls{cnmc}, these 3 systems can be applied as performance benchmarks for bifurcation prediction capability.\newline
165
+
166
+ Showing the trajectories of all 10 models with different $\vec{\beta} $ would claim too many pages. Therefore, for demonstration purposes the
167
+ 3 above-mentioned models, i.e., Sprott\_V\_1, Tornado and Insect are
168
+ displayed in figures \ref{fig_5_Sprott} to \ref{fig_11_Insect}.
169
+ Figure \ref{fig_5_Sprott} depicts the dynamical system Sprott\_V\_1 \eqref{eq_11_Sprott_V_1} with $\beta =9$.
170
+ Figures \ref{fig_6_Tornado} to \ref{fig_8_Tornado} present the Tornado
171
+ \eqref{eq_12_Tornado}
172
+ with $\beta =16.78$ with 3 different camera perspectives.
173
+ Observing these figures, the reader might recognize why the name Tornado was chosen. The final 3 figures \ref{fig_9_Insect} to \ref{fig_11_Insect} display the Insect \eqref{eq_13_Insect} with $\beta =7$ for 3 different perspectives.
174
+ Other default models will be displayed in subsection \ref{subsec_3_5_2_Models}, as they were used for performing benchmarks. \newline
175
+
176
+ % ==============================================================================
177
+ % ============================ PLTS ============================================
178
+ % ==============================================================================
179
+ \begin{figure}[!h]
180
+ \centering
181
+ \includegraphics[width =\textwidth]
182
+ % In order to insert an eps file - Only_File_Name (Without file extension)
183
+ {2_Figures/2_Task/2_Sprott_V1.pdf}
184
+ \caption{Default model: Sprott\_V\_1 \eqref{eq_11_Sprott_V_1} with $\beta =9$}
185
+ \label{fig_5_Sprott}
186
+ \end{figure}
187
+
188
+
189
+
190
+ \begin{figure}[!h]
191
+ %\vspace{0.5cm}
192
+ \begin{minipage}[h]{0.47\textwidth}
193
+ \centering
194
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/3_Tornado.pdf}
195
+ \caption{Default model: Tornado \eqref{eq_12_Tornado} with $\beta =16.78$, view: 1}
196
+ \label{fig_6_Tornado}
197
+ \end{minipage}
198
+ \hfill
199
+ \begin{minipage}{0.47\textwidth}
200
+ \centering
201
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/4_Tornado.pdf}
202
+ \caption{Default model: Tornado \eqref{eq_12_Tornado} with $\beta =16.78$, view: 2}
203
+ \label{fig_7_Tornado}
204
+ \end{minipage}
205
+ \end{figure}
206
+
207
+ \begin{figure}[!h]
208
+ \begin{minipage}[h]{0.47\textwidth}
209
+ \centering
210
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/5_Tornado.pdf}
211
+ \caption{Default model: Tornado \eqref{eq_12_Tornado} with $\beta =16.78$, view: 3}
212
+ \label{fig_8_Tornado}
213
+ \end{minipage}
214
+ \hfill
215
+ \begin{minipage}{0.47\textwidth}
216
+ \centering
217
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/6_Insect.pdf}
218
+ \caption{Default model: Insect \eqref{eq_13_Insect} with $\beta =7$, view: 1}
219
+ \label{fig_9_Insect}
220
+ \end{minipage}
221
+ \end{figure}
222
+
223
+ \begin{figure}[!h]
224
+ \begin{minipage}[h]{0.47\textwidth}
225
+ \centering
226
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/7_Insect.pdf}
227
+ \caption{Default model: Insect \eqref{eq_13_Insect} with $\beta =7$, view: 2}
228
+ \label{fig_10_Insect}
229
+ \end{minipage}
230
+ \hfill
231
+ \begin{minipage}{0.47\textwidth}
232
+ \centering
233
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/8_Insect.pdf}
234
+ \caption{Default model: Insect \eqref{eq_13_Insect} with $\beta =7$, view: 3}
235
+ \label{fig_11_Insect}
236
+ \end{minipage}
237
+ \end{figure}
238
+ % ==============================================================================
239
+ % ============================ PLTS ============================================
240
+ % ==============================================================================
241
+ \newpage
242
+ Having selected a dynamical system, the model parameter values for which the system shall be solved must be specified in \emph{settings.py}.
243
+ With the known range $\vec{\beta}$ the problem can be described, as already mentioned in subsection \ref{subsec_1_1_1_Principles}, with equation \eqref{eq_1_1_MPV}.
244
+
245
+ \begin{equation}
246
+ F_{\gls{cnmc}} = \left(\dot{\vec{x}}(t), \, \vec{\beta} \right) =
247
+ \left( \frac{\vec{x}(t)}{dt}, \, \vec{\beta} \right) =
248
+ f(\vec{x}(t), \, \vec{\beta} )
249
+ \tag{\ref{eq_1_1_MPV}}
250
+ \end{equation}
251
+
252
+ The solution to \eqref{eq_1_1_MPV} is obtained numerically by applying \emph{SciPy's RK45} \gls{ode} solver. If desired \gls{cnmc} allows completing this task in parallel. Additional notes on executing this task in parallel are given in section \ref{sec_Motivation}. The main reason for relying on \emph{RK45} is that it is commonly known to be a reliable option.
253
+ Also, in \cite{Butt2021} \emph{RK45} was directly compared with \emph{LSODA}. The outcome was that \emph{LSODA} was slightly better, however, the deviation between \emph{RK45's} and \emph{LSODA's} performance was found to be negligible.
254
+ In other words, both solvers fulfilled the accuracy demands.
255
+ Since chaotic systems are known for their \glsfirst{sdic} any deviation, even in the $\mathcal{O} (1 \mathrm{e}{-15})$, will be amplified approximately exponentially and finally will become unacceptably high.
256
+ Therefore, it was tested, whether the \emph{RK45} solver would allow statistical variations during the solution process.
257
+ For this purpose, the Lorenz system \cite{lorenz1963deterministic} was solved multiple times with different time ranges. The outcome is that \emph{RK45} has no built-in statistical variation.
258
+ Simply put, the trajectory of the Lorenz system for one constant $\beta $ will not differ when solved multiple times on one computer.\newline
259
+
260
+
261
+ Comparing \emph{first CNMc} and \gls{cnmc} the key takeaways are that \gls{cnmc} has 10 in-built dynamical systems.
262
+ However, implementing a new model can also be achieved in a way that is considered relatively straightforward.
263
+ Important settings, such as the model itself, the $\vec{\beta }$, plotting and storing outcome can be managed with the \emph{settings.py}. The plots are generated and stored such that post-processing capabilities are supplied.
Data/0_Latex_True/2_Task/2_Clustering.tex ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ % ==============================================================================
2
+ % =========================== Clustering =======================================
3
+ % ==============================================================================
4
+ \section{Clustering}
5
+ \label{sec_2_3_Clustering}
6
+ In this section, the second step, the clustering of all trajectories $(\vec{\beta})$, is explained.
7
+ The main idea is to represent $F(\vec{\beta})$ through movement on centroids.
8
+ The data and workflow of clustering are very similar to the previous step of the data generation. It can be comprehended with figure \ref{fig_12_Clust}.
9
+ All settings for this step can be individually configured in \emph{settings.py}.
10
+ The $ F(\vec{\beta})$ and cluster-algorithm specific parameters are filtered and provided to the clustering algorithm. The solutions are plotted and both, the plots and the clustered output are saved.\newline
11
+
12
+ % ==============================================
13
+ % ========== Clustering Workflow ===============
14
+ % ==============================================
15
+ \begin{figure} [!h]
16
+ \hspace*{-4cm}
17
+ \resizebox{1.2\textwidth}{!}{
18
+ \input{2_Figures/2_Task/9_Clust.tikz}
19
+ }
20
+ \caption{Data and workflow of the second step: Clustering}
21
+ \label{fig_12_Clust}
22
+ \end{figure}
23
+ %
24
+ %
25
+
26
+ Data clustering is an unsupervised machine learning technique.
27
+ There are a variety of approaches that may be used for this, e.g.,
28
+ k-means, affinity propagation, mean shift,
29
+ spectral clustering and Gaussian mixtures. All the
30
+ methods differ in their use cases, scalability,
31
+ metric or deployed norm and required input parameters. The latter
32
+ is an indicator of customization abilities. Since k-means can be used for very large
33
+ data sets and enables easy and fast implementation, k-means is preferred. Furthermore, David Arthur et al.
34
+ \cite{Arthur2006} introduced k-means++, which is known
35
+ to outperform k-means. Therefore, \gls{cnmc} uses k-means++
36
+ as its default method for data clustering.
37
+ Note, applying k-means++ is not new in \gls{cnmc}, but it was already applied in the regular \gls{cnm} \cite{Fernex2021}.\newline
38
+
39
+ In order to
40
+ cover the basics of k-means and k-means++, two terms
41
+ should be understood.
42
+ Picturing a box with 30 points in it, where 10 are located on the left, 10
43
+ in the middle and 10 on the right side of the box. Adhering to such a
44
+ constellation, it is appealing to create 3 groups, one for
45
+ each overall position (left, center and right). Each group would
46
+ contain 10 points. These groups are called clusters and the
47
+ geometrical center of each cluster is called a centroid.
48
+ A similar thought experiment is visually depicted in \cite{Sergei_Visual}.
49
+ Considering a dynamical system, the trajectory is retrieved by integrating the \gls{ode} numerically at discrete time steps.
50
+ For each time step the obtained point is described with one x-, y- and z-coordinate.
51
+ Applying the above-mentioned idea on, e.g., the Lorenz system \cite{lorenz1963deterministic}, defined as the set of equations in \eqref{eq_6_Lorenz}, then the resulting centroids can be seen in figure \ref{fig_13_Clust}.
52
+ The full domains of the groups or clusters are color-coded in figure \ref{fig_14_Clust}.\newline
53
+
54
+ \begin{figure}[!h]
55
+ %\vspace{0.5cm}
56
+ \begin{minipage}[h]{0.47\textwidth}
57
+ \centering
58
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/10_Clust.pdf}
59
+ \caption{Centroids of the Lorenz system \eqref{eq_6_Lorenz} with $\beta =28$}
60
+ \label{fig_13_Clust}
61
+ \end{minipage}
62
+ \hfill
63
+ \begin{minipage}{0.47\textwidth}
64
+ \centering
65
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/11_Clust.pdf}
66
+ \caption{Cluster domains of the Lorenz system \eqref{eq_6_Lorenz} with $\beta =28$}
67
+ \label{fig_14_Clust}
68
+ \end{minipage}
69
+ \end{figure}
70
+
71
+
72
+ Theoretically,
73
+ the points which are taken to calculate a center could be assigned
74
+ weighting factors. However, this is not done in \gls{cnmc} and therefore shall only
75
+ be outlined as a side note. After being familiar with the concept of
76
+ clusters and centroids, the actual workflow of k-means shall be explained.
77
+ For initializing
78
+ k-means, a number of clusters and an initial guess for the centroid
79
+ positions must be provided. Next, the distance between all the data
80
+ points and the centroids is calculated. The data points closest to a
81
+ centroid are assigned to these respective clusters. In other words, each data point is assigned to that cluster for which
82
+ the corresponding centroid exhibits the smallest distance
83
+ to the considered data point.
84
+ Subsequently, the geometric mean of each cluster is determined from all data points assigned to that cluster. With the
85
+ new centroid positions, the clustering is
86
+ performed again. \newline
87
+
88
+ Calculating the mean of the clustered
89
+ data points (centroids) and performing clustering based on the
90
+ distance between each data point and the centroids
91
+ is done iteratively. The iterative process stops when
92
+ the difference between the prior and current
93
+ centroids position is equal to zero or
94
+ satisfies a given threshold. Other explanations with pseudo-code and
95
+ visualization for better understanding can be found in \cite{Frochte2020}
96
+ and \cite{Sergei_Visual}, respectively.\newline
97
+
98
+ % ------------------------------------------------------------------------------
99
+ % --------------------- PART 2 -------------------------------------------------
100
+ % ------------------------------------------------------------------------------
101
+ Mathematically, the k-means objective can be expressed
102
+ as an optimization problem with the centroid
103
+ position $\bm{\mu}_j$ as the design variable. That is given in equation
104
+ \eqref{eq_1_k_means} (extracted from \cite{Frochte2020}), where
105
+ $\bm{\mu}_j$ and $\mathrm{D}^{\prime}_j$ denote the centroid or
106
+ mean of the \emph{j}th cluster and the data points
107
+ belonging to the \emph{j}th cluster, respectively.
108
+ The distance between all the \emph{j}th cluster data points
109
+ and its corresponding \emph{j}th centroid is
110
+ stated as $\mathrm{dist}(\bm{x}_j, \bm{\mu}_j)$.
111
+
112
+ \begin{equation}
113
+ \label{eq_1_k_means}
114
+ \argmin_{\bm{\mu}_j}\sum_{j=1}^k \; \sum_{\bm{x}_j \in \mathrm{D}^{\prime}_j }
115
+ \mathrm{dist}(\bm{x}_j, \bm \mu_j)
116
+ \end{equation}
117
+
118
+ Usually, the k-means algorithm is deployed with a Euclidean metric
119
+ and equation \eqref{eq_1_k_means} becomes \eqref{eq_2_k_Means_Ly}, which
120
+ is known as the Lloyd algorithm \cite{Frochte2020, Lloyd1982}. The
121
+ Lloyd algorithm can be understood as the minimization of the variance.
122
+ Thus, it is not necessarily true that k-means is equivalent to reducing
123
+ the variance. It is only true when the Euclidean norm is used.
124
+
125
+ \begin{equation}
126
+ \label{eq_2_k_Means_Ly}
127
+ \argmin_{\bm{\mu}_j}\sum_{j=1}^k \; \sum_{\bm{x}_j \in \mathrm{D}^{\prime}_j }
128
+ \| \bm x_j - \bm \mu_j \|^2
129
+ \end{equation}
130
+
131
+ The clustering algorithm highly depends on the provided
132
+ initial centroids positions. Since in most cases, these
133
+ are guessed, there is no guarantee of a reliable outcome.
134
+ Sergei Vassilvitskii, one of the founders of
135
+ k-means++, says in one of his presentations \cite{Sergei_Black_Art},
136
+ finding a good set of initial points would be black art.
137
+ Arthur et al. \cite{Arthur2006} state,
138
+ that the speed and simplicity of k-means would be appealing, not
139
+ its accuracy. There are many natural examples for which
140
+ the algorithm generates arbitrarily bad clusterings \cite{Arthur2006}.\newline
141
+
142
+
143
+ An alternative or improved version of k-means is the already
144
+ mentioned k-means++, which
145
+ only differs in the initialization step. Instead of providing
146
+ initial positions for all centroids, just one centroid's
147
+ position is supplied. The remaining are calculated based on
148
+ maximal distances. In concrete, the distance between all
149
+ data points and the existing centroids is computed. The data point
150
+ which exhibits the greatest distance is added to the
151
+ list of collected centroids. This is done until all $k$
152
+ clusters are generated. A visual depiction of this process
153
+ is given by Sergei Vassilvitskii in \cite{Sergei_Visual}.
154
+ Since the outcome of k-means++ is more reliable than
155
+ k-means, k-means++ is deployed in \gls{cnmc}.\newline
156
+
157
+ After having discussed some basics of k-means++, it shall be
158
+ elaborated on how and why the solution of the dynamical system should be
159
+ clustered. The solution of any dynamical system returns a trajectory.
160
+ If the trajectory repeats itself or happens to come close
161
+ to prior trajectories without actually touching them,
162
+ characteristic sections can be found.
163
+ Each characteristic section in the phase space is
164
+ captured by a centroid. The movement from one
165
+ centroid to another is supposed to portray the original
166
+ trajectory. With a clustering algorithm, these representative
167
+ characteristic locations in the phase space are obtained.
168
+ Since the clusters shall capture an entire trajectory, it is
169
+ evident that the number of clusters is an
170
+ essential parameter to choose. Latter fact becomes even
171
+ more clear when recalling that a trajectory can be multi-modal or complex.\newline
172
+
173
+ In the case of a highly non-linear
174
+ trajectory, it is obvious that many clusters are demanded in
175
+ order to minimize the loss of the original
176
+ trajectories. The projection of the real trajectory
177
+ to a cluster-based movement can be compared to
178
+ a reduced-order model of the trajectory. In this context,
179
+ it is plausible to refer to the centroids as
180
+ representative characteristic locations. Furthermore, \gls{cnm} and thus, \gls{cnmc}, exploits graph theory.
181
+ Therefore, the centroids can be denoted as nodes or characteristic nodes.\newline
182
+
183
+ The remaining part of this section will be devoted exclusively to the application of \gls{cnmc}. First, the leveraged kmeans++ algorithm is from the machine learning \emph{Python} library \emph{Scikit-learn} \cite{scikit-learn}.
184
+ Crucial settings can be adjusted, e.g., the number of clusters $K$, the maximal number of iterations, the tolerance as a convergence criterion and the number of different centroid seeds with which k-means is executed.
185
+ The operator can decide if the clustering step shall be performed or skipped.
186
+ The path for outputs can be modified and generating plots is also optional.
187
+ For the clustering stage, there are 4 types of plots available.
188
+ Two types of plots are depicted in figures \ref{fig_13_Clust} and \ref{fig_14_Clust}.
189
+ With the generated HTML plots the same features as described in section \ref{sec_2_2_Data_Gen} are available, e.g., receiving more information through pop-up panels and
190
+ switching between a dark and white mode.
191
+ \newline
192
+
193
+ The other two types of charts are not displayed here because they are intended to be studied as HTML graphs where the output can be viewed from multiple angles.
194
+ The first type shows the clustered output of one system for two different $\beta$ next to each other.
195
+ The centroids are labeled randomly as will be shown in subsection \ref{subsec_2_2_1_Parameter_Study}.
196
+ Consequently, the centroids representing the immediate neighbors across the two separate $\beta $ have separate labels.
197
+ In the second remaining type of HTML graph, the closest centroids across the two different $\beta $ are connected through lines.
198
+ Also, in the same way, as it was done for the first step in the data generation an additional HTML file containing all $\vec{\beta } $ charts is generated.
199
+ \newline
200
+
201
+ It can be concluded that the clustering step is performed by employing \emph{Scikit-learn's} kmeans++ implementation, which is well suited for a great number of points. As usual, all important settings can be controlled with \emph{settings.py}.
202
+
203
+ \subsection{Parameter Study}
204
+ \label{subsec_2_2_1_Parameter_Study}
205
+ In this subsection, the effects on the clustering step caused by the parameter \emph{n\_init} shall be named. After that, the random labeling of the centroids is to be highlighted.
206
+ With the parameter \emph{n\_init} it is possible to define how often the k-means algorithm will be executed with different centroid seeds \cite{scikit-learn}.
207
+ For a reliable clustering quality \emph{n\_init} should be chosen high. However, the drawback is that with increasing \emph{n\_init} the calculation time increases unacceptably high. Having chosen \emph{n\_init} too high, the clustering part becomes the new bottleneck of the entire \gls{cnmc} pipeline. \newline
208
+
209
+ To conduct the parameter study, clustering was performed using the following \emph{n\_init} values:
210
+ $\text{\emph{n\_init}} = \{100,\, 200, \, 400,\, 800,\, 1000, \, 1200, \, 1500 \}$.
211
+ Some results are presented in figures \ref{fig_15} to \ref{fig_20}.
212
+ It can be observed that when all the different \emph{n\_init} values are compared, visually no big difference in the placing of the centroids can be perceived.
213
+ A graphical examination is sufficient because even with \emph{n\_init} values that differ only by one ($n_{init,1} - n_{init,2} = 1 $), the centroid positions are still expected to vary slightly.
214
+ Simply put, only deviations on a macroscopic scale, which can be noted visually are searched for. As a conclusion it can be said that $\text{\emph{n\_init}} = 100$ and $\text{\emph{n\_init}} = 1500$ can be considered as an equally valuable clustering outcome.
215
+ Hence, the computational expense can be reduced by deciding on a reasonably small value for \emph{n\_init}.\newline
216
+
217
+ The second aim of this subsection was to highlight the fact that the centroids are labeled randomly. For this purpose, the depicted figures \ref{fig_15} to \ref{fig_20} shall be examined. Concretely, any of the referred figures can be compared with the remaining figures to be convinced that the labeling is not obeying any evident rule.
218
+
219
+
220
+ % ==============================================================================
221
+ % ============================ PLTS ============================================
222
+ % ==============================================================================
223
+ \begin{figure}[!h]
224
+ \begin{minipage}[h]{0.47\textwidth}
225
+ \centering
226
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/0_N_Study/0_K_100.pdf}
227
+ \caption{Lorenz \eqref{eq_6_Lorenz}, $\beta =28$, $\text{n\_init}= 100$}
228
+ \label{fig_15}
229
+ \end{minipage}
230
+ \hfill
231
+ \begin{minipage}{0.47\textwidth}
232
+ \centering
233
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/0_N_Study/1_K_200.pdf}
234
+ \caption{Lorenz \eqref{eq_6_Lorenz}, $\beta =28$, $\text{n\_init}= 200$}
235
+ \label{fig_16}
236
+ \end{minipage}
237
+ \end{figure}
238
+
239
+ \begin{figure}[!h]
240
+ \begin{minipage}[h]{0.47\textwidth}
241
+ \centering
242
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/0_N_Study/2_K_400.pdf}
243
+ \caption{Lorenz \eqref{eq_6_Lorenz}, $\beta =28$, $\text{n\_init}= 400$}
244
+ \label{fig_17}
245
+ \end{minipage}
246
+ \hfill
247
+ \begin{minipage}{0.47\textwidth}
248
+ \centering
249
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/0_N_Study/3_K_1000.pdf}
250
+ \caption{Lorenz \eqref{eq_6_Lorenz}, $\beta =28$, $\text{n\_init}= 1000$}
251
+ \label{fig_18}
252
+ \end{minipage}
253
+ \end{figure}
254
+
255
+ \begin{figure}[!h]
256
+ \begin{minipage}[h]{0.47\textwidth}
257
+ \centering
258
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/0_N_Study/4_K_1200.pdf}
259
+ \caption{Lorenz \eqref{eq_6_Lorenz}, $\beta =28$, $\text{n\_init}= 1200$}
260
+ \label{fig_19}
261
+ \end{minipage}
262
+ \hfill
263
+ \begin{minipage}{0.47\textwidth}
264
+ \centering
265
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/0_N_Study/5_K_1500.pdf}
266
+ \caption{Lorenz \eqref{eq_6_Lorenz}, $\beta =28$, $\text{n\_init}= 1500$}
267
+ \label{fig_20}
268
+ \end{minipage}
269
+ \end{figure}
270
+ % ==============================================================================
271
+ % ============================ PLTS ============================================
272
+ % ==============================================================================
273
+
Data/0_Latex_True/2_Task/3_Tracking.tex ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \section{Tracking}
2
+ \label{sec_2_3_Tracking}
3
+ In this section, it is the pursuit to explain the third step, tracking, by initially answering the following questions.
4
+ What is tracking, why is it regarded to be complex and why is it important?
5
+ In the subsection \ref{subsec_2_3_1_Tracking_Workflow} the final workflow will be elaborated. Furthermore, a brief discussion on the advancements in tracking of \gls{cnmc} to \emph{first CNMc} shall be given.
6
+ Since the data and workflow of tracking are extensive, for the sake of a better comprehension the steps are visually separated with two horizontal lines in the upcoming subsection.
7
+ The lines introduce new tracking subtasks, which are intended to provide clear guidance to orient readers within the workflow.
8
+ Note, the tracking results will be presented in subsection \ref{sec_3_1_Tracking_Results}. \newline
9
+
10
+ To define the term tracking some explanations from subsection \ref{subsec_1_1_3_first_CNMc} shall be revisited.
11
+ Through the clustering step, each centroid is defined with a label.
12
+ The label allocation is performed randomly as shown in subsection \ref{subsec_2_2_1_Parameter_Study}.
13
+ Thus, matching centroid labels from one model parameter value $\beta_i$ to
14
+ another model parameter value $\beta_j$, where $i \neq j$, becomes an issue.
15
+ In order first, to understand the term tracking, figure \ref{fig_21} shall be considered.
16
+ The centroids of the Lorenz system \eqref{eq_6_Lorenz} for two $\beta$ values $\beta_i = 31.333$ in green and $\beta_j = 32.167$ in yellow are plotted next to each other.
17
+ The objective is to match each centroid of $\beta_i$ with one corresponding centroid of $\beta _j$.
18
+ It is important to understand that the matching must be done across the two $\beta $ values $\beta_i$ and $\beta_j$ and not within the same $\beta$.\newline
19
+
20
+ % ==============================================================================
21
+ % ============================ PLTS ============================================
22
+ % ==============================================================================
23
+ \begin{figure}[!h]
24
+ \centering
25
+ \includegraphics[width =0.8\textwidth]{2_Figures/2_Task/1_Tracking/0_Matching.pdf}
26
+ \caption{Unrealistic tracking example for the Lorenz system with $\beta_i=31.333, \, \beta_j=32.167, \, K = 10$}
27
+ \label{fig_21}
28
+ \end{figure}
29
+
30
+ By inspecting the depicted figure closer it can be observed that each green centroid $\beta_i$ has been connected with a corresponding yellow centroid $\beta_j$ with an orange line.
31
+ The centroids which are connected through the orange lines shall be referred to as \emph{inter} $\beta$ \emph{connected} centroids.
32
+ Determining the \emph{inter} $\beta$ \emph{connected} centroids is the outcome of tracking. Thus, it is matching centroids across different model parameter values $\beta$ based on their corresponding distance to each other. The closer two \emph{inter} $\beta $ centroids are, the more likely they are to be matched.
33
+ The norm for measuring distance can be chosen from
34
+ one of the 23 possible norms defined in \emph{SciPy} \cite{2020SciPy-NMeth}.
35
+ However, the default metric is the Euclidean norm, which is defined in equation \eqref{eq_16}.\newline
36
+
37
+ \begin{equation}
38
+ \label{eq_16}
39
+ d(\vec x, \vec y) = \sqrt[]{\sum_{i=1}^n \left(\vec{x}_i - \vec{y}_i\right)^2}
40
+ \end{equation}
41
+
42
+ \vspace{0.2cm}
43
+ The orange legend on the right side of figure \ref{fig_21} outlines the tracked results.
44
+ In this rare and not the general case, the \emph{inter} $\beta$ labeling is straightforward in two ways. First, the closest centroids from $\beta_i$ to $\beta_j$ have the same label. Generally, this is not the case, since the centroid labeling is assigned randomly.
45
+ Second, the \emph{inter} $\beta$ centroid positions can be matched easily visually.
46
+ Ambiguous regions, where visually tracking is not possible, are not present.
47
+ To help understand what ambiguous regions could look like, figure \ref{fig_22} shall be viewed. It illustrates the outcome of the Lorenz system \eqref{eq_6_Lorenz} with $\beta_i=39.004,\, \beta_j = 39.987$ and with a number of centroids of $K= 50$.
48
+ Overall, the tracking seems to be fairly obvious, but paying attention to the centroids in the center, matching the centroids becomes more difficult.
49
+ This is a byproduct of the higher number of centroids $K$.
50
+ With more available centroids, more centroids can fill a small area.
51
+ As a consequence, multiple possible reasonable matchings are allowed.
52
+ Note, that due to spacing limitations, not all tracking results are listed in the right orange legend of figure \ref{fig_22}.
53
+ The emergence of such ambiguous regions is the main driver why tracking is considered to be complex.\newline
54
+
55
+ \begin{figure}[!h]
56
+ \centering
57
+ \includegraphics[width =\textwidth]
58
+ {2_Figures/2_Task/1_Tracking/2_Ambiguous_Regions.pdf}
59
+ \caption{Ambiguous regions in the tracking example for the Lorenz system with $\beta_i=39.004,\, \beta_j = 39.987,\, K= 50$}
60
+ \label{fig_22}
61
+ \end{figure}
62
+
63
+
64
+ In general, it can be stated that the occurrence of ambiguous regions can be regulated well with the number of centroids $K$.
65
+ $K$ itself depends on the underlying dynamical system.
66
+ Thus, $K$ should be only as high as required to capture the complexity of the dynamical system.
67
+ Going above that generates unnecessary many centroids in the state space.
68
+ Each of them increases the risk of enabling ambiguous regions to appear. Consequently, incorrect tracking results can arise.\newline
69
+
70
+
71
+ In figure \ref{fig_23} a second example of tracked outcome for the Lorenz system \eqref{eq_6_Lorenz} with $\beta_i=30.5,\, \beta_j=31.333, \, K = 10$ is given.
72
+ Here it can be inspected that the immediate \emph{inter} $\beta $ centroid neighbors do not adhere to the same label. Hence, it is representative of a more general encountered case. The latter is only true when the $K$ is chosen in a reasonable magnitude. The reason why centroids are tracked by employing distance measurements is grounded in the following.
73
+ If the clustering parameter \emph{n\_init} is chosen appropriately (see \ref{subsec_2_2_1_Parameter_Study}), the positions of the centroids are expected to change only slightly when $\beta $ is changed.
74
+ In simpler terms, a change in $\beta$ should not move a centroid much, if the clustering step was performed satisfactorily in the first place.
75
+
76
+ \begin{figure}[!h]
77
+ \centering
78
+ \includegraphics[width =0.8\textwidth]{2_Figures/2_Task/1_Tracking/1_Non_Matching.pdf}
79
+ \caption{Realistic tracking example for the Lorenz system with $\beta_i=30.5$ and $\beta_j=31.333$}
80
+ \label{fig_23}
81
+ \end{figure}
82
+
83
+ The next point is to answer the question, of why tracking is of such importance to \gls{cnmc}.
84
+ The main idea of \gls{cnmc} is to approximate dynamical systems and allow the prediction of trajectories for unseen $\beta_{unseen}$.
85
+ The motion, i.e., the trajectory, is replicated by the movement from one centroid to another centroid. Now, if the centroids are labeled wrong, the imitation of the motion is wrong as well.
86
+ For instance, imagine a dynamical system, where the trajectory
87
+ is comprised of solely one movement.
88
+ Namely, moving from left to right.
89
+ Following that, labeling the left centroid $c_l$ to be the right centroid $c_r$, would fully change the direction of the movement, i.e. $(c_l \rightarrow c_r) \neq (c_r \rightarrow c_l)$.
90
+ In one sentence, the proper tracking is vital because otherwise \gls{cnmc} cannot provide reliably predicted trajectories. \newline
91
+
92
+ \subsection{Tracking workflow}
93
+ \label{subsec_2_3_1_Tracking_Workflow}
94
+ In this subsection, the main objective is to go through the designed tracking workflow. As side remarks, other attempted approaches to tracking and the reason for their failure will be mentioned briefly.\newline
95
+
96
+
97
+ To recapitulate on the term tracking, it is a method to match centroids across a set of different model parameter values $\vec{\beta}$ based on their respective distances. One obvious method for handling this task could be \gls{knn}. However, having implemented it, undesired results were encountered.
98
+ Namely, one centroid label could be assigned to multiple centroids within the same $\beta$. The demand for tracking, on the other hand, is that, e.g., with $K=10$, each of the 10 available labels is found exactly once for one $\beta $.
99
+ Therefore, it can be stated that \gls{knn} is not suitable for tracking, as it might not be possible to impose \gls{knn} to comply with the tracking demand.\newline
100
+
101
+ The second approach was by applying \gls{dtw}. The conclusion is that DTW's tracking results highly depended on the order in which the inter $\beta $ distances are calculated. Further, it can be stated that DTW needs some initial wrong matching before the properly tracked outcomes are provided.
102
+ The initial incorrect matching can be seen as the reason, why DTW is mostly used when very long signals, as in speech recognition, are provided.
103
+ In these cases, some mismatching is tolerated. For \gls{cnmc}, where only a few $K$ centroids are available, a mismatch is strongly unwelcome.\newline
104
+
105
+ The third method was based on the sequence of the labels.
106
+ The idea was that the order of the movement from one centroid to another centroid is known. In other terms, if the current position is at centroid $c_i$ and the next position for centroid $c_{i+1}$ is known. Assuming that the sequences across the $\vec{\beta}$ should be very similar to each other, a majority vote should return the correct tracking results. It can be recorded that this was not the case and the approach was dismissed.\newline
107
+
Data/0_Latex_True/2_Task/4_Track_Workflow.tex ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ After having explained 3 methods, which did not lead to a satisfactory outcome, the data and workflow of the final successful approach shall be presented.
2
+ First very concisely, followed by an in-depth account.
3
+ For that, figure \ref{fig_24_Tracking_Workflow} shall be analyzed.
4
+ The initial input is obtained through \emph{settings.py}, where execution, storage and plotting attributes are defined.
5
+ For the further sub-steps, it shall be noted that the index big O stands for output of the respective sub-step.
6
+ The clustered results from step two, described in section \ref{sec_2_3_Clustering}, are used as input for the so-called ordering step.
7
+ The ordered state can be stored and plotted if desired and exploited to calculate a cost matrix $\bm A (\vec{\beta})$. \newline
8
+
9
+ % =============================================================================
10
+ % ================ Tracking Workflow ==========================================
11
+ % =============================================================================
12
+ \begin{sidewaysfigure} [!]
13
+ \hspace*{-2cm}
14
+ \resizebox{\textwidth}{!}{
15
+ \input{2_Figures/2_Task/1_Tracking/3_Track.tikz}
16
+ }
17
+ \caption{General data and workflow overview of the third step, tracking}
18
+ \label{fig_24_Tracking_Workflow}
19
+ \end{sidewaysfigure}
20
+
21
+ The tracking demand is applied on $\bm A (\vec{\beta})$, i.e., each row element must be matched to exactly one column element with the constraint that their additive sum is minimized.
22
+ It will return a suggested best path, i.e., the proposed optimized tracking path.
23
+ Since it is possible that the proposed optimized tracking path is not feasible concerning a linking condition, it undergoes a validity check.
24
+ If required the suggested path will be chopped off and replaced such that the linking condition is met. The final path is then imposed to a transition such that the centroid labeling across all available $\vec{\beta}$ matches.
25
+ The transformed final paths are designated as the tracked outcome and can be saved and plotted.\newline
26
+
27
+ Since the fast description leaves some open questions, the in-depth explanation shall be tackled. Defining \emph{settings.py} is
28
+ analogously done to the two previous steps, i.e. data generation \ref{sec_2_2_Data_Gen} and clustering \ref{sec_2_3_Clustering}.
29
+ Therefore, accounts regarding the sub-tasks \emph{settings.py} and the clustered data are not repeated.\newline
30
+
31
+ \textbf{1. Ordering$\,(\vec{\boldsymbol\beta})$}
32
+ \hrule
33
+ \vspace{0.05cm}
34
+ \hrule
35
+ \vspace{0.25cm}
36
+ The ordering of the clustered data can be understood by viewing figures \ref{fig_25_Clust} and \ref{fig_26_Ord}.
37
+ Both depict the clustered Lorenz system \eqref{eq_6_Lorenz} for $\beta = 30.5$.
38
+ The difference between the two figures is that figure \ref{fig_25_Clust} shows the clustering as it is obtained from the clustering step. It shall be referred to as the initial state.
39
+ Figure \ref{fig_26_Ord}, on the other hand, shows the ordered state, i.e. the state after applying the ordering sub-step. The labeling of the ordered step represents to some degree the actual movement of the trajectory.
40
+ It can be observed that moving from label $1$ up to $6$ in a consecutive manner the resulting trajectory is generating the left ear of the Lorenz system.
41
+ Analogously, moving from label $7$ up to $10$ produces the right ear of the Lorenz system. Furthermore, the transition from centroid $6$ to $7$ captures the transition from one ear to the other.\newline
42
+
43
+ \begin{figure}[!h]
44
+ %\vspace{0.5cm}
45
+ \begin{minipage}[h]{0.47\textwidth}
46
+ \centering
47
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/1_Tracking/4_Clus_30_5.pdf}
48
+ \caption{Initial State - centroids of the Lorenz system \eqref{eq_6_Lorenz} $\beta =30.5$}
49
+ \label{fig_25_Clust}
50
+ \end{minipage}
51
+ \hfill
52
+ \begin{minipage}{0.47\textwidth}
53
+ \centering
54
+ \includegraphics[width =\textwidth]{2_Figures/2_Task/1_Tracking/5_Ordered_30_5.pdf}
55
+ \caption{Ordered State - centroids of the Lorenz system \eqref{eq_6_Lorenz} $\beta =30.5$}
56
+ \label{fig_26_Ord}
57
+ \end{minipage}
58
+ \end{figure}
59
+
60
+ The way the ordered state is retrieved is as follows. The entire sequence of the motion along the centroids is available. In simpler terms, the first centroid from where the trajectory will start, all the upcoming centroids and the order in which they will be visited are known.
61
+ Therefore, the initial centroid can be labeled as $1$, the second as $2$ and so on.
62
+ However, it is important to note that with modifying one label of the trajectory sequence, the same label needs to be found in the entire sequence and modified as well.
63
+ Otherwise, the ordered-state is true for one turn and a wrong initial-ordered-state mixture is kept for the remaining turns.
64
+ Such an approach would also falsify the trajectory.
65
+ The labeling in the ordered state provides information about the trajectory.
66
+ Further, the original motion of the trajectory is untouched. Labeling the characteristic centroids with different numbers or with Greek letters does not impose any change on the original dynamics. For that to be fully true, the newly introduced labeling must be consistent across the entire sequence.
67
+ Although it is obvious, nevertheless \gls{cnmc} performs a sanity check, i.e., it is verified, whether the resulting trajectory in the ordered state is the same as the original trajectory.
68
+ Note, that all the same 4 types of plots stated in section \ref{sec_2_3_Clustering} are also available for visualizing the ordered state.
69
+ \newline
70
+
71
+ \textbf{2. Calculating $\bm A \, (\vec{\boldsymbol\beta})$ \& best path$\,(\vec{\boldsymbol\beta})$ }
72
+ \hrule
73
+ \vspace{0.05cm}
74
+ \hrule
75
+ \vspace{0.25cm}
76
+ In the next sub-task the cost or similarity matrix $\bm A(\vec{\beta})$ is calculated.
77
+ First, the assignment problem shall be elaborated.
78
+ Let $\beta_1$ and $\beta_2$ be two different model parameter values $\beta_1 \neq \beta_2$ and both shall consist of $K$ centroids. Each centroid is not only associated with a label but described fully with a position.
79
+ The goal is to match each centroid from $\beta_1$ to exactly one corresponding centroid from $\beta_2$ such that the overall spatial distance is minimized.
80
+ This idea was given as part of the definition of the term tracking itself.
81
+ The difference between tracking and the assignment problem is that first, tracking solves the assignment problem multiple times and thus the assignment problem is only a part of tracking.
82
+ Second, the tracked results are also feasible and transformed, which will be covered later in this subsection.\newline
83
+
84
+ For construction an illustrative cost matrix $\bm A(\vec{\beta})$,
85
+ 3 pairwise different $\beta$ values, $\beta_1, \, \beta_2, \, \beta_3$ with $\beta_1 \neq \beta_2 \neq \beta_3$ shall be considered.
86
+ Again, each $\beta_i$, where $i = \{1,2,3\}$, consists of $K$ centroid positions.
87
+ The assignment problem is solved by exploiting \emph{SciPy} \cite{2020SciPy-NMeth}.
88
+ Its solution, e.g., for $\beta_1$ and $\beta_2$ only matches the centroids from the two different $\beta$ such that the overall spatial distance is minimized.
89
+ The addition of the spatial distances of $\beta_1$ and $\beta_2$ shall be designated as the cost value $\beta_{i=1,j=2}$.
90
+ With this level of understanding, the similarity matrix given in equation \eqref{eq_17_Dist_A} can be read.\newline
91
+
92
+ \begin{equation}
93
+ \bm A_{3\times 3}\,(\vec{\beta}) =
94
+ \begin{bmatrix}
95
+ \beta_{1,1} & \beta_{1,2} & \beta_{1,3}\\
96
+ \beta_{2,1} &\beta_{2,2} & \beta_{2,3}\\
97
+ \beta_{3,1} & \beta_{3,2} &\beta_{3,3}
98
+ \end{bmatrix}
99
+ \label{eq_17_Dist_A}
100
+ \end{equation}
101
+
102
+ Considering equation \eqref{eq_18_Expl}, if the assignment problem is solved for equal $\beta \Rightarrow \beta_i = \beta_j$, the centroid positions overlap exactly.
103
+ As a consequence, the distance between all the centroids across the two same $\beta$ is zero.
104
+ Further, adding up the zero spatial distances yields a cost of zero $\beta_{i,i} = 0$.\newline
105
+
106
+ \begin{equation}
107
+ \begin{aligned}
108
+ i &= j \\
109
+ \Rightarrow \beta_i &= \beta_j \\
110
+ \Rightarrow \beta_{i,j} &= \beta_{i,i} = 0
111
+ \end{aligned}
112
+ \label{eq_18_Expl}
113
+ \end{equation}
114
+
115
+ The cost matrix $\bm A\,(\vec{\beta})$ compares each $\beta_i$ with all the remaining $\beta_j$, where $i = \{1, \,2, \cdots, n_{\beta}\}, \; j = \{1, \,2, \cdots, n_{\beta}\}$ and $ n_{\beta}$ denotes the number of the pairwise different $\vec{\beta}$.
116
+ The outcome of each possible comparison $\beta_i$ with $\beta_j$ is a cost value representing a similarity between $\beta_i$ and $\beta_j$.
117
+ Obviously, in the trivial case described above, $\beta_i = \beta_j$, the similarity is maximized and the cost is minimized.
118
+ To find the best path, i.e., proposed tracking results, the trivial entries on the diagonal entries must be prohibited. Obeying that the cost matrix $\bm A\,(\vec{\beta})$ can be reformulated as equation \eqref{eq_19}.
119
+ Moreover, $\bm A\,(\vec{\beta})$ is symmetrical, therefore computing one triangular part of the cost matrix is sufficient.
120
+ The triangular part can be filled by mirroring along the diagonal entries $\beta_{i,i}$ as outlined for the lower triangular matrix in equation \eqref{eq_19}.
121
+ \newline
122
+
123
+ \begin{equation}
124
+ \bm A_{3\times 3}\,(\vec{\beta}) =
125
+ \begin{bmatrix}
126
+ \infty & \beta_{1,2} & \beta_{1,3}\\
127
+ \beta_{2,1} = \beta_{1,2} & \infty & \beta_{2,3}\\
128
+ \beta_{3,1} = \beta_{1,3} & \beta_{3,2} =\beta_{2,3} & \infty\\
129
+ \end{bmatrix}
130
+ \label{eq_19}
131
+ \end{equation}
Data/0_Latex_True/2_Task/5_Track_Validity.tex ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The objective behind exploiting symmetry is to reduce computation time.
2
+ Having defined the cost matrix $\bm A\,(\vec{\beta})$ as given in equation \eqref{eq_19}, it can be used to again solve the assignment problem.
3
+ Its output is denoted as path$_O\,(\vec{\beta })$ in figure \ref{fig_24_Tracking_Workflow}.\newline
4
+
5
+ \textbf{3. Validity check}
6
+ \hrule
7
+ \vspace{0.05cm}
8
+ \hrule
9
+ \vspace{0.25cm}
10
+ The validity check can also be regarded as a feasibility investigation.
11
+ To grasp what the feasibility constraint is table \ref{tab_1} shall be analyzed.\newline
12
+
13
+ % ------------------------------------------------------------------------------
14
+ % ------------------------------ TABLE -----------------------------------------
15
+ % ------------------------------------------------------------------------------
16
+ \begin{table}[!h]
17
+
18
+ \begin{minipage}{.3\linewidth}
19
+ \caption{\emph{Direct feasible}}
20
+ \centering
21
+ \begin{tabular}{c|c}
22
+ \multicolumn{1}{c}{$\beta_i$} & \multicolumn{1}{c}{$\beta_j$} \\
23
+ \hline
24
+ \hline
25
+ 1 & 2 \\
26
+ \hline
27
+ 2 & 3 \\
28
+ \hline
29
+ 3 & 4 \\
30
+ \hline
31
+ 4 & 5 \\
32
+ \hline
33
+ 5 & 6 \\
34
+ \hline
35
+ 6 & 7
36
+ \label{tab_1}
37
+ \end{tabular}
38
+ \end{minipage}
39
+ \begin{minipage}{.3\linewidth}
40
+ \centering
41
+ \caption{feasible}
42
+ \begin{tabular}{c|c}
43
+ \multicolumn{1}{c}{$\beta_i$} & \multicolumn{1}{c}{$\beta_j$} \\
44
+ \hline
45
+ \hline
46
+ 1 & 2 \\
47
+ \hline
48
+ 2 & 3 \\
49
+ \hline
50
+ 3 & 6 \\
51
+ \hline
52
+ 4 & 7 \\
53
+ \hline
54
+ 5 & 4 \\
55
+ \hline
56
+ 6 & 5
57
+ \label{tab_2}
58
+ \end{tabular}
59
+ \end{minipage}
60
+ \begin{minipage}{.3\linewidth}
61
+ \centering
62
+ \caption{infeasible}
63
+ \begin{tabular}{c|c}
64
+ \multicolumn{1}{c}{$\beta_i$} & \multicolumn{1}{c}{$\beta_j$} \\
65
+ \hline
66
+ \hline
67
+ 1 & 2 \\
68
+ \hline
69
+ 2 & 1 \\
70
+ \hline
71
+ 3 & 5 \\
72
+ \hline
73
+ 4 & 6 \\
74
+ \hline
75
+ 5 & 7 \\
76
+ \hline
77
+ 6 & 3
78
+ \label{tab_3}
79
+ \end{tabular}
80
+ \end{minipage}
81
+ \caption{Examples for feasible and infeasible best tracking paths}
82
+ \label{tab_4}
83
+ \end{table}
84
+
85
+ % ------------------------------------------------------------------------------
86
+ % ------------------------------ TABLE -----------------------------------------
87
+ % ------------------------------------------------------------------------------
88
+
89
+
90
+ It can be observed that in total the 7 model parameter values $(\vec{\beta}, \, n_{\beta}=7)$ were chosen.
91
+ The overall goal is to provide one centroid label and get its corresponding centroid positions across all the 7 model parameter values $\vec{\beta }$.
92
+ Therefore, a \emph{feasible linking path}, which allows the linking of all centroids of all $\beta_i$ to all the other $\beta_{\vec{j}}$ centroids, is required.
93
+ The latter description shall be elaborated step-wise in the following.
94
+ For instance, if the first $\beta_i = 1$, a linking to the remaining $\beta_{\vec{j}} = \{2, \, 3, \, 4, \, 5, \, 6, \, 7 \}$ is mandatory.
95
+ The first item of table \ref{tab_1} outlines that the centroids from $\beta_i = 1$ are tracked with the centroids $\beta_j=2$.
96
+ In other words, a clear relationship between the centroids across $\beta_i = 1$ and $\beta_j=2$ is established. Leveraging this relationship, the proper tracked centroid position across the two $ \beta = 1$ and $\beta= 2$, are returned.\newline
97
+
98
+
99
+ Because the centroid labels of $\beta_i = 1$ are used as the reference to match the centroid labels of $\beta_j=2$, the \emph{known linked path} can be stated as $L_{known}= \{1,\, 2\}$. The next model parameter value $\beta_j = 3$ and it is tracked with $\beta_i =2$.
100
+ Since $\beta_i =2$ is already incorporated in the \emph{known linked path}, the \emph{known linking path} can be extended to $L_{known}= \{1,\, 2, \, 3\}$. The next model parameter value $\beta_j = 4$ and its corresponding tracked counterpart is $\beta_i =3$.
101
+ Again, $\beta_i =3$ is found in the \emph{known linked path}, therefore the \emph{known linking path} can be extended to $L_{known}= \{1,\, 2, \, 3, \, 4\}$. The next model parameter value $\beta_j = 5$ and its corresponding tracked $\beta_i =4$ and so this procedure can be performed until the last $\beta_j = 7$.
102
+ Having completed the scheme, the \emph{known linking path} is of full rank, i.e. with $n_{\beta}= 7$ all the 7 pairwise different model parameter values $\vec{\beta}$ are captured in the \emph{known linking path} $L_{known}$.
103
+ The information gained through a full ranked $L_{known, full}$ is that all centroids of all $\beta_i$ are linked to all the other $\beta_{\vec{j}}$ centroids.
104
+ \newline
105
+
106
+ After having introduced the full rank $L_{known, full}$, the more straightforward definition for \emph{feasible linking path} can be stated as follows.
107
+ A \emph{feasible linking path} is given when $L_{known}$ has full rank $L_{known, full}$. \emph{Direct feasible} cases as shown in table \ref{tab_1} are one way of \emph{feasible linking paths}. Another, more general feasible case is provided in table \ref{tab_2}. Here, up to $\beta_i = 2$ and $\beta_j = 3$ the scheme of the \emph{direct feasible} linking path is followed.
108
+ However, with $\beta_i = 4$ and $\beta_j = 7$ the obstacle that $\beta_j = 7$ is not present in the current $L_{known}= \{1,\, 2,\, 3,\, 6\}$, occurs.
109
+ This issue can be circumvented by viewing $\beta_i = 6$ and $\beta_j = 5$.
110
+ Since $\beta_i = 6$ is in the current state of $L_{known}= \{1,\, 2,\, 3,\, 6\}$, $L_{known}$ can be extended with $\beta_j = 5$, i.e., $L_{known}= \{1,\, 2,\, 3,\, 5, \, 6\}$.
111
+ Note, having acquired the relationship between $\beta_i$ to $\beta_j$ is the same as knowing the relationship between $\beta_j$ to $\beta_i$.
112
+ Applying the newly added linking perspective, it can be seen that table \ref{tab_2} also demonstrates a full rank $L_{known, full}$ or a \emph{feasible linking path}.\newline
113
+
114
+ In table \ref{tab_3} an example for an incomplete linking path or an \emph{infeasible linking path} is provided, where $L_{known}$ has no full rank.
115
+ The aim of the sub-task, validity, is to determine, whether the proposed optimized tracking path is feasible by extracting information about the rank of the final $L_{known}$.
116
+ Internally in \gls{cnmc}, this is achieved through logical queries utilizing mainly if statements.
117
+ One major advantage which was not mentioned when the examples above were worked out is the following. $\beta_{i,ref} = 1$ is not necessarily the best choice for being the reference.
118
+ The reference $\beta_{i,ref}$ is chosen such that it has the overall highest similarity or least cost to all the other $(n_{\beta} -1)$ available $\vec{\beta}$.
119
+ Hence, a \emph{feasible linking path} with a lower cost sum is generated.\newline
120
+
121
+ This feature of a flexible reference is only providing better \emph{feasible linking paths}, when the proposed optimized tracking path is infeasible, which in general is the case. Therefore, in most cases, it is advantageous to leverage the flexible reference.
122
+ One of the outputs of \gls{cnmc} is the percentage cost savings that could be achieved with the flexible approach. In other words, by what percentage could the costs be decreased when the flexible approach is compared with the rigid approach.
123
+ In the rigid approach, $\beta_{i,ref} = 1$ is chosen as the reference.
124
+ Further, in the rigid approach, the $\vec{\beta}$ are linked in increasing order, i.e. $\beta_1$ with $\beta_2$, $\beta_2$ with $\beta_3$, $\beta_3$ with $\beta_4$ and so on. Exploiting the flexible approach yields cost savings of around $20\%$ to $50\%$. An example of coping with a flexible reference is provided in the description of the following sub-step.
125
+ \newline
126
+
127
+
128
+ \textbf{4. Truncate, final path}
129
+ \hrule
130
+ \vspace{0.05cm}
131
+ \hrule
132
+ \vspace{0.25cm}
133
+ If the proposed optimized tracking path is feasible (\emph{feasible linking path}), i.e. $L_{known}$ has full rank $L_{known, full}$, the truncation can be skipped.
134
+ Consequently, the final path is the same as the proposed optimized tracking path.
135
+ However, as mentioned, in general, this is not the expected case.
136
+ Therefore, an example with an incomplete $L_{known}$ shall be portrayed to explain the workflow with active truncation.\newline
137
+
138
+
139
+ First, the final incomplete $L_{known}$ will be used as the starting point.
140
+ It will be filled until full rank is reached.
141
+ Allowing a flexible reference $\beta_{i,ref}$ the incomplete \emph{known linked path} could be, e.g., $L_{known} = \{3, \, 4, \, 5\}$.
142
+ To get full rank, the remaining $L_{missing} = \{1, \, 2, \, 6, \, 7\}$ are inferred through the following concept.
143
+ The cost $\beta_{i,j}$ between all $L_{known}$ and $L_{missing}$ are known
144
+ through the cost matrix $\bm A\,(\vec{\beta })$.
145
+ The one $\beta_j$ entry from $L_{missing}$ which has the highest similarity or lowest cost $\beta_{i,j}$ to the one entry $\beta_i$ of the $L_{known}$, is removed from $L_{missing}$ and added to $L_{known}$.
146
+ Now, the same procedure can be applied to the modified $L_{known}$ and $L_{missing}$ until $L_{missing}$ is empty and $L_{known}$ has reached full rank.
147
+ The inferred $L_{known, full}$ is then used as the final path and sent to the next sub-task.\newline
148
+
149
+
150
+ \textbf{5. Transform}
151
+ \hrule
152
+ \vspace{0.05cm}
153
+ \hrule
154
+ \vspace{0.25cm}
155
+ Once the final path is determined, it is known which $\beta_i$ is linked to which $\beta_j$.
156
+ For all the $\beta_{i},\, \beta_j$ matches in the final path, the linear sum assignment problem is solved again.
157
+ Two illustrative solutions are provided in section \ref{sec_3_1_Tracking_Results}.
158
+ For further explanation, table \ref{tab_2} shall be revisited.
159
+ The first $\beta_{i},\, \beta_j$ link is defined as $\beta_i = 1$ and $\beta_j = 2$. Moreover, for this scenario, it is assumed that $\beta_i = \beta_{ref} = 1$. Therefore, the $\beta_{i} = 1,\, \beta_j= 2$ is denoted as a direct match.
160
+ In simpler terms, a direct pairwise $\beta_{i},\, \beta_j$ relation, is obtained when $\beta_i$ or $\beta_j$ is directly traced back to the reference.
161
+ For a pairwise direct $\beta_{i},\, \beta_j$ link the
162
+ transformation, i.e., relabeling without changing the dynamics of the system, as explained for the ordering sub-step, is applied directly and only once.\newline
163
+
164
+
165
+ Now, considering the next $\beta_{i},\, \beta_j$ match, i.e., $\beta_i = 2$ and $\beta_j = 3$.
166
+ Linking the centroids from $\beta_j = 3$ to $\beta_i = 2$ directly would have no connection to the reference $\beta_{ref} = 1$.
167
+ Therefore, the solution to its linear sum assignment problem must experience the same transformations as $\beta_i = 2$ did.
168
+ In this case it is only the transformation caused by the $(\beta_i = 1,\,\beta_j = 2)$ match.
169
+ The idea behind the transformation stays the same, however, if no direct relation is seen, respective multiple transformations must be performed.
170
+ Once the final path has undergone all required transformations, the output is the desired tracked state.
171
+ The output can be stored and plotted if desired.
172
+ Some types of plots, which can be generated, will be shown in the section
173
+ \ref{sec_3_1_Tracking_Results}.\newline
174
+
175
+ Finally, in short, the difference between \emph{first CNMc} and this \gls{cnmc} version shall be mentioned.
176
+ The proposed tracking algorithm is neither restricted to any dimension nor to a specific dynamical system. Thus, two major limitations of \emph{first CNMc} could be removed in the current \gls{cnmc} version.
177
+ Also, the flexible approach yields a better feasible linking path.
Data/0_Latex_True/2_Task/6_Modeling.tex ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \section{Modeling}
2
+ \label{sec_2_4_Modeling}
3
+ In this section, the fourth main step of \gls{cnmc}, i.e., modeling, is elaborated.
4
+ The data and workflow is described in figure \ref{fig_42}.
5
+ It comprises two main sub-tasks, which are modeling the \glsfirst{cpevol} and modeling the transition properties tensors $\bm Q / \bm T$.
6
+ The settings are as usually defined in \emph{settings.py} and the extracted attributes are distributed to the sub-tasks.
7
+ Modeling the \gls{cpevol} and the $\bm Q/ \bm T$ tensors can be executed separately from each other.
8
+ If the output of one of the two modeling sub-steps is at hand, \gls{cnmc} is not forced to recalculate both modeling sub-steps.
9
+ Since the tracked states are used as training data to run the modeling they are prerequisites for both modeling parts.
10
+ The modeling of the centroid position shall be explained in the upcoming subsection \ref{subsec_2_4_1_CPE}, followed by the explanation of the transition properties in subsection \ref{subsec_2_4_2_QT}.
11
+ A comparison between this \gls{cnmc} and the \emph{first CNMc} version is provided at the end of the respective subsections.
12
+ The results of both modeling steps can be found in section
13
+ \ref{sec_3_2_MOD_CPE} and \ref{sec_3_3_SVD_NMF}.
14
+
15
+ \begin{figure} [!h]
16
+ \hspace*{-4cm}
17
+ \resizebox{1.2\textwidth}{!}{
18
+ \input{2_Figures/2_Task/2_Modeling/0_Modeling.tikz}
19
+ }
20
+ \caption{Data and workflow of the fourth step: Modeling}
21
+ \label{fig_42}
22
+ \end{figure}
23
+
24
+
25
+ \subsection{Modeling the centroid position evolution}
26
+ \label{subsec_2_4_1_CPE}
27
+ In this subsection, the modeling of the \gls{cpevol} is described.
28
+ The objective is to find a surrogate model, which returns all $K$ centroid positions for an unseen $\beta_{unseen}$.
29
+ The training data for this are the tracked centroids from the previous step, which is described in section \ref{sec_2_3_Tracking}.
30
+ To explain the modeling of the \emph{CPE}, figure \ref{fig_43} shall be inspected.
31
+ The model parameter values which shall be used to train the model $\vec{\beta_{tr}}$ are used for generating a so-called candidate library matrix $\boldsymbol{\Theta}\,(\vec{\beta_{tr}})$. The candidate library matrix $\boldsymbol{\Theta}\,(\vec{\beta_{tr}})$ is obtained making use of a function of \emph{pySindy} \cite{Silva2020,Kaptanoglu2022,Brunton2016}.
32
+ In \cite{Brunton2016} the term $\boldsymbol{\Theta}\,(\vec{\beta_{tr}})$ is explained well. However, in brief terms, it allows the construction of a matrix, which comprises the output of defined functions.
33
+ These functions could be, e.g., a linear, polynomial, trigonometrical or any other non-linear function. Made-up functions that include logical conditions can also be applied. \newline
34
+
35
+ \begin{figure} [!h]
36
+ \hspace*{-4cm}
37
+ \resizebox{1.2\textwidth}{!}{
38
+ \input{2_Figures/2_Task/2_Modeling/1_Pos_Mod.tikz}
39
+ }
40
+ \caption{Data and workflow of modeling the \glsfirst{cpevol}}
41
+ \label{fig_43}
42
+ \end{figure}
43
+
44
+ Since, the goal is not to explain, how to operate \emph{pySindy} \cite{Brunton2016}, the curious reader is referred to the \emph{pySindy} very extensive online documentation and \cite{Silva2020,Kaptanoglu2022}.
45
+ Nevertheless, to understand $\boldsymbol{\Theta}\,(\vec{\beta_{tr}})$ equation \eqref{eq_20} shall be considered.
46
+ In this example, 3 different functions, denoted as $f_i$ in the first row, are employed.
47
+ The remaining rows are the output for the chosen $f_i$.
48
+ Furthermore, $n$ is the number of samples or the size of $\vec{\beta_{tr} }$, i.e., $n_{\beta,tr} $ and $m$ denotes the number of the features, i.e., the number of the functions $f_i$. \newline
49
+
50
+ \begin{equation}
51
+ \boldsymbol{\Theta_{exampl(n \times m )}}(\,\vec{\beta_{tr}}) =
52
+ % \renewcommand\arraystretch{3}
53
+ \renewcommand\arraycolsep{10pt}
54
+ \begin{bmatrix}
55
+ f_1 = \beta & f_2 = \beta^2 & f_2 = cos(\beta)^2 - exp\,\left(\dfrac{\beta}{-0.856} \right) \\[1.5em]
56
+ 1 & 1^2 & cos(1)^2 - exp\,\left(\dfrac{1}{-0.856} \right) \\[1.5em]
57
+ 2 & 2^2 & cos(2)^2 - exp\,\left(\dfrac{2}{-0.856} \right) \\[1.5em]
58
+ \end{bmatrix}
59
+ \label{eq_20}
60
+ \end{equation}
61
+
62
+ The actual candidate library matrix $\boldsymbol{\Theta}\,(\vec{\beta_{tr}})$ incorporates a quadratic polynomial, the inverse $ \frac{1}{\beta}$, the exponential $exp(\beta)$ and 3 frequencies of cos and sin, i.e., $cos(\vec{\beta}_{freq}), \ sin(\vec{\beta}_{freq})$, where $\vec{\beta}_{freq} = [1, \, 2,\, 3]$.
63
+ There are much more complex $\boldsymbol{\Theta}\,(\vec{\beta_{tr}})$ available in \gls{cnmc}, which can be selected if desired.
64
+ Nonetheless, the default $\boldsymbol{\Theta}\,(\vec{\beta_{tr}})$ is chosen as described above.
65
+ Once $\boldsymbol{\Theta}\,(\vec{\beta_{tr}})$ is generated, the system of equations \eqref{eq_21} is solved.
66
+ Note, this is very similar to solving the well-known $\bm A \, \vec{x} = \vec{y}$ system of equations.
67
+ The difference is that the vectors $\vec{x}, \, \vec{y}$ can be vectors in the case of \eqref{eq_21} as well, but in general, they are the matrices $\bm{X} ,\, \bm Y$, respectively. The solution to the matrix $\bm{X}$ is the desired output.
68
+ It contains the coefficients which assign importance to the used functions $f_i$.
69
+ The matrix $\bm Y$ contains the targets or the known output for the chosen functions $f_i$.
70
+ Comparing $\bm A$ and $ \boldsymbol{\Theta}\,(\vec{\beta_{tr}})$ mathematically, no difference exists.\newline
71
+
72
+ \begin{equation}
73
+ \boldsymbol{\Theta}\,(\vec{\beta_{tr}}) \: \bm X = \bm Y
74
+ \label{eq_21}
75
+ \end{equation}
76
+
77
+ With staying in the \emph{pySindy} environment, the system of equations \eqref{eq_21} is solved by means of the optimizer \emph{SR3}, which is implemented in \emph{pySindy}.
78
+ Details and some advantages of the \emph{SR3} optimizer can be found in \cite{SR3}. Nevertheless, two main points shall be stated.
79
+ It is highly unlikely that the $\boldsymbol{\Theta}\,(\vec{\beta_{tr}}),\: \bm X,\, \bm Y$ configuration is going to lead to a well-posed problem, i.e., the number of equations is equal to the number of unknowns and a unique solution exists.
80
+ In most cases the configuration will be ill-posed, i.e., the number of equations is not equal to the number of unknowns.
81
+ In the latter case, two scenarios are possible, the configuration could result in an over-or under-determined system of equations.\newline
82
+
83
+ For an over-determined system, there are more equations than unknowns.
84
+ Thus, generally, no outcome that satisfies equation \eqref{eq_21} exists.
85
+ In order to find a representation that comes close to a solution, an error metric is defined as the objective function for optimization.
86
+ There are a lot of error metrics or norms, however, some commonly used \cite{Brunton2019} are given in equations \eqref{eq_22} to \eqref{eq_24}, where $f(x_k)$ are true values of a function and $y_k$ are their corresponding predictions.
87
+ The under-determined system has more unknown variables than equations, thus infinitely many solutions exist.
88
+ To find one prominent solution, again, optimization is performed.
89
+ Note, for practical application penalization or regularization parameter are exploited as additional constraints within the definition of the optimization problem.
90
+ For more about over- and under-determined systems as well as for deploying optimization for finding a satisfying result the reader is referred to \cite{Brunton2019}.\newline
91
+
92
+ \begin{equation}
93
+ E_{\infty} = \max_{1<k<n} |f(x_k) -y_k | \quad \text{Maximum Error} \;(l_{\infty})
94
+ \label{eq_22}
95
+ \end{equation}
96
+
97
+ \vspace{0.1cm}
98
+ \begin{equation}
99
+ E_{1} = \frac{1}{n} \sum_{k=1}^{n} |f(x_k) -y_k | \quad \text{Mean Absolute Error} \;(l_{1})
100
+ \label{eq_23}
101
+ \end{equation}
102
+
103
+ \vspace{0.1cm}
104
+ \begin{equation}
105
+ E_{2} = \sqrt{\frac{1}{n} \sum_{k=1}^{n} |f(x_k) -y_k |^2 } \quad \text{Least-squares Error} \;(l_{2})
106
+ \label{eq_24}
107
+ \end{equation}
108
+ \vspace{0.1cm}
109
+
110
+ The aim for modeling \emph{CPE} is to receive a regression model, which is sparse, i.e., it is described through a small number of functions $f_i$.
111
+ For this to work, the coefficient matrix $\bm X$ must be sparse, i.e., most of its entries are zero.
112
+ Consequently, most of the used functions $f_i$ would be inactive and only a few $f_i$ are actively applied to capture the \emph{CPE} behavior.
113
+ The $l_1$ norm as defined in \eqref{eq_23} and the $l_0$ norm are metrics which promote sparsity.
114
+ In simpler terms, they are leveraged to find only a few important and active functions $f_i$.
115
+ The $l_2$ norm as defined in \eqref{eq_24} is known for its opposite effect, i.e. to assign importance to a high number of $f_i$.
116
+ The \emph{SR3} optimizer is a sparsity promoting optimizer, which deploys $l_0$ and $l_1$ regularization.\newline
117
+
118
+ The second point which shall be mentioned about the \emph{SR3} optimizer is that it can cope with over-and under-determined systems and solves them without any additional input.
119
+ One important note regarding the use of \emph{pySindy} is that \emph{pySindy} in this thesis is not used as it is commonly. For modeling the \emph{CPE} only the modules for generating the candidate library matrix $\boldsymbol{\Theta}\,(\vec{\beta_{tr}})$ and the \emph{SR3} optimizer are utilized.\newline
120
+
121
+ Going back to the data and workflow in figure \ref{fig_43}, the candidate library matrix $\boldsymbol{\Theta}\,(\vec{\beta_{tr}})$ is generated.
122
+ Furthermore, it also has been explained how it is passed to \emph{pySindy} and how \emph{SR3} is used to find a solution. It can be observed that $\boldsymbol{\Theta}\,(\vec{\beta_{tr}})$ is also passed to a \emph{Linear} and \emph{Elastic net} block. The \emph{Linear} block is used to solve the system of equations \eqref{eq_21} through linear interpolation.
123
+ The \emph{Elastic net} solves the same system of equations with the elastic net approach. In this the optimization is penalized with an $l_1$ and $l_2$ norm.
124
+ In other words, it combines the Lasso \cite{Lasso, Brunton2019} and Ridge \cite{Brunton2019}, regression respectively.
125
+ The linear and elastic net solvers are invoked from the \emph{Scikit-learn} \cite{scikit-learn} library.\newline
126
+
127
+ The next step is not depicted in figure \ref{fig_43}.
128
+ Namely, the linear regression model is built with the full data. For \emph{pySindy} and the elastic net, the models are trained with $90 \%$ of the training data and the remaining $10 \%$ are used to test or validate the model.
129
+ For \emph{pySindy} $20$ different models with the linear distributed thresholds starting from $0.1$ and ending at $2.0$ are generated.
130
+ The model which has the least mean absolute error \eqref{eq_23} will be selected as the \emph{pySindy} model.
131
+ The mean absolute error of the linear, elastic net and the selected \emph{pySindy} will be compared against each other.
132
+ The one regression model which has the lowest mean absolute error is selected as the final model.\newline
133
+
134
+ The described process is executed multiple times.
135
+ In 3-dimensions the location of a centroid is given as the coordinates of the 3 axes.
136
+ Since the \emph{CPE} across the 3 different axes can deviate significantly, capturing the entire behavior in one model would require a complex model.
137
+ A complex model, however, is not sparse anymore.
138
+ Thus, a regression model for each of the $K$ labels and for each of the 3 axes is required.
139
+ In total $3 \, K$ regression models are generated. \newline
140
+
141
+ Finally, \emph{first CNMc} and \gls{cnmc} shall be compared.
142
+ First, in \emph{first CNMc} only \emph{pySindy} with a different built-in optimizer was used.
143
+ Second, the modeling \emph{CPE} was specifically designed for the Lorenz system \eqref{eq_6_Lorenz}.
144
+ Third, \emph{first CNMc} entirely relies on \emph{pySindy}, no linear and elastic models are calculated and used for comparison.
145
+ Fourth, the way \emph{first CNMc} would perform prediction, was by transforming the active $f_i$ with their coefficients to equations such that \emph{SymPy} could be applied.
146
+ The disadvantage is that if $\boldsymbol{\Theta}\,(\vec{\beta_{tr}})$ is changed, modifications for \emph{SymPy} are necessary.
147
+ Also, $\boldsymbol{\Theta}\,(\vec{\beta_{tr}})$ can be used for arbitrary defined functions $f_i$, \emph{SymPy} functions, however, are restricted to some predefined functions.
148
+ In \gls{cnmc} it is also possible to get the active $f_i$ as equations.
149
+ However, the prediction is obtained with a regular matrix-matrix multiplication as given in equation \eqref{eq_25}. The variables are denoted as the predicted outcome $\bm{\tilde{Y}}$, the testing data for which the prediction is desired $\bm{\Theta_s}$ and the coefficient matrix $\bm X$ from equation \eqref{eq_21}.
150
+
151
+ \begin{equation}
152
+ \bm{\tilde{Y}} = \bm{\Theta_s} \, \bm X
153
+ \label{eq_25}
154
+ \end{equation}
155
+
156
+
157
+ With leveraging equation \eqref{eq_25} the limitations imposed through \emph{SymPy} are removed.
Data/0_Latex_True/2_Task/7_QT.tex ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \subsection{Modeling Q/T}
2
+ \label{subsec_2_4_2_QT}
3
+ In this subsection, the goal is to explain how the transition properties are modeled.
4
+ The transition properties are the two tensors $\bm Q$ and $\bm T$, which consist of transition probability from one centroid to another and the corresponding transition time, respectively.
5
+ For further details about the transition properties, the reader is referred to section \ref{sec_1_1_2_CNM}.
6
+ Modeling $\bm Q / \bm T$ means to find surrogate models that capture the trained behavior and can predict the tensors for unseen model parameter values $\bm{\tilde{Q}}(\vec{\beta}_{unseen}) ,\, \bm{\tilde{T}}(\vec{\beta}_{unseen})$.
7
+ To go through the data and workflow figure \ref{fig_44} shall be considered.\newline
8
+
9
+
10
+ \begin{figure} [!h]
11
+ \hspace*{-4cm}
12
+ \resizebox{1.2\textwidth}{!}{
13
+ \input{2_Figures/2_Task/2_Modeling/2_QT_Mod.tikz}
14
+ }
15
+ \caption{Data and workflow of $\bm Q / \bm T$ modeling}
16
+ \label{fig_44}
17
+ \end{figure}
18
+
19
+ First, the tracked state data is loaded and adapted in the sense that CNM's data format is received. After that \gls{cnm} can be executed on the tracked state data.
20
+ The outcome of \gls{cnm} are the transition property tensors for all the provided model parameter values $\bm Q(\vec{\beta}) ,\, \bm T(\vec{\beta})$.
21
+ However, \gls{cnm} does not return tensors as \emph{NumPy} \cite{harris2020array} arrays, but as \emph{Python} dictionaries.
22
+ Thus, the next step is to transform the dictionaries to \emph{NumPy} arrays.
23
+ $\bm Q / \bm T$ are highly sparse, i.e., $85 \% - 99\%$ of the entries can be zero.
24
+ The $99\%$ case is seen with a great model order, which for the Lorenz system \eqref{eq_6_Lorenz} was found to be $L \approx 7$.
25
+ Furthermore, with an increasing $L$, saving the dictionaries as \emph{NumPy} arrays becomes inefficient and at some $L$ impossible. With $L>7$ the storage cost goes above multiple hundreds of gigabytes of RAM.
26
+ Therefore, the dictionaries are converted into sparse matrices. \newline
27
+
28
+ Thereafter, the sparse matrices are reshaped or stacked into a single matrix, such that a modal decomposition method can be applied.
29
+ Followed by training a regression model for each of the mode coefficients.
30
+ The idea is that the regression models receive a $\beta_{unseen}$ and return all the corresponding predicted modes.
31
+ The regression models are saved and if desired plots can be enabled via \emph{settings.py}. \newline
32
+
33
+ In this version of \gls{cnmc} two modal decomposition methods are available.
34
+ Namely, the \glsfirst{svd} and the \glsfirst{nmf}.
35
+ The difference between both is given in \cite{Lee1999}.
36
+ The \gls{svd} is stated in equation \eqref{eq_26}, where the variables are designated as the input matrix $\bm A$ which shall be decomposed, the left singular matrix $ \bm U $, the diagonal singular matrix $ \bm \Sigma $ and the right singular matrix $ \bm V^T $.
37
+ The singular matrix $ \bm \Sigma $ is mostly ordered in descending order such that the highest singular value is the first diagonal element.
38
+ The intuition behind the singular values is that they assign importance to the modes in the left and right singular matrices $ \bm U $ and $ \bm {V^T} $, respectively.
39
+
40
+ \begin{equation}
41
+ \bm A = \bm U \, \bm \Sigma \, \bm {V^T}
42
+ \label{eq_26}
43
+ \end{equation}
44
+
45
+
46
+ The big advantage of the \gls{svd} is observed when the so-called economical \gls{svd} is calculated.
47
+ The economical \gls{svd} removes all zero singular values, thus the dimensionality of all 3 matrices can be reduced.
48
+ However, from the economical \gls{svd} as a basis, all the output with all $r$ modes is available.
49
+ There is no need to perform any additional \gls{svd} to get the output for $r$ modes, but rather the economical \gls{svd} is truncated with the number $r$ for this purpose.
50
+ \gls{nmf}, given in equation \eqref{eq_5_NMF}, on the other hand, has the disadvantage that there is no such thing as economical NMF.
51
+ For every change in the number of modes $r$, a full \gls{nmf} must be recalculated.\newline
52
+
53
+ \begin{equation}
54
+ \bm {A_{i \mu}} \approx \bm A^{\prime}_{i \mu} = (\bm W \bm H)_{i \mu} = \sum_{a = 1}^{r}
55
+ \bm W_{ia} \bm H_{a \mu}
56
+ \tag{\ref{eq_5_NMF}}
57
+ \end{equation}
58
+
59
+
60
+ The issue with \gls{nmf} is that the solution is obtained through an iterative optimization process.
61
+ The number of iterations can be in the order of millions and higher to meet the convergence criteria.
62
+ Because the optimal $r_{opt}$ depends on the dynamical system, there is no general rule for acquiring it directly.
63
+ Consequently, \gls{nmf} must be run with multiple different $r$ values to find $r_{opt}$.
64
+ Apart from the mentioned parameter study, one single \gls{nmf} execution was found to be more computationally expensive than \gls{svd}.
65
+ In \cite{Max2021} \gls{nmf} was found to be the performance bottleneck of \emph{first CNMc}, which became more evident when $L$ was increased.
66
+ In subsection
67
+ \ref{subsec_3_3_1_SVD_Speed}
68
+ a comparison between \gls{nmf} and \gls{svd} regarding computational time is given.\newline
69
+
70
+
71
+ Nevertheless, if the user wants to apply \gls{nmf}, only one attribute in \emph{settings.py} needs to be modified.
72
+ Because of that and the overall modular structure of \gls{cnmc}, implementation of any other decomposition method should be straightforward.
73
+ In \gls{cnmc} the study for finding $r_{opt}$ is automated and thus testing \gls{cnmc} on various dynamical systems with \gls{nmf} should be manageable.
74
+ The benefit of applying \gls{nmf} is that the entries of the output matrices $\bm W_{ia},\, \bm H_{a \mu}$ are all non-zero.
75
+ This enables interpreting the $\bm W_{ia}$ matrix since both $\bm Q / \bm T$ tensors cannot contain negative entries, i.e., no negative probability and no negative transition time.\newline
76
+
77
+ Depending on whether \gls{nmf} or \gls{svd} is chosen, $r_{opt}$ is found through a parameter study or based on $99 \%$ of the information content, respectively.
78
+ The $99 \%$ condition is met when $r_{opt}$ modes add up to $99 \%$ of the total sum of the modes. In \gls{svd} $r_{opt}$ is automatically detected and does not require any new \gls{svd} execution. A comparison between \gls{svd} and \gls{nmf} regarding prediction quality is given in
79
+ section
80
+ \ref{subsec_3_3_2_SVD_Quality}.
81
+ After the decomposition has been performed, modes that capture characteristic information are available.
82
+ If the modes can be predicted for any $\beta_{unseen}$, the predicted transition properties $\bm{\tilde{Q}}(\vec{\beta}_{unseen}) ,\, \bm{\tilde{T}}(\vec{\beta}_{unseen})$ are obtained.
83
+ To comply with this \gls{cnmc} has 3 built-in methods.
84
+ Namely, \textbf{R}andom \textbf{F}orest (RF), AdaBoost, and Gaussian Process.\newline
85
+
86
+ First, \gls{rf} is based on decision trees, but additionally deploys
87
+ a technique called bootstrap aggregation or bagging.
88
+ Bagging creates multiple sets from the original dataset, which are equivalent in size.
89
+ However, some features are duplicated in the new datasets, whereas others are
90
+ neglected entirely. This allows \gls{rf} to approximate very complex functions
91
+ and reduce the risk of overfitting, which is encountered commonly
92
+ with regular decision trees.
93
+ Moreover, it is such a powerful tool
94
+ that, e.g., Kilian Weinberger, a well-known Professor for machine learning
95
+ at Cornell University, considers \gls{rf} in one of his lectures, to be
96
+ one of the most powerful regression techniques that the state of the art has to offer.
97
+ Furthermore, \gls{rf} proved to be able to approximate the training data
98
+ acceptable as shown in \cite{Max2021}.
99
+ However, as mentioned in subsection \ref{subsec_1_1_3_first_CNMc}, it faced difficulties to approximate spike-like curves.
100
+ Therefore, it was desired to test alternatives as well.\newline
101
+
102
+ These two alternatives were chosen to be AdaBoost, and Gaussian Process.
103
+ Both methods are well recognized and used in many machine learning applications.
104
+ Thus, instead of motivating them and giving theoretical explanations, the reader is referred to \cite{Adaboost}, \cite{Rasmussen2004,bishop2006pattern} for AdaBoost and Gaussian Process, respectively.
105
+ As for the implementation, all 3 methods are invoked through \emph{Scikit-learn} \cite{scikit-learn}.
106
+ The weak learner for AdaBoost is \emph{Scikit-learn's} default decision tree regressor.
107
+ The kernel utilized for the Gaussian Process is the \textbf{R}adial \textbf{B}asis \textbf{F}unction (RBF). A comparison of these 3 methods in terms of prediction capabilities is provided in section \ref{subsec_3_3_2_SVD_Quality}.\newline
108
+
109
+ Since the predicted $\bm{\tilde{Q}}(\vec{\beta}_{unseen}) ,\, \bm{\tilde{T}}(\vec{\beta}_{unseen})$ are based on regression techniques, their output will have some deviation to the original $\bm{Q}(\vec{\beta}_{unseen}) ,\, \bm{T}(\vec{\beta}_{unseen})$.
110
+ Due to that, the physical requirements given in equations \eqref{eq_31_Q_T_prediction} may be violated. \newline
111
+
112
+ \begin{equation}
113
+ \begin{aligned}
114
+ 0 \leq \, \bm Q \leq 1\\
115
+ \bm T \geq 0 \\
116
+ \bm Q \,(\bm T > 0) > 0 \\
117
+ \bm T(\bm Q = 0) = 0
118
+ \end{aligned}
119
+ \label{eq_31_Q_T_prediction}
120
+ \end{equation}
121
+
122
+ To manually enforce these physical constraints, the rules defined in equation \eqref{eq_32_Rule} are applied.
123
+ The smallest allowed probability is defined to be 0, thus negative probabilities are set to zero.
124
+ The biggest probability is 1, hence, overshoot values are set to 1.
125
+ Also, negative transition times would result in moving backward, therefore, they are set to zero.
126
+ Furthermore, it is important to verify that a probability is zero if its corresponding transition time is less than or equals zero.
127
+ In general, the deviation is in the order of $\mathcal{O}(1 \mathrm{e}{-2})$, such that the modification following equation \eqref{eq_32_Rule} can be considered reasonable.
128
+ \newline
129
+ \begin{equation}
130
+ \begin{aligned}
131
+ & \bm Q < 0 := 0 \\
132
+ & \bm Q > 1 := 1 \\
133
+ & \bm T < 0 := 0\\
134
+ & \bm Q \, (\bm T \leq 0) := 0
135
+ \end{aligned}
136
+ \label{eq_32_Rule}
137
+ \vspace{0.1cm}
138
+ \end{equation}
139
+ In conclusion, it can be said that for modeling $\bm Q / \bm T$, \gls{cnmc} is equipped with two different modal decomposition methods, \gls{svd} and NMF.
140
+ To choose between them one attribute in \emph{settings.py} needs to be modified.
141
+ The application of \gls{nmf} is automated with the integrated parameter study.
142
+ For the mode surrogate models, 3 different regression methods are available.
143
+ Selecting between them is kept convenient, i.e. by editing one property in \emph{settings.py}.
144
+
145
+
146
+
147
+
148
+
149
+
Data/0_Latex_True/3_Task/0_Results.tex ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \chapter{Results}
2
+ \label{ch_3}
3
+ In this chapter, the results achieved with \gls{cnmc} shall be presented and assessed.
4
+ First, in section \ref{sec_3_1_Tracking_Results}, the tracking algorithm is evaluated by showing the outcome for 3 different dynamical model configurations.
5
+ Second, in section \ref{sec_3_2_MOD_CPE}, statements about the performance of modeling the \glsfirst{cpevol} are made.
6
+ They are supported with some representative outputs.
7
+ Third, in section \ref{sec_3_3_SVD_NMF} the two decomposition methods are compared in terms of computational time and prediction quality in subsection \ref{subsec_3_3_1_SVD_Speed} and \ref{subsec_3_3_2_SVD_Quality}, respectively.
8
+ Fourth, it has been mentioned that 3 different regressors for representing the $\bm Q / \bm T$ tensor are available.
9
+ Their rating is given in section \ref{sec_3_4_SVD_Regression}.
10
+ Finally, the \gls{cnmc} predicted trajectories for different models shall be displayed and evaluated in section \ref{sec_3_5_Pred}.\newline
11
+
12
+
13
+ For assessing the performance of \gls{cnmc} some dynamical model with a specific configuration will be used many times.
14
+ In order not to repeat them too often, they will be defined in the following.\newline
15
+
16
+ \textbf{Model configurations}
17
+ \hrule
18
+ \vspace{0.05cm}
19
+ \hrule
20
+ \vspace{0.25cm}
21
+ The first model configuration is denoted as \emph{SLS}, which stands for \textbf{S}mall \textbf{L}orenz \textbf{S}ystem.
22
+ It is the Lorenz system described with the sets of equations \eqref{eq_6_Lorenz} and the number of centroids is $K=10$.
23
+ Furthermore, the model was trained with $\vec{\beta }_{tr} = [\beta_0 = 28 ; \, \beta_{end} = 33], \, n_{\beta, tr} = 7$, where the training model parameter values $\vec{\beta}_{tr}$ are chosen to start from $\beta_0 = 28$ and end at $\beta_{end} = 33$, where the total number of linearly distributed model parameter values is $n_{\beta, tr} = 7$.\newline
24
+
25
+ The second model is referred to as \emph{LS20}.
26
+ It is also a Lorenz system \eqref{eq_6_Lorenz}, but with a higher number of centroids $K=20$ and the following model configuration: $\vec{\beta }_{tr} = [\, \beta_0 = 24.75 ; \, \beta_{end} = 53.75 \,], \, n_{\beta, tr} = 60$.\newline
27
+
28
+ The third model is designated as \emph{FW15}. It is based on the \emph{Four Wing} set of equations \eqref{eq_10_4_Wing} and an illustrative trajectory is given in figure \ref{fig_37}.
29
+ The number of centroids is $K=15$ and it is constructed with the following configuration $\vec{\beta }_{tr} = [\, \beta_0 = 8 ; \, \beta_{end} = 11 \,], \, n_{\beta, tr} = 13$.\newline
30
+
31
+ \begin{figure}[!h]
32
+ \centering
33
+ \includegraphics[width =\textwidth]
34
+ {2_Figures/3_Task/1_Tracking/10_1_Traj_8.pdf}
35
+ \caption{\emph{FW15} \eqref{eq_10_4_Wing} trajectory for $\beta = 8$}
36
+ \label{fig_37}
37
+ \end{figure}
38
+
39
+
40
+
Data/0_Latex_True/3_Task/1_Track_Results.tex ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \section{Tracking results}
2
+ \label{sec_3_1_Tracking_Results}
3
+ In this section, some outputs of tracking data and workflow, described in subsection \ref{subsec_2_3_1_Tracking_Workflow}, shall be presented.
4
+ After that, in short, the current \gls{cnmc} shall be compared to \emph{first CNMc}. \newline
5
+
6
+ First, two illustrative solutions for the assignment problem from the final path, as explained in subsection \ref{subsec_2_3_1_Tracking_Workflow}, are provided in figures \ref{fig_27} and \ref{fig_28}.
7
+ The axes are denoted as $c_k$ and $c_p$ and represent the labels of the $\beta_j$ and $\beta_i$ centroids, respectively.
8
+ The color bar on the right side informs about the Euclidean distance, which is equivalent to the cost.
9
+ Above the solution of the assignment problem in figures \ref{fig_27} and \ref{fig_28}, the corresponding $\beta_i$ and $\beta_j$ centroid labels are given in the respective two figures, i.e., \ref{fig_27_1}, \ref{fig_27_2} and \ref{fig_28_1}, \ref{fig_28_2}.
10
+
11
+ \begin{figure}[!h]
12
+ \begin{subfigure}{0.5\textwidth}
13
+ \centering
14
+ \caption{Ordered state, $\beta_i =32.167$ }
15
+ \includegraphics[width =\textwidth]
16
+ {2_Figures/3_Task/1_Tracking/16_lb_32.167.pdf}
17
+ \label{fig_27_1}
18
+ \end{subfigure}
19
+ \hfill
20
+ \begin{subfigure}{0.5\textwidth}
21
+ \centering
22
+ \caption{Ordered state, $\beta_j = 33$}
23
+ \includegraphics[width =\textwidth]
24
+ {2_Figures/3_Task/1_Tracking/17_lb_33.000.pdf}
25
+ \label{fig_27_2}
26
+ \end{subfigure}
27
+
28
+ \smallskip
29
+ \centering
30
+ \begin{subfigure}{\textwidth}
31
+ \caption{Solution to the assignment problem}
32
+ \includegraphics[width =\textwidth]{2_Figures/3_Task/1_Tracking/1_LSA.pdf}
33
+ \label{fig_27}
34
+ \end{subfigure}
35
+ \vspace{-0.3cm}
36
+ \caption{Illustrative solution for the assignment problem, $\beta_i =32.167,\, \beta_j = 33 ,\, K =10$}
37
+ \label{fig_27_All}
38
+ \end{figure}
39
+ %
40
+ %
41
+ The centroid $c_{k=1} (\beta_j = 33)$ has its lowest cost to
42
+ $c_{p=3} (\beta_i = 32.167)$. In this case, this is also the solution for the assignment problem, outlined by the blue cross.
43
+ However, the solution to the linear sum assignment problem is not always to choose the minimal cost for one \emph{inter} $\beta$ match.
44
+ It could be that one centroid in $\beta_i$ is found to be the closest centroid to multiple centroids in $\beta_j$.
45
+ Matching only based on the minimal distance does not include the restriction that exactly one centroid from $\beta_i$ must be matched with exactly one centroid from $\beta_j$.
46
+ The latter demand is incorporated in the solution of the linear sum assignment problem. \newline
47
+
48
+
49
+ \begin{figure}[!h]
50
+ \begin{subfigure}{0.5\textwidth}
51
+ \centering
52
+ \caption{Ordered state, $\beta_i =31.333$ }
53
+ \includegraphics[width =\textwidth]
54
+ {2_Figures/3_Task/1_Tracking/18_lb_31.333.pdf}
55
+ \label{fig_28_1}
56
+ \end{subfigure}
57
+ \hfill
58
+ \begin{subfigure}{0.5\textwidth}
59
+ \centering
60
+ \caption{Ordered state, $\beta_j = 32.167$}
61
+ \includegraphics[width =\textwidth]
62
+ {2_Figures/3_Task/1_Tracking/16_lb_32.167.pdf}
63
+ \label{fig_28_2}
64
+ \end{subfigure}
65
+
66
+ \smallskip
67
+ \centering
68
+ \begin{subfigure}{\textwidth}
69
+ \caption{Solution to the assignment problem}
70
+ \includegraphics[width =\textwidth]{2_Figures/3_Task/1_Tracking/2_LSA.pdf}
71
+ \label{fig_28}
72
+ \end{subfigure}
73
+ \vspace{-0.3cm}
74
+ \caption{Illustrative solution for the assignment problem, $\beta_i =31.333,\, \beta_j = 32.167, \,K =10 $}
75
+ \label{fig_28_All}
76
+ \end{figure}
77
+
78
+ Comparing figure \ref{fig_27} with the second example in figure \ref{fig_28}, it can be observed that the chosen \emph{inter} $\beta$ centroid matches can have very different shapes.
79
+ This can be seen by looking at the blue crosses.
80
+ Furthermore, paying attention to the remaining possible \emph{inter} $\beta$ centroid matches, it can be stated that there is a clear trend, i.e., the next best \emph{inter} $\beta$ centroid match has a very high increase in its cost.
81
+ For example, considering the following \emph{inter} $\beta$ match. With $c_{k=1} (\beta_j = 32.167)$ and $c_{p=1} (\beta_i = 31.333)$, the minimal cost is around $cost_{min} \approx 0.84$. The next best option jumps to $cost_{second} = 13.823$. These jumps can be seen for each \emph{inter} $\beta$ match in both depicted figures \ref{fig_27} and \ref{fig_28}.
82
+ The key essence behind this finding is that for the chosen number of centroids $K$ of this dynamical model (Lorenz system \eqref{eq_6_Lorenz}), no ambiguous regions, as explained at the beginning of this chapter, occur.\newline
83
+
84
+ Next, the tracking result of 3 different systems shall be viewed.
85
+ The tracked state for \emph{SLS} is depicted in figures \ref{fig_29}.
86
+ In each of the figures, one centroid is colored blue that denotes
87
+ the first centroid in the sequence of the underlying trajectory.
88
+ Within the depicted range $\vec{\beta}$, it can be observed, that each label across the $\vec{\beta}$ is labeled as expected.
89
+ No single ambiguity or mislabeling can be seen.
90
+ In other words, it highlights the high performance of the tracking algorithm.
91
+ %
92
+ %
93
+ % ==============================================================================
94
+ % ======================= SLS =================================================
95
+ % ==============================================================================
96
+ \begin{figure}[!h]
97
+ \begin{subfigure}{0.5\textwidth}
98
+ \centering
99
+ \caption{$\beta =28$ }
100
+ \includegraphics[width =\textwidth]
101
+ {2_Figures/3_Task/1_Tracking/3_lb_28.000.pdf}
102
+ \end{subfigure}
103
+ \hfill
104
+ \begin{subfigure}{0.5\textwidth}
105
+ \centering
106
+ \caption{ $\beta = 28.833$}
107
+ \includegraphics[width =\textwidth]
108
+ {2_Figures/3_Task/1_Tracking/4_lb_28.833.pdf}
109
+ \end{subfigure}
110
+
111
+ \smallskip
112
+ \begin{subfigure}{0.5\textwidth}
113
+ \centering
114
+ \caption{$\beta = 31.333$}
115
+ \includegraphics[width =\textwidth]
116
+ {2_Figures/3_Task/1_Tracking/15_lb_31.333.pdf}
117
+ \end{subfigure}
118
+ \hfill
119
+ \begin{subfigure}{0.5\textwidth}
120
+ \centering
121
+ \caption{ $\beta = 33$}
122
+ \includegraphics[width =\textwidth]
123
+ {2_Figures/3_Task/1_Tracking/5_lb_33.000.pdf}
124
+ \end{subfigure}
125
+ \vspace{-0.3cm}
126
+ \caption{Tracked states for \emph{SLS}, $K = 10,\, \vec{\beta} = [\, 28, \, 28.833, \, 31.333, \, 33 \, ]$}
127
+ \label{fig_29}
128
+ \end{figure}
129
+ % ==============================================================================
130
+ % ======================= SLS =================================================
131
+ % ==============================================================================
132
+ %
133
+ The second model is the \emph{LS20}, i.e., $K= 20,\, \vec{\beta }_{tr} = [\, \beta_0 = 24.75 ; \, \beta_{end} = 53.75 \,], \, n_{\beta,tr} = 60$.
134
+ The outcome is depicted in figures \ref{fig_32}.
135
+ It can be noted that $\beta = 24.75$ and $\beta = 30.648$ exhibit very similar results to the \emph{SLS} model.
136
+ The same is true for intermediate $\beta$ values, i.e., $24.75 \leq \beta \lessapprox 30.648 $.
137
+ However, with $\beta \gtrapprox 30.64$ as depicted for $\beta = 31.14$, one centroid, i.e. the centroid with the label $20$ in the right ear appears unexpectedly.
138
+ With this, a drastic change to the centroid placing network is imposed.
139
+ Looking at the upcoming $\beta$ these erratic changes are found again.\newline
140
+
141
+
142
+ % ==============================================================================
143
+ % ======================= LS20 =================================================
144
+ % ==============================================================================
145
+ \begin{figure}[!h]
146
+ \begin{subfigure}{0.5\textwidth}
147
+ \centering
148
+ \caption{$\beta =24.75$ }
149
+ \includegraphics[width =\textwidth]
150
+ {2_Figures/3_Task/1_Tracking/6_lb_24.750.pdf}
151
+ \end{subfigure}
152
+ \hfill
153
+ \begin{subfigure}{0.5\textwidth}
154
+ \centering
155
+ \caption{ $\beta = 28.682$}
156
+ \includegraphics[width =\textwidth]
157
+ {2_Figures/3_Task/1_Tracking/7_lb_28.682.pdf}
158
+ \end{subfigure}
159
+
160
+ \smallskip
161
+ \begin{subfigure}{0.5\textwidth}
162
+ \centering
163
+ \caption{$\beta = 30.648$}
164
+ \includegraphics[width =\textwidth]
165
+ {2_Figures/3_Task/1_Tracking/7_lb_30.648.pdf}
166
+ \end{subfigure}
167
+ \hfill
168
+ \begin{subfigure}{0.5\textwidth}
169
+ \centering
170
+ \caption{ $\beta = 31.140$}
171
+ \includegraphics[width =\textwidth]
172
+ {2_Figures/3_Task/1_Tracking/8_lb_31.140.pdf}
173
+ \end{subfigure}
174
+
175
+ \smallskip
176
+ \begin{subfigure}{0.5\textwidth}
177
+ \centering
178
+ \caption{$\beta = 42.936$}
179
+ \includegraphics[width =\textwidth]
180
+ {2_Figures/3_Task/1_Tracking/9_lb_42.936.pdf}
181
+ \end{subfigure}
182
+ \hfill
183
+ \begin{subfigure}{0.5\textwidth}
184
+ \centering
185
+ \caption{ $\beta = 53.750$}
186
+ \includegraphics[width =\textwidth]
187
+ {2_Figures/3_Task/1_Tracking/10_lb_53.750.pdf}
188
+ \end{subfigure}
189
+ \vspace{-0.3cm}
190
+ \caption{Tracked states for \emph{LS20}, $K = 20,\, \vec{\beta} = [\, 24.75, \, 28.682, \, 30.648, \, 31.14,$ $42.936, \, 53.75 \, ]$ }
191
+ \label{fig_32}
192
+ \end{figure}
193
+ % ==============================================================================
194
+ % ======================= LS20 =================================================
195
+ % ==============================================================================
196
+ Generating a tracked state with these discontinuous cluster network deformations even manually can be considered hard to impossible because tracking demands some kind of similarity.
197
+ If two cluster networks differ too much from each other, then necessarily at least one tracked label is going to be unsatisfying.
198
+ Hence, it would be wrong to conclude that the tracking algorithm is not performing well, but rather the clustering algorithm itself or the range of $\vec{\beta} $ must be adapted. If the range of $\vec{\beta} $ is shortened, multiple models can be trained and tracked.\newline
199
+
200
+ \FloatBarrier
201
+ The third model is referred to as \emph{FW15}.
202
+ Figures in \ref{fig_38} show the tracked state for 4 different $\beta$ values. It can be observed that for $\beta = 11$ the centroid placing has changed notably to the other $\beta$ values, thus tracking the centroids in the center for $\beta = 11$ becomes unfavorable.
203
+ Overall, however, the tracked state results advocate the performance of the tracking algorithm.\newline
204
+
205
+ % ==============================================================================
206
+ % ======================= FW15 =================================================
207
+ % ==============================================================================
208
+ \begin{figure}[!h]
209
+ \begin{subfigure}{0.5\textwidth}
210
+ \centering
211
+ \caption{$\beta =8$ }
212
+ \includegraphics[width =\textwidth]
213
+ {2_Figures/3_Task/1_Tracking/11_lb_8.000.pdf}
214
+ \end{subfigure}
215
+ \hfill
216
+ \begin{subfigure}{0.5\textwidth}
217
+ \centering
218
+ \caption{ $\beta = 8.25$}
219
+ \includegraphics[width =\textwidth]
220
+ {2_Figures/3_Task/1_Tracking/12_lb_8.250.pdf}
221
+ \end{subfigure}
222
+
223
+ \smallskip
224
+ \begin{subfigure}{0.5\textwidth}
225
+ \centering
226
+ \caption{$\beta = 10$}
227
+ \includegraphics[width =\textwidth]
228
+ {2_Figures/3_Task/1_Tracking/13_lb_10.000.pdf}
229
+ \end{subfigure}
230
+ \hfill
231
+ \begin{subfigure}{0.5\textwidth}
232
+ \centering
233
+ \caption{ $\beta = 11$}
234
+ \includegraphics[width =\textwidth]
235
+ {2_Figures/3_Task/1_Tracking/14_lb_11.000.pdf}
236
+ \end{subfigure}
237
+ \vspace{-0.3cm}
238
+ \caption{Tracked states for \emph{FW15}, $K = 15,\, \vec{\beta} = [\, 8, \, 8.25, \, 10, \, 11 \, ]$}
239
+ \label{fig_38}
240
+ \end{figure}
241
+ % ==============================================================================
242
+ % ======================= FW15 =================================================
243
+ % ==============================================================================
244
+
245
+ It can be concluded that the tracking algorithm performs remarkably well. However, when the cluster placing network is abruptly changed from one $\beta$ to the other $\beta$, the tracking outcome gets worse and generates sudden cluster network deformation.
246
+ As a possible solution, splitting up the $\vec{\beta}_{tr}$ range into smaller $\vec{\beta}_{tr,i}$ ranges, can be named. This is not only seen for the \emph{LS20}, but also for other dynamical systems as illustratively shown with the center area of the \emph{FW15} system for $\beta= 11$.
247
+ \FloatBarrier
248
+
Data/0_Latex_True/3_Task/2_Mod_CPE.tex ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \section{CPE modeling results}
2
+ \label{sec_3_2_MOD_CPE}
3
+ In this section, results to the \gls{cpevol} modeling explained in subsection \ref{subsec_2_4_1_CPE}, shall be presented and assessed.
4
+ First, a selection of equations, which defines the \gls{cpevol} are given for one model configuration.
5
+ Next, representative plots of the \gls{cpevol} for different models are analyzed.
6
+ Finally, the predicted centroid position is compared with the actual clustered centroid position.\newline
7
+
8
+
9
+ Modeling the \emph{CPE} returns, among other results, analytical equations.
10
+ These equations describe the behavior of the centroid positions across the range $\vec{\beta}$ and can also be used for making predictions for $\vec{\beta}_{unseen}$.
11
+ The model configuration for which they are be presented is \emph{SLS}, i.e. Lorenz system \eqref{eq_6_Lorenz}, $K= 10,\, \vec{\beta }_{tr} = [\, \beta_0 = 28 ; \, \beta_{end} =33 \,], \, n_{\beta, tr} = 7$.
12
+ The analytical \gls{cpevol} expressions are listed in \eqref{eq_27} to \eqref{eq_29} for the centroids $[\,1,\, 2,\,7\,]$, respectively.
13
+ Recalling that the behavior across the 3 different axes (x, y, z) can vary greatly, each axis has its own regression model $(\tilde x,\, \tilde y,\, \tilde z)$.
14
+ Thus, for each label, 3 different analytical expressions are provided. \newline
15
+
16
+
17
+ \begin{figure}[!h]
18
+ \begin{minipage}{.47\textwidth}
19
+ \begin{equation}
20
+ \begin{aligned}
21
+ \tilde x &= -0.1661 \, cos(3 \, \beta) \\
22
+ \tilde y &= -0.1375 \, cos(3 \, \beta) \\
23
+ \tilde z &= 0.8326 \, \beta
24
+ \end{aligned}
25
+ \label{eq_27}
26
+ \end{equation}
27
+ \end{minipage}%
28
+ \hfill
29
+ \begin{minipage}{.47\textwidth}
30
+ \centering
31
+ \includegraphics[width =\textwidth]{2_Figures/3_Task/2_Mod_CPE/1_lb_1.pdf}
32
+ \caption{\emph{SLS}, \emph{CPE} model for centroid: 1 }
33
+ \label{fig_45}
34
+ \end{minipage}
35
+ \end{figure}
36
+
37
+ \begin{figure}[!h]
38
+ \begin{minipage}{.47\textwidth}
39
+ \begin{equation}
40
+ \begin{aligned}
41
+ \tilde x &= 0.1543 \, sin(3 \, \beta) + 0.2446 \, cos(3 \, \beta) \\
42
+ \tilde y &= 0.2638 \, sin(3 \, \beta) + 0.4225 \, cos(3 \, \beta) \\
43
+ \tilde z &= 0.4877 \, \beta
44
+ \end{aligned}
45
+ \label{eq_28}
46
+ \end{equation}
47
+ \end{minipage}%
48
+ \hfill
49
+ \begin{minipage}{.47\textwidth}
50
+ \centering
51
+ \includegraphics[width =\textwidth]{2_Figures/3_Task/2_Mod_CPE/2_lb_2.pdf}
52
+ \caption{\emph{SLS}, \emph{CPE} model for centroid: 2 }
53
+ \label{fig_46}
54
+ \end{minipage}
55
+ \end{figure}
56
+
57
+ \begin{figure}[!h]
58
+ \begin{minipage}{.47\textwidth}
59
+ \begin{equation}
60
+ \begin{aligned}
61
+ \tilde x &= -0.1866 \, \beta + 0.133 \, sin(3 \, \beta) \\
62
+ & \quad + 0.1411 \, cos(3 \, \beta) \\
63
+ \tilde y &= -0.3 \, \beta \\
64
+ \tilde z &= -1.0483+ 0.6358 \,\beta
65
+ \end{aligned}
66
+ \label{eq_29}
67
+ \end{equation}
68
+ \end{minipage}%
69
+ \hfill
70
+ \begin{minipage}{.47\textwidth}
71
+ \centering
72
+ \includegraphics[width =\textwidth]{2_Figures/3_Task/2_Mod_CPE/3_lb_7.pdf}
73
+ \caption{\emph{SLS}, \emph{CPE} model for centroid: 7 }
74
+ \label{fig_47}
75
+ \end{minipage}
76
+ \end{figure}
77
+
78
+
79
+ Right to the equations the corresponding plots are depicted in figures \ref{fig_45} to \ref{fig_47}.
80
+ Here, the blue and green curves indicate true and modeled CPE, respectively.
81
+ Each of the figures supports the choice of allowing each axis to be modeled separately.
82
+ The z-axis appears to undergo less alteration or to be more linear than the x- and y-axis.
83
+ If a model is supposed to be valid for all 3 axes, a more complex model, i.e., one with a higher number of terms, is required.
84
+ Although more flexible models fit training data increasingly better, they tend to overfit.
85
+ In other words, complex models capture the trained data well but could exhibit oscillations for $\vec{\beta}_{unseen}$.
86
+ The latter is even more severe when the model is employed for extrapolation.
87
+ The difference between interpolation and extrapolation is that for extrapolation the prediction is made with $\beta_{unseen}$ which are not in the range of the trained $\vec{\beta}_{tr}$.
88
+ Therefore, less complexity is preferred.\newline
89
+
90
+ Next, the performance of predicting the centroid for $\vec{\beta}_{unseen}$ is elaborated.
91
+ For this purpose, figures \ref{fig_48} to \ref{fig_52} shall be examined.
92
+ All figures depict the original centroid positions, which are obtained through the clustering step in green and the predicted centroid positions in blue.
93
+ On closer inspection, orange lines connecting the true and predicted centroid positions can be identified.
94
+ Note, that they will only be visible if the deviation between the true and predicted state is high enough.
95
+ Figures \ref{fig_48_0} and \ref{fig_48_1} show the outcome for \emph{SLS} with $\beta_{unseen} = 28.5$ and $\beta_{unseen} = 32.5$, respectively.
96
+ Visually, both predictions are very close to the true centroid positions.
97
+ Because of this high performance, in figures \ref{fig_49_0} and \ref{fig_49_1} two examples of extrapolation are given for $\beta_{unseen} = 26.5$ and $\beta_{unseen} = 37$, respectively.
98
+ For the first one, the outcome is very applicable.
99
+ In contrast, $\beta_{unseen} = 37$ returns some deviations which are notably high.
100
+ \newline
101
+
102
+
103
+
104
+ % ----------------- Interpolation ----------------------------------------------
105
+ \begin{figure}[!h]
106
+ \begin{subfigure}{0.5\textwidth}
107
+ \centering
108
+ \caption{$\beta_{unseen} = 28.5$ }
109
+ \includegraphics[width =\textwidth]{2_Figures/3_Task/2_Mod_CPE/4_lb_c_28.5.pdf}
110
+ %MSE = 0.622
111
+ \label{fig_48_0}
112
+ \end{subfigure}%
113
+ \hfill
114
+ \begin{subfigure}{0.5\textwidth}
115
+ \centering
116
+ \caption{$\beta_{unseen} = 32.5$ }
117
+ \includegraphics[width =\textwidth]{2_Figures/3_Task/2_Mod_CPE/5_lb_c_32.5.pdf}
118
+ %MSE = 0.677
119
+ \label{fig_48_1}
120
+ \end{subfigure}
121
+ \vspace{-0.3cm}
122
+ \caption{\emph{SLS}, original vs. modeled centroid position, $\beta_{unseen} = 28.5$ and $\beta_{unseen} = 32.5$ }
123
+ \label{fig_48}
124
+ \end{figure}
125
+
126
+ % ----------------- EXTRAPOLATION ----------------------------------------------
127
+ \begin{figure}[!h]
128
+ \begin{subfigure}{0.5\textwidth}
129
+ \centering
130
+ \caption{$\beta_{unseen} = 26.5$ }
131
+ \includegraphics[width =\textwidth]{2_Figures/3_Task/2_Mod_CPE/22_lb_c_26.5.pdf}
132
+ %MSE = 0.622
133
+ \label{fig_49_0}
134
+ \end{subfigure}%
135
+ \hfill
136
+ \begin{subfigure}{0.5\textwidth}
137
+ \centering
138
+ \caption{$\beta_{unseen} = 37$ }
139
+ \includegraphics[width =\textwidth]{2_Figures/3_Task/2_Mod_CPE/23_lb_c_37.0.pdf}
140
+ %MSE = 0.677
141
+ \label{fig_49_1}
142
+ \end{subfigure}
143
+ \vspace{-0.3cm}
144
+ \caption{\emph{SLS}, original vs. modeled centroid position, extrapolated $\beta_{unseen} = 26.5$ and $\beta_{unseen} = 37$ }
145
+ \label{fig_49}
146
+ \end{figure}
147
+
148
+
149
+ % --------- MODEL LOrenz K= 20
150
+ \begin{figure}[!h]
151
+ \begin{subfigure}{.5\textwidth}
152
+ \centering
153
+ \caption{$\beta_{unseen} = 31.75$}
154
+ \includegraphics[width =\textwidth]{2_Figures/3_Task/2_Mod_CPE/6_lb_c_31.75.pdf}
155
+ %MSE = 1.857
156
+ \end{subfigure}%
157
+ \hfill
158
+ \begin{subfigure}{.5\textwidth}
159
+ \centering
160
+ \caption{$\beta_{unseen} = 51.75$ }
161
+ \includegraphics[width =\textwidth]{2_Figures/3_Task/2_Mod_CPE/7_lb_c_51.75.pdf}
162
+ %MSE = 2.536
163
+ \end{subfigure}
164
+ \vspace{-0.3cm}
165
+ \caption{\emph{LS20}, original vs. modeled centroid position, $\beta_{unseen} = 31.75$ and $\beta_{unseen} = 51.75$}
166
+ \label{fig_50}
167
+ \end{figure}
168
+
169
+ % --------- MODEL 25_Four_Wing_1_K_15 ---------
170
+ \begin{figure}[!h]
171
+ \begin{subfigure}{.5\textwidth}
172
+ \centering
173
+ \caption{$\beta_{unseen} = 8.7$}
174
+ \includegraphics[width =\textwidth]{2_Figures/3_Task/2_Mod_CPE/8_lb_c_8.7.pdf}
175
+ %MSE = 1.617
176
+ \end{subfigure}%
177
+ \hfill
178
+ \begin{subfigure}{.5\textwidth}
179
+ \centering
180
+ \caption{$\beta_{unseen} = 10.1$ }
181
+ \includegraphics[width =\textwidth]{2_Figures/3_Task/2_Mod_CPE/9_lb_c_10.1.pdf}
182
+ %MSE = 1.5
183
+ \end{subfigure}
184
+ \vspace{-0.3cm}
185
+ \caption{\emph{FW15}, original vs. modeled centroid position, $\beta_{unseen} = 8.7$ and $\beta_{unseen} = 10.1$}
186
+ \label{fig_52}
187
+
188
+ \end{figure}
189
+
190
+ Quantitative measurements are performed by applying the Mean Square Error (MSE) following equation \eqref{eq_30_MSE}.
191
+ The variables are denoted as the number of samples $n$, which in this case is equal to the number of centroids $n = K$, the known $f(x_k)$ and the predicted $y_k$ centroid position.\newline
192
+
193
+ \begin{equation}
194
+ MSE = \frac{1}{n} \, \sum_{k=1}^n \left(f(x_k) - y_k\right)^2
195
+ \label{eq_30_MSE}
196
+ \end{equation}
197
+
198
+ The measured MSE errors for all displayed results are summarized in table \ref{tab_5_MSE}.
199
+ The MSE for the results of $\beta_{unseen} = 28.5$ and $\beta_{unseen} = 32.5$ in figure \ref{fig_48} are $0.622$ and $0.677$, respectively.
200
+ Consequently, the performance of \gls{cnmc} is also confirmed quantitatively.
201
+ Figures in \ref{fig_50} illustrate the outcome for \emph{LS20} for $\beta_{unseen} = 31.75$ and $\beta_{unseen} = 51.75$.
202
+ In section \ref{sec_3_1_Tracking_Results} it is explained that for \emph{LS20} cluster network deformations appear.
203
+ Nevertheless, the outcome visually and quantitatively endorses the \emph{CPE} modeling capabilities.
204
+ Figures in \ref{fig_52} depict the outcome for \emph{FW15} for $\beta_{unseen} = 8.7$ and $\beta_{unseen} = 10.1$.
205
+ A few orange lines are visible, however overall the outcome is very satisfactory.\newline
206
+
207
+ \begin{table}
208
+ \centering
209
+ \begin{tabular}{c c c c }
210
+ \textbf{Figure} &\textbf{Model} & $\boldsymbol{\beta_{unseen}}$ & \textbf{MSE} \\
211
+ \hline \\
212
+ [-0.8em]
213
+ \ref{fig_48} & \emph{SLS}& $28.5$ & $0.622$ \\
214
+ \ref{fig_48} & \emph{SLS}& $32.5$ & $0.677$ \\
215
+ \ref{fig_49} & \emph{SLS}& $26.5$ & $1.193$ \\
216
+ \ref{fig_49} & \emph{SLS}& $37$ & $5.452$ \\
217
+ \ref{fig_50} & \emph{LS20}& $31.75$ & $1.857$ \\
218
+ \ref{fig_50} & \emph{LS20}& $51.75$ & $2.536$ \\
219
+ \ref{fig_52} & \emph{FW15}& $8.7$ & $1.617$ \\
220
+ \ref{fig_52} & \emph{FW15}& $10.1$ & $1.5$
221
+ \end{tabular}
222
+ \caption{MSE for different Model configurations and $\vec{\beta}_{unseen}$}
223
+ \label{tab_5_MSE}
224
+ \end{table}
225
+
226
+ It can be concluded that the \emph{CPE} modeling performance is satisfying.
227
+ In the case of a few cluster network deformations, \gls{cnmc} is capable of providing acceptable results.
228
+ However, as shown with \emph{SLS}, if the model's training range $\vec{\beta}_{tr}$ and the number of centroids $K$ were selected appropriately, the MSE can be minimized.
Data/0_Latex_True/3_Task/3_SVD_NMF.tex ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \section{Transition properties modeling}
2
+ \label{sec_3_3_SVD_NMF}
3
+ In the subsection \ref{subsec_2_4_2_QT}, it has been explained that \gls{cnmc} has two built-in modal decomposition methods for the $\bm Q / \bm T$ tensors, i.e., \gls{svd} and NMF.
4
+ There are two main concerns for which performance measurements are needed.
5
+ First, in subsection \ref{subsec_3_3_1_SVD_Speed}, the computational costs of both methods are examined.
6
+ Then in subsection \ref{subsec_3_3_2_SVD_Quality}, the \gls{svd} and \gls{nmf} prediction quality will be presented and assessed.
7
+
8
+ \subsection{Computational cost}
9
+ \label{subsec_3_3_1_SVD_Speed}
10
+ In this subsection, the goal is to evaluate the computational cost of the two decomposition methods implemented in \gls{cnmc}.
11
+ \gls{nmf} was already used in \emph{first CNMc} and it was found to be one of the most computationally expensive tasks.
12
+ With an increasing model order $L$ it became the most computationally expensive task by far, which is acknowledged by \cite{Max2021}.
13
+ The run time was one of the main reasons why \gls{svd} should be implemented in \gls{cnmc}.
14
+ To see if \gls{svd} can reduce run time, both methods shall be compared.\newline
15
+
16
+ First, it is important to mention that \gls{nmf} is executed for one single predefined mode number $r$.
17
+ It is possible that a selected $r$ is not optimal, since $r$ is a parameter that depends not only on the chosen dynamical system but also on other parameters, e.g., the number of centroids $K$ and training model parameter values $n_{\beta, tr}$, as well as \gls{nmf} specific attributes.
18
+ These are the maximal number of iterations in which the optimizer can converge and the convergence tolerance.
19
+ However, to find an appropriate $r$, \gls{nmf} can be executed multiple times with different values for $r$.
20
+ Comparing the execution time of \gls{nmf} with multiple invocations against \gls{svd} can be regarded as an unbalanced comparison.
21
+ Even though for a new dynamical system and its configuration the optimal $r_{opt}$ for \gls{nmf} is most likely to be found over a parameter study, for the upcoming comparison, the run time of one single \gls{nmf} solution is measured.\newline
22
+
23
+ The model for this purpose is \emph{SLS}. Since \emph{SLS} is trained with the output of 7 pairwise different model parameter values $n_{\beta,tr} = 7$, the maximal rank in \gls{svd} is limited to 7.
24
+ Nevertheless, to allow \gls{nmf} to find a solution, $r$ was defined as $r=9$, the maximal number of iterations in which the optimizer can converge was set to 10 million and the convergence tolerance to $1\mathrm{e}{-6}$.
25
+ Both methods can work with sparse matrices.
26
+ However, the \gls{svd} solver is specifically designed to solve sparse matrices.
27
+ The measured times for decomposing the $\bm Q / \bm T$ tensors for 7 different $L$ are listed in table \ref{tab_6_NMF_SVD}.
28
+ It can be observed that for \gls{svd} up to $L=6$, the computational time for both $\bm Q / \bm T$ tensors is less than 1 second.
29
+ Such an outcome is efficient for science and industry applications.
30
+ With $L=7$ a big jump in time for both $\bm Q / \bm T$ is found.
31
+ However, even after this increase, the decomposition took around 5 seconds, which still is acceptable.\newline
32
+
33
+ \begin{table}
34
+ \centering
35
+ \begin{tabular}{c| c c |c c }
36
+ \textbf{$L$} &\textbf{SVD} $\bm Q$ & \textbf{NMF} $\bm Q$
37
+ &\textbf{SVD} $\bm T$ & \textbf{NMF} $\bm T$\\
38
+ \hline \\
39
+ [-0.8em]
40
+ $1$ & $2 \,\mathrm{e}{-4}$ s & $64$ s & $8 \, \mathrm{e}{-05}$ s & $3 \, \mathrm{e}{-2}$ s \\
41
+
42
+ $2$ & $1 \, \mathrm{e}{-4}$ s & $8 \, \mathrm{e}{-2}$ s & $1 \, \mathrm{e}{-4}$ s & $1$ h \\
43
+
44
+ $3$ & $2 \, \mathrm{e}{-4}$ s & $10$ s & $2 \, \mathrm{e}{-4}$ s & $0.1$ s \\
45
+
46
+ $4$ & $4 \, \mathrm{e}{-3}$ s & $20$ s & $7 \, \mathrm{e}{-3}$ s & $1.5$ h \\
47
+
48
+ $5$ & $6 \, \mathrm{e}{-2}$ s & $> 3$ h & $3 \, \mathrm{e}{-2}$ s & - \\
49
+
50
+ $6$ & $0.4$ s & - & $0.4$ s & - \\
51
+
52
+ $7$ & $5.17$ s & - & $4.52$ s & -
53
+ \end{tabular}
54
+ \caption{Execution time for \emph{SLS} of \gls{nmf} and \gls{svd} for different $L$ }
55
+ \label{tab_6_NMF_SVD}
56
+ \end{table}
57
+
58
+ Calculating $\bm Q$ with \gls{nmf} for $L=1$ already takes 64 seconds.
59
+ This is more than \gls{svd} demanded for $L=7$.
60
+ The $\bm T$ tensor on the other hand is much faster and is below a second.
61
+ However, as soon as $L=2$ is selected, $\bm T$ takes 1 full hour, $L=4$ more than 1 hour.
62
+ The table for \gls{nmf} is not filled, since running $\bm Q$ for $L=5$ was taking more than 3 hours, but still did not finish.
63
+ Therefore, the time measurement was aborted.
64
+ This behavior was expected since it was already mentioned in \cite{Max2021}.
65
+ Overall, the execution time for \gls{nmf} is not following a trend, e.g., computing $\bm T$ for $L=3$ is faster than for $L=2$ and $\bm Q$ for $L=4$ is faster than for $L=1$.
66
+ In other words, there is no obvious rule, on whether even a small $L$ could lead to hours of run time.\newline
67
+
68
+ It can be concluded that \gls{svd} is much faster than \gls{nmf} and it also shows a clear trend, i.e. the computation time is expected to increase with $L$.
69
+ \gls{nmf} on the other hand first requires an appropriate mode number $r$, which most likely demands a parameter study.
70
+ However, even for a single \gls{nmf} solution, it can take hours.
71
+ With increasing $L$ the amount of run time is generally expected to increase, even though no clear rule can be defined.
72
+ Furthermore, it needs to be highlighted that \gls{nmf} was tested on a small model, where $n_{\beta,tr} = 7$. The author of this thesis experienced an additional increase in run time when $n_{\beta,tr}$ is selected higher.
73
+ Also, executing \gls{nmf} on multiple dynamical systems or model configurations might become infeasible in terms of time.
74
+ Finally, with the implementation of \gls{svd}, the bottleneck in modeling $\bm Q / \bm T$ could be eliminated.
75
+
76
+
77
+ \subsection{Prediction quality}
78
+ \label{subsec_3_3_2_SVD_Quality}
79
+ In this subsection, the quality of the \gls{svd} and \gls{nmf} $\bm Q / \bm T$ predictions are evaluated.
80
+ The used model configuration for this aim is \emph{SLS}.
81
+ First, only the $\bm Q$ output with \gls{svd} followed by \gls{nmf} shall be analyzed and compared. Then, the same is done for the $\bm T$ output.\newline
82
+
83
+
84
+ In order to see how many modes $r$ were chosen for \gls{svd} the two figures \ref{fig_54} and \ref{fig_55} are shown.
85
+ It can be derived that with $r = 4$, $99 \%$ of the information content could be captured. The presented results are obtained for $\bm Q$ and $L =1$.\newline
86
+
87
+ \begin{figure}[!h]
88
+ %\vspace{0.5cm}
89
+ \begin{minipage}[h]{0.47\textwidth}
90
+ \centering
91
+ \includegraphics[width =\textwidth]
92
+ {2_Figures/3_Task/2_Mod_CPE/10_lb_Q_Cumlative_E.pdf}
93
+ \caption{\emph{SLS}, \gls{svd}, cumulative energy of $\bm Q$ for $L=1$}
94
+ \label{fig_54}
95
+ \end{minipage}
96
+ \hfill
97
+ \begin{minipage}{0.47\textwidth}
98
+ \centering
99
+ \includegraphics[width =\textwidth]
100
+ {2_Figures/3_Task/2_Mod_CPE/11_lb_Q_Sing_Val.pdf}
101
+ \caption{\emph{SLS}, \gls{svd}, singular values of $\bm Q$ for $L=1$}
102
+ \label{fig_55}
103
+ \end{minipage}
104
+ \end{figure}
105
+
106
+ Figures \ref{fig_56} to \ref{fig_58} depict the original $\bm{Q}(\beta_{unseen} = 28.5)$, which is generated with CNM, the \gls{cnmc} predicted $\bm{\tilde{Q}}(\beta_{unseen} = 28.5)$ and their deviation $| \bm{Q}(\beta_{unseen} = 28.5) - \bm{\tilde{Q}}(\beta_{unseen} = 28.5) |$, respectively.
107
+ In the graphs, the probabilities to move from centroid $c_p$ to $c_j$ are indicated.
108
+ Contrasting figure \ref{fig_56} and \ref{fig_57} exhibits barely noticeable differences.
109
+ For highlighting present deviations, the direct comparison between the \gls{cnm} and \gls{cnmc} predicted $\bm Q$ tensors is given in figure \ref{fig_58}.
110
+ It can be observed that the highest value is $\max( | \bm{Q}(\beta_{unseen} = 28.5) - \bm{\tilde{Q}}(\beta_{unseen} = 28.5) |) \approx 0.0697 \approx 0.07$.
111
+ Note that all predicted $\bm Q$ and $\bm T$ tensors are obtained with \gls{rf} as the regression model.
112
+ \newline
113
+
114
+ \begin{figure}[!h]
115
+ %\vspace{0.5cm}
116
+ \begin{subfigure}[h]{0.5\textwidth}
117
+ \centering
118
+ \caption{Original $\bm{Q}(\beta_{unseen} = 28.5)$}
119
+ \includegraphics[width =\textwidth]
120
+ {2_Figures/3_Task/2_Mod_CPE/12_lb_0_Q_Orig_28.5.pdf}
121
+ \label{fig_56}
122
+ \end{subfigure}
123
+ \hfill
124
+ \begin{subfigure}{0.5\textwidth}
125
+ \centering
126
+ \caption{\gls{cnmc} predicted $\bm{\tilde{Q}}(\beta_{unseen} = 28.5)$ }
127
+ \includegraphics[width =\textwidth]
128
+ {2_Figures/3_Task/2_Mod_CPE/13_lb_2_Q_Aprox_28.5.pdf}
129
+ \label{fig_57}
130
+ \end{subfigure}
131
+
132
+ \smallskip
133
+ \centering
134
+ \begin{subfigure}{0.7\textwidth}
135
+ \caption{Deviation $| \bm{Q}(\beta_{unseen}) - \bm{\tilde{Q}}(\beta_{unseen}) |$ }
136
+ \includegraphics[width =\textwidth]
137
+ {2_Figures/3_Task/2_Mod_CPE/14_lb_4_Delta_Q_28.5.pdf}
138
+ \label{fig_58}
139
+ \end{subfigure}
140
+ \vspace{-0.3cm}
141
+ \caption{\emph{SLS}, \gls{svd}, original $\bm{Q}(\beta_{unseen} = 28.5)$ , \gls{cnmc} predicted $\bm{\tilde{Q}}(\beta_{unseen} = 28.5)$ and deviation $| \bm{Q}(\beta_{unseen} = 28.5) - \bm{\tilde{Q}}(\beta_{unseen} = 28.5) |$ for $L=1$}
142
+ \label{fig_58_Full}
143
+ \end{figure}
144
+
145
+ The same procedure shall now be performed with NMF.
146
+ The results are depicted in figures \ref{fig_59} and \ref{fig_60}.
147
+ Note that the original \gls{cnm} $\bm{Q}(\beta_{unseen} = 28.5)$ does not change, thus figure \ref{fig_56} can be reused.
148
+ By exploiting figure \ref{fig_61}, it can be observed that the highest deviation for the \gls{nmf} version is $\max( | \bm{Q}(\beta_{unseen} = 28.5) - \bm{\tilde{Q}}(\beta_{unseen} = 28.5) |) \approx 0.0699 \approx 0.07$.
149
+ The maximal error of \gls{nmf} $(\approx 0.0699)$ is slightly higher than that of \gls{svd} $(\approx 0.0697)$.
150
+ Nevertheless, both methods have a very similar maximal error and seeing visually other significant differences is hard.
151
+ \newline
152
+
153
+ \begin{figure}[!h]
154
+ %\vspace{0.5cm}
155
+ \begin{subfigure}[h]{0.5\textwidth}
156
+ \centering
157
+ \caption{\gls{cnmc} predicted $\bm{\tilde{Q}}(\beta_{unseen} = 28.5)$ }
158
+ \includegraphics[width =\textwidth]
159
+ {2_Figures/3_Task/2_Mod_CPE/15_lb_2_Q_Aprox_28.5.pdf}
160
+ \label{fig_59}
161
+ \end{subfigure}
162
+ \hfill
163
+ \begin{subfigure}{0.5\textwidth}
164
+ \caption{Deviation $| \bm{Q}(\beta_{unseen} ) - \bm{\tilde{Q}}(\beta_{unseen} ) |$}
165
+ \includegraphics[width =\textwidth]
166
+ {2_Figures/3_Task/2_Mod_CPE/16_lb_4_Delta_Q_28.5.pdf}
167
+ \label{fig_60}
168
+ \end{subfigure}
169
+ \vspace{-0.3cm}
170
+ \caption{\emph{SLS}, \gls{nmf}, \gls{cnmc} predicted $\bm{\tilde{Q}}(\beta_{unseen} = 28.5)$ and deviation $| \bm{Q}(\beta_{unseen} = 28.5) - \bm{\tilde{Q}}(\beta_{unseen} = 28.5) |$ for $L=1$}
171
+ \end{figure}
172
+
173
+ In order to have a quantifiable error value, the Mean absolute error (MAE) following equation \eqref{eq_23} is leveraged.
174
+ The MAE errors for \gls{svd} and \gls{nmf} are $MAE_{SVD} = 0.002 580 628$ and $MAE_{NMF} = 0.002 490 048$, respectively.
175
+ \gls{nmf} is slightly better than \gls{svd} with $ MAE_{SVD} - MAE_{NMF} \approx 1 \mathrm{e}{-4}$, which can be considered to be negligibly small.
176
+ Furthermore, it must be stated that \gls{svd} was only allowed to use $r_{SVD} = 4$ modes, due to the $99 \%$ energy demand, whereas \gls{nmf} used $r_{NMF} = 9$ modes.
177
+ Given that \gls{svd} is stable in computational time, i.e., it is not assumed that for low $L$, the computational cost scales up to hours, \gls{svd} is the clear winner for this single comparison. \newline
178
+
179
+
180
+ For the sake of completeness, the procedure shall be conducted once as well for the $\bm T$ tensor.
181
+ For this purpose figures \ref{fig_61} to \ref{fig_65} shall be considered.
182
+ It can be inspected that the maximal errors for \gls{svd} and \gls{nmf} are $\max( | \bm{T}(\beta_{unseen} = 28.5) - \bm{\tilde{T}}(\beta_{unseen} = 28.5) |) \approx 0.126 $ and
183
+ $\max( | \bm{T}(\beta_{unseen} = 28.5) - \bm{\tilde{T}}(\beta_{unseen} = 28.5) | ) \approx 0.115 $, respectively.
184
+ The MAE errors are, $MAE_{SVD} = 0.002 275 379 $ and $MAE_{NMF} = 0.001 635 510$.
185
+ \gls{nmf} is again slightly better than \gls{svd} with $ MAE_{SVD} - MAE_{NMF} \approx 6 \mathrm{e}{-4}$, which is a deviation of $\approx 0.06 \%$ and might also be considered as negligibly small. \newline
186
+
187
+
188
+ %------------------------------------- SVD T -----------------------------------
189
+ \begin{figure}[!h]
190
+ \begin{subfigure}{0.5 \textwidth}
191
+ \centering
192
+ \caption{Original \gls{cnm} $\bm{T}(\beta_{unseen} = 28.5)$ }
193
+ \includegraphics[width =\textwidth]
194
+ {2_Figures/3_Task/2_Mod_CPE/17_lb_1_T_Orig_28.5.pdf}
195
+ \label{fig_61}
196
+ \end{subfigure}
197
+ \hfill
198
+ \begin{subfigure}{.5 \textwidth}
199
+ \centering
200
+ \caption{\gls{cnmc} predicted $\bm{\tilde{T}}(\beta_{unseen} = 28.5)$}
201
+ \includegraphics[width =\textwidth]
202
+ {2_Figures/3_Task/2_Mod_CPE/18_lb_3_T_Aprox_28.5.pdf}
203
+ \label{fig_62}
204
+ \end{subfigure}
205
+
206
+ \smallskip
207
+ \centering
208
+ \begin{subfigure}{0.7\textwidth}
209
+ \caption{Deviation $| \bm{T}(\beta_{unseen}) - \bm{\tilde{T}}(\beta_{unseen}) |$}
210
+ \includegraphics[width =\textwidth]
211
+ {2_Figures/3_Task/2_Mod_CPE/19_lb_5_Delta_T_28.5.pdf}
212
+ \label{fig_63}
213
+ \end{subfigure}
214
+ \vspace{-0.3cm}
215
+ \caption{\emph{SLS}, \gls{svd}, original $\bm{T}(\beta_{unseen} = 28.5)$, predicted $\bm{\tilde{T}}(\beta_{unseen} = 28.5)$ and deviation $| \bm{T}(\beta_{unseen} = 28.5) - \bm{\tilde{T}}(\beta_{unseen} = 28.5) |$ for $L=1$}
216
+ \end{figure}
217
+
218
+
219
+ %------------------------------------- NMF T -----------------------------------
220
+ \begin{figure}[!h]
221
+ %\vspace{0.5cm}
222
+ \begin{subfigure}[h]{0.5\textwidth}
223
+ \centering
224
+ \caption{\gls{cnmc} predicted $\bm{\tilde{T}}(\beta_{unseen} = 28.5)$}
225
+ \includegraphics[width =\textwidth]
226
+ {2_Figures/3_Task/2_Mod_CPE/20_lb_3_T_Aprox_28.5.pdf}
227
+ \label{fig_64}
228
+ \end{subfigure}
229
+ \hfill
230
+ \begin{subfigure}{0.5\textwidth}
231
+ \centering
232
+ \caption{Deviation $| \bm{T}(\beta_{unseen}) - \bm{\tilde{T}}(\beta_{unseen}) |$}
233
+ \includegraphics[width =\textwidth]
234
+ {2_Figures/3_Task/2_Mod_CPE/21_lb_5_Delta_T_28.5.pdf}
235
+ \label{fig_65}
236
+ \end{subfigure}
237
+ \vspace{-0.3cm}
238
+ \caption{\emph{SLS}, \gls{nmf}, \gls{cnmc} predicted $\bm{\tilde{T}}(\beta_{unseen} = 28.5)$ and deviation $| \bm{T}(\beta_{unseen} = 28.5) - \bm{\tilde{T}}(\beta_{unseen} = 28.5) |$ for $L=1$}
239
+ \end{figure}
240
+
241
+ Additional MAE errors for different $L$ and $\beta_{unseen} = 28.5,\, \beta_{unseen} = 32.5$ are collected in table \ref{tab_7_NMF_SVD_QT}.
242
+ First, it can be outlined that regardless of the chosen method, \gls{svd} or \gls{nmf}, all encountered MAE errors are very small.
243
+ Consequently, it can be recorded that \gls{cnmc} convinces with an overall good approximation of the $\bm Q / \bm T$ tensors.
244
+ Second, comparing \gls{svd} and \gls{nmf} through their respective MAE errors, it can be inspected that the deviation of both is mostly in the order of $ \mathcal{O} \approx 1 \mathrm{e}{-2}$.
245
+ This is a difference of $\approx 0.1 \%$ and can again be considered to be insignificantly small.\newline
246
+
247
+ Despite this, \gls{nmf} required the additional change given in equation \eqref{eq_33}, which did not apply to \gls{svd}.
248
+ The transition time entries at the indexes where the probability is positive should be positive as well. Yet, this is not always the case when \gls{nmf} is executed. To correct that, these probability entries are manually set to zero.
249
+ This rule was also actively applied to the results presented above.
250
+ Still, the outcome is very satisfactory, because the modeling errors are found to be small.
251
+ \newline
252
+
253
+ \begin{table}[!h]
254
+ \centering
255
+ \begin{tabular}{c c| c c| c c }
256
+ \textbf{$L$} &$\beta_{unseen}$
257
+ & $\boldsymbol{MAE}_{\gls{svd}, \bm Q}$
258
+ &$\boldsymbol{MAE}_{\gls{nmf}, \bm Q}$
259
+ & $\boldsymbol{MAE}_{\gls{svd}, \bm T}$
260
+ &$\boldsymbol{MAE}_{\gls{nmf}, \bm T}$ \\
261
+ \hline \\
262
+ [-0.8em]
263
+ $1$ & $28.5$
264
+ & $0.002580628 $ & $0.002490048$
265
+ & $0.002275379 $ & $0.001635510$\\
266
+
267
+ $1$ & $32.5$
268
+ & $0.003544923$ & $0.003650155$
269
+ & $0.011152145$ & $0.010690052$\\
270
+
271
+ $2$ & $28.5$
272
+ & $0.001823848$ & $0.001776276$
273
+ & $0.000409955$ & $0.000371242$\\
274
+
275
+ $2$ & $32.5$
276
+ & $0.006381635$ & $0.006053059$
277
+ & $0.002417142$ & $0.002368680$\\
278
+
279
+ $3$ & $28.5$
280
+ & $0.000369228$ & $0.000356817$
281
+ & $0.000067680$ & $0.000062964$\\
282
+
283
+ $3$ & $32.5$
284
+ & $0.001462458$ & $0.001432738$
285
+ & $0.000346298$ & $0.000343520$\\
286
+
287
+
288
+ $4$ & $28.5$
289
+ & $0.000055002$ & $0.000052682$
290
+ & $0.000009420$ & $0.000008790$\\
291
+
292
+ $4$ & $32.5$
293
+ & $0.000215147$ & $0.000212329$
294
+ & $0.000044509$ & $0.000044225$
295
+
296
+ \end{tabular}
297
+ \caption{\emph{SLS}, Mean absolute error for different $L$ and two $\beta_{unseen}$}
298
+ \label{tab_7_NMF_SVD_QT}
299
+ \end{table}
300
+
301
+ \begin{equation}
302
+ \begin{aligned}
303
+ TGZ := \bm T ( \bm Q > 0) \leq 0 \\
304
+ \bm Q ( TGZ) := 0
305
+ \end{aligned}
306
+ \label{eq_33}
307
+ \end{equation}
308
+
309
+ In summary, both methods \gls{nmf} and \gls{svd} provide a good approximation of the $\bm Q / \bm T$ tensors.
310
+ The deviation between the prediction quality of both is negligibly small.
311
+ However, since \gls{svd} is much faster than \gls{nmf} and does not require an additional parameter study, the recommended decomposition method is \gls{svd}.
312
+ Furthermore, it shall be highlighted that \gls{svd} used only $r = 4$ modes for the $\bm Q$ case, whereas for \gls{nmf} $r=9$ were used.
313
+ Finally, as a side remark, all the displayed figures and the MAE errors are generated and calculated with \gls{cnm}'s default implemented methods.
314
+ \FloatBarrier
Data/0_Latex_True/3_Task/4_SVD_Regression.tex ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \section{Transition property regression models}
2
+ \label{sec_3_4_SVD_Regression}
3
+ In this section, the results of the 3 different regression methods, \glsfirst{rf}, AdaBoost and Gaussian Process (GP) are compared.
4
+ All the 3 regressors are implemented in \gls{cnmc} and can be selected via \emph{settings.py}.
5
+ The utilized model configuration is \emph{SLS} and the decomposition method is \gls{svd}.\newline
6
+
7
+
8
+ First, it shall be noted that \gls{cnmc} also offers the possibility to apply \emph{pySindy}.
9
+ However, \emph{pySindy} has struggled to represent the training data in the first place, thus it cannot be employed for predicting $\beta_{unseen}$.
10
+ The latter does not mean that \emph{pySindy} is not applicable for the construction of a surrogate model for the decomposed $\bm Q / \bm T$ modes, but rather that the selected candidate library was not powerful enough.
11
+ Nevertheless, only results for the 3 initially mentioned regressors will be discussed.\newline
12
+
13
+ In figures \ref{fig_66} to \ref{fig_71} the true (dashed) and the approximation (solid) of the first 4 $\bm Q / \bm T$ modes are shown for the methods RF, AdaBoost and GP, respectively.
14
+ To begin with, it can be noted that the mode behavior over different model parameter values $mod(\beta)$ is discontinuous, i.e., it exhibits spikes or sudden changes.
15
+ In figures \ref{fig_66} and \ref{fig_67} it can be observed that \gls{rf} reflects the actual behavior of $mod(\beta)$ quite well.
16
+ However, it encounters difficulties in capturing some spikes.
17
+ AdaBoost on the other hand proves in figures \ref{fig_68} and \ref{fig_69} to represent the spikes better.
18
+ Overall, AdaBoost outperforms \gls{rf} in mirroring training data. \newline
19
+
20
+ \begin{figure}[!h]
21
+ %\vspace{0.5cm}
22
+ \begin{subfigure}[h]{0.5 \textwidth}
23
+ \centering
24
+ \caption{$\bm Q$}
25
+ \includegraphics[width =\textwidth]
26
+ {2_Figures/3_Task/3_SVD_QT/0_model_Decomp_Regr_RF_More_Q.pdf}
27
+ \label{fig_66}
28
+ \end{subfigure}
29
+ \hfill
30
+ \begin{subfigure}{0.5 \textwidth}
31
+ \centering
32
+ \caption{$\bm T$}
33
+ \includegraphics[width =\textwidth]
34
+ {2_Figures/3_Task/3_SVD_QT/1_model_Decomp_Regr_RF_More_T.pdf}
35
+ \label{fig_67}
36
+ \end{subfigure}
37
+ \vspace{-0.3cm}
38
+ \caption{\emph{SLS}, \gls{svd}, $\bm Q / \bm T$ modes approximation with \gls{rf} for $L=1$}
39
+ \end{figure}
40
+
41
+ \begin{figure}[!h]
42
+ %\vspace{0.5cm}
43
+ \begin{subfigure}[h]{0.5 \textwidth}
44
+ \centering
45
+ \caption{$\bm Q$}
46
+ \includegraphics[width =\textwidth]
47
+ {2_Figures/3_Task/3_SVD_QT/2_model_Decomp_Regr_ABoost_More_Q.pdf}
48
+ \label{fig_68}
49
+ \end{subfigure}
50
+ \hfill
51
+ \begin{subfigure}{0.5 \textwidth}
52
+ \centering
53
+ \caption{$\bm T$}
54
+ \includegraphics[width =\textwidth]
55
+ {2_Figures/3_Task/3_SVD_QT/3_model_Decomp_Regr_ABoost_More_T.pdf}
56
+ \label{fig_69}
57
+ \end{subfigure}
58
+ \vspace{-0.3cm}
59
+ \caption{\emph{SLS}, \gls{svd}, $\bm Q / \bm T$ mode approximation with AdaBoost for $L=1$}
60
+ \end{figure}
61
+
62
+ \begin{figure}[!h]
63
+ %\vspace{0.5cm}
64
+ \begin{subfigure}[h]{0.5 \textwidth}
65
+ \centering
66
+ \caption{$\bm Q$}
67
+ \includegraphics[width =\textwidth]
68
+ {2_Figures/3_Task/3_SVD_QT/4_model_Decomp_Regr_GP_More_Q.pdf}
69
+ \label{fig_70}
70
+ \end{subfigure}
71
+ \hfill
72
+ \begin{subfigure}{0.5 \textwidth}
73
+ \centering
74
+ \caption{$\bm T$}
75
+ \includegraphics[width =\textwidth]
76
+ {2_Figures/3_Task/3_SVD_QT/5_model_Decomp_Regr_GP_More_T.pdf}
77
+ \label{fig_71}
78
+ \end{subfigure}
79
+ \vspace{-0.3cm}
80
+ \caption{\emph{SLS}, \gls{svd}, $\bm Q / \bm T$ mode approximation with GP for $L=1$}
81
+ \end{figure}
82
+
83
+ Gaussian Process (GP) is a very powerful method for regression.
84
+ Often this is also true when reproducing $mod(\beta)$.
85
+ However, there are cases where the performance is insufficient, as shown in figures \ref{fig_70} and \ref{fig_71}.
86
+ Applying GP results in absolutely incorrect predicted tensors $\bm{\tilde{Q}}(\beta_{unseen}),\, \bm{\tilde{T}}(\beta_{unseen})$, where too many tensor entries are wrongly forced to zero.
88
+ Therefore, $\bm{\tilde{Q}}(\beta_{unseen}),\, \bm{\tilde{T}}(\beta_{unseen})$ will eventually lead to an unacceptably high deviation from the original trajectory.
88
+ Consequently, the GP regression is not applicable for the decomposed $\bm Q / \bm T$ modes without further modification.\newline
89
+
90
+ The two remaining regressors are \glsfirst{rf} and AdaBoost.
91
+ Although AdaBoost is better at capturing the true modal behavior $mod(\beta)$, there is no guarantee that it will always be equally better at predicting the modal behavior for unseen model parameter values $mod(\beta_{unseen})$.
92
+ In table \ref{tab_8_RF_ABoost} the MAE errors for different $L$ and $\beta_{unseen} = [\, 28.5,\, 32.5\,]$ are provided.
93
+ Since the table exhibits much information, the results can also be read qualitatively through the graphs \ref{fig_72_QT_28} and \ref{fig_72_QT_32} for $\beta_{unseen} = 28.5$ and $\beta_{unseen} = 32.5$, respectively.
94
+ For the visual inspection, it is important to observe the order of the vertical axis scaling.
95
+ It can be noted that the MAE errors themselves and the deviation between the \gls{rf} and AdaBoost MAE errors are very low.
96
+ Thus, it can be stated that \gls{rf} as well as AdaBoost are both well-suited regressors.\newline
97
+
98
+
99
+ \begin{table}[!h]
100
+ \centering
101
+ \begin{tabular}{c c| c c| c c }
102
+ \textbf{$L$} &$\beta_{unseen}$
103
+ & $\boldsymbol{MAE}_{RF, \bm Q}$
104
+ &$\boldsymbol{MAE}_{AdaBoost, \bm Q}$
105
+ & $\boldsymbol{MAE}_{RF, \bm T}$
106
+ &$\boldsymbol{MAE}_{AdaBoost, \bm T}$ \\
107
+ \hline \\
108
+ [-0.8em]
109
+ $1$ & $28.5$
110
+ & $0.002580628 $ & $0.002351781$
111
+ & $0.002275379 $ & $0.002814208$\\
112
+
113
+ $1$ & $32.5$
114
+ & $0.003544923$ & $0.004133114$
115
+ & $0.011152145$ & $0.013054876$\\
116
+
117
+ $2$ & $28.5$
118
+ & $0.001823848$ & $0.001871858$
119
+ & $0.000409955$ & $0.000503748$\\
120
+
121
+ $2$ & $32.5$
122
+ & $0.006381635$ & $0.007952153$
123
+ & $0.002417142$ & $0.002660403$\\
124
+
125
+ $3$ & $28.5$
126
+ & $0.000369228$ & $0.000386292$
127
+ & $0.000067680$ & $0.000082808$\\
128
+
129
+ $3$ & $32.5$
130
+ & $0.001462458$ & $0.001613434$
131
+ & $0.000346298$ & $0.000360097$\\
132
+
133
+
134
+ $4$ & $28.5$
135
+ & $0.000055002$ & $0.000059688$
136
+ & $0.000009420$ & $0.000011500$\\
137
+
138
+ $4$ & $32.5$
139
+ & $0.000215147$ & $0.000230404$
140
+ & $0.000044509$ & $0.000046467$\\
141
+
142
+ $5$ & $28.5$
143
+ & $0.000007276$ & $0.000007712$
144
+ & $0.000001312$ & $0.000001600$\\
145
+
146
+ $5$ & $32.5$
147
+ & $0.000028663$ & $0.000030371$
148
+ & $0.000005306$ & $0.000005623$\\
149
+
150
+ $6$ & $28.5$
151
+ & $0.000000993$ & $0.000052682$
152
+ & $0.000000171$ & $0.000000206$\\
153
+
154
+ $6$ & $32.5$
155
+ & $0.000003513$ & $0.000003740$
156
+ & $0.000000629$ & $0.000000668$\\
157
+
158
+ $7$ & $28.5$
159
+ & $0.000000136$ & $0.000000149$
160
+ & $0.000000023$ & $0.000000031$ \\
161
+
162
+ $7$ & $32.5$
163
+ & $0.000000422$ & $0.000000454$
164
+ & $0.000000078$ & $0.000000082$
165
+
166
+
167
+ \end{tabular}
168
+ \caption{\emph{SLS}, Mean absolute error for comparing \gls{rf} and AdaBoost for different $L$ and two $\beta_{unseen}$}
169
+ \label{tab_8_RF_ABoost}
170
+ \end{table}
171
+
172
+ \begin{figure}[!h]
173
+ %\vspace{0.5cm}
174
+ \begin{subfigure}[h]{0.5 \textwidth}
175
+ \centering
176
+ \caption{$\bm Q$}
177
+ \includegraphics[width =\textwidth]
178
+ {2_Figures/3_Task/3_SVD_QT/6_Q_28_5.pdf}
179
+ \label{fig_72_Q_28}
180
+ \end{subfigure}
181
+ \hfill
182
+ \begin{subfigure}{0.5 \textwidth}
183
+ \centering
184
+ \caption{$\bm T$}
185
+ \includegraphics[width =\textwidth]
186
+ {2_Figures/3_Task/3_SVD_QT/7_T_28_5.pdf}
187
+ \label{fig_72_T_28}
188
+ \end{subfigure}
189
+ \vspace{-0.3cm}
190
+ \caption{\emph{SLS}, Mean absolute error for comparing \gls{rf} and AdaBoost for different $L$ and $\beta_{unseen} = 28.5$}
191
+ \label{fig_72_QT_28}
192
+ \end{figure}
193
+
194
+ \begin{figure}[!h]
195
+ %\vspace{0.5cm}
196
+ \begin{subfigure}[h]{0.5 \textwidth}
197
+ \centering
198
+ \caption{$\bm Q$}
199
+ \includegraphics[width =\textwidth]
200
+ {2_Figures/3_Task/3_SVD_QT/8_Q_32_5.pdf}
201
+ \label{fig_72_Q_32}
202
+ \end{subfigure}
203
+ \hfill
204
+ \begin{subfigure}{0.5 \textwidth}
205
+ \centering
206
+ \caption{$\bm T$}
207
+ \includegraphics[width =\textwidth]
208
+ {2_Figures/3_Task/3_SVD_QT/9_T_32_5.pdf}
209
+ \label{fig_72_T_32}
210
+ \end{subfigure}
211
+ \vspace{-0.3cm}
212
+ \caption{\emph{SLS}, Mean absolute error for comparing \gls{rf} and AdaBoost for different $L$ and $\beta_{unseen} = 32.5$}
213
+ \label{fig_72_QT_32}
214
+ \end{figure}
215
+
216
+
217
+ In summary, the following can be said: \gls{rf} and AdaBoost are both performing well in regression. Furthermore, no clear winner between the two regressors can be detected.
218
+ The third option GP is dismissed as it sometimes has unacceptably low regression performance.
219
+ Finally, there is the possibility to use \emph{pySindy}, however, for that, an appropriate candidate library must be defined.
220
+ \FloatBarrier
Data/0_Latex_True/3_Task/5_Pred.tex ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \section{CNMc predictions}
2
+ \label{sec_3_5_Pred}
3
+ In this section, some representative outputs for the \gls{cnmc} predicted trajectories shall be discussed.
4
+ For that, first, the quality measurement abilities implemented in \gls{cnmc} are elaborated.
5
+ Next, the model \emph{SLS} is analyzed and explained in detail in the subsection \ref{subsec_3_5_1_SLS}.
6
+ Finally, the outcome for other models shall be presented briefly in subsection \ref{subsec_3_5_2_Models}.\newline
7
+
8
+ There are several methods implemented in \gls{cnmc} to assess the quality of the predicted trajectories.
9
+ The first one is the autocorrelation, which will be calculated for all $\vec{\beta}_{unseen}$ and all provided $\vec{L}$, for the true, \gls{cnm} and \gls{cnmc} predicted trajectories.
10
+ As usual, the output is plotted and saved as HTML files for a feature-rich visual inspection.
11
+ For quantitative assessment, the MAE errors are calculated for all $\vec{\beta}_{unseen}$ and $\vec{L}$ for two sets.
12
+ The first set consists of the MAE errors between the true and the \gls{cnm} predicted trajectories.
13
+ The second set contains the MAE errors between the true and the \gls{cnmc} predicted trajectories.
14
+ Both sets are plotted as MAE errors over $L$ and stored as HTML files.
15
+ Furthermore, the one $L$ value which exhibits the least MAE error is printed in the terminal and can be found in the log file as well. \newline
16
+
17
+ The second technique is the \gls{cpd}, which will also be computed for all the 3 trajectories, i.e., true, \gls{cnm} and \gls{cnmc} predicted trajectories.
18
+ The \gls{cpd} depicts the probability of being at one centroid $c_i$.
19
+ For each $\vec{\beta}_{unseen}$ and all $L$ the \gls{cpd} is plotted and saved.
20
+ The third method displays all the 3 trajectories in the state space.
21
+ Moreover, the trajectories are plotted as 2-dimensional graphs, i.e., each axis as a subplot over the time $t$.
22
+ The final method calculates the MAE errors of the $\bm Q / \bm T$ tensors for all $L$.\newline
23
+
24
+ The reason why more than one quality measurement method is integrated into \gls{cnmc} is that \gls{cnmc} should be able to be applied to, among other dynamical systems, chaotic systems.
25
+ The motion of the Lorenz system \eqref{eq_6_Lorenz} is not as complex as of the, e.g., the \emph{Four Wing} \eqref{eq_10_4_Wing}.
26
+ Nevertheless, the Lorenz system already contains quasi-random elements, i.e., the switching from one ear to the other cannot be captured exactly with a surrogate model. However, the characteristics of the Lorenz system and other chaotic dynamical systems can be replicated as well.
27
+ In order to prove the latter, more than one method to measure the prediction quality is required.
Data/0_Latex_True/3_Task/6_SLS.tex ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \subsection{Assessment of SLS}
2
+ \label{subsec_3_5_1_SLS}
3
+ In this subsection, the prediction capability for the \emph{SLS} will be analyzed in detail. All the presented output is generated with \gls{svd} as the decomposition method and \gls{rf} as the $\bm Q / \bm T$ regressor.\newline
4
+
5
+ The final objective of \gls{cnmc} is to capture the characteristics of the original trajectory.
6
+ However, it is important to outline that \gls{cnmc} is trained with the \gls{cnm} predicted trajectories.
7
+ Thus, the outcome of \gls{cnmc} highly depends on the ability of \gls{cnm} to represent the original data.
8
+ Consequently, \gls{cnmc} can only be as effective as \gls{cnm} is in the first place, with the approximation of the true data.
9
+ Figures \ref{fig_72} and \ref{fig_73} show the true, \gls{cnm} and \gls{cnmc} predicted trajectories and a focused view on the \gls{cnm} and \gls{cnmc} trajectories, respectively.
10
+ The output was generated for $\beta_{unseen} = 28.5$ and $L =1$.
11
+ First, it can be observed that \gls{cnm} is not able to capture the full radius of the Lorenz attractor.
12
+ This is caused by the low chosen number of centroids $K=10$.
13
+ Furthermore, as mentioned at the beginning of this chapter, the goal is not to replicate the true data one-to-one, but rather to catch the significant behavior of any dynamic system.
14
+ With the low number of centroids $K$, \gls{cnm} extracts the characteristics of the Lorenz system well.
15
+ Second, the other aim for \gls{cnmc} is to match the \gls{cnm} data as closely as possible.
16
+ Both figures \ref{fig_72} and \ref{fig_73} prove that \gls{cnmc} has fulfilled its task very well. \newline
17
+
18
+ \begin{figure}[!h]
19
+ \begin{subfigure}{0.5\textwidth}
20
+ \centering
21
+ \caption{True, \gls{cnm} and \gls{cnmc} predicted trajectories}
22
+ \includegraphics[width =\textwidth]
23
+ {2_Figures/3_Task/4_SLS/0_lb_28.5_All.pdf}
24
+ \label{fig_72}
25
+ \end{subfigure}
26
+ \hfill
27
+ \begin{subfigure}{0.5\textwidth}
28
+ \centering
29
+ \caption{\gls{cnm} and \gls{cnmc} predicted trajectories}
30
+ \includegraphics[width =\textwidth]
31
+ {2_Figures/3_Task/4_SLS/1_lb_28.5.pdf}
32
+ \label{fig_73}
33
+ \end{subfigure}
34
+ \vspace{-0.3cm}
35
+ \caption{\emph{SLS}, $\beta_{unseen}=28.5,\, L=1$, true, \gls{cnm} and \gls{cnmc} predicted trajectories}
36
+ \end{figure}
37
+
38
+
39
+ A close-up of the movement of the different axes is shown in the picture \ref{fig_74}.
40
+ Here, as well, the same can be observed as described above. Namely, the predicted \gls{cnmc} trajectory is not a one-to-one reproduction of the original trajectory.
41
+ However, the characteristics, i.e., the magnitude of the motion in all 3 directions (x, y, z) and the shape of the oscillations, are very similar to the original trajectory.
42
+ Note that even though the true and predicted trajectories are utilized to assess whether the characteristic behavior could be extracted, a single evaluation based on the trajectories is not sufficient and often not advised or even possible.
43
+ In complex systems, trajectories can change rapidly while dynamical features persist \cite{Fernex2021a}.
44
+ In \gls{cnmc} the predicted trajectories are obtained through the \gls{cnm} propagation, which itself is based on a probabilistic model, i.e. the $\bm Q$ tensor.
45
+ Thus, matching full trajectories becomes even more unrealistic.
46
+ The latter two statements highlight yet again that more than one method of measuring quality is needed.
47
+ To further support the generated outcome the autocorrelation and \gls{cpd} in figure \ref{fig_75} and \ref{fig_76}, respectively, shall be considered.
48
+ It can be inspected that the \gls{cnm} and \gls{cnmc} autocorrelations are matching the true autocorrelation in the shape favorably well.
49
+ Nonetheless, the degree of reflecting the magnitude fully decreases quite fast.
50
+ Considering the \gls{cpd}, it can be recorded that the true \gls{cpd} could overall be reproduced satisfactorily.\newline
51
+
52
+ \begin{figure}[!h]
53
+ \centering
54
+ \includegraphics[width =0.75\textwidth]
55
+ {2_Figures/3_Task/4_SLS/2_lb_28.5_3V_All.pdf}
56
+ \caption{\emph{SLS}, $\beta_{unseen}=28.5, \, L=1$, true, \gls{cnm} and \gls{cnmc} predicted trajectories as 2d graphs }
57
+ \label{fig_74}
58
+ \end{figure}
59
+
60
+
61
+ \begin{figure}[!h]
62
+ \begin{subfigure}{0.5\textwidth}
63
+ \centering
64
+ \caption{autocorrelation}
65
+ \includegraphics[width =\textwidth]
66
+ {2_Figures/3_Task/4_SLS/3_lb_3_all_28.5.pdf}
67
+ \label{fig_75}
68
+ \end{subfigure}
69
+ \hfill
70
+ \begin{subfigure}{0.5\textwidth}
71
+ \centering
72
+ \caption{\gls{cpd}}
73
+ \includegraphics[width =\textwidth]
74
+ {2_Figures/3_Task/4_SLS/4_lb_28.5.pdf}
75
+ \label{fig_76}
76
+ \end{subfigure}
77
+ \vspace{-0.3cm}
78
+ \caption{\emph{SLS}, $\beta_{unseen}= 28.5, \, L =1$, autocorrelation and \gls{cpd} for true, \gls{cnm} and \gls{cnmc} predicted trajectories}
79
+ \end{figure}
80
+
81
+
82
+ To illustrate the influence of $L$, figure \ref{fig_77} shall be viewed.
83
+ It depicts the MAE error for the true and \gls{cnmc} predicted trajectories for $\beta_{unseen}= [\, 28.5,\, 32.5 \, ]$ with $L$ up to 7.
84
+ It can be observed that the choice of $L$ has an impact on the prediction quality measured by autocorrelation.
85
+ For $\beta_{unseen}=28.5$ and $\beta_{unseen}=32.5$, the optimal $L$ values are $L = 2$ and $L = 7$, respectively. To emphasize it even more that with the choice of $L$ the prediction quality can be regulated, figure \ref{fig_78} shall be considered.
86
+ It displays the 3 autocorrelations for $L = 7$.
87
+ Matching the shape of the true autocorrelation was already established with $L =1$ as shown in figure \ref{fig_75}. In addition to that, $L=7$ improves by matching the true magnitude.
88
+ Finally, it shall be mentioned that similar results have been accomplished with other $K$ tested values, where the highest value was $K =50$.
89
+
90
+ \begin{figure}[!h]
91
+ \begin{minipage}{0.47\textwidth}
92
+ \centering
93
+ \includegraphics[width =\textwidth]
94
+ {2_Figures/3_Task/4_SLS/5_lb_1_Orig_CNMc.pdf}
95
+ \caption{\emph{SLS}, MAE error for true and \gls{cnmc} predicted autocorrelations for $\beta_{unseen}= [\, 28.5,$ $32.5 \, ]$ and different values of $L$}
96
+ \label{fig_77}
97
+ \end{minipage}
98
+ \hfill
99
+ \begin{minipage}{0.47\textwidth}
100
+ \centering
101
+ \includegraphics[width =\textwidth]
102
+ {2_Figures/3_Task/4_SLS/6_lb_3_all_32.5.pdf}
103
+ \caption{\emph{SLS}, $\beta_{unseen}=32.5, \, L=7$, \gls{cnm} and \gls{cnmc} predicted autocorrelation }
104
+ \label{fig_78}
105
+ \end{minipage}
106
+ \end{figure}
107
+ \FloatBarrier
Data/0_Latex_True/3_Task/7_Models.tex ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \subsection{Results of further dynamical systems}
2
+ \label{subsec_3_5_2_Models}
3
+ In this subsection, the \gls{cnmc} prediction results for other models will be displayed.
4
+ The chosen dynamical systems with their configurations are the following.
5
+ % ==============================================================================
6
+ \begin{enumerate}
7
+ \item \emph{FW50}, based on the \emph{Four Wing} set of equations \eqref{eq_10_4_Wing} with $K=50, \, \vec{\beta }_{tr} = [\, \beta_0 = 8 ; \, \beta_{end} = 11 \,], \, n_{\beta, tr} = 13$.
8
+
9
+ \item \emph{Rössler15}, based on the \emph{Rössler} set of equations \eqref{eq_7_Ross} with $K=15, \, \vec{\beta }_{tr} = [\, \beta_0 = 6 ; \, \beta_{end} = 13 \,], \, n_{\beta, tr} = 15$.
10
+
11
+ \item \emph{TS15}, based on the \emph{Two Scroll} set of equations \eqref{eq_9_2_Scroll} with $K=15, \, \vec{\beta }_{tr} = [\, \beta_0 = 5 ; \, \beta_{end} = 12 \,], \, n_{\beta, tr} = 15$.
12
+ \end{enumerate}
13
+ All the presented outputs were generated with \gls{svd} as the decomposition method and \gls{rf} as the $\bm Q / \bm T$ regressor.
14
+ Furthermore, the B-spline interpolation in the propagation step of \gls{cnm} was replaced with linear interpolation.
15
+ The B-spline interpolation was originally utilized for smoothing the motion between two centroids.
16
+ However, it was discovered that for a high number of $K$, the B-spline interpolation is not able to reproduce the motion between two centroids accurately, but rather would impose unacceptably high deviations or oscillations into the predictions.
17
+ This finding is also mentioned in \cite{Max2021} and addressed as one of \emph{first CNMc}'s limitations.
18
+ Two illustrative examples of the unacceptable high deviations caused by the B-spline interpolation are given in figures \ref{fig_82_Traject} and \ref{fig_82_Autocorr}.
19
+ The results are obtained for \emph{LS20} for $\beta = 31.75$ and $\beta = 51.75$ with $L=3$.
20
+ In figures \ref{fig_82_Traj_B} and \ref{fig_83_Traj_B} it can be inspected that the B-spline interpolation has a highly undesired impact on the predicted trajectories.
21
+ In contrast to that, in figures \ref{fig_82_Traj_L} and \ref{fig_83_Traj_L}, where linear interpolation is utilized, no outliers are added to the predictions.
22
+ The impact of the embedded outliers, caused by the B-spline interpolation, on the autocorrelation is depicted in figures \ref{fig_82_Auto_B} and \ref{fig_83_Auto_B}.
23
+ The order of the deviation between the true and the \gls{cnmc} predicted autocorrelation can be grasped by inspecting the vertical axis scale.
24
+ Comparing it with the linear interpolated autocorrelations, shown in figures \ref{fig_82_Auto_L} and \ref{fig_83_Auto_L}, it can be recorded that the deviation between the true and predicted autocorrelations is significantly lower than in the B-spline interpolation case.
25
+ \newline
26
+
27
+ Nevertheless, it is important to highlight that the B-spline interpolation is only a tool for smoothing the motion between two centroids.
28
+ The quality of the modeled $\bm Q / \bm T$ cannot be assessed directly by comparing the trajectories and the autocorrelations.
29
+ To stress that the \gls{cpd} in figure \ref{fig_82_CPD_B} and \ref{fig_83_CPD_B} shall be considered.
30
+ It can be observed that \gls{cpd} does not represent the findings of the autocorrelations, i.e., the true and predicted behavior agree acceptably overall.
31
+ This is because the type of interpolation has no influence on the modeling of the probability tensor $\bm Q$.
32
+ Thus, the outcome with the B-spline interpolation should not be regarded as an instrument that enables making assumptions about the entire prediction quality of \gls{cnmc}. The presented points underline again the fact that more than one method should be considered to evaluate the prediction quality of \gls{cnmc}.
33
+ \newline
34
+
35
+
36
+ \begin{figure}[!h]
37
+ \begin{subfigure}{0.5\textwidth}
38
+ \centering
39
+ \caption{Trajectories, B-spline, $\beta_{unseen} = 31.75$ }
40
+ \includegraphics[width =\textwidth]
41
+ {2_Figures/3_Task/5_Models/18_lb_31.75_All.pdf}
42
+ \label{fig_82_Traj_B}
43
+ \end{subfigure}
44
+ \hfill
45
+ \begin{subfigure}{0.5\textwidth}
46
+ \centering
47
+ \caption{Trajectories, B-spline, $\beta_{unseen} = 51.75$}
48
+ \includegraphics[width =\textwidth]
49
+ {2_Figures/3_Task/5_Models/19_lb_51.75_All.pdf}
50
+ \label{fig_83_Traj_B}
51
+ \end{subfigure}
52
+
53
+ % ------------- Linear ----------------------
54
+ \smallskip
55
+ \begin{subfigure}{0.5\textwidth}
56
+ \centering
57
+ \caption{Trajectories, linear, $\beta_{unseen} = 31.75$ }
58
+ \includegraphics[width =\textwidth]
59
+ {2_Figures/3_Task/5_Models/24_lb_31.75_All.pdf}
60
+ \label{fig_82_Traj_L}
61
+ \end{subfigure}
62
+ \hfill
63
+ \begin{subfigure}{0.5\textwidth}
64
+ \centering
65
+ \caption{Trajectories, linear, $\beta_{unseen} = 51.75$}
66
+ \includegraphics[width =\textwidth]
67
+ {2_Figures/3_Task/5_Models/25_lb_51.75_All.pdf}
68
+ \label{fig_83_Traj_L}
69
+ \end{subfigure}
70
+ \vspace{-0.3cm}
71
+ \caption{Illustrative undesired oscillations caused by the B-spline interpolation and its impact on the predicted trajectory contrasted with linear interpolation, \emph{LS20}, $\beta = 31.75$ and $\beta =51.75$, $L=3$}
72
+ \label{fig_82_Traject}
73
+ \end{figure}
74
+
75
+ %----------------------------------- AUTOCOR -----------------------------------
76
+
77
+ \begin{figure}[!h]
78
+ \begin{subfigure}{0.5\textwidth}
79
+ \centering
80
+ \caption{Autocorrelations, B-spline, $\beta = 31.75$ }
81
+ \includegraphics[width =\textwidth]
82
+ {2_Figures/3_Task/5_Models/20_lb_3_all_31.75.pdf}
83
+ \label{fig_82_Auto_B}
84
+ \end{subfigure}
85
+ \hfill
86
+ \begin{subfigure}{0.5\textwidth}
87
+ \centering
88
+ \caption{Autocorrelations, B-spline, $\beta_{unseen} = 51.75$}
89
+ \includegraphics[width =\textwidth]
90
+ {2_Figures/3_Task/5_Models/21_lb_3_all_51.75.pdf}
91
+ \label{fig_83_Auto_B}
92
+ \end{subfigure}
93
+
94
+ \smallskip
95
+ % ------------- LINEAR ----------------------
96
+ \begin{subfigure}{0.5\textwidth}
97
+ \centering
98
+ \caption{Autocorrelations, linear, $\beta = 31.75$ }
99
+ \includegraphics[width =\textwidth]
100
+ {2_Figures/3_Task/5_Models/26_lb_3_all_31.75.pdf}
101
+ \label{fig_82_Auto_L}
102
+ \end{subfigure}
103
+ \hfill
104
+ \begin{subfigure}{0.5\textwidth}
105
+ \centering
106
+ \caption{Autocorrelations, linear, $\beta_{unseen} = 51.75$}
107
+ \includegraphics[width =\textwidth]
108
+ {2_Figures/3_Task/5_Models/27_lb_3_all_51.75.pdf}
109
+ \label{fig_83_Auto_L}
110
+ \end{subfigure}
111
+ \vspace{-0.3cm}
112
+ \caption{Illustrative undesired oscillations caused by the B-spline interpolation and its impact on the predicted autocorrelations contrasted with linear interpolation, \emph{LS20}, $\beta = 31.75$ and $\beta =51.75$, $L=3$}
113
+ \label{fig_82_Autocorr}
114
+ \end{figure}
115
+
116
+ \begin{figure}[!h]
117
+ % ------------- CPD ----------------------
118
+ \begin{subfigure}{0.5\textwidth}
119
+ \centering
120
+ \caption{\gls{cpd}, $\beta = 31.75$ }
121
+ \includegraphics[width =\textwidth]
122
+ {2_Figures/3_Task/5_Models/22_lb_31.75.pdf}
123
+ \label{fig_82_CPD_B}
124
+ \end{subfigure}
125
+ \hfill
126
+ \begin{subfigure}{0.5\textwidth}
127
+ \centering
128
+ \caption{\gls{cpd}, $\beta_{unseen} = 51.75$}
129
+ \includegraphics[width =\textwidth]
130
+ {2_Figures/3_Task/5_Models/23_lb_51.75.pdf}
131
+ \label{fig_83_CPD_B}
132
+ \end{subfigure}
133
+ \vspace{-0.3cm}
134
+ \caption{Illustrative impact of the B-spline interpolation on the \glspl{cpd}, \emph{LS20}, $\beta = 31.75$ and $\beta =51.75$, $L=3$}
135
+ \end{figure}
136
+
137
+ \FloatBarrier
138
+ The results generated with the above mentioned linear interpolation for \emph{FW50}, \emph{Rössler15} and \emph{TS15} are depicted in figures \ref{fig_79} to \ref{fig_81}, respectively.
139
+ Each of them consists of an illustrative trajectory, 3D and 2D trajectories, the autocorrelations, the \gls{cpd} and the MAE error between the true and \gls{cnmc} predicted trajectories for a range of $\vec{L}$ and some $\vec{\beta}_{unseen}$.
140
+ The illustrative trajectory includes arrows, which provide additional information.
141
+ First, the direction of the motion, then the size of the arrows represents the velocity of the system. Furthermore, the change in the size of the arrows is equivalent to a change in the velocity, i.e., the acceleration.
142
+ Systems like the \emph{TS15} exhibit a fast change in the size of the arrows, i.e., the acceleration is nonlinear.
143
+ The more complex the behavior of the acceleration is, the more complex the overall system becomes.
144
+ The latter statement serves to emphasize that \gls{cnmc} can be applied not only to rather simple systems such as the Lorenz attractor \cite{lorenz1963deterministic}, but also to more complex systems such as the \emph{FW50} and \emph{TS15}.\newline
145
+
146
+ All in all, the provided results for the 3 systems are very similar to those already explained in the previous subsection \ref{subsec_3_5_1_SLS}.
147
+ Thus, the results presented are for demonstration purposes and will not be discussed further.
148
+ However, the 3 systems also have been calculated with different values for $K$.
149
+ For \emph{FW50}, the range of $\vec{K}= [\, 15, \, 30, \, 50 \, ]$ was explored with the finding that the influence of $K$ remained quite small.
150
+ For \emph{Rössler15} and \emph{TS15}, the ranges were chosen as $\vec{K}= [\, 15, \, 30, \, 100\,]$ and $\vec{K}= [\, 15, \, 75 \,]$, respectively.
151
+ The influence of $K$ was found to be insignificant also for the latter two systems.
152
+ % ==============================================================================
153
+ % ======================= FW50 =================================================
154
+ % ==============================================================================
155
+ \begin{figure}[!h]
156
+ \begin{subfigure}{0.5\textwidth}
157
+ \centering
158
+ \caption{Illustrative trajectory $\beta = 9$ }
159
+ \includegraphics[width =\textwidth]
160
+ {2_Figures/3_Task/5_Models/0_lb_9.000.pdf}
161
+ \end{subfigure}
162
+ \hfill
163
+ \begin{subfigure}{0.5\textwidth}
164
+ \centering
165
+ \caption{Trajectories, $\beta_{unseen} = 8.1$}
166
+ \includegraphics[width =\textwidth]
167
+ {2_Figures/3_Task/5_Models/1_lb_8.1_All.pdf}
168
+ \end{subfigure}
169
+
170
+ \smallskip
171
+ \begin{subfigure}{0.5\textwidth}
172
+ \centering
173
+ \caption{2D-trajectories, $\beta_{unseen} = 8.1$}
174
+ \includegraphics[width =\textwidth]
175
+ {2_Figures/3_Task/5_Models/2_lb_8.1_3V_All.pdf}
176
+ \end{subfigure}
177
+ \hfill
178
+ \begin{subfigure}{0.5\textwidth}
179
+ \centering
180
+ \caption{Autocorrelations, $\beta_{unseen} = 8.1$}
181
+ \includegraphics[width =\textwidth]
182
+ {2_Figures/3_Task/5_Models/3_lb_3_all_8.1.pdf}
183
+ \end{subfigure}
184
+
185
+
186
+ \smallskip
187
+ \begin{subfigure}{0.5\textwidth}
188
+ \centering
189
+ \caption{\gls{cpd}, $\beta_{unseen} = 8.1$}
190
+ \includegraphics[width =\textwidth]
191
+ {2_Figures/3_Task/5_Models/4_lb_8.1.pdf}
192
+ \end{subfigure}
193
+ \hfill
194
+ \begin{subfigure}{0.5\textwidth}
195
+ \centering
196
+ \caption{Autocorrelations $MAE(L,\, \beta_{unseen})$}
197
+ \includegraphics[width =\textwidth]
198
+ {2_Figures/3_Task/5_Models/5_lb_1_Orig_CNMc.pdf}
199
+ \end{subfigure}
200
+ \vspace{-0.3cm}
201
+ \caption{Results for \emph{FW50}, $\beta_{unseen} = 8.1, \, L= 2$}
202
+ \label{fig_79}
203
+ \end{figure}
204
+ % ==============================================================================
205
+ % ======================= FW50 =================================================
206
+ % ==============================================================================
207
+
208
+ % ==============================================================================
209
+ % ======================= Rossler 15 ===========================================
210
+ % ==============================================================================
211
+ \begin{figure}[!h]
212
+ \begin{subfigure}{0.5\textwidth}
213
+ \centering
214
+ \caption{Illustrative trajectory $\beta = 7.5$ }
215
+ \includegraphics[width =\textwidth]
216
+ {2_Figures/3_Task/5_Models/6_lb_7.500.pdf}
217
+ \end{subfigure}
218
+ \hfill
219
+ \begin{subfigure}{0.5\textwidth}
220
+ \centering
221
+ \caption{Trajectories, $\beta_{unseen} = 9.6$}
222
+ \includegraphics[width =\textwidth]
223
+ {2_Figures/3_Task/5_Models/7_lb_9.6_All.pdf}
224
+ \end{subfigure}
225
+
226
+ \smallskip
227
+ \begin{subfigure}{0.5\textwidth}
228
+ \centering
229
+ \caption{2D-trajectories, $\beta_{unseen} = 9.6$}
230
+ \includegraphics[width =\textwidth]
231
+ {2_Figures/3_Task/5_Models/8_lb_9.6_3V_All.pdf}
232
+ \end{subfigure}
233
+ \hfill
234
+ \begin{subfigure}{0.5\textwidth}
235
+ \centering
236
+ \caption{Autocorrelations, $\beta_{unseen} = 9.6$}
237
+ \includegraphics[width =\textwidth]
238
+ {2_Figures/3_Task/5_Models/9_lb_3_all_9.6.pdf}
239
+ \end{subfigure}
240
+
241
+
242
+ \smallskip
243
+ \begin{subfigure}{0.5\textwidth}
244
+ \centering
245
+ \caption{\gls{cpd}, $\beta_{unseen} = 9.6$}
246
+ \includegraphics[width =\textwidth]
247
+ {2_Figures/3_Task/5_Models/10_lb_9.6.pdf}
248
+ \end{subfigure}
249
+ \hfill
250
+ \begin{subfigure}{0.5\textwidth}
251
+ \centering
252
+ \caption{Autocorrelations $MAE(L,\, \beta_{unseen})$}
253
+ \includegraphics[width =\textwidth]
254
+ {2_Figures/3_Task/5_Models/11_lb_1_Orig_CNMc.pdf}
255
+ \end{subfigure}
256
+ \vspace{-0.3cm}
257
+ \caption{Results for \emph{Rössler15}, $\beta_{unseen} = 9.6,\, L =1$}
258
+ \label{fig_80}
259
+ \end{figure}
260
+ % ==============================================================================
261
+ % ======================= Rossler 15 ===========================================
262
+ % ==============================================================================
263
+
264
+
265
+ % ==============================================================================
266
+ % ======================= TS 15 ===========================================
267
+ % ==============================================================================
268
+ \begin{figure}[!h]
269
+ \begin{subfigure}{0.5\textwidth}
270
+ \centering
271
+ \caption{Illustrative trajectory $\beta = 11$ }
272
+ \includegraphics[width =\textwidth]
273
+ {2_Figures/3_Task/5_Models/12_lb_11.000.pdf}
274
+ \end{subfigure}
275
+ \hfill
276
+ \begin{subfigure}{0.5\textwidth}
277
+ \centering
278
+ \caption{Trajectories, $\beta_{unseen} = 5.1$}
279
+ \includegraphics[width =\textwidth]
280
+ {2_Figures/3_Task/5_Models/13_lb_5.1_All.pdf}
281
+ \end{subfigure}
282
+
283
+ \smallskip
284
+ \begin{subfigure}{0.5\textwidth}
285
+ \centering
286
+ \caption{2D-trajectories, $\beta_{unseen} = 5.1$}
287
+ \includegraphics[width =\textwidth]
288
+ {2_Figures/3_Task/5_Models/14_lb_5.1_3V_All.pdf}
289
+ \end{subfigure}
290
+ \hfill
291
+ \begin{subfigure}{0.5\textwidth}
292
+ \centering
293
+ \caption{Autocorrelations, $\beta_{unseen} = 5.1$}
294
+ \includegraphics[width =\textwidth]
295
+ {2_Figures/3_Task/5_Models/15_lb_3_all_5.1.pdf}
296
+ \end{subfigure}
297
+
298
+
299
+ \smallskip
300
+ \begin{subfigure}{0.5\textwidth}
301
+ \centering
302
+ \caption{\gls{cpd}, $\beta_{unseen} = 5.1$}
303
+ \includegraphics[width =\textwidth]
304
+ {2_Figures/3_Task/5_Models/16_lb_5.1.pdf}
305
+ \end{subfigure}
306
+ \hfill
307
+ \begin{subfigure}{0.5\textwidth}
308
+ \centering
309
+ \caption{Autocorrelations $MAE(L,\, \beta_{unseen})$}
310
+ \includegraphics[width =\textwidth]
311
+ {2_Figures/3_Task/5_Models/17_lb_1_Orig_CNMc.pdf}
312
+ \end{subfigure}
313
+ \vspace{-0.3cm}
314
+ \caption{Results for \emph{TS15}, $\beta_{unseen} = 5.1,\, L =2$}
315
+ \label{fig_81}
316
+ \end{figure}
317
+ % ==============================================================================
318
+ % ======================= TS 15 ================================================
319
+ % ==============================================================================
320
+
Data/0_Latex_True/4_Task/1_Concl.tex ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \chapter{Conclusion and outlook}
2
+ A tool to capture and predict the behavior of nonlinear complex and chaotic dynamical systems within a range of some model parameter values $\vec{\beta}$ is presented.
3
+ The tool is called \glsfirst{cnmc}.
4
+ It could be shown that \gls{cnmc} is able to capture and make predictions for the well-known Lorenz system \cite{lorenz1963deterministic}.
5
+ With having removed one of the major limitations in the first attempt of \gls{cnmc} \cite{Max2021}, the introduced version of \gls{cnmc} is not limited to any dimension anymore.
6
+ Furthermore, the restriction of the dynamical system to exhibit a circular trajectory is removed.
7
+ Since these two limitations could be removed, the presented \gls{cnmc} can be applied to any general dynamical system.
8
+ To outline this fact, 10 different dynamical systems are implemented by default in \gls{cnmc}.
9
+ Some of these dynamical systems were used to evaluate \gls{cnmc} performance.
10
+ It could be observed that \gls{cnmc} is not only able to deal with the Lorenz system but also with more complicated systems.
11
+ The objective to represent the characteristic behavior of general dynamical systems could be fulfilled on all tested systems.\newline
12
+
13
+ The third limitation which could be removed is the unacceptably high computational time with \glsfirst{nmf}.
14
+ It could be highlighted that \glsfirst{svd} returns the decomposition within seconds, instead of hours, without adding any inaccuracies.
15
+ Moreover, \gls{svd} does not require a parameter study.
16
+ Executing \gls{nmf} once is already computationally more expensive than \gls{svd}, but with a parameter study, \gls{nmf} becomes even more unsatisfactory in the application.
17
+ By having removed these 3 major limitations, \gls{cnmc} can be applied to any dynamical system within a reasonable computational time on a regular laptop.
18
+ Nevertheless, \gls{cnmc} contains algorithms, which highly benefit from computational power. Thus, faster outputs are achieved with clusters.
19
+ Also, with having replaced the B-spline interpolation through linear interpolation, the predicted trajectories can be visually depicted appropriately without the outliers that the B-spline interpolation had introduced.
20
+ Another important introduced advancement is that the B-spline interpolation was replaced by linear interpolation. This allows to avoid unreasonably high interpolation errors (oscillations) of the trajectory and enables an appropriate visualization.
21
+ \newline
22
+
23
+
24
+ \gls{cnmc} is written from scratch in a modular way such that implementing it into existing code, replacing employed algorithms with others, or using it as a black-box function is straightforward.
25
+ All important parameters can be adjusted via one file (\emph{settings.py}).
26
+ Helpful post-processing features are part of \gls{cnmc} and can also be controlled with \emph{settings.py}.
27
+ Overall \gls{cnmc} includes a high number of features, e.g., a log file, storing results at desired steps, saving plots as HTML files which allow extracting further information about the outcome, the ability to execute multiple models consequentially, and activating and disabling each step of \gls{cnmc}.
28
+ All displayed outputs in this thesis were generated with \gls{cnmc}.
29
+ Finally, one limitation which remains shall be mentioned.
30
+ The used \gls{svd} code receives sparse matrices, however, it returns a dense matrix. The consequence is that with high model orders $L$, quickly multiple hundreds of gigabytes of RAM are required.
31
+ The maximal $L$ which could be achieved on the laptop of the author, which has 16 GB RAM, is $L=7$.\newline
32
+
33
+ As an outlook, a new \gls{svd} algorithm should be searched for or written from scratch.
34
+ The demand for the new \gls{svd} solver is that it must receive sparse matrices and also return the solution in the form of sparse matrices.
35
+ With that $L$ could be increased, i.e., $L>7$.
36
+ In this thesis, it could be shown that \gls{cnmc} can handle chaotic systems well. Thus, the next step could be, replacing the current data generation step, where differential equations are solved, with actual \gls{cfd} data as input.
37
+ Hence, the objective would be to apply \gls{cnmc} to real \gls{cfd} data to predict flow fields.
Data/0_Latex_True/4_Task/2_Zusammen_Deutsch.tex ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \chapter{Zusammenfassung auf Deutsch}
2
+ Die Arbeit wurde an der Technischen Universität Braunschweig geschrieben.
3
+ Da diese Arbeit in einer Fremdsprache geschrieben wurde, soll der Anforderung der TU Braunschweig, eine Zusammenfassung auf Deutsch zu verfassen, welche etwa 1 DIN-A4-Seite umfasst, nachgekommen werden.
4
+ Zunächst wird kurz die Motivation dieser Master-Arbeit erklärt. Im Anschluss sollen die Ergebnisse im Kurzen erörtert werden.\newline
5
+
6
+ In dieser Master-Arbeit war es Ziel, eine bereits bestehende Methode, das sog. \glsfirst{cnmc}, zu verbessern. Die Vorversion ist in \cite{Max2021} beschrieben. Hier konnte gezeigt werden, dass \gls{cnmc} für das Lorenz System, \cite{lorenz1963deterministic} vielversprechende Approximationen zulässt.
7
+ Das Lorenz-System ist recht bekannt unter den chaotischen Systemen. Ein chaotisches System ist ein dynamisches System, welches selbst durch Differenzialgleichungen beschrieben wird.
8
+ Sinn von \gls{cnmc} ist daher, das Approximieren bzw. Vorhersagen von Trajektorien (zeitliche Lösung der Differenzialgleichung) von dynamischen Systemen.
9
+ \gls{cnmc} wurde innerhalb der ersten Version speziell für das Lorenz-System entwickelt, sodass es nicht für allgemeingültige dynamische Systeme verwendet werden konnte.
10
+ Die Limitierungen verlangten unter anderem, dass die Trajektorie kreisförmig sein müsse. Zudem musste ein 3-dimensionales Problem vorliegen. Weiterhin kam hinzu, dass ein wichtiger Schritt in dem \gls{cnmc}-Arbeitsablauf (Moden-Findung) mehrere Stunden in Anspruch nahm und somit die Anwendung von \gls{cnmc} unattraktiver machte.
11
+ Aufgrund dessen, dass es Schwierigkeiten beim Ausführen der ersten \gls{cnmc}-Version gab, wurde \gls{cnmc} von Neuem programmiert.\newline
12
+
13
+
14
+ Zunächst wurde der Code nun in der Form geschrieben, dass der Nutzer nach Belieben neue dynamische Systeme einfach hinzufügen kann. Standardmäßig kommt \gls{cnmc} bereits mit 10 verschiedenen dynamischen Systemen. Danach wurden zwei wichtige Limitierungen entfernt. Die Erste: \gls{cnmc} kann inzwischen mit jedem Verhalten der Trajektorie umgehen. In anderen Worten, die Trajektorie des dynamischen Systems muss nicht kreisförmig sein. Zweitens ist \gls{cnmc} nicht mehr durch die Anzahl der Dimensionen restriktiert. Vereinfacht ausgedrückt: Ob \gls{cnmc} auf eine 3-dimensionale oder eine andere beliebig dimensionale Differenzialgleichung angewendet werden soll, spielt keine Rolle mehr.
15
+ Für den Schritt, in welchem die Moden einer Daten-Matrix gefunden werden, stehen aktuell zwei verschiedene Möglichkeiten zur Verfügung, \glsfirst{nmf} und \glsfirst{svd}. \gls{nmf} wurde bereits in der ersten Version von \gls{cnmc} verwendet.
16
+ Doch wurde es dahingehend weiter verbessert, dass jetzt das Finden des wichtigen Parameters, der Anzahl der verwendeten Moden, automatisiert durchgeführt wird.
17
+ Somit kann \gls{nmf} automatisiert auf unterschiedliche dynamische System angewendet werden.
18
+ \gls{svd} ist die zweite Methode und wurde implementiert, um die hohe Rechenzeit des \gls{nmf} zu verhindern.
19
+ Es konnte gezeigt werden, dass \gls{svd} tatsächlich um ein Vielfaches schneller als \gls{nmf} ist.
20
+ Die Rechenzeit von \gls{svd} bewegt sich im Bereich von Sekunden, wohingegen \gls{nmf} mehrere Stunden in Anspruch nehmen kann.
21
+ Auch wurde gezeigt, dass beide Methoden qualitativ gleichwertige Ergebnisse liefern.\newline
22
+
23
+
24
+ Eine weitere wichtige Änderung, welche in der aktuellen \gls{cnmc}-Version implementiert ist, ist die, dass eine sog. B-Spline-Interpolation durch eine lineare Interpolation ersetzt wurde. Als Folge können unangebracht hohe Interpolationsfehler (Oszillationen) der Trajektorie umgangen werden. Durch letztere Änderung können die Ergebnisse nun auch grafisch dargestellt werden, ohne dass durch die B-Spline-Interpolation eingebrachte Ausreißer eine visuelle Auswertung unmöglich machen.\newline
25
+
26
+
27
+ Mit dieser Arbeit konnte gezeigt werden, dass \gls{cnmc} nicht nur für das Lorenz System, sondern für allgemeingültige dynamische Systeme verwendet werden kann. Hierfür wurden beispielsweise die Ergebnisse für drei andere dynamische Systeme gezeigt. Die aktuelle \gls{cnmc} Version wurde in einer modularen Art geschrieben, welche es erlaubt, einzelne Algorithmen leicht durch andere zu ersetzen.
28
+ Jeder einzelne Haupt-Schritt in \gls{cnmc} kann aktiviert oder deaktiviert werden. Dadurch können bereits vorhandene Ergebnisse eingeladen werden, anstatt diese jedes Mal neu zu berechnen. Das Resultat ist eine hohe Ersparnis an Rechenzeit. \gls{cnmc} kommt mit vielen Features; über eine einzige Datei lässt sich der gesamte Ablauf von \gls{cnmc} steuern. Dadurch kann bestimmt werden, welche Parameter in den einzelnen Schritten verwendet werden, wo Ergebnisse abgespeichert und geladen werden sollen, sowie auch wo und ob die Ergebnisse visuell abgespeichert werden sollen.
29
+ Die Resultate werden für die visuelle Inspektion als HTML-Dateien zur Verfügung gestellt. Damit ist es möglich weitere Informationen zu erhalten, wie beispielsweise, das Ablesen von Werten an bestimmten Stellen und anderen nützlichen Funktionen, wie etwa das Rotieren, Zoomen und Ausblenden einzelner Graphen.
30
+ Das Ziel war es, dem Nutzer einen Post-Processor mitzugeben, sodass er auch ohne weitere kostenpflichtige Software visuelle Auswertungen vornehmen kann. Doch \gls{cnmc} hat auch eine log-Datei integriert, in welcher alle Ausgaben, wie unter anderem Ergebnisse einzelner Qualitätsmesstechniken (Metriken bzw. Normen) nachgelesen werden können.\newline
31
+
32
+
33
+ Zusammenfassend lässt sich sagen: Mit dieser Master-Thesis befindet sich \gls{cnmc} in einem Zustand, in welchem es für allgemeingültige dynamische Systeme angewendet werden kann. Das Implementieren von weiteren Systemen wurde vereinfacht und wichtige Limitierungen, wie die Anzahl der Dimensionen und unzulässig hohe Rechenzeit, konnten beseitigt werden. Zudem ist das Tool gut dokumentiert und bietet diverse Features an, worunter beispielsweise die Post-Processing-Möglichkeiten inbegriffen sind.
34
+
35
+
36
+
37
+
Data/10_Law/license_Hippocratic ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* ========================================================================== */
2
+ /* =============================== Jav Comment ============================== */
3
+ /* ========================================================================== */
4
+ This project uses the library: https://github.com/animate-css/animate.css
5
+ it has a very nice license which shall be linked to:
6
+
7
+ please see the original version of the license: https://raw.githubusercontent.com/animate-css/animate.css/main/LICENSE
8
+
9
+ The content of the license is copied below the Jav Comment End section
10
+
11
+ Also, I want to say thank you to all the people who have contributed to the https://github.com/animate-css/animate.css library. Not only because of the actual content of their library but especially because through their project I came to know about the very appealing Hippocratic License
12
+
13
+ Update: 12.03.2023 - The library animate-css library is not required - not used in this project as for now
14
+
15
+ /* ========================================================================== */
16
+ /* ============================= Jav Comment End ============================ */
17
+ /* ========================================================================== */
18
+
19
+ Animate.css Copyright 2021 Daniel Eden (“Licensor”)
20
+
21
+ Hippocratic License Version Number: 2.1.
22
+
23
+ Purpose. The purpose of this License is for the Licensor named above to permit the Licensee (as defined below) broad permission, if consistent with Human Rights Laws and Human Rights Principles (as each is defined below), to use and work with the Software (as defined below) within the full scope of Licensor’s copyright and patent rights, if any, in the Software, while ensuring attribution and protecting the Licensor from liability.
24
+
25
+ Permission and Conditions. The Licensor grants permission by this license (“License”), free of charge, to the extent of Licensor’s rights under applicable copyright and patent law, to any person or entity (the “Licensee”) obtaining a copy of this software and associated documentation files (the “Software”), to do everything with the Software that would otherwise infringe (i) the Licensor’s copyright in the Software or (ii) any patent claims to the Software that the Licensor can license or becomes able to license, subject to all of the following terms and conditions:
26
+
27
+ * Acceptance. This License is automatically offered to every person and entity subject to its terms and conditions. Licensee accepts this License and agrees to its terms and conditions by taking any action with the Software that, absent this License, would infringe any intellectual property right held by Licensor.
28
+
29
+ * Notice. Licensee must ensure that everyone who gets a copy of any part of this Software from Licensee, with or without changes, also receives the License and the above copyright notice (and if included by the Licensor, patent, trademark and attribution notice). Licensee must cause any modified versions of the Software to carry prominent notices stating that Licensee changed the Software. For clarity, although Licensee is free to create modifications of the Software and distribute only the modified portion created by Licensee with additional or different terms, the portion of the Software not modified must be distributed pursuant to this License. If anyone notifies Licensee in writing that Licensee has not complied with this Notice section, Licensee can keep this License by taking all practical steps to comply within 30 days after the notice. If Licensee does not do so, Licensee’s License (and all rights licensed hereunder) shall end immediately.
30
+
31
+ * Compliance with Human Rights Principles and Human Rights Laws.
32
+
33
+ 1. Human Rights Principles.
34
+
35
+ (a) Licensee is advised to consult the articles of the United Nations Universal Declaration of Human Rights and the United Nations Global Compact that define recognized principles of international human rights (the “Human Rights Principles”). Licensee shall use the Software in a manner consistent with Human Rights Principles.
36
+
37
+ (b) Unless the Licensor and Licensee agree otherwise, any dispute, controversy, or claim arising out of or relating to (i) Section 1(a) regarding Human Rights Principles, including the breach of Section 1(a), termination of this License for breach of the Human Rights Principles, or invalidity of Section 1(a) or (ii) a determination of whether any Law is consistent or in conflict with Human Rights Principles pursuant to Section 2, below, shall be settled by arbitration in accordance with the Hague Rules on Business and Human Rights Arbitration (the “Rules”); provided, however, that Licensee may elect not to participate in such arbitration, in which event this License (and all rights licensed hereunder) shall end immediately. The number of arbitrators shall be one unless the Rules require otherwise.
38
+
39
+ Unless both the Licensor and Licensee agree to the contrary: (1) All documents and information concerning the arbitration shall be public and may be disclosed by any party; (2) The repository referred to under Article 43 of the Rules shall make available to the public in a timely manner all documents concerning the arbitration which are communicated to it, including all submissions of the parties, all evidence admitted into the record of the proceedings, all transcripts or other recordings of hearings and all orders, decisions and awards of the arbitral tribunal, subject only to the arbitral tribunal's powers to take such measures as may be necessary to safeguard the integrity of the arbitral process pursuant to Articles 18, 33, 41 and 42 of the Rules; and (3) Article 26(6) of the Rules shall not apply.
40
+
41
+ 2. Human Rights Laws. The Software shall not be used by any person or entity for any systems, activities, or other uses that violate any Human Rights Laws. “Human Rights Laws” means any applicable laws, regulations, or rules (collectively, “Laws”) that protect human, civil, labor, privacy, political, environmental, security, economic, due process, or similar rights; provided, however, that such Laws are consistent and not in conflict with Human Rights Principles (a dispute over the consistency or a conflict between Laws and Human Rights Principles shall be determined by arbitration as stated above). Where the Human Rights Laws of more than one jurisdiction are applicable or in conflict with respect to the use of the Software, the Human Rights Laws that are most protective of the individuals or groups harmed shall apply.
42
+
43
+ 3. Indemnity. Licensee shall hold harmless and indemnify Licensor (and any other contributor) against all losses, damages, liabilities, deficiencies, claims, actions, judgments, settlements, interest, awards, penalties, fines, costs, or expenses of whatever kind, including Licensor’s reasonable attorneys’ fees, arising out of or relating to Licensee’s use of the Software in violation of Human Rights Laws or Human Rights Principles.
44
+
45
+ * Failure to Comply. Any failure of Licensee to act according to the terms and conditions of this License is both a breach of the License and an infringement of the intellectual property rights of the Licensor (subject to exceptions under Laws, e.g., fair use). In the event of a breach or infringement, the terms and conditions of this License may be enforced by Licensor under the Laws of any jurisdiction to which Licensee is subject. Licensee also agrees that the Licensor may enforce the terms and conditions of this License against Licensee through specific performance (or similar remedy under Laws) to the extent permitted by Laws. For clarity, except in the event of a breach of this License, infringement, or as otherwise stated in this License, Licensor may not terminate this License with Licensee.
46
+
47
+ * Enforceability and Interpretation. If any term or provision of this License is determined to be invalid, illegal, or unenforceable by a court of competent jurisdiction, then such invalidity, illegality, or unenforceability shall not affect any other term or provision of this License or invalidate or render unenforceable such term or provision in any other jurisdiction; provided, however, subject to a court modification pursuant to the immediately following sentence, if any term or provision of this License pertaining to Human Rights Laws or Human Rights Principles is deemed invalid, illegal, or unenforceable against Licensee by a court of competent jurisdiction, all rights in the Software granted to Licensee shall be deemed null and void as between Licensor and Licensee. Upon a determination that any term or provision is invalid, illegal, or unenforceable, to the extent permitted by Laws, the court may modify this License to affect the original purpose that the Software be used in compliance with Human Rights Principles and Human Rights Laws as closely as possible. The language in this License shall be interpreted as to its fair meaning and not strictly for or against any party.
48
+
49
+ * Disclaimer. TO THE FULL EXTENT ALLOWED BY LAW, THIS SOFTWARE COMES “AS IS,” WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED, AND LICENSOR AND ANY OTHER CONTRIBUTOR SHALL NOT BE LIABLE TO ANYONE FOR ANY DAMAGES OR OTHER LIABILITY ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THIS LICENSE, UNDER ANY KIND OF LEGAL CLAIM.
50
+
51
+ This Hippocratic License is an Ethical Source license (https://ethicalsource.dev) and is offered for use by licensors and licensees at their own risk, on an “AS IS” basis, and with no warranties express or implied, to the maximum extent permitted by Laws.
Data/1_Writing/0_Deco/0_Frontpage.qmd ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ % \includegraphics[width=0.42\textwidth]{./2_Figures/TUBraunschweig_4C.pdf} &
3
+
4
+ \begin{center}
5
+ \begin{tabular}{p{\textwidth}}
6
+
7
+ \begin{minipage}{\textwidth}
8
+ % \centering
9
+ \includegraphics[width=0.4\textwidth]{./2_Figures/TUBraunschweig_4C.pdf}
10
+ \end{minipage}
11
+ % \begin{minipage}{0.5\textwidth}
12
+ % \centering
13
+ % \includegraphics[width=0.5\textwidth]{./2_Figures/0_Deco/dlr_Logo.jpeg}
14
+ % \end{minipage}
15
+
16
+
17
+ \vspace{1cm}
18
+
19
+ \\
20
+
21
+ \begin{center}
22
+ \large{\textsc{
23
+ Master thesis number: 486\\
24
+ }}
25
+ \end{center}
26
+
27
+ \begin{center}
28
+ \LARGE{\textsc{
29
+ Flow predictions using control-oriented cluster-based network modeling\\
30
+ }}
31
+ \end{center}
32
+
33
+ \\
34
+
35
+
36
+ \begin{center}
37
+ \large{Technische Universität Braunschweig \\
38
+ Institute of Fluid Mechanics
39
+ }
40
+ \end{center}
41
+
42
+
43
+ \begin{center}
44
+ **\Large{Master Thesis}** \end{center}
45
+
46
+
47
+ \begin{center}
48
+ written by
49
+ \end{center}
50
+
51
+ \begin{center}
52
+ \large{\textbf{Javed Arshad Butt}} \\
53
+
54
+ \large{5027847} \\
55
+ \end{center}
56
+
57
+ \begin{center}
58
+ \large{born on 20.05.1996 in Gujrat}
59
+ \end{center}
60
+
61
+ \vspace{3cm}
62
+ \begin{center}
63
+ \begin{tabular}{lll}
64
+ \textbf{Submission date:} & & 29.04.2022\\
65
+ \textbf{Supervisor :} & & Dr. Richard Semaan \\
66
+ \textbf{Examiner :} & & Prof. Dr.-Ing. R. Radespiel\\
67
+
68
+
69
+ \end{tabular}
70
+ \end{center}
71
+
72
+ \end{tabular}
73
+ \end{center}
74
+ %Damit die erste Seite = Deckblatt nicht nummeriert wird.
75
+ \thispagestyle{empty}
76
+
77
+
78
+
79
+
80
+
Data/1_Writing/0_Deco/1_Erkl.qmd ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ # Declaration of independent authorship {.unnumbered}
4
+
5
+ I hereby declare that the present work, the master thesis, is solely and independently done by myself in all aspects, such as developments, code implementations, and writing of the thesis.
6
+ In addition, I confirm that I did not use any tools, materials or sources other than those explicitly specified.\newline \break
7
+
8
+ \vspace{1cm}
9
+ \noindent Full name: Javed Arshad Butt \newline \break
10
+
11
+
12
+ \noindent Date and place: 29.04.2022, Braunschweig\newline \break
13
+
14
+ \vspace{1cm}
15
+ \noindent Signature:
16
+
17
+ ![](../../2_Figures/0_Deco/signature_1.jpg){width=30% .unnumbered}
18
+
19
+ <!-- placeholder filler text removed -->
20
+ <!-- placeholder filler text removed -->
21
+
22
+ <!-- {{< include 4_Mast.pdf >}} -->
Data/1_Writing/0_Deco/2_1_Abstract.qmd ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Abstract {.unnumbered}
2
+
3
+ In this master thesis, a data-driven modeling technique is proposed.
4
+ It enables making predictions for general dynamic systems for unknown model parameter values or operating conditions.
5
+ The tool is denoted as \gls{cnmc}.
6
+ The most recently developed version delivered promising results for the chaotic Lorenz system [@lorenz1963deterministic].
7
+ Since the earlier work was restricted to the application of only one dynamical system, with this contribution the first major improvement was to allow \gls{cnmc} to be utilized for any general dynamical system.
8
+ For this, \gls{cnmc} was written from scratch in a modular manner.
9
+ The limitations on the number of dimensions and the shape of the trajectory of the dynamical systems are removed.
10
+ Adding a new dynamic system was designed such that it should be as straightforward as possible.
11
+ To affirm this point, 10 dynamic systems, most of which are chaotic systems, are included by default.
12
+ To be able to run \gls{cnmc} on arbitrary dynamic systems in an automated way, a parameter study for the modal decomposition method \gls{nmf} was implemented.
13
+ However, since a single \gls{nmf} solution took up to hours, a second option was added, i.e., \gls{svd}.
14
+ With \gls{svd} the most time-consuming task could be brought to a level of seconds.
15
+ The improvements introduced, allow \gls{cnmc} to be executed on a general dynamic system on a normal computer in a reasonable time.
16
+ Furthermore, \gls{cnmc} comes with its integrated post-processor in form of HTML files to inspect the generated plots in detail.
17
+ All the parameters used in \gls{cnmc} and some additional beneficial features can be controlled via one settings file.
Data/1_Writing/0_Deco/2_Thanks.qmd ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Acknowledgments {.unnumbered}
3
+
4
+ All praise and thanks to the **ONE**, Who does neither need my praise nor my thanks.
5
+ To the **ONE**, Who is independent of everything and everyone, but on Whom everything and everyone depends.
6
+
7
+ \vspace{1cm}
8
+ Thank you, Dr. Semaan - you provided me with the possibility to work on such a compelling and challenging topic. Even though the difficult tasks were not always pleasant, I very much appreciate the opportunity to have worked on these captivating tasks.
9
+ Thank you for the time and effort you invested in this work.
10
+ Also, thank you for the weekly English exercises and for explaining to me how to pronounce methodology correctly :D
Data/1_Writing/0_Deco/3_Used_Abbrev.log ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex 2020.12.21) 25 APR 2022 13:20
2
+ entering extended mode
3
+ restricted \write18 enabled.
4
+ file:line:error style messages enabled.
5
+ %&-line parsing enabled.
6
+ **/home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev
7
+ (/home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex
8
+ LaTeX2e <2020-02-02> patch level 2
9
+ L3 programming layer <2020-02-14>
10
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:9: Undefined control sequence.
11
+ l.9 \DeclareAcronym
12
+ {usa}{
13
+ The control sequence at the end of the top line
14
+ of your error message was never \def'ed. If you have
15
+ misspelled it (e.g., `\hobx'), type `I' and the correct
16
+ spelling (e.g., `I\hbox'). Otherwise just continue,
17
+ and I'll forget about whatever was undefined.
18
+
19
+
20
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:9: LaTeX Error: Missing \begin{document}.
21
+
22
+ See the LaTeX manual or LaTeX Companion for explanation.
23
+ Type H <return> for immediate help.
24
+ ...
25
+
26
+ l.9 \DeclareAcronym{u
27
+ sa}{
28
+ You're in trouble here. Try typing <return> to proceed.
29
+ If that doesn't work, type X <return> to quit.
30
+
31
+ Missing character: There is no u in font nullfont!
32
+ Missing character: There is no s in font nullfont!
33
+ Missing character: There is no a in font nullfont!
34
+ Missing character: There is no s in font nullfont!
35
+ Missing character: There is no h in font nullfont!
36
+ Missing character: There is no o in font nullfont!
37
+ Missing character: There is no r in font nullfont!
38
+ Missing character: There is no t in font nullfont!
39
+ Missing character: There is no = in font nullfont!
40
+ Missing character: There is no U in font nullfont!
41
+ Missing character: There is no S in font nullfont!
42
+ Missing character: There is no A in font nullfont!
43
+ Missing character: There is no , in font nullfont!
44
+ Missing character: There is no l in font nullfont!
45
+ Missing character: There is no o in font nullfont!
46
+ Missing character: There is no n in font nullfont!
47
+ Missing character: There is no g in font nullfont!
48
+ Missing character: There is no = in font nullfont!
49
+ Missing character: There is no U in font nullfont!
50
+ Missing character: There is no n in font nullfont!
51
+ Missing character: There is no i in font nullfont!
52
+ Missing character: There is no t in font nullfont!
53
+ Missing character: There is no e in font nullfont!
54
+ Missing character: There is no d in font nullfont!
55
+ Missing character: There is no S in font nullfont!
56
+ Missing character: There is no t in font nullfont!
57
+ Missing character: There is no a in font nullfont!
58
+ Missing character: There is no t in font nullfont!
59
+ Missing character: There is no e in font nullfont!
60
+ Missing character: There is no s in font nullfont!
61
+ Missing character: There is no o in font nullfont!
62
+ Missing character: There is no f in font nullfont!
63
+ Missing character: There is no A in font nullfont!
64
+ Missing character: There is no m in font nullfont!
65
+ Missing character: There is no e in font nullfont!
66
+ Missing character: There is no r in font nullfont!
67
+ Missing character: There is no i in font nullfont!
68
+ Missing character: There is no c in font nullfont!
69
+ Missing character: There is no a in font nullfont!
70
+ Missing character: There is no , in font nullfont!
71
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:13: Undefined control sequence.
72
+ l.13 \DeclareAcronym
73
+ {eu}{
74
+ The control sequence at the end of the top line
75
+ of your error message was never \def'ed. If you have
76
+ misspelled it (e.g., `\hobx'), type `I' and the correct
77
+ spelling (e.g., `I\hbox'). Otherwise just continue,
78
+ and I'll forget about whatever was undefined.
79
+
80
+ Missing character: There is no e in font nullfont!
81
+ Missing character: There is no u in font nullfont!
82
+ Missing character: There is no s in font nullfont!
83
+ Missing character: There is no h in font nullfont!
84
+ Missing character: There is no o in font nullfont!
85
+ Missing character: There is no r in font nullfont!
86
+ Missing character: There is no t in font nullfont!
87
+ Missing character: There is no = in font nullfont!
88
+ Missing character: There is no E in font nullfont!
89
+ Missing character: There is no U in font nullfont!
90
+ Missing character: There is no , in font nullfont!
91
+ Missing character: There is no l in font nullfont!
92
+ Missing character: There is no o in font nullfont!
93
+ Missing character: There is no n in font nullfont!
94
+ Missing character: There is no g in font nullfont!
95
+ Missing character: There is no = in font nullfont!
96
+ Missing character: There is no E in font nullfont!
97
+ Missing character: There is no u in font nullfont!
98
+ Missing character: There is no r in font nullfont!
99
+ Missing character: There is no o in font nullfont!
100
+ Missing character: There is no p in font nullfont!
101
+ Missing character: There is no e in font nullfont!
102
+ Missing character: There is no a in font nullfont!
103
+ Missing character: There is no n in font nullfont!
104
+ Missing character: There is no U in font nullfont!
105
+ Missing character: There is no n in font nullfont!
106
+ Missing character: There is no i in font nullfont!
107
+ Missing character: There is no o in font nullfont!
108
+ Missing character: There is no n in font nullfont!
109
+ Missing character: There is no , in font nullfont!
110
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:17: Undefined control sequence.
111
+ l.17 \DeclareAcronym
112
+ {ussr}{
113
+ The control sequence at the end of the top line
114
+ of your error message was never \def'ed. If you have
115
+ misspelled it (e.g., `\hobx'), type `I' and the correct
116
+ spelling (e.g., `I\hbox'). Otherwise just continue,
117
+ and I'll forget about whatever was undefined.
118
+
119
+ Missing character: There is no u in font nullfont!
120
+ Missing character: There is no s in font nullfont!
121
+ Missing character: There is no s in font nullfont!
122
+ Missing character: There is no r in font nullfont!
123
+ Missing character: There is no s in font nullfont!
124
+ Missing character: There is no h in font nullfont!
125
+ Missing character: There is no o in font nullfont!
126
+ Missing character: There is no r in font nullfont!
127
+ Missing character: There is no t in font nullfont!
128
+ Missing character: There is no = in font nullfont!
129
+ Missing character: There is no U in font nullfont!
130
+ Missing character: There is no S in font nullfont!
131
+ Missing character: There is no S in font nullfont!
132
+ Missing character: There is no R in font nullfont!
133
+ Missing character: There is no , in font nullfont!
134
+ Missing character: There is no l in font nullfont!
135
+ Missing character: There is no o in font nullfont!
136
+ Missing character: There is no n in font nullfont!
137
+ Missing character: There is no g in font nullfont!
138
+ Missing character: There is no = in font nullfont!
139
+ Missing character: There is no U in font nullfont!
140
+ Missing character: There is no n in font nullfont!
141
+ Missing character: There is no i in font nullfont!
142
+ Missing character: There is no o in font nullfont!
143
+ Missing character: There is no n in font nullfont!
144
+ Missing character: There is no o in font nullfont!
145
+ Missing character: There is no f in font nullfont!
146
+ Missing character: There is no S in font nullfont!
147
+ Missing character: There is no o in font nullfont!
148
+ Missing character: There is no v in font nullfont!
149
+ Missing character: There is no i in font nullfont!
150
+ Missing character: There is no e in font nullfont!
151
+ Missing character: There is no t in font nullfont!
152
+ Missing character: There is no S in font nullfont!
153
+ Missing character: There is no o in font nullfont!
154
+ Missing character: There is no c in font nullfont!
155
+ Missing character: There is no i in font nullfont!
156
+ Missing character: There is no a in font nullfont!
157
+ Missing character: There is no l in font nullfont!
158
+ Missing character: There is no i in font nullfont!
159
+ Missing character: There is no s in font nullfont!
160
+ Missing character: There is no t in font nullfont!
161
+ Missing character: There is no R in font nullfont!
162
+ Missing character: There is no e in font nullfont!
163
+ Missing character: There is no p in font nullfont!
164
+ Missing character: There is no u in font nullfont!
165
+ Missing character: There is no b in font nullfont!
166
+ Missing character: There is no l in font nullfont!
167
+ Missing character: There is no i in font nullfont!
168
+ Missing character: There is no c in font nullfont!
169
+ Missing character: There is no s in font nullfont!
170
+ Missing character: There is no , in font nullfont!
171
+
172
+ Overfull \hbox (20.0pt too wide) in paragraph at lines 9--21
173
+ []
174
+ []
175
+
176
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:23: Undefined control sequence.
177
+ l.23 \ac
178
+ {usa}, \ac{usa}
179
+ The control sequence at the end of the top line
180
+ of your error message was never \def'ed. If you have
181
+ misspelled it (e.g., `\hobx'), type `I' and the correct
182
+ spelling (e.g., `I\hbox'). Otherwise just continue,
183
+ and I'll forget about whatever was undefined.
184
+
185
+
186
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:23: LaTeX Error: Missing \begin{document}.
187
+
188
+ See the LaTeX manual or LaTeX Companion for explanation.
189
+ Type H <return> for immediate help.
190
+ ...
191
+
192
+ l.23 \ac{u
193
+ sa}, \ac{usa}
194
+ You're in trouble here. Try typing <return> to proceed.
195
+ If that doesn't work, type X <return> to quit.
196
+
197
+ Missing character: There is no u in font nullfont!
198
+ Missing character: There is no s in font nullfont!
199
+ Missing character: There is no a in font nullfont!
200
+ Missing character: There is no , in font nullfont!
201
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:23: Undefined control sequence.
202
+ l.23 \ac{usa}, \ac
203
+ {usa}
204
+ The control sequence at the end of the top line
205
+ of your error message was never \def'ed. If you have
206
+ misspelled it (e.g., `\hobx'), type `I' and the correct
207
+ spelling (e.g., `I\hbox'). Otherwise just continue,
208
+ and I'll forget about whatever was undefined.
209
+
210
+ Missing character: There is no u in font nullfont!
211
+ Missing character: There is no s in font nullfont!
212
+ Missing character: There is no a in font nullfont!
213
+
214
+ Overfull \hbox (20.0pt too wide) in paragraph at lines 23--24
215
+ []
216
+ []
217
+
218
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:25: Undefined control sequence.
219
+ l.25 \ac
220
+ {eu}, \ac{eu}
221
+ The control sequence at the end of the top line
222
+ of your error message was never \def'ed. If you have
223
+ misspelled it (e.g., `\hobx'), type `I' and the correct
224
+ spelling (e.g., `I\hbox'). Otherwise just continue,
225
+ and I'll forget about whatever was undefined.
226
+
227
+
228
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:25: LaTeX Error: Missing \begin{document}.
229
+
230
+ See the LaTeX manual or LaTeX Companion for explanation.
231
+ Type H <return> for immediate help.
232
+ ...
233
+
234
+ l.25 \ac{e
235
+ u}, \ac{eu}
236
+ You're in trouble here. Try typing <return> to proceed.
237
+ If that doesn't work, type X <return> to quit.
238
+
239
+ Missing character: There is no e in font nullfont!
240
+ Missing character: There is no u in font nullfont!
241
+ Missing character: There is no , in font nullfont!
242
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:25: Undefined control sequence.
243
+ l.25 \ac{eu}, \ac
244
+ {eu}
245
+ The control sequence at the end of the top line
246
+ of your error message was never \def'ed. If you have
247
+ misspelled it (e.g., `\hobx'), type `I' and the correct
248
+ spelling (e.g., `I\hbox'). Otherwise just continue,
249
+ and I'll forget about whatever was undefined.
250
+
251
+ Missing character: There is no e in font nullfont!
252
+ Missing character: There is no u in font nullfont!
253
+
254
+ Overfull \hbox (20.0pt too wide) in paragraph at lines 25--26
255
+ []
256
+ []
257
+
258
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:27: Undefined control sequence.
259
+ l.27 \ac
260
+ {ussr}, \ac{ussr}
261
+ The control sequence at the end of the top line
262
+ of your error message was never \def'ed. If you have
263
+ misspelled it (e.g., `\hobx'), type `I' and the correct
264
+ spelling (e.g., `I\hbox'). Otherwise just continue,
265
+ and I'll forget about whatever was undefined.
266
+
267
+
268
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:27: LaTeX Error: Missing \begin{document}.
269
+
270
+ See the LaTeX manual or LaTeX Companion for explanation.
271
+ Type H <return> for immediate help.
272
+ ...
273
+
274
+ l.27 \ac{u
275
+ ssr}, \ac{ussr}
276
+ You're in trouble here. Try typing <return> to proceed.
277
+ If that doesn't work, type X <return> to quit.
278
+
279
+ Missing character: There is no u in font nullfont!
280
+ Missing character: There is no s in font nullfont!
281
+ Missing character: There is no s in font nullfont!
282
+ Missing character: There is no r in font nullfont!
283
+ Missing character: There is no , in font nullfont!
284
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:27: Undefined control sequence.
285
+ l.27 \ac{ussr}, \ac
286
+ {ussr}
287
+ The control sequence at the end of the top line
288
+ of your error message was never \def'ed. If you have
289
+ misspelled it (e.g., `\hobx'), type `I' and the correct
290
+ spelling (e.g., `I\hbox'). Otherwise just continue,
291
+ and I'll forget about whatever was undefined.
292
+
293
+ Missing character: There is no u in font nullfont!
294
+ Missing character: There is no s in font nullfont!
295
+ Missing character: There is no s in font nullfont!
296
+ Missing character: There is no r in font nullfont!
297
+
298
+ Overfull \hbox (20.0pt too wide) in paragraph at lines 27--28
299
+ []
300
+ []
301
+
302
+ /home/jav/Schreibtisch/Uni/27_Master/3_Latex/2_Writing/1_Latex_Files/0_Deco/3_Used_Abbrev.tex:29: Undefined control sequence.
303
+ l.29 \printacronyms
304
+
305
+ The control sequence at the end of the top line
306
+ of your error message was never \def'ed. If you have
307
+ misspelled it (e.g., `\hobx'), type `I' and the correct
308
+ spelling (e.g., `I\hbox'). Otherwise just continue,
309
+ and I'll forget about whatever was undefined.
310
+
311
+ )
312
+ ! Emergency stop.
313
+ <*> ..._Writing/1_Latex_Files/0_Deco/3_Used_Abbrev
314
+
315
+ *** (job aborted, no legal \end found)
316
+
317
+
318
+ Here is how much of TeX's memory you used:
319
+ 17 strings out of 481239
320
+ 639 string characters out of 5920376
321
+ 236564 words of memory out of 5000000
322
+ 15384 multiletter control sequences out of 15000+600000
323
+ 532338 words of font info for 24 fonts, out of 8000000 for 9000
324
+ 1141 hyphenation exceptions out of 8191
325
+ 12i,0n,15p,161b,16s stack positions out of 5000i,500n,10000p,200000b,80000s
326
+ ! ==> Fatal error occurred, no output PDF file produced!
Data/1_Writing/0_Deco/3_Used_Abbrev.qmd ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Abbreviations {.unnumbered}
2
+
3
+ [ODE]{#ode}
4
+ : &emsp; <b>O</b>rdinary <b>D</b>ifferential <b>E</b>quation
5
+
6
+ [CNM]{#cnm}
7
+ : &emsp; <b>C</b>luster-based <b>N</b>etwork <b>M</b>odeling
8
+
9
+ [<i>CNMc</i>]{#cnmc}
10
+ : &emsp; <b>c</b>ontrol-oriented <b>C</b>luster-based <b>N</b>etwork <b>M</b>odeling
11
+
12
+ [CMM]{#cmm}
13
+ : &emsp; <b>C</b>luster <b>M</b>arkov-based <b>M</b>odeling
14
+
15
+ [CFD]{#cfd}
16
+ : &emsp; <b>C</b>omputational <b>F</b>luid <b>D</b>ynamics
17
+
18
+ [RANS]{#rans}
19
+ : &emsp; <b>R</b>eynolds <b>A</b>veraged <b>N</b>avier <b>S</b>tokes
20
+
21
+ [DLR]{#dlr}
22
+ : &emsp; German Aerospace Center
23
+
24
+ [GPU]{#gpu}
25
+ : &emsp; <b>G</b>raphics <b>P</b>rocessing <b>U</b>nit
26
+
27
+ [CPU]{#cpu}
28
+ : &emsp; <b>C</b>entral <b>P</b>rocessing <b>U</b>nit
29
+
30
+ [SDIC]{#sdic}
31
+ : &emsp; <b>S</b>ensitive <b>D</b>ependence on <b>I</b>nitial <b>C</b>onditions
32
+
33
+ [NMF]{#nmf}
34
+ : &emsp; <b>N</b>on-negative <b>M</b>atrix <b>F</b>actorization
35
+
36
+ [SVD]{#svd}
37
+ : &emsp; <b>S</b>ingular <b>V</b>alue <b>D</b>ecomposition
38
+
39
+ [RF]{#rf}
40
+ : &emsp; <b>R</b>andom <b>F</b>orest
41
+
42
+ [CPD]{#cpd}
43
+ : &emsp; <b>C</b>luster <b>P</b>robability <b>D</b>istribution
44
+
45
+ [CPE]{#cpe}
46
+ : &emsp; <b>C</b>entroid <b>P</b>osition <b>E</b>volution
47
+
48
+ [DTW]{#dtw}
49
+ : &emsp; <b>D</b>ynamic <b>T</b>ime <b>W</b>arping
50
+
51
+ [KNN]{#knn}
52
+ : &emsp; <b>K</b>-<b>N</b>earest <b>N</b>eighbor
53
+
54
+
55
+ \newacronym{ode}{ODE}{\glstextformat{\textbf{O}}rdinary \glstextformat{\textbf{D}}ifferential \glstextformat{\textbf{E}}quation}
56
+
57
+ \newacronym{cnm}{CNM}{\glstextformat{\textbf{C}}luster-based \glstextformat{\textbf{N}}etwork \glstextformat{\textbf{M}}odeling}
58
+
59
+ \newacronym{cnmc}{\glstextformat{\emph{CNMc}}}{\glstextformat{\textbf{c}}ontrol-oriented \glstextformat{\textbf{C}}luster-based \glstextformat{\textbf{N}}etwork \glstextformat{\textbf{M}}odeling}
60
+
61
+ \newacronym[]{cmm}{CMM}{\glstextformat{\textbf{C}}luster \glstextformat{\textbf{M}}arkov-based \glstextformat{\textbf{M}}odeling}
62
+
63
+ \newacronym{cfd}{CFD}{\glstextformat{\textbf{C}}omputational \glstextformat{\textbf{F}}luid \glstextformat{\textbf{D}}ynamics}
64
+
65
+ \newacronym{rans}{RANS}{\glstextformat{\textbf{R}}eynolds \glstextformat{\textbf{A}}veraged \glstextformat{\textbf{N}}avier \glstextformat{\textbf{S}}tokes}
66
+
67
+ \newacronym{dlr}{DLR}{German Aerospace Center}
68
+
69
+ \newacronym{gpu}{GPU}{\glstextformat{\textbf{G}}raphics \glstextformat{\textbf{P}}rocessing \glstextformat{\textbf{U}}nit}
70
+
71
+ \newacronym{cpu}{CPU}{\glstextformat{\textbf{C}}entral \glstextformat{\textbf{P}}rocessing \glstextformat{\textbf{U}}nit}
72
+
73
+ \newacronym[]{sdic}{SDIC}{\glstextformat{\textbf{S}}ensitive \glstextformat{\textbf{D}}ependence on \glstextformat{\textbf{I}}nitial \glstextformat{\textbf{C}}onditions}
74
+
75
+ \newacronym[]{nmf}{NMF}{\glstextformat{\textbf{N}}on-negative \glstextformat{\textbf{M}}atrix \glstextformat{\textbf{F}}actorization}
76
+
77
+ \newacronym[]{svd}{SVD}{\glstextformat{\textbf{S}}ingular \glstextformat{\textbf{V}}alue \glstextformat{\textbf{D}}ecomposition}
78
+
79
+ \newacronym[]{rf}{RF}{\glstextformat{\textbf{R}}andom \glstextformat{\textbf{F}}orest}
80
+
81
+ \newacronym[]{cpd}{CPD}{\glstextformat{\textbf{C}}luster \glstextformat{\textbf{P}}robability \glstextformat{\textbf{D}}istribution}
82
+
83
+ \newacronym[]{cpevol}{CPE}{\glstextformat{\textbf{C}}entroid \glstextformat{\textbf{P}}osition \glstextformat{\textbf{E}}volution}
84
+
85
+
86
+ \newacronym[]{dtw}{DTW}{\glstextformat{\textbf{D}}ynamic \glstextformat{\textbf{T}}ime \glstextformat{\textbf{W}}arping}
87
+
88
+ \newacronym[]{knn}{KNN}{\glstextformat{\textbf{K}-\textbf{N}}earest \glstextformat{\textbf{N}}eighbor}
89
+
Data/1_Writing/1_Task/1_Introduction.qmd ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ # Introduction {#sec-chap_1_Intro}
4
+ In this work, a tool called \glsfirst{cnmc} is further developed.
5
+ The overall goal, in very brief terms, is to generate a model, which is able to
6
+ predict the trajectories of general dynamical systems. The model
7
+ shall be capable of predicting the trajectories when a model parameter
8
+ value is changed.
9
+ Some basics about dynamical systems are covered in
10
+ subsection [-@sec-subsec_1_1_1_Principles] and in-depth explanations about \gls{cnmc} are given in
11
+ chapter [-@sec-chap_2_Methodlogy]. \newline
12
+
13
+ However, for a short and broad introduction to \gls{cnmc} the workflow depicted in figure @fig-fig_1_CNMC_Workflow shall be highlighted.
14
+ The input it receives is data of a dynamical system or space state vectors for a range of model parameter values. The two main important outcomes are some accuracy measurements and the predicted trajectory for each desired model parameter value.
15
+ Any inexperienced user may only have a look at the predicted trajectories to
16
+ quickly decide visually whether the prediction matches the trained data. Since \gls{cnmc} is written in a modular manner, meaning it can be regarded as
17
+ a black-box function, it can easily be integrated into other existing codes or
18
+ workflows. \newline
19
+
20
+ ![Broad overview: Workflow of \gls{cnmc}](../../3_Figs_Pyth/1_Task/1_CNMc.svg){#fig-fig_1_CNMC_Workflow}
Data/1_Writing/1_Task/2_0_Motivation.qmd ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- % ============================================================================== -->
2
+ <!-- % ==================== Motivation ============================================== -->
3
+ <!-- % ============================================================================== -->
4
+ ## Motivation {#sec-sec_Motivation}
5
+ \gls{cfd} is an
6
+ indispensable technique, when aimed to obtain information about aerodynamic properties, such
7
+ as drag and lift distributions. Modern \gls{cfd} solvers, such as \gls{dlr}'s *TAU*
8
+ [@Langer2014] often solve
9
+ the \gls{rans} equations to obtain one flow-field. Advanced solvers like *TAU* apply advanced
10
+ mathematical knowledge to speed up calculations and
11
+ heavily exploit multiple \glspl{cpu} in an optimized manner. Nevertheless,
12
+ depending on the size of the object and accuracy demands or in other terms mesh grid size, the computation often is not economically
13
+ efficient enough. If the object for which a flow field is desired is a full aircraft, then even with a big cluster and making use of symmetry properties of the shape of the airplane, if such exists, the computation of one single
14
+ flow field can still easily cost one or even multiple months in computation time. \newline
15
+
16
+ In modern science, there is a trend towards relying on \glspl{gpu} instead of \glspl{cpu}. Graphic cards possess much
17
+ more cores than a CPU. However, even with the utilization of \glspl{gpu} and GPU-optimized \gls{cfd} solvers, the computation is still very expensive. Not only in time but also
18
+ in electricity costs.
19
+ Running calculations on a cluster for multiple months is so expensive that wind tunnel measurements can be considered to be the economically more
20
+ efficient choice to make.
21
+ Regarding accuracy, wind tunnel measurements and \gls{cfd} simulations with state-of-the-art solvers can be considered to be
22
+ equally useful. When using \gls{cfd} solvers, there is one more thing to keep
23
+ in mind.
24
+ Each outcome is only valid for one single set of input parameters.
25
+ Within the set of input parameters, the user often is only interested
26
+ in the impact of one parameter, e.g., the angle of attack. Consequently,
27
+ wanting to capture the effect of the change of the angle of attack on the flow field,
28
+ multiple \gls{cfd} calculations need to be performed, i.e., for each desired
29
+ angle of attack.
30
+ Based on the chosen angle of attack the solver might be able to converge faster to a solution. However, the calculation time
31
+ needs to be added up for each desired angle of attack.
32
+ In terms of time and energy costs, this could again be more expensive than wind-tunnel
33
+ measurements. Wind tunnel measurements are difficult to set up, but once a
34
+ configuration is available, measuring flow field properties with it, in general, is known to be faster and easier than running \gls{cfd} simulations.\newline
35
+
36
+ <!--% -------------------------------------------------------------------------------->
37
+ Within the scope of this work, a data-driven tool was developed that allows predictions for dynamic systems.
38
+ In [@Max2021] the first version of it showed promising results.
39
+ However, it was dedicated to the solution of one single dynamical system, i.e., the Lorenz system [@lorenz1963deterministic].
40
+ Due to the focus on one singular dynamical system, the proposed \glsfirst{cnmc} was not verified for other dynamical systems.
41
+ Hence, one of the major goals of this thesis is to enable \gls{cnmc} to be applied to any general dynamical system.
42
+ For this, it is important to state that because of two main reasons \gls{cnmc} was not built upon the first version of \gls{cnmc}, but written from scratch.
43
+ First, since the initial version of \gls{cnmc} was designed for only a single dynamic system, extending it to a general \gls{cnmc} was considered more time-consuming than starting fresh.
44
+ Second, not all parts of the initial version of \gls{cnmc} could be executed without errors.
45
+ The current \gls{cnmc} is therefore developed in a modular manner, i.e., on the one hand, the implementation of any other dynamical system is straightforward.
46
+ To exemplify this, 10 different dynamic systems are available by default, so new dynamic systems can be added analogously.\newline
47
+
48
+ The second important aspect for allowing \gls{cnmc} to be utilized in any general dynamical system is the removal of the two limitations.
49
+ In the first version of \gls{cnmc} the behavior of the dynamical systems had to be circular as, e.g., the ears of the Lorenz system [@lorenz1963deterministic] are.
50
+ Next, its dimensionality must be strictly 3-dimensional.
51
+ A general dynamical system is neither bound to exhibit a circular motion nor to be 3-dimensional.
52
+ By removing these two limitations \gls{cnmc} can be leveraged on any dynamical system.
53
+ However, the first version of \gls{cnmc} employed \glsfirst{nmf} as the modal decomposition method.
54
+ The exploited \gls{nmf} algorithm is highly computationally intensive, which makes a universal \gls{cnmc} application economically inefficient.
55
+ Therefore, the current \gls{cnmc} has been extended by the option to choose between the \gls{nmf} and the newly implemented \glsfirst{svd}.
56
+ The aim is not only that \gls{cnmc} is returning results within an acceptable timescale, but also to ensure that the quality of the modal decomposition remains at least at an equal level.
57
+ Proofs for the latter can be found in section [-@sec-sec_3_3_SVD_NMF].\newline
58
+
59
+ With these modifications, the current \gls{cnmc} is now able to be used in any dynamical system within a feasible time frame.
60
+ The next addressed issue is the B-spline interpolation.
61
+ It is used in the propagation step of \glsfirst{cnm} [@Fernex2021] to smooth the predicted trajectory.
62
+ However, as already noted in [@Max2021], when the number of the clustering centroids $K$ is $K \gtrapprox 15$, the B-spline interpolation embeds oscillations with unacceptable high deviations from the original trajectories.
63
+ To resolve this problem, the B-spline interpolation is replaced with linear interpolation.
64
+ By preventing the occurrence of outliers caused by the B-spline interpolation, neither the autocorrelation defined in subsection [-@sec-subsec_1_1_3_first_CNMc] nor the predicted trajectories are made impractical.
65
+ Apart from the main ability of \gls{cnmc} a high number of additional features are available, e.g., the entire pipeline of \gls{cnmc} with all its parameters can be adjusted via one file (*settings.py*), an incorporated log file, storing results at desired steps, the ability to execute multiple dynamical models consequentially and activating and disabling each step of \gls{cnmc}.
66
+ The latter is particularly designed for saving computational time.
67
+ Also, \gls{cnmc} comes with its own post-processor.
68
+ It is optional to generate and save the plots.
69
+ However, in the case of utilizing this feature, the plots are available as HTML files which, e.g., allow extracting further information about the outcome or rotating and zooming in 3d plots.
70
+
71
+
Data/1_Writing/1_Task/2_State_Of_Art.qmd ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <!-- % =================================================== -->
3
+ <!-- % ==================== STATE OF THE ART ============= -->
4
+ <!-- % =================================================== -->
5
+
6
+ ## State of the art {#sec-sec_1_1_State}
7
+
8
+ The desire to get fast \gls{cfd} output is not new and also
9
+ a data-driven approach is found in the literature.
10
+ This section aims to describe some evolutionary steps of \glsfirst{cnmc}. Given that this work is built upon the most recent advancements,
11
+ they will be explained in particular detail.
12
+ Whereas the remaining development stages are briefly
13
+ summarized to mainly clarify the differences and
14
+ mention the reasons why improvements were desired. Since this topic
15
+ demands some prior knowledge to follow \gls{cnmc}'s workflow and goal, some basic principles about important topics shall be given in their subsection.\newline
16
+
17
+ The first data-driven approach, which is known to the author,
18
+ is by [@Kaiser2014] and shall be called \gls{cmm}.
19
+ \gls{cnmc} is not directly built upon \gls{cmm} but on the latest version
20
+ of \gls{cnm} and is described in [@Fernex2021].
21
+ \gls{cnmc} invokes \gls{cnm} many times in order to use
22
+ its outcome for further progress. Therefore, it's evident that only if \gls{cnm} is understood, CNMc's
23
+ progress can be followed. \gls{cmm} on the other hand has only a historical link to \gls{cnmc}, but no line of code of \gls{cmm} is invoked in \gls{cnmc}'s workflow. Consequently, \gls{cnm} will be explained in more detail than \gls{cmm}.
24
+
25
+ ### Principles {#sec-subsec_1_1_1_Principles}
26
+ CNM [@Fernex2021] is a method that uses some machine learning
27
+ techniques, graphs, and probability theory to mirror the behavior of
28
+ complex systems. These complex systems are described often by dynamical systems, which themselves are simply a set of
29
+ differential equations. Differential equations are useful to
30
+ capture motion. Thus, a dynamical system can be seen as a synonym for motion
31
+ over time. Some differential equations can be
32
+ solved in closed form, meaning analytically. However, for most of them
33
+ either it is too difficult to obtain an analytical solution or the
34
+ analytical solution is very unhandy or unknown. Unhandy in terms of the solution
35
+ being expressed in too many terms. Therefore, in most
36
+ cases, differential equations are solved numerically. Since
37
+ the purpose of \gls{cnm} is not to be only used for analytically
38
+ solvable equations, a numerical ordinary differential integrator
39
+ is used. \newline
40
+
41
+ The default solver is *SciPy*'s *RK45* solver.
42
+ It is a widely deployed solver and can also be applied to
43
+ chaotic systems for integration
44
+ over a certain amount of time.
45
+ Another option for solving chaotic \gls{ode}s is
46
+ *LSODA*. The developers of *pySindy* [@Silva2020; @Kaptanoglu2022]
47
+ state on their homepage [@pysindy_Home] that
48
+ *LSODA* even outperforms the default *RK45* when it comes to chaotic dynamical systems. The reasons why for \gls{cnmc} still *RK45* was chosen will be given in
49
+ section
50
+ [-@sec-sec_2_2_Data_Gen].
51
+ It is important to remember that turbulent flows are chaotic.
52
+ This is the main reason why in this work \gls{cnmc} has been designed to handle not only general dynamical systems but also general chaotic attractors.
53
+ Other well-known instances where chaos is found are, e.g., the weather, the
54
+ motion of planets and also the financial market is believed to be chaotic.
55
+ For more places, where chaos is found the reader is referred to [@Argyris2017].\newline
56
+
57
+ Note that \gls{cnmc} is designed for all kinds of dynamical systems; it is not restricted to linear, nonlinear or chaotic systems.
58
+ Therefore, chaotic systems shall be regarded as only one application example of \gls{cnmc}.
59
+ However, because chaotic attractors were primarily exploited in the context of the performed investigations in this work, a slightly lengthier introduction to chaotic systems is provided in the appendix [-@sec-ch_Ap_Chaotic].
60
+ Two terms that will be used extensively over this entire thesis are called model parameter value $\beta$ and a range of model parameter values $\vec{\beta}$. A regular differential equation can be expressed as
61
+ in equation @eq-eq_1_0_DGL, where $F$ is denoted as the function which describes the dynamical system.
62
+ The vector $\vec{x}(t)$ is the state vector.
63
+ The form in which differential equations are viewed in this work is given in equation @eq-eq_1_1_MPV.
64
+
65
+ $$
66
+ \begin{equation}
67
+ F = \dot{\vec{x}}(t) = \frac{d\vec{x}(t)}{dt} = f(\vec{x}(t))
68
+ \label{eq_1_0_DGL}
69
+ \end{equation}
70
+ $$ {#eq-eq_1_0_DGL}
71
+
72
+ $$
73
+ \begin{equation}
74
+ F_{CNMc} = \left(\dot{\vec{x}}(t), \, \vec{\beta} \right) =
75
+ \left( \frac{d\vec{x}(t)}{dt}, \, \vec{\beta} \right) =
76
+ f(\vec{x}(t), \, \vec{\beta} )
77
+ \label{eq_1_1_MPV}
78
+ \end{equation}
79
+ $$ {#eq-eq_1_1_MPV}
80
+
81
+ Note the vector $\vec{\beta}$ indicates a range of model parameter values, i.e., the differential equation is solved for each model parameter value $\beta$ separately.
82
+ The model parameter value $\beta$ is a constant and does not depend on the time, but rather it is a user-defined value.
83
+ In other terms, it remains unchanged over the entire timeline for which the dynamical system is solved.
84
+ The difference between $F$ and $F_{CNMc}$ is that $F$ is the differential equation for only one $\beta$, while $F_{CNMc}$ can be considered as the same differential equation, however, solved, for a range of individual $\beta$ values.
85
+ The subscript \gls{cnmc} stresses the fact that \gls{cnmc} is performed for a range of model parameter values $\vec{\beta}$.
86
+ Some dynamical systems, which will be used for \gls{cnmc}'s validation can be found in section [-@sec-sec_2_2_Data_Gen]. They are written as a set of differential equations in the $\beta$ dependent form.
87
+ Even a tiny change in $\beta$ can result in the emergence of an entirely different trajectory. \newline
88
+ <!-- % The behavior could exhibit such strong alterations, such -->
89
+ <!-- % that one might believe to require new underlying differential equations. -->
90
+ <!-- % These heavy transitions are called bifurcations.\newline -->
91
+
92
+ <!-- % Although bifurcations -->
93
+ <!-- % cause the trajectory to vary seemingly arbitrary, -->
94
+ <!-- % there exist canonical bifurcation types. Explanations for -->
95
+ <!-- % deriving their equations and visualization are well covered in literature -->
96
+ <!-- % and can be found, -->
97
+ <!-- % e.g., in \cite{Argyris2017,Kutz2022,Strogatz2019}. Although a detailed coverage of bifurcations is not feasible within the scope of this thesis, the method of how the trajectory is changed in such a significant way shall be outlined. -->
98
+ <!--% Namely, bifurcations can replace, remove and generate new attractors, e.g., the above introduced fix-point, limit cycle and torus attractor. -->
99
+ <!--% Bifurcations were mentioned here only for the sake of completeness. Indeed, one of the final goals for \gls{cnmc} is the extension to handle bifurcations. However, the latter is not part of this thesis.\newline -->
100
+
101
+ In summary, the following key aspects can be concluded. The reason why \gls{cnmc} in future releases is believed to be able to manage real \gls{cfd} fluid flow data and make predictions for unknown model parameter values $\beta$ is that turbulent flows are chaotic. Thus, allowing \gls{cnmc} to work with chaotic attractors in the course of this thesis is considered to be the first step toward predicting entire flow fields.
102
+ <!--% The second point is that there is no real unified definition of chaos, but there are some aspects that are more prevalent in the literature.-->
103
+
104
+
Data/1_Writing/1_Task/3_CNM.qmd ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- % ================================================= -->
2
+ <!-- % ================ Meet \gls{cnm} ======================= -->
3
+ <!-- % ================================================= -->
4
+ ## Cluster-based Network Modeling (CNM) {#sec-sec_1_1_2_CNM}
5
+ In this subsection, the workflow of \gls{cnm} [@Fernex2021] will be elaborated, as well as the previous attempt to expand the algorithm to accommodate a range of model parameter values $\vec{\beta}$.
6
+ \gls{cnm} [@Fernex2021] is the basis on which \gls{cnmc} is built or rather
7
+ \gls{cnmc} invokes \gls{cnm} multiple times for one of its preprocessing steps.
8
+ CNM can be split up into 4 main tasks, which are
9
+ data collection, clustering, calculating
10
+ transition properties and propagation.
11
+ The first step is to collect the data, which can be provided from any dynamic system or numerical simulations.
12
+ In this study, only dynamical systems are investigated.
13
+ Once the data for the dynamical system is passed to \gls{cnm}, the data is clustered, e.g., with k-means++ algorithm [@Arthur2006].
14
+ A detailed elaboration about this step is given in section [-@sec-sec_2_3_Clustering]. \gls{cnm} exploits graph theory for approximating the trajectory as a movement on nodes.
15
+ These nodes are equivalent to the centroids, which are acquired through clustering.
16
+ Next, the motion, i.e., movement from one centroid to another, shall be clarified.\newline
17
+
18
+ In order to fully describe the motion on the centroids, the time at which
19
+ one centroid is visited and exited, and also the order of movement must be known.
20
+ Note, when saying the motion is on the centroids, that
21
+ means the centroids or characteristic nodes do not move
22
+ at all. The entire approximated motion of the original trajectory
23
+ on the nodes is described with the transition
24
+ property matrices $\boldsymbol Q$ and $\boldsymbol T$.
25
+ The matrices $\boldsymbol Q$ and $\boldsymbol T$ are the transition probability and transition time matrices, respectively.
26
+ $\boldsymbol Q$ is used to apply probability theory for predicting the next following most likely centroid. In other words, if
27
+ the current location is at any node $c_i$,
28
+ $\boldsymbol Q$ will provide all possible successor centroids
29
+ with their corresponding transition probabilities.
30
+ Thus, the motion on the centroids
31
+ through $\boldsymbol Q$ is probability-based.
32
+ In more detail, the propagation of the motion on the centroids can be described as equation @eq-eq_34 .
33
+ The variables are denoted as the propagated $\vec{x}(t)$ trajectory, time $t$, centroid positions $\vec{c}_k,\, \vec{c}_j$, the time $t_j$ where centroid $\vec{c}_j$ is left and the transition time $T_{k,j}$ from $\vec{c}_j$ to $\vec{c}_k$ [@Fernex2021].
34
+ Furthermore, for the sake of a smooth trajectory, the motion between the centroids is interpolated through a spline interpolation.\newline
35
+ $$
36
+ \begin{equation}
37
+ \vec{x}(t) = \alpha_{kj} (t) \, \vec{c}_k + [\, 1 - \alpha_{kj} (t)\,] \, \vec{c}_j, \quad \alpha_{kj} (t) = \frac{t-t_j}{T_{k,j}}
38
+ \label{eq_34}
39
+ \end{equation}
40
+ $$ {#eq-eq_34}
41
+
42
+
43
+ The $\boldsymbol Q$ matrix only contains non-trivial transitions, i.e.,
44
+ if after a transition the centroid remains on the same centroid, the transition is not considered to be a real transition in \gls{cnm}.
45
+ This idea
46
+ is an advancement to the original work of Kaiser et al. [@Kaiser2014].
47
+ In Kaiser et al. [@Kaiser2014] the transition is modeled
48
+ as a Markov model. Markov models enable trivial transitions. Consequently,
49
+ the diagonals of the resulting non-direct transition matrix $\boldsymbol{Q_n}$
50
+ exhibit the highest values. The diagonal elements stand for trivial
51
+ transitions which lead to idling on the same centroid
52
+ many times. Such behavior is encountered and described by Kaiser et al. [@Kaiser2014].\newline
53
+
54
+
55
+ There are 3 more important aspects that come along when
56
+ adhering to Markov models. First, the propagation of motion is done
57
+ by matrix-vector multiplication. In the case of the existence of a
58
+ stationary state, the solution
59
+ will converge to the stationary state, with an increasing number of iterations, where no change with time happens.
60
+ A dynamical system can only survive as long as change with time exists.
61
+ In cases where no change with respect to time is encountered, equilibrium
62
+ or fixed points are found.
63
+ Now, if a stationary state or fixed point
64
+ exists in the considered dynamical system, the propagation
65
+ will tend to converge to this fixed point. However, the nature of
66
+ Markov models must not necessarily be valid for general dynamical systems.
67
+ Another way to see that is by applying some linear algebra. The
68
+ long-term behavior of the Markov transition matrix can be obtained
69
+ with equation @eq-eq_3_Infinite . Here, $l$ is the number
70
+ of iterations to get from one stage to another. Kaiser et al.
71
+ [@Kaiser2014] depict in a figure, how the values of
72
+ $\boldsymbol{Q_n}$ evolve after $1 \mathrm{e}{+3}$ steps. $\boldsymbol{Q_n}$ has
73
+ become more uniform.
74
+ $$
75
+ \begin{equation}
76
+ \label{eq_3_Infinite}
77
+ \lim\limits_{l \to \infty} \boldsymbol{Q_n}^l
78
+ \end{equation}
79
+ $$ {#eq-eq_3_Infinite}
80
+
81
+ If the number of steps is increased even further
82
+ and all the rows would have the same probability value,
83
+ $\boldsymbol{Q_n}$ would converge to a stationary point. What
84
+ also can be concluded from rows being equal is that it does not matter
85
+ from where the dynamical system was started or what its
86
+ initial conditions were. The probability
87
+ to end at one specific state or centroid is constant as
88
+ the number of steps approaches infinity. Following that,
89
+ it would violate the sensitive dependency on initial conditions,
90
+ which often is considered to be mandatory for modeling chaotic systems. Moreover, chaotic
91
+ systems amplify any perturbation exponentially, whether at time
92
+ $t = 0$ or at time $t \gg 0$. \newline
93
+
94
+ Thus, a stationary transition matrix $\boldsymbol{Q_n}$ is prohibited by chaos at any time step.
95
+ This can be found to be one of the main reasons, why
96
+ the **C**luster **M**arkov based **M**odeling (\gls{cmm})
97
+ often fails to
98
+ predict the trajectory.
99
+ Li et al. [@Li2021] summarize this observation
100
+ compactly as after some time the initial condition
101
+ would be forgotten and the asymptotic distribution would be reached.
102
+ Further, they stated, that due to this fact, \gls{cmm} would
103
+ not be suited for modeling dynamical systems.
104
+ The second problem which is involved, when deploying
105
+ regular Markov modeling is that the future only depends
106
+ on the current state. However, [@Fernex2021] has shown
107
+ with the latest \gls{cnm} version that incorporating also past
108
+ centroid positions for predicting the next centroid position
109
+ increases the prediction quality. The latter effect is especially
110
+ true when systems are complex.\newline
111
+
112
+
113
+ However, for multiple consecutive time steps
114
+ the trajectory's position could still be assigned to the same
115
+ centroid position (trivial transitions).
116
+ Thus, past centroids are those centroids that are found when going
117
+ back in time through only non-trivial transitions. The number of incorporated
118
+ past centroids is given as equation @eq-eq_5_B_Past, where $L$ is denoted
119
+ as the model order number. It represents the number of all
120
+ considered centroids, where the current and all the past centroids are included, with which the prediction of the successor centroid
121
+ is made.
122
+ $$
123
+ \begin{equation}
124
+ B_{past} = L -1
125
+ \label{eq_5_B_Past}
126
+ \end{equation}
127
+ $$ {#eq-eq_5_B_Past}
128
+
129
+ Furthermore, in [@Fernex2021] it is not simply believed that an
130
+ increasing model
131
+ order $L$ would increase the outcome quality in every case.
132
+ Therefore, a study on the number of $L$ and the clusters $K$
133
+ was conducted. The results proved that the choice of
134
+ $L$ and $K$ depends on the considered dynamical system.
135
+ \newline
136
+
137
+ The third problem encountered when Markov models are used is
138
+ that the time step must be provided. This time step is used
139
+ to define when a transition is expected. In case
140
+ the time step is too small, some amount of iterations is
141
+ required to transit to the next centroid. Thus, non-trivial
142
+ transitions would occur. In case the time step is too high,
143
+ the intermediate centroids would be missed. Such behavior
144
+ would be a coarse approximation of the real dynamics. Visually this can
145
+ be thought of as jumping from one centroid to another while
146
+ having skipped one or multiple centroids. The reconstructed
147
+ trajectory could lead to an entirely wrong representation of the
148
+ state-space.
149
+ CNM generates the transition time matrix $\boldsymbol T$ from data
150
+ and therefore no input from the user is required.\newline
151
+
152
+ A brief review of how the $\boldsymbol Q$ is built shall be provided.
153
+ Since the concept of
154
+ model order, $L$ has been explained, it can be clarified that
155
+ it is not always right to call $\boldsymbol Q$ and $\boldsymbol T$ matrices.
156
+ The latter is only correct, if $L = 1$, otherwise it must be
157
+ denoted as a tensor. $\boldsymbol Q$ and $\boldsymbol T$ can always be
158
+ referred to as tensors since a tensor incorporates matrices, i.e., a matrix is a tensor of rank 2.
159
+ In order to generate $\boldsymbol Q$,
160
+ $L$ must be defined, such that the shape of $\boldsymbol Q$ is
161
+ known. The next step is to gather all sequences of clusters
162
+ $c_i$. To understand that, we imagine the following scenario,
163
+ $L = 3$, which means 2 centroids from the past and the
164
+ current one are
165
+ incorporated to predict the next centroid.
166
+ Furthermore, imagining that two cluster sequence scenarios were found,
167
+ $c_0 \rightarrow c_1 \rightarrow c_2$ and $c_5 \rightarrow c_1 \rightarrow c_2$.
168
+ These cluster sequences tell us that the current centroid is $c_2$ and the remaining centroids belong to the past.
169
+ In order to complete the sequence for $L = 3$, the successor cluster also needs
170
+ to be added, $c_0 \rightarrow c_1 \rightarrow c_2 \rightarrow c_5$ and $c_5 \rightarrow c_1 \rightarrow c_2 \rightarrow c_4$.
171
+ The following step is to calculate the likelihood
172
+ of a transition to a specific successor cluster. This is done with equation @eq-eq_4_Poss, where $n_{k, \boldsymbol{j}}$
173
+ is the amount of complete sequences, where also the successor
174
+ is found. The index $j$ is written as a vector in order
175
+ to generalize the equation for $L \ge 1$. It then contains
176
+ all incorporated centroids from the past and the current centroid.
177
+ The index $k$ represents the successor centroid ($\boldsymbol{j} \rightarrow k$).
178
+ Finally, $n_{\boldsymbol{j}}$ counts all the matching incomplete sequences.
179
+ $$
180
+ \begin{equation}
181
+ \label{eq_4_Poss}
182
+ P_{k, \boldsymbol j} = \frac{n_{k,\boldsymbol{j}}}{n_{\boldsymbol{j}}}
183
+ \end{equation}
184
+ $$ {#eq-eq_4_Poss}
185
+
186
+ After having collected all the possible complete cluster sequences with their corresponding probabilities $\boldsymbol Q$, the transition time tensors $\boldsymbol T$ can be inferred from the data.
187
+ With that, the residence time on each cluster is known and can be
188
+ used for computing the transition times for every
189
+ single transition. At this stage, it shall be highlighted again,
190
+ CNM approximates its data fully with only two
191
+ matrices or when $L \ge 2$ tensors, $\boldsymbol Q$ and $\boldsymbol T$. The
192
+ final step is the propagation following equation @eq-eq_34 .
193
+ For smoothing the propagation between two centroids the B-spline interpolation
194
+ is applied.
195
+
196
+ <!-- % It can be concluded that one of the major differences between \gls{cnm} and \gls{cmm} is that {cnm} dismissed Markov modeling. -->
197
+ <!-- % Hence, only direct or non-trivial transition are possible. -->
198
+ <!-- % Fernex et al. [@Fernex2021] improved [@Li2021] by -->
199
+ <!-- % rejecting one more property of Markov chains, namely -->
200
+ <!-- % that the future state could be inferred exclusively from the current state. -->
201
+ <!-- % Through the upgrade of [@Fernex2021], incorporating past states for the prediction of future states could be exploited. -->
202
+
203
+ {{< include 4_CNMc.qmd >}}
Data/1_Writing/1_Task/4_CNMc.qmd ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### First version of CNMc {#sec-subsec_1_1_3_first_CNMc}
2
+ Apart from this thesis, there already has been an
3
+ attempt to build \glsfirst{cnmc}.
4
+ The procedure, progress and results of the most recent effort are described in [@Max2021].
5
+ Also, in the latter, the main idea was to predict the trajectories
6
+ for dynamical systems with a control term or a model parameter value $\beta$.
7
+ In this subsection, a review of
8
+ [@Max2021] shall be given with pointing out which parts need to be improved. In addition, some distinctions between the previous version of \gls{cnmc} and the most recent version are named.
9
+ Further applied modifications are provided in chapter [-@sec-chap_2_Methodlogy].\newline
10
+
11
+ To avoid confusion between the \gls{cnmc} version described in this thesis and the prior \gls{cnmc} version, the old version will be referred to as *first CNMc*.
12
+ *First CNMc* starts by defining a range of model parameter values
13
+ $\vec{\beta}$.
14
+ It was specifically designed to only be able to make predictions for the Lorenz attractor [@lorenz1963deterministic], which is described with the set of equations @eq-eq_6_Lorenz given in section [-@sec-sec_2_2_Data_Gen].
15
+ An illustrative trajectory is of the Lorenz system [@lorenz1963deterministic] with $\beta = 28$ is depicted in figure @fig-fig_2_Lorenz_Example .\newline
16
+
17
+ <!-- % ============================================================================== -->
18
+ <!-- % ============================ PLTS ============================================ -->
19
+ <!--% ==============================================================================-->
20
+
21
+ ![Illustrative trajectory of the Lorenz attractor [@lorenz1963deterministic], $\beta = 28$](../../3_Figs_Pyth/1_Task/2_Lorenz.svg){#fig-fig_2_Lorenz_Example}
22
+
23
+ Having chosen a range of model parameter values $\vec{\beta}$, the Lorenz system was solved numerically and its solution was supplied to \gls{cnm} in order to run k-means++ on all received trajectories.
24
+ <!-- % It assigns each data point to a cluster and -->
25
+ <!-- % calculates all the $K$ cluster centroids for all provided trajectories. -->
26
+ <!-- % Each cluster has an identity that in literature is known as a label, with which it can be accessed. -->
27
+ The centroid label allocation by the k-means++ algorithm is conducted randomly.
28
+ Thus, linking or matching centroid labels from one model parameter value $\beta_i$ to another model parameter value $\beta_j$, where $i \neq j$, is performed in 3 steps.
29
+ The first two steps are ordering the $\vec{\beta}$ in ascending
30
+ order and transforming the Cartesian coordinate system into a spherical coordinate system.
31
+ With the now available azimuth angle, each centroid is labeled in increasing order of the azimuth angle.
32
+ The third step is to match the centroids across $\vec{\beta}$, i.e., $\beta_i$ with $\beta_j$.
33
+ For this purpose, the centroid label from the prior model parameter value
34
+ is used as a reference to match its corresponding nearest centroid in the next model parameter value.
35
+ As a result, one label can be assigned to one centroid across the available $\vec{\beta}$.\newline
36
+
37
+
38
+ Firstly, [@Max2021] showed that ambiguous regions can
39
+ occur. Here the matching of the centroids across the $\vec{\beta}$ can
40
+ not be trusted anymore.
41
+ Secondly, the deployed coordinate transformation is assumed to only work properly in 3 dimensions. There is the possibility to set one
42
+ or two variables to zero in order to use it in two or one dimension, respectively.
43
+ However, it is not known whether such an artificial decrease of dimensions yields a successful outcome for lower-dimensional (2- and 1-dimensional) dynamical systems. In the event of a 4-dimensional or even higher dimensional case, the proposed coordinate transformation cannot be used anymore.
44
+ In conclusion, the transformation is only secure to be utilized in 3 dimensions.
45
+ Thirdly, which is also acknowledged by [@Max2021] is that the
46
+ coordinate transformation forces the dynamical system to have
47
+ a circular-like trajectory, e.g., as the Lorenz system depicted in figure @fig-fig_2_Lorenz_Example does.
48
+ Since not every dynamical system is forced to have a circular-like trajectory, it is one of the major parts which needs to be improved, when *first CNMc* is meant to be leveraged for all kinds of dynamical systems.
49
+ Neither the number of dimensions nor the shape of the trajectory should matter for a generalized \gls{cnmc}.\newline
50
+
51
+
52
+ Once the centroids are matched across all the available $\vec{\beta}$, pySINDy [@Brunton2016; @Silva2020; @Kaptanoglu2022] is used
53
+ to build a regression model. This regression model serves the purpose
54
+ of capturing all centroid positions of the calculated model parameter
55
+ values $\vec{\beta }$ and making predictions for unseen $\vec{\beta}_{unseen}$.
56
+ Next, a preprocessing step is performed on the
57
+ transition property tensors $\boldsymbol Q$ and $\boldsymbol T$. Both are
58
+ scaled, such that the risk of a bias is assumed to be reduced.
59
+ Then, on both \glsfirst{nmf} [@Lee1999] is
60
+ applied.
61
+ Following equation @eq-eq_5_NMF \gls{nmf} [@Lee1999] returns
62
+ two matrices, i.e., $\boldsymbol W$ and $\boldsymbol H$.
63
+ The matrices exhibit a physically
64
+ relevant meaning. $\boldsymbol W$ corresponds to a mode collection and $\boldsymbol H$ contains
65
+ the weighting factor for each corresponding mode.\newline
66
+ $$
67
+ \begin{equation}
68
+ \label{eq_5_NMF}
69
+ \boldsymbol {A_{i \mu}} \approx \boldsymbol A^{\prime}_{i \mu} = (\boldsymbol W \boldsymbol H)_{i \mu} = \sum_{a = 1}^{r}
70
+ \boldsymbol W_{ia} \boldsymbol H_{a \mu}
71
+ \end{equation}
72
+ $$ {#eq-eq_5_NMF}
73
+
74
+ The number of modes $r$ depends on the underlying dynamical system.
75
+ Firstly, the \gls{nmf} is utilized by deploying optimization.
76
+ The goal is to satisfy the condition that, the deviation between the original matrix and the approximated matrix shall be below a chosen threshold.
77
+ For this purpose, the number of required optimization iterations easily can be
78
+ in the order of $\mathcal{O} (1 \mathrm{e}+7)$. The major drawback here is that such a high number of iterations is computationally very expensive.
79
+ Secondly, for *first CNMc* the number of modes $r$ must be known beforehand.
80
+ Since in most cases this demand cannot be fulfilled two issues arise.
81
+ On the one hand, running \gls{nmf} on a single known $r$ can already be considered to be computationally expensive.
82
+ On the other hand, conducting a study to find the appropriate $r$ involves even more computational effort.
83
+ Pierzyna [@Max2021] acknowledges this issue and defined it to be one of the major limitations. \newline
84
+
85
+
86
+ The next step is to generate a regression model with \glsfirst{rf}.
87
+ Some introductory words about \gls{rf} are given in subsection [-@sec-subsec_2_4_2_QT].
88
+ As illustrated in [@Max2021], \gls{rf} was able to reproduce the training data reasonably well.
89
+ However, it faced difficulties to approximate spike-like curves.
90
+ Once the centroid positions and the two transitions property tensors $\boldsymbol Q$ and $\boldsymbol T$ are known, they are passed to \gls{cnm} to calculate the predicted trajectories.
91
+ For assessing the prediction quality two methods are used, i.e., the autocorrelation and the \glsfirst{cpd}.
92
+ \gls{cpd} outlines the probability of being on one of the $K$ clusters.
93
+ The autocorrelation given in equation @eq-eq_35 allows comparing two trajectories with a phase-mismatch [@protas2015optimal] and it measures how well a point in trajectory correlates with a point that is some time steps ahead.
94
+ The variables in equation @eq-eq_35 are denoted as time lag $\tau$, state space vector $\boldsymbol x$, time $t$ and the inner product $(\boldsymbol x, \boldsymbol y) = \boldsymbol x \cdot \boldsymbol{y}^T$. \newline
95
+ $$
96
+ \begin{equation}
97
+ R(\tau) = \frac{1}{T - \tau} \int\limits_{0}^{T-\tau}\, (\boldsymbol{x} (t), \boldsymbol{x}(t+ \tau)) dt, \quad \tau \in [\, 0, \, T\,]
98
+ \label{eq_35}
99
+ \end{equation}
100
+ $$ {#eq-eq_35}
101
+
102
+ *First CNMc* proved to work well for the Lorenz system only for the number of centroids up to $K=10$ and small $\beta$.
103
+ Among the points which need to be improved is the method to match the centroids across the chosen $\vec{\beta}$.
104
+ Because of this, two of the major problems occur, i.e., the limitation to 3 dimensions and the behavior of the trajectory must be circular, similar to the Lorenz system [@lorenz1963deterministic].
105
+ These demands are the main obstacles to the application of *first CNMc* to all kinds of dynamical systems.
106
+ The modal decomposition with \gls{nmf} is the most computationally intensive part and should be replaced by a faster alternative.
107
+
108
+
Data/1_Writing/2_Task/0_Methodlogy.qmd ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Methodology {#sec-chap_2_Methodlogy}
2
+ In this chapter, the entire pipeline for designing the proposed
3
+ \gls{cnmc} is elaborated. For this purpose, the ideas behind
4
+ the individual processes are explained.
5
+ Results from the step tracking onwards will be presented in chapter [-@sec-ch_3].
6
+ Having said that, \gls{cnmc} consists of multiple main process steps or stages.
7
+ First, a broad overview of the \gls{cnmc}'s workflow shall be given.
8
+ Followed by a detailed explanation for each major operational step. The
9
+ implemented process stages are presented in the same order as they are
10
+ executed in \gls{cnmc}. However, \gls{cnmc} is not forced
11
+ to go through each stage. If the output of some steps is
12
+ already available, the execution of the respective steps can be skipped. \newline
13
+
14
+ The main idea behind such an implementation is to prevent computing the same task multiple times.
15
+ Computational time can be reduced if the output of some \gls{cnmc} steps are available.
16
+ Consequently, it allows users to be flexible in their explorations.
17
+ It could be the case that only one step of \textsc{CNMc} is desired to be examined with different settings or even with newly implemented functions without running the full \gls{cnmc} pipeline.
18
+ Let the one \gls{cnmc} step be denoted as C, then it is possible to skip steps A and B if their output is already calculated and thus available.
19
+ Also, the upcoming steps can be skipped or activated depending on the need for their respective outcomes.
20
+ Simply put, the mentioned flexibility enables to load data for A and B and execute only C. Executing follow-up steps or loading their data is also made selectable.
21
+ <!-- % -->
22
+ <!-- %------------------------------- SHIFT FROM INTRODUCTION ---------------------- -->
23
+ <!-- % -->
24
+ Since the tasks of this thesis required much coding,
25
+ it is important to
26
+ mention the used programming language and the dependencies.
27
+ As for the programming language,
28
+ *Python 3* [@VanRossum2009] was chosen. For the libraries, only a few important libraries will be mentioned, because the number of used libraries is high. Note, each used module is
29
+ freely available on the net and no licenses are required to be purchased.
30
+ \newline
31
+
32
+ The important libraries in terms of performing actual calculations are
33
+ *NumPy* [@harris2020array], *SciPy* [@2020SciPy-NMeth], *Scikit-learn* [@scikit-learn], *pySindy* [@Silva2020; @Kaptanoglu2022], for multi-dimensional sparse matrix management *sparse* and for plotting only *plotly* [@plotly] was deployed. One of the reasons why *plotly* is preferred over *Matplotlib* [@Hunter:2007] is its post-processing capabilities, which are now available. Note, the previous \gls{cnmc} version used *Matplotlib* [@Hunter:2007], which in this work has been fully replaced by *plotly* [@plotly]. More reasons why this modification is useful and newly implemented post-processing capabilities will be given in the upcoming sections.\newline
34
+
35
+ For local coding, the author's Linux-Mint-based laptop with the following hardware was deployed: CPU: Intel Core i7-4702MQ \gls{cpu}@ 2.20GHz × 4, RAM: 16GB.
36
+ The Institute of fluid dynamics of the Technische Universität Braunschweig
37
+ also supported this work by providing two more powerful computation resources.
38
+ The hardware specification will not be mentioned, due to the fact, that all computations and results elaborated in this thesis can be obtained by
39
+ the hardware described above (author's laptop). However, the two provided
40
+ resources shall be mentioned and explained if \gls{cnmc} benefits from
41
+ faster computers. The first bigger machine is called *Buran*, it is a
42
+ powerful Linux-based working station and access to it is directly provided by
43
+ the chair of fluid dynamics. \newline
44
+
45
+ The second resource is the high-performance
46
+ computer or cluster available across the Technische Universität Braunschweig
47
+ *Phoenix*. The first step, where the dynamical systems are solved through an \gls{ode} solver
48
+ is written in a parallel manner. This step can, if specified in the *settings.py* file, be performed in parallel and thus benefits from
49
+ multiple available cores. However, most implemented \gls{ode}s are solved within
50
+ a few seconds. There are also some dynamical systems implemented whose
51
+ ODE solution can take a few minutes. Applying \gls{cnmc} on latter dynamical
52
+ systems results in solving their \gls{ode}s for multiple different model parameter values. Thus, deploying the parallelization can be advised in the latter mentioned time-consuming \gls{ode}s.\newline
53
+
54
+ By far the most time-intensive part of the improved \gls{cnmc} is the clustering step. The main computation for this step is done with
55
+ *Scikit-learn* [@scikit-learn]. It is heavily parallelized and the
56
+ computation time can be reduced drastically when multiple threads are available.
57
+ Other than that, *NumPy* and *SciPy* are well-optimized libraries and
58
+ are assumed to benefit from powerful computers. In summary, it shall be stated that a powerful machine is for sure advised when multiple dynamical
59
+ systems with a range of different settings shall be investigated since parallelization is available. Yet executing \gls{cnmc} on a single dynamical system, a regular laptop can be regarded as
60
+ a sufficient tool.
Data/1_Writing/2_Task/1_0_CNMC_Data.qmd ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <!--%------------------------------- SHIFT FROM INTRODUCTION ------------------------>
3
+
4
+ <!-- % ===================================================================== -->
5
+ <!-- % ============= Workflow ============================================== -->
6
+ <!-- % ===================================================================== -->
7
+ ## CNMc's data and workflow {#sec-sec_2_1_Workflow}
8
+ In this section, the 5 main points that characterize \gls{cnmc} will be discussed.
9
+ Before diving directly into \gls{cnmc}'s workflow some remarks
10
+ are important to be made.
11
+ First, \gls{cnmc} is written from scratch, it is not simply an updated version of the described *first CNMc* in subsection
12
+ [-@sec-subsec_1_1_3_first_CNMc].
13
+ Therefore, the workflow described in this section for \gls{cnmc} will not match that of *first CNMc*, e.g., *first CNMc* had no concept of *settings.py* and it was not utilizing *Plotly* [@plotly] to facilitate post-processing capabilities.
14
+ The reasons for a fresh start were given in subsection [-@sec-subsec_1_1_3_first_CNMc].
15
+ However, the effort of running *first CNMc* and the time required to adjust *first CNMc* such that a generic dynamical system could be utilized were considered more time-consuming than starting from zero. \newline
16
+
17
+ Second, the reader is reminded to have the following in mind.
18
+ Although it is called pipeline or workflow, \gls{cnmc} is not obliged to run the whole workflow. With the *settings.py* file, which will be explained below, it is possible to run only specific selected tasks.
19
+ The very broad concept of \gls{cnmc} was already provided at the beginning of chapter [-@sec-chap_1_Intro].
20
+ However, instead of providing data of dynamical systems for different model parameter values, the user defines a so-called *settings.py* file and executes \gls{cnmc}.
21
+ The outcome of \gls{cnmc} consists, very broadly, of the predicted trajectories and some accuracy measurements as depicted in figure
22
+ @fig-fig_1_CNMC_Workflow .
23
+ In the following, a more in-depth view shall be given.\newline
24
+
25
+
26
+ Despite its extension, *settings.py* is a regular *Python* file. However, its content is a dictionary, thus there is no need to acquire and have specific knowledge about *Python*.
27
+ The syntax of *Python's* dictionary is quite similar to that of the *JSON* dictionary, in that the setting name is supplied within a quote mark
28
+ and the argument is stated after a colon. In order to understand the main points of \gls{cnmc}, its main data and workflow are depicted @fig-fig_3_Workflow as an XDSM diagram [@Lambe2012].
29
+ \newline
30
+
31
+ <!-- % ============================================-->
32
+ <!-- % ================ 2nd Workflow ==============-->
33
+ <!-- % ============================================-->
34
+ <!-- NOTE Sideway figure -->
35
+ ![\gls{cnmc} general workflow overview](../../3_Figs_Pyth/2_Task/0_Workflow.svg){#fig-fig_3_Workflow}
36
+
37
+
38
+ The first action for executing \gls{cnmc} is to define *settings.py*. It contains descriptive information about the entire pipeline, e.g., which dynamical system to use, which model parameters to select for training, which for testing, which method to use for modal decomposition and mode regression.
39
+ To be precise, it contains all the configuration attributes of all the 5 main \gls{cnmc} steps and some other handy extra functions. It is written in
40
+ a very clear way such that settings to the corresponding stages of \gls{cnmc}
41
+ and the extra features can be distinguished at first glance.
42
+ First, there are separate dictionaries for each of the 5 steps to ensure that the desired settings are made where they are needed.
43
+ Second, instead of regular line breaks, multiline comment blocks with the stage names in the center are used.
44
+ Third, almost every *settings.py* attribute is explained with comments.
45
+ Fourth, there are some cases, where
46
+ a specific attribute needs to be reused in other steps.
47
+ The user is not required to adapt it manually for all its occurrences, but rather to change it only on the first occasion, where the considered function is defined.
48
+ *Python* will automatically ensure that all remaining steps receive the change correctly.
49
+ Other capabilities implemented in *settings.py* are mentioned when they are actively exploited.
50
+ In figure @fig-fig_3_Workflow it can be observed that after passing *settings.py* a so-called *Informer* and a log file are obtained.
51
+ The *Informer* is a file, which is designed to save all user-defined settings in *settings.py* for each execution of \gls{cnmc}.
52
+ Also, here the usability and readability of the output are important and have been formatted accordingly. It proves to be particularly useful when a dynamic system with different settings is to be calculated, e.g., to observe the influence of one or multiple parameters. \newline
53
+
54
+ One of the important attributes which
55
+ can be arbitrarily defined by the user in *settings.py* and thus re-found in the *Informer* is the name of the model.
56
+ In \gls{cnmc} multiple dynamical systems are implemented, which can be chosen by simply changing one attribute in *settings.py*.
57
+ Different models could be calculated with the same settings, thus this clear and fast possibility to distinguish between multiple calculations is required.
58
+ The name of the model is not only saved in the *Informer* but will also
59
+ be used to generate a folder, where all of \gls{cnmc} output for this single
60
+ \gls{cnmc} workflow will be stored.
61
+ The latter ensures, on the one hand, that the \gls{cnmc} models can easily be distinguished from each other and, on the other hand, that all results of one model are obtained in a structured way.
62
+ \newline
63
+
64
+ When executing \gls{cnmc} many terminal outputs are displayed. This allows the user to be kept up to date on the current progress on the one hand and to see important results directly on the other.
65
+ In case of unsatisfying results, \gls{cnmc} could be aborted immediately, instead of having to compute the entire workflow. In other words, if a computationally expensive \gls{cnmc} task shall be performed, knowing about possible issues in the first steps can
66
+ be regarded as a time-saving mechanism.
67
+ The terminal outputs are formatted to include the date, time, type of message, the message itself and the place in the code where the message can be found.
68
+ The terminal outputs are colored depending on the type of the message, e.g., green is used for successful computations.
69
+ Colored terminal outputs are applied for the sake of readability.
70
+ More relevant outputs can easily be distinguished from others.
71
+ The log file can be considered as a memory since, in it, the terminal outputs are saved.\newline
72
+
73
+ The stored terminal outputs are in the same format as the terminal output described above, except that no coloring is utilized.
74
+ An instance, where the log file can be very helpful is the following. Some implemented quality measurements give very significant information about prediction reliability. Comparing different settings in terms of prediction capability would become very challenging if the terminal outputs would be lost whenever the \gls{cnmc} terminal is closed. The described *Informer* and the log file can be beneficial as explained, nevertheless, they are optional.
75
+ That is, both come as two of the extra features mentioned above and can be turned off in *settings.py*.\newline
76
+
77
+ Once *settings.py* is defined, \gls{cnmc} will filter the provided input, adapt the settings if required and send the corresponding parts to their respective steps.
78
+ The sending of the correct settings is depicted in figure @fig-fig_3_Workflow, where the abbreviation *st* stands for settings.
79
+ The second abbreviation *SOP* is found for all 5 stages and denotes storing output and plots. All the outcome is stored in a compressed form such that memory can be saved. All the plots are saved as HTML files. There are many reasons to do so, however, to state the most crucial ones. First, the HTML file can be opened on any operating system.
80
+ In other words, it does not matter if Windows, Linux or Mac is used.
81
+ Second, the big difference to an image is that HTML files can be upgraded with, e.g., CSS, JavaScript and PHP functions.
82
+ Each received HTML plot is equipped with some post-processing features, e.g., zooming, panning and taking screenshots of the modified view. When zooming in or out the axes labels are adapted accordingly. Depending on the position of
83
+ the cursor, a panel with the exact coordinates of one point and other information such as the $\beta$ are made visible. \newline
84
+
85
+ In the same way that data is stored in a compressed format, all HTML files are generated in such a way that additional resources are not written directly into the HTML file, but a link is used so that the required content is obtained via the Internet.
86
+ Other features associated with HTML plots and which data are saved will be explained in their respective section in this chapter.
87
+ The purpose of \gls{cnmc} is to generate a surrogate model with which predictions can be made for unknown model parameter values ${\beta}$.
88
+ For a revision on important terminology as model parameter value $\beta$
89
+ the reader is referred to subsection [-@sec-subsec_1_1_1_Principles].
90
+ Usually, in order to obtain a sound predictive model, machine learning methods require a considerable amount of data. Therefore, the \gls{ode} is solved for a set of $\vec{\beta }$. An in-depth explanation of this first step is provided in
91
+ section [-@sec-sec_2_2_Data_Gen].
92
+ The next step is to cluster all the received trajectories deploying kmeans++ [@Arthur2006]. Once this has been done, tracking can be performed.
93
+ Here the objective is to keep track of the positions of all the centroids when $\beta$ is changed over the whole range of $\vec{\beta }$.
94
+ A more detailed description is given in section [-@sec-sec_2_3_Tracking].\newline
95
+
96
+
97
+ The modeling step is divided into two subtasks, which are not displayed as such in figure @fig-fig_3_Workflow . The first subtask aims to get a model that yields all positions of all the $K$ centroids for an unseen $\beta_{unseen}$, where an unseen $\beta_{unseen}$ is any $\beta$ that was not used to train the model. In the second subtask, multiple tasks are performed.
98
+ First, the regular \gls{cnm} [@Fernex2021] shall be applied to all the tracked clusters from the tracking step. For this purpose, the format of the tracked results is adapted in a way such that \gls{cnm} can be executed without having to modify \gls{cnm} itself. By running \gls{cnm} on the tracked data of all $\vec{\beta }$, the transition property tensors $\boldsymbol Q$ and $\boldsymbol T$ for all $\vec{\beta }$ are received. \newline
99
+
100
+ Second, all the $\boldsymbol Q$ and the $\boldsymbol T$ tensors are stacked to form $\boldsymbol {Q_{stacked}}$ and $\boldsymbol {T_{stacked}}$ matrices.
101
+ These stacked matrices are subsequently supplied to one of the two possible implemented modal decomposition methods.
102
+ Third, a regression model for the obtained modes is constructed.
103
+ Clarifications on the modeling stage can be found in section [-@sec-sec_2_4_Modeling].\newline
104
+
105
+ The final step is to make the actual predictions for all provided $\beta_{unseen}$ and allow the operator to draw conclusions about the trustworthiness of the predictions.
106
+ For the trustworthiness, among others, the three quality measurement concepts explained in subsection
107
+ [-@sec-subsec_1_1_3_first_CNMc]
108
+ are leveraged. Namely, comparing the \gls{cnmc} and \gls{cnm} predicted trajectories by overlaying them directly. The two remaining techniques, which were already applied in regular \gls{cnm} [@Fernex2021], are the \glsfirst{cpd} and the autocorrelation.\newline
109
+
110
+ The data and workflow in figure @fig-fig_3_Workflow do not reveal one additional feature of the implementation of \gls{cnmc}. That is, inside the folder *Inputs* multiple subfolders containing a *settings.py* file, e.g., different dynamical systems, can be inserted to allow a sequential run. In the case of an empty subfolder, \gls{cnmc} will inform the user about that and continue its execution without an error.
111
+ As explained above, each model will have its own folder where the entire output will be stored.
112
+ To switch between the multiple and a single *settings.py* version, the *settings.py* file outside the *Inputs* folder needs to be modified. The argument for that is *multiple\_Settings*.\newline
113
+
114
+ Finally, one more extra feature shall be mentioned. After having computed expensive models, it is not desired to overwrite the log file or any other output.
115
+ To prevent such unwanted events, it is possible to leverage the overwriting attribute in *settings.py*. If overwriting is disabled, \gls{cnmc} would verify whether a folder with the specified model name already exists.
116
+ In the positive case, \gls{cnmc} would initially only propose an alternative model name. Only if the suggested model name would not overwrite any existing folders, the suggestion will be accepted as the new model name.
117
+ Both the model name originally chosen in *settings.py* as well as the new final replacement model name are going to be printed out in the terminal line.\newline
118
+
119
+ In summary, the data and workflow of \gls{cnmc} are shown in Figure @fig-fig_3_Workflow and are sufficient for a broad understanding of the main steps.
120
+ However, each of the 5 steps can be invoked individually, without having to run the full pipeline. Through the implementation of *settings.py* \gls{cnmc} is highly flexible. All settings for the steps and the extra features can be managed with *settings.py*.
121
+ A log file containing all terminal outputs, as well as a summary of the chosen settings stored in a separate file called *Informer*, are part of \gls{cnmc}'s tools.
122
+