-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathsoftware-engineering.typ
More file actions
1785 lines (1464 loc) · 59.7 KB
/
software-engineering.typ
File metadata and controls
1785 lines (1464 loc) · 59.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Document-wide defaults: body font, page margins, paragraph layout,
// numbered headings and numbered display equations.
#set text(font: "New Computer Modern", lang: "en", weight: "light", size: 11pt)
#set page(margin: 1.75in)
#set par(leading: 0.55em, spacing: 0.85em, justify: true)
#set heading(numbering: "1.1")
#set math.equation(numbering: "(1)")
// Raw blocks without an explicit language default to C++ highlighting.
#set raw(lang: "cpp")
// #set list(marker: [--])
// Render the empty-set symbol as a diameter sign (stylistic preference).
#show sym.emptyset: sym.diameter
// Allow long figures (e.g. code listings) to break across pages.
#show figure: set block(breakable: true)
#show heading: set block(above: 1.4em, below: 1em)
// Style every raw block: code font plus a full-width, framed, light panel.
#show raw: it => {
set text(font: "CaskaydiaCove NFM", weight: "light", size: 8.5pt)
set block(width: 100%, inset: 1em, fill: luma(252), stroke: .5pt + silver)
it
}
// Level-1 outline entries: bold, extra vertical space, no dot leaders.
#show outline.entry.where(level: 1): it => {
show repeat: none
v(1em, weak: true)
text(size: 1em, strong(it))
}
// Embed a source file as a captioned figure; the caption links to the file
// in the project repository. `start`/`end` optionally restrict the listing
// to the half-open line range [start, end).
#let load-listing-from-file(filename, start: none, end: none) = {
  // An `end` without a `start` is meaningless, and a given range must be
  // non-empty.
  assert(end == none or start != none)
  assert(start == none or end == none or start < end)
  let repository = "https://github.com/CuriousCI/software-engineering/tree/main/"
  let source = read(filename)
  if start != none {
    source = source.split("\n").slice(start, end).join("\n")
  }
  figure(
    caption: link(repository + filename, raw(filename, lang: "typ")),
    raw(source, block: true),
  )
}
// Render a numbered, titled environment ("Definition 1 (caption). body"):
// a bold label with a per-kind running counter, the caption in parentheses,
// then the body inline.
#let listing(kind, caption, body) = {
  let heading = {
    // Capitalize the kind and append its running counter value.
    upper(kind.first()) + kind.slice(1)
    sym.space
    context counter(kind).step()
    context counter(kind).display()
    sym.space
  }
  strong(heading)
  [(#caption)*.*]
  sym.space
  body
}
// Convenience wrappers for the three listing kinds used in this document.
#let listing-def(caption, body) = listing("definition", caption, body)
#let listing-problem(caption, body) = listing("problem", caption, body)
#let listing-example(caption, body) = listing("example", caption, body)
// Small circled number rendered in the code font; used to reference the
// `/* n */` markers that appear inside code listings.
#let raw-ref(id) = {
  let badge = box(
    radius: 100%,
    width: 9pt,
    height: 9pt,
    inset: 1pt,
    stroke: .5pt, // fill: black,
    align(center + horizon, text(
      font: "CaskaydiaCove NFM",
      size: 7pt,
      repr(id),
    )),
  )
  // Reserve 9pt of horizontal space and lift the badge above the baseline.
  box(width: 9pt, place(dy: -8pt, dx: 0pt, badge))
}
// Inside every raw block, turn `/* n */` markers into the circled-number
// badge produced by raw-ref, shown red, bold, and underlined.
#show raw: it => {
show regex("/\* \d \*/"): it => {
set text(red)
// Hide the comment delimiters and the surrounding spaces...
show regex("(\*|/| )"): ""
// ...and replace the digit itself with its badge.
show regex("\d"): it => {
raw-ref(int(it.text))
}
underline(strong(it))
}
it
}
// Inline "(docs)" link to a cppreference.com page.
// BUG FIX: the destination was hard-coded to the uniform_int_distribution
// page, so every #cppreference(...) call linked there regardless of the
// `dest` argument. Link to the supplied destination instead.
#let cppreference(dest) = link(
  dest,
  [`(`#underline(offset: 2pt, `docs`)`)`],
)
// Title page, centered on its own page.
// NOTE(review): `title` is not a Typst built-in and is not defined earlier
// in this file — confirm a `#let title(..)` helper exists somewhere (or
// replace with e.g. text(size: 2em, strong(..))); otherwise this fails to
// compile.
#page(align(
center + horizon,
{
title[Software Engineering]
text(size: 1.3em)[ Ionuț Cicio \ ]
align(bottom, datetime.today().display("[day]/[month]/[year]"))
},
))
#page[The latest version of the `.pdf` and the referenced material can be found
at the following link: #underline(link(
"https://github.com/CuriousCI/software-engineering",
)[https://github.com/CuriousCI/software-engineering])]
// Table of contents, then enable page numbering for the body.
#page(outline(indent: auto, depth: 3))
#set page(numbering: "1")
#set page(numbering: "1")
= Model based software design
Software projects require *design choices* that often can't be driven by
experience or reasoning alone. That's why a *model* of the project is needed to
compare different solutions before committing to a design choice.
== The _"Amazon Prime Video"_ dilemma
If you were tasked with designing the software architecture for Amazon Prime
Video, which choices would you make? What if you had to keep the costs
minimal? Would you use a distributed architecture or a monolith application?
More often than not, monolith applications are considered more costly and less
scalable than the counterpart, due to an inefficient usage of resources. But, in
a recent article, a Senior SDE at Prime Video describes how they _"*reduced the
cost* of the audio/video monitoring infrastructure by *90%*"_ @prime by using a
monolith architecture.
There isn't a definitive way to answer these types of questions, but one way to
go about it is building a model of the system to compare the solutions. In the
case of Prime Video, _"the audio/video monitoring service consists of three
major components:"_ @prime
- the _media converter_ converts input audio/video streams
- the _defect detectors_ analyze frames and audio buffers in real-time
- the _orchestrator_ controls the flow in the service
#figure(
caption: "audio/video monitoring system process",
image("public/audio-video-monitor.svg", width: 100%),
)
To answer questions about the system, it can be simulated by modeling its
components as *Markov decision processes*.
== Probabilistic software modeling <traffic>
=== Markov chains <markov-chain>
#listing-def[Markov chain][
A Markov chain $M$ is a pair $(S, p)$ where
- $S$ is the set of states
- $p : S times S -> [0, 1]$ is the transition probability
The function $p$ is such that $p(s'|s)$ is the probability to transition
from state $s$ to state $s'$. For it to be a probability it must follow
@markov-chain-constrain
]
$ forall s in S space.en sum_(s' in S) p(s'|s) = 1 $ <markov-chain-constrain>
A Markov chain (or Markov process) is characterized by _memorylessness_ (also
called the Markov property), meaning that predictions on future states can be
made solely on the present state of the Markov chain and predictions are not
influenced by the history of transitions that led up to the present state.
#figure(
image("public/weather-system.svg", width: 75%),
caption: [example Markov chain with $S = {#[`rainy`], #[`sunny`]}$],
) <rainy-sunny>
#v(5pt)
If a given Markov chain $M$ transitions at *discrete timesteps* (i.e. the time
steps $t_1, t_2, ...$ are countable) and the *state space* is countable, then
it's called a DTMC (discrete-time Markov chain). There are other classifications
for continuous state space and continuous-time.
#figure(caption: [transition matrix of @rainy-sunny])[
#table(
columns: (auto, auto, auto),
stroke: luma(75) + .1pt,
table.header($p$, [sunny], [rainy]),
[sunny], $0.8$, $0.2$,
[rainy], $0.5$, $0.5$,
)
] <rainy-sunny-transition-matrix>
#v(5pt)
A Markov chain $M$ can be written as a *transition matrix*, like the one in
@rainy-sunny-transition-matrix. Later in the guide it will be shown that
implementing transition matrices, thus Markov chains, is really simple with the
`<random>` library in `C++`.
=== Markov decision processes <mdp>
A Markov decision process (MDP), despite sharing the name, is *different* from a
Markov chain, because it interacts with an *external environment*.
#listing-def("Markov decision process")[A Markov decision process $M$ is
conventionally a tuple $(U, Y, X, p, g)$ where
- $U$ is the set of input values
- $Y$ is the set of output values
- $X$ is the set of states
- $p : X times X times U -> [0, 1]$ is the transition probability
- $g : X -> Y$ is the output function
]
The same constraint in @markov-chain-constrain holds for MDPs, with an important
difference: *for each input value*, the sum of the transition probabilities for
that input value must be 1.
$
forall x in X, u in U space.en sum_(x' in X) p(x'|x, u) = 1
$
Where $p(x'|x, u)$ is the probability to transition from state $x$ to state $x'$
when the input is $u$.
#listing-example[Software development process][
The software development process of a company can be modeled as a MDP
$M = (U, Y, X, p, g)$ where
- $U = {epsilon}$ #footnote[If $U$ is empty $M$ can't transition, at least 1
input is required, i.e. $epsilon$]
- $Y = "euro" times "duration"$
- $X = {x_0, x_1, x_2, x_3, x_4}$
]
#align(center)[
#figure(
image("public/development-process-markov-chain.svg"),
caption: "the model of a team's development process",
) <development-process>
]
#v(5pt)
#columns({
math.equation(
block: true,
numbering: none,
$
g(x) = cases(
(0, 0) & quad x in {x_0, x_4},
(20000, 2) & quad x in {x_1, x_3},
(40000, 4) & quad x in { x_2 }
)
$,
)
colbreak()
table(
columns: (auto, auto, auto, auto, auto, auto),
stroke: luma(75) + .1pt,
align: center + horizon,
table.header($epsilon$, $x_0$, $x_1$, $x_2$, $x_3$, $x_4$),
$x_0$, $0$, $1$, $0$, $0$, $0$,
$x_1$, $0$, $.3$, $.7$, $0$, $0$,
$x_2$, $0$, $.1$, $.2$, $.7$, $0$,
$x_3$, $0$, $.1$, $.1$, $.1$, $.7$,
$x_4$, $0$, $0$, $0$, $0$, $1$,
)
})
#v(5pt)
Only one transition matrix is needed, as $|U| = 1$ (there is only one input
value). If $U$ had multiple input values, like ${"start", "stop", "wait"}$, then
multiple transition matrices would have been required, one for each input value.
=== Networks of Markov decision processes
Multiple MDPs can be connected into a network, and the network is itself a MDP
that maintains the MDP properties (the intuition is there, but it's too much
syntax for me to be bothered to write it).
#listing-def("Network of MDPs")[
Given a pair of Markov decision processes $M_1, M_2$ where
- $M_1 = (U_1, Y_1, X_1, p_1, g_1)$
- $M_2 = (U_2, Y_2, X_2, p_2, g_2)$
Let $M = (U, Y, X, p, g)$ be the network of $M_1, M_2$ such that
- TODO
]
#pagebreak()
== Numerical analysis tips and tricks
=== Incremental mean <incremental-average>
Given a set of values $X = {x_1, ..., x_n} subset RR$ the mean value is defined
as
$ overline(x)_n = (sum_(i = 1)^n x_i) / n $
$overline(x)_n$ can be computed with the procedure in @mean.
#load-listing-from-file("listings/mean.cpp", start: 4, end: 10) <mean>
The problem with this procedure is that, by adding up all the values before the
division, the numerator could *overflow*, even if the value of $overline(x)_n$
fits within the IEEE-754 limits. Nonetheless, $overline(x)_n$ can be calculated
incrementally.
$
overline(x)_(n + 1) = (sum_(i = 1)^(n + 1) x_i) / (n + 1) =
((sum_(i = 1)^n x_i) + x_(n + 1)) / (n + 1) =
(sum_(i = 1)^n x_i) / (n + 1) + x_(n + 1) / (n + 1) = \
((sum_(i = 1)^n x_i) n) / ((n + 1) n) + x_(n + 1) / (n + 1) =
(sum_(i = 1)^n x_i) / n dot.c n / (n + 1) + x_(n + 1) / (n + 1) = \
overline(x)_n dot n / (n + 1) + x_(n + 1) / (n + 1)
$
With this formula the numbers added up are smaller: $overline(x)_n$, the mean,
is multiplied by $n / (n + 1) tilde 1$, and added up to
$x_(n + 1) / (n + 1) < x_(n + 1)$.
#load-listing-from-file(
"listings/mean.cpp",
start: 11,
end: 20,
)
The examples in ```typ listings/mean.cpp``` show how the incremental computation
of the mean gives a valid result, whereas the traditional procedure returns
`Inf`.
=== Welford's online algorithm <welford>
In a similar fashion it could be faster and require less memory to calculate the
standard deviation incrementally. Welford's online algorithm can be used for
this purpose @welford-online.
$
M_(2, n) = sum_(i=1)^n (x_i - overline(x)_n)^2 \
M_(2, n) = M_(2, n-1) + (x_n - overline(x)_(n - 1))(x_n - overline(x)_n) \
sigma^2_n = M_(2, n) / n \
$
Given $M_2$, if $n > 0$, the standard deviation is $sqrt(M_(2, n) / n)$. The
average can be calculated incrementally like in @incremental-average.
#load-listing-from-file("mocc/math.cpp", start: 4, end: 24)
=== Euler method
Many systems can be modeled via differential equations. When an ordinary
differential equation can't be solved analytically the solution must be
approximated. There are many techniques: one of the simplest ones (yet less
accurate and efficient) is the forward Euler method, described by the following
equation:
$ y_(n + 1) = y_n + Delta dot.c f(x_n, y_n) $ <euler-method>
Where the function $y$ is the solution to a problem like
$cases(y(x_0) = y_0, y'(x) = f(x, y(x)))$
Where $y(x_0) = y_0$ is the initial condition of the system, and
$y' = f(x, y(x))$ is the known derivative of $y$. To approximate $y$ a step
$Delta$ is chosen (a smaller $Delta$ results in a better approximation).
@euler-method can be intuitively explained like this: the value of $y$ at each
step is the previous value of $y$ plus the value of its derivative $y'$
multiplied by $Delta$. In @euler-method, $y'$ is multiplied by $Delta$ because
when simulating one step of the system all the derivatives from $x_n$ to
$x_(n + 1)$ must be added up:
$ (x_(n + 1) - x_n) dot.c f(x_n, y_n) = Delta dot.c f(x_n, y_n) $
Let's consider the example in @euler-method-example.
$
cases(y(x_0) = 0, y'(x) = 2x), quad "with" Delta in { 1, 1/2, 1/3, 1/4 }
$ <euler-method-example>
The following program approximates @euler-method-example with different $Delta$
values.
#load-listing-from-file("listings/euler.cpp", start: 4, end: 6)
#load-listing-from-file("listings/euler.cpp", start: 17, end: 25)
The approximation in @euler-method-figure is close to $x^2$, but not very
precise, however, error analysis is beyond this guide's scope.
#figure(
caption: ```typ public/euler.svg```,
image("public/euler.svg", width: 74%),
) <euler-method-figure>
=== Monte Carlo method
Monte Carlo methods, or Monte Carlo experiments, are a broad class of
computational algorithms that rely on repeated random sampling to obtain
numerical results. @monte-carlo-method
The underlying concept is to use randomness to solve problems that might be
deterministic in principle [...] Monte Carlo methods are mainly used in three
distinct problem classes: optimization, numerical integration, and generating
draws from a probability distribution. @monte-carlo-method
#listing-problem[No budget left][
The following problem involves $"MyCAD"^trademark$ the next generation
#strong[C]omputer #strong[A]ided #strong[D]rawing software. After a year of
development, the remaining budget for $"MyCAD"^trademark$ is only
$550 euro$; during the past year it has been observed that the cost to
develop a new feature for $"MyCAD"^trademark$ is described by the uniform
distribution $cal(U)(300 euro, 1000 euro)$. In order to choose whether to
spend the remaining budget, find the probability that the next feature of
$"MyCAD"^trademark$ costs less than $550 euro$.
]
#load-listing-from-file("listings/montecarlo.cpp")
The idea behind the Monte Carlo method is to execute a large number of
*independent* experiments with the *same probability distribution* (i.i.d.
experiments). Each experiment yields a value and, given the law of large
numbers, the mean of the values yielded by the experiments tends to match to the
mean value of the distribution as the number of experiments increases.
In the _"No budget left"_ Problem the experiments can be modeled with a
Bernoulli distribution, since either the next feature costs less than $550 euro$
or not. The parameter $p$ of the Bernoulli distribution is the probability which
needs to be estimated.
Each experiment draws a uniform random number $c$ between 300 and 1000 (the cost
of the feature), and yields either $0$ or $1$ as described in
@montecarlo-bernoulli.
$
cases(
0 & quad c >= 550 euro,
1 & quad c < 550 euro
)
$ <montecarlo-bernoulli>
This means that the parameter $p$ of the Bernoulli distribution, which is the
probability that the feature costs less than $550 euro$, is calculated as
$
#math.frac([sum of the values yielded by the experiments], [total number of
experiments]) \ =^(1.) \
#math.frac([number of experiments with value $1$], [total number of
experiments]) \ =^(2.) \
#math.frac([number of experiments with value less than $550 euro$], [total
number of experiments])
$
1. $0$ is the identity element of the sum
2. by the definition in @montecarlo-bernoulli
This type of calculation can be very easily distributed on a HPC cluster, and is
generally an embarrassingly parallel problem @embarrassingly-parallel, since
each experiment is independent from the others.
// - TODO: find the analytical result, compare to simulation $approx 0.3569$
#pagebreak()
= C++ for modeling
This section covers the basics useful for the exam, assuming the reader already
knows `C` and has some knowledge about `OOP`.
== C++ prelude
`C++` is a strange language, and some of its quirks need to be pointed out to
have a better understanding of what the code does in later sections.
=== Operator overloading <operator-overloading>
#load-listing-from-file(
"listings/operator_overloading.cpp",
) <overloading-example>
C++, like many other languages, allows the programmer to define how a certain
operator should behave on an object (eg. `+, -, ++, +=, <<, (), [], etc...`).
This feature is called *operator overloading*, and many languages other than C++
support it:
- in `Python` operator overloading is done by implementing methods with special
names, like ```python __add__()``` @python-operator-overloading
- in `Rust` it's done by implementing `Trait`s associated with the operations,
like ```rust std::ops::Add``` @rust-operator-overloading.
- `Java` and `C` do *not* support operator overloading
For example, `std::cout` is an instance of the `std::basic_ostream class`, which
overloads the method "`operator<<()`" @basic-ostream; `std::cout << "Hello"` is
a valid piece of code which prints on the standard output the string `"Hello"`,
it does *not* do a bitwise left shift like it would in `C`.
== Randomness in the standard library <random-library>
The `C++` standard library offers tools to easily implement the Markov processes
discussed in @markov-chain and @mdp.
=== Randomness (random engines)
In `C++` there are many ways to generate random numbers
@pseudo-random-number-generation. Generally it's not recommended to use the
`random()` function. It's better to use a random generator (like
`std::default_random_engine`), because it's fast, deterministic (given a seed,
the sequence of generated numbers is the same) and can be used with
distributions. A `random_device` is a non deterministic generator: it uses a
*hardware entropy source* (if available) to generate the random numbers.
#load-listing-from-file("listings/random.cpp")
The typical course of action is to instantiate a `random_device /* 1 */`, and
use it to generate a seed `/* 2 */` for a `random_engine /* 3 */`.
Given that random engines can be used with distributions, they're really useful
to implement MDPs. Random number generation is an example of overloading of
`operator()` `/* 4 */`, like in @operator-overloading.
From this point on, `std::default_random_engine` will be referred to as `urng_t`
(uniform random number generator type) for brevity.
#figure[
```
#include <random>
/* The keyword "using" allows to create type aliases. */
using urng_t = std::default_random_engine;
/* The constructor of urng_t is called with parameter 190201. */
int main() { urng_t urng(190201); }
```
]
=== Probability distributions <distributions>
Just the capability to generate random numbers isn't enough, these numbers need
to be manipulated to fit certain needs. Luckily, `C++` covers most of them. To
give an idea, the MDP in @development-process can be easily simulated with the
code in @markov-chain-transition-matrix below.
#load-listing-from-file(
"listings/markov_chain_transition_matrix.cpp",
) <markov-chain-transition-matrix>
==== Uniform discrete distribution #cppreference("https://en.cppreference.com/w/cpp/numeric/random/uniform_int_distribution") <uniform-int>
#listing-problem[Sleepy system][
To test the _sleepy system_ $S$ it's necessary to build a generator that
sends a value $v_i$ to $S$ after waiting $delta_i$ seconds after sending
value $v_(i - 1)$ _(otherwise $S$ is idle)_. The value of $delta_i$ is an
integer chosen with the uniform distribution $cal(U)(20, 30)$.
]
The `C` code to compute $delta_i$ would be `delta = 20 + rand() % 11;`, which is
very *error prone*, hard to remember and has no semantic value.
In `C++` the same can be done in a simpler and cleaner way:
```
urng_t urng = pseudo_random_engine_from_device();
std::uniform_int_distribution<> random_delta(20, 30); /* 1 */
size_t delta = random_delta(urng); /* 2 */
```
The wait time $delta_i$ can be easily generated without needing to remember any
formula or trick `/* 2 */`. The distribution is defined once at the beginning
`/* 1 */`, and it can be easily changed without introducing bugs or
inconsistencies. It's also worth taking a look at a possible implementation of
Problem 2 (with the addition that the $i$-th value sent is $delta_i$, meaning
$v_i = delta_i$), as it comes up very often in software models.
#load-listing-from-file("listings/time_intervals_generator.cpp")
The `uniform_int_distribution` has many other uses, for example, it could
uniformly generate a random state in a MDP. Let `STATES_SIZE` be the number of
states
```
uniform_int_distribution<> random_state(0, STATES_SIZE-1 /* 1 */);
```
`random_state` generates a random state when used. Be careful! Remember to use
`STATES_SIZE-1` #raw-ref(1), because `uniform_int_distribution` is inclusive.
Forgetting `-1` can lead to very sneaky bugs, like random segfaults at different
instructions. It's very hard to debug unless using `gdb`. The
`uniform_int_distribution` can also generate negative integers, for example
$z in { x | x in ZZ and x in [-10, 15]}$.
==== Uniform continuous distribution #cppreference("https://en.cppreference.com/w/cpp/numeric/random/uniform_real_distribution") <uniform-real>
It's the same as above, with the difference that it generates *real* numbers in
the range $[a, b) subset RR$.
==== Bernoulli distribution #cppreference("https://en.cppreference.com/w/cpp/numeric/random/bernoulli_distribution") <bernoulli>
#listing-problem[Network protocols][
To model a network protocol $P$ it's necessary to model requests. When sent,
a request can randomly fail with probability $p = 0.001$.
]
Generally, a random fail can be simulated by generating $r in [0, 1]$ and
checking whether $r < p$.
```
std::uniform_real_distribution<> uniform(0, 1);
if (uniform(urng) < 0.001) {
fail();
}
```
A `std::bernoulli_distribution` is a better fit for this specification, as it
generates a boolean value and its semantics represents "an event that could
happen with a certain probability $p$".
```
std::bernoulli_distribution random_fail(0.001);
if (random_fail(urng)) {
fail();
}
```
#pagebreak()
==== Normal distribution #cppreference("https://en.cppreference.com/w/cpp/numeric/random/normal_distribution") <normal>
Typical Normal distribution, requires the mean #raw-ref(1) and the stddev
#raw-ref(2) .
#load-listing-from-file("listings/normal_distribution.cpp")
```bash
8 **
9 ****
10 *******
11 *********
12 *********
13 *******
14 ****
15 **
```
==== Exponential distribution #cppreference("https://en.cppreference.com/w/cpp/numeric/random/exponential_distribution") <exponential>
#listing-problem[Cheaper servers][
A server receives requests at a rate of 5 requests per minute from each
client. You want to rebuild the architecture of the server to make it
cheaper. To test if the new architecture can handle the load, it's required
to build a model of client that sends requests at random intervals with an
expected rate of 5 requests per minute.
]
It's easier to simulate the system in seconds (to have more precise
measurements). If the client sends 5/min, the rate in seconds should be
$lambda = 5 / 60 ~ 0.083$ requests per second.
#load-listing-from-file("listings/exponential_distribution.cpp")
The code above has a counter to measure how many requests were sent each minute.
A new counter is added every 60 seconds #raw-ref(1) , and it's incremented by 1
each time a request is sent #raw-ref(2) . At the end, the average of the counts
is calculated #raw-ref(3) , and it comes out to be about 5 requests every 60
seconds (or 5 requests per minute).
==== Poisson distribution #cppreference("https://en.cppreference.com/w/cpp/numeric/random/poisson_distribution") <poisson>
The Poisson distribution is closely related to the Exponential distribution, as
it randomly generates a number of items in a time unit given the average rate.
#load-listing-from-file("listings/poisson_distribution.cpp")
```bash
0 *
1 *******
2 **************
3 *******************
4 ********************
5 ***************
6 **********
7 *****
8 **
9 *
```
==== Geometric distribution #cppreference("https://en.cppreference.com/w/cpp/numeric/random/geometric_distribution") <geometric>
A typical geometric distribution, has the same API as the others.
=== Discrete distribution and transition matrices #cppreference("https://en.cppreference.com/w/cpp/numeric/random/discrete_distribution") <discrete>
#listing-problem[E-commerce][
To choose the architecture for an e-commerce it's necessary to simulate
realistic purchases. After interviewing 678 people it's determined that 232
of them would buy a shirt from your e-commerce, 158 would buy a hoodie and
the other 288 would buy pants.
]
The objective is to simulate random purchases reflecting the results of the
interviews. One way to do it is to calculate the percentage of buyers for each
item, generate $r in [0, 1]$, and do some checks on $r$. However, this
specification can be implemented very easily in `C++` by using a
`std::discrete_distribution`, without having to do any calculation or write
complex logic.
#load-listing-from-file("listings/discrete_distribution.cpp")
The `rand_item` instance generates a random integer $x in {0, 1, 2}$ (because 3
items were specified in the array #raw-ref(1) , if the items were 10, then $x$
would have been s.t. $0 <= x <= 9$). The `= {a, b, c}` syntax can be used to
initialize a discrete distribution because `C++` allows passing a
`std::array` to a constructor @std-array.
The `discrete_distribution` uses the weights in the array to generate the probability
for each integer. For example, the probability to generate `0` would be
calculated as $232 / (232 + 158 + 288)$, the probability to generate `1` would
be $158 / (232 + 158 + 288)$ and the probability to generate `2` would be
$288 / (232 + 158 + 288)$. This way, the sum of the probabilities is always 1,
and the probability is proportional to the weight.
To map the integers to the actual items #raw-ref(2) an `enum` is used: for
simple enums each entry can be converted automatically to its integer value (and
viceversa). In `C++` there is another construct, the `enum class` which doesn't
allow implicit conversion (the conversion must be done with a function or with
`static_cast`), but it's more typesafe. // (see @enum).
The `discrete_distribution` can also be used for transition matrices, like the
one in @rainy-sunny-transition-matrix. It's enough to assign each state a number
(e.g. `sunny = 0, rainy = 1`), and model the transition probability of *each
state* as a discrete distribution.
```
std::discrete_distribution<> markov_chain_transition_matrix[] = {
    /* 0 */ { /* 0 */ 0.8, /* 1 */ 0.2},
    /* 1 */ { /* 0 */ 0.5, /* 1 */ 0.5}
};
```
In the example above the probability to go from state `0 (sunny)` to `0 (sunny)`
is 0.8, the probability to go from state `0 (sunny)` to `1 (rainy)` is 0.2
etc...
The `discrete_distribution` can be initialized if the weights aren't already
know and must be calculated.
#figure(caption: ```typ practice/2025-01-09/1/main.cpp```)[
```
for (auto &weights /* 1 */ : matrix) {
    markov_chain_transition_matrix.push_back(
        std::discrete_distribution<>(
            weights.begin(), /* 2 */ weights.end() /* 3 */ )
    );
}
```
]
The weights are stored in a `vector` #raw-ref(1) , and the
`discrete_distribution` for each state is initialized by indicating the pointer
at the beginning #raw-ref(2) and at the end #raw-ref(3) of the vector. This
works with dynamic arrays too.
#pagebreak()
// == Memory and data structures
//
// === Manual memory allocation
//
// If you allocate with `new`, you must deallocate with `delete`, you can't mixup
// them with ```c malloc()``` and ```c free()```
//
// To avoid manual memory allocation, most of the time it's enough to use the
// structures in the standard library, like `std::vector<T>`.
//
// === Data structures
//
// ==== Vectors <std-vector>
// // === `std::vector<T>()` <std-vector>
//
// You don't have to allocate memory, basically never! You just use the structures
// that are implemented in the standard library, and most of the time they are
// enough for our use cases. They are really easy to use.
//
// Vectors can be used as stacks.
//
// ==== Deques <std-deque>
//
// // === ```cpp std::deque<T>()``` <std-deque>
// Deques are very common, they are like vectors, but can be pushed and popped in
// both ends, and can b used as queues.
//
// ==== Sets <std-set>
//
// Not needed as much, works like the Python set. Can be either a set (ordered) or
// an unordered set (uses hashes)
//
// ==== Maps <std-map>
//
// Could be useful. Can be either a map (ordered) or an unordered map (uses hashes)
//
// == Simplest method to work with *files*
// // == Input/Output
// //
// // Input output is very simple in C++.
// //
// // === Standard I/O <iostream>
// //
// // === Files <files>
// //
// // Working with files is way easier in `C++`
// //
// // ```cpp
// // #include <fstream>
// //
// // int main(){
// // std::ofstream output("output.txt");
// // std::ifstream params("params.txt");
// //
// // while (etc...) {}
// //
// // output.close();
// // params.close();
// // }
// // ```
//
// == Program structure
//
// === Classes
//
// - TODO:
// - Maybe constructor
// - Maybe operators? (more like nah)
// - virtual stuff (interfaces)
//
// === Structs
//
// - basically like classes, but with everything public by default
//
// === Enums <enum>
//
// - enum vs enum class
// - an example maybe
// - they are useful enough to model a finite domain
//
// === Inheritance
//
// #pagebreak()
//
// = Fixing segfaults with gdb
//
// It's super useful! Trust me, if you learn this everything is way easier (printf
// won't be useful anymore)
//
// First of all, use the `-ggdb3` flags to compile the code. Remember to not use
// any optimization like `-O3`... using optimizations makes the program harder to
// debug.
//
// ```makefile
// DEBUG_FLAGS := -ggdb3 -Wall -Wextra -pedantic
// ```
//
// Then it's as easy as running `gdb ./main`
//
// - TODO: could be useful to write a script if too many args
// - TODO: just bash code to compile and run
// - TODO (just the most useful stuff, technically not enough):
// - r
// - c
// - n
// - c 10
// - enter (last instruction)
// - b
// - on lines
// - on symbols
// - on specific files
// - clear
// - display
// - set print pretty on
//
//
// #pagebreak()
= Code presented in lectures
Each example has 4 digits `xxxx` that are the same as the ones in the `software`
folder in the course material. The code will be *as simple as possible* to
better explain the core functionality, but it's *strongly suggested* to try to
add structure _(classes, etc.)_ where it *seems fit*.
== First examples
This section puts together the *formal definitions* and the `C++` knowledge to
implement some simple MDPs.
=== A simple MDP `"course/1100"` <a-simple-markov-chain>
The first MDP example is $M = (U, Y, X, p, g)$ where // #footnote[See @mdp-example]
- $U = {epsilon}$
- $Y = X$ the set of outputs matches the set of states
- $X = [0,1] times [0,1] = [0, 1]^2$ each state is a vector of two real numbers
- $p : X times X times U -> [0, 1]$, the transition probability, is uniform over
  $X$ for each input
- $g : X -> Y : x |-> x$ outputs the current state
- $(0, 0)$ is the initial state
#load-listing-from-file("course/1100/main.cpp")
=== Network of MDPs pt.1 `"course/1200"` <simple-mdps-connection-1>
This example has two discrete-time MDPs $M_1, M_2$ s.t.
- $M_1 = (U_1, Y_1, X_1, p_1, g_1)$
- $M_2 =(U_2, Y_2, X_2, p_2, g_2)$
$M_1$ and $M_2$ are similar to the MDP in @a-simple-markov-chain (i.e.
$X = [0, 1]^2$), with the difference that $forall i in {1, 2} space U_i = X_i$,
and $p$ is redefined in this example in the following way:
$
forall i in {1, 2}, x', x in X, u in U quad p_i (x'|x, u) = cases(1 & "if" x' = u, 0 & "otherwise")
$
#listing-def("Discrete time steps")[
Given a time step $t in NN$, let $U(t), X(t)$ be respectively the input and
state at time $t$.
]
The value of $U(t+1)$ for each MDP in this example is defined as
$
& U_1(t + 1) = vec(x_1 dot.c cal(U)(0, 1), x_2 dot.c cal(U)(0, 1)) "where" g_2 (X(t)) = vec(x_1, x_2) \
& U_2(t + 1) = vec(x_1 + cal(U)(0, 1), x_2 + cal(U)(0, 1)) "where" g_1 (X(t)) = vec(x_1, x_2)
$ <mdps-connection-1>
Thus, given that $X_i (t) =^1 U_i (t)$ with probability 1,
$g_i (X_i (t)) =^2 X_i (t)$, and the definition in @mdps-connection-1, the
connection between $M_1$ and $M_2$ can be defined as
// $U_i = [0, 1] times [0, 1]$, having
// Given that $g_i (X_i (t)) = X_i (t)$ and $U_i (t) = X_i (t)$, the connection in
// @mdps-connection-1 can be simplified:
$
& X_1 (t + 1) =^1 vec(x_1 dot.c cal(U)(0, 1), x_2 dot.c cal(U)(0, 1)) "where" X_2 (t) =^2 vec(x_1, x_2) \
& X_2 (t + 1) =^1 vec(x_1 + cal(U)(0, 1), x_2 + cal(U)(0, 1)) "where" X_1(t) =^2 vec(x_1, x_2)
$ <mdps-connection-2>
With @mdps-connection-2 the code is easier to write, but this approach works
only for small examples. For more complex systems it's better to design a module
for each component and handle the connections more explicitly.
#load-listing-from-file("course/1200/main.cpp")
=== Network of MDPs pt.2 `"course/1300"`
This example is similar to the one in @simple-mdps-connection-1, with a few
notable differences:
- $U_i = Y_i = X_i = RR times RR$
- the initial states are $x_1 = (1, 1) in X_1, x_2 = (2, 2) in X_2$
- the connections are slightly more complex.
- no probability is involved
Having
$
p(vec(x_1 ', x_2 ')|vec(x_1, x_2), vec(u_1, u_2)) = cases(1 & "if" ..., 0 & "otherwise") " where ..."
$
#load-listing-from-file("course/1300/main.cpp") <mdps-connection-3>
=== Network of MDPs pt.3 `"course/1400"`
The original model behaves exactly like @mdps-connection-3, with a different
implementation. As an exercise, the reader is encouraged to come up with a
different implementation for @mdps-connection-3.
#pagebreak()
== Traffic light `"course/2000"`
This example models a traffic light. The three original versions presented in
the course (`2100`, `2200` and `2300`) all have the same behaviour, with
different implementations. The code reported in this document behaves like the
original versions, with a simpler implementation. Let $T$ be the MDP that
describes the traffic light where
- $U = {epsilon, sigma}$ where
- $epsilon$ means _"do nothing"_
- $sigma$ means _"switch light"_
- $Y = X$
- $X = {GG, RR, YY}$ where
- $GG = "green"$
- $RR = "red"$
- $YY = "yellow"$
- $g(x) = x$