YassineYousfi commited on
Commit
06242ba
1 Parent(s): 0b1e65b
Files changed (18) hide show
  1. Makefile +9 -0
  2. README copy.md +2 -0
  3. app.py +7 -0
  4. common.cpp +177 -0
  5. common.h +35 -0
  6. example.py +36 -0
  7. lib/stc.so +0 -0
  8. requirements.txt +1 -0
  9. sse_mathfun.h +762 -0
  10. stc.py +192 -0
  11. stc_embed_c.cpp +476 -0
  12. stc_embed_c.h +22 -0
  13. stc_extract_c.cpp +101 -0
  14. stc_extract_c.h +19 -0
  15. stc_interface.cpp +48 -0
  16. stc_interface.h +13 -0
  17. stc_ml_c.cpp +932 -0
  18. stc_ml_c.h +64 -0
Makefile ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+
2
# Build lib/stc.so, the shared library loaded from Python (stc.py) via ctypes.
SRC = stc_interface.cpp stc_embed_c.cpp stc_extract_c.cpp common.cpp stc_ml_c.cpp
# Derive object names from SRC instead of maintaining a duplicate list.
OBJ = $(SRC:.cpp=.o)

# 'default' and 'clean' produce no files of those names.
.PHONY: default clean

default:
	mkdir -p lib
	g++ -std=c++98 -fPIC -O3 -c $(SRC)
	g++ -shared -o lib/stc.so $(OBJ)
	rm -f *.o

clean:
	rm -f *.o *.pyc
README copy.md ADDED
@@ -0,0 +1,2 @@
 
 
1
# pySTC
A Python interface for [Syndrome Trellis Codes](http://dde.binghamton.edu/download/syndrome/) steganography.
app.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
1
import gradio as gr


def greet(name):
    """Build the demo greeting string for ``name``."""
    message = "Hello " + name + "!!"
    return message


# Minimal Gradio text-in/text-out demo around greet().
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()
common.cpp ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include "common.h"

#include <boost/random/uniform_int.hpp>
#include <boost/random/variate_generator.hpp>
#include <boost/random/mersenne_twister.hpp>

/*
 * Precomputed submatrix columns for STC parity-check matrices.
 *
 * Layout: 6 groups, one per constraint height h = 7..12 (in that order).
 * Each group holds 20 rows, one per submatrix width w = 1..20; each row is
 * padded with zeros to 20 u32 entries.  getMatrix() indexes the table as
 * mats[(h - 7) * 400 + (w - 1) * 20] and copies the first w entries.
 * Each entry encodes one column as an h-bit mask (values fit in [2^(h-1), 2^h)).
 * The w = 1 row of every group is all zeros (that width is not served from
 * the table; getMatrix() falls back to random generation for it).
 */
u32 mats[] = {
// h = 7
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
109, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
109, 79, 83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
89, 127, 99, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
95, 75, 121, 71, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
71, 117, 127, 75, 89, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
111, 83, 127, 97, 77, 117, 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
113, 111, 87, 93, 99, 73, 117, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
89, 97, 115, 81, 77, 117, 87, 127, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
95, 107, 109, 79, 117, 67, 121, 123, 103, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
117, 71, 109, 79, 101, 115, 123, 81, 77, 95, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0,
119, 73, 81, 125, 123, 103, 99, 127, 109, 69, 89, 107, 0, 0, 0, 0, 0, 0, 0, 0,
87, 127, 117, 81, 97, 67, 101, 93, 105, 109, 75, 115, 123, 0, 0, 0, 0, 0, 0, 0,
93, 107, 115, 95, 121, 81, 75, 99, 111, 85, 79, 119, 105, 65, 0, 0, 0, 0, 0, 0,
123, 85, 79, 87, 127, 65, 115, 93, 101, 111, 73, 119, 105, 99, 91, 0, 0, 0, 0, 0,
127, 99, 121, 111, 71, 109, 103, 117, 113, 65, 105, 87, 101, 75, 93, 123, 0, 0, 0, 0,
89, 93, 111, 117, 103, 127, 77, 95, 85, 105, 67, 69, 113, 123, 99, 75, 119, 0, 0, 0,
65, 99, 77, 85, 101, 91, 125, 103, 127, 111, 69, 93, 75, 95, 119, 113, 105, 115, 0, 0,
91, 117, 77, 107, 101, 127, 115, 83, 85, 119, 105, 113, 93, 71, 111, 121, 97, 73, 81, 0,
95, 111, 117, 83, 97, 75, 87, 127, 85, 93, 105, 115, 77, 101, 99, 89, 71, 121, 67, 123,
// h = 8
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
247, 149, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
143, 187, 233, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
235, 141, 161, 207, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
219, 185, 151, 255, 197, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
251, 159, 217, 167, 221, 133, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
201, 143, 231, 251, 189, 169, 155, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
143, 245, 177, 253, 217, 163, 155, 197, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
233, 145, 219, 185, 231, 215, 173, 129, 243, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
139, 201, 177, 167, 213, 253, 227, 199, 185, 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
183, 145, 223, 199, 245, 139, 187, 157, 217, 237, 163, 0, 0, 0, 0, 0, 0, 0, 0, 0,
223, 145, 137, 219, 197, 243, 247, 189, 135, 181, 207, 235, 0, 0, 0, 0, 0, 0, 0, 0,
229, 205, 237, 187, 135, 241, 183, 163, 151, 243, 213, 137, 159, 0, 0, 0, 0, 0, 0, 0,
205, 165, 239, 211, 231, 247, 133, 227, 219, 189, 249, 185, 149, 129, 0, 0, 0, 0, 0, 0,
131, 213, 255, 207, 227, 221, 173, 185, 197, 147, 235, 247, 217, 143, 229, 0, 0, 0, 0, 0,
247, 139, 157, 223, 187, 147, 177, 249, 165, 153, 161, 227, 237, 255, 207, 197, 0, 0, 0, 0,
205, 139, 239, 183, 147, 187, 249, 225, 253, 163, 173, 233, 209, 159, 255, 149, 197, 0, 0, 0,
177, 173, 195, 137, 211, 249, 191, 135, 175, 155, 229, 215, 203, 225, 247, 237, 221, 227, 0, 0,
159, 189, 195, 163, 255, 147, 219, 247, 231, 157, 139, 173, 185, 197, 207, 245, 193, 241, 233, 0,
235, 179, 219, 253, 241, 131, 213, 231, 247, 223, 201, 193, 191, 249, 145, 237, 155, 165, 141, 173,
// h = 9
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
339, 489, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
469, 441, 379, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
371, 439, 277, 479, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
413, 489, 443, 327, 357, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509, 453, 363, 409, 425, 303, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
377, 337, 443, 487, 467, 421, 299, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497, 349, 279, 395, 365, 427, 399, 297, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
435, 373, 395, 507, 441, 325, 279, 289, 319, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
301, 379, 509, 411, 293, 467, 455, 261, 343, 447, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
367, 289, 445, 397, 491, 279, 373, 315, 435, 473, 327, 0, 0, 0, 0, 0, 0, 0, 0, 0,
465, 379, 319, 275, 293, 407, 373, 427, 445, 497, 347, 417, 0, 0, 0, 0, 0, 0, 0, 0,
473, 401, 267, 311, 359, 347, 333, 441, 405, 381, 497, 463, 269, 0, 0, 0, 0, 0, 0, 0,
467, 283, 405, 303, 269, 337, 385, 441, 511, 361, 455, 355, 353, 311, 0, 0, 0, 0, 0, 0,
489, 311, 259, 287, 445, 471, 419, 345, 289, 391, 405, 411, 371, 457, 331, 0, 0, 0, 0, 0,
493, 427, 305, 309, 339, 447, 381, 335, 323, 423, 453, 457, 443, 313, 371, 353, 0, 0, 0, 0,
271, 301, 483, 401, 369, 367, 435, 329, 319, 473, 441, 491, 325, 455, 389, 341, 317, 0, 0, 0,
333, 311, 509, 319, 391, 441, 279, 467, 263, 487, 393, 405, 473, 303, 353, 337, 451, 365, 0, 0,
301, 477, 361, 445, 505, 363, 375, 277, 271, 353, 337, 503, 457, 357, 287, 323, 435, 345, 497, 0,
281, 361, 413, 287, 475, 359, 483, 351, 337, 425, 453, 423, 301, 309, 331, 499, 507, 277, 375, 471,
// h = 10
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519, 885, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
579, 943, 781, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
685, 663, 947, 805, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
959, 729, 679, 609, 843, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
959, 973, 793, 747, 573, 659, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
631, 559, 1023, 805, 709, 913, 979, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
607, 867, 731, 1013, 625, 973, 825, 925, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
743, 727, 851, 961, 813, 605, 527, 563, 867, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
863, 921, 943, 523, 653, 969, 563, 597, 753, 621, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
729, 747, 901, 839, 815, 935, 777, 641, 1011, 603, 973, 0, 0, 0, 0, 0, 0, 0, 0, 0,
581, 831, 659, 877, 781, 929, 1003, 1021, 655, 729, 983, 611, 0, 0, 0, 0, 0, 0, 0, 0,
873, 1013, 859, 887, 579, 697, 769, 927, 679, 683, 911, 753, 733, 0, 0, 0, 0, 0, 0, 0,
991, 767, 845, 977, 923, 609, 633, 769, 533, 829, 859, 759, 687, 657, 0, 0, 0, 0, 0, 0,
781, 663, 731, 829, 851, 941, 601, 997, 719, 675, 947, 939, 657, 549, 647, 0, 0, 0, 0, 0,
619, 879, 681, 601, 1015, 797, 737, 841, 839, 869, 931, 789, 767, 547, 823, 635, 0, 0, 0, 0,
855, 567, 591, 1019, 745, 945, 769, 671, 803, 799, 925, 701, 517, 653, 885, 731, 581, 0, 0, 0,
887, 643, 785, 611, 905, 669, 703, 1017, 575, 763, 625, 869, 731, 861, 847, 941, 933, 577, 0, 0,
867, 991, 1021, 709, 599, 741, 933, 921, 619, 789, 957, 791, 969, 525, 591, 763, 657, 683, 829, 0,
1009, 1003, 901, 715, 643, 803, 805, 975, 667, 619, 569, 769, 685, 767, 853, 671, 881, 907, 955, 523,
// h = 11
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1655, 1493, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1859, 1481, 1119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1395, 1737, 1973, 1259, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1339, 1067, 1679, 1641, 2021, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1657, 1331, 1783, 2043, 1097, 1485, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1611, 1141, 1849, 2001, 1511, 1359, 1245, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1215, 1733, 1461, 2025, 1251, 1945, 1649, 1851, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1275, 1373, 1841, 1509, 1631, 1737, 1055, 1891, 1041, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1715, 1117, 1503, 2025, 1027, 1959, 1365, 1739, 1301, 1233, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1101, 1127, 1145, 1157, 1195, 1747, 1885, 1527, 1325, 2033, 1935, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1369, 1255, 1809, 1889, 1183, 1495, 1223, 1781, 2029, 1327, 1075, 1065, 0, 0, 0, 0, 0, 0, 0, 0,
1157, 1499, 1871, 1365, 1559, 1149, 1293, 1571, 1641, 1971, 1807, 1673, 2023, 0, 0, 0, 0, 0, 0, 0,
1929, 1533, 1135, 1359, 1547, 1723, 1529, 1107, 1273, 1879, 1709, 1141, 1897, 1161, 0, 0, 0, 0, 0, 0,
1861, 1801, 1675, 1699, 1103, 1665, 1657, 1287, 1459, 2047, 1181, 1835, 1085, 1377, 1511, 0, 0, 0, 0, 0,
1915, 1753, 1945, 1391, 1205, 1867, 1895, 1439, 1719, 1185, 1685, 1139, 1229, 1791, 1821, 1295, 0, 0, 0, 0,
1193, 1951, 1469, 1737, 1047, 1227, 1989, 1717, 1735, 1643, 1857, 1965, 1405, 1575, 1907, 1173, 1299, 0, 0, 0,
1641, 1887, 1129, 1357, 1543, 1279, 1687, 1975, 1839, 1775, 1109, 1337, 1081, 1435, 1603, 2037, 1249, 1153, 0, 0,
1999, 1065, 1387, 1977, 1555, 1915, 1219, 1469, 1889, 1933, 1819, 1315, 1319, 1693, 1143, 1361, 1815, 1109, 1631, 0,
1253, 1051, 1827, 1871, 1613, 1759, 2015, 1229, 1585, 1057, 1409, 1831, 1943, 1491, 1557, 1195, 1339, 1449, 1675, 1679,
// h = 12
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
3475, 2685, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
3865, 2883, 2519, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4019, 3383, 3029, 2397, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2725, 3703, 3391, 2235, 2669, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2489, 3151, 2695, 3353, 4029, 3867, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2467, 2137, 3047, 3881, 3125, 2683, 3631, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2739, 3163, 2137, 4031, 2967, 3413, 3749, 2301, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
3443, 2305, 3365, 2231, 2127, 3697, 3535, 4041, 2621, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
3641, 2777, 2789, 2357, 3003, 2729, 3229, 2925, 3443, 2291, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
3567, 2361, 2061, 2219, 3905, 2285, 2871, 3187, 2455, 2783, 2685, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4043, 2615, 2385, 3911, 3267, 2871, 3667, 3037, 2905, 2921, 2129, 2299, 0, 0, 0, 0, 0, 0, 0, 0,
2315, 2997, 3743, 2729, 3117, 2297, 2585, 3141, 3283, 3943, 3613, 3345, 4047, 0, 0, 0, 0, 0, 0, 0,
3967, 3069, 3377, 3909, 3691, 2439, 2533, 3075, 2129, 3319, 3433, 3035, 2745, 2631, 0, 0, 0, 0, 0, 0,
3023, 3349, 2111, 2385, 3907, 3959, 3425, 3801, 2135, 2671, 2637, 2977, 2999, 3107, 2277, 0, 0, 0, 0, 0,
2713, 2695, 3447, 2537, 2685, 3755, 3953, 3901, 3193, 3107, 2407, 3485, 2097, 3091, 2139, 2261, 0, 0, 0, 0,
3065, 4059, 2813, 3043, 2849, 3477, 3205, 3381, 2747, 3203, 3937, 3603, 3625, 3559, 3831, 2243, 2343, 0, 0, 0,
3999, 3183, 2717, 2307, 2103, 3353, 2761, 2541, 2375, 2327, 3277, 2607, 3867, 3037, 2163, 2261, 3649, 2929, 0, 0,
2543, 2415, 3867, 3709, 3161, 2369, 4087, 2205, 3785, 2515, 2133, 2913, 3941, 3371, 2605, 3269, 3385, 3025, 2323, 0,
2939, 2775, 3663, 2413, 2573, 2205, 3821, 3513, 2699, 3379, 2479, 2663, 2367, 2517, 3027, 3201, 3177, 3281, 4069, 2069,
};
132
+
133
+ u32 *getMatrix(int width, int height) {
134
+ u32 *cols;
135
+ cols = (u32*)malloc(width * sizeof(u32));
136
+
137
+ if(width >= 2 && width <= 20 && height >= 7 && height <= 12) { // get it from the array
138
+ memcpy(cols, &mats[(height - 7) * 400 + (width - 1) * 20], width * sizeof(u32));
139
+ } else { // generate a random one
140
+ int i, j;
141
+ u32 r, mask, bop;
142
+
143
+ /* This was here because random submatrices designed with the same columns are known to be bad. But sometimes the
144
+ * payload is so small that there is no other way.
145
+ *
146
+ * Modified by Tomas Filler.
147
+ */
148
+
149
+ boost::mt19937 generator( 1 );
150
+ boost::variate_generator< boost::mt19937&, boost::uniform_int< > > rng( generator, boost::uniform_int< >( 0, RAND_MAX ) );
151
+
152
+ mask = (1 << (height - 2)) - 1;
153
+ bop = (1 << (height - 1)) + 1;
154
+ if((1 << (height - 2)) < width) {
155
+ // fprintf(stderr, "Cannot generate matrix for this payload. Choose a higher constraint height.\n");
156
+ // generate the columns randomly but let first and last row be full of 1s.
157
+ // I know, there will be identical columns.
158
+ for(i = 0; i < width; i++) {
159
+ r = ((rng() & mask) << 1) + bop;
160
+ cols[i] = r;
161
+ }
162
+ } else {
163
+ for(i = 0; i < width; i++) {
164
+ for(j = -1; j < i;) {
165
+ r = ((rng() & mask) << 1) + bop;
166
+ for(j = 0; j < i; j++) {
167
+ if(cols[j] == r)
168
+ break;
169
+ }
170
+ }
171
+ cols[i] = r;
172
+ }
173
+ }
174
+
175
+ }
176
+ return cols;
177
+ }
common.h ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#ifndef COMMON_H
#define COMMON_H

#include <string>

// Short integer aliases used throughout the STC code base.
typedef unsigned int u32;
typedef unsigned short u16;
typedef unsigned char u8;

// Precomputed submatrix column table (defined in common.cpp); consumed by getMatrix().
extern u32 mats[];

/* Simple class for throwing exceptions */
// Carries a human-readable message plus a numeric error_id (see the list below).
class stc_exception : public std::exception {
public:
    stc_exception(std::string message, u32 error_id) { this->message = message; this->error_id = error_id; }
    virtual ~stc_exception() throw() {}
    virtual const char* what() const throw() { return message.c_str(); }
    u32 error_id; // numeric error code, public so callers can switch on it
private:
    std::string message;
};

/*
 The following error_ids are in use:
   1 = Submatrix height must not exceed 31.
   2 = Not enough memory.
   3 = The message cannot be longer than the cover object.
   4 = No solution exists. - This happen when there are too many Inf values in cost vector and thus the solution does not exist due to sparse parity-check matrix.
   5 = Price vector limit exceeded. - There is a limit to cost elements when you use integer version of the algorithm. Try to use costs in double.
   6 = Maximum number of trials in layered construction exceeded.
*/

// Returns a malloc()-allocated array of 'width' submatrix columns for
// constraint height 'height'; the caller is responsible for free()ing it.
u32 *getMatrix(int width, int height);

#endif
example.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""Example: compute HILL embedding costs, then embed and extract a message."""

import stc
import numpy as np
import imageio
from scipy import signal

input_image = 'files/1.pgm'


def HILL(input_image):
    """Return the HILL embedding-cost map for the image at ``input_image``."""
    # High-pass residual kernel.
    high_pass = np.array([[-1, 2, -1],
                          [2, -4, 2],
                          [-1, 2, -1]])
    # Two averaging (low-pass) kernels of different support.
    avg_small = np.ones((3, 3)).astype('float32') / (3 ** 2)
    avg_large = np.ones((15, 15)).astype('float32') / (15 ** 2)

    pixels = imageio.imread(input_image)
    residual = abs(signal.convolve2d(pixels, high_pass, mode='same'))
    smoothed = signal.convolve2d(residual, avg_small, mode='same')
    inverted = 1 / smoothed
    cost_map = signal.convolve2d(inverted, avg_large, mode='same')
    # Zero residuals produce inf after the reciprocal; clamp them.
    cost_map[cost_map == np.inf] = 1
    return cost_map


costs = HILL(input_image)
print(costs)

stc.embed(input_image, costs, 'files/message.txt', 's3cr3t', 'files/stego.png')
stc.extract('files/stego.png', 's3cr3t', 'files/output.txt')

print(open('files/output.txt', 'r').read())
lib/stc.so ADDED
Binary file (75.8 kB). View file
requirements.txt ADDED
@@ -0,0 +1 @@
 
1
+ pycryptodome
sse_mathfun.h ADDED
@@ -0,0 +1,762 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* SIMD (SSE1+MMX or SSE2) implementation of sin, cos, exp and log
2
+
3
+ Inspired by Intel Approximate Math library, and based on the
4
+ corresponding algorithms of the cephes math library
5
+
6
+ The default is to use the SSE1 version. If you define USE_SSE2 the
7
+ the SSE2 intrinsics will be used in place of the MMX intrinsics. Do
8
+ not expect any significant performance improvement with SSE2.
9
+ */
10
+
11
+ /* Copyright (C) 2007 Julien Pommier
12
+
13
+ This software is provided 'as-is', without any express or implied
14
+ warranty. In no event will the authors be held liable for any damages
15
+ arising from the use of this software.
16
+
17
+ Permission is granted to anyone to use this software for any purpose,
18
+ including commercial applications, and to alter it and redistribute it
19
+ freely, subject to the following restrictions:
20
+
21
+ 1. The origin of this software must not be misrepresented; you must not
22
+ claim that you wrote the original software. If you use this software
23
+ in a product, an acknowledgment in the product documentation would be
24
+ appreciated but is not required.
25
+ 2. Altered source versions must be plainly marked as such, and must not be
26
+ misrepresented as being the original software.
27
+ 3. This notice may not be removed or altered from any source distribution.
28
+
29
+ (this is the zlib license)
30
+ */
31
+
32
+ #include <xmmintrin.h>
33
+
34
+ /* yes I know, the top of this file is quite ugly */
35
+
36
+ #define USE_SSE2 // use SSE2 version
37
+
38
+ #ifdef _MSC_VER /* visual c++ */
39
+ # define ALIGN16_BEG __declspec(align(16))
40
+ # define ALIGN16_END
41
+ #else /* gcc or icc */
42
+ # define ALIGN16_BEG
43
+ # define ALIGN16_END __attribute__((aligned(16)))
44
+ #endif
45
+
46
+ /* __m128 is ugly to write */
47
+ typedef __m128 v4sf; // vector of 4 float (sse1)
48
+
49
+ #ifdef USE_SSE2
50
+ # include <emmintrin.h>
51
+ typedef __m128i v4si; // vector of 4 int (sse2)
52
+ #else
53
+ typedef __m64 v2si; // vector of 2 int (mmx)
54
+ #endif
55
+
56
+ /* declare some SSE constants -- why can't I figure a better way to do that? */
57
+ #define _PS_CONST(Name, Val) \
58
+ static const ALIGN16_BEG float _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
59
+ #define _PI32_CONST(Name, Val) \
60
+ static const ALIGN16_BEG int _pi32_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
61
+ #define _PS_CONST_TYPE(Name, Type, Val) \
62
+ static const ALIGN16_BEG Type _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
63
+
64
+ _PS_CONST(1 , 1.0f);
65
+ _PS_CONST(0p5, 0.5f);
66
+ /* the smallest non denormalized float number */
67
+ _PS_CONST_TYPE(min_norm_pos, int, 0x00800000);
68
+ _PS_CONST_TYPE(mant_mask, int, 0x7f800000);
69
+ _PS_CONST_TYPE(inv_mant_mask, int, ~0x7f800000);
70
+
71
+ _PS_CONST_TYPE(sign_mask, int, 0x80000000);
72
+ _PS_CONST_TYPE(inv_sign_mask, int, ~0x80000000);
73
+
74
+ _PI32_CONST(1, 1);
75
+ _PI32_CONST(inv1, ~1);
76
+ _PI32_CONST(2, 2);
77
+ _PI32_CONST(4, 4);
78
+ _PI32_CONST(0x7f, 0x7f);
79
+
80
+ _PS_CONST(cephes_SQRTHF, 0.707106781186547524);
81
+ _PS_CONST(cephes_log_p0, 7.0376836292E-2);
82
+ _PS_CONST(cephes_log_p1, - 1.1514610310E-1);
83
+ _PS_CONST(cephes_log_p2, 1.1676998740E-1);
84
+ _PS_CONST(cephes_log_p3, - 1.2420140846E-1);
85
+ _PS_CONST(cephes_log_p4, + 1.4249322787E-1);
86
+ _PS_CONST(cephes_log_p5, - 1.6668057665E-1);
87
+ _PS_CONST(cephes_log_p6, + 2.0000714765E-1);
88
+ _PS_CONST(cephes_log_p7, - 2.4999993993E-1);
89
+ _PS_CONST(cephes_log_p8, + 3.3333331174E-1);
90
+ _PS_CONST(cephes_log_q1, -2.12194440e-4);
91
+ _PS_CONST(cephes_log_q2, 0.693359375);
92
+
93
#if defined (__MINGW32__)

/* the ugly part below: many versions of gcc used to be completely buggy with respect to some intrinsics
   The movehl_ps is fixed in mingw 3.4.5, but I found out that all the _mm_cmp* intrinsics were completely
   broken on my mingw gcc 3.4.5 ...

   Note that the bug on _mm_cmp* does occur only at -O0 optimization level
*/

// Drop-in replacement for _mm_movehl_ps built on inline asm so the broken
// intrinsic is bypassed entirely.
inline __m128 my_movehl_ps(__m128 a, const __m128 b) {
  asm (
      "movhlps %2,%0\n\t"
      : "=x" (a)
      : "0" (a), "x"(b)
      );
  return a; }
#warning "redefined _mm_movehl_ps (see gcc bug 21179)"
#define _mm_movehl_ps my_movehl_ps

// Inline-asm replacements for the _mm_cmp* intrinsics: each emits the single
// SSE compare instruction directly ("cmpltps" = less-than, "cmpnleps" =
// not-less-or-equal i.e. greater-than, "cmpeqps" = equal).
inline __m128 my_cmplt_ps(__m128 a, const __m128 b) {
  asm (
      "cmpltps %2,%0\n\t"
      : "=x" (a)
      : "0" (a), "x"(b)
      );
  return a;
}
inline __m128 my_cmpgt_ps(__m128 a, const __m128 b) {
  asm (
      "cmpnleps %2,%0\n\t"
      : "=x" (a)
      : "0" (a), "x"(b)
      );
  return a;
}
inline __m128 my_cmpeq_ps(__m128 a, const __m128 b) {
  asm (
      "cmpeqps %2,%0\n\t"
      : "=x" (a)
      : "0" (a), "x"(b)
      );
  return a;
}
#warning "redefined _mm_cmpxx_ps functions..."
#define _mm_cmplt_ps my_cmplt_ps
#define _mm_cmpgt_ps my_cmpgt_ps
#define _mm_cmpeq_ps my_cmpeq_ps
#endif
141
+
142
+ #ifndef USE_SSE2
143
+ typedef union xmm_mm_union {
144
+ __m128 xmm;
145
+ __m64 mm[2];
146
+ } xmm_mm_union;
147
+
148
+ #define COPY_XMM_TO_MM(xmm_, mm0_, mm1_) { \
149
+ xmm_mm_union u; u.xmm = xmm_; \
150
+ mm0_ = u.mm[0]; \
151
+ mm1_ = u.mm[1]; \
152
+ }
153
+
154
+ #define COPY_MM_TO_XMM(mm0_, mm1_, xmm_) { \
155
+ xmm_mm_union u; u.mm[0]=mm0_; u.mm[1]=mm1_; xmm_ = u.xmm; \
156
+ }
157
+
158
+ #endif // USE_SSE2
159
+
160
/* natural logarithm computed for 4 simultaneous float
   return NaN for x <= 0
*/
v4sf log_ps(v4sf x) {
#ifdef USE_SSE2
  v4si emm0;
#else
  v2si mm0, mm1;
#endif
  v4sf one = *(v4sf*)_ps_1;

  // Remember which lanes are invalid (x <= 0); they are forced to NaN at the end.
  v4sf invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());

  x = _mm_max_ps(x, *(v4sf*)_ps_min_norm_pos);  /* cut off denormalized stuff */

#ifndef USE_SSE2
  /* part 1: x = frexpf(x, &e); */
  COPY_XMM_TO_MM(x, mm0, mm1);
  mm0 = _mm_srli_pi32(mm0, 23);   // shift out the mantissa, keep the biased exponent
  mm1 = _mm_srli_pi32(mm1, 23);
#else
  emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
#endif
  /* keep only the fractional part */
  x = _mm_and_ps(x, *(v4sf*)_ps_inv_mant_mask);
  x = _mm_or_ps(x, *(v4sf*)_ps_0p5);  // set exponent to -1, i.e. mantissa in [0.5, 1)

#ifndef USE_SSE2
  /* now e=mm0:mm1 contain the really base-2 exponent */
  mm0 = _mm_sub_pi32(mm0, *(v2si*)_pi32_0x7f);  // remove the IEEE-754 exponent bias (127)
  mm1 = _mm_sub_pi32(mm1, *(v2si*)_pi32_0x7f);
  v4sf e = _mm_cvtpi32x2_ps(mm0, mm1);
  _mm_empty(); /* bye bye mmx */
#else
  emm0 = _mm_sub_epi32(emm0, *(v4si*)_pi32_0x7f);
  v4sf e = _mm_cvtepi32_ps(emm0);
#endif

  e = _mm_add_ps(e, one);

  /* part2:
     if( x < SQRTHF ) {
       e -= 1;
       x = x + x - 1.0;
     } else { x = x - 1.0; }
  */
  v4sf mask = _mm_cmplt_ps(x, *(v4sf*)_ps_cephes_SQRTHF);
  v4sf tmp = _mm_and_ps(x, mask);
  x = _mm_sub_ps(x, one);
  e = _mm_sub_ps(e, _mm_and_ps(one, mask));
  x = _mm_add_ps(x, tmp);


  v4sf z = _mm_mul_ps(x,x);

  // Evaluate the cephes polynomial (Horner's scheme over the log_p* coefficients).
  v4sf y = *(v4sf*)_ps_cephes_log_p0;
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p1);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p2);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p3);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p4);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p5);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p6);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p7);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p8);
  y = _mm_mul_ps(y, x);

  y = _mm_mul_ps(y, z);


  // Recombine: result = polynomial + e*log(2), with log(2) split into q1+q2
  // (two constants) for extra precision.
  tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q1);
  y = _mm_add_ps(y, tmp);


  tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
  y = _mm_sub_ps(y, tmp);

  tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q2);
  x = _mm_add_ps(x, y);
  x = _mm_add_ps(x, tmp);
  x = _mm_or_ps(x, invalid_mask); // negative arg will be NAN
  return x;
}
+
251
+ _PS_CONST(exp_hi, 88.3762626647949f);
252
+ _PS_CONST(exp_lo, -88.3762626647949f);
253
+
254
+ _PS_CONST(cephes_LOG2EF, 1.44269504088896341);
255
+ _PS_CONST(cephes_exp_C1, 0.693359375);
256
+ _PS_CONST(cephes_exp_C2, -2.12194440e-4);
257
+
258
+ _PS_CONST(cephes_exp_p0, 1.9875691500E-4);
259
+ _PS_CONST(cephes_exp_p1, 1.3981999507E-3);
260
+ _PS_CONST(cephes_exp_p2, 8.3334519073E-3);
261
+ _PS_CONST(cephes_exp_p3, 4.1665795894E-2);
262
+ _PS_CONST(cephes_exp_p4, 1.6666665459E-1);
263
+ _PS_CONST(cephes_exp_p5, 5.0000001201E-1);
264
+
265
/* exponential computed for 4 simultaneous floats; the input is first clamped
   to [exp_lo, exp_hi] to avoid overflow/underflow in the 2^n reconstruction */
v4sf exp_ps(v4sf x) {
  v4sf tmp = _mm_setzero_ps(), fx;
#ifdef USE_SSE2
  v4si emm0;
#else
  v2si mm0, mm1;
#endif
  v4sf one = *(v4sf*)_ps_1;

  x = _mm_min_ps(x, *(v4sf*)_ps_exp_hi);
  x = _mm_max_ps(x, *(v4sf*)_ps_exp_lo);

  /* express exp(x) as exp(g + n*log(2)) */
  fx = _mm_mul_ps(x, *(v4sf*)_ps_cephes_LOG2EF);
  fx = _mm_add_ps(fx, *(v4sf*)_ps_0p5);

  /* how to perform a floorf with SSE: just below */
#ifndef USE_SSE2
  /* step 1 : cast to int */
  tmp = _mm_movehl_ps(tmp, fx);
  mm0 = _mm_cvttps_pi32(fx);
  mm1 = _mm_cvttps_pi32(tmp);
  /* step 2 : cast back to float */
  tmp = _mm_cvtpi32x2_ps(mm0, mm1);
#else
  emm0 = _mm_cvttps_epi32(fx);
  tmp = _mm_cvtepi32_ps(emm0);
#endif
  /* if greater, substract 1 */
  v4sf mask = _mm_cmpgt_ps(tmp, fx);
  mask = _mm_and_ps(mask, one);
  fx = _mm_sub_ps(tmp, mask);

  // Reduce the argument: x -= fx*log(2), with log(2) split into C1+C2 for precision.
  tmp = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C1);
  v4sf z = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C2);
  x = _mm_sub_ps(x, tmp);
  x = _mm_sub_ps(x, z);

  z = _mm_mul_ps(x,x);

  // Evaluate the cephes polynomial (Horner's scheme over the exp_p* coefficients).
  v4sf y = *(v4sf*)_ps_cephes_exp_p0;
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p1);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p2);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p3);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p4);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p5);
  y = _mm_mul_ps(y, z);
  y = _mm_add_ps(y, x);
  y = _mm_add_ps(y, one);

  /* build 2^n */
#ifndef USE_SSE2
  z = _mm_movehl_ps(z, fx);
  mm0 = _mm_cvttps_pi32(fx);
  mm1 = _mm_cvttps_pi32(z);
  mm0 = _mm_add_pi32(mm0, *(v2si*)_pi32_0x7f);  // re-bias the exponent (add 127)
  mm1 = _mm_add_pi32(mm1, *(v2si*)_pi32_0x7f);
  mm0 = _mm_slli_pi32(mm0, 23);                 // move it into the float exponent field
  mm1 = _mm_slli_pi32(mm1, 23);

  v4sf pow2n;
  COPY_MM_TO_XMM(mm0, mm1, pow2n);
  _mm_empty();
#else
  emm0 = _mm_cvttps_epi32(fx);
  emm0 = _mm_add_epi32(emm0, *(v4si*)_pi32_0x7f);
  emm0 = _mm_slli_epi32(emm0, 23);
  v4sf pow2n = _mm_castsi128_ps(emm0);
#endif
  y = _mm_mul_ps(y, pow2n);
  return y;
}
+
343
+ _PS_CONST(minus_cephes_DP1, -0.78515625);
344
+ _PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4);
345
+ _PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8);
346
+ _PS_CONST(sincof_p0, -1.9515295891E-4);
347
+ _PS_CONST(sincof_p1, 8.3321608736E-3);
348
+ _PS_CONST(sincof_p2, -1.6666654611E-1);
349
+ _PS_CONST(coscof_p0, 2.443315711809948E-005);
350
+ _PS_CONST(coscof_p1, -1.388731625493765E-003);
351
+ _PS_CONST(coscof_p2, 4.166664568298827E-002);
352
+ _PS_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
353
+
354
+
355
+ /* evaluation of 4 sines at onces, using only SSE1+MMX intrinsics so
356
+ it runs also on old athlons XPs and the pentium III of your grand
357
+ mother.
358
+
359
+ The code is the exact rewriting of the cephes sinf function.
360
+ Precision is excellent as long as x < 8192 (I did not bother to
361
+ take into account the special handling they have for greater values
362
+ -- it does not return garbage for arguments over 8192, though, but
363
+ the extra precision is missing).
364
+
365
+ Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
366
+ surprising but correct result.
367
+
368
+ Performance is also surprisingly good, 1.33 times faster than the
369
+ macos vsinf SSE2 function, and 1.5 times faster than the
370
+ __vrs4_sinf of amd's ACML (which is only available in 64 bits). Not
371
+ too bad for an SSE1 function (with no special tuning) !
372
+ However the latter libraries probably have a much better handling of NaN,
373
+ Inf, denormalized and other special arguments..
374
+
375
+ On my core 1 duo, the execution of this function takes approximately 95 cycles.
376
+
377
+ From what I have observed on the experiments with Intel AMath lib, switching to an
378
+ SSE2 version would improve the perf by only 10%.
379
+
380
+ Since it is based on SSE intrinsics, it has to be compiled at -O2 to
381
+ deliver full speed.
382
+ */
383
/* Compute sin(x) for each of the four packed floats in x.
   Exact rewrite of the cephes sinf: range-reduce with j = round_even(x*4/Pi),
   use the octant to pick sign flips and one of two minimax polynomials.
   Precision is excellent for |x| < 8192 (see file header comment). */
v4sf sin_ps(v4sf x) { // any x
  v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;

#ifdef USE_SSE2
  v4si emm0, emm2;
#else
  v2si mm0, mm1, mm2, mm3;
#endif
  sign_bit = x;
  /* take the absolute value */
  x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
  /* extract the sign bit (upper one) */
  sign_bit = _mm_and_ps(sign_bit, *(v4sf*)_ps_sign_mask);

  /* scale by 4/Pi */
  y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);

  //printf("plop:"); print4(y);
#ifdef USE_SSE2
  /* store the integer part of y in mm0 */
  emm2 = _mm_cvttps_epi32(y);
  /* j=(j+1) & (~1) (see the cephes sources) */
  emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
  y = _mm_cvtepi32_ps(emm2);
  /* get the swap sign flag */
  emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
  emm0 = _mm_slli_epi32(emm0, 29);
  /* get the polynom selection mask
     there is one polynom for 0 <= x <= Pi/4
     and another one for Pi/4<x<=Pi/2

     Both branches will be computed.
  */
  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());

  v4sf swap_sign_bit = _mm_castsi128_ps(emm0);
  v4sf poly_mask = _mm_castsi128_ps(emm2);
  sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
#else
  /* SSE1 fallback: same computation split across two MMX registers. */
  /* store the integer part of y in mm0:mm1 */
  xmm2 = _mm_movehl_ps(xmm2, y);
  mm2 = _mm_cvttps_pi32(y);
  mm3 = _mm_cvttps_pi32(xmm2);
  /* j=(j+1) & (~1) (see the cephes sources) */
  mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
  mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
  y = _mm_cvtpi32x2_ps(mm2, mm3);
  /* get the swap sign flag */
  mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
  mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
  mm0 = _mm_slli_pi32(mm0, 29);
  mm1 = _mm_slli_pi32(mm1, 29);
  /* get the polynom selection mask */
  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
  mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
  mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
  v4sf swap_sign_bit, poly_mask;
  COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit);
  COPY_MM_TO_XMM(mm2, mm3, poly_mask);
  sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
  _mm_empty(); /* good-bye mmx */
#endif

  /* The magic pass: "Extended precision modular arithmetic"
     x = ((x - y * DP1) - y * DP2) - y * DP3; */
  xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
  xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
  xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
  xmm1 = _mm_mul_ps(y, xmm1);
  xmm2 = _mm_mul_ps(y, xmm2);
  xmm3 = _mm_mul_ps(y, xmm3);
  x = _mm_add_ps(x, xmm1);
  x = _mm_add_ps(x, xmm2);
  x = _mm_add_ps(x, xmm3);

  /* Evaluate the first polynom (0 <= x <= Pi/4) */
  y = *(v4sf*)_ps_coscof_p0;
  v4sf z = _mm_mul_ps(x,x);

  y = _mm_mul_ps(y, z);
  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
  y = _mm_mul_ps(y, z);
  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
  y = _mm_mul_ps(y, z);
  y = _mm_mul_ps(y, z);
  v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
  y = _mm_sub_ps(y, tmp);
  y = _mm_add_ps(y, *(v4sf*)_ps_1);

  /* Evaluate the second polynom (Pi/4 <= x <= 0) */

  v4sf y2 = *(v4sf*)_ps_sincof_p0;
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_mul_ps(y2, x);
  y2 = _mm_add_ps(y2, x);

  /* select the correct result from the two polynoms */
  xmm3 = poly_mask;
  y2 = _mm_and_ps(xmm3, y2); //, xmm3);
  y = _mm_andnot_ps(xmm3, y);
  y = _mm_add_ps(y,y2);
  /* update the sign */
  y = _mm_xor_ps(y, sign_bit);

  return y;
}
498
+
499
/* almost the same as sin_ps */
/* Compute cos(x) for each of the four packed floats in x.
   Identical structure to sin_ps; the only differences are that the input
   sign is irrelevant (cos is even) and the octant index is shifted by 2
   (emm2 - 2) before deriving the sign/polynomial selection masks. */
v4sf cos_ps(v4sf x) { // any x
  v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
#ifdef USE_SSE2
  v4si emm0, emm2;
#else
  v2si mm0, mm1, mm2, mm3;
#endif
  /* take the absolute value */
  x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);

  /* scale by 4/Pi */
  y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);

#ifdef USE_SSE2
  /* store the integer part of y in mm0 */
  emm2 = _mm_cvttps_epi32(y);
  /* j=(j+1) & (~1) (see the cephes sources) */
  emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
  y = _mm_cvtepi32_ps(emm2);

  emm2 = _mm_sub_epi32(emm2, *(v4si*)_pi32_2);

  /* get the swap sign flag */
  emm0 = _mm_andnot_si128(emm2, *(v4si*)_pi32_4);
  emm0 = _mm_slli_epi32(emm0, 29);
  /* get the polynom selection mask */
  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());

  v4sf sign_bit = _mm_castsi128_ps(emm0);
  v4sf poly_mask = _mm_castsi128_ps(emm2);
#else
  /* SSE1 fallback: same computation split across two MMX registers. */
  /* store the integer part of y in mm0:mm1 */
  xmm2 = _mm_movehl_ps(xmm2, y);
  mm2 = _mm_cvttps_pi32(y);
  mm3 = _mm_cvttps_pi32(xmm2);

  /* j=(j+1) & (~1) (see the cephes sources) */
  mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
  mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);

  y = _mm_cvtpi32x2_ps(mm2, mm3);


  mm2 = _mm_sub_pi32(mm2, *(v2si*)_pi32_2);
  mm3 = _mm_sub_pi32(mm3, *(v2si*)_pi32_2);

  /* get the swap sign flag in mm0:mm1 and the
     polynom selection mask in mm2:mm3 */

  mm0 = _mm_andnot_si64(mm2, *(v2si*)_pi32_4);
  mm1 = _mm_andnot_si64(mm3, *(v2si*)_pi32_4);
  mm0 = _mm_slli_pi32(mm0, 29);
  mm1 = _mm_slli_pi32(mm1, 29);

  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);

  mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
  mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());

  v4sf sign_bit, poly_mask;
  COPY_MM_TO_XMM(mm0, mm1, sign_bit);
  COPY_MM_TO_XMM(mm2, mm3, poly_mask);
  _mm_empty(); /* good-bye mmx */
#endif
  /* The magic pass: "Extended precision modular arithmetic"
     x = ((x - y * DP1) - y * DP2) - y * DP3; */
  xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
  xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
  xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
  xmm1 = _mm_mul_ps(y, xmm1);
  xmm2 = _mm_mul_ps(y, xmm2);
  xmm3 = _mm_mul_ps(y, xmm3);
  x = _mm_add_ps(x, xmm1);
  x = _mm_add_ps(x, xmm2);
  x = _mm_add_ps(x, xmm3);

  /* Evaluate the first polynom (0 <= x <= Pi/4) */
  y = *(v4sf*)_ps_coscof_p0;
  v4sf z = _mm_mul_ps(x,x);

  y = _mm_mul_ps(y, z);
  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
  y = _mm_mul_ps(y, z);
  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
  y = _mm_mul_ps(y, z);
  y = _mm_mul_ps(y, z);
  v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
  y = _mm_sub_ps(y, tmp);
  y = _mm_add_ps(y, *(v4sf*)_ps_1);

  /* Evaluate the second polynom (Pi/4 <= x <= 0) */

  v4sf y2 = *(v4sf*)_ps_sincof_p0;
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_mul_ps(y2, x);
  y2 = _mm_add_ps(y2, x);

  /* select the correct result from the two polynoms */
  xmm3 = poly_mask;
  y2 = _mm_and_ps(xmm3, y2); //, xmm3);
  y = _mm_andnot_ps(xmm3, y);
  y = _mm_add_ps(y,y2);
  /* update the sign */
  y = _mm_xor_ps(y, sign_bit);

  return y;
}
616
+
617
/* since sin_ps and cos_ps are almost identical, sincos_ps could replace both of them..
   it is almost as fast, and gives you a free cosine with your sine */
/* Compute both sin(x) into *s and cos(x) into *c for the four packed floats
   in x, sharing the range reduction and both polynomial evaluations. */
void sincos_ps(v4sf x, v4sf *s, v4sf *c) {
  v4sf xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y;
#ifdef USE_SSE2
  v4si emm0, emm2, emm4;
#else
  v2si mm0, mm1, mm2, mm3, mm4, mm5;
#endif
  sign_bit_sin = x;
  /* take the absolute value */
  x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
  /* extract the sign bit (upper one) */
  sign_bit_sin = _mm_and_ps(sign_bit_sin, *(v4sf*)_ps_sign_mask);

  /* scale by 4/Pi */
  y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);

#ifdef USE_SSE2
  /* store the integer part of y in emm2 */
  emm2 = _mm_cvttps_epi32(y);

  /* j=(j+1) & (~1) (see the cephes sources) */
  emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
  y = _mm_cvtepi32_ps(emm2);

  /* keep a copy of the octant index for the cosine sign below */
  emm4 = emm2;

  /* get the swap sign flag for the sine */
  emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
  emm0 = _mm_slli_epi32(emm0, 29);
  v4sf swap_sign_bit_sin = _mm_castsi128_ps(emm0);

  /* get the polynom selection mask for the sine*/
  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
  v4sf poly_mask = _mm_castsi128_ps(emm2);
#else
  /* SSE1 fallback: same computation split across two MMX registers. */
  /* store the integer part of y in mm2:mm3 */
  xmm3 = _mm_movehl_ps(xmm3, y);
  mm2 = _mm_cvttps_pi32(y);
  mm3 = _mm_cvttps_pi32(xmm3);

  /* j=(j+1) & (~1) (see the cephes sources) */
  mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
  mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);

  y = _mm_cvtpi32x2_ps(mm2, mm3);

  mm4 = mm2;
  mm5 = mm3;

  /* get the swap sign flag for the sine */
  mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
  mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
  mm0 = _mm_slli_pi32(mm0, 29);
  mm1 = _mm_slli_pi32(mm1, 29);
  v4sf swap_sign_bit_sin;
  COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit_sin);

  /* get the polynom selection mask for the sine */

  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
  mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
  mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
  v4sf poly_mask;
  COPY_MM_TO_XMM(mm2, mm3, poly_mask);
#endif

  /* The magic pass: "Extended precision modular arithmetic"
     x = ((x - y * DP1) - y * DP2) - y * DP3; */
  xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
  xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
  xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
  xmm1 = _mm_mul_ps(y, xmm1);
  xmm2 = _mm_mul_ps(y, xmm2);
  xmm3 = _mm_mul_ps(y, xmm3);
  x = _mm_add_ps(x, xmm1);
  x = _mm_add_ps(x, xmm2);
  x = _mm_add_ps(x, xmm3);

#ifdef USE_SSE2
  /* sign of the cosine comes from the saved octant index shifted by 2 */
  emm4 = _mm_sub_epi32(emm4, *(v4si*)_pi32_2);
  emm4 = _mm_andnot_si128(emm4, *(v4si*)_pi32_4);
  emm4 = _mm_slli_epi32(emm4, 29);
  v4sf sign_bit_cos = _mm_castsi128_ps(emm4);
#else
  /* get the sign flag for the cosine */
  mm4 = _mm_sub_pi32(mm4, *(v2si*)_pi32_2);
  mm5 = _mm_sub_pi32(mm5, *(v2si*)_pi32_2);
  mm4 = _mm_andnot_si64(mm4, *(v2si*)_pi32_4);
  mm5 = _mm_andnot_si64(mm5, *(v2si*)_pi32_4);
  mm4 = _mm_slli_pi32(mm4, 29);
  mm5 = _mm_slli_pi32(mm5, 29);
  v4sf sign_bit_cos;
  COPY_MM_TO_XMM(mm4, mm5, sign_bit_cos);
  _mm_empty(); /* good-bye mmx */
#endif

  sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin);


  /* Evaluate the first polynom (0 <= x <= Pi/4) */
  v4sf z = _mm_mul_ps(x,x);
  y = *(v4sf*)_ps_coscof_p0;

  y = _mm_mul_ps(y, z);
  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
  y = _mm_mul_ps(y, z);
  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
  y = _mm_mul_ps(y, z);
  y = _mm_mul_ps(y, z);
  v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
  y = _mm_sub_ps(y, tmp);
  y = _mm_add_ps(y, *(v4sf*)_ps_1);

  /* Evaluate the second polynom (Pi/4 <= x <= 0) */

  v4sf y2 = *(v4sf*)_ps_sincof_p0;
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_mul_ps(y2, x);
  y2 = _mm_add_ps(y2, x);

  /* select the correct result from the two polynoms:
     the sine takes one branch per lane, the cosine takes the other */
  xmm3 = poly_mask;
  v4sf ysin2 = _mm_and_ps(xmm3, y2);
  v4sf ysin1 = _mm_andnot_ps(xmm3, y);
  y2 = _mm_sub_ps(y2,ysin2);
  y = _mm_sub_ps(y, ysin1);

  xmm1 = _mm_add_ps(ysin1,ysin2);
  xmm2 = _mm_add_ps(y,y2);

  /* update the sign */
  *s = _mm_xor_ps(xmm1, sign_bit_sin);
  *c = _mm_xor_ps(xmm2, sign_bit_cos);
}
762
+
stc.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import sys
4
+ import os.path
5
+ import math
6
+ import random
7
+ import struct
8
+ import hashlib
9
+ from PIL import Image
10
+ from ctypes import *
11
+ from Crypto.Cipher import AES
12
+ from Crypto.Random import get_random_bytes
13
+ from Crypto.Util.Padding import pad, unpad
14
+
15
def prepare_message(filename, password):
    """Read a text file, prefix it with a (version, length) header, encrypt
    it with `password`, and return the result as a list of bits.

    Bits are emitted least-significant-bit first within each byte, matching
    the unpacking loop in extract().

    Args:
        filename: path of the plaintext message file (read as UTF-8 text).
        password: passphrase forwarded to encrypt().

    Returns:
        list[int]: the encrypted payload as 0/1 bit values.
    """
    # Use a context manager so the handle is closed even if read() raises
    # (the original code leaked the open file object).
    with open(filename, 'r') as f:
        content_data = f.read().encode('utf-8')

    # Header: 1-byte format version + 4-byte big-endian payload length.
    content_ver = struct.pack("B", 1)  # version 1
    content_len = struct.pack("!I", len(content_data))
    content = content_ver + content_len + content_data

    # encrypt -> salt || iv || ciphertext
    enc = encrypt(content, password)

    # Serialize to bits, LSB first.
    array = []
    for b in enc:
        for i in range(8):
            array.append((b >> i) & 1)
    return array
33
+
34
+
35
# {{{ encrypt()
def encrypt(plain_text, password):
    """AES-256-CBC encrypt `plain_text`; returns salt || iv || ciphertext.

    The key is derived from `password` with scrypt (n=2**14, r=8, p=1) using
    a random per-message salt; the salt and IV are prepended so decrypt()
    can recover them.
    """
    salt = get_random_bytes(AES.block_size)

    # Derive a 32-byte AES key from the password with the scrypt KDF.
    key = hashlib.scrypt(
        password.encode(), salt=salt, n=2 ** 14, r=8, p=1, dklen=32)

    aes = AES.new(key, AES.MODE_CBC)
    body = aes.encrypt(pad(plain_text, AES.block_size))

    return salt + aes.iv + body
# }}}
50
+
51
# {{{ decrypt()
def decrypt(cipher_text, password):
    """Decrypt a salt || iv || ciphertext blob produced by encrypt().

    PKCS#7 padding is intentionally NOT removed here: the caller parses the
    embedded length header instead, because the extracted bit stream may
    carry trailing garbage bits.
    """
    bs = AES.block_size
    salt = cipher_text[:bs]
    iv = cipher_text[bs:2 * bs]
    body = cipher_text[2 * bs:]

    # Drop any trailing partial block left over from bit extraction.
    body = body[:len(body) - (len(body) % bs)]

    key = hashlib.scrypt(
        password.encode(), salt=salt, n=2 ** 14, r=8, p=1, dklen=32)

    aes = AES.new(key, AES.MODE_CBC, iv=iv)
    decrypted = aes.decrypt(body)
    #decrypted = unpad(decrypted, AES.block_size)

    return decrypted
# }}}
71
+
72
+
73
+
74
+
75
def embed(input_img_path, cost_matrix, msg_file_path, password, output_img_path, payload=0.40):
    """Hide an encrypted message file inside an image using syndrome trellis codes.

    Args:
        input_img_path: path of the cover image (grayscale mode 'L' is the
            supported case; see note below).
        cost_matrix: 2-D indexable of per-pixel embedding costs, read as
            cost_matrix[i, j] with i = column, j = row.
        msg_file_path: path of the plaintext message file.
        password: passphrase used to derive the AES key.
        output_img_path: where the stego image is written.
        payload: relative payload in message bits per pixel (default 0.40).

    Exits the process if the message does not fit in the requested payload.
    """
    me = os.path.abspath(os.path.dirname(__file__))
    lib = cdll.LoadLibrary(os.path.join(me, "lib", "stc.so"))

    # Prepare cover image.
    im = Image.open(input_img_path)
    # BUG FIX: width/height were previously assigned only for mode 'L',
    # leaving them undefined (NameError) for RGB/RGBA covers. The image size
    # is mode-independent, so read it unconditionally.
    width, height = im.size
    if im.mode not in ['L']:
        # Color images are not handled per-channel by the code below;
        # grayscale 'L' is the intended input. Kept non-fatal for
        # backward compatibility.
        pass
    I = im.load()
    cover = (c_int * (width * height))()
    idx = 0
    for j in range(height):
        for i in range(width):
            cover[idx] = I[i, j]
            idx += 1

    # Prepare costs: 3 entries per pixel (cost of -1, of keeping, of +1).
    INF = 2 ** 31 - 1  # "wet" cost forbids changes that would leave [0, 255]
    costs = (c_float * (width * height * 3))()
    idx = 0
    for j in range(height):
        for i in range(width):
            if cover[idx] == 0:
                costs[3 * idx + 0] = INF
                costs[3 * idx + 1] = 0
                costs[3 * idx + 2] = cost_matrix[i, j]
            elif cover[idx] == 255:
                costs[3 * idx + 0] = cost_matrix[i, j]
                costs[3 * idx + 1] = 0
                costs[3 * idx + 2] = INF
            else:
                costs[3 * idx + 0] = cost_matrix[i, j]
                costs[3 * idx + 1] = 0
                costs[3 * idx + 2] = cost_matrix[i, j]
            idx += 1

    # Prepare message bits, zero-padded up to the full payload length.
    msg_bits = prepare_message(msg_file_path, password)
    if len(msg_bits) > width * height * payload:
        print("Message too long")
        sys.exit(0)
    m = int(width * height * payload)
    message = (c_ubyte * m)()
    for i in range(m):
        message[i] = msg_bits[i] if i < len(msg_bits) else 0

    # Hide message (stego receives the modified pixel values).
    stego = (c_int * (width * height))()
    lib.stc_hide(width * height, cover, costs, m, message, stego)

    # Save output image.
    idx = 0
    for j in range(height):
        for i in range(width):
            im.putpixel((i, j), stego[idx])
            idx += 1
    im.save(output_img_path)
    im.close()
139
+
140
+
141
+
142
def extract(stego_img_path, password, output_msg_path, payload=0.40):
    """Recover a message hidden by embed() and write it to a text file.

    Args:
        stego_img_path: path of the stego image (grayscale mode 'L' expected).
        password: passphrase used by embed(); must match.
        output_msg_path: path where the recovered plaintext is written.
        payload: relative payload used at embedding time; must match embed().
    """
    me = os.path.abspath(os.path.dirname(__file__))
    lib = cdll.LoadLibrary(os.path.join(me, "lib", "stc.so"))

    # Prepare stego image.
    im = Image.open(stego_img_path)
    # BUG FIX: width/height were previously assigned only for mode 'L',
    # leaving them undefined (NameError) for RGB/RGBA images. The image size
    # is mode-independent, so read it unconditionally.
    width, height = im.size
    if im.mode not in ['L']:
        # Grayscale 'L' is the intended input; kept non-fatal for
        # backward compatibility.
        pass
    I = im.load()
    stego = (c_int * (width * height))()
    idx = 0
    for j in range(height):
        for i in range(width):
            stego[idx] = I[i, j]
            idx += 1

    # Extract the raw message bits.
    n = width * height
    m = int(n * payload)
    extracted_message = (c_ubyte * m)()
    lib.stc_unhide(n, stego, m, extracted_message)

    # Repack bits into bytes, LSB first (mirrors prepare_message()).
    # Any trailing partial byte is dropped; decrypt() trims partial blocks.
    enc = bytearray()
    bitidx = 0
    bitval = 0
    for b in extracted_message:
        if bitidx == 8:
            enc.append(bitval)
            bitidx = 0
            bitval = 0
        bitval |= b << bitidx
        bitidx += 1
    if bitidx == 8:
        enc.append(bitval)

    # decrypt
    cleartext = decrypt(enc, password)

    # Parse the header (1-byte version + 4-byte big-endian length), then
    # slice out exactly the original payload.
    content_ver = struct.unpack_from("B", cleartext, 0)
    content_len = struct.unpack_from("!I", cleartext, 1)
    content = cleartext[5:content_len[0] + 5]

    with open(output_msg_path, 'w') as f:
        f.write(content.decode())
stc_embed_c.cpp ADDED
@@ -0,0 +1,476 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <cstdlib>
2
+ #include <cstring>
3
+ #include <cmath>
4
+ #include <cfloat>
5
+ #include <limits>
6
+ #include <emmintrin.h>
7
+ #include <cstdio>
8
+ #include <sstream>
9
+ #include "stc_embed_c.h"
10
+
11
// {{{ aligned_malloc()
/* Allocate `bytes` bytes whose address is a multiple of `align` (a power of
   two). The offset applied to the raw malloc() pointer is stored in the byte
   just before the returned address, so aligned_free() can recover it.
   Returns NULL when the underlying malloc() fails. */
void *aligned_malloc( unsigned int bytes, int align ) {
    char *raw = (char *) malloc( bytes + align );

    if ( raw == NULL ) return raw;
    int offset = align - (int) (((unsigned long long) raw) & (align - 1));
    char *aligned = raw + offset;
    aligned[-1] = offset; // remember how far we shifted from the raw pointer
    return (void *) aligned;
}
// }}}
23
+
24
// {{{ aligned_free()
/* Release a pointer obtained from aligned_malloc(): the byte stored just
   before the aligned address records how far it was shifted from the raw
   malloc() pointer. */
void aligned_free( void *vptr ) {
    char *aligned = (char *) vptr;
    free( aligned - aligned[-1] );
}
// }}}
31
+
32
// {{{ maxLessThan255()
// Byte-wise unsigned max of v1 and v2 in which bytes equal to 0xff are
// first zeroed out: 255 is the saturated "infinity" sentinel of the u8
// price arrays, so it must never win the maximum.
inline __m128i maxLessThan255( const __m128i v1, const __m128i v2 ) {
    register __m128i mask = _mm_set1_epi32( 0xffffffff );
    return _mm_max_epu8( _mm_andnot_si128( _mm_cmpeq_epi8( v1, mask ), v1 ), _mm_andnot_si128( _mm_cmpeq_epi8( v2, mask ), v2 ) );
}
// }}}
38
+
39
// {{{ max16B()
// Horizontal maximum of the 16 unsigned bytes packed in maxp.
inline u8 max16B( __m128i maxp ) {
    u8 mtemp[4];
    // Fold the vector onto itself twice: 16 -> 8 -> 4 candidate bytes.
    maxp = _mm_max_epu8( maxp, _mm_srli_si128(maxp, 8) );
    maxp = _mm_max_epu8( maxp, _mm_srli_si128(maxp, 4) );
    // Extract the remaining 4 bytes and finish the reduction in scalar code.
    *((int*) mtemp) = _mm_cvtsi128_si32( maxp );
    if ( mtemp[2] > mtemp[0] ) mtemp[0] = mtemp[2];
    if ( mtemp[3] > mtemp[1] ) mtemp[1] = mtemp[3];
    if ( mtemp[1] > mtemp[0] ) return mtemp[1];
    else return mtemp[0];
}
// }}}
51
+
52
// {{{ min16B()
// Horizontal minimum of the 16 unsigned bytes packed in minp
// (mirror image of max16B).
inline u8 min16B( __m128i minp ) {
    u8 mtemp[4];
    // Fold the vector onto itself twice: 16 -> 8 -> 4 candidate bytes.
    minp = _mm_min_epu8( minp, _mm_srli_si128(minp, 8) );
    minp = _mm_min_epu8( minp, _mm_srli_si128(minp, 4) );
    // Extract the remaining 4 bytes and finish the reduction in scalar code.
    *((int*) mtemp) = _mm_cvtsi128_si32( minp );
    if ( mtemp[2] < mtemp[0] ) mtemp[0] = mtemp[2];
    if ( mtemp[3] < mtemp[1] ) mtemp[1] = mtemp[3];
    if ( mtemp[1] < mtemp[0] ) return mtemp[1];
    else return mtemp[0];
}
// }}}
64
+
65
// {{{ stc_embed()
/* Syndrome-trellis embedding (Viterbi-style forward pass + optional
   backtracking). Finds the cheapest binary `stego` vector whose syndrome
   w.r.t. the convolutional parity-check matrix (submatrix height
   `matrixheight`, columns chosen by getMatrix) equals `syndrome`.

   The forward pass runs in one of two SSE variants:
     - usefloat == true:  prices are doubles, accumulated as packed floats;
     - usefloat == false: prices are u8, accumulated with saturating byte
       adds and periodically renormalized by subtracting the running minimum
       (0xff bytes act as the "infinity" sentinel).
   When `stego` != NULL, per-state decision bits are recorded in `path` and
   replayed backwards at the end to reconstruct the optimal stego vector.
   Throws stc_exception on invalid arguments, allocation failure, or when no
   solution exists; returns the total embedding distortion. */
double stc_embed( const u8 *vector, int vectorlength, const u8 *syndrome, int syndromelength, const void *pricevectorv, bool usefloat,
        u8 *stego, int matrixheight ) {
    int height, i, k, l, index, index2, parts, m, sseheight, altm, pathindex;
    u32 column, colmask, state;
    double totalprice;

    u8 *ssedone;
    u32 *path, *columns[2];
    int *matrices, *widths;

    if ( matrixheight > 31 ) throw stc_exception( "Submatrix height must not exceed 31.", 1 );

    // Number of trellis states, rounded up to a multiple of 32 so the state
    // array splits evenly into 32-bit path words ("parts").
    height = 1 << matrixheight;
    colmask = height - 1;
    height = (height + 31) & (~31);

    parts = height >> 5;

    if ( stego != NULL ) {
        path = (u32*) malloc( vectorlength * parts * sizeof(u32) );
        if ( path == NULL ) {
            std::stringstream ss;
            ss << "Not enough memory (" << (unsigned int) (vectorlength * parts * sizeof(u32)) << " byte array could not be allocated).";
            throw stc_exception( ss.str(), 2 );
        }
        pathindex = 0;
    }

    {
        // Distribute the cover over the syndrome bits: each syndrome bit gets
        // either `shorter` or `longer` cover elements so the totals track
        // vectorlength/syndromelength as closely as possible.
        int shorter, longer, worm;
        double invalpha;

        matrices = (int *) malloc( syndromelength * sizeof(int) );
        widths = (int *) malloc( syndromelength * sizeof(int) );

        invalpha = (double) vectorlength / syndromelength;
        if ( invalpha < 1 ) {
            free( matrices );
            free( widths );
            if ( stego != NULL ) free( path );
            throw stc_exception( "The message cannot be longer than the cover object.", 3 );
        }
        /* THIS IS OBSOLETE. Algorithm still works for alpha >1/2. You need to take care of cases with too many Infs in cost vector.
        if(invalpha < 2) {
            printf("The relative payload is greater than 1/2. This may result in poor embedding efficiency.\n");
        }
        */
        shorter = (int) floor( invalpha );
        longer = (int) ceil( invalpha );
        if ( (columns[0] = getMatrix( shorter, matrixheight )) == NULL ) {
            free( matrices );
            free( widths );
            if ( stego != NULL ) free( path );
            return -1;
        }
        if ( (columns[1] = getMatrix( longer, matrixheight )) == NULL ) {
            free( columns[0] );
            free( matrices );
            free( widths );
            if ( stego != NULL ) free( path );
            return -1;
        }
        worm = 0;
        for ( i = 0; i < syndromelength; i++ ) {
            if ( worm + longer <= (i + 1) * invalpha + 0.5 ) {
                matrices[i] = 1;
                widths[i] = longer;
                worm += longer;
            } else {
                matrices[i] = 0;
                widths[i] = shorter;
                worm += shorter;
            }
        }
    }

    if ( usefloat ) {
        /*
        SSE FLOAT VERSION
        */
        int pathindex8 = 0;
        int shift[2] = { 0, 4 };
        u8 mask[2] = { 0xf0, 0x0f };
        float *prices;
        u8 *path8 = (u8*) path;
        double *pricevector = (double*) pricevectorv;
        double total = 0;
        float inf = std::numeric_limits< float >::infinity();

        // 4 float prices per SSE register.
        sseheight = height >> 2;
        ssedone = (u8*) malloc( sseheight * sizeof(u8) );
        prices = (float*) aligned_malloc( height * sizeof(float), 16 );

        {
            // All states start unreachable (infinite price) except state 0.
            __m128 fillval = _mm_set1_ps( inf );
            for ( i = 0; i < height; i += 4 ) {
                _mm_store_ps( &prices[i], fillval );
                ssedone[i >> 2] = 0;
            }
        }

        prices[0] = 0.0f;

        for ( index = 0, index2 = 0; index2 < syndromelength; index2++ ) {
            register __m128 c1, c2;

            for ( k = 0; k < widths[index2]; k++, index++ ) {
                column = columns[matrices[index2]][k] & colmask;

                // c1 = price of emitting bit equal to the cover bit,
                // c2 = price of flipping it (or vice versa, per cover bit).
                if ( vector[index] == 0 ) {
                    c1 = _mm_setzero_ps();
                    c2 = _mm_set1_ps( (float) pricevector[index] );
                } else {
                    c1 = _mm_set1_ps( (float) pricevector[index] );
                    c2 = _mm_setzero_ps();
                }

                total += pricevector[index];

                // Relax each pair of states (m, m XOR column); ssedone marks
                // registers already updated through their partner.
                for ( m = 0; m < sseheight; m++ ) {
                    if ( !ssedone[m] ) {
                        register __m128 v1, v2, v3, v4;
                        altm = (m ^ (column >> 2));
                        v1 = _mm_load_ps( &prices[m << 2] );
                        v2 = _mm_load_ps( &prices[altm << 2] );
                        v3 = v1;
                        v4 = v2;
                        ssedone[m] = 1;
                        ssedone[altm] = 1;
                        // Low 2 bits of the column permute lanes inside the
                        // 4-float register.
                        switch ( column & 3 ) {
                        case 0:
                            break;
                        case 1:
                            v2 = _mm_shuffle_ps(v2, v2, 0xb1);
                            v3 = _mm_shuffle_ps(v3, v3, 0xb1);
                            break;
                        case 2:
                            v2 = _mm_shuffle_ps(v2, v2, 0x4e);
                            v3 = _mm_shuffle_ps(v3, v3, 0x4e);
                            break;
                        case 3:
                            v2 = _mm_shuffle_ps(v2, v2, 0x1b);
                            v3 = _mm_shuffle_ps(v3, v3, 0x1b);
                            break;
                        }
                        v1 = _mm_add_ps( v1, c1 );
                        v2 = _mm_add_ps( v2, c2 );
                        v3 = _mm_add_ps( v3, c2 );
                        v4 = _mm_add_ps( v4, c1 );

                        v1 = _mm_min_ps( v1, v2 );
                        v4 = _mm_min_ps( v3, v4 );

                        _mm_store_ps( &prices[m << 2], v1 );
                        _mm_store_ps( &prices[altm << 2], v4 );

                        if ( stego != NULL ) {
                            // Record which branch won: one bit per state,
                            // packed 4 bits per path byte.
                            v2 = _mm_cmpeq_ps( v1, v2 );
                            v3 = _mm_cmpeq_ps( v3, v4 );
                            path8[pathindex8 + (m >> 1)] = (path8[pathindex8 + (m >> 1)] & mask[m & 1]) | (_mm_movemask_ps( v2 ) << shift[m
                                    & 1]);
                            path8[pathindex8 + (altm >> 1)] = (path8[pathindex8 + (altm >> 1)] & mask[altm & 1]) | (_mm_movemask_ps( v3 )
                                    << shift[altm & 1]);
                        }
                    }
                }

                for ( i = 0; i < sseheight; i++ ) {
                    ssedone[i] = 0;
                }

                pathindex += parts;
                pathindex8 += parts << 2;
            }

            // Prune to the half of the states consistent with this syndrome
            // bit (keep even or odd lanes), compacting the price array.
            if ( syndrome[index2] == 0 ) {
                for ( i = 0, l = 0; i < sseheight; i += 2, l += 4 ) {
                    _mm_store_ps( &prices[l], _mm_shuffle_ps(_mm_load_ps(&prices[i << 2]), _mm_load_ps(&prices[(i + 1) << 2]), 0x88) );
                }
            } else {
                for ( i = 0, l = 0; i < sseheight; i += 2, l += 4 ) {
                    _mm_store_ps( &prices[l], _mm_shuffle_ps(_mm_load_ps(&prices[i << 2]), _mm_load_ps(&prices[(i + 1) << 2]), 0xdd) );
                }
            }

            // Near the end of the message the trellis narrows.
            if ( syndromelength - index2 <= matrixheight ) colmask >>= 1;

            {
                // Invalidate the now-unused upper half of the price array.
                register __m128 fillval = _mm_set1_ps( inf );
                for ( l >>= 2; l < sseheight; l++ ) {
                    _mm_store_ps( &prices[l << 2], fillval );
                }
            }
        }

        totalprice = prices[0];

        aligned_free( prices );
        free( ssedone );

        if ( totalprice >= total ) {
            free( matrices );
            free( widths );
            free( columns[0] );
            free( columns[1] );
            if ( stego != NULL ) free( path );
            throw stc_exception( "No solution exist.", 4 );
        }
    } else {
        /*
        SSE UINT8 VERSION
        */
        int pathindex16 = 0, subprice = 0;
        u8 maxc = 0, minc = 0;
        u8 *prices, *pricevector = (u8*) pricevectorv;
        u16 *path16 = (u16 *) path;
        __m128i *prices16B;

        // 16 byte-prices per SSE register.
        sseheight = height >> 4;
        ssedone = (u8*) malloc( sseheight * sizeof(u8) );
        prices = (u8*) aligned_malloc( height * sizeof(u8), 16 );
        prices16B = (__m128i *) prices;

        {
            // 0xff is the "infinity" sentinel; only state 0 starts reachable.
            __m128i napln = _mm_set1_epi32( 0xffffffff );
            for ( i = 0; i < sseheight; i++ ) {
                _mm_store_si128( &prices16B[i], napln );
                ssedone[i] = 0;
            }
        }

        prices[0] = 0;

        for ( index = 0, index2 = 0; index2 < syndromelength; index2++ ) {
            register __m128i c1, c2, maxp, minp;

            // Byte prices must never saturate into the 0xff sentinel.
            if ( (u32) maxc + pricevector[index] >= 254 ) {
                // NOTE(review): `path` was allocated with plain malloc() and
                // is freed again below via free(path) — this aligned_free()
                // looks like a mismatched/double free; confirm before reuse.
                aligned_free( path );
                free( ssedone );
                free( matrices );
                free( widths );
                free( columns[0] );
                free( columns[1] );
                if ( stego != NULL ) free( path );
                throw stc_exception( "Price vector limit exceeded.", 5 );
            }

            for ( k = 0; k < widths[index2]; k++, index++ ) {
                column = columns[matrices[index2]][k] & colmask;

                if ( vector[index] == 0 ) {
                    c1 = _mm_setzero_si128();
                    c2 = _mm_set1_epi8( pricevector[index] );
                } else {
                    c1 = _mm_set1_epi8( pricevector[index] );
                    c2 = _mm_setzero_si128();
                }

                minp = _mm_set1_epi8( -1 );
                maxp = _mm_setzero_si128();

                for ( m = 0; m < sseheight; m++ ) {
                    if ( !ssedone[m] ) {
                        register __m128i v1, v2, v3, v4;
                        altm = (m ^ (column >> 4));
                        v1 = _mm_load_si128( &prices16B[m] );
                        v2 = _mm_load_si128( &prices16B[altm] );
                        v3 = v1;
                        v4 = v2;
                        ssedone[m] = 1;
                        ssedone[altm] = 1;
                        // Low 4 bits of the column permute the 16 bytes
                        // inside the register, one bit per swap level.
                        if ( column & 8 ) {
                            v2 = _mm_shuffle_epi32(v2, 0x4e);
                            v3 = _mm_shuffle_epi32(v3, 0x4e);
                        }
                        if ( column & 4 ) {
                            v2 = _mm_shuffle_epi32(v2, 0xb1);
                            v3 = _mm_shuffle_epi32(v3, 0xb1);
                        }
                        if ( column & 2 ) {
                            v2 = _mm_shufflehi_epi16(v2, 0xb1);
                            v3 = _mm_shufflehi_epi16(v3, 0xb1);
                            v2 = _mm_shufflelo_epi16(v2, 0xb1);
                            v3 = _mm_shufflelo_epi16(v3, 0xb1);
                        }
                        if ( column & 1 ) {
                            v2 = _mm_or_si128( _mm_srli_epi16( v2, 8 ), _mm_slli_epi16( v2, 8 ) );
                            v3 = _mm_or_si128( _mm_srli_epi16( v3, 8 ), _mm_slli_epi16( v3, 8 ) );
                        }
                        // Saturating adds keep unreachable states at 0xff.
                        v1 = _mm_adds_epu8( v1, c1 );
                        v2 = _mm_adds_epu8( v2, c2 );
                        v3 = _mm_adds_epu8( v3, c2 );
                        v4 = _mm_adds_epu8( v4, c1 );

                        v1 = _mm_min_epu8( v1, v2 );
                        v4 = _mm_min_epu8( v3, v4 );

                        _mm_store_si128( &prices16B[m], v1 );
                        _mm_store_si128( &prices16B[altm], v4 );

                        minp = _mm_min_epu8( minp, _mm_min_epu8( v1, v4 ) );
                        maxp = _mm_max_epu8( maxp, maxLessThan255( v1, v4 ) );

                        if ( stego != NULL ) {
                            v2 = _mm_cmpeq_epi8( v1, v2 );
                            v3 = _mm_cmpeq_epi8( v3, v4 );
                            path16[pathindex16 + m] = (u16) _mm_movemask_epi8( v2 );
                            path16[pathindex16 + altm] = (u16) _mm_movemask_epi8( v3 );
                        }
                    }
                }

                maxc = max16B( maxp );
                minc = min16B( minp );

                // Renormalize: subtract the global minimum from every finite
                // price so the u8 range is not exhausted; account for it in
                // subprice so totalprice stays exact.
                maxc -= minc;
                subprice += minc;
                {
                    register __m128i mask = _mm_set1_epi32( 0xffffffff );
                    register __m128i m = _mm_set1_epi8( minc );
                    for ( i = 0; i < sseheight; i++ ) {
                        register __m128i res;
                        register __m128i pr = prices16B[i];
                        res = _mm_andnot_si128( _mm_cmpeq_epi8( pr, mask ), m );
                        prices16B[i] = _mm_sub_epi8( pr, res );
                        ssedone[i] = 0;
                    }
                }

                pathindex += parts;
                pathindex16 += parts << 1;
            }

            {
                register __m128i mask = _mm_set1_epi32( 0x00ff00ff );

                if ( minc == 255 ) {
                    // NOTE(review): same suspicious aligned_free(path) +
                    // free(path) pair as above; confirm.
                    aligned_free( path );
                    free( ssedone );
                    free( matrices );
                    free( widths );
                    free( columns[0] );
                    free( columns[1] );
                    if ( stego != NULL ) free( path );
                    throw stc_exception( "The syndrome is not in the syndrome matrix range.", 4 );
                }

                // Prune to even or odd states per the syndrome bit,
                // compacting the byte prices.
                if ( syndrome[index2] == 0 ) {
                    for ( i = 0, l = 0; i < sseheight; i += 2, l++ ) {
                        _mm_store_si128( &prices16B[l], _mm_packus_epi16( _mm_and_si128( _mm_load_si128( &prices16B[i] ), mask ),
                                _mm_and_si128( _mm_load_si128( &prices16B[i + 1] ), mask ) ) );
                    }
                } else {
                    for ( i = 0, l = 0; i < sseheight; i += 2, l++ ) {
                        _mm_store_si128( &prices16B[l], _mm_packus_epi16( _mm_and_si128( _mm_srli_si128(_mm_load_si128(&prices16B[i]), 1),
                                mask ), _mm_and_si128( _mm_srli_si128(_mm_load_si128(&prices16B[i + 1]), 1), mask ) ) );
                    }
                }

                if ( syndromelength - index2 <= matrixheight ) colmask >>= 1;

                // Invalidate the now-unused upper half of the price array.
                register __m128i fillval = _mm_set1_epi32( 0xffffffff );
                for ( ; l < sseheight; l++ )
                    _mm_store_si128( &prices16B[l], fillval );
            }
        }

        totalprice = subprice + prices[0];

        aligned_free( prices );
        free( ssedone );
    }

    if ( stego != NULL ) {
        // Backtrack through the recorded decision bits, reconstructing the
        // state sequence (and hence the stego bits) from last to first.
        pathindex -= parts;
        index--;
        index2--;
        state = 0;

        // unused
        // int h = syndromelength;
        state = 0;
        colmask = 0;
        for ( ; index2 >= 0; index2-- ) {
            for ( k = widths[index2] - 1; k >= 0; k--, index-- ) {
                if ( k == widths[index2] - 1 ) {
                    // Entering a new syndrome bit: shift it into the state
                    // and widen the column mask back up.
                    state = (state << 1) | syndrome[index2];
                    if ( syndromelength - index2 <= matrixheight ) colmask = (colmask << 1) | 1;
                }

                if ( path[pathindex + (state >> 5)] & (1 << (state & 31)) ) {
                    stego[index] = 1;
                    state = state ^ (columns[matrices[index2]][k] & colmask);
                } else {
                    stego[index] = 0;
                }

                pathindex -= parts;
            }
        }
        free( path );
    }

    free( matrices );
    free( widths );
    free( columns[0] );
    free( columns[1] );

    return totalprice;
}
// }}}
stc_embed_c.h ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef STC_EMBED_C_H
2
+ #define STC_EMBED_C_H
3
+
4
+ #include "common.h"
5
+ /* Inputs:
6
+ cover - the binary cover vector
7
+ coverlength - length of the cover vector
8
+ message - the binary message to be hidden
9
+ messagelength - length of the message
10
+ profile - the vector of distortion weights (either double if usedouble = true, or u8 if usedouble = false)
11
+ usedouble - true = use double precision weight, false = use u8 weights
12
+ stego - pointer to an array of length 'coverlength' to receive the stego message; this parameter can be NULL
13
+ constr_height - the constraint height of the matrix; the higher, the better the efficiency but the greater the embedding time
14
+
15
+ Return value:
16
+ On success, the function returns the total distortion introduced by the embedding.
17
+ On error, the function returns -1.
18
+ */
19
+
20
+ double stc_embed(const u8 *cover, int coverlength, const u8 *message, int messagelength, const void *profile, bool usedouble, u8 *stego, int constr_height = 10);
21
+
22
+ #endif
stc_extract_c.cpp ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <cstdlib>
2
+ #include <cstring>
3
+ #include <cmath>
4
+ #include <cstdio>
5
+ #include "stc_extract_c.h"
6
+
7
+ // {{{ stc_extract()
8
+ int stc_extract(const u8 *vector, int vectorlength, u8 *message, int syndromelength, int matrixheight)
9
+ {
10
+ int i, j, k, index, index2, base, height;
11
+
12
+ u8 *binmat[2];
13
+ int *matrices, *widths;
14
+
15
+ height = matrixheight;
16
+
17
+ if(matrixheight > 31) {
18
+ fprintf(stderr, "Submatrix height must not exceed 31.");
19
+ return -1;
20
+ }
21
+
22
+ {
23
+ double invalpha;
24
+ int shorter, longer, worm;
25
+ u32 *columns[2];
26
+
27
+ matrices = (int *)malloc(syndromelength * sizeof(int));
28
+ widths = (int *)malloc(syndromelength * sizeof(int));
29
+
30
+ invalpha = (double)vectorlength / syndromelength;
31
+ if(invalpha < 1) {
32
+ fprintf(stderr, "The message cannot be longer than the cover object.\n");
33
+ return -1;
34
+ }
35
+ shorter = (int)floor(invalpha);
36
+ longer = (int)ceil(invalpha);
37
+ if((columns[0] = getMatrix(shorter, matrixheight)) == NULL) {
38
+ free(widths);
39
+ free(matrices);
40
+ return -1;
41
+ }
42
+ if((columns[1] = getMatrix(longer, matrixheight)) == NULL) {
43
+ free(columns[0]);
44
+ free(widths);
45
+ free(matrices);
46
+ return -1;
47
+ }
48
+ worm = 0;
49
+ for(i = 0; i < syndromelength; i++) {
50
+ if(worm + longer <= (i + 1) * invalpha + 0.5) {
51
+ matrices[i] = 1;
52
+ widths[i] = longer;
53
+ worm += longer;
54
+ } else {
55
+ matrices[i] = 0;
56
+ widths[i] = shorter;
57
+ worm += shorter;
58
+ }
59
+ }
60
+ binmat[0] = (u8*)malloc(shorter * matrixheight * sizeof(u8));
61
+ binmat[1] = (u8*)malloc(longer * matrixheight * sizeof(u8));
62
+ for(i = 0, index = 0; i < shorter; i++) {
63
+ for(j = 0; j < matrixheight; j++, index++) {
64
+ binmat[0][index] = (columns[0][i] & (1 << j)) ? 1 : 0;
65
+ }
66
+ }
67
+ for(i = 0, index = 0; i < longer; i++) {
68
+ for(j = 0; j < matrixheight; j++, index++) {
69
+ binmat[1][index] = (columns[1][i] & (1 << j)) ? 1 : 0;
70
+ }
71
+ }
72
+ free(columns[0]);
73
+ free(columns[1]);
74
+ }
75
+
76
+ for(i = 0; i < syndromelength; i++) {
77
+ message[i] = 0;
78
+ }
79
+
80
+ for(index = 0, index2 = 0; index2 < syndromelength; index2++) {
81
+ for(k = 0, base = 0; k < widths[index2]; k++, index++, base += matrixheight) {
82
+ if(vector[index]) {
83
+ for(i = 0; i < height; i++) {
84
+ message[index2 + i] ^= binmat[matrices[index2]][base + i];
85
+ }
86
+ }
87
+ }
88
+ if(syndromelength - index2 <= matrixheight)
89
+ height--;
90
+ }
91
+
92
+ free(matrices);
93
+ free(widths);
94
+ free(binmat[0]);
95
+ free(binmat[1]);
96
+
97
+ return 0;
98
+ }
99
+ // }}}
100
+
101
+
stc_extract_c.h ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef STC_EXTRACT_C_H
2
+ #define STC_EXTRACT_C_H
3
+
4
+ #include "common.h"
5
+
6
+ /* Inputs:
7
+ stego - the binary stego vector
8
+ stegolength - the length of the stego vector
9
+ message - pointer to an array of legth 'messagelength' to receive the extracted message
10
+ messagelegth - the length of the embedded message
11
+ constr_height - the constraint height of the matrix used for embedding the message
12
+
13
+ Return values:
14
+ 0 on succes, -1 on error
15
+ */
16
+
17
+ int stc_extract(const u8 *stego, int stegolength, u8 *message, int messagelength, int constr_height = 10);
18
+
19
+ #endif
stc_interface.cpp ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <iostream>
2
+ #include <iomanip>
3
+ #include <cmath>
4
+ #include <cstdlib>
5
+ #include <ctime>
6
+
7
+ #include "stc_ml_c.h"
8
+ #include "stc_interface.h"
9
+
10
+
11
+ uint h = 10; // constraint height of STC code
12
+
13
+ int stc_hide(uint cover_length, int* cover, float* costs,
14
+ uint message_length, u8* message, int* stego) {
15
+
16
+ const uint n = cover_length;
17
+ uint m = message_length;
18
+
19
+ // if the message cannot be embedded due to large amount of
20
+ // wet pixels, then try again with smaller message. Try at most 10 times.
21
+ uint trials = 10;
22
+
23
+ //std::cout << "message_length: " << message_length << std::endl;
24
+ unsigned int* num_msg_bits = new unsigned int[2];
25
+ float dist = stc_pm1_pls_embed(n, cover, costs, m, message, h, 2147483647, stego, num_msg_bits, trials, 0);
26
+ //std::cout << "hide -->" << num_msg_bits[0] << ", " << num_msg_bits[1] << std::endl;
27
+ delete[] num_msg_bits;
28
+
29
+ return 0;
30
+ }
31
+
32
+
33
+ int stc_unhide(uint stego_length, int* stego,
34
+ uint message_length, u8* message) {
35
+
36
+ unsigned int* num_msg_bits = new unsigned int[2];
37
+ num_msg_bits[1] = (uint) (message_length/2);
38
+ num_msg_bits[0] = message_length-num_msg_bits[1];
39
+
40
+ //std::cout << "message_length: " << message_length << std::endl;
41
+ //std::cout << "unhide -->" << num_msg_bits[0] << ", " << num_msg_bits[1] << std::endl;
42
+
43
+ stc_ml_extract(stego_length, stego, 2, num_msg_bits, h, message);
44
+
45
+ return 0;
46
+ }
47
+
48
+
stc_interface.h ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ #ifndef STC_INTERFACE_H
3
+ #define STC_INTERFACE_H
4
+
5
+ extern "C" {
6
+ int stc_hide(uint cover_length, int* cover, float* costs,
7
+ uint message_length, u8* message, int* stego);
8
+
9
+ int stc_unhide(uint stego_length, int* stego,
10
+ uint message_length, u8* message);
11
+ }
12
+
13
+ #endif
stc_ml_c.cpp ADDED
@@ -0,0 +1,932 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include "stc_ml_c.h"
2
+
3
+ #include <xmmintrin.h>
4
+ #include <cmath>
5
+ #include <limits>
6
+ #include <algorithm>
7
+ #include <sstream>
8
+ #include <fstream>
9
+ #include <iomanip>
10
+ #include <string.h> // due to memcpy
11
+
12
+
13
+ #include <boost/random/uniform_int.hpp> // this is required for Marsene-Twister random number generator
14
+ #include <boost/random/variate_generator.hpp>
15
+ #include <boost/random/mersenne_twister.hpp>
16
+
17
+
18
+ #include "stc_embed_c.h"
19
+ #include "stc_extract_c.h"
20
+ #include "sse_mathfun.h" // library with optimized functions obtained from http://gruntthepeon.free.fr/ssemath/
21
+
22
// {{{ write_vector_to_file()
// Debugging aid: dump an n-element array as a two-column text table (index, value).
template< class T > void write_vector_to_file( uint n, T *ptr, const char* file_name ) {

    std::ofstream out( file_name );
    for ( uint idx = 0; idx < n; idx++ ) {
        out << std::left << std::setw( 20 ) << idx;
        out << std::left << std::setw( 20 ) << ptr[idx] << std::endl;
    }
    out.close();
}
// }}}
31
+
32
// {{{ write_matrix_to_file()
// Debugging aid: dump a column-ordered rows x columns matrix as a text table,
// one row per line, preceded by the row index.
template< class T > void write_matrix_to_file( uint rows, uint columns, T *ptr, const char* file_name ) {

    std::ofstream out( file_name );
    for ( uint r = 0; r < rows; r++ ) {
        out << std::left << std::setw( 20 ) << r;
        for ( uint col = 0; col < columns; col++ ) {
            out << std::left << std::setw( 20 ) << ptr[col * rows + r]; // column-major storage
        }
        out << std::endl;
    }
    out.close();
}
// }}}
46
+
47
// {{{ align_*()
// Templates to handle aligned version of new and delete operators.
// These functions are necessary for creating arrays aligned address of certain multiples, such as 16.
// Layout of the raw allocation: [padding][offset:int][aligned data ...].
// The int stored immediately below the returned pointer records the distance
// back to the raw new[] block so align_delete can recover it.
// NOTE(review): align_size is assumed to be a power of two -- confirm at call sites.
template< class T > T* align_new( unsigned int n, unsigned int align_size ) {
    char *ptr, *ptr2, *aligned_ptr;
    int align_mask = align_size - 1;

    // extra room for the worst-case alignment shift plus the stored offset
    ptr = new char[n * sizeof(T) + align_size + sizeof(int)];
    if ( ptr == 0 ) return 0;

    ptr2 = ptr + sizeof(int);
    // advance past the offset slot up to the next align_size boundary
    aligned_ptr = ptr2 + (align_size - ((size_t) ptr2 & align_mask));

    ptr2 = aligned_ptr - sizeof(int);
    *((int*) ptr2) = (int) (aligned_ptr - ptr); // bytes from raw block to aligned start

    return (T*) aligned_ptr;
}

// Free a block obtained from align_new. Must NOT be mixed with plain delete[]:
// the pointer handed out above is offset into the raw allocation.
template< class T > void align_delete( T *ptr ) {
    int *ptr2 = (int*) ptr - 1; // the stored offset sits right before the aligned data
    char *p;

    p = (char*) ptr;
    p -= *ptr2; // step back to the original new[] pointer
    delete[] p;
}
// }}}
75
+
76
+ // {{{ randperm()
77
+ /* Generates random permutation of length n based on the MT random number generator with seed 'seed'. */
78
+ void randperm( uint n, uint seed, uint* perm ) {
79
+
80
+ boost::mt19937 *generator = new boost::mt19937( seed );
81
+ boost::variate_generator< boost::mt19937, boost::uniform_int< > > *randi = new boost::variate_generator< boost::mt19937,
82
+ boost::uniform_int< > >( *generator, boost::uniform_int< >( 0, INT_MAX ) );
83
+
84
+ // generate random permutation - this is used to shuffle cover pixels to randomize the effect of different neighboring pixels
85
+ for ( uint i = 0; i < n; i++ )
86
+ perm[i] = i;
87
+ for ( uint i = 0; i < n; i++ ) {
88
+ uint j = (*randi)() % (n - i);
89
+ uint tmp = perm[i];
90
+ perm[i] = perm[i + j];
91
+ perm[i + j] = tmp;
92
+ }
93
+
94
+ delete generator;
95
+ delete randi;
96
+ }
97
+ // }}}
98
+
99
// {{{ sum_inplace()
// Horizontal sum: add the four packed floats of x and return the total as a scalar.
inline float sum_inplace( __m128 x ) {
    float y;
    // add all 4 terms from x together
    x = _mm_add_ps( x, _mm_shuffle_ps(x,x,_MM_SHUFFLE(1,0,3,2)) ); // swap 64-bit halves: lanes become (x0+x2, x1+x3, ...)
    x = _mm_add_ps( x, _mm_shuffle_ps(x,x,_MM_SHUFFLE(2,3,0,1)) ); // swap adjacent lanes: every lane now holds the full sum
    _mm_store_ss( &y, x ); // read the lowest lane
    return y;
}
// }}}
109
+
110
// {{{ calc_entropy()
/* Entropy, in bits, of the Gibbs distribution p_ij ~ exp(-lambda*rho_ij)
   induced by the k*n cost array 'costs' at parameter 'lambda'.
   Vectorized over 4 cover elements at a time; n is assumed to be a multiple
   of 4 and 'costs' 16-byte aligned (required by _mm_load_ps). */
float calc_entropy( uint n, uint k, float* costs, float lambda ) {

    float const LOG2 = log( 2.0 );
    __m128 inf = _mm_set1_ps( F_INF );
    __m128 v_lambda = _mm_set1_ps( -lambda );
    __m128 z, d, rho, p, entr, mask;

    entr = _mm_setzero_ps();
    for ( uint i = 0; i < n / 4; i++ ) {
        z = _mm_setzero_ps(); // partition function Z = sum_j exp(-lambda*rho_j)
        d = _mm_setzero_ps(); // sum_j rho_j * exp(-lambda*rho_j)
        for ( uint j = 0; j < k; j++ ) {
            rho = _mm_load_ps( costs + j * n + 4 * i ); // costs array must be aligned in memory
            p = exp_ps( _mm_mul_ps( v_lambda, rho ) );
            z = _mm_add_ps( z, p );

            mask = _mm_cmpeq_ps( rho, inf ); // skip infinite costs: rho*exp(-lambda*rho) -> 0 as rho -> Inf, but Inf*0 would yield NaN
            p = _mm_mul_ps( rho, p );
            p = _mm_andnot_ps( mask, p ); // apply mask
            d = _mm_add_ps( d, p );
        }
        // H = lambda*E[rho] + log(Z), accumulated in nats; converted to bits on return
        entr = _mm_sub_ps( entr, _mm_div_ps( _mm_mul_ps( v_lambda, d ), z ) );
        entr = _mm_add_ps( entr, log_ps( z ) );
    }
    return sum_inplace( entr ) / LOG2;
}
// }}}
138
+
139
+ // {{{ get_lambda_entropy()
140
+ float get_lambda_entropy( uint n, uint k, float *costs, float payload, float initial_lambda = 10 ) {
141
+
142
+ float p1, p2, p3, lambda1, lambda2, lambda3;
143
+ int j = 0;
144
+ uint iterations = 0;
145
+
146
+ lambda1 = 0;
147
+ p1 = n * log( (float)k ) / log( 2.0f );
148
+ lambda3 = initial_lambda;
149
+ p3 = payload + 1; // this is just an initial value
150
+ lambda2 = initial_lambda;
151
+ while ( p3 > payload ) {
152
+ lambda3 *= 2;
153
+ p3 = calc_entropy( n, k, costs, lambda3 );
154
+ j++;
155
+ iterations++;
156
+ // beta is probably unbounded => it seems that we cannot find beta such that
157
+ // relative payload will be smaller than requested. Binary search does not make sence here.
158
+ if ( j > 10 ) {
159
+ return lambda3;
160
+ }
161
+ }
162
+ while ( (p1 - p3) / n > payload / n * 1e-2 ) { // binary search for parameter lambda
163
+ lambda2 = lambda1 + (lambda3 - lambda1) / 2;
164
+ p2 = calc_entropy( n, k, costs, lambda2 );
165
+ if ( p2 < payload ) {
166
+ lambda3 = lambda2;
167
+ p3 = p2;
168
+ } else {
169
+ lambda1 = lambda2;
170
+ p1 = p2;
171
+ }
172
+ iterations++; // this is for monitoring the number of iterations
173
+ }
174
+ return lambda1 + (lambda3 - lambda1) / 2;
175
+ }
176
+ // }}}
177
+
178
// {{{ calc_distortion()
/* Expected distortion E[rho] under the Gibbs distribution p_ij ~ exp(-lambda*rho_ij)
   induced by the k*n cost array at parameter 'lambda'.
   Vectorized over 4 elements at a time; 'costs' must be 16-byte aligned. */
float calc_distortion( uint n, uint k, float* costs, float lambda ) {

    __m128 eps = _mm_set1_ps( std::numeric_limits< float >::epsilon() );
    __m128 v_lambda = _mm_set1_ps( -lambda );
    __m128 z, d, rho, p, dist, mask;

    dist = _mm_setzero_ps();
    for ( uint i = 0; i < n / 4; i++ ) { // n must be multiple of 4
        z = _mm_setzero_ps(); // partition function Z = sum_j exp(-lambda*rho_j)
        d = _mm_setzero_ps(); // sum_j rho_j * exp(-lambda*rho_j)
        for ( uint j = 0; j < k; j++ ) {
            rho = _mm_load_ps( costs + j * n + 4 * i ); // costs array must be aligned in memory
            p = exp_ps( _mm_mul_ps( v_lambda, rho ) );
            z = _mm_add_ps( z, p );
            mask = _mm_cmplt_ps( p, eps ); // if p<eps, then do not accumulate it to d since x*exp(-x) tends to zero
            p = _mm_mul_ps( rho, p );
            p = _mm_andnot_ps( mask, p );
            d = _mm_add_ps( d, p );
        }
        dist = _mm_add_ps( dist, _mm_div_ps( d, z ) ); // E[rho] contribution of these 4 elements
    }
    return sum_inplace( dist );
}
// }}}
203
+
204
+ // {{{ get_lambda_distortion()
205
+ float get_lambda_distortion( uint n, uint k, float *costs, float distortion, float initial_lambda = 10, float precision = 1e-3,
206
+ uint iter_limit = 30 ) {
207
+
208
+ float dist1, dist2, dist3, lambda1, lambda2, lambda3;
209
+ int j = 0;
210
+ uint iterations = 0;
211
+
212
+ lambda1 = 0;
213
+ dist1 = calc_distortion( n, k, costs, lambda1 );
214
+ lambda3 = initial_lambda;
215
+ dist2 = F_INF; // this is just an initial value
216
+ lambda2 = initial_lambda;
217
+ dist3 = distortion + 1;
218
+ while ( dist3 > distortion ) {
219
+ lambda3 *= 2;
220
+ dist3 = calc_distortion( n, k, costs, lambda3 );
221
+ j++;
222
+ iterations++;
223
+ // beta is probably unbounded => it seems that we cannot find beta such that
224
+ // relative payload will be smaller than requested. Binary search cannot converge.
225
+ if ( j > 10 ) {
226
+ return lambda3;
227
+ }
228
+ }
229
+ while ( (fabs( dist2 - distortion ) / n > precision) && (iterations < iter_limit) ) { // binary search for parameter lambda
230
+ lambda2 = lambda1 + (lambda3 - lambda1) / 2;
231
+ dist2 = calc_distortion( n, k, costs, lambda2 );
232
+ if ( dist2 < distortion ) {
233
+ lambda3 = lambda2;
234
+ dist3 = dist2;
235
+ } else {
236
+ lambda1 = lambda2;
237
+ dist1 = dist2;
238
+ }
239
+ iterations++; // this is for monitoring the number of iterations
240
+ }
241
+ return lambda1 + (lambda3 - lambda1) / 2;
242
+ }
243
+ // }}}
244
+
245
// {{{ binary_entropy_array()
/* Sum of binary entropies H(p_i) = -p*log2(p) - (1-p)*log2(1-p), in bits,
   over the n probabilities in 'prob'. Entries numerically equal to 0 or 1
   contribute nothing (the limit of p*log(p) is 0). */
float binary_entropy_array( uint n, float *prob ) {

    float const LOG2 = log( 2.0 );
    float const EPS = std::numeric_limits< float >::epsilon();
    float total = 0;

    for ( uint i = 0; i < n; i++ ) {
        float p = prob[i];
        if ( (p > EPS) && (1 - p > EPS) ) {
            total -= p * log( p ) + (1 - p) * log( 1 - p );
        }
    }
    return total / LOG2;
}
// }}}
258
+
259
// {{{ entropy_array()
/* Shannon entropy -sum_i p_i*log2(p_i), in bits, of the n-element probability
   array; accumulated in double precision. Zero entries are skipped. */
float entropy_array( uint n, float* prob ) {

    double const LOG2 = log( 2.0 );
    double const EPS = std::numeric_limits< double >::epsilon();
    double total = 0;

    for ( uint i = 0; i < n; i++ ) {
        if ( prob[i] > EPS ) {
            total -= prob[i] * log( prob[i] );
        }
    }
    return total / LOG2;
}
// }}}
272
+
273
// {{{ mod()
// Mathematical modulo: result is always in [0, m), even for negative x
// (unlike the C '%' operator, whose sign follows the dividend).
inline uint mod( int x, int m ) {
    int r = x - (x / m) * m; // C remainder; may be negative
    return (r + m) % m;
}
// }}}
279
+
280
+
281
+
282
+ /* EMBEDDING ALGORITHMS */
283
+
284
// {{{ stc_embed_trial()
/* One layer of the multi-layered construction: repeatedly attempt binary STC
   embedding until a permutation is found for which a solution exists.

   n                     - number of cover elements
   cover_bit_prob0       - per-element probability that the cover bit is 0
   message               - bits to embed
   stc_constraint_height - STC constraint height
   num_msg_bits          - in/out: bits to embed; decremented on every failed
                           trial (which also changes the permutation seed)
   perm                  - out: permutation used by the successful trial
   stego                 - out: resulting binary stego vector
   trial                 - in/out: accumulated trial counter
   max_trials            - maximum total number of trials before giving up
   debugging_file        - only used by the commented-out debugging dump

   Throws stc_exception(..., 6) when max_trials is exhausted; any stc_embed
   error other than error_id 4 ("no solution exists") is re-thrown. */
void stc_embed_trial( uint n, float* cover_bit_prob0, u8* message, uint stc_constraint_height, uint &num_msg_bits, uint* perm, u8* stego,
    uint &trial, uint max_trials, const char* debugging_file = "cost.txt" ) {

    bool success = false;
    u8* cover = new u8[n];
    double* cost = new double[n];
    while ( !success ) {
        // permutation is seeded by the bit count, so the extractor can re-derive it
        randperm( n, num_msg_bits, perm );
        for ( uint i = 0; i < n; i++ ) {
            cover[perm[i]] = (cover_bit_prob0[i] < 0.5) ? 1 : 0; // most likely bit value
            // cost of flipping away from the likely value: log-odds of the bit probabilities
            cost[perm[i]] = -log( (1 / std::max( cover_bit_prob0[i], 1 - cover_bit_prob0[i] )) - 1 );
            if ( cost[perm[i]] != cost[perm[i]] ) // if p20[i]>1 due to numerical error (this is possible due to float data type)
                cost[perm[i]] = D_INF; // then cost2[i] is NaN, it should be Inf
        }
        memcpy( stego, cover, n ); // initialize stego array by cover array
        // debugging
        // write_vector_to_file<double>(n, cost, debugging_file);
        try {
            if ( num_msg_bits != 0 ) stc_embed( cover, n, message, num_msg_bits, (void*) cost, true, stego, stc_constraint_height );
            success = true;
        } catch ( stc_exception& e ) {
            if ( e.error_id != 4 ) { // error_id=4 means No solution exists, thus we try to embed with different permutation.
                delete[] cost;
                delete[] cover;
                throw e;
            }
            num_msg_bits--; // by decreasing the number of bits, we change the permutation used to shuffle the bits
            trial++;
            if ( trial > max_trials ) {
                delete[] cost;
                delete[] cover;
                throw stc_exception( "Maximum number of trials in layered construction exceeded (2).", 6 );
            }
        }
    }
    delete[] cost;
    delete[] cover;
}
// }}}
324
+
325
+ // {{{ check_costs()
326
+ // SANITY CHECKS for cost arrays
327
+ void check_costs( uint n, uint k, float *costs ) {
328
+
329
+ bool test_nan, test_non_inf, test_minus_inf;
330
+ for ( uint i = 0; i < n; i++ ) {
331
+ test_nan = false; // Is any element NaN? Should be FALSE
332
+ test_non_inf = false; // Is any element finite? Should be TRUE
333
+ test_minus_inf = false; // Is any element minus Inf? should be FALSE
334
+ for ( uint j = 0; j < k; j++ ) {
335
+ test_nan |= (costs[k * i + j] != costs[k * i + j]);
336
+ test_non_inf |= ((costs[k * i + j] != -F_INF) & (costs[k * i + j] != F_INF));
337
+ test_minus_inf |= (costs[k * i + j] == -F_INF);
338
+ }
339
+ if ( test_nan ) {
340
+ std::stringstream ss;
341
+ ss << "Incorrect cost array." << i << "-th element contains NaN value. This is not a valid cost.";
342
+ throw stc_exception( ss.str(), 6 );
343
+ }
344
+ if ( !test_non_inf ) {
345
+ std::stringstream ss;
346
+ ss << "Incorrect cost array." << i << "-th element does not contain any finite cost value. This is not a valid cost.";
347
+ throw stc_exception( ss.str(), 6 );
348
+ }
349
+ if ( test_minus_inf ) {
350
+ std::stringstream ss;
351
+ ss << "Incorrect cost array." << i << "-th element contains -Inf value. This is not a valid cost.";
352
+ throw stc_exception( ss.str(), 6 );
353
+ }
354
+ }
355
+ }
356
+ // }}}
357
+
358
+ // {{{ stc_pm1_pls_embed()
359
+ // MULTI-LAYERED EMBEDDING for plus/minus one changes
360
+ // payload limited case - returns distortion
361
+ float stc_pm1_pls_embed( uint cover_length, int* cover, float* costs, uint message_length, u8* message, // input variables
362
+ uint stc_constraint_height, float wet_cost, // other input parameters
363
+ int* stego, uint* num_msg_bits, uint &max_trials, float* coding_loss ) { // output variables
364
+
365
+ return stc_pm1_dls_embed( cover_length, cover, costs, message_length, message, F_INF, stc_constraint_height, 0, wet_cost, stego,
366
+ num_msg_bits, max_trials, coding_loss );
367
+ }
368
+ // }}}
369
+
370
+ // {{{ stc_pm1_dls_embed()
371
+ // distortion limited case - returns distortion
372
+ float stc_pm1_dls_embed( uint cover_length, int* cover, float* costs, uint message_length, u8* message, float target_distortion, // input variables
373
+ uint stc_constraint_height, float expected_coding_loss, float wet_cost, // other input parameters
374
+ int* stego, uint* num_msg_bits, uint &max_trials, float* coding_loss ) { // output variables
375
+
376
+ check_costs( cover_length, 3, costs );
377
+ float dist = 0;
378
+
379
+ int *stego_values = new int[4 * cover_length];
380
+ float *costs_ml2 = new float[4 * cover_length];
381
+ for ( uint i = 0; i < cover_length; i++ ) {
382
+ costs_ml2[4 * i + mod( (cover[i] - 1 + 4), 4 )] = costs[3 * i + 0]; // set cost of changing by -1
383
+ stego_values[4 * i + mod( (cover[i] - 1 + 4), 4 )] = cover[i] - 1;
384
+ costs_ml2[4 * i + mod( (cover[i] + 0 + 4), 4 )] = costs[3 * i + 1]; // set cost of changing by 0
385
+ stego_values[4 * i + mod( (cover[i] + 0 + 4), 4 )] = cover[i];
386
+ costs_ml2[4 * i + mod( (cover[i] + 1 + 4), 4 )] = costs[3 * i + 2]; // set cost of changing by +1
387
+ stego_values[4 * i + mod( (cover[i] + 1 + 4), 4 )] = cover[i] + 1;
388
+ costs_ml2[4 * i + mod( (cover[i] + 2 + 4), 4 )] = wet_cost; // set cost of changing by +2
389
+ stego_values[4 * i + mod( (cover[i] + 2 + 4), 4 )] = cover[i] + 2;
390
+ }
391
+
392
+ // run general 2 layered embedding in distortion limited regime
393
+ dist = stc_ml2_embed( cover_length, costs_ml2, stego_values, message_length, message, target_distortion, stc_constraint_height,
394
+ expected_coding_loss, stego, num_msg_bits, max_trials, coding_loss );
395
+ delete[] costs_ml2;
396
+ delete[] stego_values;
397
+
398
+ return dist;
399
+ }
400
+ // }}}
401
+
402
+ // {{{ stc_pm2_dls_embed()
403
+ // MULTI-LAYERED EMBEDDING for plus/minus one and two changes
404
+ // payload limited case - returns distortion
405
+ float stc_pm2_pls_embed( uint cover_length, int* cover, float* costs, uint message_length, u8* message, // input variables
406
+ uint stc_constraint_height, float wet_cost, // other input parameters
407
+ int* stego, uint* num_msg_bits, uint &max_trials, float* coding_loss ) { // output variables
408
+
409
+ return stc_pm2_dls_embed( cover_length, cover, costs, message_length, message, F_INF, stc_constraint_height, 0, wet_cost, stego,
410
+ num_msg_bits, max_trials, coding_loss );
411
+ }
412
+ // }}}
413
+
414
+ // {{{ stc_pm2_dls_embed()
415
+ // distortion limited case - returns distortion
416
+ float stc_pm2_dls_embed( uint cover_length, int* cover, float* costs, uint message_length, u8* message, float target_distortion, // input variables
417
+ uint stc_constraint_height, float expected_coding_loss, float wet_cost, // other input parameters
418
+ int* stego, uint* num_msg_bits, uint &max_trials, float* coding_loss ) { // output variables
419
+
420
+ check_costs( cover_length, 5, costs );
421
+ int *stego_values = new int[8 * cover_length];
422
+ float* costs_ml3 = new float[8 * cover_length];
423
+ std::fill_n( costs_ml3, 8 * cover_length, wet_cost ); // initialize new cost array
424
+
425
+ for ( uint i = 0; i < cover_length; i++ ) {
426
+ costs_ml3[8 * i + mod( (cover[i] - 2 + 8), 8 )] = costs[5 * i + 0]; // set cost of changing by -2
427
+ stego_values[8 * i + mod( (cover[i] - 2 + 8), 8 )] = cover[i] - 2;
428
+ costs_ml3[8 * i + mod( (cover[i] - 1 + 8), 8 )] = costs[5 * i + 1]; // set cost of changing by -1
429
+ stego_values[8 * i + mod( (cover[i] - 1 + 8), 8 )] = cover[i] - 1;
430
+ costs_ml3[8 * i + mod( (cover[i] + 0 + 8), 8 )] = costs[5 * i + 2]; // set cost of changing by 0
431
+ stego_values[8 * i + mod( (cover[i] + 0 + 8), 8 )] = cover[i] + 0;
432
+ costs_ml3[8 * i + mod( (cover[i] + 1 + 8), 8 )] = costs[5 * i + 3]; // set cost of changing by +1
433
+ stego_values[8 * i + mod( (cover[i] + 1 + 8), 8 )] = cover[i] + 1;
434
+ costs_ml3[8 * i + mod( (cover[i] + 2 + 8), 8 )] = costs[5 * i + 4]; // set cost of changing by +2
435
+ stego_values[8 * i + mod( (cover[i] + 2 + 8), 8 )] = cover[i] + 2;
436
+ stego_values[8 * i + mod( (cover[i] + 3 + 8), 8 )] = cover[i] + 3; // these values are not used and are defined
437
+ stego_values[8 * i + mod( (cover[i] + 4 + 8), 8 )] = cover[i] + 4; // just to have the array complete
438
+ stego_values[8 * i + mod( (cover[i] + 5 + 8), 8 )] = cover[i] + 5; //
439
+ }
440
+
441
+ // run general 3 layered embedding in distortion limited regime
442
+ float dist = stc_ml3_embed( cover_length, costs_ml3, stego_values, message_length, message, target_distortion, stc_constraint_height,
443
+ expected_coding_loss, stego, num_msg_bits, max_trials, coding_loss );
444
+ delete[] costs_ml3;
445
+ delete[] stego_values;
446
+
447
+ return dist;
448
+ }
449
+ // }}}
450
+
451
+ // GENERAL MULTI-LAYERED EMBEDDING
452
+
453
+ // {{{ stc_ml1_embed()
454
+ // algorithm for embedding into 1 layer, both payload- and distortion-limited case
455
+ float stc_ml1_embed( uint cover_length, int* cover, short* direction, float* costs, uint message_length, u8* message,
456
+ float target_distortion,// input variables
457
+ uint stc_constraint_height, float expected_coding_loss, // other input parameters
458
+ int* stego, uint* num_msg_bits, uint &max_trials, float* coding_loss ) { // output variables
459
+
460
+ float distortion, lambda = 0, m_max = 0;
461
+ bool success = false;
462
+ uint m_actual = 0;
463
+ uint n = cover_length + 4 - (cover_length % 4); // cover length rounded to multiple of 4
464
+ uint *perm1 = new uint[n];
465
+
466
+ float* c = align_new< float > ( 2 * n, 16 );
467
+ std::fill_n( c, 2 * n, F_INF );
468
+ std::fill_n( c, n, 0 );
469
+ for ( uint i = 0; i < cover_length; i++ ) { // copy and transpose data for better reading via SSE instructions
470
+ c[mod( cover[i], 2 ) * n + i] = 0; // cost of not changing the element
471
+ c[mod( (cover[i] + 1), 2 ) * n + i] = costs[i]; // cost of changing the element
472
+ }
473
+
474
+ if ( target_distortion != F_INF ) { // distortion-limited sender
475
+ lambda = get_lambda_distortion( n, 2, c, target_distortion, 2 ); //
476
+ m_max = (1 - expected_coding_loss) * calc_entropy( n, 2, c, lambda ); //
477
+ m_actual = std::min( message_length, (uint) floor( m_max ) ); //
478
+ }
479
+ if ( (target_distortion == F_INF) || (m_actual < floor( m_max )) ) { // payload-limited sender
480
+ m_actual = std::min( cover_length, message_length ); // or distortion-limited sender with
481
+ }
482
+
483
+ /* SINGLE LAYER OF 1ST LSBs */
484
+ num_msg_bits[0] = m_actual;
485
+ uint trial = 0;
486
+ u8* cover1 = new u8[cover_length];
487
+ double* cost1 = new double[cover_length];
488
+ u8* stego1 = new u8[cover_length];
489
+ while ( !success ) {
490
+ randperm( cover_length, num_msg_bits[0], perm1 );
491
+ for ( uint i = 0; i < cover_length; i++ ) {
492
+ cover1[perm1[i]] = mod( cover[i], 2 );
493
+ cost1[perm1[i]] = costs[i];
494
+ if ( cost1[perm1[i]] != cost1[perm1[i]] ) cost1[perm1[i]] = D_INF;
495
+ }
496
+ memcpy( stego1, cover1, cover_length ); // initialize stego array by cover array
497
+ // debugging
498
+ // write_vector_to_file<double>(n, cost, debugging_file);
499
+ try {
500
+ if ( num_msg_bits[0] != 0 ) stc_embed( cover1, cover_length, message, num_msg_bits[0], (void*) cost1, true, stego1,
501
+ stc_constraint_height );
502
+ success = true;
503
+ } catch ( stc_exception& e ) {
504
+ if ( e.error_id != 4 ) { // error_id=4 means No solution exists, thus we try to embed with different permutation.
505
+ delete[] cost1;
506
+ delete[] cover1;
507
+ delete[] stego1;
508
+ delete[] perm1;
509
+ delete[] c;
510
+ throw e;
511
+ }
512
+ num_msg_bits[0]--; // by decreasing the number of bits, we change the permutation used to shuffle the bits
513
+ trial++;
514
+ if ( trial > max_trials ) {
515
+ delete[] cost1;
516
+ delete[] cover1;
517
+ delete[] stego1;
518
+ delete[] perm1;
519
+ delete[] c;
520
+ throw stc_exception( "Maximum number of trials in layered construction exceeded (1).", 6 );
521
+ }
522
+ }
523
+ }
524
+
525
+ /* FINAL CALCULATIONS */
526
+ distortion = 0;
527
+ for ( uint i = 0; i < cover_length; i++ ) {
528
+ stego[i] = (stego1[perm1[i]] == cover1[perm1[i]]) ? cover[i] : cover[i] + direction[i];
529
+ distortion += (stego1[perm1[i]] == cover1[perm1[i]]) ? 0 : costs[i];
530
+ }
531
+ if ( coding_loss != 0 ) {
532
+ float lambda_dist = get_lambda_distortion( n, 2, c, distortion, lambda, 0, 20 ); // use 20 iterations to make lambda_dist precise
533
+ float max_payload = calc_entropy( n, 2, c, lambda_dist );
534
+ (*coding_loss) = (max_payload - m_actual) / max_payload; // fraction of max_payload lost due to practical coding scheme
535
+ }
536
+ max_trials = trial;
537
+
538
+ delete[] cost1;
539
+ delete[] cover1;
540
+ delete[] stego1;
541
+ delete[] perm1;
542
+ align_delete< float > ( c );
543
+
544
+ return distortion;
545
+ }
546
+ // }}}
547
+
548
// {{{ stc_ml2_embed()
// algorithm for embedding into 2 layers with possibility to use only 1 layer, both payload- and distortion-limited cases
/** Multi-layered STC embedding into the two least significant bit planes.
 *
 *  Falls back to a single binary layer (stc_ml1_embed) when, for every cover
 *  element, at most two stego values have finite cost and their LSBs differ.
 *
 *  @param cover_length          number of cover elements
 *  @param costs                 4*cover_length entries; costs[4*i+k] = cost of making the
 *                               two LSBs of element i equal to k (F_INF marks a forbidden value)
 *  @param stego_values          4*cover_length candidate stego values, parallel to costs
 *  @param message_length        number of available message bits in 'message'
 *  @param message               message bits (one bit per u8)
 *  @param target_distortion     distortion bound, or F_INF for the payload-limited case
 *  @param stc_constraint_height STC constraint height (speed vs. efficiency trade-off)
 *  @param expected_coding_loss  anticipated relative capacity loss of the practical code
 *  @param stego                 [out] cover_length resulting stego values
 *  @param num_msg_bits          [out] num_msg_bits[1] = bits in 2nd LSB plane,
 *                               num_msg_bits[0] = bits in 1st LSB plane
 *  @param max_trials            [in] maximum embedding trials, [out] trials actually used
 *  @param coding_loss           [out, optional] measured coding loss; pass 0 to skip
 *  @return total embedding distortion
 *  @throws stc_exception from stc_embed_trial; all temporaries are released before rethrow
 */
float stc_ml2_embed( uint cover_length, float* costs, int* stego_values, uint message_length, u8* message, float target_distortion, // input variables
    uint stc_constraint_height, float expected_coding_loss, // other input parameters
    int* stego, uint* num_msg_bits, uint &max_trials, float* coding_loss ) { // output and optional variables

    float distortion, dist_coding_loss, lambda = 0, m_max = 0;
    uint m_actual = 0;
    uint n = cover_length + 4 - (cover_length % 4); // cover length rounded to multiple of 4 (padding needed by the SSE loops below)

    check_costs( cover_length, 4, costs );
    // if only binary embedding is sufficient, then use only 1st LSB layer
    bool lsb1_only = true;
    for ( uint i = 0; i < cover_length; i++ ) {
        uint n_finite_costs = 0; // number of finite cost values
        uint lsb_xor = 0;        // XOR of the LSBs of all finitely-priced candidates
        for ( uint k = 0; k < 4; k++ )
            if ( costs[4 * i + k] != F_INF ) {
                n_finite_costs++;
                lsb_xor ^= (k % 2);
            }
        // element qualifies iff it offers at most 2 choices and those choices differ in their LSB
        lsb1_only &= ((n_finite_costs <= 2) & (lsb_xor == 1));
    }
    if ( lsb1_only ) { // use stc_ml1_embed method
        distortion = 0;
        int *cover = new int[cover_length];
        short *direction = new short[cover_length];
        float *costs_ml1 = new float[cover_length];
        for ( uint i = 0; i < cover_length; i++ ) { // normalize such that minimal element is 0 - this helps numerical stability
            uint min_id = 0;
            float f_min = F_INF;
            for ( uint j = 0; j < 4; j++ )
                if ( f_min > costs[4 * i + j] ) {
                    f_min = costs[4 * i + j]; // minimum value
                    min_id = j; // index of the minimal entry
                }
            costs_ml1[i] = F_INF;
            cover[i] = stego_values[4 * i + min_id]; // cheapest candidate plays the role of the "cover" value
            for ( uint j = 0; j < 4; j++ )
                if ( (costs[4 * i + j] != F_INF) && (min_id != j) ) {
                    distortion += f_min;
                    costs_ml1[i] = costs[4 * i + j] - f_min; // extra cost of flipping the LSB
                    direction[i] = stego_values[4 * i + j] - cover[i]; // signed step that realizes the flip
                }
        }

        distortion += stc_ml1_embed( cover_length, cover, direction, costs_ml1, message_length, message, target_distortion,
            stc_constraint_height, expected_coding_loss, stego, num_msg_bits, max_trials, coding_loss );
        delete[] direction;
        delete[] costs_ml1;
        delete[] cover;
        return distortion;
    }

    // copy and transpose data for faster reading via SSE instructions
    float* c = align_new< float > ( 4 * n, 16 );
    std::fill_n( c, 4 * n, F_INF );
    std::fill_n( c, n, 0 ); // padding columns: first row 0, rest F_INF, so they contribute nothing
    for ( uint i = 0; i < 4 * cover_length; i++ )
        c[n * (i % 4) + i / 4] = costs[i];
    // write_matrix_to_file<float>(n, 4, c, "cost_ml2.txt");
    for ( uint i = 0; i < n; i++ ) { // normalize such that minimal element is 0 - this helps numerical stability
        float f_min = F_INF;
        for ( uint j = 0; j < 4; j++ )
            f_min = std::min( f_min, c[j * n + i] );
        for ( uint j = 0; j < 4; j++ )
            c[j * n + i] -= f_min;
    }

    if ( target_distortion != F_INF ) { // distortion-limited: derive lambda from the distortion bound
        lambda = get_lambda_distortion( n, 4, c, target_distortion, 2 );
        m_max = (1 - expected_coding_loss) * calc_entropy( n, 4, c, lambda );
        m_actual = std::min( message_length, (uint) floor( m_max ) );
    }
    if ( (target_distortion == F_INF) || (m_actual < floor( m_max )) ) { // payload-limited: derive lambda from the payload
        m_actual = std::min( 2 * cover_length, message_length );
        lambda = get_lambda_entropy( n, 4, c, m_actual, 2 );
    }
    /*
       p = exp(-lambda*costs);
       p = p./(ones(4,1)*sum(p));
    */
    // Gibbs distribution over the 4 candidates of each element, computed 4 columns at a time with SSE.
    float* p = align_new< float > ( 4 * n, 16 );
    __m128 v_lambda = _mm_set1_ps( -lambda );
    for ( uint i = 0; i < n / 4; i++ ) {
        __m128 sum = _mm_setzero_ps();
        for ( uint j = 0; j < 4; j++ ) {
            __m128 x = _mm_load_ps( c + j * n + 4 * i );
            x = exp_ps( _mm_mul_ps( v_lambda, x ) );
            _mm_store_ps( p + j * n + 4 * i, x );
            sum = _mm_add_ps( sum, x );
        }
        for ( uint j = 0; j < 4; j++ ) { // normalize each column to a probability distribution
            __m128 x = _mm_load_ps( p + j * n + 4 * i );
            x = _mm_div_ps( x, sum );
            _mm_store_ps( p + j * n + 4 * i, x );
        }
    }
    // this is for debugging purposes
    // float payload_dbg = entropy_array(4*n, p);

    uint trial = 0;
    float* p10 = new float[cover_length];
    float* p20 = new float[cover_length];
    u8* stego1 = new u8[cover_length];
    u8* stego2 = new u8[cover_length];
    uint *perm1 = new uint[cover_length];
    uint *perm2 = new uint[cover_length];

    /* LAYER OF 2ND LSBs */
    for ( uint i = 0; i < cover_length; i++ )
        p20[i] = p[i] + p[i + n]; // p20 = p(1,:)+p(2,:); % probability of 2nd LSB of stego equal 0
    //num_msg_bits[1] = (uint) floor( binary_entropy_array( cover_length, p20 ) ); // msg_bits(2) = floor(sum(binary_entropy(p20))); % number of msg bits embedded into 2nd LSBs
    // NOTE(review): deliberate deviation from the original entropy-based split above —
    // the payload is split in half so the extractor can recompute num_msg_bits without
    // knowing the costs. Verify this matches the split assumed by the caller/extractor.
    num_msg_bits[1] = (uint) (message_length/2 /*+ message_length%2*/ ); // XXX

    try {
        stc_embed_trial( cover_length, p20, message, stc_constraint_height, num_msg_bits[1], perm2, stego2, trial, max_trials, "cost2.txt" );
    } catch ( stc_exception& e ) {
        // release every temporary before propagating the failure
        delete[] p10;
        delete[] p20;
        delete[] perm1;
        delete[] perm2;
        delete[] stego1;
        delete[] stego2;
        align_delete< float > ( c );
        align_delete< float > ( p );
        throw e;
    }

    /* LAYER OF 1ST LSBs */
    for ( uint i = 0; i < cover_length; i++ ) //
        if ( stego2[perm2[i]] == 0 ) // % conditional probability of 1st LSB of stego equal 0 given LSB2=0
            p10[i] = p[i] / (p[i] + p[i + n]); // p10(i) = p(1,i)/(p(1,i)+p(2,i));
        else // % conditional probability of 1st LSB of stego equal 0 given LSB2=1
            p10[i] = p[i + 2 * n] / (p[i + 2 * n] + p[i + 3 * n]); // p10(i) = p(3,i)/(p(3,i)+p(4,i));
    num_msg_bits[0] = m_actual - num_msg_bits[1]; // msg_bits(1) = m_actual-msg_bits(2); % number of msg bits embedded into 1st LSBs
    try {
        stc_embed_trial( cover_length, p10, message + num_msg_bits[1], stc_constraint_height, num_msg_bits[0], perm1, stego1, trial,
            max_trials, "cost1.txt" );
    } catch ( stc_exception& e ) {
        // release every temporary before propagating the failure
        delete[] p10;
        delete[] p20;
        delete[] perm1;
        delete[] perm2;
        delete[] stego1;
        delete[] stego2;
        align_delete< float > ( c );
        align_delete< float > ( p );
        throw e;
    }
    delete[] p10;
    delete[] p20;

    /* FINAL CALCULATIONS */
    // combine both bit planes: index 2*LSB2 + LSB1 selects the stego value/cost of element i
    distortion = 0;
    for ( uint i = 0; i < cover_length; i++ ) {
        stego[i] = stego_values[4 * i + 2 * stego2[perm2[i]] + stego1[perm1[i]]];
        distortion += costs[4 * i + 2 * stego2[perm2[i]] + stego1[perm1[i]]];
    }
    if ( coding_loss != 0 ) {
        dist_coding_loss = 0;
        for ( uint i = 0; i < cover_length; i++ )
            dist_coding_loss += c[i + n * (2 * stego2[perm2[i]] + stego1[perm1[i]])];
        float lambda_dist = get_lambda_distortion( n, 4, c, dist_coding_loss, lambda, 0, 20 ); // use 20 iterations to make lambda_dist precise
        float max_payload = calc_entropy( n, 4, c, lambda_dist );
        (*coding_loss) = (max_payload - m_actual) / max_payload; // fraction of max_payload lost due to practical coding scheme
    }
    max_trials = trial;

    delete[] stego1;
    delete[] stego2;
    delete[] perm1;
    delete[] perm2;
    align_delete< float > ( c );
    align_delete< float > ( p );

    return distortion;
}
// }}}
727
+
728
// {{{ stc_ml3_embed()
// algorithm for embedding into 3 layers, both payload- and distortion-limited case
/** Multi-layered STC embedding into the three least significant bit planes.
 *
 *  Layers are embedded from the 3rd LSB plane down to the 1st; each lower layer
 *  uses probabilities conditioned on the stego bits already fixed above it.
 *
 *  @param cover_length          number of cover elements
 *  @param costs                 8*cover_length entries; costs[8*i+k] = cost of making the
 *                               three LSBs of element i equal to k (F_INF marks a forbidden value)
 *  @param stego_values          8*cover_length candidate stego values, parallel to costs
 *  @param message_length        number of available message bits in 'message'
 *  @param message               message bits (one bit per u8)
 *  @param target_distortion     distortion bound, or F_INF for the payload-limited case
 *  @param stc_constraint_height STC constraint height (speed vs. efficiency trade-off)
 *  @param expected_coding_loss  anticipated relative capacity loss of the practical code
 *  @param stego                 [out] cover_length resulting stego values
 *  @param num_msg_bits          [out] bits embedded per plane: [2]=3rd, [1]=2nd, [0]=1st LSBs
 *  @param max_trials            [in] maximum embedding trials, [out] trials actually used
 *  @param coding_loss           [out, optional] measured coding loss; pass 0 to skip
 *  @return total embedding distortion
 *  @throws stc_exception from stc_embed_trial; all temporaries are released before rethrow
 */
float stc_ml3_embed( uint cover_length, float* costs, int* stego_values, uint message_length, u8* message, float target_distortion, // input variables
    uint stc_constraint_height, float expected_coding_loss, // other input parameters
    int* stego, uint* num_msg_bits, uint &max_trials, float* coding_loss ) { // output and optional variables

    float distortion, dist_coding_loss, lambda = 0, m_max = 0;
    uint m_actual = 0;
    uint n = cover_length + 4 - (cover_length % 4); // cover length rounded to multiple of 4 (padding needed by the SSE loops below)

    check_costs( cover_length, 8, costs );
    float* c = align_new< float > ( 8 * n, 16 );
    std::fill_n( c, 8 * n, F_INF );
    std::fill_n( c, n, 0 ); // padding columns: first row 0, rest F_INF, so they contribute nothing
    for ( uint i = 0; i < 8 * cover_length; i++ )
        c[n * (i % 8) + i / 8] = costs[i]; // copy and transpose data for better reading via SSE instructions
    // write_matrix_to_file<float>(n, 8, c, "cost_ml3.txt");
    for ( uint i = 0; i < n; i++ ) { // normalize such that minimal element is 0 - this helps numerical stability
        float f_min = F_INF;
        for ( uint j = 0; j < 8; j++ )
            f_min = std::min( f_min, c[j * n + i] );
        for ( uint j = 0; j < 8; j++ )
            c[j * n + i] -= f_min;
    }

    if ( target_distortion != F_INF ) { // distortion-limited: derive lambda from the distortion bound
        lambda = get_lambda_distortion( n, 8, c, target_distortion, 2.0 );
        m_max = (1 - expected_coding_loss) * calc_entropy( n, 8, c, lambda );
        m_actual = std::min( message_length, (uint) floor( m_max ) );
    }
    if ( (target_distortion == F_INF) || (m_actual < floor( m_max )) ) { // payload-limited: derive lambda from the payload
        m_actual = std::min( 3 * cover_length, message_length );
        lambda = get_lambda_entropy( n, 8, c, m_actual, 2.0 );
    }
    /*
       p = exp(-lambda*costs);
       p = p./(ones(8,1)*sum(p));
    */
    // Gibbs distribution over the 8 candidates of each element, computed 4 columns at a time with SSE.
    float* p = align_new< float > ( 8 * n, 16 );
    __m128 v_lambda = _mm_set1_ps( -lambda );
    for ( uint i = 0; i < n / 4; i++ ) {
        __m128 sum = _mm_setzero_ps();
        for ( uint j = 0; j < 8; j++ ) {
            __m128 x = _mm_load_ps( c + j * n + 4 * i );
            x = exp_ps( _mm_mul_ps( v_lambda, x ) );
            _mm_store_ps( p + j * n + 4 * i, x );
            sum = _mm_add_ps( sum, x );
        }
        for ( uint j = 0; j < 8; j++ ) { // normalize each column to a probability distribution
            __m128 x = _mm_load_ps( p + j * n + 4 * i );
            x = _mm_div_ps( x, sum );
            _mm_store_ps( p + j * n + 4 * i, x );
        }
    }
    // this is for debugging
    // float payload_dbg = entropy_array(8*n, p);

    uint trial = 0;
    float* p10 = new float[cover_length];
    float* p20 = new float[cover_length];
    float* p30 = new float[cover_length];
    u8* stego1 = new u8[cover_length];
    u8* stego2 = new u8[cover_length];
    u8* stego3 = new u8[cover_length];
    uint *perm1 = new uint[cover_length];
    uint *perm2 = new uint[cover_length];
    uint *perm3 = new uint[cover_length];

    /* LAYER OF 3RD LSBs */
    for ( uint i = 0; i < cover_length; i++ )
        p30[i] = p[i] + p[i + n] + p[i + 2 * n] + p[i + 3 * n]; // probability of 3rd LSB of stego equal 0
    num_msg_bits[2] = (uint) floor( binary_entropy_array( cover_length, p30 ) ); // number of msg bits embedded into 3rd LSBs
    try {
        stc_embed_trial( cover_length, p30, message, stc_constraint_height, num_msg_bits[2], perm3, stego3, trial, max_trials, "cost3.txt" );
    } catch ( stc_exception& e ) {
        // release every temporary before propagating the failure
        delete[] p10;
        delete[] p20;
        delete[] p30;
        delete[] perm1;
        delete[] perm2;
        delete[] perm3;
        delete[] stego1;
        delete[] stego2;
        delete[] stego3;
        align_delete< float > ( c );
        align_delete< float > ( p );
        throw e;
    }

    /* LAYER OF 2ND LSBs */
    for ( uint i = 0; i < cover_length; i++ ) { //
        int s = 4 * stego3[perm3[i]]; // % conditional probability of 2nd LSB of stego equal 0 given LSB3
        p20[i] = (p[i + s * n] + p[i + (s + 1) * n]) / (p[i + s * n] + p[i + (s + 1) * n] + p[i + (s + 2) * n] + p[i + (s + 3) * n]);
    }
    num_msg_bits[1] = (uint) floor( binary_entropy_array( cover_length, p20 ) );// msg_bits(2) = floor(sum(binary_entropy(p20))); % number of msg bits embedded into 2nd LSBs
    try {
        stc_embed_trial( cover_length, p20, message + num_msg_bits[2], stc_constraint_height, num_msg_bits[1], perm2, stego2, trial,
            max_trials, "cost2.txt" );
    } catch ( stc_exception& e ) {
        // release every temporary before propagating the failure
        delete[] p10;
        delete[] p20;
        delete[] p30;
        delete[] perm1;
        delete[] perm2;
        delete[] perm3;
        delete[] stego1;
        delete[] stego2;
        delete[] stego3;
        align_delete< float > ( c );
        align_delete< float > ( p );
        throw e;
    }

    /* LAYER OF 1ST LSBs */
    for ( uint i = 0; i < cover_length; i++ ) { //
        int s = 4 * stego3[perm3[i]] + 2 * stego2[perm2[i]]; // % conditional probability of 1st LSB of stego equal 0 given LSB3 and LSB2
        p10[i] = p[i + s * n] / (p[i + s * n] + p[i + (s + 1) * n]);
    }
    num_msg_bits[0] = m_actual - num_msg_bits[1] - num_msg_bits[2]; // msg_bits(1) = m_actual-msg_bits(2)-msg_bits(3); % number of msg bits embedded into 1st LSBs
    try {
        stc_embed_trial( cover_length, p10, message + num_msg_bits[1] + num_msg_bits[2], stc_constraint_height, num_msg_bits[0], perm1,
            stego1, trial, max_trials, "cost1.txt" );
    } catch ( stc_exception& e ) {
        // release every temporary before propagating the failure
        delete[] p10;
        delete[] p20;
        delete[] p30;
        delete[] perm1;
        delete[] perm2;
        delete[] perm3;
        delete[] stego1;
        delete[] stego2;
        delete[] stego3;
        align_delete< float > ( c );
        align_delete< float > ( p );
        throw e;
    }
    delete[] p10;
    delete[] p20;
    delete[] p30;
    max_trials = trial;

    /* FINAL CALCULATIONS */
    // combine all three bit planes: index 4*LSB3 + 2*LSB2 + LSB1 selects the stego value/cost of element i
    distortion = 0;
    for ( uint i = 0; i < cover_length; i++ ) {
        stego[i] = stego_values[8 * i + 4 * stego3[perm3[i]] + 2 * stego2[perm2[i]] + stego1[perm1[i]]];
        distortion += costs[8 * i + 4 * stego3[perm3[i]] + 2 * stego2[perm2[i]] + stego1[perm1[i]]];
    }
    if ( coding_loss != 0 ) {
        dist_coding_loss = 0;
        for ( uint i = 0; i < cover_length; i++ )
            dist_coding_loss += c[i + n * (4 * stego3[perm3[i]] + 2 * stego2[perm2[i]] + stego1[perm1[i]])];
        float lambda_dist = get_lambda_distortion( n, 8, c, dist_coding_loss, lambda, 0, 20 ); // use 20 iterations to make lambda_dist precise
        float max_payload = calc_entropy( n, 8, c, lambda_dist );
        (*coding_loss) = (max_payload - m_actual) / max_payload; // fraction of max_payload lost due to practical coding scheme
    }

    delete[] perm1;
    delete[] perm2;
    delete[] perm3;
    delete[] stego1;
    delete[] stego2;
    delete[] stego3;
    align_delete< float > ( c );
    align_delete< float > ( p );

    return distortion;
}
// }}}
896
+
897
+
898
+ /* EXTRACTION ALGORITHMS */
899
+
900
+ // {{{ stc_ml_extract()
901
+ /** Extraction algorithm for any l-layered construction.
902
+ @param stego_length - ...
903
+ @param stego - ...
904
+ @param msg_bits - ...
905
+ @param stc_constraint_height - ...
906
+ @param message - ...
907
+ */
908
+ void stc_ml_extract( uint stego_length, int* stego, uint num_of_layers, uint* num_msg_bits, // input variables
909
+ uint stc_constraint_height, // other input parameters
910
+ u8* message ) { // output variables
911
+
912
+ u8* stego_bits = new u8[stego_length];
913
+ u8* msg_ptr = message;
914
+ uint *perm = new uint[stego_length];
915
+
916
+ for ( uint l = num_of_layers; l > 0; l-- ) { // extract message from every layer starting from most significant ones
917
+ // extract bits from l-th LSB plane
918
+ if ( num_msg_bits[l - 1] > 0 ) {
919
+ randperm( stego_length, num_msg_bits[l - 1], perm );
920
+ for ( uint i = 0; i < stego_length; i++ )
921
+ stego_bits[perm[i]] = mod( stego[i], (1 << l) ) >> (l - 1);
922
+ stc_extract( stego_bits, stego_length, msg_ptr, num_msg_bits[l - 1], stc_constraint_height );
923
+ msg_ptr += num_msg_bits[l - 1];
924
+ }
925
+ }
926
+
927
+ delete[] stego_bits;
928
+ delete[] perm;
929
+ }
930
+ // }}}
931
+
932
+
stc_ml_c.h ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef STC_ML_H
2
+ #define STC_ML_H
3
+
4
+ #include <limits>
5
+ #include "common.h"
6
+ #include "stc_embed_c.h"
7
+ #include "stc_extract_c.h"
8
+
9
+ typedef unsigned int uint;
10
+ typedef unsigned char u8;
11
+
12
+ const float F_INF = std::numeric_limits<float>::infinity();
13
+ const float D_INF = std::numeric_limits<double>::infinity();
14
+
15
+ // EMBEDDING ALGORITHMS ***********************************************************************************************************
16
+
17
+ // MULTI-LAYERED EMBEDDING for plus/minus one changes
18
+ // payload limited case - returns distortion
19
+ float stc_pm1_pls_embed(uint cover_length, int* cover, float* costs, uint message_length, u8* message, // input variables
20
+ uint stc_constraint_height, float wet_cost, // other input parameters
21
+ int* stego, uint* num_msg_bits, uint &max_trials, float* coding_loss = 0); // output variables
22
+ // distortion limited case - returns distortion
23
+ float stc_pm1_dls_embed(uint cover_length, int* cover, float* costs, uint message_length, u8* message, float target_distortion, // input variables
24
+ uint stc_constraint_height, float expected_coding_loss, float wet_cost, // other input parameters
25
+ int* stego, uint* num_msg_bits, uint &max_trials, float* coding_loss = 0); // output variables
26
+
27
+ // MULTI-LAYERED EMBEDDING for plus/minus one and two changes
28
+ // payload limited case - returns distortion
29
+ float stc_pm2_pls_embed(uint cover_length, int* cover, float* costs, uint message_length, u8* message, // input variables
30
+ uint stc_constraint_height, float wet_cost, // other input parameters
31
+ int* stego, uint* num_msg_bits, uint &max_trials, float* coding_loss = 0); // output variables
32
+ // distortion limited case - returns distortion
33
+ float stc_pm2_dls_embed(uint cover_length, int* cover, float* costs, uint message_length, u8* message, float target_distortion, // input variables
34
+ uint stc_constraint_height, float expected_coding_loss, float wet_cost, // other input parameters
35
+ int* stego, uint* num_msg_bits, uint &max_trials, float* coding_loss = 0); // output variables
36
+
37
+ // GENERAL MULTI-LAYERED EMBEDDING
38
+ // algorithm for embedding into 1 layer, both payload- and distortion-limited case
39
+ float stc_ml1_embed(uint cover_length, int* cover, short* direction, float* costs, uint message_length, u8* message, float target_distortion,// input variables
40
+ uint stc_constraint_height, float expected_coding_loss, // other input parameters
41
+ int* stego, uint* num_msg_bits, uint &max_trials, float* coding_loss = 0); // output variables
42
+ // algorithm for embedding into 2 layers, both payload- and distortion-limited case
43
+ float stc_ml2_embed(uint cover_length, float* costs, int* stego_values, uint message_length, u8* message, float target_distortion, // input variables
44
+ uint stc_constraint_height, float expected_coding_loss, // other input parameters
45
+ int* stego, uint* num_msg_bits, uint &max_trials, float* coding_loss = 0); // output and optional variables
46
+ // algorithm for embedding into 3 layers, both payload- and distortion-limited case
47
+ float stc_ml3_embed(uint cover_length, float* costs, int* stego_values, uint message_length, u8* message, float target_distortion, // input variables
48
+ uint stc_constraint_height, float expected_coding_loss, // other input parameters
49
+ int* stego, uint* num_msg_bits, uint &max_trials, float* coding_loss = 0); // output and optional variables
50
+
51
+ // EXTRACTION ALGORITHMS **********************************************************************************************************
52
+
53
+ /** Extraction algorithm for 2 layered construction. Can be used with: stc_pm1_pls_embed, stc_pm1_dls_embed, stc_ml2_embed
54
+ @param stego_length - ...
55
+ @param stego - ...
56
+ @param msg_bits - ...
57
+ @param stc_constraint_height - ...
58
+ @param message - ...
59
+ */
60
+ void stc_ml_extract(uint stego_length, int* stego, uint num_of_layers, uint* num_msg_bits, // input variables
61
+ uint stc_constraint_height, // other input parameters
62
+ u8* message); // output variables
63
+
64
+ #endif // STC_ML_H