diff --git a/config/fcdram_hksynx.toml b/config/fcdram_hksynx.toml new file mode 100755 index 0000000..f18d7b5 --- /dev/null +++ b/config/fcdram_hksynx.toml @@ -0,0 +1 @@ +safe_space_rows = [192, 161, 49, 32, 177, 33, 176, 193, 80, 65, 160, 64, 209, 48, 208, 81] diff --git a/in.dot b/in.dot new file mode 100644 index 0000000..a184655 --- /dev/null +++ b/in.dot @@ -0,0 +1,25 @@ +digraph { +rankdir=BT; +0 [label="0",shape=box,style=filled,fillcolor=snow2] +1 [label="1",shape=triangle,style=filled,fillcolor=snow2] +2 [label="2",shape=triangle,style=filled,fillcolor=snow2] +3 [label="3",shape=triangle,style=filled,fillcolor=snow2] +4 [label="4",shape=ellipse,style=filled,fillcolor=white] +5 [label="5",shape=ellipse,style=filled,fillcolor=white] +6 [label="6",shape=ellipse,style=filled,fillcolor=white] +po0 [shape=invtriangle,style=filled,fillcolor=snow2] +0 -> 4 [style=solid] +2 -> 4 [style=solid] +3 -> 4 [style=solid] +0 -> 5 [style=solid] +1 -> 5 [style=solid] +3 -> 5 [style=dashed] +0 -> 6 [style=dashed] +4 -> 6 [style=solid] +5 -> 6 [style=solid] +6 -> po0 [style=solid] +{rank = same; 0; 1; 2; 3; } +{rank = same; 4; 5; } +{rank = same; 6; } +{rank = same; po0; } +} diff --git a/rs/Cargo.lock b/rs/Cargo.lock old mode 100644 new mode 100755 index e2952a9..7bb67c9 --- a/rs/Cargo.lock +++ b/rs/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "aliasable" version = "0.1.3" @@ -14,6 +23,56 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" +dependencies = [ + "anstyle", + "once_cell", + "windows-sys", +] + [[package]] name = "autocfg" version = "1.4.0" @@ -22,15 +81,15 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "bitflags" -version = "2.8.0" +version = "2.9.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" [[package]] name = "bumpalo" -version = "3.16.0" +version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" [[package]] name = "cfg-if" @@ -38,6 +97,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -50,7 +115,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "abb749745461743bb477fba3ef87c663d5965876155c676c9489cfe0963de5ab" dependencies = [ - "env_logger", + "env_logger 0.9.3", "hashbrown", "indexmap", "log", @@ -79,9 +144,19 @@ dependencies = [ [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "env_filter" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +dependencies = [ + "log", + "regex", +] [[package]] name = "env_logger" @@ -92,23 +167,36 @@ dependencies = [ "log", ] +[[package]] +name = "env_logger" +version = "0.11.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "jiff", + "log", +] + [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "foldhash" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" dependencies = [ "allocator-api2", "equivalent", @@ -121,11 +209,17 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "indexmap" -version = "2.7.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown", @@ -133,9 +227,54 @@ dependencies = [ [[package]] name = "indoc" -version = "2.0.5" +version = "2.0.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "jiff" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a194df1107f33c79f4f93d02c80798520551949d59dfad22b6157048a88cca93" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c6e1db7ed32c6c71b759497fae34bf7933636f75a251b9e736555da426f6442" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "js-sys" @@ -149,9 +288,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.169" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "lime-rs" @@ -159,16 +298,31 @@ version = "0.1.0" dependencies = [ "eggmock", "either", + "env_logger 0.11.8", + "itertools", + "log", "ouroboros", + "priority-queue", "rustc-hash", + "serde", + "serde_json", 
"smallvec", + "strum", + "strum_macros", + "toml", ] [[package]] name = "log" -version = "0.4.25" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "num-bigint" @@ -200,9 +354,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.20.2" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "ouroboros" @@ -221,7 +375,7 @@ version = "0.18.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c7028bdd3d43083f6d8d4d5187680d0d3560d54df4cc9d752005268b41e64d0" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro2", "proc-macro2-diagnostics", "quote", @@ -234,11 +388,37 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "portable-atomic" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "priority-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5676d703dda103cbb035b653a9f11448c0a7216c7926bd35fcb5865475d0c970" +dependencies = [ + "autocfg", + "equivalent", + "indexmap", +] + [[package]] name = "proc-macro2" -version = "1.0.93" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] @@ -273,27 +453,68 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] [[package]] name = "raw-cpuid" -version = "11.3.0" +version = "11.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6928fa44c097620b706542d428957635951bade7143269085389d42c8a4927e" +checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146" dependencies = [ "bitflags", ] +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + [[package]] name = "rustc-hash" -version = "2.1.0" +version = "2.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustversion" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "saturating" @@ -303,15 +524,56 @@ checksum = "ece8e78b2f38ec51c51f5d475df0a7187ba5111b2a28bdc761ee05b075d40a71" [[package]] name = "seq-macro" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.143" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "serde_spanned" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40734c41988f7306bb04f0ecf60ec0f3f1caa34290e4e8ea471dcd3346483b83" 
+dependencies = [ + "serde", +] [[package]] name = "smallvec" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" [[package]] name = "static_assertions" @@ -319,6 +581,25 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "strum" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32" + +[[package]] +name = "strum_macros" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c77a8c5abcaf0f9ce05d62342b7d298c346515365c36b673df4ebe3ced01fde8" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + [[package]] name = "symbol_table" version = "0.4.0" @@ -338,9 +619,9 @@ checksum = "7c68d531d83ec6c531150584c42a4290911964d5f0d79132b193b67252a23b71" [[package]] name = "syn" -version = "2.0.96" +version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", @@ -367,11 +648,56 @@ dependencies = [ "syn", ] +[[package]] +name = "toml" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0207d6ed1852c2a124c1fbec61621acb8330d2bf969a5d0643131e9affd985a5" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "toml_parser", + "toml_writer", + "winnow", +] + +[[package]] +name = "toml_datetime" +version = "0.7.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bade1c3e902f58d73d3f294cd7f20391c1cb2fbcb643b73566bc773971df91e3" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_parser" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5c1c469eda89749d2230d8156a5969a69ffe0d6d01200581cdc6110674d293e" +dependencies = [ + "winnow", +] + +[[package]] +name = "toml_writer" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b679217f2848de74cabd3e8fc5e6d66f40b7da40f8e1954d92054d9010690fd5" + [[package]] name = "unicode-ident" -version = "1.0.14" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "version_check" @@ -474,6 +800,85 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + 
+[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd" + [[package]] name = "yansi" version = "1.0.1" diff --git a/rs/Cargo.toml b/rs/Cargo.toml old mode 100644 new mode 100755 
index 98b498f..95cbd90 --- a/rs/Cargo.toml +++ b/rs/Cargo.toml @@ -7,11 +7,22 @@ edition = "2021" crate-type = ["cdylib"] [dependencies] -eggmock = { path = "../../eggmock" } +# eggmock = { path = "../../eggmock" } +eggmock = { path = "../../eggmock-fork" } rustc-hash = "2.1.0" either = "1.13.0" smallvec = "1.14.0" ouroboros = "0.18.0" +log = "0.4" +env_logger = "0.11.8" +itertools = "0.14.0" +priority-queue = "2.5.0" +toml = { version = "0.9.1", features = ["serde"] } +strum_macros = "0.27.1" +strum = "0.27.1" +serde = { version="1.0.219", features = ["derive"] } +serde_json = "1.0.143" # for writing chosen compute rows (in FCDRAM) to a json config-file to avoid recomputation [build-dependencies] -eggmock = { path = "../../eggmock" } \ No newline at end of file +# eggmock = { path = "../../eggmock" } +eggmock = { path = "../../eggmock-fork" } diff --git a/rs/build.rs b/rs/build.rs old mode 100644 new mode 100755 diff --git a/rs/in.dot b/rs/in.dot new file mode 100755 index 0000000..86804ac --- /dev/null +++ b/rs/in.dot @@ -0,0 +1,22 @@ +digraph { +rankdir=BT; +0 [label="0",shape=box,style=filled,fillcolor=snow2] +1 [label="1",shape=triangle,style=filled,fillcolor=snow2] +2 [label="2",shape=triangle,style=filled,fillcolor=snow2] +3 [label="3",shape=triangle,style=filled,fillcolor=snow2] +4 [label="4",shape=ellipse,style=filled,fillcolor=white] +5 [label="5",shape=ellipse,style=filled,fillcolor=white] +6 [label="6",shape=ellipse,style=filled,fillcolor=white] +po0 [shape=invtriangle,style=filled,fillcolor=snow2] +2 -> 4 [style=solid] +3 -> 4 [style=solid] +1 -> 5 [style=solid] +3 -> 5 [style=dashed] +4 -> 6 [style=dashed] +5 -> 6 [style=dashed] +6 -> po0 [style=dashed] +{rank = same; 0; 1; 2; 3; } +{rank = same; 4; 5; } +{rank = same; 6; } +{rank = same; po0; } +} diff --git a/rs/src/ambit/compilation.rs b/rs/src/ambit/compilation.rs old mode 100644 new mode 100755 index 75e37bd..47737cb --- a/rs/src/ambit/compilation.rs +++ b/rs/src/ambit/compilation.rs @@ -7,8 +7,8 
@@ use eggmock::{Id, Mig, NetworkWithBackwardEdges, Node, Signal}; use rustc_hash::{FxHashMap, FxHashSet}; use std::cmp::max; -pub struct CompilationState<'a, 'n, N> { - network: &'n N, +pub struct CompilationState<'a, 'n, P> { + network: &'n P, /// contains all not yet computed network nodes that can be immediately computed (i.e. all /// inputs of the node are already computed) candidates: FxHashSet<(Id, Mig)>, @@ -89,12 +89,14 @@ pub fn compile<'a>( Ok(program) } -impl<'a, 'n, N: NetworkWithBackwardEdges> CompilationState<'a, 'n, N> { - pub fn new(architecture: &'a Architecture, network: &'n N) -> Self { +impl<'a, 'n, P: NetworkWithBackwardEdges> CompilationState<'a, 'n, P> { + /// - `candidates`: computed from `network` + /// - `outputs`: directly read out from `network` + pub fn new(architecture: &'a Architecture, network: &'n P) -> Self { let mut candidates = FxHashSet::default(); - // check all parents of leafs whether they have only leaf children, in which case they are + // check all parents of leaves whether they have only leaf children, in which case they are // candidates - for leaf in network.leafs() { + for leaf in network.leaves() { for candidate_id in network.node_outputs(leaf) { let candidate = network.node(candidate_id); if candidate diff --git a/rs/src/ambit/extraction.rs b/rs/src/ambit/extraction.rs old mode 100644 new mode 100755 diff --git a/rs/src/ambit/mod.rs b/rs/src/ambit/mod.rs old mode 100644 new mode 100755 diff --git a/rs/src/ambit/optimization.rs b/rs/src/ambit/optimization.rs old mode 100644 new mode 100755 diff --git a/rs/src/ambit/program.rs b/rs/src/ambit/program.rs old mode 100644 new mode 100755 diff --git a/rs/src/ambit/rows.rs b/rs/src/ambit/rows.rs old mode 100644 new mode 100755 index c9f82b2..ea74585 --- a/rs/src/ambit/rows.rs +++ b/rs/src/ambit/rows.rs @@ -47,7 +47,7 @@ impl<'a> Rows<'a> { } fn add_leafs(&mut self, ntk: &impl NetworkWithBackwardEdges) { - let leafs = ntk.leafs(); + let leafs = ntk.leaves();
self.rows.reserve(leafs.size_hint().0); for id in leafs { let node = ntk.node(id);
diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs
new file mode 100755
index 0000000..827fe52
--- /dev/null
+++ b/rs/src/fc_dram/architecture.rs
@@ -0,0 +1,666 @@
+//! Contains all architecture-specific descriptions
+//! - [`FCDRAMArchitecture`] = DRAM-module-specific description of the FC-DRAM architecture
+//! - [`Instruction`] = contains all instructions supported by FC-DRAM architecture
+//! - [ ] `RowAddress`: utility functions to get the subarray-id and the row-addr within that subarray
+//!   from a `RowAddress` (eg via bit-shifting, using the bitmasks for subarray-id & row-addr on top of the `RowAddress`)
+//!
+
+use std::{cmp::Ordering, collections::{HashMap, HashSet}, fmt::{self, Display, Formatter}, ops, sync::LazyLock};
+use serde::{Deserialize, Serialize};
+use strum_macros::EnumIter;
+
+pub const NR_SUBARRAYS: u64 = 2u64.pow(7); // 2^7 = 128 subarrays
+pub const ROWS_PER_SUBARRAY: u64 = 2u64.pow(9); // 2^9 = 512 rows in each subarray
+pub const SUBARRAY_ID_BITMASK: u64 = 0b1_111_111_000_000_000; // bits[15:9] (7 highest bits) = subarray id
+pub const ROW_ID_BITMASK: u64 = 0b0_000_000_111_111_111; // bits[8:0] (9 lowest bits) = row id within the subarray
+
+/// Returns the address of local row 0 of subarray `subarray_id` (the id is shifted above the row-id bits)
+pub fn subarrayid_to_subarray_address(subarray_id: SubarrayId) -> RowAddress {
+    RowAddress(subarray_id.0 << ROWS_PER_SUBARRAY.ilog2()) // lower bits=rows in subarray
+}
+
+/// All Subarrays (except the ones at the edges) have two neighboring subarrays: one below (subarray_id+1) and one above (subarray_id-1)
+/// - currently the following subarrays are used together for computations: 0&1,2&3,4&5,..
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumIter, Serialize, Deserialize)]
+pub enum NeighboringSubarrayRelPosition {
+    /// `subarray_id-1`
+    Above,
+    /// `subarray_id+1`
+    Below,
+}
+
+impl NeighboringSubarrayRelPosition {
+    /// Returns whether `subarray` lies above or below `relative_to`; panics if the two subarrays are not direct neighbors
+    pub fn get_relative_position(subarray: &SubarrayId, relative_to: &SubarrayId) -> Self {
+        assert!(subarray.0.abs_diff(relative_to.0) == 1, "Given Arrays are not neighboring arrays");
+        if subarray.0 > relative_to.0 {
+            NeighboringSubarrayRelPosition::Below
+        } else {
+            NeighboringSubarrayRelPosition::Above
+        }
+    }
+}
+
+/// Main variable specifying the architecture of the DRAM-module to compile for (built lazily on first access)
+/// - this is currently just an example implementation for testing purpose; (TODO: make this configurable at runtime)
+///
+/// TODO: add field to simulate row-decoder circuitry, needed for impl Simultaneous-row-activation
+/// TODO: make this configurable at runtime
+pub static ARCHITECTURE: LazyLock<FCDRAMArchitecture> = LazyLock::new(|| {
+
+    let mut row_activated_by_rowaddress_tuple: HashMap<RowAddress, HashSet<(RowAddress, RowAddress)>> = HashMap::new(); // for each row store which RowAddress-combinations activate it
+
+    // Implementation of the Hypothetical Row Decoder from [3] Chap4.2
+    // - GWLD (Global Wordline Decoder)=decode higher bits to select addressed subarray
+    // - LWLD (Local Wordline Decoder)=hierarchy of decoders which decode lower bits; latches remain set when using `APA`
+    // - see [3] Chap4.2: nr of Predecoders in LWLD determines number & addresses of simultaneously activated rows
+    // - does work for the example shown in [3] Chap3.2: `APA(256,287)` activates rows `287,286,281,280,263,262,257,256`
+    // TODO: add overlapping of higher-order-bits (GWLD)
+    // - at the moment high-order bits (=subarray-id) needs to be added manually using eg `subarrayid_to_subarray_address()` helper function
+    // TODO: maybe evaluate statically?
+    let get_activated_rows_from_apa = |row1: RowAddress, row2: RowAddress| -> Vec<RowAddress> {
+        // 1. Define Predecoders by defining for which of the bits they're responsible
+        // each Predecoder is responsible for some of the lower order bits
+        let predecoder_bitmasks = [
+            0b110000000, // first predecoder (PE) predecodes bits[8,7]
+            0b001100000, // Predecoder PD
+            0b000011000, // Predecoder PC
+            0b000000110, // Predecoder PB
+            0b000000001, // last predecoder (PA) predecodes bits[0]
+        ];
+
+        // for each predecoder store which bits will remain set due to `APA(row1,row2)`:
+        let overlapping_bits = [
+            // latches set by `ACT(row1)` --- latches set by `ACT(row2)`
+            [ row1.0 & predecoder_bitmasks[0], row2.0 & predecoder_bitmasks[0]],
+            [ row1.0 & predecoder_bitmasks[1], row2.0 & predecoder_bitmasks[1]],
+            [ row1.0 & predecoder_bitmasks[2], row2.0 & predecoder_bitmasks[2]],
+            [ row1.0 & predecoder_bitmasks[3], row2.0 & predecoder_bitmasks[3]],
+            [ row1.0 & predecoder_bitmasks[4], row2.0 & predecoder_bitmasks[4]],
+        ];
+
+        let mut activated_rows = Vec::with_capacity(1 << predecoder_bitmasks.len()); // TODO: get other activated rows and add them to `activated_rows`
+        // compute all simultaneously activated rows: bit `k` of `i` selects whether predecoder `k` contributes the bits latched by `row1` or by `row2`
+        for i in 0..1 << predecoder_bitmasks.len() {
+            let activated_row = overlapping_bits.iter()
+                // start with all row-address bits unset (=0) and first predecoder stage (=1)
+                .fold((RowAddress(0), 1), |(row, predecoder_stage_onehot), new_row_bits|{
+                    let bitmask_to_choose = (i & predecoder_stage_onehot) > 0;
+                    (RowAddress(row.0 | new_row_bits[bitmask_to_choose as usize]), predecoder_stage_onehot << 1)
+                });
+            activated_rows.push(activated_row.0);
+        }
+        // debug!("`APA({row1},{row2})` activates the following rows simultaneously: {activated_rows:?}");
+        // duplicates are not necessarily adjacent (equal predecoder bits in both rows repeat a row for several `i`) => sort before `dedup()`
+        activated_rows.sort_unstable_by_key(|row| row.0); // also makes the order of the returned rows deterministic
+        activated_rows.dedup(); // NOTE: works in-place
+        activated_rows
+    };
+
+    // just a dummy implementation, see [5] Chap3.2 for details why determining the distance based on the Row Addresses issued by the MemController is difficult
+    let get_distance_of_row_to_sense_amps = |row: RowAddress, subarray_rel_position: NeighboringSubarrayRelPosition| -> RowDistanceToSenseAmps {
+        // NOTE: last & first subarrays only have sense-amps from one side
+        if (row.get_subarray_id().0 == NR_SUBARRAYS-1 && subarray_rel_position == NeighboringSubarrayRelPosition::Below) || (row.get_subarray_id().0 == 0 && subarray_rel_position == NeighboringSubarrayRelPosition::Above) {
+            panic!("Edge subarrays have sense-amps only connected from one side");
+        }
+
+        let local_row_address= RowAddress(row.0 & ROW_ID_BITMASK);
+
+        let distance_to_above_subarray = match local_row_address {
+            i if i.0 < ROWS_PER_SUBARRAY / 2 / 3 => RowDistanceToSenseAmps::Close, // 1st third of subarray-half
+            i if i.0 < 2 * (ROWS_PER_SUBARRAY / 2) / 3 => RowDistanceToSenseAmps::Middle, // 2nd third of subarray-half (bound must exceed the `Close` bound, else this arm is unreachable)
+            _ => RowDistanceToSenseAmps::Far, // everything else is treated as being far away
+        };
+
+        match subarray_rel_position {
+            NeighboringSubarrayRelPosition::Above => distance_to_above_subarray,
+            NeighboringSubarrayRelPosition::Below => distance_to_above_subarray.reverse(), // rows close to above subarray are far from below subarray etc
+        }
+    };
+
+    // precompute things based on given SRA (simultaneous row activation function)
+    let mut precomputed_simultaneous_row_activations = HashMap::new();
+    for i in 0..ROWS_PER_SUBARRAY {
+        precomputed_simultaneous_row_activations.insert((RowAddress(i),RowAddress(i)), vec!(RowAddress(i))); // special case: no other row is activated when executing `APA(r1,r1)`
+        for j in i+1..ROWS_PER_SUBARRAY {
+            let activated_rows = get_activated_rows_from_apa(RowAddress(i), RowAddress(j));
+            for &row in &activated_rows {
+                row_activated_by_rowaddress_tuple.entry(row)
+                    .or_default()
+                    .insert((RowAddress(i),RowAddress(j)));
+            }
+
+            precomputed_simultaneous_row_activations.insert((RowAddress(i),RowAddress(j)), activated_rows.clone());
+            precomputed_simultaneous_row_activations.insert((RowAddress(j),RowAddress(i)), activated_rows); // last use: move instead of cloning
+        }
+    }
+    // debug!("Precomputed SRAs: {:#?}", precomputed_simultaneous_row_activations.iter().take(20).collect::<Vec<_>>());
+
+    let sra_degree_to_rowaddress_combinations= precomputed_simultaneous_row_activations.iter()
+        .fold(HashMap::new(), |mut acc: HashMap<SupportedNrOperands, Vec<(RowAddress, RowAddress)>>, (row_combi, activated_rows)| {
+            acc.entry(SupportedNrOperands::try_from(activated_rows.len() as u8).unwrap()).or_default().push(*row_combi);
+            acc
+        });
+    // output how many combinations of row-addresses activate the given nr of rows
+    // debug!("SRAs row-nr to row-addr mapping: {:#?}", sra_degree_to_rowaddress_combinations.iter().map(|(k,v)| format!("{k} rows activated in {} addr-combinations", v.len())).collect::<Vec<_>>());
+
+    FCDRAMArchitecture {
+        nr_subarrays: NR_SUBARRAYS,
+        rows_per_subarray: ROWS_PER_SUBARRAY,
+        get_activated_rows_from_apa,
+        precomputed_simultaneous_row_activations,
+        row_activated_by_rowaddress_tuple,
+        sra_degree_to_rowaddress_combinations,
+        get_distance_of_row_to_sense_amps,
+    }
+});
+
+/// - ! must be smaller than `rows_per_subarray * nr_subarrays` (this is NOT checked!)
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct RowAddress(pub u64); + +impl RowAddress { + /// Return subarray-id the row lies in + pub fn get_subarray_id(&self) -> SubarrayId { + SubarrayId((self.0 & SUBARRAY_ID_BITMASK) >> ROWS_PER_SUBARRAY.ilog2()) + } + + /// Converts RowAddress to the same row address but in the other subarray + pub fn local_rowaddress_to_subarray_id(&self, subarray_id: SubarrayId) -> RowAddress { + let local_row_address = self.0 & ROW_ID_BITMASK; + RowAddress( local_row_address | subarrayid_to_subarray_address(subarray_id).0 ) + } +} + +impl fmt::Display for RowAddress { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}.{}", self.get_subarray_id().0, self.0 & ROW_ID_BITMASK) + } +} + +impl fmt::Debug for RowAddress { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}.{}", self.get_subarray_id().0, self.0 & ROW_ID_BITMASK) + } +} + +impl From for RowAddress { + fn from(value: u64) -> Self { + RowAddress(value) + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] +pub struct SubarrayId(pub u64); + +impl SubarrayId { + /// Currently all ops only work on half of the cells (every 2nd cell) with two subarrays being + /// in a compute/reference subarray "relation" with exactly one other neighboring subarray. 
/// Success rate of an operation, expressed as a fraction in `[0,1]`
///
/// - equality and ordering are both based on [`f64::total_cmp`], so `a == b` holds exactly when
///   `a.cmp(&b) == Ordering::Equal` (as required for implementing `Eq` + `Ord`)
/// - NOTE: the inner value is public; values not created via [`SuccessRate::new`] are not range-checked
#[derive(Debug, Clone, Copy)]
pub struct SuccessRate(pub f64);

impl SuccessRate {
    /// Creates a new success rate
    ///
    /// # Panics
    ///
    /// Panics if `success_rate` is not within `[0,1]` (this includes `NaN`)
    pub fn new(success_rate: f64) -> Self {
        assert!(
            (0.0..=1.0).contains(&success_rate),
            "SuccessRate must be in [0,1], but was {success_rate}"
        );
        SuccessRate(success_rate)
    }
}

impl PartialEq for SuccessRate {
    fn eq(&self, other: &Self) -> bool {
        // use the same total order as `Ord` (a derived `PartialEq` on `f64` disagrees with it for `-0.0` and `NaN`)
        self.cmp(other) == Ordering::Equal
    }
}

impl Eq for SuccessRate {}

impl PartialOrd for SuccessRate {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other)) // delegate to total_cmp
    }
}

impl Ord for SuccessRate {
    fn cmp(&self, other: &Self) -> Ordering {
        self.0.total_cmp(&other.0)
    }
}

/// Multiplies both rates; the product is range-checked by [`SuccessRate::new`]
impl ops::Mul for SuccessRate {
    type Output = SuccessRate;

    fn mul(self, rhs: Self) -> Self::Output {
        SuccessRate::new(rhs.0 * self.0)
    }
}

/// Range-checked conversion, panics like [`SuccessRate::new`] for values outside `[0,1]`
impl From<f64> for SuccessRate {
    fn from(val: f64) -> Self {
        SuccessRate::new(val)
    }
}
`APA`-operation) + /// - REASON: getting the simultaneously activated will probably be requested very frequently (time-space tradeoff, rather than recomputing on every request)) + /// - REMEMBER: set `subarrayid` of passed row-addresses to 0 (activated rows are precomputed exemplary for RowAddresses in subarray=0 since activated rows do not depend on corresponding subarrays) + pub precomputed_simultaneous_row_activations: HashMap<(RowAddress, RowAddress), Vec>, + /// Map degree of SRA (=nr of activated rows by that SRA) to all combinations of RowAddresses which have that degree of SRA + /// - use to eg restrict the choice of row-addresses for n-ary AND/OR (eg 4-ary AND -> at least activate 8 rows; more rows could be activated when using input replication) + /// NOTE: LogicOp determiens success-rate + pub sra_degree_to_rowaddress_combinations: HashMap>, + // pub sra_degree_to_rowaddress_combinations: HashMap<(u8, LogicOp), BTreeMap<(RowAddress,RowAddress), SuccessRate>>, // to large runtime-overhead :/ + /// Stores for every rows which combinations of RowAddresses activate that row (needed for finding appropriate safe space rows) + pub row_activated_by_rowaddress_tuple: HashMap>, + /// Given a row-addr this returns the distance of it to the sense-amps (!determinse success-rate of op using that `row` as an operand) (see [1] Chap5.2) + /// - NOTE: a realistic implementation should use the Methodology from [1] to determine this distance (RowHammer) + /// - there is no way of telling the distance of a row without testing manually (see [5] Chap3.2: "consecutive row addresses issued by the memory controller can be mapped to entirely different regions of DRAM") + pub get_distance_of_row_to_sense_amps: fn(RowAddress, NeighboringSubarrayRelPosition) -> RowDistanceToSenseAmps, +} + +/// Implement this trait for your specific DRAM-module to support FCDRAM-functionality +/// - contains the mapping of logical-ops to FCDRAM-Architecture (see +/// 
[`FCDRAMArchitecture::get_instructions_implementation_of_logic_ops`] +/// +/// # Possible Changes in Future +/// +/// - add trait-bound to a more general `Architecture`-trait to fit in the overall framework? +impl FCDRAMArchitecture { + + /// Returns FC-DRAM operations to perform for each logical operation, with operand-rows NOT set !!! + /// - addresses of row operands need to be overwritten during compilation ! + /// + /// REMINDER: for OR&AND additional [`Instruction::FracOp`]s need to be issued to setup the + /// reference subarray containing `reference_rows` in order to perform the given `logic_op` on + /// the `compute_rows` inside the computation rows + /// + /// REMINDER: do increase the success rate of `FracOp` storing a fractional value (`V_{DD}/2` + /// in this case, several FracOps are usually issued) + /// - ->`FracOp`s are replicated during compilation as necessary, this is done during compilation + /// + /// NOTE: `compute_rows` are expected to lay in the same subarray and `reference_rows` in one + /// subarray adjacent to the compute subarray (!this is not checked but assumed to be true!) + pub fn get_instructions_implementation_of_logic_ops(logic_op: LogicOp) -> Vec { + match logic_op { + LogicOp::NOT => vec!(Instruction::ApaNOT(RowAddress(0), RowAddress(0))), + LogicOp::AND => vec!(Instruction::FracOp(RowAddress(0)), Instruction::ApaNOT(RowAddress(0), RowAddress(0))), + LogicOp::OR => vec!(Instruction::FracOp(RowAddress(0)), Instruction::ApaNOT(RowAddress(0), RowAddress(0))), + LogicOp::NAND => { + // 1. AND, 2. NOT + FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::AND) + .into_iter() + .chain( FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::NOT)) + .collect() + }, + LogicOp::NOR => { + // 1. OR, 2. 
NOT + FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::OR) + .into_iter() + .chain( FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::NOT)) + .collect() + } + } + } + + fn get_activated_rows() { + + } +} + +/// Categories of distances of rows to sense-amops +/// - HIB (higher is better): that's why [`RowDistanceToSenseAmps::Close`] has the highest int value +#[derive(Hash,Eq,PartialEq,PartialOrd,Ord)] +pub enum RowDistanceToSenseAmps { + Close=2, + Middle=1, + Far=0, +} + +impl RowDistanceToSenseAmps { + /// Reverse distance (Far-> Close, Middle -> Middle, Close->Far), useful when row's distance to other neighboring subarray (below/above) is needed + pub fn reverse(&self) -> Self { + match &self { + RowDistanceToSenseAmps::Close => RowDistanceToSenseAmps::Far, + RowDistanceToSenseAmps::Middle=> RowDistanceToSenseAmps::Middle, + RowDistanceToSenseAmps::Far=> RowDistanceToSenseAmps::Close, + } + } +} + +type Comment = String; +/// Instructions used in FC-DRAM +/// - NOT: implemented using `APA` +/// - AND/OR: implemented by (see [1] Chap6.1.2) +/// 1. setting `V_{AND}`/`V_{OR}` in reference subarray and then issuing (using FracOperation +/// for storing `V_{DD}/2`) +/// 2. Issue `APA(R_{REF},R_{COM})` to simultaneously activate `N` rows in reference subarray +/// and `N` rows in compute subarray +/// 3. Wait for `t_{RAS}` (=overwrites activated cells in compute subarray with AND/OR-result) +/// 4. 
Issue `PRE` to complete the operation +/// +/// Additionally RowClone-operations are added for moving data around if needed (eg if valid data +/// would be affected by following Simultaneous-Row-Activations) +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum Instruction { + /// Needed for initializing neutral row in reference subarray (to set `V_{AND}`/`V_{OR}` (see + /// [1]( + /// Implemented using AP without any extra cycles in between) (see [2]) + /// - `PRE` "interrupt the process of row activation, and prevent the sense amplifier from being enabled" + FracOp(RowAddress), + /// Multiple-Row Activation: `ACT R_F -> PRE -> ACT R_L -> PRE` of rows `R_F`,`R_L` for rows within + /// different subarrays. As a result `R_L` holds the negated value of `R_F` (see Chap5.1 of PaperFunctionally Complete DRAMs + /// src=1st operand, dst=2nd operand + ApaNOT(RowAddress,RowAddress), + /// Multiple-Row Activation: `ACT R_F -> PRE -> ACT R_L -> PRE` of rows `R_F`,`R_L` for rows within + /// different subarrays (but with different timings than `ApaNOT`!) + /// src=1st operand, dst=2nd operand + ApaAndOr(RowAddress,RowAddress), + /// Fast-Parallel-Mode RowClone for cloning row-data within same subarray + /// - corresponds to `AA`, basically copies from src-row -> row-buffer -> dst-row + /// - first operand=src, 2nd operand=dst where `src` and `dst` MUST reside in the same subarray ! 
+ /// + /// Comment indicates what this FPM was issued for (for simpler debugability) + RowCloneFPM(RowAddress, RowAddress, Comment), +} + +impl Display for Instruction { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let display_row = |row: &RowAddress| { format!("{}.{}", row.get_subarray_id().0, row.0 & ROW_ID_BITMASK)}; // display subarray separately + // TODO: change string-representation to display subarray-id + let description = match self { + Instruction::FracOp(row) => format!("AP({})", display_row(row)), + Instruction::ApaNOT(row1,row2) => format!("APA_NOT({},{})", display_row(row1), display_row(row2)), + Instruction::ApaAndOr(row1,row2) => { + let (src_array, dst_array) = (row1.get_subarray_id(), row2.get_subarray_id()); + let activated_rows: Vec = ARCHITECTURE.precomputed_simultaneous_row_activations.get(&(row1.local_rowaddress_to_subarray_id(SubarrayId(0)),row2.local_rowaddress_to_subarray_id(SubarrayId(0)))).unwrap() + .iter().flat_map(|row| vec!(row.local_rowaddress_to_subarray_id(src_array), row.local_rowaddress_to_subarray_id(dst_array))) + .collect(); + format!("APA_AND_OR({},{}) // activates {:?}", display_row(row1), display_row(row2), activated_rows) + }, + Instruction::RowCloneFPM(row1, row2, comment) => format!("AAP({},{}) // {}", display_row(row1), display_row(row2), comment), + }; + write!(f, "{}", description) + } +} + +/// TODO: where to put logic for determining which rows are activated simultaneously given two +/// row-addresses +impl Instruction { + + /// TODO: rewrite this (eg `ApaNOT` and `ApaAndOr` take different amount of time !!, see Figure + pub fn get_nr_memcycles(&self) -> u16 { + match self { + Instruction::FracOp(__) => 7, // see [2] ChapIII.A, (two cmd-cycles + five idle cycles) + // TODO: change to ns (t_{RAS}+6ns) - `t_{RAS}` to mem cycles + Instruction::ApaNOT(_, _) => 3, // NOTE: this is not explicitly written in the paper, TODO: check with authors + Instruction::ApaAndOr(_, _) => 3, // NOTE: this is not 
explicitly written in the paper, TODO: check with authors + Instruction::RowCloneFPM(_, _, _) => 2, // see [4] Chap3.2 (TODO: not correct, given as 90ns?) + } + } + + /// Success Rate of instructions depends on: + /// - for AND/OR (`APA`): number of input operands (see [1] Chap6.3) + /// - data pattern can't be taken into consideration here since its not known at compile-time (unknown at compile-time) + /// - as well as temperature and DRAM speed rate (ignored here) + /// + /// TAKEAWAY: `OR` is more reliable than `AND` + pub fn get_success_rate_of_apa(&self, implemented_op: LogicOp) -> SuccessRate { + + // Quote from [1] Chap6.3: "the distance of all simultaneously activated rows" - unclear how this classification happend exactly. Let's be conservative and assume the worst-case behavior + // (furthest away row for src-operands). For dst-rows we use the one closest to the sense-amps, since we can choose from which of the rows to read/save the result form + + let success_rate_by_row_distance = implemented_op.get_success_rate_by_row_distance(); + + // include nr of operands and distance of rows to sense-amps into success-rate + match self { + Instruction::ApaNOT( src, dst) => { + let activated_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(&(*src,*dst)).expect("[ERR] Missing SRA for ({r1},{r2}"); + let nr_operands = activated_rows.len(); // ASSUMPTION: it seems like "operands" referred to the number of activated rows (see [1] + // taken from [1] Chap6.3 + let success_rate_per_operandnr = HashMap::from([ + (2, 94.94), + (4, 94.94), + (8, 95.85), + (16, 95.87), + (32, 0.000) // no value in paper :// + ]); + // nr_operand_success_rate.get(&nr_operands); + + let (src_array, dst_array) = (src.get_subarray_id(), dst.get_subarray_id()); + let furthest_src_row = activated_rows.iter() + .map(|row| (ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row, NeighboringSubarrayRelPosition::get_relative_position(&src_array, &dst_array))) // 
RowDistanceToSenseAmps::Far; // TODO: get this + .max() + .expect("[ERR] Activated rows were empty"); + // NOTE: SRA is assumed to activate the same row-addresses in both subarrays + let closest_dst_row = activated_rows.iter() + .map(|row| (ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row, NeighboringSubarrayRelPosition::get_relative_position(&dst_array, &src_array))) // RowDistanceToSenseAmps::Far; // TODO: get this + .min() + .expect("[ERR] Activated rows were empty"); + let total_success_rate = *success_rate_per_operandnr.get(&nr_operands).expect("[ERR] {nr_operands} not =1|2|4|8|16, the given SRA function seems to not comply with this core assumption.") + * success_rate_by_row_distance.get(&(furthest_src_row, closest_dst_row)).unwrap().0; + SuccessRate::new(total_success_rate) + }, + _ => SuccessRate::new(1.0), + } + } +} + +/// Contains logical operations which are supported (natively) on FCDRAM-Architecture +/// - see [`FCDRAMArchitecture::get_instructions_implementation_of_logic_ops`] for how these logic-ops are mapped to FCDRAM-instructions +#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, EnumIter)] +pub enum LogicOp { + NOT, + AND, + OR, + /// implemented using AND+NOT + NAND, + /// implemented using OR+NOT + NOR, +} + +impl LogicOp { + + /// see [1] Chap5.3 and Chap6.3 + pub fn get_success_rate_by_row_distance(&self) -> HashMap<(RowDistanceToSenseAmps,RowDistanceToSenseAmps), SuccessRate> { + match self { + LogicOp::NOT => HashMap::from([ + // ((src,dst), success_rate) + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 51.71.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 54.93.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 44.16.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 57.47.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 53.47.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 81.92.into()), + 
((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 45.34.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 85.02.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 75.13.into()), + ]), + LogicOp::AND => HashMap::from([ + // ((reference,compute), success_rate) + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 98.81.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.20.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 80.04.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 97.08.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 83.26.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 97.71.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 75.84.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 95.29.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 94.95.into()), + ]), + LogicOp::OR => HashMap::from([ + // ((reference,compute), success_rate) + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 99.51.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.65.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 94.29.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 98.98.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 94.15.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 98.95.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 89.23.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 98.59.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 98.80.into()), + ]), + LogicOp::NAND => HashMap::from([ + // ((reference,compute), success_rate) + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 
98.81.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.20.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 79.59.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 97.08.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 82.98.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 97.67.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 75.50.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 95.19.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 94.95.into()), + ]), + LogicOp::NOR => HashMap::from([ + // ((reference,compute), success_rate) + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 99.51.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.65.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 94.09.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 98.97.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 94.03.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 98.90.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 89.15.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 98.52.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 98.80.into()), + ]), + } + } + + /// taken from Figure7 (NOT) & Figure15 (AND/OR/NAND/NOR) in [1] (using Mean-dot) + /// - NOTE: since the values have been read from the diagram they might differ +-3% from the actually measured values + /// - remeasuring the values on own setup might be beneficial here ! + /// + /// In General: + /// - for AND/OR/NAND/NOR: "The success rate of bitwise operations consistently increases as the number of input operands increases." 
(see Observation 11 [1]) + /// - for NOT: seems to be the opposite + pub fn get_success_rate_by_nr_operands(&self) -> HashMap { + match self { + LogicOp::NOT => HashMap::from([ + // ((src,dst), success_rate) + (SupportedNrOperands::One, 98.5.into()), + (SupportedNrOperands::Two, 97.5.into()), + (SupportedNrOperands::Four, 97.0.into()), + (SupportedNrOperands::Eight, 28.0.into()), + (SupportedNrOperands::Sixteen, 10.0.into()), + (SupportedNrOperands::Thirtytwo, 8.0.into()), + ]), + LogicOp::AND => HashMap::from([ + // ((reference,compute), success_rate) + (SupportedNrOperands::Two, 86.0.into()), + (SupportedNrOperands::Four, 91.5.into()), + (SupportedNrOperands::Eight, 92.5.into()), + (SupportedNrOperands::Sixteen, 96.0.into()), + ]), + LogicOp::OR => HashMap::from([ + // ((reference,compute), success_rate) + // TODO + (SupportedNrOperands::Two, 97.5.into()), + (SupportedNrOperands::Four, 97.0.into()), + (SupportedNrOperands::Eight, 28.0.into()), + (SupportedNrOperands::Sixteen, 10.0.into()), + ]), + LogicOp::NAND => HashMap::from([ + // ((reference,compute), success_rate) + // TODO + (SupportedNrOperands::Two, 97.5.into()), + (SupportedNrOperands::Four, 97.0.into()), + (SupportedNrOperands::Eight, 28.0.into()), + (SupportedNrOperands::Sixteen, 10.0.into()), + ]), + LogicOp::NOR => HashMap::from([ + // ((reference,compute), success_rate) + // TODO + (SupportedNrOperands::Two, 97.5.into()), + (SupportedNrOperands::Four, 97.0.into()), + (SupportedNrOperands::Eight, 28.0.into()), + (SupportedNrOperands::Sixteen, 10.0.into()), + ]), + } + } +} + +/// Support operands numbers for AND/OR/NOT operations +#[derive(Debug, Clone, Copy, EnumIter, Hash, PartialEq, Eq, Serialize, Deserialize)] +#[repr(u8)] // You can change the representation (e.g., u8, u16, etc.) 
pub enum SupportedNrOperands {
    /// One operand only supported for `NOT`
    One = 1,
    Two = 2,
    Four = 4,
    Eight = 8,
    Sixteen = 16,
    /// Only performed for `NOT`
    Thirtytwo = 32
}

/// Succeeds only for the exact values `1|2|4|8|16|32`
impl TryFrom<u8> for SupportedNrOperands {
    type Error = ();

    fn try_from(value: u8) -> Result<Self, Self::Error> {
        match value {
            1 => Ok(SupportedNrOperands::One),
            2 => Ok(SupportedNrOperands::Two),
            4 => Ok(SupportedNrOperands::Four),
            8 => Ok(SupportedNrOperands::Eight),
            16 => Ok(SupportedNrOperands::Sixteen),
            32 => Ok(SupportedNrOperands::Thirtytwo),
            _ => Err(()),
        }
    }
}

/// Succeeds only for the exact values `1|2|4|8|16|32`
/// - values which don't fit into an `u8` are rejected instead of being truncated
///   (a plain `as u8` cast would turn eg `258` into `2`)
impl TryFrom<usize> for SupportedNrOperands {
    type Error = ();

    fn try_from(value: usize) -> Result<Self, Self::Error> {
        let narrowed: u8 = u8::try_from(value).map_err(|_| ())?;
        Self::try_from(narrowed)
    }
}
- [`CompilationState`] = stores states encountered during compilation (eg which values reside in which rows, rows containing live values, ..) +//! - also see [`RowState`] +//! - [`SchedulingPrio`] = used to prioritize/order instruction for Instruction Scheduling +//! +//! - [`Compiler::compile()`] = main function - compiles given logic network for the given [`architecture`] into a [`program`] using some [`optimization`] + +use crate::fc_dram::architecture::{Instruction, ROWS_PER_SUBARRAY}; + +use super::{ + architecture::{LogicOp, NeighboringSubarrayRelPosition, SubarrayId, SupportedNrOperands, ARCHITECTURE, NR_SUBARRAYS, ROW_ID_BITMASK}, CompilerSettings, Program, RowAddress +}; +use eggmock::{Aoig, Id, NetworkWithBackwardEdges, Node, Signal}; +use log::debug; +use priority_queue::PriorityQueue; +use strum::IntoEnumIterator; +use std::{cmp::Ordering, collections::HashMap, ffi::CStr, fmt::Debug, fs::{self, File}, io::Write, path::Path, vec}; + +use serde::{Serialize, Deserialize}; + +/// Provides [`Compiler::compile()`] to compile a logic network into a [`Program`] +pub struct Compiler { + /// compiler-options set by user + settings: CompilerSettings, + /// Stores the state of all rows at each compilation step + comp_state: CompilationState, + /// For each nr of operands this field store the rowaddress-combination to issue to activate + /// the desired nr of rows (the choice is made best on success-rate and maximizing the nr of rows which potentially can't be used for storage + /// since they would activate rows where values could reside in) + /// - This is a Design Decision taken: compute rows are rows reserved for performing computations, all other rows are usable as "Register" + compute_row_activations: HashMap<(SupportedNrOperands, NeighboringSubarrayRelPosition), (RowAddress,RowAddress)>, + /// Stores all subarrays in which the signal has to be available + signal_to_subarrayids: HashMap>, + /// see [`Self::get_all_noninverted_src_signals`]. 
First `Vec`=noninverted src signals, 2nd `Vec`=inverted src signals + computed_noninverted_scr_signals: HashMap,Vec)>, +} + +/// Serializable struct for storing chosen compute rows in a json file +#[derive(Serialize,Deserialize)] +struct ComputeRowRecord { + operands: SupportedNrOperands, + position: NeighboringSubarrayRelPosition, + rows: (RowAddress, RowAddress), +} + +impl Compiler { + /// Constants are repeated to fill complete row + const CONSTANTS: [usize; 2] = [0, 1]; + + pub fn new(settings: CompilerSettings) -> Self { + Compiler{ + settings, + comp_state: CompilationState::new( HashMap::new() ), + compute_row_activations: HashMap::new(), + signal_to_subarrayids: HashMap::new(), + computed_noninverted_scr_signals: HashMap::new(), + } + } + + /// Compiles given `network` into a FCDRAM-[`Program`] that can be run on given `architecture` + /// + /// General Procedure of compilation + /// 1) Map Logical-Ops to FCDRAM-Primitives (operating on virtual rows) + /// 2) Map virtual rows to actual physical rows (spilling/moving rows if necessary using `RowClone`) + /// - similarly to Register Allocation + /// + /// - [ ] TODO: increase success-rate using input replication ? at which point to add input replication? + /// + /// - [ ] TODO: output in which rows + /// - 1) data is expected to be placed before program runs + /// - 2) outputs can be found after the program has run + pub fn compile( + &mut self, + network: &impl NetworkWithBackwardEdges, + ) -> Program { + let mut program = Program::new(vec!()); + + // debug!("Compiling {:?}", network); + // 0. 
Prepare compilation: + // - select safe-space rows + // - place inputs&constants into DRAM module (and store where inputs have been placed in `program`) + // - initialize candidates (with which to start execution) + self.init_comp_state(network, &mut program); + + // println!("{:?}", network.outputs().collect::>()); + // debug!("Nodes in network:"); + // for node in network.iter() { + // debug!("{:?},", node); + // } + + // 1. Actual compilation + while let Some((next_candidate, _)) = self.comp_state.candidates.pop() { + let executed_instructions = &mut self.execute_next_instruction(&next_candidate, network); + program.instructions.append(executed_instructions); + + // update new candidates (`next_candidate` is now available) + let new_candidates = self.get_new_candidates(network, next_candidate.0, next_candidate.1); + debug!("New candidates: {:?}", new_candidates); + + self.comp_state.candidates.extend(new_candidates); + } + + debug!("Instructions: {:?}", program.instructions); + // optimize(&mut program); + debug!("{:?}", self.comp_state.value_states); + + // store output operand location so user can retrieve them after running the program + let outputs = network.outputs(); + // TODO: doesn't work yet + program.output_row_operands_placement = outputs.flat_map(|out| { + let subarrays = self.signal_to_subarrayids.get(&out).unwrap(); + let mut placements = vec!(); + for subarray in subarrays { + let row_address = self.comp_state.value_states.get(&(out,*subarray)).expect("ERROR: one of the outputs hasn't been computed yet..."); + placements.push((out, *row_address)); + } + placements + }).collect(); + program + } + + /// Rather than making sure rows in which live values reside remain untouched, this approach chooses to select fixed RowAddress combinations for all support numbers of operands + /// - this function sets [`Compiler::compute_row_activations`] to use as compute rows + /// - NOTE: this choice is expected to be applicable to row activations in all subarrays 
since the SRA work equivalently between subarrays + /// - ASSUMPTION: there are no architectural differences btw subarrays + /// + /// # Limitations + /// + /// There are several drawbacks of choosing fixed compute rows: + /// 1. *LogicOp* is not taken into consideration: different compute rows might (in theory) perform better for specific LogicOps (see [`LogicOp::get_success_rate_by_row_distance()`] which returns different SuccessRates based on the corresponding LogicOp) + /// 2. Compute Rows might perform better for the next subarray (+1) than for the previous (-1) subarray (choice of subarray determines which SenseAmps are used and hence the distance btw rows and SenseAmps) + /// + /// This choice aims to finding a good compromise btw those limitations. + /// TODO: NEXT + fn choose_compute_rows(&mut self) { + for nr_operands in SupportedNrOperands::iter() { + for sense_amp_position in NeighboringSubarrayRelPosition::iter() { + + let possible_row_combis = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&nr_operands).expect("Given Architecture doesn't support SRA of {nr_operands} operands"); + let best_row_combi = possible_row_combis.iter().fold(possible_row_combis[0], |best_row_combi, next_row_combi| { + // compare row-combis based on avg success-rate and return the better one of them + + let avg_distance_next_row_combi: u64 = { + let activated_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(next_row_combi).unwrap(); + activated_rows.iter().map(|&row| { + // move subarray to 1st subarray (instead of 0th, which is at the edge and hence has no sense-amps above) + let subarray1_id = ((ROW_ID_BITMASK << 1) | 1) ^ ROW_ID_BITMASK; + let row = RowAddress(subarray1_id | row.0); // makes sure that `get_distance_of_row_to_sense_amps` doesn't panic since SRA returns subarray=0 by default (which is an edge subarray) + (ARCHITECTURE.get_distance_of_row_to_sense_amps)(row, sense_amp_position) as u64 + }).sum() + }; + let avg_distance_best_row_combi: u64 
= { + // move subarray to 1st subarray (instead of 0th, which is at the edge and hence has no sense-amps above) + let activated_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(&best_row_combi).unwrap(); + activated_rows.iter().map(|&row| { + let subarray1_id = ((ROW_ID_BITMASK << 1) | 1) ^ ROW_ID_BITMASK; + let row = RowAddress(subarray1_id | row.0); // makes sure that `get_distance_of_row_to_sense_amps` doesn't panic since SRA returns subarray=0 by default (which is an edge subarray) + // println!("{:b}", row.0); + (ARCHITECTURE.get_distance_of_row_to_sense_amps)(row, sense_amp_position) as u64 + }).sum() + }; + + if avg_distance_next_row_combi > avg_distance_best_row_combi { + *next_row_combi + } else { + best_row_combi + } + }); + + self.compute_row_activations.insert((nr_operands, sense_amp_position), best_row_combi); + } + } + } + + /// Places (commonly used) constants in safe-space rows + /// - ! all safe-space rows are assumed to be empty when placing constants (constans are the first things to be placed into safe-space rows) + /// - currently placed constants: all 0s and all 1s (for [`Compiler::init_reference_subarray`] + /// - TODO: store placement of constants in `program` + fn place_constants(&mut self, program: &mut Program) { + // place constants in EVERY subarray + for subarray in 0..NR_SUBARRAYS { + for constant in Self::CONSTANTS { + let next_free_row = self.comp_state.free_rows_per_subarray.get_mut(&SubarrayId(subarray)).and_then(|v| v.pop()).expect("No free rows in subarray {subarray} :("); + self.comp_state.constant_values.insert(constant, next_free_row.local_rowaddress_to_subarray_id(SubarrayId(0))); + self.comp_state.dram_state.insert(next_free_row, RowState { is_compute_row: false, live_value: None, constant: Some(constant)} ); + program.constants_row_placement.insert(constant, next_free_row); + } + } + } + + /// Place inputs onto appropriate rows, storing the decided placement into `program.input_row_operands_placement` + /// 
- NOTE: constants are expected to be placed before the inputs + /// TODO: algo which looks ahead which input-row-placement might be optimal (->reduce nr of move-ops to move intermediate results around & keep inputs close to sense-amps + /// TODO: parititon logic network into subgraphs s.t. subgraphs can be mapped onto subarrays reducing nr of needed moves + fn place_inputs(&mut self, network: &impl NetworkWithBackwardEdges, program: &mut Program) { + + for input in network.leaves().collect::>() { + // check whether the signal is required in inverted or noninverted form and place it accordingly in all subarrays where it is needed + let original_signal = Signal::new(input, false); + let inverted_signal = Signal::new(input, true); + + if let Some(original_input_locations) = self.signal_to_subarrayids.get(&original_signal) { + for subarray in original_input_locations { + let next_free_row = self.comp_state.free_rows_per_subarray.get_mut(subarray).and_then(|v| v.pop()).expect("OOM: No more free rows in subarray {subarray} for placing inputs"); + self.comp_state.value_states.insert((original_signal, *subarray), next_free_row); + + program.input_row_operands_placement.entry(original_signal).or_default().push(next_free_row); + } + } + + if let Some(inverted_input_locations) = self.signal_to_subarrayids.get(&inverted_signal) { + for subarray in inverted_input_locations { + let next_free_row = self.comp_state.free_rows_per_subarray.get_mut(subarray).and_then(|v| v.pop()).expect("OOM: No more free rows in subarray {subarray} for placing inputs"); + self.comp_state.value_states.insert((inverted_signal, *subarray), next_free_row); + + program.input_row_operands_placement.entry(inverted_signal).or_default().push(next_free_row); + } + } + } + } + + /// Initialize candidates with all nodes that are computable + /// NOTE: initially all nodes whose src-operands are primary inputs only are marked as candidates in all subarrays (since inputs are expected to be placed in all those 
subarrays by the user) + fn init_candidates(&mut self, network: &impl NetworkWithBackwardEdges) { + let inputs: Vec = network.leaves().collect(); + + // init candidates with all nodes having only inputs as src-operands + for &input in inputs.as_slice() { + // every output has a prio determined eg by how many src-operands it uses last (->to minimize nr of live values in rows) + let mut outputs_with_prio: PriorityQueue<(Signal, SubarrayId), SchedulingPrio> = network.node_outputs(input) + .filter(|output| network.node(*output).inputs().iter().all(|other_input| inputs.contains(&other_input.node_id()) )) // only those nodes are candidates, whose src-operands are ALL inputs (->only primary inputs are directly available) + .flat_map( |output| { + let output_signal = Signal::new(output, false); + let mut output_candidates = vec!(); + + // for every subarray in which the signal is needed + for subarray in self.signal_to_subarrayids.get(&output_signal).expect("Signal is not mapped to a subarray yet??") { + output_candidates.push(((output_signal,*subarray), self.compute_scheduling_prio_for_node(output_signal, *subarray, network))); + } + + // if negation is also needed + let inverted_output_signal = Signal::new(output, true); + if self.signal_to_subarrayids.contains_key(&inverted_output_signal) { + // for every subarray in which the signal is needed + for subarray in self.signal_to_subarrayids.get(&inverted_output_signal).expect("Signal is not mapped to a subarray yet??") { + output_candidates.push(((inverted_output_signal,*subarray), self.compute_scheduling_prio_for_node(inverted_output_signal, *subarray, network))); + } + } + + output_candidates + }) + .collect(); + + self.comp_state.candidates.append(&mut outputs_with_prio); + debug!("{:?} has the following outputs: {:?}", input, network.node_outputs(input).collect::>()); + } + } + + /// Returns list of candidates that can be computed once `computed_node` is available + fn get_new_candidates(&mut self, network: &impl 
NetworkWithBackwardEdges, computed_node: Signal, subarray: SubarrayId) -> PriorityQueue<(Signal, SubarrayId), SchedulingPrio> { + debug!("Candidates: {:?}", self.comp_state.candidates); + debug!("DRAM state: {:?}", self.comp_state.value_states); + network.node_outputs(computed_node.node_id()) + // filter for new nodes that have all their input-operands available now in the same subarray (->only inputs of computed nodes could have changed to candidate-state, other nodes remain uneffected) + .filter({|out| network.node(*out).inputs().iter() + .all( |input| { + debug!("OUTPUT: {out:?} with input {input:?}"); + debug!("Out: {:?}, In: {:?}", out, input); + self.comp_state.value_states.contains_key(&(*input, subarray)) + }) + }) + .flat_map(|id| { + let noninverted_signal = Signal::new(id, false); + let mut new_candidates = vec!(((noninverted_signal, subarray), self.compute_scheduling_prio_for_node(noninverted_signal, subarray, network))); // noninverted version needs to be computed anyway + let inverted_signal = Signal::new(id, true); + // if needed also schedule `inverted_signal` for computation + if self.signal_to_subarrayids.contains_key(&inverted_signal) { + new_candidates.push(((inverted_signal, subarray.get_partner_subarray()), self.compute_scheduling_prio_for_node(noninverted_signal, subarray, network))); + } + new_candidates.into_iter().collect::>() + }) // TODO: check if inverted signal is required as well! 
+ .collect() + } + + /// Initialize compilation state: + /// - choose compute rows (by setting [`Self::compute_row_activations`] + /// - decide in which rows to place constants + /// - assign subarray-ids to each NodeId + /// - return code to place input operands in `program` + fn init_comp_state(&mut self, network: &impl NetworkWithBackwardEdges, program: &mut Program) { + let config_file = unsafe { CStr::from_ptr(self.settings.config_file) }.to_str().unwrap(); + let config = Path::new(config_file); + + // 0.1 Allocate compute rows: rows reserved for performing computations, all other rows are usable as "Register" + if config.is_file() { + // if config-file has been provided: get compute rows from that config file rather than recomputing them + + let contents = fs::read_to_string(config).unwrap(); + // Read file contents + + // Parse JSON into Vec + let records: Vec = + serde_json::from_str(&contents).unwrap_or_else(|_| panic!("Failed to parse JSON for file {:?}", config.to_str())); + + // Convert into the HashMap structure + self.compute_row_activations = records + .into_iter() + .map(|rec| { + ( + (rec.operands, rec.position), + (rec.rows.0, rec.rows.1), + ) + }) + .collect(); + + // This is the result for SKHYNIX DRAM (comment out if to save dev-time generating json-file): + // self.compute_row_activations = HashMap::from([ + // ((SupportedNrOperands::One, NeighboringSubarrayRelPosition::Above), (RowAddress(8), RowAddress(8))), + // ((SupportedNrOperands::One, NeighboringSubarrayRelPosition::Below), (RowAddress(303), RowAddress(303))), + // ((SupportedNrOperands::Two, NeighboringSubarrayRelPosition::Above), (RowAddress(15), RowAddress(79))), + // ((SupportedNrOperands::Two, NeighboringSubarrayRelPosition::Below), (RowAddress(293), RowAddress(357))), + // ((SupportedNrOperands::Four, NeighboringSubarrayRelPosition::Above), (RowAddress(60), RowAddress(42))), + // ((SupportedNrOperands::Four, NeighboringSubarrayRelPosition::Below), (RowAddress(472), 
RowAddress(412))), + // ((SupportedNrOperands::Eight, NeighboringSubarrayRelPosition::Above), (RowAddress(42), RowAddress(15))), + // ((SupportedNrOperands::Eight, NeighboringSubarrayRelPosition::Below), (RowAddress(203), RowAddress(283))), + // ((SupportedNrOperands::Sixteen, NeighboringSubarrayRelPosition::Above), (RowAddress(32), RowAddress(83))), + // ((SupportedNrOperands::Sixteen, NeighboringSubarrayRelPosition::Below), (RowAddress(470), RowAddress(252))), + // ((SupportedNrOperands::Thirtytwo, NeighboringSubarrayRelPosition::Above), (RowAddress(307), RowAddress(28))), + // ((SupportedNrOperands::Thirtytwo, NeighboringSubarrayRelPosition::Below), (RowAddress(149), RowAddress(318))), + // ]); + + + } else { + self.choose_compute_rows(); // choose which rows will serve as compute rows (those are stored in `self.compute_row_activations` + + // write chosen compute rows to config-file + let records: Vec = self.compute_row_activations + .iter() + .map(|((operands, position), (row1, row2))| ComputeRowRecord { + operands: *operands, + position: *position, + rows: (*row1, *row2), + }) + .collect(); + + // Store compute-row choice in json file + let json_output = serde_json::to_string_pretty(&records).unwrap(); + // Write to file + let output_path = Path::new("fcdram_hksynx_compute_rows.json"); // NOTE: the chosen compute rows are specific to HKSYNX DRAM for now + let mut file = File::create(output_path).expect("Failed to create output file"); + file.write_all(json_output.as_bytes()).expect("Failed to write JSON to file"); + println!("Stored chosen compute-rows into {:?}. 
Pass this file via `.config_file` to safe considerate compilation time for choosing the compute rows", output_path.to_str()); + } + + // 0.2 Save free rows + // At the start all rows, except for the compute rows, are free rows + let compute_rows = self.compute_row_activations.values().fold(vec!(), |all_compute_rows, next_compute_row_combi| { + let new_compute_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(next_compute_row_combi).expect("Compute row can't be activated??"); + all_compute_rows.iter().chain(new_compute_rows).cloned().collect() + }); + let mut free_rows = (0..ROWS_PER_SUBARRAY).map(RowAddress::from).collect::>(); + free_rows.retain(|r| {!compute_rows.contains(r)}); + for subarray in 0..NR_SUBARRAYS { + let free_rows_in_subarray = free_rows.iter().map(|row| row.local_rowaddress_to_subarray_id(SubarrayId(subarray))).collect(); // transform local row address to row addresses in corresponding `subarray` + self.comp_state.free_rows_per_subarray.entry(SubarrayId(subarray)).insert_entry(free_rows_in_subarray); + } + + // 0.3 Group operands by subarray (ensure all operands are placed in the right subarray) + self.assign_signals_to_subarrays(network); // sets `self.signal_to_subarrayids` + + // NEXT: 0.4 Place all constants and inputs and mark the inputs as being live + self.place_constants(program); // constants are placed in each subarray + debug!("Placed constants: {:?}", self.comp_state.constant_values); + + self.place_inputs(network, program); // place input-operands into rows + debug!("Placed inputs {:?} in {:?}", network.leaves().collect::>(), self.comp_state.value_states); + + // 0.5 Setup: store all network-nodes yet to be compiled + self.init_candidates(network); + debug!("Initialized candidates {:?}", self.comp_state.candidates); + } + + /// Assigns signals to subarrays and through this determines placement of those signal in the DRAM module + /// - sets [`Self::signal_to_subarrayids`] + /// + /// # Assumptions + /// + /// - assumes 
`network` is acyclic ! + /// + /// # TODO + /// + /// - make sure nr of signals placed in a subarray is <= nr of available rows (without compute rows) + /// - think about merging subarray assignment (s.t. several outputs end up in same subarray, so that inputs can be reused among them) + fn assign_signals_to_subarrays(&mut self, network: &impl NetworkWithBackwardEdges) { + // 1. determine all signals which go into outputs without being negated (and hence can be stored in same subarray) + // - also store signals which do need to be negated (and process them in the next step) + // - TODO: continue graph traversal with src-operands of the outputs (until primary inputs are reached) + let mut subarray_id = 1; // start with 1 since edge subarrays cant be used as compute subarrays + for output in network.outputs() { + self.signal_to_subarrayids.insert(output, vec!(SubarrayId(subarray_id))); // determine (virtual) subarray in which output will reside - assignment might change in a later stage by merging several outputs into same subarray if there is enough space + let neighboring_subarray = SubarrayId(subarray_id).get_partner_subarray(); + let (actual_subarray , neighboring_subarray) = { + + // if output is inverted, then the non-inverted value resides in the partner subarray + if output.is_inverted() { + self.signal_to_subarrayids.insert(Signal::new(output.node_id(), false), vec!(neighboring_subarray)); // determine (virtual) subarray in which output will reside + (neighboring_subarray, SubarrayId(subarray_id)) + } else { + (SubarrayId(subarray_id), neighboring_subarray) + } + }; + + let (noninverted_src_signals, inverted_src_signals) = self.get_all_noninverted_and_inverted_src_signals(output, network); + + // println!("Noninverted src signals: {:?}", noninverted_src_signals.clone()); + // all directly (might in theory) reside in the same subarray as `output` (since no NOTS are inbtw which locate them to a neighboring subarray) + for connected_signal in 
noninverted_src_signals { + self.signal_to_subarrayids.entry(connected_signal).or_default().push(actual_subarray); // determine (virtual) subarray in which output will reside + } + + // Place direct inputs that ARE inverted in same subarray (while their non-inverted version will end up in the neighboring subarray) + for inverted_signal in inverted_src_signals.as_slice() { + self.signal_to_subarrayids.entry(*inverted_signal).or_default().push(actual_subarray); // determine (virtual) subarray in which output will reside + } + // place inverted signals in neighboring subarray, 2x inverted signals in same subarray, 3x inverted signals in neighboring subarray etc... + // TODO: same thing as before: place non-inverted version of inverted signals in opposite subarray ! + // !!! doesnt support >=2 NOTs on one path yet !!! + let mut unvisited_signals_in_same_subarray: Vec = vec!(); // inverting even nr of times leads to signals being placed in same subarray + let mut unvisited_signals_in_neighboring_subarray: Vec = inverted_src_signals.iter() // =those signals that are negated an odd nr of times + .filter(|signal| !network.node(signal.node_id()).is_leaf() ) // leaves don't need to be placed in neighboring subarray since inputs are placed by user + .map(|signal| Signal::new(signal.node_id(), false)).collect(); // before negation the signals are in the neighboring subarray + while !unvisited_signals_in_same_subarray.is_empty() || !unvisited_signals_in_neighboring_subarray.is_empty() { + // println!("Same subarray: {:?}", unvisited_signals_in_same_subarray); + // println!("Neighboring subarray: {:?}", unvisited_signals_in_neighboring_subarray); + if let Some(signal_neighboring_subarray) = unvisited_signals_in_neighboring_subarray.pop() { + + debug!("Neighboring: {signal_neighboring_subarray:?}"); + self.signal_to_subarrayids.entry(signal_neighboring_subarray).or_default().push(neighboring_subarray); + // these are placed in the Above subarray (arbitrary decision, epxloring 
whether this makes a difference might be explored in future) + // NOTE: signals that are inverted an even nr of times are placed in the same subarray as the output + let (signals_neighboring_subarray, signals_inverted_even_nr_times) = self.get_all_noninverted_and_inverted_src_signals(signal_neighboring_subarray, network); + for signal_inverted_odd_nr_times in signals_neighboring_subarray { + debug!("ODD: {signal_inverted_odd_nr_times:?} placed in subarray {neighboring_subarray}"); + self.signal_to_subarrayids.entry(signal_inverted_odd_nr_times).or_default().push(neighboring_subarray); + } + + // signals which are inverted again require the non-inverted version to be in the other subarray + let mut signals_to_invert_once_more: Vec = signals_inverted_even_nr_times.into_iter().filter(|signal| { + if network.node(signal.node_id()).is_leaf() { + // is input signal + self.signal_to_subarrayids.entry(*signal).or_default().push(neighboring_subarray); + false + } else { true } + }).collect(); // inputs are placed by user (also inverted ones) + for even_times_inverted_signals in signals_to_invert_once_more.as_slice() { + let signal = Signal::new(even_times_inverted_signals.node_id(), false); + self.signal_to_subarrayids.entry(signal).or_default().push(actual_subarray); + } + unvisited_signals_in_same_subarray.append(&mut signals_to_invert_once_more); + } + + if let Some(signal_same_subarray) = unvisited_signals_in_same_subarray.pop() { + + debug!("Same: {signal_same_subarray:?}"); + self.signal_to_subarrayids.entry(signal_same_subarray).or_default().push(actual_subarray); + // signals inverted even nr of times are placed in the same subarray as the `output` Signal + // NOTE: signals that are inverted an odd nr of times are placed in the neighboring subarray of the output + let (signals_same_subarray_of_output, signals_inverted_even_nr_times) = self.get_all_noninverted_and_inverted_src_signals(signal_same_subarray, network); + for signal in signals_same_subarray_of_output { + 
self.signal_to_subarrayids.entry(signal).or_default().push(actual_subarray); + } + + // signals which are inverted again require the non-inverted version to be in the other subarray + let mut signals_to_invert_once_more: Vec = signals_inverted_even_nr_times.into_iter().filter(|signal| { + if network.node(signal.node_id()).is_leaf() { + // is input signal + self.signal_to_subarrayids.entry(*signal).or_default().push(neighboring_subarray); + false + } else { true } + }).collect(); // inputs are placed by user (also inverted ones) + for even_times_inverted_signals in signals_to_invert_once_more.as_slice() { + let signal = Signal::new(even_times_inverted_signals.node_id(), false); + self.signal_to_subarrayids.entry(signal).or_default().push(neighboring_subarray); + } + unvisited_signals_in_neighboring_subarray.append(&mut signals_to_invert_once_more); + } + } + + // for the beginning place all outputs in different subarrays. A 2nd pass may optimize/merge subarrays later on + subarray_id += 2; // maybe +=2 to account for negated operands being stored in neighboring subarray? (TODO: test with some example networks) + } + + + debug!("Signals to subarrayids: {:?}", self.signal_to_subarrayids); + } + + /// Returns all src signals which are not inverted. These are exactly those signals that can be placed in the same subarray as. + /// + /// # Returns + /// + /// Tuple of + /// 1. Vector of src Signals that are **not** inverted + /// 2. 
Vector of src Signals that are indeed inverted (need to be processed further, only first inverted signal is returned for a subtree) + fn get_all_noninverted_and_inverted_src_signals(&mut self, signal: Signal, network: &impl NetworkWithBackwardEdges) -> (Vec, Vec) { + let signal_node = network.node(signal.node_id()); + + let mut noninverted_src_signals = vec!(); + let mut inverted_src_signals = vec!(); + let mut stack_unvisited_noninverted_src_operands = Vec::from(signal_node.inputs()); + + while let Some(src_operand) = stack_unvisited_noninverted_src_operands.pop() { + if src_operand.is_inverted() { + inverted_src_signals.push(src_operand); // store subarray to which this input has to be placed to as a neighbor, further processing elsewhere + } else { + noninverted_src_signals.push(src_operand); + let src_operand_node = network.node(src_operand.node_id()); + stack_unvisited_noninverted_src_operands.append(&mut Vec::from(src_operand_node.inputs())); + } + } + + self.computed_noninverted_scr_signals.insert(signal, (inverted_src_signals.clone(), noninverted_src_signals.clone())); // to save (possible) recomputation next time + (noninverted_src_signals, inverted_src_signals) + } + + /// Returns instructions to initialize all given `ref_rows` in reference-subarray for corresponding logic-op + /// - NOTE: [1] doesn't describe how the 0s/1s get into the reference subarray. 
We use `RowCloneFPM` ([4])) to copy the constant 0s/1s from the reserved safe-space row into the corresponding reference subarray row + fn init_reference_subarray(&self, mut ref_rows: Vec, logic_op: LogicOp) -> Vec { + match logic_op { + LogicOp::AND => { + let frac_row = ref_rows.pop().expect("Min 1 row has to be passed for initializing ref subarray"); // TODO: include success-rate considerations to choose best row to use for storing `V_{DD}/2` + let row_address_1 = self.comp_state.constant_values.get(&1).expect("Constants are expected to be placed in every subarray beforehand") + .local_rowaddress_to_subarray_id(frac_row.get_subarray_id()); // row address where all 1s (V_DD) are to bestored + let mut instructions = vec!(); + for _ in 0..self.settings.repetition_fracops { + instructions.push(Instruction::FracOp(frac_row)); + } + for other_row in ref_rows { + instructions.push(Instruction::RowCloneFPM(row_address_1, other_row, String::from("Init ref-subarray with 1s"))); + } + instructions + }, + LogicOp::OR => { + let frac_row = ref_rows.pop().expect("Min 1 row has to be passed for initializing ref subarray"); // TODO: include success-rate considerations to choose best row to use for storing `V_{DD}/2` + let row_address_0 = self.comp_state.constant_values.get(&0).expect("Constants are expected to be placed in every subarray beforehand") + .local_rowaddress_to_subarray_id(frac_row.get_subarray_id()); // row address where all 0s (GND) are to be stored + let mut instructions = vec!(); + for _ in 0..self.settings.repetition_fracops { + instructions.push(Instruction::FracOp(frac_row)); + } + for other_row in ref_rows { + instructions.push(Instruction::RowCloneFPM(row_address_0, other_row, String::from("Init ref-subarray with 0s"))); + } + instructions + }, + LogicOp::NOT => vec!(), + _ => panic!("{logic_op:?} not supported yet"), + } + } + + /// Returns instructions to be executed for performing `NOT` on `src_row` into `dst_row` and updates the `comp_state` holding the 
negated value + /// - NOTE: currenlty only single-operand NOTs are supported bc + /// 1) more operands lead to (slightly) worse results (see Figure10 in [1]) + /// 2) since there are separate compute rows using multiple dst rows doesn't make sense (the values need to be copied out of the dst-rows anyway into non-compute rows) + fn execute_not(&mut self, signal_to_invert: &Signal, dst_array: SubarrayId) -> Vec { + let mut instructions = vec!(); + let row_combi = self.compute_row_activations.get(&(SupportedNrOperands::One, NeighboringSubarrayRelPosition::Above)).unwrap(); + let src_array = dst_array.get_partner_subarray(); + + // 1. Copy non-inverted operand into src-row + let src_row = row_combi.0.local_rowaddress_to_subarray_id(src_array); + let unnegated_signal = Signal::new(signal_to_invert.node_id(), false); + let src_location = self.comp_state.value_states.get(&(unnegated_signal, src_array)).unwrap_or_else(|| panic!("Src operand {src_row} is not live in subarray {src_array} (see {signal_to_invert:?})??")); + + instructions.push(self.execute_intrasubarray_rowclone(*src_location, src_row, String::from("Move into compute row"))); + + // 2. Execute NOT + let dst_row = row_combi.1.local_rowaddress_to_subarray_id(dst_array); + instructions.push(Instruction::ApaNOT(src_row, dst_row)); + + // 3. Copy negated value out of compute rows + let free_row = self.comp_state.free_rows_per_subarray.get_mut(&dst_array).and_then(|v| v.pop()).unwrap_or_else(|| panic!("OOM: No free rows in subarray {dst_array}")); + instructions.push(self.execute_intrasubarray_rowclone(dst_row, free_row, String::from("Move into free row"))); + self.comp_state.value_states.insert((*signal_to_invert, dst_array), free_row); + + instructions + } + + /// Returns the instructions needed to perform `language_op` placing the result in a free row in the `compute_subarray` + /// - TODO: also store negated signal (in reference subarray) if it's needed later on? 
+ fn execute_and_or(&mut self, node_id: Id, compute_subarray: SubarrayId, network: &impl NetworkWithBackwardEdges) -> Vec { + let mut instructions = vec!(); + + let (reference_subarray, rel_position_sense_amps_to_compute_subarray) = if compute_subarray.0 % 2 == 0 { // currently the following arrays are compute&reference subarrays of each other: 0&1,2&3,4&5,... + (compute_subarray.0 + 1, NeighboringSubarrayRelPosition::Below) + } else { + (compute_subarray.0 - 1, NeighboringSubarrayRelPosition::Above) + }; + + + let nr_src_operands = SupportedNrOperands::try_from(network.node(node_id).inputs().len()).unwrap(); + let compute_row_combi= self.compute_row_activations.get(&(nr_src_operands, rel_position_sense_amps_to_compute_subarray)).expect("Nr input operands is {}"); + let compute_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(compute_row_combi).unwrap(); + + // 0. Move all src_operands into compute rows + // TODO: map compute rows to right subarray + for (&compute_row, &src_signal) in compute_rows.iter().zip(network.node(node_id).inputs()) { + let compute_row = compute_row.local_rowaddress_to_subarray_id(compute_subarray); + let &src_row = self.comp_state.value_states.get(&(src_signal, compute_subarray)).unwrap_or_else(|| panic!("Src signal {src_signal:?} is not present in compute subarray {compute_subarray} ???")); + instructions.push(self.execute_intrasubarray_rowclone(src_row, compute_row, String::from("Move into compute row"))); + } + + + let language_op = network.node(node_id); + let logic_op = match language_op { + // REMINDER: operand-nr is extracted by looking at nr of children beforehand + Aoig::And(_) | Aoig::And4(_) | Aoig::And8(_)| Aoig::And16(_) => LogicOp::AND, + Aoig::Or(_) | Aoig::Or4(_) | Aoig::Or8(_) | Aoig::Or16(_) => LogicOp::OR, + _ => panic!("candidate is expected to be a logic op"), + }; + + // 1. 
Initialize reference subarray + let ref_rows = compute_rows.iter().map(|c| c.local_rowaddress_to_subarray_id(SubarrayId(reference_subarray))).collect(); + instructions.append(&mut self.init_reference_subarray(ref_rows, logic_op)); + + // 2. Execute actual computation + instructions.push(Instruction::ApaAndOr(compute_row_combi.0.local_rowaddress_to_subarray_id(compute_subarray), compute_row_combi.1.local_rowaddress_to_subarray_id(SubarrayId(reference_subarray)))); + + // 3. Move result into non-compute row + // TODO: add instruction to move value into free-row !! + let free_row = self.comp_state.free_rows_per_subarray.get_mut(&compute_subarray).and_then(|v| v.pop()).unwrap_or_else(|| panic!("No more free rows in subarray {compute_subarray}")); + instructions.push(self.execute_intrasubarray_rowclone(compute_rows[0], free_row, String::from("Move into free row"))); + self.comp_state.value_states.insert((Signal::new(node_id, false), compute_subarray), free_row); // TODO: for inverted signals not in result_subarray, right? + + instructions + } + + fn execute_intrasubarray_rowclone(&self, src_row: RowAddress, dst_row: RowAddress, comment: String) -> Instruction { + Instruction::RowCloneFPM(src_row, dst_row, comment) + } + + /// Returns Instructions to execute given `next_candidate` (which is a signal which needs to reside in a specific subarray after performing the execution) + /// - [ ] make sure that operation to be executed on those rows won't simultaneously activate other rows holding valid data which will be used by future operations + fn execute_next_instruction(&mut self, next_candidate: &(Signal, SubarrayId), network: &impl NetworkWithBackwardEdges) -> Vec { + let (signal, result_subarray) = next_candidate; + let node_id = signal.node_id(); + + // quick validity check: ensure all inputs are already present in the required array + // assert!(network.node(node_id).inputs().iter().all(|input| { + // // TODO + // })); + + let mut next_instructions = vec!(); + // 1. 
Perform actual operation of the node + + let compute_subarray = if signal.is_inverted() { result_subarray.get_partner_subarray() } else { *result_subarray }; // for inverted signals first compute the noninverted signal in the other subarray + + // println!("EXECUTING {:?}", next_candidate); + next_instructions.append(&mut self.execute_and_or(node_id, compute_subarray, network)); + + // 2. Negate the result (if needed) + if signal.is_inverted() { + let mut negate_instructions = self.execute_not(signal, *result_subarray); + next_instructions.append(&mut negate_instructions); + } + next_instructions + } + + /// Compute `SchedulingPrio` for a given `signal` located in the `subarray` + /// - used for inserting new candidates + /// + /// TODO: write unittest for this function + fn compute_scheduling_prio_for_node(&self, signal: Signal, subarray: SubarrayId, network: &impl NetworkWithBackwardEdges) -> SchedulingPrio { + let nr_last_value_uses = network.node(signal.node_id()).inputs() // for each input check whether `id` is the last node using it + .iter() + .fold(0, |acc, input| { + let input_id = Signal::node_id(input); + let non_computed_outputs: Vec = network.node_outputs(input_id) // get all other nodes still relying on this input + .filter(|out| { + + let out_signal = Signal::new(*out,false); + out_signal != signal && // all output signals except for the current one + !(self.comp_state.value_states.contains_key(&(out_signal, subarray))) // that are not yet computed (not rows present in `subarray` holding that value + }) // filter for uses of `input` which still rely on it (=those that are not computed yet, except for currently checked node + .collect(); + if non_computed_outputs.is_empty() { + acc + 1 + } else { + acc + } + }); + + SchedulingPrio { + nr_last_value_uses, + nr_src_operands: network.node(signal.node_id()).inputs().len(), + nr_result_operands: network.node_outputs(signal.node_id()).collect::>().len(), + } + } +} + +/// Stores the current state of a row 
at a concrete compilations step +#[derive(Default)] // by default not a compute_row, no live-value and no constant inside row +pub struct RowState { + /// `compute_rows` are reservered rows which solely exist for performing computations, see [`Compiler::compute_row_activations`] + is_compute_row: bool, + /// `None` if the value inside this row is currently not live + live_value: Option, + /// Mostly 0s/1s (for initializing reference subarray), see [`CompilationState::constant_values`] + constant: Option, +} + +/// Keep track of current progress of the compilation (eg which rows are used, into which rows data is placed, ...) +pub struct CompilationState { + /// For each row in the dram-module store its state (whether it's a compute row or if not whether/which value is stored inside it + dram_state: HashMap, + /// For each subarray it stores the row in which the `Signal` is located + value_states: HashMap<(Signal, SubarrayId), RowAddress>, + /// Stores row location of constant + /// - REMINDER: some constants are stored in fixed rows (!in each subarray), eg 0s and 1s for initializing reference subarray + constant_values: HashMap, + /// List of candidates (ops ready to be issued) prioritized by some metric by which they are scheduled for execution + /// - NOTE: calculate Nodes `SchedulingPrio` using + candidates: PriorityQueue<(Signal, SubarrayId), SchedulingPrio>, + /// For each Subarray store which rows are free (and hence can be used for storing values) + free_rows_per_subarray: HashMap>, +} + +impl CompilationState { + pub fn new(dram_state: HashMap) -> Self { + Self { + dram_state, + value_states: HashMap::new(), + constant_values: HashMap::new(), + candidates: PriorityQueue::new(), + free_rows_per_subarray: HashMap::new(), + } + } +} + +/// Contains info to order nodes for Instruction Scheduling +/// GOAL: minimize register usage: +/// 1. Number of last-value-uses +/// 2. 
Number src operands: ASSUMPTIONS=executing that op reduces value-usage of all of those inputs, higher prob. of last-use in next steps) +/// 3. Total number of nodes that have `node` as input-operands +/// - possible extension: weigh result-operands based on how many of their src-operands are already computed +#[derive(PartialEq, Eq, Debug)] +struct SchedulingPrio { + /// Nr of values which are used last by that node + nr_last_value_uses: u64, + /// Number of source operands the Node has + nr_src_operands: usize, + /// Number of result operands the Node has + nr_result_operands: usize, +} + +/// To execute the next instruction based on the following criteria: +/// 1. Select candidate which operates on most values which are used last (->to release safe-space right after) +/// 2. IF EQUAL: Select candidate with most successors (->pushes more candidates to select from in next step) +/// 3. IF EQUAL: Select any of the remaining candidates +impl Ord for SchedulingPrio { + fn cmp(&self, other: &Self) -> Ordering { + self.nr_last_value_uses.cmp(&other.nr_last_value_uses) + .then(self.nr_src_operands.cmp(&other.nr_src_operands)) // if `nr_last_value_uses` is equal + .then(self.nr_result_operands.cmp(&other.nr_result_operands)) + } +} + +impl PartialOrd for SchedulingPrio { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +#[cfg(test)] +mod tests { + use eggmock::egg::{self, EGraph, Extractor, RecExpr}; + use eggmock::{AoigLanguage, ComputedNetworkWithBackwardEdges, Network}; + + use crate::fc_dram::cost_estimation::CompilingCostFunction; + + use super::*; use std::ffi::CString; + // import all elements from parent-module + use std::sync::Once; + + // ERROR: `eggmock`-API doesn't allow this.. 
+ // // For data shared among unittests but initialized only once + // static TEST_DATA: LazyLock<_> = LazyLock::new(|| { + // let mut egraph: EGraph = Default::default(); + // let my_expression: RecExpr = "(and (and a c) (and b c))".parse().unwrap(); + // let extractor = Extractor::new( &egraph, CompilingCostFunction {}); + // let ntk = (extractor, vec!(egg::Id::from(9))); + // + // ComputedNetworkWithBackwardEdges::new(&ntk) + // }); + + // ERROR: This also does not work bc of the weird implementation of a network + // fn simple_egraph() -> ComputedNetworkWithBackwardEdges<'static, (Extractor<'static, CompilingCostFunction, AoigLanguage, ()>, Vec)> { + // let mut egraph: EGraph = Default::default(); + // let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); + // egraph.add_expr(&my_expression); + // let extractor = Extractor::new( &egraph, CompilingCostFunction {}); + // let ntk = &(extractor, vec!(egg::Id::from(5))); + // ntk.dump(); + // // Id(5): And([Signal(false, Id(2)), Signal(false, Id(4))]) + // // Id(4): And([Signal(false, Id(3)), Signal(false, Id(1))]) + // // Id(1): Input(3) + // // Id(3): Input(2) + // // Id(2): And([Signal(false, Id(0)), Signal(false, Id(1))]) + // // Id(0): Input(1) + // + // let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); + // ntk_backward + // } + + static INIT: Once = Once::new(); + + fn init() -> Compiler { + INIT.call_once(|| { + env_logger::init(); + }); + Compiler::new(CompilerSettings { + print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, repetition_fracops: 5, safe_space_rows_per_subarray: 16, + config_file: CString::new("/config/fcdram_hksynx_compute_rows.json").unwrap().into_raw(), // `into_raw()` deliberately leaks the `CString`: `as_ptr()` on the temporary would leave `config_file` dangling once this statement ends + do_save_config: false } ) + } + + #[test] + fn test_input_placement () { + + let mut compiler = init(); + let mut egraph: EGraph = Default::default(); + + // Create Input nodes + let id1 = egraph.add(AoigLanguage::Input(0)); // Id(1) + let id2 =
egraph.add(AoigLanguage::Input(1)); // Id(2) + let id3 = egraph.add(AoigLanguage::Input(2)); // Id(3) + + // And([Signal(false, Id(2)), Signal(false, Id(3))]) → Id(4) + let id4 = egraph.add(AoigLanguage::And([id2, id3])); + + // And([Signal(false, Id(1)), Signal(true, Id(3))]) → Id(6) + let not_id3 = egraph.add(AoigLanguage::Not(id3)); + let id6 = egraph.add(AoigLanguage::And([id1, not_id3])); + + // Or([Signal(false, Id(4)), Signal(false, Id(6))]) → Id(10) + let id10 = egraph.add(AoigLanguage::Or([id4, id6])); + + let extractor = Extractor::new( &egraph, CompilingCostFunction {}); + let ntk = &(extractor, vec!(id10)); + ntk.dump(); + + let ntk_with_backward_edges = ntk.with_backward_edges(); + + let program = compiler.compile(&ntk_with_backward_edges); + + println!("{program}"); + } + + #[test] + fn test_candidate_initialization() { + let mut compiler = init(); + + let mut egraph: EGraph = Default::default(); + let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); + egraph.add_expr(&my_expression); + let output2 = egraph.add(AoigLanguage::And([eggmock::egg::Id::from(0), eggmock::egg::Id::from(2)])); // additional `And` with one src-operand=input and one non-input src operand + debug!("EGraph used for candidate-init: {:?}", egraph); + let extractor = Extractor::new( &egraph, CompilingCostFunction {}); + let ntk = &(extractor, vec!(egg::Id::from(5), output2)); + ntk.dump(); + // Id(5): And([Signal(false, Id(2)), Signal(false, Id(4))]) + // Id(4): And([Signal(false, Id(3)), Signal(false, Id(1))]) + // Id(1): Input(3) + // Id(3): Input(2) + // Id(2): And([Signal(false, Id(0)), Signal(false, Id(1))]) + // Id(0): Input(1) + + // act is if one `AND` has already been computed -> other and (`Id(2)`) should be the only candidate left + // compiler.comp_state.value_states.insert(Signal::new(eggmock::Id::from(4), false), ValueState{ is_computed: true, row_location: None }); + // + // let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); + // + // 
compiler.init_candidates(&ntk_backward); + // let is_candidate_ids: HashSet = compiler.comp_state.candidates.iter().map(|(id,_)| *id).collect(); + // let should_candidate_ids: HashSet = HashSet::from([Signal::new( eggmock::Id::from(2), false), Signal::new(eggmock::Id::from(4), false)]); + // assert_eq!( is_candidate_ids, should_candidate_ids); + + // TODO: test-case with node that relies on one input src-operand and one non-input (intermediate node) src-operand + } + + #[test] + fn test_new_candidates() { + let mut compiler = init(); + + } + + #[test] + fn test_compute_scheduling_prio_for_node() { + let mut compiler = init(); + + let mut egraph: EGraph = Default::default(); + let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); + egraph.add_expr(&my_expression); + let extractor = Extractor::new( &egraph, CompilingCostFunction {}); + let ntk = &(extractor, vec!(egg::Id::from(5))); + ntk.dump(); + // Id(5): And([Signal(false, Id(2)), Signal(false, Id(4))]) + // Id(4): And([Signal(false, Id(3)), Signal(false, Id(1))]) + // Id(1): Input(3) + // Id(3): Input(2) + // Id(2): And([Signal(false, Id(0)), Signal(false, Id(1))]) + // Id(0): Input(1) + + // act is if one `AND` has already been computed -> other and (`Id(2)`) should be the only candidate left + // compiler.comp_state.value_states.insert(Signal::new(eggmock::Id::from(2), false), ValueState{ is_computed: true, row_location: None }); + // + // let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); + // + // let scheduling_prio = compiler.compute_scheduling_prio_for_node(Signal::new(eggmock::Id::from(4), false), &ntk_backward); + // assert_eq!(scheduling_prio, SchedulingPrio { nr_last_value_uses: 2, nr_src_operands: 2, nr_result_operands: 1 } ); + + } + + // TODO + #[test] + fn test_select_compute_and_ref_subarray() { + let compiler = init(); + // let (selected_subarray, _) = compiler.select_compute_and_ref_subarray(vec!(RowAddress(0b1_000_000_000), RowAddress(0b1_000_010_000), 
RowAddress(0b111_000_000_000), RowAddress(0b10_100_000_000),)); + // assert_eq!(selected_subarray, 0b1_000_000_000); + } + + #[ignore] + fn test_program_validity() { + // 1. test that no APA activates a safe-space row + + // .. + } +} diff --git a/rs/src/fc_dram/cost_estimation.rs b/rs/src/fc_dram/cost_estimation.rs new file mode 100755 index 0000000..8adcf52 --- /dev/null +++ b/rs/src/fc_dram/cost_estimation.rs @@ -0,0 +1,219 @@ +//! Computation of Cost-Metrics (currently includes success rate and nr of mem-cycles) + +use eggmock::egg::{CostFunction, Id}; +use eggmock::{AoigLanguage, egg::Language}; +use log::debug; +use std::cmp::Ordering; +use std::collections::HashMap; +use std::ops; +use std::rc::Rc; + +use super::architecture::{FCDRAMArchitecture, LogicOp, SuccessRate}; + +pub struct CompilingCostFunction{} + +/// A metric that estimates the runtime cost of executing an [`super::Instruction`] (in the program) +#[derive(Debug, Clone, Copy, Eq)] +pub struct InstructionCost { + /// Probability that the whole program will run successfully + success_rate: SuccessRate, + /// Nr of memcycles it takes to execute the corresponding instruction + mem_cycles: usize, +} + +/// Needed to implement `enode.fold()` for computing overall cost from node together with its children +impl ops::Add for InstructionCost { + type Output = InstructionCost; + + fn add(self, rhs: InstructionCost) -> Self::Output { + if self.success_rate.0.abs() > 1.0 || rhs.success_rate.0.abs() > 1.0 { // program_cost > 0 since `usize` is always non-negative + panic!("Compilingcost must be monotonically increasing!"); + } + + InstructionCost { + success_rate: self.success_rate * rhs.success_rate, // monotonically decreasing + mem_cycles: self.mem_cycles + rhs.mem_cycles, // monotonically increasing + } + } +} + +impl PartialEq for InstructionCost { + fn eq(&self, other: &Self) -> bool { + self.success_rate == other.success_rate && self.mem_cycles == other.mem_cycles + } +} + +/// First compare based on 
success-rate, then on program-cost +/// TODO: more fine-grained comparison !! +impl PartialOrd for InstructionCost { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +/// First compare based on success-rate, then on program-cost +/// - [`Ordering::Less`] = better (fewer mem-cycles, or - via the `reverse()` below - a higher success-rate, compare as `Less`) +/// TODO: more fine-grained comparison !! +impl Ord for InstructionCost { + fn cmp(&self, other: &Self) -> Ordering { + // better success-rate is always better than higher program-cost (TODO: improve this) + if self.success_rate == other.success_rate { // TODO: cmp based on some margin (eg +-0.2%) + self.mem_cycles.cmp(&other.mem_cycles) // lower is better + } else { + self.success_rate.cmp(&other.success_rate).reverse() // higher is better + } + } +} + +impl CostFunction for CompilingCostFunction { + type Cost = Rc; + + /// Compute cost of given `enode` using `cost_fn` + /// + /// Parameters determining cost of an enode: + /// - distance of row-operands to sense amplifiers + /// - operation: + /// - AND= + /// - OR= + /// - NOT= + /// + /// TODO: NEXT + /// - [ ] Compile subgraph directly ??
+ fn cost(&mut self, enode: &AoigLanguage, mut cost_fn: C) -> Self::Cost + where + C: FnMut(Id) -> Self::Cost, + { + // TODO: detect self-cycles, other cycles will be detected by compiling, which will result in an error + + // see Figure17 [1] + let and_nr_operand_to_success_rate: HashMap = HashMap::from([(2,0.85), (4,0.88), (8,0.9), (16,0.92)]); // read from graph (only estimates anyway) + let or_nr_operand_to_success_rate: HashMap = HashMap::from([(2,0.88), (4,0.9), (8,0.96), (16,0.98)]); // read from graph (only estimates anyway) + + // return higher success-rates for higher n in nary AND/OR (see Figure18 [1]) + let nr_operands = enode.children().len(); + + // get op-cost of executing `enode`: + let op_cost = match *enode { + AoigLanguage::False | AoigLanguage::Input(_) => { + InstructionCost { + success_rate: SuccessRate::new(1.0), + mem_cycles: 1, // !=0 to ensure Cost-Function is *strictly monotonically increasing* (TODO: monotonicity isn"t needed here, right?") + } + }, + AoigLanguage::And(_) | AoigLanguage::And4(_) | AoigLanguage::And8(_) | AoigLanguage::And16(_) => { + + let mem_cycles_and = FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::AND) + .iter().fold(0, |acc, instr| { acc + instr.get_nr_memcycles() as usize }); + debug!("Cycles AND: {}", mem_cycles_and); + let expected_success_rate = 0.83; // see Figure17 [1], assume that compute rows have a "middle" distance to sense-amps + let &success_rate_operand = and_nr_operand_to_success_rate.get(&nr_operands).unwrap(); + InstructionCost { + success_rate: SuccessRate::new(expected_success_rate * success_rate_operand), + mem_cycles: mem_cycles_and, + } + + }, + AoigLanguage::Or(_) | AoigLanguage::Or4(_) | AoigLanguage::Or8(_) | AoigLanguage::Or16(_) => { + let mem_cycles_or = FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::OR) + .iter().fold(0, |acc, instr| { acc + instr.get_nr_memcycles() as usize }); + debug!("Cycles OR: {}", mem_cycles_or); + let 
expected_success_rate = 0.94; // see Figure17 [1], assume that compute rows have a "middle" distance to sense-amps + let success_rate_operand = or_nr_operand_to_success_rate.get(&nr_operands).unwrap(); + InstructionCost { + success_rate: SuccessRate::new(expected_success_rate * success_rate_operand), + mem_cycles: mem_cycles_or, + } + }, + // eg prefer `OR(a,b)` to `NOT(AND( NOT(a), NOT(b)))` + AoigLanguage::Not(_) => { + let mem_cycles_not = FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::NOT) + .iter().fold(0, |acc, instr| { acc + instr.get_nr_memcycles() as usize }); + debug!("Cycles NOT: {}", mem_cycles_not); + let expected_success_rate = 0.77; // see Figure11 in [1] (we only use single operand NOT currently) + + InstructionCost { + success_rate: SuccessRate::new(expected_success_rate), + mem_cycles: mem_cycles_not, + } + }, + }; + + debug!("Folding {:?}", enode); + Rc::new(enode.fold(op_cost, |sum, id| sum + *(cost_fn(id)) )) // TODO: doesn't work yet :/ + + // Rc::new(CompilingCost { + // success_rate: 0.0, + // program_cost: 7, + // }) + } +} + +#[cfg(test)] +mod tests { + use eggmock::egg::{self, rewrite, EGraph, Extractor, RecExpr, Rewrite, Runner}; + use eggmock::{AoigLanguage, ComputedNetworkWithBackwardEdges, Network}; + + use crate::fc_dram::compiler::Compiler; + use crate::fc_dram::cost_estimation::CompilingCostFunction; + use crate::fc_dram::CompilerSettings; + + use super::*; + use std::ffi::CString; + // import all elements from parent-module + use std::sync::Once; + + static INIT: Once = Once::new(); + + fn init() -> Compiler { + INIT.call_once(|| { + env_logger::init(); + }); + Compiler::new(CompilerSettings { + print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, repetition_fracops: 5, safe_space_rows_per_subarray: 16, + config_file: CString::new("/home/alex/Documents/Studium/Sem6/inf_pm_fpa/lime-fork/config/fcdram_hksynx.toml").unwrap().into_raw(), // `into_raw()` deliberately leaks the `CString`: `as_ptr()` on the temporary would leave `config_file` dangling once this statement ends. NOTE(review): absolute, machine-specific path + do_save_config: false } ) + } +
+ /// TODO ! + #[test] + fn test_cost_function () { + let mut compiler = init(); + let rewrite_rules: Vec> = vec![ + // TODO: add "or" - and De-Morgan ? + rewrite!("commute-and"; "(and ?a ?b)" => "(and ?b ?a)"), + rewrite!("and-1"; "(and ?a 1)" => "?a"), + rewrite!("and-0"; "(and ?a 0)" => "0"), + // TODO: first add `AOIG`-language and add conversion AOIG<->AIG (so mockturtle's aig can still be used underneath) + rewrite!("and-or"; "(! (or (! ?a) (! ?b)))" => "(and ?a ?b)"), // (De-Morgan) ! not checked whether this works + rewrite!("or-and"; "(! (and (! ?a) (! ?b)))" => "(or ?a ?b)" ), // (De-Morgan) ! not checked whether this works + rewrite!("and-or-more-not"; "(and ?a ?b)" => "(! (or (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works + rewrite!("or-and-more-not"; "(or ?a ?b)" => "(! (and (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works + rewrite!("and-same"; "(and ?a ?a)" => "?a"), + rewrite!("not_not"; "(! (! ?a))" => "?a"), + ]; + + let mut egraph: EGraph = Default::default(); + let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); + egraph.add_expr(&my_expression); + let output2 = egraph.add(AoigLanguage::And([eggmock::egg::Id::from(0), eggmock::egg::Id::from(2)])); // additional `And` with one src-operand=input and one non-input src operand + debug!("EGraph used for candidate-init: {:?}", egraph); + let egraph_clone = egraph.clone(); + let extractor = Extractor::new( &egraph_clone, CompilingCostFunction {}); + let ntk = &(extractor, vec!(egg::Id::from(5), output2)); + ntk.dump(); + // Id(5): And([Signal(false, Id(2)), Signal(false, Id(4))]) + // Id(4): And([Signal(false, Id(3)), Signal(false, Id(1))]) + // Id(1): Input(3) + // Id(3): Input(2) + // Id(2): And([Signal(false, Id(0)), Signal(false, Id(1))]) + // Id(0): Input(1) + + let runner = Runner::default().with_egraph(egraph).run(rewrite_rules.as_slice()); + + let graph = runner.egraph; + + let ntk_backward = 
ComputedNetworkWithBackwardEdges::new(ntk); + + // TODO: test-case with node that relies on one input src-operand and one non-input (intermediate node) src-operand + } +} diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs new file mode 100755 index 0000000..0622785 --- /dev/null +++ b/rs/src/fc_dram/mod.rs @@ -0,0 +1,241 @@ +//! NOTE: currently FCDRAM has only been shown to work with HK Sync Modules +//! +//! # Literature +//! +//! - [1] Functionally-Complete Boolean Logic in Real DRAM Chips: Experimental Characterization and Analysis, 2024 +//! - [2] FracDRAM: Fractional Values in Off-the-Shelf DRAM, 2022 +//! - [3] PULSAR: Simultaneous Many-Row Activation for Reliable and High-Performance Computing in Off-the-Shelf DRAM Chips, 2024 +//! - [4] RowClone: fast and energy-efficient in-DRAM bulk data copy and initialization, 2013 +//! - [5] Design-Induced Latency Variation in Modern DRAM Chips: Characterization, Analysis, and Latency Reduction Mechanisms, 2017 +//! - explains why distance of rows to sense-amps influence success-rate of executed op +//! +//! # Submodules +//! +//! - [`architecture`] - defines Instructions (and performance-metrics of Instructions in that architecture) used in FC-DRAM +//! - [`compiler`] - compiles given LogicNetwork for FC-DRAM architecture +//! - [`generator`] - Generates output code or reports based on analysis. (TODO) +//! - [`optimization`] - applies architecture-specific optimizations to generated program (TODO: don't use here but in MLIR instead) +//! - [ ] [`program`] +//! - [`utils`] - utilities (helper macros/...) 
+pub mod architecture; +pub mod compiler; +pub mod cost_estimation; +pub mod optimization; +pub mod program; +pub mod utils; + +use std::sync::LazyLock; +use std::time::Instant; + +use crate::measure_time; + +use self::compiler::Compiler; +use self::cost_estimation::CompilingCostFunction; + +use eggmock::egg::{rewrite, EGraph, Extractor, Id, Rewrite, Runner}; +use eggmock::{ + AigReceiverFFI, Aoig, AoigLanguage, Network, NetworkWithBackwardEdges, Receiver, ReceiverFFI, Signal +}; +use log::debug; +use program::*; +use architecture::*; + +/// Rewrite rules to use in E-Graph Rewriting (see [egg](https://egraphs-good.github.io/)) +/// TODO: adjust rewriting rules to FCDRAM (=AND/OR related rewrites like De-Morgan?) +static REWRITE_RULES: LazyLock>> = LazyLock::new(|| { + let rules = vec![ + // TODO: sth is wrong with these rewrite rules - they produce a non-equivalent logic network ! + // rewrite!("commute-and"; "(and ?a ?b)" => "(and ?b ?a)"), + rewrite!("and-1"; "(and ?a (! f))" => "?a"), + rewrite!("and-0"; "(and ?a f)" => "f"), + rewrite!("or-1"; "(or ?a (! f))" => "(! f)"), + rewrite!("or-0"; "(or ?a f)" => "?a"), + + rewrite!("and-or"; "(! (or (! ?a) (! ?b)))" => "(and ?a ?b)"), // (De-Morgan) + rewrite!("or-and"; "(! (and (! ?a) (! ?b)))" => "(or ?a ?b)" ), // (De-Morgan) + // rewrite!("and-or-more-not"; "(and ?a ?b)" => "(! (or (! ?a) (! ?b)))"), // (De-Morgan) + // rewrite!("or-and-more-not"; "(or ?a ?b)" => "(! (and (! ?a) (! ?b)))"), // (De-Morgan) + rewrite!("and-same"; "(and ?a ?a)" => "?a"), + rewrite!("not_not"; "(! (! 
?a))" => "?a"), + + // in general more operands are better for AND/OR (see [1]) + rewrite!("and2_to_4"; "(and (and ?a ?b) (and ?c ?d))" => "(and4 ?a ?b ?c ?d)"), + rewrite!("and4_to_8"; "(and (and4 ?a ?b ?c ?d) (and4 ?e ?f ?g ?h))" => "(and8 ?a ?b ?c ?d ?e ?f ?g ?h)"), + rewrite!("and8_to_16"; "(and (and8 ?a ?b ?c ?d ?e ?f ?g ?h) (and8 ?i ?j ?k ?l ?m ?n ?o ?p))" => "(and16 ?a ?b ?c ?d ?e ?f ?g ?h ?i ?j ?k ?l ?m ?n ?o ?p)"), + rewrite!("or2_to_4"; "(or (or ?a ?b) (or ?c ?d))" => "(or4 ?a ?b ?c ?d)"), + rewrite!("or4_to_8"; "(or (or4 ?a ?b ?c ?d) (or4 ?e ?f ?g ?h))" => "(or8 ?a ?b ?c ?d ?e ?f ?g ?h)"), + rewrite!("or8_to_16"; "(or (or8 ?a ?b ?c ?d ?e ?f ?g ?h) (or8 ?i ?j ?k ?l ?m ?n ?o ?p))" => "(or16 ?a ?b ?c ?d ?e ?f ?g ?h ?i ?j ?k ?l ?m ?n ?o ?p)"), + // no use for NOT with multiple dsts (for now) + ]; + rules +}); + +/// Compilation result (program + E-Graph) +#[ouroboros::self_referencing] +struct CompilerOutput { + /// Result E-Graph + graph: EGraph, + /// (, output-nodes) + #[borrows(graph)] + #[covariant] + /// A network consists of nodes (accessed via `Extractor` and separately stored `outputs` (`Vec`) + ntk: ( + Extractor<'this, CompilingCostFunction, AoigLanguage, ()>, // `'this`=self-reference, used to extract best-node from `E-Class` of `AoigLanguage`-nodes based on `CompilingCostFunction` + Vec, // vector of outputs + ), + /// Compiled Program Program is compiled using previously (EGraph-)extracted `ntk` + #[borrows(ntk)] + program: Program, +} + +/// Initiates compilation and prints compilation-statistics (and program if `settings.verbose=true` +/// - returned receiver allows converting result-graph in both directions (C++ <=> Rust) +/// - `settings`: compiler-options +fn compiling_receiver<'a>( + rules: &'a [Rewrite], + settings: CompilerSettings, +) -> impl Receiver + use<'a> { + // REMINDER: EGraph implements `Receiver` + let mut compiler = Compiler::new(settings.clone()); + EGraph::::new(()) + .map(move |(graph, outputs)| { // `.map()` of 
`Provider`-trait!, outputs=vector of EClasses + + debug!("Input EGraph nodes: {:?}", graph.nodes()); + debug!("Input EGraph's EClasses : {:?}", graph.classes() + .map(|eclass| (eclass.id, &eclass.nodes) ) + .collect::)>>() + ); + // 1. Create E-Graph: run equivalence saturation + debug!("Running equivalence saturation..."); + let runner = measure_time!( + Runner::default().with_egraph(graph).run(rules), "t_runner", settings.print_compilation_stats + ); + + if settings.verbose { + println!("== Runner Report"); + runner.print_report(); + } + + let graph = runner.egraph; + + CompilerOutput::new( + graph, + |graph| { + // 2. Given E-Graph: Retrieve best graph using custom `CompilingCostFunction` + debug!("Extracting..."); + let extractor = measure_time!( + Extractor::new( + graph, + CompilingCostFunction {}, + ), + "t_extractor", settings.print_compilation_stats + ); + debug!("Outputs: {outputs:?}"); + (extractor, outputs) // produce `ntk` + }, + |ntk| { + // ===== MAIN CALL (actual compilation) ===== + // 3. Compile program using extracted network + + debug!("Compiling..."); + debug!("Network outputs: {:?}", ntk.outputs().collect::>()); + ntk.dump(); + let ntk_with_backward_edges = ntk.with_backward_edges(); + debug!("Network Leaves: {:?}", ntk_with_backward_edges.leaves().collect::>()); + debug!("Network Outputs of first leaf: {:?}", + ntk_with_backward_edges.node_outputs( + ntk_with_backward_edges.leaves().next().unwrap() + ).collect::>() + ); + + let program = measure_time!( + compiler.compile(&ntk_with_backward_edges), "t_compiler", settings.print_compilation_stats + ); + // ===================== + + // print program if compiler-setting is set + // TOOD: write program to output-file instead !! 
+ if settings.print_program || settings.verbose { + if settings.verbose { + println!("== Program") + } + println!("{program}"); + } + program + }, + ) + }) +} + +#[derive(Debug, Clone)] +#[repr(C)] +/// Compiler options +/// - TODO: add flags like minimal success-rate for program +pub struct CompilerSettings { + /// Whether to print the compiled program + print_program: bool, + /// Whether to enable verbose output + verbose: bool, + /// Whether to print stats like runtimes of individual compiler-stages during compilation + print_compilation_stats: bool, + /// Minimal success rate to be guaranteed for success compiled program + /// REMINDER: FCDRAM-operations dont have a 100%-success rate to create the correct results + /// TODO: not used yet by compiler + min_success_rate: f64, + // /// Location to config-file holding fcdram-specific configs + // fcdram_config_file: Path, + + /// How many times to issue FracOps to store `V_{DD}/2` in one of the activated rows for AND/OR + repetition_fracops: u64, + /// Nr of rows to use as a safe space for operands per subarray + /// - REMINDER: after `AND`/`OR`-ops the src-operands are overwritten by the op-result, so to reuse operands they're put into specially designated rows (="safe-space") which won't be overwritten + /// - Ops reusing those operands have to clone the values from the safe-space prior to issuing the Op + /// - NOTE: rows which are used as safe-space are determined by analyzing patterns in Simultaneous-row activation for the specific architecture (to ensure that safe-space rows won't be activated on any combination of row-addresses) + /// + /// TODO: if `config_file` is passed, make sure nr safe-space-rows is equal to nr of rows detailed in config-file + /// + /// DEPRECATED: current implementation select compute rows instead + safe_space_rows_per_subarray: u8, + /// Location of config-file (to which to write the compiled configs) - if this config file doesn't exist then a new one is generated under this given 
path + config_file: *const i8, + /// Whether to save the configuration file (for used safe-space rows, placement of constant 0s&1s, ..) + do_save_config: bool, +} + +// TODO: this will be needed once E-Graph Validation is added (=once we want to transfer the E-Graph back to mockturtle) +// /// ?? (maybe FFI for rewriting graph using mockturtle?) +// #[no_mangle] +// extern "C" fn fcdram_rewriter(settings: CompilerSettings) -> AigReceiverFFI> { +// RewriterFFI::new(FCDramRewriter(settings)) +// } + +/// Statistic results about Compilation-Process +#[repr(C)] +struct CompilerStatistics { + egraph_classes: u64, + egraph_nodes: u64, + egraph_size: u64, + + instruction_count: u64, +} + +/// Entry point for cpp-code +/// - `settings`: settings to use when running compiler +#[no_mangle] +extern "C" fn fcdram_compile(settings: CompilerSettings) -> AigReceiverFFI { + + env_logger::init(); // needed for `export RUST_LOG=debug` to work + let receiver = + compiling_receiver(REWRITE_RULES.as_slice(), settings) + .map(|output| { + let graph = output.borrow_graph(); + CompilerStatistics { + egraph_classes: graph.number_of_classes() as u64, + egraph_nodes: graph.total_number_of_nodes() as u64, + egraph_size: graph.total_size() as u64, + instruction_count: output.borrow_program().instructions.len() as u64, + } + }); + // return graph back to calling cpp-code + AigReceiverFFI::new(receiver.adapt(Into::into)) +} diff --git a/rs/src/fc_dram/optimization.rs b/rs/src/fc_dram/optimization.rs new file mode 100755 index 0000000..6bb4c82 --- /dev/null +++ b/rs/src/fc_dram/optimization.rs @@ -0,0 +1,32 @@ +//! Optimize code from `compiler.rs` +//! - things to optimize for: +//! - performance (nr of required mem-cycles): mostly reduce nr of RowClone-ops (rest is mostly predetermined by logic-graph due to 1:1 mapping of LogicalOps -> FCDRAM Primitives) +//! - success rate (mostly by choosing right rows to optimize for distance to sense-amps), includes input replication +//! 
- memory-footprint (reduce nr of subarrays used by program) +//! - manually adapt safe-space to program requirements: unused safe-space rows could still be used ?! +//! - [ ] Rematerialization ? +use crate::fc_dram::architecture::{RowAddress, Instruction}; + +use super::{architecture::FCDRAMArchitecture, program::Program}; + +pub fn optimize(program: &mut Program) { + if program.instructions.is_empty() { + return; + } + let mut opt = Optimization { program }; + // TODO: perform optimizations ! +} + +pub struct Optimization<'p> { + program: &'p mut Program, +} + +// TODO: manual optimizations? +impl Optimization<'_> { + /// TODO: perform some basic compiler-optimization like dead_code_elimination? or will this + /// already be done by the MLIR dialect? + fn dead_code_elimination(&mut self) { + todo!() + } +} + diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs new file mode 100755 index 0000000..87a8ce9 --- /dev/null +++ b/rs/src/fc_dram/program.rs @@ -0,0 +1,77 @@ +//! Functionality for generating actual program using architecture defined in [`architecture`] by +//! compiling given logic-network (see [`compilation`]) and potentially adding some manual +//! 
optimizations ([`optimization`]) +use super::architecture::RowAddress; +use crate::fc_dram::architecture::{Instruction, ROW_ID_BITMASK}; +use eggmock::Signal; +use std::collections::HashMap; +use std::fmt::{Display, Formatter}; + + +#[derive(Debug, Clone)] +pub struct Program { + pub instructions: Vec, + /// Specifies in which rows constants have to be placed (!have to be placed in EVERY subarray) + /// - TODO: adjust this to only place in subarrays which are actually used as reference subarrays during program execution + pub constants_row_placement: HashMap, + /// Specifies where row-operands should be placed prior to calling this program + /// (This is a convention which tells the user of this lib where the data should be placed within the DRAM before executing this program) + /// - NOTE: Signals might have to be placed in several subarrays (REMINDER: movement in btw subarrays is not supported by FCDRAM) + pub input_row_operands_placement: HashMap>, + /// Specifies into which rows output-operands will have been placed after the program has run successfully + pub output_row_operands_placement: HashMap, +} + +impl Program { + pub fn new(instructions: Vec) -> Self { + Self { + instructions, + constants_row_placement: HashMap::new(), + input_row_operands_placement: HashMap::new(), + output_row_operands_placement: HashMap::new(), + } + } +} + +/// Print the generated program in human-readable form +impl Display for Program { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let display_row = |row: &RowAddress| { + format!("{}.{}", row.get_subarray_id(), row.0 & ROW_ID_BITMASK) + }; // display subarray separately + + + let display_rows = |rows: Vec| { + let formatted: Vec = rows.iter() + .map(|&row| format!("{}.{}", row.get_subarray_id(), row.0 & ROW_ID_BITMASK)) + .collect(); + + format!("[{}]", formatted.join(", ")) + }; // display subarray separately + + writeln!(f, "---------------------------------------")?; + writeln!(f, "Input operand placement:")?; 
+ for (signal, rows) in &self.input_row_operands_placement { + writeln!(f, "{:?} in {}", signal, display_rows(rows.to_vec()))?; + } + writeln!(f, "---------------------------------------")?; + writeln!(f, "Constant operand placement:")?; + for (constant, row) in &self.constants_row_placement { + writeln!(f, "{} in {}", constant, display_row(&row.local_rowaddress_to_subarray_id(super::architecture::SubarrayId(0))))?; + } + writeln!(f, "---------------------------------------")?; + + + for instr in &self.instructions { + writeln!(f, "{}", instr)?; + } + + writeln!(f, "---------------------------------------")?; + writeln!(f, "Output operand placement:")?; + for (signal, row) in &self.output_row_operands_placement{ + writeln!(f, "{:?} in {}", signal, display_row(row))?; + } + writeln!(f, "---------------------------------------")?; + Ok(()) + } +} diff --git a/rs/src/fc_dram/utils.rs b/rs/src/fc_dram/utils.rs new file mode 100755 index 0000000..b9d4e8b --- /dev/null +++ b/rs/src/fc_dram/utils.rs @@ -0,0 +1,14 @@ +/// Measure time of `func` and print it if `do_print_timings` is set +#[macro_export] +macro_rules! measure_time { + ($func:expr, $label:expr, $do_print_timings:expr) => {{ + let start_time = Instant::now(); + let result = $func; + let t_runtime = start_time.elapsed().as_secs_f64(); + + if $do_print_timings { + println!("{}: {:.6}sec", $label, t_runtime); + } + result + }}; +} diff --git a/rs/src/lib.rs b/rs/src/lib.rs old mode 100644 new mode 100755 index f96b170..38d52d5 --- a/rs/src/lib.rs +++ b/rs/src/lib.rs @@ -2,3 +2,4 @@ mod ambit; mod opt_extractor; +pub mod fc_dram; diff --git a/safe/lime-infinite b/safe/lime-infinite new file mode 100755 index 0000000..eb84662 Binary files /dev/null and b/safe/lime-infinite differ diff --git a/scripts/build.sh b/scripts/build.sh new file mode 100755 index 0000000..25ac13a --- /dev/null +++ b/scripts/build.sh @@ -0,0 +1,6 @@ +export RUST_LOG=debug +mkdir -p build +cd build +cmake .. 
+make lime +./lime diff --git a/src/ambit.h b/src/ambit.h old mode 100644 new mode 100755 diff --git a/src/ambit_benchmark_main.cpp b/src/ambit_benchmark_main.cpp old mode 100644 new mode 100755 diff --git a/src/fcdram.h b/src/fcdram.h new file mode 100755 index 0000000..0cffc9d --- /dev/null +++ b/src/fcdram.h @@ -0,0 +1,37 @@ +#pragma once + +#include "eggmock.h" + +#include +#include + +extern "C" +{ + struct fcdram_compiler_statistics + { + uint64_t egraph_classes; + uint64_t egraph_nodes; + uint64_t egraph_size; + + uint64_t instruction_count; + }; + + /** + * @param print_compilation_stats Whether to print stats like `t_runner`,`t_extractor`,`t_compiler` + */ + struct fcdram_compiler_settings + { + bool print_program; + bool verbose; + bool print_compilation_stats; + double min_success_rate; + uint64_t repetition_fracops; + uint8_t safe_space_rows_per_subarray; + const char *config_file; + bool do_save_config; + }; + + eggmock::aig_receiver fcdram_rewriter( fcdram_compiler_settings settings ); + eggmock::aig_receiver fcdram_compile( fcdram_compiler_settings settings ); + // void fcdram_compile(); +} diff --git a/src/fcdram_benchmark_main.cpp b/src/fcdram_benchmark_main.cpp new file mode 100755 index 0000000..e740242 --- /dev/null +++ b/src/fcdram_benchmark_main.cpp @@ -0,0 +1,52 @@ +/** + * Runs compilation for given logic network (see `utils.hpp` for pre-provided logic networks) + */ +#include "fcdram.h" +#include "eggmock.h" +#include "utils.h" + +#include +#include +#include + +using namespace mockturtle; +using namespace eggmock; +using namespace std::chrono; + +// usage: exec [network] +int main( int const argc, char** argv ) +{ + if ( argc != 2 ) + { + std::cerr << "usage: " << argv[0] << std::endl; + return 1; + } + + std::optional aig = get_ntk( argv[1] ); + if ( !aig ) + { + return 1; + } + + auto const pre_opt_size = aig->size(); + + auto const opt_begin = system_clock::now(); + preoptimize_aig( *aig ); + auto const t_opt = duration_cast( 
system_clock::now() - opt_begin ).count(); + + auto constexpr settings = fcdram_compiler_settings{ + .print_program = false, + .verbose = false, + }; + + const auto [egraph_classes, egraph_nodes, egraph_size, + instruction_count, + t_runner, t_extractor, t_compiler] = + send_aig( *aig, fcdram_compile( settings ) ); + + std::cout << t_opt << "\t" << t_runner << "\t" << t_extractor << "\t" << t_compiler << "\t" + << pre_opt_size << "\t" << aig->size() << "\t" << aig->num_cis() << "\t" << aig->num_cos() << "\t" + << instruction_count << "\t" + << egraph_classes << "\t" << egraph_nodes << "\t" << egraph_size; + return 0; +} diff --git a/src/main.cpp b/src/main.cpp old mode 100644 new mode 100755 index 43d30fd..73079c9 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,4 +1,5 @@ #include "ambit.h" +#include "fcdram.h" #include #include @@ -6,17 +7,8 @@ using namespace mockturtle; using namespace eggmock; -int main() +void run_ambit_example(mig_network in) { - mig_network in; - const auto b_i = in.create_pi(); - const auto b_i_next = in.create_pi(); - const auto m = in.create_pi(); - - const auto O1 = in.create_and( m, b_i_next ); - const auto O2 = in.create_and( in.create_not( m ), b_i ); - const auto bi = in.create_or( O1, O2 ); - in.create_po( bi ); write_dot( in, "in.dot" ); @@ -34,4 +26,75 @@ int main() std::cout << "t3:" << result.t_compiler << std::endl; write_dot( out, "out.dot" ); + // ambit_compiler_statistics result = eggmock::send_mig( in, ambit_compile(settings) ); +} + +/** + * TODO: change `mig` to `aig`?? 
+ */ +void run_fcdram_example() +{ + + aig_network in; + // const auto b_i = in.create_pi(); + // const auto b_i_next = in.create_pi(); + // const auto m = in.create_pi(); + // + // const auto O1 = in.create_and( m, b_i_next ); + // const auto O2 = in.create_and( in.create_not( m ), b_i ); + // const auto O3 = in.create_and( in.create_not( O2 ), O1 ); + // const auto bi = in.create_or( O1, O2 ); + // in.create_po( bi ); + // in.create_po( O3 ); + + // test and(and2,and2) -> and4 + const auto i1 = in.create_pi(); + const auto i2 = in.create_pi(); + const auto i3 = in.create_pi(); + const auto i4 = in.create_pi(); + + const auto o1 = in.create_and( i1, i2); + const auto o2 = in.create_and( i3, i4); + const auto o3 = in.create_and( o1,o2 ); + in.create_po( o3 ); + + write_dot( in, "in.dot" ); + std::cout << "Sending graph to fcdram_compile..." << std::endl; + // fcdram_compile(); + + // use `eggmock` to send mockturtle-graph to `lime`'s entry point `fcdram_compile()` + fcdram_compiler_statistics result = eggmock::send_aig( in, fcdram_compile( fcdram_compiler_settings{ + .print_program = true, + .verbose = true, + .print_compilation_stats = true, + .min_success_rate= 99.9999, + .repetition_fracops=5, // issue 5 FracOps per init of reference subarray + .safe_space_rows_per_subarray = 16, + .config_file = "", + .do_save_config = true, + } ) ); + // std::cout << "IC:" << result.instruction_count << std::endl; + // std::cout << "t1:" << result.t_runner << std::endl; + // std::cout << "t2:" << result.t_extractor << std::endl; + // std::cout << "t3:" << result.t_compiler << std::endl; + + // aig_network rewritten = rewrite_mig( in, fcdram_rewriter() ); + // write_dot( rewritten, "out.dot" ); +} + +int main() +{ + mig_network in; + const auto b_i = in.create_pi(); + const auto b_i_next = in.create_pi(); + const auto m = in.create_pi(); + + const auto O1 = in.create_and( m, b_i_next ); + const auto O2 = in.create_and( in.create_not( m ), b_i ); + const auto bi = in.create_or( 
O1, O2 ); + in.create_po( bi ); + + write_dot( in, "in.dot" ); + run_ambit_example(in); + // run_fcdram_example(); } diff --git a/src/utils.cpp b/src/utils.cpp old mode 100644 new mode 100755 diff --git a/src/utils.h b/src/utils.h old mode 100644 new mode 100755 index df46a48..a47d6d4 --- a/src/utils.h +++ b/src/utils.h @@ -22,6 +22,11 @@ std::optional read_ntk( std::string const& path ); void preoptimize_mig( mockturtle::mig_network& ntk ); +/** + * Collection of logic networks (eg for benchmarking) + * - included logic networks: Full Adder ("fa"), Multiplexer ("mux"), Greater than ("gt"), "kogge_stone" + * - carry_ripple_adder_inplace ("add"), carry_ripple_multiplier ("mul"), sum-adder ("pop") + */ template std::optional get_ntk( std::string const& key ) { @@ -183,6 +188,9 @@ std::optional get_ntk( std::string const& key ) return {}; } +/** + * Read network from .aig/.pla/.verilog file + */ template std::optional read_ntk( const std::string& path ) {