From 898ecb1241884fead04164b1d579bc85588708cf Mon Sep 17 00:00:00 2001 From: alku662e Date: Mon, 28 Apr 2025 15:41:33 +0200 Subject: [PATCH 01/51] Start implementing fcdram --- rs/src/fc_dram/architecture.rs | 273 ++++++++++++++++++++++++++ rs/src/fc_dram/compilation.rs | 348 +++++++++++++++++++++++++++++++++ rs/src/fc_dram/extraction.rs | 224 +++++++++++++++++++++ rs/src/fc_dram/mod.rs | 180 +++++++++++++++++ rs/src/fc_dram/optimization.rs | 233 ++++++++++++++++++++++ rs/src/fc_dram/program.rs | 235 ++++++++++++++++++++++ src/ambit.h | 22 +++ src/ambit_benchmark_main.cpp | 1 - src/main.cpp | 3 +- 9 files changed, 1517 insertions(+), 2 deletions(-) create mode 100644 rs/src/fc_dram/architecture.rs create mode 100644 rs/src/fc_dram/compilation.rs create mode 100644 rs/src/fc_dram/extraction.rs create mode 100644 rs/src/fc_dram/mod.rs create mode 100644 rs/src/fc_dram/optimization.rs create mode 100644 rs/src/fc_dram/program.rs diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs new file mode 100644 index 0000000..9a7b1ed --- /dev/null +++ b/rs/src/fc_dram/architecture.rs @@ -0,0 +1,273 @@ +//! Contains all architecture-specific descriptions + + +/// TODO: merge `rows.rs` with `mod.rs` and move into `arch.rs` +use super::{Architecture, BitwiseOperand, RowAddress}; +use eggmock::{Id, Mig, ProviderWithBackwardEdges, Signal}; +use rustc_hash::FxHashMap; +use std::collections::hash_map::Entry; + +type RowAddress = u64; + +/// Equivalent to a DRAM row. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum Row { + /// Row used as input operand + In(RowAddress), + /// Row used as output operand + Out(RowAddress), + /// Why do we need spill rows???? + Spill(RowAddress), +} + +/// Contains a snapshot state of the rows in an Ambit-like DRAM +#[derive(Debug, Clone)] +pub struct Rows<'a> { + signals: FxHashMap>, + rows: FxHashMap, + spill_counter: u32, + /// Representation of the underlying PuD Architecture + architecture: &'a Architecture, +} + +/// Maps operands from network to LIM rows (?) +impl<'a> Rows<'a> { + /// Initializes the rows with the leaf values in the given network (this values are known from + /// the start; intermediate node-values still need to be computed) + pub fn new( + ntk: &impl ProviderWithBackwardEdges, + architecture: &'a Architecture, + ) -> Self { + let mut rows = Rows { + signals: FxHashMap::default(), + rows: FxHashMap::default(), + spill_counter: 0, + architecture, + }; + rows.add_leaves(ntk); + rows + } + + /// Assign leaves to rows + /// REMINDER: leaves are either inputs or constants + /// TODO: choose rows which are close to Sense-Amps ?? + /// TODO: change from Mig to Aig? + fn add_leaves(&mut self, ntk: &impl ProviderWithBackwardEdges) { + let leaves = ntk.leaves(); + self.rows.reserve(leaves.size_hint().0); + for id in leaves { + let node = ntk.node(id); + match node { + Mig::Input(i) => { + self.set_empty_row_signal(RowOperand::In(i), Signal::new(id, false)); + }, + Mig::False => { + let signal = Signal::new(id, false); + self.set_empty_row_signal(RowOperand::Const(false), signal); + self.set_empty_row_signal(RowOperand::Const(true), signal.invert()); + }, + _ => unreachable!("leaf node should be either an input or a constant"), + }; + } + } + + /// Returns the current signal of the given row. + pub fn get_row_signal(&self, row: RowOperand) -> Option { + self.rows.get(&row).cloned() + } + + /// Returns the signal of the given address + /// TODO: how does FC-DRAM deal with inverted signals? + pub fn get_address_signal(&self, address: RowAddress) -> Option { + self.get_row_signal(address.row())? + .maybe_invert(address.inverted()) + .into() + } + + /// Returns the signal of the given operand + pub fn get_operand_signal(&self, operand: BitwiseOperand) -> Option { + self.get_address_signal(operand.into()) + } + + /// Returns all rows with the given signal. + pub fn get_rows(&self, signal: Signal) -> impl Iterator + '_ { + self.signals.get(&signal).into_iter().flatten().cloned() + } + + /// Returns true iff a signal with the given id is stored in a row. + pub fn contains_id(&self, id: Id) -> bool { + self.get_rows(Signal::new(id, false)).next().is_some() + || self.get_rows(Signal::new(id, true)).next().is_some() + } + + /// Adds a new spill row with the given signal and returns its id. + /// TODO: Why would we need spill rows??? + pub fn add_spill(&mut self, signal: Signal) -> u32 { + self.spill_counter += 1; + self.set_empty_row_signal(RowOperand::Spill(self.spill_counter), signal); + self.spill_counter + } + + /// Sets the current signal of the given operand + /// Returns the signal of the operand previous to this operation if it was changed. + pub fn set_signal(&mut self, address: RowAddress, signal: Signal) -> Option { + self.set_row_signal(address.row(), signal.maybe_invert(address.inverted()))? + .maybe_invert(address.inverted()) + .into() + } + + /// Equivalent to `set_row_signals`, but additionally ensures that the row was previously empty + /// or contained the same signal. + fn set_empty_row_signal(&mut self, row: RowOperand, signal: Signal) { + assert_eq!( + self.set_row_signal(row, signal), + None, + "row {row:?} should be empty" + ) + } + + /// Sets the signal of the given row, updating `self.rows` and `self.signals` accordingly. + /// Returns the previous signal of the given row if it was changed. + fn set_row_signal(&mut self, row: RowOperand, signal: Signal) -> Option { + let row_entry = self.rows.entry(row); + + // detach previous signal + let prev = match &row_entry { + Entry::Occupied(v) => { + let prev = *v.get(); + if prev == signal { + // signal already correctly set + return None; + } + Some(prev) + } + _ => None, + }; + if let Some(prev) = prev { + let prev_locations = self.signals.get_mut(&prev).unwrap(); + prev_locations.swap_remove(prev_locations.iter().position(|r| *r == row).unwrap()); + } + + // set new signal + row_entry.insert_entry(signal); + self.signals.entry(signal).or_default().push(row); + + prev + } + + /// Free rows + /// NOTE: id depends on whether the signal is inverted or not (=MSB set or not) + pub fn free_id_rows(&mut self, id: Id) { + let non_inv = Signal::new(id, false); + let inv = Signal::new(id, true); + for sig in [non_inv, inv] { + let Some(rows) = self.signals.remove(&sig) else { + continue + }; + for row in rows { + self.rows.remove(&row); + } + } + } +} + +/// Single Row or multiple rows (since FC-DRAM can also work with multiple row-operands) +/// TODO: change this to optionally take in multiple rows +/// IDEA: enum of single row vs multiple rows? +pub struct RowOperand { + rows: Vec, +} + +impl RowOperand { + /// Currently supported nr of row-operands: 1,2,4,8,16,32 + /// - ! the given `RowAddresses` are expected to refer to rows within the SAME subarray (TODO: + /// check if this is forced by FC-DRAM architecture) + pub fn new(rows: Vec) -> Self { + const SUPPORTED_ROW_OPERAND_NR: [usize; 6] = [1,2,4,8,16,32]; + if SUPPORTED_ROW_OPERAND_NR.contains(&rows.len()) { + panic!("[ERROR] FC-DRAM currently supports only 1|2|4|8|16|32 row-operands"); + } + Self { + rows, + } + } +} + +/// Consists of n rows +/// `rows_in_subarray` should be either 512 OR 1024, but not both ! +pub struct DRAMSubarray { + rows: [RowOperand; n], +} + +/// Represents subarrays and which of those subarrays are neighbors +pub struct DRAMChip { + +} + +#[derive(Clone, Debug)] +pub struct Architecture { + maj_ops: Vec, + multi_activations: Vec>, + num_dcc: u8, +} + +impl Architecture { + pub fn new(multi_activations: Vec>, num_dcc: u8) -> Self { + let maj_ops = multi_activations + .iter() + .enumerate() + .filter(|(_, ops)| ops.len() == 3) + .map(|(i, _)| i) + .collect(); + Self { + maj_ops, + multi_activations, + num_dcc, + } + } +} + +static ARCHITECTURE: LazyLock = LazyLock::new(|| { + use BitwiseOperand::*; + Architecture::new( + vec![ + // 2 rows + vec![ + DCC { + index: 0, + inverted: true, + }, + T(0), + ], + vec![ + DCC { + inverted: true, + index: 1, + }, + T(1), + ], + vec![T(2), T(3)], + vec![T(0), T(3)], + // 3 rows + vec![T(0), T(1), T(2)], + vec![T(1), T(2), T(3)], + vec![ + DCC { + index: 0, + inverted: false, + }, + T(1), + T(2), + ], + vec![ + DCC { + index: 0, + inverted: false, + }, + T(0), + T(3), + ], + ], + 2, + ) +}); diff --git a/rs/src/fc_dram/compilation.rs b/rs/src/fc_dram/compilation.rs new file mode 100644 index 0000000..e8d9f5b --- /dev/null +++ b/rs/src/fc_dram/compilation.rs @@ -0,0 +1,348 @@ +use super::{ + optimization::optimize, Address, Architecture, BitwiseOperand, Program, ProgramState, + RowAddress, +}; +use crate::fcdram::rows::Row; +use eggmock::{Id, Mig, Aig, Node, ProviderWithBackwardEdges, Signal}; +use rustc_hash::{FxHashMap, FxHashSet}; +use std::cmp::max; + +/// Compiles given `network` to a program that can be run on given `architecture` +pub fn compile<'a>( + architecture: &'a Architecture, + network: &impl ProviderWithBackwardEdges, +) -> Program<'a> { + let mut state = CompilationState::new(architecture, network); + let mut max_cand_size = 0; // TODO: unused? + while !state.candidates.is_empty() { + max_cand_size = max(max_cand_size, state.candidates.len()); + + // TODO: ??? + let (id, node, _, _, _) = state + .candidates + .iter() + .copied() + .map(|(id, node)| { + let outputs = state.network.node_outputs(id).count(); + let output = state.network.outputs().any(|out| out.node_id() == id); + let not_present = node + .inputs() + .iter() + .map(|signal| { + let present = state + .program + .rows() + .get_rows(*signal) + .any(|row| matches!(row, Row::Bitwise(_))); + !present as u8 + }) + .sum::(); + (id, node, not_present, outputs, output) + }) + .min_by_key(|(_, _, not_present, outputs, output)| (*not_present, *outputs, !output)) + .unwrap(); + + // TODO: ??? + let output = state.outputs.get(&id).copied(); + if let Some((output, signal)) = output { + if signal.is_inverted() { + state.compute(id, node, None); + state.program.signal_copy( + signal, + RowAddress::Out(output), + state.program.rows().get_free_dcc().unwrap_or(0), + ); + } else { + state.compute(id, node, Some(Address::Out(output))); + } + let leftover_uses = *state.leftover_use_count(id); + if leftover_uses == 1 { + state.program.free_id_rows(id); + } + } else { + state.compute(id, node, None); + } + } + + let mut program = state.program.into(); + optimize(&mut program); + program +} + +pub struct CompilationState<'a, 'n, P> { + /// Network (P=Provider, obsolte naming) + network: &'n P, + candidates: FxHashSet<(Id, Mig)>, // TODO: probably change to `Aig` ? + program: ProgramState<'a>, + + outputs: FxHashMap, + leftover_use_count: FxHashMap, +} + +impl<'a, 'n, P: ProviderWithBackwardEdges> CompilationState<'a, 'n, P> { + pub fn new(architecture: &'a Architecture, network: &'n P) -> Self { + let mut candidates = FxHashSet::default(); + // check all parents of leaves whether they have only leaf children, in which case they are + // candidates + for leaf in network.leaves() { + for candidate_id in network.node_outputs(leaf) { + let candidate = network.node(candidate_id); + if candidate + .inputs() + .iter() + .all(|signal| network.node(signal.node_id()).is_leaf()) + { + candidates.insert((candidate_id, candidate)); + } + } + } + let program = ProgramState::new(architecture, network); + + let outputs = network + .outputs() + .enumerate() + .map(|(id, sig)| (sig.node_id(), (id as u64, sig))) + .collect(); + + Self { + network, + candidates, + program, + outputs, + leftover_use_count: FxHashMap::default(), + } + } + + pub fn leftover_use_count(&mut self, id: Id) -> &mut usize { + self.leftover_use_count.entry(id).or_insert_with(|| { + self.network.node_outputs(id).count() + self.outputs.contains_key(&id) as usize + }) + } + + pub fn compute(&mut self, id: Id, node: Mig, out_address: Option
) { + if !self.candidates.remove(&(id, node)) { + panic!("not a candidate"); + } + let Mig::Maj(mut signals) = node else { + panic!("can only compute majs") + }; + + // select which MAJ instruction to use + // for this we use the operation with has the most already correctly placed operands + let mut opt = None; + for id in self.architecture().maj_ops.iter().copied() { + let operands = self.architecture().multi_activations[id] + .as_slice() + .try_into() + .expect("maj has to have 3 operands"); + let (matches, match_no) = self.get_mapping(&mut signals, operands); + let dcc_cost = self.optimize_dcc_usage(&mut signals, operands, &matches); + let spilling_cost = self.spilling_cost(operands, &matches); + let cost = 3.0 - match_no as f32 + dcc_cost as f32 + 0.5 * spilling_cost as f32; + let is_opt = match &opt { + None => true, + Some((opt_no, _, _, _)) => *opt_no > cost, + }; + if is_opt { + opt = Some((cost, id, matches, signals)); + } + } + let (_, maj_id, matches, signals) = opt.unwrap(); + let operands = &self.architecture().multi_activations[maj_id]; + + // now we need to place the remaining non-matching operands... + + // for that we first find a free DCC row for possibly inverting missing signals without + // accidentally overriding a signal that is already placed correctly + let used_dcc = || { + operands.iter().filter_map(|op| match op { + BitwiseOperand::DCC { index: i, .. } => Some(i), + _ => None, + }) + }; + let free_dcc = (0..self.architecture().num_dcc) + .find(|i| !used_dcc().any(|used| *used == *i)) + .expect("cannot use all DCC rows in one MAJ operation"); + + // then we can copy the signals into their places + for i in 0..3 { + if matches[i] { + continue; + } + self.program + .signal_copy(signals[i], RowAddress::Bitwise(operands[i]), free_dcc); + } + + // all signals are in place, now we can perform the MAJ operation + self.program + .maj(maj_id, Signal::new(id, false), out_address); + + // free up rows if possible + // (1) for the MAJ-signal + if *self.leftover_use_count(id) == 0 { + self.program.free_id_rows(id); + } + // (2) for the input signals + 'outer: for i in 0..3 { + // decrease use count only once per id + for j in 0..i { + if signals[i].node_id() == signals[j].node_id() { + continue 'outer; + } + } + *self.leftover_use_count(signals[i].node_id()) -= 1 + } + + // lastly, determine new candidates + for parent_id in self.network.node_outputs(id) { + let parent_node = self.network.node(parent_id); + if parent_node + .inputs() + .iter() + .all(|s| self.program.rows().contains_id(s.node_id())) + { + self.candidates.insert((parent_id, parent_node)); + } + } + } + + fn optimize_dcc_usage( + &self, + signals: &mut [Signal; 3], + operands: &[BitwiseOperand; 3], + matching: &[bool; 3], + ) -> i32 { + // first, try using a DCC row for all non-matching rows that require inversion + let mut dcc_adjusted = [false; 3]; + let mut changed = true; + while changed { + changed = false; + for i in 0..3 { + if matching[i] + || operands[i].is_dcc() + || self.program.rows().get_rows(signals[i]).next().is_some() + { + continue; + } + // the i-th operand needs inversion. let's try doing this by swapping it with a + // signal of a DCC row so that we require one less copy operation + for j in 0..3 { + if i == j || matching[j] || dcc_adjusted[j] { + continue; + } + if operands[j].is_dcc() { + signals.swap(i, j); + dcc_adjusted[j] = true; + changed = true; + } + } + } + } + + let mut cost = 0; + for ((signal, operand), matching) in signals.iter().zip(operands).zip(matching) { + if *matching || operand.is_dcc() { + continue; + } + // if the signal is not stored somewhere, i.e. only the inverted signal is present, this + // requires a move via a DCC row to the actual operand + if self.program.rows().get_rows(*signal).next().is_none() { + cost += 1 + } + } + cost + } + + fn spilling_cost(&self, operands: &[BitwiseOperand; 3], matching: &[bool; 3]) -> i32 { + let mut cost = 0; + for i in 0..3 { + if matching[i] { + continue; + } + let Some(_signal) = self + .program + .rows() + .get_row_signal(Row::Bitwise(operands[i].row())) + else { + continue; + }; + cost += 1 + // // signal and inverted signal not present somewhere else + // if self.program.rows().get_rows(signal).count() < 2 + // && self + // .program + // .rows() + // .get_rows(signal.invert()) + // .next() + // .is_none() + // { + // cost += 1 + // } + } + cost + } + + /// Reorders the `signals` so that the maximum number of the given signal-operator-pairs already + /// match according to the current program state. + /// The returned array contains true for each operand that then already contains the correct + /// signal and the number is equal to the number of trues in the array. + fn get_mapping( + &self, + signals: &mut [Signal; 3], + operands: &[BitwiseOperand; 3], + ) -> ([bool; 3], usize) { + let signals_with_idx = { + let mut i = 0; + signals.map(|signal| { + i += 1; + (signal, i - 1) + }) + }; + let operand_signals = operands.map(|op| self.program.rows().get_operand_signal(op)); + + // reorder signals by how often their signal is already available in an operand + let mut signals_with_matches = signals_with_idx.map(|(s, i)| { + ( + s, + i, + operand_signals + .iter() + .filter(|sig| **sig == Some(s)) + .count(), + ) + }); + signals_with_matches.sort_by(|a, b| a.2.cmp(&b.2)); + + // then we can assign places one by one and get an optimal mapping (probably, proof by + // intuition only) + + // contains for each operand index whether the signal at that position is already the + // correct one + let mut result = [false; 3]; + // contains the mapping of old signal index to operand index + let mut new_positions = [0usize, 1, 2]; + // contains the number of assigned signals (i.e. #true in result) + let mut assigned_signals = 0; + + for (signal, signal_idx, _) in signals_with_matches { + // find operand index for that signal + let Some((target_idx, _)) = operand_signals + .iter() + .enumerate() + .find(|(idx, sig)| **sig == Some(signal) && !result[*idx]) + else { + continue; + }; + result[target_idx] = true; + let new_idx = new_positions[signal_idx]; + signals.swap(target_idx, new_idx); + new_positions.swap(target_idx, new_idx); + assigned_signals += 1; + } + (result, assigned_signals) + } + + fn architecture(&self) -> &'a Architecture { + self.program.architecture + } +} diff --git a/rs/src/fc_dram/extraction.rs b/rs/src/fc_dram/extraction.rs new file mode 100644 index 0000000..7c005dc --- /dev/null +++ b/rs/src/fc_dram/extraction.rs @@ -0,0 +1,224 @@ +//! Computation of Compiling Costs + +use eggmock::egg::{CostFunction, Id}; +use eggmock::{EggIdToSignal, MigLanguage, Mig, NetworkLanguage, Provider, Signal}; +use either::Either; +use rustc_hash::FxHashMap; +use std::cell::RefCell; +use std::cmp::{max, Ordering}; +use std::iter; +use std::ops::{Deref, Index}; +use std::rc::Rc; +use super::{compile, Architecture}; + +pub struct CompilingCostFunction<'a> { + pub architecture: &'a Architecture +} + +#[derive(Debug, Default, Eq, PartialEq)] +pub enum NotNesting { + #[default] + NotANot, + FirstNot, + NestedNots, +} + +#[derive(Debug)] +pub struct StackedPartialGraph { + nodes: Vec>>, + first_free_id: usize, + root: MigLanguage, +} + +#[derive(Debug)] +pub struct CollapsedPartialGraph { + nodes: Rc>, + first_free_id: usize, + root_id: Id, +} + +impl StackedPartialGraph { + pub fn leaf(node: MigLanguage) -> Self { + Self { + nodes: Vec::new(), + first_free_id: 0, + root: node, + } + } + + pub fn new( + root: MigLanguage, + child_graphs: impl IntoIterator>, + ) -> Self { + let mut nodes = Vec::new(); + let mut first_free_id = 0; + for graph in child_graphs { + nodes.push(graph.nodes.clone()); + first_free_id = max(first_free_id, graph.first_free_id); + } + Self { + nodes, + first_free_id, + root, + } + } + + pub fn collapse(&self, real_id: Id) -> CollapsedPartialGraph { + let mut nodes: FxHashMap = FxHashMap::default(); + let first_free_id = max(self.first_free_id, usize::from(real_id)); + nodes.extend( + self.nodes + .iter() + .flat_map(|map| map.iter().map(|(id, node)| (*id, node.clone()))), + ); + nodes.insert(real_id, self.root.clone()); + CollapsedPartialGraph { + nodes: Rc::new(nodes), + first_free_id, + root_id: real_id, + } + } +} + +/// TODO: add reliability as cost-metric +#[derive(Debug)] +pub struct CompilingCost { + partial: RefCell>>, + not_nesting: NotNesting, + program_cost: usize, +} + +impl CostFunction for CompilingCostFunction<'_> { + type Cost = Rc; + + /// Compute cost of given `enode` + fn cost(&mut self, enode: &MigLanguage, mut costs: C) -> Self::Cost + where + C: FnMut(Id) -> Self::Cost, + { + let root = enode.clone(); + let cost = match enode { + MigLanguage::False | MigLanguage::Input(_) => CompilingCost::leaf(root), + MigLanguage::Not(id) => { + let cost = costs(*id); + + let nesting = if cost.not_nesting == NotNesting::NotANot { + NotNesting::FirstNot + } else { + NotNesting::NestedNots + }; + CompilingCost::with_children(self.architecture, root, iter::once((*id, cost)), nesting) + } + MigLanguage::Maj(children) => CompilingCost::with_children( + self.architecture, + root, + children.map(|id| (id, costs(id))), + NotNesting::NotANot, + ), + }; + Rc::new(cost) + } +} + +impl CompilingCost { + pub fn leaf(root: MigLanguage) -> Self { + Self { + partial: RefCell::new(Either::Left(StackedPartialGraph::leaf(root))), + not_nesting: NotNesting::NotANot, + program_cost: 0, + } + } + + pub fn with_children( + architecture: &Architecture, + root: MigLanguage, + child_costs: impl IntoIterator)>, + not_nesting: NotNesting, + ) -> Self { + let child_graphs = child_costs + .into_iter() + .map(|(id, cost)| cost.collapsed_graph(id)); + let partial_graph = StackedPartialGraph::new(root, child_graphs); + let program_cost = compile(architecture, &partial_graph.with_backward_edges()).instructions.len(); + Self { + partial: RefCell::new(Either::Left(partial_graph)), + not_nesting, + program_cost, + } + } + + pub fn collapsed_graph(&self, id: Id) -> Rc { + let mut partial = self.partial.borrow_mut(); + let stacked = match partial.deref() { + Either::Left(stacked) => stacked, + Either::Right(collapsed) => { + assert_eq!(collapsed.root_id, id); + return collapsed.clone(); + } + }; + let collapsed = Rc::new(stacked.collapse(id)); + *partial = Either::Right(collapsed.clone()); + collapsed + } +} + +impl StackedPartialGraph { + pub fn get_root_id(&self) -> Id { + Id::from(self.first_free_id + 1) + } +} + +impl Index for StackedPartialGraph { + type Output = MigLanguage; + + fn index(&self, index: Id) -> &Self::Output { + if index == self.get_root_id() { + &self.root + } else { + self.nodes.iter().filter_map(|m| m.get(&index)).next().unwrap() + } + } +} + +impl Provider for StackedPartialGraph { + type Node = Mig; + + fn outputs(&self) -> impl Iterator { + iter::once(self.to_signal(self.get_root_id())) + } + + fn node(&self, id: eggmock::Id) -> Self::Node { + self[Id::from(id)] + .to_node(|id| self.to_signal(id)) + .expect("id should point to a non-not node") + } +} + +impl PartialEq for CompilingCost { + fn eq(&self, other: &Self) -> bool { + if other.not_nesting == NotNesting::NestedNots && self.not_nesting == NotNesting::NestedNots { + true + } else { + self.program_cost.eq(&other.program_cost) + } + } +} + +impl PartialOrd for CompilingCost { + fn partial_cmp(&self, other: &Self) -> Option { + #[allow(clippy::collapsible_else_if)] + if self.not_nesting == NotNesting::NestedNots { + if other.not_nesting == NotNesting::NestedNots { + Some(Ordering::Equal) + } else { + Some(Ordering::Greater) + } + } else { + if other.not_nesting == NotNesting::NestedNots { + Some(Ordering::Less) + } else { + self.program_cost.partial_cmp(&other.program_cost) + } + } + } +} diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs new file mode 100644 index 0000000..68c3a04 --- /dev/null +++ b/rs/src/fc_dram/mod.rs @@ -0,0 +1,180 @@ +mod compilation; +mod extraction; +mod optimization; +mod program; +mod architecture; + +use std::sync::LazyLock; +use std::time::Instant; + +use self::compilation::compile; +use self::extraction::CompilingCostFunction; + +use eggmock::egg::{rewrite, EGraph, Extractor, Id, Rewrite, Runner}; +use eggmock::{ + Mig, MigLanguage, MigReceiverFFI, Provider, Receiver, ReceiverFFI, Rewriter, + RewriterFFI, // TODO: add AOIG-rewrite (bc FC-DRAM supports AND&OR natively)? +}; +use program::*; +use architecture::*; + +/// Rewrite rules to use in E-Graph Rewriting (see [egg](https://egraphs-good.github.io/)) +static REWRITE_RULES: LazyLock>> = LazyLock::new(|| { + let mut rules = vec![ + rewrite!("commute_1"; "(maj ?a ?b ?c)" => "(maj ?b ?a ?c)"), + rewrite!("commute_2"; "(maj ?a ?b ?c)" => "(maj ?a ?c ?b)"), + rewrite!("not_not"; "(! (! ?a))" => "?a"), + rewrite!("maj_1"; "(maj ?a ?a ?b)" => "?a"), + rewrite!("maj_2"; "(maj ?a (! ?a) ?b)" => "?b"), + rewrite!("associativity"; "(maj ?a ?b (maj ?c ?b ?d))" => "(maj ?d ?b (maj ?c ?b ?a))"), + ]; + rules.extend(rewrite!("invert"; "(! (maj ?a ?b ?c))" <=> "(maj (! ?a) (! ?b) (! ?c))")); + rules.extend(rewrite!("distributivity"; "(maj ?a ?b (maj ?c ?d ?e))" <=> "(maj (maj ?a ?b ?c) (maj ?a ?b ?d) ?e)")); + rules +}); + +/// Store compilation output and timing statistics how long compilation stages took +/// TODO: unit of t? sec or ms? +struct CompilingReceiverResult<'a> { + output: CompilerOutput<'a>, + + t_runner: u128, + t_extractor: u128, + t_compiler: u128, +} + +#[ouroboros::self_referencing] +struct CompilerOutput<'a> { + graph: EGraph, + #[borrows(graph)] + #[covariant] + ntk: ( + Extractor<'this, CompilingCostFunction<'a>, MigLanguage, ()>, + Vec, + ), + #[borrows(ntk)] + program: Program<'a>, +} + +fn compiling_receiver<'a>( + architecture: &'a Architecture, + rules: &'a [Rewrite], + settings: CompilerSettings, +) -> impl Receiver, Node = Mig> + 'a { + EGraph::::new(()).map(move |(graph, outputs)| { + let t_runner = std::time::Instant::now(); + let runner = Runner::default().with_egraph(graph).run(rules); + let t_runner = t_runner.elapsed().as_millis(); + if settings.verbose { + println!("== Runner Report"); + runner.print_report(); + } + let graph = runner.egraph; + + let mut t_extractor = 0; + let mut t_compiler = 0; + + let output = CompilerOutput::new( + graph, + |graph| { + let start_time = Instant::now(); + let extractor = Extractor::new( + &graph, + CompilingCostFunction { + architecture: &architecture, + }, + ); + t_extractor = start_time.elapsed().as_millis(); + (extractor, outputs) + }, + |ntk| { + let start_time = Instant::now(); + let program = compile(architecture, &ntk.with_backward_edges()); + t_compiler = start_time.elapsed().as_millis(); + if settings.print_program || settings.verbose { + if settings.verbose { + println!("== Program") + } + println!("{program}"); + } + program + }, + ); + if settings.verbose { + println!("== Timings"); + println!("t_runner: {t_runner}ms"); + println!("t_extractor: {t_extractor}ms"); + println!("t_compiler: {t_compiler}ms"); + } + CompilingReceiverResult { + output, + t_runner, + t_extractor, + t_compiler, + } + }) +} + +#[derive(Debug, Copy, Clone)] +#[repr(C)] +struct CompilerSettings { + print_program: bool, + verbose: bool, +} + +struct FCDramRewriter(CompilerSettings); + +impl Rewriter for FCDramRewriter { + type Node = Mig; + type Intermediate = CompilingReceiverResult<'static>; + + fn create_receiver( + &mut self, + ) -> impl Receiver> + 'static { + compiling_receiver(&*ARCHITECTURE, REWRITE_RULES.as_slice(), self.0) + } + + fn rewrite( + self, + result: CompilingReceiverResult<'static>, + output: impl Receiver, + ) { + result.output.borrow_ntk().send(output); + } +} + +#[no_mangle] +extern "C" fn fcdram_rewriter(settings: CompilerSettings) -> MigReceiverFFI> { + RewriterFFI::new(FCDramRewriter(settings)) +} + +#[repr(C)] +struct CompilerStatistics { + egraph_classes: u64, + egraph_nodes: u64, + egraph_size: u64, + + instruction_count: u64, + + t_runner: u64, + t_extractor: u64, + t_compiler: u64, +} + +#[no_mangle] +extern "C" fn fcdram_compile(settings: CompilerSettings) -> MigReceiverFFI { + let receiver = + compiling_receiver(&*&ARCHITECTURE, REWRITE_RULES.as_slice(), settings).map(|res| { + let graph = res.output.borrow_graph(); + CompilerStatistics { + egraph_classes: graph.number_of_classes() as u64, + egraph_nodes: graph.total_number_of_nodes() as u64, + egraph_size: graph.total_size() as u64, + instruction_count: res.output.borrow_program().instructions.len() as u64, + t_runner: res.t_runner as u64, + t_extractor: res.t_extractor as u64, + t_compiler: res.t_compiler as u64, + } + }); + MigReceiverFFI::new(receiver) +} diff --git a/rs/src/fc_dram/optimization.rs b/rs/src/fc_dram/optimization.rs new file mode 100644 index 0000000..ae4609a --- /dev/null +++ b/rs/src/fc_dram/optimization.rs @@ -0,0 +1,233 @@ +use super::{Program, RowOperand}; +use crate::ambit::program::{Address, BitwiseAddress, Instruction}; +use rustc_hash::FxHashSet; + +pub fn optimize(program: &mut Program) { + if program.instructions.len() == 0 { + return; + } + let mut opt = Optimization { program }; + opt.dead_code_elimination(); + opt.merge_aap(); + opt.merge_ap_aap(); +} + +pub struct Optimization<'p, 'a> { + program: &'p mut Program<'a>, +} + +impl Optimization<'_, '_> { + fn dead_code_elimination(&mut self) { + let liveness = row_liveness(self.program); + let mut i = 0; + let mut liveness_i = 0; + 'outer: while liveness_i < liveness.len() - 1 { + let instruction = self.program.instructions[i]; + let liveness_after = &liveness[liveness_i + 1]; + + // if no overridden row is live after this instruction, we can remove the instruction + for row in instruction.overridden_rows(self.program.architecture) { + if matches!(row, RowOperand::Out(_)) || liveness_after.contains(&row) { + i += 1; + liveness_i += 1; + continue 'outer; + } + } + + liveness_i += 1; + self.program.instructions.remove(i); + } + } + + fn merge_aap(&mut self) { + let instructions = &mut self.program.instructions; + let mut i = 0; + while i < instructions.len() { + let instruction = instructions[i]; + + // let's just handle the simple case where we copy from a single row activation to some + // new bitwise operand + let Instruction::AAP(from_address, to) = instruction else { + i += 1; + continue; + }; + let Some(from) = from_address.as_single_row() else { + i += 1; + continue; + }; + let Address::Bitwise(BitwiseAddress::Single(to)) = to else { + i += 1; + continue; + }; + + let mut target_addresses = FxHashSet::default(); + target_addresses.insert(to); + instructions.remove(i); + + // search for other operand copies from the copied-from address + // note that we can only replace these copies if the target operand is neither read from + // nor written to between the current instruction and the candidate instruction + let mut candidate_i = i; + let mut used_rows = FxHashSet::default(); + while candidate_i < instructions.len() { + let candidate = instructions[candidate_i]; + if candidate + .overridden_rows(self.program.architecture) + .any(|row| row == from.row()) + { + break; + } + let target = || { + let Instruction::AAP(candidate_from, candidate_to) = candidate else { + return None; + }; + let Address::Bitwise(BitwiseAddress::Single(candidate_to)) = candidate_to + else { + return None; + }; + if from_address == candidate_from + && !used_rows.contains(&RowOperand::Bitwise(candidate_to.row())) + { + Some(candidate_to) + } else { + None + } + }; + let target = target(); + used_rows.extend( + candidate + .used_addresses(self.program.architecture) + .map(|addr| addr.row()), + ); + match target { + Some(to) => { + target_addresses.insert(to); + instructions.remove(candidate_i); + } + None => { + candidate_i += 1; + } + } + } + + // now let's find a covering of the operators and replace the instructions accordingly + let mut copied = FxHashSet::default(); + for (activation_i, multi_activation) in self + .program + .architecture + .multi_activations + .iter() + .enumerate() + { + if multi_activation + .iter() + .any(|op| !target_addresses.contains(op)) + { + continue; + } + // TODO: not all such activation may be necessary + copied.extend(multi_activation.iter().copied()); + instructions.insert( + i, + Instruction::AAP( + from.into(), + Address::Bitwise(BitwiseAddress::Multiple(activation_i)), + ), + ); + i += 1; + } + for op in target_addresses.difference(&copied) { + instructions.insert( + i, + Instruction::AAP(from.into(), Address::Bitwise(BitwiseAddress::Single(*op))), + ); + i += 1; + } + } + } + + fn merge_ap_aap(&mut self) { + let instructions = &mut self.program.instructions; + let mut i = 0; + + 'outer: while i < instructions.len() { + let instruction = instructions[i]; + let Instruction::AP(address) = instruction else { + i += 1; + continue; + }; + let Address::Bitwise(BitwiseAddress::Multiple(mult_i)) = address else { + i += 1; + continue; + }; + let mut operands = self.program.architecture.multi_activations[mult_i].clone(); + let mut used_rows = FxHashSet::default(); + for candidate_i in i + 1..instructions.len() { + if operands.is_empty() { + break; + } + let candidate = instructions[candidate_i]; + + if let Instruction::AAP(Address::Bitwise(BitwiseAddress::Single(operand)), target) = + candidate + { + if target + .row_addresses(self.program.architecture) + .any(|addr| used_rows.contains(&addr.row())) + && operands.contains(&operand) + { + instructions[i] = Instruction::AAP(address, target); + instructions.remove(candidate_i); + i += 1; + continue 'outer; + } + } + + used_rows.extend( + candidate + .used_addresses(self.program.architecture) + .map(|add| add.row()), + ); + for row in candidate.overridden_rows(self.program.architecture) { + let mut i = 0; + while i < operands.len() { + if RowOperand::Bitwise(operands[i].row()) == row { + operands.swap_remove(i); + } else { + i += 1; + } + } + } + } + i += 1; + } + } +} + +/// Returns a vector of the same length as the program where the entry at the index of an +/// instruction the set at that index contains the rows that are live just before the instruction +fn row_liveness(program: &Program) -> Vec> { + let mut result = Vec::with_capacity(program.instructions.len()); + + let mut currently_live = FxHashSet::default(); + for instruction in program.instructions.iter().rev() { + let mut is_live = false; + for row in instruction.overridden_rows(program.architecture) { + if matches!(row, RowOperand::Out(_)) || currently_live.contains(&row) { + is_live = true; + } + currently_live.remove(&row); + } + if is_live { + currently_live.extend( + instruction + .input_operands(program.architecture) + .map(|addr| addr.row()), + ); + } + result.push(currently_live.clone()); + } + + result.reverse(); + result +} diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs new file mode 100644 index 0000000..f8954a1 --- /dev/null +++ b/rs/src/fc_dram/program.rs @@ -0,0 +1,235 @@ +use super::{Architecture, BitwiseOperand, BitwiseRow, RowOperand, Rows}; +use eggmock::{Id, Mig, ProviderWithBackwardEdges, Signal}; +use std::fmt::{Display, Formatter}; +use std::ops::{Deref, DerefMut}; + +/// Instructions which operate on DRAM-Rows +/// TODO: adjust instructions to FC-DRAM?! +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum Instruction { + /// TODO: What does this instr do? + /// TODO: which row is operand, which one is a result? + AAP(RowOperand, RowOperand), + /// TODO: What does this instr do? + AP(RowOperand), + /// Multiple-Row Activation: `ACT R_F -> PRE -> ACT R_L -> PRE` of rows `R_F`,`R_L` for rows within + /// different subarrays. As a result `R_L` holds the negated value of `R_F` + /// Used to implement NOT directly + APAP(RowOperand,RowOperand), +} + +#[derive(Debug, Clone)] +pub struct Program<'a> { + pub architecture: &'a Architecture, + pub instructions: Vec, +} + +#[derive(Debug, Clone)] +pub struct ProgramState<'a> { + program: Program<'a>, + /// currently used rows + rows: Rows<'a>, +} + +impl<'a> Program<'a> { + pub fn new(architecture: &'a Architecture, instructions: Vec) -> Self { + Self { + architecture, + instructions, + } + } +} + +impl Instruction { + /// Return Addreses of Rows which are used by this instruction (=operand-rows AND result-row) + pub fn used_addresses<'a>( + &self, + architecture: &'a Architecture, + ) -> impl Iterator + 'a { + let from = match self { + Instruction::AAP(from, _) => from, + Instruction::AP(op) => op, + } + .row_addresses(architecture); + let to = match self { + Instruction::AAP(_, to) => Some(*to), + _ => None, + } + .into_iter() + .flat_map(|addr| addr.row_addresses(architecture)); + from.chain(to) + } + + /// Return addresses of operand-rows + pub fn input_operands<'a>( + &self, + architecture: &'a Architecture, + ) -> impl Iterator + 'a { + let from = match self { + Instruction::AAP(from, _) => from, + Instruction::AP(op) => op, + }; + from.row_addresses(architecture) + } + + pub fn overridden_rows<'a>( + &self, + architecture: &'a Architecture, + ) -> impl Iterator + 'a { + let first = match self { + Instruction::AP(a) => a, + Instruction::AAP(a, _) => a, + } + .clone(); + let first = match first { + Address::Bitwise(BitwiseAddress::Multiple(idx)) => { + architecture.multi_activations[idx].as_slice() + } + _ => &[], + } + .iter() + .map(|op| RowOperand::Bitwise(op.row())); + + let second = match self { + Instruction::AP(_) => None, + Instruction::AAP(_, a) => Some(a.row_addresses(architecture).map(|addr| addr.row())), + } + .into_iter() + .flatten(); + + first.chain(second) + } +} + +impl<'a> ProgramState<'a> { + pub fn new( + architecture: &'a Architecture, + network: &impl ProviderWithBackwardEdges, + ) -> Self { + Self { + program: Program::new(architecture, Vec::new()), + rows: Rows::new(network, architecture), + } + } + + pub fn maj(&mut self, op: usize, out_signal: Signal, out_address: Option
) { + let operands = &self.architecture.multi_activations[op]; + for operand in operands { + self.set_signal(RowAddress::Bitwise(*operand), out_signal); + } + let instruction = match out_address { + Some(out) => Instruction::AAP(BitwiseAddress::Multiple(op).into(), out), + None => Instruction::AP(BitwiseAddress::Multiple(op).into()), + }; + self.instructions.push(instruction) + } + + /// TODO: Does FC-DRAM need copying of signals? + /// pub fn signal_copy(&mut self, signal: Signal, target: RowAddress, intermediate_dcc: u8) { + /// } + + /// Sets the value of the operand in `self.rows` to the given signal. If that removes the last + /// reference to the node of the previous signal of the operator, insert spill code for the + /// previous signal + /// **ALWAYS** call this before inserting the actual instruction, otherwise the spill code will + /// spill the wrong value + fn set_signal(&mut self, address: RowAddress, signal: Signal) { + if let Some(previous_signal) = self.rows.set_signal(address, signal) { + if !self.rows.contains_id(previous_signal.node_id()) { + let spill_id = self.rows.add_spill(previous_signal); + self.instructions + .push(Instruction::AAP(address.into(), Address::Spill(spill_id))); + } + } + } + + pub fn free_id_rows(&mut self, id: Id) { + self.rows.free_id_rows(id); + } + + pub fn rows(&self) -> &Rows { + &self.rows + } +} + +impl<'a> Deref for ProgramState<'a> { + type Target = Program<'a>; + + fn deref(&self) -> &Self::Target { + &self.program + } +} + +impl DerefMut for ProgramState<'_> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.program + } +} + +impl<'a> From> for Program<'a> { + fn from(value: ProgramState<'a>) -> Self { + value.program + } +} + +/// Print the generated program in human-readable form +impl Display for Program<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let write_operand = |f: &mut Formatter<'_>, o: &BitwiseOperand| -> std::fmt::Result { + match o { + BitwiseOperand::T(t) => write!(f, "T{}", t), + BitwiseOperand::DCC { inverted, index } => { + if *inverted { + write!(f, "~DCC{}", index) + } else { + write!(f, "DCC{}", index) + } + } + } + }; + let write_address = |f: &mut Formatter<'_>, a: &Address| -> std::fmt::Result { + match a { + Address::In(i) => write!(f, "I{}", i), + Address::Out(i) => write!(f, "O{}", i), + Address::Spill(i) => write!(f, "S{}", i), + Address::Const(c) => write!(f, "C{}", if *c { "1" } else { "0" }), + Address::Bitwise(b) => match b { + BitwiseAddress::Single(o) => write_operand(f, o), + BitwiseAddress::Multiple(id) => { + let operands = &self.architecture.multi_activations[*id]; + for i in 0..operands.len() { + if i == 0 { + write!(f, "[")?; + } else { + write!(f, ", ")?; + } + write_operand(f, &operands[i])?; + if i == operands.len() - 1 { + write!(f, "]")?; + } + } + Ok(()) + } + }, + } + }; + + for instruction in &self.instructions { + match instruction { + Instruction::AAP(a, b) => { + write!(f, "AAP ")?; + write_address(f, a)?; + write!(f, " ")?; + write_address(f, b)?; + writeln!(f)?; + } + Instruction::AP(a) => { + write!(f, "AP ")?; + write_address(f, a)?; + writeln!(f)?; + } + } + } + Ok(()) + } +} diff --git a/src/ambit.h b/src/ambit.h index 9aa1719..d08ce57 100644 --- a/src/ambit.h +++ b/src/ambit.h @@ -27,4 +27,26 @@ extern "C" eggmock::mig_receiver ambit_rewriter( ambit_compiler_settings settings ); eggmock::mig_receiver ambit_compile( ambit_compiler_settings settings ); + + struct fcdram_compiler_statistics + { + uint64_t egraph_classes; + uint64_t egraph_nodes; + uint64_t egraph_size; + + uint64_t instruction_count; + + uint64_t t_runner; + uint64_t t_extractor; + uint64_t t_compiler; + }; + + struct fcdram_compiler_settings + { + bool print_program; + bool verbose; + }; + + eggmock::mig_receiver fcdram_rewriter( fcdram_compiler_settings settings ); + eggmock::mig_receiver fcdram_compile( fcdram_compiler_settings settings ); } diff --git a/src/ambit_benchmark_main.cpp b/src/ambit_benchmark_main.cpp index 8ffbe2c..273bf08 100644 --- a/src/ambit_benchmark_main.cpp +++ b/src/ambit_benchmark_main.cpp @@ -31,7 +31,6 @@ int main( int const argc, char** argv ) preoptimize_mig( *mig ); auto const t_opt = duration_cast( system_clock::now() - opt_begin ).count(); - auto constexpr settings = ambit_compiler_settings{ .print_program = false, .verbose = false, diff --git a/src/main.cpp b/src/main.cpp index a906962..ca8ddaf 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -23,6 +23,7 @@ int main() write_dot( in, "in.dot" ); + /// TODO: use fc-dram here ambit_compiler_statistics result = eggmock::send_mig( in, ambit_compile( ambit_compiler_settings{ .print_program = true, .verbose = true, @@ -34,4 +35,4 @@ int main() // mig_network rewritten = rewrite_mig( in, ambit_rewriter() ); // write_dot( rewritten, "out.dot" ); -} \ No newline at end of file +} From c09909b2f3331c880075f9b018b8f77e69daff1b Mon Sep 17 00:00:00 2001 From: alku662e Date: Mon, 28 Apr 2025 16:15:48 +0200 Subject: [PATCH 02/51] wip --- rs/src/ambit/compilation.rs | 4 +-- rs/src/ambit/mod.rs | 18 ++++++++++++ rs/src/ambit/rows.rs | 10 +++---- src/ambit.h | 22 --------------- src/fcdram.h | 30 ++++++++++++++++++++ src/fcdram_benchmark_main.cpp | 52 +++++++++++++++++++++++++++++++++++ src/main.cpp | 49 ++++++++++++++++++++++++--------- src/utils.h | 8 ++++++ 8 files changed, 151 insertions(+), 42 deletions(-) create mode 100644 src/fcdram.h create mode 100644 src/fcdram_benchmark_main.cpp diff --git a/rs/src/ambit/compilation.rs b/rs/src/ambit/compilation.rs index 73e7a8f..55c7ae5 100644 --- a/rs/src/ambit/compilation.rs +++ b/rs/src/ambit/compilation.rs @@ -75,9 +75,9 @@ pub struct CompilationState<'a, 'n, P> { impl<'a, 'n, P: ProviderWithBackwardEdges> CompilationState<'a, 'n, P> { pub fn new(architecture: &'a Architecture, network: &'n P) -> Self { let mut candidates = FxHashSet::default(); - // check all parents of leafs whether they have only leaf children, in which case they are + // check all parents of leaves whether they have only leaf children, in which case they are // candidates - for leaf in network.leafs() { + for leaf in network.leaves() { for candidate_id in network.node_outputs(leaf) { let candidate = network.node(candidate_id); if candidate diff --git a/rs/src/ambit/mod.rs b/rs/src/ambit/mod.rs index 1a47507..230f9f0 100644 --- a/rs/src/ambit/mod.rs +++ b/rs/src/ambit/mod.rs @@ -264,3 +264,21 @@ extern "C" fn ambit_compile(settings: CompilerSettings) -> MigReceiverFFI MigReceiverFFI { + let receiver = + compiling_receiver(&*&ARCHITECTURE, REWRITE_RULES.as_slice(), settings).map(|res| { + let graph = res.output.borrow_graph(); + CompilerStatistics { + egraph_classes: graph.number_of_classes() as u64, + egraph_nodes: graph.total_number_of_nodes() as u64, + egraph_size: graph.total_size() as u64, + instruction_count: res.output.borrow_program().instructions.len() as u64, + t_runner: res.t_runner as u64, + t_extractor: res.t_extractor as u64, + t_compiler: res.t_compiler as u64, + } + }); + MigReceiverFFI::new(receiver) +} diff --git a/rs/src/ambit/rows.rs b/rs/src/ambit/rows.rs index 83c58ce..4246e13 100644 --- a/rs/src/ambit/rows.rs +++ b/rs/src/ambit/rows.rs @@ -42,14 +42,14 @@ impl<'a> Rows<'a> { spill_counter: 0, architecture, }; - rows.add_leafs(ntk); + rows.add_leaves(ntk); rows } - fn add_leafs(&mut self, ntk: &impl ProviderWithBackwardEdges) { - let leafs = ntk.leafs(); - self.rows.reserve(leafs.size_hint().0); - for id in leafs { + fn add_leaves(&mut self, ntk: &impl ProviderWithBackwardEdges) { + let leaves = ntk.leaves(); + self.rows.reserve(leaves.size_hint().0); + for id in leaves { let node = ntk.node(id); match node { Mig::Input(i) => { diff --git a/src/ambit.h b/src/ambit.h index d08ce57..9aa1719 100644 --- a/src/ambit.h +++ b/src/ambit.h @@ -27,26 +27,4 @@ extern "C" eggmock::mig_receiver ambit_rewriter( ambit_compiler_settings settings ); eggmock::mig_receiver ambit_compile( ambit_compiler_settings settings ); - - struct fcdram_compiler_statistics - { - uint64_t egraph_classes; - uint64_t egraph_nodes; - uint64_t egraph_size; - - uint64_t instruction_count; - - uint64_t t_runner; - uint64_t t_extractor; - uint64_t t_compiler; - }; - - struct fcdram_compiler_settings - { - bool print_program; - bool verbose; - }; - - eggmock::mig_receiver fcdram_rewriter( fcdram_compiler_settings settings ); - eggmock::mig_receiver fcdram_compile( fcdram_compiler_settings settings ); } diff --git a/src/fcdram.h b/src/fcdram.h new file mode 100644 index 0000000..06f339c --- /dev/null +++ b/src/fcdram.h @@ -0,0 +1,30 @@ +#pragma once + +#include "eggmock.h" + +#include + +extern "C" +{ + struct fcdram_compiler_statistics + { + uint64_t egraph_classes; + uint64_t egraph_nodes; + uint64_t egraph_size; + + uint64_t instruction_count; + + uint64_t t_runner; + uint64_t t_extractor; + uint64_t t_compiler; + }; + + struct fcdram_compiler_settings + { + bool print_program; + bool verbose; + }; + + eggmock::mig_receiver fcdram_rewriter( fcdram_compiler_settings settings ); + eggmock::mig_receiver fcdram_compile( fcdram_compiler_settings settings ); +} diff --git a/src/fcdram_benchmark_main.cpp b/src/fcdram_benchmark_main.cpp new file mode 100644 index 0000000..cd3191e --- /dev/null +++ b/src/fcdram_benchmark_main.cpp @@ -0,0 +1,52 @@ +/** + * Runs compilation for given logic network (see `utils.hpp` for pre-provided logic networks) + */ +#include "fcdram.h" +#include "eggmock.h" +#include "utils.h" + +#include +#include +#include + +using namespace mockturtle; +using namespace eggmock; +using namespace std::chrono; + +// usage: exec [network] +int main( int const argc, char** argv ) +{ + if ( argc != 2 ) + { + std::cerr << "usage: " << argv[0] << std::endl; + return 1; + } + + std::optional mig = get_ntk( argv[1] ); + if ( !mig ) + { + return 1; + } + + auto const pre_opt_size = mig->size(); + + auto const opt_begin = system_clock::now(); + preoptimize_mig( *mig ); + auto const t_opt = duration_cast( system_clock::now() - opt_begin ).count(); + + auto constexpr settings = fcdram_compiler_settings{ + .print_program = false, + .verbose = false, + }; + + const auto [egraph_classes, egraph_nodes, egraph_size, + instruction_count, + t_runner, t_extractor, t_compiler] = + send_mig( *mig, fcdram_compile( settings ) ); + + std::cout << t_opt << "\t" << t_runner << "\t" << t_extractor << "\t" << t_compiler << "\t" + << pre_opt_size << "\t" << mig->size() << "\t" << mig->num_cis() << "\t" << mig->num_cos() << "\t" + << instruction_count << "\t" + << egraph_classes << "\t" << egraph_nodes << "\t" << egraph_size; + return 0; +} diff --git a/src/main.cpp b/src/main.cpp index ca8ddaf..8477abd 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,7 @@ #include "eggmock.h" #include "ambit.h" +#include "fcdram.h" #include #include @@ -9,6 +10,39 @@ using namespace mockturtle; using namespace eggmock; +void run_ambit_example(mig_network in) +{ + ambit_compiler_statistics result = eggmock::send_mig( in, ambit_compile( ambit_compiler_settings{ + .print_program = true, + .verbose = true, + } ) ); + std::cout << "IC:" << result.instruction_count << std::endl; + std::cout << "t1:" << result.t_runner << std::endl; + std::cout << "t2:" << result.t_extractor << std::endl; + std::cout << "t3:" << result.t_compiler << std::endl; + // mig_network rewritten = rewrite_mig( in, ambit_rewriter() ); + // write_dot( rewritten, "out.dot" ); + +} + +/** + * TODO: change `mig` to `aig`?? + */ +void run_fcdram_example(mig_network in) +{ + fcdram_compiler_statistics result = eggmock::send_mig( in, fcdram_compile( fcdram_compiler_settings{ + .print_program = true, + .verbose = true, + } ) ); + std::cout << "IC:" << result.instruction_count << std::endl; + std::cout << "t1:" << result.t_runner << std::endl; + std::cout << "t2:" << result.t_extractor << std::endl; + std::cout << "t3:" << result.t_compiler << std::endl; + + // mig_network rewritten = rewrite_mig( in, fcdram_rewriter() ); + // write_dot( rewritten, "out.dot" ); +} + int main() { mig_network in; @@ -22,17 +56,6 @@ int main() in.create_po( bi ); write_dot( in, "in.dot" ); - - /// TODO: use fc-dram here - ambit_compiler_statistics result = eggmock::send_mig( in, ambit_compile( ambit_compiler_settings{ - .print_program = true, - .verbose = true, - } ) ); - std::cout << "IC:" << result.instruction_count << std::endl; - std::cout << "t1:" << result.t_runner << std::endl; - std::cout << "t2:" << result.t_extractor << std::endl; - std::cout << "t3:" << result.t_compiler << std::endl; - - // mig_network rewritten = rewrite_mig( in, ambit_rewriter() ); - // write_dot( rewritten, "out.dot" ); + run_ambit_example(in); + run_fcdram_example(in); } diff --git a/src/utils.h b/src/utils.h index df46a48..a47d6d4 100644 --- a/src/utils.h +++ b/src/utils.h @@ -22,6 +22,11 @@ std::optional read_ntk( std::string const& path ); void preoptimize_mig( mockturtle::mig_network& ntk ); +/** + * Collection of logic networks (eg for benchmarking) + * - included logic networks: Full Adder ("fa"), Multiplexer ("mux"), Greater than ("gt"), "kogge_stone" + * - carry_ripple_adder_inplace ("add"), carry_ripple_multiplier ("mul"), sum-adder ("pop") + */ template std::optional get_ntk( std::string const& key ) { @@ -183,6 +188,9 @@ std::optional get_ntk( std::string const& key ) return {}; } +/** + * Read network from .aig/.pla/.verilog file + */ template std::optional read_ntk( const std::string& path ) { From 4e9d0fb35679e82b76fd1180ac02e2fa42a96927 Mon Sep 17 00:00:00 2001 From: alku662e Date: Wed, 7 May 2025 18:29:22 +0200 Subject: [PATCH 03/51] WIP --- rs/src/fc_dram/architecture.rs | 353 ++++++++++----------------------- rs/src/fc_dram/compilation.rs | 289 ++++++--------------------- rs/src/fc_dram/extraction.rs | 110 ++++------ rs/src/fc_dram/mod.rs | 71 ++++--- rs/src/fc_dram/optimization.rs | 225 +-------------------- rs/src/fc_dram/program.rs | 186 ++--------------- 6 files changed, 271 insertions(+), 963 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 9a7b1ed..9e4c752 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -1,273 +1,120 @@ //! Contains all architecture-specific descriptions +//! - [`FCDRAMArchitecture`] = trait which needs to be implemented for your DRAM-module +//! - [`Instruction`] = contains all instructions supported by FC-DRAM architecture +//! - [ ] `RowAddress`: utility functions to get subarray-id and row-addr within that subarray from +//! - [ ] ->create `pub struct Architecture` +//! RowAddress (eg via bit-shifting given bitmasks for subarray-id & row-addr to put on-top of +//! RowAddress /// TODO: merge `rows.rs` with `mod.rs` and move into `arch.rs` -use super::{Architecture, BitwiseOperand, RowAddress}; use eggmock::{Id, Mig, ProviderWithBackwardEdges, Signal}; use rustc_hash::FxHashMap; use std::collections::hash_map::Entry; - -type RowAddress = u64; - -/// Equivalent to a DRAM row. -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub enum Row { - /// Row used as input operand - In(RowAddress), - /// Row used as output operand - Out(RowAddress), - /// Why do we need spill rows???? - Spill(RowAddress), +use std::fmt::{Display, Formatter}; + +/// Implement this trait for your specific DRAM-module to support FCDRAM-functionality +/// - contains the mapping of logical-ops to FCDRAM-Architecture (see +/// [`FCDRAMArchitecture::get_instructions_implementation_of_logic_ops`] +/// +/// # Possible Changes in Future +/// +/// - add trait-bound to a more general `Architecture`-trait to fit in the overall framework? +pub trait FCDRAMArchitecture { + /// Returns vector of simultaneously activated rows when issuing `APA(r1,r2)`-cmd + /// NOTE: this may depend on the used DRAM - see [3] for a method for reverse-engineering + /// which rows are activated simultaneously (also see RowClone) + fn get_simultaneously_activated_rows_of_apa_op(r1: RowAddress, r2: RowAddress) -> Vec; + + /// Implements given logic operation using FCDRAM-Instructions + /// REMINDER: for OR&AND additionall [`Instruction::FracOp`]s need to be issued to setup the + /// reference subarray containing `reference_rows` in order to perform the given `logic_op` on + /// the `compute_rows` inside the computation rows + /// + /// - [ ] TODO: for `NOT`: `reference_rows`=??? (empty or =result rows?) + /// + /// NOTE: `compute_rows` are expected to lay in the same subarray and `reference_rows` in one + /// subarray adjacent to the compute subarray (!this is not checked but assumed to be true!) + fn get_instructions_implementation_of_logic_ops(logic_op: SupportedLogicOps, compute_rows: Vec, reference_rows: Vec) -> Vec { + todo!() + } } -/// Contains a snapshot state of the rows in an Ambit-like DRAM -#[derive(Debug, Clone)] -pub struct Rows<'a> { - signals: FxHashMap>, - rows: FxHashMap, - spill_counter: u32, - /// Representation of the underlying PuD Architecture - architecture: &'a Architecture, +pub type RowAddress = u64; + +/// Instructions used in FC-DRAM +/// - NOT: implemented using `APA` +/// - AND/OR: implemented by (see [1] Chap6.1.2) +/// 1. setting `V_{AND}`/`V_{OR}` in reference subarray and then issuing (using FracOperation +/// for storing `V_{DD}/2`) +/// 2. Issue `APA(R_{REF},R_{COM})` to simultaneously activate `N` rows in reference subarray +/// and `N` rows in compute subarray +/// 3. Wait for `t_{RAS}` (=overwrites activated cells in compute subarray with AND/OR-result) +/// 4. Issue `PRE` to complete the operation +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum Instruction { + /// Needed for initializing neutral row in reference subarray (to set `V_{AND}`/`V_{OR}` (see + /// [1]( + /// Implemented using AP without any extra cycles in between) (see [2]) + /// - `PRE` "interrupt the process of row activation, and prevent the sense amplifier from being enabled" + FracOp(RowAddress), + /// Multiple-Row Activation: `ACT R_F -> PRE -> ACT R_L -> PRE` of rows `R_F`,`R_L` for rows within + /// different subarrays. As a result `R_L` holds the negated value of `R_F` (see Chap5.1 of + /// PaperFunctionally Complete DRAMs + /// Used to implement NOT directly + APA(RowAddress,RowAddress), } -/// Maps operands from network to LIM rows (?) -impl<'a> Rows<'a> { - /// Initializes the rows with the leaf values in the given network (this values are known from - /// the start; intermediate node-values still need to be computed) - pub fn new( - ntk: &impl ProviderWithBackwardEdges, - architecture: &'a Architecture, - ) -> Self { - let mut rows = Rows { - signals: FxHashMap::default(), - rows: FxHashMap::default(), - spill_counter: 0, - architecture, - }; - rows.add_leaves(ntk); - rows - } - - /// Assign leaves to rows - /// REMINDER: leaves are either inputs or constants - /// TODO: choose rows which are close to Sense-Amps ?? - /// TODO: change from Mig to Aig? - fn add_leaves(&mut self, ntk: &impl ProviderWithBackwardEdges) { - let leaves = ntk.leaves(); - self.rows.reserve(leaves.size_hint().0); - for id in leaves { - let node = ntk.node(id); - match node { - Mig::Input(i) => { - self.set_empty_row_signal(RowOperand::In(i), Signal::new(id, false)); - }, - Mig::False => { - let signal = Signal::new(id, false); - self.set_empty_row_signal(RowOperand::Const(false), signal); - self.set_empty_row_signal(RowOperand::Const(true), signal.invert()); - }, - _ => unreachable!("leaf node should be either an input or a constant"), - }; - } - } - - /// Returns the current signal of the given row. - pub fn get_row_signal(&self, row: RowOperand) -> Option { - self.rows.get(&row).cloned() - } - - /// Returns the signal of the given address - /// TODO: how does FC-DRAM deal with inverted signals? - pub fn get_address_signal(&self, address: RowAddress) -> Option { - self.get_row_signal(address.row())? - .maybe_invert(address.inverted()) - .into() - } - - /// Returns the signal of the given operand - pub fn get_operand_signal(&self, operand: BitwiseOperand) -> Option { - self.get_address_signal(operand.into()) - } - - /// Returns all rows with the given signal. - pub fn get_rows(&self, signal: Signal) -> impl Iterator + '_ { - self.signals.get(&signal).into_iter().flatten().cloned() - } - - /// Returns true iff a signal with the given id is stored in a row. - pub fn contains_id(&self, id: Id) -> bool { - self.get_rows(Signal::new(id, false)).next().is_some() - || self.get_rows(Signal::new(id, true)).next().is_some() - } - - /// Adds a new spill row with the given signal and returns its id. - /// TODO: Why would we need spill rows??? - pub fn add_spill(&mut self, signal: Signal) -> u32 { - self.spill_counter += 1; - self.set_empty_row_signal(RowOperand::Spill(self.spill_counter), signal); - self.spill_counter - } - - /// Sets the current signal of the given operand - /// Returns the signal of the operand previous to this operation if it was changed. - pub fn set_signal(&mut self, address: RowAddress, signal: Signal) -> Option { - self.set_row_signal(address.row(), signal.maybe_invert(address.inverted()))? - .maybe_invert(address.inverted()) - .into() - } - - /// Equivalent to `set_row_signals`, but additionally ensures that the row was previously empty - /// or contained the same signal. - fn set_empty_row_signal(&mut self, row: RowOperand, signal: Signal) { - assert_eq!( - self.set_row_signal(row, signal), - None, - "row {row:?} should be empty" - ) - } - - /// Sets the signal of the given row, updating `self.rows` and `self.signals` accordingly. - /// Returns the previous signal of the given row if it was changed. - fn set_row_signal(&mut self, row: RowOperand, signal: Signal) -> Option { - let row_entry = self.rows.entry(row); - - // detach previous signal - let prev = match &row_entry { - Entry::Occupied(v) => { - let prev = *v.get(); - if prev == signal { - // signal already correctly set - return None; - } - Some(prev) - } - _ => None, +impl Display for Instruction { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let description = match self { + Instruction::FracOp(row) => "AP(row)", + Instruction::APA(row1,row2) => "APA({row1},{row2})", }; - if let Some(prev) = prev { - let prev_locations = self.signals.get_mut(&prev).unwrap(); - prev_locations.swap_remove(prev_locations.iter().position(|r| *r == row).unwrap()); - } - - // set new signal - row_entry.insert_entry(signal); - self.signals.entry(signal).or_default().push(row); - - prev + write!(f, "{}", description) } - - /// Free rows - /// NOTE: id depends on whether the signal is inverted or not (=MSB set or not) - pub fn free_id_rows(&mut self, id: Id) { - let non_inv = Signal::new(id, false); - let inv = Signal::new(id, true); - for sig in [non_inv, inv] { - let Some(rows) = self.signals.remove(&sig) else { - continue - }; - for row in rows { - self.rows.remove(&row); - } - } - } -} - -/// Single Row or multiple rows (since FC-DRAM can also work with multiple row-operands) -/// TODO: change this to optionally take in multiple rows -/// IDEA: enum of single row vs multiple rows? -pub struct RowOperand { - rows: Vec, } -impl RowOperand { - /// Currently supported nr of row-operands: 1,2,4,8,16,32 - /// - ! the given `RowAddresses` are expected to refer to rows within the SAME subarray (TODO: - /// check if this is forced by FC-DRAM architecture) - pub fn new(rows: Vec) -> Self { - const SUPPORTED_ROW_OPERAND_NR: [usize; 6] = [1,2,4,8,16,32]; - if SUPPORTED_ROW_OPERAND_NR.contains(&rows.len()) { - panic!("[ERROR] FC-DRAM currently supports only 1|2|4|8|16|32 row-operands"); - } - Self { - rows, - } +/// TODO: where to put logic for determining which rows are activated simultaneously given two +/// row-addresses +impl Instruction { + /// Return Addreses of Rows which are used by this instruction (=operand-rows AND result-row) + /// - REMINDER: although only two row-operands are given to `APA`, more rows can be/are affected due to *Simultaneous Row Activation* (see [3]) + /// TODO + pub fn used_addresses<'a>( + &self, + ) -> impl Iterator + 'a { + todo!() + // let from = match self { + // Instruction::AAP(from, _) => from, + // Instruction::AP(op) => op, + // } + // .row_addresses(architecture); + // let to = match self { + // Instruction::AAP(_, to) => Some(*to), + // _ => None, + // } + // .into_iter() + // .flat_map(|addr| addr.row_addresses(architecture)); + // from.chain(to) } -} - -/// Consists of n rows -/// `rows_in_subarray` should be either 512 OR 1024, but not both ! -pub struct DRAMSubarray { - rows: [RowOperand; n], -} - -/// Represents subarrays and which of those subarrays are neighbors -pub struct DRAMChip { -} - -#[derive(Clone, Debug)] -pub struct Architecture { - maj_ops: Vec, - multi_activations: Vec>, - num_dcc: u8, -} - -impl Architecture { - pub fn new(multi_activations: Vec>, num_dcc: u8) -> Self { - let maj_ops = multi_activations - .iter() - .enumerate() - .filter(|(_, ops)| ops.len() == 3) - .map(|(i, _)| i) - .collect(); - Self { - maj_ops, - multi_activations, - num_dcc, - } + /// Returns all row-addresses whose values are overriden by this instruction + /// TODO + pub fn overridden_rows<'a>( + &self, + ) -> impl Iterator + 'a { + todo!() } } -static ARCHITECTURE: LazyLock = LazyLock::new(|| { - use BitwiseOperand::*; - Architecture::new( - vec![ - // 2 rows - vec![ - DCC { - index: 0, - inverted: true, - }, - T(0), - ], - vec![ - DCC { - inverted: true, - index: 1, - }, - T(1), - ], - vec![T(2), T(3)], - vec![T(0), T(3)], - // 3 rows - vec![T(0), T(1), T(2)], - vec![T(1), T(2), T(3)], - vec![ - DCC { - index: 0, - inverted: false, - }, - T(1), - T(2), - ], - vec![ - DCC { - index: 0, - inverted: false, - }, - T(0), - T(3), - ], - ], - 2, - ) -}); +/// Contains logical operations which are supported (natively) on FCDRAM-Architecture +pub enum SupportedLogicOps { + NOT, + AND, + OR, + /// implemented using AND+NOT + NAND, + /// implemented using OR+NOT + NOR, +} diff --git a/rs/src/fc_dram/compilation.rs b/rs/src/fc_dram/compilation.rs index e8d9f5b..bdebd94 100644 --- a/rs/src/fc_dram/compilation.rs +++ b/rs/src/fc_dram/compilation.rs @@ -1,72 +1,68 @@ +//! +//! - [`compile()`] = main function - compiles given logic network for the given [`architecture`] +//! into a [`program`] using some [`optimization`] + use super::{ - optimization::optimize, Address, Architecture, BitwiseOperand, Program, ProgramState, - RowAddress, + architecture::FCDRAMArchitecture, optimization::optimize, Program, ProgramState, RowAddress }; -use crate::fcdram::rows::Row; use eggmock::{Id, Mig, Aig, Node, ProviderWithBackwardEdges, Signal}; use rustc_hash::{FxHashMap, FxHashSet}; use std::cmp::max; /// Compiles given `network` to a program that can be run on given `architecture` pub fn compile<'a>( - architecture: &'a Architecture, network: &impl ProviderWithBackwardEdges, -) -> Program<'a> { - let mut state = CompilationState::new(architecture, network); - let mut max_cand_size = 0; // TODO: unused? - while !state.candidates.is_empty() { - max_cand_size = max(max_cand_size, state.candidates.len()); - - // TODO: ??? - let (id, node, _, _, _) = state - .candidates - .iter() - .copied() - .map(|(id, node)| { - let outputs = state.network.node_outputs(id).count(); - let output = state.network.outputs().any(|out| out.node_id() == id); - let not_present = node - .inputs() - .iter() - .map(|signal| { - let present = state - .program - .rows() - .get_rows(*signal) - .any(|row| matches!(row, Row::Bitwise(_))); - !present as u8 - }) - .sum::(); - (id, node, not_present, outputs, output) - }) - .min_by_key(|(_, _, not_present, outputs, output)| (*not_present, *outputs, !output)) - .unwrap(); - - // TODO: ??? - let output = state.outputs.get(&id).copied(); - if let Some((output, signal)) = output { - if signal.is_inverted() { - state.compute(id, node, None); - state.program.signal_copy( - signal, - RowAddress::Out(output), - state.program.rows().get_free_dcc().unwrap_or(0), - ); - } else { - state.compute(id, node, Some(Address::Out(output))); - } - let leftover_uses = *state.leftover_use_count(id); - if leftover_uses == 1 { - state.program.free_id_rows(id); - } - } else { - state.compute(id, node, None); - } - } - - let mut program = state.program.into(); - optimize(&mut program); - program +) -> Program<'a, A> { + // let mut state = CompilationState::new(architecture, network); + // let mut max_cand_size = 0; // TODO: unused? + // while !state.candidates.is_empty() { + // max_cand_size = max(max_cand_size, state.candidates.len()); + // + // // TODO: ??? + // let (id, node, _, _, _) = state + // .candidates + // .iter() + // .copied() + // .map(|(id, node)| { + // let outputs = state.network.node_outputs(id).count(); + // let output = state.network.outputs().any(|out| out.node_id() == id); + // let not_present = node + // .inputs() + // .iter() + // .map(|signal| { + // let present = state + // .program + // .rows(); + // // .any(|row| matches!(row, Row::Bitwise(_))); + // todo!() + // }) + // .sum::(); + // (id, node, not_present, outputs, output) + // }) + // .min_by_key(|(_, _, not_present, outputs, output)| (*not_present, *outputs, !output)) + // .unwrap(); + // + // // TODO: ??? + // let output = state.outputs.get(&id).copied(); + // if let Some((output, signal)) = output { + // if signal.is_inverted() { + // state.compute(id, node, None); + // } else { + // // state.compute(id, node, Some(Address::Out(output))); + // state.compute(id, node, None); + // } + // let leftover_uses = *state.leftover_use_count(id); + // if leftover_uses == 1 { + // state.program.free_id_rows(id); + // } + // } else { + // state.compute(id, node, None); + // } + // } + // + // let mut program = state.program.into(); + // optimize(&mut program); + // program } pub struct CompilationState<'a, 'n, P> { @@ -80,7 +76,7 @@ pub struct CompilationState<'a, 'n, P> { } impl<'a, 'n, P: ProviderWithBackwardEdges> CompilationState<'a, 'n, P> { - pub fn new(architecture: &'a Architecture, network: &'n P) -> Self { + pub fn new(network: &'n P) -> Self { let mut candidates = FxHashSet::default(); // check all parents of leaves whether they have only leaf children, in which case they are // candidates @@ -96,7 +92,7 @@ impl<'a, 'n, P: ProviderWithBackwardEdges> CompilationState<'a, 'n, } } } - let program = ProgramState::new(architecture, network); + let program = ProgramState::new(network); let outputs = network .outputs() @@ -119,167 +115,8 @@ impl<'a, 'n, P: ProviderWithBackwardEdges> CompilationState<'a, 'n, }) } - pub fn compute(&mut self, id: Id, node: Mig, out_address: Option
) { - if !self.candidates.remove(&(id, node)) { - panic!("not a candidate"); - } - let Mig::Maj(mut signals) = node else { - panic!("can only compute majs") - }; - - // select which MAJ instruction to use - // for this we use the operation with has the most already correctly placed operands - let mut opt = None; - for id in self.architecture().maj_ops.iter().copied() { - let operands = self.architecture().multi_activations[id] - .as_slice() - .try_into() - .expect("maj has to have 3 operands"); - let (matches, match_no) = self.get_mapping(&mut signals, operands); - let dcc_cost = self.optimize_dcc_usage(&mut signals, operands, &matches); - let spilling_cost = self.spilling_cost(operands, &matches); - let cost = 3.0 - match_no as f32 + dcc_cost as f32 + 0.5 * spilling_cost as f32; - let is_opt = match &opt { - None => true, - Some((opt_no, _, _, _)) => *opt_no > cost, - }; - if is_opt { - opt = Some((cost, id, matches, signals)); - } - } - let (_, maj_id, matches, signals) = opt.unwrap(); - let operands = &self.architecture().multi_activations[maj_id]; - - // now we need to place the remaining non-matching operands... - - // for that we first find a free DCC row for possibly inverting missing signals without - // accidentally overriding a signal that is already placed correctly - let used_dcc = || { - operands.iter().filter_map(|op| match op { - BitwiseOperand::DCC { index: i, .. } => Some(i), - _ => None, - }) - }; - let free_dcc = (0..self.architecture().num_dcc) - .find(|i| !used_dcc().any(|used| *used == *i)) - .expect("cannot use all DCC rows in one MAJ operation"); - - // then we can copy the signals into their places - for i in 0..3 { - if matches[i] { - continue; - } - self.program - .signal_copy(signals[i], RowAddress::Bitwise(operands[i]), free_dcc); - } - - // all signals are in place, now we can perform the MAJ operation - self.program - .maj(maj_id, Signal::new(id, false), out_address); - - // free up rows if possible - // (1) for the MAJ-signal - if *self.leftover_use_count(id) == 0 { - self.program.free_id_rows(id); - } - // (2) for the input signals - 'outer: for i in 0..3 { - // decrease use count only once per id - for j in 0..i { - if signals[i].node_id() == signals[j].node_id() { - continue 'outer; - } - } - *self.leftover_use_count(signals[i].node_id()) -= 1 - } - - // lastly, determine new candidates - for parent_id in self.network.node_outputs(id) { - let parent_node = self.network.node(parent_id); - if parent_node - .inputs() - .iter() - .all(|s| self.program.rows().contains_id(s.node_id())) - { - self.candidates.insert((parent_id, parent_node)); - } - } - } - - fn optimize_dcc_usage( - &self, - signals: &mut [Signal; 3], - operands: &[BitwiseOperand; 3], - matching: &[bool; 3], - ) -> i32 { - // first, try using a DCC row for all non-matching rows that require inversion - let mut dcc_adjusted = [false; 3]; - let mut changed = true; - while changed { - changed = false; - for i in 0..3 { - if matching[i] - || operands[i].is_dcc() - || self.program.rows().get_rows(signals[i]).next().is_some() - { - continue; - } - // the i-th operand needs inversion. let's try doing this by swapping it with a - // signal of a DCC row so that we require one less copy operation - for j in 0..3 { - if i == j || matching[j] || dcc_adjusted[j] { - continue; - } - if operands[j].is_dcc() { - signals.swap(i, j); - dcc_adjusted[j] = true; - changed = true; - } - } - } - } - - let mut cost = 0; - for ((signal, operand), matching) in signals.iter().zip(operands).zip(matching) { - if *matching || operand.is_dcc() { - continue; - } - // if the signal is not stored somewhere, i.e. only the inverted signal is present, this - // requires a move via a DCC row to the actual operand - if self.program.rows().get_rows(*signal).next().is_none() { - cost += 1 - } - } - cost - } - - fn spilling_cost(&self, operands: &[BitwiseOperand; 3], matching: &[bool; 3]) -> i32 { - let mut cost = 0; - for i in 0..3 { - if matching[i] { - continue; - } - let Some(_signal) = self - .program - .rows() - .get_row_signal(Row::Bitwise(operands[i].row())) - else { - continue; - }; - cost += 1 - // // signal and inverted signal not present somewhere else - // if self.program.rows().get_rows(signal).count() < 2 - // && self - // .program - // .rows() - // .get_rows(signal.invert()) - // .next() - // .is_none() - // { - // cost += 1 - // } - } - cost + pub fn compute(&mut self, id: Id, node: Mig, out_address: Option) { + todo!() } /// Reorders the `signals` so that the maximum number of the given signal-operator-pairs already @@ -289,7 +126,7 @@ impl<'a, 'n, P: ProviderWithBackwardEdges> CompilationState<'a, 'n, fn get_mapping( &self, signals: &mut [Signal; 3], - operands: &[BitwiseOperand; 3], + operands: &[RowAddress; 3], ) -> ([bool; 3], usize) { let signals_with_idx = { let mut i = 0; @@ -341,8 +178,4 @@ impl<'a, 'n, P: ProviderWithBackwardEdges> CompilationState<'a, 'n, } (result, assigned_signals) } - - fn architecture(&self) -> &'a Architecture { - self.program.architecture - } } diff --git a/rs/src/fc_dram/extraction.rs b/rs/src/fc_dram/extraction.rs index 7c005dc..803e412 100644 --- a/rs/src/fc_dram/extraction.rs +++ b/rs/src/fc_dram/extraction.rs @@ -9,33 +9,10 @@ use std::cmp::{max, Ordering}; use std::iter; use std::ops::{Deref, Index}; use std::rc::Rc; -use super::{compile, Architecture}; +use super::architecture::FCDRAMArchitecture; +use super::compile; -pub struct CompilingCostFunction<'a> { - pub architecture: &'a Architecture -} - -#[derive(Debug, Default, Eq, PartialEq)] -pub enum NotNesting { - #[default] - NotANot, - FirstNot, - NestedNots, -} - -#[derive(Debug)] -pub struct StackedPartialGraph { - nodes: Vec>>, - first_free_id: usize, - root: MigLanguage, -} - -#[derive(Debug)] -pub struct CollapsedPartialGraph { - nodes: Rc>, - first_free_id: usize, - root_id: Id, -} +pub struct CompilingCostFunction<'a> {} impl StackedPartialGraph { pub fn leaf(node: MigLanguage) -> Self { @@ -88,7 +65,7 @@ pub struct CompilingCost { program_cost: usize, } -impl CostFunction for CompilingCostFunction<'_> { +impl CostFunction for CompilingCostFunction<'_, A> { type Cost = Rc; /// Compute cost of given `enode` @@ -96,27 +73,28 @@ impl CostFunction for CompilingCostFunction<'_> { where C: FnMut(Id) -> Self::Cost, { - let root = enode.clone(); - let cost = match enode { - MigLanguage::False | MigLanguage::Input(_) => CompilingCost::leaf(root), - MigLanguage::Not(id) => { - let cost = costs(*id); - - let nesting = if cost.not_nesting == NotNesting::NotANot { - NotNesting::FirstNot - } else { - NotNesting::NestedNots - }; - CompilingCost::with_children(self.architecture, root, iter::once((*id, cost)), nesting) - } - MigLanguage::Maj(children) => CompilingCost::with_children( - self.architecture, - root, - children.map(|id| (id, costs(id))), - NotNesting::NotANot, - ), - }; - Rc::new(cost) + todo!() + // let root = enode.clone(); + // let cost = match enode { + // MigLanguage::False | MigLanguage::Input(_) => CompilingCost::leaf(root), + // MigLanguage::Not(id) => { + // let cost = costs(*id); + // + // let nesting = if cost.not_nesting == NotNesting::NotANot { + // NotNesting::FirstNot + // } else { + // NotNesting::NestedNots + // }; + // CompilingCost::with_children(self.architecture, root, iter::once((*id, cost)), nesting) + // } + // MigLanguage::Maj(children) => CompilingCost::with_children( + // self.architecture, + // root, + // children.map(|id| (id, costs(id))), + // NotNesting::NotANot, + // ), + // }; + // Rc::new(cost) } } @@ -130,36 +108,22 @@ impl CompilingCost { } pub fn with_children( - architecture: &Architecture, root: MigLanguage, child_costs: impl IntoIterator)>, - not_nesting: NotNesting, ) -> Self { - let child_graphs = child_costs - .into_iter() - .map(|(id, cost)| cost.collapsed_graph(id)); - let partial_graph = StackedPartialGraph::new(root, child_graphs); - let program_cost = compile(architecture, &partial_graph.with_backward_edges()).instructions.len(); - Self { - partial: RefCell::new(Either::Left(partial_graph)), - not_nesting, - program_cost, - } + todo!() + // let child_graphs = child_costs + // .into_iter() + // .map(|(id, cost)| cost.collapsed_graph(id)); + // let partial_graph = StackedPartialGraph::new(root, child_graphs); + // let program_cost = compile(architecture, &partial_graph.with_backward_edges()).instructions.len(); + // Self { + // partial: RefCell::new(Either::Left(partial_graph)), + // not_nesting, + // program_cost, + // } } - pub fn collapsed_graph(&self, id: Id) -> Rc { - let mut partial = self.partial.borrow_mut(); - let stacked = match partial.deref() { - Either::Left(stacked) => stacked, - Either::Right(collapsed) => { - assert_eq!(collapsed.root_id, id); - return collapsed.clone(); - } - }; - let collapsed = Rc::new(stacked.collapse(id)); - *partial = Either::Right(collapsed.clone()); - collapsed - } } impl StackedPartialGraph { diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index 68c3a04..ec419b8 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -1,8 +1,23 @@ +//! # Literature +//! +//! - [1] Functionally-Complete Boolean Logic in Real DRAM Chips: Experimental Characterization and Analysis, 2024 +//! - [2] FracDRAM: Fractional Values in Off-the-Shelf DRAM, 2022 +//! - [3] PULSAR: Simultaneous Many-Row Activation for Reliable and High-Performance Computing in Off-the-Shelf DRAM Chips, 2024 +//! +//! # Submodules +//! +//! - [`architecture`] - defines Instructions (and performance-metrics of Instructions in that +//! architecture) used in FC-DRAM +//! - [`compilation`] - compiles given LogicNetwork for FC-DRAM architecture +//! - [`generator`] — Generates output code or reports based on analysis. +//! - [`implementation_example`] - example implementation of a FCDRAM-Architecture and how to use +//! it mod compilation; mod extraction; mod optimization; mod program; mod architecture; +mod implementation_example; use std::sync::LazyLock; use std::time::Instant; @@ -43,24 +58,23 @@ struct CompilingReceiverResult<'a> { t_compiler: u128, } -#[ouroboros::self_referencing] +// #[ouroboros::self_referencing] struct CompilerOutput<'a> { graph: EGraph, - #[borrows(graph)] - #[covariant] + // #[borrows(graph)] + // #[covariant] ntk: ( Extractor<'this, CompilingCostFunction<'a>, MigLanguage, ()>, Vec, ), - #[borrows(ntk)] - program: Program<'a>, + // #[borrows(ntk)] + program: Program<'a, A>, } fn compiling_receiver<'a>( - architecture: &'a Architecture, rules: &'a [Rewrite], settings: CompilerSettings, -) -> impl Receiver, Node = Mig> + 'a { +) -> impl Receiver, Node = Mig> + 'a { EGraph::::new(()).map(move |(graph, outputs)| { let t_runner = std::time::Instant::now(); let runner = Runner::default().with_egraph(graph).run(rules); @@ -80,9 +94,7 @@ fn compiling_receiver<'a>( let start_time = Instant::now(); let extractor = Extractor::new( &graph, - CompilingCostFunction { - architecture: &architecture, - }, + CompilingCostFunction {}, ); t_extractor = start_time.elapsed().as_millis(); (extractor, outputs) @@ -126,17 +138,18 @@ struct FCDramRewriter(CompilerSettings); impl Rewriter for FCDramRewriter { type Node = Mig; - type Intermediate = CompilingReceiverResult<'static>; + type Intermediate = CompilingReceiverResult<'static, A>; fn create_receiver( &mut self, - ) -> impl Receiver> + 'static { - compiling_receiver(&*ARCHITECTURE, REWRITE_RULES.as_slice(), self.0) + ) -> impl Receiver> + 'static { + todo!() + // compiling_receiver(&*ARCHITECTURE, REWRITE_RULES.as_slice(), self.0) } fn rewrite( self, - result: CompilingReceiverResult<'static>, + result: CompilingReceiverResult<'static, A>, output: impl Receiver, ) { result.output.borrow_ntk().send(output); @@ -163,18 +176,20 @@ struct CompilerStatistics { #[no_mangle] extern "C" fn fcdram_compile(settings: CompilerSettings) -> MigReceiverFFI { - let receiver = - compiling_receiver(&*&ARCHITECTURE, REWRITE_RULES.as_slice(), settings).map(|res| { - let graph = res.output.borrow_graph(); - CompilerStatistics { - egraph_classes: graph.number_of_classes() as u64, - egraph_nodes: graph.total_number_of_nodes() as u64, - egraph_size: graph.total_size() as u64, - instruction_count: res.output.borrow_program().instructions.len() as u64, - t_runner: res.t_runner as u64, - t_extractor: res.t_extractor as u64, - t_compiler: res.t_compiler as u64, - } - }); - MigReceiverFFI::new(receiver) + todo!() + // TODO: create example `ARCHITECTURE` implementing `FCDRAMArchitecture` + // let receiver = + // compiling_receiver(&*&ARCHITECTURE, REWRITE_RULES.as_slice(), settings).map(|res| { + // let graph = res.output.borrow_graph(); + // CompilerStatistics { + // egraph_classes: graph.number_of_classes() as u64, + // egraph_nodes: graph.total_number_of_nodes() as u64, + // egraph_size: graph.total_size() as u64, + // instruction_count: res.output.borrow_program().instructions.len() as u64, + // t_runner: res.t_runner as u64, + // t_extractor: res.t_extractor as u64, + // t_compiler: res.t_compiler as u64, + // } + // }); + // MigReceiverFFI::new(receiver) } diff --git a/rs/src/fc_dram/optimization.rs b/rs/src/fc_dram/optimization.rs index ae4609a..ed341d6 100644 --- a/rs/src/fc_dram/optimization.rs +++ b/rs/src/fc_dram/optimization.rs @@ -1,233 +1,26 @@ -use super::{Program, RowOperand}; -use crate::ambit::program::{Address, BitwiseAddress, Instruction}; +use crate::fc_dram::architecture::{RowAddress, Instruction}; use rustc_hash::FxHashSet; +use super::{architecture::FCDRAMArchitecture, program::Program}; + pub fn optimize(program: &mut Program) { if program.instructions.len() == 0 { return; } let mut opt = Optimization { program }; - opt.dead_code_elimination(); - opt.merge_aap(); - opt.merge_ap_aap(); + // TODO: perform optimizations ! } pub struct Optimization<'p, 'a> { program: &'p mut Program<'a>, } -impl Optimization<'_, '_> { +// TODO: manual optimizations? +impl Optimization<'_, '_,> { + /// TODO: perform some basic compiler-optimization like dead_code_elimination? or will this + /// already be done by the MLIR dialect? fn dead_code_elimination(&mut self) { - let liveness = row_liveness(self.program); - let mut i = 0; - let mut liveness_i = 0; - 'outer: while liveness_i < liveness.len() - 1 { - let instruction = self.program.instructions[i]; - let liveness_after = &liveness[liveness_i + 1]; - - // if no overridden row is live after this instruction, we can remove the instruction - for row in instruction.overridden_rows(self.program.architecture) { - if matches!(row, RowOperand::Out(_)) || liveness_after.contains(&row) { - i += 1; - liveness_i += 1; - continue 'outer; - } - } - - liveness_i += 1; - self.program.instructions.remove(i); - } - } - - fn merge_aap(&mut self) { - let instructions = &mut self.program.instructions; - let mut i = 0; - while i < instructions.len() { - let instruction = instructions[i]; - - // let's just handle the simple case where we copy from a single row activation to some - // new bitwise operand - let Instruction::AAP(from_address, to) = instruction else { - i += 1; - continue; - }; - let Some(from) = from_address.as_single_row() else { - i += 1; - continue; - }; - let Address::Bitwise(BitwiseAddress::Single(to)) = to else { - i += 1; - continue; - }; - - let mut target_addresses = FxHashSet::default(); - target_addresses.insert(to); - instructions.remove(i); - - // search for other operand copies from the copied-from address - // note that we can only replace these copies if the target operand is neither read from - // nor written to between the current instruction and the candidate instruction - let mut candidate_i = i; - let mut used_rows = FxHashSet::default(); - while candidate_i < instructions.len() { - let candidate = instructions[candidate_i]; - if candidate - .overridden_rows(self.program.architecture) - .any(|row| row == from.row()) - { - break; - } - let target = || { - let Instruction::AAP(candidate_from, candidate_to) = candidate else { - return None; - }; - let Address::Bitwise(BitwiseAddress::Single(candidate_to)) = candidate_to - else { - return None; - }; - if from_address == candidate_from - && !used_rows.contains(&RowOperand::Bitwise(candidate_to.row())) - { - Some(candidate_to) - } else { - None - } - }; - let target = target(); - used_rows.extend( - candidate - .used_addresses(self.program.architecture) - .map(|addr| addr.row()), - ); - match target { - Some(to) => { - target_addresses.insert(to); - instructions.remove(candidate_i); - } - None => { - candidate_i += 1; - } - } - } - - // now let's find a covering of the operators and replace the instructions accordingly - let mut copied = FxHashSet::default(); - for (activation_i, multi_activation) in self - .program - .architecture - .multi_activations - .iter() - .enumerate() - { - if multi_activation - .iter() - .any(|op| !target_addresses.contains(op)) - { - continue; - } - // TODO: not all such activation may be necessary - copied.extend(multi_activation.iter().copied()); - instructions.insert( - i, - Instruction::AAP( - from.into(), - Address::Bitwise(BitwiseAddress::Multiple(activation_i)), - ), - ); - i += 1; - } - for op in target_addresses.difference(&copied) { - instructions.insert( - i, - Instruction::AAP(from.into(), Address::Bitwise(BitwiseAddress::Single(*op))), - ); - i += 1; - } - } - } - - fn merge_ap_aap(&mut self) { - let instructions = &mut self.program.instructions; - let mut i = 0; - - 'outer: while i < instructions.len() { - let instruction = instructions[i]; - let Instruction::AP(address) = instruction else { - i += 1; - continue; - }; - let Address::Bitwise(BitwiseAddress::Multiple(mult_i)) = address else { - i += 1; - continue; - }; - let mut operands = self.program.architecture.multi_activations[mult_i].clone(); - let mut used_rows = FxHashSet::default(); - for candidate_i in i + 1..instructions.len() { - if operands.is_empty() { - break; - } - let candidate = instructions[candidate_i]; - - if let Instruction::AAP(Address::Bitwise(BitwiseAddress::Single(operand)), target) = - candidate - { - if target - .row_addresses(self.program.architecture) - .any(|addr| used_rows.contains(&addr.row())) - && operands.contains(&operand) - { - instructions[i] = Instruction::AAP(address, target); - instructions.remove(candidate_i); - i += 1; - continue 'outer; - } - } - - used_rows.extend( - candidate - .used_addresses(self.program.architecture) - .map(|add| add.row()), - ); - for row in candidate.overridden_rows(self.program.architecture) { - let mut i = 0; - while i < operands.len() { - if RowOperand::Bitwise(operands[i].row()) == row { - operands.swap_remove(i); - } else { - i += 1; - } - } - } - } - i += 1; - } + todo!() } } -/// Returns a vector of the same length as the program where the entry at the index of an -/// instruction the set at that index contains the rows that are live just before the instruction -fn row_liveness(program: &Program) -> Vec> { - let mut result = Vec::with_capacity(program.instructions.len()); - - let mut currently_live = FxHashSet::default(); - for instruction in program.instructions.iter().rev() { - let mut is_live = false; - for row in instruction.overridden_rows(program.architecture) { - if matches!(row, RowOperand::Out(_)) || currently_live.contains(&row) { - is_live = true; - } - currently_live.remove(&row); - } - if is_live { - currently_live.extend( - instruction - .input_operands(program.architecture) - .map(|addr| addr.row()), - ); - } - result.push(currently_live.clone()); - } - - result.reverse(); - result -} diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs index f8954a1..f4102d1 100644 --- a/rs/src/fc_dram/program.rs +++ b/rs/src/fc_dram/program.rs @@ -1,128 +1,44 @@ -use super::{Architecture, BitwiseOperand, BitwiseRow, RowOperand, Rows}; +//! Functionality for generating actual program using architecture defined in [`architecture`] by +//! compiling given logic-network (see [`compilation`]) and potentially adding some manual +//! optimizations ([`optimization`]) +use super::architecture::{FCDRAMArchitecture, RowAddress}; +use crate::fc_dram::architecture::Instruction; use eggmock::{Id, Mig, ProviderWithBackwardEdges, Signal}; use std::fmt::{Display, Formatter}; use std::ops::{Deref, DerefMut}; -/// Instructions which operate on DRAM-Rows -/// TODO: adjust instructions to FC-DRAM?! -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] -pub enum Instruction { - /// TODO: What does this instr do? - /// TODO: which row is operand, which one is a result? - AAP(RowOperand, RowOperand), - /// TODO: What does this instr do? - AP(RowOperand), - /// Multiple-Row Activation: `ACT R_F -> PRE -> ACT R_L -> PRE` of rows `R_F`,`R_L` for rows within - /// different subarrays. As a result `R_L` holds the negated value of `R_F` - /// Used to implement NOT directly - APAP(RowOperand,RowOperand), -} #[derive(Debug, Clone)] pub struct Program<'a> { - pub architecture: &'a Architecture, + pub architecture: &'a A, pub instructions: Vec, } #[derive(Debug, Clone)] pub struct ProgramState<'a> { - program: Program<'a>, + program: Program<'a, A>, /// currently used rows - rows: Rows<'a>, + rows: Vec, } -impl<'a> Program<'a> { - pub fn new(architecture: &'a Architecture, instructions: Vec) -> Self { +impl<'a> Program<'a, A> { + pub fn new(instructions: Vec) -> Self { Self { - architecture, instructions, } } } -impl Instruction { - /// Return Addreses of Rows which are used by this instruction (=operand-rows AND result-row) - pub fn used_addresses<'a>( - &self, - architecture: &'a Architecture, - ) -> impl Iterator + 'a { - let from = match self { - Instruction::AAP(from, _) => from, - Instruction::AP(op) => op, - } - .row_addresses(architecture); - let to = match self { - Instruction::AAP(_, to) => Some(*to), - _ => None, - } - .into_iter() - .flat_map(|addr| addr.row_addresses(architecture)); - from.chain(to) - } - - /// Return addresses of operand-rows - pub fn input_operands<'a>( - &self, - architecture: &'a Architecture, - ) -> impl Iterator + 'a { - let from = match self { - Instruction::AAP(from, _) => from, - Instruction::AP(op) => op, - }; - from.row_addresses(architecture) - } - - pub fn overridden_rows<'a>( - &self, - architecture: &'a Architecture, - ) -> impl Iterator + 'a { - let first = match self { - Instruction::AP(a) => a, - Instruction::AAP(a, _) => a, - } - .clone(); - let first = match first { - Address::Bitwise(BitwiseAddress::Multiple(idx)) => { - architecture.multi_activations[idx].as_slice() - } - _ => &[], - } - .iter() - .map(|op| RowOperand::Bitwise(op.row())); - - let second = match self { - Instruction::AP(_) => None, - Instruction::AAP(_, a) => Some(a.row_addresses(architecture).map(|addr| addr.row())), - } - .into_iter() - .flatten(); - - first.chain(second) - } -} - impl<'a> ProgramState<'a> { pub fn new( - architecture: &'a Architecture, network: &impl ProviderWithBackwardEdges, ) -> Self { Self { - program: Program::new(architecture, Vec::new()), - rows: Rows::new(network, architecture), + program: Program::new(Vec::new()), + rows: vec!(), } } - pub fn maj(&mut self, op: usize, out_signal: Signal, out_address: Option
) { - let operands = &self.architecture.multi_activations[op]; - for operand in operands { - self.set_signal(RowAddress::Bitwise(*operand), out_signal); - } - let instruction = match out_address { - Some(out) => Instruction::AAP(BitwiseAddress::Multiple(op).into(), out), - None => Instruction::AP(BitwiseAddress::Multiple(op).into()), - }; - self.instructions.push(instruction) - } /// TODO: Does FC-DRAM need copying of signals? /// pub fn signal_copy(&mut self, signal: Signal, target: RowAddress, intermediate_dcc: u8) { @@ -134,102 +50,42 @@ impl<'a> ProgramState<'a> { /// **ALWAYS** call this before inserting the actual instruction, otherwise the spill code will /// spill the wrong value fn set_signal(&mut self, address: RowAddress, signal: Signal) { - if let Some(previous_signal) = self.rows.set_signal(address, signal) { - if !self.rows.contains_id(previous_signal.node_id()) { - let spill_id = self.rows.add_spill(previous_signal); - self.instructions - .push(Instruction::AAP(address.into(), Address::Spill(spill_id))); - } - } + todo!() } + /// return rows which are currently unused (so they can be used for operations to come) pub fn free_id_rows(&mut self, id: Id) { - self.rows.free_id_rows(id); + todo!() } - pub fn rows(&self) -> &Rows { + pub fn rows(&self) -> &Vec { &self.rows } } impl<'a> Deref for ProgramState<'a> { - type Target = Program<'a>; + type Target = Program<'a, A>; fn deref(&self) -> &Self::Target { &self.program } } -impl DerefMut for ProgramState<'_> { +impl DerefMut for ProgramState<'_,> { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.program } } impl<'a> From> for Program<'a> { - fn from(value: ProgramState<'a>) -> Self { + fn from(value: ProgramState<'a, A>) -> Self { value.program } } /// Print the generated program in human-readable form -impl Display for Program<'_> { +impl DRAMArchitecture> Display for Program<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let write_operand = |f: &mut Formatter<'_>, o: &BitwiseOperand| -> std::fmt::Result { - match o { - BitwiseOperand::T(t) => write!(f, "T{}", t), - BitwiseOperand::DCC { inverted, index } => { - if *inverted { - write!(f, "~DCC{}", index) - } else { - write!(f, "DCC{}", index) - } - } - } - }; - let write_address = |f: &mut Formatter<'_>, a: &Address| -> std::fmt::Result { - match a { - Address::In(i) => write!(f, "I{}", i), - Address::Out(i) => write!(f, "O{}", i), - Address::Spill(i) => write!(f, "S{}", i), - Address::Const(c) => write!(f, "C{}", if *c { "1" } else { "0" }), - Address::Bitwise(b) => match b { - BitwiseAddress::Single(o) => write_operand(f, o), - BitwiseAddress::Multiple(id) => { - let operands = &self.architecture.multi_activations[*id]; - for i in 0..operands.len() { - if i == 0 { - write!(f, "[")?; - } else { - write!(f, ", ")?; - } - write_operand(f, &operands[i])?; - if i == operands.len() - 1 { - write!(f, "]")?; - } - } - Ok(()) - } - }, - } - }; - - for instruction in &self.instructions { - match instruction { - Instruction::AAP(a, b) => { - write!(f, "AAP ")?; - write_address(f, a)?; - write!(f, " ")?; - write_address(f, b)?; - writeln!(f)?; - } - Instruction::AP(a) => { - write!(f, "AP ")?; - write_address(f, a)?; - writeln!(f)?; - } - } - } - Ok(()) + todo!() } } From 26a6e3ba6fd7e452ee5e5125142946ec73b8fc43 Mon Sep 17 00:00:00 2001 From: alku662e Date: Wed, 7 May 2025 21:58:49 +0200 Subject: [PATCH 04/51] WIP: Code changes finally compile --- rs/src/fc_dram/architecture.rs | 8 +- rs/src/fc_dram/compilation.rs | 110 +++++++++++------------ rs/src/fc_dram/extraction.rs | 155 ++++++++++++--------------------- rs/src/fc_dram/mod.rs | 78 +++++++++-------- rs/src/fc_dram/optimization.rs | 7 +- rs/src/fc_dram/program.rs | 23 +++-- 6 files changed, 177 insertions(+), 204 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 9e4c752..00bcd92 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -81,9 +81,10 @@ impl Instruction { /// Return Addreses of Rows which are used by this instruction (=operand-rows AND result-row) /// - REMINDER: although only two row-operands are given to `APA`, more rows can be/are affected due to *Simultaneous Row Activation* (see [3]) /// TODO - pub fn used_addresses<'a>( + pub fn used_addresses( &self, - ) -> impl Iterator + 'a { + ) -> Vec { + // ) -> impl Iterator { todo!() // let from = match self { // Instruction::AAP(from, _) => from, @@ -103,7 +104,8 @@ impl Instruction { /// TODO pub fn overridden_rows<'a>( &self, - ) -> impl Iterator + 'a { + ) -> Vec { + // ) -> impl Iterator { todo!() } } diff --git a/rs/src/fc_dram/compilation.rs b/rs/src/fc_dram/compilation.rs index bdebd94..e56461e 100644 --- a/rs/src/fc_dram/compilation.rs +++ b/rs/src/fc_dram/compilation.rs @@ -10,9 +10,10 @@ use rustc_hash::{FxHashMap, FxHashSet}; use std::cmp::max; /// Compiles given `network` to a program that can be run on given `architecture` -pub fn compile<'a>( +pub fn compile( network: &impl ProviderWithBackwardEdges, -) -> Program<'a, A> { +) -> Program { + todo!() // let mut state = CompilationState::new(architecture, network); // let mut max_cand_size = 0; // TODO: unused? // while !state.candidates.is_empty() { @@ -65,17 +66,17 @@ pub fn compile<'a>( // program } -pub struct CompilationState<'a, 'n, P> { +pub struct CompilationState<'n, P> { /// Network (P=Provider, obsolte naming) network: &'n P, candidates: FxHashSet<(Id, Mig)>, // TODO: probably change to `Aig` ? - program: ProgramState<'a>, + program: ProgramState, outputs: FxHashMap, leftover_use_count: FxHashMap, } -impl<'a, 'n, P: ProviderWithBackwardEdges> CompilationState<'a, 'n, P> { +impl<'n, P: ProviderWithBackwardEdges> CompilationState<'n, P> { pub fn new(network: &'n P) -> Self { let mut candidates = FxHashSet::default(); // check all parents of leaves whether they have only leaf children, in which case they are @@ -128,54 +129,55 @@ impl<'a, 'n, P: ProviderWithBackwardEdges> CompilationState<'a, 'n, signals: &mut [Signal; 3], operands: &[RowAddress; 3], ) -> ([bool; 3], usize) { - let signals_with_idx = { - let mut i = 0; - signals.map(|signal| { - i += 1; - (signal, i - 1) - }) - }; - let operand_signals = operands.map(|op| self.program.rows().get_operand_signal(op)); - - // reorder signals by how often their signal is already available in an operand - let mut signals_with_matches = signals_with_idx.map(|(s, i)| { - ( - s, - i, - operand_signals - .iter() - .filter(|sig| **sig == Some(s)) - .count(), - ) - }); - signals_with_matches.sort_by(|a, b| a.2.cmp(&b.2)); - - // then we can assign places one by one and get an optimal mapping (probably, proof by - // intuition only) - - // contains for each operand index whether the signal at that position is already the - // correct one - let mut result = [false; 3]; - // contains the mapping of old signal index to operand index - let mut new_positions = [0usize, 1, 2]; - // contains the number of assigned signals (i.e. #true in result) - let mut assigned_signals = 0; - - for (signal, signal_idx, _) in signals_with_matches { - // find operand index for that signal - let Some((target_idx, _)) = operand_signals - .iter() - .enumerate() - .find(|(idx, sig)| **sig == Some(signal) && !result[*idx]) - else { - continue; - }; - result[target_idx] = true; - let new_idx = new_positions[signal_idx]; - signals.swap(target_idx, new_idx); - new_positions.swap(target_idx, new_idx); - assigned_signals += 1; - } - (result, assigned_signals) + todo!() + // let signals_with_idx = { + // let mut i = 0; + // signals.map(|signal| { + // i += 1; + // (signal, i - 1) + // }) + // }; + // let operand_signals = operands.map(|op| self.program.rows().get_operand_signal(op)); + // + // // reorder signals by how often their signal is already available in an operand + // let mut signals_with_matches = signals_with_idx.map(|(s, i)| { + // ( + // s, + // i, + // operand_signals + // .iter() + // .filter(|sig| **sig == Some(s)) + // .count(), + // ) + // }); + // signals_with_matches.sort_by(|a, b| a.2.cmp(&b.2)); + // + // // then we can assign places one by one and get an optimal mapping (probably, proof by + // // intuition only) + // + // // contains for each operand index whether the signal at that position is already the + // // correct one + // let mut result = [false; 3]; + // // contains the mapping of old signal index to operand index + // let mut new_positions = [0usize, 1, 2]; + // // contains the number of assigned signals (i.e. #true in result) + // let mut assigned_signals = 0; + // + // for (signal, signal_idx, _) in signals_with_matches { + // // find operand index for that signal + // let Some((target_idx, _)) = operand_signals + // .iter() + // .enumerate() + // .find(|(idx, sig)| **sig == Some(signal) && !result[*idx]) + // else { + // continue; + // }; + // result[target_idx] = true; + // let new_idx = new_positions[signal_idx]; + // signals.swap(target_idx, new_idx); + // new_positions.swap(target_idx, new_idx); + // assigned_signals += 1; + // } + // (result, assigned_signals) } } diff --git a/rs/src/fc_dram/extraction.rs b/rs/src/fc_dram/extraction.rs index 803e412..7af6e46 100644 --- a/rs/src/fc_dram/extraction.rs +++ b/rs/src/fc_dram/extraction.rs @@ -12,60 +12,18 @@ use std::rc::Rc; use super::architecture::FCDRAMArchitecture; use super::compile; -pub struct CompilingCostFunction<'a> {} +pub struct CompilingCostFunction{} -impl StackedPartialGraph { - pub fn leaf(node: MigLanguage) -> Self { - Self { - nodes: Vec::new(), - first_free_id: 0, - root: node, - } - } - - pub fn new( - root: MigLanguage, - child_graphs: impl IntoIterator>, - ) -> Self { - let mut nodes = Vec::new(); - let mut first_free_id = 0; - for graph in child_graphs { - nodes.push(graph.nodes.clone()); - first_free_id = max(first_free_id, graph.first_free_id); - } - Self { - nodes, - first_free_id, - root, - } - } - - pub fn collapse(&self, real_id: Id) -> CollapsedPartialGraph { - let mut nodes: FxHashMap = FxHashMap::default(); - let first_free_id = max(self.first_free_id, usize::from(real_id)); - nodes.extend( - self.nodes - .iter() - .flat_map(|map| map.iter().map(|(id, node)| (*id, node.clone()))), - ); - nodes.insert(real_id, self.root.clone()); - CollapsedPartialGraph { - nodes: Rc::new(nodes), - first_free_id, - root_id: real_id, - } - } -} +// impl StackedPartialGraph { } // Do I need this?? /// TODO: add reliability as cost-metric #[derive(Debug)] pub struct CompilingCost { - partial: RefCell>>, - not_nesting: NotNesting, + // partial: RefCell>>, program_cost: usize, } -impl CostFunction for CompilingCostFunction<'_, A> { +impl CostFunction for CompilingCostFunction { type Cost = Rc; /// Compute cost of given `enode` @@ -101,8 +59,7 @@ impl CostFunction for CompilingCostFunction<'_, A> { impl CompilingCost { pub fn leaf(root: MigLanguage) -> Self { Self { - partial: RefCell::new(Either::Left(StackedPartialGraph::leaf(root))), - not_nesting: NotNesting::NotANot, + // partial: RefCell::new(Either::Left(StackedPartialGraph::leaf(root))), program_cost: 0, } } @@ -126,63 +83,65 @@ impl CompilingCost { } -impl StackedPartialGraph { - pub fn get_root_id(&self) -> Id { - Id::from(self.first_free_id + 1) - } -} - -impl Index for StackedPartialGraph { - type Output = MigLanguage; - - fn index(&self, index: Id) -> &Self::Output { - if index == self.get_root_id() { - &self.root - } else { - self.nodes.iter().filter_map(|m| m.get(&index)).next().unwrap() - } - } -} - -impl Provider for StackedPartialGraph { - type Node = Mig; - - fn outputs(&self) -> impl Iterator { - iter::once(self.to_signal(self.get_root_id())) - } - - fn node(&self, id: eggmock::Id) -> Self::Node { - self[Id::from(id)] - .to_node(|id| self.to_signal(id)) - .expect("id should point to a non-not node") - } -} +// impl StackedPartialGraph { +// pub fn get_root_id(&self) -> Id { +// Id::from(self.first_free_id + 1) +// } +// } +// +// impl Index for StackedPartialGraph { +// type Output = MigLanguage; +// +// fn index(&self, index: Id) -> &Self::Output { +// if index == self.get_root_id() { +// &self.root +// } else { +// self.nodes.iter().filter_map(|m| m.get(&index)).next().unwrap() +// } +// } +// } +// +// impl Provider for StackedPartialGraph { +// type Node = Mig; +// +// fn outputs(&self) -> impl Iterator { +// iter::once(self.to_signal(self.get_root_id())) +// } +// +// fn node(&self, id: eggmock::Id) -> Self::Node { +// self[Id::from(id)] +// .to_node(|id| self.to_signal(id)) +// .expect("id should point to a non-not node") +// } +// } impl PartialEq for CompilingCost { fn eq(&self, other: &Self) -> bool { - if other.not_nesting == NotNesting::NestedNots && self.not_nesting == NotNesting::NestedNots { - true - } else { - self.program_cost.eq(&other.program_cost) - } + todo!() + // if other.not_nesting == NotNesting::NestedNots && self.not_nesting == NotNesting::NestedNots { + // true + // } else { + // self.program_cost.eq(&other.program_cost) + // } } } impl PartialOrd for CompilingCost { fn partial_cmp(&self, other: &Self) -> Option { - #[allow(clippy::collapsible_else_if)] - if self.not_nesting == NotNesting::NestedNots { - if other.not_nesting == NotNesting::NestedNots { - Some(Ordering::Equal) - } else { - Some(Ordering::Greater) - } - } else { - if other.not_nesting == NotNesting::NestedNots { - Some(Ordering::Less) - } else { - self.program_cost.partial_cmp(&other.program_cost) - } - } + todo!() + // #[allow(clippy::collapsible_else_if)] + // if self.not_nesting == NotNesting::NestedNots { + // if other.not_nesting == NotNesting::NestedNots { + // Some(Ordering::Equal) + // } else { + // Some(Ordering::Greater) + // } + // } else { + // if other.not_nesting == NotNesting::NestedNots { + // Some(Ordering::Less) + // } else { + // self.program_cost.partial_cmp(&other.program_cost) + // } + // } } } diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index ec419b8..d82713c 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -10,14 +10,11 @@ //! architecture) used in FC-DRAM //! - [`compilation`] - compiles given LogicNetwork for FC-DRAM architecture //! - [`generator`] — Generates output code or reports based on analysis. -//! - [`implementation_example`] - example implementation of a FCDRAM-Architecture and how to use -//! it mod compilation; mod extraction; mod optimization; mod program; mod architecture; -mod implementation_example; use std::sync::LazyLock; use std::time::Instant; @@ -34,6 +31,7 @@ use program::*; use architecture::*; /// Rewrite rules to use in E-Graph Rewriting (see [egg](https://egraphs-good.github.io/)) +/// TODO: adjust rewriting rules to FCDRAM static REWRITE_RULES: LazyLock>> = LazyLock::new(|| { let mut rules = vec![ rewrite!("commute_1"; "(maj ?a ?b ?c)" => "(maj ?b ?a ?c)"), @@ -50,35 +48,46 @@ static REWRITE_RULES: LazyLock>> = LazyLock::new(|| /// Store compilation output and timing statistics how long compilation stages took /// TODO: unit of t? sec or ms? -struct CompilingReceiverResult<'a> { - output: CompilerOutput<'a>, +struct CompilingReceiverResult { + /// Actual compilation result + output: CompilerOutput, + /// Statistics about compilation t_runner: u128, t_extractor: u128, t_compiler: u128, } -// #[ouroboros::self_referencing] -struct CompilerOutput<'a> { +/// Compilation result (program + E-Graph) +#[ouroboros::self_referencing] +struct CompilerOutput { + /// Result E-Graph graph: EGraph, - // #[borrows(graph)] - // #[covariant] + #[borrows(graph)] + #[covariant] ntk: ( - Extractor<'this, CompilingCostFunction<'a>, MigLanguage, ()>, + Extractor<'this, CompilingCostFunction, MigLanguage, ()>, Vec, ), - // #[borrows(ntk)] - program: Program<'a, A>, + /// Compiled Program + #[borrows(ntk)] + program: Program, } +/// Initiates compilation and prints compilation-statistics fn compiling_receiver<'a>( rules: &'a [Rewrite], settings: CompilerSettings, -) -> impl Receiver, Node = Mig> + 'a { +) -> impl Receiver + use<'a> { + // REMINDER: EGraph implements `Receiver` EGraph::::new(()).map(move |(graph, outputs)| { let t_runner = std::time::Instant::now(); + + // run equivalence saturation let runner = Runner::default().with_egraph(graph).run(rules); + let t_runner = t_runner.elapsed().as_millis(); + if settings.verbose { println!("== Runner Report"); runner.print_report(); @@ -101,7 +110,7 @@ fn compiling_receiver<'a>( }, |ntk| { let start_time = Instant::now(); - let program = compile(architecture, &ntk.with_backward_edges()); + let program = compile(&ntk.with_backward_edges()); // actual compilation !! t_compiler = start_time.elapsed().as_millis(); if settings.print_program || settings.verbose { if settings.verbose { @@ -138,18 +147,19 @@ struct FCDramRewriter(CompilerSettings); impl Rewriter for FCDramRewriter { type Node = Mig; - type Intermediate = CompilingReceiverResult<'static, A>; + type Intermediate = CompilingReceiverResult; fn create_receiver( &mut self, - ) -> impl Receiver> + 'static { - todo!() - // compiling_receiver(&*ARCHITECTURE, REWRITE_RULES.as_slice(), self.0) + ) -> impl Receiver + 'static { + + // todo!() + compiling_receiver(REWRITE_RULES.as_slice(), self.0) } fn rewrite( self, - result: CompilingReceiverResult<'static, A>, + result: CompilingReceiverResult, output: impl Receiver, ) { result.output.borrow_ntk().send(output); @@ -176,20 +186,20 @@ struct CompilerStatistics { #[no_mangle] extern "C" fn fcdram_compile(settings: CompilerSettings) -> MigReceiverFFI { - todo!() + // todo!() // TODO: create example `ARCHITECTURE` implementing `FCDRAMArchitecture` - // let receiver = - // compiling_receiver(&*&ARCHITECTURE, REWRITE_RULES.as_slice(), settings).map(|res| { - // let graph = res.output.borrow_graph(); - // CompilerStatistics { - // egraph_classes: graph.number_of_classes() as u64, - // egraph_nodes: graph.total_number_of_nodes() as u64, - // egraph_size: graph.total_size() as u64, - // instruction_count: res.output.borrow_program().instructions.len() as u64, - // t_runner: res.t_runner as u64, - // t_extractor: res.t_extractor as u64, - // t_compiler: res.t_compiler as u64, - // } - // }); - // MigReceiverFFI::new(receiver) + let receiver = + compiling_receiver(REWRITE_RULES.as_slice(), settings).map(|res| { + let graph = res.output.borrow_graph(); + CompilerStatistics { + egraph_classes: graph.number_of_classes() as u64, + egraph_nodes: graph.total_number_of_nodes() as u64, + egraph_size: graph.total_size() as u64, + instruction_count: res.output.borrow_program().instructions.len() as u64, + t_runner: res.t_runner as u64, + t_extractor: res.t_extractor as u64, + t_compiler: res.t_compiler as u64, + } + }); + MigReceiverFFI::new(receiver) } diff --git a/rs/src/fc_dram/optimization.rs b/rs/src/fc_dram/optimization.rs index ed341d6..ca28a8c 100644 --- a/rs/src/fc_dram/optimization.rs +++ b/rs/src/fc_dram/optimization.rs @@ -1,3 +1,4 @@ +//! Some manual optimizations (LOWEST PRIORITY) use crate::fc_dram::architecture::{RowAddress, Instruction}; use rustc_hash::FxHashSet; @@ -11,12 +12,12 @@ pub fn optimize(program: &mut Program) { // TODO: perform optimizations ! } -pub struct Optimization<'p, 'a> { - program: &'p mut Program<'a>, +pub struct Optimization<'p> { + program: &'p mut Program, } // TODO: manual optimizations? -impl Optimization<'_, '_,> { +impl Optimization<'_> { /// TODO: perform some basic compiler-optimization like dead_code_elimination? or will this /// already be done by the MLIR dialect? fn dead_code_elimination(&mut self) { diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs index f4102d1..465ee74 100644 --- a/rs/src/fc_dram/program.rs +++ b/rs/src/fc_dram/program.rs @@ -9,19 +9,18 @@ use std::ops::{Deref, DerefMut}; #[derive(Debug, Clone)] -pub struct Program<'a> { - pub architecture: &'a A, +pub struct Program { pub instructions: Vec, } #[derive(Debug, Clone)] -pub struct ProgramState<'a> { - program: Program<'a, A>, +pub struct ProgramState { + program: Program, /// currently used rows rows: Vec, } -impl<'a> Program<'a, A> { +impl Program { pub fn new(instructions: Vec) -> Self { Self { instructions, @@ -29,7 +28,7 @@ impl<'a> Program<'a, A> { } } -impl<'a> ProgramState<'a> { +impl ProgramState { pub fn new( network: &impl ProviderWithBackwardEdges, ) -> Self { @@ -63,28 +62,28 @@ impl<'a> ProgramState<'a> { } } -impl<'a> Deref for ProgramState<'a> { - type Target = Program<'a, A>; +impl Deref for ProgramState { + type Target = Program; fn deref(&self) -> &Self::Target { &self.program } } -impl DerefMut for ProgramState<'_,> { +impl DerefMut for ProgramState { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.program } } -impl<'a> From> for Program<'a> { - fn from(value: ProgramState<'a, A>) -> Self { +impl From for Program { + fn from(value: ProgramState) -> Self { value.program } } /// Print the generated program in human-readable form -impl DRAMArchitecture> Display for Program<'_> { +impl Display for Program { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { todo!() } From 10d5c12c2195be93573ce1dac71ea3641bf16a29 Mon Sep 17 00:00:00 2001 From: alku662e Date: Mon, 12 May 2025 16:01:08 +0200 Subject: [PATCH 05/51] :construction: WIP: Added rewrite rules for 'and' --- in.dot | 25 ++++++++ rs/Cargo.lock | 77 ++++++++++++------------ rs/Cargo.toml | 7 ++- rs/src/ambit/mod.rs | 18 ------ rs/src/fc_dram/architecture.rs | 51 ++++++++++++---- rs/src/fc_dram/compilation.rs | 12 ++-- rs/src/fc_dram/extraction.rs | 94 +++++++---------------------- rs/src/fc_dram/mod.rs | 78 +++++++++++++++--------- rs/src/fc_dram/program.rs | 4 +- rs/src/fc_dram_bottom_up/mod.rs | 103 ++++++++++++++++++++++++++++++++ rs/src/lib.rs | 1 + src/fcdram.h | 5 +- src/main.cpp | 19 +++--- 13 files changed, 305 insertions(+), 189 deletions(-) create mode 100644 in.dot create mode 100644 rs/src/fc_dram_bottom_up/mod.rs diff --git a/in.dot b/in.dot new file mode 100644 index 0000000..a184655 --- /dev/null +++ b/in.dot @@ -0,0 +1,25 @@ +digraph { +rankdir=BT; +0 [label="0",shape=box,style=filled,fillcolor=snow2] +1 [label="1",shape=triangle,style=filled,fillcolor=snow2] +2 [label="2",shape=triangle,style=filled,fillcolor=snow2] +3 [label="3",shape=triangle,style=filled,fillcolor=snow2] +4 [label="4",shape=ellipse,style=filled,fillcolor=white] +5 [label="5",shape=ellipse,style=filled,fillcolor=white] +6 [label="6",shape=ellipse,style=filled,fillcolor=white] +po0 [shape=invtriangle,style=filled,fillcolor=snow2] +0 -> 4 [style=solid] +2 -> 4 [style=solid] +3 -> 4 [style=solid] +0 -> 5 [style=solid] +1 -> 5 [style=solid] +3 -> 5 [style=dashed] +0 -> 6 [style=dashed] +4 -> 6 [style=solid] +5 -> 6 [style=solid] +6 -> po0 [style=solid] +{rank = same; 0; 1; 2; 3; } +{rank = same; 4; 5; } +{rank = same; 6; } +{rank = same; po0; } +} diff --git a/rs/Cargo.lock b/rs/Cargo.lock index e2952a9..1cd3f88 100644 --- a/rs/Cargo.lock +++ b/rs/Cargo.lock @@ -22,15 +22,15 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "bitflags" -version = "2.8.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" [[package]] name = "bumpalo" -version = "3.16.0" +version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" [[package]] name = "cfg-if" @@ -79,9 +79,9 @@ dependencies = [ [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "env_logger" @@ -94,21 +94,21 @@ dependencies = [ [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "foldhash" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" dependencies = [ "allocator-api2", "equivalent", @@ -123,9 +123,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "indexmap" -version = "2.7.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown", @@ -133,9 +133,9 @@ dependencies = [ [[package]] name = "indoc" -version = "2.0.5" +version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" [[package]] name = "js-sys" @@ -149,9 +149,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.169" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "lime-rs" @@ -159,6 +159,7 @@ version = "0.1.0" dependencies = [ "eggmock", "either", + "log", "ouroboros", "rustc-hash", "smallvec", @@ -166,9 +167,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.25" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "num-bigint" @@ -200,9 +201,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.20.2" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "ouroboros" @@ -236,9 +237,9 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "proc-macro2" -version = "1.0.93" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] @@ -273,27 +274,27 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] [[package]] name = "raw-cpuid" -version = "11.3.0" +version = "11.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6928fa44c097620b706542d428957635951bade7143269085389d42c8a4927e" +checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146" dependencies = [ "bitflags", ] [[package]] name = "rustc-hash" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "saturating" @@ -303,15 +304,15 @@ checksum = "ece8e78b2f38ec51c51f5d475df0a7187ba5111b2a28bdc761ee05b075d40a71" [[package]] name = "seq-macro" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" [[package]] name = "smallvec" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" [[package]] name = "static_assertions" @@ -338,9 +339,9 @@ checksum = "7c68d531d83ec6c531150584c42a4290911964d5f0d79132b193b67252a23b71" [[package]] name = "syn" -version = "2.0.96" +version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", @@ -369,9 +370,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.14" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "version_check" diff --git a/rs/Cargo.toml b/rs/Cargo.toml index 98b498f..ba08b87 100644 --- a/rs/Cargo.toml +++ b/rs/Cargo.toml @@ -7,11 +7,14 @@ edition = "2021" crate-type = ["cdylib"] [dependencies] -eggmock = { path = "../../eggmock" } +# eggmock = { path = "../../eggmock" } +eggmock = { path = "../../eggmock-fork" } rustc-hash = "2.1.0" either = "1.13.0" smallvec = "1.14.0" ouroboros = "0.18.0" +log = "0.4" [build-dependencies] -eggmock = { path = "../../eggmock" } \ No newline at end of file +# eggmock = { path = "../../eggmock" } +eggmock = { path = "../../eggmock-fork" } diff --git a/rs/src/ambit/mod.rs b/rs/src/ambit/mod.rs index 230f9f0..1a47507 100644 --- a/rs/src/ambit/mod.rs +++ b/rs/src/ambit/mod.rs @@ -264,21 +264,3 @@ extern "C" fn ambit_compile(settings: CompilerSettings) -> MigReceiverFFI MigReceiverFFI { - let receiver = - compiling_receiver(&*&ARCHITECTURE, REWRITE_RULES.as_slice(), settings).map(|res| { - let graph = res.output.borrow_graph(); - CompilerStatistics { - egraph_classes: graph.number_of_classes() as u64, - egraph_nodes: graph.total_number_of_nodes() as u64, - egraph_size: graph.total_size() as u64, - instruction_count: res.output.borrow_program().instructions.len() as u64, - t_runner: res.t_runner as u64, - t_extractor: res.t_extractor as u64, - t_compiler: res.t_compiler as u64, - } - }); - MigReceiverFFI::new(receiver) -} diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 00bcd92..0c79a8a 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -2,17 +2,16 @@ //! - [`FCDRAMArchitecture`] = trait which needs to be implemented for your DRAM-module //! - [`Instruction`] = contains all instructions supported by FC-DRAM architecture //! - [ ] `RowAddress`: utility functions to get subarray-id and row-addr within that subarray from -//! - [ ] ->create `pub struct Architecture` //! RowAddress (eg via bit-shifting given bitmasks for subarray-id & row-addr to put on-top of //! RowAddress - -/// TODO: merge `rows.rs` with `mod.rs` and move into `arch.rs` -use eggmock::{Id, Mig, ProviderWithBackwardEdges, Signal}; +use eggmock::{Id, Aig, ProviderWithBackwardEdges, Signal}; use rustc_hash::FxHashMap; use std::collections::hash_map::Entry; use std::fmt::{Display, Formatter}; +pub type RowAddress = u64; + /// Implement this trait for your specific DRAM-module to support FCDRAM-functionality /// - contains the mapping of logical-ops to FCDRAM-Architecture (see /// [`FCDRAMArchitecture::get_instructions_implementation_of_logic_ops`] @@ -21,13 +20,9 @@ use std::fmt::{Display, Formatter}; /// /// - add trait-bound to a more general `Architecture`-trait to fit in the overall framework? pub trait FCDRAMArchitecture { - /// Returns vector of simultaneously activated rows when issuing `APA(r1,r2)`-cmd - /// NOTE: this may depend on the used DRAM - see [3] for a method for reverse-engineering - /// which rows are activated simultaneously (also see RowClone) - fn get_simultaneously_activated_rows_of_apa_op(r1: RowAddress, r2: RowAddress) -> Vec; /// Implements given logic operation using FCDRAM-Instructions - /// REMINDER: for OR&AND additionall [`Instruction::FracOp`]s need to be issued to setup the + /// REMINDER: for OR&AND additional [`Instruction::FracOp`]s need to be issued to setup the /// reference subarray containing `reference_rows` in order to perform the given `logic_op` on /// the `compute_rows` inside the computation rows /// @@ -38,9 +33,21 @@ pub trait FCDRAMArchitecture { fn get_instructions_implementation_of_logic_ops(logic_op: SupportedLogicOps, compute_rows: Vec, reference_rows: Vec) -> Vec { todo!() } + + /// Returns distance of given `row` to the sense amplifiers + /// - important for calculating reliability of the operation (see [1] Chap5.2) + /// - Methodology used in [1] to determine distance: RowHammer + fn get_distance_of_row_to_sense_amps(&self, row: RowAddress) -> RowDistanceToSenseAmps { + todo!() + } } -pub type RowAddress = u64; +/// Categories of distances of rows to sense-amops +pub enum RowDistanceToSenseAmps { + Close, + Middle, + Far, +} /// Instructions used in FC-DRAM /// - NOT: implemented using `APA` @@ -120,3 +127,27 @@ pub enum SupportedLogicOps { /// implemented using OR+NOT NOR, } + +/// Implements behavior of the RowDecoderCircuitry as described in [3] +pub trait RowDecoder { + /// Returns vector of simultaneously activated rows when issuing `APA(r1,r2)`-cmd + /// NOTE: this may depend on the used DRAM - see [3] for a method for reverse-engineering + /// which rows are activated simultaneously (also see RowClone) + fn get_simultaneously_activated_rows_of_apa_op(&self, r1: RowAddress, r2: RowAddress) -> Vec; + + // TODO: get activation pattern for given rows r1,r2 (N:N vs N:2N) - or just check whether + // N:2N: is supported and let `get_simultaneously_activated_rows_of_apa_op()` handle the rest? +} + +/// Dummy Implementation of a single FCDRAM-Bank +/// NOTE: in order to implement FCDRAM on a whole DRAM-module, +/// they user will need to deal with several DRAM-banks separately +pub struct DummyFCDRAMBank { + /// TODO: just replace with bitmask for determining subarray-id?? + nr_subarrays: u16, + nr_rows_per_subarray: u16 +} + +// TODO: +// impl FCDRAMArchitecture for DummyFCDRAMBank {} +// impl RowDecoder for DummyFCDRAMBank {} diff --git a/rs/src/fc_dram/compilation.rs b/rs/src/fc_dram/compilation.rs index e56461e..df6ad00 100644 --- a/rs/src/fc_dram/compilation.rs +++ b/rs/src/fc_dram/compilation.rs @@ -5,13 +5,13 @@ use super::{ architecture::FCDRAMArchitecture, optimization::optimize, Program, ProgramState, RowAddress }; -use eggmock::{Id, Mig, Aig, Node, ProviderWithBackwardEdges, Signal}; +use eggmock::{Id, Aig, Node, ProviderWithBackwardEdges, Signal}; use rustc_hash::{FxHashMap, FxHashSet}; use std::cmp::max; -/// Compiles given `network` to a program that can be run on given `architecture` +/// Compiles given `network` intto a FCDRAM-[`Program`] that can be run on given `architecture` pub fn compile( - network: &impl ProviderWithBackwardEdges, + network: &impl ProviderWithBackwardEdges, ) -> Program { todo!() // let mut state = CompilationState::new(architecture, network); @@ -69,14 +69,14 @@ pub fn compile( pub struct CompilationState<'n, P> { /// Network (P=Provider, obsolte naming) network: &'n P, - candidates: FxHashSet<(Id, Mig)>, // TODO: probably change to `Aig` ? + candidates: FxHashSet<(Id, Aig)>, // TODO: probably change to `Aig` ? program: ProgramState, outputs: FxHashMap, leftover_use_count: FxHashMap, } -impl<'n, P: ProviderWithBackwardEdges> CompilationState<'n, P> { +impl<'n, P: ProviderWithBackwardEdges> CompilationState<'n, P> { pub fn new(network: &'n P) -> Self { let mut candidates = FxHashSet::default(); // check all parents of leaves whether they have only leaf children, in which case they are @@ -116,7 +116,7 @@ impl<'n, P: ProviderWithBackwardEdges> CompilationState<'n, P> { }) } - pub fn compute(&mut self, id: Id, node: Mig, out_address: Option) { + pub fn compute(&mut self, id: Id, node: Aig, out_address: Option) { todo!() } diff --git a/rs/src/fc_dram/extraction.rs b/rs/src/fc_dram/extraction.rs index 7af6e46..7bf2e06 100644 --- a/rs/src/fc_dram/extraction.rs +++ b/rs/src/fc_dram/extraction.rs @@ -1,16 +1,9 @@ //! Computation of Compiling Costs use eggmock::egg::{CostFunction, Id}; -use eggmock::{EggIdToSignal, MigLanguage, Mig, NetworkLanguage, Provider, Signal}; -use either::Either; -use rustc_hash::FxHashMap; -use std::cell::RefCell; -use std::cmp::{max, Ordering}; -use std::iter; -use std::ops::{Deref, Index}; +use eggmock::{EggIdToSignal, AigLanguage, Aig, NetworkLanguage, Provider, Signal}; +use std::cmp::Ordering; use std::rc::Rc; -use super::architecture::FCDRAMArchitecture; -use super::compile; pub struct CompilingCostFunction{} @@ -20,22 +13,26 @@ pub struct CompilingCostFunction{} #[derive(Debug)] pub struct CompilingCost { // partial: RefCell>>, + /// Probability that the whole program will run successfully + success_rate: f64, + /// Estimation of program cost (from input logic-ops) program_cost: usize, } -impl CostFunction for CompilingCostFunction { +impl CostFunction for CompilingCostFunction { type Cost = Rc; /// Compute cost of given `enode` - fn cost(&mut self, enode: &MigLanguage, mut costs: C) -> Self::Cost + /// TODO: NEXT + fn cost(&mut self, enode: &AigLanguage, mut costs: C) -> Self::Cost where C: FnMut(Id) -> Self::Cost, { todo!() // let root = enode.clone(); // let cost = match enode { - // MigLanguage::False | MigLanguage::Input(_) => CompilingCost::leaf(root), - // MigLanguage::Not(id) => { + // AigLanguage::False | AigLanguage::Input(_) => CompilingCost::leaf(root), + // AigLanguage::Not(id) => { // let cost = costs(*id); // // let nesting = if cost.not_nesting == NotNesting::NotANot { @@ -45,7 +42,7 @@ impl CostFunction for CompilingCostFunction { // }; // CompilingCost::with_children(self.architecture, root, iter::once((*id, cost)), nesting) // } - // MigLanguage::Maj(children) => CompilingCost::with_children( + // AigLanguage::Maj(children) => CompilingCost::with_children( // self.architecture, // root, // children.map(|id| (id, costs(id))), @@ -57,15 +54,9 @@ impl CostFunction for CompilingCostFunction { } impl CompilingCost { - pub fn leaf(root: MigLanguage) -> Self { - Self { - // partial: RefCell::new(Either::Left(StackedPartialGraph::leaf(root))), - program_cost: 0, - } - } pub fn with_children( - root: MigLanguage, + root: AigLanguage, child_costs: impl IntoIterator)>, ) -> Self { todo!() @@ -83,65 +74,20 @@ impl CompilingCost { } -// impl StackedPartialGraph { -// pub fn get_root_id(&self) -> Id { -// Id::from(self.first_free_id + 1) -// } -// } -// -// impl Index for StackedPartialGraph { -// type Output = MigLanguage; -// -// fn index(&self, index: Id) -> &Self::Output { -// if index == self.get_root_id() { -// &self.root -// } else { -// self.nodes.iter().filter_map(|m| m.get(&index)).next().unwrap() -// } -// } -// } -// -// impl Provider for StackedPartialGraph { -// type Node = Mig; -// -// fn outputs(&self) -> impl Iterator { -// iter::once(self.to_signal(self.get_root_id())) -// } -// -// fn node(&self, id: eggmock::Id) -> Self::Node { -// self[Id::from(id)] -// .to_node(|id| self.to_signal(id)) -// .expect("id should point to a non-not node") -// } -// } - impl PartialEq for CompilingCost { fn eq(&self, other: &Self) -> bool { - todo!() - // if other.not_nesting == NotNesting::NestedNots && self.not_nesting == NotNesting::NestedNots { - // true - // } else { - // self.program_cost.eq(&other.program_cost) - // } + self.success_rate == other.success_rate && self.program_cost == other.program_cost } } +/// First compare based on success-rate, then on program-cost +/// TODO: more fine-grained comparison !! impl PartialOrd for CompilingCost { fn partial_cmp(&self, other: &Self) -> Option { - todo!() - // #[allow(clippy::collapsible_else_if)] - // if self.not_nesting == NotNesting::NestedNots { - // if other.not_nesting == NotNesting::NestedNots { - // Some(Ordering::Equal) - // } else { - // Some(Ordering::Greater) - // } - // } else { - // if other.not_nesting == NotNesting::NestedNots { - // Some(Ordering::Less) - // } else { - // self.program_cost.partial_cmp(&other.program_cost) - // } - // } + if self.success_rate == other.success_rate { + self.program_cost.partial_cmp(&other.program_cost) + } else { + self.success_rate.partial_cmp(&other.success_rate) + } } } diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index d82713c..18d0fe4 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -6,8 +6,7 @@ //! //! # Submodules //! -//! - [`architecture`] - defines Instructions (and performance-metrics of Instructions in that -//! architecture) used in FC-DRAM +//! - [`architecture`] - defines Instructions (and performance-metrics of Instructions in that architecture) used in FC-DRAM //! - [`compilation`] - compiles given LogicNetwork for FC-DRAM architecture //! - [`generator`] — Generates output code or reports based on analysis. mod compilation; @@ -24,25 +23,27 @@ use self::extraction::CompilingCostFunction; use eggmock::egg::{rewrite, EGraph, Extractor, Id, Rewrite, Runner}; use eggmock::{ - Mig, MigLanguage, MigReceiverFFI, Provider, Receiver, ReceiverFFI, Rewriter, + Aig, AigLanguage, AigReceiverFFI, Provider, Receiver, ReceiverFFI, Rewriter, RewriterFFI, // TODO: add AOIG-rewrite (bc FC-DRAM supports AND&OR natively)? }; use program::*; use architecture::*; /// Rewrite rules to use in E-Graph Rewriting (see [egg](https://egraphs-good.github.io/)) -/// TODO: adjust rewriting rules to FCDRAM -static REWRITE_RULES: LazyLock>> = LazyLock::new(|| { +/// TODO: adjust rewriting rules to FCDRAM (=AND/OR related rewrites like De-Morgan?) +static REWRITE_RULES: LazyLock>> = LazyLock::new(|| { let mut rules = vec![ - rewrite!("commute_1"; "(maj ?a ?b ?c)" => "(maj ?b ?a ?c)"), - rewrite!("commute_2"; "(maj ?a ?b ?c)" => "(maj ?a ?c ?b)"), + // TODO: add "or" + rewrite!("commute-and"; "(and ?a ?b)" => "(and ?b ?a)"), + rewrite!("and-1"; "(and ?a 1)" => "?a"), + rewrite!("and-0"; "(and ?a 0)" => "0"), rewrite!("not_not"; "(! (! ?a))" => "?a"), - rewrite!("maj_1"; "(maj ?a ?a ?b)" => "?a"), - rewrite!("maj_2"; "(maj ?a (! ?a) ?b)" => "?b"), - rewrite!("associativity"; "(maj ?a ?b (maj ?c ?b ?d))" => "(maj ?d ?b (maj ?c ?b ?a))"), + // rewrite!("maj_1"; "(maj ?a ?a ?b)" => "?a"), + // rewrite!("maj_2"; "(maj ?a (! ?a) ?b)" => "?b"), + // rewrite!("associativity"; "(maj ?a ?b (maj ?c ?b ?d))" => "(maj ?d ?b (maj ?c ?b ?a))"), ]; - rules.extend(rewrite!("invert"; "(! (maj ?a ?b ?c))" <=> "(maj (! ?a) (! ?b) (! ?c))")); - rules.extend(rewrite!("distributivity"; "(maj ?a ?b (maj ?c ?d ?e))" <=> "(maj (maj ?a ?b ?c) (maj ?a ?b ?d) ?e)")); + // rules.extend(rewrite!("invert"; "(! (maj ?a ?b ?c))" <=> "(maj (! ?a) (! ?b) (! ?c))")); + // rules.extend(rewrite!("distributivity"; "(maj ?a ?b (maj ?c ?d ?e))" <=> "(maj (maj ?a ?b ?c) (maj ?a ?b ?d) ?e)")); rules }); @@ -62,11 +63,12 @@ struct CompilingReceiverResult { #[ouroboros::self_referencing] struct CompilerOutput { /// Result E-Graph - graph: EGraph, + graph: EGraph, + /// (, output-nodes) #[borrows(graph)] #[covariant] ntk: ( - Extractor<'this, CompilingCostFunction, MigLanguage, ()>, + Extractor<'this, CompilingCostFunction, AigLanguage, ()>, Vec, ), /// Compiled Program @@ -74,16 +76,18 @@ struct CompilerOutput { program: Program, } -/// Initiates compilation and prints compilation-statistics +/// Initiates compilation and prints compilation-statistics (and program if `settings.verbose=true` +/// - returned receiver allows converting result-graph in both directions (C++ <=> Rust) +/// - `settings`: compiler-options fn compiling_receiver<'a>( - rules: &'a [Rewrite], + rules: &'a [Rewrite], settings: CompilerSettings, -) -> impl Receiver + use<'a> { +) -> impl Receiver + use<'a> { // REMINDER: EGraph implements `Receiver` - EGraph::::new(()).map(move |(graph, outputs)| { + EGraph::::new(()).map(move |(graph, outputs)| { // `.map()` of `Provider`-trait: let t_runner = std::time::Instant::now(); - // run equivalence saturation + // 1. Create E-Graph: run equivalence saturation let runner = Runner::default().with_egraph(graph).run(rules); let t_runner = t_runner.elapsed().as_millis(); @@ -92,26 +96,34 @@ fn compiling_receiver<'a>( println!("== Runner Report"); runner.print_report(); } + let graph = runner.egraph; let mut t_extractor = 0; let mut t_compiler = 0; + // 2. Given E-Graph: Compile the actual program let output = CompilerOutput::new( graph, |graph| { let start_time = Instant::now(); + // TODO: what is the extractor for?? let extractor = Extractor::new( - &graph, - CompilingCostFunction {}, + graph, + CompilingCostFunction {}, // TODO: provide CostFunction !! ); t_extractor = start_time.elapsed().as_millis(); (extractor, outputs) }, |ntk| { let start_time = Instant::now(); + + // ===== MAIN CALL ===== let program = compile(&ntk.with_backward_edges()); // actual compilation !! + // ===================== + t_compiler = start_time.elapsed().as_millis(); + // print program if compiler-setting is set if settings.print_program || settings.verbose { if settings.verbose { println!("== Program") @@ -138,39 +150,46 @@ fn compiling_receiver<'a>( #[derive(Debug, Copy, Clone)] #[repr(C)] +/// Compiler options +/// - TODO: add flags like minimal success-rate for program struct CompilerSettings { + /// Whether to print the compiled program print_program: bool, + /// Whether to enable verbose output verbose: bool, + /// Minimal success rate to be guaranteed for success compiled program + /// REMINDER: FCDRAM-operations dont have a 100%-success rate to create the correct results + min_success_rate: u64, } struct FCDramRewriter(CompilerSettings); impl Rewriter for FCDramRewriter { - type Node = Mig; + type Node = Aig; type Intermediate = CompilingReceiverResult; fn create_receiver( &mut self, - ) -> impl Receiver + 'static { - - // todo!() + ) -> impl Receiver + 'static { compiling_receiver(REWRITE_RULES.as_slice(), self.0) } fn rewrite( self, result: CompilingReceiverResult, - output: impl Receiver, + output: impl Receiver, ) { result.output.borrow_ntk().send(output); } } +/// ?? #[no_mangle] -extern "C" fn fcdram_rewriter(settings: CompilerSettings) -> MigReceiverFFI> { +extern "C" fn fcdram_rewriter(settings: CompilerSettings) -> AigReceiverFFI> { RewriterFFI::new(FCDramRewriter(settings)) } +/// Statistic results about Compilation-Process #[repr(C)] struct CompilerStatistics { egraph_classes: u64, @@ -184,8 +203,9 @@ struct CompilerStatistics { t_compiler: u64, } +/// Main function called from `.cpp()` file - receives compiler settings #[no_mangle] -extern "C" fn fcdram_compile(settings: CompilerSettings) -> MigReceiverFFI { +extern "C" fn fcdram_compile(settings: CompilerSettings) -> AigReceiverFFI { // todo!() // TODO: create example `ARCHITECTURE` implementing `FCDRAMArchitecture` let receiver = @@ -201,5 +221,5 @@ extern "C" fn fcdram_compile(settings: CompilerSettings) -> MigReceiverFFI, + network: &impl ProviderWithBackwardEdges, ) -> Self { Self { program: Program::new(Vec::new()), diff --git a/rs/src/fc_dram_bottom_up/mod.rs b/rs/src/fc_dram_bottom_up/mod.rs new file mode 100644 index 0000000..0262253 --- /dev/null +++ b/rs/src/fc_dram_bottom_up/mod.rs @@ -0,0 +1,103 @@ +use log::{info, warn, error, debug, trace}; + +// NEXT TODO: make `compiling_receiver` work +// fn compiling_receiver<'a, A: FCDRAMArchitecture>( +// architecture: &'a A, +// rules: &'a [Rewrite], +// settings: CompilerSettings, +// ) -> impl Receiver, Node = Mig> + 'a { +// EGraph::::new(()).map(move |(graph, outputs)| { +// let t_runner = std::time::Instant::now(); +// let runner = Runner::default().with_egraph(graph).run(rules); +// let t_runner = t_runner.elapsed().as_millis(); +// if settings.verbose { +// println!("== Runner Report"); +// runner.print_report(); +// } +// let graph = runner.egraph; +// +// let mut t_extractor = 0; +// let mut t_compiler = 0; +// +// let output = CompilerOutput::new( +// graph, +// |graph| { +// let start_time = Instant::now(); +// let extractor = Extractor::new( +// &graph, +// CompilingCostFunction { +// architecture: architecture, +// }, +// ); +// t_extractor = start_time.elapsed().as_millis(); +// (extractor, outputs) +// }, +// |ntk| { +// let start_time = Instant::now(); +// let program = compile(architecture, &ntk.with_backward_edges()); +// t_compiler = start_time.elapsed().as_millis(); +// if settings.print_program || settings.verbose { +// if settings.verbose { +// println!("== Program") +// } +// println!("{program}"); +// } +// program +// }, +// ); +// if settings.verbose { +// println!("== Timings"); +// println!("t_runner: {t_runner}ms"); +// println!("t_extractor: {t_extractor}ms"); +// println!("t_compiler: {t_compiler}ms"); +// } +// CompilingReceiverResult { +// output, +// t_runner, +// t_extractor, +// t_compiler, +// } +// }) +// } + +// #[no_mangle] +// extern "C" fn fcdram_rewriter(settings: CompilerSettings) -> MigReceiverFFI> { +// info!("Called fcdram_rewriter"); +// RewriterFFI::new(FCDramRewriter(settings)) +// } + +#[repr(C)] +struct CompilerStatistics { + egraph_classes: u64, + egraph_nodes: u64, + egraph_size: u64, + + instruction_count: u64, + + t_runner: u64, + t_extractor: u64, + t_compiler: u64, +} + +/// Main functions called by `main.cpp` +#[no_mangle] +// extern "C" fn fcdram_compile(settings: CompilerSettings) -> MigReceiverFFI { +extern "C" fn fcdram_compile() { + info!("Called fcdram_compile"); + todo!() + // TODO: create example `ARCHITECTURE` implementing `FCDRAMArchitecture` + // let receiver = + // compiling_receiver(&*&ARCHITECTURE, REWRITE_RULES.as_slice(), settings).map(|res| { + // let graph = res.output.borrow_graph(); + // CompilerStatistics { + // egraph_classes: graph.number_of_classes() as u64, + // egraph_nodes: graph.total_number_of_nodes() as u64, + // egraph_size: graph.total_size() as u64, + // instruction_count: res.output.borrow_program().instructions.len() as u64, + // t_runner: res.t_runner as u64, + // t_extractor: res.t_extractor as u64, + // t_compiler: res.t_compiler as u64, + // } + // }); + // MigReceiverFFI::new(receiver) +} diff --git a/rs/src/lib.rs b/rs/src/lib.rs index 6c7dc72..7144b8c 100644 --- a/rs/src/lib.rs +++ b/rs/src/lib.rs @@ -1,3 +1,4 @@ #![allow(clippy::upper_case_acronyms)] mod ambit; +mod fc_dram; diff --git a/src/fcdram.h b/src/fcdram.h index 06f339c..2905425 100644 --- a/src/fcdram.h +++ b/src/fcdram.h @@ -25,6 +25,7 @@ extern "C" bool verbose; }; - eggmock::mig_receiver fcdram_rewriter( fcdram_compiler_settings settings ); - eggmock::mig_receiver fcdram_compile( fcdram_compiler_settings settings ); + eggmock::aig_receiver fcdram_rewriter( fcdram_compiler_settings settings ); + eggmock::aig_receiver fcdram_compile( fcdram_compiler_settings settings ); + // void fcdram_compile(); } diff --git a/src/main.cpp b/src/main.cpp index 8477abd..40c50ad 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -28,24 +28,27 @@ void run_ambit_example(mig_network in) /** * TODO: change `mig` to `aig`?? */ -void run_fcdram_example(mig_network in) +void run_fcdram_example(aig_network in) { - fcdram_compiler_statistics result = eggmock::send_mig( in, fcdram_compile( fcdram_compiler_settings{ + std::cout << "Sending graph to fcdram_compile..." << std::endl; + // fcdram_compile(); + fcdram_compiler_statistics result = eggmock::send_aig( in, fcdram_compile( fcdram_compiler_settings{ .print_program = true, .verbose = true, } ) ); - std::cout << "IC:" << result.instruction_count << std::endl; - std::cout << "t1:" << result.t_runner << std::endl; - std::cout << "t2:" << result.t_extractor << std::endl; - std::cout << "t3:" << result.t_compiler << std::endl; + // std::cout << "IC:" << result.instruction_count << std::endl; + // std::cout << "t1:" << result.t_runner << std::endl; + // std::cout << "t2:" << result.t_extractor << std::endl; + // std::cout << "t3:" << result.t_compiler << std::endl; - // mig_network rewritten = rewrite_mig( in, fcdram_rewriter() ); + // aig_network rewritten = rewrite_mig( in, fcdram_rewriter() ); // write_dot( rewritten, "out.dot" ); } int main() { mig_network in; + // aig_network in; const auto b_i = in.create_pi(); const auto b_i_next = in.create_pi(); const auto m = in.create_pi(); @@ -57,5 +60,5 @@ int main() write_dot( in, "in.dot" ); run_ambit_example(in); - run_fcdram_example(in); + // run_fcdram_example(in); } From 9ad5fa35ec2e79e7a124c1177533908df59678c4 Mon Sep 17 00:00:00 2001 From: alku662e Date: Thu, 15 May 2025 11:41:08 +0200 Subject: [PATCH 06/51] :construction: Still cleaning up... --- rs/src/fc_dram/architecture.rs | 15 +- .../fc_dram/{compilation.rs => compiler.rs} | 0 rs/src/fc_dram/extraction.rs | 18 +- rs/src/fc_dram/mod.rs | 169 ++++++++---------- rs/src/fc_dram/utils.rs | 14 ++ src/fcdram.h | 8 +- src/fcdram_benchmark_main.cpp | 14 +- src/main.cpp | 17 +- 8 files changed, 134 insertions(+), 121 deletions(-) rename rs/src/fc_dram/{compilation.rs => compiler.rs} (100%) create mode 100644 rs/src/fc_dram/utils.rs diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 0c79a8a..5ac8eb5 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -1,17 +1,22 @@ //! Contains all architecture-specific descriptions -//! - [`FCDRAMArchitecture`] = trait which needs to be implemented for your DRAM-module +//! - [`FCDRAMArchitecture`] = DRAM-module-specific specific implementation of FCDRAMArchitecture //! - [`Instruction`] = contains all instructions supported by FC-DRAM architecture //! - [ ] `RowAddress`: utility functions to get subarray-id and row-addr within that subarray from //! RowAddress (eg via bit-shifting given bitmasks for subarray-id & row-addr to put on-top of //! RowAddress -use eggmock::{Id, Aig, ProviderWithBackwardEdges, Signal}; -use rustc_hash::FxHashMap; -use std::collections::hash_map::Entry; use std::fmt::{Display, Formatter}; pub type RowAddress = u64; +pub struct FCDRAMArchitecture { + rows_per_subarray: u64, + nr_subarrays: u64, + // TODO: params for calculating distance btw row and sense-amp, ... (particularly where + // sense-amps are placed within the DRAM module ?! + +} + /// Implement this trait for your specific DRAM-module to support FCDRAM-functionality /// - contains the mapping of logical-ops to FCDRAM-Architecture (see /// [`FCDRAMArchitecture::get_instructions_implementation_of_logic_ops`] @@ -19,7 +24,7 @@ pub type RowAddress = u64; /// # Possible Changes in Future /// /// - add trait-bound to a more general `Architecture`-trait to fit in the overall framework? -pub trait FCDRAMArchitecture { +impl FCDRAMArchitecture { /// Implements given logic operation using FCDRAM-Instructions /// REMINDER: for OR&AND additional [`Instruction::FracOp`]s need to be issued to setup the diff --git a/rs/src/fc_dram/compilation.rs b/rs/src/fc_dram/compiler.rs similarity index 100% rename from rs/src/fc_dram/compilation.rs rename to rs/src/fc_dram/compiler.rs diff --git a/rs/src/fc_dram/extraction.rs b/rs/src/fc_dram/extraction.rs index 7bf2e06..0bec37a 100644 --- a/rs/src/fc_dram/extraction.rs +++ b/rs/src/fc_dram/extraction.rs @@ -5,6 +5,8 @@ use eggmock::{EggIdToSignal, AigLanguage, Aig, NetworkLanguage, Provider, Signal use std::cmp::Ordering; use std::rc::Rc; +use super::architecture::FCDRAMArchitecture; + pub struct CompilingCostFunction{} // impl StackedPartialGraph { } // Do I need this?? @@ -22,13 +24,25 @@ pub struct CompilingCost { impl CostFunction for CompilingCostFunction { type Cost = Rc; - /// Compute cost of given `enode` + /// Compute cost of given `enode` using `cost_fn` + /// + /// Parameters determining cost of an enode: + /// - distance of row-operands to sense amplifiers + /// - operation: + /// - AND= + /// - OR= + /// - NOT= /// TODO: NEXT - fn cost(&mut self, enode: &AigLanguage, mut costs: C) -> Self::Cost + fn cost(&mut self, enode: &AigLanguage, mut cost_fn: C) -> Self::Cost where C: FnMut(Id) -> Self::Cost, { todo!() + // let cost = match enode { + // AigLanguage::False => todo!(), + // AigLanguage::Input(row) => // FCDRAMArchitecture::get_distance_of_row_to_sense_amps(&self, row) + // } + // let root = enode.clone(); // let cost = match enode { // AigLanguage::False | AigLanguage::Input(_) => CompilingCost::leaf(root), diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index 18d0fe4..616a599 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -7,18 +7,23 @@ //! # Submodules //! //! - [`architecture`] - defines Instructions (and performance-metrics of Instructions in that architecture) used in FC-DRAM -//! - [`compilation`] - compiles given LogicNetwork for FC-DRAM architecture +//! - [`compiler`] - compiles given LogicNetwork for FC-DRAM architecture //! - [`generator`] — Generates output code or reports based on analysis. -mod compilation; +//! - [`utils`] - utilities (helper macros/...) +mod compiler; mod extraction; mod optimization; mod program; mod architecture; +mod utils; +use std::path::Path; use std::sync::LazyLock; use std::time::Instant; -use self::compilation::compile; +use crate::measure_time; + +use self::compiler::compile; use self::extraction::CompilingCostFunction; use eggmock::egg::{rewrite, EGraph, Extractor, Id, Rewrite, Runner}; @@ -33,10 +38,11 @@ use architecture::*; /// TODO: adjust rewriting rules to FCDRAM (=AND/OR related rewrites like De-Morgan?) static REWRITE_RULES: LazyLock>> = LazyLock::new(|| { let mut rules = vec![ - // TODO: add "or" + // TODO: add "or" - and De-Morgan ? rewrite!("commute-and"; "(and ?a ?b)" => "(and ?b ?a)"), rewrite!("and-1"; "(and ?a 1)" => "?a"), rewrite!("and-0"; "(and ?a 0)" => "0"), + rewrite!("and-same"; "(and ?a ?a)" => "?a"), rewrite!("not_not"; "(! (! ?a))" => "?a"), // rewrite!("maj_1"; "(maj ?a ?a ?b)" => "?a"), // rewrite!("maj_2"; "(maj ?a (! ?a) ?b)" => "?b"), @@ -47,17 +53,11 @@ static REWRITE_RULES: LazyLock>> = LazyLock::new(|| rules }); -/// Store compilation output and timing statistics how long compilation stages took -/// TODO: unit of t? sec or ms? -struct CompilingReceiverResult { - /// Actual compilation result - output: CompilerOutput, - - /// Statistics about compilation - t_runner: u128, - t_extractor: u128, - t_compiler: u128, -} +/// Main variable specifying architecture of DRAM-module for which to compile for +static ARCHITECTURE: LazyLock = LazyLock::new(|| { + // TODO: init architecture from config file + todo!() +}); /// Compilation result (program + E-Graph) #[ouroboros::self_referencing] @@ -82,69 +82,49 @@ struct CompilerOutput { fn compiling_receiver<'a>( rules: &'a [Rewrite], settings: CompilerSettings, -) -> impl Receiver + use<'a> { +) -> impl Receiver + use<'a> { // REMINDER: EGraph implements `Receiver` - EGraph::::new(()).map(move |(graph, outputs)| { // `.map()` of `Provider`-trait: - let t_runner = std::time::Instant::now(); - - // 1. Create E-Graph: run equivalence saturation - let runner = Runner::default().with_egraph(graph).run(rules); - - let t_runner = t_runner.elapsed().as_millis(); - - if settings.verbose { - println!("== Runner Report"); - runner.print_report(); - } - - let graph = runner.egraph; - - let mut t_extractor = 0; - let mut t_compiler = 0; - - // 2. Given E-Graph: Compile the actual program - let output = CompilerOutput::new( - graph, - |graph| { - let start_time = Instant::now(); - // TODO: what is the extractor for?? - let extractor = Extractor::new( - graph, - CompilingCostFunction {}, // TODO: provide CostFunction !! - ); - t_extractor = start_time.elapsed().as_millis(); - (extractor, outputs) - }, - |ntk| { - let start_time = Instant::now(); - - // ===== MAIN CALL ===== - let program = compile(&ntk.with_backward_edges()); // actual compilation !! - // ===================== - - t_compiler = start_time.elapsed().as_millis(); - // print program if compiler-setting is set - if settings.print_program || settings.verbose { - if settings.verbose { - println!("== Program") + EGraph::::new(()) + .map(move |(graph, outputs)| { // `.map()` of `Provider`-trait: + + // 1. Create E-Graph: run equivalence saturation + let runner = measure_time!(Runner::default().with_egraph(graph).run(rules), "t_runner", settings.print_compilation_stats ); + + + if settings.verbose { + println!("== Runner Report"); + runner.print_report(); + } + + let graph = runner.egraph; + + + // 2. Given E-Graph: Compile the actual program and return result + CompilerOutput::new( + graph, + |graph| { + // TODO: what is the extractor for?? + let extractor = measure_time!( Extractor::new( + graph, + CompilingCostFunction {}, // TODO: provide CostFunction !! + ), "t_extractor", settings.print_compilation_stats ); + (extractor, outputs) + }, + |ntk| { + // ===== MAIN CALL ===== + let program = measure_time!( compile(&ntk.with_backward_edges()), "t_compiler", settings.print_compilation_stats); // actual compilation !! + // ===================== + + // print program if compiler-setting is set + if settings.print_program || settings.verbose { + if settings.verbose { + println!("== Program") + } + println!("{program}"); } - println!("{program}"); - } - program - }, - ); - if settings.verbose { - println!("== Timings"); - println!("t_runner: {t_runner}ms"); - println!("t_extractor: {t_extractor}ms"); - println!("t_compiler: {t_compiler}ms"); - } - CompilingReceiverResult { - output, - t_runner, - t_extractor, - t_compiler, - } + program + }, + ) }) } @@ -157,33 +137,37 @@ struct CompilerSettings { print_program: bool, /// Whether to enable verbose output verbose: bool, + /// Whether to print stats like runtimes of individual compiler-stages during compilation + print_compilation_stats: bool, /// Minimal success rate to be guaranteed for success compiled program /// REMINDER: FCDRAM-operations dont have a 100%-success rate to create the correct results min_success_rate: u64, + // /// Location to config-file holding fcdram-specific configs + // fcdram_config_file: Path, } struct FCDramRewriter(CompilerSettings); impl Rewriter for FCDramRewriter { type Node = Aig; - type Intermediate = CompilingReceiverResult; + type Intermediate = CompilerOutput; fn create_receiver( &mut self, - ) -> impl Receiver + 'static { + ) -> impl Receiver + 'static { compiling_receiver(REWRITE_RULES.as_slice(), self.0) } fn rewrite( self, - result: CompilingReceiverResult, + result: CompilerOutput, output: impl Receiver, ) { - result.output.borrow_ntk().send(output); + result.borrow_ntk().send(output); } } -/// ?? +/// ?? (maybe FFI for rewriting graph using mockturtle?) #[no_mangle] extern "C" fn fcdram_rewriter(settings: CompilerSettings) -> AigReceiverFFI> { RewriterFFI::new(FCDramRewriter(settings)) @@ -197,28 +181,23 @@ struct CompilerStatistics { egraph_size: u64, instruction_count: u64, - - t_runner: u64, - t_extractor: u64, - t_compiler: u64, } /// Main function called from `.cpp()` file - receives compiler settings +/// - `settings`: settings to use when running compiler #[no_mangle] extern "C" fn fcdram_compile(settings: CompilerSettings) -> AigReceiverFFI { // todo!() // TODO: create example `ARCHITECTURE` implementing `FCDRAMArchitecture` let receiver = - compiling_receiver(REWRITE_RULES.as_slice(), settings).map(|res| { - let graph = res.output.borrow_graph(); - CompilerStatistics { - egraph_classes: graph.number_of_classes() as u64, - egraph_nodes: graph.total_number_of_nodes() as u64, - egraph_size: graph.total_size() as u64, - instruction_count: res.output.borrow_program().instructions.len() as u64, - t_runner: res.t_runner as u64, - t_extractor: res.t_extractor as u64, - t_compiler: res.t_compiler as u64, + compiling_receiver(REWRITE_RULES.as_slice(), settings) + .map(|output| { + let graph = output.borrow_graph(); + CompilerStatistics { + egraph_classes: graph.number_of_classes() as u64, + egraph_nodes: graph.total_number_of_nodes() as u64, + egraph_size: graph.total_size() as u64, + instruction_count: output.borrow_program().instructions.len() as u64, } }); AigReceiverFFI::new(receiver) diff --git a/rs/src/fc_dram/utils.rs b/rs/src/fc_dram/utils.rs new file mode 100644 index 0000000..b9d4e8b --- /dev/null +++ b/rs/src/fc_dram/utils.rs @@ -0,0 +1,14 @@ +/// Measure time of `func` and print it if `do_print_timings` is set +#[macro_export] +macro_rules! measure_time { + ($func:expr, $label:expr, $do_print_timings:expr) => {{ + let start_time = Instant::now(); + let result = $func; + let t_runtime = start_time.elapsed().as_secs_f64(); + + if $do_print_timings { + println!("{}: {:.6}sec", $label, t_runtime); + } + result + }}; +} diff --git a/src/fcdram.h b/src/fcdram.h index 2905425..6619355 100644 --- a/src/fcdram.h +++ b/src/fcdram.h @@ -13,16 +13,16 @@ extern "C" uint64_t egraph_size; uint64_t instruction_count; - - uint64_t t_runner; - uint64_t t_extractor; - uint64_t t_compiler; }; + /** + * @param print_compilation_stats Whether to print stats like `t_runner`,`t_extractor`,`t_compiler` + */ struct fcdram_compiler_settings { bool print_program; bool verbose; + bool print_compilation_stats; }; eggmock::aig_receiver fcdram_rewriter( fcdram_compiler_settings settings ); diff --git a/src/fcdram_benchmark_main.cpp b/src/fcdram_benchmark_main.cpp index cd3191e..e740242 100644 --- a/src/fcdram_benchmark_main.cpp +++ b/src/fcdram_benchmark_main.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include using namespace mockturtle; using namespace eggmock; @@ -22,16 +22,16 @@ int main( int const argc, char** argv ) return 1; } - std::optional mig = get_ntk( argv[1] ); - if ( !mig ) + std::optional aig = get_ntk( argv[1] ); + if ( !aig ) { return 1; } - auto const pre_opt_size = mig->size(); + auto const pre_opt_size = aig->size(); auto const opt_begin = system_clock::now(); - preoptimize_mig( *mig ); + preoptimize_aig( *aig ); auto const t_opt = duration_cast( system_clock::now() - opt_begin ).count(); auto constexpr settings = fcdram_compiler_settings{ @@ -42,10 +42,10 @@ int main( int const argc, char** argv ) const auto [egraph_classes, egraph_nodes, egraph_size, instruction_count, t_runner, t_extractor, t_compiler] = - send_mig( *mig, fcdram_compile( settings ) ); + send_aig( *aig, fcdram_compile( settings ) ); std::cout << t_opt << "\t" << t_runner << "\t" << t_extractor << "\t" << t_compiler << "\t" - << pre_opt_size << "\t" << mig->size() << "\t" << mig->num_cis() << "\t" << mig->num_cos() << "\t" + << pre_opt_size << "\t" << aig->size() << "\t" << aig->num_cis() << "\t" << aig->num_cos() << "\t" << instruction_count << "\t" << egraph_classes << "\t" << egraph_nodes << "\t" << egraph_size; return 0; diff --git a/src/main.cpp b/src/main.cpp index 40c50ad..4dcdd6b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -16,10 +16,10 @@ void run_ambit_example(mig_network in) .print_program = true, .verbose = true, } ) ); - std::cout << "IC:" << result.instruction_count << std::endl; - std::cout << "t1:" << result.t_runner << std::endl; - std::cout << "t2:" << result.t_extractor << std::endl; - std::cout << "t3:" << result.t_compiler << std::endl; + // std::cout << "IC:" << result.instruction_count << std::endl; + // std::cout << "t1:" << result.t_runner << std::endl; + // std::cout << "t2:" << result.t_extractor << std::endl; + // std::cout << "t3:" << result.t_compiler << std::endl; // mig_network rewritten = rewrite_mig( in, ambit_rewriter() ); // write_dot( rewritten, "out.dot" ); @@ -35,6 +35,7 @@ void run_fcdram_example(aig_network in) fcdram_compiler_statistics result = eggmock::send_aig( in, fcdram_compile( fcdram_compiler_settings{ .print_program = true, .verbose = true, + .print_compilation_stats = true, } ) ); // std::cout << "IC:" << result.instruction_count << std::endl; // std::cout << "t1:" << result.t_runner << std::endl; @@ -47,8 +48,8 @@ void run_fcdram_example(aig_network in) int main() { - mig_network in; - // aig_network in; + // mig_network in; + aig_network in; const auto b_i = in.create_pi(); const auto b_i_next = in.create_pi(); const auto m = in.create_pi(); @@ -59,6 +60,6 @@ int main() in.create_po( bi ); write_dot( in, "in.dot" ); - run_ambit_example(in); - // run_fcdram_example(in); + // run_ambit_example(in); + run_fcdram_example(in); } From 4f22b63d777b3d0e8d1c23a23c1d90ed1971a06f Mon Sep 17 00:00:00 2001 From: alku662e Date: Thu, 15 May 2025 23:51:23 +0200 Subject: [PATCH 07/51] :construction: Start implementing FCDRAM-Architecture features. Next up: compilation & extraction (Cost Function) --- rs/src/fc_dram/architecture.rs | 124 ++++++++++++++---- .../{extraction.rs => egraph_extraction.rs} | 2 - rs/src/fc_dram/mod.rs | 36 ++--- 3 files changed, 114 insertions(+), 48 deletions(-) rename rs/src/fc_dram/{extraction.rs => egraph_extraction.rs} (98%) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 5ac8eb5..849dbff 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -5,16 +5,76 @@ //! RowAddress (eg via bit-shifting given bitmasks for subarray-id & row-addr to put on-top of //! RowAddress -use std::fmt::{Display, Formatter}; +use std::{fmt::{Display, Formatter}, sync::LazyLock}; + +/// Main variable specifying architecture of DRAM-module for which to compile for +/// - this is currently just an example implementation for testing purpose; (TODO: make this +/// configurable at runtime) +/// TODO: add field to simulate row-decoder circuitry, needed for impl Simultaneous-row-activation +/// TODO: make this configurable at runtime +static ARCHITECTURE: LazyLock = LazyLock::new(|| { + const NR_SUBARRAYS: i64 = 2i64.pow(7); + const ROWS_PER_SUBARRAY: i64 = 2i64.pow(9); + + // TODO: init architecture a run-time, eg from config file + let get_activated_rows_from_apa = |row1: RowAddress, row2: RowAddress| -> Vec { + let activated_rows = vec!(row1, row2); + // TODO: get other activated rows and add them to `activated_rows` + activated_rows + }; + + let get_distance_of_row_to_sense_amps = |row: RowAddress| -> RowDistanceToSenseAmps { + // ASSUMPTION: last & first rows only have sense-amps from one side + // TODO: is this true? or do all subarrays have a line of sense-amps on both of their ends?? + let distance_to_nearest_sense_amp_in_nr_rows = if row < ROWS_PER_SUBARRAY { + // this row is in the first subarray + row // row-addr = distance to nearest sense-amps + } else if row > (NR_SUBARRAYS-1)*ROWS_PER_SUBARRAY { + // this row is in the last subarray + row - (NR_SUBARRAYS-1)*ROWS_PER_SUBARRAY // =distance to above sense-amps + } else { + // let subarray_id = row / rows_per_subarray; + let row_nr_in_subarray = row % ROWS_PER_SUBARRAY; + if row_nr_in_subarray < ROWS_PER_SUBARRAY { + // row is in the 1st half of the subarray and hence nearer to the "previous" sense-amps + row_nr_in_subarray + } else { + // row is in the 2nd half of the subarray and hence nearer to the "previous" sense-amps + row_nr_in_subarray - ROWS_PER_SUBARRAY/2 + } -pub type RowAddress = u64; + }; + match distance_to_nearest_sense_amp_in_nr_rows { + i if i < ROWS_PER_SUBARRAY / 2 / 3 => RowDistanceToSenseAmps::Close, // 1st third of subarray-half + i if i < ROWS_PER_SUBARRAY / 2 / 6 => RowDistanceToSenseAmps::Middle, // 2nd third of subarray-half + _ => RowDistanceToSenseAmps::Far, // everything else is treated as being far away + } + }; + + FCDRAMArchitecture { + nr_subarrays: NR_SUBARRAYS, + rows_per_subarray: ROWS_PER_SUBARRAY, + get_activated_rows_from_apa, + get_distance_of_row_to_sense_amps, + } +}); + +/// - ! must be smaller than `rows_per_subarray * nr_subarrays` (this is NOT checked!) +pub type RowAddress = i64; pub struct FCDRAMArchitecture { - rows_per_subarray: u64, - nr_subarrays: u64, + /// Nr of subarrays in a DRAM module + nr_subarrays: i64, + /// Nr of rows in a single subarray + rows_per_subarray: i64, + /// Returns all activated rows when issuing `APA(row1, row2)` + get_activated_rows_from_apa: fn(RowAddress, RowAddress) -> Vec, // TODO: params for calculating distance btw row and sense-amp, ... (particularly where // sense-amps are placed within the DRAM module ?! - + /// Given a row-addr this returns the distance of it to the sense-amps (!determinse + /// success-rate of op using that `row` as an operand) (see [1] Chap5.2) + /// - NOTE: Methodology used in [1] to determine distance: RowHammer + get_distance_of_row_to_sense_amps: fn(RowAddress) -> RowDistanceToSenseAmps, } /// Implement this trait for your specific DRAM-module to support FCDRAM-functionality @@ -26,24 +86,39 @@ pub struct FCDRAMArchitecture { /// - add trait-bound to a more general `Architecture`-trait to fit in the overall framework? impl FCDRAMArchitecture { - /// Implements given logic operation using FCDRAM-Instructions + /// Returns FC-DRAM operations to perform for each logical operation, with operand-rows NOT set !!! + /// - addresses of row operands need to be overwritten during compilation ! + /// /// REMINDER: for OR&AND additional [`Instruction::FracOp`]s need to be issued to setup the /// reference subarray containing `reference_rows` in order to perform the given `logic_op` on /// the `compute_rows` inside the computation rows /// - /// - [ ] TODO: for `NOT`: `reference_rows`=??? (empty or =result rows?) + /// REMINDER: do increase the success rate of `FracOp` storing a fractional value (`V_{DD}/2` + /// in this case, several FracOps are usually issued) + /// - ->`FracOp`s are replicated during compilation as necessary, this is done during compilation /// /// NOTE: `compute_rows` are expected to lay in the same subarray and `reference_rows` in one /// subarray adjacent to the compute subarray (!this is not checked but assumed to be true!) - fn get_instructions_implementation_of_logic_ops(logic_op: SupportedLogicOps, compute_rows: Vec, reference_rows: Vec) -> Vec { - todo!() - } - - /// Returns distance of given `row` to the sense amplifiers - /// - important for calculating reliability of the operation (see [1] Chap5.2) - /// - Methodology used in [1] to determine distance: RowHammer - fn get_distance_of_row_to_sense_amps(&self, row: RowAddress) -> RowDistanceToSenseAmps { - todo!() + fn get_instructions_implementation_of_logic_ops(logic_op: SupportedLogicOps) -> Vec { + match logic_op { + SupportedLogicOps::NOT => vec!(Instruction::APA(-1, -1)), + SupportedLogicOps::AND => vec!(Instruction::FracOp(-1), Instruction::APA(-1, -1)), + SupportedLogicOps::OR => vec!(Instruction::FracOp(-1), Instruction::APA(-1, -1)), + SupportedLogicOps::NAND => { + // 1. AND, 2. NOT + FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(SupportedLogicOps::AND) + .into_iter() + .chain( FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(SupportedLogicOps::NOT)) + .collect() + }, + SupportedLogicOps::NOR => { + // 1. OR, 2. NOT + FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(SupportedLogicOps::OR) + .into_iter() + .chain( FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(SupportedLogicOps::NOT)) + .collect() + } + } } } @@ -80,7 +155,7 @@ pub enum Instruction { impl Display for Instruction { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let description = match self { - Instruction::FracOp(row) => "AP(row)", + Instruction::FracOp(row) => "AP({row})", Instruction::APA(row1,row2) => "APA({row1},{row2})", }; write!(f, "{}", description) @@ -123,6 +198,8 @@ impl Instruction { } /// Contains logical operations which are supported (natively) on FCDRAM-Architecture +/// - see [`FCDRAMArchitecture::get_instructions_implementation_of_logic_ops`] for how these +/// logic-ops are mapped to FCDRAM-instructions pub enum SupportedLogicOps { NOT, AND, @@ -134,6 +211,7 @@ pub enum SupportedLogicOps { } /// Implements behavior of the RowDecoderCircuitry as described in [3] +/// TODO: remove in favor of passing arbitrary closure to [`FCDRAMArchitecture::get_activated_rows_from_apa`] pub trait RowDecoder { /// Returns vector of simultaneously activated rows when issuing `APA(r1,r2)`-cmd /// NOTE: this may depend on the used DRAM - see [3] for a method for reverse-engineering @@ -144,15 +222,3 @@ pub trait RowDecoder { // N:2N: is supported and let `get_simultaneously_activated_rows_of_apa_op()` handle the rest? } -/// Dummy Implementation of a single FCDRAM-Bank -/// NOTE: in order to implement FCDRAM on a whole DRAM-module, -/// they user will need to deal with several DRAM-banks separately -pub struct DummyFCDRAMBank { - /// TODO: just replace with bitmask for determining subarray-id?? - nr_subarrays: u16, - nr_rows_per_subarray: u16 -} - -// TODO: -// impl FCDRAMArchitecture for DummyFCDRAMBank {} -// impl RowDecoder for DummyFCDRAMBank {} diff --git a/rs/src/fc_dram/extraction.rs b/rs/src/fc_dram/egraph_extraction.rs similarity index 98% rename from rs/src/fc_dram/extraction.rs rename to rs/src/fc_dram/egraph_extraction.rs index 0bec37a..a135725 100644 --- a/rs/src/fc_dram/extraction.rs +++ b/rs/src/fc_dram/egraph_extraction.rs @@ -5,8 +5,6 @@ use eggmock::{EggIdToSignal, AigLanguage, Aig, NetworkLanguage, Provider, Signal use std::cmp::Ordering; use std::rc::Rc; -use super::architecture::FCDRAMArchitecture; - pub struct CompilingCostFunction{} // impl StackedPartialGraph { } // Do I need this?? diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index 616a599..818c84b 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -1,3 +1,5 @@ +//! NOTE: currently FCDRAM has only been shown to work with HK Sync Modules +//! //! # Literature //! //! - [1] Functionally-Complete Boolean Logic in Real DRAM Chips: Experimental Characterization and Analysis, 2024 @@ -8,23 +10,24 @@ //! //! - [`architecture`] - defines Instructions (and performance-metrics of Instructions in that architecture) used in FC-DRAM //! - [`compiler`] - compiles given LogicNetwork for FC-DRAM architecture -//! - [`generator`] — Generates output code or reports based on analysis. +//! - [`generator`] - Generates output code or reports based on analysis. (TODO) +//! - [`optimization`] - applies architecture-specific optimizations to generated program (TODO: don't use here but in MLIR instead) +//! - [ ] [`program`] //! - [`utils`] - utilities (helper macros/...) +mod architecture; mod compiler; -mod extraction; +mod egraph_extraction; mod optimization; mod program; -mod architecture; mod utils; -use std::path::Path; use std::sync::LazyLock; use std::time::Instant; use crate::measure_time; use self::compiler::compile; -use self::extraction::CompilingCostFunction; +use self::egraph_extraction::CompilingCostFunction; use eggmock::egg::{rewrite, EGraph, Extractor, Id, Rewrite, Runner}; use eggmock::{ @@ -53,12 +56,6 @@ static REWRITE_RULES: LazyLock>> = LazyLock::new(|| rules }); -/// Main variable specifying architecture of DRAM-module for which to compile for -static ARCHITECTURE: LazyLock = LazyLock::new(|| { - // TODO: init architecture from config file - todo!() -}); - /// Compilation result (program + E-Graph) #[ouroboros::self_referencing] struct CompilerOutput { @@ -104,15 +101,20 @@ fn compiling_receiver<'a>( graph, |graph| { // TODO: what is the extractor for?? - let extractor = measure_time!( Extractor::new( + let extractor = measure_time!( + Extractor::new( graph, - CompilingCostFunction {}, // TODO: provide CostFunction !! - ), "t_extractor", settings.print_compilation_stats ); + CompilingCostFunction {}, + ), // TODO: provide CostFunction !! + "t_extractor", settings.print_compilation_stats + ); (extractor, outputs) }, |ntk| { - // ===== MAIN CALL ===== - let program = measure_time!( compile(&ntk.with_backward_edges()), "t_compiler", settings.print_compilation_stats); // actual compilation !! + // ===== MAIN CALL (actual compilation) ===== + let program = measure_time!( + compile(&ntk.with_backward_edges()), "t_compiler", settings.print_compilation_stats + ); // ===================== // print program if compiler-setting is set @@ -124,7 +126,7 @@ fn compiling_receiver<'a>( } program }, - ) + ) }) } From e3a9ca4f56fec9b782614dae7bf56f85574c2a05 Mon Sep 17 00:00:00 2001 From: alku662e Date: Fri, 16 May 2025 11:13:44 +0200 Subject: [PATCH 08/51] WIP --- rs/src/ambit/compilation.rs | 2 + rs/src/fc_dram/architecture.rs | 7 +- rs/src/fc_dram/compiler.rs | 129 ++------------------------------- 3 files changed, 12 insertions(+), 126 deletions(-) diff --git a/rs/src/ambit/compilation.rs b/rs/src/ambit/compilation.rs index 55c7ae5..f9cea34 100644 --- a/rs/src/ambit/compilation.rs +++ b/rs/src/ambit/compilation.rs @@ -73,6 +73,8 @@ pub struct CompilationState<'a, 'n, P> { } impl<'a, 'n, P: ProviderWithBackwardEdges> CompilationState<'a, 'n, P> { + /// - `candidates`: , computed from `network + /// - `outputs`: direktly read-out from `network` pub fn new(architecture: &'a Architecture, network: &'n P) -> Self { let mut candidates = FxHashSet::default(); // check all parents of leaves whether they have only leaf children, in which case they are diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 849dbff..e615d32 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -18,8 +18,9 @@ static ARCHITECTURE: LazyLock = LazyLock::new(|| { // TODO: init architecture a run-time, eg from config file let get_activated_rows_from_apa = |row1: RowAddress, row2: RowAddress| -> Vec { - let activated_rows = vec!(row1, row2); + let mut activated_rows = vec!(row1, row2); // TODO: get other activated rows and add them to `activated_rows` + activated_rows.push(123456); // TEST: add random row activated_rows }; @@ -155,8 +156,8 @@ pub enum Instruction { impl Display for Instruction { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let description = match self { - Instruction::FracOp(row) => "AP({row})", - Instruction::APA(row1,row2) => "APA({row1},{row2})", + Instruction::FracOp(row) => format!("AP({row})"), + Instruction::APA(row1,row2) => format!("APA({row1},{row2})"), }; write!(f, "{}", description) } diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index df6ad00..745dfa8 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -5,63 +5,17 @@ use super::{ architecture::FCDRAMArchitecture, optimization::optimize, Program, ProgramState, RowAddress }; -use eggmock::{Id, Aig, Node, ProviderWithBackwardEdges, Signal}; +use eggmock::{Id, Aig, Node, ProviderWithBackwardEdges as NetworkWithBackwardEdges, Signal}; use rustc_hash::{FxHashMap, FxHashSet}; use std::cmp::max; /// Compiles given `network` intto a FCDRAM-[`Program`] that can be run on given `architecture` pub fn compile( - network: &impl ProviderWithBackwardEdges, + network: &impl NetworkWithBackwardEdges, ) -> Program { + + let (outputs, leaves) = (network.outputs(), network.leaves()); todo!() - // let mut state = CompilationState::new(architecture, network); - // let mut max_cand_size = 0; // TODO: unused? - // while !state.candidates.is_empty() { - // max_cand_size = max(max_cand_size, state.candidates.len()); - // - // // TODO: ??? - // let (id, node, _, _, _) = state - // .candidates - // .iter() - // .copied() - // .map(|(id, node)| { - // let outputs = state.network.node_outputs(id).count(); - // let output = state.network.outputs().any(|out| out.node_id() == id); - // let not_present = node - // .inputs() - // .iter() - // .map(|signal| { - // let present = state - // .program - // .rows(); - // // .any(|row| matches!(row, Row::Bitwise(_))); - // todo!() - // }) - // .sum::(); - // (id, node, not_present, outputs, output) - // }) - // .min_by_key(|(_, _, not_present, outputs, output)| (*not_present, *outputs, !output)) - // .unwrap(); - // - // // TODO: ??? - // let output = state.outputs.get(&id).copied(); - // if let Some((output, signal)) = output { - // if signal.is_inverted() { - // state.compute(id, node, None); - // } else { - // // state.compute(id, node, Some(Address::Out(output))); - // state.compute(id, node, None); - // } - // let leftover_uses = *state.leftover_use_count(id); - // if leftover_uses == 1 { - // state.program.free_id_rows(id); - // } - // } else { - // state.compute(id, node, None); - // } - // } - // - // let mut program = state.program.into(); // optimize(&mut program); // program } @@ -69,14 +23,14 @@ pub fn compile( pub struct CompilationState<'n, P> { /// Network (P=Provider, obsolte naming) network: &'n P, - candidates: FxHashSet<(Id, Aig)>, // TODO: probably change to `Aig` ? + candidates: FxHashSet<(Id, Aig)>, program: ProgramState, outputs: FxHashMap, leftover_use_count: FxHashMap, } -impl<'n, P: ProviderWithBackwardEdges> CompilationState<'n, P> { +impl<'n, P: NetworkWithBackwardEdges> CompilationState<'n, P> { pub fn new(network: &'n P) -> Self { let mut candidates = FxHashSet::default(); // check all parents of leaves whether they have only leaf children, in which case they are @@ -109,75 +63,4 @@ impl<'n, P: ProviderWithBackwardEdges> CompilationState<'n, P> { leftover_use_count: FxHashMap::default(), } } - - pub fn leftover_use_count(&mut self, id: Id) -> &mut usize { - self.leftover_use_count.entry(id).or_insert_with(|| { - self.network.node_outputs(id).count() + self.outputs.contains_key(&id) as usize - }) - } - - pub fn compute(&mut self, id: Id, node: Aig, out_address: Option) { - todo!() - } - - /// Reorders the `signals` so that the maximum number of the given signal-operator-pairs already - /// match according to the current program state. - /// The returned array contains true for each operand that then already contains the correct - /// signal and the number is equal to the number of trues in the array. - fn get_mapping( - &self, - signals: &mut [Signal; 3], - operands: &[RowAddress; 3], - ) -> ([bool; 3], usize) { - todo!() - // let signals_with_idx = { - // let mut i = 0; - // signals.map(|signal| { - // i += 1; - // (signal, i - 1) - // }) - // }; - // let operand_signals = operands.map(|op| self.program.rows().get_operand_signal(op)); - // - // // reorder signals by how often their signal is already available in an operand - // let mut signals_with_matches = signals_with_idx.map(|(s, i)| { - // ( - // s, - // i, - // operand_signals - // .iter() - // .filter(|sig| **sig == Some(s)) - // .count(), - // ) - // }); - // signals_with_matches.sort_by(|a, b| a.2.cmp(&b.2)); - // - // // then we can assign places one by one and get an optimal mapping (probably, proof by - // // intuition only) - // - // // contains for each operand index whether the signal at that position is already the - // // correct one - // let mut result = [false; 3]; - // // contains the mapping of old signal index to operand index - // let mut new_positions = [0usize, 1, 2]; - // // contains the number of assigned signals (i.e. #true in result) - // let mut assigned_signals = 0; - // - // for (signal, signal_idx, _) in signals_with_matches { - // // find operand index for that signal - // let Some((target_idx, _)) = operand_signals - // .iter() - // .enumerate() - // .find(|(idx, sig)| **sig == Some(signal) && !result[*idx]) - // else { - // continue; - // }; - // result[target_idx] = true; - // let new_idx = new_positions[signal_idx]; - // signals.swap(target_idx, new_idx); - // new_positions.swap(target_idx, new_idx); - // assigned_signals += 1; - // } - // (result, assigned_signals) - } } From 30d0c4a2451c7aebe58ed1ec7c00bb4dccb77976 Mon Sep 17 00:00:00 2001 From: alku662e Date: Sat, 17 May 2025 18:20:00 +0200 Subject: [PATCH 09/51] Make program run, but produces dummy-output at the moment --- rs/src/ambit/rows.rs | 4 +-- rs/src/fc_dram/compiler.rs | 9 ++++-- rs/src/fc_dram/egraph_extraction.rs | 48 +++++++++++++---------------- rs/src/fc_dram/mod.rs | 12 ++++---- rs/src/fc_dram/program.rs | 12 ++++++-- 5 files changed, 44 insertions(+), 41 deletions(-) diff --git a/rs/src/ambit/rows.rs b/rs/src/ambit/rows.rs index ee12a6a..85441c5 100644 --- a/rs/src/ambit/rows.rs +++ b/rs/src/ambit/rows.rs @@ -42,12 +42,12 @@ impl<'a> Rows<'a> { spill_counter: 0, architecture, }; - rows.add_leaves(ntk); + rows.add_leafs(ntk); rows } fn add_leafs(&mut self, ntk: &impl NetworkWithBackwardEdges) { - let leafs = ntk.leafs(); + let leafs = ntk.leaves(); self.rows.reserve(leafs.size_hint().0); for id in leafs { let node = ntk.node(id); diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 745dfa8..c6be2e0 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -3,9 +3,9 @@ //! into a [`program`] using some [`optimization`] use super::{ - architecture::FCDRAMArchitecture, optimization::optimize, Program, ProgramState, RowAddress + architecture::{FCDRAMArchitecture, Instruction}, optimization::optimize, Program, ProgramState, RowAddress }; -use eggmock::{Id, Aig, Node, ProviderWithBackwardEdges as NetworkWithBackwardEdges, Signal}; +use eggmock::{Id, Aig, Node, NetworkWithBackwardEdges, Signal}; use rustc_hash::{FxHashMap, FxHashSet}; use std::cmp::max; @@ -15,7 +15,10 @@ pub fn compile( ) -> Program { let (outputs, leaves) = (network.outputs(), network.leaves()); - todo!() + Program { + instructions: vec!(Instruction::FracOp(-1)) , + } + // todo!() // optimize(&mut program); // program } diff --git a/rs/src/fc_dram/egraph_extraction.rs b/rs/src/fc_dram/egraph_extraction.rs index a135725..cbc3151 100644 --- a/rs/src/fc_dram/egraph_extraction.rs +++ b/rs/src/fc_dram/egraph_extraction.rs @@ -1,7 +1,7 @@ //! Computation of Compiling Costs use eggmock::egg::{CostFunction, Id}; -use eggmock::{EggIdToSignal, AigLanguage, Aig, NetworkLanguage, Provider, Signal}; +use eggmock::{EggIdToSignal, AigLanguage, Aig, NetworkLanguage, Network, Signal}; use std::cmp::Ordering; use std::rc::Rc; @@ -30,16 +30,31 @@ impl CostFunction for CompilingCostFunction { /// - AND= /// - OR= /// - NOT= + /// /// TODO: NEXT fn cost(&mut self, enode: &AigLanguage, mut cost_fn: C) -> Self::Cost where C: FnMut(Id) -> Self::Cost, { - todo!() - // let cost = match enode { - // AigLanguage::False => todo!(), - // AigLanguage::Input(row) => // FCDRAMArchitecture::get_distance_of_row_to_sense_amps(&self, row) - // } + let cost = match enode { + AigLanguage::False => 1, + AigLanguage::Input(_node) => { + // FCDRAMArchitecture::get_distance_of_row_to_sense_amps(&self, row) + 2 + }, + AigLanguage::And([_node1, _node2]) => { + 3 + }, + AigLanguage::Not(_node) => { + 4 + }, + }; + + Rc::new(CompilingCost { + success_rate: 0.0, + program_cost: cost, + }) + // todo!() // let root = enode.clone(); // let cost = match enode { @@ -65,27 +80,6 @@ impl CostFunction for CompilingCostFunction { } } -impl CompilingCost { - - pub fn with_children( - root: AigLanguage, - child_costs: impl IntoIterator)>, - ) -> Self { - todo!() - // let child_graphs = child_costs - // .into_iter() - // .map(|(id, cost)| cost.collapsed_graph(id)); - // let partial_graph = StackedPartialGraph::new(root, child_graphs); - // let program_cost = compile(architecture, &partial_graph.with_backward_edges()).instructions.len(); - // Self { - // partial: RefCell::new(Either::Left(partial_graph)), - // not_nesting, - // program_cost, - // } - } - -} - impl PartialEq for CompilingCost { fn eq(&self, other: &Self) -> bool { self.success_rate == other.success_rate && self.program_cost == other.program_cost diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index 818c84b..6bf3251 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -31,7 +31,7 @@ use self::egraph_extraction::CompilingCostFunction; use eggmock::egg::{rewrite, EGraph, Extractor, Id, Rewrite, Runner}; use eggmock::{ - Aig, AigLanguage, AigReceiverFFI, Provider, Receiver, ReceiverFFI, Rewriter, + Aig, AigLanguage, AigReceiverFFI, Network, Receiver, ReceiverFFI, Rewriter, RewriterFFI, // TODO: add AOIG-rewrite (bc FC-DRAM supports AND&OR natively)? }; use program::*; @@ -65,10 +65,11 @@ struct CompilerOutput { #[borrows(graph)] #[covariant] ntk: ( - Extractor<'this, CompilingCostFunction, AigLanguage, ()>, + Extractor<'this, CompilingCostFunction, AigLanguage, ()>, // `'this`=self-reference Vec, ), /// Compiled Program + /// Program is compiled using previously (EGraph-)extracted `ntk` #[borrows(ntk)] program: Program, } @@ -96,11 +97,10 @@ fn compiling_receiver<'a>( let graph = runner.egraph; - // 2. Given E-Graph: Compile the actual program and return result CompilerOutput::new( graph, |graph| { - // TODO: what is the extractor for?? + // 2. Given E-Graph: Retrieve best graph using custom `CompilingCostFunction` let extractor = measure_time!( Extractor::new( graph, @@ -108,15 +108,15 @@ fn compiling_receiver<'a>( ), // TODO: provide CostFunction !! "t_extractor", settings.print_compilation_stats ); - (extractor, outputs) + (extractor, outputs) // produce `ntk` }, |ntk| { // ===== MAIN CALL (actual compilation) ===== + // 3. Compile program using extracted network let program = measure_time!( compile(&ntk.with_backward_edges()), "t_compiler", settings.print_compilation_stats ); // ===================== - // print program if compiler-setting is set if settings.print_program || settings.verbose { if settings.verbose { diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs index a2c171f..073e822 100644 --- a/rs/src/fc_dram/program.rs +++ b/rs/src/fc_dram/program.rs @@ -3,7 +3,7 @@ //! optimizations ([`optimization`]) use super::architecture::{FCDRAMArchitecture, RowAddress}; use crate::fc_dram::architecture::Instruction; -use eggmock::{Id, Aig, ProviderWithBackwardEdges, Signal}; +use eggmock::{Id, Aig, NetworkWithBackwardEdges, Signal}; use std::fmt::{Display, Formatter}; use std::ops::{Deref, DerefMut}; @@ -30,7 +30,7 @@ impl Program { impl ProgramState { pub fn new( - network: &impl ProviderWithBackwardEdges, + network: &impl NetworkWithBackwardEdges, ) -> Self { Self { program: Program::new(Vec::new()), @@ -85,6 +85,12 @@ impl From for Program { /// Print the generated program in human-readable form impl Display for Program { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - todo!() + for instr in &self.instructions { + match instr { + Instruction::FracOp(row) => write!(f, "AP({row})")?, // TODO: repeat this by the factor set in compiler-settings + Instruction::APA(row1, row2, ) => write!(f, "APA({row1},{row2})")?, + } + } + Ok(()) } } From f994f3b4fd861e9cd5668c266f08c71fc1bc14e6 Mon Sep 17 00:00:00 2001 From: alku662e Date: Thu, 22 May 2025 15:11:08 +0200 Subject: [PATCH 10/51] :construction: Implement simultaneous-row-activation and add nr of mem-cycles required per primitive --- rs/Cargo.lock | 280 +++++++++++++++++++++++++++- rs/Cargo.toml | 2 + rs/src/ambit/compilation.rs | 1 + rs/src/fc_dram/architecture.rs | 88 ++++++++- rs/src/fc_dram/compiler.rs | 123 +++++++++--- rs/src/fc_dram/egraph_extraction.rs | 7 +- rs/src/fc_dram/mod.rs | 76 +++++--- rs/src/fc_dram/program.rs | 5 +- rs/src/lib.rs | 4 +- 9 files changed, 519 insertions(+), 67 deletions(-) diff --git a/rs/Cargo.lock b/rs/Cargo.lock index 1cd3f88..2495beb 100644 --- a/rs/Cargo.lock +++ b/rs/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "aliasable" version = "0.1.3" @@ -14,6 +23,56 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" +dependencies = [ + "anstyle", + "once_cell", + "windows-sys", +] + [[package]] name = "autocfg" version = "1.4.0" @@ -38,6 +97,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -50,7 +115,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "abb749745461743bb477fba3ef87c663d5965876155c676c9489cfe0963de5ab" dependencies = [ - "env_logger", + "env_logger 0.9.3", "hashbrown", "indexmap", "log", @@ -83,6 +148,16 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "env_filter" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +dependencies = [ + "log", + "regex", +] + [[package]] name = "env_logger" version = "0.9.3" @@ -92,6 +167,19 @@ dependencies = [ "log", ] +[[package]] +name = "env_logger" +version = "0.11.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "jiff", + "log", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -137,6 +225,45 @@ version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "jiff" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a194df1107f33c79f4f93d02c80798520551949d59dfad22b6157048a88cca93" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c6e1db7ed32c6c71b759497fae34bf7933636f75a251b9e736555da426f6442" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "js-sys" version = "0.3.77" @@ -159,6 +286,8 @@ version = "0.1.0" dependencies = [ "eggmock", "either", + "env_logger 0.11.8", + "itertools", "log", "ouroboros", "rustc-hash", @@ -171,6 +300,12 @@ version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + [[package]] name = "num-bigint" version = "0.4.6" @@ -235,6 +370,21 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "portable-atomic" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + [[package]] name = "proc-macro2" version = "1.0.95" @@ -290,6 +440,35 @@ dependencies = [ "bitflags", ] +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + [[package]] name = "rustc-hash" version = "2.1.1" @@ -308,6 +487,26 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "smallvec" version = "1.15.0" @@ -374,6 +573,12 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "version_check" version = "0.9.5" @@ -475,6 +680,79 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + [[package]] name = "yansi" version = "1.0.1" diff --git a/rs/Cargo.toml b/rs/Cargo.toml index ba08b87..18855e3 100644 --- a/rs/Cargo.toml +++ b/rs/Cargo.toml @@ -14,6 +14,8 @@ either = "1.13.0" smallvec = "1.14.0" ouroboros = "0.18.0" log = "0.4" +env_logger = "0.11.8" +itertools = "0.14.0" [build-dependencies] # eggmock = { path = "../../eggmock" } diff --git a/rs/src/ambit/compilation.rs b/rs/src/ambit/compilation.rs index ecc4ca0..6914e65 100644 --- a/rs/src/ambit/compilation.rs +++ b/rs/src/ambit/compilation.rs @@ -65,6 +65,7 @@ pub fn compile<'a>( pub struct CompilationState<'a, 'n, P> { network: &'n P, + /// Network-Nodes whose inputs all have been computed candidates: FxHashSet<(Id, Mig)>, program: ProgramState<'a>, diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index e615d32..0406b18 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -5,22 +5,63 @@ //! RowAddress (eg via bit-shifting given bitmasks for subarray-id & row-addr to put on-top of //! RowAddress -use std::{fmt::{Display, Formatter}, sync::LazyLock}; +use std::{fmt::{Display, Formatter}, ops::Add, sync::LazyLock}; + +use itertools::Itertools; +use log::debug; /// Main variable specifying architecture of DRAM-module for which to compile for /// - this is currently just an example implementation for testing purpose; (TODO: make this /// configurable at runtime) /// TODO: add field to simulate row-decoder circuitry, needed for impl Simultaneous-row-activation /// TODO: make this configurable at runtime -static ARCHITECTURE: LazyLock = LazyLock::new(|| { +pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { const NR_SUBARRAYS: i64 = 2i64.pow(7); const ROWS_PER_SUBARRAY: i64 = 2i64.pow(9); + // Implementation of the Hypothetical Row Decoder from [3] Chap4.2 + // - GWLD (Global Wordline Decoder)=decode higher bits to select addressed subarray + // - LWLD (Local Wordline Decoder)=hierarchy of decoders which decode lower bits; latches remain set when using `APA` + // - see [3] Chap4.2: nr of Predecoders in LWLD determines number & addresses of simultaneously activated rows + // - does work for the example shown in [3] Chap3.2: `APA(256,287)` activates rows `287,286,281,280,263,262,257,256` + // TODO: add overlapping of higher-order-bits (GWLD) // TODO: init architecture a run-time, eg from config file + // TODO: maybe evaluate statically? let get_activated_rows_from_apa = |row1: RowAddress, row2: RowAddress| -> Vec { - let mut activated_rows = vec!(row1, row2); - // TODO: get other activated rows and add them to `activated_rows` - activated_rows.push(123456); // TEST: add random row + // 1. Define Predecoders by defining for which of the bits they're responsible + // each Predecoder is resonsible for some of the lower order bits + let predecoder_bitmasks = vec!( + 0b110000000, // first predecoder (PE) predecodes bits[8,7] + 0b001100000, // Predecoder PD + 0b000011000, // Predecoder PC + 0b000000110, // Predecoder PB + 0b000000001, // last predecoder (PA) predecodes bits[0] + ); + + // for each predecoder store which bits will remain set due to `APA(row1,row)`: + let overlapping_bits = vec!( + // latches set by `ACT(row1)` --- latches set by `ACT(row2)` + [ row1 & predecoder_bitmasks[0], row2 & predecoder_bitmasks[0]], + [ row1 & predecoder_bitmasks[1], row2 & predecoder_bitmasks[1]], + [ row1 & predecoder_bitmasks[2], row2 & predecoder_bitmasks[2]], + [ row1 & predecoder_bitmasks[3], row2 & predecoder_bitmasks[3]], + [ row1 & predecoder_bitmasks[4], row2 & predecoder_bitmasks[4]], + ); + + let mut activated_rows = vec!(); // TODO: get other activated rows and add them to `activated_rows` + // compute all simultaneously activated rows + for i in 0..1 << predecoder_bitmasks.len() { + let activated_row = overlapping_bits.iter() + // start with all row-address bits unset (=0) and first predecoder stage (=1) + .fold((0 as RowAddress, 1), |(row, predecoder_stage_onehot), new_row_bits|{ + let bitmask_to_choose = (i & predecoder_stage_onehot) > 0; + (row | new_row_bits[bitmask_to_choose as usize], predecoder_stage_onehot << 1) + }); + activated_rows.push(activated_row.0); + } + debug!("`APA({row1},{row2})` activates the following rows simultaneously: {activated_rows:?}"); + activated_rows.dedup(); // no need for `.unique()` since this implementation adds equivalent RowAddresses one after the other (!check!!) + // NOTE: works in-place activated_rows }; @@ -65,17 +106,17 @@ pub type RowAddress = i64; pub struct FCDRAMArchitecture { /// Nr of subarrays in a DRAM module - nr_subarrays: i64, + pub nr_subarrays: i64, /// Nr of rows in a single subarray - rows_per_subarray: i64, + pub rows_per_subarray: i64, /// Returns all activated rows when issuing `APA(row1, row2)` - get_activated_rows_from_apa: fn(RowAddress, RowAddress) -> Vec, + pub get_activated_rows_from_apa: fn(RowAddress, RowAddress) -> Vec, // TODO: params for calculating distance btw row and sense-amp, ... (particularly where // sense-amps are placed within the DRAM module ?! /// Given a row-addr this returns the distance of it to the sense-amps (!determinse /// success-rate of op using that `row` as an operand) (see [1] Chap5.2) /// - NOTE: Methodology used in [1] to determine distance: RowHammer - get_distance_of_row_to_sense_amps: fn(RowAddress) -> RowDistanceToSenseAmps, + pub get_distance_of_row_to_sense_amps: fn(RowAddress) -> RowDistanceToSenseAmps, } /// Implement this trait for your specific DRAM-module to support FCDRAM-functionality @@ -139,6 +180,9 @@ pub enum RowDistanceToSenseAmps { /// and `N` rows in compute subarray /// 3. Wait for `t_{RAS}` (=overwrites activated cells in compute subarray with AND/OR-result) /// 4. Issue `PRE` to complete the operation +/// +/// Additionally RowClone-operations are added for moving data around if needed (eg if valid data +/// would be affected by following Simultaneous-Row-Activations) #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum Instruction { /// Needed for initializing neutral row in reference subarray (to set `V_{AND}`/`V_{OR}` (see @@ -150,7 +194,17 @@ pub enum Instruction { /// different subarrays. As a result `R_L` holds the negated value of `R_F` (see Chap5.1 of /// PaperFunctionally Complete DRAMs /// Used to implement NOT directly - APA(RowAddress,RowAddress), + APA(RowAddress,RowAddress), // TODO: Rename to SimultaneousRowActivation or sth the like ? + /// Fast-Parallel-Mode RowClone for cloning row-data within same subarray + /// - corresponds to `AA`, basically copies from src-row -> row-buffer -> dst-row + /// - first operand=src, 2nd operand=dst where `src` and `dst` MUST reside in the same subarray ! + RowCloneFPM(RowAddress, RowAddress), + /// Copies data from src (1st operand) to dst (2nd operand) using RowClonePSM, which copies the + /// data from `this_bank(src_row) -> other_bank(rowX) -> this_bank(dst_row)` (where + /// `other_bank` might be any other bank). Since this copy uses the internal DRAM-bus it works + /// on cacheline-granularity (64B) which might take some time for 8KiB rows... + /// - see [4] Chap3.3 for `TRANSFER`-instruction + RowClonePSM(RowAddress, RowAddress), } impl Display for Instruction { @@ -158,6 +212,11 @@ impl Display for Instruction { let description = match self { Instruction::FracOp(row) => format!("AP({row})"), Instruction::APA(row1,row2) => format!("APA({row1},{row2})"), + Instruction::RowCloneFPM(row1,row2) => format!("AA({row1},{row2})"), + Instruction::RowClonePSM(row1,row2) => format!(" + TRANSFER({row1},(rowX)) + TANSFER(rowX,{row2}) + "), }; write!(f, "{}", description) } @@ -196,6 +255,15 @@ impl Instruction { // ) -> impl Iterator { todo!() } + + pub fn get_nr_memcycles(&self) -> u16 { + match self { + Instruction::FracOp(__) => 7, // see [2] ChapIII.A, (two cmd-cycles + five idle cycles) + Instruction::APA(_, _) => 3, // NOTE: this is not explicitly written in the paper, TODO: check with authors + Instruction::RowCloneFPM(_, _) => 2, // see [4] Chap3.2 + Instruction::RowClonePSM(_, _) => 256, // =(8192B/64B)*2 (*2 since copies two time, to and from `` on 64B-granularity + } + } } /// Contains logical operations which are supported (natively) on FCDRAM-Architecture diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index c6be2e0..dbbccb7 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -3,42 +3,118 @@ //! into a [`program`] using some [`optimization`] use super::{ - architecture::{FCDRAMArchitecture, Instruction}, optimization::optimize, Program, ProgramState, RowAddress + architecture::{FCDRAMArchitecture, Instruction, ARCHITECTURE}, optimization::optimize, Program, ProgramState, RowAddress }; -use eggmock::{Id, Aig, Node, NetworkWithBackwardEdges, Signal}; +use eggmock::{Aig, ComputedNetworkWithBackwardEdges, Id, NetworkWithBackwardEdges, Node, Signal}; +use log::debug; use rustc_hash::{FxHashMap, FxHashSet}; -use std::cmp::max; +use std::collections::HashMap; + + +/// Places given signal onto rows (modifying `comp_state`) +/// - NOTE: `signals` are assumed to be activated together during execution and are hence placed s.t. minimal nr of other rows holding valid data are simultaneously activated +/// - [ ] make sure that operation to be executed on those rows won't simultaneously activate other rows holding valid data which will be used by future operations +pub fn place_signals_onto_rows( + comp_state: &mut impl NetworkWithBackwardEdges, signals: Vec +) { + +} /// Compiles given `network` intto a FCDRAM-[`Program`] that can be run on given `architecture` +/// - [ ] TODO: output in which rows +/// - 1) data is expected to be placed before program runs +/// - 2) outputs can be found after the program has run pub fn compile( network: &impl NetworkWithBackwardEdges, ) -> Program { - let (outputs, leaves) = (network.outputs(), network.leaves()); - Program { - instructions: vec!(Instruction::FracOp(-1)) , + // 0. Setup: store all network-nodes yet to be compiled + let comp_state = CompilationState::new(network); // initializes `.candidates()` with inputs + nodes whose src-operands are all inputs + let program = Program::new(vec!()); + + let outputs = network.outputs(); + + // TODO: how to get src-operands of `outputs` ?? + // debug!("{:?}", network.node_outputs(outputs.next()).collect()); + // TODO: get src-operands of outputs and place them appropriately (with knowledge about output + // operands!) + + let inputs = network.leaves(); + + + // println!("{:?}", network.outputs().collect::>()); + debug!("Nodes in network:"); + for node in network.iter() { + debug!("{:?},", node); } + + while !comp_state.candidates.is_empty() { + // 1. Make sure rows are placed appropriately (starting from inputs) + + // 1.1 Determine in which rows src-operands for the next candidate-op are located + + // 1.2 Check if issuing `APA(src1,src2)` would activate other rows which hold valid data + // 1.2.1 if yes: move data to other rows for performing this op + + // 1.3 Prepare performing the actual op (setup reference subarray) + // 1.3.1 If activated rows in reference subarray holds valid data: spill to other rows + + // 1.4 Issue actual operation + } + + // let (outputs, leaves) = (network.outputs(), network.leaves()); + + // Program { + // instructions: vec!(Instruction::FracOp(-1)) , + // } // todo!() // optimize(&mut program); - // program + program +} + +pub struct Compiler { + // comp_state: CompilationState<'n, N>, +} + +impl Compiler { + fn new() -> Self { + Compiler{} + } + + fn compile() -> Program { + Program { instructions: vec!() } + } + + /// Place inputs onto appropriate rows + /// TODO: algo which looks ahead which input-row-placement might be optimal (->reduce nr of + /// move-ops to move intermediate results around & keep inputs close to sense-amps + fn init_rows_with_inputs() { + + } } -pub struct CompilationState<'n, P> { - /// Network (P=Provider, obsolte naming) - network: &'n P, +/// Keep track of current progress of the compilation (eg which rows are used, into which rows data is placed, ...) +pub struct CompilationState<'n, N> { + /// Logic-Network which is to be compiled + network: &'n N, + /// Signals whose inputs have already been calculated and are currently placed in some row in + /// the DRAM module (see [`Self::signal_to_row_mapping`]) candidates: FxHashSet<(Id, Aig)>, - program: ProgramState, - outputs: FxHashMap, - leftover_use_count: FxHashMap, + /// Stores which intermediate results are stored at which row-addresses + /// - ✓ use to determine at which row-addresses output-data can be found + signal_to_row_mapping: HashMap, + /// Rows which are free (don't hold data which is still needed) + free_rows: Vec } impl<'n, P: NetworkWithBackwardEdges> CompilationState<'n, P> { + /// initializes `self..candidates()` with inputs + nodes whose src-operands are all inputs pub fn new(network: &'n P) -> Self { let mut candidates = FxHashSet::default(); // check all parents of leaves whether they have only leaf children, in which case they are - // candidates - for leaf in network.leaves() { + // candidates (since all of their inputs are calculated then) + for leaf in network.leaves() { // =inputs for candidate_id in network.node_outputs(leaf) { let candidate = network.node(candidate_id); if candidate @@ -50,20 +126,19 @@ impl<'n, P: NetworkWithBackwardEdges> CompilationState<'n, P> { } } } - let program = ProgramState::new(network); - let outputs = network - .outputs() - .enumerate() - .map(|(id, sig)| (sig.node_id(), (id as u64, sig))) - .collect(); + // let outputs = network + // .outputs() + // .enumerate() + // .map(|(id, sig)| (sig.node_id(), (id as i64, sig))) + // .collect(); + let total_nr_rows_in_dram_module = ARCHITECTURE.nr_subarrays; Self { network, candidates, - program, - outputs, - leftover_use_count: FxHashMap::default(), + signal_to_row_mapping: HashMap::new(), + free_rows: (0..=total_nr_rows_in_dram_module).collect(), // we start with all rows being free at the beginning } } } diff --git a/rs/src/fc_dram/egraph_extraction.rs b/rs/src/fc_dram/egraph_extraction.rs index cbc3151..7853a7b 100644 --- a/rs/src/fc_dram/egraph_extraction.rs +++ b/rs/src/fc_dram/egraph_extraction.rs @@ -32,21 +32,23 @@ impl CostFunction for CompilingCostFunction { /// - NOT= /// /// TODO: NEXT + /// - [ ] Subgraph direkt kompilieren ?? fn cost(&mut self, enode: &AigLanguage, mut cost_fn: C) -> Self::Cost where C: FnMut(Id) -> Self::Cost, { let cost = match enode { - AigLanguage::False => 1, + AigLanguage::False => 0, AigLanguage::Input(_node) => { // FCDRAMArchitecture::get_distance_of_row_to_sense_amps(&self, row) + // TODO: make cost depend on data-pattern of input? 2 }, AigLanguage::And([_node1, _node2]) => { 3 }, AigLanguage::Not(_node) => { - 4 + 1 }, }; @@ -67,6 +69,7 @@ impl CostFunction for CompilingCostFunction { // } else { // NotNesting::NestedNots // }; + // // // CompilingCost::with_children(self.architecture, root, iter::once((*id, cost)), nesting) // } // AigLanguage::Maj(children) => CompilingCost::with_children( diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index 6bf3251..ff3d164 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -5,6 +5,7 @@ //! - [1] Functionally-Complete Boolean Logic in Real DRAM Chips: Experimental Characterization and Analysis, 2024 //! - [2] FracDRAM: Fractional Values in Off-the-Shelf DRAM, 2022 //! - [3] PULSAR: Simultaneous Many-Row Activation for Reliable and High-Performance Computing in Off-the-Shelf DRAM Chips, 2024 +//! - [4] RowClone: fast and energy-efficient in-DRAM bulk data copy and initialization, 2013 //! //! # Submodules //! @@ -14,12 +15,12 @@ //! - [`optimization`] - applies architecture-specific optimizations to generated program (TODO: don't use here but in MLIR instead) //! - [ ] [`program`] //! - [`utils`] - utilities (helper macros/...) -mod architecture; -mod compiler; -mod egraph_extraction; -mod optimization; -mod program; -mod utils; +pub mod architecture; +pub mod compiler; +pub mod egraph_extraction; +pub mod optimization; +pub mod program; +pub mod utils; use std::sync::LazyLock; use std::time::Instant; @@ -31,9 +32,9 @@ use self::egraph_extraction::CompilingCostFunction; use eggmock::egg::{rewrite, EGraph, Extractor, Id, Rewrite, Runner}; use eggmock::{ - Aig, AigLanguage, AigReceiverFFI, Network, Receiver, ReceiverFFI, Rewriter, - RewriterFFI, // TODO: add AOIG-rewrite (bc FC-DRAM supports AND&OR natively)? + Aig, AigLanguage, AigReceiverFFI, Network, NetworkWithBackwardEdges, Receiver, ReceiverFFI, Rewriter, RewriterFFI, Signal // TODO: add AOIG-rewrite (bc FC-DRAM supports AND&OR natively)? }; +use log::{debug, logger}; use program::*; use architecture::*; @@ -45,6 +46,10 @@ static REWRITE_RULES: LazyLock>> = LazyLock::new(|| rewrite!("commute-and"; "(and ?a ?b)" => "(and ?b ?a)"), rewrite!("and-1"; "(and ?a 1)" => "?a"), rewrite!("and-0"; "(and ?a 0)" => "0"), + rewrite!("and-or"; "(! (or (! ?a) (! ?b)))" => "(and ?a ?b)"), // (De-Morgan) ! not checked whether this works + rewrite!("or-and"; "(! (and (! ?a) (! ?b)))" => "(or ?a ?b)" ), // (De-Morgan) ! not checked whether this works + // rewrite!("and-or"; "(and ?a ?b)" => "(! (or (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works + // rewrite!("or-and"; "(or ?a ?b)" => "(! (and (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works rewrite!("and-same"; "(and ?a ?a)" => "?a"), rewrite!("not_not"; "(! (! ?a))" => "?a"), // rewrite!("maj_1"; "(maj ?a ?a ?b)" => "?a"), @@ -64,12 +69,12 @@ struct CompilerOutput { /// (, output-nodes) #[borrows(graph)] #[covariant] + /// A network consists of nodes (accessed via `Extractor` and separately stored `outputs` (`Vec`) ntk: ( - Extractor<'this, CompilingCostFunction, AigLanguage, ()>, // `'this`=self-reference - Vec, + Extractor<'this, CompilingCostFunction, AigLanguage, ()>, // `'this`=self-reference, used to extract best-node from `E-Class` of `AigLanguage`-nodes based on `CompilingCostFunction` + Vec, // vector of outputs ), - /// Compiled Program - /// Program is compiled using previously (EGraph-)extracted `ntk` + /// Compiled Program Program is compiled using previously (EGraph-)extracted `ntk` #[borrows(ntk)] program: Program, } @@ -82,25 +87,33 @@ fn compiling_receiver<'a>( settings: CompilerSettings, ) -> impl Receiver + use<'a> { // REMINDER: EGraph implements `Receiver` + // TODO: deactivate e-graph rewriting, focus on compilation first EGraph::::new(()) - .map(move |(graph, outputs)| { // `.map()` of `Provider`-trait: + .map(move |(graph, outputs)| { // `.map()` of `Provider`-trait!, outputs=vector of EClasses + debug!("Input EGraph nodes: {:?}", graph.nodes()); + debug!("Input EGraph's EClasses : {:?}", graph.classes() + .map(|eclass| (eclass.id, &eclass.nodes) ) + .collect::)>>() + ); // 1. Create E-Graph: run equivalence saturation - let runner = measure_time!(Runner::default().with_egraph(graph).run(rules), "t_runner", settings.print_compilation_stats ); - - - if settings.verbose { - println!("== Runner Report"); - runner.print_report(); - } - - let graph = runner.egraph; + // debug("Running equivalence saturation..."); + // let runner = measure_time!(Runner::default().with_egraph(graph).run(rules), "t_runner", settings.print_compilation_stats ); + // + // + // if settings.verbose { + // println!("== Runner Report"); + // runner.print_report(); + // } + // + // let graph = runner.egraph; CompilerOutput::new( graph, |graph| { // 2. Given E-Graph: Retrieve best graph using custom `CompilingCostFunction` + debug!("Extracting..."); let extractor = measure_time!( Extractor::new( graph, @@ -108,16 +121,30 @@ fn compiling_receiver<'a>( ), // TODO: provide CostFunction !! "t_extractor", settings.print_compilation_stats ); + debug!("Outputs: {outputs:?}"); (extractor, outputs) // produce `ntk` }, |ntk| { // ===== MAIN CALL (actual compilation) ===== // 3. Compile program using extracted network + + debug!("Compiling..."); + debug!("Network outputs: {:?}", ntk.outputs().collect::>()); + let ntk_with_backward_edges = ntk.with_backward_edges(); + debug!("Network Leaves: {:?}", ntk_with_backward_edges.leaves().collect::>()); + debug!("Network Outputs of first leaf: {:?}", + ntk_with_backward_edges.node_outputs( + ntk_with_backward_edges.leaves().next().unwrap() + ).collect::>() + ); + // debug!("" let program = measure_time!( - compile(&ntk.with_backward_edges()), "t_compiler", settings.print_compilation_stats + compile(&ntk_with_backward_edges), "t_compiler", settings.print_compilation_stats ); // ===================== - // print program if compiler-setting is set + + // print program if compiler-setting is set + // TOOD: write program to output-file instead !! if settings.print_program || settings.verbose { if settings.verbose { println!("== Program") @@ -191,6 +218,7 @@ struct CompilerStatistics { extern "C" fn fcdram_compile(settings: CompilerSettings) -> AigReceiverFFI { // todo!() // TODO: create example `ARCHITECTURE` implementing `FCDRAMArchitecture` + env_logger::init(); let receiver = compiling_receiver(REWRITE_RULES.as_slice(), settings) .map(|output| { diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs index 073e822..6d8ff98 100644 --- a/rs/src/fc_dram/program.rs +++ b/rs/src/fc_dram/program.rs @@ -86,10 +86,7 @@ impl From for Program { impl Display for Program { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { for instr in &self.instructions { - match instr { - Instruction::FracOp(row) => write!(f, "AP({row})")?, // TODO: repeat this by the factor set in compiler-settings - Instruction::APA(row1, row2, ) => write!(f, "APA({row1},{row2})")?, - } + write!(f, "{}", instr)?; } Ok(()) } diff --git a/rs/src/lib.rs b/rs/src/lib.rs index 7144b8c..35503b7 100644 --- a/rs/src/lib.rs +++ b/rs/src/lib.rs @@ -1,4 +1,4 @@ #![allow(clippy::upper_case_acronyms)] -mod ambit; -mod fc_dram; +pub mod ambit; +pub mod fc_dram; From 5befa0cb1b0cc630927bea289250255b89a2dc60 Mon Sep 17 00:00:00 2001 From: alku662e Date: Fri, 23 May 2025 11:20:00 +0200 Subject: [PATCH 11/51] WIP --- rs/src/fc_dram/compiler.rs | 14 ++++++++++++-- rs/src/fc_dram/mod.rs | 5 +++-- rs/src/fc_dram/program.rs | 8 ++++++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index dbbccb7..e0fb9df 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -21,6 +21,14 @@ pub fn place_signals_onto_rows( } /// Compiles given `network` intto a FCDRAM-[`Program`] that can be run on given `architecture` +/// +/// General Procedure of compilation +/// 1) Map Logical-Ops to FCDRAM-Primitives (operating on virtual rows) +/// 2) Map virtual rows to actual physical rows (spilling/moving rows if necessary using `RowClone`) +/// - similarly to Register Allocation +/// +/// - [ ] TODO: increase success-rate using input replication ? at which point to add input replication? +/// /// - [ ] TODO: output in which rows /// - 1) data is expected to be placed before program runs /// - 2) outputs can be found after the program has run @@ -28,6 +36,7 @@ pub fn compile( network: &impl NetworkWithBackwardEdges, ) -> Program { + // debug!("Compiling {:?}", network); // 0. Setup: store all network-nodes yet to be compiled let comp_state = CompilationState::new(network); // initializes `.candidates()` with inputs + nodes whose src-operands are all inputs let program = Program::new(vec!()); @@ -39,8 +48,9 @@ pub fn compile( // TODO: get src-operands of outputs and place them appropriately (with knowledge about output // operands!) - let inputs = network.leaves(); - + // start with inputs + let primary_inputs = network.leaves(); + debug!("Primary inputs: {:?}", primary_inputs.collect::>()); // println!("{:?}", network.outputs().collect::>()); debug!("Nodes in network:"); diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index ff3d164..ffa94d6 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -46,8 +46,9 @@ static REWRITE_RULES: LazyLock>> = LazyLock::new(|| rewrite!("commute-and"; "(and ?a ?b)" => "(and ?b ?a)"), rewrite!("and-1"; "(and ?a 1)" => "?a"), rewrite!("and-0"; "(and ?a 0)" => "0"), - rewrite!("and-or"; "(! (or (! ?a) (! ?b)))" => "(and ?a ?b)"), // (De-Morgan) ! not checked whether this works - rewrite!("or-and"; "(! (and (! ?a) (! ?b)))" => "(or ?a ?b)" ), // (De-Morgan) ! not checked whether this works + // TODO: first add `AOIG`-language and add conversion AOIG<->AIG (so mockturtle's aig can still be used underneath) + // rewrite!("and-or"; "(! (or (! ?a) (! ?b)))" => "(and ?a ?b)"), // (De-Morgan) ! not checked whether this works + // rewrite!("or-and"; "(! (and (! ?a) (! ?b)))" => "(or ?a ?b)" ), // (De-Morgan) ! not checked whether this works // rewrite!("and-or"; "(and ?a ?b)" => "(! (or (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works // rewrite!("or-and"; "(or ?a ?b)" => "(! (and (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works rewrite!("and-same"; "(and ?a ?a)" => "?a"), diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs index 6d8ff98..58c850f 100644 --- a/rs/src/fc_dram/program.rs +++ b/rs/src/fc_dram/program.rs @@ -4,6 +4,7 @@ use super::architecture::{FCDRAMArchitecture, RowAddress}; use crate::fc_dram::architecture::Instruction; use eggmock::{Id, Aig, NetworkWithBackwardEdges, Signal}; +use std::collections::HashMap; use std::fmt::{Display, Formatter}; use std::ops::{Deref, DerefMut}; @@ -11,6 +12,11 @@ use std::ops::{Deref, DerefMut}; #[derive(Debug, Clone)] pub struct Program { pub instructions: Vec, + /// Specifies where row-operands should be placed prior to calling this program + /// (This is a convention which tells the user of this lib where the data should be placed within the DRAM before executing this program) + pub input_row_operands_placementl: HashMap, + /// Specifies into which rows output-operands will have been placed after the program has run successfully + pub output_row_operands_placementl: HashMap, } #[derive(Debug, Clone)] @@ -24,6 +30,8 @@ impl Program { pub fn new(instructions: Vec) -> Self { Self { instructions, + input_row_operands_placementl: HashMap::new(), + output_row_operands_placementl: HashMap::new(), } } } From b3fcdcc01975c931d9122b9f664814b2a43d047d Mon Sep 17 00:00:00 2001 From: alku662e Date: Tue, 27 May 2025 23:37:08 +0200 Subject: [PATCH 12/51] WIP: Working on simultaneous row activation, preparing Row Allocation Strategies --- rs/src/fc_dram/architecture.rs | 56 ++++++++++++++++++++++------- rs/src/fc_dram/compiler.rs | 10 ++++-- rs/src/fc_dram/egraph_extraction.rs | 5 ++- rs/src/fc_dram/mod.rs | 6 ++++ src/fcdram.h | 1 + src/main.cpp | 1 + 6 files changed, 63 insertions(+), 16 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 0406b18..5f6e528 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -2,17 +2,16 @@ //! - [`FCDRAMArchitecture`] = DRAM-module-specific specific implementation of FCDRAMArchitecture //! - [`Instruction`] = contains all instructions supported by FC-DRAM architecture //! - [ ] `RowAddress`: utility functions to get subarray-id and row-addr within that subarray from -//! RowAddress (eg via bit-shifting given bitmasks for subarray-id & row-addr to put on-top of -//! RowAddress +//! +//! RowAddress (eg via bit-shifting given bitmasks for subarray-id & row-addr to put on-top of RowAddress -use std::{fmt::{Display, Formatter}, ops::Add, sync::LazyLock}; +use std::{collections::{HashMap, HashSet}, fmt::{Display, Formatter}, sync::LazyLock}; -use itertools::Itertools; use log::debug; /// Main variable specifying architecture of DRAM-module for which to compile for -/// - this is currently just an example implementation for testing purpose; (TODO: make this -/// configurable at runtime) +/// - this is currently just an example implementation for testing purpose; (TODO: make this configurable at runtime) +/// /// TODO: add field to simulate row-decoder circuitry, needed for impl Simultaneous-row-activation /// TODO: make this configurable at runtime pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { @@ -30,23 +29,23 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { let get_activated_rows_from_apa = |row1: RowAddress, row2: RowAddress| -> Vec { // 1. Define Predecoders by defining for which of the bits they're responsible // each Predecoder is resonsible for some of the lower order bits - let predecoder_bitmasks = vec!( + let predecoder_bitmasks = [ 0b110000000, // first predecoder (PE) predecodes bits[8,7] 0b001100000, // Predecoder PD 0b000011000, // Predecoder PC 0b000000110, // Predecoder PB 0b000000001, // last predecoder (PA) predecodes bits[0] - ); + ]; // for each predecoder store which bits will remain set due to `APA(row1,row)`: - let overlapping_bits = vec!( + let overlapping_bits = [ // latches set by `ACT(row1)` --- latches set by `ACT(row2)` [ row1 & predecoder_bitmasks[0], row2 & predecoder_bitmasks[0]], [ row1 & predecoder_bitmasks[1], row2 & predecoder_bitmasks[1]], [ row1 & predecoder_bitmasks[2], row2 & predecoder_bitmasks[2]], [ row1 & predecoder_bitmasks[3], row2 & predecoder_bitmasks[3]], [ row1 & predecoder_bitmasks[4], row2 & predecoder_bitmasks[4]], - ); + ]; let mut activated_rows = vec!(); // TODO: get other activated rows and add them to `activated_rows` // compute all simultaneously activated rows @@ -59,10 +58,12 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { }); activated_rows.push(activated_row.0); } - debug!("`APA({row1},{row2})` activates the following rows simultaneously: {activated_rows:?}"); + // debug!("`APA({row1},{row2})` activates the following rows simultaneously: {activated_rows:?}"); activated_rows.dedup(); // no need for `.unique()` since this implementation adds equivalent RowAddresses one after the other (!check!!) // NOTE: works in-place - activated_rows + + // remove duplicate entries + activated_rows.into_iter().collect::>().into_iter().collect() }; let get_distance_of_row_to_sense_amps = |row: RowAddress| -> RowDistanceToSenseAmps { @@ -93,10 +94,29 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { } }; + let mut precomputed_simultaneous_row_activations = HashMap::new(); + for i in 0..ROWS_PER_SUBARRAY { + precomputed_simultaneous_row_activations.insert((i,i), vec!(i)); // special case: no other row is activated when executing `APA(r1,r1)` + for j in i+1..ROWS_PER_SUBARRAY { + let activated_rows = get_activated_rows_from_apa(i, j); + precomputed_simultaneous_row_activations.insert((i,j), activated_rows.clone()); + precomputed_simultaneous_row_activations.insert((j,i), activated_rows); + } + } + debug!("Precomputed SRAs: {:#?}", precomputed_simultaneous_row_activations.iter().take(20).collect::>()); + + let precomputed_activated_rows_nr_to_row_address_tuple_mapping= precomputed_simultaneous_row_activations.iter().fold(HashMap::new(), |mut acc: HashMap>, (key, vec)| { + acc.entry(vec.len() as u8).or_default().push(*key); + acc + }); + debug!("SRAs row-nr to row-addr mapping: {:#?}", precomputed_activated_rows_nr_to_row_address_tuple_mapping.iter().map(|(k,v)| format!("{k} rows activated in {} addr-combinations", v.len())).collect::>()); + FCDRAMArchitecture { nr_subarrays: NR_SUBARRAYS, rows_per_subarray: ROWS_PER_SUBARRAY, get_activated_rows_from_apa, + precomputed_simultaneous_row_activations, + precomputed_activated_rows_nr_to_row_address_tuple_mapping, get_distance_of_row_to_sense_amps, } }); @@ -110,7 +130,16 @@ pub struct FCDRAMArchitecture { /// Nr of rows in a single subarray pub rows_per_subarray: i64, /// Returns all activated rows when issuing `APA(row1, row2)` - pub get_activated_rows_from_apa: fn(RowAddress, RowAddress) -> Vec, + /// - NOTE: `row1`,`row2` are expected to reside in adjacent subarrays + /// - NOTE: the simultaneously activated rows are expected to have the same addresses in both subarrays + /// - eg `APA(11,29)` (with 1st digit=subarray-id, 2nd digit=row-id) could simultaneously activate rows `0,1,7,9` in subarray1 and subarray2 + get_activated_rows_from_apa: fn(RowAddress, RowAddress) -> Vec, + /// Stores which rows are simultaneously activated for each combination of Row-Addresses (provided to `APA`-operation) + /// - REASON: getting the simultaneously activated will probably be requested very frequently (time-space tradeoff, rather than recomputing on every request)) + pub precomputed_simultaneous_row_activations: HashMap<(RowAddress, RowAddress), Vec>, + /// For each nr of activated rows get which tuple of row-addresses activate the given nr of rows + /// - use to eg restrict the choice of row-addresses for n-ary AND/OR (eg 4-ary AND -> at least activate 8 rows; more rows could be activated when using input replication) + pub precomputed_activated_rows_nr_to_row_address_tuple_mapping: HashMap>, // TODO: params for calculating distance btw row and sense-amp, ... (particularly where // sense-amps are placed within the DRAM module ?! /// Given a row-addr this returns the distance of it to the sense-amps (!determinse @@ -227,6 +256,7 @@ impl Display for Instruction { impl Instruction { /// Return Addreses of Rows which are used by this instruction (=operand-rows AND result-row) /// - REMINDER: although only two row-operands are given to `APA`, more rows can be/are affected due to *Simultaneous Row Activation* (see [3]) + /// /// TODO pub fn used_addresses( &self, diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index e0fb9df..1350326 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -92,7 +92,11 @@ impl Compiler { } fn compile() -> Program { - Program { instructions: vec!() } + Program { + instructions: vec!(), + input_row_operands_placementl: HashMap::new(), + output_row_operands_placementl: HashMap::new(), + } } /// Place inputs onto appropriate rows @@ -115,7 +119,9 @@ pub struct CompilationState<'n, N> { /// - ✓ use to determine at which row-addresses output-data can be found signal_to_row_mapping: HashMap, /// Rows which are free (don't hold data which is still needed) - free_rows: Vec + free_rows: Vec, + /// + row_states: [i32; ARCHITECTURE.nr_subarrays as usize ] } impl<'n, P: NetworkWithBackwardEdges> CompilationState<'n, P> { diff --git a/rs/src/fc_dram/egraph_extraction.rs b/rs/src/fc_dram/egraph_extraction.rs index 7853a7b..dccd172 100644 --- a/rs/src/fc_dram/egraph_extraction.rs +++ b/rs/src/fc_dram/egraph_extraction.rs @@ -42,11 +42,14 @@ impl CostFunction for CompilingCostFunction { AigLanguage::Input(_node) => { // FCDRAMArchitecture::get_distance_of_row_to_sense_amps(&self, row) // TODO: make cost depend on data-pattern of input? - 2 + 0 }, AigLanguage::And([_node1, _node2]) => { + // TODO: get mapping of AND to FCDRAM-Primitives and get how many mem-cycles they take 3 }, + // TODO: increase cost of NOT? (since it moves the value to another subarray!) + // eg prefer `OR(a,b)` to `NOT(AND( NOT(a), NOT(b)))` AigLanguage::Not(_node) => { 1 }, diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index ffa94d6..ed57a9c 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -174,6 +174,12 @@ struct CompilerSettings { min_success_rate: u64, // /// Location to config-file holding fcdram-specific configs // fcdram_config_file: Path, + + /// Nr of rows to use as a safe space for operands per subarray + /// - REMINDER: after `AND`/`OR`-ops the src-operands are overwritten by the op-result, so to reuse operands they're put into specially designated rows (="safe-space") which won't be overwritten + /// - Ops reusing those operands have to clone the values from the safe-space prior to issuing the Op + /// - NOTE: rows which are used as safe-space are determined by analyzing patterns in Simultaneous-row activation for the specific architecture (to ensure that safe-space rows won't be activated on any combination of row-addresses) + safe_space_rows_per_subarray: u16, } struct FCDramRewriter(CompilerSettings); diff --git a/src/fcdram.h b/src/fcdram.h index 6619355..35f9e85 100644 --- a/src/fcdram.h +++ b/src/fcdram.h @@ -23,6 +23,7 @@ extern "C" bool print_program; bool verbose; bool print_compilation_stats; + uint16_t safe_space_rows_per_subarray; }; eggmock::aig_receiver fcdram_rewriter( fcdram_compiler_settings settings ); diff --git a/src/main.cpp b/src/main.cpp index 4dcdd6b..b842ed0 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -36,6 +36,7 @@ void run_fcdram_example(aig_network in) .print_program = true, .verbose = true, .print_compilation_stats = true, + .safe_space_rows_per_subarray = 16, } ) ); // std::cout << "IC:" << result.instruction_count << std::endl; // std::cout << "t1:" << result.t_runner << std::endl; From fee587df126973097c906b55138d3695a0079cd9 Mon Sep 17 00:00:00 2001 From: alku662e Date: Wed, 4 Jun 2025 10:44:49 +0200 Subject: [PATCH 13/51] WIP: started implementing instruction scheduling and row allocation --- rs/src/fc_dram/architecture.rs | 33 +- rs/src/fc_dram/compiler.rs | 489 +++++++++++++++++++++------- rs/src/fc_dram/egraph_extraction.rs | 2 +- rs/src/fc_dram/mod.rs | 13 +- rs/src/fc_dram/optimization.rs | 8 +- 5 files changed, 413 insertions(+), 132 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 5f6e528..7cdd19c 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -9,14 +9,16 @@ use std::{collections::{HashMap, HashSet}, fmt::{Display, Formatter}, sync::Lazy use log::debug; +pub const NR_SUBARRAYS: i64 = 2i64.pow(7); +pub const ROWS_PER_SUBARRAY: i64 = 2i64.pow(9); /// Main variable specifying architecture of DRAM-module for which to compile for /// - this is currently just an example implementation for testing purpose; (TODO: make this configurable at runtime) /// /// TODO: add field to simulate row-decoder circuitry, needed for impl Simultaneous-row-activation /// TODO: make this configurable at runtime pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { - const NR_SUBARRAYS: i64 = 2i64.pow(7); - const ROWS_PER_SUBARRAY: i64 = 2i64.pow(9); + + let mut row_activated_by_rowaddress_tuple: HashMap> = HashMap::new(); // for each row store which RowAddress-combinations activate it // Implementation of the Hypothetical Row Decoder from [3] Chap4.2 // - GWLD (Global Wordline Decoder)=decode higher bits to select addressed subarray @@ -61,7 +63,6 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { // debug!("`APA({row1},{row2})` activates the following rows simultaneously: {activated_rows:?}"); activated_rows.dedup(); // no need for `.unique()` since this implementation adds equivalent RowAddresses one after the other (!check!!) // NOTE: works in-place - // remove duplicate entries activated_rows.into_iter().collect::>().into_iter().collect() }; @@ -94,35 +95,45 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { } }; + // precompute things based on given SRA (simultaneous row activation function) let mut precomputed_simultaneous_row_activations = HashMap::new(); for i in 0..ROWS_PER_SUBARRAY { precomputed_simultaneous_row_activations.insert((i,i), vec!(i)); // special case: no other row is activated when executing `APA(r1,r1)` for j in i+1..ROWS_PER_SUBARRAY { let activated_rows = get_activated_rows_from_apa(i, j); precomputed_simultaneous_row_activations.insert((i,j), activated_rows.clone()); - precomputed_simultaneous_row_activations.insert((j,i), activated_rows); + precomputed_simultaneous_row_activations.insert((j,i), activated_rows.clone()); + + for row in activated_rows { + row_activated_by_rowaddress_tuple.entry(row) + .or_default() + .insert((i,j)); + } } } - debug!("Precomputed SRAs: {:#?}", precomputed_simultaneous_row_activations.iter().take(20).collect::>()); + // debug!("Precomputed SRAs: {:#?}", precomputed_simultaneous_row_activations.iter().take(20).collect::>()); - let precomputed_activated_rows_nr_to_row_address_tuple_mapping= precomputed_simultaneous_row_activations.iter().fold(HashMap::new(), |mut acc: HashMap>, (key, vec)| { + let sra_degree_to_rowaddress_combinations= precomputed_simultaneous_row_activations.iter().fold(HashMap::new(), |mut acc: HashMap>, (key, vec)| { acc.entry(vec.len() as u8).or_default().push(*key); acc }); - debug!("SRAs row-nr to row-addr mapping: {:#?}", precomputed_activated_rows_nr_to_row_address_tuple_mapping.iter().map(|(k,v)| format!("{k} rows activated in {} addr-combinations", v.len())).collect::>()); + // output how many combinations of row-addresses activate the given nr of rows + debug!("SRAs row-nr to row-addr mapping: {:#?}", sra_degree_to_rowaddress_combinations.iter().map(|(k,v)| format!("{k} rows activated in {} addr-combinations", v.len())).collect::>()); FCDRAMArchitecture { nr_subarrays: NR_SUBARRAYS, rows_per_subarray: ROWS_PER_SUBARRAY, get_activated_rows_from_apa, precomputed_simultaneous_row_activations, - precomputed_activated_rows_nr_to_row_address_tuple_mapping, + row_activated_by_rowaddress_tuple, + sra_degree_to_rowaddress_combinations, get_distance_of_row_to_sense_amps, } }); /// - ! must be smaller than `rows_per_subarray * nr_subarrays` (this is NOT checked!) pub type RowAddress = i64; +pub type SubarrayId = i64; pub struct FCDRAMArchitecture { /// Nr of subarrays in a DRAM module @@ -137,9 +148,11 @@ pub struct FCDRAMArchitecture { /// Stores which rows are simultaneously activated for each combination of Row-Addresses (provided to `APA`-operation) /// - REASON: getting the simultaneously activated will probably be requested very frequently (time-space tradeoff, rather than recomputing on every request)) pub precomputed_simultaneous_row_activations: HashMap<(RowAddress, RowAddress), Vec>, - /// For each nr of activated rows get which tuple of row-addresses activate the given nr of rows + /// Map degree of SRA (=nr of activated rows by that SRA) to all combinations of RowAddresses which have that degree of SRA /// - use to eg restrict the choice of row-addresses for n-ary AND/OR (eg 4-ary AND -> at least activate 8 rows; more rows could be activated when using input replication) - pub precomputed_activated_rows_nr_to_row_address_tuple_mapping: HashMap>, + pub sra_degree_to_rowaddress_combinations: HashMap>, + /// Stores for every rows which combinations of RowAddresses activate that row (needed for finding appropriate safe space rows) + pub row_activated_by_rowaddress_tuple: HashMap>, // TODO: params for calculating distance btw row and sense-amp, ... (particularly where // sense-amps are placed within the DRAM module ?! /// Given a row-addr this returns the distance of it to the sense-amps (!determinse diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 1350326..936f758 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -1,160 +1,421 @@ //! +//! - [`Compiler`] = performs actual compilation +//! - [`CompilationState`] = stores states encountered during compilation (eg which values reside in which rows, rows containing live values, ..) +//! - also see [`RowState`] +//! - [`SchedulingPrio`] = used to prioritize/order instruction for Instruction Scheduling +//! //! - [`compile()`] = main function - compiles given logic network for the given [`architecture`] //! into a [`program`] using some [`optimization`] +use crate::ambit::Architecture; + use super::{ - architecture::{FCDRAMArchitecture, Instruction, ARCHITECTURE}, optimization::optimize, Program, ProgramState, RowAddress + architecture::{FCDRAMArchitecture, Instruction, SubarrayId, ARCHITECTURE, NR_SUBARRAYS, ROWS_PER_SUBARRAY}, optimization::optimize, CompilerSettings, Program, ProgramState, RowAddress }; -use eggmock::{Aig, ComputedNetworkWithBackwardEdges, Id, NetworkWithBackwardEdges, Node, Signal}; +use eggmock::{Aig, AigLanguage, ComputedNetworkWithBackwardEdges, Id, NetworkWithBackwardEdges, Node, Signal}; use log::debug; -use rustc_hash::{FxHashMap, FxHashSet}; -use std::collections::HashMap; +use priority_queue::PriorityQueue; +use std::{cmp::Ordering, collections::{BinaryHeap, HashMap, HashSet}}; -/// Places given signal onto rows (modifying `comp_state`) -/// - NOTE: `signals` are assumed to be activated together during execution and are hence placed s.t. minimal nr of other rows holding valid data are simultaneously activated -/// - [ ] make sure that operation to be executed on those rows won't simultaneously activate other rows holding valid data which will be used by future operations pub fn place_signals_onto_rows( comp_state: &mut impl NetworkWithBackwardEdges, signals: Vec ) { } -/// Compiles given `network` intto a FCDRAM-[`Program`] that can be run on given `architecture` -/// -/// General Procedure of compilation -/// 1) Map Logical-Ops to FCDRAM-Primitives (operating on virtual rows) -/// 2) Map virtual rows to actual physical rows (spilling/moving rows if necessary using `RowClone`) -/// - similarly to Register Allocation -/// -/// - [ ] TODO: increase success-rate using input replication ? at which point to add input replication? -/// -/// - [ ] TODO: output in which rows -/// - 1) data is expected to be placed before program runs -/// - 2) outputs can be found after the program has run -pub fn compile( - network: &impl NetworkWithBackwardEdges, -) -> Program { - - // debug!("Compiling {:?}", network); - // 0. Setup: store all network-nodes yet to be compiled - let comp_state = CompilationState::new(network); // initializes `.candidates()` with inputs + nodes whose src-operands are all inputs - let program = Program::new(vec!()); - - let outputs = network.outputs(); - - // TODO: how to get src-operands of `outputs` ?? - // debug!("{:?}", network.node_outputs(outputs.next()).collect()); - // TODO: get src-operands of outputs and place them appropriately (with knowledge about output - // operands!) - - // start with inputs - let primary_inputs = network.leaves(); - debug!("Primary inputs: {:?}", primary_inputs.collect::>()); - - // println!("{:?}", network.outputs().collect::>()); - debug!("Nodes in network:"); - for node in network.iter() { - debug!("{:?},", node); +pub struct Compiler { + /// compiler-options set by user + settings: CompilerSettings, + /// Stores the state of all rows at each compilation step + comp_state: CompilationState, + /// These rows are reserved in EVERY subarray for storing intermediate results + /// - NOTE: only initialized when compilation starts + safe_space_rows: Vec, + /// RowCombinations which are not allowed to be issued via `APA` since they activate rows within the safe-space + blocked_row_combinations: HashSet<(RowAddress,RowAddress)>, +} + +impl Compiler { + pub fn new(settings: CompilerSettings) -> Self { + Compiler{ + settings, + comp_state: CompilationState::new( HashMap::new() ), + safe_space_rows: vec!(), + blocked_row_combinations: HashSet::new(), + } } - while !comp_state.candidates.is_empty() { - // 1. Make sure rows are placed appropriately (starting from inputs) + /// Compiles given `network` into a FCDRAM-[`Program`] that can be run on given `architecture` + /// + /// General Procedure of compilation + /// 1) Map Logical-Ops to FCDRAM-Primitives (operating on virtual rows) + /// 2) Map virtual rows to actual physical rows (spilling/moving rows if necessary using `RowClone`) + /// - similarly to Register Allocation + /// + /// - [ ] TODO: increase success-rate using input replication ? at which point to add input replication? + /// + /// - [ ] TODO: output in which rows + /// - 1) data is expected to be placed before program runs + /// - 2) outputs can be found after the program has run + pub fn compile( + &mut self, + network: &impl NetworkWithBackwardEdges, + ) -> Program { + + // 0. Prepare compilation: select safe-space rows, place inputs into DRAM module + self.init_comp_state(network); + + let mut program = Program::new(vec!()); + + let outputs = network.outputs(); + + + // TODO: how to get src-operands of `outputs` ?? + // debug!("{:?}", network.node_outputs(outputs.next()).collect()); + // TODO: get src-operands of outputs and place them appropriately (with knowledge about output + // operands!) + + // start with inputs + let primary_inputs = network.leaves(); + debug!("Primary inputs: {:?}", primary_inputs.collect::>()); + + // println!("{:?}", network.outputs().collect::>()); + debug!("Nodes in network:"); + for node in network.iter() { + debug!("{:?},", node); + } + + while !self.comp_state.candidates.is_empty() { + // TODO: extend program with instr that is executed next + let executed_instructions = &mut self.execute_next_instruction(); + program.instructions.append(executed_instructions); + } + + // let (outputs, leaves) = (network.outputs(), network.leaves()); + + // Program { + // instructions: vec!(Instruction::FracOp(-1)) , + // } + // todo!() + // optimize(&mut program); + program + } + + /// Allocates safe-space rows inside the DRAM-module + /// - [ ] TODO: improve algo (in terms of space efficiency) + fn alloc_safe_space_rows(&mut self, nr_safe_space_rows: u8) { + + let supported_nr_safe_space_rows = vec!(1,2,4,8,16,32,64); + if !supported_nr_safe_space_rows.contains(&nr_safe_space_rows) { + panic!("Only the following nr of rows are supported to be activated: {:?}, given: {}", supported_nr_safe_space_rows, nr_safe_space_rows); + } + + // TODO: this is just a quick&dirty implementation. Solving this (probably NP-complete) problem of finding optimal safe-space rows is probably worth solving for every DRAM-module once + let safe_space_rows = { + + // choose any row-addr combi activating exactly `nr_safe_space_rows` and choose all that activated rows to be safe-space rows + let chosen_row_addr_combi = ARCHITECTURE.sra_degree_to_rowaddress_combinations + .get(&nr_safe_space_rows).unwrap() + .first().unwrap(); // just take the first row-combi that activates `nr_safe_space_rows` + ARCHITECTURE.precomputed_simultaneous_row_activations.get(chosen_row_addr_combi).unwrap() + }; + + // deactivate all combination which could activate safe-space rows + for row in safe_space_rows { + for row_combi in ARCHITECTURE.row_activated_by_rowaddress_tuple.get(row).unwrap() { + self.blocked_row_combinations.insert(*row_combi); + } + } + } + + /// Place inputs onto appropriate rows + /// TODO: algo which looks ahead which input-row-placement might be optimal (->reduce nr of move-ops to move intermediate results around & keep inputs close to sense-amps + /// TODO: parititon logic network into subgraphs s.t. subgraphs can be mapped onto subarrays reducing nr of needed moves + fn place_inputs(&mut self, mut inputs: Vec) { + // naive implementation: start placing input just on consecutive rows + // TODO: change input operand placement to be more optimal (->taking into account future use of those operands) + for subarray in 0..NR_SUBARRAYS { + for row in self.safe_space_rows.iter() { + // TODO allocate safe-space rows + if let Some(next_input) = inputs.pop() { + let initial_row_state = RowState { is_compute_row: false, live_value: Some(next_input)}; + let initial_value_state = ValueState { is_computed: true, row_location: Some((subarray, *row))}; + self.comp_state.dram_state.insert((subarray, *row), initial_row_state); + self.comp_state.value_states.insert(next_input, initial_value_state); + } else { + return; + } + } + } + } + + /// Initialize candidates with all nodes that are computable + fn initialize_candidates(&mut self, network: &impl NetworkWithBackwardEdges) { + let inputs = network.leaves(); + + // init candidates with all nodes having inputs as src-operands + + for input in inputs { + // TODO: NEXT - add all non-input nodes whose src-operands are all inputs + let mut outputs_with_prio: PriorityQueue = network.node_outputs(input) + .map( |output| (output, self.compute_scheduling_prio_for_node(output, network)) ) + .collect(); + self.comp_state.candidates.append(&mut outputs_with_prio); + debug!("{:?} has the following outputs: {:?}", input, network.node_outputs(input).collect::>()); + } + + debug!("Selected candidates: {:?}", self.comp_state.candidates); + // delete candidates which have non-input src operands + } + + /// Initialize compilation state: mark unsuable rows (eg safe-space rows), place input operands + fn init_comp_state(&mut self, network: &impl NetworkWithBackwardEdges) { + // debug!("Compiling {:?}", network); + // 0.1 Allocate safe-space rows (for storing intermediate values safely + self.alloc_safe_space_rows(self.settings.safe_space_rows_per_subarray); + // 0.2 Place all inputs and mark them as being live + self.place_inputs( network.leaves().collect::>() ); // place input-operands into rows + // 0.3 Setup: store all network-nodes yet to be compiled + self.initialize_candidates(network); + } + + /// Executes the next instruction based on the following criteria: + /// 1. Select candidate which operates on most values which are used last (->to release safe-space right after) + /// 2. IF EQUAL: Select candidate with most successors (->pushes more candidates to select from in next step) + /// 3. IF EQUAL: Select any of the remaining candidates + /// + /// - [ ] make sure that operation to be executed on those rows won't simultaneously activate other rows holding valid data which will be used by future operations + fn execute_next_instruction(&self) -> Vec { + // 1. Make sure rows are placed appropriately in the rows to be simultaneously activated (starting from inputs) // 1.1 Determine in which rows src-operands for the next candidate-op are located // 1.2 Check if issuing `APA(src1,src2)` would activate other rows which hold valid data - // 1.2.1 if yes: move data to other rows for performing this op + // 1.2.1 if yes: move data to other rows for performing this op // 1.3 Prepare performing the actual op (setup reference subarray) - // 1.3.1 If activated rows in reference subarray holds valid data: spill to other rows + // 1.3.1 If activated rows in reference subarray holds valid data: spill to other rows // 1.4 Issue actual operation + vec!() } - // let (outputs, leaves) = (network.outputs(), network.leaves()); + /// Compute `SchedulingPrio` for a given node + /// - used for inserting new candidates + /// TODO: write unittest for this function + fn compute_scheduling_prio_for_node(&self, id: Id, network: &impl NetworkWithBackwardEdges) -> SchedulingPrio { + let nr_last_value_uses = network.node(id).inputs() // for each input check whether `id` is the last node using it + .iter() + .fold(0, |acc, input| { + let input_id = Signal::node_id(input); + let non_computed_outputs: Vec = network.node_outputs(input_id) // get all other nodes relying on this input + .filter(|out| + *out != id && + !(self.comp_state.value_states.get(out) + .unwrap_or(&ValueState{is_computed: false, row_location: None}) // no entry means this is the first time accessing this value + .is_computed) + ) // filter for uses of `input` which still rely on it (=those that are not computed yet, except for currently checked node + .collect(); + if non_computed_outputs.is_empty() { + acc + 1 + } else { + acc + } + }); - // Program { - // instructions: vec!(Instruction::FracOp(-1)) , - // } - // todo!() - // optimize(&mut program); - program + SchedulingPrio { + nr_last_value_uses, + nr_src_operands: network.node(id).inputs().len(), + nr_result_operands: network.node_outputs(id).collect::>().len(), + } + } } -pub struct Compiler { - // comp_state: CompilationState<'n, N>, +/// Stores the current state of a row at a concrete compilations step +pub struct RowState { + /// True iff that row is currently: 1) Not a safe-sapce row, 2) Doesn't activate any safe-sapce rows, 3) Isn't holding valid values in the role of a reference-subarray row + is_compute_row: bool, + /// `None` if the value inside this row is currently not live + live_value: Option, } -impl Compiler { - fn new() -> Self { - Compiler{} - } +pub struct ValueState { + /// Whether the value has already been computed (->only then it could reside in a row) + /// - the value could also have been computed but spilled already on its last use + /// - helps determining whether src-operand is the last use: for all other output operands of that source operand just check whether they have been already computed + is_computed: bool, + /// Row in which the value resides + row_location: Option<(SubarrayId, RowAddress)>, +} - fn compile() -> Program { - Program { - instructions: vec!(), - input_row_operands_placementl: HashMap::new(), - output_row_operands_placementl: HashMap::new(), +/// Keep track of current progress of the compilation (eg which rows are used, into which rows data is placed, ...) +pub struct CompilationState { + /// For each row in the dram-module store its state + dram_state: HashMap<(SubarrayId, RowAddress), RowState>, + /// Stores row in which an intermediate result (which is still to be used by future ops) is currently located (or whether it has been computed at all) + value_states: HashMap, + /// List of candidates (ops ready to be issued) prioritized by some metric by which they are scheduled for execution + /// - NOTE: calculate Nodes `SchedulingPrio` using + candidates: PriorityQueue, +} + +impl CompilationState { + pub fn new(dram_state: HashMap<(SubarrayId, RowAddress), RowState>) -> Self { + Self { + dram_state, + value_states: HashMap::new(), + candidates: PriorityQueue::new(), } } +} - /// Place inputs onto appropriate rows - /// TODO: algo which looks ahead which input-row-placement might be optimal (->reduce nr of - /// move-ops to move intermediate results around & keep inputs close to sense-amps - fn init_rows_with_inputs() { +// impl<'n, P: NetworkWithBackwardEdges> CompilationState<'n, P> { +// /// initializes `self..candidates()` with inputs + nodes whose src-operands are all inputs +// pub fn new(network: &'n P) -> Self { +// let mut candidates = FxHashSet::default(); +// // check all parents of leaves whether they have only leaf children, in which case they are +// // candidates (since all of their inputs are calculated then) +// for leaf in network.leaves() { // =inputs +// for candidate_id in network.node_outputs(leaf) { +// let candidate = network.node(candidate_id); +// if candidate +// .inputs() +// .iter() +// .all(|signal| network.node(signal.node_id()).is_leaf()) +// { +// candidates.insert((candidate_id, candidate)); +// } +// } +// } +// +// // let outputs = network +// // .outputs() +// // .enumerate() +// // .map(|(id, sig)| (sig.node_id(), (id as i64, sig))) +// // .collect(); +// +// let total_nr_rows_in_dram_module = ARCHITECTURE.nr_subarrays; +// Self { +// network, +// candidates, +// signal_to_row_mapping: HashMap::new(), +// free_rows: (0..=total_nr_rows_in_dram_module).collect(), // we start with all rows being free at the beginning +// } +// } +// } +/// Contains info to order nodes for Instruction Scheduling +/// GOAL: minimize register usage: +/// 1. Number of last-value-uses +/// 2. Number src operands: ASSUMPTIONS=executing that op reduces value-usage of all of those inputs, higher prob. of last-use in next steps) +/// 3. Total number of nodes that have `node` as input-operands +/// - possible extension: weigh result-operands based on how many of their src-operands are already computed +#[derive(PartialEq, Eq, Debug)] +struct SchedulingPrio { + /// Nr of values which are used last by that node + nr_last_value_uses: u64, + /// Number of source operands the Node has + nr_src_operands: usize, + /// Number of result operands the Node has + nr_result_operands: usize, +} + +impl Ord for SchedulingPrio { + fn cmp(&self, other: &Self) -> Ordering { + self.nr_last_value_uses.cmp(&other.nr_last_value_uses) + .then(self.nr_src_operands.cmp(&other.nr_src_operands)) // if `nr_last_value_uses` is equal + .then(self.nr_result_operands.cmp(&other.nr_result_operands)) } } -/// Keep track of current progress of the compilation (eg which rows are used, into which rows data is placed, ...) -pub struct CompilationState<'n, N> { - /// Logic-Network which is to be compiled - network: &'n N, - /// Signals whose inputs have already been calculated and are currently placed in some row in - /// the DRAM module (see [`Self::signal_to_row_mapping`]) - candidates: FxHashSet<(Id, Aig)>, - - /// Stores which intermediate results are stored at which row-addresses - /// - ✓ use to determine at which row-addresses output-data can be found - signal_to_row_mapping: HashMap, - /// Rows which are free (don't hold data which is still needed) - free_rows: Vec, - /// - row_states: [i32; ARCHITECTURE.nr_subarrays as usize ] +impl PartialOrd for SchedulingPrio { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } } -impl<'n, P: NetworkWithBackwardEdges> CompilationState<'n, P> { - /// initializes `self..candidates()` with inputs + nodes whose src-operands are all inputs - pub fn new(network: &'n P) -> Self { - let mut candidates = FxHashSet::default(); - // check all parents of leaves whether they have only leaf children, in which case they are - // candidates (since all of their inputs are calculated then) - for leaf in network.leaves() { // =inputs - for candidate_id in network.node_outputs(leaf) { - let candidate = network.node(candidate_id); - if candidate - .inputs() - .iter() - .all(|signal| network.node(signal.node_id()).is_leaf()) - { - candidates.insert((candidate_id, candidate)); - } - } - } +#[cfg(test)] +mod tests { + use eggmock::egg::{self, EGraph, Extractor, RecExpr}; + use eggmock::Network; - // let outputs = network - // .outputs() - // .enumerate() - // .map(|(id, sig)| (sig.node_id(), (id as i64, sig))) - // .collect(); + use crate::fc_dram::egraph_extraction::CompilingCostFunction; + + use super::*; // import all elements from parent-module + use std::sync::{LazyLock, Mutex}; + + // ERROR: `eggmock`-API doesn't allow this.. + // // For data shared among unittests but initalized only once + // static TEST_DATA: LazyLock<_> = LazyLock::new(|| { + // let mut egraph: EGraph = Default::default(); + // let my_expression: RecExpr = "(and (and a c) (and b c))".parse().unwrap(); + // let extractor = Extractor::new( &egraph, CompilingCostFunction {}); + // let ntk = (extractor, vec!(egg::Id::from(9))); + // + // ComputedNetworkWithBackwardEdges::new(&ntk) + // }); + + static TEST_COMPILER: LazyLock> = LazyLock::new(|| { + Mutex::new( + Compiler::new(CompilerSettings { print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, safe_space_rows_per_subarray: 16 } ) + ) + }); + + #[test] // mark function as test-fn + fn test_candidate_initialization() { + env_logger::init(); + + let mut egraph: EGraph = Default::default(); + let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); + egraph.add_expr(&my_expression); + let extractor = Extractor::new( &egraph, CompilingCostFunction {}); + let ntk = &(extractor, vec!(egg::Id::from(5))); + ntk.dump(); + // Id(5): And([Signal(false, Id(2)), Signal(false, Id(4))]) + // Id(4): And([Signal(false, Id(3)), Signal(false, Id(1))]) + // Id(1): Input(3) + // Id(3): Input(2) + // Id(2): And([Signal(false, Id(0)), Signal(false, Id(1))]) + // Id(0): Input(1) + + // act is if one `AND` has already been computed -> other and (`Id(2)`) should be the only candidate left + TEST_COMPILER.lock().unwrap() + .comp_state.value_states.insert(eggmock::Id::from(4), ValueState{ is_computed: true, row_location: None }); + + let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); + + TEST_COMPILER.lock().unwrap().initialize_candidates(&ntk_backward); + println!("{:?}", TEST_COMPILER.lock().unwrap().comp_state.candidates); + // let result = add(2, 2); + // assert_eq!(result, 4, "Result isn't equal to 4 :/"); + } + + #[test] + fn test_compute_scheduling_prio_for_node() { + env_logger::init(); + + let mut egraph: EGraph = Default::default(); + let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); + egraph.add_expr(&my_expression); + let extractor = Extractor::new( &egraph, CompilingCostFunction {}); + let ntk = &(extractor, vec!(egg::Id::from(5))); + ntk.dump(); + // Id(5): And([Signal(false, Id(2)), Signal(false, Id(4))]) + // Id(4): And([Signal(false, Id(3)), Signal(false, Id(1))]) + // Id(1): Input(3) + // Id(3): Input(2) + // Id(2): And([Signal(false, Id(0)), Signal(false, Id(1))]) + // Id(0): Input(1) + + // act is if one `AND` has already been computed -> other and (`Id(2)`) should be the only candidate left + TEST_COMPILER.lock().unwrap() + .comp_state.value_states.insert(eggmock::Id::from(2), ValueState{ is_computed: true, row_location: None }); + + let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); + + let scheduling_prio = TEST_COMPILER.lock().unwrap().compute_scheduling_prio_for_node(eggmock::Id::from(4), &ntk_backward); + assert_eq!(scheduling_prio, SchedulingPrio { nr_last_value_uses: 2, nr_src_operands: 2, nr_result_operands: 1 } ); - let total_nr_rows_in_dram_module = ARCHITECTURE.nr_subarrays; - Self { - network, - candidates, - signal_to_row_mapping: HashMap::new(), - free_rows: (0..=total_nr_rows_in_dram_module).collect(), // we start with all rows being free at the beginning - } } } diff --git a/rs/src/fc_dram/egraph_extraction.rs b/rs/src/fc_dram/egraph_extraction.rs index dccd172..72537a2 100644 --- a/rs/src/fc_dram/egraph_extraction.rs +++ b/rs/src/fc_dram/egraph_extraction.rs @@ -44,7 +44,7 @@ impl CostFunction for CompilingCostFunction { // TODO: make cost depend on data-pattern of input? 0 }, - AigLanguage::And([_node1, _node2]) => { + AigLanguage::And([_node1, _node2]) | AigLanguage::Or([_node1, _node2]) => { // TODO: get mapping of AND to FCDRAM-Primitives and get how many mem-cycles they take 3 }, diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index ed57a9c..dd6f84a 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -27,7 +27,7 @@ use std::time::Instant; use crate::measure_time; -use self::compiler::compile; +use self::compiler::Compiler; use self::egraph_extraction::CompilingCostFunction; use eggmock::egg::{rewrite, EGraph, Extractor, Id, Rewrite, Runner}; @@ -89,6 +89,7 @@ fn compiling_receiver<'a>( ) -> impl Receiver + use<'a> { // REMINDER: EGraph implements `Receiver` // TODO: deactivate e-graph rewriting, focus on compilation first + let mut compiler = Compiler::new(settings); // TODO: rewrite this to a singleton-class EGraph::::new(()) .map(move |(graph, outputs)| { // `.map()` of `Provider`-trait!, outputs=vector of EClasses @@ -138,9 +139,9 @@ fn compiling_receiver<'a>( ntk_with_backward_edges.leaves().next().unwrap() ).collect::>() ); - // debug!("" + let program = measure_time!( - compile(&ntk_with_backward_edges), "t_compiler", settings.print_compilation_stats + compiler.compile(&ntk_with_backward_edges), "t_compiler", settings.print_compilation_stats ); // ===================== @@ -162,7 +163,7 @@ fn compiling_receiver<'a>( #[repr(C)] /// Compiler options /// - TODO: add flags like minimal success-rate for program -struct CompilerSettings { +pub struct CompilerSettings { /// Whether to print the compiled program print_program: bool, /// Whether to enable verbose output @@ -171,7 +172,7 @@ struct CompilerSettings { print_compilation_stats: bool, /// Minimal success rate to be guaranteed for success compiled program /// REMINDER: FCDRAM-operations dont have a 100%-success rate to create the correct results - min_success_rate: u64, + min_success_rate: f64, // /// Location to config-file holding fcdram-specific configs // fcdram_config_file: Path, @@ -179,7 +180,7 @@ struct CompilerSettings { /// - REMINDER: after `AND`/`OR`-ops the src-operands are overwritten by the op-result, so to reuse operands they're put into specially designated rows (="safe-space") which won't be overwritten /// - Ops reusing those operands have to clone the values from the safe-space prior to issuing the Op /// - NOTE: rows which are used as safe-space are determined by analyzing patterns in Simultaneous-row activation for the specific architecture (to ensure that safe-space rows won't be activated on any combination of row-addresses) - safe_space_rows_per_subarray: u16, + safe_space_rows_per_subarray: u8, } struct FCDramRewriter(CompilerSettings); diff --git a/rs/src/fc_dram/optimization.rs b/rs/src/fc_dram/optimization.rs index ca28a8c..c2f5bdc 100644 --- a/rs/src/fc_dram/optimization.rs +++ b/rs/src/fc_dram/optimization.rs @@ -1,4 +1,10 @@ -//! Some manual optimizations (LOWEST PRIORITY) +//! Optimize code from `compiler.rs` +//! - things to optimize for: +//! - performance (nr of required mem-cycles): mostly reduce nr of RowClone-ops (rest is mostly predetermined by logic-graph due to 1:1 mapping of LogicalOps -> FCDRAM Primitives) +//! - success rate (mostly by choosing right rows to optimize for distance to sense-amps), includes input replication +//! - memory-footprint (reduce nr of subarrays used by program) +//! - manually adapt safe-space to program requirements: unused safe-space rows could still be used ?! +//! - [ ] Rematerialization ? use crate::fc_dram::architecture::{RowAddress, Instruction}; use rustc_hash::FxHashSet; From 97d8b1afdfbef9f9f618e099241a56a9f616772e Mon Sep 17 00:00:00 2001 From: alku662e Date: Fri, 6 Jun 2025 15:12:51 +0200 Subject: [PATCH 14/51] WIP: implemented success-rate for APA-ops, fixed candidate-intialization --- rs/src/fc_dram/architecture.rs | 124 ++++++++++++++++++++++++++-- rs/src/fc_dram/compiler.rs | 80 ++++++++++-------- rs/src/fc_dram/egraph_extraction.rs | 4 +- rs/src/fc_dram/mod.rs | 2 + 4 files changed, 165 insertions(+), 45 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 7cdd19c..0b0443b 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -67,6 +67,7 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { activated_rows.into_iter().collect::>().into_iter().collect() }; + // just a dummy implementation, see [5] Chap3.2 for details why determining the distance based on the Row Addresses issued by the MemController is difficult let get_distance_of_row_to_sense_amps = |row: RowAddress| -> RowDistanceToSenseAmps { // ASSUMPTION: last & first rows only have sense-amps from one side // TODO: is this true? or do all subarrays have a line of sense-amps on both of their ends?? @@ -153,11 +154,9 @@ pub struct FCDRAMArchitecture { pub sra_degree_to_rowaddress_combinations: HashMap>, /// Stores for every rows which combinations of RowAddresses activate that row (needed for finding appropriate safe space rows) pub row_activated_by_rowaddress_tuple: HashMap>, - // TODO: params for calculating distance btw row and sense-amp, ... (particularly where - // sense-amps are placed within the DRAM module ?! - /// Given a row-addr this returns the distance of it to the sense-amps (!determinse - /// success-rate of op using that `row` as an operand) (see [1] Chap5.2) - /// - NOTE: Methodology used in [1] to determine distance: RowHammer + /// Given a row-addr this returns the distance of it to the sense-amps (!determinse success-rate of op using that `row` as an operand) (see [1] Chap5.2) + /// - NOTE: a realistic implementation should use the Methodology from [1] to determine this distance (RowHammer) + /// - there is no way of telling the distance of a row without testing manually (see [5] Chap3.2: "consecutive row addresses issued by the memory controller can be mapped to entirely different regions of DRAM") pub get_distance_of_row_to_sense_amps: fn(RowAddress) -> RowDistanceToSenseAmps, } @@ -207,10 +206,11 @@ impl FCDRAMArchitecture { } /// Categories of distances of rows to sense-amops +#[derive(Hash,Eq,PartialEq,PartialOrd,Ord)] pub enum RowDistanceToSenseAmps { - Close, - Middle, - Far, + Close=2, + Middle=1, + Far=0, } /// Instructions used in FC-DRAM @@ -307,6 +307,114 @@ impl Instruction { Instruction::RowClonePSM(_, _) => 256, // =(8192B/64B)*2 (*2 since copies two time, to and from `` on 64B-granularity } } + + /// Success Rate off instructions depends on: + /// - for AND/OR (`APA`): number of input operands (see [1] Chap6.3) + /// - data pattern can't be taken into consideration here since its not known at compile-time + /// - as well as temperature and DRAM speed rate + /// + /// TAKEAWAY: `OR` is more reliable than `AND` + pub fn get_success_rate_of_apa(&self, implemented_op: SupportedLogicOps) -> f64 { + + // Quote from [1] Chap6.3: "the distance of all simultaneously activated rows" - unclear how this classification happend exactly. Let's be conservative and assume the worst-case behavior + // (furthest away row for src-operands). For dst-rows we use the one closest to the sense-amps, since we can choose from which of the rows to read/save the result form + + let success_rate_by_row_distance = { + // see [1] Chap5.3 and Chap6.3 + match implemented_op { + SupportedLogicOps::NOT => HashMap::from([ + // ((src,dst), success_rate) + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 51.71), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 54.93), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 44.16), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 57.47), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 53.47), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 81.92), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 45.34), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 85.02), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 75.13), + ]), + SupportedLogicOps::AND => HashMap::from([ + // ((reference,compute), success_rate) + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 98.81), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.20), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 80.04), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 97.08), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 83.26), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 97.71), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 75.84), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 95.29), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 94.95), + ]), + SupportedLogicOps::OR => HashMap::from([ + // ((reference,compute), success_rate) + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 99.51), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.65), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 94.29), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 98.98), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 94.15), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 98.95), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 89.23), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 98.59), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 98.80), + ]), + SupportedLogicOps::NAND => HashMap::from([ + // ((reference,compute), success_rate) + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 98.81), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.20), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 79.59), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 97.08), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 82.98), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 97.67), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 75.50), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 95.19), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 94.95), + ]), + SupportedLogicOps::NOR => HashMap::from([ + // ((reference,compute), success_rate) + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 99.51), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.65), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 94.09), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 98.97), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 94.03), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 98.90), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 89.15), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 98.52), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 98.80), + ]), + } + }; + + match self { + Instruction::APA( r1, r2) => { + let activated_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(&(*r1,*r2)).expect("[ERR] Missing SRA for ({r1},{r2}"); + let nr_operands = activated_rows.len(); // ASSUMPTION: it seems like "operands" referred to the number of activated rows (see [1] + // taken from [1] Chap6.3 + let success_rate_per_operandnr = HashMap::from([ + (2, 94.94), + (4, 94.94), + (8, 95.85), + (16, 95.87), + (32, 0.00) // no value in paper :// + ]); + // nr_operand_success_rate.get(&nr_operands); + + let furthest_src_row = activated_rows.iter() + .map(|row| (ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row)) // RowDistanceToSenseAmps::Far; // TODO: get this + .max() + .expect("[ERR] Activated rows were empty"); + // NOTE: SRA is assumed to activate the same row-addresses in both subarrays + let closest_dst_row = activated_rows.iter() + .map(|row| (ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row)) // RowDistanceToSenseAmps::Far; // TODO: get this + .min() + .expect("[ERR] Activated rows were empty"); + let total_success_rate = *success_rate_per_operandnr.get(&nr_operands).expect("[ERR] {nr_operands} not =2|4|8|16, the given SRA function seems to not comply with this core assumption.") + * success_rate_by_row_distance.get(&(furthest_src_row, closest_dst_row)).unwrap(); + total_success_rate + }, + _ => 1.0, + } + } } /// Contains logical operations which are supported (natively) on FCDRAM-Architecture diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 936f758..8f67c04 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -86,10 +86,12 @@ impl Compiler { debug!("{:?},", node); } - while !self.comp_state.candidates.is_empty() { - // TODO: extend program with instr that is executed next - let executed_instructions = &mut self.execute_next_instruction(); - program.instructions.append(executed_instructions); + while let Some((next_candidate, _)) = self.comp_state.candidates.pop() { + // TODO: extend program with instr that is executed next + let executed_instructions = &mut self.execute_next_instruction(&next_candidate, network); + program.instructions.append(executed_instructions); + + // update new candidates } // let (outputs, leaves) = (network.outputs(), network.leaves()); @@ -152,21 +154,18 @@ impl Compiler { /// Initialize candidates with all nodes that are computable fn initialize_candidates(&mut self, network: &impl NetworkWithBackwardEdges) { - let inputs = network.leaves(); - - // init candidates with all nodes having inputs as src-operands + let inputs: Vec = network.leaves().collect(); - for input in inputs { + // init candidates with all nodes having only inputs as src-operands + for &input in inputs.as_slice() { // TODO: NEXT - add all non-input nodes whose src-operands are all inputs let mut outputs_with_prio: PriorityQueue = network.node_outputs(input) + .filter(|output| network.node(*output).inputs().iter().all(|other_input| inputs.contains(&other_input.node_id()) )) // only those nodes are candidates, whose src-operands are ALL inputs (->only primary inputs are directly available) .map( |output| (output, self.compute_scheduling_prio_for_node(output, network)) ) .collect(); self.comp_state.candidates.append(&mut outputs_with_prio); debug!("{:?} has the following outputs: {:?}", input, network.node_outputs(input).collect::>()); } - - debug!("Selected candidates: {:?}", self.comp_state.candidates); - // delete candidates which have non-input src operands } /// Initialize compilation state: mark unsuable rows (eg safe-space rows), place input operands @@ -180,13 +179,14 @@ impl Compiler { self.initialize_candidates(network); } - /// Executes the next instruction based on the following criteria: - /// 1. Select candidate which operates on most values which are used last (->to release safe-space right after) - /// 2. IF EQUAL: Select candidate with most successors (->pushes more candidates to select from in next step) - /// 3. IF EQUAL: Select any of the remaining candidates - /// /// - [ ] make sure that operation to be executed on those rows won't simultaneously activate other rows holding valid data which will be used by future operations - fn execute_next_instruction(&self) -> Vec { + fn execute_next_instruction(&mut self, next_candidate: &Id, network: &impl NetworkWithBackwardEdges) -> Vec { + let src_operands: Vec = network.node(*next_candidate).inputs() + .iter() + .map(|signal| signal.node_id()) + .collect(); + + let nr_operands = src_operands.len(); // use to select SRA to activate // 1. Make sure rows are placed appropriately in the rows to be simultaneously activated (starting from inputs) // 1.1 Determine in which rows src-operands for the next candidate-op are located @@ -198,6 +198,7 @@ impl Compiler { // 1.3.1 If activated rows in reference subarray holds valid data: spill to other rows // 1.4 Issue actual operation + vec!() } @@ -321,6 +322,10 @@ struct SchedulingPrio { nr_result_operands: usize, } +/// To execute the next instruction based on the following criteria: +/// 1. Select candidate which operates on most values which are used last (->to release safe-space right after) +/// 2. IF EQUAL: Select candidate with most successors (->pushes more candidates to select from in next step) +/// 3. IF EQUAL: Select any of the remaining candidates impl Ord for SchedulingPrio { fn cmp(&self, other: &Self) -> Ordering { self.nr_last_value_uses.cmp(&other.nr_last_value_uses) @@ -343,7 +348,7 @@ mod tests { use crate::fc_dram::egraph_extraction::CompilingCostFunction; use super::*; // import all elements from parent-module - use std::sync::{LazyLock, Mutex}; + use std::sync::{LazyLock, Mutex, Once}; // ERROR: `eggmock`-API doesn't allow this.. // // For data shared among unittests but initalized only once @@ -356,21 +361,26 @@ mod tests { // ComputedNetworkWithBackwardEdges::new(&ntk) // }); - static TEST_COMPILER: LazyLock> = LazyLock::new(|| { - Mutex::new( - Compiler::new(CompilerSettings { print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, safe_space_rows_per_subarray: 16 } ) - ) - }); + static INIT: Once = Once::new(); + + fn init() -> Compiler { + INIT.call_once(|| { + env_logger::init(); + }); + Compiler::new(CompilerSettings { print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, safe_space_rows_per_subarray: 16 } ) + } #[test] // mark function as test-fn fn test_candidate_initialization() { - env_logger::init(); + let mut compiler = init(); let mut egraph: EGraph = Default::default(); let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); egraph.add_expr(&my_expression); + let output2 = egraph.add(AigLanguage::And([eggmock::egg::Id::from(0), eggmock::egg::Id::from(2)])); // additional `And` with one src-operand=input and one non-input src operand + debug!("EGraph used for candidate-init: {:?}", egraph); let extractor = Extractor::new( &egraph, CompilingCostFunction {}); - let ntk = &(extractor, vec!(egg::Id::from(5))); + let ntk = &(extractor, vec!(egg::Id::from(5), output2)); ntk.dump(); // Id(5): And([Signal(false, Id(2)), Signal(false, Id(4))]) // Id(4): And([Signal(false, Id(3)), Signal(false, Id(1))]) @@ -380,20 +390,21 @@ mod tests { // Id(0): Input(1) // act is if one `AND` has already been computed -> other and (`Id(2)`) should be the only candidate left - TEST_COMPILER.lock().unwrap() - .comp_state.value_states.insert(eggmock::Id::from(4), ValueState{ is_computed: true, row_location: None }); + compiler .comp_state.value_states.insert(eggmock::Id::from(4), ValueState{ is_computed: true, row_location: None }); let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); - TEST_COMPILER.lock().unwrap().initialize_candidates(&ntk_backward); - println!("{:?}", TEST_COMPILER.lock().unwrap().comp_state.candidates); - // let result = add(2, 2); - // assert_eq!(result, 4, "Result isn't equal to 4 :/"); + compiler.initialize_candidates(&ntk_backward); + let is_candidate_ids: HashSet = compiler.comp_state.candidates.iter().map(|(id,_)| *id).collect(); + let should_candidate_ids: HashSet = HashSet::from([eggmock::Id::from(2), eggmock::Id::from(4)]); + assert_eq!( is_candidate_ids, should_candidate_ids); + + // TODO: test-case with node that relies on one input src-operand and one non-input (intermediate node) src-operand } #[test] fn test_compute_scheduling_prio_for_node() { - env_logger::init(); + let mut compiler = init(); let mut egraph: EGraph = Default::default(); let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); @@ -409,12 +420,11 @@ mod tests { // Id(0): Input(1) // act is if one `AND` has already been computed -> other and (`Id(2)`) should be the only candidate left - TEST_COMPILER.lock().unwrap() - .comp_state.value_states.insert(eggmock::Id::from(2), ValueState{ is_computed: true, row_location: None }); + compiler.comp_state.value_states.insert(eggmock::Id::from(2), ValueState{ is_computed: true, row_location: None }); let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); - let scheduling_prio = TEST_COMPILER.lock().unwrap().compute_scheduling_prio_for_node(eggmock::Id::from(4), &ntk_backward); + let scheduling_prio = compiler.compute_scheduling_prio_for_node(eggmock::Id::from(4), &ntk_backward); assert_eq!(scheduling_prio, SchedulingPrio { nr_last_value_uses: 2, nr_src_operands: 2, nr_result_operands: 1 } ); } diff --git a/rs/src/fc_dram/egraph_extraction.rs b/rs/src/fc_dram/egraph_extraction.rs index 72537a2..d562a20 100644 --- a/rs/src/fc_dram/egraph_extraction.rs +++ b/rs/src/fc_dram/egraph_extraction.rs @@ -51,12 +51,12 @@ impl CostFunction for CompilingCostFunction { // TODO: increase cost of NOT? (since it moves the value to another subarray!) // eg prefer `OR(a,b)` to `NOT(AND( NOT(a), NOT(b)))` AigLanguage::Not(_node) => { - 1 + 100 // NOTs seem to be horrible (unless the computation proceeds in the other subarray where the NOT result is placed) }, }; Rc::new(CompilingCost { - success_rate: 0.0, + success_rate: 0.0, // TODO program_cost: cost, }) // todo!() diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index dd6f84a..1f0255a 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -6,6 +6,8 @@ //! - [2] FracDRAM: Fractional Values in Off-the-Shelf DRAM, 2022 //! - [3] PULSAR: Simultaneous Many-Row Activation for Reliable and High-Performance Computing in Off-the-Shelf DRAM Chips, 2024 //! - [4] RowClone: fast and energy-efficient in-DRAM bulk data copy and initialization, 2013 +//! - [5] Design-Induced Latency Variation in Modern DRAM Chips: Characterization, Analysis, and Latency Reduction Mechanisms, 2017 +//! - explains why distance of rows to sense-amps influence success-rate of executed op //! //! # Submodules //! From 2a955de08691b9ed0f4b089ea5e1f9fde30103b5 Mon Sep 17 00:00:00 2001 From: alku662e Date: Mon, 9 Jun 2025 11:25:36 +0200 Subject: [PATCH 15/51] :construction: --- rs/src/fc_dram/architecture.rs | 6 +- rs/src/fc_dram/compiler.rs | 136 ++++++++++++++++++++++++--------- rs/src/fc_dram/mod.rs | 1 + 3 files changed, 107 insertions(+), 36 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 0b0443b..7345380 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -11,6 +11,8 @@ use log::debug; pub const NR_SUBARRAYS: i64 = 2i64.pow(7); pub const ROWS_PER_SUBARRAY: i64 = 2i64.pow(9); +pub const SUBARRAY_ID_BITMASK: i64 = 0b1_111_111_000_000_000; // 7 highest bits=subarray id +pub const ROW_ID_BITMASK: i64 = 0b0_000_000_111_111_111; // 7 highest bits=subarray id /// Main variable specifying architecture of DRAM-module for which to compile for /// - this is currently just an example implementation for testing purpose; (TODO: make this configurable at runtime) /// @@ -135,7 +137,9 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { /// - ! must be smaller than `rows_per_subarray * nr_subarrays` (this is NOT checked!) pub type RowAddress = i64; pub type SubarrayId = i64; +pub type SuccessRate = f64; +/// TODO: add field encoding topology of subarrays (to determine which of them share sense-amps) pub struct FCDRAMArchitecture { /// Nr of subarrays in a DRAM module pub nr_subarrays: i64, @@ -314,7 +318,7 @@ impl Instruction { /// - as well as temperature and DRAM speed rate /// /// TAKEAWAY: `OR` is more reliable than `AND` - pub fn get_success_rate_of_apa(&self, implemented_op: SupportedLogicOps) -> f64 { + pub fn get_success_rate_of_apa(&self, implemented_op: SupportedLogicOps) -> SuccessRate { // Quote from [1] Chap6.3: "the distance of all simultaneously activated rows" - unclear how this classification happend exactly. Let's be conservative and assume the worst-case behavior // (furthest away row for src-operands). For dst-rows we use the one closest to the sense-amps, since we can choose from which of the rows to read/save the result form diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 8f67c04..52f74df 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -10,20 +10,13 @@ use crate::ambit::Architecture; use super::{ - architecture::{FCDRAMArchitecture, Instruction, SubarrayId, ARCHITECTURE, NR_SUBARRAYS, ROWS_PER_SUBARRAY}, optimization::optimize, CompilerSettings, Program, ProgramState, RowAddress + architecture::{FCDRAMArchitecture, Instruction, SubarrayId, ARCHITECTURE, NR_SUBARRAYS, ROWS_PER_SUBARRAY, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, ProgramState, RowAddress }; -use eggmock::{Aig, AigLanguage, ComputedNetworkWithBackwardEdges, Id, NetworkWithBackwardEdges, Node, Signal}; +use eggmock::{Aig, AigLanguage, ComputedNetworkWithBackwardEdges, Id, NetworkLanguage, NetworkWithBackwardEdges, Node, Signal}; use log::debug; use priority_queue::PriorityQueue; use std::{cmp::Ordering, collections::{BinaryHeap, HashMap, HashSet}}; - -pub fn place_signals_onto_rows( - comp_state: &mut impl NetworkWithBackwardEdges, signals: Vec -) { - -} - pub struct Compiler { /// compiler-options set by user settings: CompilerSettings, @@ -105,6 +98,7 @@ impl Compiler { } /// Allocates safe-space rows inside the DRAM-module + /// - NOTE: nr of safe-space rows must be a power of 2 (x) between 1<=x<=64 /// - [ ] TODO: improve algo (in terms of space efficiency) fn alloc_safe_space_rows(&mut self, nr_safe_space_rows: u8) { @@ -114,17 +108,17 @@ impl Compiler { } // TODO: this is just a quick&dirty implementation. Solving this (probably NP-complete) problem of finding optimal safe-space rows is probably worth solving for every DRAM-module once - let safe_space_rows = { + self.safe_space_rows = { // choose any row-addr combi activating exactly `nr_safe_space_rows` and choose all that activated rows to be safe-space rows let chosen_row_addr_combi = ARCHITECTURE.sra_degree_to_rowaddress_combinations .get(&nr_safe_space_rows).unwrap() .first().unwrap(); // just take the first row-combi that activates `nr_safe_space_rows` - ARCHITECTURE.precomputed_simultaneous_row_activations.get(chosen_row_addr_combi).unwrap() + ARCHITECTURE.precomputed_simultaneous_row_activations.get(chosen_row_addr_combi).unwrap().to_vec() }; // deactivate all combination which could activate safe-space rows - for row in safe_space_rows { + for row in self.safe_space_rows.iter() { for row_combi in ARCHITECTURE.row_activated_by_rowaddress_tuple.get(row).unwrap() { self.blocked_row_combinations.insert(*row_combi); } @@ -135,20 +129,26 @@ impl Compiler { /// TODO: algo which looks ahead which input-row-placement might be optimal (->reduce nr of move-ops to move intermediate results around & keep inputs close to sense-amps /// TODO: parititon logic network into subgraphs s.t. subgraphs can be mapped onto subarrays reducing nr of needed moves fn place_inputs(&mut self, mut inputs: Vec) { - // naive implementation: start placing input just on consecutive rows + // naive implementation: start placing input on consecutive safe-space rows (continuing with next subarray once the current subarray has no more free safe-space rows) // TODO: change input operand placement to be more optimal (->taking into account future use of those operands) - for subarray in 0..NR_SUBARRAYS { - for row in self.safe_space_rows.iter() { - // TODO allocate safe-space rows - if let Some(next_input) = inputs.pop() { - let initial_row_state = RowState { is_compute_row: false, live_value: Some(next_input)}; - let initial_value_state = ValueState { is_computed: true, row_location: Some((subarray, *row))}; - self.comp_state.dram_state.insert((subarray, *row), initial_row_state); - self.comp_state.value_states.insert(next_input, initial_value_state); - } else { - return; - } - } + let mut subarray_iter=0..NR_SUBARRAYS; + let next_subarray = subarray_iter.next().unwrap(); + let mut row_iter= self.safe_space_rows.iter(); + while let Some(next_input) = inputs.pop() { + let (next_subarray, next_row) = if let Some(next_row) = row_iter.next() { + (next_subarray, next_row) + } else { + row_iter = self.safe_space_rows.iter(); + ( subarray_iter.next().expect("OOM: no more safe-space rows and subarrays available") , row_iter.next().expect("No safe-space rows available" )) + }; + + + let row_address = (next_subarray << ROWS_PER_SUBARRAY.ilog2() ) | next_row; // higher bits=id of subarray + + let initial_row_state = RowState { is_compute_row: false, live_value: Some(next_input)}; + let initial_value_state = ValueState { is_computed: true, row_location: Some(row_address)}; + self.comp_state.dram_state.insert(row_address, initial_row_state); + self.comp_state.value_states.insert(next_input, initial_value_state); } } @@ -175,10 +175,30 @@ impl Compiler { self.alloc_safe_space_rows(self.settings.safe_space_rows_per_subarray); // 0.2 Place all inputs and mark them as being live self.place_inputs( network.leaves().collect::>() ); // place input-operands into rows + debug!("Placed inputs {:?} in {:?}", network.leaves().collect::>(), self.comp_state.value_states); // 0.3 Setup: store all network-nodes yet to be compiled self.initialize_candidates(network); } + /// Return id of subarray to use for computation and reference (compute_subarrayid, reference_subarrayid) + /// - based on location of input rows AND current compilation state + /// - [ ] POSSIBLE EXTENSION: include lookahead for future ops and their inputs they depend on + fn select_compute_and_ref_subarray(&self, input_rows: Vec) -> (SubarrayId, SubarrayId) { + // naive implementation: just use the subarray that most of the `input_rows` reside in + // TODO: find better solution + let used_subarray_ids = input_rows.into_iter().map(|row| row & SUBARRAY_ID_BITMASK); + let (&mostly_used_subarray_id, _) = used_subarray_ids + .fold(HashMap::new(), |mut acc, item| { + *acc.entry(item).or_insert(0) += 1; + acc + }) + .iter().max_by_key(|&(_, count)| count).unwrap(); + + let selected_ref_subarray = (mostly_used_subarray_id+1) % NR_SUBARRAYS; // TODO: use 2D-layout of subarrays to determine which of them share sense-amps + + (mostly_used_subarray_id, selected_ref_subarray) + } + /// - [ ] make sure that operation to be executed on those rows won't simultaneously activate other rows holding valid data which will be used by future operations fn execute_next_instruction(&mut self, next_candidate: &Id, network: &impl NetworkWithBackwardEdges) -> Vec { let src_operands: Vec = network.node(*next_candidate).inputs() @@ -187,17 +207,46 @@ impl Compiler { .collect(); let nr_operands = src_operands.len(); // use to select SRA to activate - // 1. Make sure rows are placed appropriately in the rows to be simultaneously activated (starting from inputs) + let nr_rows = nr_operands.next_power_of_two(); + + let src_rows: Vec = src_operands.iter() + .map(|src_operand| { + + debug!("src: {src_operand:?}"); + self.comp_state.value_states.get(src_operand) + .unwrap() + .row_location + .expect("Sth went wrong... if the src-operand is not in a row, then this candidate shouldn't have been added to the list of candidates") + }) + .collect(); + + let (compute_subarray, ref_subarray) = self.select_compute_and_ref_subarray(src_rows); + let language_op = network.node(*next_candidate); - // 1.1 Determine in which rows src-operands for the next candidate-op are located + // match language_op { + // Aig::And() + // } - // 1.2 Check if issuing `APA(src1,src2)` would activate other rows which hold valid data - // 1.2.1 if yes: move data to other rows for performing this op + // 0. Select an SRA (=row-address tuple) for the selected subarray based on highest success-rate + // TODO (possible improvement): input replication by choosing SRA with more activated rows than operands and duplicating operands which are in far-away rows into several rows?) + let (row1,row2) = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&(nr_rows as u8)).unwrap() + // TODO: sort by success-rate + .first().expect("No SRA for nr-rows={nr_rows}"); - // 1.3 Prepare performing the actual op (setup reference subarray) - // 1.3.1 If activated rows in reference subarray holds valid data: spill to other rows + // 1. Initialize rows in ref-subarray (if executing AND/OR) + // - TODO: read nr of frac-ops to issue from compiler-settings - // 1.4 Issue actual operation + // 2. Place rows in the simultaneously activated rows in the compute subarray (init other rows with 0 for OR, 1 for AND and same value for NOT) + + + // SKIPPED: 2.2 Check if issuing `APA(src1,src2)` would activate other rows which hold valid data + // - only necessary once we find optimization to not write values to safe-space but reuse them diectly + // 2.2.1 if yes: move data to other rows for performing this op + + // 3. Issue actual operation + + // 4. Copy result data from dst NEAREST to the sense-amps into a safe-space row + // TODO: possible improvement - error correction over all dst-rows (eg majority-vote for each bit, votes weighted by distance to sense-amps?) vec!() } @@ -241,19 +290,20 @@ pub struct RowState { live_value: Option, } +#[derive(Debug)] pub struct ValueState { /// Whether the value has already been computed (->only then it could reside in a row) /// - the value could also have been computed but spilled already on its last use /// - helps determining whether src-operand is the last use: for all other output operands of that source operand just check whether they have been already computed is_computed: bool, /// Row in which the value resides - row_location: Option<(SubarrayId, RowAddress)>, + row_location: Option, } /// Keep track of current progress of the compilation (eg which rows are used, into which rows data is placed, ...) pub struct CompilationState { /// For each row in the dram-module store its state - dram_state: HashMap<(SubarrayId, RowAddress), RowState>, + dram_state: HashMap, /// Stores row in which an intermediate result (which is still to be used by future ops) is currently located (or whether it has been computed at all) value_states: HashMap, /// List of candidates (ops ready to be issued) prioritized by some metric by which they are scheduled for execution @@ -262,7 +312,7 @@ pub struct CompilationState { } impl CompilationState { - pub fn new(dram_state: HashMap<(SubarrayId, RowAddress), RowState>) -> Self { + pub fn new(dram_state: HashMap) -> Self { Self { dram_state, value_states: HashMap::new(), @@ -370,6 +420,15 @@ mod tests { Compiler::new(CompilerSettings { print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, safe_space_rows_per_subarray: 16 } ) } + #[test] // mark function as test-fn + fn test_alloc_safe_space_rows() { + let mut compiler = init(); + const REQUESTED_SAFE_SPACE_ROWS: u8 = 8; + compiler.alloc_safe_space_rows(REQUESTED_SAFE_SPACE_ROWS); + + assert_eq!(compiler.safe_space_rows.len(), REQUESTED_SAFE_SPACE_ROWS as usize); + } + #[test] // mark function as test-fn fn test_candidate_initialization() { let mut compiler = init(); @@ -428,4 +487,11 @@ mod tests { assert_eq!(scheduling_prio, SchedulingPrio { nr_last_value_uses: 2, nr_src_operands: 2, nr_result_operands: 1 } ); } + + #[test] + fn test_select_compute_and_ref_subarray() { + let compiler = init(); + let (selected_subarray, _) = compiler.select_compute_and_ref_subarray(vec!(0b1_000_000_000, 0b1_000_010_000, 0b111_000_000_000, 0b10_100_000_000,)); + assert_eq!(selected_subarray, 0b1_000_000_000); + } } diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index 1f0255a..d52fed5 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -174,6 +174,7 @@ pub struct CompilerSettings { print_compilation_stats: bool, /// Minimal success rate to be guaranteed for success compiled program /// REMINDER: FCDRAM-operations dont have a 100%-success rate to create the correct results + /// TODO: not used yet by compiler min_success_rate: f64, // /// Location to config-file holding fcdram-specific configs // fcdram_config_file: Path, From 4fb91a1f89c23f24913053e8c7435b1ab70d5bd7 Mon Sep 17 00:00:00 2001 From: alku662e Date: Mon, 9 Jun 2025 17:32:22 +0200 Subject: [PATCH 16/51] init reference and compute subarray --- rs/src/fc_dram/architecture.rs | 77 +++++++++++----- rs/src/fc_dram/compiler.rs | 160 ++++++++++++++++++++++++++++----- rs/src/fc_dram/mod.rs | 2 + rs/src/fc_dram/program.rs | 2 +- 4 files changed, 196 insertions(+), 45 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 7345380..7fc55a1 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -5,14 +5,20 @@ //! //! RowAddress (eg via bit-shifting given bitmasks for subarray-id & row-addr to put on-top of RowAddress -use std::{collections::{HashMap, HashSet}, fmt::{Display, Formatter}, sync::LazyLock}; +use std::{cmp::Ordering, collections::{BTreeMap, HashMap, HashSet}, fmt::{Display, Formatter}, sync::LazyLock}; use log::debug; +use priority_queue::PriorityQueue; pub const NR_SUBARRAYS: i64 = 2i64.pow(7); pub const ROWS_PER_SUBARRAY: i64 = 2i64.pow(9); pub const SUBARRAY_ID_BITMASK: i64 = 0b1_111_111_000_000_000; // 7 highest bits=subarray id pub const ROW_ID_BITMASK: i64 = 0b0_000_000_111_111_111; // 7 highest bits=subarray id + +pub fn subarrayid_to_subarray_address(subarray_id: SubarrayId) -> RowAddress { + subarray_id << NR_SUBARRAYS.ilog2() +} + /// Main variable specifying architecture of DRAM-module for which to compile for /// - this is currently just an example implementation for testing purpose; (TODO: make this configurable at runtime) /// @@ -116,12 +122,13 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { } // debug!("Precomputed SRAs: {:#?}", precomputed_simultaneous_row_activations.iter().take(20).collect::>()); - let sra_degree_to_rowaddress_combinations= precomputed_simultaneous_row_activations.iter().fold(HashMap::new(), |mut acc: HashMap>, (key, vec)| { - acc.entry(vec.len() as u8).or_default().push(*key); + let sra_degree_to_rowaddress_combinations= precomputed_simultaneous_row_activations.iter() + .fold(HashMap::new(), |mut acc: HashMap>, (row_combi, activated_rows)| { + acc.entry(activated_rows.len() as u8).or_default().push(*row_combi); acc }); // output how many combinations of row-addresses activate the given nr of rows - debug!("SRAs row-nr to row-addr mapping: {:#?}", sra_degree_to_rowaddress_combinations.iter().map(|(k,v)| format!("{k} rows activated in {} addr-combinations", v.len())).collect::>()); + // debug!("SRAs row-nr to row-addr mapping: {:#?}", sra_degree_to_rowaddress_combinations.iter().map(|(k,v)| format!("{k} rows activated in {} addr-combinations", v.len())).collect::>()); FCDRAMArchitecture { nr_subarrays: NR_SUBARRAYS, @@ -137,7 +144,22 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { /// - ! must be smaller than `rows_per_subarray * nr_subarrays` (this is NOT checked!) pub type RowAddress = i64; pub type SubarrayId = i64; -pub type SuccessRate = f64; +#[derive(Debug, PartialEq)] +pub struct SuccessRate(f64); + +impl Eq for SuccessRate {} + +impl PartialOrd for SuccessRate { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) // delegate to total_cmp + } +} + +impl Ord for SuccessRate { + fn cmp(&self, other: &Self) -> Ordering { + self.0.total_cmp(&other.0) + } +} /// TODO: add field encoding topology of subarrays (to determine which of them share sense-amps) pub struct FCDRAMArchitecture { @@ -155,7 +177,9 @@ pub struct FCDRAMArchitecture { pub precomputed_simultaneous_row_activations: HashMap<(RowAddress, RowAddress), Vec>, /// Map degree of SRA (=nr of activated rows by that SRA) to all combinations of RowAddresses which have that degree of SRA /// - use to eg restrict the choice of row-addresses for n-ary AND/OR (eg 4-ary AND -> at least activate 8 rows; more rows could be activated when using input replication) + /// NOTE: LogicOp determiens success-rate pub sra_degree_to_rowaddress_combinations: HashMap>, + // pub sra_degree_to_rowaddress_combinations: HashMap<(u8, LogicOp), BTreeMap<(RowAddress,RowAddress), SuccessRate>>, // to large runtime-overhead :/ /// Stores for every rows which combinations of RowAddresses activate that row (needed for finding appropriate safe space rows) pub row_activated_by_rowaddress_tuple: HashMap>, /// Given a row-addr this returns the distance of it to the sense-amps (!determinse success-rate of op using that `row` as an operand) (see [1] Chap5.2) @@ -186,27 +210,31 @@ impl FCDRAMArchitecture { /// /// NOTE: `compute_rows` are expected to lay in the same subarray and `reference_rows` in one /// subarray adjacent to the compute subarray (!this is not checked but assumed to be true!) - fn get_instructions_implementation_of_logic_ops(logic_op: SupportedLogicOps) -> Vec { + fn get_instructions_implementation_of_logic_ops(logic_op: LogicOp) -> Vec { match logic_op { - SupportedLogicOps::NOT => vec!(Instruction::APA(-1, -1)), - SupportedLogicOps::AND => vec!(Instruction::FracOp(-1), Instruction::APA(-1, -1)), - SupportedLogicOps::OR => vec!(Instruction::FracOp(-1), Instruction::APA(-1, -1)), - SupportedLogicOps::NAND => { + LogicOp::NOT => vec!(Instruction::APA(-1, -1)), + LogicOp::AND => vec!(Instruction::FracOp(-1), Instruction::APA(-1, -1)), + LogicOp::OR => vec!(Instruction::FracOp(-1), Instruction::APA(-1, -1)), + LogicOp::NAND => { // 1. AND, 2. NOT - FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(SupportedLogicOps::AND) + FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::AND) .into_iter() - .chain( FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(SupportedLogicOps::NOT)) + .chain( FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::NOT)) .collect() }, - SupportedLogicOps::NOR => { + LogicOp::NOR => { // 1. OR, 2. NOT - FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(SupportedLogicOps::OR) + FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::OR) .into_iter() - .chain( FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(SupportedLogicOps::NOT)) + .chain( FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::NOT)) .collect() } } } + + fn get_activated_rows() { + + } } /// Categories of distances of rows to sense-amops @@ -318,7 +346,7 @@ impl Instruction { /// - as well as temperature and DRAM speed rate /// /// TAKEAWAY: `OR` is more reliable than `AND` - pub fn get_success_rate_of_apa(&self, implemented_op: SupportedLogicOps) -> SuccessRate { + pub fn get_success_rate_of_apa(&self, implemented_op: LogicOp) -> SuccessRate { // Quote from [1] Chap6.3: "the distance of all simultaneously activated rows" - unclear how this classification happend exactly. Let's be conservative and assume the worst-case behavior // (furthest away row for src-operands). For dst-rows we use the one closest to the sense-amps, since we can choose from which of the rows to read/save the result form @@ -326,7 +354,7 @@ impl Instruction { let success_rate_by_row_distance = { // see [1] Chap5.3 and Chap6.3 match implemented_op { - SupportedLogicOps::NOT => HashMap::from([ + LogicOp::NOT => HashMap::from([ // ((src,dst), success_rate) ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 51.71), ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 54.93), @@ -338,7 +366,7 @@ impl Instruction { ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 85.02), ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 75.13), ]), - SupportedLogicOps::AND => HashMap::from([ + LogicOp::AND => HashMap::from([ // ((reference,compute), success_rate) ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 98.81), ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.20), @@ -350,7 +378,7 @@ impl Instruction { ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 95.29), ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 94.95), ]), - SupportedLogicOps::OR => HashMap::from([ + LogicOp::OR => HashMap::from([ // ((reference,compute), success_rate) ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 99.51), ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.65), @@ -362,7 +390,7 @@ impl Instruction { ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 98.59), ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 98.80), ]), - SupportedLogicOps::NAND => HashMap::from([ + LogicOp::NAND => HashMap::from([ // ((reference,compute), success_rate) ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 98.81), ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.20), @@ -374,7 +402,7 @@ impl Instruction { ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 95.19), ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 94.95), ]), - SupportedLogicOps::NOR => HashMap::from([ + LogicOp::NOR => HashMap::from([ // ((reference,compute), success_rate) ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 99.51), ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.65), @@ -414,9 +442,9 @@ impl Instruction { .expect("[ERR] Activated rows were empty"); let total_success_rate = *success_rate_per_operandnr.get(&nr_operands).expect("[ERR] {nr_operands} not =2|4|8|16, the given SRA function seems to not comply with this core assumption.") * success_rate_by_row_distance.get(&(furthest_src_row, closest_dst_row)).unwrap(); - total_success_rate + SuccessRate(total_success_rate) }, - _ => 1.0, + _ => SuccessRate(1.0), } } } @@ -424,7 +452,8 @@ impl Instruction { /// Contains logical operations which are supported (natively) on FCDRAM-Architecture /// - see [`FCDRAMArchitecture::get_instructions_implementation_of_logic_ops`] for how these /// logic-ops are mapped to FCDRAM-instructions -pub enum SupportedLogicOps { +#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq)] +pub enum LogicOp { NOT, AND, OR, diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 52f74df..7d1c594 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -10,19 +10,21 @@ use crate::ambit::Architecture; use super::{ - architecture::{FCDRAMArchitecture, Instruction, SubarrayId, ARCHITECTURE, NR_SUBARRAYS, ROWS_PER_SUBARRAY, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, ProgramState, RowAddress + architecture::{subarrayid_to_subarray_address, FCDRAMArchitecture, Instruction, LogicOp, SubarrayId, ARCHITECTURE, NR_SUBARRAYS, ROWS_PER_SUBARRAY, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, ProgramState, RowAddress }; use eggmock::{Aig, AigLanguage, ComputedNetworkWithBackwardEdges, Id, NetworkLanguage, NetworkWithBackwardEdges, Node, Signal}; +use itertools::Itertools; use log::debug; use priority_queue::PriorityQueue; -use std::{cmp::Ordering, collections::{BinaryHeap, HashMap, HashSet}}; +use rustc_hash::FxHashMap; +use std::{cmp::Ordering, collections::{BinaryHeap, HashMap, HashSet}, env::consts::ARCH, fs::write}; pub struct Compiler { /// compiler-options set by user settings: CompilerSettings, /// Stores the state of all rows at each compilation step comp_state: CompilationState, - /// These rows are reserved in EVERY subarray for storing intermediate results + /// These rows are reserved in EVERY subarray for storing intermediate results (ignore higher bits of these RowAddress) /// - NOTE: only initialized when compilation starts safe_space_rows: Vec, /// RowCombinations which are not allowed to be issued via `APA` since they activate rows within the safe-space @@ -102,6 +104,7 @@ impl Compiler { /// - [ ] TODO: improve algo (in terms of space efficiency) fn alloc_safe_space_rows(&mut self, nr_safe_space_rows: u8) { + debug!("[ENTER alloc_safe_space_rows]"); let supported_nr_safe_space_rows = vec!(1,2,4,8,16,32,64); if !supported_nr_safe_space_rows.contains(&nr_safe_space_rows) { panic!("Only the following nr of rows are supported to be activated: {:?}, given: {}", supported_nr_safe_space_rows, nr_safe_space_rows); @@ -113,7 +116,7 @@ impl Compiler { // choose any row-addr combi activating exactly `nr_safe_space_rows` and choose all that activated rows to be safe-space rows let chosen_row_addr_combi = ARCHITECTURE.sra_degree_to_rowaddress_combinations .get(&nr_safe_space_rows).unwrap() - .first().unwrap(); // just take the first row-combi that activates `nr_safe_space_rows` + .iter().next().unwrap(); // just take the first row-combi that activates `nr_safe_space_rows` ARCHITECTURE.precomputed_simultaneous_row_activations.get(chosen_row_addr_combi).unwrap().to_vec() }; @@ -125,28 +128,53 @@ impl Compiler { } } + /// Places (commonly used) constants in safe-space rows + /// - ! all safe-space rows are assumed to be empty when placing constants (constans are the first things to be placed into safe-space rows) + /// - currently placed constants: all 0s and all 1s (for [`Compiler::init_reference_subarray`] + fn place_constants(&mut self) { + // place constants in EVERY subarray + for subarray in 0..NR_SUBARRAYS { + let mut safe_space = self.safe_space_rows.iter(); + let row_address_0 = subarrayid_to_subarray_address(subarray) | safe_space.next().unwrap(); + self.comp_state.constant_values.insert(0, row_address_0); + let row_address_1 = subarrayid_to_subarray_address(subarray) | safe_space.next().unwrap(); + self.comp_state.constant_values.insert(1, row_address_1); + + self.comp_state.dram_state.insert(row_address_0, RowState { is_compute_row: false, live_value: None, constant: Some(0)} ); + self.comp_state.dram_state.insert(row_address_1, RowState { is_compute_row: false, live_value: None, constant: Some(1)} ); + } + } + /// Place inputs onto appropriate rows + /// - NOTE: constants are expected to be placed before the inputs /// TODO: algo which looks ahead which input-row-placement might be optimal (->reduce nr of move-ops to move intermediate results around & keep inputs close to sense-amps /// TODO: parititon logic network into subgraphs s.t. subgraphs can be mapped onto subarrays reducing nr of needed moves fn place_inputs(&mut self, mut inputs: Vec) { + debug!("[ENTER place_inputs]"); // naive implementation: start placing input on consecutive safe-space rows (continuing with next subarray once the current subarray has no more free safe-space rows) // TODO: change input operand placement to be more optimal (->taking into account future use of those operands) let mut subarray_iter=0..NR_SUBARRAYS; let next_subarray = subarray_iter.next().unwrap(); - let mut row_iter= self.safe_space_rows.iter(); + let subarray_addr = subarrayid_to_subarray_address(next_subarray); while let Some(next_input) = inputs.pop() { + let mut row_iter= self.safe_space_rows.iter() + .filter(|row| ! self.comp_state.constant_values.values().contains( &(subarray_addr | **row)) ); // filter safe-space rows which are NOT already used for constants + // NOTE: some safe-space rows are reserved for constants let (next_subarray, next_row) = if let Some(next_row) = row_iter.next() { (next_subarray, next_row) } else { - row_iter = self.safe_space_rows.iter(); - ( subarray_iter.next().expect("OOM: no more safe-space rows and subarrays available") , row_iter.next().expect("No safe-space rows available" )) + let next_subarray = subarray_iter.next().expect("OOM: no more safe-space rows and subarrays available"); + let next_subarray_addr = subarrayid_to_subarray_address(next_subarray); + let mut row_iter = self.safe_space_rows.iter() + .filter(|row| ! self.comp_state.constant_values.values().contains(&(next_subarray_addr | **row )) ); // filter safe-space rows which are NOT already used for constants + ( next_subarray, row_iter.next().expect("OOM: No safe-space rows available" ) ) }; let row_address = (next_subarray << ROWS_PER_SUBARRAY.ilog2() ) | next_row; // higher bits=id of subarray - let initial_row_state = RowState { is_compute_row: false, live_value: Some(next_input)}; - let initial_value_state = ValueState { is_computed: true, row_location: Some(row_address)}; + let initial_row_state = RowState { is_compute_row: false, live_value: Some(next_input), constant: None }; + let initial_value_state = ValueState { is_computed: true, row_location: Some(row_address) }; self.comp_state.dram_state.insert(row_address, initial_row_state); self.comp_state.value_states.insert(next_input, initial_value_state); } @@ -154,11 +182,12 @@ impl Compiler { /// Initialize candidates with all nodes that are computable fn initialize_candidates(&mut self, network: &impl NetworkWithBackwardEdges) { + debug!("[ENTER initialize_candidates]"); let inputs: Vec = network.leaves().collect(); // init candidates with all nodes having only inputs as src-operands for &input in inputs.as_slice() { - // TODO: NEXT - add all non-input nodes whose src-operands are all inputs + // every output has a prio determined eg by how many src-operands it uses last (->to minimize nr of live values in rows) let mut outputs_with_prio: PriorityQueue = network.node_outputs(input) .filter(|output| network.node(*output).inputs().iter().all(|other_input| inputs.contains(&other_input.node_id()) )) // only those nodes are candidates, whose src-operands are ALL inputs (->only primary inputs are directly available) .map( |output| (output, self.compute_scheduling_prio_for_node(output, network)) ) @@ -173,6 +202,8 @@ impl Compiler { // debug!("Compiling {:?}", network); // 0.1 Allocate safe-space rows (for storing intermediate values safely self.alloc_safe_space_rows(self.settings.safe_space_rows_per_subarray); + + self.place_constants(); // 0.2 Place all inputs and mark them as being live self.place_inputs( network.leaves().collect::>() ); // place input-operands into rows debug!("Placed inputs {:?} in {:?}", network.leaves().collect::>(), self.comp_state.value_states); @@ -180,10 +211,69 @@ impl Compiler { self.initialize_candidates(network); } + /// Returns instructions to initialize `ref_rows` in reference-subarray for corresponding logic-op + /// - NOTE: [1] doesn't describe how the 0s/1s get into the reference subarray. We use `RowCloneFPM` ([4])) to copy the constant 0s/1s from the reserved safe-space row into the corresponding reference subarray row + fn init_reference_subarray(&self, mut ref_rows: Vec, logic_op: LogicOp) -> Vec { + match logic_op { + LogicOp::AND => { + let frac_row = ref_rows.pop().expect("Min 1 row has to be passed for initializing ref subarray"); // TODO: include success-rate considerations to choose best row to use for storing `V_{DD}/2` + let row_address_1 = self.comp_state.constant_values.get(&1).expect("Constants are expected to be placed in every subarray beforehand"); // row address where all 1s (V_DD) is stored + let mut instructions = vec!(); + for _ in 0..self.settings.repetition_fracops { + instructions.push(Instruction::FracOp(frac_row)); + } + for other_row in ref_rows { + instructions.push(Instruction::RowCloneFPM(*row_address_1, other_row)); + } + instructions + }, + LogicOp::OR => { + let frac_row = ref_rows.pop().expect("Min 1 row has to be passed for initializing ref subarray"); // TODO: include success-rate considerations to choose best row to use for storing `V_{DD}/2` + let row_address_0 = self.comp_state.constant_values.get(&0).expect("Constants are expected to be placed in every subarray beforehand"); // row address where all 1s (V_DD) is stored + let mut instructions = vec!(); + for _ in 0..self.settings.repetition_fracops { + instructions.push(Instruction::FracOp(frac_row)); + } + for other_row in ref_rows { + instructions.push(Instruction::RowCloneFPM(*row_address_0, other_row)); + } + instructions + }, + LogicOp::NOT => vec!(), + _ => panic!("{logic_op:?} not supported yet"), + } + } + + /// Places the referenced `src_operands` into the corresponding `row_addresses` which are expected to be simultaneously executed + fn init_compute_subarray(&mut self, mut row_addresses: Vec, mut src_operands: Vec, logic_op: LogicOp) -> Vec { + let mut instructions = vec!(); + // if there are fewer src-operands than activated rows perform input replication + row_addresses.sort_by_key(|row| ((ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row))); // replicate input that resides in row with lowest success-rate (=probably the row furthest away) + let nr_elements_to_extend = row_addresses.len() - src_operands.len(); + if nr_elements_to_extend > 0 { + let last_element = *src_operands.last().unwrap(); + src_operands.extend( std::iter::repeat_n(last_element, nr_elements_to_extend)); + } + + for (&row_addr, &src_operand) in row_addresses.iter().zip(src_operands.iter()) { + let src_operand_location = self.comp_state.value_states.get(&src_operand).expect("Src operand not available although it is used by a candidate. Sth went wrong...") + .row_location.expect("Src operand not live although it is used by a candidate. Sth went wrong..."); + + self.comp_state.dram_state.insert(row_addr, RowState { is_compute_row: true, live_value: Some(src_operand), constant: None }); + + if (src_operand_location & SUBARRAY_ID_BITMASK) == (row_addr & SUBARRAY_ID_BITMASK) { + instructions.push(Instruction::RowCloneFPM(src_operand_location, row_addr)); + } else { + instructions.push(Instruction::RowClonePSM(src_operand_location, row_addr)); + } + } + instructions + } /// Return id of subarray to use for computation and reference (compute_subarrayid, reference_subarrayid) /// - based on location of input rows AND current compilation state /// - [ ] POSSIBLE EXTENSION: include lookahead for future ops and their inputs they depend on fn select_compute_and_ref_subarray(&self, input_rows: Vec) -> (SubarrayId, SubarrayId) { + debug!("[ENTER select_compute_and_ref_subarray]"); // naive implementation: just use the subarray that most of the `input_rows` reside in // TODO: find better solution let used_subarray_ids = input_rows.into_iter().map(|row| row & SUBARRAY_ID_BITMASK); @@ -201,6 +291,9 @@ impl Compiler { /// - [ ] make sure that operation to be executed on those rows won't simultaneously activate other rows holding valid data which will be used by future operations fn execute_next_instruction(&mut self, next_candidate: &Id, network: &impl NetworkWithBackwardEdges) -> Vec { + let mut next_instructions = vec!(); + + debug!("Executing candidate {:?}", next_candidate); let src_operands: Vec = network.node(*next_candidate).inputs() .iter() .map(|signal| signal.node_id()) @@ -223,32 +316,54 @@ impl Compiler { let (compute_subarray, ref_subarray) = self.select_compute_and_ref_subarray(src_rows); let language_op = network.node(*next_candidate); - // match language_op { - // Aig::And() - // } + let logic_op = match language_op { + Aig::And(_) => LogicOp::AND, + Aig::Or(_) => LogicOp::OR, + // TODO: extract NOT + _ => panic!("candidate is expected to be a logic op"), + }; // 0. Select an SRA (=row-address tuple) for the selected subarray based on highest success-rate // TODO (possible improvement): input replication by choosing SRA with more activated rows than operands and duplicating operands which are in far-away rows into several rows?) - let (row1,row2) = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&(nr_rows as u8)).unwrap() - // TODO: sort by success-rate - .first().expect("No SRA for nr-rows={nr_rows}"); + let row_combi = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&(nr_rows as u8)).unwrap() + // sort by success-rate - using eg `BTreeMap` turned out to impose a too large runtime overhead + .iter() + .find(|combi| !self.blocked_row_combinations.contains(combi)) // choose first block RowAddr-combination + .expect("No SRA for nr-rows={nr_rows}"); + + let activated_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(row_combi).unwrap(); + let ref_rows: Vec = activated_rows.iter() + .map(|row| row & (subarrayid_to_subarray_address(ref_subarray))) // make activated rows refer to the right subarray + .collect(); + let comp_rows: Vec = activated_rows.iter() + .map(|row| row & (subarrayid_to_subarray_address(compute_subarray))) // make activated rows refer to the right subarray + .collect(); + // 1. Initialize rows in ref-subarray (if executing AND/OR) // - TODO: read nr of frac-ops to issue from compiler-settings + let mut instruction_init_ref_subarray = self.init_reference_subarray(ref_rows.clone(), logic_op); + next_instructions.append(&mut instruction_init_ref_subarray); // 2. Place rows in the simultaneously activated rows in the compute subarray (init other rows with 0 for OR, 1 for AND and same value for NOT) - + let mut instructions_init_comp_subarray = self.init_compute_subarray( activated_rows.clone(), src_operands, logic_op); + next_instructions.append(&mut instructions_init_comp_subarray); // SKIPPED: 2.2 Check if issuing `APA(src1,src2)` would activate other rows which hold valid data // - only necessary once we find optimization to not write values to safe-space but reuse them diectly // 2.2.1 if yes: move data to other rows for performing this op // 3. Issue actual operation + next_instructions.push(Instruction::APA(row_combi.0, row_combi.1)); + for row in comp_rows { + self.comp_state.dram_state.insert(row, RowState { is_compute_row: true, live_value: Some(*next_candidate), constant: None }); + } + // TODO NEXT: update `comp_state` (computed values, location of values, ..) // 4. Copy result data from dst NEAREST to the sense-amps into a safe-space row // TODO: possible improvement - error correction over all dst-rows (eg majority-vote for each bit, votes weighted by distance to sense-amps?) - vec!() + next_instructions } /// Compute `SchedulingPrio` for a given node @@ -263,7 +378,7 @@ impl Compiler { .filter(|out| *out != id && !(self.comp_state.value_states.get(out) - .unwrap_or(&ValueState{is_computed: false, row_location: None}) // no entry means this is the first time accessing this value + .unwrap_or(&ValueState{is_computed: false, row_location: None }) // no entry means this is the first time accessing this value .is_computed) ) // filter for uses of `input` which still rely on it (=those that are not computed yet, except for currently checked node .collect(); @@ -288,6 +403,8 @@ pub struct RowState { is_compute_row: bool, /// `None` if the value inside this row is currently not live live_value: Option, + /// Some rows (mostly only safe-space rows) store constants values, see [`CompilationState::constant_values`] + constant: Option, } #[derive(Debug)] @@ -306,6 +423,8 @@ pub struct CompilationState { dram_state: HashMap, /// Stores row in which an intermediate result (which is still to be used by future ops) is currently located (or whether it has been computed at all) value_states: HashMap, + /// Some constants are stored in fixed rows (!in each subarray), eg 0s and 1s for initializing reference subarray + constant_values: HashMap, /// List of candidates (ops ready to be issued) prioritized by some metric by which they are scheduled for execution /// - NOTE: calculate Nodes `SchedulingPrio` using candidates: PriorityQueue, @@ -316,6 +435,7 @@ impl CompilationState { Self { dram_state, value_states: HashMap::new(), + constant_values: HashMap::new(), candidates: PriorityQueue::new(), } } @@ -417,7 +537,7 @@ mod tests { INIT.call_once(|| { env_logger::init(); }); - Compiler::new(CompilerSettings { print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, safe_space_rows_per_subarray: 16 } ) + Compiler::new(CompilerSettings { print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, repetition_fracops: 5, safe_space_rows_per_subarray: 16 } ) } #[test] // mark function as test-fn diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index d52fed5..d4a7942 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -179,6 +179,8 @@ pub struct CompilerSettings { // /// Location to config-file holding fcdram-specific configs // fcdram_config_file: Path, + /// How many times to issue FracOps to store `V_{DD}/2` in one of the activated rows for AND/OR + repetition_fracops: u64, /// Nr of rows to use as a safe space for operands per subarray /// - REMINDER: after `AND`/`OR`-ops the src-operands are overwritten by the op-result, so to reuse operands they're put into specially designated rows (="safe-space") which won't be overwritten /// - Ops reusing those operands have to clone the values from the safe-space prior to issuing the Op diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs index 58c850f..fd2fe3f 100644 --- a/rs/src/fc_dram/program.rs +++ b/rs/src/fc_dram/program.rs @@ -94,7 +94,7 @@ impl From for Program { impl Display for Program { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { for instr in &self.instructions { - write!(f, "{}", instr)?; + writeln!(f, "{}", instr)?; } Ok(()) } From 2bc75d0fc83d19b3555ad3864ea054e2572ef1d7 Mon Sep 17 00:00:00 2001 From: alku662e Date: Mon, 9 Jun 2025 19:32:13 +0200 Subject: [PATCH 17/51] Changed Id to Signal, working on adding support for NOT (negated values) --- rs/src/fc_dram/compiler.rs | 102 ++++++++++++++++++++++++++----------- 1 file changed, 71 insertions(+), 31 deletions(-) diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 7d1c594..e40ff6c 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -81,6 +81,7 @@ impl Compiler { debug!("{:?},", node); } + // 1. Actual compilation while let Some((next_candidate, _)) = self.comp_state.candidates.pop() { // TODO: extend program with instr that is executed next let executed_instructions = &mut self.execute_next_instruction(&next_candidate, network); @@ -173,10 +174,10 @@ impl Compiler { let row_address = (next_subarray << ROWS_PER_SUBARRAY.ilog2() ) | next_row; // higher bits=id of subarray - let initial_row_state = RowState { is_compute_row: false, live_value: Some(next_input), constant: None }; + let initial_row_state = RowState { is_compute_row: false, live_value: Some(Signal::new(next_input, false)), constant: None }; let initial_value_state = ValueState { is_computed: true, row_location: Some(row_address) }; self.comp_state.dram_state.insert(row_address, initial_row_state); - self.comp_state.value_states.insert(next_input, initial_value_state); + self.comp_state.value_states.insert(Signal::new(next_input, false), initial_value_state); } } @@ -188,9 +189,12 @@ impl Compiler { // init candidates with all nodes having only inputs as src-operands for &input in inputs.as_slice() { // every output has a prio determined eg by how many src-operands it uses last (->to minimize nr of live values in rows) - let mut outputs_with_prio: PriorityQueue = network.node_outputs(input) + let mut outputs_with_prio: PriorityQueue = network.node_outputs(input) .filter(|output| network.node(*output).inputs().iter().all(|other_input| inputs.contains(&other_input.node_id()) )) // only those nodes are candidates, whose src-operands are ALL inputs (->only primary inputs are directly available) - .map( |output| (output, self.compute_scheduling_prio_for_node(output, network)) ) + .map( |output| { + let output_signal = Signal::new(output, false); + (output_signal, self.compute_scheduling_prio_for_node(output_signal, network)) + }) .collect(); self.comp_state.candidates.append(&mut outputs_with_prio); debug!("{:?} has the following outputs: {:?}", input, network.node_outputs(input).collect::>()); @@ -245,7 +249,7 @@ impl Compiler { } /// Places the referenced `src_operands` into the corresponding `row_addresses` which are expected to be simultaneously executed - fn init_compute_subarray(&mut self, mut row_addresses: Vec, mut src_operands: Vec, logic_op: LogicOp) -> Vec { + fn init_compute_subarray(&mut self, mut row_addresses: Vec, mut src_operands: Vec, logic_op: LogicOp) -> Vec { let mut instructions = vec!(); // if there are fewer src-operands than activated rows perform input replication row_addresses.sort_by_key(|row| ((ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row))); // replicate input that resides in row with lowest success-rate (=probably the row furthest away) @@ -269,6 +273,38 @@ impl Compiler { } instructions } + + /// Return sequence of instructions to provide negated inputs (if there are any among `src_operands`)). + /// + /// NOTE: Some inputs may be needed in a negated form by the candidates. To start execution those + /// input operands have to be available with their negated form. + /// TODO: NEXT + fn init_negated_src_operands(&self, src_operands: Vec, network: &impl NetworkWithBackwardEdges) -> Vec { + let mut instructions = vec!(); + let mut negated_inputs: HashSet = HashSet::new(); // inputs which are required in their negated form + for (candidate, _) in &self.comp_state.candidates { + let negated_src_operands: Vec = network.node(candidate.node_id()).inputs().iter() + .filter(|sig| sig.is_inverted()) + .copied() // map ref to owned val + .collect(); + negated_inputs.extend(negated_src_operands.iter()); + } + + for neg_in in negated_inputs { + if self.comp_state.value_states.contains_key(&neg_in) { + // negated signal is already available + continue; + } else { + // else make negated-signal available + let unnegated_signal = neg_in.invert(); + let origin_unneg_signal = self.comp_state.value_states.get(&unnegated_signal).expect("Original version of this value is not available??"); + // TODO: move value in some safe-space row + } + } + + instructions + } + /// Return id of subarray to use for computation and reference (compute_subarrayid, reference_subarrayid) /// - based on location of input rows AND current compilation state /// - [ ] POSSIBLE EXTENSION: include lookahead for future ops and their inputs they depend on @@ -290,14 +326,13 @@ impl Compiler { } /// - [ ] make sure that operation to be executed on those rows won't simultaneously activate other rows holding valid data which will be used by future operations - fn execute_next_instruction(&mut self, next_candidate: &Id, network: &impl NetworkWithBackwardEdges) -> Vec { + fn execute_next_instruction(&mut self, next_candidate: &Signal, network: &impl NetworkWithBackwardEdges) -> Vec { let mut next_instructions = vec!(); debug!("Executing candidate {:?}", next_candidate); - let src_operands: Vec = network.node(*next_candidate).inputs() - .iter() - .map(|signal| signal.node_id()) - .collect(); + let src_operands: Vec = network.node(next_candidate.node_id()).inputs().to_vec(); + let mut init_neg_operands = self.init_negated_src_operands(src_operands.clone(), network); // TODO NEXT: make sure all required negated operands are available + next_instructions.append(&mut init_neg_operands); let nr_operands = src_operands.len(); // use to select SRA to activate let nr_rows = nr_operands.next_power_of_two(); @@ -314,7 +349,7 @@ impl Compiler { .collect(); let (compute_subarray, ref_subarray) = self.select_compute_and_ref_subarray(src_rows); - let language_op = network.node(*next_candidate); + let language_op = network.node(next_candidate.node_id()); let logic_op = match language_op { Aig::And(_) => LogicOp::AND, @@ -355,13 +390,15 @@ impl Compiler { // 3. Issue actual operation next_instructions.push(Instruction::APA(row_combi.0, row_combi.1)); - for row in comp_rows { - self.comp_state.dram_state.insert(row, RowState { is_compute_row: true, live_value: Some(*next_candidate), constant: None }); + for (&comp_row, &ref_row) in comp_rows.iter().zip(ref_rows.iter()) { + self.comp_state.dram_state.insert(comp_row, RowState { is_compute_row: true, live_value: Some(*next_candidate), constant: None }); + // ref subarray holds negated value afterwarsd + self.comp_state.dram_state.insert(ref_row, RowState { is_compute_row: true, live_value: Some(next_candidate.invert()), constant: None }); + // TODO: `value_state` } - // TODO NEXT: update `comp_state` (computed values, location of values, ..) - // 4. Copy result data from dst NEAREST to the sense-amps into a safe-space row - // TODO: possible improvement - error correction over all dst-rows (eg majority-vote for each bit, votes weighted by distance to sense-amps?) + // 4. Copy result data from dst NEAREST to the sense-amps into a safe-space row and update `value_state` + // TODO LAST: possible improvement - error correction over all dst-rows (eg majority-vote for each bit, votes weighted by distance to sense-amps?) next_instructions } @@ -369,17 +406,20 @@ impl Compiler { /// Compute `SchedulingPrio` for a given node /// - used for inserting new candidates /// TODO: write unittest for this function - fn compute_scheduling_prio_for_node(&self, id: Id, network: &impl NetworkWithBackwardEdges) -> SchedulingPrio { - let nr_last_value_uses = network.node(id).inputs() // for each input check whether `id` is the last node using it + fn compute_scheduling_prio_for_node(&self, signal: Signal, network: &impl NetworkWithBackwardEdges) -> SchedulingPrio { + let nr_last_value_uses = network.node(signal.node_id()).inputs() // for each input check whether `id` is the last node using it .iter() .fold(0, |acc, input| { let input_id = Signal::node_id(input); let non_computed_outputs: Vec = network.node_outputs(input_id) // get all other nodes relying on this input - .filter(|out| - *out != id && - !(self.comp_state.value_states.get(out) + .filter(|out| { + + let out_signal = Signal::new(*out,false); + out_signal != signal && + !(self.comp_state.value_states.get(&out_signal) .unwrap_or(&ValueState{is_computed: false, row_location: None }) // no entry means this is the first time accessing this value .is_computed) + } ) // filter for uses of `input` which still rely on it (=those that are not computed yet, except for currently checked node .collect(); if non_computed_outputs.is_empty() { @@ -391,8 +431,8 @@ impl Compiler { SchedulingPrio { nr_last_value_uses, - nr_src_operands: network.node(id).inputs().len(), - nr_result_operands: network.node_outputs(id).collect::>().len(), + nr_src_operands: network.node(signal.node_id()).inputs().len(), + nr_result_operands: network.node_outputs(signal.node_id()).collect::>().len(), } } } @@ -402,7 +442,7 @@ pub struct RowState { /// True iff that row is currently: 1) Not a safe-sapce row, 2) Doesn't activate any safe-sapce rows, 3) Isn't holding valid values in the role of a reference-subarray row is_compute_row: bool, /// `None` if the value inside this row is currently not live - live_value: Option, + live_value: Option, /// Some rows (mostly only safe-space rows) store constants values, see [`CompilationState::constant_values`] constant: Option, } @@ -422,12 +462,12 @@ pub struct CompilationState { /// For each row in the dram-module store its state dram_state: HashMap, /// Stores row in which an intermediate result (which is still to be used by future ops) is currently located (or whether it has been computed at all) - value_states: HashMap, + value_states: HashMap, /// Some constants are stored in fixed rows (!in each subarray), eg 0s and 1s for initializing reference subarray constant_values: HashMap, /// List of candidates (ops ready to be issued) prioritized by some metric by which they are scheduled for execution /// - NOTE: calculate Nodes `SchedulingPrio` using - candidates: PriorityQueue, + candidates: PriorityQueue, } impl CompilationState { @@ -569,13 +609,13 @@ mod tests { // Id(0): Input(1) // act is if one `AND` has already been computed -> other and (`Id(2)`) should be the only candidate left - compiler .comp_state.value_states.insert(eggmock::Id::from(4), ValueState{ is_computed: true, row_location: None }); + compiler .comp_state.value_states.insert(Signal::new(eggmock::Id::from(4), false), ValueState{ is_computed: true, row_location: None }); let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); compiler.initialize_candidates(&ntk_backward); - let is_candidate_ids: HashSet = compiler.comp_state.candidates.iter().map(|(id,_)| *id).collect(); - let should_candidate_ids: HashSet = HashSet::from([eggmock::Id::from(2), eggmock::Id::from(4)]); + let is_candidate_ids: HashSet = compiler.comp_state.candidates.iter().map(|(id,_)| *id).collect(); + let should_candidate_ids: HashSet = HashSet::from([Signal::new( eggmock::Id::from(2), false), Signal::new(eggmock::Id::from(4), false)]); assert_eq!( is_candidate_ids, should_candidate_ids); // TODO: test-case with node that relies on one input src-operand and one non-input (intermediate node) src-operand @@ -599,11 +639,11 @@ mod tests { // Id(0): Input(1) // act is if one `AND` has already been computed -> other and (`Id(2)`) should be the only candidate left - compiler.comp_state.value_states.insert(eggmock::Id::from(2), ValueState{ is_computed: true, row_location: None }); + compiler.comp_state.value_states.insert(Signal::new(eggmock::Id::from(2), false), ValueState{ is_computed: true, row_location: None }); let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); - let scheduling_prio = compiler.compute_scheduling_prio_for_node(eggmock::Id::from(4), &ntk_backward); + let scheduling_prio = compiler.compute_scheduling_prio_for_node(Signal::new(eggmock::Id::from(4), false), &ntk_backward); assert_eq!(scheduling_prio, SchedulingPrio { nr_last_value_uses: 2, nr_src_operands: 2, nr_result_operands: 1 } ); } From 27ef110d5ea86dbdff094629153444bd6d30d4f2 Mon Sep 17 00:00:00 2001 From: alku662e Date: Tue, 10 Jun 2025 17:07:47 +0200 Subject: [PATCH 18/51] WIP input-operand placement not working yet, output doesn't seem to be placed into a row yet --- rs/src/fc_dram/architecture.rs | 6 +- rs/src/fc_dram/compiler.rs | 171 ++++++++++++++++++++------------- rs/src/fc_dram/program.rs | 84 ++++------------ 3 files changed, 124 insertions(+), 137 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 7fc55a1..1837761 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -5,10 +5,7 @@ //! //! RowAddress (eg via bit-shifting given bitmasks for subarray-id & row-addr to put on-top of RowAddress -use std::{cmp::Ordering, collections::{BTreeMap, HashMap, HashSet}, fmt::{Display, Formatter}, sync::LazyLock}; - -use log::debug; -use priority_queue::PriorityQueue; +use std::{cmp::Ordering, collections::{HashMap, HashSet}, fmt::{Display, Formatter}, sync::LazyLock}; pub const NR_SUBARRAYS: i64 = 2i64.pow(7); pub const ROWS_PER_SUBARRAY: i64 = 2i64.pow(9); @@ -283,6 +280,7 @@ pub enum Instruction { impl Display for Instruction { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + // TODO: change string-representation to display subarray-id let description = match self { Instruction::FracOp(row) => format!("AP({row})"), Instruction::APA(row1,row2) => format!("APA({row1},{row2})"), diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index e40ff6c..231bbe2 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -7,17 +7,15 @@ //! - [`compile()`] = main function - compiles given logic network for the given [`architecture`] //! into a [`program`] using some [`optimization`] -use crate::ambit::Architecture; - use super::{ - architecture::{subarrayid_to_subarray_address, FCDRAMArchitecture, Instruction, LogicOp, SubarrayId, ARCHITECTURE, NR_SUBARRAYS, ROWS_PER_SUBARRAY, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, ProgramState, RowAddress + architecture::{subarrayid_to_subarray_address, FCDRAMArchitecture, Instruction, LogicOp, SubarrayId, ARCHITECTURE, NR_SUBARRAYS, ROWS_PER_SUBARRAY, ROW_ID_BITMASK, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, RowAddress }; use eggmock::{Aig, AigLanguage, ComputedNetworkWithBackwardEdges, Id, NetworkLanguage, NetworkWithBackwardEdges, Node, Signal}; use itertools::Itertools; use log::debug; use priority_queue::PriorityQueue; use rustc_hash::FxHashMap; -use std::{cmp::Ordering, collections::{BinaryHeap, HashMap, HashSet}, env::consts::ARCH, fs::write}; +use std::{cmp::Ordering, collections::{HashMap, HashSet}}; pub struct Compiler { /// compiler-options set by user @@ -57,14 +55,10 @@ impl Compiler { &mut self, network: &impl NetworkWithBackwardEdges, ) -> Program { - - // 0. Prepare compilation: select safe-space rows, place inputs into DRAM module - self.init_comp_state(network); - let mut program = Program::new(vec!()); - let outputs = network.outputs(); - + // 0. Prepare compilation: select safe-space rows, place inputs into DRAM module (and store where inputs have been placed in `program`) + self.init_comp_state(network, &mut program); // TODO: how to get src-operands of `outputs` ?? // debug!("{:?}", network.node_outputs(outputs.next()).collect()); @@ -88,15 +82,23 @@ impl Compiler { program.instructions.append(executed_instructions); // update new candidates - } + let new_candidates: PriorityQueue = network.node_outputs(next_candidate.node_id()) + .filter({|out| network.node(*out).inputs().iter() + .all( |input| self.comp_state.value_states.keys().contains(&Signal::new(input.node_id(), false)) && self.comp_state.value_states.get(&Signal::new(input.node_id(), false)).unwrap().is_computed ) + }) + .map(|id| (Signal::new(id, false), self.compute_scheduling_prio_for_node(Signal::new(id, false), network))) + .collect(); - // let (outputs, leaves) = (network.outputs(), network.leaves()); + self.comp_state.candidates.extend(new_candidates); + } - // Program { - // instructions: vec!(Instruction::FracOp(-1)) , - // } - // todo!() // optimize(&mut program); + + // store output operand location so user can retrieve them after running the program + let outputs = network.outputs(); + program.output_row_operands_placement = outputs.map(|out| { + (out, self.comp_state.value_states.get(&out).unwrap().row_location.expect("ERROR: one of the outputs hasn't been computed yet...")) + }).collect(); program } @@ -105,7 +107,6 @@ impl Compiler { /// - [ ] TODO: improve algo (in terms of space efficiency) fn alloc_safe_space_rows(&mut self, nr_safe_space_rows: u8) { - debug!("[ENTER alloc_safe_space_rows]"); let supported_nr_safe_space_rows = vec!(1,2,4,8,16,32,64); if !supported_nr_safe_space_rows.contains(&nr_safe_space_rows) { panic!("Only the following nr of rows are supported to be activated: {:?}, given: {}", supported_nr_safe_space_rows, nr_safe_space_rows); @@ -117,7 +118,7 @@ impl Compiler { // choose any row-addr combi activating exactly `nr_safe_space_rows` and choose all that activated rows to be safe-space rows let chosen_row_addr_combi = ARCHITECTURE.sra_degree_to_rowaddress_combinations .get(&nr_safe_space_rows).unwrap() - .iter().next().unwrap(); // just take the first row-combi that activates `nr_safe_space_rows` + .first().unwrap(); // just take the first row-combi that activates `nr_safe_space_rows` ARCHITECTURE.precomputed_simultaneous_row_activations.get(chosen_row_addr_combi).unwrap().to_vec() }; @@ -151,7 +152,6 @@ impl Compiler { /// TODO: algo which looks ahead which input-row-placement might be optimal (->reduce nr of move-ops to move intermediate results around & keep inputs close to sense-amps /// TODO: parititon logic network into subgraphs s.t. subgraphs can be mapped onto subarrays reducing nr of needed moves fn place_inputs(&mut self, mut inputs: Vec) { - debug!("[ENTER place_inputs]"); // naive implementation: start placing input on consecutive safe-space rows (continuing with next subarray once the current subarray has no more free safe-space rows) // TODO: change input operand placement to be more optimal (->taking into account future use of those operands) let mut subarray_iter=0..NR_SUBARRAYS; @@ -183,7 +183,6 @@ impl Compiler { /// Initialize candidates with all nodes that are computable fn initialize_candidates(&mut self, network: &impl NetworkWithBackwardEdges) { - debug!("[ENTER initialize_candidates]"); let inputs: Vec = network.leaves().collect(); // init candidates with all nodes having only inputs as src-operands @@ -202,7 +201,7 @@ impl Compiler { } /// Initialize compilation state: mark unsuable rows (eg safe-space rows), place input operands - fn init_comp_state(&mut self, network: &impl NetworkWithBackwardEdges) { + fn init_comp_state(&mut self, network: &impl NetworkWithBackwardEdges, program: &mut Program) { // debug!("Compiling {:?}", network); // 0.1 Allocate safe-space rows (for storing intermediate values safely self.alloc_safe_space_rows(self.settings.safe_space_rows_per_subarray); @@ -213,6 +212,25 @@ impl Compiler { debug!("Placed inputs {:?} in {:?}", network.leaves().collect::>(), self.comp_state.value_states); // 0.3 Setup: store all network-nodes yet to be compiled self.initialize_candidates(network); + + // store where inputs have been placed in program + program.input_row_operands_placement = network.leaves().collect::>() + .iter() + .flat_map(|&id| { + let mut locations = Vec::new(); + let original_value = Signal::new(id, false); + let inverted_value = Signal::new(id, false); // inverted value might have also been placed on init + if let Some(value) = self.comp_state.value_states.get(&original_value) { + locations.push((original_value, value.row_location.expect("Inputs are init directly into rows at the start"))); + } + + if let Some(value) = self.comp_state.value_states.get(&inverted_value) { + locations.push((inverted_value, value.row_location.expect("Inputs are init directly into rows at the start"))); + } + + debug_assert_ne!(locations, vec!(), "Input {id:?} has not been placed at all"); + locations + }).collect(); } /// Returns instructions to initialize `ref_rows` in reference-subarray for corresponding logic-op @@ -279,16 +297,14 @@ impl Compiler { /// NOTE: Some inputs may be needed in a negated form by the candidates. To start execution those /// input operands have to be available with their negated form. /// TODO: NEXT - fn init_negated_src_operands(&self, src_operands: Vec, network: &impl NetworkWithBackwardEdges) -> Vec { + fn init_negated_src_operands(&mut self, src_operands: Vec, network: &impl NetworkWithBackwardEdges) -> Vec { let mut instructions = vec!(); let mut negated_inputs: HashSet = HashSet::new(); // inputs which are required in their negated form - for (candidate, _) in &self.comp_state.candidates { - let negated_src_operands: Vec = network.node(candidate.node_id()).inputs().iter() - .filter(|sig| sig.is_inverted()) - .copied() // map ref to owned val - .collect(); - negated_inputs.extend(negated_src_operands.iter()); - } + let negated_src_operands: Vec = src_operands.iter() + .filter(|sig| sig.is_inverted()) + .copied() // map ref to owned val + .collect(); + negated_inputs.extend(negated_src_operands.iter()); for neg_in in negated_inputs { if self.comp_state.value_states.contains_key(&neg_in) { @@ -297,8 +313,28 @@ impl Compiler { } else { // else make negated-signal available let unnegated_signal = neg_in.invert(); - let origin_unneg_signal = self.comp_state.value_states.get(&unnegated_signal).expect("Original version of this value is not available??"); - // TODO: move value in some safe-space row + let origin_unneg_row= self.comp_state.value_states.get(&unnegated_signal).expect("Original version of this value is not available??") + .row_location.expect("Original version of this value is not live??"); + let dst_row = self.get_next_free_safespace_row(None); + + // TODO: negate val and move value in selected safe-space row + let selected_sra = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&1).unwrap().iter().find(|row| ! self.safe_space_rows.contains(&row.0)) // just select the first available compute-row + .expect("It's assumed that issuing APA(row,row) for same row activates only that row"); + let origin_subarray_bitmask = origin_unneg_row & SUBARRAY_ID_BITMASK; + let comp_row = origin_subarray_bitmask | (ROW_ID_BITMASK & selected_sra.0); + let (_, ref_array) = self.select_compute_and_ref_subarray(vec!(comp_row)); + let result_row = (ROW_ID_BITMASK & selected_sra.0) & ref_array; + + let move_to_comp_row = Instruction::RowCloneFPM(origin_unneg_row, comp_row); + let not = Instruction::APA(comp_row, result_row); + let move_to_safespace= Instruction::RowCloneFPM(result_row, dst_row); + + instructions.push(move_to_comp_row); + instructions.push(not); + instructions.push(move_to_safespace); + + self.comp_state.dram_state.insert(dst_row, RowState { is_compute_row: false, live_value: Some(neg_in), constant: None }); + self.comp_state.value_states.insert(neg_in, ValueState { is_computed: true, row_location: Some(dst_row) }); } } @@ -309,7 +345,6 @@ impl Compiler { /// - based on location of input rows AND current compilation state /// - [ ] POSSIBLE EXTENSION: include lookahead for future ops and their inputs they depend on fn select_compute_and_ref_subarray(&self, input_rows: Vec) -> (SubarrayId, SubarrayId) { - debug!("[ENTER select_compute_and_ref_subarray]"); // naive implementation: just use the subarray that most of the `input_rows` reside in // TODO: find better solution let used_subarray_ids = input_rows.into_iter().map(|row| row & SUBARRAY_ID_BITMASK); @@ -325,6 +360,34 @@ impl Compiler { (mostly_used_subarray_id, selected_ref_subarray) } + /// Return next free safe-space row + /// - use `preferred_subarray` if there is a specific subarray you would like to be that + /// safe-sapce row from. Else just the next free safe-space row will be chosen + /// - NOTE: this is not guaranteed to be fulfilled ! + fn get_next_free_safespace_row(&self, preferred_subarray: Option) -> RowAddress { + + let subarray_order = if let Some(subarray) = preferred_subarray { + // start search with `preferred_subarray` if it's supplied + let mut first_half = (0..subarray).collect::>(); + first_half.extend(subarray+1..NR_SUBARRAYS); + first_half + // subarray_original_order.filter(|x| *x != subarray).into_iter() + } else { + // else just iterate in natural order + (0..NR_SUBARRAYS).collect() + }; + + for subarray in subarray_order { + for row in &self.safe_space_rows { + let row_addr = row | subarrayid_to_subarray_address(subarray); + if self.comp_state.dram_state.contains_key(&row_addr) || self.comp_state.dram_state.get(&row_addr).unwrap().live_value.is_none() { + return row_addr; + } + } + } + panic!("OOM: No more available safe-space rows"); + } + /// - [ ] make sure that operation to be executed on those rows won't simultaneously activate other rows holding valid data which will be used by future operations fn execute_next_instruction(&mut self, next_candidate: &Signal, network: &impl NetworkWithBackwardEdges) -> Vec { let mut next_instructions = vec!(); @@ -481,41 +544,6 @@ impl CompilationState { } } -// impl<'n, P: NetworkWithBackwardEdges> CompilationState<'n, P> { -// /// initializes `self..candidates()` with inputs + nodes whose src-operands are all inputs -// pub fn new(network: &'n P) -> Self { -// let mut candidates = FxHashSet::default(); -// // check all parents of leaves whether they have only leaf children, in which case they are -// // candidates (since all of their inputs are calculated then) -// for leaf in network.leaves() { // =inputs -// for candidate_id in network.node_outputs(leaf) { -// let candidate = network.node(candidate_id); -// if candidate -// .inputs() -// .iter() -// .all(|signal| network.node(signal.node_id()).is_leaf()) -// { -// candidates.insert((candidate_id, candidate)); -// } -// } -// } -// -// // let outputs = network -// // .outputs() -// // .enumerate() -// // .map(|(id, sig)| (sig.node_id(), (id as i64, sig))) -// // .collect(); -// -// let total_nr_rows_in_dram_module = ARCHITECTURE.nr_subarrays; -// Self { -// network, -// candidates, -// signal_to_row_mapping: HashMap::new(), -// free_rows: (0..=total_nr_rows_in_dram_module).collect(), // we start with all rows being free at the beginning -// } -// } -// } - /// Contains info to order nodes for Instruction Scheduling /// GOAL: minimize register usage: /// 1. Number of last-value-uses @@ -558,7 +586,7 @@ mod tests { use crate::fc_dram::egraph_extraction::CompilingCostFunction; use super::*; // import all elements from parent-module - use std::sync::{LazyLock, Mutex, Once}; + use std::sync::Once; // ERROR: `eggmock`-API doesn't allow this.. // // For data shared among unittests but initalized only once @@ -654,4 +682,11 @@ mod tests { let (selected_subarray, _) = compiler.select_compute_and_ref_subarray(vec!(0b1_000_000_000, 0b1_000_010_000, 0b111_000_000_000, 0b10_100_000_000,)); assert_eq!(selected_subarray, 0b1_000_000_000); } + + #[ignore] + fn test_program_validity() { + // 1. test that no APA activates a safe-space row + + // .. + } } diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs index fd2fe3f..c81133c 100644 --- a/rs/src/fc_dram/program.rs +++ b/rs/src/fc_dram/program.rs @@ -14,88 +14,42 @@ pub struct Program { pub instructions: Vec, /// Specifies where row-operands should be placed prior to calling this program /// (This is a convention which tells the user of this lib where the data should be placed within the DRAM before executing this program) - pub input_row_operands_placementl: HashMap, + pub input_row_operands_placement: HashMap, /// Specifies into which rows output-operands will have been placed after the program has run successfully - pub output_row_operands_placementl: HashMap, -} - -#[derive(Debug, Clone)] -pub struct ProgramState { - program: Program, - /// currently used rows - rows: Vec, + pub output_row_operands_placement: HashMap, } impl Program { pub fn new(instructions: Vec) -> Self { Self { instructions, - input_row_operands_placementl: HashMap::new(), - output_row_operands_placementl: HashMap::new(), - } - } -} - -impl ProgramState { - pub fn new( - network: &impl NetworkWithBackwardEdges, - ) -> Self { - Self { - program: Program::new(Vec::new()), - rows: vec!(), + input_row_operands_placement: HashMap::new(), + output_row_operands_placement: HashMap::new(), } } - - - /// TODO: Does FC-DRAM need copying of signals? - /// pub fn signal_copy(&mut self, signal: Signal, target: RowAddress, intermediate_dcc: u8) { - /// } - - /// Sets the value of the operand in `self.rows` to the given signal. If that removes the last - /// reference to the node of the previous signal of the operator, insert spill code for the - /// previous signal - /// **ALWAYS** call this before inserting the actual instruction, otherwise the spill code will - /// spill the wrong value - fn set_signal(&mut self, address: RowAddress, signal: Signal) { - todo!() - } - - /// return rows which are currently unused (so they can be used for operations to come) - pub fn free_id_rows(&mut self, id: Id) { - todo!() - } - - pub fn rows(&self) -> &Vec { - &self.rows - } -} - -impl Deref for ProgramState { - type Target = Program; - - fn deref(&self) -> &Self::Target { - &self.program - } -} - -impl DerefMut for ProgramState { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.program - } -} - -impl From for Program { - fn from(value: ProgramState) -> Self { - value.program - } } /// Print the generated program in human-readable form impl Display for Program { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + writeln!(f, "---------------------------------------")?; + writeln!(f, "Input operand placement:")?; + for (signal, row) in &self.input_row_operands_placement { + writeln!(f, "{:?} in {}", signal, row)?; + } + writeln!(f, "---------------------------------------")?; + + for instr in &self.instructions { writeln!(f, "{}", instr)?; } + + writeln!(f, "---------------------------------------")?; + writeln!(f, "Output operand placement:")?; + for (signal, row) in &self.output_row_operands_placement{ + writeln!(f, "{:?} in {}", signal, row)?; + } + writeln!(f, "---------------------------------------")?; Ok(()) } } From e44bd6a41b1d348151d3df545963ca70c407156c Mon Sep 17 00:00:00 2001 From: alku662e Date: Tue, 10 Jun 2025 19:55:34 +0200 Subject: [PATCH 19/51] Fix buggy input-placement --- rs/src/fc_dram/compiler.rs | 57 +++++++++++++++----------------------- 1 file changed, 23 insertions(+), 34 deletions(-) diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 231bbe2..0a6cbd7 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -4,19 +4,18 @@ //! - also see [`RowState`] //! - [`SchedulingPrio`] = used to prioritize/order instruction for Instruction Scheduling //! -//! - [`compile()`] = main function - compiles given logic network for the given [`architecture`] -//! into a [`program`] using some [`optimization`] +//! - [`Compiler::compile()`] = main function - compiles given logic network for the given [`architecture`] into a [`program`] using some [`optimization`] use super::{ - architecture::{subarrayid_to_subarray_address, FCDRAMArchitecture, Instruction, LogicOp, SubarrayId, ARCHITECTURE, NR_SUBARRAYS, ROWS_PER_SUBARRAY, ROW_ID_BITMASK, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, RowAddress + architecture::{subarrayid_to_subarray_address, Instruction, LogicOp, SubarrayId, ARCHITECTURE, NR_SUBARRAYS, ROWS_PER_SUBARRAY, ROW_ID_BITMASK, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, RowAddress }; -use eggmock::{Aig, AigLanguage, ComputedNetworkWithBackwardEdges, Id, NetworkLanguage, NetworkWithBackwardEdges, Node, Signal}; +use eggmock::{Aig, Id, NetworkWithBackwardEdges, Node, Signal}; use itertools::Itertools; use log::debug; use priority_queue::PriorityQueue; -use rustc_hash::FxHashMap; use std::{cmp::Ordering, collections::{HashMap, HashSet}}; +/// Provides [`Compiler::compile()`] to compile a logic network into a [`Program`] pub struct Compiler { /// compiler-options set by user settings: CompilerSettings, @@ -60,11 +59,6 @@ impl Compiler { // 0. Prepare compilation: select safe-space rows, place inputs into DRAM module (and store where inputs have been placed in `program`) self.init_comp_state(network, &mut program); - // TODO: how to get src-operands of `outputs` ?? - // debug!("{:?}", network.node_outputs(outputs.next()).collect()); - // TODO: get src-operands of outputs and place them appropriately (with knowledge about output - // operands!) - // start with inputs let primary_inputs = network.leaves(); debug!("Primary inputs: {:?}", primary_inputs.collect::>()); @@ -96,9 +90,10 @@ impl Compiler { // store output operand location so user can retrieve them after running the program let outputs = network.outputs(); - program.output_row_operands_placement = outputs.map(|out| { - (out, self.comp_state.value_states.get(&out).unwrap().row_location.expect("ERROR: one of the outputs hasn't been computed yet...")) - }).collect(); + // TODO: doesn't work yet + // program.output_row_operands_placement = outputs.map(|out| { + // (out, self.comp_state.value_states.get(&out).unwrap().row_location.expect("ERROR: one of the outputs hasn't been computed yet...")) + // }).collect(); program } @@ -154,29 +149,13 @@ impl Compiler { fn place_inputs(&mut self, mut inputs: Vec) { // naive implementation: start placing input on consecutive safe-space rows (continuing with next subarray once the current subarray has no more free safe-space rows) // TODO: change input operand placement to be more optimal (->taking into account future use of those operands) - let mut subarray_iter=0..NR_SUBARRAYS; - let next_subarray = subarray_iter.next().unwrap(); - let subarray_addr = subarrayid_to_subarray_address(next_subarray); while let Some(next_input) = inputs.pop() { - let mut row_iter= self.safe_space_rows.iter() - .filter(|row| ! self.comp_state.constant_values.values().contains( &(subarray_addr | **row)) ); // filter safe-space rows which are NOT already used for constants // NOTE: some safe-space rows are reserved for constants - let (next_subarray, next_row) = if let Some(next_row) = row_iter.next() { - (next_subarray, next_row) - } else { - let next_subarray = subarray_iter.next().expect("OOM: no more safe-space rows and subarrays available"); - let next_subarray_addr = subarrayid_to_subarray_address(next_subarray); - let mut row_iter = self.safe_space_rows.iter() - .filter(|row| ! self.comp_state.constant_values.values().contains(&(next_subarray_addr | **row )) ); // filter safe-space rows which are NOT already used for constants - ( next_subarray, row_iter.next().expect("OOM: No safe-space rows available" ) ) - }; - - - let row_address = (next_subarray << ROWS_PER_SUBARRAY.ilog2() ) | next_row; // higher bits=id of subarray + let free_safespace_row = self.get_next_free_safespace_row(None); let initial_row_state = RowState { is_compute_row: false, live_value: Some(Signal::new(next_input, false)), constant: None }; - let initial_value_state = ValueState { is_computed: true, row_location: Some(row_address) }; - self.comp_state.dram_state.insert(row_address, initial_row_state); + let initial_value_state = ValueState { is_computed: true, row_location: Some(free_safespace_row) }; + self.comp_state.dram_state.insert(free_safespace_row, initial_row_state); self.comp_state.value_states.insert(Signal::new(next_input, false), initial_value_state); } } @@ -380,7 +359,7 @@ impl Compiler { for subarray in subarray_order { for row in &self.safe_space_rows { let row_addr = row | subarrayid_to_subarray_address(subarray); - if self.comp_state.dram_state.contains_key(&row_addr) || self.comp_state.dram_state.get(&row_addr).unwrap().live_value.is_none() { + if self.comp_state.dram_state.get(&row_addr).unwrap_or(&RowState::default()).live_value.is_none() { // NOTE: safe-space rows are inserted lazily into `dram_state` return row_addr; } } @@ -510,6 +489,16 @@ pub struct RowState { constant: Option, } +impl Default for RowState { + fn default() -> Self { + RowState { + is_compute_row: false, + live_value: None, + constant: None, + } + } +} + #[derive(Debug)] pub struct ValueState { /// Whether the value has already been computed (->only then it could reside in a row) @@ -581,7 +570,7 @@ impl PartialOrd for SchedulingPrio { #[cfg(test)] mod tests { use eggmock::egg::{self, EGraph, Extractor, RecExpr}; - use eggmock::Network; + use eggmock::{AigLanguage, ComputedNetworkWithBackwardEdges, Network}; use crate::fc_dram::egraph_extraction::CompilingCostFunction; From 27d087db0d1dd15299fc8d7903cc692bb13e70af Mon Sep 17 00:00:00 2001 From: alku662e Date: Tue, 10 Jun 2025 21:04:01 +0200 Subject: [PATCH 20/51] Fix display of addresses for instruction and program --- rs/src/fc_dram/architecture.rs | 66 ++++++++++++++++++++++------------ rs/src/fc_dram/compiler.rs | 9 +++-- rs/src/fc_dram/program.rs | 8 +++-- 3 files changed, 55 insertions(+), 28 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 1837761..4b5ba22 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -7,13 +7,18 @@ use std::{cmp::Ordering, collections::{HashMap, HashSet}, fmt::{Display, Formatter}, sync::LazyLock}; -pub const NR_SUBARRAYS: i64 = 2i64.pow(7); -pub const ROWS_PER_SUBARRAY: i64 = 2i64.pow(9); -pub const SUBARRAY_ID_BITMASK: i64 = 0b1_111_111_000_000_000; // 7 highest bits=subarray id -pub const ROW_ID_BITMASK: i64 = 0b0_000_000_111_111_111; // 7 highest bits=subarray id +pub const NR_SUBARRAYS: u64 = 2u64.pow(7); +pub const ROWS_PER_SUBARRAY: u64 = 2u64.pow(9); +pub const SUBARRAY_ID_BITMASK: u64 = 0b1_111_111_000_000_000; // 7 highest bits=subarray id +pub const ROW_ID_BITMASK: u64 = 0b0_000_000_111_111_111; // 7 highest bits=subarray id +// some utility functions pub fn subarrayid_to_subarray_address(subarray_id: SubarrayId) -> RowAddress { - subarray_id << NR_SUBARRAYS.ilog2() + subarray_id << ROWS_PER_SUBARRAY.ilog2() // lower bits=rows in subarray +} + +pub fn get_subarrayid_from_rowaddr(row: RowAddress) -> SubarrayId { + (row & SUBARRAY_ID_BITMASK) >> NR_SUBARRAYS.ilog2() } /// Main variable specifying architecture of DRAM-module for which to compile for @@ -139,8 +144,8 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { }); /// - ! must be smaller than `rows_per_subarray * nr_subarrays` (this is NOT checked!) -pub type RowAddress = i64; -pub type SubarrayId = i64; +pub type RowAddress = u64; +pub type SubarrayId = u64; #[derive(Debug, PartialEq)] pub struct SuccessRate(f64); @@ -161,9 +166,9 @@ impl Ord for SuccessRate { /// TODO: add field encoding topology of subarrays (to determine which of them share sense-amps) pub struct FCDRAMArchitecture { /// Nr of subarrays in a DRAM module - pub nr_subarrays: i64, + pub nr_subarrays: u64, /// Nr of rows in a single subarray - pub rows_per_subarray: i64, + pub rows_per_subarray: u64, /// Returns all activated rows when issuing `APA(row1, row2)` /// - NOTE: `row1`,`row2` are expected to reside in adjacent subarrays /// - NOTE: the simultaneously activated rows are expected to have the same addresses in both subarrays @@ -209,9 +214,9 @@ impl FCDRAMArchitecture { /// subarray adjacent to the compute subarray (!this is not checked but assumed to be true!) fn get_instructions_implementation_of_logic_ops(logic_op: LogicOp) -> Vec { match logic_op { - LogicOp::NOT => vec!(Instruction::APA(-1, -1)), - LogicOp::AND => vec!(Instruction::FracOp(-1), Instruction::APA(-1, -1)), - LogicOp::OR => vec!(Instruction::FracOp(-1), Instruction::APA(-1, -1)), + LogicOp::NOT => vec!(Instruction::APA(0, 0)), + LogicOp::AND => vec!(Instruction::FracOp(0), Instruction::APA(0, 0)), + LogicOp::OR => vec!(Instruction::FracOp(0), Instruction::APA(0, 0)), LogicOp::NAND => { // 1. AND, 2. NOT FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::AND) @@ -279,17 +284,20 @@ pub enum Instruction { } impl Display for Instruction { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - // TODO: change string-representation to display subarray-id + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let display_row = |row| { format!("{}.{}", get_subarrayid_from_rowaddr(row), row & ROW_ID_BITMASK)}; // display subarray separately + // TODO: change string-representation to display subarray-id let description = match self { - Instruction::FracOp(row) => format!("AP({row})"), - Instruction::APA(row1,row2) => format!("APA({row1},{row2})"), - Instruction::RowCloneFPM(row1,row2) => format!("AA({row1},{row2})"), - Instruction::RowClonePSM(row1,row2) => format!(" - TRANSFER({row1},(rowX)) - TANSFER(rowX,{row2}) - "), - }; + Instruction::FracOp(row) => format!("AP({})", display_row(*row)), + Instruction::APA(row1,row2) => format!("APA({},{})", display_row(*row1), display_row(*row2)), + Instruction::RowCloneFPM(row1, row2) => format!("AA({},{})", display_row(*row1), display_row(*row2)), + Instruction::RowClonePSM(row1, row2) => format!(" + TRANSFER({},(rowX)) + TANSFER(rowX,{}) + ", + display_row(*row1), + display_row(*row2) + )}; write!(f, "{}", description) } } @@ -473,3 +481,17 @@ pub trait RowDecoder { // N:2N: is supported and let `get_simultaneously_activated_rows_of_apa_op()` handle the rest? } + + +#[cfg(test)] +mod tests { + + use super::*; + // fn init() { + // } + + #[test] // mark function as test-fn + fn test_sra() { + println!("{:?}", ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&8).unwrap().first()); + } +} diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 0a6cbd7..fb8218c 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -7,7 +7,7 @@ //! - [`Compiler::compile()`] = main function - compiles given logic network for the given [`architecture`] into a [`program`] using some [`optimization`] use super::{ - architecture::{subarrayid_to_subarray_address, Instruction, LogicOp, SubarrayId, ARCHITECTURE, NR_SUBARRAYS, ROWS_PER_SUBARRAY, ROW_ID_BITMASK, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, RowAddress + architecture::{subarrayid_to_subarray_address, Instruction, LogicOp, SubarrayId, ARCHITECTURE, NR_SUBARRAYS, ROW_ID_BITMASK, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, RowAddress }; use eggmock::{Aig, Id, NetworkWithBackwardEdges, Node, Signal}; use itertools::Itertools; @@ -114,7 +114,9 @@ impl Compiler { let chosen_row_addr_combi = ARCHITECTURE.sra_degree_to_rowaddress_combinations .get(&nr_safe_space_rows).unwrap() .first().unwrap(); // just take the first row-combi that activates `nr_safe_space_rows` - ARCHITECTURE.precomputed_simultaneous_row_activations.get(chosen_row_addr_combi).unwrap().to_vec() + ARCHITECTURE.precomputed_simultaneous_row_activations.get(chosen_row_addr_combi).unwrap() + .iter().map(|row| row & ROW_ID_BITMASK) // reset subarray-id to all 0s + .collect() }; // deactivate all combination which could activate safe-space rows @@ -603,7 +605,8 @@ mod tests { const REQUESTED_SAFE_SPACE_ROWS: u8 = 8; compiler.alloc_safe_space_rows(REQUESTED_SAFE_SPACE_ROWS); - assert_eq!(compiler.safe_space_rows.len(), REQUESTED_SAFE_SPACE_ROWS as usize); + debug!("{:?}", compiler.safe_space_rows); + assert_eq!(compiler.safe_space_rows.iter().dedup().collect::>().len(), REQUESTED_SAFE_SPACE_ROWS as usize); } #[test] // mark function as test-fn diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs index c81133c..c5b0a9a 100644 --- a/rs/src/fc_dram/program.rs +++ b/rs/src/fc_dram/program.rs @@ -2,7 +2,7 @@ //! compiling given logic-network (see [`compilation`]) and potentially adding some manual //! optimizations ([`optimization`]) use super::architecture::{FCDRAMArchitecture, RowAddress}; -use crate::fc_dram::architecture::Instruction; +use crate::fc_dram::architecture::{get_subarrayid_from_rowaddr, Instruction, ROW_ID_BITMASK}; use eggmock::{Id, Aig, NetworkWithBackwardEdges, Signal}; use std::collections::HashMap; use std::fmt::{Display, Formatter}; @@ -32,10 +32,12 @@ impl Program { /// Print the generated program in human-readable form impl Display for Program { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let display_row = |row| { format!("{}.{}", get_subarrayid_from_rowaddr(row), row & ROW_ID_BITMASK)}; // display subarray separately + writeln!(f, "---------------------------------------")?; writeln!(f, "Input operand placement:")?; for (signal, row) in &self.input_row_operands_placement { - writeln!(f, "{:?} in {}", signal, row)?; + writeln!(f, "{:?} in {}", signal, display_row(*row))?; } writeln!(f, "---------------------------------------")?; @@ -47,7 +49,7 @@ impl Display for Program { writeln!(f, "---------------------------------------")?; writeln!(f, "Output operand placement:")?; for (signal, row) in &self.output_row_operands_placement{ - writeln!(f, "{:?} in {}", signal, row)?; + writeln!(f, "{:?} in {}", signal, display_row(*row))?; } writeln!(f, "---------------------------------------")?; Ok(()) From 551673f757eb9270f1684f709dfc1b858689653b Mon Sep 17 00:00:00 2001 From: alku662e Date: Mon, 23 Jun 2025 09:49:04 +0200 Subject: [PATCH 21/51] Output is now computed --- rs/src/fc_dram/architecture.rs | 1 + rs/src/fc_dram/compiler.rs | 85 +++++++++++++++++++++++----------- 2 files changed, 58 insertions(+), 28 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 4b5ba22..190ebb3 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -340,6 +340,7 @@ impl Instruction { pub fn get_nr_memcycles(&self) -> u16 { match self { Instruction::FracOp(__) => 7, // see [2] ChapIII.A, (two cmd-cycles + five idle cycles) + // TODO: change to ns (t_{RAS}+6ns) - `t_{RAS}` to mem cycles Instruction::APA(_, _) => 3, // NOTE: this is not explicitly written in the paper, TODO: check with authors Instruction::RowCloneFPM(_, _) => 2, // see [4] Chap3.2 Instruction::RowClonePSM(_, _) => 256, // =(8192B/64B)*2 (*2 since copies two time, to and from `` on 64B-granularity diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index fb8218c..22c6134 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -75,13 +75,9 @@ impl Compiler { let executed_instructions = &mut self.execute_next_instruction(&next_candidate, network); program.instructions.append(executed_instructions); - // update new candidates - let new_candidates: PriorityQueue = network.node_outputs(next_candidate.node_id()) - .filter({|out| network.node(*out).inputs().iter() - .all( |input| self.comp_state.value_states.keys().contains(&Signal::new(input.node_id(), false)) && self.comp_state.value_states.get(&Signal::new(input.node_id(), false)).unwrap().is_computed ) - }) - .map(|id| (Signal::new(id, false), self.compute_scheduling_prio_for_node(Signal::new(id, false), network))) - .collect(); + // update new candidates (`next_candidate` is now available) + let new_candidates = self.get_new_candidates(network, next_candidate); + debug!("New candidates: {:?}", new_candidates); self.comp_state.candidates.extend(new_candidates); } @@ -91,9 +87,9 @@ impl Compiler { // store output operand location so user can retrieve them after running the program let outputs = network.outputs(); // TODO: doesn't work yet - // program.output_row_operands_placement = outputs.map(|out| { - // (out, self.comp_state.value_states.get(&out).unwrap().row_location.expect("ERROR: one of the outputs hasn't been computed yet...")) - // }).collect(); + program.output_row_operands_placement = outputs.map(|out| { + (out, self.comp_state.value_states.get(&out).unwrap().row_location.expect("ERROR: one of the outputs hasn't been computed yet...")) + }).collect(); program } @@ -163,7 +159,7 @@ impl Compiler { } /// Initialize candidates with all nodes that are computable - fn initialize_candidates(&mut self, network: &impl NetworkWithBackwardEdges) { + fn init_candidates(&mut self, network: &impl NetworkWithBackwardEdges) { let inputs: Vec = network.leaves().collect(); // init candidates with all nodes having only inputs as src-operands @@ -181,6 +177,22 @@ impl Compiler { } } + /// Returns list of candidates that can be computed once `computed_node` is available + fn get_new_candidates(&mut self, network: &impl NetworkWithBackwardEdges, computed_node: Signal) -> PriorityQueue { + debug!("Candidates: {:?}", self.comp_state.candidates); + debug!("DRAM state: {:?}", self.comp_state.value_states); + network.node_outputs(computed_node.node_id()) + // filter for new nodes that have all their input-operands available now (->only inputs of computed nodes could have changed to candidate-state, other nodes remain uneffected) + .filter({|out| network.node(*out).inputs().iter() + .all( |input| { + debug!("Out: {:?}, In: {:?}", out, input); + self.comp_state.value_states.keys().contains(input) && self.comp_state.value_states.get(input).unwrap().is_computed + }) + }) + .map(|id| (Signal::new(id, false), self.compute_scheduling_prio_for_node(Signal::new(id, false), network))) // TODO: check if inverted signal is required as well! + .collect() + } + /// Initialize compilation state: mark unsuable rows (eg safe-space rows), place input operands fn init_comp_state(&mut self, network: &impl NetworkWithBackwardEdges, program: &mut Program) { // debug!("Compiling {:?}", network); @@ -192,9 +204,9 @@ impl Compiler { self.place_inputs( network.leaves().collect::>() ); // place input-operands into rows debug!("Placed inputs {:?} in {:?}", network.leaves().collect::>(), self.comp_state.value_states); // 0.3 Setup: store all network-nodes yet to be compiled - self.initialize_candidates(network); + self.init_candidates(network); - // store where inputs have been placed in program + // store where inputs have been placed in program for user to know where to put them when calling into this program program.input_row_operands_placement = network.leaves().collect::>() .iter() .flat_map(|&id| { @@ -436,9 +448,10 @@ impl Compiler { next_instructions.push(Instruction::APA(row_combi.0, row_combi.1)); for (&comp_row, &ref_row) in comp_rows.iter().zip(ref_rows.iter()) { self.comp_state.dram_state.insert(comp_row, RowState { is_compute_row: true, live_value: Some(*next_candidate), constant: None }); + self.comp_state.value_states.insert(*next_candidate, ValueState { is_computed: true, row_location: Some(comp_row) }); // ref subarray holds negated value afterwarsd self.comp_state.dram_state.insert(ref_row, RowState { is_compute_row: true, live_value: Some(next_candidate.invert()), constant: None }); - // TODO: `value_state` + self.comp_state.value_states.insert(next_candidate.invert(), ValueState { is_computed: true, row_location: Some(ref_row) }); } // 4. Copy result data from dst NEAREST to the sense-amps into a safe-space row and update `value_state` @@ -482,6 +495,7 @@ impl Compiler { } /// Stores the current state of a row at a concrete compilations step +#[derive(Default)] // by default not a compute_row, no live-value and no constant inside row pub struct RowState { /// True iff that row is currently: 1) Not a safe-sapce row, 2) Doesn't activate any safe-sapce rows, 3) Isn't holding valid values in the role of a reference-subarray row is_compute_row: bool, @@ -491,16 +505,6 @@ pub struct RowState { constant: Option, } -impl Default for RowState { - fn default() -> Self { - RowState { - is_compute_row: false, - live_value: None, - constant: None, - } - } -} - #[derive(Debug)] pub struct ValueState { /// Whether the value has already been computed (->only then it could reside in a row) @@ -590,6 +594,25 @@ mod tests { // ComputedNetworkWithBackwardEdges::new(&ntk) // }); + // ERROR: This also does not work bc of the weird implementation of a network + // fn simple_egraph() -> ComputedNetworkWithBackwardEdges<'static, (Extractor<'static, CompilingCostFunction, AigLanguage, ()>, Vec)> { + // let mut egraph: EGraph = Default::default(); + // let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); + // egraph.add_expr(&my_expression); + // let extractor = Extractor::new( &egraph, CompilingCostFunction {}); + // let ntk = &(extractor, vec!(egg::Id::from(5))); + // ntk.dump(); + // // Id(5): And([Signal(false, Id(2)), Signal(false, Id(4))]) + // // Id(4): And([Signal(false, Id(3)), Signal(false, Id(1))]) + // // Id(1): Input(3) + // // Id(3): Input(2) + // // Id(2): And([Signal(false, Id(0)), Signal(false, Id(1))]) + // // Id(0): Input(1) + // + // let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); + // ntk_backward + // } + static INIT: Once = Once::new(); fn init() -> Compiler { @@ -599,7 +622,7 @@ mod tests { Compiler::new(CompilerSettings { print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, repetition_fracops: 5, safe_space_rows_per_subarray: 16 } ) } - #[test] // mark function as test-fn + #[test] fn test_alloc_safe_space_rows() { let mut compiler = init(); const REQUESTED_SAFE_SPACE_ROWS: u8 = 8; @@ -609,7 +632,7 @@ mod tests { assert_eq!(compiler.safe_space_rows.iter().dedup().collect::>().len(), REQUESTED_SAFE_SPACE_ROWS as usize); } - #[test] // mark function as test-fn + #[test] fn test_candidate_initialization() { let mut compiler = init(); @@ -629,11 +652,11 @@ mod tests { // Id(0): Input(1) // act is if one `AND` has already been computed -> other and (`Id(2)`) should be the only candidate left - compiler .comp_state.value_states.insert(Signal::new(eggmock::Id::from(4), false), ValueState{ is_computed: true, row_location: None }); + compiler.comp_state.value_states.insert(Signal::new(eggmock::Id::from(4), false), ValueState{ is_computed: true, row_location: None }); let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); - compiler.initialize_candidates(&ntk_backward); + compiler.init_candidates(&ntk_backward); let is_candidate_ids: HashSet = compiler.comp_state.candidates.iter().map(|(id,_)| *id).collect(); let should_candidate_ids: HashSet = HashSet::from([Signal::new( eggmock::Id::from(2), false), Signal::new(eggmock::Id::from(4), false)]); assert_eq!( is_candidate_ids, should_candidate_ids); @@ -641,6 +664,12 @@ mod tests { // TODO: test-case with node that relies on one input src-operand and one non-input (intermediate node) src-operand } + #[test] + fn test_new_candidates() { + let mut compiler = init(); + + } + #[test] fn test_compute_scheduling_prio_for_node() { let mut compiler = init(); From 443e754df7eb516b2d6ed2b715ec0b4259977eb8 Mon Sep 17 00:00:00 2001 From: alku662e Date: Thu, 26 Jun 2025 17:26:54 +0200 Subject: [PATCH 22/51] :construction: Support for n-ary Ops - now coming to Extraction --- rs/src/fc_dram/compiler.rs | 43 +++++++++++++++-------------- rs/src/fc_dram/egraph_extraction.rs | 23 ++++++++------- rs/src/fc_dram/mod.rs | 38 +++++++++++++------------ 3 files changed, 55 insertions(+), 49 deletions(-) diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 22c6134..bd41788 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -9,7 +9,7 @@ use super::{ architecture::{subarrayid_to_subarray_address, Instruction, LogicOp, SubarrayId, ARCHITECTURE, NR_SUBARRAYS, ROW_ID_BITMASK, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, RowAddress }; -use eggmock::{Aig, Id, NetworkWithBackwardEdges, Node, Signal}; +use eggmock::{Aoig, Id, NetworkWithBackwardEdges, Node, Signal}; use itertools::Itertools; use log::debug; use priority_queue::PriorityQueue; @@ -52,7 +52,7 @@ impl Compiler { /// - 2) outputs can be found after the program has run pub fn compile( &mut self, - network: &impl NetworkWithBackwardEdges, + network: &impl NetworkWithBackwardEdges, ) -> Program { let mut program = Program::new(vec!()); @@ -159,7 +159,7 @@ impl Compiler { } /// Initialize candidates with all nodes that are computable - fn init_candidates(&mut self, network: &impl NetworkWithBackwardEdges) { + fn init_candidates(&mut self, network: &impl NetworkWithBackwardEdges) { let inputs: Vec = network.leaves().collect(); // init candidates with all nodes having only inputs as src-operands @@ -178,7 +178,7 @@ impl Compiler { } /// Returns list of candidates that can be computed once `computed_node` is available - fn get_new_candidates(&mut self, network: &impl NetworkWithBackwardEdges, computed_node: Signal) -> PriorityQueue { + fn get_new_candidates(&mut self, network: &impl NetworkWithBackwardEdges, computed_node: Signal) -> PriorityQueue { debug!("Candidates: {:?}", self.comp_state.candidates); debug!("DRAM state: {:?}", self.comp_state.value_states); network.node_outputs(computed_node.node_id()) @@ -194,7 +194,7 @@ impl Compiler { } /// Initialize compilation state: mark unsuable rows (eg safe-space rows), place input operands - fn init_comp_state(&mut self, network: &impl NetworkWithBackwardEdges, program: &mut Program) { + fn init_comp_state(&mut self, network: &impl NetworkWithBackwardEdges, program: &mut Program) { // debug!("Compiling {:?}", network); // 0.1 Allocate safe-space rows (for storing intermediate values safely self.alloc_safe_space_rows(self.settings.safe_space_rows_per_subarray); @@ -290,7 +290,7 @@ impl Compiler { /// NOTE: Some inputs may be needed in a negated form by the candidates. To start execution those /// input operands have to be available with their negated form. /// TODO: NEXT - fn init_negated_src_operands(&mut self, src_operands: Vec, network: &impl NetworkWithBackwardEdges) -> Vec { + fn init_negated_src_operands(&mut self, src_operands: Vec, network: &impl NetworkWithBackwardEdges) -> Vec { let mut instructions = vec!(); let mut negated_inputs: HashSet = HashSet::new(); // inputs which are required in their negated form let negated_src_operands: Vec = src_operands.iter() @@ -381,8 +381,9 @@ impl Compiler { panic!("OOM: No more available safe-space rows"); } + /// Returns Instructions to execute given `next_candidate` /// - [ ] make sure that operation to be executed on those rows won't simultaneously activate other rows holding valid data which will be used by future operations - fn execute_next_instruction(&mut self, next_candidate: &Signal, network: &impl NetworkWithBackwardEdges) -> Vec { + fn execute_next_instruction(&mut self, next_candidate: &Signal, network: &impl NetworkWithBackwardEdges) -> Vec { let mut next_instructions = vec!(); debug!("Executing candidate {:?}", next_candidate); @@ -408,8 +409,8 @@ impl Compiler { let language_op = network.node(next_candidate.node_id()); let logic_op = match language_op { - Aig::And(_) => LogicOp::AND, - Aig::Or(_) => LogicOp::OR, + Aoig::And(_) => LogicOp::AND, + Aoig::Or(_) => LogicOp::OR, // TODO: extract NOT _ => panic!("candidate is expected to be a logic op"), }; @@ -463,7 +464,7 @@ impl Compiler { /// Compute `SchedulingPrio` for a given node /// - used for inserting new candidates /// TODO: write unittest for this function - fn compute_scheduling_prio_for_node(&self, signal: Signal, network: &impl NetworkWithBackwardEdges) -> SchedulingPrio { + fn compute_scheduling_prio_for_node(&self, signal: Signal, network: &impl NetworkWithBackwardEdges) -> SchedulingPrio { let nr_last_value_uses = network.node(signal.node_id()).inputs() // for each input check whether `id` is the last node using it .iter() .fold(0, |acc, input| { @@ -576,7 +577,7 @@ impl PartialOrd for SchedulingPrio { #[cfg(test)] mod tests { use eggmock::egg::{self, EGraph, Extractor, RecExpr}; - use eggmock::{AigLanguage, ComputedNetworkWithBackwardEdges, Network}; + use eggmock::{AoigLanguage, ComputedNetworkWithBackwardEdges, Network}; use crate::fc_dram::egraph_extraction::CompilingCostFunction; @@ -586,8 +587,8 @@ mod tests { // ERROR: `eggmock`-API doesn't allow this.. // // For data shared among unittests but initalized only once // static TEST_DATA: LazyLock<_> = LazyLock::new(|| { - // let mut egraph: EGraph = Default::default(); - // let my_expression: RecExpr = "(and (and a c) (and b c))".parse().unwrap(); + // let mut egraph: EGraph = Default::default(); + // let my_expression: RecExpr = "(and (and a c) (and b c))".parse().unwrap(); // let extractor = Extractor::new( &egraph, CompilingCostFunction {}); // let ntk = (extractor, vec!(egg::Id::from(9))); // @@ -595,9 +596,9 @@ mod tests { // }); // ERROR: This also does not work bc of the weird implementation of a network - // fn simple_egraph() -> ComputedNetworkWithBackwardEdges<'static, (Extractor<'static, CompilingCostFunction, AigLanguage, ()>, Vec)> { - // let mut egraph: EGraph = Default::default(); - // let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); + // fn simple_egraph() -> ComputedNetworkWithBackwardEdges<'static, (Extractor<'static, CompilingCostFunction, AoigLanguage, ()>, Vec)> { + // let mut egraph: EGraph = Default::default(); + // let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); // egraph.add_expr(&my_expression); // let extractor = Extractor::new( &egraph, CompilingCostFunction {}); // let ntk = &(extractor, vec!(egg::Id::from(5))); @@ -636,10 +637,10 @@ mod tests { fn test_candidate_initialization() { let mut compiler = init(); - let mut egraph: EGraph = Default::default(); - let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); + let mut egraph: EGraph = Default::default(); + let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); egraph.add_expr(&my_expression); - let output2 = egraph.add(AigLanguage::And([eggmock::egg::Id::from(0), eggmock::egg::Id::from(2)])); // additional `And` with one src-operand=input and one non-input src operand + let output2 = egraph.add(AoigLanguage::And([eggmock::egg::Id::from(0), eggmock::egg::Id::from(2)])); // additional `And` with one src-operand=input and one non-input src operand debug!("EGraph used for candidate-init: {:?}", egraph); let extractor = Extractor::new( &egraph, CompilingCostFunction {}); let ntk = &(extractor, vec!(egg::Id::from(5), output2)); @@ -674,8 +675,8 @@ mod tests { fn test_compute_scheduling_prio_for_node() { let mut compiler = init(); - let mut egraph: EGraph = Default::default(); - let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); + let mut egraph: EGraph = Default::default(); + let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); egraph.add_expr(&my_expression); let extractor = Extractor::new( &egraph, CompilingCostFunction {}); let ntk = &(extractor, vec!(egg::Id::from(5))); diff --git a/rs/src/fc_dram/egraph_extraction.rs b/rs/src/fc_dram/egraph_extraction.rs index d562a20..eb35e36 100644 --- a/rs/src/fc_dram/egraph_extraction.rs +++ b/rs/src/fc_dram/egraph_extraction.rs @@ -1,7 +1,7 @@ //! Computation of Compiling Costs use eggmock::egg::{CostFunction, Id}; -use eggmock::{EggIdToSignal, AigLanguage, Aig, NetworkLanguage, Network, Signal}; +use eggmock::{EggIdToSignal, AoigLanguage, Aoig, NetworkLanguage, Network, Signal}; use std::cmp::Ordering; use std::rc::Rc; @@ -19,7 +19,7 @@ pub struct CompilingCost { program_cost: usize, } -impl CostFunction for CompilingCostFunction { +impl CostFunction for CompilingCostFunction { type Cost = Rc; /// Compute cost of given `enode` using `cost_fn` @@ -33,26 +33,29 @@ impl CostFunction for CompilingCostFunction { /// /// TODO: NEXT /// - [ ] Subgraph direkt kompilieren ?? - fn cost(&mut self, enode: &AigLanguage, mut cost_fn: C) -> Self::Cost + fn cost(&mut self, enode: &AoigLanguage, mut cost_fn: C) -> Self::Cost where C: FnMut(Id) -> Self::Cost, { let cost = match enode { - AigLanguage::False => 0, - AigLanguage::Input(_node) => { + AoigLanguage::False => 0, + AoigLanguage::Input(_node) => { // FCDRAMArchitecture::get_distance_of_row_to_sense_amps(&self, row) // TODO: make cost depend on data-pattern of input? 0 }, - AigLanguage::And([_node1, _node2]) | AigLanguage::Or([_node1, _node2]) => { + AoigLanguage::And([_node1, _node2]) | AoigLanguage::Or([_node1, _node2]) => { // TODO: get mapping of AND to FCDRAM-Primitives and get how many mem-cycles they take 3 }, // TODO: increase cost of NOT? (since it moves the value to another subarray!) // eg prefer `OR(a,b)` to `NOT(AND( NOT(a), NOT(b)))` - AigLanguage::Not(_node) => { + AoigLanguage::Not(_node) => { 100 // NOTs seem to be horrible (unless the computation proceeds in the other subarray where the NOT result is placed) }, + _ => { + 0 // TODO: implement for nary-ops + } }; Rc::new(CompilingCost { @@ -63,8 +66,8 @@ impl CostFunction for CompilingCostFunction { // let root = enode.clone(); // let cost = match enode { - // AigLanguage::False | AigLanguage::Input(_) => CompilingCost::leaf(root), - // AigLanguage::Not(id) => { + // AoigLanguage::False | AoigLanguage::Input(_) => CompilingCost::leaf(root), + // AoigLanguage::Not(id) => { // let cost = costs(*id); // // let nesting = if cost.not_nesting == NotNesting::NotANot { @@ -75,7 +78,7 @@ impl CostFunction for CompilingCostFunction { // // // CompilingCost::with_children(self.architecture, root, iter::once((*id, cost)), nesting) // } - // AigLanguage::Maj(children) => CompilingCost::with_children( + // AoigLanguage::Maj(children) => CompilingCost::with_children( // self.architecture, // root, // children.map(|id| (id, costs(id))), diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index d4a7942..4705183 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -34,7 +34,7 @@ use self::egraph_extraction::CompilingCostFunction; use eggmock::egg::{rewrite, EGraph, Extractor, Id, Rewrite, Runner}; use eggmock::{ - Aig, AigLanguage, AigReceiverFFI, Network, NetworkWithBackwardEdges, Receiver, ReceiverFFI, Rewriter, RewriterFFI, Signal // TODO: add AOIG-rewrite (bc FC-DRAM supports AND&OR natively)? + Aig, AigReceiverFFI, Aoig, AoigLanguage, AoigReceiverFFI, Network, NetworkWithBackwardEdges, Receiver, ReceiverFFI, Rewriter, RewriterFFI, Signal // TODO: add AOIG-rewrite (bc FC-DRAM supports AND&OR natively)? }; use log::{debug, logger}; use program::*; @@ -42,7 +42,7 @@ use architecture::*; /// Rewrite rules to use in E-Graph Rewriting (see [egg](https://egraphs-good.github.io/)) /// TODO: adjust rewriting rules to FCDRAM (=AND/OR related rewrites like De-Morgan?) -static REWRITE_RULES: LazyLock>> = LazyLock::new(|| { +static REWRITE_RULES: LazyLock>> = LazyLock::new(|| { let mut rules = vec![ // TODO: add "or" - and De-Morgan ? rewrite!("commute-and"; "(and ?a ?b)" => "(and ?b ?a)"), @@ -68,13 +68,13 @@ static REWRITE_RULES: LazyLock>> = LazyLock::new(|| #[ouroboros::self_referencing] struct CompilerOutput { /// Result E-Graph - graph: EGraph, + graph: EGraph, /// (, output-nodes) #[borrows(graph)] #[covariant] /// A network consists of nodes (accessed via `Extractor` and separately stored `outputs` (`Vec`) ntk: ( - Extractor<'this, CompilingCostFunction, AigLanguage, ()>, // `'this`=self-reference, used to extract best-node from `E-Class` of `AigLanguage`-nodes based on `CompilingCostFunction` + Extractor<'this, CompilingCostFunction, AoigLanguage, ()>, // `'this`=self-reference, used to extract best-node from `E-Class` of `AoigLanguage`-nodes based on `CompilingCostFunction` Vec, // vector of outputs ), /// Compiled Program Program is compiled using previously (EGraph-)extracted `ntk` @@ -86,19 +86,19 @@ struct CompilerOutput { /// - returned receiver allows converting result-graph in both directions (C++ <=> Rust) /// - `settings`: compiler-options fn compiling_receiver<'a>( - rules: &'a [Rewrite], + rules: &'a [Rewrite], settings: CompilerSettings, -) -> impl Receiver + use<'a> { +) -> impl Receiver + use<'a> { // REMINDER: EGraph implements `Receiver` // TODO: deactivate e-graph rewriting, focus on compilation first let mut compiler = Compiler::new(settings); // TODO: rewrite this to a singleton-class - EGraph::::new(()) + EGraph::::new(()) .map(move |(graph, outputs)| { // `.map()` of `Provider`-trait!, outputs=vector of EClasses debug!("Input EGraph nodes: {:?}", graph.nodes()); debug!("Input EGraph's EClasses : {:?}", graph.classes() .map(|eclass| (eclass.id, &eclass.nodes) ) - .collect::)>>() + .collect::)>>() ); // 1. Create E-Graph: run equivalence saturation // debug("Running equivalence saturation..."); @@ -191,29 +191,31 @@ pub struct CompilerSettings { struct FCDramRewriter(CompilerSettings); impl Rewriter for FCDramRewriter { - type Node = Aig; + type Node = Aoig; type Intermediate = CompilerOutput; fn create_receiver( &mut self, - ) -> impl Receiver + 'static { - compiling_receiver(REWRITE_RULES.as_slice(), self.0) + ) -> impl Receiver + 'static { + compiling_receiver(REWRITE_RULES.as_slice(), self.0).adapt(Into::into) } fn rewrite( self, result: CompilerOutput, - output: impl Receiver, + output: impl Receiver, ) { + // todo!() result.borrow_ntk().send(output); } } -/// ?? (maybe FFI for rewriting graph using mockturtle?) -#[no_mangle] -extern "C" fn fcdram_rewriter(settings: CompilerSettings) -> AigReceiverFFI> { - RewriterFFI::new(FCDramRewriter(settings)) -} +// TODO: this will be needed once E-Graph Validation is added (=once we want to transfer the E-Graph back to mockturtle) +// /// ?? (maybe FFI for rewriting graph using mockturtle?) +// #[no_mangle] +// extern "C" fn fcdram_rewriter(settings: CompilerSettings) -> AigReceiverFFI> { +// RewriterFFI::new(FCDramRewriter(settings)) +// } /// Statistic results about Compilation-Process #[repr(C)] @@ -243,5 +245,5 @@ extern "C" fn fcdram_compile(settings: CompilerSettings) -> AigReceiverFFI Date: Wed, 2 Jul 2025 11:11:26 +0200 Subject: [PATCH 23/51] WIP --- rs/src/fc_dram/mod.rs | 67 +++++++-------------- rs/src/fc_dram_bottom_up/mod.rs | 103 -------------------------------- 2 files changed, 22 insertions(+), 148 deletions(-) delete mode 100644 rs/src/fc_dram_bottom_up/mod.rs diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index 4705183..06e7a1f 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -34,25 +34,25 @@ use self::egraph_extraction::CompilingCostFunction; use eggmock::egg::{rewrite, EGraph, Extractor, Id, Rewrite, Runner}; use eggmock::{ - Aig, AigReceiverFFI, Aoig, AoigLanguage, AoigReceiverFFI, Network, NetworkWithBackwardEdges, Receiver, ReceiverFFI, Rewriter, RewriterFFI, Signal // TODO: add AOIG-rewrite (bc FC-DRAM supports AND&OR natively)? + AigReceiverFFI, Aoig, AoigLanguage, Network, NetworkWithBackwardEdges, Receiver, ReceiverFFI, Signal }; -use log::{debug, logger}; +use log::debug; use program::*; use architecture::*; /// Rewrite rules to use in E-Graph Rewriting (see [egg](https://egraphs-good.github.io/)) /// TODO: adjust rewriting rules to FCDRAM (=AND/OR related rewrites like De-Morgan?) static REWRITE_RULES: LazyLock>> = LazyLock::new(|| { - let mut rules = vec![ + let rules = vec![ // TODO: add "or" - and De-Morgan ? rewrite!("commute-and"; "(and ?a ?b)" => "(and ?b ?a)"), rewrite!("and-1"; "(and ?a 1)" => "?a"), rewrite!("and-0"; "(and ?a 0)" => "0"), // TODO: first add `AOIG`-language and add conversion AOIG<->AIG (so mockturtle's aig can still be used underneath) - // rewrite!("and-or"; "(! (or (! ?a) (! ?b)))" => "(and ?a ?b)"), // (De-Morgan) ! not checked whether this works - // rewrite!("or-and"; "(! (and (! ?a) (! ?b)))" => "(or ?a ?b)" ), // (De-Morgan) ! not checked whether this works - // rewrite!("and-or"; "(and ?a ?b)" => "(! (or (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works - // rewrite!("or-and"; "(or ?a ?b)" => "(! (and (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works + rewrite!("and-or"; "(! (or (! ?a) (! ?b)))" => "(and ?a ?b)"), // (De-Morgan) ! not checked whether this works + rewrite!("or-and"; "(! (and (! ?a) (! ?b)))" => "(or ?a ?b)" ), // (De-Morgan) ! not checked whether this works + rewrite!("and-or"; "(and ?a ?b)" => "(! (or (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works + rewrite!("or-and"; "(or ?a ?b)" => "(! (and (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works rewrite!("and-same"; "(and ?a ?a)" => "?a"), rewrite!("not_not"; "(! (! ?a))" => "?a"), // rewrite!("maj_1"; "(maj ?a ?a ?b)" => "?a"), @@ -90,8 +90,7 @@ fn compiling_receiver<'a>( settings: CompilerSettings, ) -> impl Receiver + use<'a> { // REMINDER: EGraph implements `Receiver` - // TODO: deactivate e-graph rewriting, focus on compilation first - let mut compiler = Compiler::new(settings); // TODO: rewrite this to a singleton-class + let mut compiler = Compiler::new(settings); EGraph::::new(()) .map(move |(graph, outputs)| { // `.map()` of `Provider`-trait!, outputs=vector of EClasses @@ -101,17 +100,17 @@ fn compiling_receiver<'a>( .collect::)>>() ); // 1. Create E-Graph: run equivalence saturation - // debug("Running equivalence saturation..."); - // let runner = measure_time!(Runner::default().with_egraph(graph).run(rules), "t_runner", settings.print_compilation_stats ); - // - // - // if settings.verbose { - // println!("== Runner Report"); - // runner.print_report(); - // } - // - // let graph = runner.egraph; + debug!("Running equivalence saturation..."); + let runner = measure_time!( + Runner::default().with_egraph(graph).run(rules), "t_runner", settings.print_compilation_stats + ); + + if settings.verbose { + println!("== Runner Report"); + runner.print_report(); + } + let graph = runner.egraph; CompilerOutput::new( graph, @@ -135,6 +134,7 @@ fn compiling_receiver<'a>( debug!("Compiling..."); debug!("Network outputs: {:?}", ntk.outputs().collect::>()); let ntk_with_backward_edges = ntk.with_backward_edges(); + ntk.dump(); debug!("Network Leaves: {:?}", ntk_with_backward_edges.leaves().collect::>()); debug!("Network Outputs of first leaf: {:?}", ntk_with_backward_edges.node_outputs( @@ -188,28 +188,6 @@ pub struct CompilerSettings { safe_space_rows_per_subarray: u8, } -struct FCDramRewriter(CompilerSettings); - -impl Rewriter for FCDramRewriter { - type Node = Aoig; - type Intermediate = CompilerOutput; - - fn create_receiver( - &mut self, - ) -> impl Receiver + 'static { - compiling_receiver(REWRITE_RULES.as_slice(), self.0).adapt(Into::into) - } - - fn rewrite( - self, - result: CompilerOutput, - output: impl Receiver, - ) { - // todo!() - result.borrow_ntk().send(output); - } -} - // TODO: this will be needed once E-Graph Validation is added (=once we want to transfer the E-Graph back to mockturtle) // /// ?? (maybe FFI for rewriting graph using mockturtle?) // #[no_mangle] @@ -227,13 +205,11 @@ struct CompilerStatistics { instruction_count: u64, } -/// Main function called from `.cpp()` file - receives compiler settings +/// Entry point for cpp-code /// - `settings`: settings to use when running compiler #[no_mangle] extern "C" fn fcdram_compile(settings: CompilerSettings) -> AigReceiverFFI { - // todo!() - // TODO: create example `ARCHITECTURE` implementing `FCDRAMArchitecture` - env_logger::init(); + env_logger::init(); // needed for `export RUST_LOG=debug` to work let receiver = compiling_receiver(REWRITE_RULES.as_slice(), settings) .map(|output| { @@ -245,5 +221,6 @@ extern "C" fn fcdram_compile(settings: CompilerSettings) -> AigReceiverFFI( -// architecture: &'a A, -// rules: &'a [Rewrite], -// settings: CompilerSettings, -// ) -> impl Receiver, Node = Mig> + 'a { -// EGraph::::new(()).map(move |(graph, outputs)| { -// let t_runner = std::time::Instant::now(); -// let runner = Runner::default().with_egraph(graph).run(rules); -// let t_runner = t_runner.elapsed().as_millis(); -// if settings.verbose { -// println!("== Runner Report"); -// runner.print_report(); -// } -// let graph = runner.egraph; -// -// let mut t_extractor = 0; -// let mut t_compiler = 0; -// -// let output = CompilerOutput::new( -// graph, -// |graph| { -// let start_time = Instant::now(); -// let extractor = Extractor::new( -// &graph, -// CompilingCostFunction { -// architecture: architecture, -// }, -// ); -// t_extractor = start_time.elapsed().as_millis(); -// (extractor, outputs) -// }, -// |ntk| { -// let start_time = Instant::now(); -// let program = compile(architecture, &ntk.with_backward_edges()); -// t_compiler = start_time.elapsed().as_millis(); -// if settings.print_program || settings.verbose { -// if settings.verbose { -// println!("== Program") -// } -// println!("{program}"); -// } -// program -// }, -// ); -// if settings.verbose { -// println!("== Timings"); -// println!("t_runner: {t_runner}ms"); -// println!("t_extractor: {t_extractor}ms"); -// println!("t_compiler: {t_compiler}ms"); -// } -// CompilingReceiverResult { -// output, -// t_runner, -// t_extractor, -// t_compiler, -// } -// }) -// } - -// #[no_mangle] -// extern "C" fn fcdram_rewriter(settings: CompilerSettings) -> MigReceiverFFI> { -// info!("Called fcdram_rewriter"); -// RewriterFFI::new(FCDramRewriter(settings)) -// } - -#[repr(C)] -struct CompilerStatistics { - egraph_classes: u64, - egraph_nodes: u64, - egraph_size: u64, - - instruction_count: u64, - - t_runner: u64, - t_extractor: u64, - t_compiler: u64, -} - -/// Main functions called by `main.cpp` -#[no_mangle] -// extern "C" fn fcdram_compile(settings: CompilerSettings) -> MigReceiverFFI { -extern "C" fn fcdram_compile() { - info!("Called fcdram_compile"); - todo!() - // TODO: create example `ARCHITECTURE` implementing `FCDRAMArchitecture` - // let receiver = - // compiling_receiver(&*&ARCHITECTURE, REWRITE_RULES.as_slice(), settings).map(|res| { - // let graph = res.output.borrow_graph(); - // CompilerStatistics { - // egraph_classes: graph.number_of_classes() as u64, - // egraph_nodes: graph.total_number_of_nodes() as u64, - // egraph_size: graph.total_size() as u64, - // instruction_count: res.output.borrow_program().instructions.len() as u64, - // t_runner: res.t_runner as u64, - // t_extractor: res.t_extractor as u64, - // t_compiler: res.t_compiler as u64, - // } - // }); - // MigReceiverFFI::new(receiver) -} From 82d559771b16ffc9282209c03be89b9be24b3064 Mon Sep 17 00:00:00 2001 From: alku662e Date: Wed, 2 Jul 2025 17:06:01 +0200 Subject: [PATCH 24/51] WIP --- rs/src/fc_dram/architecture.rs | 2 +- rs/src/fc_dram/egraph_extraction.rs | 179 +++++++++++++++++++++------- rs/src/fc_dram/mod.rs | 6 +- rs/src/fc_dram/optimization.rs | 3 +- rs/src/fc_dram/program.rs | 5 +- 5 files changed, 144 insertions(+), 51 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 190ebb3..9a3bcfb 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -212,7 +212,7 @@ impl FCDRAMArchitecture { /// /// NOTE: `compute_rows` are expected to lay in the same subarray and `reference_rows` in one /// subarray adjacent to the compute subarray (!this is not checked but assumed to be true!) - fn get_instructions_implementation_of_logic_ops(logic_op: LogicOp) -> Vec { + pub fn get_instructions_implementation_of_logic_ops(logic_op: LogicOp) -> Vec { match logic_op { LogicOp::NOT => vec!(Instruction::APA(0, 0)), LogicOp::AND => vec!(Instruction::FracOp(0), Instruction::APA(0, 0)), diff --git a/rs/src/fc_dram/egraph_extraction.rs b/rs/src/fc_dram/egraph_extraction.rs index eb35e36..cb3b0f3 100644 --- a/rs/src/fc_dram/egraph_extraction.rs +++ b/rs/src/fc_dram/egraph_extraction.rs @@ -1,16 +1,20 @@ //! Computation of Compiling Costs use eggmock::egg::{CostFunction, Id}; -use eggmock::{EggIdToSignal, AoigLanguage, Aoig, NetworkLanguage, Network, Signal}; +use eggmock::{AoigLanguage, egg::Language}; +use log::debug; use std::cmp::Ordering; +use std::ops; use std::rc::Rc; +use super::architecture::{FCDRAMArchitecture, LogicOp}; + pub struct CompilingCostFunction{} // impl StackedPartialGraph { } // Do I need this?? /// TODO: add reliability as cost-metric -#[derive(Debug)] +#[derive(Debug, Clone, Copy)] pub struct CompilingCost { // partial: RefCell>>, /// Probability that the whole program will run successfully @@ -19,6 +23,18 @@ pub struct CompilingCost { program_cost: usize, } +/// Needed to implement `enode.fold()` for computing overall cost from node together with its children +impl ops::Add for CompilingCost { + type Output = CompilingCost; + + fn add(self, rhs: CompilingCost) -> Self::Output { + CompilingCost { + success_rate: self.success_rate * rhs.success_rate, // monotonically decreasing + program_cost: self.program_cost + rhs.program_cost, // monotonically increasing + } + } +} + impl CostFunction for CompilingCostFunction { type Cost = Rc; @@ -37,55 +53,67 @@ impl CostFunction for CompilingCostFunction { where C: FnMut(Id) -> Self::Cost, { - let cost = match enode { - AoigLanguage::False => 0, - AoigLanguage::Input(_node) => { - // FCDRAMArchitecture::get_distance_of_row_to_sense_amps(&self, row) - // TODO: make cost depend on data-pattern of input? - 0 + // TODO: detect self-cycles, other cycles will be detected by compiling, which will result in an error + // enode.children(); + // TODO: rewrite to `.fold()` + + // get op-cost of executing `enode`: + let op_cost = match *enode { + AoigLanguage::False | AoigLanguage::Input(_) => { + CompilingCost { + success_rate: 1.0, + program_cost: 0, + } }, - AoigLanguage::And([_node1, _node2]) | AoigLanguage::Or([_node1, _node2]) => { - // TODO: get mapping of AND to FCDRAM-Primitives and get how many mem-cycles they take - 3 + AoigLanguage::And([node1, node2]) => { + + let mem_cycles_and = FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::AND) + .iter().fold(0, |acc, instr| { acc + instr.get_nr_memcycles() as usize }); + let expected_success_rate = 0.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values + CompilingCost { + success_rate: expected_success_rate, + program_cost: mem_cycles_and, + } + + }, + AoigLanguage::Or([node1, node2]) => { + let mem_cycles_or = FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::OR) + .iter().fold(0, |acc, instr| { acc + instr.get_nr_memcycles() as usize }); + let expected_success_rate = 0.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values + CompilingCost { + success_rate: expected_success_rate, + program_cost: mem_cycles_or, + } }, // TODO: increase cost of NOT? (since it moves the value to another subarray!) // eg prefer `OR(a,b)` to `NOT(AND( NOT(a), NOT(b)))` - AoigLanguage::Not(_node) => { - 100 // NOTs seem to be horrible (unless the computation proceeds in the other subarray where the NOT result is placed) + AoigLanguage::Not(node) => { + let mem_cycles_and = FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::NOT) + .iter().fold(0, |acc, instr| { acc + instr.get_nr_memcycles() as usize }); + let expected_success_rate = 0.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values + + CompilingCost { + success_rate: expected_success_rate, + program_cost: mem_cycles_and, + } }, _ => { - 0 // TODO: implement for nary-ops + // todo!(); + CompilingCost { + success_rate: 0.0, + program_cost: 7, + } + // 0 // TODO: implement for nary-ops, eg using `.children()` } }; - Rc::new(CompilingCost { - success_rate: 0.0, // TODO - program_cost: cost, - }) - // todo!() - - // let root = enode.clone(); - // let cost = match enode { - // AoigLanguage::False | AoigLanguage::Input(_) => CompilingCost::leaf(root), - // AoigLanguage::Not(id) => { - // let cost = costs(*id); - // - // let nesting = if cost.not_nesting == NotNesting::NotANot { - // NotNesting::FirstNot - // } else { - // NotNesting::NestedNots - // }; - // // - // CompilingCost::with_children(self.architecture, root, iter::once((*id, cost)), nesting) - // } - // AoigLanguage::Maj(children) => CompilingCost::with_children( - // self.architecture, - // root, - // children.map(|id| (id, costs(id))), - // NotNesting::NotANot, - // ), - // }; - // Rc::new(cost) + debug!("Folding {:?}", enode); + Rc::new(enode.fold(op_cost, |sum, id| sum + *(cost_fn(id)) )) // TODO: doesn't work yet :/ + + // Rc::new(CompilingCost { + // success_rate: 0.0, + // program_cost: 7, + // }) } } @@ -106,3 +134,70 @@ impl PartialOrd for CompilingCost { } } } + + +#[cfg(test)] +mod tests { + use eggmock::egg::{self, rewrite, EGraph, Extractor, RecExpr, Rewrite, Runner}; + use eggmock::{AoigLanguage, ComputedNetworkWithBackwardEdges, Network, Signal}; + + use crate::fc_dram::compiler::Compiler; + use crate::fc_dram::egraph_extraction::CompilingCostFunction; + use crate::fc_dram::CompilerSettings; + + use super::*; + // import all elements from parent-module + use std::sync::Once; + + static INIT: Once = Once::new(); + + fn init() -> Compiler { + INIT.call_once(|| { + env_logger::init(); + }); + Compiler::new(CompilerSettings { print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, repetition_fracops: 5, safe_space_rows_per_subarray: 16 } ) + } + + /// TODO ! + #[test] + fn test_cost_function () { + let mut compiler = init(); + let rewrite_rules: Vec> = vec![ + // TODO: add "or" - and De-Morgan ? + rewrite!("commute-and"; "(and ?a ?b)" => "(and ?b ?a)"), + rewrite!("and-1"; "(and ?a 1)" => "?a"), + rewrite!("and-0"; "(and ?a 0)" => "0"), + // TODO: first add `AOIG`-language and add conversion AOIG<->AIG (so mockturtle's aig can still be used underneath) + rewrite!("and-or"; "(! (or (! ?a) (! ?b)))" => "(and ?a ?b)"), // (De-Morgan) ! not checked whether this works + rewrite!("or-and"; "(! (and (! ?a) (! ?b)))" => "(or ?a ?b)" ), // (De-Morgan) ! not checked whether this works + rewrite!("and-or-more-not"; "(and ?a ?b)" => "(! (or (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works + rewrite!("or-and-more-not"; "(or ?a ?b)" => "(! (and (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works + rewrite!("and-same"; "(and ?a ?a)" => "?a"), + rewrite!("not_not"; "(! (! ?a))" => "?a"), + ]; + + let mut egraph: EGraph = Default::default(); + let my_expression: RecExpr = "(and (and 1 3) (and 2 3))".parse().unwrap(); + egraph.add_expr(&my_expression); + let output2 = egraph.add(AoigLanguage::And([eggmock::egg::Id::from(0), eggmock::egg::Id::from(2)])); // additional `And` with one src-operand=input and one non-input src operand + debug!("EGraph used for candidate-init: {:?}", egraph); + let egraph_clone = egraph.clone(); + let extractor = Extractor::new( &egraph_clone, CompilingCostFunction {}); + let ntk = &(extractor, vec!(egg::Id::from(5), output2)); + ntk.dump(); + // Id(5): And([Signal(false, Id(2)), Signal(false, Id(4))]) + // Id(4): And([Signal(false, Id(3)), Signal(false, Id(1))]) + // Id(1): Input(3) + // Id(3): Input(2) + // Id(2): And([Signal(false, Id(0)), Signal(false, Id(1))]) + // Id(0): Input(1) + + let runner = Runner::default().with_egraph(egraph).run(rewrite_rules.as_slice()); + + let graph = runner.egraph; + + let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); + + // TODO: test-case with node that relies on one input src-operand and one non-input (intermediate node) src-operand + } +} diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index 06e7a1f..65a70e5 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -51,8 +51,8 @@ static REWRITE_RULES: LazyLock>> = LazyLock::new(| // TODO: first add `AOIG`-language and add conversion AOIG<->AIG (so mockturtle's aig can still be used underneath) rewrite!("and-or"; "(! (or (! ?a) (! ?b)))" => "(and ?a ?b)"), // (De-Morgan) ! not checked whether this works rewrite!("or-and"; "(! (and (! ?a) (! ?b)))" => "(or ?a ?b)" ), // (De-Morgan) ! not checked whether this works - rewrite!("and-or"; "(and ?a ?b)" => "(! (or (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works - rewrite!("or-and"; "(or ?a ?b)" => "(! (and (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works + rewrite!("and-or-more-not"; "(and ?a ?b)" => "(! (or (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works + rewrite!("or-and-more-not"; "(or ?a ?b)" => "(! (and (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works rewrite!("and-same"; "(and ?a ?a)" => "?a"), rewrite!("not_not"; "(! (! ?a))" => "?a"), // rewrite!("maj_1"; "(maj ?a ?a ?b)" => "?a"), @@ -133,8 +133,8 @@ fn compiling_receiver<'a>( debug!("Compiling..."); debug!("Network outputs: {:?}", ntk.outputs().collect::>()); - let ntk_with_backward_edges = ntk.with_backward_edges(); ntk.dump(); + let ntk_with_backward_edges = ntk.with_backward_edges(); debug!("Network Leaves: {:?}", ntk_with_backward_edges.leaves().collect::>()); debug!("Network Outputs of first leaf: {:?}", ntk_with_backward_edges.node_outputs( diff --git a/rs/src/fc_dram/optimization.rs b/rs/src/fc_dram/optimization.rs index c2f5bdc..6bb4c82 100644 --- a/rs/src/fc_dram/optimization.rs +++ b/rs/src/fc_dram/optimization.rs @@ -6,12 +6,11 @@ //! - manually adapt safe-space to program requirements: unused safe-space rows could still be used ?! //! - [ ] Rematerialization ? use crate::fc_dram::architecture::{RowAddress, Instruction}; -use rustc_hash::FxHashSet; use super::{architecture::FCDRAMArchitecture, program::Program}; pub fn optimize(program: &mut Program) { - if program.instructions.len() == 0 { + if program.instructions.is_empty() { return; } let mut opt = Optimization { program }; diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs index c5b0a9a..a176f09 100644 --- a/rs/src/fc_dram/program.rs +++ b/rs/src/fc_dram/program.rs @@ -1,12 +1,11 @@ //! Functionality for generating actual program using architecture defined in [`architecture`] by //! compiling given logic-network (see [`compilation`]) and potentially adding some manual //! optimizations ([`optimization`]) -use super::architecture::{FCDRAMArchitecture, RowAddress}; +use super::architecture::RowAddress; use crate::fc_dram::architecture::{get_subarrayid_from_rowaddr, Instruction, ROW_ID_BITMASK}; -use eggmock::{Id, Aig, NetworkWithBackwardEdges, Signal}; +use eggmock::Signal; use std::collections::HashMap; use std::fmt::{Display, Formatter}; -use std::ops::{Deref, DerefMut}; #[derive(Debug, Clone)] From 89c3eef08adfc849446023159d40217326305a55 Mon Sep 17 00:00:00 2001 From: alku662e Date: Thu, 3 Jul 2025 16:06:47 +0200 Subject: [PATCH 25/51] :construction: Fix infinite loop --- src/fcdram.h | 4 +++- src/main.cpp | 41 ++++++++++++++++++++++++++++------------- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/src/fcdram.h b/src/fcdram.h index 35f9e85..9aa25e0 100644 --- a/src/fcdram.h +++ b/src/fcdram.h @@ -23,7 +23,9 @@ extern "C" bool print_program; bool verbose; bool print_compilation_stats; - uint16_t safe_space_rows_per_subarray; + double min_success_rate; + uint64_t repetition_fracops; + uint8_t safe_space_rows_per_subarray; }; eggmock::aig_receiver fcdram_rewriter( fcdram_compiler_settings settings ); diff --git a/src/main.cpp b/src/main.cpp index b842ed0..c978be7 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -28,14 +28,30 @@ void run_ambit_example(mig_network in) /** * TODO: change `mig` to `aig`?? */ -void run_fcdram_example(aig_network in) +void run_fcdram_example() { + + aig_network in; + const auto i1 = in.create_pi(); + const auto i2 = in.create_pi(); + const auto i3 = in.create_pi(); + + const auto O1 = in.create_and( i3, i2 ); + const auto O2 = in.create_and( in.create_not( i3 ), i1 ); + const auto bi = in.create_or( O1, O2 ); + in.create_po( bi ); + + write_dot( in, "in.dot" ); std::cout << "Sending graph to fcdram_compile..." << std::endl; // fcdram_compile(); + + // use `eggmock` to send mockturtle-graph to `lime`'s entry point `fcdram_compile()` fcdram_compiler_statistics result = eggmock::send_aig( in, fcdram_compile( fcdram_compiler_settings{ .print_program = true, .verbose = true, .print_compilation_stats = true, + .min_success_rate= 99.9999, + .repetition_fracops=5, // issue 5 FracOps per init of reference subarray .safe_space_rows_per_subarray = 16, } ) ); // std::cout << "IC:" << result.instruction_count << std::endl; @@ -50,17 +66,16 @@ void run_fcdram_example(aig_network in) int main() { // mig_network in; - aig_network in; - const auto b_i = in.create_pi(); - const auto b_i_next = in.create_pi(); - const auto m = in.create_pi(); - - const auto O1 = in.create_and( m, b_i_next ); - const auto O2 = in.create_and( in.create_not( m ), b_i ); - const auto bi = in.create_or( O1, O2 ); - in.create_po( bi ); - - write_dot( in, "in.dot" ); + // const auto b_i = in.create_pi(); + // const auto b_i_next = in.create_pi(); + // const auto m = in.create_pi(); + // + // const auto O1 = in.create_and( m, b_i_next ); + // const auto O2 = in.create_and( in.create_not( m ), b_i ); + // const auto bi = in.create_or( O1, O2 ); + // in.create_po( bi ); + // + // write_dot( in, "in.dot" ); // run_ambit_example(in); - run_fcdram_example(in); + run_fcdram_example(); } From f8afba063a963ecc0cd88d97f071c2e805a96cd7 Mon Sep 17 00:00:00 2001 From: alku662e Date: Fri, 11 Jul 2025 10:47:36 +0200 Subject: [PATCH 26/51] :construction: Store selection of safe-space rows in config file to save recomputation on subsequent runs --- rs/src/fc_dram/architecture.rs | 80 +++++++++++++++++++++------ rs/src/fc_dram/compiler.rs | 74 ++++++++++++++++--------- rs/src/fc_dram/egraph_extraction.rs | 84 +++++++++++++++++------------ rs/src/fc_dram/mod.rs | 10 +++- 4 files changed, 172 insertions(+), 76 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 9a3bcfb..32b7798 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -5,7 +5,7 @@ //! //! RowAddress (eg via bit-shifting given bitmasks for subarray-id & row-addr to put on-top of RowAddress -use std::{cmp::Ordering, collections::{HashMap, HashSet}, fmt::{Display, Formatter}, sync::LazyLock}; +use std::{cmp::Ordering, collections::{HashMap, HashSet}, fmt::{Display, Formatter}, ops, sync::LazyLock}; pub const NR_SUBARRAYS: u64 = 2u64.pow(7); pub const ROWS_PER_SUBARRAY: u64 = 2u64.pow(9); @@ -146,8 +146,23 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { /// - ! must be smaller than `rows_per_subarray * nr_subarrays` (this is NOT checked!) pub type RowAddress = u64; pub type SubarrayId = u64; -#[derive(Debug, PartialEq)] -pub struct SuccessRate(f64); + +// impl Display for Vec { +// fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { +// write!(f, "[")?; +// let mut iter = self.iter(); +// if let Some(first) = iter.next() { +// write!(f, "{}", first)?; +// for elem in iter { +// write!(f, ",{}", elem)?; +// } +// } +// write!(f, "]") +// } +// } + +#[derive(Debug, PartialEq, Clone, Copy)] +pub struct SuccessRate(pub f64); impl Eq for SuccessRate {} @@ -163,6 +178,31 @@ impl Ord for SuccessRate { } } +impl ops::Mul for SuccessRate { + type Output = SuccessRate; + + fn mul(self, rhs: Self) -> Self::Output { + SuccessRate(rhs.0 * self.0) + } +} + +/// see Figure6,13 in [1] for timing diagrams +/// - all numbers are specified in ns +pub struct TimingSpec { + pub t_ras: f64, + /// Time btw an `PRE` and `ACT` when performing `APA` for issuing a `NOT` + pub time_btw_pre_act_apa_not: f64, +} + +impl Default for TimingSpec { + fn default() -> Self { + todo!() + // TimingSpec { + // t_ras: + // } + } +} + /// TODO: add field encoding topology of subarrays (to determine which of them share sense-amps) pub struct FCDRAMArchitecture { /// Nr of subarrays in a DRAM module @@ -214,9 +254,9 @@ impl FCDRAMArchitecture { /// subarray adjacent to the compute subarray (!this is not checked but assumed to be true!) pub fn get_instructions_implementation_of_logic_ops(logic_op: LogicOp) -> Vec { match logic_op { - LogicOp::NOT => vec!(Instruction::APA(0, 0)), - LogicOp::AND => vec!(Instruction::FracOp(0), Instruction::APA(0, 0)), - LogicOp::OR => vec!(Instruction::FracOp(0), Instruction::APA(0, 0)), + LogicOp::NOT => vec!(Instruction::ApaNOT(0, 0)), + LogicOp::AND => vec!(Instruction::FracOp(0), Instruction::ApaNOT(0, 0)), + LogicOp::OR => vec!(Instruction::FracOp(0), Instruction::ApaNOT(0, 0)), LogicOp::NAND => { // 1. AND, 2. NOT FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::AND) @@ -247,6 +287,7 @@ pub enum RowDistanceToSenseAmps { Far=0, } +type Comment = String; /// Instructions used in FC-DRAM /// - NOT: implemented using `APA` /// - AND/OR: implemented by (see [1] Chap6.1.2) @@ -259,7 +300,7 @@ pub enum RowDistanceToSenseAmps { /// /// Additionally RowClone-operations are added for moving data around if needed (eg if valid data /// would be affected by following Simultaneous-Row-Activations) -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum Instruction { /// Needed for initializing neutral row in reference subarray (to set `V_{AND}`/`V_{OR}` (see /// [1]( @@ -267,14 +308,19 @@ pub enum Instruction { /// - `PRE` "interrupt the process of row activation, and prevent the sense amplifier from being enabled" FracOp(RowAddress), /// Multiple-Row Activation: `ACT R_F -> PRE -> ACT R_L -> PRE` of rows `R_F`,`R_L` for rows within - /// different subarrays. As a result `R_L` holds the negated value of `R_F` (see Chap5.1 of - /// PaperFunctionally Complete DRAMs + /// different subarrays. As a result `R_L` holds the negated value of `R_F` (see Chap5.1 of PaperFunctionally Complete DRAMs /// Used to implement NOT directly - APA(RowAddress,RowAddress), // TODO: Rename to SimultaneousRowActivation or sth the like ? + ApaNOT(RowAddress,RowAddress), + /// Multiple-Row Activation: `ACT R_F -> PRE -> ACT R_L -> PRE` of rows `R_F`,`R_L` for rows within + /// different subarrays (but with different timings than `ApaNOT`!) + /// Used to implement `AND`&`OR` (!make sure to init reference subarray beforehand) + ApaAndOr(RowAddress,RowAddress), /// Fast-Parallel-Mode RowClone for cloning row-data within same subarray /// - corresponds to `AA`, basically copies from src-row -> row-buffer -> dst-row /// - first operand=src, 2nd operand=dst where `src` and `dst` MUST reside in the same subarray ! - RowCloneFPM(RowAddress, RowAddress), + /// + /// Comment indicates what this FPM was issued for (for simpler debugability) + RowCloneFPM(RowAddress, RowAddress, Comment), /// Copies data from src (1st operand) to dst (2nd operand) using RowClonePSM, which copies the /// data from `this_bank(src_row) -> other_bank(rowX) -> this_bank(dst_row)` (where /// `other_bank` might be any other bank). Since this copy uses the internal DRAM-bus it works @@ -289,8 +335,9 @@ impl Display for Instruction { // TODO: change string-representation to display subarray-id let description = match self { Instruction::FracOp(row) => format!("AP({})", display_row(*row)), - Instruction::APA(row1,row2) => format!("APA({},{})", display_row(*row1), display_row(*row2)), - Instruction::RowCloneFPM(row1, row2) => format!("AA({},{})", display_row(*row1), display_row(*row2)), + Instruction::ApaNOT(row1,row2) => format!("APA_NOT({},{})", display_row(*row1), display_row(*row2)), + Instruction::ApaAndOr(row1,row2) => format!("APA_AND_OR({},{}) // activates {:?}", display_row(*row1), display_row(*row2), ARCHITECTURE.precomputed_simultaneous_row_activations.get(&(*row1,*row2))), + Instruction::RowCloneFPM(row1, row2, comment) => format!("AA({},{}) // {}", display_row(*row1), display_row(*row2), comment), Instruction::RowClonePSM(row1, row2) => format!(" TRANSFER({},(rowX)) TANSFER(rowX,{}) @@ -341,8 +388,9 @@ impl Instruction { match self { Instruction::FracOp(__) => 7, // see [2] ChapIII.A, (two cmd-cycles + five idle cycles) // TODO: change to ns (t_{RAS}+6ns) - `t_{RAS}` to mem cycles - Instruction::APA(_, _) => 3, // NOTE: this is not explicitly written in the paper, TODO: check with authors - Instruction::RowCloneFPM(_, _) => 2, // see [4] Chap3.2 + Instruction::ApaNOT(_, _) => 3, // NOTE: this is not explicitly written in the paper, TODO: check with authors + Instruction::ApaAndOr(_, _) => 3, // NOTE: this is not explicitly written in the paper, TODO: check with authors + Instruction::RowCloneFPM(_, _, _) => 2, // see [4] Chap3.2 Instruction::RowClonePSM(_, _) => 256, // =(8192B/64B)*2 (*2 since copies two time, to and from `` on 64B-granularity } } @@ -425,7 +473,7 @@ impl Instruction { }; match self { - Instruction::APA( r1, r2) => { + Instruction::ApaNOT( r1, r2) => { let activated_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(&(*r1,*r2)).expect("[ERR] Missing SRA for ({r1},{r2}"); let nr_operands = activated_rows.len(); // ASSUMPTION: it seems like "operands" referred to the number of activated rows (see [1] // taken from [1] Chap6.3 diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index bd41788..a41be22 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -13,7 +13,8 @@ use eggmock::{Aoig, Id, NetworkWithBackwardEdges, Node, Signal}; use itertools::Itertools; use log::debug; use priority_queue::PriorityQueue; -use std::{cmp::Ordering, collections::{HashMap, HashSet}}; +use toml::Table; +use std::{cmp::Ordering, collections::{HashMap, HashSet}, ffi::CStr, fmt::Debug, fs, path::Path}; /// Provides [`Compiler::compile()`] to compile a logic network into a [`Program`] pub struct Compiler { @@ -114,13 +115,6 @@ impl Compiler { .iter().map(|row| row & ROW_ID_BITMASK) // reset subarray-id to all 0s .collect() }; - - // deactivate all combination which could activate safe-space rows - for row in self.safe_space_rows.iter() { - for row_combi in ARCHITECTURE.row_activated_by_rowaddress_tuple.get(row).unwrap() { - self.blocked_row_combinations.insert(*row_combi); - } - } } /// Places (commonly used) constants in safe-space rows @@ -195,11 +189,34 @@ impl Compiler { /// Initialize compilation state: mark unsuable rows (eg safe-space rows), place input operands fn init_comp_state(&mut self, network: &impl NetworkWithBackwardEdges, program: &mut Program) { - // debug!("Compiling {:?}", network); - // 0.1 Allocate safe-space rows (for storing intermediate values safely - self.alloc_safe_space_rows(self.settings.safe_space_rows_per_subarray); + let config_file = unsafe { CStr::from_ptr(self.settings.config_file) }.to_str().unwrap(); + let config = Path::new(config_file); + println!("{:?}", config); + + // 0.1 Allocate safe-space rows (for storing intermediate values and constants 0s&1s) safely + if config.is_file() { + // TODO: load configs from file + } else { + // TODO: compute configs and write them to this file + + // debug!("Compiling {:?}", network); + self.alloc_safe_space_rows(self.settings.safe_space_rows_per_subarray); + + self.place_constants(); + + let config_in_toml = toml::toml! { + safe_space_rows = [1,2,3] + }; + fs::write(config, config_in_toml.to_string()).expect("Sth went wrong here.."); + } + + // deactivate all combination which could activate safe-space rows + for row in self.safe_space_rows.iter() { + for row_combi in ARCHITECTURE.row_activated_by_rowaddress_tuple.get(row).unwrap() { + self.blocked_row_combinations.insert(*row_combi); + } + } - self.place_constants(); // 0.2 Place all inputs and mark them as being live self.place_inputs( network.leaves().collect::>() ); // place input-operands into rows debug!("Placed inputs {:?} in {:?}", network.leaves().collect::>(), self.comp_state.value_states); @@ -238,7 +255,7 @@ impl Compiler { instructions.push(Instruction::FracOp(frac_row)); } for other_row in ref_rows { - instructions.push(Instruction::RowCloneFPM(*row_address_1, other_row)); + instructions.push(Instruction::RowCloneFPM(*row_address_1, other_row, String::from("Init ref-subarray with 1s"))); } instructions }, @@ -250,7 +267,7 @@ impl Compiler { instructions.push(Instruction::FracOp(frac_row)); } for other_row in ref_rows { - instructions.push(Instruction::RowCloneFPM(*row_address_0, other_row)); + instructions.push(Instruction::RowCloneFPM(*row_address_0, other_row, String::from("Init ref-subarray with 0s"))); } instructions }, @@ -277,7 +294,7 @@ impl Compiler { self.comp_state.dram_state.insert(row_addr, RowState { is_compute_row: true, live_value: Some(src_operand), constant: None }); if (src_operand_location & SUBARRAY_ID_BITMASK) == (row_addr & SUBARRAY_ID_BITMASK) { - instructions.push(Instruction::RowCloneFPM(src_operand_location, row_addr)); + instructions.push(Instruction::RowCloneFPM(src_operand_location, row_addr, String::from("Move operand to compute row"))); } else { instructions.push(Instruction::RowClonePSM(src_operand_location, row_addr)); } @@ -318,9 +335,9 @@ impl Compiler { let (_, ref_array) = self.select_compute_and_ref_subarray(vec!(comp_row)); let result_row = (ROW_ID_BITMASK & selected_sra.0) & ref_array; - let move_to_comp_row = Instruction::RowCloneFPM(origin_unneg_row, comp_row); - let not = Instruction::APA(comp_row, result_row); - let move_to_safespace= Instruction::RowCloneFPM(result_row, dst_row); + let move_to_comp_row = Instruction::RowCloneFPM(origin_unneg_row, comp_row, String::from("Move row to safe space")); + let not = Instruction::ApaNOT(comp_row, result_row); + let move_to_safespace= Instruction::RowCloneFPM(result_row, dst_row, String::from("Move row to safe space")); instructions.push(move_to_comp_row); instructions.push(not); @@ -383,6 +400,7 @@ impl Compiler { /// Returns Instructions to execute given `next_candidate` /// - [ ] make sure that operation to be executed on those rows won't simultaneously activate other rows holding valid data which will be used by future operations + /// TODO: NEXT fn execute_next_instruction(&mut self, next_candidate: &Signal, network: &impl NetworkWithBackwardEdges) -> Vec { let mut next_instructions = vec!(); @@ -408,10 +426,11 @@ impl Compiler { let (compute_subarray, ref_subarray) = self.select_compute_and_ref_subarray(src_rows); let language_op = network.node(next_candidate.node_id()); + // TODO: extract NOT let logic_op = match language_op { - Aoig::And(_) => LogicOp::AND, - Aoig::Or(_) => LogicOp::OR, - // TODO: extract NOT + // REMINDER: operand-nr is extracted by looking at nr of children beforehand + Aoig::And(_) | Aoig::And4(_) | Aoig::And8(_)| Aoig::And16(_)| Aoig::And32(_) => LogicOp::AND, + Aoig::Or(_) | Aoig::Or4(_) | Aoig::Or8(_) | Aoig::Or16(_) | Aoig::Or32(_) => LogicOp::OR, _ => panic!("candidate is expected to be a logic op"), }; @@ -446,7 +465,13 @@ impl Compiler { // 2.2.1 if yes: move data to other rows for performing this op // 3. Issue actual operation - next_instructions.push(Instruction::APA(row_combi.0, row_combi.1)); + let mut actual_op = match logic_op { + LogicOp::NOT => vec!(Instruction::ApaNOT(row_combi.0, row_combi.1)), + LogicOp::AND | LogicOp::OR => vec!(Instruction::ApaAndOr(row_combi.0, row_combi.1)), + LogicOp::NAND | LogicOp::NOR => vec!(Instruction::ApaAndOr(row_combi.0, row_combi.1), Instruction::ApaNOT(row_combi.0, row_combi.1)), // TODO: or the othyer way around (1st NOT)? + }; + + next_instructions.append(&mut actual_op); for (&comp_row, &ref_row) in comp_rows.iter().zip(ref_rows.iter()) { self.comp_state.dram_state.insert(comp_row, RowState { is_compute_row: true, live_value: Some(*next_candidate), constant: None }); self.comp_state.value_states.insert(*next_candidate, ValueState { is_computed: true, row_location: Some(comp_row) }); @@ -581,7 +606,8 @@ mod tests { use crate::fc_dram::egraph_extraction::CompilingCostFunction; - use super::*; // import all elements from parent-module + use super::*; use std::ffi::CString; + // import all elements from parent-module use std::sync::Once; // ERROR: `eggmock`-API doesn't allow this.. @@ -620,7 +646,7 @@ mod tests { INIT.call_once(|| { env_logger::init(); }); - Compiler::new(CompilerSettings { print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, repetition_fracops: 5, safe_space_rows_per_subarray: 16 } ) + Compiler::new(CompilerSettings { print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, repetition_fracops: 5, safe_space_rows_per_subarray: 16, config_file: CString::new("").expect("CString::new failed").as_ptr(), do_save_config: true} ) } #[test] diff --git a/rs/src/fc_dram/egraph_extraction.rs b/rs/src/fc_dram/egraph_extraction.rs index cb3b0f3..98797d5 100644 --- a/rs/src/fc_dram/egraph_extraction.rs +++ b/rs/src/fc_dram/egraph_extraction.rs @@ -7,18 +7,18 @@ use std::cmp::Ordering; use std::ops; use std::rc::Rc; -use super::architecture::{FCDRAMArchitecture, LogicOp}; +use super::architecture::{FCDRAMArchitecture, LogicOp, SuccessRate}; pub struct CompilingCostFunction{} // impl StackedPartialGraph { } // Do I need this?? /// TODO: add reliability as cost-metric -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Eq)] pub struct CompilingCost { // partial: RefCell>>, /// Probability that the whole program will run successfully - success_rate: f64, + success_rate: SuccessRate, /// Estimation of program cost (from input logic-ops) program_cost: usize, } @@ -28,6 +28,10 @@ impl ops::Add for CompilingCost { type Output = CompilingCost; fn add(self, rhs: CompilingCost) -> Self::Output { + if self.success_rate.0.abs() > 1.0 || rhs.success_rate.0.abs() > 1.0 { // program_cost > 0 since `usize` is always non-negative + panic!("Compilingcost must be monotonically increasing!"); + } + CompilingCost { success_rate: self.success_rate * rhs.success_rate, // monotonically decreasing program_cost: self.program_cost + rhs.program_cost, // monotonically increasing @@ -35,6 +39,34 @@ impl ops::Add for CompilingCost { } } +impl PartialEq for CompilingCost { + fn eq(&self, other: &Self) -> bool { + self.success_rate == other.success_rate && self.program_cost == other.program_cost + } +} + +/// First compare based on success-rate, then on program-cost +/// TODO: more fine-grained comparison !! +impl PartialOrd for CompilingCost { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +/// First compare based on success-rate, then on program-cost +/// - [`Ordering::Greater`] = better +/// TODO: more fine-grained comparison !! +impl Ord for CompilingCost { + fn cmp(&self, other: &Self) -> Ordering { + // better success-rate is always better than higher program-cost (TODO: improve this) + if self.success_rate == other.success_rate { // TOOD: cmp based on some margin (eg +-0.2%) + self.program_cost.cmp(&other.program_cost) // lower is better + } else { + self.success_rate.cmp(&other.success_rate).reverse() // higher is better + } + } +} + impl CostFunction for CompilingCostFunction { type Cost = Rc; @@ -61,17 +93,18 @@ impl CostFunction for CompilingCostFunction { let op_cost = match *enode { AoigLanguage::False | AoigLanguage::Input(_) => { CompilingCost { - success_rate: 1.0, - program_cost: 0, + success_rate: SuccessRate(1.0), + program_cost: 1, // !=0 to ensure Cost-Function is *strictly monotonically increasing* } }, AoigLanguage::And([node1, node2]) => { let mem_cycles_and = FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::AND) .iter().fold(0, |acc, instr| { acc + instr.get_nr_memcycles() as usize }); - let expected_success_rate = 0.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values + debug!("Cycles AND: {}", mem_cycles_and); + let expected_success_rate = 1.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values CompilingCost { - success_rate: expected_success_rate, + success_rate: SuccessRate(expected_success_rate), program_cost: mem_cycles_and, } @@ -79,28 +112,30 @@ impl CostFunction for CompilingCostFunction { AoigLanguage::Or([node1, node2]) => { let mem_cycles_or = FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::OR) .iter().fold(0, |acc, instr| { acc + instr.get_nr_memcycles() as usize }); - let expected_success_rate = 0.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values + debug!("Cycles OR: {}", mem_cycles_or); + let expected_success_rate = 1.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values CompilingCost { - success_rate: expected_success_rate, + success_rate: SuccessRate(expected_success_rate), program_cost: mem_cycles_or, } }, // TODO: increase cost of NOT? (since it moves the value to another subarray!) // eg prefer `OR(a,b)` to `NOT(AND( NOT(a), NOT(b)))` AoigLanguage::Not(node) => { - let mem_cycles_and = FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::NOT) + let mem_cycles_not = FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::NOT) .iter().fold(0, |acc, instr| { acc + instr.get_nr_memcycles() as usize }); - let expected_success_rate = 0.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values + debug!("Cycles NOT: {}", mem_cycles_not); + let expected_success_rate = 1.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values CompilingCost { - success_rate: expected_success_rate, - program_cost: mem_cycles_and, + success_rate: SuccessRate(expected_success_rate), + program_cost: mem_cycles_not, } }, _ => { // todo!(); CompilingCost { - success_rate: 0.0, + success_rate: SuccessRate(1.0), program_cost: 7, } // 0 // TODO: implement for nary-ops, eg using `.children()` @@ -117,29 +152,10 @@ impl CostFunction for CompilingCostFunction { } } -impl PartialEq for CompilingCost { - fn eq(&self, other: &Self) -> bool { - self.success_rate == other.success_rate && self.program_cost == other.program_cost - } -} - -/// First compare based on success-rate, then on program-cost -/// TODO: more fine-grained comparison !! -impl PartialOrd for CompilingCost { - fn partial_cmp(&self, other: &Self) -> Option { - if self.success_rate == other.success_rate { - self.program_cost.partial_cmp(&other.program_cost) - } else { - self.success_rate.partial_cmp(&other.success_rate) - } - } -} - - #[cfg(test)] mod tests { use eggmock::egg::{self, rewrite, EGraph, Extractor, RecExpr, Rewrite, Runner}; - use eggmock::{AoigLanguage, ComputedNetworkWithBackwardEdges, Network, Signal}; + use eggmock::{AoigLanguage, ComputedNetworkWithBackwardEdges, Network}; use crate::fc_dram::compiler::Compiler; use crate::fc_dram::egraph_extraction::CompilingCostFunction; diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index 65a70e5..58114da 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -24,6 +24,7 @@ pub mod optimization; pub mod program; pub mod utils; +use std::ffi::{CStr, OsStr}; use std::sync::LazyLock; use std::time::Instant; @@ -90,7 +91,7 @@ fn compiling_receiver<'a>( settings: CompilerSettings, ) -> impl Receiver + use<'a> { // REMINDER: EGraph implements `Receiver` - let mut compiler = Compiler::new(settings); + let mut compiler = Compiler::new(settings.clone()); EGraph::::new(()) .map(move |(graph, outputs)| { // `.map()` of `Provider`-trait!, outputs=vector of EClasses @@ -161,7 +162,7 @@ fn compiling_receiver<'a>( }) } -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Clone)] #[repr(C)] /// Compiler options /// - TODO: add flags like minimal success-rate for program @@ -186,6 +187,10 @@ pub struct CompilerSettings { /// - Ops reusing those operands have to clone the values from the safe-space prior to issuing the Op /// - NOTE: rows which are used as safe-space are determined by analyzing patterns in Simultaneous-row activation for the specific architecture (to ensure that safe-space rows won't be activated on any combination of row-addresses) safe_space_rows_per_subarray: u8, + /// Location of config-file (to which to write the compiled configs) - if this config file doesn't exist then a new one is generated under this given path + config_file: *const i8, + /// Whether to save the configuration file (for used safe-space rows, placement of constant 0s&1s, ..) + do_save_config: bool, } // TODO: this will be needed once E-Graph Validation is added (=once we want to transfer the E-Graph back to mockturtle) @@ -209,6 +214,7 @@ struct CompilerStatistics { /// - `settings`: settings to use when running compiler #[no_mangle] extern "C" fn fcdram_compile(settings: CompilerSettings) -> AigReceiverFFI { + env_logger::init(); // needed for `export RUST_LOG=debug` to work let receiver = compiling_receiver(REWRITE_RULES.as_slice(), settings) From 51fad93916bf5153fa6a15af4b3cdf48efce98a2 Mon Sep 17 00:00:00 2001 From: alku662e Date: Fri, 11 Jul 2025 14:48:39 +0200 Subject: [PATCH 27/51] :construction: Allow config file specifying safe-space-rows --- rs/src/fc_dram/compiler.rs | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index a41be22..014af19 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -13,7 +13,7 @@ use eggmock::{Aoig, Id, NetworkWithBackwardEdges, Node, Signal}; use itertools::Itertools; use log::debug; use priority_queue::PriorityQueue; -use toml::Table; +use toml::{Table, Value}; use std::{cmp::Ordering, collections::{HashMap, HashSet}, ffi::CStr, fmt::Debug, fs, path::Path}; /// Provides [`Compiler::compile()`] to compile a logic network into a [`Program`] @@ -196,19 +196,37 @@ impl Compiler { // 0.1 Allocate safe-space rows (for storing intermediate values and constants 0s&1s) safely if config.is_file() { // TODO: load configs from file + // todo!("Load config from that file.."); + + let content = fs::read_to_string(config).unwrap(); + let value = content.parse::().unwrap(); // Parse into generic TOML Value :contentReference[oaicite:1]{index=1} + + if let Some(arr) = value.get("safe_space_rows").and_then(|v| v.as_array()) { + println!("Found array of length {}", arr.len()); + self.safe_space_rows = arr.iter().map(|v| { + v.as_integer().expect("Expected integer") as u64 + }).collect(); + } else { + panic!("Config file doesn't contain value for safe-space-rows"); + } + } else { // TODO: compute configs and write them to this file // debug!("Compiling {:?}", network); self.alloc_safe_space_rows(self.settings.safe_space_rows_per_subarray); - self.place_constants(); + // self.place_constants(); + let safe_space_rows_toml = Value::Array(self.safe_space_rows.iter().map( + |row| Value::Integer(*row as i64) + ).collect()); let config_in_toml = toml::toml! { - safe_space_rows = [1,2,3] + safe_space_rows = safe_space_rows_toml }; fs::write(config, config_in_toml.to_string()).expect("Sth went wrong here.."); } + self.place_constants(); // TODO: move into config-file too? // deactivate all combination which could activate safe-space rows for row in self.safe_space_rows.iter() { From 9eab81ba2bb0c7ab45cc9998db8e1ee95520847a Mon Sep 17 00:00:00 2001 From: alku662e Date: Sun, 13 Jul 2025 18:37:13 +0200 Subject: [PATCH 28/51] :construction: ! large rewrite in progress ! --- rs/src/fc_dram/architecture.rs | 219 +++++++++--------- rs/src/fc_dram/compiler.rs | 57 ++++- ...graph_extraction.rs => cost_estimation.rs} | 55 +++-- rs/src/fc_dram/mod.rs | 8 +- 4 files changed, 195 insertions(+), 144 deletions(-) rename rs/src/fc_dram/{egraph_extraction.rs => cost_estimation.rs} (85%) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 32b7798..5d955dc 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -6,6 +6,7 @@ //! RowAddress (eg via bit-shifting given bitmasks for subarray-id & row-addr to put on-top of RowAddress use std::{cmp::Ordering, collections::{HashMap, HashSet}, fmt::{Display, Formatter}, ops, sync::LazyLock}; +use strum_macros::EnumIter; pub const NR_SUBARRAYS: u64 = 2u64.pow(7); pub const ROWS_PER_SUBARRAY: u64 = 2u64.pow(9); @@ -125,8 +126,8 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { // debug!("Precomputed SRAs: {:#?}", precomputed_simultaneous_row_activations.iter().take(20).collect::>()); let sra_degree_to_rowaddress_combinations= precomputed_simultaneous_row_activations.iter() - .fold(HashMap::new(), |mut acc: HashMap>, (row_combi, activated_rows)| { - acc.entry(activated_rows.len() as u8).or_default().push(*row_combi); + .fold(HashMap::new(), |mut acc: HashMap>, (row_combi, activated_rows)| { + acc.entry(SupportedNrOperands::try_from(activated_rows.len() as u8).unwrap()).or_default().push(*row_combi); acc }); // output how many combinations of row-addresses activate the given nr of rows @@ -186,6 +187,13 @@ impl ops::Mul for SuccessRate { } } +impl From for SuccessRate { + fn from(val: f64) -> Self { + SuccessRate(val) + } +} + + /// see Figure6,13 in [1] for timing diagrams /// - all numbers are specified in ns pub struct TimingSpec { @@ -220,7 +228,7 @@ pub struct FCDRAMArchitecture { /// Map degree of SRA (=nr of activated rows by that SRA) to all combinations of RowAddresses which have that degree of SRA /// - use to eg restrict the choice of row-addresses for n-ary AND/OR (eg 4-ary AND -> at least activate 8 rows; more rows could be activated when using input replication) /// NOTE: LogicOp determiens success-rate - pub sra_degree_to_rowaddress_combinations: HashMap>, + pub sra_degree_to_rowaddress_combinations: HashMap>, // pub sra_degree_to_rowaddress_combinations: HashMap<(u8, LogicOp), BTreeMap<(RowAddress,RowAddress), SuccessRate>>, // to large runtime-overhead :/ /// Stores for every rows which combinations of RowAddresses activate that row (needed for finding appropriate safe space rows) pub row_activated_by_rowaddress_tuple: HashMap>, @@ -352,37 +360,6 @@ impl Display for Instruction { /// TODO: where to put logic for determining which rows are activated simultaneously given two /// row-addresses impl Instruction { - /// Return Addreses of Rows which are used by this instruction (=operand-rows AND result-row) - /// - REMINDER: although only two row-operands are given to `APA`, more rows can be/are affected due to *Simultaneous Row Activation* (see [3]) - /// - /// TODO - pub fn used_addresses( - &self, - ) -> Vec { - // ) -> impl Iterator { - todo!() - // let from = match self { - // Instruction::AAP(from, _) => from, - // Instruction::AP(op) => op, - // } - // .row_addresses(architecture); - // let to = match self { - // Instruction::AAP(_, to) => Some(*to), - // _ => None, - // } - // .into_iter() - // .flat_map(|addr| addr.row_addresses(architecture)); - // from.chain(to) - } - - /// Returns all row-addresses whose values are overriden by this instruction - /// TODO - pub fn overridden_rows<'a>( - &self, - ) -> Vec { - // ) -> impl Iterator { - todo!() - } pub fn get_nr_memcycles(&self) -> u16 { match self { @@ -406,72 +383,9 @@ impl Instruction { // Quote from [1] Chap6.3: "the distance of all simultaneously activated rows" - unclear how this classification happend exactly. Let's be conservative and assume the worst-case behavior // (furthest away row for src-operands). For dst-rows we use the one closest to the sense-amps, since we can choose from which of the rows to read/save the result form - let success_rate_by_row_distance = { - // see [1] Chap5.3 and Chap6.3 - match implemented_op { - LogicOp::NOT => HashMap::from([ - // ((src,dst), success_rate) - ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 51.71), - ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 54.93), - ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 44.16), - ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 57.47), - ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 53.47), - ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 81.92), - ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 45.34), - ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 85.02), - ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 75.13), - ]), - LogicOp::AND => HashMap::from([ - // ((reference,compute), success_rate) - ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 98.81), - ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.20), - ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 80.04), - ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 97.08), - ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 83.26), - ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 97.71), - ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 75.84), - ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 95.29), - ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 94.95), - ]), - LogicOp::OR => HashMap::from([ - // ((reference,compute), success_rate) - ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 99.51), - ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.65), - ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 94.29), - ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 98.98), - ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 94.15), - ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 98.95), - ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 89.23), - ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 98.59), - ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 98.80), - ]), - LogicOp::NAND => HashMap::from([ - // ((reference,compute), success_rate) - ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 98.81), - ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.20), - ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 79.59), - ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 97.08), - ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 82.98), - ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 97.67), - ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 75.50), - ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 95.19), - ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 94.95), - ]), - LogicOp::NOR => HashMap::from([ - // ((reference,compute), success_rate) - ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 99.51), - ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.65), - ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 94.09), - ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 98.97), - ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 94.03), - ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 98.90), - ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 89.15), - ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 98.52), - ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 98.80), - ]), - } - }; + let success_rate_by_row_distance = implemented_op.get_success_rate_by_row_distance(); + // include nr of operands and distance of rows to sense-amps into success-rate match self { Instruction::ApaNOT( r1, r2) => { let activated_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(&(*r1,*r2)).expect("[ERR] Missing SRA for ({r1},{r2}"); @@ -495,8 +409,8 @@ impl Instruction { .map(|row| (ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row)) // RowDistanceToSenseAmps::Far; // TODO: get this .min() .expect("[ERR] Activated rows were empty"); - let total_success_rate = *success_rate_per_operandnr.get(&nr_operands).expect("[ERR] {nr_operands} not =2|4|8|16, the given SRA function seems to not comply with this core assumption.") - * success_rate_by_row_distance.get(&(furthest_src_row, closest_dst_row)).unwrap(); + let total_success_rate = *success_rate_per_operandnr.get(&nr_operands).expect("[ERR] {nr_operands} not =1|2|4|8|16, the given SRA function seems to not comply with this core assumption.") + * success_rate_by_row_distance.get(&(furthest_src_row, closest_dst_row)).unwrap().0; SuccessRate(total_success_rate) }, _ => SuccessRate(1.0), @@ -505,9 +419,8 @@ impl Instruction { } /// Contains logical operations which are supported (natively) on FCDRAM-Architecture -/// - see [`FCDRAMArchitecture::get_instructions_implementation_of_logic_ops`] for how these -/// logic-ops are mapped to FCDRAM-instructions -#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq)] +/// - see [`FCDRAMArchitecture::get_instructions_implementation_of_logic_ops`] for how these logic-ops are mapped to FCDRAM-instructions +#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, EnumIter)] pub enum LogicOp { NOT, AND, @@ -518,6 +431,102 @@ pub enum LogicOp { NOR, } +impl LogicOp { + + /// see [1] Chap5.3 and Chap6.3 + pub fn get_success_rate_by_row_distance(&self) -> HashMap<(RowDistanceToSenseAmps,RowDistanceToSenseAmps), SuccessRate> { + match self { + LogicOp::NOT => HashMap::from([ + // ((src,dst), success_rate) + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 51.71.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 54.93.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 44.16.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 57.47.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 53.47.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 81.92.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 45.34.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 85.02.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 75.13.into()), + ]), + LogicOp::AND => HashMap::from([ + // ((reference,compute), success_rate) + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 98.81.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.20.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 80.04.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 97.08.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 83.26.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 97.71.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 75.84.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 95.29.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 94.95.into()), + ]), + LogicOp::OR => HashMap::from([ + // ((reference,compute), success_rate) + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 99.51.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.65.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 94.29.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 98.98.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 94.15.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 98.95.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 89.23.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 98.59.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 98.80.into()), + ]), + LogicOp::NAND => HashMap::from([ + // ((reference,compute), success_rate) + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 98.81.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.20.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 79.59.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 97.08.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 82.98.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 97.67.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 75.50.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 95.19.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 94.95.into()), + ]), + LogicOp::NOR => HashMap::from([ + // ((reference,compute), success_rate) + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Close), 99.51.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Close), 99.65.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Close), 94.09.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Middle), 98.97.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Middle), 94.03.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Middle), 98.90.into()), + ((RowDistanceToSenseAmps::Close,RowDistanceToSenseAmps::Far), 89.15.into()), + ((RowDistanceToSenseAmps::Middle,RowDistanceToSenseAmps::Far), 98.52.into()), + ((RowDistanceToSenseAmps::Far,RowDistanceToSenseAmps::Far), 98.80.into()), + ]), + } + } +} + +#[derive(Debug, EnumIter, Hash, PartialEq, Eq)] +#[repr(u8)] // You can change the representation (e.g., u8, u16, etc.) +pub enum SupportedNrOperands { + One = 1, + Two = 2, + Four = 4, + Eight = 8, + Sixteen = 16, + Thirtytwo = 32 +} + +impl TryFrom for SupportedNrOperands { + type Error = (); + + fn try_from(value: u8) -> Result { + match value { + 1 => Ok(SupportedNrOperands::One), + 2 => Ok(SupportedNrOperands::Two), + 4 => Ok(SupportedNrOperands::Four), + 8 => Ok(SupportedNrOperands::Eight), + 16 => Ok(SupportedNrOperands::Sixteen), + 32 => Ok(SupportedNrOperands::Thirtytwo), + _ => Err(()), + } + } +} + /// Implements behavior of the RowDecoderCircuitry as described in [3] /// TODO: remove in favor of passing arbitrary closure to [`FCDRAMArchitecture::get_activated_rows_from_apa`] pub trait RowDecoder { @@ -541,6 +550,6 @@ mod tests { #[test] // mark function as test-fn fn test_sra() { - println!("{:?}", ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&8).unwrap().first()); + println!("{:?}", ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&SupportedNrOperands::try_from(8).unwrap()).unwrap().first()); } } diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 014af19..ab7cc65 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -7,12 +7,13 @@ //! - [`Compiler::compile()`] = main function - compiles given logic network for the given [`architecture`] into a [`program`] using some [`optimization`] use super::{ - architecture::{subarrayid_to_subarray_address, Instruction, LogicOp, SubarrayId, ARCHITECTURE, NR_SUBARRAYS, ROW_ID_BITMASK, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, RowAddress + architecture::{subarrayid_to_subarray_address, Instruction, LogicOp, SubarrayId, SupportedNrOperands, ARCHITECTURE, NR_SUBARRAYS, ROW_ID_BITMASK, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, RowAddress }; use eggmock::{Aoig, Id, NetworkWithBackwardEdges, Node, Signal}; use itertools::Itertools; use log::debug; use priority_queue::PriorityQueue; +use strum::IntoEnumIterator; use toml::{Table, Value}; use std::{cmp::Ordering, collections::{HashMap, HashSet}, ffi::CStr, fmt::Debug, fs, path::Path}; @@ -24,9 +25,16 @@ pub struct Compiler { comp_state: CompilationState, /// These rows are reserved in EVERY subarray for storing intermediate results (ignore higher bits of these RowAddress) /// - NOTE: only initialized when compilation starts + /// - NOTE2: all safe-space rows are treated equally, since no computation is performed on them (and hence distance to sense-amps doesn't really matter) + /// + /// DEPRECATED: use `blocked_row_combinations` instead (all other rows are safe-space rows) safe_space_rows: Vec, /// RowCombinations which are not allowed to be issued via `APA` since they activate rows within the safe-space blocked_row_combinations: HashSet<(RowAddress,RowAddress)>, + /// For each nr of operands this field store the rowaddress-combination to issue to activate + /// the desired nr of rows (the choice is made best on success-rate and maximizing the nr of rows which potentially can't be used for storage + /// since they would activate rows where values could reside in) + compute_row_activations: HashMap } impl Compiler { @@ -36,6 +44,7 @@ impl Compiler { comp_state: CompilationState::new( HashMap::new() ), safe_space_rows: vec!(), blocked_row_combinations: HashSet::new(), + compute_row_activations: HashMap::new(), } } @@ -94,9 +103,33 @@ impl Compiler { program } + /// Rather than making sure rows in which live values reside remain untouched, this approach chooses to select fixed RowAddress combinations for all support numbers of operands + /// - this function sets [`Compiler::compute_row_activations`] + /// - NOTE: this choice is expected to be applicable to row activations in all subarrays since the SRA work equivalently between subarrays + /// - ASSUMPTION: there are no architectural differences btw subarrays + /// + /// TODO: NEXT + fn choose_compute_rows(&mut self) { + for nr_operands in SupportedNrOperands::iter() { + let possible_row_combis = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&nr_operands).expect("Given Architecture doesn't support SRA of {nr_operands} operands"); + possible_row_combis.iter().fold(possible_row_combis[0], |best_row_combi, next_row_combi| { + // TODO: compare row-combis based on success-rate and return better one of them + + let success_rate_avg_next_row = 0; + let success_rate_avg_best_row = 0; + // take avg success-rate over all supported LogicOps (since compute rows are the same irrespective of the issued op) + for logic_op in LogicOp::iter() { + // logic_op.get + } + todo!() + }); + } + } /// Allocates safe-space rows inside the DRAM-module /// - NOTE: nr of safe-space rows must be a power of 2 (x) between 1<=x<=64 /// - [ ] TODO: improve algo (in terms of space efficiency) + /// + /// PROBLEM: can't test all possibilities since (for nrr safe-space rows=16) there are `math.comb(512,16) = 841141456821158064519401490400 = 8,4*10^{29}` of them fn alloc_safe_space_rows(&mut self, nr_safe_space_rows: u8) { let supported_nr_safe_space_rows = vec!(1,2,4,8,16,32,64); @@ -104,17 +137,19 @@ impl Compiler { panic!("Only the following nr of rows are supported to be activated: {:?}, given: {}", supported_nr_safe_space_rows, nr_safe_space_rows); } - // TODO: this is just a quick&dirty implementation. Solving this (probably NP-complete) problem of finding optimal safe-space rows is probably worth solving for every DRAM-module once + // TODO: this is just a quick&dirty implementation. Solving this problem of finding optimal safe-space rows is probably worth solving for every DRAM-module once self.safe_space_rows = { // choose any row-addr combi activating exactly `nr_safe_space_rows` and choose all that activated rows to be safe-space rows let chosen_row_addr_combi = ARCHITECTURE.sra_degree_to_rowaddress_combinations - .get(&nr_safe_space_rows).unwrap() + .get(&SupportedNrOperands::try_from(nr_safe_space_rows).unwrap()).unwrap() .first().unwrap(); // just take the first row-combi that activates `nr_safe_space_rows` ARCHITECTURE.precomputed_simultaneous_row_activations.get(chosen_row_addr_combi).unwrap() .iter().map(|row| row & ROW_ID_BITMASK) // reset subarray-id to all 0s .collect() }; + + // TODO: if any other rows are unusable as a consequence (bc activating them would activate another safe-space-row anytime) include those unusable rows into safe-space (else they're competely useless) } /// Places (commonly used) constants in safe-space rows @@ -124,10 +159,12 @@ impl Compiler { // place constants in EVERY subarray for subarray in 0..NR_SUBARRAYS { let mut safe_space = self.safe_space_rows.iter(); + let row_address_0 = subarrayid_to_subarray_address(subarray) | safe_space.next().unwrap(); self.comp_state.constant_values.insert(0, row_address_0); let row_address_1 = subarrayid_to_subarray_address(subarray) | safe_space.next().unwrap(); self.comp_state.constant_values.insert(1, row_address_1); + // does it make sense to store any other constants in safe-space?? self.comp_state.dram_state.insert(row_address_0, RowState { is_compute_row: false, live_value: None, constant: Some(0)} ); self.comp_state.dram_state.insert(row_address_1, RowState { is_compute_row: false, live_value: None, constant: Some(1)} ); @@ -195,8 +232,6 @@ impl Compiler { // 0.1 Allocate safe-space rows (for storing intermediate values and constants 0s&1s) safely if config.is_file() { - // TODO: load configs from file - // todo!("Load config from that file.."); let content = fs::read_to_string(config).unwrap(); let value = content.parse::().unwrap(); // Parse into generic TOML Value :contentReference[oaicite:1]{index=1} @@ -211,7 +246,6 @@ impl Compiler { } } else { - // TODO: compute configs and write them to this file // debug!("Compiling {:?}", network); self.alloc_safe_space_rows(self.settings.safe_space_rows_per_subarray); @@ -226,6 +260,8 @@ impl Compiler { }; fs::write(config, config_in_toml.to_string()).expect("Sth went wrong here.."); } + // TODO: allow reading these in from config-file + // self.choose_compute_rows(); // choose which rows will serve as compute rows (those are stored in `self.compute_row_activations` self.place_constants(); // TODO: move into config-file too? // deactivate all combination which could activate safe-space rows @@ -236,6 +272,7 @@ impl Compiler { } // 0.2 Place all inputs and mark them as being live + // TODO: add additional pass over graph: think about which inputs are needed in which subarray self.place_inputs( network.leaves().collect::>() ); // place input-operands into rows debug!("Placed inputs {:?} in {:?}", network.leaves().collect::>(), self.comp_state.value_states); // 0.3 Setup: store all network-nodes yet to be compiled @@ -346,7 +383,7 @@ impl Compiler { let dst_row = self.get_next_free_safespace_row(None); // TODO: negate val and move value in selected safe-space row - let selected_sra = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&1).unwrap().iter().find(|row| ! self.safe_space_rows.contains(&row.0)) // just select the first available compute-row + let selected_sra = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&SupportedNrOperands::try_from(1).unwrap()).unwrap().iter().find(|row| ! self.safe_space_rows.contains(&row.0)) // just select the first available compute-row .expect("It's assumed that issuing APA(row,row) for same row activates only that row"); let origin_subarray_bitmask = origin_unneg_row & SUBARRAY_ID_BITMASK; let comp_row = origin_subarray_bitmask | (ROW_ID_BITMASK & selected_sra.0); @@ -372,6 +409,8 @@ impl Compiler { /// Return id of subarray to use for computation and reference (compute_subarrayid, reference_subarrayid) /// - based on location of input rows AND current compilation state /// - [ ] POSSIBLE EXTENSION: include lookahead for future ops and their inputs they depend on + /// + /// TODO: NEXT fn select_compute_and_ref_subarray(&self, input_rows: Vec) -> (SubarrayId, SubarrayId) { // naive implementation: just use the subarray that most of the `input_rows` reside in // TODO: find better solution @@ -454,7 +493,7 @@ impl Compiler { // 0. Select an SRA (=row-address tuple) for the selected subarray based on highest success-rate // TODO (possible improvement): input replication by choosing SRA with more activated rows than operands and duplicating operands which are in far-away rows into several rows?) - let row_combi = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&(nr_rows as u8)).unwrap() + let row_combi = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&SupportedNrOperands::try_from(nr_rows as u8).unwrap()).unwrap() // sort by success-rate - using eg `BTreeMap` turned out to impose a too large runtime overhead .iter() .find(|combi| !self.blocked_row_combinations.contains(combi)) // choose first block RowAddr-combination @@ -622,7 +661,7 @@ mod tests { use eggmock::egg::{self, EGraph, Extractor, RecExpr}; use eggmock::{AoigLanguage, ComputedNetworkWithBackwardEdges, Network}; - use crate::fc_dram::egraph_extraction::CompilingCostFunction; + use crate::fc_dram::cost_estimation::CompilingCostFunction; use super::*; use std::ffi::CString; // import all elements from parent-module diff --git a/rs/src/fc_dram/egraph_extraction.rs b/rs/src/fc_dram/cost_estimation.rs similarity index 85% rename from rs/src/fc_dram/egraph_extraction.rs rename to rs/src/fc_dram/cost_estimation.rs index 98797d5..e23f86a 100644 --- a/rs/src/fc_dram/egraph_extraction.rs +++ b/rs/src/fc_dram/cost_estimation.rs @@ -1,4 +1,4 @@ -//! Computation of Compiling Costs +//! Computation of Cost-Metrics (eg includes success rate) use eggmock::egg::{CostFunction, Id}; use eggmock::{AoigLanguage, egg::Language}; @@ -13,41 +13,40 @@ pub struct CompilingCostFunction{} // impl StackedPartialGraph { } // Do I need this?? -/// TODO: add reliability as cost-metric +/// A metric that estimates the runtime cost of executing an [`super::Instruction`] (in the program) #[derive(Debug, Clone, Copy, Eq)] -pub struct CompilingCost { - // partial: RefCell>>, +pub struct InstructionCost { /// Probability that the whole program will run successfully success_rate: SuccessRate, - /// Estimation of program cost (from input logic-ops) - program_cost: usize, + /// Nr of memcycles it takes to execute the corresponding instruction + mem_cycles: usize, } /// Needed to implement `enode.fold()` for computing overall cost from node together with its children -impl ops::Add for CompilingCost { - type Output = CompilingCost; +impl ops::Add for InstructionCost { + type Output = InstructionCost; - fn add(self, rhs: CompilingCost) -> Self::Output { + fn add(self, rhs: InstructionCost) -> Self::Output { if self.success_rate.0.abs() > 1.0 || rhs.success_rate.0.abs() > 1.0 { // program_cost > 0 since `usize` is always non-negative panic!("Compilingcost must be monotonically increasing!"); } - CompilingCost { + InstructionCost { success_rate: self.success_rate * rhs.success_rate, // monotonically decreasing - program_cost: self.program_cost + rhs.program_cost, // monotonically increasing + mem_cycles: self.mem_cycles + rhs.mem_cycles, // monotonically increasing } } } -impl PartialEq for CompilingCost { +impl PartialEq for InstructionCost { fn eq(&self, other: &Self) -> bool { - self.success_rate == other.success_rate && self.program_cost == other.program_cost + self.success_rate == other.success_rate && self.mem_cycles == other.mem_cycles } } /// First compare based on success-rate, then on program-cost /// TODO: more fine-grained comparison !! -impl PartialOrd for CompilingCost { +impl PartialOrd for InstructionCost { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } @@ -56,11 +55,11 @@ impl PartialOrd for CompilingCost { /// First compare based on success-rate, then on program-cost /// - [`Ordering::Greater`] = better /// TODO: more fine-grained comparison !! -impl Ord for CompilingCost { +impl Ord for InstructionCost { fn cmp(&self, other: &Self) -> Ordering { // better success-rate is always better than higher program-cost (TODO: improve this) if self.success_rate == other.success_rate { // TOOD: cmp based on some margin (eg +-0.2%) - self.program_cost.cmp(&other.program_cost) // lower is better + self.mem_cycles.cmp(&other.mem_cycles) // lower is better } else { self.success_rate.cmp(&other.success_rate).reverse() // higher is better } @@ -68,7 +67,7 @@ impl Ord for CompilingCost { } impl CostFunction for CompilingCostFunction { - type Cost = Rc; + type Cost = Rc; /// Compute cost of given `enode` using `cost_fn` /// @@ -92,9 +91,9 @@ impl CostFunction for CompilingCostFunction { // get op-cost of executing `enode`: let op_cost = match *enode { AoigLanguage::False | AoigLanguage::Input(_) => { - CompilingCost { + InstructionCost { success_rate: SuccessRate(1.0), - program_cost: 1, // !=0 to ensure Cost-Function is *strictly monotonically increasing* + mem_cycles: 1, // !=0 to ensure Cost-Function is *strictly monotonically increasing* } }, AoigLanguage::And([node1, node2]) => { @@ -103,9 +102,9 @@ impl CostFunction for CompilingCostFunction { .iter().fold(0, |acc, instr| { acc + instr.get_nr_memcycles() as usize }); debug!("Cycles AND: {}", mem_cycles_and); let expected_success_rate = 1.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values - CompilingCost { + InstructionCost { success_rate: SuccessRate(expected_success_rate), - program_cost: mem_cycles_and, + mem_cycles: mem_cycles_and, } }, @@ -114,9 +113,9 @@ impl CostFunction for CompilingCostFunction { .iter().fold(0, |acc, instr| { acc + instr.get_nr_memcycles() as usize }); debug!("Cycles OR: {}", mem_cycles_or); let expected_success_rate = 1.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values - CompilingCost { + InstructionCost { success_rate: SuccessRate(expected_success_rate), - program_cost: mem_cycles_or, + mem_cycles: mem_cycles_or, } }, // TODO: increase cost of NOT? (since it moves the value to another subarray!) @@ -127,16 +126,16 @@ impl CostFunction for CompilingCostFunction { debug!("Cycles NOT: {}", mem_cycles_not); let expected_success_rate = 1.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values - CompilingCost { + InstructionCost { success_rate: SuccessRate(expected_success_rate), - program_cost: mem_cycles_not, + mem_cycles: mem_cycles_not, } }, _ => { // todo!(); - CompilingCost { + InstructionCost { success_rate: SuccessRate(1.0), - program_cost: 7, + mem_cycles: 7, } // 0 // TODO: implement for nary-ops, eg using `.children()` } @@ -158,7 +157,7 @@ mod tests { use eggmock::{AoigLanguage, ComputedNetworkWithBackwardEdges, Network}; use crate::fc_dram::compiler::Compiler; - use crate::fc_dram::egraph_extraction::CompilingCostFunction; + use crate::fc_dram::cost_estimation::CompilingCostFunction; use crate::fc_dram::CompilerSettings; use super::*; diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index 58114da..fdaf0a3 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -19,7 +19,7 @@ //! - [`utils`] - utilities (helper macros/...) pub mod architecture; pub mod compiler; -pub mod egraph_extraction; +pub mod cost_estimation; pub mod optimization; pub mod program; pub mod utils; @@ -31,7 +31,7 @@ use std::time::Instant; use crate::measure_time; use self::compiler::Compiler; -use self::egraph_extraction::CompilingCostFunction; +use self::cost_estimation::CompilingCostFunction; use eggmock::egg::{rewrite, EGraph, Extractor, Id, Rewrite, Runner}; use eggmock::{ @@ -186,6 +186,10 @@ pub struct CompilerSettings { /// - REMINDER: after `AND`/`OR`-ops the src-operands are overwritten by the op-result, so to reuse operands they're put into specially designated rows (="safe-space") which won't be overwritten /// - Ops reusing those operands have to clone the values from the safe-space prior to issuing the Op /// - NOTE: rows which are used as safe-space are determined by analyzing patterns in Simultaneous-row activation for the specific architecture (to ensure that safe-space rows won't be activated on any combination of row-addresses) + /// + /// TODO: if `config_file` is passed, make sure nr safe-space-rows is equal to nr of rows detailed in config-file + /// + /// DEPRECATED: current implementation select compute rows instead safe_space_rows_per_subarray: u8, /// Location of config-file (to which to write the compiled configs) - if this config file doesn't exist then a new one is generated under this given path config_file: *const i8, From 0dcb071a5d67f7acb6944ba413e59059fe2cb42f Mon Sep 17 00:00:00 2001 From: alku662e Date: Wed, 16 Jul 2025 08:40:24 +0200 Subject: [PATCH 29/51] :construction: Rewrote estimation of distance of rows to sense-amps. Now fix choice of compute-subarray (don't choose edge subarrays for computation) --- rs/src/fc_dram/architecture.rs | 159 +++++++++++++++++++++++------- rs/src/fc_dram/compiler.rs | 30 +++--- rs/src/fc_dram/cost_estimation.rs | 14 +-- 3 files changed, 152 insertions(+), 51 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 5d955dc..835e249 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -6,6 +6,7 @@ //! RowAddress (eg via bit-shifting given bitmasks for subarray-id & row-addr to put on-top of RowAddress use std::{cmp::Ordering, collections::{HashMap, HashSet}, fmt::{Display, Formatter}, ops, sync::LazyLock}; +use log::debug; use strum_macros::EnumIter; pub const NR_SUBARRAYS: u64 = 2u64.pow(7); @@ -22,6 +23,27 @@ pub fn get_subarrayid_from_rowaddr(row: RowAddress) -> SubarrayId { (row & SUBARRAY_ID_BITMASK) >> NR_SUBARRAYS.ilog2() } +/// All Subarrays (except the ones at the edges) have two neighboring subarrays: one below (subarray_id+1) and one above (subarray_id-1) +#[derive(Debug, Clone, PartialEq)] +pub enum NeighboringSubarrayRelPosition { + /// `subarray_id-1` + Above, + /// `subarray_id+1` + Below, +} + +impl NeighboringSubarrayRelPosition { + /// Get whether `subarray1` is above or below `relative_to` + pub fn get_relative_position(subarray: SubarrayId, relative_to: SubarrayId) -> Self { + assert!((subarray as isize - relative_to as isize).abs() == 1, "Given Arrays are not neighboring arrays"); + if subarray > relative_to { + NeighboringSubarrayRelPosition::Below + } else { + NeighboringSubarrayRelPosition::Above + } + } +} + /// Main variable specifying architecture of DRAM-module for which to compile for /// - this is currently just an example implementation for testing purpose; (TODO: make this configurable at runtime) /// @@ -79,31 +101,24 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { }; // just a dummy implementation, see [5] Chap3.2 for details why determining the distance based on the Row Addresses issued by the MemController is difficult - let get_distance_of_row_to_sense_amps = |row: RowAddress| -> RowDistanceToSenseAmps { - // ASSUMPTION: last & first rows only have sense-amps from one side - // TODO: is this true? or do all subarrays have a line of sense-amps on both of their ends?? - let distance_to_nearest_sense_amp_in_nr_rows = if row < ROWS_PER_SUBARRAY { - // this row is in the first subarray - row // row-addr = distance to nearest sense-amps - } else if row > (NR_SUBARRAYS-1)*ROWS_PER_SUBARRAY { - // this row is in the last subarray - row - (NR_SUBARRAYS-1)*ROWS_PER_SUBARRAY // =distance to above sense-amps - } else { - // let subarray_id = row / rows_per_subarray; - let row_nr_in_subarray = row % ROWS_PER_SUBARRAY; - if row_nr_in_subarray < ROWS_PER_SUBARRAY { - // row is in the 1st half of the subarray and hence nearer to the "previous" sense-amps - row_nr_in_subarray - } else { - // row is in the 2nd half of the subarray and hence nearer to the "previous" sense-amps - row_nr_in_subarray - ROWS_PER_SUBARRAY/2 - } + // TODO: NEXT + let get_distance_of_row_to_sense_amps = |row: RowAddress, subarray_rel_position: NeighboringSubarrayRelPosition| -> RowDistanceToSenseAmps { + // NOTE: last & first subarrays only have sense-amps from one side + if (get_subarrayid_from_rowaddr(row) == NR_SUBARRAYS-1 && subarray_rel_position == NeighboringSubarrayRelPosition::Below) || (get_subarrayid_from_rowaddr(row) == 0 && subarray_rel_position == NeighboringSubarrayRelPosition::Above) { + panic!("Edge subarrays have sense-amps only connected from one side"); + } - }; - match distance_to_nearest_sense_amp_in_nr_rows { + let local_row_address= row & ROW_ID_BITMASK; + + let distance_to_above_subarray = match local_row_address { i if i < ROWS_PER_SUBARRAY / 2 / 3 => RowDistanceToSenseAmps::Close, // 1st third of subarray-half i if i < ROWS_PER_SUBARRAY / 2 / 6 => RowDistanceToSenseAmps::Middle, // 2nd third of subarray-half _ => RowDistanceToSenseAmps::Far, // everything else is treated as being far away + }; + + match subarray_rel_position { + NeighboringSubarrayRelPosition::Above => distance_to_above_subarray, + NeighboringSubarrayRelPosition::Below => distance_to_above_subarray.reverse(), // rows close to above subarray are far from below subarray etc } }; @@ -165,6 +180,16 @@ pub type SubarrayId = u64; #[derive(Debug, PartialEq, Clone, Copy)] pub struct SuccessRate(pub f64); +impl SuccessRate { + pub fn new(success_rate: f64) -> Self { + if (0.0..=1.0).contains(&success_rate) { + SuccessRate(success_rate) + } else { + panic!("SuccessRate must in [0,1], but was {success_rate}"); + } + } +} + impl Eq for SuccessRate {} impl PartialOrd for SuccessRate { @@ -183,13 +208,13 @@ impl ops::Mul for SuccessRate { type Output = SuccessRate; fn mul(self, rhs: Self) -> Self::Output { - SuccessRate(rhs.0 * self.0) + SuccessRate::new(rhs.0 * self.0) } } impl From for SuccessRate { fn from(val: f64) -> Self { - SuccessRate(val) + SuccessRate::new(val) } } @@ -235,7 +260,7 @@ pub struct FCDRAMArchitecture { /// Given a row-addr this returns the distance of it to the sense-amps (!determinse success-rate of op using that `row` as an operand) (see [1] Chap5.2) /// - NOTE: a realistic implementation should use the Methodology from [1] to determine this distance (RowHammer) /// - there is no way of telling the distance of a row without testing manually (see [5] Chap3.2: "consecutive row addresses issued by the memory controller can be mapped to entirely different regions of DRAM") - pub get_distance_of_row_to_sense_amps: fn(RowAddress) -> RowDistanceToSenseAmps, + pub get_distance_of_row_to_sense_amps: fn(RowAddress, NeighboringSubarrayRelPosition) -> RowDistanceToSenseAmps, } /// Implement this trait for your specific DRAM-module to support FCDRAM-functionality @@ -295,6 +320,17 @@ pub enum RowDistanceToSenseAmps { Far=0, } +impl RowDistanceToSenseAmps { + /// Reverse distance (Far-> Close, Middle -> Middle, Close->Far), useful when row's distance to other neighboring subarray (below/above) is needed + pub fn reverse(&self) -> Self { + match &self { + RowDistanceToSenseAmps::Close => RowDistanceToSenseAmps::Far, + RowDistanceToSenseAmps::Middle=> RowDistanceToSenseAmps::Middle, + RowDistanceToSenseAmps::Far=> RowDistanceToSenseAmps::Close, + } + } +} + type Comment = String; /// Instructions used in FC-DRAM /// - NOT: implemented using `APA` @@ -317,11 +353,11 @@ pub enum Instruction { FracOp(RowAddress), /// Multiple-Row Activation: `ACT R_F -> PRE -> ACT R_L -> PRE` of rows `R_F`,`R_L` for rows within /// different subarrays. As a result `R_L` holds the negated value of `R_F` (see Chap5.1 of PaperFunctionally Complete DRAMs - /// Used to implement NOT directly + /// src=1st operand, dst=2nd operand ApaNOT(RowAddress,RowAddress), /// Multiple-Row Activation: `ACT R_F -> PRE -> ACT R_L -> PRE` of rows `R_F`,`R_L` for rows within /// different subarrays (but with different timings than `ApaNOT`!) - /// Used to implement `AND`&`OR` (!make sure to init reference subarray beforehand) + /// src=1st operand, dst=2nd operand ApaAndOr(RowAddress,RowAddress), /// Fast-Parallel-Mode RowClone for cloning row-data within same subarray /// - corresponds to `AA`, basically copies from src-row -> row-buffer -> dst-row @@ -361,6 +397,7 @@ impl Display for Instruction { /// row-addresses impl Instruction { + /// TODO: rewrite this (eg `ApaNOT` and `ApaAndOr` take different amount of time !!, see Figure pub fn get_nr_memcycles(&self) -> u16 { match self { Instruction::FracOp(__) => 7, // see [2] ChapIII.A, (two cmd-cycles + five idle cycles) @@ -372,7 +409,7 @@ impl Instruction { } } - /// Success Rate off instructions depends on: + /// Success Rate of instructions depends on: /// - for AND/OR (`APA`): number of input operands (see [1] Chap6.3) /// - data pattern can't be taken into consideration here since its not known at compile-time /// - as well as temperature and DRAM speed rate @@ -387,8 +424,8 @@ impl Instruction { // include nr of operands and distance of rows to sense-amps into success-rate match self { - Instruction::ApaNOT( r1, r2) => { - let activated_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(&(*r1,*r2)).expect("[ERR] Missing SRA for ({r1},{r2}"); + Instruction::ApaNOT( src, dst) => { + let activated_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(&(*src,*dst)).expect("[ERR] Missing SRA for ({r1},{r2}"); let nr_operands = activated_rows.len(); // ASSUMPTION: it seems like "operands" referred to the number of activated rows (see [1] // taken from [1] Chap6.3 let success_rate_per_operandnr = HashMap::from([ @@ -400,20 +437,21 @@ impl Instruction { ]); // nr_operand_success_rate.get(&nr_operands); + let (src_array, dst_array) = (get_subarrayid_from_rowaddr(*src), get_subarrayid_from_rowaddr(*dst)); let furthest_src_row = activated_rows.iter() - .map(|row| (ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row)) // RowDistanceToSenseAmps::Far; // TODO: get this + .map(|row| (ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row, NeighboringSubarrayRelPosition::get_relative_position(src_array, dst_array))) // RowDistanceToSenseAmps::Far; // TODO: get this .max() .expect("[ERR] Activated rows were empty"); // NOTE: SRA is assumed to activate the same row-addresses in both subarrays let closest_dst_row = activated_rows.iter() - .map(|row| (ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row)) // RowDistanceToSenseAmps::Far; // TODO: get this + .map(|row| (ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row, NeighboringSubarrayRelPosition::get_relative_position(dst_array, src_array))) // RowDistanceToSenseAmps::Far; // TODO: get this .min() .expect("[ERR] Activated rows were empty"); let total_success_rate = *success_rate_per_operandnr.get(&nr_operands).expect("[ERR] {nr_operands} not =1|2|4|8|16, the given SRA function seems to not comply with this core assumption.") * success_rate_by_row_distance.get(&(furthest_src_row, closest_dst_row)).unwrap().0; - SuccessRate(total_success_rate) + SuccessRate::new(total_success_rate) }, - _ => SuccessRate(1.0), + _ => SuccessRate::new(1.0), } } } @@ -498,16 +536,71 @@ impl LogicOp { ]), } } + + /// taken from Figure7 (NOT) & Figure15 (AND/OR/NAND/NOR) in [1] (using Mean-dot) + /// - NOTE: since the values have been read from the diagram they might differ +-3% from the actually measured values + /// - remeasuring the values on own setup might be beneficial here ! + /// + /// In General: + /// - for AND/OR/NAND/NOR: "The success rate of bitwise operations consistently increases as the number of input operands increases." (see Observation 11 [1]) + /// - for NOT: seems to be the opposite + pub fn get_success_rate_by_nr_operands(&self) -> HashMap { + match self { + LogicOp::NOT => HashMap::from([ + // ((src,dst), success_rate) + (SupportedNrOperands::One, 98.5.into()), + (SupportedNrOperands::Two, 97.5.into()), + (SupportedNrOperands::Four, 97.0.into()), + (SupportedNrOperands::Eight, 28.0.into()), + (SupportedNrOperands::Sixteen, 10.0.into()), + (SupportedNrOperands::Thirtytwo, 8.0.into()), + ]), + LogicOp::AND => HashMap::from([ + // ((reference,compute), success_rate) + (SupportedNrOperands::Two, 86.0.into()), + (SupportedNrOperands::Four, 91.5.into()), + (SupportedNrOperands::Eight, 92.5.into()), + (SupportedNrOperands::Sixteen, 96.0.into()), + ]), + LogicOp::OR => HashMap::from([ + // ((reference,compute), success_rate) + // TODO + (SupportedNrOperands::Two, 97.5.into()), + (SupportedNrOperands::Four, 97.0.into()), + (SupportedNrOperands::Eight, 28.0.into()), + (SupportedNrOperands::Sixteen, 10.0.into()), + ]), + LogicOp::NAND => HashMap::from([ + // ((reference,compute), success_rate) + // TODO + (SupportedNrOperands::Two, 97.5.into()), + (SupportedNrOperands::Four, 97.0.into()), + (SupportedNrOperands::Eight, 28.0.into()), + (SupportedNrOperands::Sixteen, 10.0.into()), + ]), + LogicOp::NOR => HashMap::from([ + // ((reference,compute), success_rate) + // TODO + (SupportedNrOperands::Two, 97.5.into()), + (SupportedNrOperands::Four, 97.0.into()), + (SupportedNrOperands::Eight, 28.0.into()), + (SupportedNrOperands::Sixteen, 10.0.into()), + ]), + } + } } +/// Support operands numbers for AND/OR/NOT operations #[derive(Debug, EnumIter, Hash, PartialEq, Eq)] #[repr(u8)] // You can change the representation (e.g., u8, u16, etc.) pub enum SupportedNrOperands { + /// One operand only supported for `NOT` One = 1, Two = 2, Four = 4, Eight = 8, Sixteen = 16, + /// Only performed for `NOT` Thirtytwo = 32 } diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index ab7cc65..6e4e343 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -7,7 +7,7 @@ //! - [`Compiler::compile()`] = main function - compiles given logic network for the given [`architecture`] into a [`program`] using some [`optimization`] use super::{ - architecture::{subarrayid_to_subarray_address, Instruction, LogicOp, SubarrayId, SupportedNrOperands, ARCHITECTURE, NR_SUBARRAYS, ROW_ID_BITMASK, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, RowAddress + architecture::{subarrayid_to_subarray_address, Instruction, LogicOp, NeighboringSubarrayRelPosition, SubarrayId, SupportedNrOperands, ARCHITECTURE, NR_SUBARRAYS, ROW_ID_BITMASK, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, RowAddress }; use eggmock::{Aoig, Id, NetworkWithBackwardEdges, Node, Signal}; use itertools::Itertools; @@ -104,16 +104,23 @@ impl Compiler { } /// Rather than making sure rows in which live values reside remain untouched, this approach chooses to select fixed RowAddress combinations for all support numbers of operands - /// - this function sets [`Compiler::compute_row_activations`] + /// - this function sets [`Compiler::compute_row_activations`] to use as compute rows /// - NOTE: this choice is expected to be applicable to row activations in all subarrays since the SRA work equivalently between subarrays /// - ASSUMPTION: there are no architectural differences btw subarrays /// + /// # Limitations + /// + /// There are several drawbacks of choosing fixed compute rows: + /// 1. *LogicOp* is not taken into consideration: different compute rows might (in theory) perform better for specific LogicOps (see [`LogicOp::get_success_rate_by_row_distance()`] which returns different SuccessRates based on the corresponding LogicOp) + /// 2. Compute Rows might perform better for the next subarray (+1) than for the previous (-1) subarray (choice of subarray determines which SenseAmps are used and hence the distance btw rows and SenseAmps) + /// + /// This choice aims to finding a good compromise btw those limitations. /// TODO: NEXT fn choose_compute_rows(&mut self) { for nr_operands in SupportedNrOperands::iter() { let possible_row_combis = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&nr_operands).expect("Given Architecture doesn't support SRA of {nr_operands} operands"); possible_row_combis.iter().fold(possible_row_combis[0], |best_row_combi, next_row_combi| { - // TODO: compare row-combis based on success-rate and return better one of them + // compare row-combis based on avg success-rate and return the better one of them let success_rate_avg_next_row = 0; let success_rate_avg_best_row = 0; @@ -129,7 +136,7 @@ impl Compiler { /// - NOTE: nr of safe-space rows must be a power of 2 (x) between 1<=x<=64 /// - [ ] TODO: improve algo (in terms of space efficiency) /// - /// PROBLEM: can't test all possibilities since (for nrr safe-space rows=16) there are `math.comb(512,16) = 841141456821158064519401490400 = 8,4*10^{29}` of them + /// PROBLEM: can't test all possibilities since (for nr safe-space rows=16) there are `math.comb(512,16) = 841141456821158064519401490400 = 8,4*10^{29}` of them fn alloc_safe_space_rows(&mut self, nr_safe_space_rows: u8) { let supported_nr_safe_space_rows = vec!(1,2,4,8,16,32,64); @@ -331,11 +338,12 @@ impl Compiler { } } - /// Places the referenced `src_operands` into the corresponding `row_addresses` which are expected to be simultaneously executed - fn init_compute_subarray(&mut self, mut row_addresses: Vec, mut src_operands: Vec, logic_op: LogicOp) -> Vec { + /// Places the referenced `src_operands` into the corresponding `row_addresses` which are expected to be simultaneously executed using [`Instruction::RowCloneFPM`] + /// - NOTE: `rel_pos_of_ref_subarray` might affect placement of inputs in the future (eg to choose which input rows to choose for *input replication*) + fn init_compute_subarray(&mut self, mut row_addresses: Vec, mut src_operands: Vec, logic_op: LogicOp, rel_pos_of_ref_subarray: NeighboringSubarrayRelPosition) -> Vec { let mut instructions = vec!(); // if there are fewer src-operands than activated rows perform input replication - row_addresses.sort_by_key(|row| ((ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row))); // replicate input that resides in row with lowest success-rate (=probably the row furthest away) + row_addresses.sort_by_key(|row| ((ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row, rel_pos_of_ref_subarray.clone()))); // replicate input that resides in row with lowest success-rate (=probably the row furthest away) let nr_elements_to_extend = row_addresses.len() - src_operands.len(); if nr_elements_to_extend > 0 { let last_element = *src_operands.last().unwrap(); @@ -351,7 +359,7 @@ impl Compiler { if (src_operand_location & SUBARRAY_ID_BITMASK) == (row_addr & SUBARRAY_ID_BITMASK) { instructions.push(Instruction::RowCloneFPM(src_operand_location, row_addr, String::from("Move operand to compute row"))); } else { - instructions.push(Instruction::RowClonePSM(src_operand_location, row_addr)); + instructions.push(Instruction::RowClonePSM(src_operand_location, row_addr)); // TODO: remove this, since it's not usable in COTS DRAMs } } instructions @@ -486,8 +494,8 @@ impl Compiler { // TODO: extract NOT let logic_op = match language_op { // REMINDER: operand-nr is extracted by looking at nr of children beforehand - Aoig::And(_) | Aoig::And4(_) | Aoig::And8(_)| Aoig::And16(_)| Aoig::And32(_) => LogicOp::AND, - Aoig::Or(_) | Aoig::Or4(_) | Aoig::Or8(_) | Aoig::Or16(_) | Aoig::Or32(_) => LogicOp::OR, + Aoig::And(_) | Aoig::And4(_) | Aoig::And8(_)| Aoig::And16(_) => LogicOp::AND, + Aoig::Or(_) | Aoig::Or4(_) | Aoig::Or8(_) | Aoig::Or16(_) => LogicOp::OR, _ => panic!("candidate is expected to be a logic op"), }; @@ -514,7 +522,7 @@ impl Compiler { next_instructions.append(&mut instruction_init_ref_subarray); // 2. Place rows in the simultaneously activated rows in the compute subarray (init other rows with 0 for OR, 1 for AND and same value for NOT) - let mut instructions_init_comp_subarray = self.init_compute_subarray( activated_rows.clone(), src_operands, logic_op); + let mut instructions_init_comp_subarray = self.init_compute_subarray( activated_rows.clone(), src_operands, logic_op, NeighboringSubarrayRelPosition::get_relative_position(compute_subarray, ref_subarray)); next_instructions.append(&mut instructions_init_comp_subarray); // SKIPPED: 2.2 Check if issuing `APA(src1,src2)` would activate other rows which hold valid data diff --git a/rs/src/fc_dram/cost_estimation.rs b/rs/src/fc_dram/cost_estimation.rs index e23f86a..1ccf8d8 100644 --- a/rs/src/fc_dram/cost_estimation.rs +++ b/rs/src/fc_dram/cost_estimation.rs @@ -1,4 +1,4 @@ -//! Computation of Cost-Metrics (eg includes success rate) +//! Computation of Cost-Metrics (currently includes success rate and nr of mem-cycles) use eggmock::egg::{CostFunction, Id}; use eggmock::{AoigLanguage, egg::Language}; @@ -92,8 +92,8 @@ impl CostFunction for CompilingCostFunction { let op_cost = match *enode { AoigLanguage::False | AoigLanguage::Input(_) => { InstructionCost { - success_rate: SuccessRate(1.0), - mem_cycles: 1, // !=0 to ensure Cost-Function is *strictly monotonically increasing* + success_rate: SuccessRate::new(1.0), + mem_cycles: 1, // !=0 to ensure Cost-Function is *strictly monotonically increasing* (TODO: monotonicity isn"t needed here, right?") } }, AoigLanguage::And([node1, node2]) => { @@ -103,7 +103,7 @@ impl CostFunction for CompilingCostFunction { debug!("Cycles AND: {}", mem_cycles_and); let expected_success_rate = 1.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values InstructionCost { - success_rate: SuccessRate(expected_success_rate), + success_rate: SuccessRate::new(expected_success_rate), mem_cycles: mem_cycles_and, } @@ -114,7 +114,7 @@ impl CostFunction for CompilingCostFunction { debug!("Cycles OR: {}", mem_cycles_or); let expected_success_rate = 1.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values InstructionCost { - success_rate: SuccessRate(expected_success_rate), + success_rate: SuccessRate::new(expected_success_rate), mem_cycles: mem_cycles_or, } }, @@ -127,14 +127,14 @@ impl CostFunction for CompilingCostFunction { let expected_success_rate = 1.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values InstructionCost { - success_rate: SuccessRate(expected_success_rate), + success_rate: SuccessRate::new(expected_success_rate), mem_cycles: mem_cycles_not, } }, _ => { // todo!(); InstructionCost { - success_rate: SuccessRate(1.0), + success_rate: SuccessRate::new(1.0), mem_cycles: 7, } // 0 // TODO: implement for nary-ops, eg using `.children()` From 209155c4d5cf1415a06a837b7c15f293139f1a19 Mon Sep 17 00:00:00 2001 From: alku662e Date: Wed, 16 Jul 2025 22:21:56 +0200 Subject: [PATCH 30/51] Computation of compute rows works. Now rewrite the rest to use them instead of the safe-space approach --- rs/src/fc_dram/architecture.rs | 9 ++--- rs/src/fc_dram/compiler.rs | 55 ++++++++++++++++++++++--------- rs/src/fc_dram/cost_estimation.rs | 5 ++- rs/src/fc_dram/mod.rs | 4 +++ 4 files changed, 51 insertions(+), 22 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 835e249..4e5d612 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -24,7 +24,7 @@ pub fn get_subarrayid_from_rowaddr(row: RowAddress) -> SubarrayId { } /// All Subarrays (except the ones at the edges) have two neighboring subarrays: one below (subarray_id+1) and one above (subarray_id-1) -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumIter)] pub enum NeighboringSubarrayRelPosition { /// `subarray_id-1` Above, @@ -313,6 +313,7 @@ impl FCDRAMArchitecture { } /// Categories of distances of rows to sense-amops +/// - HIB (higher is better): that's why [`RowDistanceToSenseAmps::Close`] has the highest int value #[derive(Hash,Eq,PartialEq,PartialOrd,Ord)] pub enum RowDistanceToSenseAmps { Close=2, @@ -411,8 +412,8 @@ impl Instruction { /// Success Rate of instructions depends on: /// - for AND/OR (`APA`): number of input operands (see [1] Chap6.3) - /// - data pattern can't be taken into consideration here since its not known at compile-time - /// - as well as temperature and DRAM speed rate + /// - data pattern can't be taken into consideration here since its not known at compile-time (unknown at compile-time) + /// - as well as temperature and DRAM speed rate (ignored here) /// /// TAKEAWAY: `OR` is more reliable than `AND` pub fn get_success_rate_of_apa(&self, implemented_op: LogicOp) -> SuccessRate { @@ -591,7 +592,7 @@ impl LogicOp { } /// Support operands numbers for AND/OR/NOT operations -#[derive(Debug, EnumIter, Hash, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, EnumIter, Hash, PartialEq, Eq)] #[repr(u8)] // You can change the representation (e.g., u8, u16, etc.) pub enum SupportedNrOperands { /// One operand only supported for `NOT` diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 6e4e343..5e667f0 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -15,7 +15,7 @@ use log::debug; use priority_queue::PriorityQueue; use strum::IntoEnumIterator; use toml::{Table, Value}; -use std::{cmp::Ordering, collections::{HashMap, HashSet}, ffi::CStr, fmt::Debug, fs, path::Path}; +use std::{cmp::Ordering, collections::{HashMap, HashSet}, env::consts::ARCH, ffi::CStr, fmt::Debug, fs, path::Path}; /// Provides [`Compiler::compile()`] to compile a logic network into a [`Program`] pub struct Compiler { @@ -34,7 +34,7 @@ pub struct Compiler { /// For each nr of operands this field store the rowaddress-combination to issue to activate /// the desired nr of rows (the choice is made best on success-rate and maximizing the nr of rows which potentially can't be used for storage /// since they would activate rows where values could reside in) - compute_row_activations: HashMap + compute_row_activations: HashMap<(SupportedNrOperands, NeighboringSubarrayRelPosition), (RowAddress,RowAddress)> } impl Compiler { @@ -118,18 +118,42 @@ impl Compiler { /// TODO: NEXT fn choose_compute_rows(&mut self) { for nr_operands in SupportedNrOperands::iter() { - let possible_row_combis = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&nr_operands).expect("Given Architecture doesn't support SRA of {nr_operands} operands"); - possible_row_combis.iter().fold(possible_row_combis[0], |best_row_combi, next_row_combi| { - // compare row-combis based on avg success-rate and return the better one of them - - let success_rate_avg_next_row = 0; - let success_rate_avg_best_row = 0; - // take avg success-rate over all supported LogicOps (since compute rows are the same irrespective of the issued op) - for logic_op in LogicOp::iter() { - // logic_op.get - } - todo!() - }); + for sense_amp_position in NeighboringSubarrayRelPosition::iter() { + + let possible_row_combis = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&nr_operands).expect("Given Architecture doesn't support SRA of {nr_operands} operands"); + let best_row_combi = possible_row_combis.iter().fold(possible_row_combis[0], |best_row_combi, next_row_combi| { + // compare row-combis based on avg success-rate and return the better one of them + + let avg_distance_next_row_combi: u64 = { + let activated_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(next_row_combi).unwrap(); + activated_rows.iter().map(|&row| { + // move subarray to 1st subarray (instead of 0th, which is at the edge and hence has no sense-amps above) + let subarray1_id = ((ROW_ID_BITMASK << 1) | 1) ^ ROW_ID_BITMASK; + let row = subarray1_id | row; // makes sure that `get_distance_of_row_to_sense_amps` doesn't panic since SRA returns subarray=0 by default (which is an edge subarray) + println!("{row:b}"); + (ARCHITECTURE.get_distance_of_row_to_sense_amps)(row, sense_amp_position) as u64 + }).sum() + }; + let avg_distance_best_row_combi: u64 = { + // move subarray to 1st subarray (instead of 0th, which is at the edge and hence has no sense-amps above) + let activated_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(&best_row_combi).unwrap(); + activated_rows.iter().map(|&row| { + let subarray1_id = ((ROW_ID_BITMASK << 1) | 1) ^ ROW_ID_BITMASK; + let row = subarray1_id | row; // makes sure that `get_distance_of_row_to_sense_amps` doesn't panic since SRA returns subarray=0 by default (which is an edge subarray) + println!("{row:b}"); + (ARCHITECTURE.get_distance_of_row_to_sense_amps)(row, sense_amp_position) as u64 + }).sum() + }; + + if avg_distance_next_row_combi > avg_distance_best_row_combi { + *next_row_combi + } else { + best_row_combi + } + }); + + self.compute_row_activations.insert((nr_operands, sense_amp_position), best_row_combi); + } } } /// Allocates safe-space rows inside the DRAM-module @@ -268,7 +292,8 @@ impl Compiler { fs::write(config, config_in_toml.to_string()).expect("Sth went wrong here.."); } // TODO: allow reading these in from config-file - // self.choose_compute_rows(); // choose which rows will serve as compute rows (those are stored in `self.compute_row_activations` + self.choose_compute_rows(); // choose which rows will serve as compute rows (those are stored in `self.compute_row_activations` + println!("{:?}", self.compute_row_activations); self.place_constants(); // TODO: move into config-file too? // deactivate all combination which could activate safe-space rows diff --git a/rs/src/fc_dram/cost_estimation.rs b/rs/src/fc_dram/cost_estimation.rs index 1ccf8d8..a2ea27b 100644 --- a/rs/src/fc_dram/cost_estimation.rs +++ b/rs/src/fc_dram/cost_estimation.rs @@ -7,12 +7,10 @@ use std::cmp::Ordering; use std::ops; use std::rc::Rc; -use super::architecture::{FCDRAMArchitecture, LogicOp, SuccessRate}; +use super::architecture::{FCDRAMArchitecture, LogicOp, NeighboringSubarrayRelPosition, RowAddress, RowDistanceToSenseAmps, SuccessRate}; pub struct CompilingCostFunction{} -// impl StackedPartialGraph { } // Do I need this?? - /// A metric that estimates the runtime cost of executing an [`super::Instruction`] (in the program) #[derive(Debug, Clone, Copy, Eq)] pub struct InstructionCost { @@ -89,6 +87,7 @@ impl CostFunction for CompilingCostFunction { // TODO: rewrite to `.fold()` // get op-cost of executing `enode`: + // TODO: return higher success-rates for higher n in nary AND/OR let op_cost = match *enode { AoigLanguage::False | AoigLanguage::Input(_) => { InstructionCost { diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index fdaf0a3..749cd83 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -56,6 +56,10 @@ static REWRITE_RULES: LazyLock>> = LazyLock::new(| rewrite!("or-and-more-not"; "(or ?a ?b)" => "(! (and (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works rewrite!("and-same"; "(and ?a ?a)" => "?a"), rewrite!("not_not"; "(! (! ?a))" => "?a"), + rewrite!("and2_to_4"; "(and (and ?a ?b) (and ?c ?d))" => "(and4 ?a ?b ?c ?d)"), + // TODO: + // rewrite!("and4_to_8"; "(and (and ?a ?b) (and ?c ?d))" => "(and ?a ?b ?c ?d)"), + // rewrite!("and8_to_16"; "(and (and ?a ?b) (and ?c ?d))" => "(and ?a ?b ?c ?d)"), // rewrite!("maj_1"; "(maj ?a ?a ?b)" => "?a"), // rewrite!("maj_2"; "(maj ?a (! ?a) ?b)" => "?b"), // rewrite!("associativity"; "(maj ?a ?b (maj ?c ?b ?d))" => "(maj ?d ?b (maj ?c ?b ?a))"), From 510f61b946b7b468966211de44b2351138a33ab1 Mon Sep 17 00:00:00 2001 From: alku662e Date: Thu, 17 Jul 2025 13:48:12 +0200 Subject: [PATCH 31/51] Remove safe-space rows. Write function for assigning subarray id to every Signal (determining in which subarray each Signal will end up) --- rs/src/fc_dram/compiler.rs | 519 ++++++++++++++++++------------------- rs/src/fc_dram/mod.rs | 20 +- 2 files changed, 267 insertions(+), 272 deletions(-) diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 5e667f0..9285c83 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -23,18 +23,15 @@ pub struct Compiler { settings: CompilerSettings, /// Stores the state of all rows at each compilation step comp_state: CompilationState, - /// These rows are reserved in EVERY subarray for storing intermediate results (ignore higher bits of these RowAddress) - /// - NOTE: only initialized when compilation starts - /// - NOTE2: all safe-space rows are treated equally, since no computation is performed on them (and hence distance to sense-amps doesn't really matter) - /// - /// DEPRECATED: use `blocked_row_combinations` instead (all other rows are safe-space rows) - safe_space_rows: Vec, - /// RowCombinations which are not allowed to be issued via `APA` since they activate rows within the safe-space - blocked_row_combinations: HashSet<(RowAddress,RowAddress)>, /// For each nr of operands this field store the rowaddress-combination to issue to activate /// the desired nr of rows (the choice is made best on success-rate and maximizing the nr of rows which potentially can't be used for storage /// since they would activate rows where values could reside in) - compute_row_activations: HashMap<(SupportedNrOperands, NeighboringSubarrayRelPosition), (RowAddress,RowAddress)> + /// - This is a Design Decision taken: compute rows are rows reserved for performing computations, all other rows are usable as "Register" + compute_row_activations: HashMap<(SupportedNrOperands, NeighboringSubarrayRelPosition), (RowAddress,RowAddress)>, + /// Stores all subarrays in which the signal has to be available + signal_to_subarrayids: HashMap>, + /// see [`Self::get_all_noninverted_src_signals`]. First `Vec`=noninverted src signals, 2nd `Vec`=inverted src signals + computed_noninverted_scr_signals: HashMap,Vec)>, } impl Compiler { @@ -42,9 +39,9 @@ impl Compiler { Compiler{ settings, comp_state: CompilationState::new( HashMap::new() ), - safe_space_rows: vec!(), - blocked_row_combinations: HashSet::new(), compute_row_activations: HashMap::new(), + signal_to_subarrayids: HashMap::new(), + computed_noninverted_scr_signals: HashMap::new(), } } @@ -156,50 +153,25 @@ impl Compiler { } } } - /// Allocates safe-space rows inside the DRAM-module - /// - NOTE: nr of safe-space rows must be a power of 2 (x) between 1<=x<=64 - /// - [ ] TODO: improve algo (in terms of space efficiency) - /// - /// PROBLEM: can't test all possibilities since (for nr safe-space rows=16) there are `math.comb(512,16) = 841141456821158064519401490400 = 8,4*10^{29}` of them - fn alloc_safe_space_rows(&mut self, nr_safe_space_rows: u8) { - - let supported_nr_safe_space_rows = vec!(1,2,4,8,16,32,64); - if !supported_nr_safe_space_rows.contains(&nr_safe_space_rows) { - panic!("Only the following nr of rows are supported to be activated: {:?}, given: {}", supported_nr_safe_space_rows, nr_safe_space_rows); - } - - // TODO: this is just a quick&dirty implementation. Solving this problem of finding optimal safe-space rows is probably worth solving for every DRAM-module once - self.safe_space_rows = { - - // choose any row-addr combi activating exactly `nr_safe_space_rows` and choose all that activated rows to be safe-space rows - let chosen_row_addr_combi = ARCHITECTURE.sra_degree_to_rowaddress_combinations - .get(&SupportedNrOperands::try_from(nr_safe_space_rows).unwrap()).unwrap() - .first().unwrap(); // just take the first row-combi that activates `nr_safe_space_rows` - ARCHITECTURE.precomputed_simultaneous_row_activations.get(chosen_row_addr_combi).unwrap() - .iter().map(|row| row & ROW_ID_BITMASK) // reset subarray-id to all 0s - .collect() - }; - - // TODO: if any other rows are unusable as a consequence (bc activating them would activate another safe-space-row anytime) include those unusable rows into safe-space (else they're competely useless) - } /// Places (commonly used) constants in safe-space rows /// - ! all safe-space rows are assumed to be empty when placing constants (constans are the first things to be placed into safe-space rows) /// - currently placed constants: all 0s and all 1s (for [`Compiler::init_reference_subarray`] fn place_constants(&mut self) { + todo!(); // place constants in EVERY subarray - for subarray in 0..NR_SUBARRAYS { - let mut safe_space = self.safe_space_rows.iter(); - - let row_address_0 = subarrayid_to_subarray_address(subarray) | safe_space.next().unwrap(); - self.comp_state.constant_values.insert(0, row_address_0); - let row_address_1 = subarrayid_to_subarray_address(subarray) | safe_space.next().unwrap(); - self.comp_state.constant_values.insert(1, row_address_1); - // does it make sense to store any other constants in safe-space?? - - self.comp_state.dram_state.insert(row_address_0, RowState { is_compute_row: false, live_value: None, constant: Some(0)} ); - self.comp_state.dram_state.insert(row_address_1, RowState { is_compute_row: false, live_value: None, constant: Some(1)} ); - } + // for subarray in 0..NR_SUBARRAYS { + // let mut safe_space = self.safe_space_rows.iter(); + // + // let row_address_0 = subarrayid_to_subarray_address(subarray) | safe_space.next().unwrap(); + // self.comp_state.constant_values.insert(0, row_address_0); + // let row_address_1 = subarrayid_to_subarray_address(subarray) | safe_space.next().unwrap(); + // self.comp_state.constant_values.insert(1, row_address_1); + // // does it make sense to store any other constants in safe-space?? + // + // self.comp_state.dram_state.insert(row_address_0, RowState { is_compute_row: false, live_value: None, constant: Some(0)} ); + // self.comp_state.dram_state.insert(row_address_1, RowState { is_compute_row: false, live_value: None, constant: Some(1)} ); + // } } /// Place inputs onto appropriate rows @@ -207,17 +179,18 @@ impl Compiler { /// TODO: algo which looks ahead which input-row-placement might be optimal (->reduce nr of move-ops to move intermediate results around & keep inputs close to sense-amps /// TODO: parititon logic network into subgraphs s.t. subgraphs can be mapped onto subarrays reducing nr of needed moves fn place_inputs(&mut self, mut inputs: Vec) { - // naive implementation: start placing input on consecutive safe-space rows (continuing with next subarray once the current subarray has no more free safe-space rows) - // TODO: change input operand placement to be more optimal (->taking into account future use of those operands) - while let Some(next_input) = inputs.pop() { - // NOTE: some safe-space rows are reserved for constants - let free_safespace_row = self.get_next_free_safespace_row(None); - - let initial_row_state = RowState { is_compute_row: false, live_value: Some(Signal::new(next_input, false)), constant: None }; - let initial_value_state = ValueState { is_computed: true, row_location: Some(free_safespace_row) }; - self.comp_state.dram_state.insert(free_safespace_row, initial_row_state); - self.comp_state.value_states.insert(Signal::new(next_input, false), initial_value_state); - } + todo!(); + // // naive implementation: start placing input on consecutive safe-space rows (continuing with next subarray once the current subarray has no more free safe-space rows) + // // TODO: change input operand placement to be more optimal (->taking into account future use of those operands) + // while let Some(next_input) = inputs.pop() { + // // NOTE: some safe-space rows are reserved for constants + // let free_safespace_row = self.get_next_free_safespace_row(None); + // + // let initial_row_state = RowState { is_compute_row: false, live_value: Some(Signal::new(next_input, false)), constant: None }; + // let initial_value_state = ValueState { is_computed: true, row_location: Some(free_safespace_row) }; + // self.comp_state.dram_state.insert(free_safespace_row, initial_row_state); + // self.comp_state.value_states.insert(Signal::new(next_input, false), initial_value_state); + // } } /// Initialize candidates with all nodes that are computable @@ -255,56 +228,69 @@ impl Compiler { .collect() } - /// Initialize compilation state: mark unsuable rows (eg safe-space rows), place input operands + /// Initialize compilation state: choose compute rows (by setting [`Self::compute_row_activations`], assign subarray-ids to each NodeId, return code to place input operands in `program` fn init_comp_state(&mut self, network: &impl NetworkWithBackwardEdges, program: &mut Program) { let config_file = unsafe { CStr::from_ptr(self.settings.config_file) }.to_str().unwrap(); let config = Path::new(config_file); println!("{:?}", config); - // 0.1 Allocate safe-space rows (for storing intermediate values and constants 0s&1s) safely + // 0.1 Allocate compute rows: rows reserved for performing computations, all other rows are usable as "Register" if config.is_file() { - let content = fs::read_to_string(config).unwrap(); - let value = content.parse::().unwrap(); // Parse into generic TOML Value :contentReference[oaicite:1]{index=1} + // let content = fs::read_to_string(config).unwrap(); + // let value = content.parse::().unwrap(); // Parse into generic TOML Value :contentReference[oaicite:1]{index=1} + // + // if let Some(arr) = value.get("safe_space_rows").and_then(|v| v.as_array()) { + // println!("Found array of length {}", arr.len()); + // self.safe_space_rows = arr.iter().map(|v| { + // v.as_integer().expect("Expected integer") as u64 + // }).collect(); + // } else { + // panic!("Config file doesn't contain value for safe-space-rows"); + // } + + // TODO: read&write this to&from config-file (added manually here in the meantiem) + self.compute_row_activations = HashMap::from([ + ((SupportedNrOperands::One, NeighboringSubarrayRelPosition::Above), (8, 8)), + ((SupportedNrOperands::One, NeighboringSubarrayRelPosition::Below), (303, 303)), + ((SupportedNrOperands::Two, NeighboringSubarrayRelPosition::Above), (15, 79)), + ((SupportedNrOperands::Two, NeighboringSubarrayRelPosition::Below), (293, 357)), + ((SupportedNrOperands::Four, NeighboringSubarrayRelPosition::Above), (60, 42)), + ((SupportedNrOperands::Four, NeighboringSubarrayRelPosition::Below), (472, 412)), + ((SupportedNrOperands::Eight, NeighboringSubarrayRelPosition::Above), (42, 15)), + ((SupportedNrOperands::Eight, NeighboringSubarrayRelPosition::Below), (203, 283)), + ((SupportedNrOperands::Sixteen, NeighboringSubarrayRelPosition::Above), (32, 83)), + ((SupportedNrOperands::Sixteen, NeighboringSubarrayRelPosition::Below), (470, 252)), + ((SupportedNrOperands::Thirtytwo, NeighboringSubarrayRelPosition::Above), (307, 28)), + ((SupportedNrOperands::Thirtytwo, NeighboringSubarrayRelPosition::Below), (149, 318)), + ]); - if let Some(arr) = value.get("safe_space_rows").and_then(|v| v.as_array()) { - println!("Found array of length {}", arr.len()); - self.safe_space_rows = arr.iter().map(|v| { - v.as_integer().expect("Expected integer") as u64 - }).collect(); - } else { - panic!("Config file doesn't contain value for safe-space-rows"); - } } else { // debug!("Compiling {:?}", network); - self.alloc_safe_space_rows(self.settings.safe_space_rows_per_subarray); + self.choose_compute_rows(); // choose which rows will serve as compute rows (those are stored in `self.compute_row_activations` + println!("{:?}", self.compute_row_activations); // self.place_constants(); - let safe_space_rows_toml = Value::Array(self.safe_space_rows.iter().map( - |row| Value::Integer(*row as i64) - ).collect()); - let config_in_toml = toml::toml! { - safe_space_rows = safe_space_rows_toml - }; - fs::write(config, config_in_toml.to_string()).expect("Sth went wrong here.."); - } - // TODO: allow reading these in from config-file - self.choose_compute_rows(); // choose which rows will serve as compute rows (those are stored in `self.compute_row_activations` - println!("{:?}", self.compute_row_activations); - self.place_constants(); // TODO: move into config-file too? - - // deactivate all combination which could activate safe-space rows - for row in self.safe_space_rows.iter() { - for row_combi in ARCHITECTURE.row_activated_by_rowaddress_tuple.get(row).unwrap() { - self.blocked_row_combinations.insert(*row_combi); - } + // TODO: write chosen compute rows to config-file + // let safe_space_rows_toml = Value::Array(self.safe_space_rows.iter().map( + // |row| Value::Integer(*row as i64) + // ).collect()); + // let config_in_toml = toml::toml! { + // safe_space_rows = safe_space_rows_toml + // }; + // fs::write(config, config_in_toml.to_string()).expect("Sth went wrong here.."); } - // 0.2 Place all inputs and mark them as being live - // TODO: add additional pass over graph: think about which inputs are needed in which subarray + // NEXT: 0.2 Group operands by subarray (ensure all operands are placed in the right subarray) + self.assign_signals_to_subarrays(network); // sets `self.signal_to_subarrayids` + + // 0.3 Place all constants and inputs and mark the inputs as being live + self.place_constants(); // constants are placed in each subarray + todo!("NEXT"); + self.place_inputs( network.leaves().collect::>() ); // place input-operands into rows debug!("Placed inputs {:?} in {:?}", network.leaves().collect::>(), self.comp_state.value_states); // 0.3 Setup: store all network-nodes yet to be compiled @@ -330,7 +316,101 @@ impl Compiler { }).collect(); } - /// Returns instructions to initialize `ref_rows` in reference-subarray for corresponding logic-op + /// Assigns signals to subarrays and through this determines placement of those signal in the DRAM module + /// - sets [`Self::signal_to_subarrayids`] + /// + /// # Assumptions + /// + /// - assumes `network` is acyclic ! + /// + /// # TODO + /// + /// - make sure nr of signals placed in a subarray is <= nr of available rows (without compute rows) + /// - think about merging subarray assignment (s.t. several outputs end up in same subarray, so that inputs can be reused among them) + fn assign_signals_to_subarrays(&mut self, network: &impl NetworkWithBackwardEdges) { + // 1. determine all signals which go into outputs without being negated (and hence can be stored in same subarray) + // - also store signals which do need to be negated (and process them in the next step) + // - TODO: continue graph traversal with src-operands of the outputs (until primary inputs are reached) + let mut subarray_id = 1; // start with 1 since edge subarrays cant be used as compute subarrays + for output in network.outputs() { + self.signal_to_subarrayids.insert(output, vec!(subarray_id)); // determine (virtual) subarray in which output will reside + let (noninverted_src_signals, inverted_src_signals) = self.get_all_noninverted_src_signals(output, network); + + println!("{:?}", noninverted_src_signals.clone()); + // all directly (might in theory) reside in the same subarray as `output` (since no NOTS are inbtw which locate them to a neighboring subarray) + for connected_signal in noninverted_src_signals { + self.signal_to_subarrayids.entry(connected_signal).or_default().push(subarray_id); // determine (virtual) subarray in which output will reside + } + + // place inverted signals in neighboring subarray + let neighboring_subarray = subarray_id - 1; // place signals that are inverted odd number of times in Above subarray (arbitrary decision, epxloring whether this makes a difference might be explored in future) + let mut unvisited_signals_in_same_subarray: Vec = vec!(); // inverting even nr of times leads to signals being placed in same subarray + let mut unvisited_signals_in_neighboring_subarray = inverted_src_signals; + while !unvisited_signals_in_same_subarray.is_empty() || !unvisited_signals_in_neighboring_subarray.is_empty() { + // println!("Same subarray: {:?}", unvisited_signals_in_same_subarray); + // println!("Neighboring subarray: {:?}", unvisited_signals_in_neighboring_subarray); + if let Some(signal_neighboring_subarray) = unvisited_signals_in_neighboring_subarray.pop() { + + self.signal_to_subarrayids.entry(signal_neighboring_subarray).or_default().push(neighboring_subarray); + // these are placed in the Above subarray (arbitrary decision, epxloring whether this makes a difference might be explored in future) + let (signals_neighboring_subarray_of_output, mut signals_same_subarray_as_output) = self.get_all_noninverted_src_signals(signal_neighboring_subarray, network); + for inverted_signal in signals_neighboring_subarray_of_output { + self.signal_to_subarrayids.entry(inverted_signal).or_default().push(neighboring_subarray); + } + + unvisited_signals_in_same_subarray.append(&mut signals_same_subarray_as_output); + } + + if let Some(signal_same_subarray) = unvisited_signals_in_same_subarray.pop() { + + self.signal_to_subarrayids.entry(signal_same_subarray).or_default().push(subarray_id); + // signals inverted even nr of times are placed in the same subarray as the `output` Signal + let (signals_same_subarray_of_output, mut signals_neighboring_subarray_as_output) = self.get_all_noninverted_src_signals(signal_same_subarray, network); + for signal in signals_same_subarray_of_output { + self.signal_to_subarrayids.entry(signal).or_default().push(subarray_id); + } + + unvisited_signals_in_neighboring_subarray.append(&mut signals_neighboring_subarray_as_output); + } + } + + // for the beginning place all outputs in different subarrays. A 2nd pass may optimize/merge subarrays later on + subarray_id += 2; // maybe +=2 to account for negated operands being stored in neighboring subarray? (TODO: test with some example networks) + } + + + debug!("{:?}", self.signal_to_subarrayids); + } + + /// Returns all src signals which are not inverted. These are exactly those signals that can be placed in the same subarray as. + /// + /// # Returns + /// + /// Tuple of + /// 1. Vector of src Signals that are **not** inverted + /// 2. Vector of src Signals that are indeed inverted (need to be processed further, only first inverted signal is returned for a subtree) + fn get_all_noninverted_src_signals(&mut self, signal: Signal, network: &impl NetworkWithBackwardEdges) -> (Vec, Vec) { + let signal_node = network.node(signal.node_id()); + + let mut noninverted_src_signals = vec!(); + let mut inverted_src_signals = vec!(); + let mut stack_unvisited_noninverted_src_operands = Vec::from(signal_node.inputs()); + + while let Some(src_operand) = stack_unvisited_noninverted_src_operands.pop() { + if src_operand.is_inverted() { + inverted_src_signals.push(src_operand); // store subarray to which this input has to be placed to as a neighbor, further processing elsewhere + } else { + noninverted_src_signals.push(src_operand); + let src_operand_node = network.node(src_operand.node_id()); + stack_unvisited_noninverted_src_operands.append(&mut Vec::from(src_operand_node.inputs())); + } + } + + self.computed_noninverted_scr_signals.insert(signal, (inverted_src_signals.clone(), noninverted_src_signals.clone())); // to save (possible) recomputation next time + (noninverted_src_signals, inverted_src_signals) + } + + /// Returns instructions to initialize all given `ref_rows` in reference-subarray for corresponding logic-op /// - NOTE: [1] doesn't describe how the 0s/1s get into the reference subarray. We use `RowCloneFPM` ([4])) to copy the constant 0s/1s from the reserved safe-space row into the corresponding reference subarray row fn init_reference_subarray(&self, mut ref_rows: Vec, logic_op: LogicOp) -> Vec { match logic_op { @@ -390,54 +470,6 @@ impl Compiler { instructions } - /// Return sequence of instructions to provide negated inputs (if there are any among `src_operands`)). - /// - /// NOTE: Some inputs may be needed in a negated form by the candidates. To start execution those - /// input operands have to be available with their negated form. - /// TODO: NEXT - fn init_negated_src_operands(&mut self, src_operands: Vec, network: &impl NetworkWithBackwardEdges) -> Vec { - let mut instructions = vec!(); - let mut negated_inputs: HashSet = HashSet::new(); // inputs which are required in their negated form - let negated_src_operands: Vec = src_operands.iter() - .filter(|sig| sig.is_inverted()) - .copied() // map ref to owned val - .collect(); - negated_inputs.extend(negated_src_operands.iter()); - - for neg_in in negated_inputs { - if self.comp_state.value_states.contains_key(&neg_in) { - // negated signal is already available - continue; - } else { - // else make negated-signal available - let unnegated_signal = neg_in.invert(); - let origin_unneg_row= self.comp_state.value_states.get(&unnegated_signal).expect("Original version of this value is not available??") - .row_location.expect("Original version of this value is not live??"); - let dst_row = self.get_next_free_safespace_row(None); - - // TODO: negate val and move value in selected safe-space row - let selected_sra = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&SupportedNrOperands::try_from(1).unwrap()).unwrap().iter().find(|row| ! self.safe_space_rows.contains(&row.0)) // just select the first available compute-row - .expect("It's assumed that issuing APA(row,row) for same row activates only that row"); - let origin_subarray_bitmask = origin_unneg_row & SUBARRAY_ID_BITMASK; - let comp_row = origin_subarray_bitmask | (ROW_ID_BITMASK & selected_sra.0); - let (_, ref_array) = self.select_compute_and_ref_subarray(vec!(comp_row)); - let result_row = (ROW_ID_BITMASK & selected_sra.0) & ref_array; - - let move_to_comp_row = Instruction::RowCloneFPM(origin_unneg_row, comp_row, String::from("Move row to safe space")); - let not = Instruction::ApaNOT(comp_row, result_row); - let move_to_safespace= Instruction::RowCloneFPM(result_row, dst_row, String::from("Move row to safe space")); - - instructions.push(move_to_comp_row); - instructions.push(not); - instructions.push(move_to_safespace); - - self.comp_state.dram_state.insert(dst_row, RowState { is_compute_row: false, live_value: Some(neg_in), constant: None }); - self.comp_state.value_states.insert(neg_in, ValueState { is_computed: true, row_location: Some(dst_row) }); - } - } - - instructions - } /// Return id of subarray to use for computation and reference (compute_subarrayid, reference_subarrayid) /// - based on location of input rows AND current compilation state @@ -460,120 +492,93 @@ impl Compiler { (mostly_used_subarray_id, selected_ref_subarray) } - /// Return next free safe-space row - /// - use `preferred_subarray` if there is a specific subarray you would like to be that - /// safe-sapce row from. Else just the next free safe-space row will be chosen - /// - NOTE: this is not guaranteed to be fulfilled ! - fn get_next_free_safespace_row(&self, preferred_subarray: Option) -> RowAddress { - - let subarray_order = if let Some(subarray) = preferred_subarray { - // start search with `preferred_subarray` if it's supplied - let mut first_half = (0..subarray).collect::>(); - first_half.extend(subarray+1..NR_SUBARRAYS); - first_half - // subarray_original_order.filter(|x| *x != subarray).into_iter() - } else { - // else just iterate in natural order - (0..NR_SUBARRAYS).collect() - }; - - for subarray in subarray_order { - for row in &self.safe_space_rows { - let row_addr = row | subarrayid_to_subarray_address(subarray); - if self.comp_state.dram_state.get(&row_addr).unwrap_or(&RowState::default()).live_value.is_none() { // NOTE: safe-space rows are inserted lazily into `dram_state` - return row_addr; - } - } - } - panic!("OOM: No more available safe-space rows"); - } - /// Returns Instructions to execute given `next_candidate` /// - [ ] make sure that operation to be executed on those rows won't simultaneously activate other rows holding valid data which will be used by future operations /// TODO: NEXT fn execute_next_instruction(&mut self, next_candidate: &Signal, network: &impl NetworkWithBackwardEdges) -> Vec { - let mut next_instructions = vec!(); - - debug!("Executing candidate {:?}", next_candidate); - let src_operands: Vec = network.node(next_candidate.node_id()).inputs().to_vec(); - let mut init_neg_operands = self.init_negated_src_operands(src_operands.clone(), network); // TODO NEXT: make sure all required negated operands are available - next_instructions.append(&mut init_neg_operands); - - let nr_operands = src_operands.len(); // use to select SRA to activate - let nr_rows = nr_operands.next_power_of_two(); - - let src_rows: Vec = src_operands.iter() - .map(|src_operand| { - - debug!("src: {src_operand:?}"); - self.comp_state.value_states.get(src_operand) - .unwrap() - .row_location - .expect("Sth went wrong... if the src-operand is not in a row, then this candidate shouldn't have been added to the list of candidates") - }) - .collect(); - - let (compute_subarray, ref_subarray) = self.select_compute_and_ref_subarray(src_rows); - let language_op = network.node(next_candidate.node_id()); - - // TODO: extract NOT - let logic_op = match language_op { - // REMINDER: operand-nr is extracted by looking at nr of children beforehand - Aoig::And(_) | Aoig::And4(_) | Aoig::And8(_)| Aoig::And16(_) => LogicOp::AND, - Aoig::Or(_) | Aoig::Or4(_) | Aoig::Or8(_) | Aoig::Or16(_) => LogicOp::OR, - _ => panic!("candidate is expected to be a logic op"), - }; - - // 0. Select an SRA (=row-address tuple) for the selected subarray based on highest success-rate - // TODO (possible improvement): input replication by choosing SRA with more activated rows than operands and duplicating operands which are in far-away rows into several rows?) - let row_combi = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&SupportedNrOperands::try_from(nr_rows as u8).unwrap()).unwrap() - // sort by success-rate - using eg `BTreeMap` turned out to impose a too large runtime overhead - .iter() - .find(|combi| !self.blocked_row_combinations.contains(combi)) // choose first block RowAddr-combination - .expect("No SRA for nr-rows={nr_rows}"); - - let activated_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(row_combi).unwrap(); - let ref_rows: Vec = activated_rows.iter() - .map(|row| row & (subarrayid_to_subarray_address(ref_subarray))) // make activated rows refer to the right subarray - .collect(); - let comp_rows: Vec = activated_rows.iter() - .map(|row| row & (subarrayid_to_subarray_address(compute_subarray))) // make activated rows refer to the right subarray - .collect(); - - - // 1. Initialize rows in ref-subarray (if executing AND/OR) - // - TODO: read nr of frac-ops to issue from compiler-settings - let mut instruction_init_ref_subarray = self.init_reference_subarray(ref_rows.clone(), logic_op); - next_instructions.append(&mut instruction_init_ref_subarray); - - // 2. Place rows in the simultaneously activated rows in the compute subarray (init other rows with 0 for OR, 1 for AND and same value for NOT) - let mut instructions_init_comp_subarray = self.init_compute_subarray( activated_rows.clone(), src_operands, logic_op, NeighboringSubarrayRelPosition::get_relative_position(compute_subarray, ref_subarray)); - next_instructions.append(&mut instructions_init_comp_subarray); - - // SKIPPED: 2.2 Check if issuing `APA(src1,src2)` would activate other rows which hold valid data - // - only necessary once we find optimization to not write values to safe-space but reuse them diectly - // 2.2.1 if yes: move data to other rows for performing this op - - // 3. Issue actual operation - let mut actual_op = match logic_op { - LogicOp::NOT => vec!(Instruction::ApaNOT(row_combi.0, row_combi.1)), - LogicOp::AND | LogicOp::OR => vec!(Instruction::ApaAndOr(row_combi.0, row_combi.1)), - LogicOp::NAND | LogicOp::NOR => vec!(Instruction::ApaAndOr(row_combi.0, row_combi.1), Instruction::ApaNOT(row_combi.0, row_combi.1)), // TODO: or the othyer way around (1st NOT)? - }; - - next_instructions.append(&mut actual_op); - for (&comp_row, &ref_row) in comp_rows.iter().zip(ref_rows.iter()) { - self.comp_state.dram_state.insert(comp_row, RowState { is_compute_row: true, live_value: Some(*next_candidate), constant: None }); - self.comp_state.value_states.insert(*next_candidate, ValueState { is_computed: true, row_location: Some(comp_row) }); - // ref subarray holds negated value afterwarsd - self.comp_state.dram_state.insert(ref_row, RowState { is_compute_row: true, live_value: Some(next_candidate.invert()), constant: None }); - self.comp_state.value_states.insert(next_candidate.invert(), ValueState { is_computed: true, row_location: Some(ref_row) }); - } - - // 4. Copy result data from dst NEAREST to the sense-amps into a safe-space row and update `value_state` - // TODO LAST: possible improvement - error correction over all dst-rows (eg majority-vote for each bit, votes weighted by distance to sense-amps?) - - next_instructions + todo!(); + // let mut next_instructions = vec!(); + // + // debug!("Executing candidate {:?}", next_candidate); + // let src_operands: Vec = network.node(next_candidate.node_id()).inputs().to_vec(); + // let mut init_neg_operands = self.init_negated_src_operands(src_operands.clone(), network); // TODO NEXT: make sure all required negated operands are available + // next_instructions.append(&mut init_neg_operands); + // + // let nr_operands = src_operands.len(); // use to select SRA to activate + // let nr_rows = nr_operands.next_power_of_two(); + // + // let src_rows: Vec = src_operands.iter() + // .map(|src_operand| { + // + // debug!("src: {src_operand:?}"); + // self.comp_state.value_states.get(src_operand) + // .unwrap() + // .row_location + // .expect("Sth went wrong... if the src-operand is not in a row, then this candidate shouldn't have been added to the list of candidates") + // }) + // .collect(); + // + // let (compute_subarray, ref_subarray) = self.select_compute_and_ref_subarray(src_rows); + // let language_op = network.node(next_candidate.node_id()); + // + // // TODO: extract NOT + // let logic_op = match language_op { + // // REMINDER: operand-nr is extracted by looking at nr of children beforehand + // Aoig::And(_) | Aoig::And4(_) | Aoig::And8(_)| Aoig::And16(_) => LogicOp::AND, + // Aoig::Or(_) | Aoig::Or4(_) | Aoig::Or8(_) | Aoig::Or16(_) => LogicOp::OR, + // _ => panic!("candidate is expected to be a logic op"), + // }; + // + // // 0. Select an SRA (=row-address tuple) for the selected subarray based on highest success-rate + // // TODO (possible improvement): input replication by choosing SRA with more activated rows than operands and duplicating operands which are in far-away rows into several rows?) + // let row_combi = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&SupportedNrOperands::try_from(nr_rows as u8).unwrap()).unwrap() + // // sort by success-rate - using eg `BTreeMap` turned out to impose a too large runtime overhead + // .iter() + // .find(|combi| !self.blocked_row_combinations.contains(combi)) // choose first block RowAddr-combination + // .expect("No SRA for nr-rows={nr_rows}"); + // + // let activated_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(row_combi).unwrap(); + // let ref_rows: Vec = activated_rows.iter() + // .map(|row| row & (subarrayid_to_subarray_address(ref_subarray))) // make activated rows refer to the right subarray + // .collect(); + // let comp_rows: Vec = activated_rows.iter() + // .map(|row| row & (subarrayid_to_subarray_address(compute_subarray))) // make activated rows refer to the right subarray + // .collect(); + // + // + // // 1. Initialize rows in ref-subarray (if executing AND/OR) + // // - TODO: read nr of frac-ops to issue from compiler-settings + // let mut instruction_init_ref_subarray = self.init_reference_subarray(ref_rows.clone(), logic_op); + // next_instructions.append(&mut instruction_init_ref_subarray); + // + // // 2. Place rows in the simultaneously activated rows in the compute subarray (init other rows with 0 for OR, 1 for AND and same value for NOT) + // let mut instructions_init_comp_subarray = self.init_compute_subarray( activated_rows.clone(), src_operands, logic_op, NeighboringSubarrayRelPosition::get_relative_position(compute_subarray, ref_subarray)); + // next_instructions.append(&mut instructions_init_comp_subarray); + // + // // SKIPPED: 2.2 Check if issuing `APA(src1,src2)` would activate other rows which hold valid data + // // - only necessary once we find optimization to not write values to safe-space but reuse them diectly + // // 2.2.1 if yes: move data to other rows for performing this op + // + // // 3. Issue actual operation + // let mut actual_op = match logic_op { + // LogicOp::NOT => vec!(Instruction::ApaNOT(row_combi.0, row_combi.1)), + // LogicOp::AND | LogicOp::OR => vec!(Instruction::ApaAndOr(row_combi.0, row_combi.1)), + // LogicOp::NAND | LogicOp::NOR => vec!(Instruction::ApaAndOr(row_combi.0, row_combi.1), Instruction::ApaNOT(row_combi.0, row_combi.1)), // TODO: or the othyer way around (1st NOT)? + // }; + // + // next_instructions.append(&mut actual_op); + // for (&comp_row, &ref_row) in comp_rows.iter().zip(ref_rows.iter()) { + // self.comp_state.dram_state.insert(comp_row, RowState { is_compute_row: true, live_value: Some(*next_candidate), constant: None }); + // self.comp_state.value_states.insert(*next_candidate, ValueState { is_computed: true, row_location: Some(comp_row) }); + // // ref subarray holds negated value afterwarsd + // self.comp_state.dram_state.insert(ref_row, RowState { is_compute_row: true, live_value: Some(next_candidate.invert()), constant: None }); + // self.comp_state.value_states.insert(next_candidate.invert(), ValueState { is_computed: true, row_location: Some(ref_row) }); + // } + // + // // 4. Copy result data from dst NEAREST to the sense-amps into a safe-space row and update `value_state` + // // TODO LAST: possible improvement - error correction over all dst-rows (eg majority-vote for each bit, votes weighted by distance to sense-amps?) + // + // next_instructions } /// Compute `SchedulingPrio` for a given node @@ -739,16 +744,6 @@ mod tests { Compiler::new(CompilerSettings { print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, repetition_fracops: 5, safe_space_rows_per_subarray: 16, config_file: CString::new("").expect("CString::new failed").as_ptr(), do_save_config: true} ) } - #[test] - fn test_alloc_safe_space_rows() { - let mut compiler = init(); - const REQUESTED_SAFE_SPACE_ROWS: u8 = 8; - compiler.alloc_safe_space_rows(REQUESTED_SAFE_SPACE_ROWS); - - debug!("{:?}", compiler.safe_space_rows); - assert_eq!(compiler.safe_space_rows.iter().dedup().collect::>().len(), REQUESTED_SAFE_SPACE_ROWS as usize); - } - #[test] fn test_candidate_initialization() { let mut compiler = init(); diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index 749cd83..610a9dd 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -106,16 +106,16 @@ fn compiling_receiver<'a>( ); // 1. Create E-Graph: run equivalence saturation debug!("Running equivalence saturation..."); - let runner = measure_time!( - Runner::default().with_egraph(graph).run(rules), "t_runner", settings.print_compilation_stats - ); - - if settings.verbose { - println!("== Runner Report"); - runner.print_report(); - } - - let graph = runner.egraph; + // let runner = measure_time!( + // Runner::default().with_egraph(graph).run(rules), "t_runner", settings.print_compilation_stats + // ); + // + // if settings.verbose { + // println!("== Runner Report"); + // runner.print_report(); + // } + // + // let graph = runner.egraph; CompilerOutput::new( graph, From 4da8053fd46be604eb7a15e4730bc19cf85f8a58 Mon Sep 17 00:00:00 2001 From: alku662e Date: Sat, 19 Jul 2025 12:33:16 +0200 Subject: [PATCH 32/51] Implemented placement of inputs --- config/fcdram_hksynx.toml | 1 + rs/Cargo.lock | 102 +++++++++++++++++++++++++++- rs/Cargo.toml | 4 ++ rs/in.dot | 22 ++++++ rs/src/fc_dram/compiler.rs | 136 ++++++++++++++++++++----------------- rs/src/fc_dram/program.rs | 20 ++++-- safe/lime-infinite | Bin 0 -> 701592 bytes src/fcdram.h | 3 + src/main.cpp | 2 + 9 files changed, 224 insertions(+), 66 deletions(-) create mode 100644 config/fcdram_hksynx.toml create mode 100644 rs/in.dot create mode 100755 safe/lime-infinite diff --git a/config/fcdram_hksynx.toml b/config/fcdram_hksynx.toml new file mode 100644 index 0000000..f18d7b5 --- /dev/null +++ b/config/fcdram_hksynx.toml @@ -0,0 +1 @@ +safe_space_rows = [192, 161, 49, 32, 177, 33, 176, 193, 80, 65, 160, 64, 209, 48, 208, 81] diff --git a/rs/Cargo.lock b/rs/Cargo.lock index 2495beb..226122e 100644 --- a/rs/Cargo.lock +++ b/rs/Cargo.lock @@ -209,6 +209,12 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "indexmap" version = "2.9.0" @@ -290,8 +296,12 @@ dependencies = [ "itertools", "log", "ouroboros", + "priority-queue", "rustc-hash", "smallvec", + "strum", + "strum_macros", + "toml", ] [[package]] @@ -357,7 +367,7 @@ version = "0.18.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c7028bdd3d43083f6d8d4d5187680d0d3560d54df4cc9d752005268b41e64d0" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro2", "proc-macro2-diagnostics", "quote", @@ -385,6 +395,17 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "priority-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5676d703dda103cbb035b653a9f11448c0a7216c7926bd35fcb5865475d0c970" +dependencies = [ + "autocfg", + "equivalent", + "indexmap", +] + [[package]] name = "proc-macro2" version = "1.0.95" @@ -475,6 +496,12 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + [[package]] name = "saturating" version = "0.1.0" @@ -507,6 +534,15 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_spanned" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40734c41988f7306bb04f0ecf60ec0f3f1caa34290e4e8ea471dcd3346483b83" +dependencies = [ + "serde", +] + [[package]] name = "smallvec" version = "1.15.0" @@ -519,6 +555,25 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "strum" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32" + +[[package]] +name = "strum_macros" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c77a8c5abcaf0f9ce05d62342b7d298c346515365c36b673df4ebe3ced01fde8" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + [[package]] name = "symbol_table" version = "0.4.0" @@ -567,6 +622,45 @@ dependencies = [ "syn", ] +[[package]] +name = "toml" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0207d6ed1852c2a124c1fbec61621acb8330d2bf969a5d0643131e9affd985a5" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "toml_parser", + "toml_writer", + "winnow", +] + +[[package]] +name = "toml_datetime" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bade1c3e902f58d73d3f294cd7f20391c1cb2fbcb643b73566bc773971df91e3" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_parser" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5c1c469eda89749d2230d8156a5969a69ffe0d6d01200581cdc6110674d293e" +dependencies = [ + "winnow", +] + +[[package]] +name = "toml_writer" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b679217f2848de74cabd3e8fc5e6d66f40b7da40f8e1954d92054d9010690fd5" + [[package]] name = "unicode-ident" version = "1.0.18" @@ -753,6 +847,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "winnow" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd" + [[package]] name = "yansi" version = "1.0.1" diff --git a/rs/Cargo.toml b/rs/Cargo.toml index 18855e3..64653fc 100644 --- a/rs/Cargo.toml +++ b/rs/Cargo.toml @@ -16,6 +16,10 @@ ouroboros = "0.18.0" log = "0.4" env_logger = "0.11.8" itertools = "0.14.0" +priority-queue = "2.5.0" +toml = { version = "0.9.1", features = ["serde"] } +strum_macros = "0.27.1" +strum = "0.27.1" [build-dependencies] # eggmock = { path = "../../eggmock" } diff --git a/rs/in.dot b/rs/in.dot new file mode 100644 index 0000000..86804ac --- /dev/null +++ b/rs/in.dot @@ -0,0 +1,22 @@ +digraph { +rankdir=BT; +0 [label="0",shape=box,style=filled,fillcolor=snow2] +1 [label="1",shape=triangle,style=filled,fillcolor=snow2] +2 [label="2",shape=triangle,style=filled,fillcolor=snow2] +3 [label="3",shape=triangle,style=filled,fillcolor=snow2] +4 [label="4",shape=ellipse,style=filled,fillcolor=white] +5 [label="5",shape=ellipse,style=filled,fillcolor=white] +6 [label="6",shape=ellipse,style=filled,fillcolor=white] +po0 [shape=invtriangle,style=filled,fillcolor=snow2] +2 -> 4 [style=solid] +3 -> 4 [style=solid] +1 -> 5 [style=solid] +3 -> 5 [style=dashed] +4 -> 6 [style=dashed] +5 -> 6 [style=dashed] +6 -> po0 [style=dashed] +{rank = same; 0; 1; 2; 3; } +{rank = same; 4; 5; } +{rank = same; 6; } +{rank = same; po0; } +} diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 9285c83..3ad243a 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -6,6 +6,8 @@ //! //! - [`Compiler::compile()`] = main function - compiles given logic network for the given [`architecture`] into a [`program`] using some [`optimization`] +use crate::fc_dram::architecture::ROWS_PER_SUBARRAY; + use super::{ architecture::{subarrayid_to_subarray_address, Instruction, LogicOp, NeighboringSubarrayRelPosition, SubarrayId, SupportedNrOperands, ARCHITECTURE, NR_SUBARRAYS, ROW_ID_BITMASK, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, RowAddress }; @@ -35,6 +37,9 @@ pub struct Compiler { } impl Compiler { + /// Constants are repeated to fill complete row + const CONSTANTS: [usize; 2] = [0, 1]; + pub fn new(settings: CompilerSettings) -> Self { Compiler{ settings, @@ -63,6 +68,7 @@ impl Compiler { ) -> Program { let mut program = Program::new(vec!()); + // debug!("Compiling {:?}", network); // 0. Prepare compilation: select safe-space rows, place inputs into DRAM module (and store where inputs have been placed in `program`) self.init_comp_state(network, &mut program); @@ -157,40 +163,47 @@ impl Compiler { /// Places (commonly used) constants in safe-space rows /// - ! all safe-space rows are assumed to be empty when placing constants (constans are the first things to be placed into safe-space rows) /// - currently placed constants: all 0s and all 1s (for [`Compiler::init_reference_subarray`] - fn place_constants(&mut self) { - todo!(); + /// - TODO: store placement of constants in `program` + fn place_constants(&mut self, program: &mut Program) { // place constants in EVERY subarray - // for subarray in 0..NR_SUBARRAYS { - // let mut safe_space = self.safe_space_rows.iter(); - // - // let row_address_0 = subarrayid_to_subarray_address(subarray) | safe_space.next().unwrap(); - // self.comp_state.constant_values.insert(0, row_address_0); - // let row_address_1 = subarrayid_to_subarray_address(subarray) | safe_space.next().unwrap(); - // self.comp_state.constant_values.insert(1, row_address_1); - // // does it make sense to store any other constants in safe-space?? - // - // self.comp_state.dram_state.insert(row_address_0, RowState { is_compute_row: false, live_value: None, constant: Some(0)} ); - // self.comp_state.dram_state.insert(row_address_1, RowState { is_compute_row: false, live_value: None, constant: Some(1)} ); - // } + for subarray in 0..NR_SUBARRAYS { + for constant in Self::CONSTANTS { + let next_free_row = self.comp_state.free_rows_per_subarray.get_mut(&subarray).and_then(|v| v.pop()).expect("No free rows in subarray {subarray} :("); + self.comp_state.constant_values.insert(constant, next_free_row); + self.comp_state.dram_state.insert(next_free_row, RowState { is_compute_row: false, live_value: None, constant: Some(constant)} ); + } + } } - /// Place inputs onto appropriate rows + /// Place inputs onto appropriate rows, storing the decided placement into `program.input_row_operands_placement` /// - NOTE: constants are expected to be placed before the inputs /// TODO: algo which looks ahead which input-row-placement might be optimal (->reduce nr of move-ops to move intermediate results around & keep inputs close to sense-amps /// TODO: parititon logic network into subgraphs s.t. subgraphs can be mapped onto subarrays reducing nr of needed moves - fn place_inputs(&mut self, mut inputs: Vec) { - todo!(); - // // naive implementation: start placing input on consecutive safe-space rows (continuing with next subarray once the current subarray has no more free safe-space rows) - // // TODO: change input operand placement to be more optimal (->taking into account future use of those operands) - // while let Some(next_input) = inputs.pop() { - // // NOTE: some safe-space rows are reserved for constants - // let free_safespace_row = self.get_next_free_safespace_row(None); - // - // let initial_row_state = RowState { is_compute_row: false, live_value: Some(Signal::new(next_input, false)), constant: None }; - // let initial_value_state = ValueState { is_computed: true, row_location: Some(free_safespace_row) }; - // self.comp_state.dram_state.insert(free_safespace_row, initial_row_state); - // self.comp_state.value_states.insert(Signal::new(next_input, false), initial_value_state); - // } + fn place_inputs(&mut self, network: &impl NetworkWithBackwardEdges, program: &mut Program) { + + for input in network.leaves().collect::>() { + // check whether the signal is required in inverted or noninverted form and place it accordingly + let original_signal = Signal::new(input, false); + let inverted_signal = Signal::new(input, true); + + if let Some(original_input_locations) = self.signal_to_subarrayids.get(&original_signal) { + for subarray in original_input_locations { + let next_free_row = self.comp_state.free_rows_per_subarray.get_mut(subarray).and_then(|v| v.pop()).expect("OOM: No more free rows in subarray {subarray} for placing inputs"); + self.comp_state.value_states.insert(original_signal, ValueState { is_computed: true, row_location: Some(next_free_row) }); + + program.input_row_operands_placement.entry(original_signal).or_default().push(next_free_row); + } + } + + if let Some(inverted_input_locations) = self.signal_to_subarrayids.get(&inverted_signal) { + for subarray in inverted_input_locations { + let next_free_row = self.comp_state.free_rows_per_subarray.get_mut(subarray).and_then(|v| v.pop()).expect("OOM: No more free rows in subarray {subarray} for placing inputs"); + self.comp_state.value_states.insert(inverted_signal, ValueState { is_computed: true, row_location: Some(next_free_row) }); + + program.input_row_operands_placement.entry(original_signal).or_default().push(next_free_row); + } + } + } } /// Initialize candidates with all nodes that are computable @@ -228,7 +241,11 @@ impl Compiler { .collect() } - /// Initialize compilation state: choose compute rows (by setting [`Self::compute_row_activations`], assign subarray-ids to each NodeId, return code to place input operands in `program` + /// Initialize compilation state: + /// - choose compute rows (by setting [`Self::compute_row_activations`] + /// - assign subarray-ids to each NodeId + /// - initialize [`Self::comp_state::free_rows_per_subarray`] with the rows that are free to be used for placing constants, inputs and intermediate values (when execution has started) + /// - return code to place input operands in `program` fn init_comp_state(&mut self, network: &impl NetworkWithBackwardEdges, program: &mut Program) { let config_file = unsafe { CStr::from_ptr(self.settings.config_file) }.to_str().unwrap(); let config = Path::new(config_file); @@ -267,12 +284,8 @@ impl Compiler { } else { - - // debug!("Compiling {:?}", network); - self.choose_compute_rows(); // choose which rows will serve as compute rows (those are stored in `self.compute_row_activations` println!("{:?}", self.compute_row_activations); - // self.place_constants(); // TODO: write chosen compute rows to config-file // let safe_space_rows_toml = Value::Array(self.safe_space_rows.iter().map( @@ -284,36 +297,33 @@ impl Compiler { // fs::write(config, config_in_toml.to_string()).expect("Sth went wrong here.."); } - // NEXT: 0.2 Group operands by subarray (ensure all operands are placed in the right subarray) + // 0.2 Save free rows + // At the start all rows, except for the compute rows, are free rows + let compute_rows = self.compute_row_activations.values().fold(vec!(), |all_compute_rows, next_compute_row_combi| { + let new_compute_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(next_compute_row_combi).expect("Compute row cant be activated??"); + all_compute_rows.iter().chain(new_compute_rows).cloned().collect() + }); + let mut free_rows = (0..ROWS_PER_SUBARRAY).collect::>(); + free_rows.retain(|r| {!compute_rows.contains(r)}); + for subarray in 0..NR_SUBARRAYS { + let free_rows_in_subarray = free_rows.iter().map(|row| row | subarrayid_to_subarray_address(subarray)).collect(); // transform local row address to row addresses in corresponding `subarray` + self.comp_state.free_rows_per_subarray.entry(subarray as SubarrayId).insert_entry(free_rows_in_subarray); + } + + // 0.3 Group operands by subarray (ensure all operands are placed in the right subarray) self.assign_signals_to_subarrays(network); // sets `self.signal_to_subarrayids` - // 0.3 Place all constants and inputs and mark the inputs as being live - self.place_constants(); // constants are placed in each subarray - todo!("NEXT"); + // NEXT: 0.4 Place all constants and inputs and mark the inputs as being live + self.place_constants(program); // constants are placed in each subarray + debug!("Placed constants: {:?}", self.comp_state.constant_values); - self.place_inputs( network.leaves().collect::>() ); // place input-operands into rows + self.place_inputs(network, program); // place input-operands into rows debug!("Placed inputs {:?} in {:?}", network.leaves().collect::>(), self.comp_state.value_states); - // 0.3 Setup: store all network-nodes yet to be compiled - self.init_candidates(network); - - // store where inputs have been placed in program for user to know where to put them when calling into this program - program.input_row_operands_placement = network.leaves().collect::>() - .iter() - .flat_map(|&id| { - let mut locations = Vec::new(); - let original_value = Signal::new(id, false); - let inverted_value = Signal::new(id, false); // inverted value might have also been placed on init - if let Some(value) = self.comp_state.value_states.get(&original_value) { - locations.push((original_value, value.row_location.expect("Inputs are init directly into rows at the start"))); - } + todo!("NEXT"); - if let Some(value) = self.comp_state.value_states.get(&inverted_value) { - locations.push((inverted_value, value.row_location.expect("Inputs are init directly into rows at the start"))); - } + // 0.5 Setup: store all network-nodes yet to be compiled + self.init_candidates(network); - debug_assert_ne!(locations, vec!(), "Input {id:?} has not been placed at all"); - locations - }).collect(); } /// Assigns signals to subarrays and through this determines placement of those signal in the DRAM module @@ -618,11 +628,11 @@ impl Compiler { /// Stores the current state of a row at a concrete compilations step #[derive(Default)] // by default not a compute_row, no live-value and no constant inside row pub struct RowState { - /// True iff that row is currently: 1) Not a safe-sapce row, 2) Doesn't activate any safe-sapce rows, 3) Isn't holding valid values in the role of a reference-subarray row + /// `compute_rows` are reservered rows which solely exist for performing computations, see [`Compiler::compute_row_activations`] is_compute_row: bool, /// `None` if the value inside this row is currently not live live_value: Option, - /// Some rows (mostly only safe-space rows) store constants values, see [`CompilationState::constant_values`] + /// Mostly 0s/1s (for initializing reference subarray), see [`CompilationState::constant_values`] constant: Option, } @@ -638,15 +648,18 @@ pub struct ValueState { /// Keep track of current progress of the compilation (eg which rows are used, into which rows data is placed, ...) pub struct CompilationState { - /// For each row in the dram-module store its state + /// For each row in the dram-module store its state (whether it's a compute row or if not whether/which value is stored inside it dram_state: HashMap, /// Stores row in which an intermediate result (which is still to be used by future ops) is currently located (or whether it has been computed at all) value_states: HashMap, - /// Some constants are stored in fixed rows (!in each subarray), eg 0s and 1s for initializing reference subarray + /// Stores row location of constant + /// - REMINDER: some constants are stored in fixed rows (!in each subarray), eg 0s and 1s for initializing reference subarray constant_values: HashMap, /// List of candidates (ops ready to be issued) prioritized by some metric by which they are scheduled for execution /// - NOTE: calculate Nodes `SchedulingPrio` using candidates: PriorityQueue, + /// For each Subarray store which rows are free (and hence can be used for storing values) + free_rows_per_subarray: HashMap>, } impl CompilationState { @@ -656,6 +669,7 @@ impl CompilationState { value_states: HashMap::new(), constant_values: HashMap::new(), candidates: PriorityQueue::new(), + free_rows_per_subarray: HashMap::new(), } } } diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs index a176f09..9dea244 100644 --- a/rs/src/fc_dram/program.rs +++ b/rs/src/fc_dram/program.rs @@ -13,7 +13,8 @@ pub struct Program { pub instructions: Vec, /// Specifies where row-operands should be placed prior to calling this program /// (This is a convention which tells the user of this lib where the data should be placed within the DRAM before executing this program) - pub input_row_operands_placement: HashMap, + /// - NOTE: Signals might have to be placed in several subarrays (REMINDER: movement in btw subarrays is not supported by FCDRAM) + pub input_row_operands_placement: HashMap>, /// Specifies into which rows output-operands will have been placed after the program has run successfully pub output_row_operands_placement: HashMap, } @@ -31,12 +32,23 @@ impl Program { /// Print the generated program in human-readable form impl Display for Program { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let display_row = |row| { format!("{}.{}", get_subarrayid_from_rowaddr(row), row & ROW_ID_BITMASK)}; // display subarray separately + let display_row = |row| { + format!("{}.{}", get_subarrayid_from_rowaddr(row), row & ROW_ID_BITMASK) + }; // display subarray separately + + + let display_rows = |rows: Vec| { + let formatted: Vec = rows.iter() + .map(|&row| format!("{}.{}", get_subarrayid_from_rowaddr(row), row & ROW_ID_BITMASK)) + .collect(); + + format!("[{}]", formatted.join(", ")) + }; // display subarray separately writeln!(f, "---------------------------------------")?; writeln!(f, "Input operand placement:")?; - for (signal, row) in &self.input_row_operands_placement { - writeln!(f, "{:?} in {}", signal, display_row(*row))?; + for (signal, rows) in &self.input_row_operands_placement { + writeln!(f, "{:?} in {}", signal, display_rows(rows.to_vec()))?; } writeln!(f, "---------------------------------------")?; diff --git a/safe/lime-infinite b/safe/lime-infinite new file mode 100755 index 0000000000000000000000000000000000000000..eb846620bbbdc9dd52d2098a6001b9a42131b0ff GIT binary patch literal 701592 zcmd?Sd3@bfo&SH+g@(-pC|VFC>@5hSE2)5_*sHhLXjq~ZtyU2RRIru?K@9}bIJsW0 z#HCJkWF{_Xhf%2x#SkUEDbO4FGLj-_M6?mh@Cm6Jp-O}f{XJjrb3V&WZlNGE^T)4` z-h4i1d!P4tZ|8m1&-Ll0Z=O_9F*)V`O-j8nRl$2>Wxd6?zp2!W@9t+|{i#pYr4Hop zF{z_d`w*UDaY|8IeaUlhk_A)Yv(g1qs!-)pqrU3VH@Ou5Ji>)i;Zr#2pPp0gE&NmO zU)=MC-#d8WGga-9RqloA>w4VP5I&RT)W-F{a<#4>P8-1Rx${*H{>p>hGc2cOt6cYQ zT)FN)xN_mM#?vKyhIA2*`pxiUnK8MZkizbHoP|=U@Tt7|r>B1P54-y6A9nRcPi5j? zz5Pw4#@E;C%eDG);j_l)51(Ou7d*cDgDpPEf zqUYZJ^K4I@@9WzW|CV{6;nPWhqvdoe6C2+0wx(3}pchCO}NxP5#EPtzxs^?AN%alGm zw3~m&->9&v9UidoClr=+3>PfCN@1ZfyurdBQdnde?y~R)6c*EmJ1u;X!fMrUtA)>3 zc$&g1Eqt!RV*l_83!kI#z6v*5_zen6*@x>bJV)XE6|S}LNeUmJaE*nJQTRZGt1W!E z!qST2l!XsgxJu#C=ZQFeKZV7f;UNo8QurW+2Q2*T2w`c{aKXY)DSU{+8!Y^!!eY&E zmxUiw_)vvAE&PbWuU5F#!VfC^8iiL{_&$YaD7?bLn-xAx;YJJhD15lW^%nl2!mm}h z*21?cEVDFRW8rToe5AtF7XGTjuTwZ>;V&qBl)|I`t@eLL;Tna9EPSKFM=LyF;ZG=h zjKT#AuTuC}g*RCELkb_KaF>NYpz!eucUt%&g-=kp)xzg1e4@fDEqt!RGZkK8;d2z8 zrEsH#-=OeG3fEhBj>0D^Tx;Qz6n?$JH5NWb;Zqc@w(#K!pQ>=m!UrpSn!=;IjQVE&M};&rrD5!nZ1XrouHA{)WP5DO_#guPQ7n zHk`8X7Zg5Q;nC-e{|eVDJY?Y;6<(z9fQ3Jy@Hq+>EWAqLHz~Zq!XHvtMsv8!!XHpr z#(ucd!WSw0W`$cVe7?eOQFx_=&s8|1@Cpl`qwo@i8!h|>h0j&E-okSfeyhT@7CuSg zr3%+r_!xy76s{&Ln>hOqOR`(vo5?O;DfLuPf~O^Vn(bZO_^m&Tc8PkSx@4S%#pb~MxX z$FnniW0Nx#TQXapUwu#nVVZ|9zP##hJ?qFnV?dh^Y06|D4?Y2twoKihn2P0hPpD7% zM?RyB*!k8>wxwf3MUSj^$3vNX(?E7hCi~0aEzj=Woyn&=6_d#|bY!wW%Vdke2ePSD z=TGle;R`Z1?3hWhOuk)KRH?_I6N%s~R3ejYpP|$^(;+TamNpUl-zu7IuT*SqttIC2 z779yR{at7g*<_Y8`L!kKzYqrJSb85IN@}&TA9Q8MFlm%^kGzBATVUB9MzN@3)MaxI zpKev00sfa({i#qjtGymHF0J}Sd!uWMfvtu_wn0Y@l;jy)JHx(CG}L*LFxWXF+G5oq_(67t26oLPP+b@O!jw~T)L3SHT7q* z6`AaLI0I>T-ZYrWE~+LxBa?k5IB^Ud+wQ3Wivgod6A0yNti^`%;SOaNQw)lj&ygjW z?A;Ww_R);*e1KME2S;d&#fQ!Lbq9GTyST9YKT@8&YW<_=WG1(K%!yslib*z|SnkEJ zZ3h^(1e??$2X7aiK+mOvj3jwma2LgU8uvlZGB~h``6Mw;f=h}{pHMXO;Z&-pa~efg z_@c5WWs3glnenxrt)eTZQnWDaYMolX=)X)Txn2h`R< zU-YK(MQ2VZy5+BsDW#%azO5IQFZu^EmQr~5L)6+Oggbq$hm|k-!wE%)RCFbx+dJgR z^xMCcZR^J;6#b=&)>cw9c+!#Sn-hx4M-0xKQ1nR^9aURfeOoUpU$kOE(cf87A>2RW zYCUCq(H{rTl5)dy^*mO;D$_QCcy0?m@j--qIF{oJ&^?peA9p@y*;72?>_504<|ebGA?y<<_+q6=lDGZ^VbKM^)dvzM$}bk{x*Woh;aY{7b2eJs`bOt$aG z{{077t$tapG;e73Bcufv<18=9zvs9aOWOWWv8cCj`Qp@~O>*#+ZE5zeOS8|CP0d_L z&P?0gmN8Ro@5ELNkek3GqtJhRrRT0-Ok;+7xhX+7Tu*L zEX_W%CH+Go@0By1m0Vu2B>>J7fS6OYq_4QrFsS48wnIVyd8NQ9 z#l{pogwjb){ZO3x<5O_zC(_OzOIT5~E$Q3oa5vH$-ywW%hhg=CQ*6)h@G9{m7}jkJ z`wU@+N`c^tjfyYL{weqX5SFyv z8vJFy-2&Cn{@2x~b(?}s z1hXW6#6C+GJ{^NNa0MX#;6W^KAnt8w-wp_~Q?qbiN-+V%l%)%QAA`8>eGUXT7lWq} zoy-mQ$X>sD(R&sxU-aHZ?{iL0Fq00|z2fSO(HMU9l~s47&OE94RbepGR;U<}_hyNB z{_+{jPz*LmkJ~Abp;w#a8yt;3pY1U>y{xIIVS{WHLX6y zvViGT#@EeZs4FK5t{ldP;*WwHEt3H$24}qk z?9`0WsynI`-%&lWrZKRF0hTFMx4G&I1z&aBj#_z)kOc1I=eD&BrkeLN6o$V-yF)r% zLmxRtI5o1I`iHFk3IH|)?zP`zvj%cN)@j{5caK~|UY|jEeX%gFl+yAVfltk}rN=4^ z+8OD=RbR21aTw%%yP{xe{`EWlfQUL5I3!SrrV{|D($VCp0-1`zl#&uP5eu?l*VYoG zn)gw?BlRJe!yMiXp3YY5rv&kJBp^~qM|P{6YV)lT><5D1m9HW@(2?Fj9iw3#3!^%! zdYg98vSc0KRvoXY>TMvwG1;~Fo*hw(vn{)F>0JrI0r612k!=&!I4t5rw)9x17cTi) zb+o@9_p2W>^Ggy3F}68IL=ft7I4tkO5@;7K-xhp75pt-kGIPIv}tx{peLwVZ_LrbLmIVNRM4rNkB$tfrWSa@T8Fw2xrbn z2R>r*$e~2^h;W%_nuBcm(YExiDI?#D)Ul>SIB9dP8q5QD0{%`KK7S#FiHlS~WrA$; z_FQ^~cn_kLn>vgy9M>T)!o~o1Xu9e*ay8HL0OcF}+idjESv? z?>V&|S*R4ZfX}ZwkzwIxOf>?yl%(~=dW~yDjrBP*+v~H92Pl-uA%uvNQExpY*!^G} zJtG-C+~xuJDiMQ+zZ{HUbheFExE9E03$>sDMtFnPz3!OBDKFg`v)HVJ z>@%{9xi2`$$vy?}0W;WHv#LlpM%mH+TD2#ygn21iRw-2B6PM(CEA`k+saH89f9(8h#D8O@Zoj+rfUxXHzfO?NF) z0hyWyJh{jz#7xJ>c$VrcIkDspK&bsHF_v*6>s}*)Ai7@jAQENk{UdK7q;E9tq$CtB z1`osbM8k3FQ|2^P3??mB1gE~zK@M%IYf4t8rvbTGPAN%2F}PlUvXYO2=g}uE#wKM> z6;Eu6X_Dnfa9zs_nf&y7$tUR!GA3R?Ym_-KsIv@)Y&G-~2*}k;-NoQ17+DAMoXSpz zdXfT>tx3{fbRPrO6G{4;7fwtu&=Lt|Y!J_(6K&XU)4&OfQ^67c1;Ick((@WwqKtL^ zQXzvGhK@4GdHS`yEV(%^Hnz&e)}DBinNg>qus4Mdc9pv4(UB?qjruz0xIhS`r>$im zRkenxGm(lYiClR>3OnC7kzwdg3ZN;T$bG?Cnw*|`C;Wf~qQ_7s+ut@+L3^hLe}3GW zN>nkpgl3Q6BRJLbDrqM5W=wNu=ZJIo993QcEq)6qy+kcOVZ0Xisnc*;{N*S9|D?t0 z5Ho+T2HOa{M1!v~DKQP+sn;?ZY_2aOFDX7!xi^~o-U~ud|HGR5fyZ7@bFVigDyB!( z6J|X)~)xi^`gVHin%23a;4l&cS|K`2Co_+rO33?XwI_eQ* z1eK+nmr2_Fv?}ayWt^Qwl%cvl?AOAMSGG6DNYEJ->n0k-Bz!(>;hF-Ch zk+rSHHp(#ZtFM-+VK9*%TdF{=!4i6G2E?4aMY{*WezLR_u0sspDCdGtv#7D_>8fh` zbKn=>J0(?hVL#!i>-2hRAIP?iR#e?lXqQ9d%4i{Ej8&2J*wa(EChDI5%h6Zlv@vykHY`ZV5YAhgmi8KLzeB01(29HlM4oM_j5IHOA;n8Sk z_j3&cKL$Lu^_Q8d+y0ix{+7pRrsDa`Gr!B^4|ugr=St482G*mPRT#4FIy*4?LFX6H zZ6gI_D;XKpSZ}JKxis+3;bl`Ue(=us{nj$Np}@sbPZLctNVF;jyk1IbtS|NBRmNud zMs?PsnP&zwM-Rk8q-;qaDGqk8Xp=J<`3|mc9?SFv(-N^%+ct#WC|h={BfYD&Wmjt? zrWR9mM|#&Y1BI|ynCtl{SJkZaK-H{w-X?P&iX&#F7zqqoqJzKOv&zXpEI{90~WT-BS_ zp9cMD(x2tTFW1W@`g4W;H0w_b@#&kgwzTKo-Xq||b)-K>9SU`%^So&)u_OK2#T_lV z#U0I`S=@0&c5%lg9UaU6adF4;PcQChYG2&Zux4>bx~;yW<;MDs<{LU1T07QWFK~A> zeHtRRXZjwi%{f^)vBn zHh%4hU;oj)4qTe4y0TAy1}?4o$mUKMSjgnsMRiz+b@^xmOl{~QzEgoN{^)IWA2Ayg z;Lk4$o%FQ%_G>8Ryu@keF)Ft(bAsYzVoop|jYihBx7D<<82pWi3UB{oNod=#45jMW zlD-WVHQfe-`8D%vxeZ>X^`}99n)GKm@yqpciT+%nKh65nLVWtRY@6`>zZJiJGk*O>{QC9ywJU!8S9=W^EVRBCCw@16{rC9w*7)_`;@9=@>v!C1c?PFD zU|pryR|rnUJDAAK^g74Mv)-=xm353EmyPFP%%dB2RBB9=z00lAY}A^~miAX64p4fn zo>jeh_t@wjcj*CREE7*l+d7>5Ko4ygtUdAV8Z8>!~S zxzxTmq-Mq;wKfi^(Q!y^k3*t?3zd_nE$v5n?%gpm; zm(3moZ}Hi`=br|Jcmw01M66bxtg+RnbhN*YsEE}^x`1Q#5d>iMYwfY|aC?03FnioG zgGa~Bud%?*ueQh9L+z2DZjbguc);p|?XmG79v%5={astdUs$bDRIE}l;Z!&d2}T?e zs5m5uaY&HkkZQ0{$ZFq6H80Mk_QfGJGY+Y>aY&7hLuz{*5)E7^X7wO_!`#6ltBVr1N{Tri)sO*_YJ()xOX47&BDs4t_2rI1^srUk6Mr?uD5;N7I;WQDC~FgsB(pp~TEs}H-g^5n&-o<}IvyNtI@{rm~$Fc*b4!z@HG$brk6 zLbK2K>nP)PW!%?gWPHg2#>u#qn#94~wCR-rZQ3Y}`uwv*vC<-UQ!b%aTn06Bzc4+< zM0zqZe}JEn(#$IM*zbtadW_+}B^3^$qOq4z;7HPK2zKcb%y@A#=7FN=X4&mJbC;~h7Cf4M;1S7Qrpn; z`}TKCisrtqM=XHiYRn&SFcVsBk5y#$WphWk>VPBSAmWrG3xZ_Xvs>GK$4HI*&+mI* zhZu!#pUP~-<=qpwQrvw31}_>cmU>8`51%atw`O2ZK3wgD!M5G;oO6k7E6nH>pTw29 za4}au#Z6m}V_X8-d&)xFDG^9Qdz@?zTJzD+&y8A~wALXf3?-RvE775F4DYGs$GmZi z#Zo^5?3V*VjwRN6jqx<*j<=L+^V>o!X|lEXY+;$|3yvDsUN6`y1*NS}C-QuPvg2|& zsunUEUfB_8$UD$(mBGLMY}8E9$L`Oj;Xn{hSgjbw&!=RONQW>_PgKcbrAb zF#s=>^)fNxN<=A+O`}o02{XH8lY~6@Ppb2h5=(O|7F zEa|t(RgDsKCTLYg^UhSs3D)Kzky-XZND0!tWWzpW3yu!LZDuI=*``Nx%O4F1Hu9zj zU_xHo#sgdp!0fUq3G61c_v?b>+xus!cDQS{7<@XBbh{;4dvBxZ35Zeum#~LRWAsL2 zb=vOe7hBhd$;Lt=%{4B+KzpA%r(Ao#I3C~{0A^=+;aUL0x`?Rgft zpQ@Rp4eO0{%$MB`__FDUd9v&Tc^olYhFS0%A|@kvB}=kbs?lyS=cg!g>8zo{BkDt{ z)k1HFtX1My+h&y8=y$n8(%YwGSx?Ol3b2Tlqd!^9qihq98eW@1Fq>iGRY3)}PCyFMXu=Dlqwt86F{)x0DE=?|) zrTxj-1JEc2zkGwr*^cy-`YG1E`5N-7A>UYoU=4a7 z>LL@YhC!l+l3nzbOpbsU<|^t{N`?jkPz=s=DC2bNz{$axtvJrcg@vx2)e)BK&c=9D zw{5PlZpI4lcJ=_;6@zU;!*wy1_Ist}Gj*V#!3X{T>j7jCv8c(pm^hnxJlwpzS3H0R zf}dJr%K8;5^I~2mn@G#p{VQD?t0Thx=NrxLznGUs%$68S8QwL;APA92h%P`Vhkx%93^j%_ z8qdy+!Qqk%ga8iTSq6m8$_;lZLFc3(&+jn;nGkn&eQD6CgD&aLsM>?oKW1l;%LqZC zGZVQ&|8a*pJ#yvR9r236>s&Y3=&CXw^K3Y z47RGWT&9L_xM9con~#o(fWyi4zHYJwcsKyi3+%^}Q`Jor?m z>ZAdd%4|>c$kFNtw>}AjSrE`fijp@!Rkf@@7PqZBu3=a@?5$^mumk4)VW9$Rtb2x4 zec+eR(V3}t^|3H`b3F)MT6NlB)fZX&tXfkI5fTl#QPt!#Sgr_jtL14>D^rW2H$M%= ztWzFXM=eiNOVw#pXY+d1G*Vw*7PHVb7j+p?BWJ?_OViS)E!Gu-&s%!9!sC0)entAcR+Sa(xmgB0giO^hJZ zYtdu5Udwg_I^FCHciQLE{mG8^FFOst{)DF#dKmPKY$0-My5=#oW4xW0Ky)#<+Jf>S z$;l2m)8u2#R6&M3KQf)COeJf#)g@ywvRulk;)&y*OC3*Q(%*Z!#shntdDyTm?pdNy zhwFLFe5~ZGbi(^`Sa{E@AG+-^jWf`pf%paQW%@SS@C}#CoQoUlCgckHNAn6M%zONr zIbGw@_~{+q9pH|5U~+WTdd zoY3AcJNm_b?O*0fpZ8{bpN;`0+M6B^@LK>JH@*-3Q?KW{(9FdCtP&eAq%=2Tz!dCh zJeQ&QaVkd++FEv1G*9pOE%Mk~i+FAtlVsk-8!4&l4&?8?*N>aE4S|mob6s2zp1R>VDhh<@8jn+EXoO94wj9BZBMs@QdJD%F$ahvN ze^yU*M{3&AOqys-;EeQ8^8?N-hRjJ=Kd7OZ9P@U+c^5~ZyV^NXXEK#IQ4|ov#6*8Z zrs}Q&(=lgfH+R=l*{p%f3tRD#`^FC7+yR@eGV3zADfhNM%@N&&Ouu}gz*sZav}@Mp z{5ecR`}%7#*@=`Gf)as0jM1|%ur+*Q#~nM|Uz-kARRVmtgP5L+%ckiR#z%UdK)5EB zYb#6|Y3!*wv3%O(k$JQZ<%6WM?E^j4P)hm*~FsgG3TtD(4!7S=L06W=GmZqEmMv$7h3F;2d&EF zrvBo4QxUXgoSTN7jFrZgWJh}&PD2)`U>Nfoq@hCTte5Mhksx+TqEidgqs^>EO2J$N zC0l=FSP*r7b*Amfip;_R&c_`Cl}4t=S#2)S_!eul&YuV~chte1LvbY(Y3o-&Rr?yX zv{s6lAz@o@Ju)E3Sr!`eGm!e^9pa~3RdbSme8f=Pt9lLwCEUXSwT%^>X{_}2r?7{Y zLzP}8=Te)l;Vt-{M!mU)evlc+H9v~7f>SY=CuQ~TT3(Lmz7p+QpKbmYt?Q(9>%eS% z)KNYw!_P69O2FsR-?B4E-vZinIKXhV@|op3O}%rRN@5j?*)tydkjNRO3MkLmQwv+?~vVG7h$ed#lf;1ifO@IB;(!X= z>isO?b@gF)4np#tuRxt%5r0!{EgX1mqq(?}Yq#&d9um`Te?%rFuOCuFBff!G99ZwG0Twm4~YC8(Y~iU)G}^Py62kMkcTiQ?F~#3K`__GFg2UiKS|MdEtAM?yin7&*|oY|DwEA;cr4Af z>u#x~*?gTM8RYWFtyMbW&7+815RN{6<%a zh#h22oOJU_G>`NDoS?MA-Hx2YD}R@RYeg=24QPA$KJiH29cEynVO#-R5=eAA_{2)m ztxx=uq(M_hHqN!E1O7u%%7J}DCJYYm{YR)O->716H=f8k5VRFFEwL*ziB!~^PGn2hFtpow!+!C zA(s~vq!WayItD*hgE7US`WEK8nBL(G?9Qq0#;oKUr!k+M+03O=w8L!fPL9pzJCzsg z3Oe7nQ?AVc_txx8W0(n4{b)^U=&$Ki)usV(q%nr$2VDu;)HLWUel!lV1>og0Y&)`W z0f#5$Y-|g*AOUnR5!uS;KQ_vLha{#XcO3*B%r-ydNZOxkdI+2c9YJrU`3yuh%V^C~ zkJnGA4<{!x3z^7-a@F`!+jGr2eQR`jE5d>Q z$QkL_HxHWc9eFJVCeF9j=_t@&OMb)!s%iJXf%p_rVXq)b$v}|qZ1e3oU8|8EDyHu@ z{EO+px1(EVeuL>Hk_SgF08Te(qJM_wpKRU3g!q9odr5^C}`pIl7Gq$+AD8EJU!g?lN3waAM)ct7yc?QNETS%gNt7Ki__#+jj*} z?b3lq=l~N|KDlz8Z(_L7#HoEs^JdgL&`~cP6ivp()sX)sGl9@$Gg&ijV>Nh5t%!Ed zS%|jtlGYOp9{#QodQ2odO;BU|G&5E!JHJZni@_C`9IuJ1_3B0bO4Z_sV-mU2!kSvK zQl~3xetNqXKRQ|4Pl&*Uv#95(0J&rJ&6*;QlaUywPz)#|ZrDyl;AYqpz(o{pI_?Cj zg)4nm42aAFsphRzfq{x7{RdbUMJ31>TeR_%TqQGP;{#_|!t*uKxH@RnpKZRUt>vE7 zwTIm~AWFX@qV#LHlrH%2zZ<3dT?;uSp3fYm4yLbfY^t3L>~}5BHQkdP=o>q#FL-Um z=C;RPEsFXjv|9|RYMMlMs%L_3cJ^;~s#4TVll-;?e8 z`IFfJ?i(1F>lw{Y>HInw9)l|RBPNZ@@vu!eachBA+!Ja(wCZ!@_iEmcpdth&s9S`Q z;Zo`VB+q&mgU?~V+);0nmg_CZ5PI~C^p4d>xaW2I*>2Hc`3$(sEHZB74cI1cznJ4Y z8<|xDa!(k;-3_}=X8fDp+19c%b?rO3XMlpKk+(4eq2dnFe;5-Afi}oWq73;9ERL%b zf-}mtDXK zqyND;NkpDuF}!mxDt{yUtkU^)UBp}Uc~fUzyE9WQ($zMD+WbW8fxcoo{zuJm*go7z zY3{1PHD#qXeYba~Hc38kPZYl)5&s>WLdU$y!AOP6mA0er$Rd3VBdT-k+lso;CDIa z+TA~zH6H;!v{$R;uGnS(W^z75F?c)I0V80#0;6CTV%#hU@|9UUJBhS{?>p+qh@=cr zO(ZdN`w9+!l|SCVnWcGXWU0*`SiP>=f2Uex)Lc}DkBk4Ji&qzpu=oY{3PkxJ&Cv~_ zs1My-Y)O{s=^`hLzNWD&b$>QQ70X4R^em1nv*B&h>)04}klC9Fxz!9o*Zl;J8A}xrMac2b~ehPY)UEx}}Kk z#b6rD8#&U(uwE9prR!8lBbvu;vq*HT1yS#aF)Dz0zkDO+%goO%q6^L?Zy8Y`O+-b8 z=x1CZ$>JxIP5u(9^JQKvoq3wqOVWm@M@0}TcAF8EfeBv3rkx@><0kvM?u+U=$m(Ly z6Pl!gYqEOUN84H+O;vsFR#*z`e!N2%UNHdpJDC5CiX;C`NlvCOQknT36; zrgc*)HB#r130BRrH-R=11k^%D*+(qLIr^EPq=k}lCLJnj2&??l{4I!5r~k9+9N zH?-{1Dgj~srbaFytd#-6tYf4`nh5{vrz;ee*&Rs}KKq<1h4)c-KH=-XyiVb13Lj5+ z_t0Oa64sQ$$e{}V^s8qO)*=R|=_-nNe!5=$1PU^Ql|hFZ29|211A(jVsGsf@%DoPJ zopMUZ+1*C{Z2ecDzPh5{e8|Avc|^q+xo=#2HyOp7d=O;S8- z-hk^HJXubMMitG|Her*%c@x5Hb!Tk8lR7HpZfit!ZYA>|6{GbXb)`potX-PwiCeXp zsry`0HJd=Q+=q4#2&f+(3}xh6==HahLE>1;WjMznj)HyY!tQ^o4T`8$aV^CW!3ck{==zB8hl!?j+R%S>sNPRp;p z4fT~~O?HjUQIpu-f(I$84s1!)x)@xcU73p?v=x)MusNVwd42V`a;yQKx$ycQ`mz?f z&N&IW)RvUyCaA?gPvj(ZCOMfj3h)o<95m4iCAdtwt)Gq24C)!9f|5o*=^8!clKqWF z2%<6*sVg-`uevY{sP~_>nMJ=~RlSQ-KJUSeuKK4qACk5;GfTE020dT~VqsrZ+lT4) z{PZWLt`VIZG}-J&;|jE?8`;DwLN*_gY~GIa*`%41zClsRq;If9`U)W1y~<0S{HE5H z+f%Fe!@C)iB#az_gmcZpIxT^AJ(W)3Oh6VSNVk2sdl<&>S6DQ_j^%LL^9==*3Q21w zoE}`FWzk~rCA`-}F|+_Mt{7~j2h_!K8GF;zh0&{j)#*S28q#j=Zm{YopChn|Kh>nP zerKxX4V$%{M>N({cobw&Rf@YFh8Pbz*s3sTX;mtmIk;iRdV;J9TqcXM@q_$jRiM!x zc`X2B^6A^;QrVQoZ5j4;S(^2jCm*AEAs5wat4_52*`t8xSwmN@-BqiFy)(9h26KtbKUTHhrs95@{+4Yb=~|&lRJV6&|E9`vn%lQQN2k~H zSJx;yZ_UW9_00$q=PQwggWijA>4lw!+5iv%@&2?C*389$7TpBW8e&+3Kj(4|kaIAR z^Y>t3v+*Ms;T^}Uo@O^u?~^iklWj8N8pp4(s!Pge8nF25*BfZO>ISFU_#` zSFLpo$T{BRj8S%r<>ugu-!tww-uZTpEybY9=!(fJ2H*b5?%lybHuc3v zC+6;)|20_!$9fo>)czv$?z!XbEQu9zZb@t!ko=O^r@k63iB%A*g)j|7^uZiNsase} zS!rXqkCuz?+Pf@%?RtCkFV!S=n9N3s_D+|)P7imVnfvs>6A-6hX-2aO|ogh+5aYX_~|{_+Vl|pOUGpE6mZ!0C#RM%W?4R_ic0PY z3}-j7kO0H8U@B>ht012NGxItzJ!o-S+bs0ejS*Dc?dkl5P> z$cI8a>ScwkjuQYaJPq`Ot7`2Qg_1jG)$V|e)efPd=#E!tfu(*R`0sxbYjp8uIF}P) zqrT(O5QBvg21k`Pj?DztxYK0E5>*ea`lVHUGLFblYIO}h%%dIYXL#_VQ?OP9&YX0A zIYc74ZfvbYR~G;|qLujRJWLP4o1bIJ#ATe436i-TROl#+aR4S8lAmNq=VyjXq>uv7 zBROG>Ux3|lwrkMa8FabXjnnnleZHi-wzhV5?VQ@VnV#BtwexEi)Six>){ucdVx}J5 zwRN=%XV=c2o#~l9XZGCL^JdQ{N3F|IXE|msn0@-}y4edeJ#%X3%$_r6&RlY6J9>DO zqI=G~IrHZ%m~%Qg>gFt*TRV3)IT~G#Wd>vJoVj!7&YL?w(=&I$+|%dQ&0R%H@j|59T?TktD9f9pbm^Q_N9;MuB)qCxDdi+ zdKS{)g&;=S0BM7nZXY3B7M{M4d@0g?McVIj?c$AdJcnD+l$>2X!6Es}3rDq&oTqh< zzF_Ky2R@kXpEaOKot-&14^t@3k+oRXKp7I)#H#ziO~vMHf7Kmpri#HGlkjL&6(iR$ zl>%`xp6sNyyEMCWan(O+@x%r8z50!=PF2rr?wE}_90bj;lvKd&)?c#Pd>tLtBhy{! zwnA-Gb9^s$(x&Dl`Iy)9E4^|Q@a{wF8m_D1EH)RiO+O)P0l6n=UC8g$Cs?o;+;u$f zn9R@ya6P2*6@%~p0P$k_Bb;<}Gltm9g<+1*Du*Cvf1a5o`)cK`cCqq!{W75K#Qo=6 zbO6Jy&5*ewD-6%4J!)D7F{k7EnvZewxkEg0Jozm|Zu|zro#t7F^Yd!w%*a^yfg7EW zoJ=6Hzx~?+Cm$Q>!WL4;wm_TX%wRPiqyZHE@1}K{-Td+wtUW_kr$(s$idu)+dnr|N znDIPXAwEkzV2OAWueUarzlHSw*@Uyr{YL&yJ<`?>_WhE? zW$<3dTSEap?9ThN5;Me~-iw&f+tf7&TlLk?8sV+PeuzFb4uzw5AYsGk9Qq!coO$LE zji$4ruJOW=i3k&Kk3u*lVuC3B9~BSAUtnC~w?#W=Wopd4{1TPwS*Z^>8MmXo*I}SEmn7_MDgoQU0Ewo z3})hhMXKjCv()zWJv%wm{h#-{e}3jHX$C`YK-~crmbrMR-D9>SfWtJz&Sz&uIGt59 z4^Gwbr}shVy6MRIaOzzh#;Kv`9&rJPI2Bs7BxFsSm4^(x1mxZS_`;B#wF?*6T)VM; zOfW9oy!st9q~NTR+!{<`PRDX(8XKUsjE$KxJ!!NbCOZT4;kP;eM!+hWt@Py@E9n3o z)B#HH6ErhfJ;%k4@Jg1ZBa58mv zDC-+WlVpeGZB+D! zF~2k!MZ*Su%Dl4Dp4ny^CI3tQndoR;1N1hU6OX&>)(*&LKHAC#M_XV-v;`4ptLv~; z;|5Cevl_c8&5s&g>FeBpgCP5b>EFi5{wvl0S~H44LEe*KihNup7U{w4)6s)^{`A&c zN6K4pUc2K}$dd_@Ukpyg(Z_q88l+v`0GXUY>xQfnw+jb4N}_ge4K+UG?cdwBR|-yO z=R)h1yt`;$hK^hgf_^6qR&8inMrz<}fP22pYYn^b1q^8 zQ5k0?SZPT_mV9ui7<9eesXC=h)nUIHg{mLm-ipD&j2y!EezwYtYvW$^8Uxb9xIg;o z4aUVi_D7B?p$`dV?2_F*TIBA!5{vv{2OYA4Hx{|?v8i!qS2}qdiLL8s1`Xk&Hc zTfWk9QFhkXvd!^;hW!iLO-1d{IDb<8KLt!|ekqE`%_v9RWG$Yq_FVQQT# zE~=x90Ps5(`ssPFHf8kvjaIO_!eqSS_RHuKL`MD}#)Y7;Xs$&=a!ZMETD>lt7ePtM z4YLK3n=oY$BOIV{+l#g;8z_2Evl#{Ii$=WR3`4kP;q(*va{Bo;P#|D+57J9R9{iRU z_I^HezEfKPk@r($Fr>C@)-$gMNQOy@{Xk_gk=+jWS~wyVPk|5q2;j9+dYbk)aeI)f zM=i-a^)$R0VRc#K;{R!OnZ3MTu?Dsl=U{UuaRvG6bUOGvUGiuU;A@lkuokfBf{*}xe8zG= zV#~AaAW+BzVv>%a*4h!&P)e)KXNv4iC_x}t3_fd9W!TUx-zJJY6)VAP1YZY~)H(cC zQ%7VOLoW=Q97}4?XnZCWu`I1$(o2N{&LJJ^twC26aUhxyTg+NVyk^8uQ}Lz?13X8C zhb#wA5cgkv9MqPql&z#hN31^%nmzpv5g=3>Als60ZjC$r?5y`cx#oD?sgan3#n3Ai z<53L0W|VdBT48hX`u1iiwTQR&dH;W`cZThEC2>1`C^&2Yi_ z?BlR7mbseBv}Y_h1KVV6=C2PjL=ki5>98f`lEI7*JJJ4lS z6@%lij@uX8ff=#&^yKxH!MN=C=kiGUuOWIJtnEY_J1D@DyA6e!?cpJbuW4FP7Hc`` znC`$@bn)!siNNb8MV8a6pB9%j0me@sxYX{1Xu8U55pgyocM?7^a@MHeC;^>Aab%=S9(dt@hUtdsW! z%e{ZI{04wPnf$cX> z@ad^vsNZvX@fsF?v)X>v<#YaUI#^hu+*|{u7LhEkOWF_t{k0HQmfW%$mxXii6n7pN zC5pjccYqr3Emtj%b@sj8;_jb!h9!P{NVqM+YQawcjcRK|^Zr(xkLr-ZRV?F67{ z^(_<+eF3XvU7ISg*Ebo4k!KwB+ont04-JVTo$&q1o^~ak@XInI01xLUp+`8wTgvBX zfl~tQ8=Yx>)Lnsf?i+{fv35|8T)PVugW{l&=A2FYC<#68+7xTwHlv2ipU*3mVh)z2 ze^G-rS4;%!{1_SH;MWO;yE?5bS@O!_2z1OUfeT%T7m(<;Zcs)im8bn6eBYx)?{nH7 zB>JrHkCW)3gG@|Og%_3Rxc~(RN%YK6K0ikBghU_rq?-1E5k6k zeL!?f5^*C!=}w-!#-hS4K+g8mGh|KZfLeiuxzR#~lA7|<&Vwr6{R}alAKbE8f>&LA zaF}`PZ)qWCu*PpFQ=N&C>5OCrM@eyvD%j*=kkRmY;oT6HTI~#~`RHw9RdxWxpQ4K8 zwbYw7vh!USgGmGAQ|pSiwpLKbhA`MDF&ty&U29HEwxk$bsciXW@*pjIqt*n?SPjK@ z1AN+z@GZkl%HVog!e+@AyQ;i>vZ%KbHB!+TV{P&TQM}(2`mvr{uV#ro(SMeXd13K5 zM+CpXyViT){!cI%5j@Zgjk4NkUImtTPh(Rvyyd>3;A@S(7?f;7Zo*3WKY#)vCd~KX z0a}mO0U}JRY1mh?XbGk#Gg>CHKw^OlB&x1H8(u23&hJ4qOL(GsJX8L4o*f$cq}Y$h*rV5dEpO%Yx;r1cTs6z$( zyu{oFF-Klc!4OL<6`Q7={g|W99C53(bho{&n|F6o`96dNFvZ|{Wy>v>*K6TBZ3T)Z zMlupP(viOF?Af5(?&)lI>bgaSetw=-Y=CDbLD=w&DsL6S&$+vy;7 zrP=P_M+}W6t$u~LvyvLia_92v{@1zl4UyT5fw;#F6ws&)cmAYo3GTS{!M)**?^RRR zody@iVvg9HRe~dC?)JIV^P*xLA8O$PwTYdK^zmCkQ)*!0$9`d9Izv+jRl<)&PqKs@ z#~|3Eer&Vr3}i{?Q4GE#sj=SMAXf|WEbcmZ1ekQao7qx^T5DAq)cI!72Mu4#yZvEd zA$Q1v4_gonG=wostm`~mxPy${q9LKQ%JFD zQB5;ki^;Q}pTX#wC^Vng1btu^}=*yN~5Lzxk@6Z3KU zw(eIn;RW>V)$b?WYsg2GH`cf17M!4O5Byvu#_LmR^7YaofyWb6mp$Cm-*0%oQ&NBZmJ39s9a#R*8;TLqZOe=vYQ=%KH;_d~eIO z)YHN1m=C>xjyaTK9rF{7{SJFhB?Gq1uKa6cVdGu-Q+j=B1R3{H<;C z!8W9RC?OMby{}9z^FY|iMbb<2={_J2>;?LmA6{h4LJv9PYo2E0)14SPo~1tBhyPP0 z#`iJL%6I&~=ws-{5!G&lN&lu18sEqK&E}BfKBmj{F=pd)X7NjVm7B*oVlE>>D4}#c zge^RqBW8)vZl7P%esio`4x$cqQR^~!o2uA==APfQ{HtF(bjn^!E!ejU&Q@pT+-uC;}fT;JclIWvvo}TgA zb8_Yb#Jd1m#_}&aqr~#>_fAQzv;3b5ou`SGf9i{##|kZfjoHtHE&PxgCs~v&i#?o= z5gpB*-ms&?gs-K8nTS>nhno@94I(-Bnrk4%`r@mMR&oVq0BwU<^b=j?ke@!8aF@51 zZ@E^@@@_pl>w)J!McU=8X#?rm^w%iyHE_d3%6b)pH9*o{D<5pP;FpGP7jh=8#o$*K zr+FbzSXQaKy)GUg5lqaM{anao-S8oCcESwFiAD+;651${mB(KUo;prN5M!J4uv5pc zA!m|#vnJH@VPB8Iu$m8}%*d4?p8G33-}EIHwhV&|Czl#TTzf=|RF--O#36Z!gEQ7#u;Hy>}b- z;u?f&wXAB*KZ@Q?uK@+2%*4n6FjfINJrvG%okNBpTQwHdwa>LLI73I8rrzmBkSaZt z$@X*Dib4C-U|o|*JW-6M-$APw$GQX4$tl+vAE!&|^b_+DqBT(mmZe|igs>~BKDK!j zM8L2ZJaKF!*_NH?s0VB$rvH_m=R1KGm7PhNYmJC^ZU}wQZU${g!mWLFaV1L;Gju#| z&DR{+EoDAgXjOf+vdy#rO?wATv-h?_P3hLIBU-`XePO=m-9S6+aiFycU}lR*pAwN4 zT+gdn1;b>u1WonLu$2~W*^(KTQd+?|NDE61VN3RU?%>I1d@V0!?%;y5a|iEx$#Vx` z$F)rAruMkDRI+G5%Li#5M!`C+C`aks!KG5+GIIyk5it6o{NFWoFbY-G?OFBkOftabz9-Muoygw%?YcpZJE_^2+iC>*XA%vc$1}@-4GP( zPk8DunDq-cyT(L2O~UywBsJ2$>QEmgfhS=uOB6fS4A)yRqoLbzL}-$%67{JHM&;@w zeO4dpMhrZm$lrM;@Zh()(TRXnvi%FPsgr(zGlAc5ZVs7>K?jpU5spc%hhiF0NVy_` z+@={MpF5;l{N=9Dc8C5r(Dz{$FVe;FJ9;LM-rxmKL!wz+VNe4;1b5nl5 z>Bagmd;qneA6;;vBhnXh6T6(+NOj_&*%Ts&C`)--*!S2J=i?QKDv;EVpo@U)IW#wO zlpsHOodO44^eY7?3nXwF`g(91h}uAYxr>@MkZ^!~xQg(JB;B2~MJp?|dN_mpR;~3F zX4}@i4a=#yC#s@1A2S|k zNDOh@w4hb@u;Wd*jvehI3At_kxYc=xW4d1vNwy6#nb{t_>TqiXRMhYt4E~Koo=dHs{LFG#thL^;w% z2MD!W!3eC-p=2;Y^|yj%viogGY0XlR?7Yi0>~f~+XGT(aDU%{y4e9CyB)KQt5kX#? zH_2)<<9Q!H#QThBRzC^Hs5+rsoHz9DtX=Hl(aNyv_Xk;js9|h8w)*YVLaVve*dfik zq(>G?Sj5hrj<^%SUWld4*OqNFs`c0cS`Dc*FsmwSjRx3;#CzCmZ zQRaJjWRn4Pz+C6@zjlk>O2n#j?{Crjl;$e@4SD&s?)E!tVg^2RX7bVfdY!-xw^O-n zd~29Ezfk{k=pDj|*PS9AN_=Z^98LGiGQ>KXFki`?$`BiZPmr=#TsmLpoIEkNjC4L~ zW|#TJ7n9DlOZFt4ugN4N%dOZ&-bsClFX42gfb^pYl3^P!5-tYU2oKk1S@?}xEsQ3p ztK<83E`89#PRD}g<%Ig7VSgrnZVgXbVJ^*2Z4^gRbn?zFqnj;HCo6MQ;Cmkwzz*46 z0P67yrn#>C;Tm<$^}wl8o0bjmN(Z<=U4Y<#2cru>TH1wSrRFV|K;|Ua2`?n-HyxAx zbflL7Kd{t-QbtsBM>M(_@6ry97Y*bF3@BH|b3DlnX;;_Z`KB{DN#Dqy-UoF4D&Q@c z_GZbpoC6tA{j@M(5Nu?wq_%?mi+-Tlb@KQ4OKzHruDW9I2J;M;`|7!fA9qx`lNcY5 z_oRv`=8RI*CI(~btVY2J$q@FHiOH0!ts469*clj9P2LzK>|UswoR^nOJm}zh4{V3i zQdKMYAf0X6A^By?)*G+xy2iR7gb&=p<5KuKe$~fa;Wu&Pa`Fy9zDl=JK)BS^9yhzO zj$dYS5ON!>o_tFy{H)x)Q3Xm)#pKk)wtFzeJ#Hs34YsqEu5tByp$|{CBY{G8DsT z-=t1f#ob-yJ`~B`Rr#zi*_g7&+OfY8V)SQ2M%F>zWWj;wln0;41?w$uG58rlw>!MO zhA1|Ox7R8uEZ;tAW}@9y(LUxvRz{KN2m=XAo3dqwG1(lFj6O z{UoFCkm&GHy(1ekss!I?K}II-iq8DnS`*jsOw_#&>8Q>VeVu2kPA++LxF9a}s!rXa z!YKxgaoW`mlT^*7;h@J?TU50l?I;<+58Jfu9Lhwzs8_CByQhA}g*<0WwIg1bhI<-$ zQ;}cmi@O5Gi_M~Nf|OyiE>pF+_7oWRp5&W)A#uI)ec&z1AkA{~=Ov$!JY%upEDPcf zkq$5Nxf%B5*E(^r`l~Ig27`9kv@goHfqX^`XORPAtww#8?EU!ktIuE#;5N9a>7ggW%(E zWKWjz>y{Q-%2o^R(Ncb-)Q*+6lnw*4luPbZ&C)e9qK%2911xERrIa;DSW0(xpdM@7 zO@FWfKpU3kS5)vMTny%$^CkEe-08+Q)U})&{4Y_rz`K-Ax=t5iBb{_1oW9n)XTRXQ z|7>`n&6@N$Z2FO&I$x>hEYRts`*R zfAeqXJSTEWQ+eh&dVlmt1E5%pv|vGeXLPQQaR|9Ni1bB!d{q(wF=v&=YxHbJt%egh zBd0@0B7VE660;GE+0aeZ+ zfjBcFhjc_=uW{rknK`n??lm=9UnVtfVoG#mK}ZcP2L(^Q)#NdvM%5h$cT6D0FD+B# z50;^Z<85*t#1@0ZLXA>AH*E?&70|q5h1q52Y~Vr{=%J2xrv9OcN_7LGk_bCODhUg;Zuap_WwLS%>U| z3+UHdMh49O0{XaudMfBQFIt(W6 zcjj0}xEncyXzH2Mhm()i3uQkkO%LI`0!d>CzF z?Y27P7Ia`hIKvvwx}yKKnMz6Ca)eK6Fl?0)+mXf+6?=uwCOovjcdP?gAt@!IZD#1= z>m|c`l8Newi_q=2S}W$c)|SIP{iJOCtkpc@^_D&CtaDNp?+kd^dfe{y_>C{5$1U?? zJ^nNA<@NaU*G<&py)?;#$AmswL@77+4*3}m`LVZBAz3R3B>{y8dJxjYxVn;34LqMk`9_zT4>9+x+x>pa%M5ZTO(*Gk&(U9$aWS` z=xm+5OR4#h5B{DRwbwai5{7;Qpk-C<%Q*?pmEG8Z#CzmjE=k1Jy~iBbuIkm7UsJG& z{Y~1pLuu)~`t{P593hV!#EQY$Q)HZN;B~%Uy|+$D>SViW^H&@kLge0s*`hgW-k^(b zB)$Z8R$Y^dS9-c1c^(qrCqGZ5i8{+}IV3HH!;dGYQcJUsc{J+D6Fk#MAv-iU$#VJO zhkw`k3K5^r)7Q_4-ofuRc{@o7EXIt7ARh6XcOh#5Z}o$l!qT<`kaRtu(kSv%1mw%I(QzE@TO|Q zGsgEZU(|%k_&!D!^fF{|J+cT#>?-Uoh_KGL_XM%Pe~E~XQFWuxZ^l}71(k=BU|UUO zJ`tI`bjWCQim{FP$$SH=QBAr}jFYWBpFc)3(>888)8$Y_8+@TZg!94DXiZ@1)^T@> z0JxbrXyY?gu1{F&WKSMiR2uK+3VctaImT$1;Zn^r!3B}kb4b6POyOWe<;z^j2Mev8 zLvA1}FPH}50GyZbe&gDahJa<)sODPm3KAWa$TkQ_?5=*W(=~Gh;>4JcO9VETn`i8w zYWa`V5iW34E1V-EZq`u^V7=-0UG{g7AUAR*iJjO?U%b#+gEyaKQf_%yE3e6&1uwB6 zHmLNh!S1iA#Q3uYb;|sIan?Ym3*xf|I^8x3!u%qB=WVSbl#9LIzxidEikR^J7$&q; zcke@HnBi5ljqzRIP@(Whnyp5_gi9%Z4$KmCp}UyXcj(>FJ+v916XsT?wqEq5jwzE1TCuzKlciSnq#?`;+hq8$CKc9G#JKO>HIE1pc& zzA?}mzkl-~bt56Er9eOkDKbFMS1EPCU81Nw09wSuv!Z1GW<841PY#R|4h2w?+sys9 zVrJ6&CQhYYd4i)IS<3p zl<~DDo`M<;DdYq&1HhVFhQ zB#-BZLkWmS;0^PDT)J4XVsb(*!oJCkU7So9UlTLL)Z=?F>yyBgn^*t>Z5;Cn?|-YY zY1G#>6ipr(bxvGw!ZW6lzZU(4kxefRnQQRNU>mVcv_!0Hs1edf`*J%~@MkJ$JrYU9 zV3P&uBwyJp3?J1|1zNUV%=N@g&bfsXB%ah<^2a7Dnp1Ll-)w75JeoHRsh<(gC;Gbq z*(aHpKMDO8KW{Q&$)g&oK*DB-4B^As6)35o>l4=Cc*agdl-XCj=POI%S+D5Il6I63 zMxkw4@9LQaREH;Xp187P+cBQIHduA)6VA+q66q+gx2sNHgMPjlq3(dji?2G}Lb(w} zp3BMo+f7qwKyc$6At&#i0X1Ww+`$vqoK{+%q~baXM{7>TNntW0R)tu5KWCC?&bSwt znDZgGB=By!bPs70^vyZfW1g`^v(C|W>{8oO7h$C&Te zITH3jaHw#WI_K@~Le#Jr9BbS4w&0UilT?6O-iNm`+k#`mGHr!M<-dZkZ?I`d39oG} zN!w&G5vXl3`03Hfg`?U8%Gh(v;2G(`sx{`Y`mgxnxly*+r81Cckvsxu$EAELbW~@k zbn6i|l!nOBh&<^E@EUo)Rqbw&0Ssucn#?LAZ%kF3*|K`NFWkGb-ff+!i9SvvfRu>< zZaCVArM_4f(hLnVXb%^`Bo)gt+ML^GU+~UkpN0b)uu9#CoLLcy^t=A57(8&X*-7ZG z{;L>lRamUHgP?=Mx^2V|bNx;rTtB(`REx@AQvprji*#~p&nJx@TY^XFnH@c*B`2$1#3!kjRCc5gd0==S*=@MT0>Sl-)M2_)v4Sr++^b4<+(yN zB=EAEZv?j^{lv(WE$Jsxtu0Tq*y_9oZCLyW&?E05=?%?k2Q^dSL?){JkJh8cEjB?h zj9Z*P(R+(;{IfS&lLebjSVO9xv75K6}WVi}v&Zd3C?%!{9|J*n1#GJ9xODX<@NLVZ!e3(Q2VfR1o zyu+LARF=%a`3RlX(q8CJisCzsRBCiFXezaKbrI3bAt(mzU#9VX*z!faOO~{AAvsek zO88mWg_aiWvzlSSJ?^u5N9iso6Je1wbUu-Ih%UyAM*fu}x}hX;={G)?=H71DF<>_I zE}O8(mRx6(&DWSvuC+&aj?hG}MH>cl{w|ne@ID3Qm-_2qia~?L_`&DROtNZQ-3mQS z9&WbkT>swc1xTNng+2r|$HJsyyb|&%SDk^$w%dlsynE27+*&uU;Py3!+Z)+f1E)oP zcgU=NY5tNfKqAwATb|RQnX1(9i#Bc?7`c_mRK4Z`i6UH!{4sU42;)hZ1~qsjzBtJF!Y-uUomOJzm4|21RH** zc93UZ#D-_cG|q-MS-IGTf6RsV!iN6}+xIeU_&v;%!i&KP-Q2ZMy}S*-9H&0P51g<6 zTO00&hR)!GCdqdK@ldh8)MQ#8fi5A1HkLx=osJELllLbYSy<<>i15aMOAqz*LX;oj z-L?flz4TD>Ov|bpO*uU>(R@uf838Kc>x-;E;3TGkbB>olNoDBA4bdSvTod+Qz25T0 zOW6sR1EWGW{xVi158`6iXdb7A~%z+o(>hu-AwueBVts!82GZA@n0x>*xPF+ z#qnBpj9&_>zVzM;xvvo;q3kG>6-;IX+(*9*zD6p?B42D4JM7LOT-BZ?xarmwS1e*} z@xFsimcW40W=##5V>PySUWSIN5do^LHfrGx+G4Q5S|k?7GA?|8)Er%iQD%SY4=ATa zxG9F?xDA;OKsVi_{v8Ef;;HT{4X2;v`)&OX3UQAFYQEv^wi$T_+ zo4{{USo4`+(K#}EN%r3Kn!iR6AI0zLT$&vTbG=%)W(P;G{(!k((%W{=3Y?Bb%NM;j z+=1jZ0->~?HKxgWXwn5uK*vub=hGb6N)H%KHb4_XMibquPn{c7%gkD%Nwh0ynP@T_ z#AFcJ(sL^5AvJ~))Tj~e0uoVUnILIkYi^yWF{BFKXVFYg8#S!XIn)nQu5Ej0mucE7 zV@0ZgTSs! z|Ga-92UJX{^?7p8m(aVh1oO<`@i%EsdEEp+myXN!e)^^VkG*$+udAr~{*zv4S}rFb zYC(jcpsf`M2--r?6gUu|!Eh-S_GIOYSQ!rTUV@|hP#;B;~!VF4mZTH;i(L~ z-;v#@4(vfrQ@qdCl$oEbQ5fN9s$4<}!n;@C=rVjXXinr(iB-MF;OLUgg65G9O%X>{ z1c3W%cy!=(9A3{DS5Zn0z3!tKj6Mf0u?J{4hZa@+{{OvXa3qstoC-Vl&|d!Vnjw}D zE;m%Y;(&1>!3K{7lb`Qs1XC^sPc7TAC(FJ)+CS>y^cv zV$Lv%smqP{yP!_(##pcJu_M*5aV#I;t)Nmj3KW<%8_rf+luVIH}jZ{XcJ=#)Mq+!7Zc4F)d+ z-*|{57a(o9r^MUdsU#(CyRKTm}Qu&nx<)Ztt(B+gFY^EUZu;KnHLW*gP}G$*HFi z#j{P#q$J2NjULMNwr4j0eAofc4gtOTc%WUJcodEQ6fSy^E-9Ve3z{P?#X?{;B zb72}Pay(Kfr9$vQIAn1Dt!6vT7)h`sj!=@cuTWrLwnH7hE&Ulq zZPG+BEEG@oYD_IelS??%#})Gd&NzB3tsTmZaP-*<)}$%)u^#&5atNE#*mXEK*4Rkl z5!sKngo}IwG4Ws%R6f>GG}pOzAvm_v4g9*;eC~wzOz%1n#wE=5%*;14JiOr}+sh26 zm)9#Kyv&o1-J;l`3}{I=u$dk2#8&Kl-mZOY^X3NKSokLlvNLnfsGDxI;i%K#%47KM zi4jiQ&SrYCk?<`F(x_ynfBx2LXB2QA#k2nzKUJEz6;Lp+F|mRCl4{9imy&xKa}}Fu z`PzQOCvzaII1$EsYs7ey8eqJ?>>x?dvSDG#JSl*1{eV-``IQ>bpY)PkDX7gHoJj5C zD_x>(lWKl~b~`-e^kUfWVkwAmj0uR8J}%{m1IQFJS}mQJ#3hLKHIbPecAua}pg~B0 z9@}`iwZYD!h`k$9o*28rDlzQph>5Lj$_Se7N47y}@z|^4jL$%;h$)k0shFIub8m!e ze6Fpmo~t!k9PMkj)(v^Tt5y`-H2Wr=(~E5m-aN`ly6sP^QzhDjewu z(WtTDU_62U0ng=+9@KwY$~QVfR60xgKYR*JtChI3YNC$T1Z{*fL|Tx9@~U`F=qT(b zIcp_Sov8z=b8^(ZYwiSaHx?vr%`qk%INk%Lh=0Uz;XXZ&-bUW`>xhRKn+Z-!NzMyY ztpJp=>X|VBO>M5A`AguBPR4UPMfm0JU&C6Q$EQN@8P|wj)@?ncwbPJpe!KaV{OQt= zxA_R3nH~J)x%z?WR;~eg!=G&y(0UC%+3O@X2MhW0d)ZtOCv#hHz+q^+fdCy44jj8 z3s<9kQjamb47X3)YEq{QXi%IYz6W&Fi&1Io?#(Xe#gxriQTIOSkAB1?OI908dbzk? zd*l!5SEwKx#^NmI#X8Z57+!4UYq)bw_cBEJs>Mv~@9vgRuVV{3ObXR7J>knc;ZMkz9Wzfx$1lD)=S5KRFjj&~1zLWVrinyrKvQ;ihpfepLq zr7kbpGHt81*t2=Dy2lvK%w6=cq`CS~MCWe`@2iKon_6QGUZ~96zz%G;gw>&BpiN=@ zb}YTix9_}BM*c3EuK0L%Tmq&ZWCaC`W`e4kJ9?KKKg1+m3@Drg#q#7LV9>~)qi{Q9 z?O`LlyZJXN6tRa$1V3)kuzQa89yP(=^31S!e~1LY8Ok1VN4UdF&NDfhQ0AIwg=WG_=kcGfe!;H z9tCuD8qSO={HaMZc{foK>ki%OrFXZ-GbtAL)drbQku~DGe=r`1kchdsj=Zp%w?C^J zC2;`1VWB81@bION*0byjmPQj{rFg6PpIXSM^(l!THUMZg&fa18s%e^h_hHVf;TQH` zP4;LokFRA~5UPY#e5O-qlanbIx$M>(j!)DOSiX5VnYsX8#qV+te2rDDWSn{UOjwA< zL^jgr)_{C>GbI=8+)(30{}!ev@n%!?czdN7)DJ*>$TeP?%6whJlw81?24RM{s4r*UGuXx2`5MF zzlB(>1OwJ-8^LrlY0mROu_WW--w5+|#&V5WWOhiagdzMsx|?+KnC`D^&u;XY@208ISF(RJ)wjChpMGlG`0pLrySh zN-&WggN?I6H2Vk&bz_)vs)K^HtWWiuxxI0G%M(NGjpO6DMX9$1_I7p<;6_@}e)NRQ z?T@sdN~i_`Tt8jC@Ttx1r*eaxUMAYh&de@r7CyPT{pejf(*_9B2WyWR`#jdI;nDuk zur$`VO=CTL_`ieX*%VmM_fuCx-C-w6OYf*d0F2z$j!Ff)YQi zAhCIEwjC>JHrLWW)Sk_`r$y1z0-n}pzffbIU(ojpMfCk5eCcrTPJ4$R!U#g4geW60 zLL9-qd0~A=cBvR}omSl)+& ziWhk~y}DCLPnc(n!}#iZyP>?vzJ%ONcqt53q%+fZQBp>z<$H4T* zD<)w<)7*5O6K#i3KF>>b<-kdBR#EPE>}SR(&90W1Gg=zhFF^)i_7P%@^HHiyuGh{t>u^4J7(iPO}q1Kj()FfHCLFi zxtEp=zVq&CDhzi1=PL+-dse;iB)NvJ53a{|ZAIs}x{6hE{+6|rBS^{5T3J-d(Qj9G zp|QYc1wa*6~MMI(7Z8c7O+8Zm&j@gVC z=P)HRH7PCKwKMq%6X(X?5w=d`)aRWGmUT{)Gb}!FOU36}^;;@ly@lfPzA9Um!S39_ z+~E4fyR6GVw^sFa++{+s!@@?w1)cEq%Hwk{a>@OwT4N*vHJbGFj$$_9dgveYtcO5H ze_nSpnG#{mZqHF1wU4`3tAw0q>ougGs)yjB_zA*V(#u}7tyu}-1Z9VbNncXJ{w?8GIc$>K+m7sUvPvpjwR z=j^f4H6!_iYRMEG2X&(}WmVD|M>2NEH~BP9m2Zbf*+~-iHhm^9Hb>;fGShC>w^}J` zQK>gu9Pw(~vQCfNe$~|*s-(GccbO}4?l#5c%q`x)zx8<^)iab;mc!J$WYtv1r7m%9 zRlXxthRHyOiMrH^EA*HP^;@_=4+xvxvINBrZz#e zz8hl)e|#xX0smD^8Exxx#^y)j!XfaP`8hm>aJoE0trTlmAOUkeeenR{m|ad1s@d8$ zGu#<;ZnQQ}b)C>L>t>PVT2GemJF?8NW&&+vaOpEf7MD^8-u3~jYoUm+r=Fu$*O;2p zq|s4()ek+)hYhAoSB6WC0n{ru+n&yxy~9+WzdbarhfGXTjS?^@M)*kb3Rh^;<@U=a2L@= z!%^}_(3%kRzYdnCZxV^Mk^69|jL$oBBfC@@4NtO+2Ny1ff#RTbKqL)Zb zE=)cop3RqG=DV2=!BtwGa4}sWsHD{}OFsL}XnE2FMt*s6Bvm7HG+yGhGiE3aNAz$r zQxDr|U>@ZI8A?((20xfEEa9r6q(o|eVw~knY>JxPFd1d%siQpvW@3I)H2ttLp|3FHw)&XDEIzV?Ap^wjb6`BK=I@S)~nBL{2nEm-A0vah(Nt}`+ELAf)%|2u>(H}}6lyYhU ze{oy{9U^B$`AQiUj;1SaF`W9y4y7a~M~YMOigso5rEy9+b_zEhUuB?@_fi2UM=ihm z$3hL4C=Kvt^#W3<5GcN@VM?G9bt#D>1S+A)PSS_xySNd-HNsI|Q>W^%Lcci;Ko&*e zUM4izrbs1c0@m$@63G)L_mN7vn4=~lmE8L$<9Or#PyfqWQ7%%+sI5jSNl00mpa~xx zC&ntU)girT>n)q(L7g~e?Q}l4n6)dGDBBB9TpD_sXpr@UM_In{gds{2Mv*`BoLe)v zml;r*G$v28G=3f^I2O=t65C=S)AHZco)xc=(5zT zP00p{Ccv@kU;t~n3V+&G1r&~k_WQxLtVwKdaFa>QFHglkf!onbjdfMvg{LIjBoOt3 z0XILk5>|A!VrI0a!7Cubg$|<2pxf4?#l+hXF>Tli$wDxigid#jh!x~UFNU#SOWY!Y zCZ!PVMhC0Q3NEIBo}-lkm3bd@x$1$GkOj>G~tj=%!r zJbmKX!~Z!RI1yya^1#Qpn+N>7$Z-o@Is5Ifz--3?^sQJxTx2Y8I@_`KPCizlVu4}n zN8aDsnBupoa81enr|;5uny1JHBh*vMS#ICG-z$#AoC2h=@IM1ogH&cBr&A`x*q4UY2Y0B{ zh5piiqnTawGciaC&po5BX9GOAM2_4>B;@EatBEBJ8dG&CQa7-48?#jt$4NClc96{?L2 z_Pr2%53w1ww>GL+4pTVx&uxS?ci1&`#NZE)S>HNl>d5|8$VjQsRf!`8_Q7U?w#Vy7 zTTA~qDHIQQrwDM}$?*Ku4)AcyD55_+$oRpeV&p<6MlhCAXvKm|h+Nlcz&O&upim*W zQ#+F~wM65_t%^p57yzOe;@}^wxd!odX5dUijH#*pu#QGuzzm;MtXjyB5gI2r0HjsB zPh_j?U0Xq)DnCGU zdf48`v>#rM^|^`kQG&^0yvb1yn1~Bvs>_+ohLhHKQkTYt!yd1FlZ7I1t4OU7YR|kB zp$$)$e!n~QU6lgZEpT%UPnO~17ZLB#XuEntkJVGo+rS^q&wJm4j(Y0}8w=yUuVLtR zw+UMo9^$8$ZsdaazTnv|JloNECNJiOIzdK+XAS zyqj)}v6R!uKJX_spd*?>pQ10^Js)B>yDGdPJ{f7?SmBwYcay&LIxYmoult_9$t9uA z5Z0o?HI{50L7GgY)>u^Jn-(>QuxK4@P8Q;5FIF>DJ#6O7KzEzoVtzJ**#^p3*5Vm%Qy z%3$p(rvvoG+U%bd)aTFO*9U}dkKZv4KdF|U!QH9M%#G0F2%r!AT{HXAQ4!AZVsG_9 zYKi4iVjvZp-8D zF-k>))r~7d*aj(ZjVz-3V-uYX*)RvBmM|_m(ber(vKP`!epwH6 zWPibLH=jO~G}0iPw1w%5(~_#vvFC3CnVH!4KrCu&>C(b(0PHIheTb3#2j8vD~n z!83U@HrhOc+$=*vecA|#=waus`&xMq?DyLdrpcRG!ADD_;<-X>L4pm<{dOO!<&H-rQ!v94^*217ND%U#u~Ua&Mv6E+@$YF^1%O%L%! zZ+uL|ZmWi>F01$IT1V%U<>*jxBrzQ}RbowLSj2G2OT<8~W{yTftwvq0`AIfrTITgf zUgDQWUgCT1seOZfJ1DC*1Zsob-g^ZAb> zM|m3msXeF~zmVLT6!h%y;7NQCZlb`pVR3?C@mx79iq6qGlIO&Aev&NmyNU9xslJQG zPH}IzcW6Ak#dwZ<_p5Jj3}AlIm>PuU9?!Pqlae%Gp>6%<9RS=l$o7uO>3-1UPxeh? zP0(&0zQfEJjEw6{62PDxT8Y?DAn#B#i;j5?9!+tWcNXW|0R<>uUjvN({H#YjDMm{q zhF7qNQn?x6e)4Rf=Vr`_bxd{0|4irFy06gnYr{OcOTve9CsYs0Nq2Zv`)n6iodAYfL)r$?NvG`P7IT-~_1FGF+W$eTc=k9v1z}9x|EMW&UB&pgru8X+6t7Ec(4Y?3`&m z$UiJ9*uzel)|X(Cmf@W;i*B=&9W$-Jw3Hn)i@swi6EdyevXlv#MPIU%@tM|7TFUs$ zq6L;xm1(WBl&Z|4ODttvrd8Jqv<#2SEc&3O>;TRv%$DIDGK-G4l(CuC?^?>(%%b;L z%9y6BT877D7EQLd(M_{(3eGHglf8{3SEB<&O7Ka87 zN8CNkQ}8YwnKL`ztZq(t7!RDcKcNI4Cv)+KZ|TE5i4S%Kot62$J}oQzX;1oapu;}d z!ISR#@Q5T`3^%}q4+bYI(?wg!^lfE2I#CIizTvyA+hOMPh7uNfa6PQyB{3h}7v3JyWoWI}DW7GFj*=sfB;6CGlTM}7f zqU^;gd*E98WT(b_%VqIkk68o5Rwti&zp;L=F}E56Uh9MNC>&lABEK>0uSV&|ls<#J z21TT_?H7aeCbRh9MQgHHLe7$>lRPMoa;4B>*)PhBapZ~#J+6BCbu_jaR<#T(hG}y@ zlR*t=WTsiOqWGcmNIq!3?7jM=w`ueJ>4y8pg{K?tC*+7fp1`u6(Y~ONeQw>lbvxle zF)jPt<>TGQg@fmCxFFX{EZj;dRKK_tRPINq>}?5Z*?$YMusE2KA=P0DR<%LO!^}bcBy%ZfwACX>edhKWuF9{B>-MrZw_p2qVeI#PqE#+lBbTmEWC3_^z@ugiedke*;#HGaM%LX2?`41k7#pWg*7!$R` zcYUtgpOPSXn9Ri=*6V}32K2#AdLg&5%J1~)hO(a?)F;KGcBSN#$ApBvHngGZ@72oj zp)KVoD2Eh|U}ywk#fb3LRpkS3O?=cvGGSxx*GExUC0#2+fw@MX2A#kQn-dAlJM{6_ z{^Mhl1TZNuZ{f2E%pax8dve^M(w)&K^FySHq=-A=1W`T`aUm_Pq@+p95F-yUr*uh6 z1LLG6qr%3^-}Y1Uv{cs;JtFJHDq_+S;DzA3yvFl0KQ8^qN}bZS4*fj2ffbE1|BUFg zOBABDBv}|~@pv~`l3u+SyptU~a5Q2JYvck`@6`r70XO0Ok;ARmVL@XJkj~3nQb+wn z{mSuIbF2r0owm?G!8js2l!*J>tg}wT136=jk{SPBQNqz;kAgn;2OT3Rax_A`W%z)m zxA4z`qgsY*nx?c2PidYCP^1zb3Z_xB!<81h!;;<|C2^+O<$8aZ)G1_yp|9v!$2%6H z`mN2A!~7?P`NQm)uV@e0HAQv>rAu$s#$}G)QkgGUnUdxot}@|TS6FHBF+6x~2>!t4 zH#ya8_8~gc_^=AOoh;Ra&v_VSyT;|Gky--e8axTrbB_iUrtT99vbso;yDfxX$qX#- zK0=>Z+?e65e>b0=l7Jm*d!I65?v9;IFn_X81CvEQ=5xsIOA8$S<-k!wt@}YW z$OImNa;fYnS(B;E@yq)vwB^w6zRE9MAS@RsSo41&KhhlJwvmmjSszTWLQj&ilw*$2 z3KdLfaQTiu=Xs&t=*V{$19@tl_e)x zH{^+Q?6Yd$tRd>^^GPqWv0(kz%n=}Azy*bfpHk4vlG1KQ1R6PE^dCe%19~_CdMTg> z$8;t|QZWU*?@S@6V=?a7=-q~|Ii4HfBl%*ONw8Z}Op%p56wS-b(ZS>I^5 zwbF9WBPEtj)eJoFM!tD*IDZ`gP=3ytaKAA5BTCjXWU^p|E!`iU8DxvcNz$X0XIfa) z)Sv}xrxccg%5HX0xuR+`J9d=`7*mkHvAiw8=NYL80S%omLI*JOz^Fb0d&oxFLzW(b zJ>;G{p}eqR(rIg^WVMyJ-uty*O(f;Sd8Q1kyF#veQwpb4PvtD|NGKA$U|aQxGo3;2 z%oB@6TYCLnzO+Q8xNWb}(sRMIi~d=TU_?MREJZLbp|g9DX^eY0JCn{SPA?5xq|KaH zXSZj^+m)1;(*@^XE>SlE(e9jV=7y8BH2lR}eNP1WNVT2%N7e^zqVI>?!**eX^XyqW+lG*#}J#Kg~fgO+W1F97)HHEcK&pc{gV_}Zeq^K{+kaiM}eDx6;U7?0S+m!QD*_lP~ZaqPEsJvcZn^)!D%9% zv7Ot8rZ*(e^v5K}&7h`BSK~yElml0{w-hyG+4d5n$dPqvS~UOqOs^B4G&9ecY8<9?I*$ey=#C4Ea?9RX%+ z69N2s9STO)&wfW)Ex|ByYcj1-Ffmk|yoD?Un#LyB*W zhDN0kY4|=*!%soJVAO394M$0SKS5%wEa#+@gNb-eAoWM=m?&IXb{Wm<_00^09E32$ z7oAD(^KChKT;m)mRAz{D7@IvPS(JUf@xe8y7VQ}acnh%e^OU=GY{#{rZF^S8Qk$#x zmkZqV{C&!X$8RD^M2gla3ynHTgMdC``s1Eyg#~0x+H~C~n@zsOJ>y$tMxs+FCgH^0 zFSZb=n@_XSz1Bww+EoZXE%TDY-QMfnyPs4$ZV?ZePB2PAt#lkbzvQGN1Qk>bed-U{ z4tmQ)K55OZw)Q)B4oKN)$Q90=#hP6klDrnS`$3&)e;#x^-kQ-u=5BO75_1Ew%B zGRZi>cuCwb$A(R>(k;i^Jc;?=>CG6jXF2|!x&lITFe2|qJAK~f=D#k0k&wBAT+ zw^KMg8u5EX!sN}2YEFb_ZJMH|D<67A z%R8SdHK#~=toh4F3c)p?W&9DNu=%^jQZWixfnj|uXEcsgB}2WW=HLGYAGzq<)(M7l zRp$1G>nC%hu8Kl*XCe4Dx!iT|VhrHfF#4QYtyhb9Z7nA^v%Cj`qeF*nbA}7GRkcZw za7AZaYUFE(sq9@Z+-P}q-f+97pe8xnINFUX$vZDGozgjyQbnWR5Y`|hV{Yx;nz{wL zx!E&@-^g?*8^Or8?Wb|%!#3h+oSlE-l`NfbwVp>hig6iN^YT$kn1%rI_Wv(q@MZyCYG>{xIMRkX<$$rd*=3`M2M3^~NO5w5XTZ;0lay9`;{ zfB!!>Z(dR3j$`yV`PKs%m9(E77Q0kRZ=3q)Ma9^75D>Uc(h-)wdrUUv@I zg$H)5(3cf1&&B@um(f4EDKN#qOjX4SR29o5$KhfWkJz_RSt;75INPC#A^%{W_yVFP z`Ne3V;*Mt4j{r(D>uMd(thNG)I%j8%0k2Say%8=~Vn6ANntz0v) z1#P|Eg(FCNwQB03N7*NFo~1g5yoK*#OXXE%E+k{GAUK+fx38d)OHi|58V3gc)4t+W z$znbgf`{xiV%>J&E#=!%e(4weVePl54^lBR`A32DOkhYxLJJ zPl`axNL$vKa@MGc*c&yL>uJwE8N9MWtK4RAo`-t+TByhN1@I7nhXF#x2Kcg@y`Hbk z2AmjF0gErm#!|IqbNftz923d$Zsn*5ccd(hFD;w^#L1ALjr!{BY2(E2m4m)jkl&}&W~AnR4#kcd<03K?g4>vaIa8)a$k3|Kdo$VfK!~c2N3{l5H%-Rz=FwoccS^3HAJ+#TakWI}C*m2;q|&y~ zo9f+uVP)Xr7L7Z=RIrlC67K~20}_!B33CBNJ$r-(f$XPLb1$}|${WFD(8NUKX!5fV zERA?CpCbuYEf(1(sQZ>eJ&h#80H^^s)1;xQ*BpKh**7PVO?2WR0&MP!#)M9YUyuOZ zVW89Ua2sJCQx5j060i?W!S(|N_9`EbSHyOWQPfj)5`QPa>zZGy!?38UMmDPSRXwa! z8%~Q7wcfe6h!wWzHK_Hnk!oF?s5P6a)%84)E}u(w(jnnX=JvWO%wca?IPbZNrg3Iq z8yxE<__WP+(ucIuun%!T4TMYUOjP*Dn;ClPb_O|Wd@{3&t70YM%uZ2>88Y7kfk zPzUbMVKSs`1DuDZ5jqLh?fXY&*X$$t>?63GbO#McSf6PzGcW@y#WQXOCX&hLdw*6Sn|3QqUAPY?a7oK|qzZB0V84Eq0 z)63(zxq0H|=IY?{I`56EqQh~#!*TZZ`i{Akr5e-Ow}en_@a5gQf+*^ zLjf;+tFGEJA#2Ogq)D>Y2o$%?SKexEOA+iliRSKWpq*rr4$GCcwBOWMX>9URE9Qu0 zB^( z_W~7FsQR2#xxCkmalTxg1WCD;cGJ=xN-=d-pxb%kvTpjM$9jKF@D{^XH#}>uDoH=1 zG3BZs(Gje?na1jLI=3iA`m)59Q#Ne`x|YD(wtSMvCD@folc+7uC(#xn9k%SXZ9@@F zA`8z5ZBrk^TUG8?JJ+zQu~y~5oPVgSh(->5P!3a9Q2Hl2Yh>fV<}yRh;KH!b$EbG` z^HbPBMFoYaYHV(mGMEjYb&+>DY_*IWU0C0jH+%QzPmT{Y$^#raD`Yo-DPm01eKI<> zJ(wn##uC4&teVM3aUVM+-evBbrgygtpR-_E%kX=f_T--h2ek}OZyMJ!d}Pz^gKq`0 zh%mS(52N(3D-UDzP{jj>nkqV5=546Rv|h)fz*13H(-o?S3fi;l7tUM%87-%>%U1RH zDR!0cBE!IEt<2eKoNk!_wL8U$C4oqSd;@v+MvD>dn4WNKjm+liI+OS8eTEET#c zZPK4z=7^A?G96?0nC+~JMS=mGfy}|E%c5pW8~_0~2}w!{yab6Gu&TlB>Bl5>(UJlJgvOUBppW zSd@}%pZpdNntvD-+vBta?|@W2bOV1V8`%fkSZSadx2`!1WjgSA#|l>HGtF12yk8La z*)x?Y>K9;Q|GqakEsW@tk^{PfyoZv*1+U8O7-BO|zK7YK==y-4A7~`%8`NM**O!`N zmD0d5PmG$PR0(&k2^bu+HoMX8LzrJ{w*!nsffMOWM}a*M@&iAB0_6Wprmfx6VnhNU zZ)`RBub2S+2?A6|XCw)rl@C^hwK3KjQE8wb9~PpjUDxc3!_U9(UQ`|3@YM~myZ zDdDAjzzb?MLk%RB zS={8|AxFp6B=O0#(_d^V5$~jz%yl8BTtl?tbo@0?QldJNbTnX3+Bd>_yrXdvHPdD0 zzp*13qr08U3l+d!Epo;C3ioQKVDiZE;4b`HJd0mqdnwVDL@wM~NymfRXMzY?VUY?n zb%kxI_AF&i#e*AGvs3Zlo+70<9-P{75#(3`s#Hq;$(Nh$RHK>vPZg zjiE0%F=s8BX@y~HOx!+@f#Bcd;P+AgQjdN$6zk&-D&j}3DIxZ<5U~S8#LOFoPLe92 za>BTk!%flM)C$f1)bKLsM_H=f27#x*eWsN_kof;_E<}xY2>f|n7^xTV&>t?D!iswY z%Pc-86|#lW=zv zTz*+hz-&FJAa%klfTAKF%_g4ywcoBK1Tr8qFPj20E>lls?~8xh0(>oKiaJ3zsGrk6Wzv zb0buHWmIi?mMZMkHHC@~Pc5l;H&<_1LKQ_{%*%(|oC1 zwmuU6L!Y{eOwq{kbB4rf4OSEz>hMFPq^pyfvlCSpl}`jF64CIiOM3Ctqxzd z72*`%YJMSE1#pKn@P<4M!W^Z$xQSeG)E5z~&z8^FFQf2{%V6ul4=^f0`;T{PrW{b+gPV@AhE`D2P}8AC-0{nFjaaKIv@h^DGElVwS|2IeMxP)PGqr8Iz>N zS)XxEgtIz7+^Y>+u!Wvo^PX+)SvAn0tNv%5^9iEetsqy*+edUHhK9aB#CK?|Rhb(~ zDxC_zX3glM>4O}_aL#M>BF{-T*r>WNv#~p~>*&m`M_2P#!(Z)%ncp2CtBYF3qc{9 z%+fsL^?A~xTJY)2hjFjXNuGx8dX0twRFiEHY3JwiT1ps2rCn`Tg%fwyz=SXWjBW`)o4?lB@$?Viz^!6lrv5wgo}PwR7+$GFq~e7J-92epw#Yk|(8jp&fRA#)JW zkv@@*4{2@^43>;7bS}J$BXHWs$cWv4fr48GYAGKkj-V{(qjHJ1JY5-4nm z=8i8$Vu-G*HXaqNAob%B24#4K+Q}8p94t;}(aa%^;%PqFmyZpyU%+Fyx1F99v?y z`s5JKcM3AYyc<%P_>mKno?T7EhiKM{A{JYGi__x6jtuF)a+o5vH-xjrym(~Df?KP%^+rn(q*F-E zsR^VS^MrH9I_lER$Mj4OgP=#zhw{x>p)|>0*Trhq1)t%@7|GhX%BDs{iX6`*IczKN zA0aPTmBRn}^q8iegV{MrZQUGxq-9C91SabZn*zmplWn<`{AKI}6}atS$-1QO^1gIcEK1{hOJQ%+*LQ_1L}?AH&;$S$AtRXtvnUP2cgy zyE#2I_pnFBaZ}2$Ik_fi1{zc$TpWz~cYBjEY*V_qGeL#5>d^FLTP?@2i|?D0XJ9$T?DKd?USw@x_CEKw2yb>?K_ zHIyh|t{!xljBvzOeLeb1Mr*!Ekto&ZGk<)nMICLC1ikIqM5KjkkrV6dhBDSy@r4B> zlcoeK;xLvAuBRqqWw${t}{BcT#dT*`|+nXEHrLG+_TE zYV{oswN&{m3M+qo6#x@`xRDcKI8Dv(rb??T-(iu6bHC25cb-~#ih-O%q!1Lq0wfnE zlID-Gq;@9zB$w%@PaRiLp|M{vurI?S-zFRgDz+miYcSAfJGw+7e-4f-WS?XYN_}cA zt!#c@a5h*@!(-0E(D+Mp`{vmS*9RqAAAI>9Zm7Eb{al*cotG$IAM9{7oJ%Or;P_4I z#O${u-;cl&1-bBda!lJ()ou^LPq9UqAR8k=?KyfQvf3W+7W%A~ z(T+#cUJSDijeUN{ZhU!E#XmA5UtrPBI2Mrj8a~($7`rPv+XZ1W_}31J{a)ehk>O8B%RJolDrW>$P3H{=m$t zv)4!9uq6|QWd`>K$Bc2yXmJ6XCdNQ+%D!|)?)OZ^a=$qy_k@UlFA%|NX<07$?g!u! z)DSr$Zu>jcw7gt7c%Z+d_~%#29`vOm_9X62wuoOL^#oS59dq zi~P2EUx~~nV_P--7Rl@idohlMI{M`Lg{`w#YCrvgliJyo)OJOU@_l4Ln0T6x2TD?E zrvTsk4dc)Kz1)5lxvg6Nk(s~q?Jth1xDde|B3Vvwr(i9-E)v~e6J3)H_#5B;3x&54 zBPG0l$NFbrcPG3>kyv=UHz&OH913rrYa4|3AIvBH)e3Ln?1Zo!C#Ca9J>I9 z*n;q0PKcPK@V=e?0Fc){TNmE#EDH12+e9GQi;WDZEsO7U6if`LstQAMVIt{n0?dPX zD85U3stYyi#P?fGd^@FP;#-p3iEky2B)-3Ocd7W^uM`RJzx_)+vSk6Td!W1kFXfXb`B*H#YXO=R;LrX? zSph!Y>h6<{m~&+r0iIZTrDb;o&U${n9m#AlCDK+M=)Jk`(BNdNAev`Seb!0r_oO7Y zD`*n?ZDfcg_O5_RN^B4I(62%-%s`BYX<0MAK}W48*ZM*D_yy zFv=56Y|a3WqSN*qaCZ@i;aTwi>dYP~0# zZSO~Hk@RePNwXK%A4rI6?faG$*E?*3xc>Ukwn@uf7<7*v(8Y}JkM}<|ui8_*@%=>< zck@#2j)@{t{~B@Ajs#c7L^ArV+gR%eGI~85=i4KrzjortGJ5|Pie>ci`1fv2MxP5r zFQZGDqi$xYjJ}3yk}~?FOUugW3&`dt*()!XjPAH#D4JorE#F#ati?g6WnOnhvtOwl zIM?ycrL6R|E(?gUlIESWfLHGwU^+gl+%hMc*|obqj{H6EAC|2rX_Vwe=6f?rvJTf4 z5$EujT@4-tM82>jj}MRxJjW*c8~kG8Lh$l83J;3ClkxFGi={LEYAeC^+5o9X41KA6 zD7T~KVpmr*DR%Yc&5_;myz@Y2f70g0X`fP>GdzYCrt04&wo0qh5Z@?>-cT9l?|+0o zh<&3(de9m&`)$%rjzvO5VgJ_%$$dIMHv+RCgW&>V-R0hC7dJ4-ws(aN&ghYkZ-(T# zZv1G5i{1K~l^rAlJ|d;k)AX3W$2=KTXT`LWV57(;SUU>2$QQ!X1$q~qD1%7q3D+qd_29!^pfM3p z@PK?UsDc({dZ%UHb7Pvt4?=&U2sH?XdXLTn-&Cfg6(IAfFmqiMZ)XxP0X1e37GV1)0l0om;x*Lr zz!Y;Rip}0ijK*<$v7$ z2>;TwKe`_wmKF#WF<+T8RmdFEFMeYI$ALzu)WcBihDu$~)kR$JWOz$-s^#0uol0~k zJ2;&&PwHpm;TEKzOZ0#GTx0v5fm5)p@#&z<7q5Y6JWb0i`mB-9s7OMsu7Xf)4br;Q zZX0<80C0x{r-Ow>Q49zZ1ngn!#D?|=?eTCM5na(KmQT%*w&R`(GIY89#ZHoxePAUR zWL9N2g6T`wGi@KLWT0thFD^bc%!?m{6+){ZEV&t)Y(wa#?xuI@$h7c# z&)-?CT~H+4npe`;9s@DCSCp#P>EN8y7<F4FIwx2s*bw3|TM-}z+>0#d``gyl+mhb0R z!WL{#Ki~I_?e6C}+Ttzg=NjAiE$QdGi~2bsQA5S@Bv~Sf9-Yk$=M_@gB~JK5UeAB$ z!sMhc68A9UpB&Io8i%|TdYA*$3sBA;;W!8%+bjs7S_nRVq1Myh&CimVlW{U%P4Vu2 zKD+V#I+tF98{j0L&e$&mx4QIN(x-OvA)ZH&lyLp?r=V;kQLIbFicUWXT#n<)6bu!3S>?)1!#m3XifM{)vVKa%2DF(Bb)c77#h_*vJyirAEI zxTMU4G+DK+VxR$OMi2H9>6KXii#+_!1bdRp#d#0lTt#~X_tc(o(q)#E>H2uIKyoww+t zZ*s*}3C6@QGEA2C^}l+bB zh#fqi-|d%*mhK~kK4n1Lu84sO6oKohN_*%0DXh31W2>(3a2a!)>e9(BfKOf{rLlTg ztiwR@mCCouMBK;E7H;OQsb1`5)76?`VpA@R(QUySlr!IEt;=PXBEa|)Pwdo2`}hQq zN~As8KkmxfWyoDc(DF@DtUT@DETG&?j@u0Bg^>o9vE*#8`%rsHpY3>Ebi zqqPt`<}mJM+SWV9C3;T}-!>M9Q^;yf_KqJ>BhGV;@J*z})nSGHj2HmX0J|re#4OIY zz`{p`lCBQU=yBL%`U5O3CwO#+A?_bI7=b;6m|k@z8v`!Heg9cD!e}osYO$I^1G{P6 z_(P-=(N{k)s{VyPxNvJ~b3iV4w1fu;A^%y2LwYpumelN%jByj1J*m~ddX6eD(dxU* z7Qv6MmJ@$ct3S_nI_Wji>U2<~yS4akJvBv$%*SNDKm)6)?T4UJekR~IZH(D+> z3Tc#_V?p&Y=ZzwG*7Kq@O%&CM*gO&>cfxfzIlOS#cAmlcDhJ3E(FvcID3JW2?gtX9 z_3#v=sYfE@z>tp5PocQZ)e+~ofnta+6RrFj7#k>e`~*n6!@e9UM-mjzX#A(oy>)w_ zz7t&>LH+s%lk+5c-p{VgC?-j$S(T_{2%QR{^y&nwGpeZ!2$miyVdJMF`0mFT3hCo- z<)Tb?S~h&j(}&+`BYeVjzor;nM_;+~Uk>}qqtZ3SXZ7&7|J<{CKJc+gKdIDLJnkPp z6Oa3I&ae^L&#l2rypTTPgJvG|_6fCuZa`7zn0>3oAMLfACXB4{@1THh_~|od#f|SF zdygBlXF~{ofa~Uqiw2oMBym&qfw#8DDgXeQDT)cO~#x(cxreO#)DhPXmY+gt`4j7g}6?`=^n*0 zci#=tpEaL$flH5}*Ak*^#JKNgH~l92#$g*&*<&)0e7Fjss)`ZmBf-GEK zxjnLQza-TAQAVMJP0(nh@%4xk$l*Mp+WI@x0bBSseSaG;GBQsR->gjME#ezVBRg$H zKbfjI4&@zUS;Ebtrfu86FqV4(xnmt!YkeP17BpZ?o`Qh>+qQcnT4urfyPW2EUv~x3 zhG$4j@P1vYp2*xleN0s21@WQBLW!JiJ>ka&#kZQ%gF-~5(AgmR$GayXnS#?UFuAJZ zufL+dx+Ja=67@JfcSid*Ubq)GyU&_!k=Vkm0Uyn}&9Ok(rz%KhuiIR304Piwn1M5o zb2Q%ZEi}WM3O=}(v0tydgNeM_AL5YSzD&)7F}^3HJFebfCac_C7eRSup6-X7Q1h_) z+B<08^NUj zg{Sxn^8jbz9BhuNZP1E=tXyP5oT^Q-CW5p-(&RS;GHq{2X4F|N*j1~L{!yCPiaE4W zCSY+?RzY=XS8+mpM^~Vpg=VRS=WI zubvnM9qxnUssn93M(tpi`tW*vn0604VaZ87v5f>8Yu4pqw0C&W<71<0G58F;X;cxB zSQhnSY-N9ovN_7)mfw04^bFyhAHjn$lJK%>pHDF^D<1;354I!wU@)eHMnxnjrBN|2rfa#7T4EY~ zw*M;Dw=H<|+P=6T55mdK;YdINv6dewo)=U(LEKuCAD z26S-osl`geK{sn%A$SFgxPzdz22^*8RF>Xrs@#NltEZ~hQbYOKF&k2$l$Mu((1t&x zsUnNOg{NUABK|%>)sXWw!uaFoCg#t+m|6x!)=K#IP$UE!#zQ!PG9`2A!$|Z~>8UWQ zltT@qPR*quTWDp2_%%rV;nZ5d&gp8|2SrUF?fw2xPZe9U_U8C{ktQ@!=Vq;=d|zHa z4K%{!$P2hMS!!K%%W!40+)p5j*DY4jJ$UOHwc5)S%E<)h2yA}iIgNfpne0BpOA&F2~u*!1E)n~5Pp{>e?JK8(5*&)453c z3XZ^|MKtgytgTSTDiN1}e8VOuL#p2<<*E#$5^^UR|0cdi764XKj2MZ4y%;qbf&j$) zO#5M|gT-lU>n3_3uva7m*!;HvUtRwKt#O92*IE#GL6fNTzfBOlkKLP?seY-qh^f8{ zsbZ%3{CT`Zg5X@-hGM1~d%+fzvB)VUR|xKg6Wg;dES&d3Mbnu27tVG9gAY5SVo?3| zp@_GJig|0a^9IIxuep?X3us&6ts$F8IPO|;zL??P=Porzr5nAlN^w^^!2ch2MQSGZ zVYnnxGp~m@F?Vg$Tcl=w0|&+2^@9s}ldFxXnfHHmo49KyDl5xfKOq>&wsO~Z&A%7U z`Zu}jEXGACcS*`3kjxef(>8Etp{7N~N;J_piBbq0+c@nv2%7scC1P?kQCSOh`#Cul zF7O{^-WarAWDIwta;&YZ@#1X1NSy8GYSG+=%_T%y%R_8Y|G|w12mCdivUk!Y*^7;h2tZMr0-FB|E}9`i6R^3hz5TI!!QP|o!E>$$`jQks_Z<)n@xCz2=o5V7nNH`r6Z5xYj+>jFU zx)v-<*Zu=h>`UlqsR&v-52SOjIX0ugIxZa;;%A`iJi^8BSKnxyk07+E5{ z(@Dw-XQqLnslw@aAE87}NEmIP?%+OAUYj5$GCUd_;K=XC(Oj@v#yDDvI9NidrR){5 zp}I!YdXD!c8iB`9L{k~uwJVwkcjr=1a;d|ERirTCK(10`T6Xv{R@KZv@>u|z2pAeN zf_CFG((OuH^4kGmmN1o@g0gXTkfoUEC#c$HS3w^|`iX8VLy4LbxVd}?{8|UR3ys(X zgkEwib&KK5^ij0kE zC((PRW4-G2U3334bPe+ucT*ZUu266KoE9vDZn?C&zH*qm=)ClWk1aAg7cQCC?sG+6 zyP@;c)f+@Z9m$nbNbWA3tjCFGru{6g(@(z5e(3BcC+ef@)nUrI)u1-5@3P%jG-LN% zbpq1qE`AOW_Y5J9wkSl2tZxk~aFLcG5C^6qzJt0H3{y;e4x8b? zR5+sqpqWt>O?!loqp-=KfGY;~UBP4%@#*pM(JNNWP~zAXEOW${0ArcTRE?|acw4K) zh;(1-wgI;+k#wu5nJ1L1-8Q&1@#W$-g6R|uR>pN9+gUfRt|DybVKdFw$^5T;(2?`@15jZ zhw&Lp{^PXGvQNqO*_qdmlOx}zf5jqHm6K#+6Gf&62|6=YUU$SR98tJNCHS1g?&+0$ zvGOnU4g7-5gs@$so8K1Ep{UI5l+l2!82B6ok}YC;$8GUoe!iqv7-&~z`UNI6c&w{z zx6la-?^*!HQ@Co)9u8>Q@R*_9p<>6J?KQ*YC$4lOndaFx@|Etf1;qnE4EwM`P0%NY<$MfdE=e0C*$ zwBk5wsIM%m|cX zoXB=)?ylW!xAeB1>}PQWKZ+=$Z+M{Ge_`gJ)tB$SVyE*^g0GL1;7M1R609cE^igc5 zwr2EM?R^F+gn=I}%&I9($6|f)0Bm0RdS>eG9f>sq=t!*H0B;c+ zDjjZ2ajo_Bd80+X!w5r!1EJw#E!_x;1C^97v`wW7jcsC4-OFR4?NFcKo1Mv>%+_gr zM_Uc)(#0zm8qHj_qn%#mxZq;J_t~V+*jps8F??IK6QLx z)4R9h4FZfg25Rv|`pLER3Vv>JpZ%QOm$#>Uo8DjOGe>mz{^6$-;H0Ci+j1GOj~cMW zDxPoavE0h|bbYQj*M>?aRsRyP+MgTT9S;vT(Y^QzX?cCk5%Hxgi27-3n|!&~N97Ko zoX<})3>T3Xj~LcR9$+DO>WhqO8$?JdrW}!T1L3W61qxb=SNv+7`;UVo^{byAowpJK z$As?C`<6bNWmw0}daAyoW2y+&VI5*2USHIBy8TO1W9wSxt*dCNLI(~U0J?>?E*fY0 zxx*fz^HNeMtyc54j2~a+@Pth3wDF`t`@ASc;LDt)b5FBf&uyXL?8b2&$0Tj8H3-z zgNY9dzq$nZK)>k2?THU{M`i58uk_*i#D}F!+04YNhuu{G2R_GlSK^Tc*_@ElVp-7 z>NPF2g-G`?F2DO%Jhgj@r?5Hf(EgPyXuFU3Ehw+48 zQgbGDG6pPUNP_3?0Bcth#|akA*FgbO(ka&0YP@7T;78!OTqT5(1~B-vXYWppF%2*c z08Jk4!8efcsDYSS)l7a|+~kswz`w%qXRXb0U{MP5OzUjGF=DWew9M-t)ii;NE6Dbr z2SMw_fsGF-zR7D^p00FtS0Y*5Aar9f)T*XPZEf+W3y`;l^`#p;+MO*)4Bii~@3Q01nubeYjq-ABl+DgkZK2ECuAjVn%P)3scNv`gZGzxc;-y!rkR01W2REyX^EJ6PW%D{` zjy?XaZLWh_VE_ujg+H?@_0>;u+CFa2>IwWaEWajQ?QKo2q{F~k0$w1!KhP?mXsg+RoP6w2~43dLHzO$sG$RzjhO+Oa~pN3~^dYMFPFDU_SMLYYb? z2`4yBkI=eCKONR;-X`(pN9g|=g`yTj3gv4^_ezfF3<`OAQsu7^O;~SZb(Hp3(l66pE#W3Z+k}C=_Et z>!N}^tItXm%0-_nqfo3U3MFO|DHLznS~8+SaoJ;qBKC+B%5uc|5^$X(W+|&srhJ|; zU&ppGK1B=p_6r$(qYg}pjWuM$i?Xs;`dvF7-GmmZ;zz+Zh>E;PXKG#cy1g)Ky@3O%o30|m@DW*l z^<`}xj>8tjpUZVVD#1EyNa=nlb%AL<*?0M)F~_+qd_KzX)UFStzYn8wihPlWQ|c_` z+_L*S8fOxDeqhG;ZdPY#mnKYk8?01z zm*&UtEjAuaUi~hO9^EdD>ablJJ=iXd9>!|uY-yLqh4a=e2Y#2vAeP&uaTRR)F3kgn zj&p2DVG-1*%qGoEBCj4@*g0D^~-lcKv ze3v#{^19-RiB4*J#)Vl#jF?rA{TZjho*X>T_GrS)ZgVEgtXXDJ{%D6LMCUM}$xW+heS z)|ik^@6ZhIi&lzvXcE}fQ>1u@#G_ozj$;SNoY3~xm{G=JKAdv<7U3wLM? zRM8I2R{`}uVuxlTw|QD0EiJP{(^#W^(hkkvZ}iy`J2Z(d_!6I^9hwir8}Tf|hD5x# zEfaq3uDvvE%H3v$YRs{`{r(w*@A|+0XJ4t!{<`x`Bu*q(XtP%+;%AgQG}sH^Rt)y4 z#ZMOA&>tG?uQP+aO4G=ld?VWuS%0xKX|Q)63c>ZTT+(1~0L);2KF3S)ZFX&!ia=dD=mt=*a2iD`@d1gAt?z20I!866=L zcUVs$SgR=$wXnQ%_FZSMe|t&Bd$4zuW~3220nj27jr_5b>aRWK3Ap^3i8T3`F2QfvLq^a6+& zRIUC!vA5A$A2QrKVNHtm!!FQWC;-#QKl$IZ)>o-Dua&j_PGWt5SFdq%5C3A!*+y&q z$~ltwD=D*m*7{b;`6_8fw+Is%S7K}Z)K4L|oK>&XI{BJg>#H!S{oiPZW)t;f@{5FCWk->S8KvHAiT zAXyn|H=9?FKF{f`a<=-Kue%9NQrG`-No1>+Ufp&6Ru%Q|-z9cX)ThW;@2vD83WebJ zp@J=Es_$wrP1z~IwKY@yIUj;r+iR*{d!7nyucDTg*OQ~SuK`lsh6w8`=!)1q9wH{R(|3XR5zZAIh2P+x4NGss59EC^gleq0|yn{n1J-G1VWS)NMA^ z-}Qkq+Qj(xzuZ*6kX-{OlmEAw>eqfEg?Xq=VpIKA1^Gc6ydZxywtC4eGMtCxLKr;tvm^6S~t2?&jwd3gGl6c+C|A2>$$*5?!)j9?D_3WZOfB1M2vT~QDeGjj($m5G&EORbpRoBJsZ1R%*kts_%r zrN=v7u`k^{3goj**=fIv&|&+i5ePj%=mtU;rTFC%c}|zIOW+(5}QRe{cn(;#L)m`F#2_vyhrIB4Ua=F8zLLnO4 z&cG_d`*Jn-JpkcnK_?t?Pkq-9-G8HGE#6PD!tgU9zNx>FeboeuKGBf-OMS=D^wT|q zuMhUzMS&zX2C}c#w+vPu-*R7VecNMK4$6H;)pX6w)wDj;RHg6~^LAFK319VYRE>@J ztU~s2WzEmR13SCiQFIr+`L&IA7Mi!NvRT&#(2}|;<^AwO1dALa4;956v|S-uYDC+C z(k8ElLJ^*KOe0hF)f@|s>E_oiw}WGJoan*eOd_Hh$r z-x1kIu39!YrqhC)+&k5RX@vIubVMq6APik0mSAkby7_AT&^DH`+_^2f_ZZ#H1!u@m zrl~Ny#WNvd@TFDWO+sO(AtbJk4Gf%S;2jIF(Kbx`OU@htA&I5${kkJxY4+^akJ(m8v*?cmHsjR;jgP%FkXrGtXhPj zLf6$ydw=nDH9y9kR#(@mk907N6G;ZphR<}G#TF#3qRo7%e4u@l4<7@*gfYDA*CJWY zY`*P{W1{P59;IG?9nD#)AiR#oNi6NPG;3Q8I#qM3iGj%fxz(B5j~d_7KQz9l{U}`< z;m=-G(|j^2Gi@jY_b?vAGdAg71xd-Rf8z#*w=_Iu(R7)s76K@*XG2)|cs zazO(Tb7JZk?IKz$E97<=6-_sxLu!o5ip*1VBDNnUgQ%YTtKkDiintXH!4&miut%yt zihC=c`dUjZ6!kje-|ej{VfFui^wteu-^Sj$^|k7)KRH0fV@pzxeaqu5qLw!GIsMIj zFqvZCgZQeIr6y7e!Q)F}org|W4{IDEusE$YMuJ}glc~#Aqssn?>zF&kqU%$5dZ<8( zL-OqiXPHf2vkp@_Z&Zp&@>xC-te85MB&FO}kl0fhq@Eq=18s471Ah`F@H(gSGY&5A z2zAf;1gJ+i7L7DGdx#wTDdTW`&i(kH`aKUK36-pOc{9X!B;Dga4b?k@663PkjZ4AR zY)U{)G~eWKDhKR802V=rWoN7j9ZZ8&2)+%V$P1*##;3KBn9{03F%Yx25qBUaVnNBNC#f;#lQoo5IawmNXNK#BbS0(UT0g} zL?TJZIbxo`j)N1|W;b&JD)r%eDL$<;?oWhAqsMwcN36u3=APmX^`=G$NUIlgM-B9O z2vDwpaSUHF5o^t54#ME85UC<~ayMEe9|Hp_ncQD4fboNttq%>X;9@ZkXe>D3x+fvi z+U~|dh?GYZOnsEDB$pKvx4Ls>T3sh8kYeI!}Sv0GGF8)r0Vo6vRsJAr9 zbdqrw_pZqnz(YQ$<`o)=MfAr+W)0)cGkr17R!0aL4o{JeLJB!LOB&ll-BrvvPoZgVax;C;; zL%#l%e{CLoBg}^=^Xo3h4lll-5nom+m$_90Lxo8@4u2evDseUU+r)Q$@J5(_#oFi25VA!kJa|IsO-hy5;lCiS@bqYThR@D8Zw##+K}? z)g5zk_lPam!{t-RnzWB`RX35qfV*VA+8s-EVEMLKj)+r_q#d49XGAgL8?6D$L2w!J zZ7D9FOJ?R8I?dr)k*J{>-b?wUsTz&9 zn3#_fZ!lzvFvR@30Xw+&@wxl5t%eSlmOj-NAjWTr)b| zuKerD+`b!8N34*$(NMX&1`plh_Z~au??$q_k5S#wRXSpFW)w`^zu;9dxvh(TeIoWZ zyXo-6FzEu4@SDzOA3q}dT&Cj-jxcm-T~%Z@99!RUC><-+m11HHGT6`zj;Dr-p&19W;q_*{B zS?!sbr;lCwntEd#`}Wt5??uEyFvLU6FGU&=O{_AAw`Noj!k=RA(6ggb7 z1=^KlPYg2Rf<2r<{%y>vc{X+> zzW`!6s|(QPS|8X`0*FX*Bq&rzKA&3gc}}@I-5@(%v#eFg2?bT59X*7my@rR7Q1?2y zPekt6wQ>OfY2BkTg1wK*$-H>8X&4w;qu*7_Bk@p6>H828=2%IN?qytip_g7A1ch=- zr?V-_E)H0iQL$=s4)NqMG_vM8)Uc8sxlU`i4vnOKD>N^G0S#H=Jr0DM5UNJAGIIEM zS6z%Y)8PZgLW3b6GocX;c*URQ19Y!kAd#t~iGq?{NXJOx2x5FUU-dQ|u^iqN@B~ZtITh9y!7LB3O5~ZvGZJ96z_s znMYDRt^(MTxlp=S`ClUn)|-x%3&K~Lui(JQygA1g=PYQ5sTHWhK?F@5hhk47vR))Q z7@@4bjf(ZsEb0+Wk`{H*fv&b?0EI&^JJO;mKlfHs6JGE3PFnB!y6d`D?Wz{_Ust$F zon4Ds;ORJowWv3{3i&IxsAoaJH%^PXqo)%QF4^6ZwKPfe?quxr`K`662WHT7cr0l- zvuSI%P0Lj=T+|+DYewK8NUFAm`?ffwtzoeZ$+D9>~2DZOu1ukA#}M>)M)b zC~5|6&4r4OPPH}XgGLv$H7h7?)eG-%TT`cMl|Wmw0(*=?+M1VT>yh-sc3+dWt-0Uy zwl!n-_O~_5TJ5&xnALV$^BO`u6KRXgBdzu(v_?n!mN54Pb-QL7%r*x-D73E}v}HD| z*@ae^a#(Y)%TAq2SmwPl9liClM{nrL>(N`G#jb1g)-P_2-gG>S-U_W!vAp{Ywd=Z( zvFuu#Ln!r>1${5HpfY|7l|XQmy3xw{HfE?gC)eZBHm&! zJQ;9&7Iux-|ir3$!;E zj|^>ZN)fEbHgo=+w!PW^Q*V3oMQ>7lqc6qQ(Ze>Rf#`h<2Q^`aUv+rH69Xh4uH}_OolrY}=`$ zsVylB`f+GMf0cIX4=|C=$@Rpv{f{fLN|>w$50$HETroxc00BA3cH!RHBsYL43yijNuV9w#x@Kp@U>}Q?EH5|O|Y8EHN|Ml(7ek@3ydV#slqn; zChQQ#sc^QrHwM3+@uJ|TF_kf%Kk~DR2>b(*B~F=GiX3p0PVju z+lhn~xLcGno}oR1=^Am2`1J&4HnsS!_NWRMG3ilNk=2cMjjDFFM^!o=MpZ>962^*L z)%Ia>FDfFZO3!ME?8dZh^c|^dIgh;jtAKNP^HUVl_oZ`g z$hij2sz^LtVnhomaSZ#%I-&(LotC;&S}+dzqk${(X+Z}1`xZ9(Y0eb@)~ac-(pbHi zw&q%Dg{Sbnm&uqcC8k-KM9gW&I|asQ6v+#QnrVk!q}_NW!Sq`qM&CaJPY0y8G>Pz1-s%TZ1q z)^d~%(aTZlt_N43KTY7^Ed1<`n<8hoH8L7-YvZnbQCMIQi{9Ld5|9Wqrk+zV4oI5IXa&q@iwuS3H)B6h&8BVyO?nx~t_ z!BOKAy3ZOJE5W|9+>yqJJiON7wSFY_c;LMd@3Zla^JzyF;e7yZhQ|HSYBK|dbX+7V zRVF*wTCTE+bjJ~%rqT+&0l~Vd{Oo;KQz^B**g4uIYqjdk<=&QZhwc3>9WgQ4 zk=Ay5b{#f?797I!BOPjO&%_|^H`3Z}wvjrZ?+3uhzO}vRKmcmh$O+S2k#%c38WcL$ z+Ftp)@YeQnFoWSI8#jCNhtm=cl+7P}Ptw|sljL34+Fo`*My+l0M=S=~RlC*CzO{W1 zG`h64y$U_Ru4!#QQuSD%wH;xLRwY}{P zyS3c`p{li2d-YA;s<#S_CmK+UsIcaNp-04or(&^6x8`2Rc0qe2?Xz ze7)xK#8+$2xwt_h$KLbCjttVPK13r-%ig=IzUbYoLSYxiLsm2M=hVz>kFX?dPSz+l zvQ}aI0abL)U~VR6wrc=sHOQc&nlN9Z z{c|#*vZ5_FziK8*!%qwFo5Sx9j$;5EjPpc7tdwc(9e8-at?!tkz=wK#Xa*tlz9}4Z zhIxjqoQ^7*TaQMBW;|iP{bB~-djsOn#d9N`XX7*eO8Aa&Ih53VRBldbrNwz+Ub4wx;=2~Vf5@Kgu%xL zoEJ2bHO1D`jX%L@=_=m5ObG1#-(1WD=}{nEPiN6}00V!Kdi(y315ktD`m8@uf0eHU zVo(o^4`)~f_dWA%9^VEo#u55=&Sehu3h#P#1`hrcaOD>iQ?bLA6gdg2+MDLBT$WV& zO1_=sgZV}(G7!kgDZprNQ8^B&DkhFcw%7KOvzM(spQqm9iy3{#j1GjB;-)32y~?zk zWLiEOb2YGsfjFzzMs8b_WkhDP`h{WOSPsTIBaJW6e;akw8Apv(x|ANNK4?I!n}%^! zrLY$t^)sKSAGn_o^$KyJnMroAA}$d@J$Jm z`BahJPEBmK-b+N*_EoZK0WvrkrBn1ZL_b@g9j0}*3-zq+$Due6P|8`6K0FYmR=sjIY{<2okjk#e`YLqsOsY$w!6S2K53#;7)Zh=+KF*qO+Xm*WjNqG! zYmel1Mwk?|zf~S;4wW-c#kK`eSz+ds_eCtFt|A(06%M20|LVy5*o*K!wvXc}5orMt z3PPDd!hB`~7-p9uUMtG)QM8kO&%w`2_$H)1-=YA)SWMDr0Gbw={i`F`N3DqPn3EM* zTFC35JLm$wpW6h{-_uYXfnS!FaH|Q9qT&1(DiA1}0Y3w1sv3){2T!e7l2hJiYQsVrF|8mc}hiV84j7D_@1vx0TB zC7y(NiF&o_n!l)5#V7XV;C1yK6p})4K_5PYg?&nFDt6coP8d8*nEBJNQ=G_{c8nj; z*;(CRsx@wNw^;fDZXaAMeWYG3mR?t{7EAN_s*WIDw}d@X|Je}sBHE(5OJ!GjUtvDR z9&>E0`rJCsLO%j=_KHOmhr^X*RNstp+(M4K66QPjHI~#^!7nYsYqL69sbfBcAYHD9 z7Q4&EW@D!GP3B)9&^;^2;AjQjq>rPXiZ{ibYEVe^0b_!GhoPU}Pfg%U${2Jvr@HDZ zQc+ci-TQy8J5g2FR-(C(d=lmx42{?71Yb+26!9Qg=+_(+>MYUz!TLCQ(+t6fP3{!* zljIc){ocO5{Es>?8Xr^SXJt>tN)D=D>Ii=T;L*ZL74G)H>&Fav8WOe0uv=_pN{c+8 zoBai<%hfd9nxfD3mU$yn8WK`ju{`Ix$vU!!#j}@IZHBzj`O`_oA9yUX97CTPjBvog zj>{;g+7{tS?%L!P&v8kIYODcD*bpmu9Tb`oMVbA9eKFjr(Yd?wRn9tk&E@f73^vcj zPjwYM+65%F8GW;8zvfo8mN0i=XuMV@$QRH=QqwW>it0e!Z=_6$SF}Eb;MGIq(mM;c zp^=vS=|8EKQK^d$)J@%>##H613Uv^u=^`zy`n^U?)U!Ab=d}m2B2szF1%*cGi$IV! z9enhbAlRr7Y#>Tlz>5nFAUGQc&X(b#_#9W6eb+l2K})&HG25WaRLHjdIGh91Az|t^ zKS%H^Qc%ke7<-4I@v8m;q^R34+3Rl+9)yY*_oP=e=S(@*kz&8Yx|JQfv}$ibz-M8l zDxbkBANfH(<0YS3ra*T4NOmH(<+3YJP+MJt83^l2WE;rY01M+RBQgtE&H||!Zwi(S zhmYfW!LlAij}W4fI$&AH3%Fu(VX4CM;XD_Xt9L>^aA22{f74O!S_59@ zKq23xS9QiFZdG~@^~lnyKTAG7Lm2s!NNSuyNq&&!SqzQWG6gdF3x8B=`Wj9pK>^Vz zm%gN~RgB0g&N!4~m}{A2Rl{WLzqa1c0l987=haE7DQ4g$GR!3#W^u zo1U3HQ%>((h0jPpChh$H?v@=H_fPN4jY2}YMMN739Z8rkfl#v9m|_r2GQAUL{%R7y zhs-W0Q8I#GE3Q4$JHrqrBI<9I2mJ?OZJ>&63!+2QJIV+W=JJEVaI)#0@=cRZ?_3BF zGM?VK2P*LEPw#98E;=~9^X>k6dPm_V!-2aIX3HAgA!~YvD(#ux`6p{^XL@IbdbMhr zsp?f!9g24uUPGsMMjdKXu_td1bHb8L?`)44A=5j3y)3O~wW}U?o?U@!F$ul#fx=nh%O{MOa^CB3HJs7+Z%{ z8a-y5&Z5Kl&G(~vti}6GnahwaJr*n~*5;IPwOm3MsL%{-o@O&I-O`!?7KB+o93(M|VUP%I(&GkR1VzTI}4q+}nKj+)I*`0cjux7b{@ z4)O7Io|k~HJa;=O9i1%VqA1$u_D!Yp5AYz~gYb+tn2X}=t-S=&fd5SPJzL~tl&``X0Oq(`x^cR1Ju>cZWz3HJdam7kfwh}mEN`gntMZJ% z7^R5N(weVyH7ktfQfC}8GpPVBpGSOmg$kP3-X{4v!#VoLfz|M z*qUnWGSV2Ihe-Sabs1I=HBKzVJMsmMs~!iaQng`LL7_=ZhJain zSioN~7)_XoGuR2BB9mFs)k`?`ktlHa$mRS`@Wu$u_5}l+b>(h}qd;SHp?NJ+VDFvL zy~b64+4&jeTCTsJR`r?dDKD4mvH ztQ-tMUlyCAvu3EAmu|UXpkQqj+}VznY(1#AkIWYJ-d6dC_0X_;^7EnoF)r+&puMQ? zOqR2f^(|H*1G#37`U%2IDC!?&4MVp8L)M-q*b8f?NPCS{&sh6HSOqDUkN9=G2zLwh zguAIsf~k=`oCrJK&Mj(IiwbjE4)*PxQ7|1IxP=M(Dj*m$tVi$lIu{-yU`>dK-3l-) zP&vmAIy%o|YQ#GyoT0U8|f8;nRjx<`WSZ9)TtQ=v>6v$N6 zgU`yB#(BK(DU{3GEFoL2#?^MLHn4y{mYMnhZYX>Hk-6q5JCppPSuFsSK<|rLpM(pe~4Jy@#Z&xMOfGphecd!ZyaG}2T~*NLva=_up@+9(<8)Q#l3dHMrMd^hLc56`hy zpI2|83@IRr1j|xlhJZv!EUp$gfB?!d1JM}Kh$?X96i~$y`6SF~!m#cUWyi0F z&dHuEiogi?E^i5zKvnV)lUglT@1O(@Z+5~I7;N%8g+Y;vg42g|VSwR>lu|Hw$|o7S zC&O4qio zSM8v%cCt*)U{V|UR7!Ud#*$d%R=m6niU#_p%82!e@XCfhb`AX0LvlMDHr}I?-prn4 z{+z0-K#s$g9_4$*BnwcwhLj@9vBJyY?d5;w!ywV*W@b00=~nie+=M3V33_l+h~KHO;%YEZ4|E(~ z9#U^L;4|dO#;z1iTzn#NlHsEW{ya7+o<1n{5�|O0`iw4eJJyz=#uH8*{ZLQ+okv zIcMxY{!GI@4W<0kE?(Fri5vDq9t?r~LQu3}XA)5AK}rtnXNJID6014~k%va6RUGYN zs6;a)A3o4G@v@fQ#j~k7RQjC1*OQjx> zsB*LMTLAGC;(0DUvt2_EATD@FKSboPz>x#1Eu&(MYm#Ewgf*2FYxE4oSN04=2Q1@I z0s>)-ui{B}YV=TE^p_cr9alH$!gA$WdOj2|XHai}5m~_9S7vO7`&cRpJH#J|K`_l&9gQe^`TK9d*vkz=? zSe=}}jffUzFswk`cSK!?5X0)eJlWD{H#u3VqT4uB`iF zks{p|h17uuVSKJF&Kgl5Ho9CX`U}f<*a-cF-Tu#IfwkEAY6BHkD_;N45aEUO7mg#P z>}$o_;q-&5WLaPjH$!d4S=h{unjy<({zgic9lLGjGWEehzAJ4ew7>9bh#a!)CGYd0OY85!mS?-D zVOk&iM+p33T3<>^*=zm1A9%HXb2me7t^dAIICZf91+@M!v={E0sROP2qGlJ``>Hb7 z$}W9uch76bJ2OQ4bL_6~0NVR&Zmc?{j$|B7=NLwfNp4gheo?fa2tm7}?eej7GhwyM z^R{u7GOS&GXl4lPVRg(zQgW0$i#q1{_q3jKz=FQQ?rxskbPR|Qir7Yo$^hh z^9VjdaGo?rL($ zYtw=>@@oHkhVojBJ2X2cufCcgOJ0*n$>}FhifRV!FY3duD6eKmUbfxT%S`5c&RrVM zvQvZkOc_`han4=Z^O>X6C-H!yhwG@+H|N zqH`)HIt3+cF(-Mob76Cwl+Z1bo}3wlVSXc)CK_#r)U&t& z*(!6GJs*UrMmF! znV=!xL6y;-6$h244d*@6;yWpi)}5obpK)+iCKE$>t9@;zB%UcTj*a* z^hwG)U$LuU%~uq;<}0Rm4{QHBC*L<;^2+zn9xfnZZ~^)5@z)IH`wh75uzU};*wXTS zfW(r>w_1JcYWdDa_i}F3=|95Ix%mq_J;M5rF8QjFbbG)pYYPZ?hS%J`&Ye#AvzmHD z7kz@hgoV51vW%kDQVMCf6?KOT#4 z2|bHmM?^DZwRTUFk|S|v6nBUE;E4MduVu>|+wF*KbEC_!wLMI0G_=@jP$6aNLhxZ431G4!$--+rnCfTvAG_ZM6H4`NBV1Z0UO8 zUnsh51ipG0~FV@y}|uw4|EXy4dp7Z7-^Czb#1}5+bS2 z^?Anp%Uhwm{GAIDNGj0hd8#}_QmS8}&p+rs&o!i!R#Hj&Jd2*uZ0SDF-fp&heV#4V zH%C&Mo8M-i=M+bTdO2$%`aD|4TTpXkq_hZ~$3@t)}ak$w|#G^#ZZy%Up9 zF(erpX`i%@%wOOzv_7Lat_U+$!4v(jfIzAf{r|p~oi+6QQl9AVu83!UqW>QxA9|wy zG%_Zt4R-*RcYyBYmkA@wSky*5)_ENTH0sXX@T6qB6EG)T_=*0WPV&|v!k(Ofkt0-# z=@G{v(B$<{@LHW+9`7k8%sJs=*d`oBYN?i+^V&hT%{kZD62+l)TvfnJ_1cY51^h`A z7h*V*lfNLc294e2$QbA0_TrbN#aqiSjz!Utu_P(v4^EwnI~oZoXdTNi97UHaTPs^8 zBK@(xpu-Bv4)zBXmG_!A2syX*oL7V&ey$$1HGe!AIw#f&c-2ukZgN-;X4hA?PcAzP z|K!j3J5pD+Us^U1|K!i8l($RxcFB~hD%;N~+Z+E(xwNwVtg>SKGv(aM_AzAx@XwUf zE89nwMe)y+(Y!LDvi<0?-?ttE4geXBaiV9@I^25e17KPYvfe)B+W_mW5#okdRGscV z*ih@H)>|3?JfyaOOw8S>c zfq2^#ykSmp)dn7K78mIWQ?81nH91Lk;i!sJ%+Os*y&3Kq4%lr&FW#B6UV>ve@GoAz zS!Kog-0OPHrH>!@^TfY6X!AY-bINwKqRnHd_#_v98>C2Q-0PVJ^I7qlH~vTUj5Jc$ z;M>?bZwul1?x})0VzN_m$;l~V!7}iLj(CnQbaCvt< zYr&I-xJn|YeCv6W2V*h+T(}HcIC&o|=AQ!JLSijcW_99v0S75~wMw8)D(Z8=dIDcm zQcc=&;?{Er@rg0ki4Giho@!J~hx$&vzGC_}&G5b78xQ$=b(MsC{+Qk`la3tT#j4Hk}c#g(s>Px&4%y$$cTYo>-a2Gy zH#D+>-kHC5=_=76(LwJ8!C*B%~k@ea6H@z*=oX&d@E_}OxRqJR>V-+p>vFD=p z{sR}sdu#~@ls)U#;aU&~4JG*;`gh$C1WWds%<*Jgpfc7KH|{@C88eb~xE&k01!~^) zBNUc;D7^WYlpE9U)Zm2q1>fVPj0FE0Qe~bho(6pk8_O2`(DROg%f>G|XCVBJOotCb zxCL=oZv)3|lHYrt{Qj)+Yepap2X%qUkCEQj`K^WnEvB|P{*Q-3KSjYrph*ft6a_~5 zTvK>hQJ8Ag;n%OJgAKv`+6CeqF{Q&~Fm8Er6o<);OsRc1(qS@;T=}l0BOoI_4itG^ zi}Eu$3bnZQI4Qo5S2?dqeINJNaq{PkD#9ZPijyH9Z8?H((e^nhMFr*eX*>;k&b5a| zI;hOQ>1n8)os;8wIe!}6c`vPc4cQL1vNy!NE*<|zTHsxhKE{ul484feHVA;FaK4EH z_mT)|+DNa-Q`C(w$8d8KRGT$n#&^D~`Uq9IJ7H6e<6V+6LkHsb+9UGw$|Lgg^oVTn z%SIGiy%4cF4~pY=ZoEC(l-1AqoEx|0Pe0D_iW)#Sx$nzAV2`}!ZfVu)8a6VCb{c(x zfoKdY=f<-UYLr?Id4Ik_AY>q_wK}+01%xC32x}@U?otrewc;YiyA*_V3c@->s78{h zK$XBuMqL6~sGkys;;YPx{o_Lc(n%f$WHQIdH#v@x)950)5rZOa(wjANrciCPkm&H3 z4!)HV+fottSrWKmm5T^lz$czeFlwegyzRc``Om!@IMCNyo0QFRz(42kjwwcFX3i>C1_ zG;J^gtM;T8#DxG@D8gIie1<7aV;pLl#1fTYh}dhDJQ-Dm3^0@*$(RrTojeA{YSoXr z1|p_oil%+=o@kn%tfOhBYL9{z1}V`gGR6y5D}muBhnuOKBDZ^;B3p@lI#?!7k=Md0 z@>-u$-of)*sZG86Fb?pwlF8wdyCXzqK$> zRlGaDD`NALwZ*1|f@X#T{FF7qs|9YIr<{H39O6DBVcz#B3(pR@3`8;V!LUHO{C2Mz zJ*@I3qx5wH7wI*|a2Ek@+iFH4e2BD7t!T_C>jN(s?fuYA#uNmeCaU}BnlR%HpX~I# z&Q81BWCF)K%^a&Xqr0+t(7y9uXMKu3-ADBnid?kJwaED%d{>^Zf zS+)ftf4yMQnBj<(FrVk{10k=*+3G$>cz^Y8RJOEd|xYr))+8 zlpu~4%((z8mJcPiJp8Go&*G?>`ZN(m7o}@~5FlD{QP;A@rmO|;hhQcN5D^W*eS9Yb z!|xRcuKYL^1alTz5ZL6YS7g9J`Fnc35cVw>HyGazhA5k9ocPkj3}Jh16g=JmA+>{I zinNG51N#U9TC`))#nDBd_QEv7P~KbCQ!bi=e@DiS8jY%#+|%rKnq-S0_#x%9vA7lP z@aZ5iV#-8=hX11`7I|?^jaFo^B^ta$E=GtnJ62l^rW=tQe~<+7KF@xU4f$|DMEwWj z@J{V246|fmlwrYPs8uK$7P7+%`Kd|VT0I2jY|l!9Euf+85i&xB8bUkh(vt`DCm0n* zjqe3#a=&xT9X(`h;A^^u*tvy3UVHfAoxIwl(SZi90-G&n`+IDq)iqiULT?7M> z25Mq@%ZFGvp9ExREd}ZkAc@7#y%h#w*9R!Rs;61p`*@zlk+tXB>!_M^GUYmyUro0Z zfl|WU0OM&r436_mI?gLZ0nOLXCmu`A%m2C|e~a$t<-a!!NmrV;gP@h%@|E`tqr!17 zeAj7AEbcT~5U^E03}*f?9|*+~2ueJ+RQ=ZzRaOhCECq+OP-WXk=RT_b>%*^?_cT8_ z`KJx$#ihs;H`364=+zX?Ra>y7C}9pe1%CEyq{bmXC`kEif-OfTA9 zuat;NO)KFk6)KKkf8b*PSf@4d3;rpk2-CFTx1HG@*9HMO`FhJ~NJGOZZ!@~69nKh$ zhP|!p&M>Ra`)LDeiM>IGdAA`WMn+}&Gd5n{Pe^W;AR`2$eV9LGl_839n@01$#4 z3(MwimxOscqK|-6$9P04!K>xTIVeFfc_1w za;CPpw?H7(+Sn_9W%peSH-)$B&#{dgvGf!x+X!bpE2X#VDI_1dHuh(fx5H~=1Co;M zMHro>t?;$6)m1iEtwV%82e73?+#!IX;??%v{iKsJoqUVm^~m?OQ2E}hLNk!>Ioz!u zl<(PuaWpy_98wKp1i&@}ctmmW;7|bL&c`YoF1i zWdA|H9Pxz9x3`l#Am7ux1bp&+tI{q{I{A)!$0OfApzi?iP4C+F?kY3``A)+=^bq;3 zOC#UjWH_z;?xBchFW-eX*m(HtZ;UXeu-}1XIIVpDsEB7T-<4BsJbd!q9z%i-%eOiy z*(I6CcZ8EXAm1Cj1bp_J@1#^6E)InT*c~0Nmc@DZg7iio!9HU&-e}?z=5xnVJe)&7 zmq@<8J5u?rmd^u9FgV+u{SqC2TLkDq&Zd9DBh%Tpe>s_|3@wsFTp1oC879n+@oPAj zpe{tNzDrPNAxLL+FLU+W{t@1{cnRm_UAilGa0Ke6Ng3;|Nxp+^&#*MQJK?%abT_7m zXRo`vkbG#{!~0Jmsx@pzx9{Kj+@I4^k zO;l(GZBL$bWCMN2_tVHXer+c5ee6e3p6um2jO0V*dtPVirZv|jBb!ga(zQL0JIMp` zeKVzeFLzR=lW&jLJo5c#sC@6tSiS?W#XZD+#}mdBZO`LZXCmL%uzMTCvzPDLBp)i@ zAIVrp`;MC?C3`;sODEqquClob$oDKS0iXTe;-pL`-(7JQ-&H@;|3lDz_g0}9*l%=Q zsC+l1k?$K=JV&-@xG-i&dec$se>*7ON|Fzi?@cIKhvmCxQnCjVuypeM@=BYlfPDX( zQohePDbvY!IL`RHAl7p7XLtzNewQLHp&&PK0J)zt!0MjhOzq zn>`3)3i)nEhSS>bAVoZT`7SB5@enaPb0l**Bj1aYlKl$-OJ~1(ImrX^{k@lfPrlz> zZkH#We5Wn($oDJK4fLf@9~5& zh5eRXmWh1FE8^M9w>!y)*3TbzM!vHqCL?QYZFW+5AK2*MwxYx6z{pZ&&Nk(=D z0+vp`aVL2|z8|KP?}<*zbn;z=L+&p5J{l_DhccFL-kMPPUP%~J*l&D7Ch~o(RfuOV z-(e&lD&I{ybFOI3#mUG%nTdQKcajI>dx@8T&wej=Ql^t{j~6}iJ%R_G0`2c1Dl`N8 z9q>h{eESl{6!Lw1d?xaJ4O{iWT=w!ko8&{~duM0lyJ=Fg!w6V9`+eggo2!6)yLk!t z*g(q8NyjHy>lpq957r7J2cR@hz-F2{~pbsXi03V1z;tdL!K_LhLv_shZ&m)#n% z$){U%-jeSJoH1kZ_XDEqu*l5oU%N%eH=V=J1DkRi)b0VC(@7E)i(=S*gTo&NHbm6O z!IH`eCO7h#Jhvcwl|It*zMq}I-LW{fktW^?h=;8QEobN7h$ngMeI<`+wNy3?#!Rw1 z0}KL==NEFi7Atj`adN6~vNntp*N(RE8Q{e@WACaVC#t&8w|a503eCWR{_~$u3pyMG zQd!XW3jj>o9;~5=XKz6}k$k8Hy-vnD+5*0MeloJ<1T38e#hl~;3p&6{z-K|Dos{Wn z$WP~ZhB-9)aB3yeUQrIBy!yiDZ#h};I(fg18Ck`I;dIFziz^8Mo6 zWMq4LE4c6x%41IQfP5QM%J(uSWjgtGd)6c0nW6H%HDmeiF4w{ZwtXEB>7PJt|em~wckET$=*%C(%J88XWLu_sffX{ya?xaj7-<_ZF$al}s zlko#oXa@Fs3^&*YXOMnKBj4A~%0#}a<$kmd$oG1Z50!5>E za}|(pTT1yp<)lm}-=m-M$hRR>zAt7h-}6?6$~R6JQ`m3oxJ=~RTM^H`{k@muL*?6V zOef@fY*Ml>XCmLePV#_!r+EqZ?04B;?DC|O?^!rS?do5hgz^U4-%%A%Z z@}!gR4Uc=|Ym#oDf3-@5W+2~(}F-GMrYvBNXxM<@?DQHXc6t4nMUM z_WQS_WWV%^C%k`koRd5t-&tM)KKX9pq)aE@`*2|1Wxv;=yg~U+R-qZlcj*VA_B(9K_e8m;F8vD&M+{<-5syq4K?qFs88I*eRLF_Yv#_ zhZbZn-=j!ARK7WPgH*F{m|9F~^XVh?l#b}y$4rUUc#~;ieCuf$LZ|?Hb25Z7F zRlI|WL}1x#jo#X+lJI_nNXLbMMM763#($%Wd zdFa_WlXQ@(KYg>t`>D_jEOl(>am#4Q4OpTi~ctbt0R` zqj*Ta5sp`J66*UisADS_Ffum43SX=oX`+9+VdPFN(cVoXjF-4{`JL5)U zWAfw|loA3<-)QXgyjb}P9K^~y>ZB1y;|Sve<(p)r@wNE@~*08IOji1u~w*@<1`w_7DBp>M#IwK-L@XASYh<2utT4(L=O=r>3G#6yU`u2 zF12~LtJH(xzo8MO!=d)*JoeC8?C4IfN-TS_rF2NFB8N-%dsm^olJ%Ea>D}M3yA(>j>9d&XahfRvm{6#(d0dp!QL&_TZ5uuCFUOAy1DS zizef{4u$Fbv;PMAvY_)YNr|8l>o;t8fy%hLKzVypB+)X#$qByq0)^_x=Tj>_S5C*Q zbX(ZznsH2A6I77*fy)_sxPmV@9*k5xfz%M>4u4l#VfpwyeuOc2gfTe}ud_!O6ARo`%5dYok;dodL}g$_FNm8V z2mXSq;cM)Fxve|OaaCI?KU~H37}FvN^R%t-7(cg7+{`GiAkbCilFVFK039>m&u8o< z3ywXVo~~A!e=q%M!@N1i7w0T!h^dgCAc97KAFnR>2^UMZko1Nvf7&39pbR>JB3ei; zK3=q!ALcU|Ee`N6hc4ir8}a9vFkiq0(aP}$KEbV>yz+3axE`tomAQyI7r(%i-e(r! z2YE_A1jToW0hKT?6-M@*hdCLwd3L7hZa{K#_D+h zIw<5PLqJb&8B`e8Ub zu|OU+&7Vf+aFy+w=FgZVA${|wy(}Sp^Jm;CA${_v&6JQn`7^2|q<8+b3nip?{){Um zBrkv3Arg|8KjRz;>6Jfi6A9^+KjQ=m*(873+mKmh`zHA_iY26H{lmyU=QKI|j^W2$Tpt&>KAy>sMA#|JdHI(trmbJh zPptr!pLB7~i|UW%{P?E+_|X92Hh#{oZ66)BE7M+!&+J>Re$1O*!%@N1HTEZQXoxKG z&;p?DkU~$piOAEl52_Fci}R7P^;8!}nxet*qf|H$1VzXH&mI?|4ihi!a02I$x|&Hg zuTdfJjE+~!zm@dgVLWg+Wles+inhef)4`Hyffmarrs@gm>(o{a5!Fu74bs}UE;yDsL&9dcyKAorJOCuZ5$OJsO zO_u}~RumhgjrkW;!otd;7O+C+FbERoFi}8ZFJTr)eAOq*BPO%a1GoI`>29@f%WsVU z)lx}I!9=+d<{Pe0-twh*V}E*xN4Rj2I8t0MjwSOg%=DF?ITFHp=t^@fsucu76bbWY zWT?som639BT^V1S`1OXs*KX=71e!1>1-|ycR|w1T_US7gXF)?S1FbyLg)368Y|%Th zv|X3q2~qYn4&|N+K~Y6!f`EX3Toek5?iO#iE;|xm@U_q#;RI}*=fK64W-kOqjSu0n zlSM1xTdcH3`DVoMF{;wXii;#TMklZJ)J=BBnK>R3=04j)eyW)8I7c=>SfDN05WlQ+&%Fr&};B(FHgSz9M6s zF2anD7pG3@+}Xp@V0jU?p!;+#f*=RWbNQA%mVex$OR!v3mNAw*&E~~2MLl=u(P?lx zdUgP(9TDW;iS6Y++k6B;4ovr%>K znj_L+dBdy#mSYj*U|GSp?6LgIrd@*N`b#s$vN?cdihAzE!_(k&8n(RqbnZX|IXIQ_ zEqj~}?%O3eEx#mVoMyvs*GAR3UWcW@a`Q(6Sgyv7a|g@bh|mEnTYGm2memt7#&W*^ zmML^@IaQcLA+3Vbrye`3M+{GE=)CayEH>%FvGBgdAcVf%C&wk4h zyMhq`_&@Ljjyh5ai%Afp6L4bUfyKkZfa* zwbopiF-~vdaw0EIo$Sx`KR6ARyFVDfG8aJ(mOJq+dn~*6>=G>NF31?mAP39G z`IbGFGrDyNmV2I;F_w27>&G&M&Qi)+o&ZkYV9&XO(-w%(0i3@3c|DVD?6$S$tc-EmdSmO{l6}%(`6{-Y`gHC=1UXo~ z%(v`y?%|)h1k2%PW{l-+QNR7BsOSE%cN(1T!=`s1P8A4paJrjs+2b^2eV5?WZ(PPW zUGhgiPMxghj^8T{mM8xsfaSgjaoMC%#K*WzzCw1+=xE&}%oVFb1$ElM# zcm9AhSYC2l0Lu{wab- z9mTioaoXkkF2Sim7Va~`>BC|_PMy@b?{-Ur<&U=nuzVLm4wm2ZEqg5A|E^20ER%)% zjIg}#C_k1dbndlX)8O>>%>kSqN05Wl8+^+irzgMd5}ZcL!hJ?KUA?h&Zptocu>9-a z16ZDkAP39q`IbGFXMfWrSmwyWeMVTWKQarQJ7woIIQ_XMfYVR}IXIolx9o8`w5>~U zYQ{we8MdvVxfvpK0L!nx>Jluc%OYw50H>D`W3zI=4%|G+6%OZviZOBFMpVH@;<$Wv?%~1j`3xaWEq+FBs~_GKJ3lZ~HVj zbu$7u{a>{kr=R$iJx(jlF2U(qS@_Ecr{W=goI0s3oM9G!cLZ`osc z%jaE!3Y|IF0{Lm*A8qi!d4C)N=4f*11Es zO@rmpRRJt_M395!;e5*;%l`lF5-eYo#f79;4tAY~I4giptreoNkrHg^X}IfYz`T$PXBgnz&UcP0IQ{^XJf>V(!E@XsL-T{7`I@z|K zxMdnFPn#OR@<0SRSeEiFdn^xb=@KlL%cOZyEZz0o_WS#>Ordjwwn&51ep3QCZH6ER zr@i==Jx=*6y9B3MG9jK3PT%h9$ElM#w~}5`QneZXsR&@X2tf{(pYSbvESLOWmtZ+g zCVDf%@~gpqEK}&*b3DG03a16v25`CqK@LvO^DTRv{;{GIe~B4V|n~XU4rErnZV2l%R95txnY~8!Rgqm132x5AP1)i-?GPP zj}N;9r(@+$C5}mI=&^u-qhoWr}+4 zEgs)URnNV5WdNtA5ai&rjBnZF^vwHRg3~COoXiNPkN4Wh^;{*7c%;JeZ&w7cJQqO@ zmQ{Sq9?SFJ>k=$`$OL9mEZt*><$L*wo4o8rK(^$S`kJFLwb_q@^ zWYR4ooMsL3d9;Z{9x&)`S zGHA~Tr;GORfYT=v+&JYTLI-gA?2RtLX{ti1(1>~YpZPF|uA3swe58}IV`EiiNI5ieYsHZkH~-^t3UDJ|K;{BB zLn?HTaC*G@0CGzspAUAJnAT9fmHpOM+1RaG*c0@ONF9Es0z~e8omEYjG2xgHR z@C*N9@FEp2t&7Iv6N!`D!zjKhN9`_(Z~NRL+A{^1ifW^LTGXhO1V)_r+L+^lOcbe> zE}f%}-{38EK_k;p${P;z!Y)bNuwSwu1ojI-(T1H#KxqgmIk2A@0((iU3Rm_5_GuMI zyBNBj2Q-L0KZ4R%hq`!Dp@W3kchv_6dCiALUcD*g`3(FhbyOHi`(jG_=qcJL#l1d} z&#NH{h5u~)7C=0ONH71e`bi{XnW0@^czsrB1S-Q;+h>p&f|8xLYwXxHNC zy(2ymd?uH$!f;TZagzzbM+%L(uMM9rFawY!G(VF~3)Z zv8(n#9n)uC2<%~X%<_XhuxC-n?5G*C>X^TglB4IWc~l*9nfl-$-<5StEK;QF{E)h4 zg(DB~v|hQU7>#SQQI0mOv{8Au6sMbQt^1%6Lo1Rn3)LBrHoSkWT|M470(JF-<+)ar zM{u;EvSNA8l-F4GSzf-i4@aY%7tDds=?DG9iwWrUydCZs7wmN|y}_e#-In+9t&Ps~ zdU&+c&0hR-Xu2y^I&UiRy(!K%6Xs`);R%mGLNL*mJmpDmg{SP{PYJB)@3Sr33o|PJ z7G#7L<9*&AIR4#ms!wbq{J=fB7D`rBYgU9)9pk5pzCcC0(N-;&w21@`(-PPVQ7sRq z$z%2M$NDaRiy*B}e&?h0En=Zlr(|yB_<}!{Kf;}epwSSHv|K%GU*W__hnP3_!vneN zdvK_h7K?=)rM?n77(8hD%eDX!+-0*hedzvP3mL8D6Gwtn7IM=w^ruiQws z(exI*9G345%BTCiHL0e^~Lo(1%A{5+#ry%H1Y=i-ptX-oABEdjhDQE z&iPxYq~&&$-Sq9M$Yv0zFzh7IXzf43csVldzh!-O;E@O%X`p&fm`6XcegpmM4EEg7 zB&`FgKJ9DT@C64uyCaZ zUavwbN8GK0kOfJ#>nuh=!s_mzUQ)2u?V^>DcHE;AG4mwap^>)o*W{ixUvEZR86=S~ zpSYg`!U9FN0L*xGTFFgfp7Vf+BI)-Nyx#}A1l7cxit!yq09x=mFOJYz6d)r#ok&X! zp-|K{7B&zCqJvt8Vx#CEy&ZR0Fkvi5jX?KTn>4{`Hw#hA|F*4r&{nxYE$^_&aF3lo zr%~vPQfS3;ViO4iNs`^IcBOVx5lJwt)a@20gLSg!#R`yiL?_Z-6-=u%eFkU00rU1O6&wgWfR4}E%WDWnZMvw+~s5}Ga9QJ z%KN*>u@1r?$Q7E)HsvK#<$+74hM)pcN$^vgN2m_?7q2ejgo4dj116(H^$^ye^lL7h zcOzCELhA5a&*(HEI2f1PsG9JOzXH70Ah;2)`14$nwHLxrp5h|A^5GkSn2j&^%aL1$ z#AI=$9wTe7=m)9;98&>6bX~LZK0a05dtk@ef^iH^>meF+Kn!`y2dx=SJKp;|z+fzK zlYfsvxt^e>5Hu230~#L_q5{_FxvX3#I=2|mcl`yB>P$$L+|t!Li$9weZh!%qTKPB5 zdM6^YM70=2V}9$?f;rJda;cwCe%4PM``{v|xS-sO-&qK2VLYX;Gh>7Cg8YUcdsG)| z21I%7R!IZe8zjw(8ob-DqzSLhQ3DJaV+!%!g6AUhft|q^S_otXA$0>4d+rUC5ZAen z(XtK@bJ=N3ZP;wvXm&cj2po0LPVenR+GWACzlxm>?47ZlPM`gox6`;fs4+E_onFW| zemlMB-hiF%Bk?>&xi^EncG??3j-6i6zoT|~_->(gy5@;r#ZG6S92wgwEwr7NLRqb! zR6MH}fOFfhlJwG99pO&WBrR&x+oWDJOnLw*Y@gRj2`{K%q|&gP)X^+ZN^X*;tqP?5 zRV;N%&rB>8b!fu;bk=X)Qa@A2Hd0#ZLdNl1YQxPLaJzlx>4o13wv4teA^Z>dAo0gaTF`a8z)Tk1A<1}ya>9%AtJeLrN7*HWKF zkYlM^7k1QASML~VsWUOSMxVA$OfTcUZ|5y_*!CG)D!R?mjd{qRUv+Mt;F8 ztcz@;p2?h)lIqY;TC^HiAW^Un3&+t3RTZj1g`ht=SWQI1^4%PJ)j z$6$1?&|fG=8!cO3n&k|`26N#Z>$y>K$JTr>WaiH)-B4Y?Ee*MR+H^lE4=EHBA-jVRkNq|D?zjMJ?f;pX z<^CV^9_ON?#%)H{R-cnVW2Dg%*aCDI!fW&%pdNKLIHZf`$yRDS zf7l2BWzrU(U^Q`!LE;#6C~iivZpNkq8H1?Z3NNVYnpuyNPl}^)2ZBChg66oFKvRfd zcQu|{&t(oepcezWTUl!E_GSulSJ_ZZ3e$ex;E6PVftPv$5%8yZP-2l2Mjtp^{fGWZ$928Yd4wlNw1142NQLvIgZ0th9DJ>1cASFW4SJxf}wR5G7 z!=s^#^KWgi_rP(xId)kXjn$FYIZBxs&85L|Bo=9#TJd^LSs(0xY43-vmly@&X<~Z> zqq*f6dl`{6vC=h&+l~z}`EyXaO=xEA)?8h%Xu|Ry+=GtYNjN_;7lCaV<=AfL*k!4P5B#h$+JB0N0L~!LlRfRBIAJ z)Qu$oi*b}fu{L%#ZIVh$-O7liL7io3Pue~=>4dZN>XUAk01%Y6yvZnio!ZeY$-k|6nRar4BL=Wa9s0XvKn1YJ1qM-wd(9erHtXC8PHI3-#Nl&;@&y1;^I?YsH*5r;V z_?cSxxSJ`Z3NR|Ptqt>}buqq$60x#uG)k5aHyWxVO+3KptL~aYt2>CGA$5Jn7*bv5 z&uZ!sUGzl{D>M1fH`1so!hhn`RT0Wjm8@8V4r>;JIt&(*>9C8t+~~3|!kK#j%z0Eh zTl~^6mrUVZ%ykEJXl<38ywKh6aOXNDSRDeP`y{eaGOe;=T8Nd@pba06(`(V#v+5+S zKDb;_lfA(VHx`*~-paFD+f0qNnWD$s>bWrr@Ya%zTY%5KM(;1cUmkTA0QSX}41d_; zt@@pT5Vc}54pC>KzG8P}rq!9ftO2cDs?+$B_qJyI?KCGM$njcmIp4DPT2Oiuo7d8M zR0C)#q?gr#-$to#%Qpd!&8E4aCE?j(=Y@%IGxS6>~V@<608ew`m{A;oSxpok5eag?sw0p!Lr{T0W8<; z?#6OkMCbsPKm4^zu)OVyjIq2A3;Y?^b6-A}2B)>y2Jh3kw-Ds$+}C`|UgwrobqP+F ztj-vxtMmOhbyDZe*r6T zav{gg?%RY}>2HZ=D0XzsD;sjrcWsvEp^>I?PX1xu%007M1l>lT7Rxj2+w$a~QGiGM zB+Tn^OxTF5moM>Ac_6>u>ZX%jdx=NUV0v`sF$F3U3URI)NS}TSJUI-|cMiFS>J{7_JtA8x^XH$dc6I7-Lk zZ)#LB1*$30;?Xm7bbJEGZ<+0Abuh|__ahXN8GhBr_st{m(I~`Nz?$pBh*%+@Q9LjI z>xTR-a?ZR61z3(wm zGwRY3XP&%G&67j2EE(RQ6&&Xp^2>LI^Id&U6a!=_g7poPCU#t=#e0EshFowo%2(kO zYh^`?Juh*&J&W5es$FWbK$x3qlA)Pk9BE&DGqQ@6tTGy-7dG^-Y8@S2WEL$ldya29 z$7mebl-q#0|8da~`!1<Il@$iC=a%b(kNX~S+OESnFmRAN0?UxmH9ET z8=l5(gdTT`&Rg>RfHP(+{(eAo9nKmzNf8B9Emi;R$>86wW-f#05%(99!AM~& zI~ky&_f6%a@-A&QN>;&iLyTi{bUsuav?^#EvsI(gq}n;O*-i7#L6AN~x(-#f`Q`=d zH%OHaHA@xmpmUKA=Q_bmRu;138<|Vzo7XpNn0Gcfl${#UL$PIVhe&h&4MSiq^LqQk zC((AA(%FtQXxX(A7lDn8TM1o%Wh=Ln*_V*EqhoU0=KRd$cHP+>lv~MLnagb_#QhD( z?anu|kXwO_R`vdsaNi7HA1T8mh;}3SPZLKc=IpT_`KR$%eqD4c}}9-%pmVclnDrOPNKa2f7me8As;IDezx9s5=Af{H9;`1 z1Gh6#*EisIHq>=4o*Pl`H=%o9hi4A1H>MAjxjiuS3HVUaTjgY^qX*vTbiR$LTC$!rV;9xA3o&aNPwn`G(Vq-FpTQ?RM-b#%u~I}<6mPm1 z%TH6q_-Z|BsG-gIH)DTP*+K4tJ}oia1^V#rV1f2@3S{m3a|13|sFg>cAhQ(bfnA*Z z(bFBQC5nrX_pUZ_)O9OzDHz8`pU6~vS)we3dmL9a*^b1I z!3#wn=iplix79EagevQ~5`+p3H9E6Sn5Tx)Je4$+|DuHXEu#ocOZ;~!nwZhx8nMv4 zoO}LqQ|W}vKS|tt3e}?;&yP{6@y?G_jU|(|dwwc5-OZ~);TJ^54$oOQ8~>OOp8-b5 zw^pdTC6%o%P6kihg$x=5pOxkk#tND7z^sutx3V|PwYy)v^FGjx!2Hc-R6}P+*Z>F` zU|>FveNuxaITn8eC}AGT+Cpv1Jk8NwYcm;>LKx*O7)@fl2<_-q*`8arZR>$jTBP4& zFmOra%>65NBGO{OPs02UW=>oaJqJr3g{|AfN>^YiOe>cx(jSCRs&{Hv1NO9As(p3m z-Pg%|b>qmBvva9YZw&!H!dFj&eHB;D{aId8RM{|uIo-lb zcC_)}^-kBPljTza*|q4w5}JUI&6S&`&*4NDnSH%E3~_y0%bRPAKn^vVd2&#_v?7;1 z0PRAsg&(2-=}Xb?!;WYKG_kOH5faOg;}a!%Vwo zXgD7=LwpQKF$0~d5~P>rF zHu|&V;}OfH86ZqZ5m_l&9mJ|?CCSH)Cw)-CF*v;Zh9t%NB=1m)BXd+yM& zBV_-ta_e-1RVF01s;J8L?q!$apD9;Xws$Lw;GZcIoNDVVi^qxFHlnMo5(I)3tZr}` z$0q`K6M^?|y|JFUl(?2Nu{ z{UdE8mL?Q-iyqb>G^LB$y9q&VBrhAT=qJpTC$d#%J$?f6EJ2-8(uR7S3XYbKHf8sK zR-Ob8t&&dao>6(0##^lhDh`*-`RS8|hbq=8m$Sr9K2;~M z#>F5)KI3>)H#*xiXpnW$_+j0EZk;?sst<@%opQ$%QdTRcd1sF`j2Jcs)(1;Yt9 zss>2v+9ILMqc~NC@~A&l6{t7)ZdB!|H;H0Y<;YvD`ibjrl0bEhc&@!>bvLg}pMr9@ zWI6~6A>A107laB>_r^pe3aPaIXv#}HCd@1Bq*~E2?oqnN#+;rN66W1#)~qBnTW)0w z>VT8XsA?BMG4HB2^+*&aR5i(4t@?@UZ<0WvFdKW4MHmwq z5iCB_3#UQNZZ1BdQ>-c+<&{YSyJ)gSHcsQ!aprAt0k4S=nV zY;s(kmbUA&5+aYYfa*M;X~h6b{f^f{!K6ie>nijN03uJC(#CX6q09!B76<=0f@G># zwse~Rq-k)Iu2h^YR%wwdbCVL#h;!w2jKSqk@GeM&YP)M zlp8$&adWapU{t~Ct)#-nu{B+tCd>r-)OyMLfYI!FCCrOU@LO47=9FJ*r$z6X!V#yr zs@}?dH5%WvRzVC-@XC+z1L7(O8=rH(i5l_QQuf+<1woyHK;B|i>YIrE{6AqS{uozw zgCsi^v38~;VslknqsUY%GV&IylJ5%1B9K9aZUF?H10PBaPlMk&^WNiG#3kkr@(bA{ z%p*qGT}NrRRgZiynpdc$-en;@auCarelpI|NiSR&pFb58Ri~Vt;UA|n12pS$4~uk` z9|pp>A_!VT3VJ%_VErm}wGc${gy6-BAYLW)cOV!(!?IjvSS-4j>llMfaV3mQbd;^7 zE%I>ImY8EXdJOlw(aXIaz*nvrWfu^8){T;XwH|71Nl`mJ^3cEl0FFbXlhOiMVY_1K z7?p^H3csSqan2vAezi2Puu?G$nvvU~b@`nvw`&e>fVUe1Rm1=i^ku9e^v{?g0Gk?* zJGf0+DkHL%gJKWR@Z-J~^D*q=m2oY<#frzs8Aiajjg*4X*~1dLYrKO=^olncB>ma! zWDAL&4lMv6?clbZx_Qxs*0{pzv~yU2uRy~>n+4)t8GF9*aO}0PdTrJ?$Qj+Jm`<wV6-oO|Iay87<7Jvw@M-t#)={qOaDz5l<@x%a003f4@cx_|n2;b;io3Y+Ggq)ZpWq*0$ZuMUbK2#xYT3RHEXhx(%!F_T$Jt zF8sZp(fr&unjZ#=<`a^~J#(It^aLW>NdDCmA2NpeS-0fJWWT&hIaeFnF5^)Ira`WF zi3g=g#RckaeKs8zsvbV~5EYX4(F)bYRGF+?9>JqEw9HaZDihzJcH;X4mag=( z?qL&egS4}HvH0j*<-L#Vil zLzjARJ^8+KB|nwc5k$gcOeN^LJXgi5=f)%68~v9=uI@t_M?VzfSi`O-s}huNN(rKj z4nJjtP?h-=Qi@!e6i(RCHd<{5cu8KPq2|x7yiPJsYC|bu^66{->{9Nk_R&*>Dyv3S zv3HTB9g5+K86@6PnKYzy7)t9@RhF7gNNH77HabfpZu%_iPO0wMw{_EiUn>XFj<0Ft zc~tHi&^BX+vN1#rZFTZ}&F+tfQ7g3KzRh?#S5*zOsPwof^`5Ggh>eo}NjDVBZ`mcQ z+*aCUsKu;Xo~~*&?xJL)3u!lLOBpU$(jY;0AeU;l4K|x;8H|A78ruFfoi{$n!W5dn z#;BcDBrZ@jvN2C+&OxfFRx~C0q!(f|-o}1Usc}*bv#jG5RYQ2z9`)?pekY?SC5TXv z_kl*=hMh>>Z0bTG84VO+CiKh?8!y zdfS1D)w(Y*1HFnC#Y>vhdb)m5&!8lXr+vj2hY|U&uIF zIs(S|iVI}iOa@xkd_*@rY4#>jl%$sodY;}XYF&|XQ;!zc$|4h1sNZBZ^DW*Qq-X76 zjWkb0Of_Q{%b-LOsQTrq#cJ-Pc1=0RO+?Wg*2pTcoa$3xrt9&zriA-fiL4T{1DI7} z!xIYSz1JNyvioW3M%{MjhN)FzA3ek<89zAMFd%F}nfR*LW3 zWmzd6#)%}UXQlX+5^#^{K=9N(v(Ra*Ohl%u^ne=zdDM zG5!25etK7my?r}#3}EUREvQ_&l{TMT>QJ%ZqTXI&k*d0bRCk0#=j_e4^WT^3>oRUV z<|4mK?}+$kU+A4FT|wTU)G(~Wh{%XP%P@>h0P4|bi2y&%bj9v6DJYx~v-D(LnJV7@ zY?XG9zK4GhGHlcDN7K5JF2n51l6q%*f9>FG7HE1~psyX=@@vUb{~ZIj?6layP)>a9 z;CXpWz}F7GW3hu#efl{{{50*LUT*QRgC)A3Ry#O))M^J(ds=N@Eg{GlP>e4^*O^Q_ zF?;W=RU=f*aqo5i;<9OQV|m*^k1q8)j21tFDR{k(=xc>?P0z2;QJ-jS@5#VO^^2Ra zKWRK$DYGGFqiK%UURJlU@MtU+>gyzCjV6VQ8ec`gd=x?|s7P;FHwxDUs)H6Op-5Rf zL!C0qAl-0FEpgm>oVr{pI92vu`v>*-6a=bGOqRJ%@b?}YV%+s5S*UK& zt9NM{>h){t)+Fj;y_dK}52&H-`fsZ$-Uzs%ZA0XVSMlR|s(6bX@GHPl7F(n{+4-^b zQregiz!>gKX$w{Hmp93^Ra?3oQRo#TDf9Mi}}FX5-Z}fe=Hub%cA|AB7x@4Su|maJl=y3z1@IH{mC9 zaK=o*y(tYuw^90mvM@m8NdOvn5q)!odXBf7f7n)nKgtxVsRGQOhP(owdc7BJ1VZDr zaHEo>N32hp>4W_qYsEx+`ZR<|$&)Hg^xUZzK=ZoKjLIu7R1aq;^*wLV@@am}!V)MK z@+Nls=R(e`jidNNlfl0U-H-DYL%Oi8Kid52+)x0dm4cp+|e6Z9jb?{_8U>Bm6hU5R&I&btx^PgI#g z)x@HvEF}ndSxrORK`v9>>UN~h%yp6xs7;C+A3qmT|8P;MuMxLKeLR2abv<*`byASH z4@$>NFQTfb%)II(ipo`(fw!obZ&4hKPkp^b@pI+`%(p0ZCXni0rNuOGl4f?*7D`fy zsnp#nWsAk~D6*=)ks>P_On$1&@jB`jZ_V3*$Q-!fUqjoK_sMcji+sIVLhtD(oAsgu zLdi4?)!P(GR^v>KnVZ$sds9Ay9fZ~isU@*uBn@p3?xljB#1jhZ>ZS4+^g{DUO7I`z zB{=VIruUIbRfiIWO5vv7iFw^WF*n}pP0SNywVoK$QX+^j5yMhc1RL50$IIPfADxmw znK;u+&2ROfxOtk-%Dw4UZl)_`t~W|@&8C)?)~XmHFP^Ig!K)a~2}yprlyBU@n-(io zcdUYGXae^5+d!n&v5TielhdoOGpA2C6C2A8`^Br z5xBFEhm7TdIoOle@0IN}n|?b#j!dpLn&%Jm-e&*Nor_Rw2vzFIYBo*mR=KIFzU_Fu z-N+JD(a^R{uHBhZBaIH`%AiuOBe$RZHsXw>@g zTyAdZsf{T+s`oDUlUh~rHuJp>mGZf!p4*YD%DU)ZSV3>+&DGprgS@6^xEVMh;0>Ne0tN3}f-czd$4P2JLzyEf)@a6?O)@+s_) z`^9Rn`BXPHl|?VHMG8;Jv4*y9NkolVhIiV@x?U-wthHTCwa?DX4k<(_9nWW0$%=kM zTYhq1Ln*gprP5F48hug@(q|8*RFKBe3pN1s0$)we!h{vE%A-KRd^kuum@pOIG{Tc2<5qu(L#DI*?6VYI9Z#PtmAm8UGv!gKAA8$p z!EI6i{47b@0*t9oeKt-$wmz3tu+yM#eP;I7=hm0{q0dZ1pBIk{tj}0>Civ-dmCR`R z>2r!qL>|XJXDWBc(dXmkee3i7v_Sftz>Ibu_F2wAh#J#>YQ|gMGp`A#&+6X#ync5- z^jTx*^PT9x`aC+jmp)TvZqrYnr=qLl=`%vPJB~g-Vzsm{`u|?R*fNIsxQLR zFns@*#a%VBtFtcQ%)eNg9h^4PR!MyEM)efC2b%e+;=AQ3d^1B<{gZI%LAI!82~@-O zCvhODQT|+mKT3RgtBOV?je!kSw79QQH|al|j`Nf)oVUVSGpX|a{Vk%}VZ^#xt?kqj zb-8c5;prw`A7&IzZ>!!bXI>g0DNs$g{u4p{%b8vB0KVQe>sQXmTq=4uvdUaT(?<7b z-_7^FwI8ncDtc@GYhUj{jsP(TJPqy zV^*zG93rg!u2+bQcdX!ZMLNlSUy)a&^nIl~;FTM#SCWYV`d|=w(l7DO9i^1j>H89I zGiBc4s!xhChy$%*Uz>%Faq0fRt`3~krEfV@HQp;4Jgcut&}*tpc2f8vVw3Zh9~k;H9&G7+BVb5B3J?;$Y|6)%HBx3J z`dP+`Mlx#Z5jC{U&Gm+bB{Z~c*`&II;v`DWYs_=Lue`pe&C};@Zf~y?|JM8_UwYze zp2-s2qi?>(!LRm86!d-C_oh@l&%NB-(DqA_-z)pRjayZX`J9r*ea>Jk?PxRUV!HTG&X6_~3U+4P!_ZARD6a-f&6fEERJ@TcNmAn;*?f-hoVv zTuNJW$aoWFqKYdurux|g)u7O*NDx~n!)p4CN>;5P#uM@C)Z6!nF4&qlb)nbfZK@D> z2mJa_C{p=M#`tU#8iUkZ9>kr@K(_fV9Um2!Df-A6?Nvh~gCITWv$sb=!y2YzHqlwF8xCTv^GoB#QQ@}KWTK2W-p_K@r1Nj(w&{M>eE-6u)UzKq>QHp-WuLD^o?>KKW<7U?A zlI6yf#ZzxdA<)YqVW?or=t~eJ202^0(>(Zjn~%5N3`X&m88h`l1{IzADQK_0M4bL{ z#sy!}j)XC187Al79N%c*8x4G;fp0YMjRwBaz&9HBMg!ky;2RBmqk;by8W`^^STr$y zyfb&Kv!JM~d~A8b#IY0O$CVb3i&Mc*=kGK=EL~|V5o$pjGPIPSgsanod8u$%@SyK# zFW1&)p4zzkf$49~Nxvvh8&S9M%z;|+Pbw~2P?tR7JLw^Jj0i~{5;`C#T?@T9)Fpn2 zpELN#k-}#tpILln^SOr4wS3a~%;htWk4!FI$LG6zzQ^Z!J`4CP8@sXLi6?|6mxs}f>h*ibn1P4WJc@(K5vDG&-!QlO+P-m>&8P9K3`VR6n}L`*ThT8 z&bl)FjElF-*n0h|55AW0**(KPjkt2t_4n`j;plBw&uzHlsI5C<+~1GB{EvsH{$$x- z{_=;>E00V$?cJ6C_r+VDZLf8A&pWfm6Z3;>99Pf$~ z#_t^ayLq*DE?63z)m|Mr@CV(avhJ=}`@)(tYgVoZZ$EwW?20)+w6`_`x>7c{mk37KhF8h%cwBm@#}$_Hl&ZMR}Af2jy3mr|AhhM z=>hy%!|${774Cu4`j$Ic!|dz+g9G}OKN;ZuvzgcF%b)iH#3w$0Kj#EU*LVtlU;e*1 zxNrHE0D3z=fd8`t$hT7Q^cC(qXY?&k3hi6|OaMJp2hj5Y0_`i@Spod{Jb=6f!|KbQ zvjW(|hyZ%{D1iUj!F~Jxj{y0betO^c?+ze;Ie`CL0+g>8cF5h5WC^7V3JHYq;F!XSycAjSYQjaI< zRmWKVN<2H-b$O;KCtVVsB@ZSm$rXnD9HU;13Df0ALwGO z(YU`oQkSP1^1+lBiKk}h$JFy4(kt?czxnFz86$r`Db)Qj?Qjn1m2l@6@j1`%e{YB` zANY`d{|qBxiKI*J_qVsp4L$qE^Fzw1il?95)*JD!^^4~#gfI8k`rRLkok+N0hMlJw z@q8DG{E(5qdhTf2kBGmN$Mn!-<4zraH1egxPoB?Z@h8%+obEE~L%)2CH00@i@^a!S z@ozNZZ`$DiBivfQ``4o%3HLE0{^tD-qkhQvLM5DO8Vt>qox!b0c;x=}Pu1cT)^I@vk?w8TCf;DSCX2V6|I} zaI^fv4K@6^z_9cC4F{)(>H6_6r?EzPjPuK{$Blfc@C)}rLk~|H{+oJmlP{94bU%CB zhkc6ubok}h>C{V-U#nN>=`zQIHxN&`-{oglXB+u)U4wrAZ0?ccFy&F*KOkA1n}5C} z+~J%=_|=EkpM`vL zJ>MgY@_xktBV0dw2r~3J^J3kf35Gu&>|fH=>X)t(Bfrv&{30ZER2uc0f4)~>50c(2 zzx1v#>VZLqKd40=ccC}AztzY`vp&@9_Qi$8MfOs6c8S|=*Ya|5OR@{?ImLyp0;faE zE?iXL_MX|V&zL_aHpXtxDKF1nRIn^IF4k^eR#;%qab}m6+TAN$4tqgSUU7nS_YbX;z~#-5w)&Q3{}xT2a9^&mDD zA9Au?**OL76)A-$=Slpc?3|?mk~bsHyl|pXD=0VP6C3L+UQDj4Z0XV0Jhzh^4V1-m zMbo-96k`sK?7~?Y_KX;N%Dl_%bFU7ps(JDDd6(HUV(lqnZMn|$Wx7_1ouw&DkYvol z<)V6%l-(*!Xm6`_d* zMa#0C1-bU@lEr0(jv_Y~=DFiBM`vHEO9`yCi6?2Iv2kKy#YN;nS&n-aF-%EWmSLZl zV3&fIS6s3@yCm0M;C7T`yNfBP*^7xHdNT@nPj(~gtLU7BiX~%JHpUk(^UKAP@Qr$l zLg*lU#V1{YE-!JEI!cx~l*Oy4eFc_u1@SWKIsby4V67&N(s4YCRMCnp$u3%K)LhDx z;x04m&PCa|_Pny99H}R%Qk+h+a`wx$_~rT8ZqvqNTQ@eTB3iy zV5Oxi?W$hm6bT`%L+=|W!X;M z*@@QwR2)-9yY~D7>RPGXa?GktBH>qdd-A|9&U&q=RMBVDC32P?gjLhA6LK9n1%=s8 zyQ{dM$gtS}_a-bZDK2x->F`>%uj+_KPc!e+py@GJ8KA5w+fDhhbU3IcV+)E)?G#yu zomHHY?2;Aqh6>zf`8rX>CV1O>$*jfY<#v~&q_nt*cEY{FzAVPbW#9Ol`n4p#znv}B z$0#VZt6rO_o)e9EY&8;^L^Q&L*P5`6j;J(+V7>AOtzH_e3k_5sX-ku3#- zmNzcJr@@K?*p8SnkMF`dO)Q|)d9VWyPg#K=!>yO z`s{Xk=?=R2dD#U{>c@h4F?Rl|l0}EYo17DBbtcd(d38z;Q@ZuVd4b!il*@9Q#ifp( zX!ok`ReUVb>E~GCGDo3{zOm$u-UM1Yqau@jpI>EUh=99=MRUA||L!T9)5>sWG;jXt^Yt6p?R%cSme7&V9kG;Zd$}J7TNmsdkRi?Dz zMsHW8Ag{#X+bdLcpx}4>Cp1=Gm3N698c-^d2TYjtZ@81N8!Zr%$(X?h4wsGHk-f- z9b=huSvr1ES%EW`8QfqkgtfCE{)TD;I2)*)!XB^?^*M!|U~*l~!qhi8KSdkF*=bt1 zk_RIR)=pP{DzV;Sh9*9UJMl$#A+a=Lpi9iyFukn~IlqIoSElzh8=v%% zD_D68)gD47M-bnujqB;0KlQ){RyYQ0j~KG`>O5R~!Z_cd&If6A#(CMf1+1WiXfJUt zM-W;nFs{k@w9W@v0Xb89(~vpTd4zVrI4^(e7sTfb?Qh2U3Uz+E_N95Q?mtaCk}fs!IVLB-9qH%T9#Zk z*X=~^5`l4;>pCL8%M?O${afVMTI8Cs&LQ$e7P+}UK;$VFxw*b2@>LeOxz9l4Gc0m* z{Z-^rKFz~i*A%(a6hd>|MdS}yJuM4o1mo9mq-FSW?c^+=JITjb_Cy2xb@sCk&{)*{a_h0xpwAoBSZxw)<; z@--H@xqm|BS6k#-M~XU=a&Iifsq-+4oPy{*A}n&2&b&vYMb6Tn_pn*y61I6nS>#e? z%p=AkH|rv~mSB-n@VrN|MNWm{JyI=lmO#BnxCa*1WmKME7g++e5SE^~17P-{H=22~tvy|#RYAkYD7d4Msi=3r$@3F-q zr^@sm+bwdVYE#$hEOK^(c#nFEJl`wTv_^~kTNZhfk!M-tqb%}#i#*CA zcUk01E%I`Ue6&SgVUdrq$SW=KOD*zhi#*yQud&DrE%I86e5^&j#UdYPk#D!iWzVE} z)LG=?O(Cqe$ZxaA8!hq}i@ez)kG05KEb=&uywxH%_ddwwHj8|@<^B$f{4$HY(;}Z} zk!zpz*#G4gd6-2m&jy%Bghife3Sp#0o?wyNEOK)%o?MQy$P+F1$5`Z(Eb;`4Jjo(Y zw#X-2jn59@iO%@vhYgXBda++;rWSTk*;43z%)Z*=CsmnIW3)g>sm&_>pl8H;x&` z@se?b8%K(KOnbnVqYBasqq5-PsyU-Xd7ROq4hgsz|W^*2zwerg(#qAhaDs~CtF<{ zB{p|{c9G4TxQ((lv3*8VFRlN*eA;X4Ve!=k5E**SM z=#(qQ#g9DKHac#^nP;6p==8G_#(w+4iz7}AkB&Se>|3V{93Zn+1JLwoXzonyEbUv` z`PzlrCE951Qf<6;nKntA%=3^}@%-bpJnQ&9o?&!oi+LKckf#uD)9z$!dXM%)?LqCw z+N0Xz+7sH-+B4b<+KbxDT7&kE_9yKx+F!Mgv`@9aYe%%8pi_cQ4>}|0?4WN4ofmXT z(1f7NgRTgg9CUTicYd`0l2;Hkk^2Y)B{ zyTRWJwg)?c3xbyhFAu&w_|D+(2j3g~gWw+q|0MXQ!9NdvI`}uiF9*LF{BH0cgZ~oz zVeqHH{|x>jI4ER5$XOxhhl~!nG~}|7%R?rGOb)p! z_lG>@>a;ZA%6<_YslY2{uy#4q##^Bkb1$lxowh+VM zLj4SHfZw_QsX3-zR$P#4bLAIiyAl(b^%d0MY%J} zoX(75LmZXEY`5J#CfX(o6n3|_J~_$h*TUkQrS7s4x6>gH=8kt|m$1_2aN6^;OC@f7 z2t+(CUz2#;U~b@8Xq!CQwsaob`U;91PHk4{eA(+a-Pq0dud{q^*nvfeCSTx2-3piHU^;i`lKq*LIUZGoka*9hF6vA6>P2|5#Uo2z0qfIuxIC2whx7_ND%A&Ho zJV(h_P!6o&vvgCK?H*fD!L>6|k4kU8+Ztzuu5~qNv}Jn7B&+qW96{ySvmr&aAui@C~UcJeGeW zdlJ<OJfbPp13 zWd69^Vzo>`rJRdKZHr5?UHM8sJ`X^Q8)rb0lrlU7DmQe0>&q7||glv2IA zZE7dCc9A>3pmgjMwJWKoU#sd}qEce*3n$q#T+udt8|9S5#1aP<=_tx^9OE9@%8$-( zSYF`Hx8dA2+2$-J9c4w5ZZa0NO)|XXn(BCaH;E+mx^>DVTTiHccr{Yq?I6eaveFV( zD)lO#)!gy1myPEUk<#&{Zl_)E*<%rL>oQeeLPb$(ELNV!WMF7AWa) zfs@|j{P?MsV&`9e^pbWG^~Q|Dtw@;@w zzw&Wh2glbARgLYU$*TT!xl1&v!ZEsmvxA<}=g74qrgKE~79|(G#a@}T`5;7+v?oRn zxmR|M96iOX{*F=q^#n$dmEK2R`S!KjWxvbiF^tpCkK^aT|3B;#<2+gWG@bU_K1~P5 z*Nt9MrBPQ;4(V3Gc<99J)LKxC(Usk?i~(h7lJ%0MV~@!eITmNT3zj*gLsI6@%$TrA zj;&!%d1NEo#WuiPiIZRZeA4wxAN>+*1pQ2_e^rT}qhn=~->u$m%m1tN`P6!*k@PQ> zeL6i>TKjZBitYBIo}O|~zS?K%&+yGM9^o@<$=qUlS&=-aVrOXJv=>GB-$*xX9wUrs zoAQeel8gBRqvPrC=VYG_=onHiPH8Fh?x?DB`MU5J^(g&FJw64DQEqnRQj^}oQ)f%1 zxydecOtNYGzjct7TcG#ZZyBV?<=ldj$4brR~ro=>yT_2$v1+VwKQc3o5HJu~- zvy;+`odr27qHU?n??l^DZi==|m4c!=<*K_Sf0$M>(y!l6%zS35W;(Oo;=wi9F8v0d zGTLXhMfdJR&{T>icy28~Qo?2O>G)oX>7?A$6StI`c*HBCxPJ~+szu{6Vc*lht5a8Y zK}nKs3)YzRJ6#enU5y0LtFc|!YkEwcyD>+U`k?pgt*=xfd-@JZ>b$=l1xZ|>?$N*L zv436gZ+fTKjb%klROI9{U>tSpC`zeKpO7HqWuk$BvKq|#Nz00s78Nh=x!1+xcX}C|&r(dcB`V?RY$fz=ZQ;EL?t&cO^4-rGBQ1Lc zvoj~<^qTCY4&Avq*ZWO{Baf#0RD~y0;TW0WS5xcKBxA}%M(feO`Nd?uz0_IkRwL)* zSDk%TQaH?tc>yb;0a z{HCs@0sfjhcQp3ncwkXqkz~!n=CuM)jqj>Deq_U8E1lEtNT>sZ4v zGg>4>;ubyGssrsRaO60a7x2yoC6q-SyPI`4bP+-*W5lXojsW$K+dXjV^Vsa5^T-jE*BWT98xhEH2@S zyl|DnRV*(1bvrkXokAZWv8E{B~gQg>uK1COu$Ze%2G zUDNctYD^9G#9MtcCOrJt#!hziO!w2eNNb%j@tdqX=0cM4iFsRdWtip4UIQXO|BgR5 zP2Q;?g>PN0MjO^)hwq$M?;-gApT-*{4yG3!B`#hM!K6{CJ4u%5WCh4QCCWA0b_t^) z@8HNB_Iu~IdI~+W^-|nyOhM{HDRV-=rx&4u!X)iZHPn|qYOWR2C0~-J_a003x?3N8 z&Z6Pc=a}@?#(rG}tF)!kY|)??;fY&z%gS6ynd}Vcy@DTYo5$4TJjYEZ?r@fD4>Llv zF$rVTp%RUxIa6#?#+b3tH{$)*q$6G42IVM;lDuHu&srFJ8}S_a1u>IsaxdScf5P;s z-k7K^gQer*%@tiAU%?y!6>0AnVE|^9(q)NZ={3-8*XHHX-IcuWx$(pd)j!Me$dX{M zCQOs?xvJJik6Z4%B}q-UX(txFW8eKvIr@6*gA*=CvEP&cG)WQ$v2b`dK`vPUkv%+32#WnSF7ky8!mWyOFuATHvu4z|L%i;HD@ zp|+^FTw^tX7gDnG(BX6zxJr482nz^t+VXsPqln(Y^y^-J#r?|T?|V@LK4ab6x{J;j z)OkAzN!E+Bj)D85C+lOM3q2EyHOfIX6^12vSKyx+Ue6`6sMsc1^z5G+p z7?e=va`DC&MkKsnH=C}K@#b26Y~o!wV(6h-(`*JWCALwwjPfoL(MQWED3u+Q($~@# zkSu!?avgcuWlmmgrSDPTwX_Z`FWZUj$l>*0?X>OjK~}0%c-}i?&s)!RZPuKbT*}kt zOr4_@Wy_0iWTA`Ujp{3?Eu73j^=1D(R!*rkZ0=CSn$)MPeDH z!!@P2uV>tB)mY$4%6gaIh}n9{+(~oN^Rc-l#V)qt$-_IPcGSaJ z23kr9^hA+;v?h;+l=5y|CwtImmHIp;b%X4zC|EdJQ+w2H#*+|weL`*6*9Lv_6=Zf* z=SNY&a3jwrNDE+>YP>Ys@W*(7!JK)OdLXyBjEVh)nv5)D6fo-2QMQ#URRyR{$BxqE zITo2wiyGq^t?3hSvZ>zK$foN3KI5fVum4u8jCxew3eS@Q>|M`u*;Ne2Ii&bn&bT&~ zw~?7mkkR9zeN<7`pQZRZ^rtF2Q;B+XiNsf}51&xXG|CL33bCirR+}Z*9JO3}05rNX z;p*23z{LsfbL_L16R_(g{ZyH4`3mY!OIxN-chPGwZI?9kYQ+NW@v+vK$Er@vDQ@YU z^x2GE)H1*ef2#10pGiE*acr91+e)GQTZ`5C^q}+^;}h;X^+d(D)76ju(uv2tC$JND+biAr)PB#t(&KazliX^#&+Qm1 zJ3|WTQZ1Iz4W9ZB3`8&uT7AsIF(9{Xyv?%3FHHprUMfbPxOA zL9~pB)c)LpB6)?k*5`Abx%~P~zIr$?2?f&mC%oy!rS6$UG|@72Qr&)jp@3J{JJ~+& zEcJaTSU0=l=)T8OKb5d+SAHsp$?pJ)qCWIS+t)zg|o=3F43rXCW@1F^l6Ats-e6C&I?{KWcyx)9RneiBj zR$(~uUN^qv8#h+?-xwf}9{2jU{BzDxUn=qqpDK>+lhwU^7x(7`1!>}5`GtGKuL{lL zm%Nu#?hjV4^5iX^el(*S!i5kmC^#fYzVRq}i^?UUDgDm&-c5C&gw_B?}C+hat;p%Y<6lZI?dWU;#x5p0moAmb1(?E80 zY+W5&&&SRq)2zK5pp0~YGU@>#`k5+G{35)5rpjFC0RDN)o%fn?r{T`uh0LMJo-BFz5jmxXZ88tl1Kl0 z<*5I4$bU;7^*^5dPy2ti9Q8k*{ZD)U%X|OR-v4<1@0Fwe>+`=QkG_t4@4uh_S$+Pu z-T!_joqXK;Uzc}G z-)4Fx_%vkG6?_mX|jwZ;L4#8csWoOeTv^=CKKZ(zyqRLOfL%?rFRQvIf#4Vif)-#tTp zt$Js=;tqqY2BY@s^1BV1?lQlxyW4NbLw~2=7h|y8;68(f4;$244UrR40pBbla;8+ zV8*nm>U@L2Cui#CnW;KwOgF9@=gXGr=NYqfn*JxO)#EknJv|-7W(BpUGJ^K9*8PtrU+PLp{ z@#{41Gu@dq-L;k^E0KBLVf^k{mSLa%?)ho!3HEs|mNR)`(RevtL6LfNmR}d0sINB9 zS~4$t){>MI{w_;No5K%iE-orlZ|sb-r|~ErFMgEQB&Ecq=s&_j3?hu*d22PoHRI{e z>*vJA^ehj@#`G+{&YCmN9vf>}S5HB3g?CYT*1TAIN=n+In3R<1@$wt(v3B*tn{%%g zcbE70V^>=ON>fsB+Ja>Zmnxa`Vm(9Jp@C0(u46oKNJX0?wj}^YJe&T)KQ%Cmt>*xfcrZ<+p^ z^`f~y!IYbvGl4fi$1R%}%TH6wPr36Gu*D@Sj5kWpT9Q^cAx@G=<)DXWmiu}mT+@Am z?7xy%Tu;=um5K`q!^}s`sMpNj3G5YqTT0UzAQ_9C~2tlOP^Vh?{rn<8id*f4U+#hkTZbwQrM8?d=dgFRR&-Epu2s6Ca z-r>dCuVDjHxq4a8aiebfV>A8dXW4ndAsd&yyF$sUp77F*`z-qEb-zR~=)90g>-k~R zxA@VO&axw(7QVOLe8jUIM&5G7vj@HbH?ynjw3SCZBW~2RJ60X>WWaT>e37QT4tKz? z+m3iT;B*+B!|#W}eE4g)8XkuAaD2rPPZwMQ6LK}}r*J)Nh0XB1+mCoUuL#m+!^p%S zZ3Rq#TVN(!{{17Kw5j~QE!3{$w`-vdCR85rB*WP-3w{q)z+6}hAA$AoW!MVuT7&y! zP5TwJ!E@K*9xjJj@J?6(*TGu&2&{+CLXGa&%zKV_qF^qR?%HaY51U{m%&0oz*#awJ zBW#B4@B4Ed_JK}M|MX(AEydS;7bKzc?3=hLQ;Lz!swjIX9-7o`o zLN^>#jXvR1unyKk>9!7e0KLMwFcLFf1ruP@I`j&gUfu(TGhko z9d3h}F#X4*8y@`$M+4ySF#JzNJn z;FB;SP1Cwy40Qbz_i#0I!AD>w{5Z-jfH3m%4dz@hy3-g+1hx55ng8g#>9KRe>7fqAeF?u7ec_*U%gTH*n1a4Vbv zyI?Ln=jZ4R=E2Qy1Kb1Oh6mwiFf^Uth1!O_!EtZ~oDFl~23Q5(hTC8#Y=R@6Aiv;^ zFq|I?p7|u{hMh1S4*dmsgNtA_+y}SAL$Dc!ZAWh~3P#MO{5?au;jm}PZEg?l&@rooZW2@_xy+zq$E!>|cX`wi}4#Phi4$2+fs(XarfK{s^5Mpy+yUm*T4 z4>rP0unqnhhGkM;*5e-5zDT~oIlIvtTn204Bd`v>23z3guoEuXgWkSJd4tLDX_y5s zeHnX$_rc9D?iK6}E`kSPH4MF;cr{SJz?Co!Ry7iDIPF#J1*X47IT1PB3*Ux^Vcu`a zZzf&#!6euX7r^M(sh8ozH>j84{jeUM(@ePV4H(LgP7iyB^umU{q!&K&JM0ur*+;nW zoOiKP*xG`fLhbk1DSQe>(7uQN0Xvu7+keE);CJCln0J75!n@%?_y`Q8{eK<0;kZAc zPw0kqFt`<%Wtv;IuF;Lg9`KRgIG!lr|y3oiOA`3TElSQhSK6x;<< z;gAo|C$z(AxD#%N!#^ZlupAzS&%wxS(%nY5Fc)URb+8=nhBfdYtb>C-BHl0>cEY(Z zauN9o6X02ga1W=z_0R>I;Uh3Shjs#{!hs)?PcQ-Qhf82&uBL5-8L$qz;UTyk4r|Bm z;mt7I!SABOXt)ce!NGqcy)Xe*K{wn6?}JV7HP{YEe1iKt+B+BxhkuHDxE4C$PFN-9 z9k_?vU=w^Bw!;hlj{C*rH;jgrFbzHlov;~J!O!6~c*$qDhj+tvxC4ggqwjy<9*%)& za1M0BWv~ivg40{3tdbi$3W3cd!n!RRk> z50}GsSPjFS=ov=C4`CWybOiTsJ*4v0i)rP0bQPSSTV56Qx1on zLcC!DtcSU<72XfEQcX(=BV0K5RNTQ*=z@FUdiVj{0fPs1dG^92co=Skk#6cWm;@g= z4Sm7wumZjYYvDmy54CW@h37)8481`cbi-u03FgB0Pbas_d;z5@rHBZ(B;JAtS(OiOoEwkHY|rbU=4f& z*1_FpcX?Xi+H<-*U2q#5R*s#+B={v<07rhi%d--0g&W}>xC;(BmvqB?7`j5!YTyX? z&tk566wfy;`7Qp%t_PFdiO&8Sv2fE{_|Y6@z3y7{_-wQ#2utRfpB3t%!J`rU~g~?+z9^$>*3{z*c)_1eyUV^7TRDZ zOoq-$*c-ePu7-QyW;iejdxJN_HnB;I#v+SM=~u7Vk`3cBF|xE^-E9Z*X~-*7lQ45z`NcT;X)JiG&Dz^%{?>*0Dh zbSm!QI=C0s!ozSk9Qu96K`IpavE`T*~C43)lgcDQAH#mDX?E%~a z!`9;8)z~Rq57XdN&8j z{RorO$v0R7JKzo&QAN4?4({Pdm<~tHMQ<<;u7?l7I@knT;3u#X+UH^C_fikTBsd_0 zbi;9QB}|4JVJ6%Km%;t;ao7ca4Ts%Fx~{`LEP`3E6|RPza5IdWkG;Yf@F2VqhTe}| z!x8X4I0Ht07kh=%U?m)!NjZZF*P}OB1v}ucU_>=~yn%GXMwkKTEFj(RF<1k;;4Zjo zA>|C#LhS+i2hav>cEW|zU@pvutKma%GwgzmFg=U>g15l%b?7;p{DxI99iF*}{DLvC z8t#DG;R~=Cz6Lwsm>lf%2b6ah5C0A`U{WsehBw0Xa5dZkKZASW=??514uwP4Gv0*p za0ASMo1q(i0@uTJdANrUEhgP?E96IzwUT`F4ex+6;AWT$UxTaRLAV*NFCf3*lkgzi z1w%IwA2;6E4a(9CQ=q8;*eGa5k)gkHI?lK5T(uCD=JEg2R4`!tupSO~qc@lawTCqAUTA{}WuzN!h54`zR>I-Suv3@>8{r(-1{c7v zhiTVg6g&u1;n~Y^4^Jt_JxqmL;N!3fz7N~sh!xn|M$!+X;Z~RilW(Sefi74DKZV<2 z?Mn0pV{gS?;C49l$K)@JhwZnaH<(yK`G$AjL4Lu|JIOCN1RjKWcN6X=`iU?az7Esi z^6wKayc<@*9dMg)E&7J3_n>dM0S@~=(hHMe)cvFvUJonbop1}huo`{A=U^Lr8-~@A zei#K0!&K;j`Ebz#xQAob5iU%JO|S&E!@Tvx`zOpZ!f3euhr}D6vw?WSBv=Kv!yWJq zxEHp=!?5N-^z{h+%^LItlVJv258d!7xE?mc9q<6$3ln~XdpHvgeU$nP#>0QY3|RCK z?%^i59yY-p@a%_i58d!EybliDOuum>?x722z}?Ud<9>{Lcr)Aq*TcQ=Rd^VF1c&~V z_J0%ZVIs_c)wPrp_$=HA!+%1(2&cmRFb{S?HyrjD^#)9WJ03yra1X46Qy(SYU=?hH zPr^317lu8K-ZqnOFaf5*nJ^z_!b(^Ix4=7KBW(C7?%`n=wnd`=;~qx9R2T*G;Vf7Q zcfu`jFKmS8K8}010EYdH{yvO`Z^JY=e+%_8to}KAgHOU8uyz}IgRj62*bXDMQjVXX zJ%PC}9o`3BaOZaFC#e0Bat4RMW|#>(;KMND=bS%{d-(JY+{1S0g5#dSJ=_ns!+Fo* z9v*-la7G>N@HUzMq<(`D2!umv7~oiL>y_fH~+39tiZ!r?FC9*1o;usc`*!=A=J7zKlWi`~LFm=D*$O1K+tfsZv|w{Y_7)K~Bj4BvrW zy@B1r444MXVLsdmD`6Ad0tdZ`dzb~=;IlC78SJr{@L>^5g$H3iy#6iR!zQ=|p8GcL z;q$N^=Dma6K1)AqFZBhih8b}9@2GF!D!3j#3wOZqeUuv*4Le{ijQADh2*$t`m<|s? z7wm@BaLBvZJxqkna1rc)%V0zu{aqLXH^FrH6m-Geuo~`z+hI3shC^C#4;R3QUsDcX z47?kr!_CkIe+#SOwD*WVEP{LCL+~(s1rFUwzXry`^Z!8l;YItgdpP%x*bQ{SUGN^b zAMS@;aPR@_?m0~x4U^zjxB$KgSHdshMi~7k+{2l0KP-V=FtHW8+eLrxeew;4{e^Z4 zZi7{D%t6u(U9cJc1a`nS81WnO^RJ{E=EHRODs;nXACPZwF5Ch0;9htKJPbF%q0ejD zn=l@xen`6E66l82a6P=B4SR>tuoWgl?FIA>ZLl0B!>3^u9P<(VK$rkG!wk3w7Q=&Z z9}KN0{|?cQg!jNyxB=$F23QHBKE__*D%c1Iw_~p&hv6?`FEARu4b$KU&{Ss3;b;{q53Z5_lP z=EHn=AFPD?;1<{g8)5F>aSu1cusxKc&u|aZU@F`L^Wi7365jO>+`}KjMz|Ta!S(;d z{mZmBFbZygsc^;->>XynYPbMyhXaC+dRkxv?1Ygp@)hc#fk!wGJu6`?+z3B| zyWohhqn`b+6?VZPrylhTYd{|`8Qur8;A^k~&Kh*oQw!^0J$xUw!syeEdbCFD58B{< zm<+pM7919idzb`kp&Qo2YS;?jfZD6fADoVR_#RA#hhY|c>WPN+Fb%!|opA8Cj(VzKJlqE7!zMWU+eba^@C_LL zI_ZMZunnfcYtKFEalz|hHQWt%zta8TtnLh4FAL%z&-198Mf|)Kdc& z!#em1+z(^UC*NTf9QG#p2~*+cFdq)NfONxga0@*5!lRyMcn9o&zkm_V$ZdoRQ(*?& z0Lx)J+z6*#guTJ}a6c@8UGP3Q>@C_qm<-!s7CdJ-`3+aWTDS++!_zM&zu-(5`Zo3q zN5B_G5pQ^Y6!C_uU?m(k8hybexEDI%VOR-=zJqRAtWz#VX1 zH16U2vABoYINa~W-e5f3Iv)4%#u(hglvv!uusGbqU2relJpuRd?s(k)j&|xY+(Y+7 z+{1*+aSv??xQA(Q2YeRpg&Pua58L6;eavr8!abapeAJT(FPetlVKl6PtG=f3+?XVp-!<(JdOE9L8dg=G{2a5<7#=$I@4lCfaV#0+*upZXHR@e%)_vjZu8|-l5 z9uB(+_i!exfcL>#*b3|6#1h=Yo1pdw=G~wTw!mZKB*_!w%pNjD|a48hj7BVAu-u4(G$|umm>4TG#+A-Mtee?t`s-zx; zF|ZxJ0f+vD_^iPl+_IKgde__&nqz~Q;_rqG)1rzTleFy0eSEE-roJK;vS zZ9Vw`yJ0&#{fCr;zfxXdG)#qQ@O|ipuWi5`-1#7O0BdV-2QwbV{y$)UAxwh%-~w2^ z5&MC6|Cn^a4Vy?8-28v!A6)ea`uLD^!({j;m<4T*;ttM$wXhrRfrmE}9t?eqd~YK? zk5f;;_MhPnPT7k7;Rd)Fw!%Gd%+Ik`I2(q3M1Ka3fN#SY@I#mj4{yUgT=WF)p&Raj zJK#b1B@8{JX;(gpd$>XCXF8DdLeL_CIL3-imFdsVKB)za3Zihpf z2^VI;!*CTG`YHAc<6+xd4r04GyLU#(hc8(L;rz3VLa6SNWBEB573UmS6~f{Z>9eL zcm0`k!!ds$+{484AoVGX`hfBWZ-h?x!H48CEc}S}6K;Te;rSm^pTU_h@}KCtopK15 ze}cYY3#@>HKSkegLkH=F7koxN3?KUk<^FS;|9@g1u;p{&3kUy;au0viNqk}W7uW~< z!I#uCunl&>2fDD2e-S^J46D1*Gn}h+dn#cg+yc8{Bis_)?P-TGq1~Q{PV_aP+Y=90 zpTd)PaOSDqo|SOqY2BWUFf6>=vkNxC{qO_W1rNhvU*P}g-JT>kb8xq30bC1L!XLqn z@D;cV9*V#{-2QFc!&W%#OYGxZ+{1FX0Iq{8VJF-OpF9uu@DsQn&KZV#SObS0VI2e} z!SwTS4_CmIaNGsBhuh&UxDW1!MHhB^Lc5rUgd^ZqI0Np2xv(2n!gFot4PFKtVJ&Qj zbuj!W_Hj|SCmPnmG?+BJ+v9}uVHI@2ZEzoKhM&O>=)Abw6WLAu0h8dlBZxO#1Xsdo zBZ)U09!0!i@EGC^|9mOwhV!F|w})~B)8M9YgbP>25H7qij&R|W_-;=VykH{nhTWGF zZ!JhWa0T&(&riZV+%y^YaMhK#hYP3T9@bC8J*=I9dss0O_q_WtD;4)Jc{c8$Ee-cj zn}d7UdM)l@{daH=Yv!xocnGG!F6e|0Wp{hl!-_@d4Z3r>JqO`}T-=2QX<>`IJqhr8Z~-iVD`9wk zx2F~+7Ib^+VFql470zxCgJ5klw87_LGCWk=?a6|}T-}}uI0e?iOjr-=;6eBS3>_Gx zU2_xp1?%7p_zKL0t#CCQQqt|&45z_8a1lHRZ-t?!1Zg#J1bht6fIDC=+znU51*O<2 zTm~EAde{aZgJEHW3!`AG8-2mBW!)YZJQr5OXt*6_EbsR0g)7UuJ)Q7A7Xl z!!Yd*!aXfWyAdWp7tDk;camSQel_93w_!az2wP$J-RLVkNQ?hI;li0P6=uSGSW$_6 z!7XbD7w&}3unBg+!)v=eLr*7Ps|Xj4gPAZHmc#seDVMPNKEj2Mz*e{eYJ-Ec{r8hk zF!TY!h3CM07zHa~`a0|ymj8fo;ab=MH^PWBg0wyBsfS_v4+$4`!*Uq0q1#gf6CT8_ zVO9;{!V<{jjJ6uu;IhaLW$b8(yE= z{0!m3_hA-13@hNUUr~>ja4*~fzl3|BtpWRjGhr9ZgOTTAuP`3&f$6XfI^pm};sK|@&G2Tp2X2Q4;UO4$ z9{Kbt!F;$ER>BUr4TcZHu3;1$^jrLaGhi0Xg)8BR zCej1rU?a?eZSZawc0T^VC^+PG^a0mGCwv%I!B5~e82JYJgbA=6X2S3bXop}l+ziv; ze&~eb-o!mz0=L1(U=v*R7V(E^Z&SZqNO^_va39QoL*Bve;T*Ugj{F_zhlk;QIBp;L z04v}y8|4!w!A`gUHn*TZxas%kAGW|faLjw?ALhZ(i>R;Q2zc{;>L-}`N8%0F!YbGX zx51bL#2c2wc9`Buc^!`Y&%_)4f9$<~R9w~7_&p3mf)OL87%|c~jTkXvM2it4Dv1~| zV#J6kRaA@^qhh2fjc75_h%t?*7%^f>Em};eqD6}-wP?|%ijhWYG4fcdJZVg+r5aOe zsTnfg&%XCEcMqL2e&2uI^{(|cYv~^DXP_>C+T!#!G)Amtcsnjnr z54jP!64`^?fE-4)AmcxyzL9CjA>>kI{PDzVKm8q<^bguCavriA*@CQ->m$qu$nPP0 zk$*z&Lze%O`kKxAsXu2vKz|y<;ZU2#!128F!F`T!C>MX z^y7oUOym{FLgXvR3S{vi2!D&@j~{TNxYET=Tpzf&yn@Wg#1x9ie$ zA*biyH*y2A7x@};A9B@#U@#?%aRZr)j9Ey$kcG$^jYY3t5azI1`Rc zNB$9+k324ye34s_b;vu_QG9XI>Bt#-GJH37Lj$ zMCKv8kt>mjdDH`P6|xn%9od5%Mh+vBFT}5G+BGr*`3kZCnY5U8kIY8aAy*>XkZs65 zetC6|LA74g&ApeN0MaC8qH{=({9^_G%6F1~$WWoa40Wuva|B|f#^K8r= zOFc1_i803{OpdPuC2~E5|1z=<1ZRs*?&L(dmcf7dT#J)y{7dD(T>eWwgYU-p_%Jnb z(bVL{haY-de6{D?8E4J;dg?5$TDHoNcjO%io+1|(Oif(xJ@2qdH<6*kYmpn_>oxa& z&r%RyTKR7ue46GLyXkwlz8ikH=8IiE%=O>H57m5;%M&PfC;S}EZ+E#2_3y)v)BI|e z=fnR5@6z1+D@#HAFN5EC;ep_3n&0B4uZ2GdzfNWc#eBC( z^9x-2!|(&7pQ`zVE>G}sMg}*>MzUd+baQIK!7q{*?>lXE<= ziTw8*=^sS;`gfimY6Iur9qYZvwNp<%(<#SH^g=q{ba^{G34W~RSGc^-;dz>S&$ARH zjwA4F(r*akcZYaVEcYnjnVR2f+xK%#+EE8w?E3~(IaD4+*cd?PNOVGFC~$qOgueqn zO7o>IZ-DQBr)z$L%RAt^gj1&>`>`j)h=!E0AHMsd$ZcKfp^bDq;XmfHZ%~!tg4)=- zecngM?fB`_i}THOo&!GzZjBM+ZQ@$j&k}epd;$4{{H$_$HGBnpuI4woya`?bKSJ|- zmv=eoPuHCGbTcXvhe3E1=_8F9eo4bXwhG=NpU3@|_V~D)M=Cn|(J}R#T=*b-skY&@ z2PW~M6h7@@zT>RS`~HO4Y%s@6LVb-V<&fs zxh@kXaqkaqs>^ivIdD^5=EF6!kE`&>bd;!%K zRg5aIQG`yi_Te(uhkAGc+%ylh!_S3B8lNS`eelcR^R)eMx%NlkH^Pt8T+QQ2hcH*e zWvvq$6J$W)PqCi~uY#YZxthld;Sa!%*Sy3{Ujcs@ZY)3iKDgu+%KsZ~`gZue@S`;^ zw7H*aeQ;?*w`#tCF-?uDqv-UYV_NGZPhl+pKSA4A?)s1g{|s)bqayfV6kZ8`8*XaH z4e*!X({=ury7_m&+u)|Lq#ynSJknSpaUF$A9hlZK$v7r4Hl@#k-wr=T`={2jMeq&q z*_u1^Vj9;f;nGI_s^{;52V)=b-5*yw>F&v~eb!WNwYO$;^Dm9O-Vs~d9sXl&>tbc= zA={Sd?n8I`(gVR~nHNKCOXu6*vw|!!N?{>+{x{6yc{cnq`2V6CvgI8Jr7wmj6&(nk zq4{dJ4y)iN!7tR@n;S~s2rq=2`f(@xCiqu$`X#RY0r=HXcmRGWoFUQmf1#T`W(l^4l!@nKI?ZH}jCp`I^QOXZb zfScCQ0r(X7;xPN?hvJ{YAfE}3)P^PhZ1{Y5u1nQePiYxIU&Lph=k4%4_5#<>QS5k&dH)f5p;+JM@?;w1Ot`7< zXTek9C+YNR&r$@>fJa&<`$7`A5;QG*r&eW?81fSPB-d0ONV$}(6f=7yl z@Bw%ud`Vb(cYF=Ncfd{KYYG#~%&X1gYc_lu+%&!x!xQ0=+JYaSsvQ0(`?63SMQ{QQD(x0pC z-{Fo89q`|i-o*Ri```%IKhh$ z^XId)=Oi}Pg|OwCdF@4`@J#q-_!*k3eN7?!Irw>+uXV?QbYlN|_%(1}qMD<<_93>P z>jmf}tmJ)&tZjWsN+r#EHSoDQJ-ig|=UO?u23|3Nd!03==+vXrht6#ib<`PC2RdoDaQ>qAKCx4*V2X`? zc=Dn&Z2VOy*E)uttVU1*@TXqVT9`TI@d-wd<&e%B3v$INp;IIj85~n z4g?R;IHFX&==S{QM|puK(J(taV*`UD%uL=e@Ezzl&_{=h`6rQt~U&+@2q-hAn0C z`Dyp)MC!xBQ{nCK`}rJ-iFbpQiSS(bEZ!Mk6UOa>AmOF(^>Bt>H@(`E*TAdc`I>v} z8`mI@w4+wIjQhT=DkkbYx*Nrxq4N{s7V_~C}M?Xa3CY5U+)Bczq`?1P;gfk`Y9j%U7$VmBS0 zZb+L4D~ytMCHy2q+8Ws6C~2GFa|~&_U^%3XG*(EN2I1LJYe2(VVySxLw624S(bq?GQ-x4ML zD7+b-t<&rCPX^~MxM|MHg1-bmOQ%=s;v)DixM@tQgePt=^9H#5zDl#U?{)Vf9q^s- z8^X9fHcNc_;nTlw9`i=wv*EpA>EE&fEa{V3XzzlX#>6amKb$bS^>?jXKSl5l;1_A` zwGS|?`iD>1Xg@>cwh#9itp@mMa8v#r@JzTV|9<%VDCtMx*-_Fbvrt$FH{A!wf?o>1 zHmrR1W>v~p1ef2l3EKzW=*FiK-V`N$1AI3;N!wS?+I7Hp!5Pln_+R7N?}xt+r{L|V|?C? zJ>1%eNPHULGw(N_S$DuQ;NK4`e~nvy_!hWnA2SN?fSdeJW;5~v+|>WF;O%e;W5|Cs zJ{Q4Xflt%?Vz+%(!rP+QZ*cNA_3sY2_;2c;{qQ&7rv5n!kAJ}2Ka*KZPKTS)XTgi$ z7wPh;@vR7cH=Lt4H$KiYI#&CKSHVs3Yk;qZBi!_AP1^x@7=DN5Ui%>{vEL8>1RiPb z7CtJ5A2dI|lgz;NX%zpm;9tPMuJdP2DMOstFM?0Sepvs5e+%x1SHkDOAJ*yZed)c@ zO2kGZI$O{&m7x>f3TJui#^`FdjSax%x4TSjFaUoNZd!w+(6}Ujx{7OGok?ZGx56tm z_kP=Qz|XZLVtf#u&1c^mYVV`g8P(XBUT4mu2|f!RsSGmqSCU5({6~_{qTw)#1KqbFlHlNR-{%kiVkDp6v@b}?YYA$)Gdn)u`@~@`{3`xO)*R)cs)^g2K*HR_Y;Ex zcpLmt9b;)H_q%Pe0-fVFndjDe_(@TCJ3JGfqw{c{r}lGA+EF3=8b135Rh*prD^gdZ z*r-FtG`}S?ux)~y#^)?}BiuB97Qr{ek07s5e^U1cE8!d99QV20Ufx*!tpR=y{CUmu z?vJhYd7CAY()TN<%V`gD&J;sA_o#AOA$LA-7%x? zF*;9mc9{+@hnw1OKD^4n#g9~CelPr4KKo*O-DgucgY`Dqeu!Q^dI^s>&XwF12@Ga2R<8qrp}|nE{mUQ$<$*G z+-i#_sIsVgeKpvqLZ?96kh-zAOICYmL8k?sZ4-5z`#BPi9(3kE8u{65;luDWxT(!1 z%ww*J!qegMn~$bgvQpY)xiV(F|qrR{7$ zM{MLupEyP75YO28*7Dj~Ta1#|k_PiwoqPuS6S%3ZXTg`ke@>pEw&i`!s(6WY5qvNF z&M{}w%IGh2O^wHXDAvy=qF}2qUI6qEjtm}IC zC#28OKB&5GhfgK_Yhf{ZITWKlcw4i%u1DZWPaOy{9k_K+?#@|B+0-Nax7xnkC+%>@ z&n$F$wnmPV)N>L1bvU=o+&t8GI4j}X;Xl>fUfNjWa|8V1pP1M39q4Kc`uqc)4v*B%Qb^MP zpADx=x$(2NmR5W^;OD@bHCN}u{qWb|Y;oQ6>ilsO-U~OaPm{SBCFxCjwJe9r(lRt2 zmAPYf5xkG|do}m|Zz#T%@GZ}n&+i)GUGPYAlfO2+{~S(NaQ#>3 z_sQq7mu)qFhbs#{9d4?>B6uR)6yHj?H;VlRczl%f9q=#kUt$r8zdh|*?V}$q<>Pq3 zmssAPCaDNVdfllvT7^=T4_%^s{eN+U04PF(N-rlH+&z10*&zi@F26zhG zRDT`tS#WM+xaG5Ne^`5?I_mdC_&>qE#cD2+y~dE+PCW~V6MCk8>W2@(P3%iC zmi5<@Zj4gV@wA!GqI2O#z*%CrHk>_>)O9I*4*Xcn)wxei6fSG};vDLd^d8FS`{snY z{DC1NwH)@NSB0Ib7o+>zgwFfunEVwx+tEqh7X2)B2%Qb+%tSvF z8)rTi8}YfUPvO7OT-w1e-FA?U&hfv9{Cv8^EgyafoT7{GI$xB@VLCnwO@U3vuez3{0|8!XXPg#S4RepHRugvXZ5WW;X zU8h&~EIb$T%q5&-I=6kO_aXS<#qdisSKk-Rf!9XiCGbwTsV`Q;pM`JM_U*oCJs~Y+ zY(~f58Mz+`-wvM-H;os2;6-p#AMh<^yn;uX8^nGpe1jo>KRg%y2F6X&p`$}|r zqvWvxz7uY0=PmGVc%(h6_}C5q04`?_Za;U=9fsgZFGrrc{3MCLi2W42kTpgqy?x%C zCyOc3Nk?bZE6n@e|Imr|IywdDT-|jbcsnZLcI<;WKbI@eX-DT3tt0VjcjMK7&XQLn zw;L&M2RsLEsF*h)rTx3~z!Tqq(}DQUz~>7ijLaCl^WI2=9SM8aw^)PWVoEjBc|M!+*K; zFo@1Gzm5DoDY4;M!nq;*2KlVl|7s3PMdy8VOnd!Y_%J+@pHhxe_$Tmh={&slVS%4( zHSlv^kABav1)UerIb7RV?3S||{scTtbM?H_5d3MlX`YW~fNz8!rPHhD5!2xHa8rNF zgKvVHVzd(e<0$!WfIk9Xt@C%z^8H*Zp&p0e3;FDOLamk5`nU%hlirwML)wGd_wPfe z5FJxF6KRb3@KdxO*pOQ+QkNO4CKh-++{3&+QSUhB*vyRW9vWPEgtq;E%ygdyfFT0bZla>UF=jlftGT0FTrc#eO#YF9z<17sKCym+(0h>k0SYtI?U- z8+lzOc{IVZ;HLdY7yOYZ=?CFma8sM~T+TTq+|*wD@NW1jT?XexTH;?0eERRr<6#N> z2>4{3exco#Bz-k}6#EM_S7#zk@S{neqq$ncbiwDtr)sY5eGbCY;EOf)o^LrI`SVh5 z&r*1E7`LA@m$|f`HnIyonKg#*Sv59X^g!(WzS_9EC)ukFv6YXl7v70HNBiMr@F(GK z@OeUe_?0*$HtNt>^oPhcgtx)x!AT@6b9JpzIN@Ou7>D#oA-e*Gb%7AxA zNnZee4Q{HBa(Em3bZy_c_hywJ{yN+=ceTNL;n(T(>if&R@UQflpR3*nUjjc|r&sH! zL?()J;9+}7#y`3BB>rW8@r<79J^vsqj8{J^Txu$A$9j)ZKC3VK<)udMkDv2>w~?IWM4*d{Vy2x$}FG z_b|e<;q&39djrMr%i*VL`_4Tw@v{oP0)8HSE7TuC{VMkB<8xj!dh7bl_ZYUrpN5;- z^d5LK+|(z0SJ>%`wV&!4j#T(w(r0UK-^#J-D;MtlpUC$8q$!2R!^7q>dmgav0!S=3 zptBgApXxl+{oodOAAG*%&Y7v7Yt_`{Cva)EzAiORsrAV)HYUFxdF~e<60YQ#eYmOb zro-dmEFs+bQfGBm6|oo#&E-x5Mkhxfq4 z_HoXA5Y^H64?kM-LJM1YG{GN$f6a@+`1yprCzv*V=C&Q3+T9br zA8eP!T4~8x+f>XsH98kz$Jed;jincRuGkWP6RGs~_n6n48St!+BDZbHC!V;@hF=y- zK7UgA$n!Ic?0bKbPbGGi4w>hU2KXwtvHsz=!^6f3XYVNX`{A*FjqIoJQTTq+pUme_ zT%B_QS@-o)4iD))QjXZ)yU*(8+w*AbrxG!{j&n=sbAOv~|Izz^#r>Emh35^M>qm4p zptCxPj>NWoi(DLYa{S? z;N6-_zg1_i$;)}(@UzHsy2LmOUIm|}ZKyNDBKTT(q-Q22eITsjY-{D!hWvzCzWOBo59vDR%q>aRj}Yd=9k* z^&DFce18;P0^bKWm7^M7^-ps-n&2P6Bb7t^>w@orv(0t=Q}^!%;mMzy{r8lze~rTZ z@FcitF3*8`;Vi9G{`O)){40THM@e4|p99YeOMhOdo}1uH;WvcwZ^nB{qPLU)$^vF>+%0z=I7-6@I?59Iz4SfPD!MEIq);#3{9&1b_21>53huq)>75* z)$qEo^!J6z*96}UUmC{k#g4@Z)uQbsv8vd^z0Ik2b&y z;Inml$G7m_e~!Sr z;ifj7bR+XKJW>01k=tf7;V;0?r!GQmE;h$n7fbmH;qQ$_URzn^hwp)#=I?rVFWi)W zJG={ST3__RJK$f_{;PWiBk*6sO=CjRO?;2?pgBI7a39=Me}(Wl@P#^mXYb?ZS_M2G zUZA)?z7}4iZP;smYsW7-1L%B!&iq*Z7#|zRCpN=fhkI^j zevb(l+nOJ~A0BBhnT**S_;zm~+)u7c8H!;$Co(nWRKfe;rZJ@v{xLjK9L0VoJRvq< zJZ~6)Plu=L^2eTF6<*Q@;Ik$LjP;npAzvQ+2y{d7%Xj-hHhg}R^u_QJ_<1_LOf38@ z`B%a3hQA!vkL)iQ2ycY@;{(Af!}zjLKkI~Fc}O65rnax1I~{;mz!zw)&ej9)&G6?m z_x?0A4yD|}^954^;qPH|UM4C2Wy8M4rW8c% z)JQ%FfuL8H(>oUOqZvNwut3Cne3E`UybN9!=7+n6*aIJcuL$FILzMKsRkVYt0poWC zQsEi!b98!p3vbn9E__K8UJ5URSL^i77ul@x!+YQ*VccFM2yccb9Bxj(UF^eUEgx#9 zUi$~TB>f(E7u*zI->vKw63sjnUUNhsNOdav_KjSzp9}AWpRBpM=Uxi`0M4|l(z|Qe z8u;=4&6F^I7p>C44jdY|Yi#>;`x{+_cVZf$xQ%rqet3ZN+{!d=ze4lMKPXfScz0 z_-`=}M6sU+pOj>_p9hbJo9wTId*P>Q|DAgQ;{OKtSE8hEfgcuycf%(~;Y08k_z^mP zHI~P(W}c(`*J|!OXdwQj!81ueM|1T|MIQVd_~DwX`${X}bKz4pSNAD5z*FG&YVKVZ z8UtJ4HSlSgt25_r_?js8hv22~Je}U_?la@R%^4zmuIA1&-F~j6!MDKq8BVtk%Ua@e zYdW&}VLm$Bj-0R-u!mWTm%%&XraeY2d_UZ@9%zM6o*pnh`>*6$VY@+iC>z6#FK zg&S*ikFy>A0z5-=drND@xexvt{2Q99=kiA2|Aw2^4N2c&FPIz%?g>l(p;bUX*D~R| z;E!rPAqGY0TzGWAde?zFC$)sybEVjTPuDiobIA?x9(bYV>iy*%aLu3CJ>YmxUscwqFd#MS480>@EvfPvr2Cd#*#kiHp-t8u)gc;+Rt<5o`~kSBop!>jqwoRvYWP=l{!84l1>h^9@RZy6t~WeQr@zikpABCgg%`tbf+y+p z>K%Sn@T;TnM)=Ywyc51C3Lk*ahnwc)0DKPI6rYqk=x1OOQQd@lT}niskCGXPJApP;$D?6&%70RDCOniv%H`9PUFr~ALl z_u~8k<9)as_@*el1paswUJZXb3U7jMGjKm{cfntU`zDmZ`-L3`(brACN=~=8=j@ptFb7a!_Z}LlYeROm2gvA%!4n7pQP>ETSTj^t%R?F zcWW+lhI7wN)>paI)3nq;@Io(p^2fv1SN0uh``P0%^fG4$toK3tUQl}KozVT{SC3BE z9Ob+pz)xPI@Z=K$;rHdmA9a3`vX=1^okiMbbxxZNZ-AR(Rt*1Xl=M~b-YDrC;k)5W z!t%E_4N`_qcw*Xw`NKW~v_^>(bOzD67#)Usx7|AL=o9X_i}?|58iV}s&2ZBgo&(O&>YYyobw&=fX{~DuZXiO|hzlXGYmBhYa|JD7*lEH{5jAQV!n%&(=OV&(it1RtMh;XKJ2U z2J5s#%FvF^S#tuxL$!`NJMV+fhMU^d2>cBAwK~1p>nE{6tb|`4#_cbS`ni?~e-2)! zxr`n5(WkYxD?(?+N#^mg5}pKKu5GB_SZaVTgXe{DcfZ~NUkf+2wSM?&_-Q)56+wG# zIts6W&(d7o!%DuJehW8^_gV02xT&6t;BUZ9{k9T*Y*1z(p%C5*H^r_3zBNkvdUy}~ zIPKfj?z*lW-U~OijXwA&Jkppa{*S;9MoFJ^5B=w4bNWnpI^0zEh42dax!QkqURwdL zkCMI~J`g2+J3Ign8|T!0i9UGYT=QAo2>fdJquM|3CM&>J{jc|U-i0S>uJ3=se+xIw znT7Cn_`VnnjE~c?f3p^wV!r}jlMyg}C!!vHGdw}(e}P^9saz|g{)*s@am-n-t2xVl z4($FoR3)DQ?DU*&E=K^~37@RC+rg3W+z7Wom$ZeNiyJL#|gzvLYg0BkW_93wF zbof5FDTev*FW?u3rOyq;pbVaRp84Fd7Jd%gbgtD3uZJJ2?O$QH1@W)vOZf1Y@Pv)F zeQA@SeyjFK>F`$am%Kv!8*cvj@D}(vnydSSW$@SG`!!e3W!J(Ra?SHbD|}NF-UF|N zM|xgad>)3+Tx6b86YgWp0yoWD>F^}D$$mb3>jmceDRcOly8LRtRSW+$=}mKfEBqz+ zp*p>@?(=i4$Fa}U?$(d2%TyigLnkiJJa;G7($3-Yv<>yW{tWm+c!B2XJL?7T)$m9$ z^pmC>UIzbxPA@(_rtZ zfamBu)b|EQ;dj7iYp(89B|pIWHcI|k@O5z0^SwpzW%>3QMQE&0=M$Cim*7)%{_1{U z1H1)pYU>^FdiV`Gy~I$h;Rn!pFN%!-{IxH!;eU|(z89PK89DGTuwm-6CGgLq@M`#e z_{q8)>e+!NxQF~N)?B@FwUTTkB!`Bc%=QJ zAD#;T3H%y9htk{Yi_4_fij6#UcB6BX*74dmh=s3&r!R@zC(__K)I~O&ZJEb+wkn5v z4q@Gu5L@lo+JvrY@7D)k4>#4x2z;G^%lfhhU)I8h#g_|}FVentyW>>$LyU!&OlX5% z`+!;eEQW7{N2*&tybAs({ANB+i0d`uacx59gD5sCsoP<=v=3jg@>R8wMs%hZIBldv z>$q(oeiL=*=!ASxZ72=C7H*1p9(*<26!VqvRR-?I_Z;fvCiqEGj#A~LbA~GMY<2RO z%jb}f&OLhJJ@9I5M2ffYVfX{^urWi`SHi>GlZ7wR_Pq3m1} zS}Sep6L=e+eJfS0-M(gh<52SQ*Hf>{%;R(pyab+sZpdGCFTMm`4mY)nYIqqutPgnY zN|*eb;2rQt^MLR!cr*M1oxk^gtU41u2w(n9?bTv=k9^-o;_rvo!AKZX&;w6{AENWOx8K$pWf(pUZi;onW4vnve%!=ZPj_RTfll(e$UaCcT;}~ytYv*-ACX(-9YAL(It#UqJylse z0Iz_X#*UONlxI`qz9D7Jh9|-A*KOZ<&qC~7Ru{1HDMjy&hXcVEwIAL#t6f-jsF%e0 zK=56-Z>x%#y~cb~6s3K&V&@g~(zPA?)e{!)f$xCN)cl)PHWnX-cfGaP1IjO%g_>FK=|E+}=!i#l!^&W~=cpdy3Vch;= zZ3@?V;N9>Anycsihv6T?kI>vMp4C1Qe#jXme1_)gd6RVbF8D>7d+h@l$v+?NZ3r0O zYfuJ17oMZjt37!wyaYZ~bG0XKg)f1d>aPdB2EJIQ_nvQc06*7;;Z5*p`?$o%i2*uK zhuL__vLR*6fUkHg^4W>x5l@V(;b-&N_jA>k)%i&UHXcLgG;~5^m%2As4{wE=;?NG? z0$-}rtF!7pcu$n{Bk-5tg<)K~nZE2l0?;BtQB`-O`0weEN~fQ{won931=m%~lvNNM6; zIs6#%2-TB%<|iAz1#a4p6~kNLck1-&dxll;Nk52O&wkQ0!beEIg3qD+WsXzdW$Qv` z9yUy4-XJ^&Zi=Jl34Uu19_FK67uKn#)MqL>HRv48=a3)HJ1T_d!q>thou#G0Q;4&q zf0@s|U#Yrs_gl|Ot%4PKaMC(dj~G51(DX z?PscmZ-Cc49ti$Dj1Pszofi1gA4MMf#Mf^49dNpZYk&NmPY>FCLiF~bw-3F&TF{xQ!uG;zME^~UXe}{x3uPf1;|70MTtnFE+XZ9L?1N=1j zNt&y(j~4hj@P(RF4>GJu{JP;K@M|=8eq&DRy@__w1b@a$UG7nJX`d}z?w#xR$}Ltu zx;^OnpR(5hzJDoQt4?iyvzl3}qZ4V2mN*x|v*BM>AJyo+fZmtYM+8g0-J;gp6ooUz(weO4FcCr%Q2!Bj- zx7~eTG*e-9=uLfQ!hY4&>mS$aKyN2{pGLN4bu6(rh~DPbK=7l;dhd!oiANXhr|?^1^i81^! zega0;9>u%V;%lTn@S?~tsQ~j`_iULRJrUJ)OhD+gvOj4bg%f8 z6N7InT@`~?^4x$9TWi-Z^&PVw_-*h7n(OsHya6t;2$jn^X=l9SQo>K^qwr`k%s}U~ z&M%K)5qh4NqsOocondsM#juDv+x3bQ!@E>mBE+x;-I`aO7;aL!Du(;W^96KFd#FSv z`;Xxl;(jPbUi%P5=9vt5(rad306z!r*Xh;!KFS?#+GEwhvq*o7PVcr|B zIsB6-ybk`A?UDD`er&eE7sKc1erNqIaO_vd`(__H1-}gh1KJ1sw}U?wN2DC1=%l?q zVco0N=E<$Z8GeDb;azBzL;8=jqi5mQ#L>4OS8Yws441}*%3Fe+J#U%U=hg5};HG?v zsIMb??0Yu8=Tttj26gWB`the5JB8>q5$9056<;2X5BV~TPH&&NZWEqmOoN-+W;(nR zUP>0BJiPV*A~DT}d)|%A{h%^_n$-qRx8PP8&M&2Pf?RBrj*LAa?rDS&z<|iFTat+lbC6I{oNyE6^>I*tyq@OG+DU z@qZ)V^A#H-w4eF#i}~!^q1vH33n{@yDLOOJ3B^vG$yURc!!Oa?`F))z;d$1|aAW_6*TEz87eDFq;g7*@ z)Al8%>TIhVondsY(>m(A&2{jMKbyzpHu(MU#bN2s50#-8-T|Mhxw=Qb5B@sbv_?vN zfoDbGvvqpA%UfeY2K*DaDMkfOdQ<-8@XtwaI#a5He+G|qej#<#2A{IW?0+vj9e%v_ zU%iKGAN(}z507r>WAv0n}^ghv{){MfI9-vqxU zY>r@DFOeC-UZ=LB)BX|fN|>l~hpi)i_M>yuUn0jr@)?Dvz)ksxjg)rYM?4g{Er^Y5 zco{sJjS_TT{A=VsBY9NAUxAz2Qxm)ceki)3@xwlH_xQQg1#gBwI58IM+*s^EC-HCQ znERL@X2VV8kbKh6DIYe+Ty*l$>HB+hJLTxi{4}~wJvuA*MK4o_aTfY`DpW=(M1-@n1Yg zF|nQ--Fogp=kR}<-y<{(FM!X(UZ^cO>jlXp;TN10z@zz*fzFOEqU#i*GdB=jrxKk_ zW6^aQ(fRyf^fGs$Q{@>mmT6G(fk%s*uY<9|J7)a8S1LRoZi&5v4iZj3Yg(F%VYZn7cy^rDkGc`W?5(PdryqT5bJ(0MI>EPQ?BwHtzzCFz&k z=RCw5(@gkWcr-tX&}pAyj+yvcCHW*ox6_2qnk z^hyp^`-e|DYApD=-halvYHdD+uY|WAI~MW$PWT3RCwz_0U$w~=_%#2R@tuy{@Fe&Q zonGC$9)eGWoAz??ovcgX43Td6yw8W?p9cRJo};RH!Zul@f(%M|o55fNlXDOh{_p?y>;$LQdnKfp7H&PmW7#?X| zEPG84@!b!9l#jmMYM&zO|NGr`SdN|L$Bh~HLv`?yD7+1RHT($4bNp=9d4|EyC22_;`M{!$5VhL>xu?qfE*zbVnr;ZuF@NMu&=V%hIA@~Py(^*Y?7xT;sW}XJW`b0C&gU>uEGM8BRiT9t<$E@=Y-@jBm zrGGeQAflIt-kg)itY@@+vHX(tcpDczbvD#Nem9|KDo;Or&M9U-3ja96%#&Z`y)37h zc^3SpOfxTn&zWcDmGFQjaSbR5y;@be zuQc;4_>)(e*HcCCO;LCyd>!0$&e8y{jlw(No8i-Fi=p|&feg!;I86?L0;Dzv0b^gwM0pW?=oPoeIG>_*2F z!%X;A_)Kj>-P0(9H^U>X(ZzlRe6xZ3;q~wh@HE{It#%fhBAromI?$;tGwVn`1L)M< zV)i2dUuWQwN6HR-g-7!v2c61QW*zaP6rHMD&3@FtR~fkY(F`wvNAsf_ozil%j`%T* z&dP6@{YZF=wq@YrM>;$o9?g#ebP86Rb*%bFr|{cmKkDI&3|#zZhiAc~`O%L~c7<6- z{0N|v^BuDvDLvdPF>vuC8=eY}=0^!SX=}_n;ztcS>9?8vXok-;aPeb1JP{tvk0ErD zD$P3LN5bzokGtLMM>>3pfr}sc@IaJ2M5i1b&mGbIs7J^5U9%tU@XxUkX?&49`rt!R z@(`U-bcV^p6f=oO%1*}1s>t~W&xY?daLJ<>-Um13Av)FQ^xtXLk$jrb8CYxfV>^7O zflD5H;9c-&e#E~`JG#rPBYvc#(_L-$BOm^ffr}qy@K$&r ztRsHxK_~kGvmd@ca6iYu#gA0DA0Ew*JakeYH0y{TW$2{Unf<7R&oFTDqZOV2kLE`& zI*AXNb;OSmbdoli{Yc^_%p?OBKQiH?QSuO-B6I@SF!?HeRH5UkH~Y~D-~VuAE_rmq z_rOhgh|VB7L*!x75kGwIG8aA)IUnJv@DB`J^2mkv!cBRI&PsIpHk)-MpIUVKA2s{Y z3g2Pil1C4`6CTZvedu&Gn03UDq+N`ukD2|*gl{u&@uLvl0*~fLB|5EJ%sS#nBRXwA zF#FL7f6~Ckj{$f+JenV#_m~SC%{o^7qtp09vmd$e2Mk>ND1}$UqxrD`otnqZI^stw zIvaju_M-=Whk=V9!|*bAG(QsinG2iDI^strIu%ct{V0U5FmUms0$v1<=0^iM#ZQ`b z#E(vNN`7qiV*tL?z{QUMJQp6#5C8u#7dD%9#E)Eb@}Dw4FHj0!4v+SH(FSx{w?=N8Wx@AE88>zMJo*jey=coyXkoxj?*=dAmCMetJi zJk6bFZ6s!u@H^o1HUEx#7TEx=kHS0P&%u}J^y+;@{qQ{oo=loi_>5=A!k-i5y0D}_ zGUgjt25hc@<-@WJY$a?V%#Zz0y!a^j)xZ|P!p=n<)&yGuJCnaBrt5^28PfH`Dh%mH zV4Gn62{xr(5(hY&f|=?y1O6P`RIdf_?eO{9wyM{1_y5iDfUm z6n?zUpU=XEVXF)*{!g??151UiGq4<3EzBPwzhYP&EKa`xW&5*7Q_KG20 z4{QK7(U&xQlXf%+>*BL-uDXZd-s`Yl$dLk1`hd3cGxHg1CVV-3F1n#Msh%+|gqOo- zYksZUw=3Xl4BStedUzFlg-&muvxmM<+=0$3=u~PQ^t7fPa@6Fu6FF=D zjhjz3IvGDVm!k=u2RD_Y3w{UORE|OTW&`(o`1ikj9}XU^9I5E6crki8^3X}#W*+xe z!sozE?QH|R7@jRYjo)v#Uwv$~hZcCHf%{3)4X=ksE5|T8c^zXxj>FtMq&~L0^^v%n zJr_Er@jC;a@Jn+!3gG9!P30(uUt!=`iZYkv{I>i>Uco!e=Co+b-kY~WIkB6tlvS~;rF$$DihIH-M)`mpXg+v7kJIwRQn9yc=HlhM5mZ{%@Lj{6`+oA9jodS4&LB`=9g09f?yKeD+)B`?h)TRqza* z-nnlo^RJ(JSPOrO&%T_9^Y0_l3nj05Y>lFut!=5XpdFsnW1e68;4|Q+HZuaxh99f# zt9!Ufdu@J<=IZ&tOn6q5^o0(OG@7v?8=v1Rq__kR`@@YorH|UgWJL*}%sS%79&~oSXZFMQG3y5d z7e7+rFTtbvk%!JJ{bn8UqYRzb{>SV`Exg^p#gA6_R(Lc&deM3MeY1}EF@nx>e>D4% z^f%crMdsp1CVUe-njb~zY#xZNQ-#i!Kk?;0Gt# zaK2CKC(R&y;Rh4?oa9k&_bG{oZ2Uv$d9RWW&w`uwDrNAS;HJGwEquL!`zc>5{7HDU zwOB7Y%RY|2R~bPkh)$%vqvVtH3H#r_P1t`+n^Esw%R=W0bPCW9)ur=05MrYUz7hUC z&BcaV$5f#+gw7VNpZ=WgBbE%4)YKF;@^#g8F)6TC%p@xz|JJ))DakFoyKvEcHFI?nf_MJEHD%qoGe+8=q1C~>HUKMgmn zQJdfa_%!XKdd|6$di@0c-XzBHDm6CCcOG|4vI^rT?GSbz|Myt<`}gcR=^w9?g#CQS z^ts$D`AAWBRJt)#rvjP|8Sq@VAMWYfX;k$O!Zq1&xAjN8zDcuTdh2-`iFPH z*M)KW%WJ~h;Im>5Mm$@VwaNzSKMnpUpMBd^{Yp&L_wYus(}3PgosU{~CI6H4TNIuJ zuZK_5>79Nleip&k!w=Qmd2fO6O89EHDNYUWGI*r%QPOw7E8tgX`(AtS65bDg4n9?L zr%m~}HVSWq*J>^?TIcrVl+THw_n`4v_H6hOaMPSq44)3)HZhM@H;-y`TF@EQI?fy~ zhvk9HAY8_`zl`_`AAB0C*LkMq%KMgn4<#u=j{3qJJ)a4J|I1iz7%p`MN z#!~@l@Gu{o`vHFJroqpHub!BPeH>!FL()gAA42DKbbP;8eJ?b&dbf#3Ql?7mrB9Ar zCgBb68E`+JL*s;ck6eetBb{MMntu4vq&N91c?8h80iEf3OpHAu)V5Oo&3qVtFygt2 zq|b)8!RPZiP295Eh05N#Q^q#Z3jHaZ{I$pEA^uuZ5qexw`+>0lycXr@8k$EBJn{^}~M#zfE(A zD7+bdrh%uEzKS}}gTEa|ohGO{EqE~Y0pI;`wUh3i9Q#t7 z<+S8Cg1vftK7r4nx?OIUNqAC#xdDEX<||yD32%Xi#h&u4b;?r+?}bYn3)xrqRx03A z6DGvfS=gtLq#m9NH~H5N&xc3y&rkYV>a_?i{`uypcqku7u~CJN$;ad|&b;7Zb+3HP zg71Tyd@O>G!c9JAPzS!l93SUK_OTHgY3N)ce6TSj&@^Jv(2sil{fH%WUKK9{b z8{CTHX_0-*Imme{Iwl`W;M1m>eXNG(!A(9k!3*FfA1kTHVz}kwyvRQ8!A3PYCLeu4 z&Mx7R;^-&zsqhi_V|t#TEyX@)ubo9F51pG54+g(8QRik`Ck34{bUKfSyp|AN3*Q!n zx5C?^@E-WnQTQ;tISNnk#CRTu7vqcD-t7As;$J%aeR!mGi12)PH~bi#Ufnk;ga0N9 zuZ6!7g}1_g8HM-2+oSMd_|Ky7gqRpBerIX_oi&hEe)w;rq|b+U!7tJ2op%{Z`ZD;Z zhV*`TF7@*<{1!g@=Bu{sy#H3_?>1~?PCFPpPPeoC`(tZ;cgMv}bK6=!dZ&ElV8k;% z@nsZ#0X&V*p}Ka?UxX)nV?39@S<<=O`&noX$%4;LGOt65;K#w&hNZtfl)e(a4!$Ie z+ZzWC-XZoEeGzegB_@NxnDPIn+#XjQ<9Q^8*^#T^f60}EBuHx9NuC^A zV?5o|x%_>_tOsH|-;be5qj|)AG1vEcJqKgre(m)Pd2RJWulRB7Gd|C6eEf+P>&Hpo zc;5SFY%Dc&r(BYh@pIx9c(QFPhyEZYXE?@FBk6_Bn$Cyg{}|Kf^ZeQy_iLZ$LvLJ* z&vSq5M!7UBcF~gm4!QKWHJgtspo^QPudvcd2;;9es=kOAAKaYuP_IiHlgkT&|vYBtic>e6zNVH+Rm|Sl=O%lei<`M+vS(M^%|FO89k@i zS{{B@+-EVKTjJhyO^)LuUXPK}jn3G(R-b2QY=s@%(pY}1Hzu}ilIO#i*v3hoXT5Rv zO_D(WE6(#%ABB+QXWW9;GcmFE$9dk3iTk(D^Ou;ok9?kgdAU3s8}~|_=U{By({Y|I zsf@RMas6?gr{XTQl1}?L<_WZ(ii>+!df z!HL?D3u*kcrPsWk`@M0`dp++t;E-G6Zu8WR2l3UIB9G_qR+D&HwS@%vm9h)smV3Ty z=knDXKcOtmad+*X;<Uf8Q%;HF$FCQxw+m?%G%IeIr`)E8 zFO6%7@!T-y;TX^VI^K)Q_|^&cW-{^&}kRe5xQsNx5d4Y=-H7ta+n>_`Rlpb zz||KLJufEix33;j7`H9PvtmUkq+)i;ZE>4os5m?SLoSND(Q~)+$CT38?=zh_e;uBI z%F@lgd*VDljI*`lQ{&F^%(J`X{}q0mrL@$uUBUm6`b+R{eeH3c9%E`FPPi{782{iD zPoGl-@h2R10Vfic6A5p}JRg706weNaNk}5Kw6F#>ULEl#9d)T^g;N@3^h;u4)#B}O z^)a4j$HRVQ-0hx+oj;DbIPOiar!=nJD+ASoUQc&S+`$;n2Ts$rTJ5~Jvpq{CvKwk~{6O5U$^fT}9A)c+iH@v@?;(2fKftW9*cwU%t zpWEQa!)Hn6Y23=-KTLTpSaP|?Tg@QB=W&N)JTcbahjLB+d7ZzSs;N5tn{UO2mJ3BVV`otpK`d9Ji^DKF^^LNW%&$m9Yj<){ET1Nh{ z#IpW%I9&GZ@-N;No_`*+e=ftn&$cJ)C;3;K#f-f#0M&fi-d|Hju-$ zvh$a{hLS&qKDGTDUr!ZGwpE;Bdz?Q#M>#3ejm9k-ky{Z3P-MS5xBeyy7bw}=UG$cqe~H&UWNHM=o-t=l|iq7)O7KBTF4Qut@Pg@;=g zsy8>-;D7%aXMNbQo7dhi znY?=I{`JQpH(phFzPt^!^d~>xHpp-|^nKrPZ)^m3#5eIvd=h`e7xBaLLGmBZTa*p| z{=Ztj4c?UargtCwV$HA5dGfCReeR_-t(Rx`4@|%Bo0%0?dB~X zo&u*fWYKD6wW?7&@h&mP-*|J_?IKQ*oR z(m%ZT=GJE)Z2i^`-@5OU<8GVP`0*{5RpvZ&Vb62l*tzYh+oxr(J7d{_i|5rRzII7N zUe>K2Z?1b|{<`Dex~Jm3kw<_1=8G_fNmw{O0k$+WDiKAFg?7`JJg{t$+Od{c8>@TJg+}XMgsa4;~)f{g007 z;L|P7zWCyOi>u1wu33KXWf`w#mIOQBynOac57pkYWb?~wPb^Qp?cX0h*zr-vk6!+8 zYyF)S8{WR=r=xHGpyk1b%X5Eu^O1-DdURj>#%-sbdi~|cxBag5uJ87~zWdzACY^QC zob8Fld;YNL+c!Rzn)>wA9iF@Qzj*K1-)0=KsWkqh&Ylegw;s~dS(|?S(YuR|`o-_< z=8)4U(r${&TYvkv%4cg1sUNL8^W5+K{K&~~es|}iUFF3ee|P6;Yad)${nuxjv)+04 z{8?L$ntj40KRV&S_P$e}yzcKU&vw^l{U6TGJ3hzrfBRR0NbM~mA|i-b*@;*;f>0wO z_6lOJh!wFS_MWwAj2bbkR#8IHR8dt`tM;l@n-cMy*L579@AJFQC*R*6&-Hp$n)iL= zI6vn;uk#vr&+9v{_T1t+$N8s;%S#QZ_iw4r$J;$9xuJ63W(yw8j&qHD=5VFS$bXhE zTQ_j+o+!SI^v+~O55Il=4_c+ zc~aXA1^y|0uA<-kzwfOtuwqr7m1{n(DDa@->l)oJdw6f1{OiWVE?Tk%&sw>vc)*3tCGYHBaJ9th%Oh_b7<6Ru$uo&y7F#_&o0Ovd`arKKuFM=WK?%5o)wD z1{%|hwZ;#|CF7-$*P((#J%>&XBOMkvY<2j_;g&;&Lor7`$EJ?G949!gaNO&7&hfFM zQ#OxmHL|tN7ME>iwr{efWxJB?O}6~meX>i`|M#b_udko4zi)tVpl^_Guy2TOsBd*Y zUq3%Tf4=~~K))crV80N*P`~Q_zW#px{{8{}f&M}M!Tursq5jnad;|Of`~w040t12q zf&)SVLIbJ?`Ud(1`UeIC1_lNN1_y=&h6Yv-@(uC}@(&6K3JeMg3JwYh3Jt0r>>KPC z>>nHu92guF92^`H92#6b#5cq*#6Kh;Brqf>Bse4_Bs8RYsBfrWsDEfcXkchiXmDsq zXlQ8l>MF$QD*5Wly1M$a>IzAYeEs|b0)v7>LaTS{-XpeWFX1IS287n|uVIwz7-;=8 zs#Nl5RKI?hhqu}ltLWhuRMoetpNGn2kjg-)hj&!ZSdWNq!}viJ--?F4+E3I${(g39 z^+6T?ibmZwVS#ae{-L2pRGYB);e!VC9n{;ScaI)=N=CglVciA|8r;3_pe#Rb)HcjF zAtW}~Hz?TOqiWS^Lq`pb?HM<;T0&@WwQk+}Rv9#SSkJ0Gj8>6h0Y<3@FrMP%c!K%-TIFkho@EA@|E2z>|j9j4N=QsMtwmi&J^?a*P}hV|`H zCBEl?!95V8uZPAgAB|al8nf2**4kgqkd5kkIRtc5aXQK(zK<{DbpOy+0uGb@dp zgG6P6Usf79FB!x0ldLrJm{u_NPO{Pr;or;8|6h&7+V+2mSz?{QG_At=FaM}wReQ}Z z)D>d;!;H_tC~PFD%f;s5Vg0M_xlli|(wpabj_MOB3(Aes$F^`C=LRDEX`xCPZeqt$ z_ox89%A|Evqu%(4a+pVbGx8L2NjE`!D#FJ`{ZjH6@(${E^Vmh6<^z3QVB?hHW;wU? zvsol3`5o%7R)wC+XG2|{v-BTwLytwWx)(v^!}JNtW6$T1$7%cm`4)}4s)S6R`x=iS zFO%2ae&eg?{h;?C-$w31?oGajJceBQ#Si2u z_druh2ownqVqfITc{I1$fpX#G>lIQu_W)X?sB^Tx%%Kr1iM{zUVh(qvi#7|n2yL_-&#GPDScD3|z zuMwwpf35ngpQ=#PXGUxIcu;?seDOEnk>thlK)>mh%_7pC$C58K;E&uXi8`}4{jV9x9V}RNVAwzS(Ax^5nmg?@a?2|E8+JoBBUTz`r;3 zM-(^xZ_;1JhpW`jS&H~?Q{PMtET+%3=kUiJNQSY7{6kLYD^mZEJkL^;TQ3KEuvj%1 z)2B}W;!!f|$e|V%Cf_*>+><BX6Dv?oXetYMe0r`#B>X zX}AB9C%eI4%CnT($Tjtg%YsY$3{hMq@&6pr)DNo-f9n};rFu+$VJFHbnt$o1HkM6) zHz#mOw;B2N@`yiz`a#8@ue}xeCFHqP!!v!}Mj>5^vnBbW%Qj0HO#K$}UD@s9xj}wE z66v~9KUg&k)8DZgxdT4w-#y+uBwIntH>Qo;>B zWfs8aTl)V?-eMYfB6)yX5S!@^aJP^1H2IY`(5vod9V^uUW$GPS{d&p?Jh?5@KHWG^===1tS-M@2 z8=J`URz&$rx);dZiXvU>CKUBIF=gR%%KAn*^&A6zj*>Uo0=?=!*5T>}{qy;4OHDLp`F!{uK0ngu1bNlRHjBu*w}$F~rhbqIIHNTNlIJQ0uEMj9+2ou4 z0&h!xh1_QZxXQkDyd!Ti(?0$g6_BofF!WK>?r z%6}7ilNI2qn_I^na=ky6QUkcs&Aq9#N$-Bixkq4aq~o^KuWQVuy)Moj&q z`Zh}`<=K*aZ6)a2IKrrCRp|YhUn%Fs8rP zQ^;>T`5)v{Dnc*q;jYSv8GqtXo26_e^2hgekXChB7Zp+ zddY8~Km2n=f~#g}9WBW(?LhtZBL9bc{B7iG9Qkhn@bSomUfNZ+K&(STxek$foUgc< z-`joRbCy2&)xc%)B=+}4=>)@VAjaC*QsT zH4Iuud-BZPHtTosnMz(W z0bJU{TJrGz@E3hT9r!HmJ2J@+TS z!t<=<)DI!Q-3I<`$Pbc_^@dLu@>WV_`k!88vxu4wtYZ$jjptc1|K?L2#MI{suvxk$ zkQ<@o>%Rgo=70~@lRqnhbY)z+LViC1d>(xkHGqHoGQ=@92$a4`QIep)C4~2 z@e=D;rUouEPQ5)0jsV|2-)8C5bZH%{$oICeS-;CTe1N=218}L|_vFsoRl%yGxb?64 z-X%2=Smhapc*Oq}c_zp0=G4ck0yOmnCPKfG?eG`!NuS{FNqx+h&@WB~-^~2xXbSFq z*k&oE{jVX{*CAKQcTr!O{-c_~XPmaKF02AJ;~Y2NzWl!>_hS1;RO3bX^n?+Q!w~jUBKAAS_ck$`01`1O@;yB8AFXYB$a{tlLOT8CV6OgIT z{0aF|w$@RL{QeJ!=LXyP1M;N1@R9x%*&aR}xDSv>pBv-{y1@sjsORKV0A{-Xehs3A z5$h=25qt>iSLT;`m@ESt;3MPPM)E@)!OOzl_(;AW(7qkc?SynM9tAH? zeYVcrN1B3mGK_q*;%5A(gP@o3u&^4~P5!LCy-zjr(>)MRH~RcTzBk-vk;3G~RR=VE z=9LCtNB$+b(=Yb*@{!yvH+U-bt}*a&=?!jumxB821bK;m;4<%5Qh}N2hPAUPm3pS! z`qwa8DQ@nkxlBhnxR7U1-{z*xB16c#bcfIG=I}4?fDcm1pKxAtrQWdz^uFAO>_y&$ zJTbPeMZC#>RD6w!XURmwlast`EPSS}wpnB{c?)u{zrZJw&m}J~4{=UqoPj;z6Rqvf z%qO46c~f;;>u^^Ao8{(I-e&z?0oaHZ9#Y>T(r%xT-*<;z>Z?R=`0Q(KpWj*JsXY*9 zDf(|Ff3gVt7~^l=2R{0F=tA;&j?k-SV;%d+XS6}O$>f##!soyYn?+=O(u~}h>*|%% zA0%J?5k4|6%R-2nkFKPC#ZqNET6OeZ5Am_{)Rj~73CxCc5FQKBT~R4ssBWNt}J|H94kE( z`s@q9rTsJ_U&{5;Ecy&3KfM=vX}4df3Brui@eO!>>hlZ-PiK3U{?eGdFRv3xf1E(> zF$z9153V8a*B$MoB>n4*fPZ)oa3Asm~ap?RS9D+yv;; zRzffBWUu08dq`t@kn%iF{UbN%r5@EIk5>6~V!4S=WAX>L5&sgVyNx_@V_ho{HQZT8 zY$AMK(x(RbEb?SO=w^a9J;XO@68* z_&Uo?y&IkU^lotJM;>F4ZW6EKcv7ECez+jYp%VEq@|aEFd&!@ZPwoRQ@w<C?e zTT(xVT<@o;2pA9l-Q&Skv$T%6m*6*&sMpP2?r<$PM$~xyA`FZwhIWO)$0s5+Q zY?ebA>tA)ZGI`P*q&p`EK6pVs>nrd>)W;>mr}Pw*e{1s9g=i)wqw5tQ;^Twjw zYSO2|6!^U5d9?N2@#?eNKQ0ttemkkgI|J_XeuIe_6d`W(ae4ZP8{K#LE7kUHzPvmoEBHd(O zzmW2|NA9Fu*9cYz~}@AIV?xev<0s9T&kTavIvL>^nJq1>T19yUeFY$ZN+S-5T_% zzZiObpDl)bKmh#nvYnhIcYJEI9JUgkb)=CmjIvq3_W(9-lW+eE z@pK~3yEN-@!(VaJ|5P}9WL?pR-2D;aly$PpGUz+>K|Gb{e@$_jyNRqHd6q-}!|cXd_Cr-|9MczEZW|2DN<<`I_tPu3l zzmAZf<~-#?{h+nbJ9EB{B6n03XSS=js_>EVErtBRLGb+4w^$GTn3^_=G$$WPels3i z#+gHkTjet?+)}opzVJ8jdDsK~GS4m{kE;cr($xP%zG*nhza)9x4e(jSe&kC2fjoxg zUyVFo6|9-xrVrpF%9k;avo0}+YnsZ?M8CXY2dP+e608y75{uslrxhxa-<^NTpTx~ z+=>MZXE;3%iOkEzsFnus@DK1zDBJ>Zob7jWIOp8>SyJ% z_wT6$W;==K1s|!e6!PWV&#X$H-^ovD*ApAA&>S#IjiEbj>$Wr4=sv( zwIqK-o_GU31Ig=r3;m&A!8?=3lE3c;{{`e1$>YDZS>y%Fx%w{n-2NF{)*(H}H}Jfv zEqzXq4_XC%Jo$5SpH)a#`s3;E(7!fuoRs^{_IQeQ`@0N1gLVkfhQf|-5PjMU`M}MFF@E0&+q1!Cd`S8M`U7kaHK^aGxa5(@`jJWfE#Cj2 znvHe*a2P%(U)ZeQ%L5x3PiAYa}J+?W1W$o-##$B;KU z2LE&MbuH43d_VcD-%$<~$P4@k{pqdX-sI)Uqu;}4wgW!cN$$gbQJDHl$KlhQ<9~JX zP2}BX+bkmWb&-776!2=)S3d!t1t-B(a@H|cadVtW;y4pZUj8KXleF{hIP(1ik*}N7 zKO@g{!1xwHUg;Q}H&76ce9x_J!gFua6rq$(v?} z-j8~>-=M#E4BUzSbI3iZuSorV^4GpLiv*M3Baaz|_9pc>_dNU;Erq}IuO7dHU()VZ zjK2WBD--^m=>M49_iu1%SGWIwzSu9|W2mok5j<=LxYSoP`QZR?7wTt_H{|(a2l5}u z_50KQCBN^4d{v@;z@JE0zwiAC`O8@7r5*OX1bz0g;I8yJLLL@ovxu}G+hucnR`;Xk z1}{K;1I5ku+3OR0UXnMyg7G{uJM_fHJaYM6e(qM>^oehWdX)N#y$YW=&1Z(9k*Ot=0G@bFHErzQ1A$s;^$7Lj_nM;_N0JU8_PZ^Fl?J>t1b zZX zEYAjI81)0mx3WEWke?uT{SDVY8G)9PtOW??bQe)4m|j?16mA{I&Q2^n0%&{^1Vr`Gws1JoGonhpUEU z#(%ac(hY&!*h7Ak`w2D3pO8Bbg1#wvz~As`$omAEkPjtKibp(>ZmCDmZ*Pclkap5f z@iIm+BaP2ByU=GQ^~YZz{w*w@f5{(>f&Vwu_kJw?Hj6})FCy3P8{aLwGyI!Te}OzE z9X`Xz-;rPF1pR#S^G}d&pMKD{C3k!Z?jH$WiM%v<#&Pgu@}1=Rb7!8<;N!)4ybAS^ z5t!{Z-voP~pjTLzrtm(WP|+)H>UV8` zkK}6uxqhAcjN+0OEvz5!{tGYj8vYF%*eu;I{G+DikMDrzCZ9omdK}`ZLw=fEzs?@| z2KjR4ePxobIK|C;JsGU!i(FrClzt0+>VCx2i0OutFW|b%pS-|3=r>GP5!kw&Zckz<;Fv26>y!;9bd={0pB!yf3IS`9*R){*UB|Wo?!+AN9pQ zz$d;G_$KmJbASN#`$ ziT?M!Y?g01*c*4rz1O4OCBH5ii01{L*Gr(jBe{Nm_*L@teEv~<@_s^li_p&RCMs^W zx0k&Bi`3Qk)F@uiF`qHbGn^^n#~#u1Izk(M`jY%%2b9}z@*U~~!Sru<#%2*I!^`C3 zV!=;2;1BYAhQ2TN_o9G}8ssy^K;N5uJb7q+)MG(%7sJ6y_t*sJJCH|`AO0S3iqBl~ z6i@K{)E^|*pVu(I6T{5cj6KlHJh;*UJWnr_WCi*Rb_Cy<1H3Ew4f2yi;UnYcq-+k> zym?cbH#aJ7rdyoXO={5R4Ee46p$o1zI%jbaq+JVSdV@D*=k$eO1V{u`+%gMLbfd9AT=g9A0hmR|HE+_c!=DtcL z^6upNbRXUU zIHDcOdHn;$&2moQ^&lyS&bi>zhxaE()8`Voz7OV^+rcpR^W%8`q_p!W#m)LnN=Ld< zZga>3c|Sli`u|3LgzK674)~yC9{8kRgimMc$CJP2bwE$@qvU!!Uz8UW8}Hq5T}$w8p0X&*4fvURK)4~_Pkkf=y&?qx0Czi*Onn)K1{ch3-n>`h+oG4WOC0C=w&|oR&kljXkq<$ zLVde4(95{$QUd;Ov~uo7u0Iblh5Y4Q_()n?$RBZBMei_-zsdXb1DVVEo#TphGjAYY zGEWsO3EuiA_{+S|hJ3*g=o?zOQtzj7gFd(z{O^+=BVWB1dg({6$+Jxa&qKX?Dfsx$ zv#*zZ!NQ7A5Z#pb%)Q%+=!P>!)^?Snskv=7_%6Xw5GqArReb{cBliwsS8HIGE{TM#bclZkZ!jJkd z$pddf?@2z3+@k^7Ns2Q*xJrJZ4D`}3@>PQWwF~GMvhH0$uD>6{yE62_%Ms5@rdyy2 z_|c;9m;PIYd=dAricr6YTwm|at_mOjw(ws<{X6or!SI)Lmsd6D=WGT~r#^xF`DAdJ zFBXvN`z^=GZDUYwvd#(h&APuwDQ>ohCY*Pqos1)2mJ9JqzgtJ{T?^%`num4N@Pp6Y zXvCSHe30Vix-o_KqozP^tfPME*YMd-eM^7%Jmmcw;{Q-_a~|(A8Rvtt9?Kp8{rO1b zw>1ZXdgL)R5s$0~hKRl;cp1hsi#+%ocro%5NWas^oP85zqCfh)4Qq zDtSy)T}vtT*fI!ueIB`^xS8J+u3JjcKYK9rPQ&0M{ir4RY2J@5?f(<`EbV#lMj`Og z_hZMAukMI^wP(6Nk=NdYbfr9Vg~CUFE_DofuDSN@#I-u~Uhm)^ME??D;5)a$U(Od6 zl2_&Z67kd@BJcAxe4@y&kbBoc`K#>+>+q-n|K&Gq*6+Q6jSb}PW07t{@ib zNI&XCK5aI9WSu{S{3V|=l5uz!c`xn*%eZlqypJ~iyVpXx!PDU%zM!JK5(f6eREg%6sNu#`QozRVdRa;7j*zvr<2w(QgL%%-FVc% z`n?^nv5ETNM3nz#>YLe+?yFwl66a|0%ut)Fdt5}`=Q7H znA~GK$|sI|0(tW((3dCwiTvd{a8L5w4dB0F0pgT)xSCwQ&U=E~HVNrUJFnglJ|o!Q zW&Jjj{1Mk7QeU+jVZ2J?evXV+T@^2+zC=#m^UE-eo{F3GRgveRq5R-FdHD~BN1ppg zjevg4pU9VtZ)F>UA6SfXC_|qw$@}et&keT2kK}o{z7>7BCh&R9>wU7`+oyO|^>~5$ zaBZJ6=a=x&+gm5{fwvH+%wO}#!^VMs$@m|WM{@j;^X?^0nO~!>rL0B$ZSn=r;nRe? zKr_@!4EOn4lJ`{HEdRf5!AH&$=aV0BhfgKyHesP<8 z{R`+l>0h=b^tRs6N0F}~*VpZ?QP8LPp?qX~o=C3W54@c`E)M$j^uJE7Ki~IGadSN| zkLv-cFPBzG_XGFeq~C>*>-XagB45h;T~{&PYvhNfA)cJ%g`?rWay__|k1x3s@3WG2 zJB?hw@2F~P`1q!xJY}3oR@}^&jnDOe!*naPfxhT8=#P?LC7+W9F6BJ5E%f2b5U0$` zYsvNb_Z+$JAMlBwfA4ni(cin3qdoZP2%Du;GoN)-B-h^~cY?eC?;mbKeU%RI8S~O+ z5kzYkt;wUfuP)~`BgsAZ-Zohe=j#X`U&bTjRS>ynCpb$#U8J~K&XK&|UgrHB)Nd*W zy_}B(bb|jymQQQOf0euq_fI6AgwD`69fEv4r2Zbc{(d}yRJd{dj#H zV6=t2T>$D!(yiYWKHb;Dr;;P|Un?$o2f@GRsgH@lc+!I(7Kwq+qw_Y4NPpi)?&gc~ zk@b9^ZqV!Z4{sx1-3UId=>Hdaa2KTO%6!f14xe#fAzwJVF^r4kORIs)Jl?kl^r>y& zpO-#!$*Z=2ejs^REc9K*Ltl;YtRjD#!`?rnC-g2}$gi}kPr`W~5k{Xzy`Yced@AP) zL&;AMN4m0pSVJCh2VC0WFXYMbNVhrtUy*w?hTenRy*JXm7KZYyLmo#Soem#aC;UQw zob^7I`VZutc-_#A+@lZt>#ej|MD#7l-;9R8oCi%I@3Rm5I(^=gKWTRs}Hd$Zh@k)P%Hkr(y1$gl7{Fyh~B5Pas1s%t5`Q(t~CxPCpW zFZoY5ZI*Hg^^?h8_Cq|K$+N|AKjd8li~LMpRdF+Z{l4y2fzyq%xTa*%p? zN`CD*>h}TtI}U;W23|LjdGj>+E}mci>4ZOUi--PrHk524SbpJ^5C>!tQEaQ(g9Z^+LMg^#qK>cgSWUCw4XyU@Q8d6T8k%ldF0d7%N|GS1{5 z0iT9t;S)fgX5_hgfUh7QNnURu%5$>=KJXj~pX9Fam-X8g@|xx0BmL+)`MA6Ak@Jyh z3Gf-m=W~ZJ-OuC$=p*g3@F?i@_bmAB{-|Ir4EQ z;NPA4nDOw@_obhb@Bav&m()K^f__&DeEO1yOaRZUgn05g;Dc|-k7?H#>m)4aCP6=%^RM}RD(2CPysb0x`z`&$ zCPQDWI=GZW@hRrKP|S$o{*s(8*HqjbCk<^p+(F*l8}UnjsXrBZ{e5~dAKB;{_q&^hUC-8 zohH~UBIO@A6MFr9SsTe6&!Zgf(B~z2)jsf-{Ye99e}4ve0(ounUOW#iN8XXV-V*qCBi~PMv_w489$GGdf1vi<&@%Eq zS5faW&evZEz2^?ZBkStN0UsRriurm3y(jgz$@S+uMl6P2 z@7K@C9~FU*oEJDRfnM(yd&qsb-YCp;Uy(Om1|ONPLzlv59?Pd9^#{qDc0s;|lZP*Z ze!}-C|H)WF1!P_JuU0Kf`BHt7RK9xR=zkz-p_w8kznMEG48TvNVr<0FhyOn(^zYUo0 z=4ta?bH&Z}mZFWTLligT|HL?D-L-=J6wiyLtv(@dmx*{PGoD{J!so|%HjBtSdyhPq z3%HzbR@nr-e%{fBd=AH92L~i@oP5DA@JR(W3aN#(8UOCah$o4BI(Y}JUpu8jKchSJ z9jT8YkNUx85gBKuDsDaZp_S(*>iwre-;F*kx4{3+FKAcsY6Qn()Z3p+iig;c)Ac2xQz$fpq_ZhVld^_t!=8-h==Gy&TPTxZR z*bP1_8UHYH{e4hhk*D24JmQ~g7ko0_+wy0Qtp$V4TZnVQ~7s8U#KB?l!2ob`DnI>t>k^l4=+YJgpilp1D^)Th)32xe~_mP z0l!IotG&>t+2GUB0UzYv2Y#OCDX!F4ClBofpR?rq$fK9qEV7sU9eF3-PcG}6Mf>63 z^bUNaJbQc(?)Mvf7Sm@Z`Na&xDg9!~0q8ThewKNp#6fWVzRJbqw#)DjqkqdoSr@-v@k~xrA58ALL70A*(}nHd?tCND3n7d^54mS z-3*_u^60$CS99t+r6JwjC&2rWpC(V|{RYy17aoOPf1l@f@T?Y72ojKdE(4!bbUu8N!WJM10e|Bn0&c^-fGNc)UA0eyOV za5-O?NbcJTT-xW)EN5BI1|4{0a&cJ6bpC4>RzMuRo z`@5`*Zj*1%gpbtk#k26qNCwYIpS-_rp`hHm{BO8xS7 zNVhFNd`f=pOT-yXUix?VuiA$8*`K@xc>?tf$cK_Qxj-KWeBgco0DH{rkFF!a*?kCAsDfOz~E zXW3iOkIfGLl01sMd^mih9gZY75>aj{4C^R!8$SELx7Y6^?>`axILM9r^K3|#LiqbGb6gThNiPPTW*Gh4-zfUcN_9N%@cgf4T zgBPdI7ym#XSlmAT6mtD~eJ1(KF^J!nKDD32CwL9`E0*Ul#m#;Gd5qtg`c&$3abA}5 zsn_In%E6}u^^Py#ufK2qdvc#9(EC$=iG095@Pg#kUcx862ij*a`8x7WY*%t#a8Pk` zzsOr#Z(O5(QyR)`3w?U2j9c?E*X<3-=aQc;YO_df@?GSKfr#gdBR=qW4Ikf}_Vsd@ zyiiWm<6`>segpkE-Uk~&{z&mPs+}b9c_`Vhb9)Q@Y4-Pl)VCzR6lAlAl-oS=wQmup zj1T9?w=V{l@p;TU`0MAt+sXCsZ~dsaS$&ja#d zJ-|K4n`J;hkL|Dw`CjrS-2ah!iTnh8ayj@&e(#ZcRYCg^{kTl%H@Vp?XZ)36>?GIU z7xbDudJyy#7*EyD@VUPS4m-(vl5gm0-%gH^pUQ7vp1y{oRsP4c`FAS$v;6Rv>xrkx zr|^5vW!z}%0H4Kto>A6OSIO7TK>RZ9eInPtCuN}{eD?A^A&r@@y4k?J_Zmb zs|0@!>Nk^juWjERYB@o_^aJ#NP(O;i;UPHZali*V$p_7}AD0^CgwNi;p>HQXwnoKKy# z_aQ{*%39xBadX@#6pHwx=s%DAVgR@w`4Ms-?yoK*-<=yileGQ$f_cD4+=EX=NBqHa z#S5u6S~D9sZH%|n@8>#1`umK$@G&?(B=Zm4@_{FBMfpg(+D-o66aKPpxuCc?kK10^ zpLZ9`45{#)|EQ{Xb5oFgB?{w4Lhp$L3-dLT|2=P#50 z&GAs?ufU?v9}a#q z(sc!I@in-#!>Z(aI3D(=K8aj^&f;%!{ritfm4v_ky=+YsH^+fc?fvsTsXx5kKAvW7 z@TolCW<}DK@k}7U>uX5{mh`Xf3GRLp z{DuQQI69d7=5XaA<Vh;=+C zPnd&va?-zodf>u}Gar0bk$**=I>%-aS&!`_fA9{x1NB`i!>6DVe55@uCI9X;^#0U8 zA%8y=T+XkHRDsVRPw*Mk*C9_%0rw=2A@A@KT*jY8kM^b-&`xAtt6L3vx99Me{I)0e8jNxn$nwwbOMNo*o#`JzUimNhOZ{#m*Wa7> zoLqlTP8C1+PvU$kZ?lxL z9!@6Lzh}xV5c*#2p|8qvs6noOf0=U-^m}7a4u4X=gxn(mT*ik+!O(B#a{(S!RE9B| z-1%3eEA1qW{215a!gGYc$0axFy##$Kke_Rdbj5!Nd7uaU<-GE1a^D8vMd@=+{Fi}C z{T2wtxSgVn+f@`d%fEaNlz%Duj3%FU0&!L%e?qQ5Z`H0kd_tQd{;4d_PvrW0lf1&9 zFUIneakU?L+XqOu2IsrIR_yBx1GyX(!{XBOod3o-W z$U3J?P58fT1b;XBG$s#>2VYD+lU#o<`uF796A-zSPi!stdvc#m`ultG>)*q_0{w$) zLqCG^_(bwot&P;JR%%i&Zm}=A9{;&6JDS$^wEvMW9UDG zeAs52MWnrX)`Q-Y{TH7a#z684+Bi0u{Q4J&CyxG^Hmbhp|+lx z(gr@^eC}AUTDIq_z*{(T+a#Q4S-SZUXrN?K#Q}^1s=CJ5gVuJ$xK* zB42*wKa z>K@dOB7ZUu@!(%|KQVb?00?SRea{;C{nPgG4DAg61*MU$^sf~10o->j%XDumUeG9M zq;VfW+DY~<@VUlzDDB}V@*-sskE}m$lI!pNXx|k+UMJCh3NV37iWg!G|MN%Q80g=w zM*Px$FOlo-#r>?f8Ncyw1ItGIi*|!g9`>&qOr#ch$r13Eb~T6mcp!Kr^>4^`x_~z) zU)mk(x^%AVWc=TwxY@s^Y3r+xif5JId_ADAywPSkH>1Ce{E0ufJ9(Vq=03nprrVBu zJ@wbBB2KCIDzRCo+fH#aPW^iBDDp11kS>>j#u4(JInds^G2O_XmTHR`MPhEMi$@b@PlLmsD{-{tQE zy?&n9h1|9SKC+J8t+=`Gn#6tA6#BoQ-bh9H$hh%+U-&Q8?mwy354=k!=ts~elYGjL z_WgQOf9P*aMLZ>_|DF8qXVklld*cT{U!)z%vpV&6$o2CMzk$#PE=RiJA5EUv6aG!; z^N3u3?@-M_@X?=hiBsI%$Gge%Q>m|Y)X#Z_eD$KgZ7_UZavx9Tk(J~hcprt#`?tvZ z4TR4m`s9v-&%=e_E67`tKXFEWe<05}1bY3shDqdsCEzoS`Yq&Jo1%RBlYb(=cpLh7 za*ufU@9c+q!Cx81YVwB7KzdXEfc*My_{exzcqn|LMuAH^_aTpC|B~_?CcKut&wBDu zp8u9(y3fcPe~Of|?{DER{pBfn zl=l9LkP*=yM_*!*e9sXFF~Gq_*Pb zI9B>0^r_VQCS*OXHdNfypL4a(?{#v|?#QpiQ+^cmUTlZC=|7zOVOgY`pL_-R^(1f^ zCqI%0dV+^hUp5i`Gk!!o(qHP3@8>$fllnpAkJI6U(l(4Kr?$0X?W??25TAF~GWds6Q`8T!P<;Aj#KAsGk0-s*2_rmo5n_Rztrs7oSJ^22>2E!zL{SV~Eb@-R0zT6D>ALsh2 z4S5^#lJ$_TtP{qOXL6jB_IaFKe_v4aO!)6jL%Kfn|BbxUO4Q&amc#g2jz$4<9mV&} zQf|yr+#Emkb>;=~+B;GHlCQe6p+E37(oLsPKgG?ypW*~gY%HX{d<^oti66MlfscP~ z@G_40;4Hbm&PkpN{ox?!!?Ho&bRM{Vy<-^pqMV3d=4Frh(7!1OF5^{G^27Z8R$2et zBj2v=!;D`5pLb<#Rv>a6Z4PkpJ_$MQ~#v<}XO;EqV zb`ob1#8^^Wlg`_KkOvr}Db9To>rG6#Bp|CMc)!>!6AJUjUKat0Nhj?Tj-2FB5o@KzLJ^x8wljqk`4x>|`|N1BB zWxhL0u74kB@*3!0r$O(W-LQ@;-^}xLS#N)}0eoW(_{jMDBe{OQTyP`w z_d7%1g266OJgfM3P~UVC%Hb>OyKaI{ymmj>Lvnq5D7G2;1>8T8@njgeVM=l0Wl7er2Dh<`(#L3IiWVeLT5w1Mz$!-$g!p zkj)~hSghj)`6b>jA@fW2t?)mz3Gq~g+&D#^`UTRJ@%G#{=y&IbUdHVn+fgs)wE1YN z;^ugmQ4Vp+K9B1T@O+PKR*KRdYAc>q{SKf$IS1mAIDaJ9uQ%N$*Wd3Hx)bSMT8{W- zzP?YczwgWETj=%sUiOinABKFDXSv-|+>HOZRt^PsWu31`#m(|LHV5S(>+^W>o#(-2 z+^+T=^eJqgQjcfID^G#mQ^jB&MR!9V&F9Xwg@bM=M`Wob0ec^L}d^35J19)fh zW8|455s%EXEf2zf@hp_T%#VMQ>-#NZ4naT9-Ckef2XN2Y@Rxbv9(j|`h+q0kk;BmI z--p|hJmV7dGH$#hPj3N#=@-q8z~|Iu=%sw-kl#E3zR3*^kP3n)x~bF6+H-$(!>z z6dBJeoq^to*DIwzZYED1ig=`*TqKXpYhMln&%)>UH0U?yK%f`MJ!jbK2mA_s>hH*x zw4b8qz;BNNmv&f_yk0B#OM8Atp5p-YZ;-6<^f&nE*PTb62iNaIzCoUy?{SiOcEIn@ zub2masmEk;cYowd=C5KGpnu5wK!m4~C%VH2{~E@5a{Yb3;eWvA+!H9JzDAR$o&lHj z`C9U>d{4Oa7x#^=}R8Y^LLpaZ;(&t z{mH_!U4l>SSkz-#`Zploo6Wv{yOZnhXWL4yzpwrm@+!p;e;WN?ko&PcCy;wwMm$~| z2heNO_r#HVOn{HfM=QzC^885JVcRS4IqU=nDgQsoqXXb0^X6yr58r{WXFP4M!pF5Y zcn1f3u#o(;3%Hy=o+Q`bkMV?D|K7t2*Rby)*VCD-5ums^F74e1f7!?ANZ#eX{dkh& zFX%moKri=$O(DbDLPZbaUtBmC!)yWfDmT3Pr@`>92)KcBmb{6Yl$ zlju|ICVW!;;M0P+xO64r~U!?2kuv_Ag}ZUK9AhsQ%y{bxliuO^VvewPx}Y@-aMb` zL4Jz-&Mvt!Z*{$#{uz3 z{Ti>}e{v?$70+_y`umIKlec*Vy^QmJl0SL_?rBA1sQ25#U%%cun0ztcTPx*|?G5yc zxKAkaOKEcV@8Lg&{vF9|xhW4-tQMXbL92;{J6BkPvrV_EAJ2RPrZwHPBWg;AHnnA2A6iyl>8m$h-Z{+&@>fOm}bDu%Zi#ue(M?X)w zO`cNJKHW#;>ra9Ir~Q0}&%B|iUm2G&$pagqUCDZ&f|1QI*C$cjSCRdMuH=2V9+rBX zPChjk{J%1A&B$h(sVdLv!Aj!?oeo7Fzs@%a=N>enf5#-s1w-XVXRXtR_x zsIQa_dI!#5GER;pPwb3zJ5q0ChyH9+_=J$#$ct@8{P@%`K9G;&`c2LYYUhB@#jVgw zx!of_cML5-+GhzT=>6Xz9{iPIOif25dlpVe#_q9FBkMLHz7_r z4;n+-b8+S7RpV=r7!Y8?`FV9=9kgrQES2F`J-HUq0hGn zah_v2_aT4FeV9t*PWc!Q@4J(Bcvx|Byq(5((@`P0ArJR2ycWwc`9X5tTLHMVBiE?(N|3UJt4NxyKjzt%O{?;k*lk}NP{-hkZ ztZ&^4L+`u;=}LP(uDJ9;T3A2I6fx_on9)buM{1(DIsPvszI z-YL+pX1a&SH|2!B8hLkj_~`FJolNfRhB%i||B76H|6AqK@VUC!W+`K+U!l0!f0ufO zTSWTrN$T4iK%7;npW^|a*l~zc){`H|pB)8%NPUwssP`moKI*5q*)OVoM*Pyhz9N6m z`REJ!93?;94)LUsSMr3z#<*Qh!7{*5OyZMl>`{YHvpx5`+{m2XbfO7ke`U&L6+`whuW(&D<7`U{< z-sRw*J{$2%Uz<&?e-Cou^3WUmq2J7Odnj(sJ1Kl$N<8^x>Wle9-<;f`0(>s`pk8GB zTupvsG|C}{`W)VjGZpbClV2nc^@Tn^xm!i(50$W4B#=CUT)*FD2f0zxKAwg?7@y~H ze3tpNm*QqS{J?c)FZ!pEpWlacrGDR%Z{G%gX}5DL!RI>X%_8)1RyVSm{(}w^%49Id7WzTuh|{`(!W-bC*6f!##<*}==JZpu1y|y1o2DzpF@7#Ksm^I z_-FD1+#i;5F6jsV1#yUHpaTLuN$$k+1&OnQKlJ)@k^9NLFCg8{%&%(z^tQaTqdTAS*$u~>{FT#AiBGaA`mL$YZqoe%))pM}Pm)Z1S4gc(RH7-dmJsGp75T z{5boOj8|1^!r$XJ`*=>1H~b0hQ_k;3)Pg=&B=i;Nzff^=zSytrM;)hLe}Cg0#m)DD z5B&l@BOPJ%(c(1yA3qA!M!GvapqKX6fPBM9a2Z!)$*+_~IlOX!**0?h{()w7;J_@f9{~_1M|5D-5d!Mtf-=E0!_r~OI0R2f`&#lAq+(+KY1#!x} z-=ZP(!^WdL<@(Wla&LF!H;Qqlk-rXxkMxW7jo`C>2jY}=JC0o6*K&%0{(VV%pJn8C zdA?Ad>Ha|8y$1XnkQZ(YpBcQbN!B5!$P>9<^`*Xf6X>6GMY=7?Pm>oJXS0Z;oBK=X zGdK>&`AZdYpG@fGK7qf<_4|DGG=zH4eM{MMewxV8SJ#dZO|1Hv$ za$eIMKE?c?m-ugye|;VKmGL%r3+VGyL3ygnrq(fvd=lqD(f>(aXrq0)!IALEeGC5o z*>@t>@9Xwz34Po+=wz9i7baU)H}7t{B=p2zbcSFCvQi6C&kTmX5(fyi*zNQOup++ z@KNL*ZCDPUkT02+Kajg#hFpa$EDnivm<%-QHWE@GoE~UU-${3lJl(mJ+aPtI6d5oT-tME#m#!rzh_|}xzhpo$hexL z7xaNQk*=rZW*800m(K*3dPyLkoPhkw`N9VBhrff%I;~A__`l)xP-%y!$j{A2Jkow* z`#>Mr96qs(=PUB2zd~P%{4%-k9O#3|oArf{?L6X_`F*kC<~lq^YtIL%cTa~NZt5}( zxqiQ5*?#a3{s6t~XV&fyp0EvZN<4)Jfcvb2KHb34m3#r$$x@zwkf(D!97cWkz^uog zC5oH%UO5jUzf1i|@}}W7i^#a~j6C;ra3AXZ2f@e2=N+Wnx|1)u487#*5c#~);Opq~ zocv@M_(Sp%gW;cC9K17m4RZbavksE4=QuCt5ueGE0`1$+f;jj;_kh12{dbd>m;(JO z@<-$^J?woZ4S~IQpU60CFx}(iM_(h(668b#w90Jre#tJCW{K`t&A$HU~U`e5K;%dn%^!y)Mq= z=c#|o`)zuYmra0wcm&E*&I=BZZ=Hscl=1T|`TAwxq4c>h3O-diZcBcfC4#4QgnupS zokxRDivky)=H&YK3zr@PePVU!Wjy?zTz`+&zvM35C-h~yo@3$j8|QI@?bB@>`0K6k z??C-K@+U4fi$suzkB9#69pL`tDdaQu!AHiK@=4IU6-Rq`Mg1c30ym&Lbo3^xs2XY&^Ip`6u$`o55u~95RtUqrkgRzlHn; zuUl;)519o0q3p<)w5wmqD|x|3#>wmC8Oh)w^cgo9J`syi4l-VyB45t)GP(aegZvQt zk+k!MQ{bat?_NYc;|$W3b=`5rr42I_>&H{-zYj+}iht#)@YnBS9I3du-!f0zZ&^tF z!++u9#YD1CgU^#mh-W%^G`Y)llxIou?d1CR$n>2KpU0d>WM3|sT)+SD1^M1z;O{~I zd^6y){!egO=S(H{O$C>Fyh`rP=S=@Ij?ILR8}DCTNdJGxUthBKUoq?daQ8Mal5Jag z*zgkvC00TR5Jxf?iQr&t#(k>)QDon_Gd1_tOwUZe?tV|hOVCMm)#>i)TYtBzx@YDY zFp-cI5x^LO&5to);}9@5*a3shGsa-Ru`FXni9<+aV@MPn$q6BjL!3m)_pP<}IeYK( zTh-k&J-NE?&b-srr_R}X?e+7mZ>^=Df9F!KTYu^CH|X_{U;2NV>!Ckz@{RiWfBdz2 zpKmqa`?-HquZQ^R0j_hu?tI+?`J{cXuQ#9nZO>H>d*(jBVy=gMvk$&W@BcfE-nQ%i zBj)-a`N4PO4*K6acRuZp>F0m`d-VQx-M-aa59g-+nz{Z@KUeQy_0Ok&v)BXuFPZCM->`q>+x0%5`ETj{ zt$qA1b3N=|`tk4hv^!s5p3VJZfa`p(PkrwlxqM)L_-*F;Z~lZ{w|d~G%=K^iH2=Lm z`ki{8U;iBc`u}dOfBFBW_p$cq!JpF4|MB0i*Z;Bk-mf;-f6d?wKWMIh_+9$>FEM`a zXPfIkW3J!%H~jDY_@CC#Z+)r1KL5;I|DiKIFBS*>?(Z(%_rHtlT%TVz_WBErTy1}k zUO)Isy}z{!zhz{rqqLk5t}%!`$b${TW`zT++{Y$GnmL@$bm>Prvia z-=Np+d4FuK|K-oo>;Jm>ts`^&o0#$@Bh_bp`ZV8bN$bn>!1B`{rri!{)K;DKmQ+`>pK6d=KA~Q`g2L_FtRp;k=J8`XT+^kmvUA;X3W6-~ zcjSuoi$3Eo=>0>U?H8Nt8>WxJA29d%5pzA{&-?>({db!Ccg^Sj@L$yXfAS~wd%x0L z|7+&@-#7lDwadS1t`9rDoP6Sk^*;aZkLe0qKl%6kh+Yrl)}J-kKmLpQy;i^7`Afz6 z=ikJ2>Yu$!{k-+h@0#oX%Sf-==Ny^qU;Ov<`q3Tz=UdG6e`N4ad%k`R*STMx`L*i* z?3mAg*HGfqPt{-e*iYzvKEJ2;w|>oKHr=w4G=g#l_YkHr*^OL%s_Wp^v z{!hO~?_=?V?={zd{QLE~)mNYMlX{<@{~P*V)?WOax&9kpt>0+n|Ix)uZhygC|CJB)`hW8&xw2HR|5f^&_Itn0T>s>AeNK!2 zeD+W2=Ra#pzxQ|PZSJg_>z7}vpZ@`K|6elKzs~T)^rv_3{JFoO_xVSENS|kB?(_7g z_4*eZe96k;C(ZTG`f|O)?=YX={hRvvzy6#0Jo?`|cjo4L*r({5&GnGS{`2N~%gDc# z!+-f_^#0F2sq6Ez=6nCBxgPSUzw~GI^C55QkD2TL;P2`CUYh&-u(^KvclG+`nsxgh z%=M5j{r{Tl@BFv=re9|6^Amqdzjv*r_pyHXv!B!JU;kzP=fBEaAN)i8yw!XE++2VD zzP_)uJMEv>`~23S-e+OH_tISdQ-9F^-hb^E^z;AaOZ~j{>wX`uTi?LkLjLHO&xdo0 z&&>7T_j7vx-!i}SPt5gSF>&R0n(KG}wtnwF`2#8^@xP=$7@F(<@%QR|K4L!qKbz~{ z^#gj{#_!+pU+8_l>L-itV7M1W)7j)a z=`9A6@w{E|^G)aH@i;jx_{`?0JO7~Ym$#?MY;>{c^6qizr*@`|KDKt2yo?tUyxpSU zA)=_C#K*IX!TES=qu}RT)7fB@6g}m}aB|wx->PezN@ z<<<_qI+=~T!@bSXpf{U1zZ9(>cjpP8O+FB{;`jERy?bvjj-STS-saKSZ1SRWxQHTr z*`OCEv)N?UZeJceK8j`0<(r${WNR?K><$Ng?D6Tv2-2|fom%|~W#fv^(;6n@)5Y0U z?=rbq#FLYF)*YWFE5FhD>|(sYPG9k1B6*4{-ehBPnaoaxlNVRK&*sG#s^l7Ow0+#| z$0rwKIqq?B(>)KmGVd z6vuNalAdU{0mT0ks#dz=e%w#yi`j*~)!OBq`*(M@ccDn}=g!*QNdAoOw&c&&-F5kM z{qBbRxp8+>{@l8|C4X+;-IhOh?(WE+yLWf_=PvFeH{5OAjpUBI>vto$Afci%^gY&XRsSUCf|?wl1OUC$rA^VYhSMZtp$ajpNhtMcjM&5})0}Q^nKC zaL{|zZnfL@TOEAtvi%63Y#%OKYw_V!cir9;7O!&@w;xQO9j(b%hN?1ZK`~<2Ud#qi zjh&~5YcVceoSeWY${FiGg4)p)-{}gYw#8yOW$FLn{XZHr{T$7FY;1fx))!aB442PZl9-jRuKHP{$ z2alf~M)4Le$BPaGs4W&Xx0>rO27Q>9-dWQ7Kx8P2qjfQ%*w1l)n81``ReF1aH9Wy9 zU6xGLpKd3or#x?4^T8>sx9nwvFQb*d=+2BHyC1dp@i?OcIW-@XV-nMf zb}q*BuS5f3^?H-(tIq!8v;DI+UO?XKaGfV%+#S91-d^Y3`_G@pR5nnJ*&~_L7j4By z$!I#6b!V?)nDmRpij*tZty%Yl2yQ%27J7oV9}c^V_aVzqy3>6*t{qR-E2SH<`xMWZ zod*%{CKA8H;}g%4lj03t;jH&6&-xC|y1Gh*ZdlY%F!&w2qse$e3zFcRJ7VH#;|Sfl zQ*FHooW_^9BlPQW7y5TFo+qEi+U4ydurRTY|~-4m-q)^dyWf{4?&cm+>GektyE7hRl{ zHTSHnW0AxhT-K}-jOQbMawn#Bo5*>-c=N?U$n*3p+fZlf3*h_->?acivb^G%LEg2iOcnH?kVK1by`zQ+X0U1 z{^71{-vc!>=e`w)(i`-;SknsBYtQjZhDgsBK|g&%y<&Z zasnJvPcd%YZ6O@xkzG;qH1X*tHD?XS*_<`;s+~~daFKi%h*7IRA?=M2Iv!&Q7Mp@s zB0zjDTK8~Ub~IDVqOD<~E+HCvY#jp-tv^WUSwY@u3e>*ch%db1+IJoR&llze>ew{; z+mh%nYDs+99wtQKI`B=}?MDaHM+gj$dc$bje|(^cz2P=g<_km|jG0fSF;r}7=H^rP z$1qYSQ?XR-XNQmJK^*Rx#VbE?zO7i*QsZ6g4k=w5F%8rdTd9AOx0eka^p~DXUZ>6Q zrS~HPOyDUtxZ;g{n=Lr+v!px9-0SlKmD}zf;tY#sckq!dYCwdL(0|5b^H$vPHE9GZ zeNB8|r_k4oTv?P?Yqz(c`GMY~T4y80bj1Jd)POixtI!~bL<85s^6hZ>4)zhbmb!v({42P_bcH2)5?Pr^0NX>&m?V{b%HGR!JcBGiJ*8%dh0kF>T1b2vgL z-HRcEwXwklkB3Qo*}+?gYfY!fHDXc{S7|l@ews$N@04dv!<%R_>FSg13p?M5=#0(qP;|`Q4 z{<6vc03=w~Md-t>4HmD&v~BPbzO;S#KEK<=>5w7B@$cMg?ZmxD2MEv)1_YGwf|vU{ zapzh5u(JncdL(8-y`q~u3f1Uw09i7A@+G4ZPH7TyS1aT72=bPUyHrLdvOk$kM)Bed zI_t$?an`|h;9p9|V%L0daMF2>2!Hkz8%Z}DC|sP(PZkQTz_o(^r1(<%vaLG>eXbg` zHY)K!Q1wo4)&X5=hXhnc(C4_J*3D?uC?>!*5+?)l&4El&OVDIyP^m*KmWIU*&bepA zs%$y}w4&hKb!e}M+HN9v9lp{d>ifYsZ_H^ph%P+`&x_>+&x-~A4%v?)qR7UH_5D}h z{f2YePkKY3nDkxS;;p6^tfTr!9M#7ndd`ctHtjyZZrB`6EYQsMp^MKglfv1kcCmuj z-wt1YTT?jgy-@_r6M6tp%rgVR+@yLMi-s*rZWXB~7xCbk4sCX;bvhf&FQ85)NFB}> z)_8Bu0FL(CqtU@gV4(2#q6jJnj1HtxFO$w#UGQVk8z1)^on3KIKwDw*AqOGDk+r@S zJ`wkktN_9S15MQ`1c!se3DiGi3U@VV?k2-M@9nW?tlfwc=6;v}Q5Eqn_A$%sw@`c_a7e@ZYI<-Esch4>m5(&pFhz{%X z6lNz{69I@dnAw^o0C)w9+ng-U$Re=zCVn50ySmp5{g;XSB_|vP{{S1}2P7#?Ko-E7 zRHF4_IDyBMq*ohdSB%YOU{-yY7(9!0)>pR)vt{0xh_^tv8}Ubgd%}nIK$p&gDWgAK z+?&)IbtVz+Y6@&SCNbwwffFkjhhTl3-ho0K3XJ$1l-mWCP;T1$SZZd%9>t}MtnCy? zsio8mJYd#QIz}rwY@eJuEdbbRw2jdjPUHZ^dt10Ktri&A{dh7X4g~=rN<^N9p@+R2 zcSgqobBMEDl^F9?U=vIMDzOfd2X;eO0rY{5%n@*tQw_8ZH80Z`naUP#*5u}Msyt_- zQq4_J?KD}$M}{p(8jBjC(BFeu`ygQy-eC|%ckLQr!*i8do?An^k@OR^kU-d6iTJ+7 zg*)c}b`f%10y4>HjE`L*%)-}U_Z3*m(b}H;$rT5-Zgpl?yiMkW+Y$k3jEUB&T4`%#pQE#-Ox}9?=RhRK-7s@5b%zeM_4I z^Z5)15D}i}VjlO~Shpqx6>JE<9DP^K6bmgdAX;g(B2N;Cl+iOF=1v)Unz3|hC}@qvjbuRGrh@QF6q z*AQ~VRx#MFN|h4k8n`1Q1z}&>$gJwW&=R`4nF17_)v`7HsH`^CDPfU4vMk*=tkanx*sL+-S{8}5Z;P&7Qy8sn0 zB(T?R8}Iv{xwRSeaaIXgK=uEi{sMRhZ_d%F-vHu?Y29=T5kz@aqt`tgpbiY@{hDx5Ftx)Nqk8(@zum~%$|^E1R%1}|kH66k zNN%xy~q?{kc?6Ko^ z@61-Oqj1jbhwA}&c_6EOwGzlg0#(s^flH6atP96O%GPfDQ7IcTAfS{T5eW?5;F4*` z1LXfz2i3+Fps77G^dfGzTS1$*eU=yFvp-gexSh=UR<1Cm^9X5|s^5kc@JdT0cMUnb3Oe*N92$HuXnOlN_B+%za zF`z@JyGSl+tzlbWKj-cEm8-fv%lNjLdlP!KtA2YEr>iH9R?cS4-M7K91KC=Ur#@T;M2&R+98Q<)p{E*(a@V;eHvqBe1VQg` z*qaO|vv)UHEGtR5C>D-r&be$UGK_jXJOsF2D1(kak)UIMhR~X91XU`JJ#P!8lhsct zwaa7;w_|{Fe1qJ#<=q2Km=@GXPCP(Yk5IH_sB6GBP0<0C8o8TTS|LPh57MF%9IqXZ zMiS`s3J;4>4)L3{c>i+Lj-Ifh!6D3$IlYghu(+5jY|(=?>$t%UD9!2x@f6gITL&Rd z)hY#XVnj7vQvp8-E0u&$g`X^gaAyv)6{FIy)7m=efj@y(Az;Z2P}Zs??{ieW&lx_1 zNg40isWy?QWW^$Qk_k8+U}2D(K#}1|oB&p1fl6HZs?&ysnXOa@Z_SM;H(43S^hGaNiSgaR)#vdN)2Nn8#|TXFj*& z^|(E#h{b|^(%7rDAZ*F$-0EGRXw$fW4@*LamnU( zw1E$2GMNzx?uG77qjz@*5V@`v&B1NDEQk&8eg(98V0y@zDcWT=$bKw~U9>KH`J`r5 zh(%q4Cu$Ent637;t{Vx9`(Ml`qVJhjB}%V>F)dO(J*K`29huL4EWAN)P%uX5klI5 zVc~~I+k(F5?BuY2UIHX9zjxKgi7N@)ZBNCrh6kwEJ$C1a&j#of$p`N}OY;cYx6Vb( zb9fHm?<;0$vAP9~6xN(RdSpT2q#&dE7`{CL!E7^*Ng_r9;NVzjScbeB#9n^X5yV}9 zzg+B&Io9I-Tx49(Kng2a3z5);&h&2smpM;+v^~!$l4wqUWcB(zrk!>1m}xUm}E1+1-9(B7?53egfxiN z+u{a>rn@26u!*i*X8l9RJeRtJ5I)1JIcg!gU6{%)DT7exsD-b9yJXEv(*eYc!Shc! z6;`H9Q6tuX$2XS{KI68lB3N-dSG0ypVLTFdg*LH(8=~&M5;#_SohszA4i50ZAAzNo z!!J{xWe171iz(93+NflCI)@M{)~N7NX26cIj_jx3VyJ^#-8sE4%7G*vj{w#*1fp+> zG1&m8E_xy4IZzXXa(ceL{M2olJjlN`65Z*uyovQUd&wRqN3(}mmB2s=XSqF)HlCh*X6 zU(?>?da-_4n<(ZE%*`c+pt8s4QYXG`5v9V-mmweDPjZAHJ&VP8f%b0u;NaOoe0YR5 z?d%$j)?pa?p}-_=r#+(~BWxR;VUHB4Y?1J5O>0Xs*fAGJmuq*>yFB(HfYkcj{K?4Bc|K>A!blT|SvetGU{|qL z%BoX>t@@^wmSD#qIi53Lh$Vu;aasQ>#0k08ZD1$?sTfv$sNXXSY3N&!E>ud{Kr!yd zQRn;|=FDq_#mYt$sKjtLF;nBk0F^1`H`rDS?TIdJH!QY5wl$3#jLY$_@l6*MG#u8A8n>>ejW zkM3!Ln^pC*O9ZY16Y-vY}!0?=B$Vk2uoYJXy8j;Cg*|0 zv#kQ*<@K$O6$cyM-YbHklv<(FwH zf!n8h<9jo%2XAvPcqA?J(KHd|AS$TdGJtMQ4E!(fb4B54Ajsuri?}lq)7A|2Fb6xg z+1_{G6exkpLOOA~Z(4e!>d4UDu@XUp`yB)V(%QP%>&|?>!OPA7EViDw?zqp68zZNz z))Ck_Wvtf7;n|@j_@ir0hmcS zezW(M?l+sWnSpBN8RubxY7gSY{F$j;%V-5pH_D1eYqD6>I}ESA)=IV3eZte_)z zh1SejnA_c=6|?!$7OnTVr$wO~!wtMV#@ddBj_Luh&@+&lDzvrzbVvJuPspCYkziBo zLZhFiZs4KI;G4i9$+mjVFe-~?6!R}okvd|(JCdb~WRYP{hCX!m&esvhp{V*wDP0#Rsm?!Sqzsql zk%GL8&_Lp_HfiqQLXU5`Bt9wEeKBJOiG1iLEnPHW%1f;n?{P7Z>pOEn2$Si z8ab2djIp79Rf^8DyMs9du$S)p88qkG8hT4NZ$Rg_!flvL1vy+|D3ujPukM)aeOq^2Mf!(Uw-IKTb8~x$!#&`+az4s>d#^-*=eTH# zuD7dwHMGqGY`WT4x*pzMUK@TaLn^g_r?`&@LD6(`eA36Va@|hNhgaOfq*mXafE}WC=fz3JtYLAk>iSt!7I7rku09{F+odYMQO`Y+ke_ zT}Vygq;$R&JCQmp(RFcNP)i>$tfj@P3@60wK~;AkcN>l47f7?0Sc((m6drgBoP4fs zA%CB-Hj+<>8F19`F9uv*^aq!V!TLMz-M^3d=WE!1;uLXwIvgCMe@YAv6Q^9CbO%F$ z(-6I_#qPxPyX*X=e2#W)fNmLCf)>is#)+@7JkxhIUQNKd6dvQ8IlTX8#SLSY zXyMY=oik(Zt?!=6PfNQPX(byn7dz;jx>yWO(0LxuiBQUu$L#m6=!4ladm9_LZN6H(JtpXa&rxeCuBL2yWbk6SaybP!K6K zX3-TTn^KfE0JeQoQB)IT0gmh!rQco&73SuB{0vH@y=+8A@@j^&K<2h|LEn{QbBMgM z%5Q-#Y(j2mR612h7I#+{Zie3=tqK6j=SS+2^j>d4qd+Zm<0QmDySZLOlT&xaFV_$y zUv>nfHQ%5QO@fA94kbh7o8(+PHB{ASb40(wnQ5m}5uJKMUf?N9yTq?cQK2PYI6%Pm zyMx7(?)-xeMgyQfpjc37J+Z}Egp_afYHxLul2r|Fxh4MIBrmeLt?UHKsDeakm6AUj zsKCowxlf{a+dY}_1%CkJKj(}7V9fr}i~+1W_gdr?&;&?PV4wk~W(;bEK|oA3Oshg> z{Uyy?tB_D&tW|Y9;GSGSzj0Y%VZ{bI1%@{%ly&5^vg)%USEp*3Ga1fs%sTxScq9n* z3M(a|T^;dhRHAZ>u2ykrAR;Mubc8wcp&QOSF9U$!pL@=I-aBq$&#HjHx_o1v{N2!1 zaFp%s&Lx}L6?o~-u0*wWGZb~FC0tcEm+Qz;(53B4p5l@{940-J?(^BQu0tMl)&O%) zi_n=ySP7uvz!{=9wvC3h4cC~3Wsj=5g2wQWjfPS`g~XA`ctNM__zY<+;3-HW;iDi& zF3#AMl9UvQn=V~cgWFxc#=@B<0w!Ri-)}E`&SnwAs{GkHuf!ASLjG*-dChG3+dV=p z=+xUn;GS*g$;G(G`ARXW8i(Gt-ihj~ zVWIKB<{3aa!OWn0<5%K};TD)qN3!1&eMHRoVLukED%S=-c|FDJ;4nV0-1sKJVsT%~59&?!!NkH|rHF9)6HttD6d{N=c9qRO@Gmk)==xTmotfYnCyN)8 z*#~BTT3X^JYg{PMc!Z9}F&b@IGaOt05aB}8+Eu754x1670|W}r z7J36=BhXcA`EDQFKqDuq5WL2;Ksg>aRyM3i#>KJ@iD8j)?kH}d7*EC)&M+@q#_RGNW)t(?WUT7BD$*3eEojZZ%FQ(+ zMrJ@jIX~C7OfzCD+^L7r+Asstd!}nnHZ&ci52(T`rgd6t_Qo7(02puFM`L;-$Vcgf z*_+(l>ST9O9ak7!L99bcWMKBHL7Vk@3cYkQDxuJ$H5WV@z@Kq4KUp9ufQy05DtL{? zoLm~}&%P;P_D;2XdZj1#0Fi;jtt%);@yg72O+n15Vym#}RjPf(gys&_^4;?40Qc}z zhuiKgV+G9YB)^t3Ven{509bd^TqjcbEdp87^MFM#X}9t{@Z5KYaRd0V;+-_)Z9J16 z4sIRhrj%2b_K2O<2e*bcp9v5xht4LfEH$=_)O^f_Vr!S;mM8q3RILTn1v;TvF4HF@ zlrEtxpUwvJ3pPJF=AyENOg@<@Jvu$$^<3~&?aYMV2uhi5Ni@ZA%00N{$PYv4-iPR* z=YbPb(F?=FZ>I)WYCyV}{eCQt8js}i6k%vhd~G@=86*u+`asXQh1k^>-5I99nr@mg ziUsw1a@sbhMXL3!4XjAQE_kKU-e_YDYzqvLk<(KSrugo9C^S)oQiY2#31S#5I~48B zslph|m|g{+OI;YDw6Mf;x1i9*ympUxc9Of3v(V25E>c3 zbN~7Cd+&DQ=|L}uoxFfYl)Riy z#tF(Dg}M1A7Y_Sh+J8KJnN~~e9r(MG*#dk^>zE5&YSEo7&`4->z^VsAYQAHWfjy?i zi@X#&7}IzpE1+;WHQ&IkXtz@w?j~2h4$OA5nOSdj3R~7lGd{h5y@RPB99ZEs`^qz) zI1WMAMu}sw1vy|82H^mMj|!DaGd1cav6K;1LOgE=Wj3C7?W4*8hc?NL*-E24(OD%7ykvyJ?1JTr zOd2;|74Cy5miD&3uEK;ocODL6-!|iIp10b29 zE%#g_y?v>Wh!8Z34?C0+bVXUCCB7Ag%Ts%&16vW^(flyVgKU^lZ>Bk^n@zYjWJZ>0 zUC?GsoER?#i&ygXX;XOgg2%I1WmDXf0G5tRUS{PDo$BqhDw)S*PE-aLmI@B@{D>DV zF6f2-zN+_qu8@t$&2hahdLG)|C`BDt^|xJ@u`U$MNYOzE$V0w9L)G|Tl*H2fOx3Di z0Mv?j-tvc5<2X~`B666YJ6FVeS%XD25telHV%3=ZddX=wlP~bByEK)84Vi*Tpk3%J z5ibVVD%^FLjPt$QkWEjy88!#`kONBFm8^{QoCCe??Ij1q?VDoL}GsmtOCMBKdeLSj~0{!wG79g36)=6mhC*cObJD+SlX^({75=6*RymLRa5-iu(W|Y@_iModFJjkN4GeNIP@W zhhVcMmgY`n@Y5(6^+waYMorhq)_c3zH6jZZRn94_EH>GYf&fh&gr>z5u_C%E?C5xg zcRp)>q>Ic-YYl{li?OuWPr&n>&ARaY(0jN&`|$bceEgz!xqnG6H|N|;2lHuf2-Vyh z!nkW)E!KkFiNQO>w+@*>YNtBm}qNzLi zhQ(cDu6=ye1_z7aOGUT}9A6Pcs)L+tTwBF;VPGS9Gwu6AuLl}%oV?Hk47-}vsK1p! zRli~rH0$E}h_G+ZhSRsFilhuF9VQ4pB(X7XUkZ#tI74TtRcEH9ONq@?FnXTS6%0DxzQvRpJp0**_mS{d~MumEtcsYvGi7d z+#A7>b>@GRfD}!(x2ou;E=+S1PYug`*pdHb57W88+cu$_TCk!)CnLD9#F70$C!z7p zfiY1FOnac6J#-M6jKw5hV=&kxHd&T~q%IB|qjYK_ZF<^eXSx7t>PqBI>7;6c1Qi(*G5Od7n@ul)NqT5K{nds!^JwjQ6LVTl7148CwjzFj!)+t&y}YLGeHEmo(i7N z4J_nzaps{q`lHRo#T4^7;6(evQI%37){Q8a)aj4(hV-7&m}J0V9~?Rf&qGkg@>sT_ zp4J9aSxoB&P`77_F)~Ojp?4?iY)of^OQ)kTs+|p_{16rQ@8i=?y3_s1aL{|@tDQ>f zAfhlLNlx9pPEK~}?sGO5MFX+b^CEC!kam6J8qSrmGCvpbE7^@sXR)Z*0)s z77`{P1H>}sv@NTab9zK9LA5eb>ZJpy;FRNg{V?V{sLDPsi8&f+WY<-#=57eovl&ld zbKN!ZpRE@>TThn#7lIs4)M=a&(fZ<=P5Fy9eksoSLYdj*yBj5VZ&6ht4hLvckjMs4xHoEL6YKegvFn`L z1Z82v_h&w&7x~pp>>S85e2rE7Vt_0$ak$=s9en~G1hmV*|1glP?QWy1HvM2qeWb?oEGc$}P~Jp3|YePO2u zdIrKr2RwT_$pH|%!NEQe@+Y>84(>GSt%sc+cEsa$z@nhsgY+ww(@cb8~^)DYF(1+7CP#rM9hGoOddw-ovG8{y#fH#jy7tu0?@WXYi4 ze{)8kHbG=&0HEI}>e;*FQwk4kG29BwARBk~AD@YKMhzntQM|$OGLjqVdjh?awmr!! z6=!a?^b^O6X#(075fo)H8bwuJTbR1q%3>5)+vz?KxWA3=b<^0!j(5RcncB=yP2Ftj2ptPd;wbmMMm@e5^cQDVDZUksJ`|s*Jv0G% zWHnzwO0L;Pxp881OrU^^b*&)8r`@%t*w(@f7Tr#P8m%HDD+16J{aHmV$hpY_b|O0 z5NdddUNxI}1FKJ9sgcyI_o_9Dah=XgeH$IZAixkE%;y9nHQj2x=A^d=s6##e{7i!v z)Gws4=#p-$jJdUG(vmU(%*IKfZwTG8P>|>8-7G?1)pEuou`mKUAJN2PYKUbTzbdnF`QxR%=v;^l`XIe17alMq6-_{9u2`_5cVt< zzcVN5!P^!AC538{@BEP#`>@!2e2Q5xQv!053(PrigU=by}2@9YOjrXK8K;u`^)Mog6 za+wTMMUB^%o))eCgqVp2!C?TL3lkchOfM#^e%UR+^oRUyKOnVq@`0nGMdZ zwF$p0(_b)9A0}zk=xH_VG>@aS*05LoSL5yopi#=AF$rQ-it3V>L7ntKOqiR0HII{) zD{Qg4wl#k>UnC;~(5$HqF5kD@%pc3@Rgk|~JOZSdK1?eYfk^g1aDDvEglox^X~^s4#9-)J$x+~;sad78-;LQyrMipaHAj>)r1qwlp{rrq`S&Ft78v(Zc4McB3Nh0+!QDSAQaH& zL5u@HI~}4J$4_0|R00C~R#!UX_UeCnOR)xjibF1mVnfgBO1Y}NuX9oJw6|t-n`+*6 z5fD_jU8=aOQZf3+Q7TYT#U-d5i_LnWRFnC2iRe%A?9j`x+-+?crEXh8yNox$e4$rX zZUJ|u1IT?pc}d5GZ;-Q1#WM^J&fE94V>4Tuqwh+(WHdVaW_T*oa0L%Q=!UF?>lmvZ zxntES0!tIC{awz_YXJ!M1nx%{nM2H+K{`zeu=TfKzU;V55^)RC!Zn-;q>`yN=%6ma znFzNut3-m>Ax5((z?I-TM^7R7OV&>OZRZ`iEFP>#<7l-u3ELU3lJP?C$L zdM9vCxr?_|4_kK2G)=0nN_K^|Q7#w6tFWy!{9Nfk+p{*Q z+v_EB!+On(nDKDSlhH~Qa+A@yZceD=BWyYk1C3T!YlOKS!e@>0ZqZ%}yuQ*yTSJxCN4h zVPh35_~GfajjnDB^b--Lvl?0{0%$*JISfuxWEay(jsY4cACkJdg{DuFlM|dPFAmQ# zLF?!{g=o#ZCiSOuw`xNhjCw!~SQ%q!S#XfbyA%M572}Rp7mj(&0s)vi#VA zN-}&a0(zayfQZs@0Ui4+>3zUnW?acz2H={qi=fjOO@apxo+BO^EZrucaZ`cIb3 z!>v(S#%hI5*oJO&UPHZUJk@SD5Td=F$ z)ZzTWJ>WuEU(8w0Ovm}YVuk9hf;F@EH#!!}Rp;6~FewzrV&CowQ+BpuWM(VoY|&fY z&gV-elJAjOU4TtmbOv^C-=htBF5X=TNwo zCt!)v#57D=6!0llj%>oc+-9tKy+Fzle8{XrB#`jk*z^szXnJ4+swqhY0|vsE2s^P$ zy{kL@M!&@??dEBi_)K>!=*x%t4>xZ2^#k7{QrsX12 z5NI|q3E(BrRKf~QjFEGw_jvcFp{z^K54YznbCQ|737q7-(;AKn9F@;l0Mi3*82mwi zFauNu66{C3xsNM=MULVDlI2%XRIXmH%dPUXn|{yL9*uEe5ULgDkq|Q=)oL48@0~|& zye84otGr1Qm0RcRyV+xRESvhxrrTy=zJ`u`v4(QVLDsXQcPh zVNdmo6OBppc5l+;Wi>u+&gpPkmPk;wZ0940lkQyd;RVpu?#0Up)m9A*2o4!Q37$HY zNymBE+UveTxnk#0S7H{lmzzp^31O-Fe1Xc;7bIR{YE8NMSy>-S4d{<$BMdf&hN9=> z@chmLjeUkUnZOI$#oaGy7q>0VVHhbI*aWj-O(I$dHq7^Aegv4Ps0#oGCViNlWFI?J);0zlKN6XQJB7sff_X z^hm)KW1>CC3x8?bteK{=dNTW)a8#CXYFCBva^Nr_M_q!VthQto_|;xG+^lGTiy_|- zfKHj_HPI}3Nr4>+Q<)JYv(GHym%xDW3y%)%>iVz-*PZ&sL%@1~UV1l(fZ+>s?M`wa zPS9cm8z@TQCRKa2h4H)K1kucmMp<>w6_^|l)e$tBUfVahUQ3ag0yLi)qh&P-fUqS8 z=(V42#S%YRFN9cS@eDjXU)cyer{_hqZd!Gsg&v4@8X`HUnx7N(@(e{eQICJI{qA7# zq&xqhbB@H!mX=dFVFeGp2~_euGn(Z7QoR-GS*sb(N!&tZR-wn6eaLP2yZk0@^vEJA z#8sAd37^8f@yPadc!YaWni1=)+RV_)X6Q*cPY>yk7K-gbqB0II&Zy*8#04v<<4XJG zFMCoXNjRCl09#z%F37Mtv}A@Dp_dA_ zW2s}C0%W{I-wR62MRe3W=cSC@j^-c=GM}ci4o!9QO)B(i5>t?#m8Ogidj|Tcm=MaD zBrYMe9K!KEswO(E0m7q`cJTyori#2B>eZ^S7{~OHM7s3HoY9H7J!@?vF+9JRVsH&P z*mBxg&Mu6YP7)@HBkiLhZYsPqc-Z=QEu^=6#(WPanS6OP*+9Yj*_qq;)nUxzMc8qY z)H$8>v;SD=^~L?dBb%zq2K=BsLhXPbtEkqhh8>Gcd}4*fD5 zMeX!K5yTP)Sxmc$#bu4>d!uz!+e!*wx2R+-Lo{f^MT4*}Id`{l9DAd6z%w?Q&8HPkM%JbCZg48IRmkk-4^V=%*C~l0I?*2ggO--wdQE( z<*h_DLYemNBQVmmeX~#Vsk0&2lqI3nV6;6(A2}U6yxFo2uy>1k*KQ>rf~$E_cJKi| zPA?YPN{WfVvI+JcY=gQpXkpD3_=jdwY-tCjC+!Dam}xj#Pj^xd`P6!4s9ncsks%C1 zLAR^d9FamP?yE;a5N`7VRVz+6uFN6ePdBo;S7eo;1pHn6f#%ZV?kUJt{#vYRG45~B zK(14Pa0HexxT+LLTlm-G3DWdrh{Pux@`Uc3ox}H`la38unAgJjK0lk1OVLfrecr~M zy>?KXl{ZsiU8m!6JBKI8N?t9nL-#!B*$r{=;C7kVPJnUn<{HIzS8xvAB*?vEd*Zc6 z`;W!EQ;fqd#%JyP=R){lXM{ zp|W@_9eGp0D*!+5K^H4&$n@bB*0i{MTe`WKHkN5KHwXY3$ASl7P*$9a?V)M^1{|Hpl9msc9?5w}!v=0SwN*@Omk9f63lwH&5Vi$-HZ z2apOcYwoJKTyCv)4xU@afJkdj-ys?;S>dEZ*i!py}kTCE;K^C zm+gqNUEUD~38{NBDsz{JQ^rO}1>Et!2^!-ObFqYv`xH$)2sr~S3urcUp`CaN?sJtX z)yPf*UlMZ=hYKhpV7N2thr@-X#ICk9p)3*PbY}O9tAbK(D3`I@fzw~Dd@dz$*qNf? zi7#N`M40{u_j819TM5pwYMj9-sl4!F=*T>MEUeA}2UhZ$p*sV&tALRU9DM7bSnU&< zvxa)xwI%&HZc`TW!Ho*;Tkxe3RmA;aTM_;%7TWBSw+y% z$CO=mQ4YCar(L%Ha)g+!Dk8k?HYtcl6OI-Zna<`_$*f_dHsup-Gj&BihTd8<%Iyb7 zm^QZl0Objz>13YlcLy_ff-gQ&G}8Ck;bYMndxBHv+|W8jhHbqX>}s*1>^9c(&G$u- zK6%a;nl1%y{ZuH=RdxFP;yQguf%O`$iyXKX7inQO`Fe!NU;xFfkO*fYp)}HTs1M(z z+1Ye@BUR_qJw9*lyOvOjAk(Zt{>o;(TDxjIL&*4yR?C!U;^{Ro;fs7;i;%P40OX`jTS9xsS z$1y}!XP)$rbcW!_vlrRn*G_;H^Zo+WP%_G{En^q<1|$v0V^-ANZBT)9AZ8rfWSFRh zp_#=P>g=LF?$=$1+tb6iwcVdhrl>?2CW~Y)T|1B?K?+4W8_Vkl@HRHww&o1-1D5O- z9vfhjB}N8nGGOwmPwvKU)s@j9 z04)2RI$Y9yg+81^94Wx-(^f=TtZF*z8g&jk_|PP_ZJ(abL!_@pZ9f?nWZ%|<@R}2V z-Rv9PB!{-cggX=;gKPX|W!9aU!om1*0w|i{P1Y|B+MZ@SE%ZoDUgqKgyFJ0&E_@6t z6G{1zE>BSO57r%STvo+m(sv5tnQC(~Ht1t=+#zt8vFW6n}w@aFn8kH3Vgx)e&}6m$oZ?OKAh_6%7#^Z=U$ zX|JWdkkE(e#zBhjAz?2Oeokv-vWxHxj*SzZHN^CSgBdP6rN=#(%#Srri#vt#@o;dA zCP<4eQc#>Hkfxy4H;gIrRg;YnOB4S%Ls$3AkPKl7Xzui7$E={l4*oDC9Ow<}wfXQ+ zGxS3PC?I$M6%*Jhe=t{51tG@_UE8!Ho0i~M z=pge70`*p7R`P6rZmr+KXap&Q2cgA{*fKCaT4}?g!zwkCJ(ZCt+@=m|=APzP91J7kZkK9?k+1Y)N%CyPaYB5&G`O~(Pm1aC=#XVg? zE{ljWdB*_B2@q+{3JXOnT;$gl%u9kw(jRn8UBAS04{s`f5P-HZDPW9>l$MTs7i+M*2bfp;S@aSd z(44T+TTC1FSL2f$+&^=~D*DpFJ2fY0T`Ki0 zv6=1n08qeo@W&o>mG?ITRhB;gil@Vk)2fkt>7APNG@#9Wn^)%sxnc=bca(Jc7}ZS+ zWGATDMKg=+9R}=HiF*?K)^g4XZVULp(p+Vz1{>B_Sz~__{4ffhOi;-YdDeuf;>Mn* zD}SCX$;8X76tO_@2}BU_@bENDYh~-vExM=d`H*^vN0lCj_l`GZtycA7a_;P8|8+5u z;Itu{K_HV+k>Eh1#0Jjv07Gr`2xsv+J%8n-PF;nRHEqs zUDp~pPg2LK0c*UFRO8YS7^2(nfn|u%$n*qU0 z4gfx(<5b-S6|(`2PExWaac7FprX)z}|C^MtrRYV1zQM0|_^2y*^(o4#bHLJY z9{Ls0D5q;cc@PCIgye5}KkKp|S z$A!#0{7cHsV%L0daMF1WD=>SCeWHp6k1tNQru;3^0n+smJ6F=f$K6)Ko?D zS>94LBl}bq7qm>{E)>rt@R_c!w&3-7iTUvyL4(&T!4-~|&bpln%AaIqiv9FeN8Vv7 z&%uCx6RL_?l)iadaES0!By#Uu)=Hnut$Zr+h?Q+!(c{dAl16DwBr=#=B_BPC1 zlExIcZ;pY2LUxNOrKQv1vlf&QPfZttRrSakd@);}-fKI8Q?TY6^Rcw*KFN|n1N3Q@ zeUG&MS;oE4JoqpwD*<{^24F$|;FEL~rH4V1F7>6{o7vkp-qxx@}AD&4H`rl$HO zixBafl{pCbIi*eV8m7)&T(mmsqn8N}X8?<&wFfyN2ph%){yMS<%fj#>y~y>d*58{= zE~ba`q}2g_z4@fO=$*CC3QvcvT2J>@;2zWEXJr9JL5UU6I)mnELc}sbh5~FzkW0oV zCn8ktOWq#B?*^DSeHL5Ln|p{J658<1V*8Md(!RmMU}a}9`{8JfHQ|X+b-Rb;lS+ZT zynW6qaSIa4juJgTXBN48X#9Y}hq#-QE7J4>o?rv+iHwjM4$!=8j&678NUfdpGB%UU zwjXf@?QEu9h|*xXSX<;N^QoT`T`Njl7pG;;7p1fbO1$a`YEpz$t&@C;b#SWkwz(l& z1C&6|GLui@hf3EYR5j5RpqV`KyO}mWpH7B@9x9?-cfZmN8nt&*>a-??Hl`?R>TsGd zm>di>(7FLx=w9+-fOb9UhvS7q*Jjs?HgI_1wW|5q7ql>F%dR&~b2NFEC3x@(lBr2u zqI`>;1<_X!eYHvXK}3Z3a=n;|wSxVtI~iGRbgiWMMd@4FY!ix7wy@nrXY>eb&)_a{ zXisVa*ctpq7vpgdx~0Wz!Ln7jz4Q)93*^%0o2E0Y)UN4NyPgoaMb37vgsIKVpQ&cy zgzIKhhf`ipJcl>9#nh(s0*JMeD3E+#vo}v(hSOt$6fBUSCm^8?haj=akfNu~Gr^r1 zPiCAVYb_g6+}OxAM_h`dp7W7%s{EV@Km1N!$wRUYwJjS(f}9fFCfapYV- z9L|QR0B3RGw^YZw_&{;Ii|h$AGAy3$h-q&`<7G(%lnGS7MOE|a9Lw*zT?Jrb%3U`_ zDhLNS@%ebL5WhFSKRTils2n=P!bxOKmnTtb){C7eN$|WGbKSc^qd9it!Kd@_E5!6LI)2gxbA0-`SDvas1mL%$;Ir_S!C zQ*bWPXxWJya}|YkTi_;9xcuDLeq&Kf4S z_olB>zIh~k^PDj6GBJq&A8ES&V1~)*+!KtME>`7Ruc<&fDw`!>xYlfqlbbDv;X8H+|p|)~&XO7ti z=+J%IX>E2#P}I^^V=zSLNa3u`7lYoMv^NR9nR!IPON-+tuw%&p=~V>`kWR%X03E=0 zx#VMO2M%7qQHKAEHhGpfmqD1q)bTyjqC!@ig)3?~dqs$+$%(Bd69zr#Ww-IlZW_BF zdJbAPI+E-hwxBx&f&_63+}0R!>57dvU~M;Cr-9ihHy)AnlsJhtn%rVE*^VMk{4~8a z5L`NKHMSqPZ8eDAp|ysMb>On0s0j$!4ts9g!?Y=Bw2}T8sjibL3!2yu`Vk!x^xU9U z$f+|N5xC(+Y|{cU;)khRaF+b|EO%S-5*e?#qmDxLwT&rRD65&urYz5K@iA%^dpRip zS~f!@mRMZhJ&I!KjG)uu+5G|Wd~kRwV@2p^yppI_KNxfvJTfO#6vcFsQxo`Hr?qv` z>(9EFhl|SnIz7P~m*@%ZuP6$AxrQEcz@BK1?lmxcWn_-JCY~<_{lyuIExJ=Wv66qV zbLRav5bEq|ArI%#qe##%7(zQE1!We=%S8u4G-AeUDlW{+8$?cO3{7*-Cr*djS6Wa&jaka&#&OT+3v_1+>G8fc%U2A9=N&`}7p!T; zdlPgJRoJwRKBKm{k5!Q=)=K?vNx?-9NQDlyj))0w^HAB!X-aCr(8NS7RS2{+gTB!R zYXkI)&vXq|#Nth^Vu{xIOR%WOPf#nV=MePgp#~5wWS~s_IL_}_^iSi!OVo(`XP@PG zkNK}{cOAY{jWWH}Z)Y^+o_)Eak3p2SQ27qj5}PtRMbc=zaC@e3S3{uJHN6>9Bf7o{ z@xf*_hI-$mCUk+YKnHG+jS0v1g=Wk&Tr`ed9@rCyb2Yc1IW*kBEs#z)2Z!&*`-sEU zSqTAT)KsKgLJ$YEAoakwrI3-@NH0nwJ=go+kp_6G&0_S*WhU}k{Qb*zMX$Xr^x9i@ z!vW0)XXyzBcLMY+Y&fuK!hwg#A3Nc|dof{bgnnqWH=lvJrr(;61j^k1XNOP3`+urV zYp6?_nn;gk5{71(MQgB8lu5pko3LK(sT`1QAbVOuXoQo@*c+)Zie59rc~dZQGg~_~ zDNQVA4y>;^(*>APqXTMvC|E4yYy3%N{p6&JVGi*a6kmZ$m;%v{fbL^{hMW!pTG+02 zJDWPUt8YcGe&Xj;8tQ(#mS(G;EYOBL+Jupp@D7|XTC0xhKNTgr$SExdrZJYtYC-4& z?klwd^a2RdLbVr=E?#aD!+p7h5Jl2S3~1x}ehd^ql41|pLmU5+2kR!^mXE(nYfPfG zZvV22nbYxNVg~!<9J5_b2?AK|U@j`&n${bgGux0KwGdIOKVi=ewRDuv&@IRbn4eFM zcF^P;2|T7x1}B3aiW%l<>SNh+<+;_4c7UC%ayi5&DNZu2N~&bfMK3nANgm^S!iiEF zca1wr1CE+!pml(=9!=QW8*Q$w(KA3Sz-ZI<^UqX;EtMAHb6!amH1`V#?d@m>;n`r$ zfs(`Qb8|Aqb#bb^aC9(mFO2vm`Hg-rOH$ znrhwwWs^VL!K4fwI!cI63`tIOb`)(ezb#;8r}s9&nE>sHKVZ;CI;qG>864*39y=Wy zs3kqlmS-{x9Uv)0L*>)5BMCOkTaChz68?qJws@!|0lEmSAObxi4)h8DT{bFXV~p+> zdN@|_>HURV>nBryUby91;zqfh&z9^zAqZy{r#lL#wZ=@hLz z48*$1Prt{Lrd!*t$z-n~X++v{CiaQ)5bf9trRx%mhtPM|znI zS)#=(H#|NjuP}{E|4oAtcp>)irTKN+YKsvP_Y}@p$)GD2B=QeU6CB)jA8-_2)&axq z_h$3u?slN@W;Txpbqdf>@TrQ47|!sies|F=aN^t}tr#D{3+b+l!kU)@gOJ9pyREyM zf@5N$$QnC*qUKvn2fCVmL4F93_1l)Cx~vhS^2?h`2!;dYjQgS(pI}j_??v zYrE}6gWB{Bzn1mO_V>RkMw8%%jkPtgi!<49G-DH?65CPjh+5vD)A`^g1EFeSF^_o` z2xZ$@a45FoBXsq^l$_bCSSnR^ki3$iIr%PHuzK#{%*7fAKjMI@?4AKv1rR*MKhy>f z@Ym8j(p!Nx?Eq~;8%EQDhpW_2a2_VFVvydK5aG-HyoK-@Q)LWw9Aug3&9z>h9tpLNfJEM9r5j&%WPRkWFye$ z30R@y4(3Y1<;q}6q-!W!g(G5R>$QR{1tw*}6dF-v{5Jd_pq0ENvG2&T_q<11$qCGP zBE~weXgFQcKsC9I1PfG)iBuj{HwCM9s+RRL-msJ+6F@c_Hbg3L_j&p4@$4?9c{yRsFBNk=t8_= zQR0Z8q|Lyq%80g1*~esNQ;9G_JOUf2tG?X1#~v$i&pLaf9WS}pT#u6y*xrXSUqd=D zdeU}?#H0oq>K%9=Y1@^_@i}O}F^pmAINR1M=;@2g+#y;A-t)lpzQ{fsJX(Eu8T^0% z4E1l(#3!nDhYT)9W&j%5f)v1Ib)7wfN=7rrnoDE{yrz`LStsPQ&K*$^S;rdfve)neqoN!96>=g zjQ&WpwUCh#Gw`i*4`D~|(fPq@NfyB`H{%XYI3BtKo`t9&PI;3(@=8oySKNCAC$R;! zSQRv>-Yo3uS5Q5bhSs^0ab+llB`8KCloA9PLMxR-_(m{sBDh|BJo?tK7dYDYV)*F?sokiS{|2;}?%1yIUI zT3diiERP^XkO%CSb?NN^kX$)I*81WJ6_(HXB%sUxECG|W&(^n_lf_vw)6}Bt^>3oM z1|>`q70kxnVU(xZ8@Au<2I>ShU1AWsu)%>y>z3*IdcFfL&?)fi1n3K7aTWX1;~!Fl zg5jIa{}uiVKsn*=?-rD=qm}>@YI;MgWe4QL&ZEP9kx}Qez%DE(cork{?i?P*hgfAE zK6iC*Fs3T2%op-h)5?6RM1)MWmHBeh^fawzX%K@Jbl;QXmG$29R`0EpO_#KG>tu5U zpM))kB-N+WpJ`uU7h>Cz)-#y}`Ry=h!r?Ud3^Xv5Sf-Ofs2X9ogx~jXK0~yIk3lP< zm`s{XnBQRX1&R&aL(zEXpbzg}+Y`{P$iD~B-%yNlEIs?=S{bBy-0FhpH4L52kUCdc z+@36U6rExqBNA)F}W8^$-dgMc5%N?j`} z%8V2;$H&aT^}jb)kc8FG6(k*)<;%IpmO=r`JU|DHPEQ(Pu#Ig~=B`smUCokp6FFe` z-e^xF7d>m9UgA@EHKD!?ISIHZ1S->HB3iSdk}}$~HM_zN#KKmV?y_RnrXY~1ps+Fi z$u1R?^ujZ06U%%T?Md)gFfsvVK2Y6|+vcz*oXh&R@|xo{8 zXaRF+g4|V~{yGwq`HHfDxt=wt0mW@Hog<>^kQN8DrXM1;d0;Du+~B0!Hjyr`u0hZR zG2ws-mh(bIp#?%g!yJ&1oFEea2T_`GeKA2RjR-3VI2F+u`j#McgaeWJyEz#(ZNc77 z{Hiz9HsUuaj&a6S3+7hXKu)_iotSm zw&k@fn{8WCN)s)iSCHHeE7dY3hW5jh3jI9e#yd(w6P$o)4nXJ*tioTQUCzX0^ zcow$#v65p*3SN^G@u(zae2+dAR65W}w}d4V;47la1=&Joap6jP8<&0G8nct=WHB{$ z!pjF4`Q`#V=x_(hm{6q|6*J#dltppJDdudby>Ni>fqr~qM`8B>uPWWF!Z_H59nlpP z9#r~DHIh~*Q2n^$YoRoQ0tVJJq-;W8@*Glw6*-H0*A&F3sSJGol?o%AT+8b(A`)fs z0JRjF*t$u#X!*VmL7aDlosBlps1>bX1t87F zlXH{nRaYTW>_(PaRRL#oSVfsj?`NL{<;qAB*o;?RpWobFcIEf&%y%uSz>FCriVp@* z>Fs+MEn`{*vo2A~aIvMOS;$7F)|5vHXLI!pae;`{f@E#d1ty{xBn|L$OZ2wKP8G(4 z$HBbsYl4w|>^d{T%}%_#Ul8Ju4|6Zc3q`9T zL5U!8XbW(NW+k&w47OZuTiX!4h}aTu6zNH55{l&pnO!lu zjP})s$Qk@*-qekyX}mZoH%C-`I6Rq14r}=PV1|176Gw5v=?4|~|c7o<~ z0h~Ovt@VsX>tL$plqlDdRr-J%CHLQ%$+6Qt*gP9E%eFC|W0ojY@L%m7bxy zDFsQqR0rs?V0v+pEV1tVyzIVFUijga8|1cZ9i_E#Ao0%!z1VOr%~TQ(-B!r4C2ta* zoK^%yD|4jY2Bv(prdSA-Yq5{(_K>hhrh87W2&$&UFfW0WkIf3G>dA+Q&JY`^Hmb`> zo-f_Bp?OlWL<|vdV#V|cJB;}X*pd>*b_CM4NJP~^P)A<44fHe!}3^c?=aMy!WPsKVYf zGHxL)`&pkcAr8x{9MFPhba`#jY=>Pua#`675+yg9mZ(ePxM_*-xk^OJ+Q0C}z=5{9 zbK1~O-^AQC zShH@17)V9tTIjaA2GQWKy#4d`QHxfRk)s*rSQ^|rc*KelwW8gr>n-AfCM9+!vgQ=v z{wIn+%ar}1t&CGo90#;O zCVG&GJP1%g+;YT5Ga9QJstuDQx3i;0q^r7AVJD1{SVE8Xt-&2K_TXWsaEI-wKW`6S zqO!StaITVoJw+Mt6uST)0F%jNUn`kjZCfOg;W#;N9Jiev9X4cQy_g&8Q+YfqGF23- ztC<^vbeE;FfIU^J8;Xfm-jo>(A{nSArh*GvDcf6lJt75P#tvd&R2k7c|+yQlR|-s`YI;SL0{kW z`Kb0&{XVf*H}|f$RedtOaY3a@#^iya{={^lR0~`M*a`;v66%WwLv-|w*4hV^W^7F; zbr=LAoKkxnvnq(!QVg53JkX~B3}@&h{ z0lFxuK)N}Oy}K$7ZyoczvT7E3H-U?m@n{1JRl&k$gGe^U)Byr0C-4i(s9bKROr%Z} zgm=GNJknkY=lKc}&^)x{V!BQEt&+f6gF@E8f_Je=hevw#A06T*yY#+B-4BrWKoci; zV2kc4?k~6Zi>U+iS5Rw9R~rwun~YgZ9S=@A&zGsU#$hn^)@YO=|GM?o?w+k5VcN_P zH54CDW|KL|6$6v2Sg ztRS5*F3V%MU_b;ab$cJ6?3?p*dy^qr2ddbj08c1!7GE-))>6tn;sb3%XxM6L4?;V- zJK=q;$NCy#S;)cibpuFWONQm(@i_oebQpQ$hDniE1rO-X&_W87Z&TJnSo7dC6JHt1 zbUm3mKaj(dK~XvxycDg(6JS%*=)QSwn>J|AAa@u^kebFXqCDL)730&3?hF&GwT|n0 z9S0H+@>K`$Sq{u#Er;O{Fl)9Hmbcr6`-C4*$hCcd)V2_QR@77O4piqM+|GzAZ+H{6 zSSJXDIUkRKV$bgF;`nh^5$6nYQ?&%J(pB{}Z@!>8-?s$1wvGh}-pUgUAD>Sr=%?1i zxN~4QD#SS@+iKjr4k?N@eS_umfH$R=S7(Ga&JVg^;s9BBin)TI{)m|;{Rg#Ku7b%) zx8dkl7DK>)VP6DCn=gxNhrlXIe~PloNQ6q;%dk^~IkXC%YwRj~mGxA3CXI=dcLM&KapPy=bg6Z6`H5mOSbI17TX%{iJQ9d`-?NL?&Yi)71 zd$)ILrY&8^dm(lrm1c4RhlDND3mg(sB&ZEQ^NBl?dHigej7OMPpEp^BT}01K5F2(3 z&7GEJ5teHUsv{|{*h2m2;(n`XX|%7+s-P~oi$K?{X(zf)=#@?Q_>;@t0X%sWDXZ_d zWI_yuvi^hki6At4amyeOdTe<8bJ=K51|={)zT`A=Z4P#T=ON#dk&ieQPUs`;o#^9g zRhL~5w;`PWNL6bCj{;3Ev`$=xd@=)RS5oa91rRveEMaD*%t==(OD5ly z>Q4)*>|l&MX~p)|I{@u=Hj(Zom#YpD^_)1#7}@X zWuxFL^H*u70&nQ|HQ0b^N8Gak9-!ze)j&#-_;Op4r)|=s^@}M0rEK(!QSj-Oiy-7y z#%>}EC>-0ASt!l+4C)!k5_c<1dkaGCXNnk zHfgO>;Y|6#G3J)AvO*5g%~e)tSp`n+n_viL#h@t>FxOw`yr92OcHHY!gE{0f%Y-A) zN8Oh)w1iY_ofqnCV{1JvK>g?#Y>fG2{7l;acf_FzIs^HdrpgDT7YsvUx`N!K4X<-~ zToR${?1ZwPP0+$*ReAa1M6KhI76-p>@1PB`uG0AaCYPZ)6!Me1KI#E{Cue-DpOWPt z#R&;?Z>pUO@rI>5a@#+N0ZK_hny<{#*s8Qcd+@N)28Q-1>Tbq!;=KdB62_6mZe_dA z;DlMVG8`Onma^$IZjp6$9H$_vYXX*4x>P87CLoLuk4L^VvWta)rXHv@nuP^4uHf3m zvm*OCI-j}}lz<$K9AAJou!Bn#5T5gHWu>au^9~A%dS^5Jrfd*isY$a$brd{m#iRd8 zuSb^V8l^xIG9({&)$pb?i$V3I4Lh)cG0jVZSEmH-DfJFHI>wgV~G7`|(h*UhWp7&|tD*S38k#VEL) z3Jq#t(V@^SL8v4Et)Vfr)<1#Ve+-J#bn+sZ%{g-fas7Jh^%(Q;&2 zW7<~I`v4ism8Z|h>>3;oIT_cL-_8ynkWQRCWVnEPJebl4!juo;$Rm53%tWoh-YYW$ zpOls^52o;;pSZc2rd(09!r_Z7AHux}t|%6x*sD6bOv7)&LNEk_1sa;78^J6d4(1E6 zD4cv4McvDq_-Cvte0W56bJ-EzjjKWNVC$HLFut6C|0CG_<$<63qmgnZVmCHL_hl?J zI-L+nQvz0m5h(UnG0ahSG-nfUHcV{C3X-FTHy(-O8T654#ta9=Wr+L@q%YjbCpLBQ z2$q`W$fPea6fmcjnxr-nXPCCAuf$ykN6IvbMT^+|bFd*Kt(rD)a~z_Z9HSYn+rGHE z`GmyM+Gb&zNx}rfZ!Rx;!}$ywk#^EOnZ_s6HT;k212|~~T@73psaNGO@ai|t@P%hW zH!^mBBIxy|g=@QN<}$+xOzeh89IGsmiFgkcJ}NWlml!F>X2kGOrTWs{lV$u(bTY3x z;MD8Rh$0d~^V^`EIPmOjjB)c_C6{r2LcDU7)X8d{9NgSU{#3pV9Suk^nGVf-Vp!e) zY=9k>-ZvwV#!STEv@v1kJxEaGR-u_cyfR#k=qB8L^9Xgwu4Bw|5HF&ovLFCnkTOKB zVo`6*oW@5_Fg4)mgV^>ns5P;H6qP>c#B3))P>oHxz8iufSlc9L^ zDai4zz3iE@XHN0Qo9?n5Z6S7yG2e68?HRwDWgmiGzKiaV1Z4c4QpWUSU?ftOX`Mj( z7>J0Ip$?B>N-I7YPJl7>g(g1`Do7-kbP6;hGVjodvDuivKwB@&wafvCP|rDz{}wp9 zAbMl6;R1ghPZpp62^4~244qp)9DKm}YS1EJzTm2am}ykRiIh566?|X&QzgyHU^aEf zO$VOKbj&>#93CM0qTSnU<@-K6 zscvThJ`brm*MpzxFdqAEN-_qNdQt?K$1Q2$+9yIZwZjB zXeAsEB1?%|a{;ggn`OezJY<3e1k^!UECh?9LiDCQ+fl$2#M;tu(OtKjh5vuE2oA{% zOCmUXSMgK|*tSK?Vk`rSK;_k&5oz=L#a~|Dk0_UrN9QXYJzKzW_ `LGhouy*dJ& zEw6mVmSYf^mHGe%<%Ll&^9PRO%}0@S4R^{RbQ=d{_`spUsKA8=3k;DQw-FPSx_7E# zYI>lA(z}V#VAF0NwI%qHh3&(@u_<1=l)56YJW)>NpNSv8MC}>{dcBm#5|KWLOxk$S z9gJmuif8DdBca2~GR85)hEGUge=%~^Wx-+dcWunPxU%Z;4{uM*XAPvZGT{mokwY==<@#oSxAkGLojFoXTn~?I`_K&{rm7ytKRz0*|ya zIG#dKUmeI7%D}a6G)cDoq*q+3W#8{SO&z%k%a8Zor0)=|JwZ9Rs&%0WrM*oyMVU1QI^7 zP^k@DGey(LG?>9DoaS;h2U5&%`?5+MqC2`}09v=%+ty0g#-2G8X`3@s&I`#TF%!eO z`c>`4vSSaN73obugjg8W{81$Z_6&N!*DKt)s`3D+e~WWivIc%-BlbFHAPN314dIm|ojZ?Gc@0*%8Y1?rGV_zZdh}nm$e!6_Qc)@*LS-qg(#)AQ~IzynvMx>=tY?Ev;(uuJu1hO;Kp4Ul6a&H^%1pd->l zNWA&Gu1*Pbvu<@Eeui7P$Ka)O*6@RQ0E$eXoi4a~U)d zib~Xw+md)Q3>|N(v@v7oUuDJx^4L_N2R8|yKC5G}<}Lh$+X}ui6v9UP@0e2}*11eo z3#lwNqZTx)a>ChYUD_ytkUzi(&X<-DjU`;RO^$6rLf?~c{BH8H2eUPpj6*0M&wk}W z6cI9iB{9_%Ymy7b1Cu7fDShfbcoDyEni|$FGI7pf4EPVNGjXN@yS%z-s&fD84yo&) zHgo(aifI~+Eg5?7t4wyroeZ86{)UJ9*gM?0yV%Y}nqo|rNHJQ&e-*AswASrkcEP-l z7Zb}7z~RX}r(ODVnJ%#c93nz?&$peHMwcwa?{#S3#Pt zffEIHRy$<%0RVy0Kmvg}IvHl)}{ zcOb&=v`sUe(_AyK)lZP$7)F}_8Ix}BEXFzDjJ?{SWK{iy4u49;gO#KAPg*A2Z^Ur` zwU80=BEo9314@|2gG|DWFmW!cnNQizxb=In{>u^<;hSFn(O9NHw3+R(Dm=wUIAyd& zqKRt9+&f&+IcTl*4Bkhu-p-<;D z2i75kQL~~cUT@WHI8x(&C*lUCry#oeY{^yj%c(f2F!(|W6D=unNPXrEdd6$;XcQe8 z{D{KGG4t7I=Z_8^rktkFPs@NA7a!BfrQ9OMOiHSxLtEE;Irp}<_llWcvVIH3l-2o^ z?tbgBPJy7hl5(49Xr2gw&4hW_@mmnGeLY#C`I(upw*Oyu-x4EPwq2Fje2fu4A)XuA zgSKS}2Dp?F`9~IXb=U3JQB~cQFRR{D%R;)2jEth}^338Pgb-K=140-}AWy)owf8yqp3nWouR5Djb!S#&-iZ4-XMfgSdu>BA zKg=?t^hC!Tb;|;7Gi*rTW!f5{Z*qXBMABql;NiV=V zVT`9-9KE}Cx!^vxMI;dbeGVJRT2gp|TqiQ%8T6 z<&b3baNB{|ZIJ6GQxzlEj{01DUhc5w-t*f|G9@ad=*zku)-4b;dD8L%0dIDEu5Hk# zBdG@@?@Hg{fyrtW1LI7`PI$?t{B*qdKBx;rYnm%SG@^GTFupQEoqbe|^pN;%@2=6U z4rC7Vr2x;_`BLHUtwH&zN0eAx!~-ustIn~|F9At_Rw_CLNp>Nx#8C-8IU-!Q8sv&0tn2OO2_YPN?X29@J0cRJmYY)bRynsC4Nc&g)8T-#6G4E4{&rdPs>$?6mC zQ0Iga1OxB@vH>VRv;OkN24d~wdmp6^FN*8JqWDWBbjw^K^&vOcY_*V_vbn_{kb zkvR0e5~ZjOfoN}30N4TUsLoy9hE*VZCr0*z{4#K_1P?zHYQl9k8p$w;kB?+*4~vT| zor~#S_fp+xx}Lig6zqVNyQCW!B<%4rSOL)|N8(AVdX6C&6Z>9bE|fu@7RtD$0WNNB zLl)+?BBFz)KcDyC1rYZBpigA!_%K~URIZD3MhTmKZ_dM^kath6=F=Id@T^BK(l;5~ z1(slSi=M^3aHYlx6ks=P4SkL_H~iFDIK8%N-|-4SJ03Yf*|Zq;DuH`Fan2g`|$O zhmsTH8+r%bO|`-gCc7Iyml?k}O6!2W{c*#iWA=LYZ>@7%GhJJACDdnjzOq2|N~k2c z9S{H#$a}rZGI>ApMQDm$4xn8;d5y7p%NrEA!3@mRIS;yAUJs-Fjau1jx4#v;%Rsci zsthfqnls}sUMZgwAj88IV5C&pdXQ^!SdYU+kI-=EsPw7 zho2+Sbf|5xih%rKUP)nI7Mkr3VWhdzpC6~U;?+?}B8P=_BnT{<2BoIw z)0S;~@xP~)%UPUWQ=A1GQ00!k`Qb&wlVZd z(wugeeDukeUq1f0mv$ZfU&jL4UsLT&3nN@x!Jk!%l<_nN(=us7&tXWCo^G#PxYFyXt zGZ=$$#`ET8)!{4vYh6)dl2gSc@Xqn3@kmZ(>#T}#ovi<`xwU+Ekh9*$N~Sfk^~ zukN0F2_9GiV4BJR_QQWv2!-uq=s@MR826<$J0*{b*_>A7)|TjE=v)vVc?I4S`ZFO? zD4ZieN!qSy(qc9b)1t4?Z(L+6rwb-}pfvZo$Jc`^$bieFk8H#_8W=(e_4I78(7oV0 z8aDt%6hp|mOKLQzn#1fKk3&!@lUVcneNC+o%1Qo`XR8dB*_W$7N++kY>7_i+Ch}2$B?ovVE*$QWi^@HTtOcp;MjAgL+IvqSc;gtieMrf#AJesU*cSx*i zkj?J~OQSZPYpvsBE2p6@bFlzyU0^o}kWTCv7=Jrjw$Nq|_JX&1cE!cd$!`e7Qy|{b zhyuJ7EuDN&5c3#LNJzPTB_ilJ#R!ZYH>e}Q$iO!sl%j%pB)-dwcJK=Wk@$*2u`O#? znOh~yK*DGE!(o7dGmIJoyk}F|g-}lPZ?bjBWuHvce-_AtCOt&da4J5c#Jks^AMx}8 zc)Eg4XO7sammAk)>^P%x40r`6+0cV|VDUiRSg=5Fm#a1q;}{T8O1J>`!vUMdLJY($LmWbU@r3YJHt0L z-L5_1;^91i96D2F&u=*m#ZI}m?@@_@IlW~Jbe zMZj*8MVj_!Gh~{QE!=azl*WIa@ejCqVda>sz%B98BP(MLc8U4ss&l$d(OOY}41 zG^+siOp;6UC}4x%BBx;y9Cy#egRYU@AwB)XIu42Lxh)s-SE+p!TNF1>psB!7z!$>< z@sZG?qDBH;W$lVAk_`EgD~4|hYKps)m*l1FY%Gi1SSHl$W3!e(Rz0XT$L zNzCQ~3&CPzu)F(aJR@l3SKxr8oP!BsISx@LUogE;)q}~xDFA;^-~gc@X7V8v%zv8= zDmTN?48D(}YvGVuTU|sZ7kFDH~w!ux#Zp5!gSLURQ-^_j!SX*SqTv~`a^ZgicG8-h~HXYXv%8E+$ zO{vZ~M+=Ox5Dd2KVWp-*jLnL*ajN~M1ONf%@lFAV_Z&+|WlUp+$R@Vp4AC!`CR5+z)$G^@ZjRwZ6hueJ?Fex3m=`G&sh0 zbq=nw`|%Xzp9RHs925`+4IaMOGKPBM=gajE#qH0M(5+l*6fwM{Q8N62ND2PbZpNzA zvdAHiCgA6xigg9iPFez1_oW+7gv*Vq^)&j3(!(hSL`Df5y$Pv>Z7Z}Em}w*q=-L#5 zSiBbe>dW7ha57iN*p1U-g}x~D79!VXnI=_t*9Wklr@E`moQHT5PZeiWJfY^xQOrF z73nXc#_lJ^=Qs7jl{!ZVCc}TgV}-2#isZO2m{;Q%nrcB#;yh zUzTp?%d({&xGvdyMy$(>OA#`k?1jUkzT%=xSQ@p99NsRE0sN11OU7=#j1XJ?uuK*N z5yi5@uHfu;oi}9K*@GqigdTHiFI(7xQw4Rnj+Y@0gZ?QTKu;K`$9wkzkA%wWWSW@bw19Q^!{P8HAJj=Bf z*iIW-i%(D!Yhf+yT`q?k+urB(aK=g!0`AM|fTO*V675JCm5S#iIfbo_6{!0vxg(y~ zzVs{sO_7or@pJLbejMJ8o?C)Sir$CXm2y%wfATU>KA+wj`#-QzQZQXbpY4hM=s~2 zX}=J>0weSq?XU=&fcA6wt+`Fs>>+F)O>sN>-K0&XwmcN<2IH$KkIrPK*h(HOt<4>{ zF^C(0O2iUuX}@>=Y>e0e`ez9uI&A_#;Tb*^1!H27B{#LXwr^>R2H39((aPF8=*^ks zz$%2OX}1(LMfm`k-1<+y4ry#EUArXwmhhc}+1HE`KQXLJt)?8UFwwAK5C{1|=}7_w zrqQcuPlP~t$Cp;5=R`$%&}xpH`VxM~W}AVHaR(XfTcX2kDntn{L4x(bC%UyWb}CEl ztX35n#zf0k;hr{lf<3SB$J-t5SC8UyQ=8#_9>d^hJRDEKpcQ0r(jRqBo(w4f4OEBS z-!d>$V2Yw>1I-UpIs0_fUw(!?{ds?OF6vA{_F2O>OAt{p$#qnNneDWB;+?LniNvU9 z%fh&z3U|(368?oom2TM*@9+H6G@?r(r*k&TK+k=XK@u;|)ZJf(To8tae+Td2y$9?N zUwQHJJVgbl_Yx(-?|K{DRu@eUftvX)CP@QI#aY-YeeqcWl!z9iuil_L$h}{FK3ezO z{E!%*ywi}lJx;hTUjrtk3F=hr5SUKIHt&>aWtq7Z-^{uNR#U8#wQi`?CgoVgdj;%A z$4hP4v5Izr5e3o8OX_)Ns{4V3Rs6b3*`3Z3X53r?tJ+UZ^%)EeHP^+t(t@*u3;M=m z8D5af3MZFRr!I(FM3Rn~G-W9HqlOEO2lN5mxEY^~4<8nEllQiXkm2e?V@%t_IB#hG zDnTB>C*bn?^8rk}svfj9P4)GF_4W1JTP{=~RYuh5+SB} z8>TCg21CS3DP)E=;FhH6bhbe0 ztOqodIpI|}#WChuemkJhB^?Mu=m=$yQ_|r*Mu=e3;zPbWBA1vSrUHlfKS~KqHyq^I zU=}%35><)8cowpKN(em|1?ZEIz1Ud_7A}r0(VZJhGrW^nS|?7PFkL^Bbe%)(i%}(z zkwdo?TC13@^Ljv^c>$L2`f1a;J(Nu?t6=)%TJ*T-7&(w@lYjBd*%A*(Kfcppylz5g zI8yov)+M@~k^neeIw2z1OR}z5wI>>yB$%f3s;GpxS;g@CHW>ANjwBMBindY=(#aZ# z?08uNU-0ro6?0_<$Kl(y4w2}R!lm!~U<0M((g_+YGAOf=drb1sGbJNw>C-6yBON`x zrrr%^F!Vmf3c#&tsGSMYaZB|Ju=1Ixky&1Wx+?}J*=YaQi&7u zv}=i#eqI(khgb?f;9WX+mc>)u3M2DZAHK3;Gc_Q}fdx^n`;%<45{^P0YIrS}hjDJ- zVL6e*zc3+62$EpZ;`DB^B!$p!UEuMaI(0#DXjf`*vD-{+QX82)EC;vWV_g8({qTAR z!+V8_h61K>@^w6!PPCk1oDldN&F3fs5Q_s42qCb|a2L3e^oIi6CZ2)eCaAkfCzk@j3 z%DjA?6G{3n6Wa!C_e(5=qXrpG^?hn8PTyd3I+$s5$ghH2_lC5O*UKA6#p}^2S7_ve z@Ioy+1pytSCSie}SaugTxe#Zn7(bk(*C{6HMW5>Xui7F?$%mpU$!m#RLKui_Kr>n` z46$9eMHE5-cb8_g*_|hE%sf5!VflA)U8Iu)2Ehyr?-manaSgjom3?Mtjc7L|$;^X* z?CPv!%oQS{SOVZ=T&WdBR1D#m(J0)8k2*)@G7q(>IE7&B?-Q6DlWz{JL>Q8y`A>rHd-HYJkhZ2QgID8g?bgl`P=;49hFKysS*aoYi11Ty6)7C zD4X17$dG9Y7>j#m`!o%!%31`=&W%dowu?DHuvf47|S4U)ZPWbc$U82g*bBqwY+ zLBOlcq^qw1GLhNAs7t_AG=A9N=f2`^>{FLhhd>gWu1a0(=DG85o!q&^*`JttriyZr zzRuQqk3*_STqzYWNe*5n=cSj}Dp+L)&!G%m3q?n2!O#G+1wiFOq0G9gXa!$9Gq|Ub zg{o175)g@Toipe*^3eI<+t3`Y!O5-$h<7_528)Ja8>N`NFpu^JWGaFshEENPNYH{U_Sjy@lyOG92jqew=hZ54B957~8ex zZE2H$HIdrf1T63(w8}Y)g436|MvJDIGRzd38B9HP*qXk>TUEa;;@Kdikaz+<(YZY^ zolv%>)!}3pC?i~;&TL}(rf1E!GxFfvQD}cLi$jpUUVU_j6bT1Mvnrejd5ip{wJHr~ znofc68CsLbI5XppAy|TP3{Oh>t_!(a$6U79Ui=`Y_KrziJ?_z#Kh;!GbzJUrixsGb zxo7rk^Zn|niQap-uH($~e?g^5$xbZ=NyashVvMMK$l zRk?4o!c|ejScWqouUfw84W5sW_Bp=-Tc<)i4OJ^twOs*5gfN2c;7iUEgYX83u*2`)4JU(PZ!9u;&&M5^d__t!{}|@4S{YZ? z%9NI|7)vsq5Z=Ua0+f{rG*%8Lh%nOamy!*S$}Y>Irs;6sw*d~>_lV(;ooWHSwTeHm zt5yS1TnA87Fi_x+zcUyy`p+X(+T9(eG63A-aM05U?Y?R+^tg6 zx^95_%3?4xs%WCtt^TJ9AhFG2;BipHHVpDHXh&hJo59gS4I^soCqpAt#Z--o9 z*Hs`lOgj9q6))5#EsQPf-t`@+8=mITC>*G1-KH-kBd_>8D{8)B!|$_pErL;I*#S7s zcI@kZHj>gmm^I~I==k+KTihYU0x*)j)|uNN2^^q}4a?Q^XT!${7N@^Kj2L1E@)D46 zFvPM8l#&Q(>BJ>>V9KhjjWs*pn)VJrpGp!Rmc^=-;`K@v!brV9pm=Uasznd3oi%Z5 z0Zsd|xbo0i;JOc47aU$rbJeYEpBV&NC4@gK8NA_^u6e3$3|~_@qqv07s6+u#JENh) z46TL%L__Of4{>cfy!s{xF1UJ+o%B!(pO(V~SWqDZ=R{9F&X%9|7hm;mpJ$z8ZC`*D z7o+jbsj?sg;1^1cT; zJMId!bCf~lbX33?XC-3Jp8oe1Ebc21__dg*!sDi$8W^$Y>1cJRP8w^Xf=L#JSO z(oHp3S)*{lFxagnx;de91T^(AdE$hElt9N~Y>Wyy(c}+a z6wM`P>32b?&z45HUZAxen$B$UX<(82{5&fY(>4gNv&g?W@s$yKDB>%F<{w-`jX)$u zFjruOkE50s#?Z$$9G{ZB5$m6&bbDesxEH^p@wrLCld(OvLFQefQA z^b+HVtxR7Y9F&E+gQQ@chPJ1rBVH3zsHc*2_FZlSo~7FuxQ3zquxlo zIT?FY5kV*tZ2z(dR-%_0DgNkL$E($5qxig43%ONJWviiQ}y z!&`)Nw>d=Z2K1}V0z{jIX$FZFh{fTnyM(P-wNQcMS0(@2ruR&=;uTQhyd_g><7~Lv z>H6pn_dk|>?v)+beuB0qq(OizAC9I@Asbhv&-B0=8A?<0PI}V61Hn2&Yo?QoLmzJj zvLCcZIOndv7&bR1C>-cKv04RL)LL5G1z4!`e>XtP@;4Tby!#pPHScdJjW{zMyHg) zeT?8-MquVYMn-ZyV_HISvZvg!FP}+BWX;g32DeQPSZ|0~yDPX%ZVu=^u7_hiAY%tA z+WpXm`5q?j+tOC08v%+WFir0;y=j>peDv*4K1mNA?IT`7Qr5|)3)#qk*DmLM5k1b9 zp6u15gDzvM$ot@h)_{F=UrQLU6wTF#MLTg32{nBv94?eSU2p3F>Mk2mB1_#L7*v@T zkBZNV_dm9hb|(2A>Ox*+`%LbnTwuvNo4P39dO%kSz+I-nKvVxR>)x27?KSe*O zp&TVyj5+U)PtF1Ev8$L(Zr6Hu%OTv?qj8KP^0IgdSZeF^+s9bVJEOC|q*HlN-s-=4 zid;|i@gY&12vCBe`}ZM^%sbXrvdzqf3(vGS4~@W#GXqsf95Vf#JZ#lN$a&a!Bg4qZ zwK3`nMv-bIEK5@=TIirl^CVU;l4@T((~i4?e&#L4bQt+e<5Qbls4?6zlLYeo{W-`a zCZ#j{6&9!m+LvrQrrzV!u!0rIe3}2@&%H#yC51l3(5zv?qr#KTt`~-Kj*=(oB43p9 zXE4L2{}tvDaJn0Wg3TaC$dF4o)9J`hbz>pywcG*&q&~&32LS{^+5%YGrE^?~ZDkQs zkG<@;Npn4go80A@H@Uh(lr%{^w;nna>*fZ2Ni9IV%sAwNb7a9j>%DEyFP>yTH_|`= z*bCMVgm_I6*Mo>2PNW}B>;Quq<_a++>w&Gchd<9Jv4KtI*N}04zTSo&)oq)+8J}x7 zCMV>0JwHx&z;MBHhOh5ND3-9kjbW3!nh@(y(%0QXtN1v7>J^hJYocO}Lr+}F%OPBf z5ZVTH{_A84#d86}Y?33fq?13CwG!kegbye0;#7T-$Q_zW^{Rp-C+Q6R*Upp~G5qih z=isL+X=}p{pF>N?G63$_wJ_fPKe97@(oz2%dbKk@ku_SAdroz8IqV6S2)S5n%e{0+oTv=M7^JuO>e z4*MQ;4Q1bN!|m6X$6XA%F|}>=ly^0}!|O`#z!mY2%hV+byq|tHgqE#Vf5!U^I=tv4 zwCe66kj(@0WvCKG_+$13e)Ix+uI$;_@PCCGZGGlKNCvsfMit>TxzhDmQJ_Q-J%K!A zY!AQ};M&SH|L7UDGJQ6ETX%Q0!C;&2Vl@U^8+RjXa@AXWITBk)*CbT^24`y{S}V^+ zB_^+ID3XD#iu!7xp4ZwK|Jbr!`~4+DXg~`G0)(D8GrYs0w+@^B!MR_U?B-Vf=j zKnGmYC-MiF4w(qS{Ym+T-;k7P8l(^!(wsPX&;u$!+kF{2d&o=y6~nDwbl#dv=o18; zi8nXk6+8U^r?U(yIag4ats)8ecZrI!0W7q}gK1yRMXo3g8`Afq(K`Th_v7*TY1(^{ ze%gDk)h;i1aKmrNs!V}x3~nIIqBC{uB3rjhr<+=A1hJxyMc;Q`(jn<&ZO<|IHbYAh zDGwaFhD7}~AzaOARxe<<OGe!{_C^Sc3sW^KS= zmI7gL_5{2i;JZEub7Nyv415V#cZWP2;5H|C5oqpnA>f7|%sG!~JXEnJSU)CStD9R_ z<>^tkgEto;C=^eE;St_1+QAX4T6b<^9hIVV6SJPni|x12??d2DarER6mg$Y9sI2;_ zptHZsC(VG`R{><4kmqj)J8Z%Gu~C`88XlH<*Qk@3kI{b-yC0s2kJw#`8xHF`>d$7Q zcT&m^A`yW9G(k?=?IBE#$LQh5r9{5M*muvx>+qx_FvraNTeuNsD=CL=XoKxY)M14Z3JY&|x_ z_zcc=f-FE4grYkX@7ZbPp6%G%`5u`zm1ZtU&VXxj1B1FRTM3KQ=K)x(6^?0pSZlpE zknVrmA%+uLZM;EMhL6Uw!OG(?I@Q%^tiwZLtsD7Ioah-F9vo6pib-%_{in+ec{)l@ z_$U3WT+*jz=CPR0&O!QdAM~toe}baUynal0>$;PU2|wVNa7>p#ih(2R#)Kz;dxHs{ zIJA0(XS5Gm?L3DATEFi+4(NKH)j3c+!J4iKtO>Ou(${a_rnBLEF`d9w0X(_i4?+q& zBNTz}AhE1-TA!nK0f-2`n6s;HP}7h;BR3F&X+m4m*-VhBSM3wNncvm;s z_FO@O zaDT2ou@l>*h>UjzQvr3zOng5CHzrGNIycKS%e-8SX?&lcFgqGz#jDZrJ~LFRe>(PV zQ<3i|08^vi5M)CYW6kqe&rm;366P}B9Ox45m)gAT5SxkW3Zk9 z=n;eqB8sffTy4E{ZMHj;VB0|1y^vUe9pltg!=A^K-y5Gze|}R=HQ48Jx$4iL2G)B> zoYO%<#Sq~FlM2YY7KOStIk{4m~=aq@_i(p0g?&4WyaK*^_to4L5 z`-I7RZ&}UnwCO1V{Ln;wkUQ=rTCRr85rXIeRXAl;MNejnC~6b-Sel}`vINniu%VuH zKZ7wNXS(6&ComrHEll%{QiQk}16~BbFzc`>{0kKOm7q&7zdrZQGmpDUB%ZDuI-ORB zPN1)RETNN(=qd4_(d{`OAfhgkV66!9{!sEN55O$A=yv+cuZpG@e|2AZPz9pm-Mmii zxFxUBD}V|_3QHzTq3;uCR%%DKxLWIDC!ZDIBjqFu)=mppcaz1$N*}|CAGA-(J2&kY zA^pysN<0&qRxA|AasXXS+9G1J-C|p$LI?m z&$1B*04@8=Y_ZG+3&zPIKgccdNtp!Q`(DjWLSNbB8V186^k7M!^nJZBOT{n^Fi3fK zG53V`V4X_N%WuQb%jFU1D$dbJ7Xf#8`ea79OUw#AfHyPFJf-4Q8IfTop|81{55RZU z=sa5Fa6hPDnVA-eaOd0z{s9%=K$T7h%lRl7+tQnV;{xkn6XLUsA~b^%`3#@*lOxJo zuSr=@+B^_A6JKc1riFm;J|BQn5GxuyY*e{x;bZl((H=9k$ceY_PHpe#)fCow-hU&Vu0fqD zhdZ$2%Hf&F``@z~&eD5nOlyq{a^o@$hQKtmFtHJ7e>F>iFd*l!_#f`Z5|-LSyBfo$Q2mMi6EX~*pvgV0O-Ec{0 z3Oe+FgV-35xMJP8U)c%SGShCLD&}OpN?@_zOdRMd`q9-E&l`C3SXRiYeR zh?&3vnB>IYv9^Syi(Jyrt}=GTRy3`ohlYqn6k)piY#6tv&x0tcY(Pyb}seu*LrTe$^F z7M<$lysrZn5Yxi837AzGusNMf>zRSNyuGQv(#zYmZ?RS0jt(n%kmr6x?y~~hm+g1= zw4oDF^|bLLuyUn`*m>F?r!Svta#w7AyhB?+=6G;9e4S0y3A?*$Rqa{|<{^@}xK*-> z-51U}%){_C!le3`&?6m%i*hGHKMa0D*pFx68~1Xcw*r4`d5hL`s7=*U7ZjmeK*S41 zvz9Ny+lvw2=%GP^h|H^2qRYV{91!gRQ9e6|xDpOALzD+1teMcdCIsJVvUw9YRS+r@ zcFcGpQPFc~6@r;oNx}UKdr`2`GT6`iv-9aF8$|W28qgY0D+?b^7R=LR#+>sA z<-~abY3~`nCpeoUB#F#kLHYOMn>C@0j#yy|K=Q$JK$B8xhH-KZ zXMI$;w#8&k61S1h-LXYyy@eg+U<<${yRN!6))a*ug97*Ts zz0+Wc%s@Mjo{`}<>V$>-8Oa*Z8egE>N0QFNWHcQ2z(D+xP?aW)T87(fS95uovR;L% zTt4i0z1U?1kl0gv3D&K@F4oQf;T!E;E0_2!Y4EFs*BG!!(BkUx0#q>C9sm_wa1P~^ z2{EWqidxQ`6E2%KC~a|GUPEb#c8yiuB=Lda5^`t~Rw6r=1rA>DRXV%}x(dUAcl+*$ zEkDR&pQid3%s=jI^UN)m$AwHqS59uG>lju`KyBe~-9V*Hl)8GWzN?a2+`_w*XD95~ z4=KOf=g0*^Dw5?EScZ}vOF8`~6RzK&(Bdd_bPffv>!UoN;Edrr}*!D77*Rai%W0BeFi3vWd+FmZT~X6?{u-y@5dR+&A>h7B#@e3Pm`eI zCvqrd(u#O@K9c+<&`Skdu_jnFojR+QUXQ9vujnpAbcN9=C$51kP%;Ntae6Nhwiw|@ zA8pY>xZ9Z6^}B9WL>yPY9KN*^?m`)L+19Ibj7r*m0a&oJMz)!24o+EaLNSe@x|zDI z0tuT&^5{!4(Fdi9az!n~vJBcK^f;HOH_&?3U*Hap_G_>>%7hQ_Nvs#xU?5tDCsy7G zjSxo3{OJX2$QeF2dE7ls2Z*@kykhF81akXckZvCz>IL}0;41@F6+r-L2$t0YE;g=v>KX}Y+;4Zs^mw%aR`Bl{8MA(5Y6kM)drn^8BsY(Q^j*BV?V zZYaLP_ZZ@eZ@zej{t&)^tQu<*BpD7A`0nHlENgl(T%ak>8Ql{%G_}Nr-AsR_vI^*s zVZI-&>sDZ6U7@E5@hh+gZeH!EE3IQCP?T0=+BGu;RA7Z=wg45ZB_v=&xabx-=H0jC z@o0;@+K>;oC_FvP!?0%x$IR?4T#N1G*MO{xwnQU)Vs6ZRd^KG5vr*^N?h15zvPG07 zO&G{D1Z%*E$>ESO_8AJSZgEuV-?Ay{F}^BU?GO5U(r4g^kT>Qwd<* z`gxeX8_aM=tvDD24s5!Tfq+F-Iu9ffOJESNHmx|-;9;?PjJd?pYr+OC1W_E@wEAHe zxTHn$iCSqk_z}G)_%-|`or9Llb#9)9B!5Wi$=d<+%~6C^!&M%?9cc~#T0xwJmdPMaRe4*u2y7BZ zJ{Io#g&}BLFV#FI@yWhs-{Bh*0V!GlMWzf{Nan+|1q#t>L#-=Q-?Z6E+?7U!mX3lv zpDWKpvbGD2Q@dK1@29DOH+P9Kuav0?6T-!q<7XWVa0wGThF?MhU*N7~yp?By>3)vy z_`A=}v3t1>PE&ZN#ocI0x}qWm7PwmXcDM;@6SN+VQ>qLbYf%CN5gdu=vg{7QBO^te zFocF5FK@R3cRu#|ejMB8glsD2I{6&}Wi`5_$4JT6N^?6MR^eG%WF-iMVsu4IV8db4 zi#d`q{nU8Kv?!Azp47q9=wC9ErC+@KT+mS&56DOg3v_Zng5No58+sh2UUA?4Efp7xlRW zXw|E@25Us>|G8ml2|-J_=Qcnu4EIiCB`Gg*m;}_t&&k-Iph;l%( zBX1-#qi*zc@c&_dT-{D~fM4)st{!#I5IbXGA(oN(QWC=SyP|%3PN6OQO@JiO`Xf45 zEFdTYIZ)yW*?Q>itnj*>5dW~g zN*`fC)QaMXKrOSAP}?U|xZUNYt^{yMW4{|8aV5Fqn1duFuAGOSqzMYs>{}<#x!%0M zqd5UkK*CK6Uh6vQeMCQ_{s0x0CHsxpc~ah7oX<$SkP;G#!96O`A^;!feL~R!X_qdz zpPS7g5V@xR@*0JF!dD*ok!Wj z%qwZ^Ks%A&!*~58FtWXQm2jBJ;Bmh+|4l65;_v9l$%XnYm0R6N*kjp zmQuHa4}!}4AXUPOU;2D#B$n>s98)l|`H=Gi<*ZTh5Yb#+$ngW)DRhgp!Q58Imex>x zPt2rI4LDYzB~uQlh785@#UaVFCdYW!jkZ3n1t^E+aV{a`jCc9^YV64u$ozAL1O8!A*8GoWpXQ4+`BDyA<2|SsEq+elm^9sj!Oj4T3x) zjupi0eqVr3YoYOT;Y4D50w-9k=$7h~q*OS#nC_8#t=b@Y<+r zI;Z=M@WT1yaG$$}%`vh=RBgq<04cWHaB%Y(WoKBCvKubQUtDO?-@S2u9K$Y%APjQJ zPiP5Ru4;L*w^EUZNkh5IM(644yZ-zNpshqWb(E1P>%kFCq!C_BJgFJ>f}jiS=Pu)d zOuTEw$au86xm<^!a)=x+Geb_Vc$&^uctAV18p-S)NW7^z3wl^&nmp5w9xAY;!~_8- zffPeJ@jXRIiG)S_B99nmN^K@GA7zSo&D|1ZNpnex$fKfWcYiz81@2n)LRY-Xy*W>9 zGBly;$;a99^Zw$i-YvHCv1xoZzmHGvI8;MTq01DiqaYvr4w*ouH(ckk05*DsdL!<3 zNj$VEKwxa0qw(a4Ho1}MCLj$e6Xf;VIZO=@SX*=IDtK9QdN#X5=LAXrId0)%ef}Eo z!mb#G`I5&4A;ml$87ul~3F&YPTS%jv;xeE;y z=*osA($0yfC(&v^MQw5gMRel-D0_?gBu*rlG72&UEP^Ehfetl8#0)MDKonqOI9CDY z*CQG{>hX`|iVRUjvq<>ZtW$?~Ps!&|DBa(;wNNvPt6 za^6VksS2ixJ%-?(n@73ijPyxicL?Zw2bZqSxV}?HM3ef55RU-yI&AdM$VQZD(Jw&` zlE0*%3-uf^O1lT_#NF7Zb;eu0wN4C&Qu7?+##9@XKX6nG$WxOrtSR`gYa)Jr3gS2D zSgxr)v^vEJS5vP+WIEzpVt$iRFZ1!dkE?U^i-s$w76NElE`mvf2gi1Bv}&Ml1OFnC!kYkDd3$OYFbF$n zLtnl0?`YZ1jkdTntilGOnqsklw4Zj`HP1s?5B7v~ZDj;L7KS#ii4kyg=eK=(#kU-# z@A4WXE@NXKO@N2wx>qxs@SwP1Je2he{%x+aQ2D|on|<7WD{h$KiqRj(8KZJkOhLBe zVEj*5H+WfHUIBsmsyQNVd5BKs*N4js8WWDq)b%QoR;x8q*GJd1BN2Az`u6Sm=x`AP zt4YU8`$njeL;?R9)vW^Nv`3;f(gVWH{hyesd9m9~0Uv9ciw&O{ZOGvBO6-ACglEVX z>S18fL6FccN!_vNsW`P1bp6nP`b8TXMo3X!pP;~jzYaM}NdYdb;<)uDuijpj)M$7@Eru})gycrKMGe|WBnExp^vebySEDlYzkl^-5I%cM_TTGN&(-SuZQAIh|1#SvQESxR0(tL{1bmG5?L^;oo zY}p@S4WCVfL~UK8`;b(O%G!!K4J`fx=qxX@@i3p01@r21wslMv%8}gMz4K?cNW66a zADUsV?25GREk>ka$ivnljVKfp3f!?ka!_&@jliUKimk_D?AueRnj!LUQXcfdYoNEqSTt5 zjR~7iv=itVS!npTG{SKn@>odO{rNIg#MpI1UV_^R=XsQw+~Rx>kiV0ITe|tT_{>&> zpw`||wUHDMu_U3K|ec6Qszfw*R@XdQEYf?$cHdbFqxK}?&Q$pj*z zfpijE03sUrCU5@lF}lP}lfPc`e{7iCn^0PawYR>=t7J@>2B}}G-Kr_Y?DV{v@{4#A zxP0p<0^B&?X|fgZ`LxS==nkVkFmeydA`36zE)h@&V8x_dEU!7uP`6%II%h+EGbTH|8cLh{8)U3eb6ORLe7Er&BbEEQw0gWKgV<}?UyX7fn~)&ypO!x(O(v< z+w#21im5^hLK-&@r>V_?F6RENMu6`f7B3|8nQW;igWDBE!H` z0tNRCZdB1Md4pz2lfdTTmNY=Q9I35`TUZf%&yvXQmGJLK*h)n7N)&Hn7;nQO5n_V3 znw*lg(5TskwS}T+SzOjK;BuKj^~RD+py@!VEYb&yEdw_1-DE%=3DDX{BQL4y9Bpt~ zYt{I<@i##Bk`GN&CWeHeW1(fMNU)qCd-G z0oIyFpVQ0R;a~|_23)#d04#yG5Of-#oAqE~1s4Y8#?fFhSZ<8eEt`ljp0{-1DOMnA zIL2aP%g6PAlUYozI^+g!<*;(|W3z_$!XB2XSroYYTH`d=TEis0uNIq6WuG^fw;Oy0I=VhN=s=qdzpHH*Jbn-$rLQkARpatoz)Y`>qG$l=2kn?dTy@ljq ziD;HrcMEq8Y`yy$r#BIGH7{C&&tkP2Dl6zPnxex9FIbxCiOARjNX1hR2}I_Z`lho>+Yxiv{c)b+xBMM2|*5Yyj6k zW`p6cYI56cOOIW|{? z=Vdgmd24OY(rUK?>!%b(3*g5{#LUdxgH3zQN&*4DjO=ws3aAqY+V5zle*eh->V~im82sr zP!_f3tHeQEh1Om9i`wP7xmuV;QZtX9qQ`756_(T)2Ucs%90Kd``}C{Ik-<9$1PLk8 zp~wK7jW{(wI^S9hL2j|DR`*@)r9IX(S#FkH50rTQHoD*n{!2jh#ABYJ!Q?(0zR~#9 zePRM*xba}Y+O03t9dIw2(WK_1ujkWoy1YR-{tc?}q)6f~ff8?q>14T>OS>BE5MAf5I_HywvK@^s5{SCFZ41EZXnB*{Dm>=|QEr zu6AUx?=xqvQ>0Bvf7VL+zvMstv%~psJ zGwY2@h$bOVGEAt9=Y`0|EAYNS`JTVigqRzgo)H-dZepb#PrX#bby@p$VTRANwZ*Iw z)3*Ec_F}zO_{XSO8JQ7~xlO*i=4M$FO! zh={e9sj9iXl5FosbZQnb{{T$dCy60U!7b#&pT8}HRq)~lpqOdp8DSN$2sA?Tn<%X@VD zGg^AAbq6QJO&L0nNZem}K!iI^)#&s(8;#O_r-#WrZa#=fR3?bVFpMwz{l#2<8!`0d zyXBCwxCs6Ptej5yGYXb+nDnAQ9+JfZi4Ju3)F4(}gV6uU59q8;e<= zi_)77goyA^8@7u_Q_XOR9vdRBCb9~{CFy4r+}fr+HZUu}Hw4LXVpbOJwIzZAvET(! z+@9@B#90yiS$rdSOM5*I9`%rl$3S0@dggIA0G=8|B2qWX45rOnf83L=B2=V9{jk0GRj)d$OM>Tk!Y zkc8Jlm?Z+3%ssFc+P~v2g0@l3HI(+LF1UIpu-eoq2#}cJq2Yt>99^S)A@m{5$9fQ) z1X3geTr5ot;L*iKOanJYZ|X3qOcA076jx|(&)-+)3&JMcC-$IK_n?vu6enF{JFtH5 z(c;~B*}ufsrnfy+`2YWPAmo@N7?h zkK-@*9$meg;63&0a;`rC*?9rpmG_SnKLdlO&jZYNvk~5RK4nC)_h@*NUe6IVu#BNWS~vC-sG2G_Tt)f7*UyzCOa&KZXDPtoipZo8y1k9MAW6 z|Afc?27c%s|EuQsUp2@7VQ|1-F>hwS9tOvs|I|nF8|Oczzxeb2*1VDabI%@U{<-&e z@QwTV%isS*ezg33J?<~sUo>BX)7kq7uk(0&|Mvb+kKg-4J^ts+arO&;*ZuPu{_(&6 zF59GkM#5Zxw(KpkKf^E+v7jL z_x$rK_>aG7kDu*5;mdooJ-yO@`q$;3`TxT6zr;`e#5w;pZ1_JJP%L-PZBoc(IA z|JTj&AIrP#?ftEv)#Lxx&+75#+#LABUcl0-Z{1fH|nnUdM z+jsaE!SO%--9&!$P`Tr8HyY}Ow-_zqi`aM1VBXgYn5`O-_4vznU zIsOOc_z&%Y=4<%*{|T@ECH$wWeQ)o#{)>M8Z~Ytn{BXb9&$nOyCyxEg&hbAq$N$hA zZ+{QJB7f|^?brWvNulCDN<6z!m`)fb^?|S|}{C9f( zADV^5_0K={bM1BgH5~7L{*V8M9{=Ni-|qT92#)^)bNnBeQZ2kEB^X+;3d~o~^ z{zyOn2met&|L21B^Pzbk`{i%qc)pjt{vZ9Z9{-zvpvQk{e_#&y$R23E{%!O5`@!q> z_`e^#j$fBQ_Ulo_@qcVS|Aqa<;5hs86MSP#g#5+_KliD8{p`Q$`Sa)QpK$&1$H6U@ zU;l^yO^=`bFFpE0eEEC$&)#=yf2>{9-*W!{AN0om_P_FPU~oEne-(%LzoGB(+yCsR O<(pp#4ha6a_x}JvOp><% literal 0 HcmV?d00001 diff --git a/src/fcdram.h b/src/fcdram.h index 9aa25e0..0cffc9d 100644 --- a/src/fcdram.h +++ b/src/fcdram.h @@ -3,6 +3,7 @@ #include "eggmock.h" #include +#include extern "C" { @@ -26,6 +27,8 @@ extern "C" double min_success_rate; uint64_t repetition_fracops; uint8_t safe_space_rows_per_subarray; + const char *config_file; + bool do_save_config; }; eggmock::aig_receiver fcdram_rewriter( fcdram_compiler_settings settings ); diff --git a/src/main.cpp b/src/main.cpp index c978be7..d2a8dd0 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -53,6 +53,8 @@ void run_fcdram_example() .min_success_rate= 99.9999, .repetition_fracops=5, // issue 5 FracOps per init of reference subarray .safe_space_rows_per_subarray = 16, + .config_file = "/home/alex/Documents/Studium/Sem6/inf_pm_fpa/lime-fork/config/fcdram_hksynx.toml", + .do_save_config = true, } ) ); // std::cout << "IC:" << result.instruction_count << std::endl; // std::cout << "t1:" << result.t_runner << std::endl; From 1b39a84cc725c6d9960db15f24d88b10c26bbfed Mon Sep 17 00:00:00 2001 From: alku662e Date: Sat, 19 Jul 2025 13:32:50 +0200 Subject: [PATCH 33/51] REWRITE: change from `type RowAddress` to `struct RowAddress` --- rs/src/fc_dram/architecture.rs | 93 ++++++++++++++++++++++------------ rs/src/fc_dram/compiler.rs | 44 ++++++++-------- rs/src/fc_dram/program.rs | 8 +-- 3 files changed, 86 insertions(+), 59 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 4e5d612..c7ba6cf 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -5,7 +5,7 @@ //! //! RowAddress (eg via bit-shifting given bitmasks for subarray-id & row-addr to put on-top of RowAddress -use std::{cmp::Ordering, collections::{HashMap, HashSet}, fmt::{Display, Formatter}, ops, sync::LazyLock}; +use std::{cmp::Ordering, collections::{HashMap, HashSet}, fmt::{self, Display, Formatter}, ops, sync::LazyLock}; use log::debug; use strum_macros::EnumIter; @@ -16,11 +16,11 @@ pub const ROW_ID_BITMASK: u64 = 0b0_000_000_111_111_111; // 7 highest bits=subar // some utility functions pub fn subarrayid_to_subarray_address(subarray_id: SubarrayId) -> RowAddress { - subarray_id << ROWS_PER_SUBARRAY.ilog2() // lower bits=rows in subarray + RowAddress(subarray_id << ROWS_PER_SUBARRAY.ilog2()) // lower bits=rows in subarray } -pub fn get_subarrayid_from_rowaddr(row: RowAddress) -> SubarrayId { - (row & SUBARRAY_ID_BITMASK) >> NR_SUBARRAYS.ilog2() +pub fn get_subarrayid_from_rowaddr(row: u64) -> SubarrayId { + (row & SUBARRAY_ID_BITMASK) >> ROWS_PER_SUBARRAY.ilog2() } /// All Subarrays (except the ones at the edges) have two neighboring subarrays: one below (subarray_id+1) and one above (subarray_id-1) @@ -75,11 +75,11 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { // for each predecoder store which bits will remain set due to `APA(row1,row)`: let overlapping_bits = [ // latches set by `ACT(row1)` --- latches set by `ACT(row2)` - [ row1 & predecoder_bitmasks[0], row2 & predecoder_bitmasks[0]], - [ row1 & predecoder_bitmasks[1], row2 & predecoder_bitmasks[1]], - [ row1 & predecoder_bitmasks[2], row2 & predecoder_bitmasks[2]], - [ row1 & predecoder_bitmasks[3], row2 & predecoder_bitmasks[3]], - [ row1 & predecoder_bitmasks[4], row2 & predecoder_bitmasks[4]], + [ row1.0 & predecoder_bitmasks[0], row2.0 & predecoder_bitmasks[0]], + [ row1.0 & predecoder_bitmasks[1], row2.0 & predecoder_bitmasks[1]], + [ row1.0 & predecoder_bitmasks[2], row2.0 & predecoder_bitmasks[2]], + [ row1.0 & predecoder_bitmasks[3], row2.0 & predecoder_bitmasks[3]], + [ row1.0 & predecoder_bitmasks[4], row2.0 & predecoder_bitmasks[4]], ]; let mut activated_rows = vec!(); // TODO: get other activated rows and add them to `activated_rows` @@ -87,9 +87,9 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { for i in 0..1 << predecoder_bitmasks.len() { let activated_row = overlapping_bits.iter() // start with all row-address bits unset (=0) and first predecoder stage (=1) - .fold((0 as RowAddress, 1), |(row, predecoder_stage_onehot), new_row_bits|{ + .fold((RowAddress(0), 1), |(row, predecoder_stage_onehot), new_row_bits|{ let bitmask_to_choose = (i & predecoder_stage_onehot) > 0; - (row | new_row_bits[bitmask_to_choose as usize], predecoder_stage_onehot << 1) + (RowAddress(row.0 | new_row_bits[bitmask_to_choose as usize]), predecoder_stage_onehot << 1) }); activated_rows.push(activated_row.0); } @@ -104,15 +104,15 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { // TODO: NEXT let get_distance_of_row_to_sense_amps = |row: RowAddress, subarray_rel_position: NeighboringSubarrayRelPosition| -> RowDistanceToSenseAmps { // NOTE: last & first subarrays only have sense-amps from one side - if (get_subarrayid_from_rowaddr(row) == NR_SUBARRAYS-1 && subarray_rel_position == NeighboringSubarrayRelPosition::Below) || (get_subarrayid_from_rowaddr(row) == 0 && subarray_rel_position == NeighboringSubarrayRelPosition::Above) { + if (row.get_subarray_id() == NR_SUBARRAYS-1 && subarray_rel_position == NeighboringSubarrayRelPosition::Below) || (row.get_subarray_id() == 0 && subarray_rel_position == NeighboringSubarrayRelPosition::Above) { panic!("Edge subarrays have sense-amps only connected from one side"); } - let local_row_address= row & ROW_ID_BITMASK; + let local_row_address= RowAddress(row.0 & ROW_ID_BITMASK); let distance_to_above_subarray = match local_row_address { - i if i < ROWS_PER_SUBARRAY / 2 / 3 => RowDistanceToSenseAmps::Close, // 1st third of subarray-half - i if i < ROWS_PER_SUBARRAY / 2 / 6 => RowDistanceToSenseAmps::Middle, // 2nd third of subarray-half + i if i.0 < ROWS_PER_SUBARRAY / 2 / 3 => RowDistanceToSenseAmps::Close, // 1st third of subarray-half + i if i.0 < ROWS_PER_SUBARRAY / 2 / 6 => RowDistanceToSenseAmps::Middle, // 2nd third of subarray-half _ => RowDistanceToSenseAmps::Far, // everything else is treated as being far away }; @@ -125,16 +125,16 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { // precompute things based on given SRA (simultaneous row activation function) let mut precomputed_simultaneous_row_activations = HashMap::new(); for i in 0..ROWS_PER_SUBARRAY { - precomputed_simultaneous_row_activations.insert((i,i), vec!(i)); // special case: no other row is activated when executing `APA(r1,r1)` + precomputed_simultaneous_row_activations.insert((RowAddress(i),RowAddress(i)), vec!(RowAddress(i))); // special case: no other row is activated when executing `APA(r1,r1)` for j in i+1..ROWS_PER_SUBARRAY { - let activated_rows = get_activated_rows_from_apa(i, j); - precomputed_simultaneous_row_activations.insert((i,j), activated_rows.clone()); - precomputed_simultaneous_row_activations.insert((j,i), activated_rows.clone()); + let activated_rows = get_activated_rows_from_apa(RowAddress(i), RowAddress(j)); + precomputed_simultaneous_row_activations.insert((RowAddress(i),RowAddress(j)), activated_rows.clone()); + precomputed_simultaneous_row_activations.insert((RowAddress(j),RowAddress(i)), activated_rows.clone()); for row in activated_rows { row_activated_by_rowaddress_tuple.entry(row) .or_default() - .insert((i,j)); + .insert((RowAddress(i),RowAddress(j))); } } } @@ -160,9 +160,36 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { }); /// - ! must be smaller than `rows_per_subarray * nr_subarrays` (this is NOT checked!) -pub type RowAddress = u64; +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct RowAddress(pub u64); pub type SubarrayId = u64; +impl RowAddress { + /// Return subarray-id the row lies in + /// TODO: ship logic from `get_subarrayid_from_rowaddr()` into this function + pub fn get_subarray_id(&self) -> SubarrayId { + (self.0 & SUBARRAY_ID_BITMASK) >> ROWS_PER_SUBARRAY.ilog2() + } +} + +impl fmt::Display for RowAddress { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}.{}", self.get_subarray_id(), self.0 & ROW_ID_BITMASK) + } +} + +impl fmt::Debug for RowAddress { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}.{}", self.get_subarray_id(), self.0 & ROW_ID_BITMASK) + } +} + +impl From for RowAddress { + fn from(value: u64) -> Self { + RowAddress(value) + } +} + // impl Display for Vec { // fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { // write!(f, "[")?; @@ -287,9 +314,9 @@ impl FCDRAMArchitecture { /// subarray adjacent to the compute subarray (!this is not checked but assumed to be true!) pub fn get_instructions_implementation_of_logic_ops(logic_op: LogicOp) -> Vec { match logic_op { - LogicOp::NOT => vec!(Instruction::ApaNOT(0, 0)), - LogicOp::AND => vec!(Instruction::FracOp(0), Instruction::ApaNOT(0, 0)), - LogicOp::OR => vec!(Instruction::FracOp(0), Instruction::ApaNOT(0, 0)), + LogicOp::NOT => vec!(Instruction::ApaNOT(RowAddress(0), RowAddress(0))), + LogicOp::AND => vec!(Instruction::FracOp(RowAddress(0)), Instruction::ApaNOT(RowAddress(0), RowAddress(0))), + LogicOp::OR => vec!(Instruction::FracOp(RowAddress(0)), Instruction::ApaNOT(RowAddress(0), RowAddress(0))), LogicOp::NAND => { // 1. AND, 2. NOT FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::AND) @@ -376,19 +403,19 @@ pub enum Instruction { impl Display for Instruction { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let display_row = |row| { format!("{}.{}", get_subarrayid_from_rowaddr(row), row & ROW_ID_BITMASK)}; // display subarray separately + let display_row = |row: &RowAddress| { format!("{}.{}", row.get_subarray_id(), row.0 & ROW_ID_BITMASK)}; // display subarray separately // TODO: change string-representation to display subarray-id let description = match self { - Instruction::FracOp(row) => format!("AP({})", display_row(*row)), - Instruction::ApaNOT(row1,row2) => format!("APA_NOT({},{})", display_row(*row1), display_row(*row2)), - Instruction::ApaAndOr(row1,row2) => format!("APA_AND_OR({},{}) // activates {:?}", display_row(*row1), display_row(*row2), ARCHITECTURE.precomputed_simultaneous_row_activations.get(&(*row1,*row2))), - Instruction::RowCloneFPM(row1, row2, comment) => format!("AA({},{}) // {}", display_row(*row1), display_row(*row2), comment), + Instruction::FracOp(row) => format!("AP({})", display_row(row)), + Instruction::ApaNOT(row1,row2) => format!("APA_NOT({},{})", display_row(row1), display_row(row2)), + Instruction::ApaAndOr(row1,row2) => format!("APA_AND_OR({},{}) // activates {:?}", display_row(row1), display_row(row2), ARCHITECTURE.precomputed_simultaneous_row_activations.get(&(*row1,*row2))), + Instruction::RowCloneFPM(row1, row2, comment) => format!("AA({},{}) // {}", display_row(row1), display_row(row2), comment), Instruction::RowClonePSM(row1, row2) => format!(" TRANSFER({},(rowX)) TANSFER(rowX,{}) ", - display_row(*row1), - display_row(*row2) + display_row(row1), + display_row(row2) )}; write!(f, "{}", description) } @@ -434,11 +461,11 @@ impl Instruction { (4, 94.94), (8, 95.85), (16, 95.87), - (32, 0.00) // no value in paper :// + (32, 0.000) // no value in paper :// ]); // nr_operand_success_rate.get(&nr_operands); - let (src_array, dst_array) = (get_subarrayid_from_rowaddr(*src), get_subarrayid_from_rowaddr(*dst)); + let (src_array, dst_array) = (src.get_subarray_id(), dst.get_subarray_id()); let furthest_src_row = activated_rows.iter() .map(|row| (ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row, NeighboringSubarrayRelPosition::get_relative_position(src_array, dst_array))) // RowDistanceToSenseAmps::Far; // TODO: get this .max() diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 3ad243a..ea120bf 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -132,8 +132,8 @@ impl Compiler { activated_rows.iter().map(|&row| { // move subarray to 1st subarray (instead of 0th, which is at the edge and hence has no sense-amps above) let subarray1_id = ((ROW_ID_BITMASK << 1) | 1) ^ ROW_ID_BITMASK; - let row = subarray1_id | row; // makes sure that `get_distance_of_row_to_sense_amps` doesn't panic since SRA returns subarray=0 by default (which is an edge subarray) - println!("{row:b}"); + let row = RowAddress(subarray1_id | row.0); // makes sure that `get_distance_of_row_to_sense_amps` doesn't panic since SRA returns subarray=0 by default (which is an edge subarray) + println!("{:b}", row.0); (ARCHITECTURE.get_distance_of_row_to_sense_amps)(row, sense_amp_position) as u64 }).sum() }; @@ -142,8 +142,8 @@ impl Compiler { let activated_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(&best_row_combi).unwrap(); activated_rows.iter().map(|&row| { let subarray1_id = ((ROW_ID_BITMASK << 1) | 1) ^ ROW_ID_BITMASK; - let row = subarray1_id | row; // makes sure that `get_distance_of_row_to_sense_amps` doesn't panic since SRA returns subarray=0 by default (which is an edge subarray) - println!("{row:b}"); + let row = RowAddress(subarray1_id | row.0); // makes sure that `get_distance_of_row_to_sense_amps` doesn't panic since SRA returns subarray=0 by default (which is an edge subarray) + println!("{:b}", row.0); (ARCHITECTURE.get_distance_of_row_to_sense_amps)(row, sense_amp_position) as u64 }).sum() }; @@ -200,7 +200,7 @@ impl Compiler { let next_free_row = self.comp_state.free_rows_per_subarray.get_mut(subarray).and_then(|v| v.pop()).expect("OOM: No more free rows in subarray {subarray} for placing inputs"); self.comp_state.value_states.insert(inverted_signal, ValueState { is_computed: true, row_location: Some(next_free_row) }); - program.input_row_operands_placement.entry(original_signal).or_default().push(next_free_row); + program.input_row_operands_placement.entry(inverted_signal).or_default().push(next_free_row); } } } @@ -268,18 +268,18 @@ impl Compiler { // TODO: read&write this to&from config-file (added manually here in the meantiem) self.compute_row_activations = HashMap::from([ - ((SupportedNrOperands::One, NeighboringSubarrayRelPosition::Above), (8, 8)), - ((SupportedNrOperands::One, NeighboringSubarrayRelPosition::Below), (303, 303)), - ((SupportedNrOperands::Two, NeighboringSubarrayRelPosition::Above), (15, 79)), - ((SupportedNrOperands::Two, NeighboringSubarrayRelPosition::Below), (293, 357)), - ((SupportedNrOperands::Four, NeighboringSubarrayRelPosition::Above), (60, 42)), - ((SupportedNrOperands::Four, NeighboringSubarrayRelPosition::Below), (472, 412)), - ((SupportedNrOperands::Eight, NeighboringSubarrayRelPosition::Above), (42, 15)), - ((SupportedNrOperands::Eight, NeighboringSubarrayRelPosition::Below), (203, 283)), - ((SupportedNrOperands::Sixteen, NeighboringSubarrayRelPosition::Above), (32, 83)), - ((SupportedNrOperands::Sixteen, NeighboringSubarrayRelPosition::Below), (470, 252)), - ((SupportedNrOperands::Thirtytwo, NeighboringSubarrayRelPosition::Above), (307, 28)), - ((SupportedNrOperands::Thirtytwo, NeighboringSubarrayRelPosition::Below), (149, 318)), + ((SupportedNrOperands::One, NeighboringSubarrayRelPosition::Above), (RowAddress(8), RowAddress(8))), + ((SupportedNrOperands::One, NeighboringSubarrayRelPosition::Below), (RowAddress(303), RowAddress(303))), + ((SupportedNrOperands::Two, NeighboringSubarrayRelPosition::Above), (RowAddress(15), RowAddress(79))), + ((SupportedNrOperands::Two, NeighboringSubarrayRelPosition::Below), (RowAddress(293), RowAddress(357))), + ((SupportedNrOperands::Four, NeighboringSubarrayRelPosition::Above), (RowAddress(60), RowAddress(42))), + ((SupportedNrOperands::Four, NeighboringSubarrayRelPosition::Below), (RowAddress(472), RowAddress(412))), + ((SupportedNrOperands::Eight, NeighboringSubarrayRelPosition::Above), (RowAddress(42), RowAddress(15))), + ((SupportedNrOperands::Eight, NeighboringSubarrayRelPosition::Below), (RowAddress(203), RowAddress(283))), + ((SupportedNrOperands::Sixteen, NeighboringSubarrayRelPosition::Above), (RowAddress(32), RowAddress(83))), + ((SupportedNrOperands::Sixteen, NeighboringSubarrayRelPosition::Below), (RowAddress(470), RowAddress(252))), + ((SupportedNrOperands::Thirtytwo, NeighboringSubarrayRelPosition::Above), (RowAddress(307), RowAddress(28))), + ((SupportedNrOperands::Thirtytwo, NeighboringSubarrayRelPosition::Below), (RowAddress(149), RowAddress(318))), ]); @@ -303,10 +303,10 @@ impl Compiler { let new_compute_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(next_compute_row_combi).expect("Compute row cant be activated??"); all_compute_rows.iter().chain(new_compute_rows).cloned().collect() }); - let mut free_rows = (0..ROWS_PER_SUBARRAY).collect::>(); + let mut free_rows = (0..ROWS_PER_SUBARRAY).map(RowAddress::from).collect::>(); free_rows.retain(|r| {!compute_rows.contains(r)}); for subarray in 0..NR_SUBARRAYS { - let free_rows_in_subarray = free_rows.iter().map(|row| row | subarrayid_to_subarray_address(subarray)).collect(); // transform local row address to row addresses in corresponding `subarray` + let free_rows_in_subarray = free_rows.iter().map(|row| RowAddress(row.0 | subarrayid_to_subarray_address(subarray).0)).collect(); // transform local row address to row addresses in corresponding `subarray` self.comp_state.free_rows_per_subarray.entry(subarray as SubarrayId).insert_entry(free_rows_in_subarray); } @@ -471,7 +471,7 @@ impl Compiler { self.comp_state.dram_state.insert(row_addr, RowState { is_compute_row: true, live_value: Some(src_operand), constant: None }); - if (src_operand_location & SUBARRAY_ID_BITMASK) == (row_addr & SUBARRAY_ID_BITMASK) { + if (src_operand_location.0 & SUBARRAY_ID_BITMASK) == (row_addr.0 & SUBARRAY_ID_BITMASK) { instructions.push(Instruction::RowCloneFPM(src_operand_location, row_addr, String::from("Move operand to compute row"))); } else { instructions.push(Instruction::RowClonePSM(src_operand_location, row_addr)); // TODO: remove this, since it's not usable in COTS DRAMs @@ -489,7 +489,7 @@ impl Compiler { fn select_compute_and_ref_subarray(&self, input_rows: Vec) -> (SubarrayId, SubarrayId) { // naive implementation: just use the subarray that most of the `input_rows` reside in // TODO: find better solution - let used_subarray_ids = input_rows.into_iter().map(|row| row & SUBARRAY_ID_BITMASK); + let used_subarray_ids = input_rows.into_iter().map(|row| row.0 & SUBARRAY_ID_BITMASK); let (&mostly_used_subarray_id, _) = used_subarray_ids .fold(HashMap::new(), |mut acc, item| { *acc.entry(item).or_insert(0) += 1; @@ -826,7 +826,7 @@ mod tests { #[test] fn test_select_compute_and_ref_subarray() { let compiler = init(); - let (selected_subarray, _) = compiler.select_compute_and_ref_subarray(vec!(0b1_000_000_000, 0b1_000_010_000, 0b111_000_000_000, 0b10_100_000_000,)); + let (selected_subarray, _) = compiler.select_compute_and_ref_subarray(vec!(RowAddress(0b1_000_000_000), RowAddress(0b1_000_010_000), RowAddress(0b111_000_000_000), RowAddress(0b10_100_000_000),)); assert_eq!(selected_subarray, 0b1_000_000_000); } diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs index 9dea244..18c08b0 100644 --- a/rs/src/fc_dram/program.rs +++ b/rs/src/fc_dram/program.rs @@ -32,14 +32,14 @@ impl Program { /// Print the generated program in human-readable form impl Display for Program { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let display_row = |row| { - format!("{}.{}", get_subarrayid_from_rowaddr(row), row & ROW_ID_BITMASK) + let display_row = |row: &RowAddress| { + format!("{}.{}", row.get_subarray_id(), row.0 & ROW_ID_BITMASK) }; // display subarray separately let display_rows = |rows: Vec| { let formatted: Vec = rows.iter() - .map(|&row| format!("{}.{}", get_subarrayid_from_rowaddr(row), row & ROW_ID_BITMASK)) + .map(|&row| format!("{}.{}", row.get_subarray_id(), row.0 & ROW_ID_BITMASK)) .collect(); format!("[{}]", formatted.join(", ")) @@ -60,7 +60,7 @@ impl Display for Program { writeln!(f, "---------------------------------------")?; writeln!(f, "Output operand placement:")?; for (signal, row) in &self.output_row_operands_placement{ - writeln!(f, "{:?} in {}", signal, display_row(*row))?; + writeln!(f, "{:?} in {}", signal, display_row(row))?; } writeln!(f, "---------------------------------------")?; Ok(()) From c093873df976651cd566449b8143b6632fa7cb8d Mon Sep 17 00:00:00 2001 From: alku662e Date: Sat, 19 Jul 2025 13:53:08 +0200 Subject: [PATCH 34/51] Start reimplementing execute_next_instruction() --- rs/src/fc_dram/architecture.rs | 5 ----- rs/src/fc_dram/compiler.rs | 21 ++++++++++----------- rs/src/fc_dram/program.rs | 6 +++++- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index c7ba6cf..45fd26b 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -19,10 +19,6 @@ pub fn subarrayid_to_subarray_address(subarray_id: SubarrayId) -> RowAddress { RowAddress(subarray_id << ROWS_PER_SUBARRAY.ilog2()) // lower bits=rows in subarray } -pub fn get_subarrayid_from_rowaddr(row: u64) -> SubarrayId { - (row & SUBARRAY_ID_BITMASK) >> ROWS_PER_SUBARRAY.ilog2() -} - /// All Subarrays (except the ones at the edges) have two neighboring subarrays: one below (subarray_id+1) and one above (subarray_id-1) #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumIter)] pub enum NeighboringSubarrayRelPosition { @@ -166,7 +162,6 @@ pub type SubarrayId = u64; impl RowAddress { /// Return subarray-id the row lies in - /// TODO: ship logic from `get_subarrayid_from_rowaddr()` into this function pub fn get_subarray_id(&self) -> SubarrayId { (self.0 & SUBARRAY_ID_BITMASK) >> ROWS_PER_SUBARRAY.ilog2() } diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index ea120bf..bd1fd1e 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -69,19 +69,19 @@ impl Compiler { let mut program = Program::new(vec!()); // debug!("Compiling {:?}", network); - // 0. Prepare compilation: select safe-space rows, place inputs into DRAM module (and store where inputs have been placed in `program`) + // 0. Prepare compilation: + // - select safe-space rows + // - place inputs&constants into DRAM module (and store where inputs have been placed in `program`) + // - initialize candidates (with which to start execution) self.init_comp_state(network, &mut program); - // start with inputs - let primary_inputs = network.leaves(); - debug!("Primary inputs: {:?}", primary_inputs.collect::>()); - // println!("{:?}", network.outputs().collect::>()); - debug!("Nodes in network:"); - for node in network.iter() { - debug!("{:?},", node); - } + // debug!("Nodes in network:"); + // for node in network.iter() { + // debug!("{:?},", node); + // } + todo!("NEXT"); // 1. Actual compilation while let Some((next_candidate, _)) = self.comp_state.candidates.pop() { // TODO: extend program with instr that is executed next @@ -319,11 +319,10 @@ impl Compiler { self.place_inputs(network, program); // place input-operands into rows debug!("Placed inputs {:?} in {:?}", network.leaves().collect::>(), self.comp_state.value_states); - todo!("NEXT"); // 0.5 Setup: store all network-nodes yet to be compiled self.init_candidates(network); - + debug!("Initialized candidates {:?}", self.comp_state.candidates); } /// Assigns signals to subarrays and through this determines placement of those signal in the DRAM module diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs index 18c08b0..447c38b 100644 --- a/rs/src/fc_dram/program.rs +++ b/rs/src/fc_dram/program.rs @@ -2,7 +2,7 @@ //! compiling given logic-network (see [`compilation`]) and potentially adding some manual //! optimizations ([`optimization`]) use super::architecture::RowAddress; -use crate::fc_dram::architecture::{get_subarrayid_from_rowaddr, Instruction, ROW_ID_BITMASK}; +use crate::fc_dram::architecture::{Instruction, ROW_ID_BITMASK}; use eggmock::Signal; use std::collections::HashMap; use std::fmt::{Display, Formatter}; @@ -11,6 +11,9 @@ use std::fmt::{Display, Formatter}; #[derive(Debug, Clone)] pub struct Program { pub instructions: Vec, + /// Specifies in which rows constants have to be placed (!have to be placed in EVERY subarray) + /// - TODO: adjust this to only place in subarrays which are actually used as reference subarrays during program execution + pub constants_row_placement: HashMap>, /// Specifies where row-operands should be placed prior to calling this program /// (This is a convention which tells the user of this lib where the data should be placed within the DRAM before executing this program) /// - NOTE: Signals might have to be placed in several subarrays (REMINDER: movement in btw subarrays is not supported by FCDRAM) @@ -23,6 +26,7 @@ impl Program { pub fn new(instructions: Vec) -> Self { Self { instructions, + constants_row_placement: HashMap::new(), input_row_operands_placement: HashMap::new(), output_row_operands_placement: HashMap::new(), } From 25e3d8fa48ae7ee9b3a6a59cf65bb15e6ee6911b Mon Sep 17 00:00:00 2001 From: alku662e Date: Sat, 19 Jul 2025 15:54:30 +0200 Subject: [PATCH 35/51] Noticed that candidate-list need to be adjusted to track candidates also per subarray (since signals can be present in several subarrays at once) --- rs/src/fc_dram/compiler.rs | 58 ++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index bd1fd1e..5e8299e 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -81,7 +81,6 @@ impl Compiler { // debug!("{:?},", node); // } - todo!("NEXT"); // 1. Actual compilation while let Some((next_candidate, _)) = self.comp_state.candidates.pop() { // TODO: extend program with instr that is executed next @@ -208,6 +207,7 @@ impl Compiler { /// Initialize candidates with all nodes that are computable fn init_candidates(&mut self, network: &impl NetworkWithBackwardEdges) { + todo!("NEXT"); let inputs: Vec = network.leaves().collect(); // init candidates with all nodes having only inputs as src-operands @@ -479,34 +479,50 @@ impl Compiler { instructions } + /// Returns instructions to be executed for performing `NOT` on `src_row` into `dst_row` + /// - NOTE: currenlty only single-operand NOTs are supported bc + /// 1) more operands lead to (slightly) worse results (see Figure10 in [1]) + /// 2) since there are separate compute rows using multiple dst rows doesn't make sense (the values need to be copied out of the dst-rows anyway into non-compute rows) + fn execute_not(&self, signal_to_invert: &Signal) -> Vec { + let row_combi = self.compute_row_activations.get(&(SupportedNrOperands::One, NeighboringSubarrayRelPosition::Above)).unwrap(); + // perform `NOT` - /// Return id of subarray to use for computation and reference (compute_subarrayid, reference_subarrayid) - /// - based on location of input rows AND current compilation state - /// - [ ] POSSIBLE EXTENSION: include lookahead for future ops and their inputs they depend on - /// - /// TODO: NEXT - fn select_compute_and_ref_subarray(&self, input_rows: Vec) -> (SubarrayId, SubarrayId) { - // naive implementation: just use the subarray that most of the `input_rows` reside in - // TODO: find better solution - let used_subarray_ids = input_rows.into_iter().map(|row| row.0 & SUBARRAY_ID_BITMASK); - let (&mostly_used_subarray_id, _) = used_subarray_ids - .fold(HashMap::new(), |mut acc, item| { - *acc.entry(item).or_insert(0) += 1; - acc - }) - .iter().max_by_key(|&(_, count)| count).unwrap(); - - let selected_ref_subarray = (mostly_used_subarray_id+1) % NR_SUBARRAYS; // TODO: use 2D-layout of subarrays to determine which of them share sense-amps - - (mostly_used_subarray_id, selected_ref_subarray) + let row_combi_correct_subarray = todo!(); // REMINDER: rows returned `compute_row_activations` are not yet adjusted for the right subarray + } + + /// Returns the instructions needed to perform `language_op` + fn execute_and_or(&self, language_op: Aoig) -> Vec { + let logic_op = match language_op { + // REMINDER: operand-nr is extracted by looking at nr of children beforehand + Aoig::And(_) | Aoig::And4(_) | Aoig::And8(_)| Aoig::And16(_) => LogicOp::AND, + Aoig::Or(_) | Aoig::Or4(_) | Aoig::Or8(_) | Aoig::Or16(_) => LogicOp::OR, + _ => panic!("candidate is expected to be a logic op"), + }; + + todo!(); } /// Returns Instructions to execute given `next_candidate` /// - [ ] make sure that operation to be executed on those rows won't simultaneously activate other rows holding valid data which will be used by future operations + /// /// TODO: NEXT fn execute_next_instruction(&mut self, next_candidate: &Signal, network: &impl NetworkWithBackwardEdges) -> Vec { + let node_id = next_candidate.node_id(); + assert!(network.node(node_id).inputs().iter().all(|input| { + self.signal_to_subarrayids.get(input).unwrap() == self.signal_to_subarrayids.get(&Signal::new(node_id, false)) + })); + + let mut next_instructions = vec!(); + // 1. Perform actual operation of the node + let language_op = network.node(node_id); + next_instructions.append(&mut self.execute_and_or(language_op)); + // 2. Negate the result (if needed) + if next_candidate.is_inverted() { + let mut negate_instructions = self.execute_not(next_candidate); + next_instructions.append(&mut negate_instructions); + } todo!(); - // let mut next_instructions = vec!(); + // // debug!("Executing candidate {:?}", next_candidate); // let src_operands: Vec = network.node(next_candidate.node_id()).inputs().to_vec(); From 543802e778ae05505d9152854aa8255db38f95fc Mon Sep 17 00:00:00 2001 From: alku662e Date: Sat, 19 Jul 2025 17:34:45 +0200 Subject: [PATCH 36/51] Rewriting candidate initialization.. --- rs/src/fc_dram/compiler.rs | 133 +++++++++++++++++++------------------ 1 file changed, 69 insertions(+), 64 deletions(-) diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 5e8299e..77bcdf3 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -88,7 +88,7 @@ impl Compiler { program.instructions.append(executed_instructions); // update new candidates (`next_candidate` is now available) - let new_candidates = self.get_new_candidates(network, next_candidate); + let new_candidates = self.get_new_candidates(network, next_candidate.0, next_candidate.1); debug!("New candidates: {:?}", new_candidates); self.comp_state.candidates.extend(new_candidates); @@ -99,8 +99,14 @@ impl Compiler { // store output operand location so user can retrieve them after running the program let outputs = network.outputs(); // TODO: doesn't work yet - program.output_row_operands_placement = outputs.map(|out| { - (out, self.comp_state.value_states.get(&out).unwrap().row_location.expect("ERROR: one of the outputs hasn't been computed yet...")) + program.output_row_operands_placement = outputs.flat_map(|out| { + let subarrays = self.signal_to_subarrayids.get(&out).unwrap(); + let mut placements = vec!(); + for subarray in subarrays { + let row_address = self.comp_state.value_states.get(&(out,*subarray)).expect("ERROR: one of the outputs hasn't been computed yet..."); + placements.push((out, *row_address)); + } + placements }).collect(); program } @@ -188,7 +194,7 @@ impl Compiler { if let Some(original_input_locations) = self.signal_to_subarrayids.get(&original_signal) { for subarray in original_input_locations { let next_free_row = self.comp_state.free_rows_per_subarray.get_mut(subarray).and_then(|v| v.pop()).expect("OOM: No more free rows in subarray {subarray} for placing inputs"); - self.comp_state.value_states.insert(original_signal, ValueState { is_computed: true, row_location: Some(next_free_row) }); + self.comp_state.value_states.insert((original_signal, *subarray), next_free_row); program.input_row_operands_placement.entry(original_signal).or_default().push(next_free_row); } @@ -197,7 +203,7 @@ impl Compiler { if let Some(inverted_input_locations) = self.signal_to_subarrayids.get(&inverted_signal) { for subarray in inverted_input_locations { let next_free_row = self.comp_state.free_rows_per_subarray.get_mut(subarray).and_then(|v| v.pop()).expect("OOM: No more free rows in subarray {subarray} for placing inputs"); - self.comp_state.value_states.insert(inverted_signal, ValueState { is_computed: true, row_location: Some(next_free_row) }); + self.comp_state.value_states.insert((inverted_signal, *subarray), next_free_row); program.input_row_operands_placement.entry(inverted_signal).or_default().push(next_free_row); } @@ -206,38 +212,43 @@ impl Compiler { } /// Initialize candidates with all nodes that are computable + /// NOTE: initially all nodes whose src-operands are primary inputs only are marked as candidates in all subarrays (since inputs are expected to be placed in all those subarrays by the user) fn init_candidates(&mut self, network: &impl NetworkWithBackwardEdges) { - todo!("NEXT"); let inputs: Vec = network.leaves().collect(); // init candidates with all nodes having only inputs as src-operands for &input in inputs.as_slice() { // every output has a prio determined eg by how many src-operands it uses last (->to minimize nr of live values in rows) - let mut outputs_with_prio: PriorityQueue = network.node_outputs(input) + let mut outputs_with_prio: PriorityQueue<(Signal, SubarrayId), SchedulingPrio> = network.node_outputs(input) .filter(|output| network.node(*output).inputs().iter().all(|other_input| inputs.contains(&other_input.node_id()) )) // only those nodes are candidates, whose src-operands are ALL inputs (->only primary inputs are directly available) - .map( |output| { + .flat_map( |output| { let output_signal = Signal::new(output, false); - (output_signal, self.compute_scheduling_prio_for_node(output_signal, network)) + let mut output_candidates = vec!(); + for subarray in self.signal_to_subarrayids.get(&output_signal).expect("Signal is not mapped to a subarray yet??") { + output_candidates.push(((output_signal,*subarray), self.compute_scheduling_prio_for_node(output_signal, *subarray, network))); + } + output_candidates }) .collect(); + self.comp_state.candidates.append(&mut outputs_with_prio); debug!("{:?} has the following outputs: {:?}", input, network.node_outputs(input).collect::>()); } } /// Returns list of candidates that can be computed once `computed_node` is available - fn get_new_candidates(&mut self, network: &impl NetworkWithBackwardEdges, computed_node: Signal) -> PriorityQueue { + fn get_new_candidates(&mut self, network: &impl NetworkWithBackwardEdges, computed_node: Signal, subarray: SubarrayId) -> PriorityQueue<(Signal, SubarrayId), SchedulingPrio> { debug!("Candidates: {:?}", self.comp_state.candidates); debug!("DRAM state: {:?}", self.comp_state.value_states); network.node_outputs(computed_node.node_id()) - // filter for new nodes that have all their input-operands available now (->only inputs of computed nodes could have changed to candidate-state, other nodes remain uneffected) + // filter for new nodes that have all their input-operands available now in the same subarray (->only inputs of computed nodes could have changed to candidate-state, other nodes remain uneffected) .filter({|out| network.node(*out).inputs().iter() .all( |input| { debug!("Out: {:?}, In: {:?}", out, input); - self.comp_state.value_states.keys().contains(input) && self.comp_state.value_states.get(input).unwrap().is_computed + self.comp_state.value_states.contains_key(&(*input, subarray)) }) }) - .map(|id| (Signal::new(id, false), self.compute_scheduling_prio_for_node(Signal::new(id, false), network))) // TODO: check if inverted signal is required as well! + .map(|id| ((Signal::new(id, false), subarray), self.compute_scheduling_prio_for_node(Signal::new(id, false), subarray, network))) // TODO: check if inverted signal is required as well! .collect() } @@ -323,6 +334,7 @@ impl Compiler { // 0.5 Setup: store all network-nodes yet to be compiled self.init_candidates(network); debug!("Initialized candidates {:?}", self.comp_state.candidates); + todo!("NEXT"); } /// Assigns signals to subarrays and through this determines placement of those signal in the DRAM module @@ -454,7 +466,9 @@ impl Compiler { /// Places the referenced `src_operands` into the corresponding `row_addresses` which are expected to be simultaneously executed using [`Instruction::RowCloneFPM`] /// - NOTE: `rel_pos_of_ref_subarray` might affect placement of inputs in the future (eg to choose which input rows to choose for *input replication*) - fn init_compute_subarray(&mut self, mut row_addresses: Vec, mut src_operands: Vec, logic_op: LogicOp, rel_pos_of_ref_subarray: NeighboringSubarrayRelPosition) -> Vec { + fn init_compute_subarray(&mut self, mut row_addresses: Vec, mut src_operands: Vec, subarray: SubarrayId, logic_op: LogicOp, rel_pos_of_ref_subarray: NeighboringSubarrayRelPosition) -> Vec { + // TODO: validity check: make sure all inputs are actually already inside `subarray` + let mut instructions = vec!(); // if there are fewer src-operands than activated rows perform input replication row_addresses.sort_by_key(|row| ((ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row, rel_pos_of_ref_subarray.clone()))); // replicate input that resides in row with lowest success-rate (=probably the row furthest away) @@ -465,15 +479,15 @@ impl Compiler { } for (&row_addr, &src_operand) in row_addresses.iter().zip(src_operands.iter()) { - let src_operand_location = self.comp_state.value_states.get(&src_operand).expect("Src operand not available although it is used by a candidate. Sth went wrong...") - .row_location.expect("Src operand not live although it is used by a candidate. Sth went wrong..."); + let src_operand_location = self.comp_state.value_states.get(&(src_operand, subarray)).expect("Src operand not available although it is used by a candidate. Sth went wrong..."); self.comp_state.dram_state.insert(row_addr, RowState { is_compute_row: true, live_value: Some(src_operand), constant: None }); + // TODO: if (src_operand_location.0 & SUBARRAY_ID_BITMASK) == (row_addr.0 & SUBARRAY_ID_BITMASK) { - instructions.push(Instruction::RowCloneFPM(src_operand_location, row_addr, String::from("Move operand to compute row"))); + instructions.push(Instruction::RowCloneFPM(*src_operand_location, row_addr, String::from("Move operand to compute row"))); } else { - instructions.push(Instruction::RowClonePSM(src_operand_location, row_addr)); // TODO: remove this, since it's not usable in COTS DRAMs + instructions.push(Instruction::RowClonePSM(*src_operand_location, row_addr)); // TODO: remove this, since it's not usable in COTS DRAMs } } instructions @@ -502,23 +516,26 @@ impl Compiler { todo!(); } - /// Returns Instructions to execute given `next_candidate` + /// Returns Instructions to execute given `next_candidate` (which is a signal which needs to reside in a specific subarray after performing the execution) /// - [ ] make sure that operation to be executed on those rows won't simultaneously activate other rows holding valid data which will be used by future operations /// /// TODO: NEXT - fn execute_next_instruction(&mut self, next_candidate: &Signal, network: &impl NetworkWithBackwardEdges) -> Vec { - let node_id = next_candidate.node_id(); - assert!(network.node(node_id).inputs().iter().all(|input| { - self.signal_to_subarrayids.get(input).unwrap() == self.signal_to_subarrayids.get(&Signal::new(node_id, false)) - })); + fn execute_next_instruction(&mut self, next_candidate: &(Signal, SubarrayId), network: &impl NetworkWithBackwardEdges) -> Vec { + let (next_signal, result_subarray) = next_candidate; + let node_id = next_signal.node_id(); + + // quick validity check: ensure all inputs are already present in the required array + // assert!(network.node(node_id).inputs().iter().all(|input| { + // // TODO + // })); let mut next_instructions = vec!(); // 1. Perform actual operation of the node let language_op = network.node(node_id); next_instructions.append(&mut self.execute_and_or(language_op)); // 2. Negate the result (if needed) - if next_candidate.is_inverted() { - let mut negate_instructions = self.execute_not(next_candidate); + if next_signal.is_inverted() { + let mut negate_instructions = self.execute_not(next_signal); next_instructions.append(&mut negate_instructions); } todo!(); @@ -606,24 +623,22 @@ impl Compiler { // next_instructions } - /// Compute `SchedulingPrio` for a given node + /// Compute `SchedulingPrio` for a given `signal` located in the `subarray` /// - used for inserting new candidates + /// /// TODO: write unittest for this function - fn compute_scheduling_prio_for_node(&self, signal: Signal, network: &impl NetworkWithBackwardEdges) -> SchedulingPrio { + fn compute_scheduling_prio_for_node(&self, signal: Signal, subarray: SubarrayId, network: &impl NetworkWithBackwardEdges) -> SchedulingPrio { let nr_last_value_uses = network.node(signal.node_id()).inputs() // for each input check whether `id` is the last node using it .iter() .fold(0, |acc, input| { let input_id = Signal::node_id(input); - let non_computed_outputs: Vec = network.node_outputs(input_id) // get all other nodes relying on this input + let non_computed_outputs: Vec = network.node_outputs(input_id) // get all other nodes still relying on this input .filter(|out| { let out_signal = Signal::new(*out,false); - out_signal != signal && - !(self.comp_state.value_states.get(&out_signal) - .unwrap_or(&ValueState{is_computed: false, row_location: None }) // no entry means this is the first time accessing this value - .is_computed) - } - ) // filter for uses of `input` which still rely on it (=those that are not computed yet, except for currently checked node + out_signal != signal && // all output signals except for the current one + !(self.comp_state.value_states.contains_key(&(out_signal, subarray))) // that are not yet computed (not rows present in `subarray` holding that value + }) // filter for uses of `input` which still rely on it (=those that are not computed yet, except for currently checked node .collect(); if non_computed_outputs.is_empty() { acc + 1 @@ -651,28 +666,18 @@ pub struct RowState { constant: Option, } -#[derive(Debug)] -pub struct ValueState { - /// Whether the value has already been computed (->only then it could reside in a row) - /// - the value could also have been computed but spilled already on its last use - /// - helps determining whether src-operand is the last use: for all other output operands of that source operand just check whether they have been already computed - is_computed: bool, - /// Row in which the value resides - row_location: Option, -} - /// Keep track of current progress of the compilation (eg which rows are used, into which rows data is placed, ...) pub struct CompilationState { /// For each row in the dram-module store its state (whether it's a compute row or if not whether/which value is stored inside it dram_state: HashMap, - /// Stores row in which an intermediate result (which is still to be used by future ops) is currently located (or whether it has been computed at all) - value_states: HashMap, + /// For each subarray it stores the row in which the `Signal` is located + value_states: HashMap<(Signal, SubarrayId), RowAddress>, /// Stores row location of constant /// - REMINDER: some constants are stored in fixed rows (!in each subarray), eg 0s and 1s for initializing reference subarray constant_values: HashMap, /// List of candidates (ops ready to be issued) prioritized by some metric by which they are scheduled for execution /// - NOTE: calculate Nodes `SchedulingPrio` using - candidates: PriorityQueue, + candidates: PriorityQueue<(Signal, SubarrayId), SchedulingPrio>, /// For each Subarray store which rows are free (and hence can be used for storing values) free_rows_per_subarray: HashMap>, } @@ -793,14 +798,14 @@ mod tests { // Id(0): Input(1) // act is if one `AND` has already been computed -> other and (`Id(2)`) should be the only candidate left - compiler.comp_state.value_states.insert(Signal::new(eggmock::Id::from(4), false), ValueState{ is_computed: true, row_location: None }); - - let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); - - compiler.init_candidates(&ntk_backward); - let is_candidate_ids: HashSet = compiler.comp_state.candidates.iter().map(|(id,_)| *id).collect(); - let should_candidate_ids: HashSet = HashSet::from([Signal::new( eggmock::Id::from(2), false), Signal::new(eggmock::Id::from(4), false)]); - assert_eq!( is_candidate_ids, should_candidate_ids); + // compiler.comp_state.value_states.insert(Signal::new(eggmock::Id::from(4), false), ValueState{ is_computed: true, row_location: None }); + // + // let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); + // + // compiler.init_candidates(&ntk_backward); + // let is_candidate_ids: HashSet = compiler.comp_state.candidates.iter().map(|(id,_)| *id).collect(); + // let should_candidate_ids: HashSet = HashSet::from([Signal::new( eggmock::Id::from(2), false), Signal::new(eggmock::Id::from(4), false)]); + // assert_eq!( is_candidate_ids, should_candidate_ids); // TODO: test-case with node that relies on one input src-operand and one non-input (intermediate node) src-operand } @@ -829,20 +834,20 @@ mod tests { // Id(0): Input(1) // act is if one `AND` has already been computed -> other and (`Id(2)`) should be the only candidate left - compiler.comp_state.value_states.insert(Signal::new(eggmock::Id::from(2), false), ValueState{ is_computed: true, row_location: None }); - - let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); - - let scheduling_prio = compiler.compute_scheduling_prio_for_node(Signal::new(eggmock::Id::from(4), false), &ntk_backward); - assert_eq!(scheduling_prio, SchedulingPrio { nr_last_value_uses: 2, nr_src_operands: 2, nr_result_operands: 1 } ); + // compiler.comp_state.value_states.insert(Signal::new(eggmock::Id::from(2), false), ValueState{ is_computed: true, row_location: None }); + // + // let ntk_backward = ComputedNetworkWithBackwardEdges::new(ntk); + // + // let scheduling_prio = compiler.compute_scheduling_prio_for_node(Signal::new(eggmock::Id::from(4), false), &ntk_backward); + // assert_eq!(scheduling_prio, SchedulingPrio { nr_last_value_uses: 2, nr_src_operands: 2, nr_result_operands: 1 } ); } #[test] fn test_select_compute_and_ref_subarray() { let compiler = init(); - let (selected_subarray, _) = compiler.select_compute_and_ref_subarray(vec!(RowAddress(0b1_000_000_000), RowAddress(0b1_000_010_000), RowAddress(0b111_000_000_000), RowAddress(0b10_100_000_000),)); - assert_eq!(selected_subarray, 0b1_000_000_000); + // let (selected_subarray, _) = compiler.select_compute_and_ref_subarray(vec!(RowAddress(0b1_000_000_000), RowAddress(0b1_000_010_000), RowAddress(0b111_000_000_000), RowAddress(0b10_100_000_000),)); + // assert_eq!(selected_subarray, 0b1_000_000_000); } #[ignore] From 4249916db940793c2dec138ee91b0a840ad2f749 Mon Sep 17 00:00:00 2001 From: alku662e Date: Sun, 20 Jul 2025 10:58:48 +0200 Subject: [PATCH 37/51] Lots of rewriting, eg changed type SubarrayId to struct SubarrayId. Still assignment of signals to subarrays is buggy when performing multiple NOTs --- rs/src/fc_dram/architecture.rs | 63 +++++++++++++++----- rs/src/fc_dram/compiler.rs | 102 +++++++++++++++++++++++++-------- 2 files changed, 128 insertions(+), 37 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 45fd26b..99cf796 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -16,10 +16,11 @@ pub const ROW_ID_BITMASK: u64 = 0b0_000_000_111_111_111; // 7 highest bits=subar // some utility functions pub fn subarrayid_to_subarray_address(subarray_id: SubarrayId) -> RowAddress { - RowAddress(subarray_id << ROWS_PER_SUBARRAY.ilog2()) // lower bits=rows in subarray + RowAddress(subarray_id.0 << ROWS_PER_SUBARRAY.ilog2()) // lower bits=rows in subarray } /// All Subarrays (except the ones at the edges) have two neighboring subarrays: one below (subarray_id+1) and one above (subarray_id-1) +/// - currently the following subarrays are used together for computations: 0&1,2&3,4&5,.. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumIter)] pub enum NeighboringSubarrayRelPosition { /// `subarray_id-1` @@ -30,9 +31,9 @@ pub enum NeighboringSubarrayRelPosition { impl NeighboringSubarrayRelPosition { /// Get whether `subarray1` is above or below `relative_to` - pub fn get_relative_position(subarray: SubarrayId, relative_to: SubarrayId) -> Self { - assert!((subarray as isize - relative_to as isize).abs() == 1, "Given Arrays are not neighboring arrays"); - if subarray > relative_to { + pub fn get_relative_position(subarray: &SubarrayId, relative_to: &SubarrayId) -> Self { + assert!((subarray.0 as isize - relative_to.0 as isize).abs() == 1, "Given Arrays are not neighboring arrays"); + if subarray.0 > relative_to.0 { NeighboringSubarrayRelPosition::Below } else { NeighboringSubarrayRelPosition::Above @@ -100,7 +101,7 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { // TODO: NEXT let get_distance_of_row_to_sense_amps = |row: RowAddress, subarray_rel_position: NeighboringSubarrayRelPosition| -> RowDistanceToSenseAmps { // NOTE: last & first subarrays only have sense-amps from one side - if (row.get_subarray_id() == NR_SUBARRAYS-1 && subarray_rel_position == NeighboringSubarrayRelPosition::Below) || (row.get_subarray_id() == 0 && subarray_rel_position == NeighboringSubarrayRelPosition::Above) { + if (row.get_subarray_id().0 == NR_SUBARRAYS-1 && subarray_rel_position == NeighboringSubarrayRelPosition::Below) || (row.get_subarray_id().0 == 0 && subarray_rel_position == NeighboringSubarrayRelPosition::Above) { panic!("Edge subarrays have sense-amps only connected from one side"); } @@ -158,24 +159,29 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { /// - ! must be smaller than `rows_per_subarray * nr_subarrays` (this is NOT checked!) #[derive(Copy, Clone, PartialEq, Eq, Hash)] pub struct RowAddress(pub u64); -pub type SubarrayId = u64; impl RowAddress { /// Return subarray-id the row lies in pub fn get_subarray_id(&self) -> SubarrayId { - (self.0 & SUBARRAY_ID_BITMASK) >> ROWS_PER_SUBARRAY.ilog2() + SubarrayId((self.0 & SUBARRAY_ID_BITMASK) >> ROWS_PER_SUBARRAY.ilog2()) + } + + /// Converts RowAddress to the same row address but in the other subarray + pub fn local_rowaddress_to_subarray_id(&self, subarray_id: SubarrayId) -> RowAddress { + let local_row_address = self.0 & ROW_ID_BITMASK; + RowAddress( local_row_address | subarrayid_to_subarray_address(subarray_id).0 ) } } impl fmt::Display for RowAddress { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}.{}", self.get_subarray_id(), self.0 & ROW_ID_BITMASK) + write!(f, "{}.{}", self.get_subarray_id().0, self.0 & ROW_ID_BITMASK) } } impl fmt::Debug for RowAddress { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}.{}", self.get_subarray_id(), self.0 & ROW_ID_BITMASK) + write!(f, "{}.{}", self.get_subarray_id().0, self.0 & ROW_ID_BITMASK) } } @@ -185,6 +191,29 @@ impl From for RowAddress { } } +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] +pub struct SubarrayId(pub u64); + +impl SubarrayId { + /// Currently all ops only work on half of the cells (every 2nd cell) with two subarrays being + /// in a compute/reference subarray "relation" with exactly one other neighboring subarray. + /// This function returns that other partner (compute/reference) subarray + pub fn get_partner_subarray(&self) -> Self { + if self.0 % 2 == 0 { + SubarrayId(self.0 + 1) + } else { + SubarrayId(self.0 - 1) + } + } +} + +impl fmt::Display for SubarrayId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + + // impl Display for Vec { // fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { // write!(f, "[")?; @@ -398,7 +427,7 @@ pub enum Instruction { impl Display for Instruction { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let display_row = |row: &RowAddress| { format!("{}.{}", row.get_subarray_id(), row.0 & ROW_ID_BITMASK)}; // display subarray separately + let display_row = |row: &RowAddress| { format!("{}.{}", row.get_subarray_id().0, row.0 & ROW_ID_BITMASK)}; // display subarray separately // TODO: change string-representation to display subarray-id let description = match self { Instruction::FracOp(row) => format!("AP({})", display_row(row)), @@ -462,12 +491,12 @@ impl Instruction { let (src_array, dst_array) = (src.get_subarray_id(), dst.get_subarray_id()); let furthest_src_row = activated_rows.iter() - .map(|row| (ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row, NeighboringSubarrayRelPosition::get_relative_position(src_array, dst_array))) // RowDistanceToSenseAmps::Far; // TODO: get this + .map(|row| (ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row, NeighboringSubarrayRelPosition::get_relative_position(&src_array, &dst_array))) // RowDistanceToSenseAmps::Far; // TODO: get this .max() .expect("[ERR] Activated rows were empty"); // NOTE: SRA is assumed to activate the same row-addresses in both subarrays let closest_dst_row = activated_rows.iter() - .map(|row| (ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row, NeighboringSubarrayRelPosition::get_relative_position(dst_array, src_array))) // RowDistanceToSenseAmps::Far; // TODO: get this + .map(|row| (ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row, NeighboringSubarrayRelPosition::get_relative_position(&dst_array, &src_array))) // RowDistanceToSenseAmps::Far; // TODO: get this .min() .expect("[ERR] Activated rows were empty"); let total_success_rate = *success_rate_per_operandnr.get(&nr_operands).expect("[ERR] {nr_operands} not =1|2|4|8|16, the given SRA function seems to not comply with this core assumption.") @@ -643,6 +672,14 @@ impl TryFrom for SupportedNrOperands { } } +impl TryFrom for SupportedNrOperands { + type Error = (); + + fn try_from(value: usize) -> Result { + Self::try_from(value as u8) + } +} + /// Implements behavior of the RowDecoderCircuitry as described in [3] /// TODO: remove in favor of passing arbitrary closure to [`FCDRAMArchitecture::get_activated_rows_from_apa`] pub trait RowDecoder { @@ -666,6 +703,6 @@ mod tests { #[test] // mark function as test-fn fn test_sra() { - println!("{:?}", ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&SupportedNrOperands::try_from(8).unwrap()).unwrap().first()); + println!("{:?}", ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&SupportedNrOperands::try_from(8 as u8).unwrap()).unwrap().first()); } } diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 77bcdf3..2ceae95 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -17,7 +17,7 @@ use log::debug; use priority_queue::PriorityQueue; use strum::IntoEnumIterator; use toml::{Table, Value}; -use std::{cmp::Ordering, collections::{HashMap, HashSet}, env::consts::ARCH, ffi::CStr, fmt::Debug, fs, path::Path}; +use std::{cmp::Ordering, collections::{HashMap, HashSet}, env::consts::ARCH, ffi::CStr, fmt::Debug, fs, path::Path, vec}; /// Provides [`Compiler::compile()`] to compile a logic network into a [`Program`] pub struct Compiler { @@ -94,6 +94,7 @@ impl Compiler { self.comp_state.candidates.extend(new_candidates); } + todo!("NEXT"); // optimize(&mut program); // store output operand location so user can retrieve them after running the program @@ -173,7 +174,7 @@ impl Compiler { // place constants in EVERY subarray for subarray in 0..NR_SUBARRAYS { for constant in Self::CONSTANTS { - let next_free_row = self.comp_state.free_rows_per_subarray.get_mut(&subarray).and_then(|v| v.pop()).expect("No free rows in subarray {subarray} :("); + let next_free_row = self.comp_state.free_rows_per_subarray.get_mut(&SubarrayId(subarray)).and_then(|v| v.pop()).expect("No free rows in subarray {subarray} :("); self.comp_state.constant_values.insert(constant, next_free_row); self.comp_state.dram_state.insert(next_free_row, RowState { is_compute_row: false, live_value: None, constant: Some(constant)} ); } @@ -187,7 +188,7 @@ impl Compiler { fn place_inputs(&mut self, network: &impl NetworkWithBackwardEdges, program: &mut Program) { for input in network.leaves().collect::>() { - // check whether the signal is required in inverted or noninverted form and place it accordingly + // check whether the signal is required in inverted or noninverted form and place it accordingly in all subarrays where it is needed let original_signal = Signal::new(input, false); let inverted_signal = Signal::new(input, true); @@ -224,6 +225,8 @@ impl Compiler { .flat_map( |output| { let output_signal = Signal::new(output, false); let mut output_candidates = vec!(); + + // for every subarray in which the signal is needed for subarray in self.signal_to_subarrayids.get(&output_signal).expect("Signal is not mapped to a subarray yet??") { output_candidates.push(((output_signal,*subarray), self.compute_scheduling_prio_for_node(output_signal, *subarray, network))); } @@ -317,8 +320,8 @@ impl Compiler { let mut free_rows = (0..ROWS_PER_SUBARRAY).map(RowAddress::from).collect::>(); free_rows.retain(|r| {!compute_rows.contains(r)}); for subarray in 0..NR_SUBARRAYS { - let free_rows_in_subarray = free_rows.iter().map(|row| RowAddress(row.0 | subarrayid_to_subarray_address(subarray).0)).collect(); // transform local row address to row addresses in corresponding `subarray` - self.comp_state.free_rows_per_subarray.entry(subarray as SubarrayId).insert_entry(free_rows_in_subarray); + let free_rows_in_subarray = free_rows.iter().map(|row| row.local_rowaddress_to_subarray_id(SubarrayId(subarray))).collect(); // transform local row address to row addresses in corresponding `subarray` + self.comp_state.free_rows_per_subarray.entry(SubarrayId(subarray)).insert_entry(free_rows_in_subarray); } // 0.3 Group operands by subarray (ensure all operands are placed in the right subarray) @@ -334,7 +337,6 @@ impl Compiler { // 0.5 Setup: store all network-nodes yet to be compiled self.init_candidates(network); debug!("Initialized candidates {:?}", self.comp_state.candidates); - todo!("NEXT"); } /// Assigns signals to subarrays and through this determines placement of those signal in the DRAM module @@ -354,16 +356,24 @@ impl Compiler { // - TODO: continue graph traversal with src-operands of the outputs (until primary inputs are reached) let mut subarray_id = 1; // start with 1 since edge subarrays cant be used as compute subarrays for output in network.outputs() { - self.signal_to_subarrayids.insert(output, vec!(subarray_id)); // determine (virtual) subarray in which output will reside + self.signal_to_subarrayids.insert(output, vec!(SubarrayId(subarray_id))); // determine (virtual) subarray in which output will reside let (noninverted_src_signals, inverted_src_signals) = self.get_all_noninverted_src_signals(output, network); println!("{:?}", noninverted_src_signals.clone()); // all directly (might in theory) reside in the same subarray as `output` (since no NOTS are inbtw which locate them to a neighboring subarray) for connected_signal in noninverted_src_signals { - self.signal_to_subarrayids.entry(connected_signal).or_default().push(subarray_id); // determine (virtual) subarray in which output will reside + self.signal_to_subarrayids.entry(connected_signal).or_default().push(SubarrayId(subarray_id)); // determine (virtual) subarray in which output will reside } - // place inverted signals in neighboring subarray + // inverted signals need to be inverted and hence their non-inverted value is placed in the original subarray (from which it will be inverted later on into the neighboring subarray) + for inverted_signal in inverted_src_signals.as_slice() { + let uninverted_signal = Signal::new(inverted_signal.node_id(), false); + debug!("Placing Signal: {uninverted_signal:?} in {subarray_id} "); + self.signal_to_subarrayids.entry(uninverted_signal).or_default().push(SubarrayId(subarray_id)); // determine (virtual) subarray in which output will reside + } + // place inverted signals in neighboring subarray, 2x inverted signals in same subarray, 3x inverted signals in neighboring subarray etc... + // TODO: same thing as before: place non-inverted version of inverted signals in opposite subarray ! + // !!! doesnt support >=2 NOTs on one path yet !!! let neighboring_subarray = subarray_id - 1; // place signals that are inverted odd number of times in Above subarray (arbitrary decision, epxloring whether this makes a difference might be explored in future) let mut unvisited_signals_in_same_subarray: Vec = vec!(); // inverting even nr of times leads to signals being placed in same subarray let mut unvisited_signals_in_neighboring_subarray = inverted_src_signals; @@ -372,11 +382,11 @@ impl Compiler { // println!("Neighboring subarray: {:?}", unvisited_signals_in_neighboring_subarray); if let Some(signal_neighboring_subarray) = unvisited_signals_in_neighboring_subarray.pop() { - self.signal_to_subarrayids.entry(signal_neighboring_subarray).or_default().push(neighboring_subarray); + self.signal_to_subarrayids.entry(signal_neighboring_subarray).or_default().push(SubarrayId(neighboring_subarray)); // these are placed in the Above subarray (arbitrary decision, epxloring whether this makes a difference might be explored in future) let (signals_neighboring_subarray_of_output, mut signals_same_subarray_as_output) = self.get_all_noninverted_src_signals(signal_neighboring_subarray, network); for inverted_signal in signals_neighboring_subarray_of_output { - self.signal_to_subarrayids.entry(inverted_signal).or_default().push(neighboring_subarray); + self.signal_to_subarrayids.entry(inverted_signal).or_default().push(SubarrayId(neighboring_subarray)); } unvisited_signals_in_same_subarray.append(&mut signals_same_subarray_as_output); @@ -384,11 +394,11 @@ impl Compiler { if let Some(signal_same_subarray) = unvisited_signals_in_same_subarray.pop() { - self.signal_to_subarrayids.entry(signal_same_subarray).or_default().push(subarray_id); + self.signal_to_subarrayids.entry(signal_same_subarray).or_default().push(SubarrayId(subarray_id)); // signals inverted even nr of times are placed in the same subarray as the `output` Signal let (signals_same_subarray_of_output, mut signals_neighboring_subarray_as_output) = self.get_all_noninverted_src_signals(signal_same_subarray, network); for signal in signals_same_subarray_of_output { - self.signal_to_subarrayids.entry(signal).or_default().push(subarray_id); + self.signal_to_subarrayids.entry(signal).or_default().push(SubarrayId(subarray_id)); } unvisited_signals_in_neighboring_subarray.append(&mut signals_neighboring_subarray_as_output); @@ -400,7 +410,7 @@ impl Compiler { } - debug!("{:?}", self.signal_to_subarrayids); + debug!("Signals to subarrayids: {:?}", self.signal_to_subarrayids); } /// Returns all src signals which are not inverted. These are exactly those signals that can be placed in the same subarray as. @@ -504,8 +514,32 @@ impl Compiler { let row_combi_correct_subarray = todo!(); // REMINDER: rows returned `compute_row_activations` are not yet adjusted for the right subarray } - /// Returns the instructions needed to perform `language_op` - fn execute_and_or(&self, language_op: Aoig) -> Vec { + /// Returns the instructions needed to perform `language_op` placing the result in a free row in the `compute_subarray` + /// - TODO: also store negated signal (in reference subarray) if it's needed later on? + fn execute_and_or(&mut self, node_id: Id, compute_subarray: SubarrayId, network: &impl NetworkWithBackwardEdges) -> Vec { + let mut instructions = vec!(); + + let (reference_subarray, rel_position_sense_amps_to_compute_subarray) = if compute_subarray.0 % 2 == 0 { // currently the following arrays are compute&reference subarrays of each other: 0&1,2&3,4&5,... + (compute_subarray.0 + 1, NeighboringSubarrayRelPosition::Below) + } else { + (compute_subarray.0 - 1, NeighboringSubarrayRelPosition::Above) + }; + + + let nr_src_operands = SupportedNrOperands::try_from(network.node(node_id).inputs().len()).unwrap(); + let compute_row_combi= self.compute_row_activations.get(&(nr_src_operands, rel_position_sense_amps_to_compute_subarray)).expect("Nr input operands is {}"); + let compute_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(compute_row_combi).unwrap(); + + // 0. Move all src_operands into compute rows + // TODO: map compute rows to right subarray + for (&compute_row, &src_signal) in compute_rows.iter().zip(network.node(node_id).inputs()) { + let compute_row = compute_row.local_rowaddress_to_subarray_id(compute_subarray); + let &src_row = self.comp_state.value_states.get(&(src_signal, compute_subarray)).unwrap_or_else(|| panic!("Src signal {src_signal:?} is not present in compute subarray {compute_subarray} ???")); + instructions.push(self.execute_intrasubarray_rowclone(src_row, compute_row)); + } + + + let language_op = network.node(node_id); let logic_op = match language_op { // REMINDER: operand-nr is extracted by looking at nr of children beforehand Aoig::And(_) | Aoig::And4(_) | Aoig::And8(_)| Aoig::And16(_) => LogicOp::AND, @@ -513,7 +547,22 @@ impl Compiler { _ => panic!("candidate is expected to be a logic op"), }; - todo!(); + // 1. Initialize reference subarray + let ref_rows = compute_rows.iter().map(|c| c.local_rowaddress_to_subarray_id(SubarrayId(reference_subarray))).collect(); + instructions.append(&mut self.init_reference_subarray(ref_rows, logic_op)); + + // 2. Execute actual computation + instructions.push(Instruction::ApaAndOr(compute_row_combi.0.local_rowaddress_to_subarray_id(compute_subarray), compute_row_combi.1.local_rowaddress_to_subarray_id(SubarrayId(reference_subarray)))); + + // 3. Move result into non-compute row + let free_row = self.comp_state.free_rows_per_subarray.get_mut(&compute_subarray).and_then(|v| v.pop()).unwrap_or_else(|| panic!("No more free rows in subarray {compute_subarray}")); + self.comp_state.value_states.insert((Signal::new(node_id, false), compute_subarray), free_row); // TODO: for inverted signals not in result_subarray, right? + + instructions + } + + fn execute_intrasubarray_rowclone(&self, src_row: RowAddress, dst_row: RowAddress) -> Instruction { + Instruction::RowCloneFPM(src_row, dst_row, String::from("Move into compute row")) } /// Returns Instructions to execute given `next_candidate` (which is a signal which needs to reside in a specific subarray after performing the execution) @@ -521,8 +570,8 @@ impl Compiler { /// /// TODO: NEXT fn execute_next_instruction(&mut self, next_candidate: &(Signal, SubarrayId), network: &impl NetworkWithBackwardEdges) -> Vec { - let (next_signal, result_subarray) = next_candidate; - let node_id = next_signal.node_id(); + let (signal, result_subarray) = next_candidate; + let node_id = signal.node_id(); // quick validity check: ensure all inputs are already present in the required array // assert!(network.node(node_id).inputs().iter().all(|input| { @@ -531,14 +580,19 @@ impl Compiler { let mut next_instructions = vec!(); // 1. Perform actual operation of the node - let language_op = network.node(node_id); - next_instructions.append(&mut self.execute_and_or(language_op)); + + let compute_subarray = if signal.is_inverted() { result_subarray.get_partner_subarray() } else { *result_subarray }; // for inverted signals first compute the noninverted signal in the other subarray + + println!("EXECUTING {:?}", next_candidate); + next_instructions.append(&mut self.execute_and_or(node_id, compute_subarray, network)); + todo!("NEXT"); + // 2. Negate the result (if needed) - if next_signal.is_inverted() { - let mut negate_instructions = self.execute_not(next_signal); + if signal.is_inverted() { + let mut negate_instructions = self.execute_not(signal); next_instructions.append(&mut negate_instructions); } - todo!(); + next_instructions // // debug!("Executing candidate {:?}", next_candidate); From d80c112eb406ea501e84bb6278de3456a9ddc264 Mon Sep 17 00:00:00 2001 From: alku662e Date: Sun, 20 Jul 2025 13:25:46 +0200 Subject: [PATCH 38/51] Still on my way... . But executing a single AND/OR followed by a NOT works. --- rs/src/fc_dram/compiler.rs | 95 +++++++++++++++++++++++++++++--------- 1 file changed, 73 insertions(+), 22 deletions(-) diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index 2ceae95..db9efc6 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -94,7 +94,7 @@ impl Compiler { self.comp_state.candidates.extend(new_candidates); } - todo!("NEXT"); + debug!("Instructions: {:?}", program.instructions); // optimize(&mut program); // store output operand location so user can retrieve them after running the program @@ -357,51 +357,88 @@ impl Compiler { let mut subarray_id = 1; // start with 1 since edge subarrays cant be used as compute subarrays for output in network.outputs() { self.signal_to_subarrayids.insert(output, vec!(SubarrayId(subarray_id))); // determine (virtual) subarray in which output will reside + let neighboring_subarray = SubarrayId(subarray_id).get_partner_subarray(); + let (actual_subarray , neighboring_subarray) = { + + // if output is inverted, then the non-inverted value resides in the partner subarray + if output.is_inverted() { + self.signal_to_subarrayids.insert(Signal::new(output.node_id(), false), vec!(neighboring_subarray)); // determine (virtual) subarray in which output will reside + (neighboring_subarray, SubarrayId(subarray_id)) + } else { + (SubarrayId(subarray_id), neighboring_subarray) + } + }; + let (noninverted_src_signals, inverted_src_signals) = self.get_all_noninverted_src_signals(output, network); - println!("{:?}", noninverted_src_signals.clone()); + println!("Noninverted src signals: {:?}", noninverted_src_signals.clone()); // all directly (might in theory) reside in the same subarray as `output` (since no NOTS are inbtw which locate them to a neighboring subarray) for connected_signal in noninverted_src_signals { - self.signal_to_subarrayids.entry(connected_signal).or_default().push(SubarrayId(subarray_id)); // determine (virtual) subarray in which output will reside + self.signal_to_subarrayids.entry(connected_signal).or_default().push(actual_subarray); // determine (virtual) subarray in which output will reside } - // inverted signals need to be inverted and hence their non-inverted value is placed in the original subarray (from which it will be inverted later on into the neighboring subarray) + // Place direct inputs that ARE inverted in same subarray (while their non-inverted version will end up in the neighboring subarray) for inverted_signal in inverted_src_signals.as_slice() { - let uninverted_signal = Signal::new(inverted_signal.node_id(), false); - debug!("Placing Signal: {uninverted_signal:?} in {subarray_id} "); - self.signal_to_subarrayids.entry(uninverted_signal).or_default().push(SubarrayId(subarray_id)); // determine (virtual) subarray in which output will reside + self.signal_to_subarrayids.entry(*inverted_signal).or_default().push(actual_subarray); // determine (virtual) subarray in which output will reside } // place inverted signals in neighboring subarray, 2x inverted signals in same subarray, 3x inverted signals in neighboring subarray etc... // TODO: same thing as before: place non-inverted version of inverted signals in opposite subarray ! // !!! doesnt support >=2 NOTs on one path yet !!! - let neighboring_subarray = subarray_id - 1; // place signals that are inverted odd number of times in Above subarray (arbitrary decision, epxloring whether this makes a difference might be explored in future) let mut unvisited_signals_in_same_subarray: Vec = vec!(); // inverting even nr of times leads to signals being placed in same subarray - let mut unvisited_signals_in_neighboring_subarray = inverted_src_signals; + let mut unvisited_signals_in_neighboring_subarray: Vec = inverted_src_signals.iter() + .filter(|signal| !network.node(signal.node_id()).is_leaf() ) // leaves don't need to be placed in neighboring subarray since inputs are placed by user + .map(|signal| Signal::new(signal.node_id(), false)).collect(); // before negation the signals are in the neighboring subarray while !unvisited_signals_in_same_subarray.is_empty() || !unvisited_signals_in_neighboring_subarray.is_empty() { // println!("Same subarray: {:?}", unvisited_signals_in_same_subarray); // println!("Neighboring subarray: {:?}", unvisited_signals_in_neighboring_subarray); if let Some(signal_neighboring_subarray) = unvisited_signals_in_neighboring_subarray.pop() { - self.signal_to_subarrayids.entry(signal_neighboring_subarray).or_default().push(SubarrayId(neighboring_subarray)); + self.signal_to_subarrayids.entry(signal_neighboring_subarray).or_default().push(neighboring_subarray); // these are placed in the Above subarray (arbitrary decision, epxloring whether this makes a difference might be explored in future) - let (signals_neighboring_subarray_of_output, mut signals_same_subarray_as_output) = self.get_all_noninverted_src_signals(signal_neighboring_subarray, network); + // NOTE: signals that are inverted an even nr of times are placed in the same subarray as the output + let (signals_neighboring_subarray_of_output, signals_inverted_even_nr_times) = self.get_all_noninverted_src_signals(signal_neighboring_subarray, network); for inverted_signal in signals_neighboring_subarray_of_output { - self.signal_to_subarrayids.entry(inverted_signal).or_default().push(SubarrayId(neighboring_subarray)); + self.signal_to_subarrayids.entry(inverted_signal).or_default().push(neighboring_subarray); } - unvisited_signals_in_same_subarray.append(&mut signals_same_subarray_as_output); + // signals which are inverted again require the non-inverted version to be in the other subarray + let mut signals_to_invert_once_more: Vec = signals_inverted_even_nr_times.into_iter().filter(|signal| { + if network.node(signal.node_id()).is_leaf() { + // is input signal + self.signal_to_subarrayids.entry(*signal).or_default().push(neighboring_subarray); + false + } else { true } + }).collect(); // inputs are placed by user (also inverted ones) + for even_times_inverted_signals in signals_to_invert_once_more.as_slice() { + let signal = Signal::new(even_times_inverted_signals.node_id(), false); + self.signal_to_subarrayids.entry(signal).or_default().push(actual_subarray); + } + unvisited_signals_in_same_subarray.append(&mut signals_to_invert_once_more); } if let Some(signal_same_subarray) = unvisited_signals_in_same_subarray.pop() { - self.signal_to_subarrayids.entry(signal_same_subarray).or_default().push(SubarrayId(subarray_id)); + self.signal_to_subarrayids.entry(signal_same_subarray).or_default().push(actual_subarray); // signals inverted even nr of times are placed in the same subarray as the `output` Signal - let (signals_same_subarray_of_output, mut signals_neighboring_subarray_as_output) = self.get_all_noninverted_src_signals(signal_same_subarray, network); + // NOTE: signals that are inverted an odd nr of times are placed in the neighboring subarray of the output + let (signals_same_subarray_of_output, signals_inverted_even_nr_times) = self.get_all_noninverted_src_signals(signal_same_subarray, network); for signal in signals_same_subarray_of_output { - self.signal_to_subarrayids.entry(signal).or_default().push(SubarrayId(subarray_id)); + self.signal_to_subarrayids.entry(signal).or_default().push(actual_subarray); } - unvisited_signals_in_neighboring_subarray.append(&mut signals_neighboring_subarray_as_output); + // signals which are inverted again require the non-inverted version to be in the other subarray + let mut signals_to_invert_once_more: Vec = signals_inverted_even_nr_times.into_iter().filter(|signal| { + if network.node(signal.node_id()).is_leaf() { + // is input signal + self.signal_to_subarrayids.entry(*signal).or_default().push(neighboring_subarray); + false + } else { true } + }).collect(); // inputs are placed by user (also inverted ones) + for even_times_inverted_signals in signals_to_invert_once_more.as_slice() { + let signal = Signal::new(even_times_inverted_signals.node_id(), false); + self.signal_to_subarrayids.entry(signal).or_default().push(neighboring_subarray); + } + unvisited_signals_in_neighboring_subarray.append(&mut signals_to_invert_once_more); } } @@ -507,11 +544,26 @@ impl Compiler { /// - NOTE: currenlty only single-operand NOTs are supported bc /// 1) more operands lead to (slightly) worse results (see Figure10 in [1]) /// 2) since there are separate compute rows using multiple dst rows doesn't make sense (the values need to be copied out of the dst-rows anyway into non-compute rows) - fn execute_not(&self, signal_to_invert: &Signal) -> Vec { + fn execute_not(&mut self, signal_to_invert: &Signal, dst_array: SubarrayId) -> Vec { + let mut instructions = vec!(); let row_combi = self.compute_row_activations.get(&(SupportedNrOperands::One, NeighboringSubarrayRelPosition::Above)).unwrap(); - // perform `NOT` + let src_array = dst_array.get_partner_subarray(); + + // 1. Copy non-inverted operand into src-row + let src_row = row_combi.0.local_rowaddress_to_subarray_id(src_array); + let src_location = self.comp_state.value_states.get(&(*signal_to_invert, src_array)).unwrap_or_else(|| panic!("Src operand {src_row} is not live in subarray {src_array}??")); + + instructions.push(self.execute_intrasubarray_rowclone(*src_location, src_row)); - let row_combi_correct_subarray = todo!(); // REMINDER: rows returned `compute_row_activations` are not yet adjusted for the right subarray + // 2. Execute NOT + let dst_row = row_combi.1.local_rowaddress_to_subarray_id(dst_array); + instructions.push(Instruction::ApaNOT(src_row, dst_row)); + + // 3. Copy negated value out of compute rows + let free_row = self.comp_state.free_rows_per_subarray.get_mut(&dst_array).and_then(|v| v.pop()).unwrap_or_else(|| panic!("OOM: No free rows in subarray {dst_array}")); + instructions.push(self.execute_intrasubarray_rowclone(dst_row, free_row)); + + instructions } /// Returns the instructions needed to perform `language_op` placing the result in a free row in the `compute_subarray` @@ -585,11 +637,10 @@ impl Compiler { println!("EXECUTING {:?}", next_candidate); next_instructions.append(&mut self.execute_and_or(node_id, compute_subarray, network)); - todo!("NEXT"); // 2. Negate the result (if needed) if signal.is_inverted() { - let mut negate_instructions = self.execute_not(signal); + let mut negate_instructions = self.execute_not(signal, *result_subarray); next_instructions.append(&mut negate_instructions); } next_instructions From 6f6c2b97139a805a726add8ca4cda39ee19b5bdc Mon Sep 17 00:00:00 2001 From: alku662e Date: Sun, 20 Jul 2025 14:14:38 +0200 Subject: [PATCH 39/51] IT FINALLY WORKS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1 --- rs/src/fc_dram/compiler.rs | 52 ++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index db9efc6..e1a33a2 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -96,6 +96,7 @@ impl Compiler { debug!("Instructions: {:?}", program.instructions); // optimize(&mut program); + debug!("{:?}", self.comp_state.value_states); // store output operand location so user can retrieve them after running the program let outputs = network.outputs(); @@ -230,6 +231,16 @@ impl Compiler { for subarray in self.signal_to_subarrayids.get(&output_signal).expect("Signal is not mapped to a subarray yet??") { output_candidates.push(((output_signal,*subarray), self.compute_scheduling_prio_for_node(output_signal, *subarray, network))); } + + // if negation is also needed + let inverted_output_signal = Signal::new(output, true); + if self.signal_to_subarrayids.contains_key(&inverted_output_signal) { + // for every subarray in which the signal is needed + for subarray in self.signal_to_subarrayids.get(&inverted_output_signal).expect("Signal is not mapped to a subarray yet??") { + output_candidates.push(((inverted_output_signal,*subarray), self.compute_scheduling_prio_for_node(inverted_output_signal, *subarray, network))); + } + } + output_candidates }) .collect(); @@ -247,11 +258,21 @@ impl Compiler { // filter for new nodes that have all their input-operands available now in the same subarray (->only inputs of computed nodes could have changed to candidate-state, other nodes remain uneffected) .filter({|out| network.node(*out).inputs().iter() .all( |input| { + debug!("OUTPUT: {out:?} with input {input:?}"); debug!("Out: {:?}, In: {:?}", out, input); self.comp_state.value_states.contains_key(&(*input, subarray)) }) }) - .map(|id| ((Signal::new(id, false), subarray), self.compute_scheduling_prio_for_node(Signal::new(id, false), subarray, network))) // TODO: check if inverted signal is required as well! + .flat_map(|id| { + let noninverted_signal = Signal::new(id, false); + let mut new_candidates = vec!(((noninverted_signal, subarray), self.compute_scheduling_prio_for_node(noninverted_signal, subarray, network))); // noninverted version needs to be computed anyway + let inverted_signal = Signal::new(id, true); + // if needed also schedule `inverted_signal` for computation + if self.signal_to_subarrayids.contains_key(&inverted_signal) { + new_candidates.push(((inverted_signal, subarray.get_partner_subarray()), self.compute_scheduling_prio_for_node(noninverted_signal, subarray, network))); + } + new_candidates.into_iter().collect::>() + }) // TODO: check if inverted signal is required as well! .collect() } @@ -356,7 +377,7 @@ impl Compiler { // - TODO: continue graph traversal with src-operands of the outputs (until primary inputs are reached) let mut subarray_id = 1; // start with 1 since edge subarrays cant be used as compute subarrays for output in network.outputs() { - self.signal_to_subarrayids.insert(output, vec!(SubarrayId(subarray_id))); // determine (virtual) subarray in which output will reside + self.signal_to_subarrayids.insert(output, vec!(SubarrayId(subarray_id))); // determine (virtual) subarray in which output will reside - assignment might change in a later stage by merging several outputs into same subarray if there is enough space let neighboring_subarray = SubarrayId(subarray_id).get_partner_subarray(); let (actual_subarray , neighboring_subarray) = { @@ -369,7 +390,7 @@ impl Compiler { } }; - let (noninverted_src_signals, inverted_src_signals) = self.get_all_noninverted_src_signals(output, network); + let (noninverted_src_signals, inverted_src_signals) = self.get_all_noninverted_and_inverted_src_signals(output, network); println!("Noninverted src signals: {:?}", noninverted_src_signals.clone()); // all directly (might in theory) reside in the same subarray as `output` (since no NOTS are inbtw which locate them to a neighboring subarray) @@ -385,20 +406,22 @@ impl Compiler { // TODO: same thing as before: place non-inverted version of inverted signals in opposite subarray ! // !!! doesnt support >=2 NOTs on one path yet !!! let mut unvisited_signals_in_same_subarray: Vec = vec!(); // inverting even nr of times leads to signals being placed in same subarray - let mut unvisited_signals_in_neighboring_subarray: Vec = inverted_src_signals.iter() + let mut unvisited_signals_in_neighboring_subarray: Vec = inverted_src_signals.iter() // =those signals that are negated an odd nr of times .filter(|signal| !network.node(signal.node_id()).is_leaf() ) // leaves don't need to be placed in neighboring subarray since inputs are placed by user .map(|signal| Signal::new(signal.node_id(), false)).collect(); // before negation the signals are in the neighboring subarray while !unvisited_signals_in_same_subarray.is_empty() || !unvisited_signals_in_neighboring_subarray.is_empty() { - // println!("Same subarray: {:?}", unvisited_signals_in_same_subarray); - // println!("Neighboring subarray: {:?}", unvisited_signals_in_neighboring_subarray); + println!("Same subarray: {:?}", unvisited_signals_in_same_subarray); + println!("Neighboring subarray: {:?}", unvisited_signals_in_neighboring_subarray); if let Some(signal_neighboring_subarray) = unvisited_signals_in_neighboring_subarray.pop() { + debug!("Neighboring: {signal_neighboring_subarray:?}"); self.signal_to_subarrayids.entry(signal_neighboring_subarray).or_default().push(neighboring_subarray); // these are placed in the Above subarray (arbitrary decision, epxloring whether this makes a difference might be explored in future) // NOTE: signals that are inverted an even nr of times are placed in the same subarray as the output - let (signals_neighboring_subarray_of_output, signals_inverted_even_nr_times) = self.get_all_noninverted_src_signals(signal_neighboring_subarray, network); - for inverted_signal in signals_neighboring_subarray_of_output { - self.signal_to_subarrayids.entry(inverted_signal).or_default().push(neighboring_subarray); + let (signals_neighboring_subarray, signals_inverted_even_nr_times) = self.get_all_noninverted_and_inverted_src_signals(signal_neighboring_subarray, network); + for signal_inverted_odd_nr_times in signals_neighboring_subarray { + debug!("ODD: {signal_inverted_odd_nr_times:?} placed in subarray {neighboring_subarray}"); + self.signal_to_subarrayids.entry(signal_inverted_odd_nr_times).or_default().push(neighboring_subarray); } // signals which are inverted again require the non-inverted version to be in the other subarray @@ -418,10 +441,11 @@ impl Compiler { if let Some(signal_same_subarray) = unvisited_signals_in_same_subarray.pop() { + debug!("Same: {signal_same_subarray:?}"); self.signal_to_subarrayids.entry(signal_same_subarray).or_default().push(actual_subarray); // signals inverted even nr of times are placed in the same subarray as the `output` Signal // NOTE: signals that are inverted an odd nr of times are placed in the neighboring subarray of the output - let (signals_same_subarray_of_output, signals_inverted_even_nr_times) = self.get_all_noninverted_src_signals(signal_same_subarray, network); + let (signals_same_subarray_of_output, signals_inverted_even_nr_times) = self.get_all_noninverted_and_inverted_src_signals(signal_same_subarray, network); for signal in signals_same_subarray_of_output { self.signal_to_subarrayids.entry(signal).or_default().push(actual_subarray); } @@ -457,7 +481,7 @@ impl Compiler { /// Tuple of /// 1. Vector of src Signals that are **not** inverted /// 2. Vector of src Signals that are indeed inverted (need to be processed further, only first inverted signal is returned for a subtree) - fn get_all_noninverted_src_signals(&mut self, signal: Signal, network: &impl NetworkWithBackwardEdges) -> (Vec, Vec) { + fn get_all_noninverted_and_inverted_src_signals(&mut self, signal: Signal, network: &impl NetworkWithBackwardEdges) -> (Vec, Vec) { let signal_node = network.node(signal.node_id()); let mut noninverted_src_signals = vec!(); @@ -540,7 +564,7 @@ impl Compiler { instructions } - /// Returns instructions to be executed for performing `NOT` on `src_row` into `dst_row` + /// Returns instructions to be executed for performing `NOT` on `src_row` into `dst_row` and updates the `comp_state` holding the negated value /// - NOTE: currenlty only single-operand NOTs are supported bc /// 1) more operands lead to (slightly) worse results (see Figure10 in [1]) /// 2) since there are separate compute rows using multiple dst rows doesn't make sense (the values need to be copied out of the dst-rows anyway into non-compute rows) @@ -551,7 +575,8 @@ impl Compiler { // 1. Copy non-inverted operand into src-row let src_row = row_combi.0.local_rowaddress_to_subarray_id(src_array); - let src_location = self.comp_state.value_states.get(&(*signal_to_invert, src_array)).unwrap_or_else(|| panic!("Src operand {src_row} is not live in subarray {src_array}??")); + let unnegated_signal = Signal::new(signal_to_invert.node_id(), false); + let src_location = self.comp_state.value_states.get(&(unnegated_signal, src_array)).unwrap_or_else(|| panic!("Src operand {src_row} is not live in subarray {src_array} (see {signal_to_invert:?})??")); instructions.push(self.execute_intrasubarray_rowclone(*src_location, src_row)); @@ -562,6 +587,7 @@ impl Compiler { // 3. Copy negated value out of compute rows let free_row = self.comp_state.free_rows_per_subarray.get_mut(&dst_array).and_then(|v| v.pop()).unwrap_or_else(|| panic!("OOM: No free rows in subarray {dst_array}")); instructions.push(self.execute_intrasubarray_rowclone(dst_row, free_row)); + self.comp_state.value_states.insert((*signal_to_invert, dst_array), free_row); instructions } From e59189bf41c72b47dc1e401c8b2ae5f23127fdb1 Mon Sep 17 00:00:00 2001 From: alku662e Date: Sat, 26 Jul 2025 12:42:43 +0200 Subject: [PATCH 40/51] Add missing rewrite rules --- rs/src/fc_dram/architecture.rs | 3 ++- rs/src/fc_dram/mod.rs | 35 +++++++++++++++++----------------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 99cf796..3d0adbe 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -300,6 +300,7 @@ pub struct FCDRAMArchitecture { get_activated_rows_from_apa: fn(RowAddress, RowAddress) -> Vec, /// Stores which rows are simultaneously activated for each combination of Row-Addresses (provided to `APA`-operation) /// - REASON: getting the simultaneously activated will probably be requested very frequently (time-space tradeoff, rather than recomputing on every request)) + /// - REMEMBER: set `subarrayid` of passed row-addresses to 0 (activated rows are precomputed exemplary for RowAddresses in subarray=0 since activated rows do not depend on corresponding subarrays) pub precomputed_simultaneous_row_activations: HashMap<(RowAddress, RowAddress), Vec>, /// Map degree of SRA (=nr of activated rows by that SRA) to all combinations of RowAddresses which have that degree of SRA /// - use to eg restrict the choice of row-addresses for n-ary AND/OR (eg 4-ary AND -> at least activate 8 rows; more rows could be activated when using input replication) @@ -432,7 +433,7 @@ impl Display for Instruction { let description = match self { Instruction::FracOp(row) => format!("AP({})", display_row(row)), Instruction::ApaNOT(row1,row2) => format!("APA_NOT({},{})", display_row(row1), display_row(row2)), - Instruction::ApaAndOr(row1,row2) => format!("APA_AND_OR({},{}) // activates {:?}", display_row(row1), display_row(row2), ARCHITECTURE.precomputed_simultaneous_row_activations.get(&(*row1,*row2))), + Instruction::ApaAndOr(row1,row2) => format!("APA_AND_OR({},{}) // activates {:?}", display_row(row1), display_row(row2), ARCHITECTURE.precomputed_simultaneous_row_activations.get(&(row1.local_rowaddress_to_subarray_id(SubarrayId(0)),row2.local_rowaddress_to_subarray_id(SubarrayId(0))))), Instruction::RowCloneFPM(row1, row2, comment) => format!("AA({},{}) // {}", display_row(row1), display_row(row2), comment), Instruction::RowClonePSM(row1, row2) => format!(" TRANSFER({},(rowX)) diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index 610a9dd..5c4f9ae 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -24,7 +24,6 @@ pub mod optimization; pub mod program; pub mod utils; -use std::ffi::{CStr, OsStr}; use std::sync::LazyLock; use std::time::Instant; @@ -56,13 +55,15 @@ static REWRITE_RULES: LazyLock>> = LazyLock::new(| rewrite!("or-and-more-not"; "(or ?a ?b)" => "(! (and (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works rewrite!("and-same"; "(and ?a ?a)" => "?a"), rewrite!("not_not"; "(! (! ?a))" => "?a"), + + // in general more operands are better for AND/OR (see [1]) rewrite!("and2_to_4"; "(and (and ?a ?b) (and ?c ?d))" => "(and4 ?a ?b ?c ?d)"), - // TODO: - // rewrite!("and4_to_8"; "(and (and ?a ?b) (and ?c ?d))" => "(and ?a ?b ?c ?d)"), - // rewrite!("and8_to_16"; "(and (and ?a ?b) (and ?c ?d))" => "(and ?a ?b ?c ?d)"), - // rewrite!("maj_1"; "(maj ?a ?a ?b)" => "?a"), - // rewrite!("maj_2"; "(maj ?a (! ?a) ?b)" => "?b"), - // rewrite!("associativity"; "(maj ?a ?b (maj ?c ?b ?d))" => "(maj ?d ?b (maj ?c ?b ?a))"), + rewrite!("and4_to_8"; "(and (and4 ?a ?b ?c ?d) (and4 ?e ?f ?g ?h))" => "(and8 ?a ?b ?c ?d ?e ?f ?g ?h)"), + rewrite!("and8_to_16"; "(and (and8 ?a ?b ?c ?d ?e ?f ?g ?h) (and8 ?i ?j ?k ?l ?m ?n ?o ?p))" => "(and16 ?a ?b ?c ?d ?e ?f ?g ?h ?i ?j ?k ?l ?m ?n ?o ?p)"), + rewrite!("or2_to_4"; "(or (or ?a ?b) (or ?c ?d))" => "(or4 ?a ?b ?c ?d)"), + rewrite!("or4_to_8"; "(or (or4 ?a ?b ?c ?d) (or4 ?e ?f ?g ?h))" => "(or8 ?a ?b ?c ?d ?e ?f ?g ?h)"), + rewrite!("or8_to_16"; "(or (or8 ?a ?b ?c ?d ?e ?f ?g ?h) (or8 ?i ?j ?k ?l ?m ?n ?o ?p))" => "(or16 ?a ?b ?c ?d ?e ?f ?g ?h ?i ?j ?k ?l ?m ?n ?o ?p)"), + // TODO: no use for NOT with multiple dsts ]; // rules.extend(rewrite!("invert"; "(! (maj ?a ?b ?c))" <=> "(maj (! ?a) (! ?b) (! ?c))")); // rules.extend(rewrite!("distributivity"; "(maj ?a ?b (maj ?c ?d ?e))" <=> "(maj (maj ?a ?b ?c) (maj ?a ?b ?d) ?e)")); @@ -106,16 +107,16 @@ fn compiling_receiver<'a>( ); // 1. Create E-Graph: run equivalence saturation debug!("Running equivalence saturation..."); - // let runner = measure_time!( - // Runner::default().with_egraph(graph).run(rules), "t_runner", settings.print_compilation_stats - // ); - // - // if settings.verbose { - // println!("== Runner Report"); - // runner.print_report(); - // } - // - // let graph = runner.egraph; + let runner = measure_time!( + Runner::default().with_egraph(graph).run(rules), "t_runner", settings.print_compilation_stats + ); + + if settings.verbose { + println!("== Runner Report"); + runner.print_report(); + } + + let graph = runner.egraph; CompilerOutput::new( graph, From e1b88c3c3d4943749d81aa57c80f77a746f52e05 Mon Sep 17 00:00:00 2001 From: alku662e Date: Sat, 26 Jul 2025 14:23:55 +0200 Subject: [PATCH 41/51] Finish E-Graph Rewriting rules and corresponding cost metric --- rs/src/fc_dram/cost_estimation.rs | 37 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/rs/src/fc_dram/cost_estimation.rs b/rs/src/fc_dram/cost_estimation.rs index a2ea27b..859a3ad 100644 --- a/rs/src/fc_dram/cost_estimation.rs +++ b/rs/src/fc_dram/cost_estimation.rs @@ -4,6 +4,7 @@ use eggmock::egg::{CostFunction, Id}; use eggmock::{AoigLanguage, egg::Language}; use log::debug; use std::cmp::Ordering; +use std::collections::HashMap; use std::ops; use std::rc::Rc; @@ -83,11 +84,15 @@ impl CostFunction for CompilingCostFunction { C: FnMut(Id) -> Self::Cost, { // TODO: detect self-cycles, other cycles will be detected by compiling, which will result in an error - // enode.children(); - // TODO: rewrite to `.fold()` + + // see Figure17 [1] + let and_nr_operand_to_success_rate: HashMap = HashMap::from([(2,0.85), (4,0.88), (8,0.9), (16,0.92)]); // read from graph (only estimates anyway) + let or_nr_operand_to_success_rate: HashMap = HashMap::from([(2,0.88), (4,0.9), (8,0.96), (16,0.98)]); // read from graph (only estimates anyway) + + // return higher success-rates for higher n in nary AND/OR (see Figure18 [1]) + let nr_operands = enode.children().len(); // get op-cost of executing `enode`: - // TODO: return higher success-rates for higher n in nary AND/OR let op_cost = match *enode { AoigLanguage::False | AoigLanguage::Input(_) => { InstructionCost { @@ -95,49 +100,43 @@ impl CostFunction for CompilingCostFunction { mem_cycles: 1, // !=0 to ensure Cost-Function is *strictly monotonically increasing* (TODO: monotonicity isn"t needed here, right?") } }, - AoigLanguage::And([node1, node2]) => { + AoigLanguage::And(_) | AoigLanguage::And4(_) | AoigLanguage::And8(_) | AoigLanguage::And16(_) => { let mem_cycles_and = FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::AND) .iter().fold(0, |acc, instr| { acc + instr.get_nr_memcycles() as usize }); debug!("Cycles AND: {}", mem_cycles_and); - let expected_success_rate = 1.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values + let expected_success_rate = 0.83; // see Figure17 [1], assume that compute rows have a "middle" distance to sense-amps + let &success_rate_operand = and_nr_operand_to_success_rate.get(&nr_operands).unwrap(); InstructionCost { - success_rate: SuccessRate::new(expected_success_rate), + success_rate: SuccessRate::new(expected_success_rate * success_rate_operand), mem_cycles: mem_cycles_and, } }, - AoigLanguage::Or([node1, node2]) => { + AoigLanguage::Or(_) | AoigLanguage::Or4(_) | AoigLanguage::Or8(_) | AoigLanguage::Or16(_) => { let mem_cycles_or = FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::OR) .iter().fold(0, |acc, instr| { acc + instr.get_nr_memcycles() as usize }); debug!("Cycles OR: {}", mem_cycles_or); - let expected_success_rate = 1.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values + let expected_success_rate = 0.94; // see Figure17 [1], assume that compute rows have a "middle" distance to sense-amps + let success_rate_operand = or_nr_operand_to_success_rate.get(&nr_operands).unwrap(); InstructionCost { - success_rate: SuccessRate::new(expected_success_rate), + success_rate: SuccessRate::new(expected_success_rate * success_rate_operand), mem_cycles: mem_cycles_or, } }, // TODO: increase cost of NOT? (since it moves the value to another subarray!) // eg prefer `OR(a,b)` to `NOT(AND( NOT(a), NOT(b)))` - AoigLanguage::Not(node) => { + AoigLanguage::Not(_) => { let mem_cycles_not = FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::NOT) .iter().fold(0, |acc, instr| { acc + instr.get_nr_memcycles() as usize }); debug!("Cycles NOT: {}", mem_cycles_not); - let expected_success_rate = 1.0; // TODO: do empirical analysis of success-rate matrices of ops and come up with good values + let expected_success_rate = 0.77; // see Figure11 in [1] (we only use single operand NOT currently) InstructionCost { success_rate: SuccessRate::new(expected_success_rate), mem_cycles: mem_cycles_not, } }, - _ => { - // todo!(); - InstructionCost { - success_rate: SuccessRate::new(1.0), - mem_cycles: 7, - } - // 0 // TODO: implement for nary-ops, eg using `.children()` - } }; debug!("Folding {:?}", enode); From dc668a02e65206ef134708ea12643855b4ee75a1 Mon Sep 17 00:00:00 2001 From: alku662e Date: Sun, 27 Jul 2025 12:15:44 +0200 Subject: [PATCH 42/51] Fix init of reference subarray to use right subarray id for addr of constants --- rs/src/fc_dram/architecture.rs | 15 +------------ rs/src/fc_dram/compiler.rs | 39 ++++++---------------------------- rs/src/fc_dram/mod.rs | 15 ++++++------- 3 files changed, 13 insertions(+), 56 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 3d0adbe..e2f383e 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -418,12 +418,6 @@ pub enum Instruction { /// /// Comment indicates what this FPM was issued for (for simpler debugability) RowCloneFPM(RowAddress, RowAddress, Comment), - /// Copies data from src (1st operand) to dst (2nd operand) using RowClonePSM, which copies the - /// data from `this_bank(src_row) -> other_bank(rowX) -> this_bank(dst_row)` (where - /// `other_bank` might be any other bank). Since this copy uses the internal DRAM-bus it works - /// on cacheline-granularity (64B) which might take some time for 8KiB rows... - /// - see [4] Chap3.3 for `TRANSFER`-instruction - RowClonePSM(RowAddress, RowAddress), } impl Display for Instruction { @@ -435,13 +429,7 @@ impl Display for Instruction { Instruction::ApaNOT(row1,row2) => format!("APA_NOT({},{})", display_row(row1), display_row(row2)), Instruction::ApaAndOr(row1,row2) => format!("APA_AND_OR({},{}) // activates {:?}", display_row(row1), display_row(row2), ARCHITECTURE.precomputed_simultaneous_row_activations.get(&(row1.local_rowaddress_to_subarray_id(SubarrayId(0)),row2.local_rowaddress_to_subarray_id(SubarrayId(0))))), Instruction::RowCloneFPM(row1, row2, comment) => format!("AA({},{}) // {}", display_row(row1), display_row(row2), comment), - Instruction::RowClonePSM(row1, row2) => format!(" - TRANSFER({},(rowX)) - TANSFER(rowX,{}) - ", - display_row(row1), - display_row(row2) - )}; + }; write!(f, "{}", description) } } @@ -458,7 +446,6 @@ impl Instruction { Instruction::ApaNOT(_, _) => 3, // NOTE: this is not explicitly written in the paper, TODO: check with authors Instruction::ApaAndOr(_, _) => 3, // NOTE: this is not explicitly written in the paper, TODO: check with authors Instruction::RowCloneFPM(_, _, _) => 2, // see [4] Chap3.2 - Instruction::RowClonePSM(_, _) => 256, // =(8192B/64B)*2 (*2 since copies two time, to and from `` on 64B-granularity } } diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index e1a33a2..e5272b5 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -508,25 +508,27 @@ impl Compiler { match logic_op { LogicOp::AND => { let frac_row = ref_rows.pop().expect("Min 1 row has to be passed for initializing ref subarray"); // TODO: include success-rate considerations to choose best row to use for storing `V_{DD}/2` - let row_address_1 = self.comp_state.constant_values.get(&1).expect("Constants are expected to be placed in every subarray beforehand"); // row address where all 1s (V_DD) is stored + let row_address_1 = self.comp_state.constant_values.get(&1).expect("Constants are expected to be placed in every subarray beforehand") + .local_rowaddress_to_subarray_id(frac_row.get_subarray_id()); // row address where all 1s (V_DD) are to bestored let mut instructions = vec!(); for _ in 0..self.settings.repetition_fracops { instructions.push(Instruction::FracOp(frac_row)); } for other_row in ref_rows { - instructions.push(Instruction::RowCloneFPM(*row_address_1, other_row, String::from("Init ref-subarray with 1s"))); + instructions.push(Instruction::RowCloneFPM(row_address_1, other_row, String::from("Init ref-subarray with 1s"))); } instructions }, LogicOp::OR => { let frac_row = ref_rows.pop().expect("Min 1 row has to be passed for initializing ref subarray"); // TODO: include success-rate considerations to choose best row to use for storing `V_{DD}/2` - let row_address_0 = self.comp_state.constant_values.get(&0).expect("Constants are expected to be placed in every subarray beforehand"); // row address where all 1s (V_DD) is stored + let row_address_0 = self.comp_state.constant_values.get(&0).expect("Constants are expected to be placed in every subarray beforehand") + .local_rowaddress_to_subarray_id(frac_row.get_subarray_id()); // row address where all 0s (GND) are to be stored let mut instructions = vec!(); for _ in 0..self.settings.repetition_fracops { instructions.push(Instruction::FracOp(frac_row)); } for other_row in ref_rows { - instructions.push(Instruction::RowCloneFPM(*row_address_0, other_row, String::from("Init ref-subarray with 0s"))); + instructions.push(Instruction::RowCloneFPM(row_address_0, other_row, String::from("Init ref-subarray with 0s"))); } instructions }, @@ -535,35 +537,6 @@ impl Compiler { } } - /// Places the referenced `src_operands` into the corresponding `row_addresses` which are expected to be simultaneously executed using [`Instruction::RowCloneFPM`] - /// - NOTE: `rel_pos_of_ref_subarray` might affect placement of inputs in the future (eg to choose which input rows to choose for *input replication*) - fn init_compute_subarray(&mut self, mut row_addresses: Vec, mut src_operands: Vec, subarray: SubarrayId, logic_op: LogicOp, rel_pos_of_ref_subarray: NeighboringSubarrayRelPosition) -> Vec { - // TODO: validity check: make sure all inputs are actually already inside `subarray` - - let mut instructions = vec!(); - // if there are fewer src-operands than activated rows perform input replication - row_addresses.sort_by_key(|row| ((ARCHITECTURE.get_distance_of_row_to_sense_amps)(*row, rel_pos_of_ref_subarray.clone()))); // replicate input that resides in row with lowest success-rate (=probably the row furthest away) - let nr_elements_to_extend = row_addresses.len() - src_operands.len(); - if nr_elements_to_extend > 0 { - let last_element = *src_operands.last().unwrap(); - src_operands.extend( std::iter::repeat_n(last_element, nr_elements_to_extend)); - } - - for (&row_addr, &src_operand) in row_addresses.iter().zip(src_operands.iter()) { - let src_operand_location = self.comp_state.value_states.get(&(src_operand, subarray)).expect("Src operand not available although it is used by a candidate. Sth went wrong..."); - - self.comp_state.dram_state.insert(row_addr, RowState { is_compute_row: true, live_value: Some(src_operand), constant: None }); - - // TODO: - if (src_operand_location.0 & SUBARRAY_ID_BITMASK) == (row_addr.0 & SUBARRAY_ID_BITMASK) { - instructions.push(Instruction::RowCloneFPM(*src_operand_location, row_addr, String::from("Move operand to compute row"))); - } else { - instructions.push(Instruction::RowClonePSM(*src_operand_location, row_addr)); // TODO: remove this, since it's not usable in COTS DRAMs - } - } - instructions - } - /// Returns instructions to be executed for performing `NOT` on `src_row` into `dst_row` and updates the `comp_state` holding the negated value /// - NOTE: currenlty only single-operand NOTs are supported bc /// 1) more operands lead to (slightly) worse results (see Figure10 in [1]) diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index 5c4f9ae..73b37c4 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -44,15 +44,14 @@ use architecture::*; /// TODO: adjust rewriting rules to FCDRAM (=AND/OR related rewrites like De-Morgan?) static REWRITE_RULES: LazyLock>> = LazyLock::new(|| { let rules = vec![ - // TODO: add "or" - and De-Morgan ? rewrite!("commute-and"; "(and ?a ?b)" => "(and ?b ?a)"), rewrite!("and-1"; "(and ?a 1)" => "?a"), rewrite!("and-0"; "(and ?a 0)" => "0"), - // TODO: first add `AOIG`-language and add conversion AOIG<->AIG (so mockturtle's aig can still be used underneath) - rewrite!("and-or"; "(! (or (! ?a) (! ?b)))" => "(and ?a ?b)"), // (De-Morgan) ! not checked whether this works - rewrite!("or-and"; "(! (and (! ?a) (! ?b)))" => "(or ?a ?b)" ), // (De-Morgan) ! not checked whether this works - rewrite!("and-or-more-not"; "(and ?a ?b)" => "(! (or (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works - rewrite!("or-and-more-not"; "(or ?a ?b)" => "(! (and (! ?a) (! ?b)))"), // (De-Morgan) ! not checked whether this works + + rewrite!("and-or"; "(! (or (! ?a) (! ?b)))" => "(and ?a ?b)"), // (De-Morgan) + rewrite!("or-and"; "(! (and (! ?a) (! ?b)))" => "(or ?a ?b)" ), // (De-Morgan) + rewrite!("and-or-more-not"; "(and ?a ?b)" => "(! (or (! ?a) (! ?b)))"), // (De-Morgan) + rewrite!("or-and-more-not"; "(or ?a ?b)" => "(! (and (! ?a) (! ?b)))"), // (De-Morgan) rewrite!("and-same"; "(and ?a ?a)" => "?a"), rewrite!("not_not"; "(! (! ?a))" => "?a"), @@ -63,10 +62,8 @@ static REWRITE_RULES: LazyLock>> = LazyLock::new(| rewrite!("or2_to_4"; "(or (or ?a ?b) (or ?c ?d))" => "(or4 ?a ?b ?c ?d)"), rewrite!("or4_to_8"; "(or (or4 ?a ?b ?c ?d) (or4 ?e ?f ?g ?h))" => "(or8 ?a ?b ?c ?d ?e ?f ?g ?h)"), rewrite!("or8_to_16"; "(or (or8 ?a ?b ?c ?d ?e ?f ?g ?h) (or8 ?i ?j ?k ?l ?m ?n ?o ?p))" => "(or16 ?a ?b ?c ?d ?e ?f ?g ?h ?i ?j ?k ?l ?m ?n ?o ?p)"), - // TODO: no use for NOT with multiple dsts + // no use for NOT with multiple dsts (for now) ]; - // rules.extend(rewrite!("invert"; "(! (maj ?a ?b ?c))" <=> "(maj (! ?a) (! ?b) (! ?c))")); - // rules.extend(rewrite!("distributivity"; "(maj ?a ?b (maj ?c ?d ?e))" <=> "(maj (maj ?a ?b ?c) (maj ?a ?b ?d) ?e)")); rules }); From 6108f1be883ce038a8fecd9f7b0e87b5370c8d4b Mon Sep 17 00:00:00 2001 From: alku662e Date: Mon, 28 Jul 2025 09:21:41 +0200 Subject: [PATCH 43/51] Fix rewrite rules and output placement of constants --- rs/src/fc_dram/compiler.rs | 18 +++++++++++++++--- rs/src/fc_dram/mod.rs | 13 ++++++++----- rs/src/fc_dram/program.rs | 7 ++++++- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index e5272b5..ed6f689 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -176,8 +176,9 @@ impl Compiler { for subarray in 0..NR_SUBARRAYS { for constant in Self::CONSTANTS { let next_free_row = self.comp_state.free_rows_per_subarray.get_mut(&SubarrayId(subarray)).and_then(|v| v.pop()).expect("No free rows in subarray {subarray} :("); - self.comp_state.constant_values.insert(constant, next_free_row); + self.comp_state.constant_values.insert(constant, next_free_row.local_rowaddress_to_subarray_id(SubarrayId(0))); self.comp_state.dram_state.insert(next_free_row, RowState { is_compute_row: false, live_value: None, constant: Some(constant)} ); + program.constants_row_placement.insert(constant, next_free_row); } } } @@ -278,8 +279,8 @@ impl Compiler { /// Initialize compilation state: /// - choose compute rows (by setting [`Self::compute_row_activations`] + /// - decide in which rows to place constants /// - assign subarray-ids to each NodeId - /// - initialize [`Self::comp_state::free_rows_per_subarray`] with the rows that are free to be used for placing constants, inputs and intermediate values (when execution has started) /// - return code to place input operands in `program` fn init_comp_state(&mut self, network: &impl NetworkWithBackwardEdges, program: &mut Program) { let config_file = unsafe { CStr::from_ptr(self.settings.config_file) }.to_str().unwrap(); @@ -335,7 +336,7 @@ impl Compiler { // 0.2 Save free rows // At the start all rows, except for the compute rows, are free rows let compute_rows = self.compute_row_activations.values().fold(vec!(), |all_compute_rows, next_compute_row_combi| { - let new_compute_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(next_compute_row_combi).expect("Compute row cant be activated??"); + let new_compute_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(next_compute_row_combi).expect("Compute row can't be activated??"); all_compute_rows.iter().chain(new_compute_rows).cloned().collect() }); let mut free_rows = (0..ROWS_PER_SUBARRAY).map(RowAddress::from).collect::>(); @@ -882,6 +883,17 @@ mod tests { Compiler::new(CompilerSettings { print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, repetition_fracops: 5, safe_space_rows_per_subarray: 16, config_file: CString::new("").expect("CString::new failed").as_ptr(), do_save_config: true} ) } + #[test] + fn test_input_placement () { + + let mut compiler = init(); + let mut egraph: EGraph = Default::default(); + egraph.add_expr(&my_expression); + let out = egraph.add(AoigLanguage::OR([eggmock::egg::Id::from(0), eggmock::egg::Id::from(2)])); // additional `And` with one src-operand=input and one non-input src operand + debug!("EGraph used for candidate-init: {:?}", egraph); + let egraph_clone = egraph.clone(); + } + #[test] fn test_candidate_initialization() { let mut compiler = init(); diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index 73b37c4..b6e48dd 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -44,14 +44,17 @@ use architecture::*; /// TODO: adjust rewriting rules to FCDRAM (=AND/OR related rewrites like De-Morgan?) static REWRITE_RULES: LazyLock>> = LazyLock::new(|| { let rules = vec![ - rewrite!("commute-and"; "(and ?a ?b)" => "(and ?b ?a)"), - rewrite!("and-1"; "(and ?a 1)" => "?a"), - rewrite!("and-0"; "(and ?a 0)" => "0"), + // TODO: sth is wrong with these rewrite rules - they produce a non-equivalent logic network ! + // rewrite!("commute-and"; "(and ?a ?b)" => "(and ?b ?a)"), + // rewrite!("and-1"; "(and ?a 1)" => "?a"), + // rewrite!("and-0"; "(and ?a 0)" => "0"), + // rewrite!("or-1"; "(or ?a 1)" => "1"), + // rewrite!("or-0"; "(or ?a 0)" => "?a"), rewrite!("and-or"; "(! (or (! ?a) (! ?b)))" => "(and ?a ?b)"), // (De-Morgan) rewrite!("or-and"; "(! (and (! ?a) (! ?b)))" => "(or ?a ?b)" ), // (De-Morgan) - rewrite!("and-or-more-not"; "(and ?a ?b)" => "(! (or (! ?a) (! ?b)))"), // (De-Morgan) - rewrite!("or-and-more-not"; "(or ?a ?b)" => "(! (and (! ?a) (! ?b)))"), // (De-Morgan) + // rewrite!("and-or-more-not"; "(and ?a ?b)" => "(! (or (! ?a) (! ?b)))"), // (De-Morgan) + // rewrite!("or-and-more-not"; "(or ?a ?b)" => "(! (and (! ?a) (! ?b)))"), // (De-Morgan) rewrite!("and-same"; "(and ?a ?a)" => "?a"), rewrite!("not_not"; "(! (! ?a))" => "?a"), diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs index 447c38b..87a8ce9 100644 --- a/rs/src/fc_dram/program.rs +++ b/rs/src/fc_dram/program.rs @@ -13,7 +13,7 @@ pub struct Program { pub instructions: Vec, /// Specifies in which rows constants have to be placed (!have to be placed in EVERY subarray) /// - TODO: adjust this to only place in subarrays which are actually used as reference subarrays during program execution - pub constants_row_placement: HashMap>, + pub constants_row_placement: HashMap, /// Specifies where row-operands should be placed prior to calling this program /// (This is a convention which tells the user of this lib where the data should be placed within the DRAM before executing this program) /// - NOTE: Signals might have to be placed in several subarrays (REMINDER: movement in btw subarrays is not supported by FCDRAM) @@ -55,6 +55,11 @@ impl Display for Program { writeln!(f, "{:?} in {}", signal, display_rows(rows.to_vec()))?; } writeln!(f, "---------------------------------------")?; + writeln!(f, "Constant operand placement:")?; + for (constant, row) in &self.constants_row_placement { + writeln!(f, "{} in {}", constant, display_row(&row.local_rowaddress_to_subarray_id(super::architecture::SubarrayId(0))))?; + } + writeln!(f, "---------------------------------------")?; for instr in &self.instructions { From 2bfffce53b601449445623f39fb50f968e889d6f Mon Sep 17 00:00:00 2001 From: alku662e Date: Mon, 28 Jul 2025 09:58:56 +0200 Subject: [PATCH 44/51] Print simultaneously activated rows in comments --- rs/src/fc_dram/architecture.rs | 8 +++++++- rs/src/fc_dram/compiler.rs | 34 ++++++++++++++++++++++++++----- rs/src/fc_dram/cost_estimation.rs | 6 +++++- 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index e2f383e..932bdad 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -427,7 +427,13 @@ impl Display for Instruction { let description = match self { Instruction::FracOp(row) => format!("AP({})", display_row(row)), Instruction::ApaNOT(row1,row2) => format!("APA_NOT({},{})", display_row(row1), display_row(row2)), - Instruction::ApaAndOr(row1,row2) => format!("APA_AND_OR({},{}) // activates {:?}", display_row(row1), display_row(row2), ARCHITECTURE.precomputed_simultaneous_row_activations.get(&(row1.local_rowaddress_to_subarray_id(SubarrayId(0)),row2.local_rowaddress_to_subarray_id(SubarrayId(0))))), + Instruction::ApaAndOr(row1,row2) => { + let (src_array, dst_array) = (row1.get_subarray_id(), row2.get_subarray_id()); + let activated_rows: Vec = ARCHITECTURE.precomputed_simultaneous_row_activations.get(&(row1.local_rowaddress_to_subarray_id(SubarrayId(0)),row2.local_rowaddress_to_subarray_id(SubarrayId(0)))).unwrap() + .iter().flat_map(|row| vec!(row.local_rowaddress_to_subarray_id(src_array), row.local_rowaddress_to_subarray_id(dst_array))) + .collect(); + format!("APA_AND_OR({},{}) // activates {:?}", display_row(row1), display_row(row2), activated_rows) + }, Instruction::RowCloneFPM(row1, row2, comment) => format!("AA({},{}) // {}", display_row(row1), display_row(row2), comment), }; write!(f, "{}", description) diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index ed6f689..a514b24 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -880,7 +880,10 @@ mod tests { INIT.call_once(|| { env_logger::init(); }); - Compiler::new(CompilerSettings { print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, repetition_fracops: 5, safe_space_rows_per_subarray: 16, config_file: CString::new("").expect("CString::new failed").as_ptr(), do_save_config: true} ) + Compiler::new(CompilerSettings { + print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, repetition_fracops: 5, safe_space_rows_per_subarray: 16, + config_file: CString::new("/home/alex/Documents/Studium/Sem6/inf_pm_fpa/lime-fork/config/fcdram_hksynx.toml").unwrap().as_ptr(), + do_save_config: false } ) } #[test] @@ -888,10 +891,31 @@ mod tests { let mut compiler = init(); let mut egraph: EGraph = Default::default(); - egraph.add_expr(&my_expression); - let out = egraph.add(AoigLanguage::OR([eggmock::egg::Id::from(0), eggmock::egg::Id::from(2)])); // additional `And` with one src-operand=input and one non-input src operand - debug!("EGraph used for candidate-init: {:?}", egraph); - let egraph_clone = egraph.clone(); + + // Create Input nodes + let id1 = egraph.add(AoigLanguage::Input(0)); // Id(1) + let id2 = egraph.add(AoigLanguage::Input(1)); // Id(2) + let id3 = egraph.add(AoigLanguage::Input(2)); // Id(3) + + // And([Signal(false, Id(2)), Signal(false, Id(3))]) → Id(4) + let id4 = egraph.add(AoigLanguage::And([id2, id3])); + + // And([Signal(false, Id(1)), Signal(true, Id(3))]) → Id(6) + let not_id3 = egraph.add(AoigLanguage::Not(id3)); + let id6 = egraph.add(AoigLanguage::And([id1, not_id3])); + + // Or([Signal(false, Id(4)), Signal(false, Id(6))]) → Id(10) + let id10 = egraph.add(AoigLanguage::Or([id4, id6])); + + let extractor = Extractor::new( &egraph, CompilingCostFunction {}); + let ntk = &(extractor, vec!(id10)); + ntk.dump(); + + let ntk_with_backward_edges = ntk.with_backward_edges(); + + let program = compiler.compile(&ntk_with_backward_edges); + + println!("{program}"); } #[test] diff --git a/rs/src/fc_dram/cost_estimation.rs b/rs/src/fc_dram/cost_estimation.rs index 859a3ad..3996ea2 100644 --- a/rs/src/fc_dram/cost_estimation.rs +++ b/rs/src/fc_dram/cost_estimation.rs @@ -159,6 +159,7 @@ mod tests { use crate::fc_dram::CompilerSettings; use super::*; + use std::ffi::CString; // import all elements from parent-module use std::sync::Once; @@ -168,7 +169,10 @@ mod tests { INIT.call_once(|| { env_logger::init(); }); - Compiler::new(CompilerSettings { print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, repetition_fracops: 5, safe_space_rows_per_subarray: 16 } ) + Compiler::new(CompilerSettings { + print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, repetition_fracops: 5, safe_space_rows_per_subarray: 16, + config_file: CString::new("/home/alex/Documents/Studium/Sem6/inf_pm_fpa/lime-fork/config/fcdram_hksynx.toml").unwrap().as_ptr(), + do_save_config: false } ) } /// TODO ! From 9a1b4ec16c31e6b47826e747e35fe46764321f64 Mon Sep 17 00:00:00 2001 From: alku662e Date: Mon, 28 Jul 2025 12:35:58 +0200 Subject: [PATCH 45/51] Last fixes before presi --- rs/src/fc_dram/compiler.rs | 98 ++++---------------------------------- 1 file changed, 10 insertions(+), 88 deletions(-) diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index a514b24..f6ecbff 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -83,7 +83,6 @@ impl Compiler { // 1. Actual compilation while let Some((next_candidate, _)) = self.comp_state.candidates.pop() { - // TODO: extend program with instr that is executed next let executed_instructions = &mut self.execute_next_instruction(&next_candidate, network); program.instructions.append(executed_instructions); @@ -92,6 +91,9 @@ impl Compiler { debug!("New candidates: {:?}", new_candidates); self.comp_state.candidates.extend(new_candidates); + + // TODO: move result into a free row + // TODO: mark rows containing non-live values as free } debug!("Instructions: {:?}", program.instructions); @@ -552,7 +554,7 @@ impl Compiler { let unnegated_signal = Signal::new(signal_to_invert.node_id(), false); let src_location = self.comp_state.value_states.get(&(unnegated_signal, src_array)).unwrap_or_else(|| panic!("Src operand {src_row} is not live in subarray {src_array} (see {signal_to_invert:?})??")); - instructions.push(self.execute_intrasubarray_rowclone(*src_location, src_row)); + instructions.push(self.execute_intrasubarray_rowclone(*src_location, src_row, String::from("Move into compute row"))); // 2. Execute NOT let dst_row = row_combi.1.local_rowaddress_to_subarray_id(dst_array); @@ -560,7 +562,7 @@ impl Compiler { // 3. Copy negated value out of compute rows let free_row = self.comp_state.free_rows_per_subarray.get_mut(&dst_array).and_then(|v| v.pop()).unwrap_or_else(|| panic!("OOM: No free rows in subarray {dst_array}")); - instructions.push(self.execute_intrasubarray_rowclone(dst_row, free_row)); + instructions.push(self.execute_intrasubarray_rowclone(dst_row, free_row, String::from("Move into free row"))); self.comp_state.value_states.insert((*signal_to_invert, dst_array), free_row); instructions @@ -587,7 +589,7 @@ impl Compiler { for (&compute_row, &src_signal) in compute_rows.iter().zip(network.node(node_id).inputs()) { let compute_row = compute_row.local_rowaddress_to_subarray_id(compute_subarray); let &src_row = self.comp_state.value_states.get(&(src_signal, compute_subarray)).unwrap_or_else(|| panic!("Src signal {src_signal:?} is not present in compute subarray {compute_subarray} ???")); - instructions.push(self.execute_intrasubarray_rowclone(src_row, compute_row)); + instructions.push(self.execute_intrasubarray_rowclone(src_row, compute_row, String::from("Move into compute row"))); } @@ -607,14 +609,16 @@ impl Compiler { instructions.push(Instruction::ApaAndOr(compute_row_combi.0.local_rowaddress_to_subarray_id(compute_subarray), compute_row_combi.1.local_rowaddress_to_subarray_id(SubarrayId(reference_subarray)))); // 3. Move result into non-compute row + // TODO: add instruction to move value into free-row !! let free_row = self.comp_state.free_rows_per_subarray.get_mut(&compute_subarray).and_then(|v| v.pop()).unwrap_or_else(|| panic!("No more free rows in subarray {compute_subarray}")); + instructions.push(self.execute_intrasubarray_rowclone(compute_rows[0], free_row, String::from("Move into free row"))); self.comp_state.value_states.insert((Signal::new(node_id, false), compute_subarray), free_row); // TODO: for inverted signals not in result_subarray, right? instructions } - fn execute_intrasubarray_rowclone(&self, src_row: RowAddress, dst_row: RowAddress) -> Instruction { - Instruction::RowCloneFPM(src_row, dst_row, String::from("Move into compute row")) + fn execute_intrasubarray_rowclone(&self, src_row: RowAddress, dst_row: RowAddress, comment: String) -> Instruction { + Instruction::RowCloneFPM(src_row, dst_row, comment) } /// Returns Instructions to execute given `next_candidate` (which is a signal which needs to reside in a specific subarray after performing the execution) @@ -644,88 +648,6 @@ impl Compiler { next_instructions.append(&mut negate_instructions); } next_instructions - - // - // debug!("Executing candidate {:?}", next_candidate); - // let src_operands: Vec = network.node(next_candidate.node_id()).inputs().to_vec(); - // let mut init_neg_operands = self.init_negated_src_operands(src_operands.clone(), network); // TODO NEXT: make sure all required negated operands are available - // next_instructions.append(&mut init_neg_operands); - // - // let nr_operands = src_operands.len(); // use to select SRA to activate - // let nr_rows = nr_operands.next_power_of_two(); - // - // let src_rows: Vec = src_operands.iter() - // .map(|src_operand| { - // - // debug!("src: {src_operand:?}"); - // self.comp_state.value_states.get(src_operand) - // .unwrap() - // .row_location - // .expect("Sth went wrong... if the src-operand is not in a row, then this candidate shouldn't have been added to the list of candidates") - // }) - // .collect(); - // - // let (compute_subarray, ref_subarray) = self.select_compute_and_ref_subarray(src_rows); - // let language_op = network.node(next_candidate.node_id()); - // - // // TODO: extract NOT - // let logic_op = match language_op { - // // REMINDER: operand-nr is extracted by looking at nr of children beforehand - // Aoig::And(_) | Aoig::And4(_) | Aoig::And8(_)| Aoig::And16(_) => LogicOp::AND, - // Aoig::Or(_) | Aoig::Or4(_) | Aoig::Or8(_) | Aoig::Or16(_) => LogicOp::OR, - // _ => panic!("candidate is expected to be a logic op"), - // }; - // - // // 0. Select an SRA (=row-address tuple) for the selected subarray based on highest success-rate - // // TODO (possible improvement): input replication by choosing SRA with more activated rows than operands and duplicating operands which are in far-away rows into several rows?) - // let row_combi = ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&SupportedNrOperands::try_from(nr_rows as u8).unwrap()).unwrap() - // // sort by success-rate - using eg `BTreeMap` turned out to impose a too large runtime overhead - // .iter() - // .find(|combi| !self.blocked_row_combinations.contains(combi)) // choose first block RowAddr-combination - // .expect("No SRA for nr-rows={nr_rows}"); - // - // let activated_rows = ARCHITECTURE.precomputed_simultaneous_row_activations.get(row_combi).unwrap(); - // let ref_rows: Vec = activated_rows.iter() - // .map(|row| row & (subarrayid_to_subarray_address(ref_subarray))) // make activated rows refer to the right subarray - // .collect(); - // let comp_rows: Vec = activated_rows.iter() - // .map(|row| row & (subarrayid_to_subarray_address(compute_subarray))) // make activated rows refer to the right subarray - // .collect(); - // - // - // // 1. Initialize rows in ref-subarray (if executing AND/OR) - // // - TODO: read nr of frac-ops to issue from compiler-settings - // let mut instruction_init_ref_subarray = self.init_reference_subarray(ref_rows.clone(), logic_op); - // next_instructions.append(&mut instruction_init_ref_subarray); - // - // // 2. Place rows in the simultaneously activated rows in the compute subarray (init other rows with 0 for OR, 1 for AND and same value for NOT) - // let mut instructions_init_comp_subarray = self.init_compute_subarray( activated_rows.clone(), src_operands, logic_op, NeighboringSubarrayRelPosition::get_relative_position(compute_subarray, ref_subarray)); - // next_instructions.append(&mut instructions_init_comp_subarray); - // - // // SKIPPED: 2.2 Check if issuing `APA(src1,src2)` would activate other rows which hold valid data - // // - only necessary once we find optimization to not write values to safe-space but reuse them diectly - // // 2.2.1 if yes: move data to other rows for performing this op - // - // // 3. Issue actual operation - // let mut actual_op = match logic_op { - // LogicOp::NOT => vec!(Instruction::ApaNOT(row_combi.0, row_combi.1)), - // LogicOp::AND | LogicOp::OR => vec!(Instruction::ApaAndOr(row_combi.0, row_combi.1)), - // LogicOp::NAND | LogicOp::NOR => vec!(Instruction::ApaAndOr(row_combi.0, row_combi.1), Instruction::ApaNOT(row_combi.0, row_combi.1)), // TODO: or the othyer way around (1st NOT)? - // }; - // - // next_instructions.append(&mut actual_op); - // for (&comp_row, &ref_row) in comp_rows.iter().zip(ref_rows.iter()) { - // self.comp_state.dram_state.insert(comp_row, RowState { is_compute_row: true, live_value: Some(*next_candidate), constant: None }); - // self.comp_state.value_states.insert(*next_candidate, ValueState { is_computed: true, row_location: Some(comp_row) }); - // // ref subarray holds negated value afterwarsd - // self.comp_state.dram_state.insert(ref_row, RowState { is_compute_row: true, live_value: Some(next_candidate.invert()), constant: None }); - // self.comp_state.value_states.insert(next_candidate.invert(), ValueState { is_computed: true, row_location: Some(ref_row) }); - // } - // - // // 4. Copy result data from dst NEAREST to the sense-amps into a safe-space row and update `value_state` - // // TODO LAST: possible improvement - error correction over all dst-rows (eg majority-vote for each bit, votes weighted by distance to sense-amps?) - // - // next_instructions } /// Compute `SchedulingPrio` for a given `signal` located in the `subarray` From a6ca73dd7353c9400e78eef631494703ce8b2ccd Mon Sep 17 00:00:00 2001 From: alku662e Date: Tue, 29 Jul 2025 11:10:30 +0200 Subject: [PATCH 46/51] Fix RowClone to display as AAP instead of AA --- rs/src/fc_dram/architecture.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 932bdad..331e67c 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -434,7 +434,7 @@ impl Display for Instruction { .collect(); format!("APA_AND_OR({},{}) // activates {:?}", display_row(row1), display_row(row2), activated_rows) }, - Instruction::RowCloneFPM(row1, row2, comment) => format!("AA({},{}) // {}", display_row(row1), display_row(row2), comment), + Instruction::RowCloneFPM(row1, row2, comment) => format!("AAP({},{}) // {}", display_row(row1), display_row(row2), comment), }; write!(f, "{}", description) } From 70e51aa4401755cacadd3b1ea105a3d272e72026 Mon Sep 17 00:00:00 2001 From: alku662e Date: Tue, 29 Jul 2025 17:35:34 +0200 Subject: [PATCH 47/51] Fix Rewriting Rules --- rs/src/fc_dram/architecture.rs | 2 +- rs/src/fc_dram/mod.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 331e67c..2c1c3c6 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -451,7 +451,7 @@ impl Instruction { // TODO: change to ns (t_{RAS}+6ns) - `t_{RAS}` to mem cycles Instruction::ApaNOT(_, _) => 3, // NOTE: this is not explicitly written in the paper, TODO: check with authors Instruction::ApaAndOr(_, _) => 3, // NOTE: this is not explicitly written in the paper, TODO: check with authors - Instruction::RowCloneFPM(_, _, _) => 2, // see [4] Chap3.2 + Instruction::RowCloneFPM(_, _, _) => 2, // see [4] Chap3.2 (TODO: not correct, given as 90ns?) } } diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index b6e48dd..1ebf30e 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -46,10 +46,10 @@ static REWRITE_RULES: LazyLock>> = LazyLock::new(| let rules = vec![ // TODO: sth is wrong with these rewrite rules - they produce a non-equivalent logic network ! // rewrite!("commute-and"; "(and ?a ?b)" => "(and ?b ?a)"), - // rewrite!("and-1"; "(and ?a 1)" => "?a"), - // rewrite!("and-0"; "(and ?a 0)" => "0"), - // rewrite!("or-1"; "(or ?a 1)" => "1"), - // rewrite!("or-0"; "(or ?a 0)" => "?a"), + rewrite!("and-1"; "(and ?a (! f))" => "?a"), + rewrite!("and-0"; "(and ?a f)" => "f"), + rewrite!("or-1"; "(or ?a (! f))" => "(! f)"), + rewrite!("or-0"; "(or ?a f)" => "?a"), rewrite!("and-or"; "(! (or (! ?a) (! ?b)))" => "(and ?a ?b)"), // (De-Morgan) rewrite!("or-and"; "(! (and (! ?a) (! ?b)))" => "(or ?a ?b)" ), // (De-Morgan) From 39c4b28c294836340c36ea4ad15841aae55d4418 Mon Sep 17 00:00:00 2001 From: Alexander Kusnezoff Date: Thu, 7 Aug 2025 22:40:36 +0200 Subject: [PATCH 48/51] Prepare merge --- rs/Cargo.lock | 0 rs/Cargo.toml | 0 rs/build.rs | 0 rs/in.dot | 0 rs/src/ambit/compilation.rs | 0 rs/src/ambit/extraction.rs | 0 rs/src/ambit/mod.rs | 0 rs/src/ambit/optimization.rs | 0 rs/src/ambit/program.rs | 0 rs/src/ambit/rows.rs | 0 rs/src/fc_dram/architecture.rs | 0 rs/src/fc_dram/compiler.rs | 3 +-- rs/src/fc_dram/cost_estimation.rs | 1 - rs/src/fc_dram/mod.rs | 2 +- rs/src/fc_dram/optimization.rs | 0 rs/src/fc_dram/program.rs | 0 rs/src/fc_dram/utils.rs | 0 rs/src/lib.rs | 0 src/ambit.h | 0 src/ambit_benchmark_main.cpp | 0 src/fcdram.h | 0 src/fcdram_benchmark_main.cpp | 0 src/main.cpp | 21 +++++++++++++++++---- src/utils.cpp | 0 src/utils.h | 0 25 files changed, 19 insertions(+), 8 deletions(-) mode change 100644 => 100755 rs/Cargo.lock mode change 100644 => 100755 rs/Cargo.toml mode change 100644 => 100755 rs/build.rs mode change 100644 => 100755 rs/in.dot mode change 100644 => 100755 rs/src/ambit/compilation.rs mode change 100644 => 100755 rs/src/ambit/extraction.rs mode change 100644 => 100755 rs/src/ambit/mod.rs mode change 100644 => 100755 rs/src/ambit/optimization.rs mode change 100644 => 100755 rs/src/ambit/program.rs mode change 100644 => 100755 rs/src/ambit/rows.rs mode change 100644 => 100755 rs/src/fc_dram/architecture.rs mode change 100644 => 100755 rs/src/fc_dram/compiler.rs mode change 100644 => 100755 rs/src/fc_dram/cost_estimation.rs mode change 100644 => 100755 rs/src/fc_dram/mod.rs mode change 100644 => 100755 rs/src/fc_dram/optimization.rs mode change 100644 => 100755 rs/src/fc_dram/program.rs mode change 100644 => 100755 rs/src/fc_dram/utils.rs mode change 100644 => 100755 rs/src/lib.rs mode change 100644 => 100755 src/ambit.h mode change 100644 => 100755 src/ambit_benchmark_main.cpp mode change 100644 => 100755 src/fcdram.h mode change 100644 => 100755 src/fcdram_benchmark_main.cpp mode change 100644 => 100755 src/main.cpp mode change 100644 => 100755 src/utils.cpp mode change 100644 => 100755 src/utils.h diff --git a/rs/Cargo.lock b/rs/Cargo.lock old mode 100644 new mode 100755 diff --git a/rs/Cargo.toml b/rs/Cargo.toml old mode 100644 new mode 100755 diff --git a/rs/build.rs b/rs/build.rs old mode 100644 new mode 100755 diff --git a/rs/in.dot b/rs/in.dot old mode 100644 new mode 100755 diff --git a/rs/src/ambit/compilation.rs b/rs/src/ambit/compilation.rs old mode 100644 new mode 100755 diff --git a/rs/src/ambit/extraction.rs b/rs/src/ambit/extraction.rs old mode 100644 new mode 100755 diff --git a/rs/src/ambit/mod.rs b/rs/src/ambit/mod.rs old mode 100644 new mode 100755 diff --git a/rs/src/ambit/optimization.rs b/rs/src/ambit/optimization.rs old mode 100644 new mode 100755 diff --git a/rs/src/ambit/program.rs b/rs/src/ambit/program.rs old mode 100644 new mode 100755 diff --git a/rs/src/ambit/rows.rs b/rs/src/ambit/rows.rs old mode 100644 new mode 100755 diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs old mode 100644 new mode 100755 diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs old mode 100644 new mode 100755 index f6ecbff..4f4c032 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -142,7 +142,6 @@ impl Compiler { // move subarray to 1st subarray (instead of 0th, which is at the edge and hence has no sense-amps above) let subarray1_id = ((ROW_ID_BITMASK << 1) | 1) ^ ROW_ID_BITMASK; let row = RowAddress(subarray1_id | row.0); // makes sure that `get_distance_of_row_to_sense_amps` doesn't panic since SRA returns subarray=0 by default (which is an edge subarray) - println!("{:b}", row.0); (ARCHITECTURE.get_distance_of_row_to_sense_amps)(row, sense_amp_position) as u64 }).sum() }; @@ -152,7 +151,7 @@ impl Compiler { activated_rows.iter().map(|&row| { let subarray1_id = ((ROW_ID_BITMASK << 1) | 1) ^ ROW_ID_BITMASK; let row = RowAddress(subarray1_id | row.0); // makes sure that `get_distance_of_row_to_sense_amps` doesn't panic since SRA returns subarray=0 by default (which is an edge subarray) - println!("{:b}", row.0); + // println!("{:b}", row.0); (ARCHITECTURE.get_distance_of_row_to_sense_amps)(row, sense_amp_position) as u64 }).sum() }; diff --git a/rs/src/fc_dram/cost_estimation.rs b/rs/src/fc_dram/cost_estimation.rs old mode 100644 new mode 100755 index 3996ea2..5e14608 --- a/rs/src/fc_dram/cost_estimation.rs +++ b/rs/src/fc_dram/cost_estimation.rs @@ -124,7 +124,6 @@ impl CostFunction for CompilingCostFunction { mem_cycles: mem_cycles_or, } }, - // TODO: increase cost of NOT? (since it moves the value to another subarray!) // eg prefer `OR(a,b)` to `NOT(AND( NOT(a), NOT(b)))` AoigLanguage::Not(_) => { let mem_cycles_not = FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::NOT) diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs old mode 100644 new mode 100755 index 1ebf30e..0622785 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -127,7 +127,7 @@ fn compiling_receiver<'a>( Extractor::new( graph, CompilingCostFunction {}, - ), // TODO: provide CostFunction !! + ), "t_extractor", settings.print_compilation_stats ); debug!("Outputs: {outputs:?}"); diff --git a/rs/src/fc_dram/optimization.rs b/rs/src/fc_dram/optimization.rs old mode 100644 new mode 100755 diff --git a/rs/src/fc_dram/program.rs b/rs/src/fc_dram/program.rs old mode 100644 new mode 100755 diff --git a/rs/src/fc_dram/utils.rs b/rs/src/fc_dram/utils.rs old mode 100644 new mode 100755 diff --git a/rs/src/lib.rs b/rs/src/lib.rs old mode 100644 new mode 100755 diff --git a/src/ambit.h b/src/ambit.h old mode 100644 new mode 100755 diff --git a/src/ambit_benchmark_main.cpp b/src/ambit_benchmark_main.cpp old mode 100644 new mode 100755 diff --git a/src/fcdram.h b/src/fcdram.h old mode 100644 new mode 100755 diff --git a/src/fcdram_benchmark_main.cpp b/src/fcdram_benchmark_main.cpp old mode 100644 new mode 100755 diff --git a/src/main.cpp b/src/main.cpp old mode 100644 new mode 100755 index d2a8dd0..c841bb7 --- a/src/main.cpp +++ b/src/main.cpp @@ -32,14 +32,27 @@ void run_fcdram_example() { aig_network in; + // const auto b_i = in.create_pi(); + // const auto b_i_next = in.create_pi(); + // const auto m = in.create_pi(); + // + // const auto O1 = in.create_and( m, b_i_next ); + // const auto O2 = in.create_and( in.create_not( m ), b_i ); + // const auto O3 = in.create_and( in.create_not( O2 ), O1 ); + // const auto bi = in.create_or( O1, O2 ); + // in.create_po( bi ); + // in.create_po( O3 ); + + // test and(and2,and2) -> and4 const auto i1 = in.create_pi(); const auto i2 = in.create_pi(); const auto i3 = in.create_pi(); + const auto i4 = in.create_pi(); - const auto O1 = in.create_and( i3, i2 ); - const auto O2 = in.create_and( in.create_not( i3 ), i1 ); - const auto bi = in.create_or( O1, O2 ); - in.create_po( bi ); + const auto o1 = in.create_and( i1, i2); + const auto o2 = in.create_and( i3, i4); + const auto o3 = in.create_and( o1,o2 ); + in.create_po( o3 ); write_dot( in, "in.dot" ); std::cout << "Sending graph to fcdram_compile..." << std::endl; diff --git a/src/utils.cpp b/src/utils.cpp old mode 100644 new mode 100755 diff --git a/src/utils.h b/src/utils.h old mode 100644 new mode 100755 From 7b8cec34d8b1249bf99a8485aeb3ca6a02afefd5 Mon Sep 17 00:00:00 2001 From: Alexander Kusnezoff Date: Thu, 7 Aug 2025 22:42:28 +0200 Subject: [PATCH 49/51] Produced config (not yet functional) --- config/fcdram_hksynx.toml | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 config/fcdram_hksynx.toml diff --git a/config/fcdram_hksynx.toml b/config/fcdram_hksynx.toml old mode 100644 new mode 100755 From 5fe74e88b1c48608c05e8923945471363581dd7c Mon Sep 17 00:00:00 2001 From: Alexander Kusnezoff Date: Thu, 7 Aug 2025 22:57:25 +0200 Subject: [PATCH 50/51] Fix merge errors --- rs/src/ambit/compilation.rs | 14 ++------------ src/main.cpp | 35 ++++++++++++++--------------------- 2 files changed, 16 insertions(+), 33 deletions(-) diff --git a/rs/src/ambit/compilation.rs b/rs/src/ambit/compilation.rs index 0a639b5..47737cb 100755 --- a/rs/src/ambit/compilation.rs +++ b/rs/src/ambit/compilation.rs @@ -7,8 +7,8 @@ use eggmock::{Id, Mig, NetworkWithBackwardEdges, Node, Signal}; use rustc_hash::{FxHashMap, FxHashSet}; use std::cmp::max; -pub struct CompilationState<'a, 'n, N> { - network: &'n N, +pub struct CompilationState<'a, 'n, P> { + network: &'n P, /// contains all not yet computed network nodes that can be immediately computed (i.e. all /// inputs of the node are already computed) candidates: FxHashSet<(Id, Mig)>, @@ -89,16 +89,6 @@ pub fn compile<'a>( Ok(program) } -pub struct CompilationState<'a, 'n, P> { - network: &'n P, - /// Network-Nodes whose inputs all have been computed - candidates: FxHashSet<(Id, Mig)>, - program: ProgramState<'a>, - - outputs: FxHashMap, - leftover_use_count: FxHashMap, -} - impl<'a, 'n, P: NetworkWithBackwardEdges> CompilationState<'a, 'n, P> { /// - `candidates`: , computed from `network /// - `outputs`: direktly read-out from `network` diff --git a/src/main.cpp b/src/main.cpp index 6451bc0..fc527fa 100755 --- a/src/main.cpp +++ b/src/main.cpp @@ -26,14 +26,7 @@ void run_ambit_example(mig_network in) std::cout << "t3:" << result.t_compiler << std::endl; write_dot( out, "out.dot" ); - ambit_compiler_statistics result = eggmock::send_mig( in, ambit_compile(settings) ); - // std::cout << "IC:" << result.instruction_count << std::endl; - // std::cout << "t1:" << result.t_runner << std::endl; - // std::cout << "t2:" << result.t_extractor << std::endl; - // std::cout << "t3:" << result.t_compiler << std::endl; - // mig_network rewritten = rewrite_mig( in, ambit_rewriter() ); - // write_dot( rewritten, "out.dot" ); - + // ambit_compiler_statistics result = eggmock::send_mig( in, ambit_compile(settings) ); } /** @@ -91,17 +84,17 @@ void run_fcdram_example() int main() { - // mig_network in; - // const auto b_i = in.create_pi(); - // const auto b_i_next = in.create_pi(); - // const auto m = in.create_pi(); - // - // const auto O1 = in.create_and( m, b_i_next ); - // const auto O2 = in.create_and( in.create_not( m ), b_i ); - // const auto bi = in.create_or( O1, O2 ); - // in.create_po( bi ); - // - // write_dot( in, "in.dot" ); - // run_ambit_example(in); - run_fcdram_example(); + mig_network in; + const auto b_i = in.create_pi(); + const auto b_i_next = in.create_pi(); + const auto m = in.create_pi(); + + const auto O1 = in.create_and( m, b_i_next ); + const auto O2 = in.create_and( in.create_not( m ), b_i ); + const auto bi = in.create_or( O1, O2 ); + in.create_po( bi ); + + write_dot( in, "in.dot" ); + run_ambit_example(in); + // run_fcdram_example(); } From 2b1dbf2a3f82f714fb9302340c5a024888ac6b7c Mon Sep 17 00:00:00 2001 From: alku662e Date: Sun, 7 Sep 2025 13:56:53 +0200 Subject: [PATCH 51/51] Minor fixes: remove prints, unused imports, absolute path. Also added writing chosen compute-rows to json-file to save compilation time (and not return hard-coded choice) --- rs/Cargo.lock | 26 ++++++ rs/Cargo.toml | 2 + rs/src/fc_dram/architecture.rs | 52 ++---------- rs/src/fc_dram/compiler.rs | 128 +++++++++++++++++------------- rs/src/fc_dram/cost_estimation.rs | 3 +- rs/src/fc_dram/mod.rs | 2 +- src/main.cpp | 23 ++++-- 7 files changed, 128 insertions(+), 108 deletions(-) diff --git a/rs/Cargo.lock b/rs/Cargo.lock index 226122e..7bb67c9 100644 --- a/rs/Cargo.lock +++ b/rs/Cargo.lock @@ -246,6 +246,12 @@ dependencies = [ "either", ] +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + [[package]] name = "jiff" version = "0.2.14" @@ -298,6 +304,8 @@ dependencies = [ "ouroboros", "priority-queue", "rustc-hash", + "serde", + "serde_json", "smallvec", "strum", "strum_macros", @@ -502,6 +510,12 @@ version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + [[package]] name = "saturating" version = "0.1.0" @@ -534,6 +548,18 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.143" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + [[package]] name = "serde_spanned" version = "1.0.0" diff --git a/rs/Cargo.toml b/rs/Cargo.toml index 64653fc..95cbd90 100644 --- a/rs/Cargo.toml +++ b/rs/Cargo.toml @@ -20,6 +20,8 @@ priority-queue = "2.5.0" toml = { version = "0.9.1", features = ["serde"] } strum_macros = "0.27.1" strum = "0.27.1" +serde = { version="1.0.219", features = ["derive"] } +serde_json = "1.0.143" # for writing chosen compute rows (in FCDRAM) to a json config-file to avoid recomputation [build-dependencies] # eggmock = { path = "../../eggmock" } diff --git a/rs/src/fc_dram/architecture.rs b/rs/src/fc_dram/architecture.rs index 2c1c3c6..827fe52 100644 --- a/rs/src/fc_dram/architecture.rs +++ b/rs/src/fc_dram/architecture.rs @@ -6,7 +6,7 @@ //! RowAddress (eg via bit-shifting given bitmasks for subarray-id & row-addr to put on-top of RowAddress use std::{cmp::Ordering, collections::{HashMap, HashSet}, fmt::{self, Display, Formatter}, ops, sync::LazyLock}; -use log::debug; +use serde::{Deserialize, Serialize}; use strum_macros::EnumIter; pub const NR_SUBARRAYS: u64 = 2u64.pow(7); @@ -21,7 +21,7 @@ pub fn subarrayid_to_subarray_address(subarray_id: SubarrayId) -> RowAddress { /// All Subarrays (except the ones at the edges) have two neighboring subarrays: one below (subarray_id+1) and one above (subarray_id-1) /// - currently the following subarrays are used together for computations: 0&1,2&3,4&5,.. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumIter)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumIter, Serialize, Deserialize)] pub enum NeighboringSubarrayRelPosition { /// `subarray_id-1` Above, @@ -56,7 +56,7 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { // - see [3] Chap4.2: nr of Predecoders in LWLD determines number & addresses of simultaneously activated rows // - does work for the example shown in [3] Chap3.2: `APA(256,287)` activates rows `287,286,281,280,263,262,257,256` // TODO: add overlapping of higher-order-bits (GWLD) - // TODO: init architecture a run-time, eg from config file + // - at the moment high-order bits (=subarray-id) needs to be added manually using eg `subarrayid_to_subarray_address()` helper function // TODO: maybe evaluate statically? let get_activated_rows_from_apa = |row1: RowAddress, row2: RowAddress| -> Vec { // 1. Define Predecoders by defining for which of the bits they're responsible @@ -98,7 +98,6 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { }; // just a dummy implementation, see [5] Chap3.2 for details why determining the distance based on the Row Addresses issued by the MemController is difficult - // TODO: NEXT let get_distance_of_row_to_sense_amps = |row: RowAddress, subarray_rel_position: NeighboringSubarrayRelPosition| -> RowDistanceToSenseAmps { // NOTE: last & first subarrays only have sense-amps from one side if (row.get_subarray_id().0 == NR_SUBARRAYS-1 && subarray_rel_position == NeighboringSubarrayRelPosition::Below) || (row.get_subarray_id().0 == 0 && subarray_rel_position == NeighboringSubarrayRelPosition::Above) { @@ -157,7 +156,7 @@ pub static ARCHITECTURE: LazyLock = LazyLock::new(|| { }); /// - ! must be smaller than `rows_per_subarray * nr_subarrays` (this is NOT checked!) -#[derive(Copy, Clone, PartialEq, Eq, Hash)] +#[derive(Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct RowAddress(pub u64); impl RowAddress { @@ -213,21 +212,6 @@ impl fmt::Display for SubarrayId { } } - -// impl Display for Vec { -// fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { -// write!(f, "[")?; -// let mut iter = self.iter(); -// if let Some(first) = iter.next() { -// write!(f, "{}", first)?; -// for elem in iter { -// write!(f, ",{}", elem)?; -// } -// } -// write!(f, "]") -// } -// } - #[derive(Debug, PartialEq, Clone, Copy)] pub struct SuccessRate(pub f64); @@ -269,24 +253,6 @@ impl From for SuccessRate { } } - -/// see Figure6,13 in [1] for timing diagrams -/// - all numbers are specified in ns -pub struct TimingSpec { - pub t_ras: f64, - /// Time btw an `PRE` and `ACT` when performing `APA` for issuing a `NOT` - pub time_btw_pre_act_apa_not: f64, -} - -impl Default for TimingSpec { - fn default() -> Self { - todo!() - // TimingSpec { - // t_ras: - // } - } -} - /// TODO: add field encoding topology of subarrays (to determine which of them share sense-amps) pub struct FCDRAMArchitecture { /// Nr of subarrays in a DRAM module @@ -637,7 +603,7 @@ impl LogicOp { } /// Support operands numbers for AND/OR/NOT operations -#[derive(Debug, Clone, Copy, EnumIter, Hash, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, EnumIter, Hash, PartialEq, Eq, Serialize, Deserialize)] #[repr(u8)] // You can change the representation (e.g., u8, u16, etc.) pub enum SupportedNrOperands { /// One operand only supported for `NOT` @@ -684,18 +650,16 @@ pub trait RowDecoder { // TODO: get activation pattern for given rows r1,r2 (N:N vs N:2N) - or just check whether // N:2N: is supported and let `get_simultaneously_activated_rows_of_apa_op()` handle the rest? + // - NOTE: currenlty N:2N activation pattern is not supported } - - +// TODO #[cfg(test)] mod tests { use super::*; - // fn init() { - // } - #[test] // mark function as test-fn + #[test] fn test_sra() { println!("{:?}", ARCHITECTURE.sra_degree_to_rowaddress_combinations.get(&SupportedNrOperands::try_from(8 as u8).unwrap()).unwrap().first()); } diff --git a/rs/src/fc_dram/compiler.rs b/rs/src/fc_dram/compiler.rs index f6ecbff..26b821d 100644 --- a/rs/src/fc_dram/compiler.rs +++ b/rs/src/fc_dram/compiler.rs @@ -6,18 +6,18 @@ //! //! - [`Compiler::compile()`] = main function - compiles given logic network for the given [`architecture`] into a [`program`] using some [`optimization`] -use crate::fc_dram::architecture::ROWS_PER_SUBARRAY; +use crate::fc_dram::architecture::{Instruction, ROWS_PER_SUBARRAY}; use super::{ - architecture::{subarrayid_to_subarray_address, Instruction, LogicOp, NeighboringSubarrayRelPosition, SubarrayId, SupportedNrOperands, ARCHITECTURE, NR_SUBARRAYS, ROW_ID_BITMASK, SUBARRAY_ID_BITMASK}, optimization::optimize, CompilerSettings, Program, RowAddress + architecture::{LogicOp, NeighboringSubarrayRelPosition, SubarrayId, SupportedNrOperands, ARCHITECTURE, NR_SUBARRAYS, ROW_ID_BITMASK}, CompilerSettings, Program, RowAddress }; use eggmock::{Aoig, Id, NetworkWithBackwardEdges, Node, Signal}; -use itertools::Itertools; use log::debug; use priority_queue::PriorityQueue; use strum::IntoEnumIterator; -use toml::{Table, Value}; -use std::{cmp::Ordering, collections::{HashMap, HashSet}, env::consts::ARCH, ffi::CStr, fmt::Debug, fs, path::Path, vec}; +use std::{cmp::Ordering, collections::HashMap, ffi::CStr, fmt::Debug, fs::{self, File}, io::Write, path::Path, vec}; + +use serde::{Serialize, Deserialize}; /// Provides [`Compiler::compile()`] to compile a logic network into a [`Program`] pub struct Compiler { @@ -36,6 +36,14 @@ pub struct Compiler { computed_noninverted_scr_signals: HashMap,Vec)>, } +/// Serializable struct for storing chosen compute rows in a json file +#[derive(Serialize,Deserialize)] +struct ComputeRowRecord { + operands: SupportedNrOperands, + position: NeighboringSubarrayRelPosition, + rows: (RowAddress, RowAddress), +} + impl Compiler { /// Constants are repeated to fill complete row const CONSTANTS: [usize; 2] = [0, 1]; @@ -91,9 +99,6 @@ impl Compiler { debug!("New candidates: {:?}", new_candidates); self.comp_state.candidates.extend(new_candidates); - - // TODO: move result into a free row - // TODO: mark rows containing non-live values as free } debug!("Instructions: {:?}", program.instructions); @@ -142,7 +147,6 @@ impl Compiler { // move subarray to 1st subarray (instead of 0th, which is at the edge and hence has no sense-amps above) let subarray1_id = ((ROW_ID_BITMASK << 1) | 1) ^ ROW_ID_BITMASK; let row = RowAddress(subarray1_id | row.0); // makes sure that `get_distance_of_row_to_sense_amps` doesn't panic since SRA returns subarray=0 by default (which is an edge subarray) - println!("{:b}", row.0); (ARCHITECTURE.get_distance_of_row_to_sense_amps)(row, sense_amp_position) as u64 }).sum() }; @@ -152,7 +156,6 @@ impl Compiler { activated_rows.iter().map(|&row| { let subarray1_id = ((ROW_ID_BITMASK << 1) | 1) ^ ROW_ID_BITMASK; let row = RowAddress(subarray1_id | row.0); // makes sure that `get_distance_of_row_to_sense_amps` doesn't panic since SRA returns subarray=0 by default (which is an edge subarray) - println!("{:b}", row.0); (ARCHITECTURE.get_distance_of_row_to_sense_amps)(row, sense_amp_position) as u64 }).sum() }; @@ -287,52 +290,66 @@ impl Compiler { fn init_comp_state(&mut self, network: &impl NetworkWithBackwardEdges, program: &mut Program) { let config_file = unsafe { CStr::from_ptr(self.settings.config_file) }.to_str().unwrap(); let config = Path::new(config_file); - println!("{:?}", config); // 0.1 Allocate compute rows: rows reserved for performing computations, all other rows are usable as "Register" if config.is_file() { - - // let content = fs::read_to_string(config).unwrap(); - // let value = content.parse::().unwrap(); // Parse into generic TOML Value :contentReference[oaicite:1]{index=1} - // - // if let Some(arr) = value.get("safe_space_rows").and_then(|v| v.as_array()) { - // println!("Found array of length {}", arr.len()); - // self.safe_space_rows = arr.iter().map(|v| { - // v.as_integer().expect("Expected integer") as u64 - // }).collect(); - // } else { - // panic!("Config file doesn't contain value for safe-space-rows"); - // } - - // TODO: read&write this to&from config-file (added manually here in the meantiem) - self.compute_row_activations = HashMap::from([ - ((SupportedNrOperands::One, NeighboringSubarrayRelPosition::Above), (RowAddress(8), RowAddress(8))), - ((SupportedNrOperands::One, NeighboringSubarrayRelPosition::Below), (RowAddress(303), RowAddress(303))), - ((SupportedNrOperands::Two, NeighboringSubarrayRelPosition::Above), (RowAddress(15), RowAddress(79))), - ((SupportedNrOperands::Two, NeighboringSubarrayRelPosition::Below), (RowAddress(293), RowAddress(357))), - ((SupportedNrOperands::Four, NeighboringSubarrayRelPosition::Above), (RowAddress(60), RowAddress(42))), - ((SupportedNrOperands::Four, NeighboringSubarrayRelPosition::Below), (RowAddress(472), RowAddress(412))), - ((SupportedNrOperands::Eight, NeighboringSubarrayRelPosition::Above), (RowAddress(42), RowAddress(15))), - ((SupportedNrOperands::Eight, NeighboringSubarrayRelPosition::Below), (RowAddress(203), RowAddress(283))), - ((SupportedNrOperands::Sixteen, NeighboringSubarrayRelPosition::Above), (RowAddress(32), RowAddress(83))), - ((SupportedNrOperands::Sixteen, NeighboringSubarrayRelPosition::Below), (RowAddress(470), RowAddress(252))), - ((SupportedNrOperands::Thirtytwo, NeighboringSubarrayRelPosition::Above), (RowAddress(307), RowAddress(28))), - ((SupportedNrOperands::Thirtytwo, NeighboringSubarrayRelPosition::Below), (RowAddress(149), RowAddress(318))), - ]); + // if config-file has been provided: get compute rows from that config file rather than recomputing them + + let contents = fs::read_to_string(config).unwrap(); + // Read file contents + + // Parse JSON into Vec + let records: Vec = + serde_json::from_str(&contents).unwrap_or_else(|_| panic!("Failed to parse JSON for file {:?}", config.to_str())); + + // Convert into the HashMap structure + self.compute_row_activations = records + .into_iter() + .map(|rec| { + ( + (rec.operands, rec.position), + (rec.rows.0, rec.rows.1), + ) + }) + .collect(); + + // This is the result for SKHYNIX DRAM (comment out if to save dev-time generating json-file): + // self.compute_row_activations = HashMap::from([ + // ((SupportedNrOperands::One, NeighboringSubarrayRelPosition::Above), (RowAddress(8), RowAddress(8))), + // ((SupportedNrOperands::One, NeighboringSubarrayRelPosition::Below), (RowAddress(303), RowAddress(303))), + // ((SupportedNrOperands::Two, NeighboringSubarrayRelPosition::Above), (RowAddress(15), RowAddress(79))), + // ((SupportedNrOperands::Two, NeighboringSubarrayRelPosition::Below), (RowAddress(293), RowAddress(357))), + // ((SupportedNrOperands::Four, NeighboringSubarrayRelPosition::Above), (RowAddress(60), RowAddress(42))), + // ((SupportedNrOperands::Four, NeighboringSubarrayRelPosition::Below), (RowAddress(472), RowAddress(412))), + // ((SupportedNrOperands::Eight, NeighboringSubarrayRelPosition::Above), (RowAddress(42), RowAddress(15))), + // ((SupportedNrOperands::Eight, NeighboringSubarrayRelPosition::Below), (RowAddress(203), RowAddress(283))), + // ((SupportedNrOperands::Sixteen, NeighboringSubarrayRelPosition::Above), (RowAddress(32), RowAddress(83))), + // ((SupportedNrOperands::Sixteen, NeighboringSubarrayRelPosition::Below), (RowAddress(470), RowAddress(252))), + // ((SupportedNrOperands::Thirtytwo, NeighboringSubarrayRelPosition::Above), (RowAddress(307), RowAddress(28))), + // ((SupportedNrOperands::Thirtytwo, NeighboringSubarrayRelPosition::Below), (RowAddress(149), RowAddress(318))), + // ]); } else { self.choose_compute_rows(); // choose which rows will serve as compute rows (those are stored in `self.compute_row_activations` - println!("{:?}", self.compute_row_activations); - - // TODO: write chosen compute rows to config-file - // let safe_space_rows_toml = Value::Array(self.safe_space_rows.iter().map( - // |row| Value::Integer(*row as i64) - // ).collect()); - // let config_in_toml = toml::toml! { - // safe_space_rows = safe_space_rows_toml - // }; - // fs::write(config, config_in_toml.to_string()).expect("Sth went wrong here.."); + + // write chosen compute rows to config-file + let records: Vec = self.compute_row_activations + .iter() + .map(|((operands, position), (row1, row2))| ComputeRowRecord { + operands: *operands, + position: *position, + rows: (*row1, *row2), + }) + .collect(); + + // Store compute-row choice in json file + let json_output = serde_json::to_string_pretty(&records).unwrap(); + // Write to file + let output_path = Path::new("fcdram_hksynx_compute_rows.json"); // NOTE: the chosen compute rows are specific to HKSYNX DRAM for now + let mut file = File::create(output_path).expect("Failed to create output file"); + file.write_all(json_output.as_bytes()).expect("Failed to write JSON to file"); + println!("Stored chosen compute-rows into {:?}. Pass this file via `.config_file` to safe considerate compilation time for choosing the compute rows", output_path.to_str()); } // 0.2 Save free rows @@ -395,7 +412,7 @@ impl Compiler { let (noninverted_src_signals, inverted_src_signals) = self.get_all_noninverted_and_inverted_src_signals(output, network); - println!("Noninverted src signals: {:?}", noninverted_src_signals.clone()); + // println!("Noninverted src signals: {:?}", noninverted_src_signals.clone()); // all directly (might in theory) reside in the same subarray as `output` (since no NOTS are inbtw which locate them to a neighboring subarray) for connected_signal in noninverted_src_signals { self.signal_to_subarrayids.entry(connected_signal).or_default().push(actual_subarray); // determine (virtual) subarray in which output will reside @@ -413,8 +430,8 @@ impl Compiler { .filter(|signal| !network.node(signal.node_id()).is_leaf() ) // leaves don't need to be placed in neighboring subarray since inputs are placed by user .map(|signal| Signal::new(signal.node_id(), false)).collect(); // before negation the signals are in the neighboring subarray while !unvisited_signals_in_same_subarray.is_empty() || !unvisited_signals_in_neighboring_subarray.is_empty() { - println!("Same subarray: {:?}", unvisited_signals_in_same_subarray); - println!("Neighboring subarray: {:?}", unvisited_signals_in_neighboring_subarray); + // println!("Same subarray: {:?}", unvisited_signals_in_same_subarray); + // println!("Neighboring subarray: {:?}", unvisited_signals_in_neighboring_subarray); if let Some(signal_neighboring_subarray) = unvisited_signals_in_neighboring_subarray.pop() { debug!("Neighboring: {signal_neighboring_subarray:?}"); @@ -623,8 +640,6 @@ impl Compiler { /// Returns Instructions to execute given `next_candidate` (which is a signal which needs to reside in a specific subarray after performing the execution) /// - [ ] make sure that operation to be executed on those rows won't simultaneously activate other rows holding valid data which will be used by future operations - /// - /// TODO: NEXT fn execute_next_instruction(&mut self, next_candidate: &(Signal, SubarrayId), network: &impl NetworkWithBackwardEdges) -> Vec { let (signal, result_subarray) = next_candidate; let node_id = signal.node_id(); @@ -639,7 +654,7 @@ impl Compiler { let compute_subarray = if signal.is_inverted() { result_subarray.get_partner_subarray() } else { *result_subarray }; // for inverted signals first compute the noninverted signal in the other subarray - println!("EXECUTING {:?}", next_candidate); + // println!("EXECUTING {:?}", next_candidate); next_instructions.append(&mut self.execute_and_or(node_id, compute_subarray, network)); // 2. Negate the result (if needed) @@ -804,7 +819,7 @@ mod tests { }); Compiler::new(CompilerSettings { print_program: true, verbose: true, print_compilation_stats: false, min_success_rate: 0.999, repetition_fracops: 5, safe_space_rows_per_subarray: 16, - config_file: CString::new("/home/alex/Documents/Studium/Sem6/inf_pm_fpa/lime-fork/config/fcdram_hksynx.toml").unwrap().as_ptr(), + config_file: CString::new("/config/fcdram_hksynx_compute_rows.json").unwrap().as_ptr(), do_save_config: false } ) } @@ -905,6 +920,7 @@ mod tests { } + // TODO #[test] fn test_select_compute_and_ref_subarray() { let compiler = init(); diff --git a/rs/src/fc_dram/cost_estimation.rs b/rs/src/fc_dram/cost_estimation.rs index 3996ea2..8adcf52 100644 --- a/rs/src/fc_dram/cost_estimation.rs +++ b/rs/src/fc_dram/cost_estimation.rs @@ -8,7 +8,7 @@ use std::collections::HashMap; use std::ops; use std::rc::Rc; -use super::architecture::{FCDRAMArchitecture, LogicOp, NeighboringSubarrayRelPosition, RowAddress, RowDistanceToSenseAmps, SuccessRate}; +use super::architecture::{FCDRAMArchitecture, LogicOp, SuccessRate}; pub struct CompilingCostFunction{} @@ -124,7 +124,6 @@ impl CostFunction for CompilingCostFunction { mem_cycles: mem_cycles_or, } }, - // TODO: increase cost of NOT? (since it moves the value to another subarray!) // eg prefer `OR(a,b)` to `NOT(AND( NOT(a), NOT(b)))` AoigLanguage::Not(_) => { let mem_cycles_not = FCDRAMArchitecture::get_instructions_implementation_of_logic_ops(LogicOp::NOT) diff --git a/rs/src/fc_dram/mod.rs b/rs/src/fc_dram/mod.rs index 1ebf30e..0622785 100644 --- a/rs/src/fc_dram/mod.rs +++ b/rs/src/fc_dram/mod.rs @@ -127,7 +127,7 @@ fn compiling_receiver<'a>( Extractor::new( graph, CompilingCostFunction {}, - ), // TODO: provide CostFunction !! + ), "t_extractor", settings.print_compilation_stats ); debug!("Outputs: {outputs:?}"); diff --git a/src/main.cpp b/src/main.cpp index d2a8dd0..6b415c7 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -32,14 +32,27 @@ void run_fcdram_example() { aig_network in; + // const auto b_i = in.create_pi(); + // const auto b_i_next = in.create_pi(); + // const auto m = in.create_pi(); + // + // const auto O1 = in.create_and( m, b_i_next ); + // const auto O2 = in.create_and( in.create_not( m ), b_i ); + // const auto O3 = in.create_and( in.create_not( O2 ), O1 ); + // const auto bi = in.create_or( O1, O2 ); + // in.create_po( bi ); + // in.create_po( O3 ); + + // test and(and2,and2) -> and4 const auto i1 = in.create_pi(); const auto i2 = in.create_pi(); const auto i3 = in.create_pi(); + const auto i4 = in.create_pi(); - const auto O1 = in.create_and( i3, i2 ); - const auto O2 = in.create_and( in.create_not( i3 ), i1 ); - const auto bi = in.create_or( O1, O2 ); - in.create_po( bi ); + const auto o1 = in.create_and( i1, i2); + const auto o2 = in.create_and( i3, i4); + const auto o3 = in.create_and( o1,o2 ); + in.create_po( o3 ); write_dot( in, "in.dot" ); std::cout << "Sending graph to fcdram_compile..." << std::endl; @@ -53,7 +66,7 @@ void run_fcdram_example() .min_success_rate= 99.9999, .repetition_fracops=5, // issue 5 FracOps per init of reference subarray .safe_space_rows_per_subarray = 16, - .config_file = "/home/alex/Documents/Studium/Sem6/inf_pm_fpa/lime-fork/config/fcdram_hksynx.toml", + .config_file = "", .do_save_config = true, } ) ); // std::cout << "IC:" << result.instruction_count << std::endl;