//! miden_processor/trace/execution_tracer.rs
1use alloc::{sync::Arc, vec::Vec};
2
3use miden_air::trace::chiplets::hasher::{HASH_CYCLE_LEN, HASH_CYCLE_LEN_FELT, STATE_WIDTH};
4use miden_core::{FMP_ADDR, FMP_INIT_VALUE, operations::Operation};
5
6use super::{
7    decoder::block_stack::{BlockInfo, BlockStack, ExecutionContextInfo},
8    stack::OverflowTable,
9    trace_state::{
10        AceReplay, AdviceReplay, BitwiseReplay, BlockAddressReplay, BlockStackReplay,
11        CoreTraceFragmentContext, CoreTraceState, DecoderState, ExecutionContextReplay,
12        ExecutionContextSystemInfo, ExecutionReplay, HasherRequestReplay, HasherResponseReplay,
13        KernelReplay, MastForestResolutionReplay, MemoryReadsReplay, MemoryWritesReplay,
14        RangeCheckerReplay, StackOverflowReplay, StackState, SystemState,
15    },
16    utils::split_u32_into_u16,
17};
18use crate::{
19    ContextId, EMPTY_WORD, FastProcessor, Felt, MIN_STACK_DEPTH, ONE, RowIndex, Word, ZERO,
20    continuation_stack::{Continuation, ContinuationStack},
21    crypto::merkle::MerklePath,
22    mast::{
23        BasicBlockNode, JoinNode, LoopNode, MastForest, MastNode, MastNodeExt, MastNodeId,
24        SplitNode,
25    },
26    processor::{Processor, StackInterface, SystemInterface},
27    trace::chiplets::{CircuitEvaluation, PTR_OFFSET_ELEM, PTR_OFFSET_WORD},
28    tracer::{OperationHelperRegisters, Tracer},
29};
30
31// STATE SNAPSHOT
32// ================================================================================================
33
/// Execution state snapshot, used to record the state at the start of a trace fragment.
#[derive(Debug)]
struct StateSnapshot {
    /// Core trace state (system, decoder and stack state) captured at the fragment boundary.
    state: CoreTraceState,
    /// Continuation stack describing where execution resumes at the start of the fragment.
    continuation_stack: ContinuationStack,
    /// The MAST forest that is active when the fragment begins executing.
    initial_mast_forest: Arc<MastForest>,
}
41
42// TRACE GENERATION CONTEXT
43// ================================================================================================
44
/// Aggregated output of an [ExecutionTracer]: the per-fragment contexts plus the replays needed
/// to generate the range checker and chiplet columns after execution completes.
#[derive(Debug)]
pub struct TraceGenerationContext {
    /// The list of trace fragment contexts built during execution.
    pub core_trace_contexts: Vec<CoreTraceFragmentContext>,

    // Replays that contain additional data needed to generate the range checker and chiplets
    // columns.
    pub range_checker_replay: RangeCheckerReplay,
    pub memory_writes: MemoryWritesReplay,
    pub bitwise_replay: BitwiseReplay,
    pub hasher_for_chiplet: HasherRequestReplay,
    pub kernel_replay: KernelReplay,
    pub ace_replay: AceReplay,

    /// The number of rows per core trace fragment, except for the last fragment which may be
    /// shorter.
    pub fragment_size: usize,
}
63
/// Builder for recording the context to generate trace fragments during execution.
///
/// Specifically, this records the information necessary to be able to generate the trace in
/// fragments of configurable length. This requires storing state at the very beginning of the
/// fragment before any operations are executed, as well as recording the various values read during
/// execution in the corresponding "replays" (e.g. values read from memory are recorded in
/// `MemoryReadsReplay`, values read from the advice provider are recorded in `AdviceReplay`, etc).
///
/// Then, to generate a trace fragment, we initialize the state of the processor using the stored
/// snapshot from the beginning of the fragment, and replay the recorded values as they are
/// encountered during execution (e.g. when encountering a memory read operation, we will replay the
/// value rather than querying the memory chiplet).
#[derive(Debug)]
pub struct ExecutionTracer {
    // State stored at the start of a core trace fragment.
    //
    // This field is only set to `None` at initialization, and is populated when starting a new
    // trace fragment with `Self::start_new_fragment_context()`. Hence, on the first call to
    // `Self::start_new_fragment_context()`, we don't extract a new `TraceFragmentContext`, but in
    // every other call, we do.
    state_snapshot: Option<StateSnapshot>,

    // Replay data aggregated throughout the execution of a core trace fragment
    overflow_table: OverflowTable,
    overflow_replay: StackOverflowReplay,

    block_stack: BlockStack,
    block_stack_replay: BlockStackReplay,
    execution_context_replay: ExecutionContextReplay,

    hasher_chiplet_shim: HasherChipletShim,
    memory_reads: MemoryReadsReplay,
    advice: AdviceReplay,
    external: MastForestResolutionReplay,

    // Replays that contain additional data needed to generate the range checker and chiplets
    // columns.
    range_checker: RangeCheckerReplay,
    memory_writes: MemoryWritesReplay,
    bitwise: BitwiseReplay,
    kernel: KernelReplay,
    hasher_for_chiplet: HasherRequestReplay,
    ace: AceReplay,

    // Output
    fragment_contexts: Vec<CoreTraceFragmentContext>,

    /// The number of rows per core trace fragment.
    fragment_size: usize,

    /// Flag set in `start_clock_cycle` when a Call/Syscall/Dyncall END is encountered, consumed
    /// in `finalize_clock_cycle` to call `overflow_table.restore_context()`. This is deferred to
    /// `finalize_clock_cycle` because `finalize_clock_cycle` is only called when the operation
    /// succeeds (i.e., the stack depth check passes).
    pending_restore_context: bool,

    /// Flag set in `start_clock_cycle` when an `EvalCircuit` operation is encountered, consumed
    /// in `finalize_clock_cycle` to record the memory reads performed by the operation.
    is_eval_circuit_op: bool,
}
124
impl ExecutionTracer {
    /// Creates a new `ExecutionTracer` with the given fragment size.
    #[inline(always)]
    pub fn new(fragment_size: usize) -> Self {
        Self {
            state_snapshot: None,
            overflow_table: OverflowTable::default(),
            overflow_replay: StackOverflowReplay::default(),
            block_stack: BlockStack::default(),
            block_stack_replay: BlockStackReplay::default(),
            execution_context_replay: ExecutionContextReplay::default(),
            hasher_chiplet_shim: HasherChipletShim::default(),
            memory_reads: MemoryReadsReplay::default(),
            range_checker: RangeCheckerReplay::default(),
            memory_writes: MemoryWritesReplay::default(),
            advice: AdviceReplay::default(),
            bitwise: BitwiseReplay::default(),
            kernel: KernelReplay::default(),
            hasher_for_chiplet: HasherRequestReplay::default(),
            ace: AceReplay::default(),
            external: MastForestResolutionReplay::default(),
            fragment_contexts: Vec::new(),
            fragment_size,
            pending_restore_context: false,
            is_eval_circuit_op: false,
        }
    }

    /// Convert the `ExecutionTracer` into a [TraceGenerationContext] using the data accumulated
    /// during execution.
    #[inline(always)]
    pub fn into_trace_generation_context(mut self) -> TraceGenerationContext {
        // If there is an ongoing trace state being built, finish it
        self.finish_current_fragment_context();

        TraceGenerationContext {
            core_trace_contexts: self.fragment_contexts,
            range_checker_replay: self.range_checker,
            memory_writes: self.memory_writes,
            bitwise_replay: self.bitwise,
            kernel_replay: self.kernel,
            hasher_for_chiplet: self.hasher_for_chiplet,
            ace_replay: self.ace,
            fragment_size: self.fragment_size,
        }
    }

    // HELPERS
    // -------------------------------------------------------------------------------------------

    /// Captures the internal state into a new [CoreTraceFragmentContext] (stored internally),
    /// resets the internal replay state of the builder, and records a new state snapshot, marking
    /// the beginning of the next trace state.
    ///
    /// This must be called at the beginning of a new trace fragment, before executing the first
    /// operation. Internal replay fields are expected to be accessed during execution of this new
    /// fragment to record data to be replayed by the trace fragment generators.
    #[inline(always)]
    fn start_new_fragment_context(
        &mut self,
        system_state: SystemState,
        stack_top: [Felt; MIN_STACK_DEPTH],
        mut continuation_stack: ContinuationStack,
        continuation: Continuation,
        current_forest: Arc<MastForest>,
    ) {
        // If there is an ongoing snapshot, finish it
        self.finish_current_fragment_context();

        // Start a new snapshot
        self.state_snapshot = {
            let decoder_state = {
                // An empty block stack means no block is currently being decoded, so both
                // addresses default to ZERO.
                if self.block_stack.is_empty() {
                    DecoderState { current_addr: ZERO, parent_addr: ZERO }
                } else {
                    let block_info = self.block_stack.peek();

                    DecoderState {
                        current_addr: block_info.addr,
                        parent_addr: block_info.parent_addr,
                    }
                }
            };
            let stack = {
                // Total stack depth is the fixed top portion plus any elements currently in the
                // overflow table for this context.
                let stack_depth =
                    MIN_STACK_DEPTH + self.overflow_table.num_elements_in_current_ctx();
                let last_overflow_addr = self.overflow_table.last_update_clk_in_current_ctx();
                StackState::new(stack_top, stack_depth, last_overflow_addr)
            };

            // Push new continuation corresponding to the current execution state
            continuation_stack.push_continuation(continuation);

            Some(StateSnapshot {
                state: CoreTraceState {
                    system: system_state,
                    decoder: decoder_state,
                    stack,
                },
                continuation_stack,
                initial_mast_forest: current_forest,
            })
        };
    }

    /// Records the hasher chiplet request and block stack update for the start of a control node
    /// (JOIN, SPLIT, LOOP, CALL/SYSCALL, or DYN/DYNCALL).
    ///
    /// For CALL/SYSCALL and DYNCALL nodes, this also captures the current execution context info
    /// so that it can be restored when the corresponding END is processed.
    ///
    /// # Panics
    /// Panics if called with a basic block or external node; those are handled elsewhere.
    #[inline(always)]
    fn record_control_node_start<P: Processor>(
        &mut self,
        node: &MastNode,
        processor: &P,
        current_forest: &MastForest,
    ) {
        let ctx_info = match node {
            MastNode::Join(node) => {
                let child1_hash = current_forest
                    .get_node_by_id(node.first())
                    .expect("join node's first child expected to be in the forest")
                    .digest();
                let child2_hash = current_forest
                    .get_node_by_id(node.second())
                    .expect("join node's second child expected to be in the forest")
                    .digest();
                self.hasher_for_chiplet.record_hash_control_block(
                    child1_hash,
                    child2_hash,
                    JoinNode::DOMAIN,
                    node.digest(),
                );

                None
            },
            MastNode::Split(node) => {
                let child1_hash = current_forest
                    .get_node_by_id(node.on_true())
                    .expect("split node's true child expected to be in the forest")
                    .digest();
                let child2_hash = current_forest
                    .get_node_by_id(node.on_false())
                    .expect("split node's false child expected to be in the forest")
                    .digest();
                self.hasher_for_chiplet.record_hash_control_block(
                    child1_hash,
                    child2_hash,
                    SplitNode::DOMAIN,
                    node.digest(),
                );

                None
            },
            MastNode::Loop(node) => {
                let body_hash = current_forest
                    .get_node_by_id(node.body())
                    .expect("loop node's body expected to be in the forest")
                    .digest();

                self.hasher_for_chiplet.record_hash_control_block(
                    body_hash,
                    EMPTY_WORD,
                    LoopNode::DOMAIN,
                    node.digest(),
                );

                None
            },
            MastNode::Call(node) => {
                let callee_hash = current_forest
                    .get_node_by_id(node.callee())
                    .expect("call node's callee expected to be in the forest")
                    .digest();

                self.hasher_for_chiplet.record_hash_control_block(
                    callee_hash,
                    EMPTY_WORD,
                    node.domain(),
                    node.digest(),
                );

                // CALL/SYSCALL starts a new execution context; capture the current one so it can
                // be restored at the matching END.
                let overflow_addr = self.overflow_table.last_update_clk_in_current_ctx();
                Some(ExecutionContextInfo::new(
                    processor.system().ctx(),
                    processor.system().caller_hash(),
                    processor.stack().depth(),
                    overflow_addr,
                ))
            },
            MastNode::Dyn(dyn_node) => {
                self.hasher_for_chiplet.record_hash_control_block(
                    EMPTY_WORD,
                    EMPTY_WORD,
                    dyn_node.domain(),
                    dyn_node.digest(),
                );

                if dyn_node.is_dyncall() {
                    let overflow_addr = self.overflow_table.last_update_clk_in_current_ctx();
                    // Note: the stack depth to record is the `current_stack_depth - 1` due to
                    // the semantics of DYNCALL. That is, the top of the
                    // stack contains the memory address to where the
                    // address to dynamically call is located. Then, the
                    // DYNCALL operation performs a drop, and
                    // records the stack depth after the drop as the beginning of
                    // the new context. For more information, look at the docs for how the
                    // constraints are designed; it's a bit tricky but it works.
                    let stack_depth_after_drop = processor.stack().depth() - 1;
                    Some(ExecutionContextInfo::new(
                        processor.system().ctx(),
                        processor.system().caller_hash(),
                        stack_depth_after_drop,
                        overflow_addr,
                    ))
                } else {
                    None
                }
            },
            MastNode::Block(_) => panic!(
                "`ExecutionTracer::record_basic_block_start()` must be called instead for basic blocks"
            ),
            MastNode::External(_) => panic!(
                "External nodes are guaranteed to be resolved before record_control_node_start is called"
            ),
        };

        let block_addr = self.hasher_chiplet_shim.record_hash_control_block();
        let parent_addr = self.block_stack.push(block_addr, ctx_info);
        self.block_stack_replay.record_node_start_parent_addr(parent_addr);
    }

    /// Records the block address for an END operation based on the block being popped.
    #[inline(always)]
    fn record_node_end(&mut self, block_info: &BlockInfo) {
        // After the pop, the new top of the block stack (if any) becomes the "previous" block;
        // an empty stack maps to ZERO addresses.
        let (prev_addr, prev_parent_addr) = if self.block_stack.is_empty() {
            (ZERO, ZERO)
        } else {
            let prev_block = self.block_stack.peek();
            (prev_block.addr, prev_block.parent_addr)
        };
        self.block_stack_replay
            .record_node_end(block_info.addr, prev_addr, prev_parent_addr);
    }

    /// Records the execution context system info for CALL/SYSCALL/DYNCALL operations.
    #[inline(always)]
    fn record_execution_context(&mut self, ctx_info: ExecutionContextSystemInfo) {
        self.execution_context_replay.record_execution_context(ctx_info);
    }

    /// Records the current core trace state, if any.
    ///
    /// Specifically, extracts the stored [StateSnapshot] as well as all the replay data recorded
    /// from the various components (e.g. memory, advice, etc) since the last call to this method.
    /// Resets the internal state to default values to prepare for the next trace fragment.
    ///
    /// Note that the very first time that this is called (at clock cycle 0), the snapshot will not
    /// contain any replay data, and so no core trace state will be recorded.
    #[inline(always)]
    fn finish_current_fragment_context(&mut self) {
        if let Some(snapshot) = self.state_snapshot.take() {
            // Extract the replays
            let (hasher_replay, block_addr_replay) = self.hasher_chiplet_shim.extract_replay();
            let memory_reads_replay = core::mem::take(&mut self.memory_reads);
            let advice_replay = core::mem::take(&mut self.advice);
            let external_replay = core::mem::take(&mut self.external);
            let stack_overflow_replay = core::mem::take(&mut self.overflow_replay);
            let block_stack_replay = core::mem::take(&mut self.block_stack_replay);
            let execution_context_replay = core::mem::take(&mut self.execution_context_replay);

            let trace_state = CoreTraceFragmentContext {
                state: snapshot.state,
                replay: ExecutionReplay {
                    hasher: hasher_replay,
                    block_address: block_addr_replay,
                    memory_reads: memory_reads_replay,
                    advice: advice_replay,
                    mast_forest_resolution: external_replay,
                    stack_overflow: stack_overflow_replay,
                    block_stack: block_stack_replay,
                    execution_context: execution_context_replay,
                },
                continuation: snapshot.continuation_stack,
                initial_mast_forest: snapshot.initial_mast_forest,
            };

            self.fragment_contexts.push(trace_state);
        }
    }

    /// Pushes the value at stack position 15 onto the overflow table. This must be called in
    /// `Tracer::start_clock_cycle()` *before* the processor increments the stack size, where stack
    /// position 15 at the start of the clock cycle corresponds to the element that overflows.
    #[inline(always)]
    fn increment_stack_size(&mut self, processor: &FastProcessor) {
        let new_overflow_value = processor.stack_get(15);
        self.overflow_table.push(new_overflow_value, processor.system().clock());
    }

    /// Pops a value from the overflow table and records it for replay.
    #[inline(always)]
    fn decrement_stack_size(&mut self) {
        // If the overflow table is empty, the shrink needs no replay data (the stack is at its
        // minimum depth and is padded by the processor).
        if let Some(popped_value) = self.overflow_table.pop() {
            let new_overflow_addr = self.overflow_table.last_update_clk_in_current_ctx();
            self.overflow_replay.record_pop_overflow(popped_value, new_overflow_addr);
        }
    }
}
429
430impl Tracer for ExecutionTracer {
431    type Processor = FastProcessor;
432
    /// When sufficiently many clock cycles have elapsed, starts a new trace state. Also updates the
    /// internal block stack.
    ///
    /// Dispatches on the continuation that is about to execute: stack-size changes are mirrored in
    /// the overflow table, node starts are recorded for the hasher/block stack, and node ends pop
    /// the block stack and (for CALL/SYSCALL/DYNCALL) schedule a context restore.
    #[inline(always)]
    fn start_clock_cycle(
        &mut self,
        processor: &FastProcessor,
        continuation: Continuation,
        continuation_stack: &ContinuationStack,
        current_forest: &Arc<MastForest>,
    ) {
        // check if we need to start a new trace state
        if processor.system().clock().as_usize().is_multiple_of(self.fragment_size) {
            self.start_new_fragment_context(
                SystemState::from_processor(processor),
                processor
                    .stack_top()
                    .try_into()
                    .expect("stack_top expected to be MIN_STACK_DEPTH elements"),
                continuation_stack.clone(),
                continuation.clone(),
                current_forest.clone(),
            );
        }

        match continuation {
            Continuation::ResumeBasicBlock { node_id, batch_index, op_idx_in_batch } => {
                // Update overflow table based on whether the operation increments or decrements
                // the stack size.
                let basic_block = current_forest[node_id].unwrap_basic_block();
                let op = &basic_block.op_batches()[batch_index].ops()[op_idx_in_batch];

                if op.increments_stack_size() {
                    self.increment_stack_size(processor);
                } else if op.decrements_stack_size() {
                    self.decrement_stack_size();
                }

                if matches!(op, Operation::EvalCircuit) {
                    self.is_eval_circuit_op = true;
                }
            },
            Continuation::StartNode(mast_node_id) => match &current_forest[mast_node_id] {
                MastNode::Join(_) => {
                    self.record_control_node_start(
                        &current_forest[mast_node_id],
                        processor,
                        current_forest,
                    );
                },
                MastNode::Split(_) | MastNode::Loop(_) => {
                    // SPLIT and LOOP both drop the condition from the top of the stack.
                    self.record_control_node_start(
                        &current_forest[mast_node_id],
                        processor,
                        current_forest,
                    );
                    self.decrement_stack_size();
                },
                MastNode::Call(_) => {
                    self.record_control_node_start(
                        &current_forest[mast_node_id],
                        processor,
                        current_forest,
                    );
                    self.overflow_table.start_context();
                },
                MastNode::Dyn(dyn_node) => {
                    self.record_control_node_start(
                        &current_forest[mast_node_id],
                        processor,
                        current_forest,
                    );
                    // DYN and DYNCALL both drop the memory address from the stack.
                    self.decrement_stack_size();

                    if dyn_node.is_dyncall() {
                        // Note: the overflow pop (stack size decrement above) must happen before
                        // starting the new context so that it operates on the old context's
                        // overflow table, per the semantics of dyncall.
                        self.overflow_table.start_context();
                    }
                },
                MastNode::Block(basic_block_node) => {
                    self.hasher_for_chiplet.record_hash_basic_block(
                        current_forest.clone(),
                        mast_node_id,
                        basic_block_node.digest(),
                    );
                    let block_addr =
                        self.hasher_chiplet_shim.record_hash_basic_block(basic_block_node);
                    let parent_addr =
                        self.block_stack.push(block_addr, None);
                    self.block_stack_replay.record_node_start_parent_addr(parent_addr);
                },
                MastNode::External(_) => unreachable!(
                    "start_clock_cycle is guaranteed not to be called on external nodes"
                ),
            },
            Continuation::Respan { node_id: _, batch_index: _ } => {
                // RESPAN advances the current block address by one hash cycle.
                self.block_stack.peek_mut().addr += HASH_CYCLE_LEN_FELT;
            },
            Continuation::FinishLoop { node_id: _, was_entered }
                if was_entered && processor.stack_get(0) == ONE =>
            {
                // This is a REPEAT operation, which drops the condition (top element) off the stack
                self.decrement_stack_size();
            },
            Continuation::FinishJoin(_)
            | Continuation::FinishSplit(_)
            | Continuation::FinishCall(_)
            | Continuation::FinishDyn(_)
            | Continuation::FinishLoop { .. } // not a REPEAT, which is handled separately above
            | Continuation::FinishBasicBlock(_) => {
                // The END of a loop that was entered drops the condition from the stack.
                if matches!(
                    &continuation,
                    Continuation::FinishLoop { was_entered, .. } if *was_entered
                ) {
                    self.decrement_stack_size();
                }

                // This is an END operation; pop the block stack and record the node end
                let block_info = self.block_stack.pop();
                self.record_node_end(&block_info);

                if let Some(ctx_info) = block_info.ctx_info {
                    self.record_execution_context(ExecutionContextSystemInfo {
                        parent_ctx: ctx_info.parent_ctx,
                        parent_fn_hash: ctx_info.parent_fn_hash,
                    });

                    // Defer the overflow-table context restore to `finalize_clock_cycle`.
                    self.pending_restore_context = true;
                }
            },
            Continuation::FinishExternal(_)
            | Continuation::EnterForest(_)
            | Continuation::AfterExitDecorators(_)
            | Continuation::AfterExitDecoratorsBasicBlock(_) => {
                panic!(
                    "FinishExternal, EnterForest, AfterExitDecorators and AfterExitDecoratorsBasicBlock continuations are guaranteed not to be passed here"
                )
            },
        }
    }
576
    /// Records that `node_id` resolved to `forest`, so the resolution can be replayed without
    /// re-resolving during trace generation.
    #[inline(always)]
    fn record_mast_forest_resolution(&mut self, node_id: MastNodeId, forest: &Arc<MastForest>) {
        self.external.record_resolution(node_id, forest.clone());
    }
581
    /// Records a hasher permutation: the input state goes to the chiplet request replay, and the
    /// output state to the hasher chiplet shim.
    #[inline(always)]
    fn record_hasher_permute(
        &mut self,
        input_state: [Felt; STATE_WIDTH],
        output_state: [Felt; STATE_WIDTH],
    ) {
        self.hasher_for_chiplet.record_permute_input(input_state);
        self.hasher_chiplet_shim.record_permute_output(output_state);
    }
591
    /// Records a Merkle root computation in both the hasher shim and the chiplet request replay.
    ///
    /// # Panics
    /// Panics if `path` is `None`; the execution tracer requires the Merkle path to be available.
    #[inline(always)]
    fn record_hasher_build_merkle_root(
        &mut self,
        node: Word,
        path: Option<&MerklePath>,
        index: Felt,
        output_root: Word,
    ) {
        let path = path.expect("execution tracer expects a valid Merkle path");
        self.hasher_chiplet_shim.record_build_merkle_root(path, output_root);
        self.hasher_for_chiplet.record_build_merkle_root(node, path.clone(), index);
    }
604
    /// Records a Merkle root update (old root -> new root) in both the hasher shim and the
    /// chiplet request replay.
    ///
    /// # Panics
    /// Panics if `path` is `None`; the execution tracer requires the Merkle path to be available.
    #[inline(always)]
    fn record_hasher_update_merkle_root(
        &mut self,
        old_value: Word,
        new_value: Word,
        path: Option<&MerklePath>,
        index: Felt,
        old_root: Word,
        new_root: Word,
    ) {
        let path = path.expect("execution tracer expects a valid Merkle path");
        self.hasher_chiplet_shim.record_update_merkle_root(path, old_root, new_root);
        self.hasher_for_chiplet.record_update_merkle_root(
            old_value,
            new_value,
            path.clone(),
            index,
        );
    }
624
    /// Records a single-element memory read for replay.
    #[inline(always)]
    fn record_memory_read_element(
        &mut self,
        element: Felt,
        addr: Felt,
        ctx: ContextId,
        clk: RowIndex,
    ) {
        self.memory_reads.record_read_element(element, addr, ctx, clk);
    }
635
    /// Records a word-sized memory read for replay.
    #[inline(always)]
    fn record_memory_read_word(&mut self, word: Word, addr: Felt, ctx: ContextId, clk: RowIndex) {
        self.memory_reads.record_read_word(word, addr, ctx, clk);
    }
640
    /// Records a single-element memory write for the memory chiplet columns.
    #[inline(always)]
    fn record_memory_write_element(
        &mut self,
        element: Felt,
        addr: Felt,
        ctx: ContextId,
        clk: RowIndex,
    ) {
        self.memory_writes.record_write_element(element, addr, ctx, clk);
    }
651
    /// Records a word-sized memory write for the memory chiplet columns.
    #[inline(always)]
    fn record_memory_write_word(&mut self, word: Word, addr: Felt, ctx: ContextId, clk: RowIndex) {
        self.memory_writes.record_write_word(word, addr, ctx, clk);
    }
656
657    #[inline(always)]
658    fn record_memory_read_element_pair(
659        &mut self,
660        element_0: Felt,
661        addr_0: Felt,
662        element_1: Felt,
663        addr_1: Felt,
664        ctx: ContextId,
665        clk: RowIndex,
666    ) {
667        self.memory_reads.record_read_element(element_0, addr_0, ctx, clk);
668        self.memory_reads.record_read_element(element_1, addr_1, ctx, clk);
669    }
670
671    #[inline(always)]
672    fn record_memory_read_dword(
673        &mut self,
674        words: [Word; 2],
675        addr: Felt,
676        ctx: ContextId,
677        clk: RowIndex,
678    ) {
679        self.memory_reads.record_read_word(words[0], addr, ctx, clk);
680        self.memory_reads.record_read_word(words[1], addr + Felt::new(4), ctx, clk);
681    }
682
    /// Records the memory traffic of a DYNCALL: the callee hash word read from the caller's
    /// context, and the FMP initialization write in the new context.
    #[inline(always)]
    fn record_dyncall_memory(
        &mut self,
        callee_hash: Word,
        read_addr: Felt,
        read_ctx: ContextId,
        fmp_ctx: ContextId,
        clk: RowIndex,
    ) {
        self.memory_reads.record_read_word(callee_hash, read_addr, read_ctx, clk);
        self.memory_writes.record_write_element(FMP_INIT_VALUE, FMP_ADDR, fmp_ctx, clk);
    }
695
    /// Records the memory traffic of a crypto-stream operation: two plaintext words read starting
    /// at `src_addr`, and two ciphertext words written starting at `dst_addr` (second word of each
    /// pair at offset +4).
    #[inline(always)]
    fn record_crypto_stream(
        &mut self,
        plaintext: [Word; 2],
        src_addr: Felt,
        ciphertext: [Word; 2],
        dst_addr: Felt,
        ctx: ContextId,
        clk: RowIndex,
    ) {
        self.memory_reads.record_read_word(plaintext[0], src_addr, ctx, clk);
        self.memory_reads
            .record_read_word(plaintext[1], src_addr + Felt::new(4), ctx, clk);
        self.memory_writes.record_write_word(ciphertext[0], dst_addr, ctx, clk);
        self.memory_writes
            .record_write_word(ciphertext[1], dst_addr + Felt::new(4), ctx, clk);
    }
713
    /// Records a pipe operation: two words popped from the advice stack and written to memory at
    /// `addr` and `addr + 4`.
    #[inline(always)]
    fn record_pipe(&mut self, words: [Word; 2], addr: Felt, ctx: ContextId, clk: RowIndex) {
        self.advice.record_pop_stack_dword(words);
        self.memory_writes.record_write_word(words[0], addr, ctx, clk);
        self.memory_writes.record_write_word(words[1], addr + Felt::new(4), ctx, clk);
    }
720
    /// Records a single value popped from the advice stack for replay.
    #[inline(always)]
    fn record_advice_pop_stack(&mut self, value: Felt) {
        self.advice.record_pop_stack(value);
    }
725
    /// Records a word popped from the advice stack for replay.
    #[inline(always)]
    fn record_advice_pop_stack_word(&mut self, word: Word) {
        self.advice.record_pop_stack_word(word);
    }
730
    /// Records a u32 AND request for the bitwise chiplet columns.
    #[inline(always)]
    fn record_u32and(&mut self, a: Felt, b: Felt) {
        self.bitwise.record_u32and(a, b);
    }
735
736    #[inline(always)]
737    fn record_u32xor(&mut self, a: Felt, b: Felt) {
738        self.bitwise.record_u32xor(a, b);
739    }
740
741    #[inline(always)]
742    fn record_u32_range_checks(&mut self, clk: RowIndex, u32_lo: Felt, u32_hi: Felt) {
743        let (t1, t0) = split_u32_into_u16(u32_lo.as_canonical_u64());
744        let (t3, t2) = split_u32_into_u16(u32_hi.as_canonical_u64());
745
746        self.range_checker.record_range_check_u32(clk, [t0, t1, t2, t3]);
747    }
748
749    #[inline(always)]
750    fn record_kernel_proc_access(&mut self, proc_hash: Word) {
751        self.kernel.record_kernel_proc_access(proc_hash);
752    }
753
754    #[inline(always)]
755    fn record_circuit_evaluation(&mut self, circuit_evaluation: CircuitEvaluation) {
756        self.ace.record_circuit_evaluation(circuit_evaluation);
757    }
758
    /// Performs deferred bookkeeping for the clock cycle that just completed successfully.
    ///
    /// Two actions are intentionally deferred from `start_clock_cycle` to here, because this
    /// method only runs once the operation is known to have succeeded:
    /// - restoring the stack overflow table after a Call/Syscall/Dyncall END, and
    /// - recording the memory reads performed by an `EvalCircuit` operation.
    #[inline(always)]
    fn finalize_clock_cycle(
        &mut self,
        processor: &FastProcessor,
        _op_helper_registers: OperationHelperRegisters,
        _current_forest: &Arc<MastForest>,
    ) {
        // Restore the overflow table context for Call/Syscall/Dyncall END. This is deferred
        // from start_clock_cycle because finalize_clock_cycle is only called when the operation
        // succeeds (i.e., the stack depth check in processor.restore_context() passes).
        if self.pending_restore_context {
            // Restore context for call/syscall/dyncall: pop the current context's
            // (empty) overflow stack and restore the previous context's overflow state.
            self.overflow_table.restore_context();
            // Record the restored overflow address (MIN_STACK_DEPTH plus the number of
            // overflow elements) and the clock of the last overflow update, so core trace
            // generation can replay the restore without re-running the overflow table.
            self.overflow_replay.record_restore_context_overflow_addr(
                MIN_STACK_DEPTH + self.overflow_table.num_elements_in_current_ctx(),
                self.overflow_table.last_update_clk_in_current_ctx(),
            );

            self.pending_restore_context = false;
        }

        // Record all memory reads performed during EvalCircuit operations. We run this in
        // `finalize_clock_cycle` to ensure that the memory reads are only recorded if the operation
        // succeeds (and hence the values read from the stack can be assumed to be valid).
        if self.is_eval_circuit_op {
            // Stack layout after a successful EvalCircuit: [ptr, num_read, num_eval, ...].
            let ptr = processor.stack_get(0);
            let num_read = processor.stack_get(1).as_canonical_u64();
            let num_eval = processor.stack_get(2).as_canonical_u64();
            let ctx = processor.ctx();
            let clk = processor.clock();

            // NOTE(review): reads are replayed a word at a time, so each word read covers two
            // of the `num_read` entries — confirm this matches the ACE chiplet's READ-row
            // layout.
            let num_read_rows = num_read / 2;

            // First replay the word-sized reads, starting at `ptr` and advancing one word per
            // iteration...
            let mut addr = ptr;
            for _ in 0..num_read_rows {
                let word = processor
                    .memory()
                    .read_word(ctx, addr, clk)
                    .expect("EvalCircuit memory read should not fail after successful execution");
                self.memory_reads.record_read_word(word, addr, ctx, clk);
                addr += PTR_OFFSET_WORD;
            }
            // ...then the element-sized reads, continuing from where the word reads ended.
            for _ in 0..num_eval {
                let element = processor
                    .memory()
                    .read_element(ctx, addr)
                    .expect("EvalCircuit memory read should not fail after successful execution");
                self.memory_reads.record_read_element(element, addr, ctx, clk);
                addr += PTR_OFFSET_ELEM;
            }

            self.is_eval_circuit_op = false;
        }
    }
814}
815
816// HASHER CHIPLET SHIM
817// ================================================================================================
818
/// The number of hasher rows per permutation operation. This is used to compute the address for
/// the next operation in the hasher chiplet.
///
/// Mirrors `HASH_CYCLE_LEN` from `miden_air`, converted to `u32` for address arithmetic.
const NUM_HASHER_ROWS_PER_PERMUTATION: u32 = HASH_CYCLE_LEN as u32;
822
/// Implements a shim for the hasher chiplet, where the responses of the hasher chiplet are emulated
/// and recorded for later replay.
///
/// This is used to simulate hasher operations in parallel trace generation without needing to
/// actually generate the hasher trace. All hasher operations are recorded during fast execution and
/// then replayed during core trace generation.
#[derive(Debug)]
pub struct HasherChipletShim {
    /// The address of the next MAST node encountered during execution. This field is used to keep
    /// track of the number of rows in the hasher chiplet, from which the address of the next MAST
    /// node is derived.
    addr: u32,
    /// Replay of the hasher chiplet responses (permutations and Merkle root computations/updates).
    hasher_replay: HasherResponseReplay,
    /// Replay of the block addresses handed out when hashing control and basic blocks.
    block_addr_replay: BlockAddressReplay,
}
839
840impl HasherChipletShim {
841    /// Creates a new [HasherChipletShim].
842    pub fn new() -> Self {
843        Self {
844            addr: 1,
845            hasher_replay: HasherResponseReplay::default(),
846            block_addr_replay: BlockAddressReplay::default(),
847        }
848    }
849
850    /// Records the address returned from a call to `Hasher::hash_control_block()`.
851    pub fn record_hash_control_block(&mut self) -> Felt {
852        let block_addr = Felt::from_u32(self.addr);
853
854        self.block_addr_replay.record_block_address(block_addr);
855        self.addr += NUM_HASHER_ROWS_PER_PERMUTATION;
856
857        block_addr
858    }
859
860    /// Records the address returned from a call to `Hasher::hash_basic_block()`.
861    pub fn record_hash_basic_block(&mut self, basic_block_node: &BasicBlockNode) -> Felt {
862        let block_addr = Felt::from_u32(self.addr);
863
864        self.block_addr_replay.record_block_address(block_addr);
865        self.addr += NUM_HASHER_ROWS_PER_PERMUTATION * basic_block_node.num_op_batches() as u32;
866
867        block_addr
868    }
869    /// Records the result of a call to `Hasher::permute()`.
870    pub fn record_permute_output(&mut self, hashed_state: [Felt; 12]) {
871        self.hasher_replay.record_permute(Felt::from_u32(self.addr), hashed_state);
872        self.addr += NUM_HASHER_ROWS_PER_PERMUTATION;
873    }
874
875    /// Records the result of a call to `Hasher::build_merkle_root()`.
876    pub fn record_build_merkle_root(&mut self, path: &MerklePath, computed_root: Word) {
877        self.hasher_replay
878            .record_build_merkle_root(Felt::from_u32(self.addr), computed_root);
879        self.addr += NUM_HASHER_ROWS_PER_PERMUTATION * path.depth() as u32;
880    }
881
882    /// Records the result of a call to `Hasher::update_merkle_root()`.
883    pub fn record_update_merkle_root(&mut self, path: &MerklePath, old_root: Word, new_root: Word) {
884        self.hasher_replay
885            .record_update_merkle_root(Felt::from_u32(self.addr), old_root, new_root);
886
887        // The Merkle path is verified twice: once for the old root and once for the new root.
888        self.addr += 2 * NUM_HASHER_ROWS_PER_PERMUTATION * path.depth() as u32;
889    }
890
891    pub fn extract_replay(&mut self) -> (HasherResponseReplay, BlockAddressReplay) {
892        (
893            core::mem::take(&mut self.hasher_replay),
894            core::mem::take(&mut self.block_addr_replay),
895        )
896    }
897}
898
899impl Default for HasherChipletShim {
900    fn default() -> Self {
901        Self::new()
902    }
903}