Skip to main content

miden_core/mast/debuginfo/
debug_var_storage.rs

1//! Dedicated storage for DebugVar decorators in a compressed sparse row (CSR) format.
2//!
3//! This module provides efficient storage and access for debug variable information,
4//! separate from the main decorator storage. This allows debuggers to efficiently
5//! query variable information without iterating through all decorators.
6
7use alloc::{
8    string::{String, ToString},
9    vec::Vec,
10};
11
12use miden_utils_indexing::{Idx, IndexVec};
13#[cfg(feature = "serde")]
14use serde::{Deserialize, Serialize};
15
16use super::DecoratorIndexError;
17use crate::{
18    mast::MastNodeId,
19    serde::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable},
20};
21
22// DEBUG VAR ID
23// ================================================================================================
24
25/// An identifier for a debug variable stored in [DebugInfo](super::DebugInfo).
26///
27/// This is analogous to [DecoratorId](crate::mast::DecoratorId) but specifically for debug
28/// variable information.
29#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
30#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
31pub struct DebugVarId(u32);
32
33impl DebugVarId {
34    /// Returns a new `DebugVarId` with the provided inner value, or an error if the provided
35    /// `value` is greater than or equal to `bound`.
36    pub fn from_u32_bounded(value: u32, bound: usize) -> Result<Self, DeserializationError> {
37        if (value as usize) < bound {
38            Ok(Self(value))
39        } else {
40            Err(DeserializationError::InvalidValue(format!(
41                "DebugVarId {} exceeds bound {}",
42                value, bound
43            )))
44        }
45    }
46
47    /// Returns the inner value as a usize.
48    pub fn to_usize(self) -> usize {
49        self.0 as usize
50    }
51
52    /// Returns the inner value as a u32.
53    pub fn as_u32(&self) -> u32 {
54        self.0
55    }
56}
57
58impl Idx for DebugVarId {}
59
60impl From<u32> for DebugVarId {
61    fn from(value: u32) -> Self {
62        Self(value)
63    }
64}
65
66impl From<DebugVarId> for u32 {
67    fn from(value: DebugVarId) -> Self {
68        value.0
69    }
70}
71
72impl Serializable for DebugVarId {
73    fn write_into<W: ByteWriter>(&self, target: &mut W) {
74        self.0.write_into(target);
75    }
76}
77
78impl Deserializable for DebugVarId {
79    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
80        let value = u32::read_from(source)?;
81        Ok(Self(value))
82    }
83}
84
85// OP TO DEBUG VAR IDS
86// ================================================================================================
87
88/// A two-level compressed sparse row (CSR) representation for indexing debug variable IDs
89/// per operation per node.
90///
91/// This structure is analogous to [OpToDecoratorIds](super::OpToDecoratorIds) but specifically for
92/// debug variable information. It provides efficient access to debug variables in a hierarchical
93/// manner:
94/// 1. First level: Node -> Operations
95/// 2. Second level: Operation -> DebugVarIds
96///
97/// The actual `DebugVarInfo` values are stored separately in the `debug_vars` field of
98/// `DebugInfo`, indexed by `DebugVarId`.
99#[derive(Debug, Clone, PartialEq, Eq)]
100#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
101pub struct OpToDebugVarIds {
102    /// All the debug var IDs per operation per node
103    debug_var_ids: Vec<DebugVarId>,
104    /// Pointer indices for operations within debug_var_ids
105    op_indptr_for_var_ids: Vec<usize>,
106    /// Pointer indices for nodes within op_indptr_for_var_ids
107    node_indptr_for_op_idx: IndexVec<MastNodeId, usize>,
108}
109
110impl OpToDebugVarIds {
111    /// Create a new empty OpToDebugVarIds.
112    pub fn new() -> Self {
113        Self::with_capacity(0, 0, 0)
114    }
115
116    /// Create a new empty OpToDebugVarIds with the specified capacity.
117    pub fn with_capacity(
118        nodes_capacity: usize,
119        operations_capacity: usize,
120        debug_var_ids_capacity: usize,
121    ) -> Self {
122        Self {
123            debug_var_ids: Vec::with_capacity(debug_var_ids_capacity),
124            op_indptr_for_var_ids: Vec::with_capacity(operations_capacity + 1),
125            node_indptr_for_op_idx: IndexVec::with_capacity(nodes_capacity + 1),
126        }
127    }
128
129    /// Create an OpToDebugVarIds from raw CSR components.
130    pub(super) fn from_components(
131        debug_var_ids: Vec<DebugVarId>,
132        op_indptr_for_var_ids: Vec<usize>,
133        node_indptr_for_op_idx: IndexVec<MastNodeId, usize>,
134    ) -> Result<Self, DecoratorIndexError> {
135        // Completely empty structures are valid
136        if debug_var_ids.is_empty()
137            && op_indptr_for_var_ids.is_empty()
138            && node_indptr_for_op_idx.is_empty()
139        {
140            return Ok(Self {
141                debug_var_ids,
142                op_indptr_for_var_ids,
143                node_indptr_for_op_idx,
144            });
145        }
146
147        // Nodes with no debug vars are valid
148        if debug_var_ids.is_empty() && op_indptr_for_var_ids.is_empty() {
149            if node_indptr_for_op_idx.iter().all(|&ptr| ptr == 0) {
150                return Ok(Self {
151                    debug_var_ids,
152                    op_indptr_for_var_ids,
153                    node_indptr_for_op_idx,
154                });
155            } else {
156                return Err(DecoratorIndexError::InternalStructure);
157            }
158        }
159
160        // Validate the structure
161        if op_indptr_for_var_ids.is_empty() {
162            return Err(DecoratorIndexError::InternalStructure);
163        }
164
165        if op_indptr_for_var_ids[0] != 0 {
166            return Err(DecoratorIndexError::InternalStructure);
167        }
168
169        let Some(&last_op_ptr) = op_indptr_for_var_ids.last() else {
170            return Err(DecoratorIndexError::InternalStructure);
171        };
172        if last_op_ptr > debug_var_ids.len() {
173            return Err(DecoratorIndexError::InternalStructure);
174        }
175
176        if node_indptr_for_op_idx.is_empty() {
177            return Err(DecoratorIndexError::InternalStructure);
178        }
179
180        let node_slice = node_indptr_for_op_idx.as_slice();
181
182        if node_slice[0] != 0 {
183            return Err(DecoratorIndexError::InternalStructure);
184        }
185
186        let Some(&last_node_ptr) = node_slice.last() else {
187            return Err(DecoratorIndexError::InternalStructure);
188        };
189        if last_node_ptr > op_indptr_for_var_ids.len() - 1 {
190            return Err(DecoratorIndexError::InternalStructure);
191        }
192
193        // Ensure monotonicity
194        for window in op_indptr_for_var_ids.windows(2) {
195            if window[0] > window[1] {
196                return Err(DecoratorIndexError::InternalStructure);
197            }
198        }
199
200        for window in node_slice.windows(2) {
201            if window[0] > window[1] {
202                return Err(DecoratorIndexError::InternalStructure);
203            }
204        }
205
206        Ok(Self {
207            debug_var_ids,
208            op_indptr_for_var_ids,
209            node_indptr_for_op_idx,
210        })
211    }
212
213    /// Validate CSR structure integrity.
214    pub(super) fn validate_csr(&self, debug_var_count: usize) -> Result<(), String> {
215        // Completely empty structures are valid
216        if self.debug_var_ids.is_empty()
217            && self.op_indptr_for_var_ids.is_empty()
218            && self.node_indptr_for_op_idx.is_empty()
219        {
220            return Ok(());
221        }
222
223        // Nodes with no debug vars are valid
224        if self.debug_var_ids.is_empty() && self.op_indptr_for_var_ids.is_empty() {
225            if !self.node_indptr_for_op_idx.iter().all(|&ptr| ptr == 0) {
226                return Err("node pointers must all be 0 when there are no debug vars".to_string());
227            }
228            return Ok(());
229        }
230
231        // Validate all debug var IDs
232        for &var_id in &self.debug_var_ids {
233            if var_id.to_usize() >= debug_var_count {
234                return Err(format!(
235                    "Invalid debug var ID {}: exceeds count {}",
236                    var_id.to_usize(),
237                    debug_var_count
238                ));
239            }
240        }
241
242        // Validate op_indptr_for_var_ids
243        if self.op_indptr_for_var_ids.is_empty() {
244            return Err("op_indptr_for_var_ids cannot be empty".to_string());
245        }
246
247        if self.op_indptr_for_var_ids[0] != 0 {
248            return Err("op_indptr_for_var_ids must start at 0".to_string());
249        }
250
251        for window in self.op_indptr_for_var_ids.windows(2) {
252            if window[0] > window[1] {
253                return Err(format!(
254                    "op_indptr_for_var_ids not monotonic: {} > {}",
255                    window[0], window[1]
256                ));
257            }
258        }
259
260        if *self.op_indptr_for_var_ids.last().unwrap() != self.debug_var_ids.len() {
261            return Err(format!(
262                "op_indptr_for_var_ids end {} doesn't match debug_var_ids length {}",
263                self.op_indptr_for_var_ids.last().unwrap(),
264                self.debug_var_ids.len()
265            ));
266        }
267
268        // Validate node_indptr_for_op_idx
269        let node_slice = self.node_indptr_for_op_idx.as_slice();
270        if node_slice.is_empty() {
271            return Err("node_indptr_for_op_idx cannot be empty".to_string());
272        }
273
274        if node_slice[0] != 0 {
275            return Err("node_indptr_for_op_idx must start at 0".to_string());
276        }
277
278        for window in node_slice.windows(2) {
279            if window[0] > window[1] {
280                return Err(format!(
281                    "node_indptr_for_op_idx not monotonic: {} > {}",
282                    window[0], window[1]
283                ));
284            }
285        }
286
287        let max_node_ptr = self.op_indptr_for_var_ids.len() - 1;
288        if *node_slice.last().unwrap() > max_node_ptr {
289            return Err(format!(
290                "node_indptr_for_op_idx end {} exceeds op_indptr bounds {}",
291                node_slice.last().unwrap(),
292                max_node_ptr
293            ));
294        }
295
296        Ok(())
297    }
298
299    /// Returns true if this storage is empty.
300    pub fn is_empty(&self) -> bool {
301        self.node_indptr_for_op_idx.is_empty()
302    }
303
304    /// Get the number of nodes in this storage.
305    pub fn num_nodes(&self) -> usize {
306        if self.node_indptr_for_op_idx.is_empty() {
307            0
308        } else {
309            self.node_indptr_for_op_idx.len() - 1
310        }
311    }
312
313    /// Get the total number of debug var IDs.
314    pub fn num_debug_var_ids(&self) -> usize {
315        self.debug_var_ids.len()
316    }
317
318    /// Add debug variable information for a node incrementally.
319    ///
320    /// This method allows building up the structure by adding debug var IDs for nodes
321    /// in sequential order only.
322    pub fn add_debug_var_info_for_node(
323        &mut self,
324        node: MastNodeId,
325        debug_vars_info: Vec<(usize, DebugVarId)>,
326    ) -> Result<(), DecoratorIndexError> {
327        // Enforce sequential node ids
328        let expected = MastNodeId::new_unchecked(self.num_nodes() as u32);
329        if node < expected {
330            return Err(DecoratorIndexError::NodeIndex(node));
331        }
332        // Create empty nodes for gaps
333        for idx in expected.0..node.0 {
334            self.add_debug_var_info_for_node(MastNodeId::new_unchecked(idx), vec![])?;
335        }
336
337        let op_start = self.op_indptr_for_var_ids.len();
338
339        if self.node_indptr_for_op_idx.is_empty() {
340            self.node_indptr_for_op_idx
341                .push(op_start)
342                .map_err(|_| DecoratorIndexError::OperationIndex { node, operation: op_start })?;
343        } else {
344            let last = MastNodeId::new_unchecked((self.node_indptr_for_op_idx.len() - 1) as u32);
345            self.node_indptr_for_op_idx[last] = op_start;
346        }
347
348        if debug_vars_info.is_empty() {
349            if op_start == self.op_indptr_for_var_ids.len()
350                && !self.op_indptr_for_var_ids.is_empty()
351            {
352                self.op_indptr_for_var_ids.push(self.debug_var_ids.len());
353            }
354
355            self.node_indptr_for_op_idx
356                .push(op_start)
357                .map_err(|_| DecoratorIndexError::OperationIndex { node, operation: op_start })?;
358        } else {
359            let max_op_idx = debug_vars_info.last().unwrap().0;
360            let mut it = debug_vars_info.into_iter().peekable();
361
362            for op in 0..=max_op_idx {
363                self.op_indptr_for_var_ids.push(self.debug_var_ids.len());
364                while it.peek().is_some_and(|(i, _)| *i == op) {
365                    self.debug_var_ids.push(it.next().unwrap().1);
366                }
367            }
368            self.op_indptr_for_var_ids.push(self.debug_var_ids.len());
369
370            let end_ops = self.op_indptr_for_var_ids.len() - 1;
371            self.node_indptr_for_op_idx
372                .push(end_ops)
373                .map_err(|_| DecoratorIndexError::OperationIndex { node, operation: end_ops })?;
374        }
375
376        Ok(())
377    }
378
379    /// Get all debug var IDs for a specific operation within a node.
380    pub fn debug_var_ids_for_operation(
381        &self,
382        node: MastNodeId,
383        operation: usize,
384    ) -> Result<&[DebugVarId], DecoratorIndexError> {
385        let op_range = self.operation_range_for_node(node)?;
386        if operation >= op_range.len() {
387            return Ok(&[]);
388        }
389
390        let op_start_idx = op_range.start + operation;
391        if op_start_idx + 1 >= self.op_indptr_for_var_ids.len() {
392            return Err(DecoratorIndexError::InternalStructure);
393        }
394
395        let var_start = self.op_indptr_for_var_ids[op_start_idx];
396        let var_end = self.op_indptr_for_var_ids[op_start_idx + 1];
397
398        if var_start > var_end || var_end > self.debug_var_ids.len() {
399            return Err(DecoratorIndexError::InternalStructure);
400        }
401
402        Ok(&self.debug_var_ids[var_start..var_end])
403    }
404
405    /// Get the range of operation indices for a given node.
406    pub fn operation_range_for_node(
407        &self,
408        node: MastNodeId,
409    ) -> Result<core::ops::Range<usize>, DecoratorIndexError> {
410        let node_slice = self.node_indptr_for_op_idx.as_slice();
411        let node_idx = node.to_usize();
412
413        if node_idx + 1 >= node_slice.len() {
414            return Err(DecoratorIndexError::NodeIndex(node));
415        }
416
417        let start = node_slice[node_idx];
418        let end = node_slice[node_idx + 1];
419
420        if start > end || end > self.op_indptr_for_var_ids.len() {
421            return Err(DecoratorIndexError::InternalStructure);
422        }
423
424        Ok(start..end)
425    }
426
427    /// Serialize this OpToDebugVarIds.
428    pub(super) fn write_into<W: ByteWriter>(&self, target: &mut W) {
429        self.debug_var_ids.write_into(target);
430        self.op_indptr_for_var_ids.write_into(target);
431        self.node_indptr_for_op_idx.write_into(target);
432    }
433
434    /// Deserialize OpToDebugVarIds.
435    pub(super) fn read_from<R: ByteReader>(
436        source: &mut R,
437        debug_var_count: usize,
438    ) -> Result<Self, DeserializationError> {
439        let debug_var_ids: Vec<DebugVarId> = Deserializable::read_from(source)?;
440        let op_indptr_for_var_ids: Vec<usize> = Deserializable::read_from(source)?;
441        let node_indptr_for_op_idx: IndexVec<MastNodeId, usize> =
442            Deserializable::read_from(source)?;
443
444        let result =
445            Self::from_components(debug_var_ids, op_indptr_for_var_ids, node_indptr_for_op_idx)
446                .map_err(|e| DeserializationError::InvalidValue(e.to_string()))?;
447
448        result.validate_csr(debug_var_count).map_err(|e| {
449            DeserializationError::InvalidValue(format!("OpToDebugVarIds validation failed: {e}"))
450        })?;
451
452        Ok(result)
453    }
454
455    /// Clears this storage.
456    pub fn clear(&mut self) {
457        self.debug_var_ids.clear();
458        self.op_indptr_for_var_ids.clear();
459        self.node_indptr_for_op_idx = IndexVec::new();
460    }
461}
462
463impl Default for OpToDebugVarIds {
464    fn default() -> Self {
465        Self::new()
466    }
467}
468
469#[cfg(test)]
470mod tests {
471    use alloc::vec;
472
473    use miden_utils_indexing::IndexVec;
474
475    use super::*;
476
477    fn test_var_id(value: u32) -> DebugVarId {
478        DebugVarId::from(value)
479    }
480
481    fn test_node_id(value: u32) -> MastNodeId {
482        MastNodeId::new_unchecked(value)
483    }
484
485    /// Helper: Node 0: Op 0 -> [var0, var1], Op 1 -> [var2]; Node 1: Op 0 -> [var3, var4, var5]
486    fn create_test_storage() -> OpToDebugVarIds {
487        let debug_var_ids = vec![
488            test_var_id(0),
489            test_var_id(1),
490            test_var_id(2),
491            test_var_id(3),
492            test_var_id(4),
493            test_var_id(5),
494        ];
495        let op_indptr = vec![0, 2, 3, 6];
496        let mut node_indptr = IndexVec::new();
497        node_indptr.push(0).unwrap();
498        node_indptr.push(2).unwrap();
499        node_indptr.push(3).unwrap();
500
501        OpToDebugVarIds::from_components(debug_var_ids, op_indptr, node_indptr).unwrap()
502    }
503
504    #[test]
505    fn test_add_and_lookup() {
506        let mut storage = OpToDebugVarIds::new();
507
508        // Node 0: op 0 -> [var10, var11], op 2 -> [var12]
509        storage
510            .add_debug_var_info_for_node(
511                test_node_id(0),
512                vec![(0, test_var_id(10)), (0, test_var_id(11)), (2, test_var_id(12))],
513            )
514            .unwrap();
515
516        // Node 1: op 0 -> [var20]
517        storage
518            .add_debug_var_info_for_node(test_node_id(1), vec![(0, test_var_id(20))])
519            .unwrap();
520
521        assert_eq!(storage.num_nodes(), 2);
522        assert_eq!(storage.num_debug_var_ids(), 4);
523
524        // Lookup node 0
525        assert_eq!(
526            storage.debug_var_ids_for_operation(test_node_id(0), 0).unwrap(),
527            &[test_var_id(10), test_var_id(11)]
528        );
529        assert_eq!(storage.debug_var_ids_for_operation(test_node_id(0), 1).unwrap(), &[]);
530        assert_eq!(
531            storage.debug_var_ids_for_operation(test_node_id(0), 2).unwrap(),
532            &[test_var_id(12)]
533        );
534
535        // Lookup node 1
536        assert_eq!(
537            storage.debug_var_ids_for_operation(test_node_id(1), 0).unwrap(),
538            &[test_var_id(20)]
539        );
540
541        // Out-of-range operation returns empty
542        assert_eq!(storage.debug_var_ids_for_operation(test_node_id(0), 99).unwrap(), &[]);
543    }
544
545    #[test]
546    fn test_from_components_and_validate() {
547        let storage = create_test_storage();
548        assert_eq!(storage.num_nodes(), 2);
549        assert_eq!(storage.num_debug_var_ids(), 6);
550        assert!(storage.validate_csr(6).is_ok());
551
552        // Validation fails when var count is too low
553        assert!(storage.validate_csr(3).is_err());
554
555        // Invalid components are rejected
556        let result = OpToDebugVarIds::from_components(
557            vec![test_var_id(0)],
558            vec![0, 5], // points past end
559            IndexVec::new(),
560        );
561        assert_eq!(result, Err(DecoratorIndexError::InternalStructure));
562    }
563
564    #[test]
565    fn test_serialization_round_trip() {
566        let storage = create_test_storage();
567
568        let mut bytes = Vec::new();
569        storage.write_into(&mut bytes);
570
571        let mut reader = crate::serde::SliceReader::new(&bytes);
572        let deserialized = OpToDebugVarIds::read_from(&mut reader, 6).unwrap();
573
574        assert_eq!(storage, deserialized);
575    }
576}