Skip to main content

binoc_sdk/
ir.rs

1use serde::{Deserialize, Serialize};
2use std::collections::{BTreeMap, BTreeSet};
3
4use crate::types::{ArtifactDescriptor, ItemPair};
5
6/// A node in the diff tree — the central data structure of the system.
7/// Every comparator emits it, every transformer rewrites it, and serializers
8/// or bindings read it.
9#[derive(Debug, Clone, Serialize, Deserialize)]
10#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
11pub struct DiffNode {
12    /// Open enum: "add", "remove", "modify", "move", "reorder",
13    /// "schema_change", etc. Plugins may define new actions.
14    pub action: String,
15
16    /// Open string: "directory", "file", "tabular", "zip_archive", etc.
17    /// No built-in types — conventions, not enforcement.
18    pub item_type: String,
19
20    /// Location within snapshot (logical path, including interior paths
21    /// like "archive.zip/data/file.csv").
22    pub path: String,
23
24    /// For moves/renames: the original path.
25    #[serde(skip_serializing_if = "Option::is_none")]
26    pub source_path: Option<String>,
27
28    /// Optional human-readable one-liner describing the change.
29    /// Set by comparator or transformer; used by renderers for narrative rendering.
30    #[serde(default, skip_serializing_if = "Option::is_none")]
31    pub summary: Option<String>,
32
33    /// Open bag of semantic tags, namespaced by convention.
34    /// e.g. "binoc.column-reorder", "biobinoc.gap-change"
35    #[serde(default, skip_serializing_if = "BTreeSet::is_empty")]
36    pub tags: BTreeSet<String>,
37
38    /// Child diff nodes forming the tree structure.
39    #[serde(default, skip_serializing_if = "Vec::is_empty")]
40    pub children: Vec<DiffNode>,
41
42    /// Comparator-specific payload, schema determined by item_type convention.
43    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
44    pub details: BTreeMap<String, serde_json::Value>,
45
46    /// Transformer-added metadata.
47    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
48    pub annotations: BTreeMap<String, serde_json::Value>,
49
50    /// Which comparator produced this node (provenance for extract chain).
51    #[serde(default, skip_serializing_if = "Option::is_none")]
52    pub comparator: Option<String>,
53
54    /// Transformers that modified this node, in order (provenance for extract chain).
55    #[serde(default, skip_serializing_if = "Vec::is_empty")]
56    pub transformed_by: Vec<String>,
57
58    /// The original item pair that produced this node. Session-scoped working
59    /// data: available during a live diff/transform session for transformers
60    /// and extractors that need to re-read source data, and carried across the
61    /// plugin ABI wire so separately-compiled plugins can access it. Callers
62    /// writing changeset output must strip this via
63    /// [`DiffNode::strip_transient`] before serializing.
64    #[serde(default, skip_serializing_if = "Option::is_none")]
65    pub source_items: Option<ItemPair>,
66
67    /// Published artifacts for this node. Session-scoped working data: carried
68    /// across the plugin ABI wire as descriptors (the bytes live in the shared
69    /// `data_root` cache), but not meaningful outside a session. Callers
70    /// writing changeset output must strip this via
71    /// [`DiffNode::strip_transient`] before serializing.
72    #[serde(default, skip_serializing_if = "Vec::is_empty")]
73    pub artifacts: Vec<ArtifactDescriptor>,
74}
75
76impl DiffNode {
77    pub fn new(
78        action: impl Into<String>,
79        item_type: impl Into<String>,
80        path: impl Into<String>,
81    ) -> Self {
82        Self {
83            action: action.into(),
84            item_type: item_type.into(),
85            path: path.into(),
86            source_path: None,
87            summary: None,
88            tags: BTreeSet::new(),
89            children: Vec::new(),
90            details: BTreeMap::new(),
91            annotations: BTreeMap::new(),
92            comparator: None,
93            transformed_by: Vec::new(),
94            source_items: None,
95            artifacts: Vec::new(),
96        }
97    }
98
99    pub fn with_summary(mut self, summary: impl Into<String>) -> Self {
100        self.summary = Some(summary.into());
101        self
102    }
103
104    pub fn with_tag(mut self, tag: impl Into<String>) -> Self {
105        self.tags.insert(tag.into());
106        self
107    }
108
109    pub fn with_detail(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
110        self.details.insert(key.into(), value);
111        self
112    }
113
114    pub fn with_children(mut self, children: Vec<DiffNode>) -> Self {
115        self.children = children;
116        self
117    }
118
119    pub fn with_source_path(mut self, source: impl Into<String>) -> Self {
120        self.source_path = Some(source.into());
121        self
122    }
123
124    pub fn with_source_items(mut self, items: ItemPair) -> Self {
125        self.source_items = Some(items);
126        self
127    }
128
129    pub fn with_artifact(mut self, artifact: ArtifactDescriptor) -> Self {
130        self.artifacts.push(artifact);
131        self
132    }
133
134    pub fn node_count(&self) -> usize {
135        1 + self.children.iter().map(|c| c.node_count()).sum::<usize>()
136    }
137
138    pub fn all_tags(&self) -> BTreeSet<String> {
139        let mut tags = self.tags.clone();
140        for child in &self.children {
141            tags.extend(child.all_tags());
142        }
143        tags
144    }
145
146    /// Recursively clear session-scoped transient fields (`source_items`,
147    /// `artifacts`) on this node and all descendants.
148    ///
149    /// These fields are wire-visible so the plugin ABI can move them across
150    /// process-ready boundaries, but they are not meaningful outside a live
151    /// session and must be stripped before writing changeset output intended
152    /// for users (JSON files, renderer output, Python return values).
153    pub fn strip_transient(&mut self) {
154        self.source_items = None;
155        self.artifacts.clear();
156        for child in &mut self.children {
157            child.strip_transient();
158        }
159    }
160}
161
162/// A structured description of how to get from one snapshot to the next.
163#[derive(Debug, Clone, Serialize, Deserialize)]
164#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
165pub struct Changeset {
166    pub from_snapshot: String,
167    pub to_snapshot: String,
168    pub root: Option<DiffNode>,
169    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
170    pub metadata: BTreeMap<String, String>,
171}
172
173impl Changeset {
174    pub fn new(from: impl Into<String>, to: impl Into<String>, root: Option<DiffNode>) -> Self {
175        Self {
176            from_snapshot: from.into(),
177            to_snapshot: to.into(),
178            root,
179            metadata: BTreeMap::new(),
180        }
181    }
182
183    pub fn node_count(&self) -> usize {
184        self.root.as_ref().map_or(0, |r| r.node_count())
185    }
186
187    /// Recursively clear session-scoped transient fields on the root and all
188    /// descendants. See [`DiffNode::strip_transient`].
189    pub fn strip_transient(&mut self) {
190        if let Some(root) = self.root.as_mut() {
191            root.strip_transient();
192        }
193    }
194}
195
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{ArtifactDescriptor, ArtifactFormat, ArtifactSubject, ItemPair, ItemRef};

    /// Artifact descriptor fixture shared by the transient-field tests.
    fn sample_artifact() -> ArtifactDescriptor {
        ArtifactDescriptor {
            format: ArtifactFormat::new("binoc", "tabular", 1),
            subject: ArtifactSubject::Pair,
            producer: "binoc.csv".into(),
            handle: "cache/tabular-abc123".into(),
        }
    }

    /// File item fixture for `data.csv` backed by the given handle.
    fn sample_item(handle: &'static str) -> ItemRef {
        ItemRef {
            logical_path: "data.csv".into(),
            is_dir: false,
            content_hash: None,
            size: None,
            media_type: None,
            handle: handle.into(),
        }
    }

    /// Item pair fixture with both sides populated.
    fn sample_item_pair() -> ItemPair {
        ItemPair::both(sample_item("/tmp/a/data.csv"), sample_item("/tmp/b/data.csv"))
    }

    /// True when neither `n` nor any descendant carries transient data.
    fn no_transient_data(n: &DiffNode) -> bool {
        n.artifacts.is_empty()
            && n.source_items.is_none()
            && n.children.iter().all(no_transient_data)
    }

    #[test]
    fn diff_node_new_creates_node_with_correct_fields() {
        let node = DiffNode::new("modify", "file", "path/to/file.csv");
        assert_eq!(node.action, "modify");
        assert_eq!(node.item_type, "file");
        assert_eq!(node.path, "path/to/file.csv");
        assert!(node.source_path.is_none());
        assert!(node.summary.is_none());
        assert!(node.tags.is_empty());
        assert!(node.children.is_empty());
        assert!(node.details.is_empty());
        assert!(node.annotations.is_empty());
        assert!(node.comparator.is_none());
        assert!(node.transformed_by.is_empty());
        assert!(node.source_items.is_none());
        assert!(node.artifacts.is_empty());
    }

    #[test]
    fn diff_node_builder_methods_chain_correctly() {
        let child = DiffNode::new("add", "file", "child.txt");
        let node = DiffNode::new("modify", "directory", "dir")
            .with_summary("directory changed")
            .with_tag("binoc.column-reorder")
            .with_tag("binoc.whitespace")
            .with_detail("lines_changed", serde_json::json!(42))
            .with_children(vec![child])
            .with_source_path("old/dir");

        assert_eq!(node.summary.as_deref(), Some("directory changed"));
        assert_eq!(node.tags.len(), 2);
        assert!(node.tags.contains("binoc.column-reorder"));
        assert!(node.tags.contains("binoc.whitespace"));
        assert_eq!(
            node.details.get("lines_changed"),
            Some(&serde_json::json!(42))
        );
        assert_eq!(node.children.len(), 1);
        assert_eq!(node.children[0].path, "child.txt");
        assert_eq!(node.source_path.as_deref(), Some("old/dir"));
    }

    #[test]
    fn node_count_leaf_returns_one() {
        let node = DiffNode::new("add", "file", "file.txt");
        assert_eq!(node.node_count(), 1);
    }

    #[test]
    fn node_count_tree_returns_correct_total() {
        let node = DiffNode::new("modify", "dir", "dir").with_children(vec![
            DiffNode::new("add", "file", "a.txt"),
            DiffNode::new("modify", "dir", "sub").with_children(vec![DiffNode::new(
                "remove",
                "file",
                "sub/b.txt",
            )]),
        ]);
        assert_eq!(node.node_count(), 4);
    }

    #[test]
    fn all_tags_collects_from_entire_subtree() {
        let node = DiffNode::new("modify", "dir", "dir")
            .with_tag("root-tag")
            .with_children(vec![
                DiffNode::new("add", "file", "a").with_tag("child-tag"),
                DiffNode::new("remove", "file", "b")
                    .with_children(vec![
                        DiffNode::new("modify", "file", "c").with_tag("grandchild-tag")
                    ]),
            ]);
        let tags = node.all_tags();
        assert_eq!(tags.len(), 3);
        assert!(tags.contains("root-tag"));
        assert!(tags.contains("child-tag"));
        assert!(tags.contains("grandchild-tag"));
    }

    #[test]
    fn all_tags_deduplicates_tags_shared_across_nodes() {
        let node = DiffNode::new("modify", "dir", "dir")
            .with_tag("shared")
            .with_children(vec![DiffNode::new("add", "file", "a").with_tag("shared")]);
        let tags = node.all_tags();
        assert_eq!(tags.len(), 1);
        assert!(tags.contains("shared"));
    }

    #[test]
    fn serde_round_trip_preserves_equality() {
        let node = DiffNode::new("move", "file", "new/path.csv")
            .with_tag("binoc.move")
            .with_detail("distance", serde_json::json!(10))
            .with_source_path("old/path.csv");
        let json = serde_json::to_string(&node).unwrap();
        let restored: DiffNode = serde_json::from_str(&json).unwrap();
        assert_eq!(node.action, restored.action);
        assert_eq!(node.item_type, restored.item_type);
        assert_eq!(node.path, restored.path);
        assert_eq!(node.source_path, restored.source_path);
        assert_eq!(node.tags, restored.tags);
        assert_eq!(node.details, restored.details);
    }

    #[test]
    fn minimal_node_serializes_only_required_fields() {
        // Pins the `skip_serializing_if` contract: unset optional fields and
        // empty collections must not appear in the output at all.
        let node = DiffNode::new("add", "file", "a.txt");
        let value = serde_json::to_value(&node).unwrap();
        assert_eq!(
            value,
            serde_json::json!({
                "action": "add",
                "item_type": "file",
                "path": "a.txt",
            })
        );
    }

    #[test]
    fn minimal_json_deserializes_with_defaults() {
        let node: DiffNode =
            serde_json::from_str(r#"{"action":"add","item_type":"file","path":"a.txt"}"#).unwrap();
        assert_eq!(node.action, "add");
        assert!(node.source_path.is_none());
        assert!(node.summary.is_none());
        assert!(node.tags.is_empty());
        assert!(node.children.is_empty());
        assert!(no_transient_data(&node));
    }

    #[test]
    fn changeset_construction_and_node_count() {
        let root = DiffNode::new("modify", "dir", "root").with_children(vec![
            DiffNode::new("add", "file", "root/a.txt"),
            DiffNode::new("remove", "file", "root/b.txt"),
        ]);
        let changeset = Changeset::new("v1", "v2", Some(root));
        assert_eq!(changeset.from_snapshot, "v1");
        assert_eq!(changeset.to_snapshot, "v2");
        assert!(changeset.metadata.is_empty());
        assert_eq!(changeset.node_count(), 3);
    }

    #[test]
    fn changeset_node_count_none_root() {
        let changeset = Changeset::new("v1", "v2", None);
        assert_eq!(changeset.node_count(), 0);
    }

    #[test]
    fn transient_fields_round_trip_through_serde() {
        // Session-scoped transient fields (`source_items`, `artifacts`) are
        // wire-visible so the plugin ABI can carry them across a (potentially
        // process-isolated) boundary. This test pins that behavior: serializing
        // and deserializing a tree whose child has transient fields populated
        // preserves them on that child, not just on the root.
        let artifact = sample_artifact();
        let child = DiffNode::new("modify", "tabular", "dir/data.csv")
            .with_artifact(artifact.clone())
            .with_source_items(sample_item_pair());
        let root = DiffNode::new("modify", "directory", "dir").with_children(vec![child]);

        let json = serde_json::to_string(&root).unwrap();
        let restored: DiffNode = serde_json::from_str(&json).unwrap();

        assert_eq!(restored.children.len(), 1);
        let restored_child = &restored.children[0];
        assert_eq!(restored_child.artifacts.len(), 1, "child artifact missing");
        assert_eq!(restored_child.artifacts[0].handle, artifact.handle);
        assert!(
            restored_child.source_items.is_some(),
            "child source_items missing"
        );
    }

    #[test]
    fn strip_transient_clears_every_descendant() {
        // Populate both transient fields, at different depths, so that both
        // the `source_items` and the `artifacts` branches are exercised.
        let grandchild = DiffNode::new("modify", "tabular", "a/b/c.csv")
            .with_artifact(sample_artifact())
            .with_source_items(sample_item_pair());
        let child = DiffNode::new("modify", "directory", "a/b")
            .with_source_items(sample_item_pair())
            .with_children(vec![grandchild]);
        let mut root = DiffNode::new("modify", "directory", "a")
            .with_artifact(sample_artifact())
            .with_children(vec![child]);
        assert!(!no_transient_data(&root), "fixture must start populated");

        root.strip_transient();

        assert!(no_transient_data(&root));
        // Stripping must not alter the shape of the tree.
        assert_eq!(root.node_count(), 3);
    }

    #[test]
    fn changeset_strip_transient_clears_tree_and_tolerates_missing_root() {
        let child = DiffNode::new("modify", "tabular", "a/data.csv")
            .with_artifact(sample_artifact())
            .with_source_items(sample_item_pair());
        let root = DiffNode::new("modify", "directory", "a").with_children(vec![child]);
        let mut changeset = Changeset::new("v1", "v2", Some(root));

        changeset.strip_transient();

        let root = changeset.root.as_ref().expect("root must survive stripping");
        assert!(no_transient_data(root));
        assert_eq!(changeset.node_count(), 2);

        let mut empty = Changeset::new("v1", "v2", None);
        empty.strip_transient();
        assert!(empty.root.is_none());
    }
}