1use serde::{Deserialize, Serialize};
2
3use crate::ir::DiffNode;
4
5#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
9#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
10pub enum ArtifactSubject {
11 #[serde(rename = "left")]
12 Left,
13 #[serde(rename = "right")]
14 Right,
15 #[serde(rename = "pair")]
16 Pair,
17}
18
/// Identifier of an artifact format, made of a package, a name and a version.
///
/// The `Display` implementation in this file renders it as
/// `<package>.<name>.v<version>`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct ArtifactFormat {
    /// First component of the identifier (`"binoc"` for the built-in tabular format).
    pub package: String,
    /// Format name within the package (`"tabular"` for the built-in tabular format).
    pub name: String,
    /// Version number of the format.
    pub version: u32,
}
38
39impl ArtifactFormat {
40 pub fn new(package: impl Into<String>, name: impl Into<String>, version: u32) -> Self {
41 Self {
42 package: package.into(),
43 name: name.into(),
44 version,
45 }
46 }
47}
48
49impl std::fmt::Display for ArtifactFormat {
50 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51 write!(f, "{}.{}.v{}", self.package, self.name, self.version)
52 }
53}
54
/// Describes one artifact: its format, which side it refers to, and how to find it.
///
/// `TabularDataPair::from_artifacts` selects descriptors by `format` and
/// `subject` and passes the whole descriptor to `DataAccess::get_artifact`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct ArtifactDescriptor {
    /// Format the artifact's payload is encoded in.
    pub format: ArtifactFormat,
    /// Side of the comparison the artifact describes.
    pub subject: ArtifactSubject,
    /// NOTE(review): presumably the name of the component that produced the artifact — confirm.
    pub producer: String,
    /// NOTE(review): presumably an opaque key the data-access layer uses to locate the payload — confirm.
    pub handle: String,
}
70
71pub fn tabular_v1() -> ArtifactFormat {
80 ArtifactFormat::new("binoc", "tabular", 1)
81}
82
/// An in-memory table of string cells: one list of headers plus data rows.
///
/// Rows are not required to be as long as `headers`; `column_values` treats a
/// missing cell as an empty string.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TabularData {
    /// Column names, in column order.
    pub headers: Vec<String>,
    /// Data rows; each inner vector holds the cells of one row in column order.
    pub rows: Vec<Vec<String>>,
}
95
96impl TabularData {
97 pub fn column_index(&self, name: &str) -> Option<usize> {
98 self.headers.iter().position(|h| h == name)
99 }
100
101 pub fn column_values(&self, name: &str) -> Option<Vec<&str>> {
102 let idx = self.column_index(name)?;
103 Some(
104 self.rows
105 .iter()
106 .map(|r| r.get(idx).map(|s| s.as_str()).unwrap_or(""))
107 .collect(),
108 )
109 }
110
111 pub fn to_csv(&self) -> String {
112 let mut out = self.headers.join(",");
113 out.push('\n');
114 for row in &self.rows {
115 out.push_str(&row.join(","));
116 out.push('\n');
117 }
118 out
119 }
120}
121
/// The tabular data of the left and right sides of a comparison.
///
/// Either side may be absent; `from_artifacts` never builds a pair in which
/// both are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TabularDataPair {
    /// Table for the left side, if one was available.
    pub left: Option<TabularData>,
    /// Table for the right side, if one was available.
    pub right: Option<TabularData>,
}
128
129impl TabularDataPair {
130 pub fn from_artifacts(
136 node: &crate::ir::DiffNode,
137 data: &dyn crate::traits::DataAccess,
138 ) -> Option<Self> {
139 let fmt = tabular_v1();
140 let left = node
141 .artifacts
142 .iter()
143 .find(|a| a.format == fmt && a.subject == ArtifactSubject::Left)
144 .and_then(|desc| data.get_artifact(desc).ok()?)
145 .and_then(|bytes| serde_json::from_slice(&bytes).ok());
146 let right = node
147 .artifacts
148 .iter()
149 .find(|a| a.format == fmt && a.subject == ArtifactSubject::Right)
150 .and_then(|desc| data.get_artifact(desc).ok()?)
151 .and_then(|bytes| serde_json::from_slice(&bytes).ok());
152 if left.is_none() && right.is_none() {
153 return None;
154 }
155 Some(Self { left, right })
156 }
157}
158
159pub fn tabular_extract(
168 pair: &TabularDataPair,
169 _node: &DiffNode,
170 aspect: &str,
171) -> Option<ExtractResult> {
172 match aspect {
173 "rows_added" => {
174 let right = pair.right.as_ref()?;
175 let left_len = pair.left.as_ref().map_or(0, |l| l.rows.len());
176 if left_len >= right.rows.len() {
177 return Some(ExtractResult::Text("No rows added.\n".into()));
178 }
179 let added = TabularData {
180 headers: right.headers.clone(),
181 rows: right.rows[left_len..].to_vec(),
182 };
183 Some(ExtractResult::Text(added.to_csv()))
184 }
185 "rows_removed" => {
186 let left = pair.left.as_ref()?;
187 let right_len = pair.right.as_ref().map_or(0, |r| r.rows.len());
188 if right_len >= left.rows.len() {
189 return Some(ExtractResult::Text("No rows removed.\n".into()));
190 }
191 let removed = TabularData {
192 headers: left.headers.clone(),
193 rows: left.rows[right_len..].to_vec(),
194 };
195 Some(ExtractResult::Text(removed.to_csv()))
196 }
197 "cells_changed" => {
198 let left = pair.left.as_ref()?;
199 let right = pair.right.as_ref()?;
200 let common_cols = tabular_columns_in_common(left, right);
201 let min_rows = left.rows.len().min(right.rows.len());
202
203 let mut out = String::from("row,column,old_value,new_value\n");
204 for i in 0..min_rows {
205 for col in &common_cols {
206 let li = left.column_index(col)?;
207 let ri = right.column_index(col)?;
208 let lv = left.rows[i].get(li).map(|s| s.as_str()).unwrap_or("");
209 let rv = right.rows[i].get(ri).map(|s| s.as_str()).unwrap_or("");
210 if lv != rv {
211 out.push_str(&format!("{i},{col},{lv},{rv}\n"));
212 }
213 }
214 }
215 Some(ExtractResult::Text(out))
216 }
217 "columns_added" => {
218 let left = pair.left.as_ref()?;
219 let right = pair.right.as_ref()?;
220 let left_set: std::collections::BTreeSet<&str> =
221 left.headers.iter().map(|s| s.as_str()).collect();
222 let added: Vec<&str> = right
223 .headers
224 .iter()
225 .filter(|h| !left_set.contains(h.as_str()))
226 .map(|h| h.as_str())
227 .collect();
228 if added.is_empty() {
229 return Some(ExtractResult::Text("No columns added.\n".into()));
230 }
231 let mut out = String::new();
232 for col in &added {
233 out.push_str(&format!("{col}\n"));
234 if let Some(vals) = right.column_values(col) {
235 for val in vals {
236 out.push_str(&format!(" {val}\n"));
237 }
238 }
239 }
240 Some(ExtractResult::Text(out))
241 }
242 "columns_removed" => {
243 let left = pair.left.as_ref()?;
244 let right = pair.right.as_ref()?;
245 let right_set: std::collections::BTreeSet<&str> =
246 right.headers.iter().map(|s| s.as_str()).collect();
247 let removed: Vec<&str> = left
248 .headers
249 .iter()
250 .filter(|h| !right_set.contains(h.as_str()))
251 .map(|h| h.as_str())
252 .collect();
253 if removed.is_empty() {
254 return Some(ExtractResult::Text("No columns removed.\n".into()));
255 }
256 let mut out = String::new();
257 for col in &removed {
258 out.push_str(&format!("{col}\n"));
259 if let Some(vals) = left.column_values(col) {
260 for val in vals {
261 out.push_str(&format!(" {val}\n"));
262 }
263 }
264 }
265 Some(ExtractResult::Text(out))
266 }
267 "content" | "full" => {
268 let mut out = String::new();
269 if let Some(left) = &pair.left {
270 out.push_str("--- left\n");
271 out.push_str(&left.to_csv());
272 }
273 if let Some(right) = &pair.right {
274 out.push_str("+++ right\n");
275 out.push_str(&right.to_csv());
276 }
277 Some(ExtractResult::Text(out))
278 }
279 _ => None,
280 }
281}
282
283fn tabular_columns_in_common(left: &TabularData, right: &TabularData) -> Vec<String> {
284 let left_set: std::collections::BTreeSet<&str> =
285 left.headers.iter().map(|s| s.as_str()).collect();
286 right
287 .headers
288 .iter()
289 .filter(|h| left_set.contains(h.as_str()))
290 .cloned()
291 .collect()
292}
293
/// A reference to one item (file or directory) together with optional
/// metadata about its content.
///
/// The optional fields are left out of the serialized form when `None` and
/// default to `None` when absent on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct ItemRef {
    /// Path of the item; `extension` derives the file extension from it.
    pub logical_path: String,
    /// Whether the item is a directory.
    pub is_dir: bool,
    /// Content hash, if already known. `resolve_hash` returns this value
    /// as-is when present and otherwise hashes the item's bytes with BLAKE3.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub content_hash: Option<String>,
    /// Size, if already known. `resolve_size` returns this value when present
    /// and otherwise uses the length of the item's bytes.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub size: Option<u64>,
    /// Media type of the item, if known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub media_type: Option<String>,
    /// Defaults to an empty string when absent on deserialization.
    /// NOTE(review): presumably an opaque key the data-access layer uses to
    /// locate the item's bytes — confirm.
    #[serde(default)]
    pub handle: String,
}
330
331impl ItemRef {
332 pub fn extension(&self) -> Option<String> {
333 std::path::Path::new(&self.logical_path)
334 .extension()
335 .map(|e| format!(".{}", e.to_string_lossy().to_lowercase()))
336 }
337
338 pub fn resolve_hash(&self, data: &dyn crate::DataAccess) -> crate::BinocResult<String> {
341 if let Some(hash) = &self.content_hash {
342 return Ok(hash.clone());
343 }
344 let bytes = data.read_bytes(self)?;
345 Ok(blake3::hash(&bytes).to_hex().to_string())
346 }
347
348 pub fn resolve_size(&self, data: &dyn crate::DataAccess) -> crate::BinocResult<u64> {
351 if let Some(size) = self.size {
352 return Ok(size);
353 }
354 let bytes = data.read_bytes(self)?;
355 Ok(bytes.len() as u64)
356 }
357}
358
/// The left and right versions of one item in a comparison.
///
/// The constructors in this file build three shapes: both sides present
/// (`both`), right only (`added`) and left only (`removed`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct ItemPair {
    /// The item on the left side, if it exists there.
    pub left: Option<ItemRef>,
    /// The item on the right side, if it exists there.
    pub right: Option<ItemRef>,
}
366
367impl ItemPair {
368 pub fn both(left: ItemRef, right: ItemRef) -> Self {
369 Self {
370 left: Some(left),
371 right: Some(right),
372 }
373 }
374
375 pub fn added(right: ItemRef) -> Self {
376 Self {
377 left: None,
378 right: Some(right),
379 }
380 }
381
382 pub fn removed(left: ItemRef) -> Self {
383 Self {
384 left: Some(left),
385 right: None,
386 }
387 }
388
389 pub fn logical_path(&self) -> &str {
390 self.right
391 .as_ref()
392 .or(self.left.as_ref())
393 .map(|i| i.logical_path.as_str())
394 .unwrap_or("")
395 }
396
397 pub fn extension(&self) -> Option<String> {
398 self.right
399 .as_ref()
400 .or(self.left.as_ref())
401 .and_then(|i| i.extension())
402 }
403
404 pub fn media_type(&self) -> Option<&str> {
405 self.right
406 .as_ref()
407 .or(self.left.as_ref())
408 .and_then(|i| i.media_type.as_deref())
409 }
410
411 pub fn is_dir(&self) -> bool {
412 self.right.as_ref().is_some_and(|i| i.is_dir)
413 || self.left.as_ref().is_some_and(|i| i.is_dir)
414 }
415
416 pub fn matching_content_hash(&self) -> Option<&str> {
417 match (&self.left, &self.right) {
418 (Some(l), Some(r)) => match (&l.content_hash, &r.content_hash) {
419 (Some(hl), Some(hr)) if hl == hr => Some(hl.as_str()),
420 _ => None,
421 },
422 _ => None,
423 }
424 }
425}
426
/// Outcome of comparing an item pair.
///
/// Marked `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
///
/// NOTE(review): the per-variant descriptions below are inferred from the
/// variant names and payload types — confirm against the code that produces
/// and consumes these values.
#[derive(Debug, Serialize, Deserialize)]
#[non_exhaustive]
pub enum CompareResult {
    /// Presumably: the two sides do not differ.
    Identical,
    /// Carries a single `DiffNode`; presumably a finished result with nothing further to compare.
    Leaf(DiffNode),
    /// Carries a `DiffNode` plus a list of item pairs; presumably children still to be compared.
    Expand(DiffNode, Vec<ItemPair>),
    /// Presumably: the pair was not handled and should be skipped.
    Skip,
}
442
/// Outcome of transforming a `DiffNode`.
///
/// Marked `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
///
/// NOTE(review): the per-variant descriptions below are inferred from the
/// variant names and payload types — confirm against the code that produces
/// and consumes these values.
#[non_exhaustive]
pub enum TransformResult {
    /// Presumably: the node is left as it was.
    Unchanged,
    /// Carries one boxed `DiffNode`; presumably the node's replacement.
    Replace(Box<DiffNode>),
    /// Carries several `DiffNode`s; presumably replacing one node with many.
    ReplaceMany(Vec<DiffNode>),
    /// Presumably: the node is dropped.
    Remove,
}
455
/// Filter on the shape of a node; the default is `Any`.
///
/// NOTE(review): how each variant is matched against a node is not visible
/// in this file; the descriptions below are inferred from the names — confirm.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub enum NodeShapeFilter {
    /// The default variant; presumably matches every node.
    #[default]
    Any,
    /// Presumably matches nodes that have children.
    Container,
    /// Presumably matches nodes without children.
    Leaf,
    /// Presumably matches only the root node.
    Root,
}
472
/// Selects which kinds of items something applies to; the default is `Files`.
///
/// NOTE(review): how the scope is applied is not visible in this file; the
/// descriptions below are inferred from the names — confirm.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub enum ItemScope {
    /// The default variant; presumably restricts to non-directory items.
    #[default]
    Files,
    /// Presumably restricts to directory-like items.
    Containers,
    /// Presumably applies to every item.
    Any,
}
484
/// Payload returned by an extraction such as `tabular_extract`.
pub enum ExtractResult {
    /// Textual output; every aspect handled by `tabular_extract` yields this variant.
    Text(String),
    /// Raw bytes.
    Binary(Vec<u8>),
}
490
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `ItemRef` carrying only a path and a directory flag; every
    /// optional field is `None` and the handle is empty.
    fn bare_item(logical: &str, is_dir: bool) -> ItemRef {
        ItemRef {
            logical_path: logical.to_owned(),
            is_dir,
            content_hash: None,
            size: None,
            media_type: None,
            handle: String::new(),
        }
    }

    #[test]
    fn item_ref_extension() {
        let ext = bare_item("data.csv", false).extension();
        assert_eq!(ext.as_deref(), Some(".csv"));
    }

    #[test]
    fn item_ref_extension_none() {
        let ext = bare_item("Makefile", false).extension();
        assert!(ext.is_none());
    }

    #[test]
    fn item_pair_logical_path_prefers_right() {
        let pair = ItemPair::both(bare_item("left.txt", false), bare_item("right.txt", false));
        assert_eq!(pair.logical_path(), "right.txt");
    }

    #[test]
    fn item_pair_logical_path_falls_back_to_left() {
        let pair = ItemPair::removed(bare_item("only.txt", false));
        assert_eq!(pair.logical_path(), "only.txt");
    }

    #[test]
    fn item_pair_is_dir() {
        let pair = ItemPair::added(bare_item("sub", true));
        assert!(pair.is_dir());
    }

    #[test]
    fn item_pair_matching_hash() {
        // Same path and same hash on both sides.
        let hashed = |path: &str| ItemRef {
            content_hash: Some("abc".to_owned()),
            ..bare_item(path, false)
        };
        let pair = ItemPair::both(hashed("f"), hashed("f"));
        assert_eq!(pair.matching_content_hash(), Some("abc"));
    }
}