diff --git a/AGENTS.md b/AGENTS.md index a71b1cb..5757113 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,21 +14,28 @@ pathrex/ ├── Cargo.toml # Crate manifest (edition 2024) ├── build.rs # Links LAGraph + LAGraphX; optionally regenerates FFI bindings ├── src/ -│ ├── lib.rs # Public modules: graph, formats, lagraph_sys; utils is pub(crate) +│ ├── lib.rs # Public modules: graph, formats, rpq, sparql, lagraph_sys, utils │ ├── main.rs # Binary entry point (placeholder) │ ├── lagraph_sys.rs # FFI module — includes generated bindings │ ├── lagraph_sys_generated.rs# Bindgen output (checked in, regenerated in CI) -│ ├── utils.rs # Internal helpers: CountingBuilder, CountOutput, VecSource, -│ │ # grb_ok! and la_ok! macros +│ ├── utils.rs # Public helpers: CountingBuilder, CountOutput, VecSource, +│ │ # grb_ok! and la_ok! macros, build_graph │ ├── graph/ │ │ ├── mod.rs # Core traits (GraphBuilder, GraphDecomposition, GraphSource, │ │ │ # Backend, Graph), error types, RAII wrappers, GrB init │ │ └── inmemory.rs # InMemory marker, InMemoryBuilder, InMemoryGraph +│ ├── rpq/ +│ │ ├── mod.rs # RPQ evaluation trait (RpqEvaluator), RpqResult, RpqError +│ │ ├── nfarpq.rs # NFA-based RPQ evaluator using LAGraph_RegularPathQuery +│ │ └── rpqmatrix.rs # Plan-based RPQ evaluator using LAGraph_RPQMatrix +│ ├── sparql/ +│ │ └── mod.rs # SPARQL parsing (spargebra), PathTriple extraction, parse_rpq │ └── formats/ │ ├── mod.rs # FormatError enum, re-exports │ └── csv.rs # Csv — CSV → Edge iterator (CsvConfig, ColumnSpec) ├── tests/ -│ └── inmemory_tests.rs # Integration tests for InMemoryBuilder / InMemoryGraph +│ ├── inmemory_tests.rs # Integration tests for InMemoryBuilder / InMemoryGraph +│ └── nfarpq_tests.rs # Integration tests for NfaRpqEvaluator ├── deps/ │ └── LAGraph/ # Git submodule (SparseLinearAlgebra/LAGraph) └── .github/workflows/ci.yml # CI: build GraphBLAS + LAGraph, cargo build & test @@ -204,6 +211,79 @@ Configuration is via [`CsvConfig`](src/formats/csv.rs:17): 
[`ColumnSpec`](src/formats/csv.rs:11) is either `Index(usize)` or `Name(String)`. Name-based lookup requires `has_header: true`. +### SPARQL parsing (`src/sparql/mod.rs`) + +The [`sparql`](src/sparql/mod.rs) module uses the [`spargebra`](https://crates.io/crates/spargebra) +crate to parse SPARQL 1.1 query strings and extract the single property-path +triple pattern that pathrex's RPQ evaluators operate on. + +**Supported query form:** `SELECT` queries with exactly one triple or property +path pattern in the `WHERE` clause, e.g.: + +```sparql +SELECT ?x ?y WHERE { ?x <knows>/<likes>* ?y . } +``` + +Key public items: + +- [`parse_query(sparql)`](src/sparql/mod.rs:51) — parses a SPARQL string into a + [`spargebra::Query`]. +- [`extract_path(query)`](src/sparql/mod.rs:73) — validates that a parsed `Query` is a + `SELECT` with a single path pattern and returns a [`PathTriple`](src/sparql/mod.rs:62). +- [`parse_rpq(sparql)`](src/sparql/mod.rs:196) — convenience function combining + `parse_query` + `extract_path` in one call. +- [`PathTriple`](src/sparql/mod.rs:62) — holds the extracted `subject` + ([`TermPattern`]), `path` ([`PropertyPathExpression`]), and `object` + ([`TermPattern`]). +- [`ExtractError`](src/sparql/mod.rs:31) — error enum for extraction failures + (`NotSelect`, `NotSinglePath`, `UnsupportedSubject`, `UnsupportedObject`, + `VariablePredicate`). +- [`RpqParseError`](src/sparql/mod.rs:204) — combined error for [`parse_rpq`] + wrapping both [`SparqlSyntaxError`] and [`ExtractError`]. +- [`DEFAULT_BASE_IRI`](src/sparql/mod.rs:44) — `"http://example.org/"`, the + default base IRI constant (currently not applied by `parse_query`, whose `with_base_iri` call is commented out). + +The module also handles spargebra's desugaring of sequence paths +(`?x <a>/<b>/<c> ?y`) from a chain of BGP triples back into a single +[`PropertyPathExpression::Sequence`]. + +### RPQ evaluation (`src/rpq/`) + +The [`rpq`](src/rpq/mod.rs) module provides an abstraction for evaluating +Regular Path Queries (RPQs) over edge-labeled graphs using GraphBLAS/LAGraph. 
+ +Key public items: + +- [`RpqEvaluator`](src/rpq/mod.rs:47) — trait with a single method + [`evaluate(subject, path, object, graph)`](src/rpq/mod.rs:48) that takes + SPARQL [`TermPattern`] endpoints, a [`PropertyPathExpression`] path, and a + [`GraphDecomposition`], returning an [`RpqResult`](src/rpq/mod.rs:42). +- [`RpqResult`](src/rpq/mod.rs:42) — wraps a [`GraphblasVector`] of reachable + vertices. +- [`RpqError`](src/rpq/mod.rs:21) — error enum covering parse errors, extraction + errors, unsupported paths, missing labels/vertices, and GraphBLAS failures. + +#### `NfaRpqEvaluator` (`src/rpq/nfarpq.rs`) + +[`NfaRpqEvaluator`](src/rpq/nfarpq.rs:265) implements [`RpqEvaluator`] by: + +1. Converting a [`PropertyPathExpression`] into an [`Nfa`](src/rpq/nfarpq.rs:27) + via Thompson's construction ([`Nfa::from_property_path()`](src/rpq/nfarpq.rs:35)). +2. Eliminating ε-transitions via epsilon closure + ([`NfaBuilder::epsilon_closure()`](src/rpq/nfarpq.rs:198)). +3. Building one `LAGraph_Graph` per distinct NFA edge label + ([`Nfa::build_lagraph_matrices()`](src/rpq/nfarpq.rs:43)). +4. Calling [`LAGraph_RegularPathQuery`] with the NFA matrices, data-graph + matrices, start/final states, and source vertices. + +Supported path operators: `NamedNode`, `Sequence`, `Alternative`, +`ZeroOrMore`, `OneOrMore`, `ZeroOrOne`. `Reverse` and `NegatedPropertySet` +return [`RpqError::UnsupportedPath`]. + +Subject/object resolution: a [`TermPattern::Variable`] means "all vertices"; +a [`TermPattern::NamedNode`] resolves to a single vertex via +[`GraphDecomposition::get_node_id()`](src/graph/mod.rs:195). Only the subject restricts the source vertices passed to `LAGraph_RegularPathQuery`; the object is resolved (a missing vertex is an error) but is not yet used to filter the result. + ### FFI layer [`lagraph_sys`](src/lagraph_sys.rs) exposes raw C bindings for GraphBLAS and @@ -212,10 +292,11 @@ LAGraph. Safe Rust wrappers live in [`graph::mod`](src/graph/mod.rs): - [`LagraphGraph`](src/graph/mod.rs:48) — RAII wrapper around `LAGraph_Graph` (calls `LAGraph_Delete` on drop). 
Also provides [`LagraphGraph::from_coo()`](src/graph/mod.rs:85) to build directly from COO arrays. -- [`GraphblasVector`](src/graph/mod.rs:124) — RAII wrapper around `GrB_Vector`. +- [`GraphblasVector`](src/graph/mod.rs:128) — RAII wrapper around `GrB_Vector` + (derives `Debug`). - [`ensure_grb_init()`](src/graph/mod.rs:39) — one-time `LAGraph_Init` via `std::sync::Once`. -### Macros (`src/utils.rs`) +### Macros & helpers (`src/utils.rs`) Two `#[macro_export]` macros handle FFI error mapping: @@ -225,20 +306,28 @@ Two `#[macro_export]` macros handle FFI error mapping: appending the required `*mut i8` message buffer, and maps failure to `GraphError::LAGraph(info, msg)`. +A convenience function is also provided: + +- [`build_graph(edges)`](src/utils.rs:184) — builds an `InMemoryGraph` from a + slice of `(&str, &str, &str)` triples (source, target, label). Used by + integration tests. + ## Coding Conventions - **Rust edition 2024**. -- Error handling via `thiserror` derive macros; two main error enums: - [`GraphError`](src/graph/mod.rs:15) and [`FormatError`](src/formats/mod.rs:24). +- Error handling via `thiserror` derive macros; three main error enums: + [`GraphError`](src/graph/mod.rs:15), [`FormatError`](src/formats/mod.rs:24), + and [`RpqError`](src/rpq/mod.rs:21). - `FormatError` converts into `GraphError` via `#[from] FormatError` on the `GraphError::Format` variant. -- Unsafe FFI calls are confined to `lagraph_sys`, `graph/mod.rs`, and - `graph/inmemory.rs`. All raw pointers are wrapped in RAII types that free - resources on drop. +- Unsafe FFI calls are confined to `lagraph_sys`, `graph/mod.rs`, + `graph/inmemory.rs`, and `rpq/nfarpq.rs`. All raw pointers are wrapped in + RAII types that free resources on drop. - `unsafe impl Send + Sync` is provided for `LagraphGraph` and `GraphblasVector` because GraphBLAS handles are thread-safe after init. - Unit tests live in `#[cfg(test)] mod tests` blocks inside each module. 
- Integration tests that need GraphBLAS live in [`tests/inmemory_tests.rs`](tests/inmemory_tests.rs). + Integration tests that need GraphBLAS live in [`tests/inmemory_tests.rs`](tests/inmemory_tests.rs) + and [`tests/nfarpq_tests.rs`](tests/nfarpq_tests.rs). ## Testing @@ -256,7 +345,13 @@ native libraries. Tests in `src/formats/csv.rs` are pure Rust and need no native dependencies. -Tests in `src/graph/inmemory.rs` and [`tests/inmemory_tests.rs`](tests/inmemory_tests.rs) +Tests in `src/sparql/mod.rs` are pure Rust and need no native dependencies. + +Tests in `src/rpq/nfarpq.rs` (NFA construction unit tests) are pure Rust and need no +native dependencies. + +Tests in `src/graph/inmemory.rs`, [`tests/inmemory_tests.rs`](tests/inmemory_tests.rs), +and [`tests/nfarpq_tests.rs`](tests/nfarpq_tests.rs) call real GraphBLAS/LAGraph and require the native libraries to be present. ## CI diff --git a/Cargo.toml b/Cargo.toml index 70180c8..b978367 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ csv = "1.4.0" libc = "0.2" oxrdf = "0.3.3" oxttl = "0.2.3" +spargebra = "0.4.6" thiserror = "1.0" [features] diff --git a/build.rs b/build.rs index 475046e..4e85f0d 100644 --- a/build.rs +++ b/build.rs @@ -83,6 +83,7 @@ fn regenerate_bindings() { .allowlist_function("LAGraph_Delete") .allowlist_function("LAGraph_Cached_AT") .allowlist_function("LAGraph_MMRead") + .allowlist_function("LAGraph_RegularPathQuery") .default_enum_style(bindgen::EnumVariation::Rust { non_exhaustive: false, }) diff --git a/src/graph/mod.rs b/src/graph/mod.rs index 62e1667..8402ab3 100644 --- a/src/graph/mod.rs +++ b/src/graph/mod.rs @@ -125,6 +125,7 @@ impl Drop for LagraphGraph { unsafe impl Send for LagraphGraph {} unsafe impl Sync for LagraphGraph {} +#[derive(Debug)] pub struct GraphblasVector { pub inner: GrB_Vector, } diff --git a/src/lagraph_sys_generated.rs b/src/lagraph_sys_generated.rs index 3201d28..601acc5 100644 --- a/src/lagraph_sys_generated.rs +++ b/src/lagraph_sys_generated.rs @@ 
-261,3 +261,37 @@ unsafe extern "C" { msg: *mut ::std::os::raw::c_char, ) -> ::std::os::raw::c_int; } +unsafe extern "C" { + pub fn LAGraph_RegularPathQuery( + reachable: *mut GrB_Vector, + R: *mut LAGraph_Graph, + nl: usize, + QS: *const GrB_Index, + nqs: usize, + QF: *const GrB_Index, + nqf: usize, + G: *mut LAGraph_Graph, + S: *const GrB_Index, + ns: usize, + msg: *mut ::std::os::raw::c_char, + ) -> ::std::os::raw::c_int; +} +#[repr(u32)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub enum RPQMatrixOp { + RPQ_MATRIX_OP_LABEL = 0, + RPQ_MATRIX_OP_LOR = 1, + RPQ_MATRIX_OP_CONCAT = 2, + RPQ_MATRIX_OP_KLEENE = 3, + RPQ_MATRIX_OP_KLEENE_L = 4, + RPQ_MATRIX_OP_KLEENE_R = 5, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct RPQMatrixPlan { + pub op: RPQMatrixOp, + pub lhs: *mut RPQMatrixPlan, + pub rhs: *mut RPQMatrixPlan, + pub mat: GrB_Matrix, + pub res_mat: GrB_Matrix, +} diff --git a/src/lib.rs b/src/lib.rs index ef319e0..0f89008 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,8 @@ pub mod formats; pub mod graph; +pub mod rpq; +pub mod sparql; #[allow(unused_unsafe, dead_code)] -pub(crate) mod utils; +pub mod utils; pub mod lagraph_sys; diff --git a/src/rpq/mod.rs b/src/rpq/mod.rs new file mode 100644 index 0000000..df17765 --- /dev/null +++ b/src/rpq/mod.rs @@ -0,0 +1,54 @@ +//! Regular Path Query (RPQ) evaluation over edge-labeled graphs. +//! ```rust,ignore +//! use pathrex::sparql::parse_rpq; +//! use pathrex::rpq::{RpqEvaluator, nfarpq::NfaRpqEvaluator}; +//! +//! let triple = parse_rpq("SELECT ?x ?y WHERE { ?x /* ?y . }")?; +//! let result = NfaRpqEvaluator.evaluate(&triple.subject, &triple.path, &triple.object, &graph)?; +//! 
``` + +pub mod nfarpq; + +use crate::graph::GraphDecomposition; +use crate::graph::GraphblasVector; +use crate::sparql::ExtractError; +use spargebra::SparqlSyntaxError; +use spargebra::algebra::PropertyPathExpression; +use spargebra::term::TermPattern; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum RpqError { + #[error("SPARQL syntax error: {0}")] + Parse(#[from] SparqlSyntaxError), + + #[error("query extraction error: {0}")] + Extract(#[from] ExtractError), + + #[error("unsupported path expression: {0}")] + UnsupportedPath(String), + + #[error("label not found in graph: '{0}'")] + LabelNotFound(String), + + #[error("vertex not found in graph: '{0}'")] + VertexNotFound(String), + + #[error("GraphBLAS/LAGraph error: {0}")] + GraphBlas(String), +} + +#[derive(Debug)] +pub struct RpqResult { + pub reachable: GraphblasVector, +} + +pub trait RpqEvaluator { + fn evaluate( + &self, + subject: &TermPattern, + path: &PropertyPathExpression, + object: &TermPattern, + graph: &G, + ) -> Result; +} diff --git a/src/rpq/nfarpq.rs b/src/rpq/nfarpq.rs new file mode 100644 index 0000000..b50625f --- /dev/null +++ b/src/rpq/nfarpq.rs @@ -0,0 +1,409 @@ +//! NFA-based RPQ evaluation using `LAGraph_RegularPathQuery`. + +use crate::graph::{ + GraphDecomposition, GraphError, GraphblasVector, LagraphGraph, ensure_grb_init, +}; +use crate::grb_ok; +use crate::la_ok; +use crate::lagraph_sys::*; +use crate::lagraph_sys::{GrB_BOOL, GrB_LOR, GrB_Matrix_build_BOOL, GrB_Matrix_new, LAGraph_Kind}; +use crate::rpq::{RpqError, RpqEvaluator, RpqResult}; +use spargebra::algebra::PropertyPathExpression; +use spargebra::term::TermPattern; +use std::collections::{HashMap, HashSet, VecDeque}; + +/// Transitions for a single edge label in the NFA. +/// +/// `rows[i]` and `cols[i]` form a parallel pair: there is a transition from +/// state `rows[i]` to state `cols[i]` on `label`. 
+#[derive(Debug, Clone)] +pub struct NfaLabelTransitions { + pub label: String, + pub rows: Vec, + pub cols: Vec, +} + +#[derive(Debug, Clone)] +pub struct Nfa { + pub num_states: usize, + pub start_states: Vec, + pub final_states: Vec, + pub transitions: Vec, +} + +impl Nfa { + pub fn from_property_path(path: &PropertyPathExpression) -> Result { + let mut builder = NfaBuilder::new(); + let (start, end) = builder.build(path)?; + builder.mark_start(start); + builder.mark_final(end); + Ok(builder.into_nfa()) + } + + pub fn build_lagraph_matrices(&self) -> Result, RpqError> { + ensure_grb_init().map_err(|e: GraphError| RpqError::GraphBlas(format!("{e}")))?; + let n = self.num_states as GrB_Index; + let mut result = Vec::with_capacity(self.transitions.len()); + + for trans in &self.transitions { + let mut mat: GrB_Matrix = std::ptr::null_mut(); + grb_ok!(GrB_Matrix_new(&mut mat, GrB_BOOL, n, n)) + .map_err(|e: GraphError| RpqError::GraphBlas(format!("{e}")))?; + + if !trans.rows.is_empty() { + let vals: Vec = vec![true; trans.rows.len()]; + grb_ok!(GrB_Matrix_build_BOOL( + mat, + trans.rows.as_ptr(), + trans.cols.as_ptr(), + vals.as_ptr(), + trans.rows.len() as u64, + GrB_LOR, + )) + .map_err(|e: GraphError| RpqError::GraphBlas(format!("{e}")))?; + } + + let lg = LagraphGraph::new(mat, LAGraph_Kind::LAGraph_ADJACENCY_DIRECTED) + .map_err(|e| RpqError::GraphBlas(format!("{e}")))?; + result.push((trans.label.clone(), lg)); + } + + Ok(result) + } +} + +#[derive(Debug, Clone)] +struct Transition { + from: usize, + to: usize, + label: Option, +} + +struct NfaBuilder { + num_states: usize, + transitions: Vec, + start_states: Vec, + final_states: Vec, +} + +impl NfaBuilder { + fn new() -> Self { + Self { + num_states: 0, + transitions: Vec::new(), + start_states: Vec::new(), + final_states: Vec::new(), + } + } + + fn new_state(&mut self) -> usize { + let s = self.num_states; + self.num_states += 1; + s + } + + fn add_epsilon(&mut self, from: usize, to: usize) { + 
self.transitions.push(Transition { + from, + to, + label: None, + }); + } + + fn add_label(&mut self, from: usize, to: usize, label: String) { + self.transitions.push(Transition { + from, + to, + label: Some(label), + }); + } + + fn mark_start(&mut self, s: usize) { + self.start_states.push(s); + } + + fn mark_final(&mut self, s: usize) { + self.final_states.push(s); + } + + fn build(&mut self, path: &PropertyPathExpression) -> Result<(usize, usize), RpqError> { + match path { + PropertyPathExpression::NamedNode(nn) => { + let s = self.new_state(); + let e = self.new_state(); + self.add_label(s, e, nn.as_str().to_owned()); + Ok((s, e)) + } + + PropertyPathExpression::Sequence(lhs, rhs) => { + let (ls, le) = self.build(lhs)?; + let (rs, re) = self.build(rhs)?; + self.add_epsilon(le, rs); + Ok((ls, re)) + } + + PropertyPathExpression::Alternative(lhs, rhs) => { + let s = self.new_state(); + let e = self.new_state(); + let (ls, le) = self.build(lhs)?; + let (rs, re) = self.build(rhs)?; + self.add_epsilon(s, ls); + self.add_epsilon(s, rs); + self.add_epsilon(le, e); + self.add_epsilon(re, e); + Ok((s, e)) + } + + PropertyPathExpression::ZeroOrMore(inner) => { + let s = self.new_state(); + let e = self.new_state(); + let (is, ie) = self.build(inner)?; + self.add_epsilon(s, is); + self.add_epsilon(ie, is); + self.add_epsilon(ie, e); + self.add_epsilon(s, e); + Ok((s, e)) + } + + PropertyPathExpression::OneOrMore(inner) => { + let s = self.new_state(); + let e = self.new_state(); + let (is, ie) = self.build(inner)?; + self.add_epsilon(s, is); + self.add_epsilon(ie, is); + self.add_epsilon(ie, e); + Ok((s, e)) + } + + PropertyPathExpression::ZeroOrOne(inner) => { + let s = self.new_state(); + let e = self.new_state(); + let (is, ie) = self.build(inner)?; + self.add_epsilon(s, is); + self.add_epsilon(ie, e); + self.add_epsilon(s, e); + Ok((s, e)) + } + + PropertyPathExpression::Reverse(_) => Err(RpqError::UnsupportedPath( + "Reverse paths are not supported".into(), + )), + 
+ PropertyPathExpression::NegatedPropertySet(_) => Err(RpqError::UnsupportedPath( + "NegatedPropertySet paths are not supported".into(), + )), + } + } + + fn epsilon_closure(&self, states: &[usize]) -> HashSet { + let mut closure: HashSet = states.iter().copied().collect(); + let mut queue: VecDeque = states.iter().copied().collect(); + while let Some(s) = queue.pop_front() { + for t in &self.transitions { + if t.from == s && t.label.is_none() && !closure.contains(&t.to) { + closure.insert(t.to); + queue.push_back(t.to); + } + } + } + closure + } + + fn into_nfa(self) -> Nfa { + let n = self.num_states; + + let closures: Vec> = (0..n).map(|s| self.epsilon_closure(&[s])).collect(); + + let mut label_map: HashMap> = HashMap::new(); + for from in 0..n { + for t in &self.transitions { + if t.from == from { + if let Some(label) = &t.label { + for &cf in &closures[from] { + for &ct in &closures[t.to] { + label_map.entry(label.clone()).or_default().push((cf, ct)); + } + } + } + } + } + } + + let start_closure = self.epsilon_closure(&self.start_states); + let start_states: Vec = + start_closure.into_iter().map(|s| s as GrB_Index).collect(); + + let final_set: HashSet = self.final_states.iter().copied().collect(); + let final_states: Vec = (0..n) + .filter(|s| closures[*s].iter().any(|c| final_set.contains(c))) + .map(|s| s as GrB_Index) + .collect(); + + let transitions: Vec = label_map + .into_iter() + .map(|(label, pairs)| { + let mut rows = Vec::with_capacity(pairs.len()); + let mut cols = Vec::with_capacity(pairs.len()); + for (r, c) in pairs { + rows.push(r as GrB_Index); + cols.push(c as GrB_Index); + } + NfaLabelTransitions { label, rows, cols } + }) + .collect(); + + Nfa { + num_states: n, + start_states, + final_states, + transitions, + } + } +} + +/// Evaluates RPQs using `LAGraph_RegularPathQuery`. 
+pub struct NfaRpqEvaluator; + +impl RpqEvaluator for NfaRpqEvaluator { + fn evaluate( + &self, + subject: &TermPattern, + path: &PropertyPathExpression, + object: &TermPattern, + graph: &G, + ) -> Result { + let nfa = Nfa::from_property_path(path)?; + let nfa_matrices = nfa.build_lagraph_matrices()?; + + let src_id = resolve_vertex(subject, graph, true)?; + let _dst_id = resolve_vertex(object, graph, false)?; + + let n = graph.num_nodes(); + + let source_vertices: Vec = match src_id { + Some(id) => vec![id as GrB_Index], + None => (0..n as GrB_Index).collect(), + }; + + let mut nfa_graph_ptrs: Vec = + nfa_matrices.iter().map(|(_, lg)| lg.inner).collect(); + + let mut data_graph_ptrs: Vec = Vec::with_capacity(nfa_matrices.len()); + for (label, _) in &nfa_matrices { + let lg = graph + .get_graph(label) + .map_err(|_| RpqError::LabelNotFound(label.clone()))?; + data_graph_ptrs.push(lg.inner); + } + + let mut reachable: GrB_Vector = std::ptr::null_mut(); + + la_ok!(LAGraph_RegularPathQuery( + &mut reachable, + nfa_graph_ptrs.as_mut_ptr(), + nfa_matrices.len(), + nfa.start_states.as_ptr(), + nfa.start_states.len(), + nfa.final_states.as_ptr(), + nfa.final_states.len(), + data_graph_ptrs.as_mut_ptr(), + source_vertices.as_ptr(), + source_vertices.len(), + )) + .map_err(|e: GraphError| RpqError::GraphBlas(format!("{e}")))?; + + let result_vec = GraphblasVector { inner: reachable }; + + Ok(RpqResult { + reachable: result_vec, + }) + } +} + +fn resolve_vertex( + term: &TermPattern, + graph: &G, + is_subject: bool, +) -> Result, RpqError> { + match term { + TermPattern::Variable(_) => Ok(None), + TermPattern::NamedNode(nn) => { + let iri = nn.as_str(); + graph + .get_node_id(iri) + .map(Some) + .ok_or_else(|| RpqError::VertexNotFound(iri.to_owned())) + } + other => { + let msg = format!("{other}"); + if is_subject { + Err(RpqError::VertexNotFound(format!( + "unsupported subject term: {msg}" + ))) + } else { + Err(RpqError::VertexNotFound(format!( + "unsupported object term: 
{msg}" + ))) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use spargebra::algebra::PropertyPathExpression; + use spargebra::term::NamedNode; + + fn named(iri: &str) -> PropertyPathExpression { + PropertyPathExpression::NamedNode(NamedNode::new_unchecked(iri)) + } + + #[test] + fn test_single_label() { + let nfa = Nfa::from_property_path(&named("knows")).unwrap(); + assert_eq!(nfa.num_states, 2); + assert_eq!(nfa.start_states.len(), 1); + assert_eq!(nfa.final_states.len(), 1); + assert_eq!(nfa.transitions.len(), 1); + assert_eq!(nfa.transitions[0].label, "knows"); + assert_eq!(nfa.transitions[0].rows.len(), 1); + } + + #[test] + fn test_sequence() { + let path = PropertyPathExpression::Sequence(Box::new(named("a")), Box::new(named("b"))); + let nfa = Nfa::from_property_path(&path).unwrap(); + let labels: Vec<&str> = nfa.transitions.iter().map(|t| t.label.as_str()).collect(); + assert!(labels.contains(&"a")); + assert!(labels.contains(&"b")); + } + + #[test] + fn test_alternative() { + let path = PropertyPathExpression::Alternative(Box::new(named("a")), Box::new(named("b"))); + let nfa = Nfa::from_property_path(&path).unwrap(); + let labels: Vec<&str> = nfa.transitions.iter().map(|t| t.label.as_str()).collect(); + assert!(labels.contains(&"a")); + assert!(labels.contains(&"b")); + } + + #[test] + fn test_zero_or_more() { + let path = PropertyPathExpression::ZeroOrMore(Box::new(named("knows"))); + let nfa = Nfa::from_property_path(&path).unwrap(); + // Start state should also be a final state (zero matches). 
+ let start_set: HashSet = nfa.start_states.iter().copied().collect(); + let final_set: HashSet = nfa.final_states.iter().copied().collect(); + assert!(!start_set.is_disjoint(&final_set)); + } + + #[test] + fn test_reverse_unsupported() { + let path = PropertyPathExpression::Reverse(Box::new(named("knows"))); + assert!(matches!( + Nfa::from_property_path(&path), + Err(RpqError::UnsupportedPath(_)) + )); + } +} diff --git a/src/sparql/mod.rs b/src/sparql/mod.rs new file mode 100644 index 0000000..5b5c5fa --- /dev/null +++ b/src/sparql/mod.rs @@ -0,0 +1,348 @@ +//! SPARQL parsing and validation utilities. +//! +//! This module provides helpers for parsing SPARQL query strings using the +//! [`spargebra`] crate and extracting the property path triple pattern that +//! pathrex's RPQ evaluators operate on. +//! +//! # Supported query form +//! +//! SELECT queries with a single triple pattern in the +//! WHERE clause are supported: +//! +//! ```sparql +//! SELECT ?x ?y WHERE { ?x ?y . } +//! SELECT ?x ?y WHERE { ?x /* ?y . } +//! SELECT ?x WHERE { + ?x . } +//! ``` + +use spargebra::algebra::{GraphPattern, PropertyPathExpression}; +use spargebra::term::{NamedNodePattern, TermPattern, TriplePattern}; +use spargebra::{Query, SparqlParser, SparqlSyntaxError}; +use thiserror::Error; + +/// Error returned when extracting a property path triple from a parsed query. +#[derive(Debug, Error)] +pub enum ExtractError { + #[error("expected SELECT query, got a different query form")] + NotSelect, + #[error("WHERE clause must contain exactly one triple or property path pattern")] + NotSinglePath, + #[error("unsupported subject term: {0}")] + UnsupportedSubject(String), + #[error("unsupported object term: {0}")] + UnsupportedObject(String), + #[error("predicate in plain triple must be a named node, not a variable")] + VariablePredicate, +} + +pub const DEFAULT_BASE_IRI: &str = "http://example.org/"; + +/// Parse a SPARQL query string into a [`spargebra::Query`]. 
+/// +/// # Errors +/// +/// Returns [`SparqlSyntaxError`] if the input is not valid SPARQL 1.1. +pub fn parse_query(sparql: &str) -> Result { + SparqlParser::new() + // .with_base_iri(DEFAULT_BASE_IRI) + // .expect("DEFAULT_BASE_IRI is a valid IRI") + .parse_query(sparql) +} + +/// Extracted triple components from a parsed SPARQL query. +/// +/// Holds owned data so callers do not need to keep the [`Query`] alive. +#[derive(Debug, Clone)] +pub struct PathTriple { + pub subject: TermPattern, + pub path: PropertyPathExpression, + pub object: TermPattern, +} + +/// Extract the property path triple from a parsed SPARQL [`Query`]. +/// +/// Validates that the query is a `SELECT` with a single triple or property +/// path pattern in the WHERE clause and returns a [`PathTriple`] with the +/// three components. +pub fn extract_path(query: &Query) -> Result { + let pattern = match query { + Query::Select { pattern, .. } => pattern, + _ => return Err(ExtractError::NotSelect), + }; + + let triple = extract_path_from_pattern(pattern)?; + + validate_term(&triple.subject, true)?; + validate_term(&triple.object, false)?; + + Ok(triple) +} + +/// Recursively unwrap `GraphPattern` wrappers (Project, Distinct, etc.) to +/// find the single triple or path pattern inside. +fn extract_path_from_pattern(pattern: &GraphPattern) -> Result { + match pattern { + GraphPattern::Path { + subject, + path, + object, + } => Ok(PathTriple { + subject: subject.clone(), + path: path.clone(), + object: object.clone(), + }), + + GraphPattern::Bgp { patterns } => extract_from_bgp(patterns), + + GraphPattern::Project { inner, .. } => extract_path_from_pattern(inner), + + GraphPattern::Distinct { inner } => extract_path_from_pattern(inner), + GraphPattern::Reduced { inner } => extract_path_from_pattern(inner), + GraphPattern::Slice { inner, .. } => extract_path_from_pattern(inner), + + _ => Err(ExtractError::NotSinglePath), + } +} + +/// Extract a [`PathTriple`] from a BGP's triple patterns. 
+/// +/// Handles two cases: +/// 1. **Single triple** — `?x ?y` → wraps predicate as +/// [`PropertyPathExpression::NamedNode`]. +/// 2. **Desugared sequence path** — spargebra rewrites `?x // ?y` +/// into a chain of triples linked by blank-node intermediates: +/// `?x _:b0 . _:b0 _:b1 . _:b1 ?y`. +/// We detect this pattern and reconstruct a +/// [`PropertyPathExpression::Sequence`]. +fn extract_from_bgp(patterns: &[TriplePattern]) -> Result { + if patterns.is_empty() { + return Err(ExtractError::NotSinglePath); + } + if patterns.len() == 1 { + return bgp_triple_to_path_triple(&patterns[0]); + } + + let mut steps: Vec = Vec::with_capacity(patterns.len()); + for triple in patterns { + match &triple.predicate { + NamedNodePattern::NamedNode(nn) => { + steps.push(PropertyPathExpression::NamedNode(nn.clone())); + } + NamedNodePattern::Variable(_) => return Err(ExtractError::NotSinglePath), + } + } + + for i in 0..patterns.len() - 1 { + let obj_bn = match &patterns[i].object { + TermPattern::BlankNode(bn) => bn, + _ => return Err(ExtractError::NotSinglePath), + }; + let subj_bn = match &patterns[i + 1].subject { + TermPattern::BlankNode(bn) => bn, + _ => return Err(ExtractError::NotSinglePath), + }; + if obj_bn != subj_bn { + return Err(ExtractError::NotSinglePath); + } + } + + let path = steps + .into_iter() + .reduce(|acc, step| PropertyPathExpression::Sequence(Box::new(acc), Box::new(step))) + .unwrap(); + + Ok(PathTriple { + subject: patterns[0].subject.clone(), + path, + object: patterns.last().unwrap().object.clone(), + }) +} + +/// Convert a plain BGP [`TriplePattern`] into a [`PathTriple`] by wrapping +/// the predicate as a [`PropertyPathExpression::NamedNode`]. 
+fn bgp_triple_to_path_triple(triple: &TriplePattern) -> Result { + let path = match &triple.predicate { + NamedNodePattern::NamedNode(nn) => PropertyPathExpression::NamedNode(nn.clone()), + NamedNodePattern::Variable(_) => return Err(ExtractError::VariablePredicate), + }; + Ok(PathTriple { + subject: triple.subject.clone(), + path, + object: triple.object.clone(), + }) +} + +/// Validate that a [`TermPattern`] is a supported vertex form. +fn validate_term(term: &TermPattern, is_subject: bool) -> Result<(), ExtractError> { + match term { + TermPattern::Variable(_) | TermPattern::NamedNode(_) => Ok(()), + other => { + let msg = format!("{other}"); + if is_subject { + Err(ExtractError::UnsupportedSubject(msg)) + } else { + Err(ExtractError::UnsupportedObject(msg)) + } + } + } +} + +pub fn parse_rpq(sparql: &str) -> Result { + let query = parse_query(sparql)?; + let triple = extract_path(&query)?; + Ok(triple) +} + +/// Combined error for [`parse_rpq`]. +#[derive(Debug, Error)] +pub enum RpqParseError { + #[error("SPARQL syntax error: {0}")] + Syntax(#[from] SparqlSyntaxError), + #[error("query extraction error: {0}")] + Extract(#[from] ExtractError), +} + +#[cfg(test)] +mod tests { + use super::*; + use spargebra::algebra::PropertyPathExpression; + use spargebra::term::TermPattern; + + pub const DEFAULT_BASE_IRI: &str = "BASE "; + + fn parse_and_extract(sparql: &str) -> PathTriple { + let q = format!("{DEFAULT_BASE_IRI} {sparql}"); + parse_rpq(&q).expect("parse_rpq failed") + } + + #[test] + fn test_plain_triple_bgp() { + let triple = parse_and_extract("SELECT ?x ?y WHERE { ?x ?y . }"); + assert!(matches!(triple.subject, TermPattern::Variable(_))); + assert!(matches!(triple.object, TermPattern::Variable(_))); + assert!(matches!(triple.path, PropertyPathExpression::NamedNode(_))); + } + + #[test] + fn test_variable_variable_zero_or_more() { + let triple = parse_and_extract("SELECT ?x ?y WHERE { ?x * ?y . 
}"); + assert!(matches!(triple.subject, TermPattern::Variable(_))); + assert!(matches!(triple.object, TermPattern::Variable(_))); + assert!(matches!(triple.path, PropertyPathExpression::ZeroOrMore(_))); + } + + #[test] + fn test_variable_variable_sequence() { + let triple = parse_and_extract("SELECT ?x ?y WHERE { ?x / ?y . }"); + assert!(matches!(triple.subject, TermPattern::Variable(_))); + assert!(matches!(triple.object, TermPattern::Variable(_))); + assert!(matches!( + triple.path, + PropertyPathExpression::Sequence(_, _) + )); + } + + #[test] + fn test_named_variable_sequence() { + let triple = parse_and_extract("SELECT ?y WHERE { / ?y . }"); + assert!(matches!(triple.subject, TermPattern::NamedNode(_))); + assert!(matches!(triple.object, TermPattern::Variable(_))); + assert!(matches!( + triple.path, + PropertyPathExpression::Sequence(_, _) + )); + } + + #[test] + fn test_three_step_sequence() { + let triple = parse_and_extract("SELECT ?x ?y WHERE { ?x // ?y . }"); + assert!(matches!(triple.subject, TermPattern::Variable(_))); + assert!(matches!(triple.object, TermPattern::Variable(_))); + match &triple.path { + PropertyPathExpression::Sequence(lhs, _rhs) => { + assert!(matches!( + lhs.as_ref(), + PropertyPathExpression::Sequence(_, _) + )); + } + other => panic!("expected Sequence, got {other:?}"), + } + } + + #[test] + fn test_variable_named_star() { + let triple = parse_and_extract("SELECT ?x WHERE { ?x * . }"); + assert!(matches!(triple.subject, TermPattern::Variable(_))); + assert!(matches!(triple.object, TermPattern::NamedNode(_))); + assert!(matches!(triple.path, PropertyPathExpression::ZeroOrMore(_))); + } + + #[test] + fn test_alternative_path() { + let triple = parse_and_extract("SELECT ?x ?y WHERE { ?x | ?y . }"); + assert!(matches!( + triple.path, + PropertyPathExpression::Alternative(_, _) + )); + } + + #[test] + fn test_one_or_more() { + let triple = parse_and_extract("SELECT ?x ?y WHERE { ?x + ?y . 
}"); + assert!(matches!(triple.path, PropertyPathExpression::OneOrMore(_))); + } + + #[test] + fn test_zero_or_one() { + let triple = parse_and_extract("SELECT ?x ?y WHERE { ?x ? ?y . }"); + assert!(matches!(triple.path, PropertyPathExpression::ZeroOrOne(_))); + } + + #[test] + fn test_complex_path() { + let triple = parse_and_extract("SELECT ?x ?y WHERE { ?x (/)* ?y . }"); + assert!(matches!(triple.path, PropertyPathExpression::ZeroOrMore(_))); + } + + #[test] + fn test_not_select_returns_error() { + let sparql = format!("{DEFAULT_BASE_IRI} ASK {{ ?x ?y }}"); + let query = parse_query(&sparql).expect("parse failed"); + let result = extract_path(&query); + assert!(matches!(result, Err(ExtractError::NotSelect))); + } + + #[test] + fn test_multiple_triples_returns_error() { + let sparql = format!("{DEFAULT_BASE_IRI} SELECT ?x ?y WHERE {{ ?x ?z . ?z ?y . }}"); + let result = parse_rpq(&sparql); + assert!(matches!( + result, + Err(RpqParseError::Extract(ExtractError::NotSinglePath)) + )); + } + + #[test] + fn test_default_base_iri_resolves_relative_iris() { + let triple = parse_and_extract("SELECT ?x ?y WHERE { ?x ?y . }"); + if let PropertyPathExpression::NamedNode(nn) = &triple.path { + assert_eq!(nn.as_str(), "http://example.org/knows"); + } else { + panic!("expected NamedNode path"); + } + } + + #[test] + fn test_with_prefix_resolves_prefixed_iris() { + let query = SparqlParser::new() + .with_prefix("ex", "http://example.org/") + .unwrap() + .parse_query("SELECT ?x ?y WHERE { ?x ex:knows/ex:likes ?y . }") + .expect("parse with prefix failed"); + let triple = extract_path(&query).expect("extract failed"); + assert!(matches!( + triple.path, + PropertyPathExpression::Sequence(_, _) + )); + } +} diff --git a/src/utils.rs b/src/utils.rs index 7cb37d3..e4add86 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -180,3 +180,23 @@ macro_rules! 
la_ok { } }}; } + +pub fn build_graph(edges: &[(&str, &str, &str)]) -> ::Graph { + let builder = InMemoryBuilder::new(); + let edges = edges + .iter() + .cloned() + .map(|(s, t, l)| { + Ok(Edge { + source: s.to_string(), + label: l.to_string(), + target: t.to_string(), + }) + }) + .collect::>>(); + builder + .with_stream(edges.into_iter()) + .expect("Should insert edges stream") + .build() + .expect("build must succeed") +} diff --git a/tests/inmemory_tests.rs b/tests/inmemory_tests.rs index d30c305..dccd256 100644 --- a/tests/inmemory_tests.rs +++ b/tests/inmemory_tests.rs @@ -1,28 +1,9 @@ use pathrex::formats::csv::Csv; +use pathrex::utils::build_graph; use pathrex::graph::{ - Backend, Edge, Graph, GraphBuilder, GraphDecomposition, GraphError, InMemory, InMemoryBuilder, + Edge, Graph, GraphBuilder, GraphDecomposition, GraphError, InMemory, InMemoryBuilder, }; -fn build_graph(edges: &[(&str, &str, &str)]) -> ::Graph { - let builder = InMemoryBuilder::new(); - let edges = edges - .iter() - .cloned() - .map(|(s, t, l)| { - Ok(Edge { - source: s.to_string(), - label: l.to_string(), - target: t.to_string(), - }) - }) - .collect::>>(); - builder - .with_stream(edges.into_iter()) - .expect("Should insert edges stream") - .build() - .expect("build must succeed") -} - #[test] fn node_ids_are_unique() { let graph = build_graph(&[ diff --git a/tests/nfarpq_tests.rs b/tests/nfarpq_tests.rs new file mode 100644 index 0000000..53029c3 --- /dev/null +++ b/tests/nfarpq_tests.rs @@ -0,0 +1,363 @@ +use pathrex::graph::GraphDecomposition; +use pathrex::lagraph_sys::{GrB_Index, GrB_Vector_extractTuples_BOOL, GrB_Vector_nvals}; +use pathrex::rpq::nfarpq::NfaRpqEvaluator; +use pathrex::rpq::{RpqError, RpqEvaluator, RpqResult}; +use pathrex::utils::build_graph; +use spargebra::algebra::PropertyPathExpression; +use spargebra::term::{NamedNode, TermPattern, Variable}; + +fn named(iri: &str) -> PropertyPathExpression { + PropertyPathExpression::NamedNode(NamedNode::new_unchecked(iri)) +} 
+ +fn var(name: &str) -> TermPattern { + TermPattern::Variable(Variable::new_unchecked(name)) +} + +fn named_term(iri: &str) -> TermPattern { + TermPattern::NamedNode(NamedNode::new_unchecked(iri)) +} + +fn reachable_indices(result: &RpqResult) -> Vec { + unsafe { + let mut nvals: GrB_Index = 0; + GrB_Vector_nvals(&mut nvals, result.reachable.inner); + if nvals == 0 { + return Vec::new(); + } + let mut indices = vec![0u64; nvals as usize]; + let mut values = vec![false; nvals as usize]; + GrB_Vector_extractTuples_BOOL( + indices.as_mut_ptr(), + values.as_mut_ptr(), + &mut nvals, + result.reachable.inner, + ); + indices.truncate(nvals as usize); + indices + } +} + +fn reachable_count(result: &RpqResult) -> u64 { + unsafe { + let mut nvals: GrB_Index = 0; + GrB_Vector_nvals(&mut nvals, result.reachable.inner); + nvals + } +} + +/// Graph: A --knows--> B --knows--> C +/// Query: ?x ?y +#[test] +fn test_single_label_variable_variable() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = NfaRpqEvaluator; + + let result = evaluator + .evaluate(&var("x"), &named("knows"), &var("y"), &graph) + .expect("evaluate should succeed"); + + let count = reachable_count(&result); + assert_eq!(count, 2); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: ?y +#[test] +fn test_single_label_named_source() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = NfaRpqEvaluator; + + let result = evaluator + .evaluate(&named_term("A"), &named("knows"), &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let b_id = graph.get_node_id("B").expect("B should exist"); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B (id={b_id}) should be reachable from A via 'knows', got indices: {indices:?}" + ); +} + +/// Graph: A --knows--> B --likes--> C +/// Query: ?x / ?y (two-hop sequence) +#[test] +fn test_sequence_path() { + let graph = build_graph(&[("A", "B", 
"knows"), ("B", "C", "likes")]); + let evaluator = NfaRpqEvaluator; + + let path = PropertyPathExpression::Sequence(Box::new(named("knows")), Box::new(named("likes"))); + + let result = evaluator + .evaluate(&var("x"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let count = reachable_count(&result); + assert_eq!(count, 1); +} + +/// Graph: A --knows--> B --likes--> C +/// Query: / ?y +#[test] +fn test_sequence_path_named_source() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "likes")]); + let evaluator = NfaRpqEvaluator; + + let path = PropertyPathExpression::Sequence(Box::new(named("knows")), Box::new(named("likes"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let c_id = graph.get_node_id("C").expect("C should exist"); + assert!( + indices.contains(&(c_id as GrB_Index)), + "C (id={c_id}) should be reachable from A via knows/likes, got indices: {indices:?}" + ); +} + +/// Graph: A --knows--> B, A --likes--> C +/// Query: ?x | ?y +#[test] +fn test_alternative_path() { + let graph = build_graph(&[("A", "B", "knows"), ("A", "C", "likes")]); + let evaluator = NfaRpqEvaluator; + + let path = + PropertyPathExpression::Alternative(Box::new(named("knows")), Box::new(named("likes"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let b_id = graph.get_node_id("B").expect("B should exist"); + let c_id = graph.get_node_id("C").expect("C should exist"); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B should be reachable via knows|likes" + ); + assert!( + indices.contains(&(c_id as GrB_Index)), + "C should be reachable via knows|likes" + ); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: * ?y +#[test] +fn test_zero_or_more_path() { + let graph = build_graph(&[("A", "B", 
"knows"), ("B", "C", "knows")]); + let evaluator = NfaRpqEvaluator; + + let path = PropertyPathExpression::ZeroOrMore(Box::new(named("knows"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let a_id = graph.get_node_id("A").expect("A should exist"); + let b_id = graph.get_node_id("B").expect("B should exist"); + let c_id = graph.get_node_id("C").expect("C should exist"); + + assert!( + indices.contains(&(a_id as GrB_Index)), + "A should be reachable (zero hops)" + ); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B should be reachable (one hop)" + ); + assert!( + indices.contains(&(c_id as GrB_Index)), + "C should be reachable (two hops)" + ); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: + ?y +#[test] +fn test_one_or_more_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = NfaRpqEvaluator; + + let path = PropertyPathExpression::OneOrMore(Box::new(named("knows"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let a_id = graph.get_node_id("A").expect("A should exist"); + let b_id = graph.get_node_id("B").expect("B should exist"); + let c_id = graph.get_node_id("C").expect("C should exist"); + + assert!( + !indices.contains(&(a_id as GrB_Index)), + "A shouldn't be reachable" + ); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B should be reachable (one hop)" + ); + assert!( + indices.contains(&(c_id as GrB_Index)), + "C should be reachable (two hops)" + ); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: ? 
?y +#[test] +fn test_zero_or_one_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = NfaRpqEvaluator; + + let path = PropertyPathExpression::ZeroOrOne(Box::new(named("knows"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let a_id = graph.get_node_id("A").expect("A should exist"); + let b_id = graph.get_node_id("B").expect("B should exist"); + let c_id = graph.get_node_id("C").expect("C should exist"); + + assert!( + indices.contains(&(a_id as GrB_Index)), + "A should be reachable (zero hops)" + ); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B should be reachable (one hop)" + ); + assert!( + !indices.contains(&(c_id as GrB_Index)), + "C should NOT be reachable (two hops, but path is ?)" + ); +} + +#[test] +fn test_label_not_found() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = NfaRpqEvaluator; + + let result = evaluator.evaluate(&var("x"), &named("nonexistent"), &var("y"), &graph); + + assert!( + matches!(result, Err(RpqError::LabelNotFound(ref l)) if l == "nonexistent"), + "expected LabelNotFound error, got: {result:?}" + ); +} + +#[test] +fn test_vertex_not_found() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = NfaRpqEvaluator; + + let result = evaluator.evaluate(&named_term("Z"), &named("knows"), &var("y"), &graph); + + assert!( + matches!(result, Err(RpqError::VertexNotFound(ref v)) if v == "Z"), + "expected VertexNotFound error, got: {result:?}" + ); +} + +#[test] +fn test_object_vertex_not_found() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = NfaRpqEvaluator; + + let result = evaluator.evaluate(&var("x"), &named("knows"), &named_term("Z"), &graph); + + assert!( + matches!(result, Err(RpqError::VertexNotFound(ref v)) if v == "Z"), + "expected VertexNotFound error for object, got: {result:?}" + ); +} + 
+#[test] +fn test_reverse_path_unsupported() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = NfaRpqEvaluator; + + let path = PropertyPathExpression::Reverse(Box::new(named("knows"))); + let result = evaluator.evaluate(&var("x"), &path, &var("y"), &graph); + + assert!( + matches!(result, Err(RpqError::UnsupportedPath(_))), + "expected UnsupportedPath error, got: {result:?}" + ); +} + +/// Graph: A --knows--> B --knows--> C --knows--> A (cycle) +/// Query: * ?y +#[test] +fn test_cycle_graph_star() { + let graph = build_graph(&[ + ("A", "B", "knows"), + ("B", "C", "knows"), + ("C", "A", "knows"), + ]); + let evaluator = NfaRpqEvaluator; + + let path = PropertyPathExpression::ZeroOrMore(Box::new(named("knows"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let count = reachable_count(&result); + assert_eq!(count, 3, "all 3 nodes should be reachable in a cycle"); +} + +/// Graph: A --knows--> B --likes--> C --knows--> D +/// Query: ?x /*/ ?y +#[test] +fn test_complex_path() { + let graph = build_graph(&[ + ("A", "B", "knows"), + ("B", "C", "likes"), + ("C", "D", "knows"), + ]); + let evaluator = NfaRpqEvaluator; + + // knows / likes* / knows + let path = PropertyPathExpression::Sequence( + Box::new(PropertyPathExpression::Sequence( + Box::new(named("knows")), + Box::new(PropertyPathExpression::ZeroOrMore(Box::new(named("likes")))), + )), + Box::new(named("knows")), + ); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let d_id = graph.get_node_id("D").expect("D should exist"); + assert!( + indices.contains(&(d_id as GrB_Index)), + "D should be reachable via knows/likes*/knows, got indices: {indices:?}" + ); +} + +#[test] +fn test_no_matching_path() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = NfaRpqEvaluator; + + let path = 
PropertyPathExpression::Sequence(Box::new(named("knows")), Box::new(named("likes"))); + + let result = evaluator.evaluate(&var("x"), &path, &var("y"), &graph); + + assert!( + matches!(result, Err(RpqError::LabelNotFound(ref l)) if l == "likes"), + "expected LabelNotFound for 'likes', got: {result:?}" + ); +}