// shape/lib.rs

1mod accepts;
2mod case_enum;
3mod child_shape;
4mod display;
5mod from_json;
6mod hashing;
7mod helpers;
8mod merge;
9mod meta;
10pub mod name;
11
12pub mod graphql;
13pub mod location;
14#[cfg(test)]
15mod tests;
16mod visitor;
17
18use std::hash::Hash;
19use std::hash::Hasher;
20use std::iter::empty;
21
22pub use accepts::ShapeMismatch;
23pub use case_enum::Error;
24pub use case_enum::ShapeCase;
25pub use helpers::OffsetRange;
26use helpers::Ref;
27use indexmap::IndexMap;
28use indexmap::IndexSet;
29use meta::ShapeMeta;
30pub use visitor::ShapeVisitor;
31
32use crate::case_enum::all::all;
33use crate::case_enum::one::one;
34use crate::location::Location;
35use crate::merge::MergeSet;
36use crate::name::Name;
37use crate::name::WeakScope;
38
39/// The `shape::Shape` struct provides a recursive, immutable, reference-counted
40/// tree/DAG format for representing and enforcing common structures and usage
41/// patterns of JSON-like data.
42///
43/// The `Shape` system is not bound to any particular programming language, so
44/// it does not inherit a data model that it must represent and defend, yet it
45/// must adopt/assume _some_ concrete data model, since a type system without a
46/// data model to enforce is as useful as a straitjacket on a coat rack. JSON
47/// was chosen for its relative simplicity, its ubiquity as a data interchange
48/// format used across programming languages, and because JSON is often used in
49/// scenarios without a static type system to help catch errors before runtime.
50///
51/// The `Shape` system has no source syntax for denoting shapes directly, but
52/// you can use the `Shape::*` helper functions to create shapes
53/// programmatically, in Rust. `Shape::pretty_print()` provides a human-readable
54/// representation of a `Shape` for debugging and testing purposes.
55///
56/// All in all, this _Static `Shape` System_ (SSS) supports the following
57/// type-theoretic features:
58///
59/// - [x] Primitive shapes: `Bool`, `String`, `Int`, `Float`, `Null`
60/// - [x] Singleton primitive shapes: `true`, `false`, `"hello"`, `42`, `null`
61/// - [x] `Array` shapes, supporting both static tuples and dynamic lists
62/// - [x] `Object` shapes, supporting both static fields and dynamic string keys
63/// - [x] `One<S1, S2, ...>` union shapes, representing a set of shape
64///   alternatives
65/// - [x] `All<S1, S2, ...>` intersection shapes, representing a set
66///   simultaneous requirements
67/// - [x] `shape.field(name)` and `shape.item(index)` methods for accessing the
68///   shape of a subproperty of a shape
69/// - [x] `Name` shape references, with support for symbolic subproperty shape
70///   access
71/// - [x] `Error` shapes, representing a failure of shape processing, with
72///   support for chains of errors and partial shape data
73/// - [x] `None` shapes, representing the absence of a value (helpful for
74///   representing optionality of shapes)
75/// - [x] `subshape.satisfies(supershape)` and `supershape.accepts(subshape)`
76///   methods for testing shape relationships
77/// - [x] `shape.accepts_json(json)` method for testing whether concrete JSON
78///   data satisfies some expected shape
79/// - [x] `shape.pretty_print()` method for debugging and testing
80
81#[derive(Clone, Eq)]
82// [`Shape`] enforces the simplification of [`ShapeCase`] variants, because
83// there is no way to create a [`Shape`] without simplifying the input
84// [`ShapeCase`]. This is a very useful invariant because it allows each
85// [`ShapeCase`] to assume its immediate [`Shape`] children have already been
86// simplified.
87//
88// In addition simplification, [`Shape`] supports testing shape-shape acceptance
89// (or the equivalent inverse, satisfaction) with `super.accepts(sub)` and/or
90// `sub.satisfies(super)`. See also `shape.accepts_json(json)` for testing
91// whether concrete JSON data satisfies some expected `shape`.
92//
93// In the future, we may internalize/canonize shapes to reduce memory usage,
94// especially for well-known shapes like `Bool` and `Int` and `String`. This
95// would require either thread safety (is `type Ref<T> = std::sync::Arc<T>`
96// enough?) or maintaining per-thread canonical shape tables.
97pub struct Shape {
98    // This field is private, but if you want to match against an immutable
99    // reference to the `ShapeCase` variant, use `match shape.case() { ... }`.
100    case: Ref<ShapeCase>,
101
102    /// The combination of locations which, combined, produce this shape.
103    ///
104    /// Many cases will only have a single location, but when shapes are
105    /// simplified, their locations are all retained in the result.
106    ///
107    /// Currently [`ShapeMeta::Loc(Location)`] is the only variant here, but we
108    /// can add other kinds of metadata in the future.
109    meta: Ref<ShapeMeta>,
110}
111
112impl PartialEq for Shape {
113    fn eq(&self, other: &Self) -> bool {
114        self.case == other.case
115    }
116}
117
118impl Hash for Shape {
119    fn hash<H: Hasher>(&self, state: &mut H) {
120        // Since the PartialEq implementation ignores self.locations, so must
121        // the Hash implementation.
122        self.case.hash(state);
123    }
124}
125
126impl Shape {
127    /// Create a `Shape` from a [`ShapeCase`] variant.
128    ///
129    /// This method is crate-private to help enforce some invariants.
130    pub(crate) fn new(case: ShapeCase, locations: impl IntoIterator<Item = Location>) -> Shape {
131        let meta = ShapeMeta::new(&case, locations, []);
132        Shape {
133            case: Ref::new(case),
134            meta: Ref::new(meta),
135        }
136    }
137
138    /// When boolean helper methods like `.is_none()` and `.is_null()` are not
139    /// enough, you can match against the underlying [`ShapeCase`] by obtaining an
140    /// immutable `&ShapeCase` reference using the `shape.case()` method.
141    #[must_use]
142    pub fn case(&self) -> &ShapeCase {
143        self.case.as_ref()
144    }
145
146    /// Returns an iterator over all [`Location`]s associated with this shape.
147    pub fn locations(&self) -> impl Iterator<Item = &Location> {
148        let self_locs = self.meta.locations();
149
150        let unique_locs: IndexSet<&Location> = match self.case() {
151            ShapeCase::One(shapes) => self_locs
152                .chain(shapes.iter().flat_map(|s| s.meta.locations()))
153                .collect(),
154            ShapeCase::All(shapes) => self_locs
155                .chain(shapes.iter().flat_map(|s| s.meta.locations()))
156                .collect(),
157            _ => self_locs.collect(),
158        };
159
160        unique_locs.into_iter()
161    }
162
163    /// Returns an iterator over all [`Name`]s associated with this shape.
164    pub fn names(&self) -> impl Iterator<Item = &Name> {
165        self.meta.names()
166    }
167
168    pub fn nested_base_names(&self) -> impl Iterator<Item = &str> {
169        self.meta.nested_base_names()
170    }
171
172    /// Returns a [`Shape`] that accepts any boolean value, `true` or `false`.
173    #[must_use]
174    pub fn bool(locations: impl IntoIterator<Item = Location>) -> Self {
175        Self::new(ShapeCase::Bool(None), locations)
176    }
177
178    /// Returns a [`Shape`] that accepts only the specified boolean value.
179    #[must_use]
180    pub fn bool_value(value: bool, locations: impl IntoIterator<Item = Location>) -> Self {
181        Self::new(ShapeCase::Bool(Some(value)), locations)
182    }
183
184    /// Returns a [`Shape`] that accepts any string value.
185    #[must_use]
186    pub fn string(locations: impl IntoIterator<Item = Location>) -> Self {
187        Self::new(ShapeCase::String(None), locations)
188    }
189
190    /// Returns a [`Shape`] that accepts only the specified string value.
191    #[must_use]
192    pub fn string_value(value: &str, locations: impl IntoIterator<Item = Location>) -> Self {
193        Self::new(ShapeCase::String(Some(value.to_string())), locations)
194    }
195
196    /// Returns a [`Shape`] that accepts any integer value.
197    #[must_use]
198    pub fn int(locations: impl IntoIterator<Item = Location>) -> Self {
199        Self::new(ShapeCase::Int(None), locations)
200    }
201
202    /// Returns a [`Shape`] that accepts only the specified integer value.
203    #[must_use]
204    pub fn int_value(value: i64, locations: impl IntoIterator<Item = Location>) -> Self {
205        Self::new(ShapeCase::Int(Some(value)), locations)
206    }
207
208    /// Returns a [`Shape`] that accepts any floating point value.
209    #[must_use]
210    pub fn float(locations: impl IntoIterator<Item = Location>) -> Self {
211        Self::new(ShapeCase::Float, locations)
212    }
213
214    /// Returns a [`Shape`] that accepts only the JSON `null` value.
215    #[must_use]
216    pub fn null(locations: impl IntoIterator<Item = Location>) -> Self {
217        Self::new(ShapeCase::Null, locations)
218    }
219
220    #[must_use]
221    pub fn is_null(&self) -> bool {
222        self.case.is_null()
223    }
224
225    /// Returns a symbolic reference to a named shape, potentially not yet
226    /// defined.
227    ///
228    /// In order to add items to the subpath of this named shape, call the
229    /// `.field(name)` and/or `.item(index)` methods.
230    ///
231    /// Note that variable shapes are represented by [`ShapeCase::Name`] where the
232    /// name string includes the initial `$` character.
233    #[must_use]
234    pub fn name(name: &str, locations: impl IntoIterator<Item = Location>) -> Self {
235        let locations = locations.into_iter().collect::<Vec<_>>();
236        Self::new(
237            ShapeCase::Name(
238                name::Name::base(name.to_string(), locations.clone()),
239                WeakScope::none(),
240            ),
241            locations.clone(),
242        )
243    }
244
245    /// Useful for obtaining the kind of [`IndexMap`] this library uses for the
246    /// [`ShapeCase::Object`] variant.
247    #[must_use]
248    pub fn empty_map() -> IndexMap<String, Self> {
249        IndexMap::new()
250    }
251
252    /// Returns a [`Shape`] that accepts any object shape, regardless of the other
253    /// shape's `fields` or `rest` shape, because an empty object shape `{}`
254    /// imposes no expectations on other objects (except that they are objects).
255    ///
256    /// In the other direction, an empty object shape `{}` can satisfy itself or
257    /// any `Dict<V>` shape (where the `Dict` may be dynamically empty), but
258    /// cannot satisfy any object shape with non-empty `fields`.
259    #[must_use]
260    pub fn empty_object(locations: impl IntoIterator<Item = Location>) -> Self {
261        Shape::new(
262            ShapeCase::Object {
263                fields: Shape::empty_map(),
264                rest: Shape::none(),
265            },
266            locations,
267        )
268    }
269
270    /// To get a compatible empty mutable [`IndexMap`] without directly
271    /// depending on the [`indexmap`] crate yourself, use [`Shape::empty_map()`].
272    #[must_use]
273    pub fn object(
274        fields: IndexMap<String, Shape>,
275        rest: Shape,
276        locations: impl IntoIterator<Item = Location>,
277    ) -> Self {
278        Shape::new(ShapeCase::Object { fields, rest }, locations)
279    }
280
281    /// Returns a [`Shape`] that accepts any object shape with the given static
282    /// fields, with no dynamic fields considered.
283    #[must_use]
284    pub fn record(
285        fields: IndexMap<String, Shape>,
286        locations: impl IntoIterator<Item = Location>,
287    ) -> Self {
288        Shape::object(fields, Shape::none(), locations)
289    }
290
291    /// Returns a [`Shape`] that accepts any dictionary-like object with dynamic
292    /// string properties having a given value shape.
293    #[must_use]
294    pub fn dict(value_shape: Shape, locations: impl IntoIterator<Item = Location>) -> Self {
295        Shape::object(Shape::empty_map(), value_shape, locations)
296    }
297
298    /// Arrays, tuples, and lists are all manifestations of the same underlying
299    /// [`ShapeCase::Array`] representation.
300    pub fn array(
301        prefix: impl IntoIterator<Item = Shape>,
302        tail: Shape,
303        locations: impl IntoIterator<Item = Location>,
304    ) -> Self {
305        let prefix = prefix.into_iter().collect();
306        Self::new(ShapeCase::Array { prefix, tail }, locations)
307    }
308
309    /// A tuple is a [`ShapeCase::Array`] with statically known (though possibly
310    /// empty) element shapes and no dynamic tail shape.
311    pub fn tuple(
312        shapes: impl IntoIterator<Item = Shape>,
313        locations: impl IntoIterator<Item = Location>,
314    ) -> Self {
315        Shape::array(shapes, Shape::none(), locations)
316    }
317
318    /// A `List<S>` is a [`ShapeCase::Array`] with an empty static `prefix` and a
319    /// dynamic element shape `S`.
320    #[must_use]
321    pub fn list(of: Shape, locations: impl IntoIterator<Item = Location>) -> Self {
322        Shape::array(empty(), of, locations)
323    }
324
325    /// Returns a [`ShapeCase::One`] union of the given shapes, simplified.
326    ///
327    /// Note that `locations` in this case should _not_ refer to each individual inner shape, but
328    /// to the thing that caused all of these shapes to be combined, like maybe a `->match`. If
329    /// there is no obvious cause to point users to, then the location should be empty.
330    pub fn one(
331        shapes: impl IntoIterator<Item = Shape>,
332        locations: impl IntoIterator<Item = Location>,
333    ) -> Self {
334        one(shapes.into_iter(), locations.into_iter().collect())
335    }
336
337    /// Returns a [`ShapeCase::All`] intersection of the given shapes, simplified.
338    ///
339    /// Note that `locations` in this case should _not_ refer to each individual inner shape, but
340    /// to the thing that caused all of these shapes to be combined, like maybe a `IntfA & IntfB`.
341    /// If there is no obvious cause to point users to, then the location should be empty.
342    pub fn all(
343        shapes: impl IntoIterator<Item = Shape>,
344        locations: impl IntoIterator<Item = Location>,
345    ) -> Self {
346        all(shapes.into_iter(), locations.into_iter().collect())
347    }
348
349    /// Returns a shape that accepts any JSON value (including [`ShapeCase::None`]
350    /// and [`ShapeCase::Unknown`]), and is not accepted by any shape other than itself.
351    #[must_use]
352    pub fn unknown(locations: impl IntoIterator<Item = Location>) -> Self {
353        Self::new(ShapeCase::Unknown, locations)
354    }
355
356    #[must_use]
357    pub fn is_unknown(&self) -> bool {
358        matches!(self.case(), ShapeCase::Unknown)
359    }
360
361    /// Returns a shape representing the absence of a JSON value, which is
362    /// satisfied/accepted only by itself.
363    ///
364    /// Because this represents the absence of a value, it shouldn't have a location. Basically,
365    /// nothing can produce none alone, and if it were a union, that union would have its own
366    /// location.
367    #[must_use]
368    pub fn none() -> Self {
369        Self::new(ShapeCase::None, [])
370    }
371
372    #[must_use]
373    pub fn is_none(&self) -> bool {
374        self.case.is_none()
375    }
376
377    /// Report a failure of shape processing.
378    #[must_use]
379    pub fn error(
380        message: impl Into<String>,
381        locations: impl IntoIterator<Item = Location>,
382    ) -> Self {
383        Self::new(ShapeCase::error(message.into()), locations)
384    }
385
386    #[must_use]
387    pub fn is_error(&self) -> bool {
388        matches!(self.case(), ShapeCase::Error { .. })
389    }
390
391    /// Iterate over all errors within this shape, recursively
392    pub fn errors(&self) -> impl Iterator<Item = &Error> {
393        self.case.errors()
394    }
395
396    /// Report a failure of shape processing associated with a
397    /// partial/best-guess shape that may still be useful.
398    #[must_use]
399    pub fn error_with_partial(
400        message: impl Into<String>,
401        partial: Shape,
402        locations: impl IntoIterator<Item = Location>,
403    ) -> Self {
404        Self::new(
405            ShapeCase::error_with_partial(message.into(), partial),
406            locations,
407        )
408    }
409
410    /// Clone the shape, adding the provided `locations` to the existing locations.
411    #[must_use]
412    pub fn with_locations<'a>(mut self, locations: impl IntoIterator<Item = &'a Location>) -> Self {
413        for loc in locations {
414            if !self.meta.has_location(loc) {
415                Ref::make_mut(&mut self.meta).add_location(loc);
416            }
417        }
418        self
419    }
420}
421
#[cfg(test)]
mod test_errors {
    use super::*;

    /// `Shape::errors()` yields every error element of a tuple, in element
    /// order, skipping the non-error elements.
    #[test]
    fn multiple_errors_in_array() {
        let shape = Shape::tuple(
            [
                Shape::int([]),
                Shape::error("Expected a string", []),
                Shape::bool([]),
                Shape::error("Expected a null", []),
            ],
            [],
        );
        let errors: Vec<_> = shape.errors().collect();
        assert_eq!(errors.len(), 2);
        assert_eq!(errors[0].message, "Expected a string");
        assert_eq!(errors[1].message, "Expected a null");
    }

    /// `Shape::errors()` descends into nested records: the error in field `b`
    /// is reported before the error in the nested field `d.e`.
    #[test]
    fn nested_errors() {
        let shape = Shape::record(
            [
                ("a".to_string(), Shape::int([])),
                ("b".to_string(), Shape::error("Expected a string", [])),
                ("c".to_string(), Shape::bool([])),
                (
                    "d".to_string(),
                    Shape::record(
                        [
                            ("e".to_string(), Shape::error("Expected a null", [])),
                            ("f".to_string(), Shape::float([])),
                        ]
                        .into_iter()
                        .collect(),
                        [],
                    ),
                ),
            ]
            .into_iter()
            .collect(),
            [],
        );

        let errors: Vec<_> = shape.errors().collect();
        assert_eq!(errors.len(), 2);
        assert_eq!(errors[0].message, "Expected a string");
        assert_eq!(errors[1].message, "Expected a null");
    }
}