From 3de0a4ae37517880b13ef9b0ee07589a60d2656e Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 23 Dec 2024 16:33:38 +0100 Subject: [PATCH 1/6] Get rid of legacy typecheck module --- core/src/typecheck/eq.rs | 762 ------- core/src/typecheck/error.rs | 494 ----- core/src/typecheck/mk_uniftype.rs | 154 -- core/src/typecheck/mod.rs | 3122 ----------------------------- core/src/typecheck/operation.rs | 688 ------- core/src/typecheck/pattern.rs | 649 ------ core/src/typecheck/reporting.rs | 238 --- core/src/typecheck/subtyping.rs | 254 --- core/src/typecheck/unif.rs | 1814 ----------------- 9 files changed, 8175 deletions(-) delete mode 100644 core/src/typecheck/eq.rs delete mode 100644 core/src/typecheck/error.rs delete mode 100644 core/src/typecheck/mk_uniftype.rs delete mode 100644 core/src/typecheck/mod.rs delete mode 100644 core/src/typecheck/operation.rs delete mode 100644 core/src/typecheck/pattern.rs delete mode 100644 core/src/typecheck/reporting.rs delete mode 100644 core/src/typecheck/subtyping.rs delete mode 100644 core/src/typecheck/unif.rs diff --git a/core/src/typecheck/eq.rs b/core/src/typecheck/eq.rs deleted file mode 100644 index 42092946d1..0000000000 --- a/core/src/typecheck/eq.rs +++ /dev/null @@ -1,762 +0,0 @@ -//! Computation of type equality for contracts. -//! -//! Determine if two contracts are equal as opaque types. Used to decide if two contract should -//! unify. -//! -//! ## Aliases -//! -//! One basic case we want to handle is aliases, which come in handy for parametrized contracts. We -//! want to equate `Alias` with e.g. `Foo "bar" "baz"` if `Alias` has been defined as -//! `let Alias = Foo "bar" "baz" in ...`. -//! -//! We also want to equate different aliases with the same definition: `Alias type_eq Alias'` if -//! `let Alias' = Foo "bar" "baz" in ...`, or `let Alias' = Alias in ...`. -//! -//! ## Recursion -//! -//! We must refrain from following all variables links blindly, as there could be cycles in the -//! 
graph leading to an infinite loop: -//! -//! ```nickel -//! { -//! Foo = Bar, -//! Bar = Foo, -//! } -//! ``` -//! -//! Because we just follows variables, and don't apply functions, we can detect cycles while -//! walking the graph. Still, as it is potentially performed many times during typechecking, type -//! equality ought to stay reasonably cheap. We choose to just set an arbitrary limit (the gas) on -//! the number of variable links that the type equality may follow. Doing so, we don't have to -//! worry about loops anymore. -//! -//! ## Equality on terms -//! -//! The terms inside a type may be arbitrarily complex. Primops applications, `match`, and the -//! like are quite unlikely to appear inside an annotation (they surely appear inside contract -//! definitions). We don't want to compare functions syntactically either. The spirit of this -//! implementation is to equate aliases or simple constructs that may appear inlined inside an -//! annotation (applications, records, primitive constants and arrays, mostly) in a structural way. -//! -//! We first test for physical equality (both as an optimization and to detect two variables -//! pointing to the same contract definition in the AST). If the comparison fails, we do a simple -//! structural recursion, unfolding simple forms and following variables with a limited number of -//! times. For anything more complex, we return false. - -use super::*; -use crate::{ - eval::{ - self, - cache::lazy::Thunk, - cache::{Cache, CacheIndex}, - }, - identifier::LocIdent, - term::{self, record::Field, IndexMap, UnaryOp}, -}; - -use std::fmt::Debug; - -/// The maximal number of variable links we want to unfold before abandoning the check. It should -/// stay low, but has been fixed arbitrarily: feel fee to increase reasonably if it turns out -/// legitimate type equalities between simple contracts are unduly rejected in practice. 
-pub const MAX_GAS: u8 = 12; - -/// Abstract over the term environment, which is represented differently in the typechecker and -/// during evaluation. -/// -/// The evaluation environment holds [crate::eval::cache::CacheIndex]es, while the term environment -/// used during typechecking just maps identifiers to a pair `(RichTerm, Environment)`. To have an -/// interface that works with both, [TermEnvironment] provides [TermEnvironment::get_then], which -/// has to take a closure representing the continuation of the task to do with the result instead -/// of merely returning it. -pub trait TermEnvironment: Clone { - fn get_then(env: &Self, id: Ident, f: F) -> T - where - F: FnOnce(Option<(&RichTerm, &Self)>) -> T; - - /// Cheap check that two environment are physically equal. This check is used to avoid doing - /// extra work and usually boils down to pointer equality. - fn fast_eq(_this: &Self, _that: &Self) -> bool; - - /// When comparing closure, we don't get an identifier, but a cache index (a thunk). - fn get_idx_then(env: &Self, idx: &CacheIndex, f: F) -> T - where - F: FnOnce(Option<(&RichTerm, &Self)>) -> T; -} - -/// A simple term environment, as a mapping from identifiers to a tuple of a term and an -/// environment (i.e. a closure), sufficient for the needs of typechecking. -#[derive(PartialEq, Clone, Debug)] -pub struct SimpleTermEnvironment(pub GenericEnvironment); - -impl SimpleTermEnvironment { - pub fn new() -> Self { - SimpleTermEnvironment(GenericEnvironment::new()) - } -} - -impl Default for SimpleTermEnvironment { - fn default() -> Self { - Self::new() - } -} - -impl TermEnvironment for SimpleTermEnvironment { - fn get_then(env: &Self, id: Ident, f: F) -> T - where - F: FnOnce(Option<(&RichTerm, &SimpleTermEnvironment)>) -> T, - { - // See https://github.com/rust-lang/rust-clippy/issues/11764. It's been fixed upstream, but - // hasn't landed in a clippy release yet. We can remove the allow(clippy) once it does - // land. 
- #[allow(clippy::map_identity)] - f(env.0.get(&id).map(|(rt, env)| (rt, env))) - } - - fn get_idx_then(_env: &Self, _idx: &CacheIndex, f: F) -> T - where - F: FnOnce(Option<(&RichTerm, &SimpleTermEnvironment)>) -> T, - { - debug_assert!( - false, - "we shouldn't see closure when computing contract equality at typechecking time" - ); - f(None) - } - - fn fast_eq(this: &Self, that: &Self) -> bool { - GenericEnvironment::ptr_eq(&this.0, &that.0) - } -} - -impl std::iter::FromIterator<(Ident, (RichTerm, SimpleTermEnvironment))> for SimpleTermEnvironment { - fn from_iter(iter: T) -> Self - where - T: IntoIterator, - { - SimpleTermEnvironment( - GenericEnvironment::::from_iter(iter), - ) - } -} - -impl TermEnvironment for eval::Environment { - fn get_then(env: &eval::Environment, id: Ident, f: F) -> T - where - F: FnOnce(Option<(&RichTerm, &eval::Environment)>) -> T, - { - match env.get(&id).map(eval::cache::lazy::Thunk::borrow) { - Some(closure_ref) => f(Some((&closure_ref.body, &closure_ref.env))), - None => f(None), - } - } - - fn get_idx_then(_env: &eval::Environment, idx: &CacheIndex, f: F) -> T - where - F: FnOnce(Option<(&RichTerm, &eval::Environment)>) -> T, - { - let closure_ref = idx.borrow_orig(); - - f(Some((&closure_ref.body, &closure_ref.env))) - } - - fn fast_eq(this: &Self, that: &Self) -> bool { - Self::ptr_eq(this, that) - } -} - -pub trait FromEnv { - fn from_env(eval_env: eval::Environment, cache: &C) -> Self; -} - -impl FromEnv for SimpleTermEnvironment { - fn from_env(eval_env: eval::Environment, cache: &C) -> Self { - let generic_env: GenericEnvironment<_, _> = eval_env - .iter_elems() - .map(|(id, idx)| { - let borrowed = cache.get_then(idx.clone(), |c| { - (c.body.clone(), Self::from_env(c.env.clone(), cache)) - }); - (*id, borrowed) - }) - .collect(); - SimpleTermEnvironment(generic_env) - } -} - -/// State threaded through the type equality computation. 
-#[derive(Copy, Clone, Default)] -struct State { - /// Used to generate temporary rigid type variables for substituting type variables when - /// comparing foralls. Those ids never escape the type equality computations and are used - /// solely as rigid type variables: this is why they don't need proper allocation in the - /// unification table or to care about those ids clashing with the one generated by the - /// typechecker. Generated type constants simply needs to be unique for the duration of the - /// type equality computation. - var_uid: usize, - - /// The current gas remaining for variable substitutions. Once it reaches zero and we encounter - /// a variable, we abort the computation and return false. - gas: u8, -} - -impl State { - fn new(var_uid: usize) -> Self { - State { - var_uid, - gas: MAX_GAS, - } - } - - /// Create a fresh unique id for a rigid type variable. - fn fresh_cst_id(&mut self) -> VarId { - let result = self.var_uid; - self.var_uid += 1; - result - } - - /// Try to consume one unit of gas for a variable substitution. Return true in case of success, - /// or false if the gas was already at zero. - fn use_gas(&mut self) -> bool { - if self.gas == 0 { - false - } else { - self.gas -= 1; - true - } - } -} - -/// Compute equality between two contracts. -/// -/// # Parameters -/// -/// - `env`: an environment mapping variables to their definition (the second placeholder in a -/// `let _ = _ in _`) -pub fn contract_eq( - var_uid: usize, - t1: &RichTerm, - env1: &E, - t2: &RichTerm, - env2: &E, -) -> bool { - contract_eq_bounded(&mut State::new(var_uid), t1, env1, t2, env2) -} - -/// **Warning**: this function isn't computing a sound contract equality (it could equate contracts -/// that aren't actually the same). It is used to deduplicate type and contract annotations for -/// pretty-printing, where there is no notion of environment and the only thing that matters is -/// that they are printed the same or not. 
-/// -/// Compute equality between two contracts in an empty environment. This means that two variables -/// with the same name are considered equal. -pub fn type_eq_noenv(var_uid: usize, t1: &Type, t2: &Type) -> bool { - let empty = eval::Environment::new(); - - type_eq_bounded( - &mut State::new(var_uid), - &GenericUnifType::from_type(t1.clone(), &empty), - &empty, - &GenericUnifType::from_type(t2.clone(), &empty), - &empty, - ) -} - -/// Decide type equality on contracts in their respective environment and given the remaining gas -/// in `state`. -fn contract_eq_bounded( - state: &mut State, - t1: &RichTerm, - env1: &E, - t2: &RichTerm, - env2: &E, -) -> bool { - use Term::*; - - // Test for physical equality as both an optimization and a way to cheaply equate complex - // contracts that happen to point to the same definition (while the purposely limited - // structural checks below may reject the equality) - if term::SharedTerm::ptr_eq(&t1.term, &t2.term) && E::fast_eq(env1, env2) { - return true; - } - - match (t1.as_ref(), t2.as_ref()) { - (Null, Null) => true, - (Bool(b1), Bool(b2)) => b1 == b2, - (Num(n1), Num(n2)) => n1 == n2, - (Str(s1), Str(s2)) => s1 == s2, - (Enum(id1), Enum(id2)) => id1 == id2, - (SealingKey(s1), SealingKey(s2)) => s1 == s2, - (Sealed(key1, inner1, _), Sealed(key2, inner2, _)) => { - key1 == key2 && contract_eq_bounded(state, inner1, env1, inner2, env2) - } - // We only compare string chunks when they represent a plain string (they don't contain any - // interpolated expression), as static string may be currently parsed as such. We return - // false for anything more complex. 
- (StrChunks(scs1), StrChunks(scs2)) => { - scs1.len() == scs2.len() - && scs1 - .iter() - .zip(scs2.iter()) - .all(|(chunk1, chunk2)| match (chunk1, chunk2) { - (StrChunk::Literal(s1), StrChunk::Literal(s2)) => s1 == s2, - _ => false, - }) - } - (App(head1, arg1), App(head2, arg2)) => { - contract_eq_bounded(state, head1, env1, head2, env2) - && contract_eq_bounded(state, arg1, env1, arg2, env2) - } - (Closure(id1), Closure(id2)) if Thunk::ptr_eq(id1, id2) => true, - // All variables must be bound at this stage. This is checked by the typechecker when - // walking annotations. However, we may assume that `env` is a local environment (that it - // doesn't include the stdlib). In that case, free variables (unbound) may be deemed equal - // if they have the same identifier: whatever global environment the term will be put in, - // free variables are not redefined locally and will be bound to the same value in any case. - (Var(id1), Var(id2)) => { - ::get_then(env1, id1.ident(), |binding1| { - ::get_then(env2, id2.ident(), |binding2| { - match (binding1, binding2) { - (Some((t1, env1)), Some((t2, env2))) => { - // We may end up using one more gas unit if gas was exactly 1. That is - // not very important, and it's simpler to just ignore this case. We - // still return false if gas was already at zero. - let had_gas = state.use_gas(); - state.use_gas(); - had_gas && contract_eq_bounded(state, t1, env1, t2, env2) - } - (None, None) => id1 == id2, - _ => false, - } - }) - }) - } - (Closure(idx1), Closure(idx2)) => { - ::get_idx_then(env1, idx1, |binding1| { - ::get_idx_then(env2, idx2, |binding2| { - match (binding1, binding2) { - (Some((t1, env1)), Some((t2, env2))) => { - // We may end up using one more gas unit if gas was exactly 1. That is - // not very important, and it's simpler to just ignore this case. We - // still return false if gas was already at zero. 
- let had_gas = state.use_gas(); - state.use_gas(); - contract_eq_bounded(state, t1, env1, t2, env2) && had_gas - } - _ => false, - } - }) - }) - } - (Var(id), _) => { - state.use_gas() - && ::get_then(env1, id.ident(), |binding| { - binding - .map(|(t1, env1)| contract_eq_bounded(state, t1, env1, t2, env2)) - .unwrap_or(false) - }) - } - (_, Var(id)) => { - state.use_gas() - && ::get_then(env2, id.ident(), |binding| { - binding - .map(|(t2, env2)| contract_eq_bounded(state, t1, env1, t2, env2)) - .unwrap_or(false) - }) - } - (Closure(idx), _) => { - state.use_gas() - && ::get_idx_then(env1, idx, |binding| { - binding - .map(|(t1, env1)| contract_eq_bounded(state, t1, env1, t2, env2)) - .unwrap_or(false) - }) - } - (_, Closure(idx)) => { - state.use_gas() - && ::get_idx_then(env2, idx, |binding| { - binding - .map(|(t2, env2)| contract_eq_bounded(state, t1, env1, t2, env2)) - .unwrap_or(false) - }) - } - (Record(r1), Record(r2)) => { - map_eq( - contract_eq_fields, - state, - &r1.fields, - env1, - &r2.fields, - env2, - ) && r1.attrs.open == r2.attrs.open - } - (RecRecord(r1, dyn_fields, _), Record(r2)) | (Record(r1), RecRecord(r2, dyn_fields, _)) => { - dyn_fields.is_empty() - && map_eq( - contract_eq_fields, - state, - &r1.fields, - env1, - &r2.fields, - env2, - ) - && r1.attrs.open == r2.attrs.open - } - (RecRecord(r1, dyn_fields1, _), RecRecord(r2, dyn_fields2, _)) => - // We only compare records whose field structure is statically known (i.e. without dynamic - // fields). 
- { - dyn_fields1.is_empty() - && dyn_fields2.is_empty() - && map_eq( - contract_eq_fields, - state, - &r1.fields, - env1, - &r2.fields, - env2, - ) - && r1.attrs.open == r2.attrs.open - } - (Array(ts1, attrs1), Array(ts2, attrs2)) => { - ts1.len() == ts2.len() - && ts1 - .iter() - .zip(ts2.iter()) - .all(|(t1, t2)| contract_eq_bounded(state, t1, env1, t2, env2)) - // Ideally we would compare pending contracts, but it's a bit advanced and for now - // we only equate arrays without additional contracts - && attrs1.pending_contracts.is_empty() && attrs2.pending_contracts.is_empty() - } - // We must compare the inner values as well as the corresponding contracts or type - // annotations. - (Annotated(annot1, t1), Annotated(annot2, t2)) => { - let value_eq = contract_eq_bounded(state, t1, env1, t2, env2); - - // TODO: - // - does it really make sense to compare the annotations? - // - does it even happen to have contracts having themselves type annotations? - // - and in the latter case, should they be declared unequal because of that? - // The answer to the last question is probably yes, because contracts are - // fundamentally as powerful as function application, so they can change their - // argument. - - // We use the same logic as in the typechecker: the type associated to an annotated - // value is either the type annotation, or the first contract annotation. - let ty1 = annot1.first(); - let ty2 = annot2.first(); - - let ty_eq = match (ty1, ty2) { - (None, None) => true, - (Some(ctr1), Some(ctr2)) => type_eq_bounded( - state, - &GenericUnifType::from_type(ctr1.typ.clone(), env1), - env1, - &GenericUnifType::from_type(ctr2.typ.clone(), env2), - env2, - ), - _ => false, - }; - - value_eq && ty_eq - } - (Op1(UnaryOp::RecordAccess(id1), t1), Op1(UnaryOp::RecordAccess(id2), t2)) => { - id1 == id2 && contract_eq_bounded(state, t1, env1, t2, env2) - } - // Contract is just a caching mechanism. 
`typ` should be the source of truth for equality - // (and it's probably easier to prove that type are equal rather than their generated - // contract version). - ( - Type { - typ: ty1, - contract: _, - }, - Type { - typ: ty2, - contract: _, - }, - ) => type_eq_bounded( - state, - &GenericUnifType::from_type(ty1.clone(), env1), - env1, - &GenericUnifType::from_type(ty2.clone(), env2), - env2, - ), - // We don't treat imports, parse errors, nor pairs of terms that don't have the same shape - _ => false, - } -} - -/// Compute the equality between two hashmaps holding either types or terms. -fn map_eq( - mut f: F, - state: &mut State, - map1: &IndexMap, - env1: &E, - map2: &IndexMap, - env2: &E, -) -> bool -where - F: FnMut(&mut State, &V, &E, &V, &E) -> bool, -{ - map1.len() == map2.len() - && map1.iter().all(|(id, v1)| { - map2.get(id) - .map(|v2| f(state, v1, env1, v2, env2)) - .unwrap_or(false) - }) -} - -/// Convert record rows to a hashmap. -/// -/// Require the rows to be closed (i.e. the last element must be `RowEmpty`), otherwise `None` is -/// returned. `None` is returned as well if a type encountered is not row, or if it is a enum row. -fn rrows_as_map( - erows: &GenericUnifRecordRows, -) -> Option>> { - let map: Option> = erows - .iter() - .map(|item| match item { - GenericUnifRecordRowsIteratorItem::Row(RecordRowF { id, typ: types }) => { - Some((id, types)) - } - _ => None, - }) - .collect(); - - map -} - -/// Convert enum rows to a hashmap. -/// -/// Require the rows to be closed (i.e. the last element must be `RowEmpty`), otherwise `None` is -/// returned. `None` is returned as well if a type encountered is not row type, or if it is a -/// record row. 
-fn erows_as_map( - erows: &GenericUnifEnumRows, -) -> Option>>> { - let set: Option>> = erows - .iter() - .map(|item| match item { - GenericUnifEnumRowsIteratorItem::Row(EnumRowF { id, typ: types }) => Some((id, types)), - _ => None, - }) - .collect(); - - set -} - -/// Check for contract equality between record fields. Fields are equal if they are both without a -/// definition, or are both defined and their values are equal. -/// -/// The attached metadata must be equal as well: most record contracts are written as field with -/// metadata but without definition. For example, take `{ foo | {bar | Number}}` and `{foo | {bar | -/// String}}`. Those two record contracts are obviously not equal, but to know that, we have to -/// look at the contracts of each bar field. -fn contract_eq_fields( - state: &mut State, - field1: &Field, - env1: &E, - field2: &Field, - env2: &E, -) -> bool { - // Check that the pending contracts are equal. - // - // [^contract-eq-ignore-label]: We mostly ignore the label here, which doesn't impact the fact - // that a contract blame or not. Different labels might lead to different error messages, - // though. Note that there is one important exception: the field `type_environment` does impact - // the evaluation of the contract. Fortunately, it's a simple datastructure that is easy to - // compare, so we do check for equality here. - // - // Otherwise, comparing the rest of the labels seem rather clumsy (as labels store a wide - // variety of static and runtime data) and not very meaningful. - let pending_contracts_eq = field1 - .pending_contracts - .iter() - .zip(field2.pending_contracts.iter()) - .all(|(c1, c2)| { - c1.label.type_environment == c2.label.type_environment - && contract_eq_bounded(state, &c1.contract, env1, &c2.contract, env2) - }); - - // Check that the type and contrat annotations are equal. [^contract-eq-ignore-label] applies - // here as well. 
- let annotations_eq = field1 - .metadata - .annotation - .iter() - .zip(field2.metadata.annotation.iter()) - .all(|(t1, t2)| { - t1.label.type_environment == t2.label.type_environment - && type_eq_bounded( - state, - &GenericUnifType::from_type(t1.typ.clone(), env1), - env1, - &GenericUnifType::from_type(t2.typ.clone(), env2), - env2, - ) - }); - - // Check that "scalar" metadata (simple values) are equals - let scalar_metadata_eq = field1.metadata.opt == field2.metadata.opt - && field1.metadata.not_exported == field2.metadata.not_exported - && field1.metadata.priority == field2.metadata.priority; - - let value_eq = match (&field1.value, &field2.value) { - (Some(ref value1), Some(ref value2)) => { - contract_eq_bounded(state, value1, env1, value2, env2) - } - (None, None) => true, - _ => false, - }; - - pending_contracts_eq && annotations_eq && scalar_metadata_eq && value_eq -} - -/// Perform the type equality comparison on types. Structurally recurse into type constructors and -/// test that subtypes or subterms (contracts) are equals. -/// -/// Currently, this function operates on `Type` rather than `TypeWrapper`s as it is called by -/// `contract_eq_bounded` on type annotations. But we need to substitute variables to correctly -/// compare `foralls`, hence it accepts more general `TypeWrapper`s. However, we expect to never -/// meet unification variables (we treat them for completeness and to be future proof), and that all -/// the rigid type variables encountered have been introduced by `type_eq_bounded` itself. This is -/// why we don't need unique identifiers that are distinct from the one used during typechecking, -/// and we can just start from `0`. -fn type_eq_bounded( - state: &mut State, - ty1: &GenericUnifType, - env1: &E, - ty2: &GenericUnifType, - env2: &E, -) -> bool { - match (ty1, ty2) { - (GenericUnifType::Concrete { typ: s1, .. }, GenericUnifType::Concrete { typ: s2, .. 
}) => { - match (s1, s2) { - (TypeF::Wildcard(id1), TypeF::Wildcard(id2)) => id1 == id2, - (TypeF::Dyn, TypeF::Dyn) - | (TypeF::Number, TypeF::Number) - | (TypeF::Bool, TypeF::Bool) - | (TypeF::Symbol, TypeF::Symbol) - | (TypeF::String, TypeF::String) => true, - ( - TypeF::Dict { - type_fields: uty1, - flavour: attrs1, - }, - TypeF::Dict { - type_fields: uty2, - flavour: attrs2, - }, - ) if attrs1 == attrs2 => type_eq_bounded(state, uty1, env1, uty2, env2), - (TypeF::Array(uty1), TypeF::Array(uty2)) => { - type_eq_bounded(state, uty1, env1, uty2, env2) - } - (TypeF::Arrow(s1, t1), TypeF::Arrow(s2, t2)) => { - type_eq_bounded(state, s1, env1, s2, env2) - && type_eq_bounded(state, t1, env1, t2, env2) - } - (TypeF::Enum(uty1), TypeF::Enum(uty2)) => { - fn type_eq_bounded_wrapper( - state: &mut State, - uty1: &Option<&GenericUnifType>, - env1: &E, - uty2: &Option<&GenericUnifType>, - env2: &E, - ) -> bool { - match (uty1, uty2) { - (Some(uty1), Some(uty2)) => { - type_eq_bounded(state, *uty1, env1, *uty2, env2) - } - (None, None) => true, - _ => false, - } - } - - let map1 = erows_as_map(uty1); - let map2 = erows_as_map(uty2); - - map1.zip(map2) - .map(|(m1, m2)| { - map_eq(type_eq_bounded_wrapper, state, &m1, env1, &m2, env2) - }) - .unwrap_or(false) - } - (TypeF::Record(uty1), TypeF::Record(uty2)) => { - fn type_eq_bounded_wrapper( - state: &mut State, - uty1: &&GenericUnifType, - env1: &E, - uty2: &&GenericUnifType, - env2: &E, - ) -> bool { - type_eq_bounded(state, *uty1, env1, *uty2, env2) - } - - let map1 = rrows_as_map(uty1); - let map2 = rrows_as_map(uty2); - - map1.zip(map2) - .map(|(m1, m2)| { - map_eq(type_eq_bounded_wrapper, state, &m1, env1, &m2, env2) - }) - .unwrap_or(false) - } - (TypeF::Contract((t1, env1)), TypeF::Contract((t2, env2))) => { - contract_eq_bounded(state, t1, env1, t2, env2) - } - ( - TypeF::Forall { - var: var1, - var_kind: var_kind1, - body: body1, - }, - TypeF::Forall { - var: var2, - var_kind: var_kind2, - body: body2, - }, - ) => { 
- let cst_id = state.fresh_cst_id(); - - if var_kind1 != var_kind2 { - return false; - } - - let body1 = body1.clone(); - let body2 = body2.clone(); - - let (uty1_subst, uty2_subst) = match var_kind1 { - VarKind::Type => ( - body1.subst(var1, &GenericUnifType::Constant(cst_id)), - body2.subst(var2, &GenericUnifType::Constant(cst_id)), - ), - VarKind::RecordRows { .. } => ( - body1.subst(var1, &GenericUnifRecordRows::Constant(cst_id)), - body2.subst(var2, &GenericUnifRecordRows::Constant(cst_id)), - ), - VarKind::EnumRows { .. } => ( - body1.subst(var1, &GenericUnifEnumRows::Constant(cst_id)), - body2.subst(var2, &GenericUnifEnumRows::Constant(cst_id)), - ), - }; - - type_eq_bounded(state, &uty1_subst, env1, &uty2_subst, env2) - } - // We can't compare type variables without knowing what they are instantiated to, - // and all type variables should have been substituted at this point, so we bail - // out. - _ => false, - } - } - (GenericUnifType::UnifVar { id: id1, .. }, GenericUnifType::UnifVar { id: id2, .. }) => { - debug_assert!( - false, - "we shouldn't come across unification variables during type equality computation" - ); - id1 == id2 - } - (GenericUnifType::Constant(i1), GenericUnifType::Constant(i2)) => i1 == i2, - _ => false, - } -} diff --git a/core/src/typecheck/error.rs b/core/src/typecheck/error.rs deleted file mode 100644 index 9a9f24a47e..0000000000 --- a/core/src/typecheck/error.rs +++ /dev/null @@ -1,494 +0,0 @@ -//! Internal error types for typechecking. -use super::{ - reporting::{self, ToType}, - State, UnifEnumRow, UnifRecordRow, UnifType, VarId, -}; -use crate::{ - error::TypecheckError, - identifier::LocIdent, - label::ty_path, - position::TermPos, - typ::{TypeF, VarKindDiscriminant}, -}; - -/// Error during the unification of two row types. -#[derive(Debug, PartialEq)] -pub enum RowUnifError { - /// The LHS had a binding that was missing in the RHS. - MissingRow(LocIdent), - /// The LHS had a `Dyn` tail that was missing in the RHS. 
- MissingDynTail, - /// The RHS had a binding that was not in the LHS. - ExtraRow(LocIdent), - /// The RHS had a additional `Dyn` tail. - ExtraDynTail, - /// There were two incompatible definitions for the same record row. - RecordRowMismatch { - id: LocIdent, - /// The underlying unification error that caused the mismatch. - cause: Box, - }, - /// There were two incompatible definitions for the same enum row. - /// - /// Because enum rows have an optional argument, there might not be any underlying unification - /// error (e.g. one of the row has an argument, and the other does not). This is why the - /// underlying unification error is optional, as opposed to record rows. - EnumRowMismatch { - id: LocIdent, - /// The underlying unification error that caused the mismatch. - cause: Option>, - }, - /// A [row constraint][super::RowConstrs] was violated. - RecordRowConflict(UnifRecordRow), - /// A [row constraint][super::RowConstrs] was violated. - EnumRowConflict(UnifEnumRow), - /// Tried to unify a type constant with another different type. - WithConst { - var_kind: VarKindDiscriminant, - expected_const_id: VarId, - inferred: UnifType, - }, - /// Tried to unify two distinct type constants. - ConstMismatch { - var_kind: VarKindDiscriminant, - expected_const_id: usize, - inferred_const_id: usize, - }, - /// An unbound type variable was referenced. - UnboundTypeVariable(LocIdent), - /// Tried to unify a constant with a unification variable with a strictly lower level. - VarLevelMismatch { - constant_id: VarId, - var_kind: VarKindDiscriminant, - }, -} - -impl RowUnifError { - /// Convert a row unification error to a unification error. 
- /// - /// There is a hierarchy between error types, from the most local/specific to the most - /// high-level: - /// - [`RowUnifError`] - /// - [`UnifError`] - /// - [`crate::error::TypecheckError`] - /// - /// Each level usually adds information (such as types or positions) and group different - /// specific errors into most general ones. - pub fn into_unif_err(self, expected: UnifType, inferred: UnifType) -> UnifError { - match self { - RowUnifError::MissingRow(id) => UnifError::MissingRow { - id, - expected, - inferred, - }, - RowUnifError::MissingDynTail => UnifError::MissingDynTail { expected, inferred }, - RowUnifError::ExtraRow(id) => UnifError::ExtraRow { - id, - expected, - inferred, - }, - RowUnifError::ExtraDynTail => UnifError::ExtraDynTail { expected, inferred }, - RowUnifError::RecordRowMismatch { id, cause } => UnifError::RecordRowMismatch { - id, - expected, - inferred, - cause, - }, - RowUnifError::EnumRowMismatch { id, cause } => UnifError::EnumRowMismatch { - id, - expected, - inferred, - cause, - }, - RowUnifError::RecordRowConflict(row) => UnifError::RecordRowConflict { - row, - expected, - inferred, - }, - RowUnifError::EnumRowConflict(row) => UnifError::EnumRowConflict { - row, - expected, - inferred, - }, - RowUnifError::WithConst { - var_kind, - expected_const_id, - inferred, - } => UnifError::WithConst { - var_kind, - expected_const_id, - inferred, - }, - RowUnifError::ConstMismatch { - var_kind, - expected_const_id, - inferred_const_id, - } => UnifError::ConstMismatch { - var_kind, - expected_const_id, - inferred_const_id, - }, - RowUnifError::UnboundTypeVariable(id) => UnifError::UnboundTypeVariable(id), - RowUnifError::VarLevelMismatch { - constant_id, - var_kind, - } => UnifError::VarLevelMismatch { - constant_id, - var_kind, - }, - } - } -} - -/// Error during the unification of two types. -/// -/// In each variant, `expected` and `inferred` refers to the two types that failed to unify. 
-#[derive(Debug, PartialEq)] -pub enum UnifError { - /// Tried to unify two incompatible types. - TypeMismatch { - expected: UnifType, - inferred: UnifType, - }, - /// There are two incompatible definitions for the same row. - RecordRowMismatch { - id: LocIdent, - expected: UnifType, - inferred: UnifType, - /// The uderlying unification error (`expected` and `inferred` should be the record types - /// that failed to unify, while this error is the specific cause of the mismatch for the - /// `id` row) - cause: Box, - }, - /// There are two incompatible definitions for the same row. - /// - /// Because enum rows have an optional argument, there might not be any underlying unification - /// error (e.g. one of the row has an argument, and the other does not). This is why the - /// underlying unification error is optional, as opposed to record rows. - EnumRowMismatch { - id: LocIdent, - expected: UnifType, - inferred: UnifType, - cause: Option>, - }, - /// Tried to unify two distinct type constants. - ConstMismatch { - var_kind: VarKindDiscriminant, - expected_const_id: VarId, - inferred_const_id: VarId, - }, - /// Tried to unify two rows, but a row from the expected type was absent from the inferred type. - MissingRow { - id: LocIdent, - expected: UnifType, - inferred: UnifType, - }, - /// Tried to unify two rows, but a row from the inferred type was absent from the expected type. - ExtraRow { - id: LocIdent, - expected: UnifType, - inferred: UnifType, - }, - /// Tried to unify two rows, but the `Dyn` tail of the expected type was absent from the - /// inferred type. - MissingDynTail { - expected: UnifType, - inferred: UnifType, - }, - /// Tried to unify two rows, but the `Dyn` tail of the RHS was absent from the LHS. - ExtraDynTail { - expected: UnifType, - inferred: UnifType, - }, - /// Tried to unify a unification variable with a row type violating the [row - /// constraints][super::RowConstrs] of the variable. 
- RecordRowConflict { - /// The row that conflicts with an existing one. - row: UnifRecordRow, - /// The original expected type that led to the row conflict (when unified with the inferred - /// type). - expected: UnifType, - /// The original inferred type that led to the row conflict (when unified with the expected - /// type). - inferred: UnifType, - }, - /// Tried to unify a unification variable with a row type violating the [row - /// constraints][super::RowConstrs] of the variable. - EnumRowConflict { - /// The row that conflicts with an existing one. - row: UnifEnumRow, - /// The original expected type that led to the row conflict (when unified with the inferred - /// type). - expected: UnifType, - /// The original inferred type that led to the row conflict (when unified with the expected - /// type). - inferred: UnifType, - }, - /// Tried to unify a type constant with another different type. - WithConst { - var_kind: VarKindDiscriminant, - expected_const_id: VarId, - inferred: UnifType, - }, - /// An unbound type variable was referenced. - UnboundTypeVariable(LocIdent), - /// An error occurred when unifying the domains of two arrows. - DomainMismatch { - expected: UnifType, - inferred: UnifType, - cause: Box, - }, - /// An error occurred when unifying the codomains of two arrows. - CodomainMismatch { - expected: UnifType, - inferred: UnifType, - cause: Box, - }, - /// Tried to unify a constant with a unification variable with a strictly lower level. - VarLevelMismatch { - constant_id: VarId, - var_kind: VarKindDiscriminant, - }, -} - -impl UnifError { - /// Convert a unification error to a typechecking error. There is a hierarchy between error - /// types, from the most local/specific to the most high-level: - /// - [`RowUnifError`] - /// - [`UnifError`] - /// - [`crate::error::TypecheckError`] - /// - /// Each level usually adds information (such as types or positions) and group different - /// specific errors into most general ones. 
- /// - /// # Parameters - /// - /// - `state`: the state of unification. Used to access the unification table, and the original - /// names of of unification variable or type constant. - /// - `pos_opt`: the position span of the expression that failed to typecheck. - pub fn into_typecheck_err(self, state: &State, pos_opt: TermPos) -> TypecheckError { - let mut names = reporting::NameReg::new(state.names.clone()); - self.into_typecheck_err_(state, &mut names, pos_opt) - } - - /// Convert a unification error to a typechecking error, given a populated [name - /// registry][reporting::NameReg]. Actual meat of the implementation of - /// [`Self::into_typecheck_err`]. - fn into_typecheck_err_( - self, - state: &State, - names_reg: &mut reporting::NameReg, - pos: TermPos, - ) -> TypecheckError { - match self { - UnifError::TypeMismatch { expected, inferred } => TypecheckError::TypeMismatch { - expected: expected.to_type(names_reg, state.table), - inferred: inferred.to_type(names_reg, state.table), - pos, - }, - UnifError::RecordRowMismatch { - id, - expected, - inferred, - cause, - } => TypecheckError::RecordRowMismatch { - id, - expected: expected.to_type(names_reg, state.table), - inferred: inferred.to_type(names_reg, state.table), - cause: Box::new((*cause).into_typecheck_err_(state, names_reg, TermPos::None)), - pos, - }, - UnifError::EnumRowMismatch { - id, - expected, - inferred, - cause, - } => TypecheckError::EnumRowMismatch { - id, - expected: expected.to_type(names_reg, state.table), - inferred: inferred.to_type(names_reg, state.table), - cause: cause.map(|err| { - Box::new((*err).into_typecheck_err_(state, names_reg, TermPos::None)) - }), - pos, - }, - // TODO: for now, failure to unify with a type constant causes the same error as a - // usual type mismatch. It could be nice to have a specific error message in the - // future. 
- UnifError::ConstMismatch { - var_kind, - expected_const_id, - inferred_const_id, - } => TypecheckError::TypeMismatch { - expected: UnifType::from_constant_of_kind(expected_const_id, var_kind) - .to_type(names_reg, state.table), - inferred: UnifType::from_constant_of_kind(inferred_const_id, var_kind) - .to_type(names_reg, state.table), - pos, - }, - UnifError::WithConst { - var_kind: VarKindDiscriminant::Type, - expected_const_id, - inferred, - } => TypecheckError::TypeMismatch { - expected: UnifType::Constant(expected_const_id).to_type(names_reg, state.table), - inferred: inferred.to_type(names_reg, state.table), - pos, - }, - UnifError::WithConst { - var_kind, - expected_const_id, - inferred, - } => TypecheckError::ForallParametricityViolation { - kind: var_kind, - tail: UnifType::from_constant_of_kind(expected_const_id, var_kind) - .to_type(names_reg, state.table), - violating_type: inferred.to_type(names_reg, state.table), - pos, - }, - UnifError::MissingRow { - id, - expected, - inferred, - } => TypecheckError::MissingRow { - id, - expected: expected.to_type(names_reg, state.table), - inferred: inferred.to_type(names_reg, state.table), - pos, - }, - UnifError::MissingDynTail { expected, inferred } => TypecheckError::MissingDynTail { - expected: expected.to_type(names_reg, state.table), - inferred: inferred.to_type(names_reg, state.table), - pos, - }, - UnifError::ExtraRow { - id, - expected, - inferred, - } => TypecheckError::ExtraRow { - id, - expected: expected.to_type(names_reg, state.table), - inferred: inferred.to_type(names_reg, state.table), - pos, - }, - UnifError::ExtraDynTail { expected, inferred } => TypecheckError::ExtraDynTail { - expected: expected.to_type(names_reg, state.table), - inferred: inferred.to_type(names_reg, state.table), - pos, - }, - UnifError::RecordRowConflict { - row, - expected, - inferred, - } => TypecheckError::RecordRowConflict { - row: row.to_type(names_reg, state.table), - expected: expected.to_type(names_reg, 
state.table), - inferred: inferred.to_type(names_reg, state.table), - pos, - }, - UnifError::EnumRowConflict { - row, - expected, - inferred, - } => TypecheckError::EnumRowConflict { - row: row.to_type(names_reg, state.table), - expected: expected.to_type(names_reg, state.table), - inferred: inferred.to_type(names_reg, state.table), - pos, - }, - UnifError::UnboundTypeVariable(ident) => TypecheckError::UnboundTypeVariable(ident), - err @ UnifError::CodomainMismatch { .. } | err @ UnifError::DomainMismatch { .. } => { - let (expected, inferred, type_path, err_final) = err.into_type_path().unwrap(); - TypecheckError::ArrowTypeMismatch { - expected: expected.to_type(names_reg, state.table), - inferred: inferred.to_type(names_reg, state.table), - type_path, - cause: Box::new(err_final.into_typecheck_err_(state, names_reg, TermPos::None)), - pos, - } - } - UnifError::VarLevelMismatch { - constant_id, - var_kind, - } => TypecheckError::VarLevelMismatch { - type_var: names_reg.gen_cst_name(constant_id, var_kind).into(), - pos, - }, - } - } - - /// Transform a `(Co)DomainMismatch` into a type path and other data. - /// - /// `(Co)DomainMismatch` can be nested: when unifying `Num -> Num -> Num` with `Num -> Bool -> - /// Num`, the resulting error is of the form `CodomainMismatch(.., DomainMismatch(.., - /// TypeMismatch(..)))`. The heading sequence of `(Co)DomainMismatch` is better represented as - /// a type path, here `[Codomain, Domain]`, while the last error of the chain -- which thus - /// cannot be a `(Co)DomainMismatch` -- is the actual cause of the unification failure. - /// - /// This function breaks down a `(Co)Domain` mismatch into a more convenient representation. - /// - /// # Return - /// - /// Return `None` if `self` is not a `DomainMismatch` nor a `CodomainMismatch`. - /// - /// Otherwise, return the following tuple: - /// - the original expected type. - /// - the original inferred type. 
- /// - a type path pointing at the subtypes which failed to be unified. - /// - the final error, which is the actual cause of that failure. - pub fn into_type_path(self) -> Option<(UnifType, UnifType, ty_path::Path, Self)> { - let mut curr: Self = self; - let mut path = ty_path::Path::new(); - // The original expected and inferred type. They are just updated once, in the first - // iteration of the loop below. - let mut utys: Option<(UnifType, UnifType)> = None; - - loop { - match curr { - UnifError::DomainMismatch { - expected: - expected @ UnifType::Concrete { - typ: TypeF::Arrow(_, _), - .. - }, - inferred: - inferred @ UnifType::Concrete { - typ: TypeF::Arrow(_, _), - .. - }, - cause: mismatch, - } => { - utys = utys.or(Some((expected, inferred))); - path.push(ty_path::Elem::Domain); - curr = *mismatch; - } - UnifError::DomainMismatch { .. } => panic!( - "typechecking::to_type_path(): domain mismatch error on a non arrow type" - ), - UnifError::CodomainMismatch { - expected: - expected @ UnifType::Concrete { - typ: TypeF::Arrow(_, _), - .. - }, - inferred: - inferred @ UnifType::Concrete { - typ: TypeF::Arrow(_, _), - .. - }, - cause: mismatch, - } => { - utys = utys.or(Some((expected, inferred))); - path.push(ty_path::Elem::Codomain); - curr = *mismatch; - } - UnifError::CodomainMismatch { .. } => panic!( - "typechecking::to_type_path(): codomain mismatch error on a non arrow type" - ), - // utys equals to `None` iff we did not even enter the case above once, i.e. if - // `self` was indeed neither a `DomainMismatch` nor a `CodomainMismatch` - _ => break utys.map(|(expected, inferred)| (expected, inferred, path, curr)), - } - } - } -} diff --git a/core/src/typecheck/mk_uniftype.rs b/core/src/typecheck/mk_uniftype.rs deleted file mode 100644 index f031f05e16..0000000000 --- a/core/src/typecheck/mk_uniftype.rs +++ /dev/null @@ -1,154 +0,0 @@ -//! Helpers for building `TypeWrapper`s. 
-use super::{UnifType, VarLevelsData}; -use crate::typ::{DictTypeFlavour, TypeF}; - -/// Multi-ary arrow constructor for types implementing `Into`. -#[macro_export] -macro_rules! mk_uty_arrow { - ($left:expr, $right:expr) => { - $crate::typecheck::UnifType::concrete( - $crate::typ::TypeF::Arrow( - Box::new($crate::typecheck::UnifType::from($left)), - Box::new($crate::typecheck::UnifType::from($right)) - ) - ) - }; - ( $fst:expr, $snd:expr , $( $types:expr ),+ ) => { - $crate::mk_uty_arrow!($fst, $crate::mk_uty_arrow!($snd, $( $types ),+)) - }; -} - -/// Multi-ary enum row constructor for types implementing `Into`. -/// `mk_uty_enum_row!(id1, .., idn; tail)` correspond to `[| 'id1, .., 'idn; tail |]. With the -/// addition of algebraic data types (enum variants), individual rows can also take an additional -/// type parameter, specified as a tuple: for example, `mk_uty_enum_row!(id1, (id2, ty2); tail)` -/// is `[| 'id1, 'id2 ty2; tail |]`. -#[macro_export] -macro_rules! mk_uty_enum_row { - () => { - $crate::typecheck::UnifEnumRows::Concrete { - erows: $crate::typ::EnumRowsF::Empty, - var_levels_data: $crate::typecheck::VarLevelsData::new_no_uvars(), - } - }; - (; $tail:expr) => { - $crate::typecheck::UnifEnumRows::from($tail) - }; - ( ($id:expr, $ty:expr) $(, $rest:tt )* $(; $tail:expr)? ) => { - $crate::typecheck::UnifEnumRows::concrete( - $crate::typ::EnumRowsF::Extend { - row: $crate::typ::EnumRowF { - id: $crate::identifier::LocIdent::from($id), - typ: Some(Box::new($ty.into())), - }, - tail: Box::new($crate::mk_uty_enum_row!($( $rest ),* $(; $tail)?)) - } - ) - }; - ( $id:expr $(, $rest:tt )* $(; $tail:expr)? ) => { - $crate::typecheck::UnifEnumRows::concrete( - $crate::typ::EnumRowsF::Extend { - row: $crate::typ::EnumRowF { - id: $crate::identifier::LocIdent::from($id), - typ: None, - }, - tail: Box::new($crate::mk_uty_enum_row!($( $rest ),* $(; $tail)?)) - } - ) - }; -} - -/// Multi-ary record row constructor for types implementing `Into`. 
`mk_uty_row!((id1, -/// ty1), .., (idn, tyn); tail)` correspond to `{id1: ty1, .., idn: tyn; tail}`. The tail can be -/// omitted, in which case the empty row is uses as a tail instead. -#[macro_export] -macro_rules! mk_uty_record_row { - () => { - $crate::typecheck::UnifRecordRows::Concrete { - rrows: $crate::typ::RecordRowsF::Empty, - var_levels_data: $crate::typecheck::VarLevelsData::new_no_uvars() - } - }; - (; $tail:expr) => { - $crate::typecheck::UnifRecordRows::from($tail) - }; - (($id:expr, $ty:expr) $(,($ids:expr, $tys:expr))* $(; $tail:expr)?) => { - $crate::typecheck::UnifRecordRows::concrete( - $crate::typ::RecordRowsF::Extend { - row: $crate::typ::RecordRowF { - id: $crate::identifier::LocIdent::from($id), - typ: Box::new($ty.into()), - }, - tail: Box::new($crate::mk_uty_record_row!($(($ids, $tys)),* $(; $tail)?)), - } - ) - }; -} - -/// Wrapper around `mk_uty_enum_row!` to build an enum type from an enum row. -#[macro_export] -macro_rules! mk_uty_enum { - ($( $args:tt )*) => { - $crate::typecheck::UnifType::concrete( - $crate::typ::TypeF::Enum( - $crate::mk_uty_enum_row!($( $args )*) - ) - ) - }; -} - -/// Wrapper around `mk_uty_record!` to build a record type from a record row. -#[macro_export] -macro_rules! mk_uty_record { - ($(($ids:expr, $tys:expr)),* $(; $tail:expr)?) => { - $crate::typecheck::UnifType::concrete( - $crate::typ::TypeF::Record( - $crate::mk_uty_record_row!($(($ids, $tys)),* $(; $tail)?) - ) - ) - }; -} - -/// Generate an helper function to build a 0-ary type. -macro_rules! 
generate_builder { - ($fun:ident, $var:ident) => { - pub fn $fun() -> UnifType { - UnifType::Concrete { - typ: TypeF::$var, - var_levels_data: VarLevelsData::new_no_uvars(), - } - } - }; -} - -pub fn dict(ty: T) -> UnifType -where - T: Into, -{ - UnifType::concrete(TypeF::Dict { - type_fields: Box::new(ty.into()), - flavour: DictTypeFlavour::Type, - }) -} - -pub fn array(ty: T) -> UnifType -where - T: Into, -{ - UnifType::concrete(TypeF::Array(Box::new(ty.into()))) -} - -pub fn arrow(domain: impl Into, codomain: impl Into) -> UnifType { - UnifType::concrete(TypeF::Arrow( - Box::new(domain.into()), - Box::new(codomain.into()), - )) -} - -// dyn is a reserved keyword -generate_builder!(dynamic, Dyn); -generate_builder!(str, String); -generate_builder!(num, Number); -generate_builder!(bool, Bool); -generate_builder!(sym, Symbol); -generate_builder!(foreign_id, ForeignId); diff --git a/core/src/typecheck/mod.rs b/core/src/typecheck/mod.rs deleted file mode 100644 index df9bd80955..0000000000 --- a/core/src/typecheck/mod.rs +++ /dev/null @@ -1,3122 +0,0 @@ -//! Typechecking and type inference. -//! -//! Nickel uses a mix of a bidirectional typechecking algorithm, together with standard -//! unification-based type inference. Nickel is gradually typed, and dynamic typing is the default. -//! Static typechecking is triggered by a type annotation. -//! -//! # Modes -//! -//! The typechecking algorithm runs in two separate modes, corresponding to static and dynamic -//! typing: -//! -//! - **enforce** corresponds to traditional typechecking in a statically typed language. This -//! happens inside a statically typed block. Such blocks are introduced by the type ascription -//! operator `:`, as in `1 + 1 : Number` or `let f : Number -> Number = fun x => x + 1 in ..`. -//! Enforce mode is implemented by [`type_check`] and variants. -//! - **walk** doesn't enforce any typing but traverses the AST looking for typed blocks to -//! typecheck. 
Walk mode also stores the annotations of bound identifiers in the environment. This -//! is implemented by the `walk` function. -//! -//! The algorithm usually starts in walk mode, although this can be configured. A typed block -//! (an expression annotated with a type) switches to enforce mode, and is switched back to walk -//! mode when entering an expression annotated with a contract. Type and contract annotations thus -//! serve as a switch for the typechecking mode. -//! -//! Note that the static typing part (enforce mode) is based on the bidirectional typing framework, -//! which defines two different modes. Thus, the enforce mode is itself divided again into -//! **checking** mode and **inference** mode. -//! -//! # Type inference -//! -//! Type inference is done via a form of bidirectional typechecking coupled with unification, in the -//! same spirit as GHC (Haskell), although the type system of Nickel is simpler. The type of -//! un-annotated let-bound expressions (the type of `bound_exp` in `let x = bound_exp in body`) is -//! inferred in enforce mode, but it is never implicitly generalized. For example, the following -//! program is rejected: -//! -//! ```nickel -//! # Rejected -//! (let id = fun x => x in std.seq (id "a") (id 5)) : Number -//! ``` -//! -//! Indeed, `id` is given the type `_a -> _a`, where `_a` is a unification variable, but is not -//! generalized to `forall a. a -> a`. At the first call site, `_a` is unified with `String`, and at -//! the second call site the typechecker complains that `5` is not of type `String`. -//! -//! This restriction is on purpose, as generalization is not trivial to implement efficiently and -//! more importantly can interact with other components of the type system and type inference. If -//! polymorphism is required, the user can simply add annotation: -//! -//! ```nickel -//! # Accepted -//! (let id : forall a. a -> a = fun x => x in std.seq (id "a") (id 5)) : Num -//! ``` -//! -//! 
In walk mode, the type of let-bound expressions is inferred in a shallow way (see -//! [`apparent_type`]). -use crate::{ - cache::ImportResolver, - environment::Environment as GenericEnvironment, - error::TypecheckError, - identifier::{Ident, LocIdent}, - mk_uty_arrow, mk_uty_enum, mk_uty_record, mk_uty_record_row, stdlib as nickel_stdlib, - term::{ - pattern::bindings::Bindings as _, record::Field, LabeledType, MatchBranch, RichTerm, - StrChunk, Term, TypeAnnotation, - }, - traverse::{Traverse, TraverseOrder}, - typ::*, -}; - -use std::{ - cmp::max, - collections::{HashMap, HashSet}, - convert::{Infallible, TryInto}, - num::NonZeroU16, -}; - -pub mod error; -pub mod operation; -mod pattern; -pub mod reporting; -#[macro_use] -pub mod mk_uniftype; -pub mod eq; -pub mod subtyping; -pub mod unif; - -use eq::{SimpleTermEnvironment, TermEnvironment}; -use error::*; -use indexmap::IndexMap; -use operation::{get_bop_type, get_nop_type, get_uop_type}; -use pattern::{PatternTypeData, PatternTypes}; -use unif::*; - -use self::subtyping::SubsumedBy; - -/// The max depth parameter used to limit the work performed when inferring the type of the stdlib. -const INFER_RECORD_MAX_DEPTH: u8 = 4; - -/// The typechecker has two modes, one for statically typed code and one for dynamically type code. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum TypecheckMode { - /// In `Walk` mode, the typechecker traverses the AST looking for typed blocks. - Walk, - /// In `Enforce` mode, the typechecker checks types. - Enforce, -} - -/// The typing environment. -pub type Environment = GenericEnvironment; - -/// Mapping from wildcard ID to inferred type -pub type Wildcards = Vec; - -/// A table mapping variable IDs with their kind to names. -pub type NameTable = HashMap<(VarId, VarKindDiscriminant), Ident>; - -/// A unifiable record row. -pub type GenericUnifRecordRow = RecordRowF>>; -pub type GenericUnifRecordRowsUnrolling = - RecordRowsF>, Box>>; - -/// Unifiable record rows. 
Same shape as [`crate::typ::RecordRows`], but where each type is -/// unifiable, and each tail may be a unification variable (or a constant). -#[derive(Clone, PartialEq, Debug)] -pub enum GenericUnifRecordRows { - Concrete { - rrows: GenericUnifRecordRowsUnrolling, - /// Additional metadata related to unification variable levels update. See [VarLevelsData]. - var_levels_data: VarLevelsData, - }, - Constant(VarId), - /// A unification variable. - UnifVar { - /// The unique identifier of this variable in the unification table. - id: VarId, - /// The initial variable level at which the variable was created. See - /// [GenericUnifType::UnifVar]. - init_level: VarLevel, - }, -} - -pub type GenericUnifEnumRow = EnumRowF>>; -pub type GenericUnifEnumRowsUnrolling = - EnumRowsF>, Box>>; - -/// Unifiable enum rows. Same shape as [`crate::typ::EnumRows`] but where each tail may be a -/// unification variable (or a constant). -/// -/// Enum rows don't store any type (they are just a sequence of identifiers), so there is no -/// `GenericUnifEnumRows` taking an additional `E` parameter. -#[derive(Clone, PartialEq, Debug)] -pub enum GenericUnifEnumRows { - Concrete { - erows: GenericUnifEnumRowsUnrolling, - /// Additional metadata related to unification variable levels update. See [VarLevelsData]. - var_levels_data: VarLevelsData, - }, - Constant(VarId), - UnifVar { - /// The unique identifier of this variable in the unification table. - id: VarId, - /// The initial variable level at which the variable was created. See - /// [GenericUnifType::UnifVar]. - init_level: VarLevel, - }, -} - -/// Metadata attached to unification types, which are used to delay and optimize potentially costly -/// type traversals when updating the levels of the free unification variables of a type. Based on -/// Didier Remy's algorithm for the OCaml typechecker, see [Efficient and insightful -/// generalization](http://web.archive.org/web/20230525023637/https://okmij.org/ftp/ML/generalization.html). 
-/// -/// When unifying a variable with a composite type, we have to update the levels of all the free -/// unification variables contained in that type, which naively incurs a full traversal of the type. -/// The idea behind Didier Remy's algorithm is to delay such traversals, and use the values of -/// [VarLevelsData] to group traversals and avoid unneeded ones. This make variable unification run -/// in constant time again, as long as we don't unify with a rigid type variable. -/// -/// Variable levels data might correspond to different variable kinds (type, record rows and enum -/// rows) depending on where they appear (in a [UnifType], [UnifRecordRows] or [UnifEnumRows]) -#[derive(Clone, Copy, Eq, PartialEq, Debug)] -pub struct VarLevelsData { - /// Upper bound on the variable levels of free unification variables contained in this type. - upper_bound: VarLevel, - /// Pending variable level update, which must satisfy `pending_level <= old_level`. - pending: Option, -} - -impl Default for VarLevelsData { - fn default() -> Self { - VarLevelsData::new_from_bound(VarLevel::MAX_LEVEL) - } -} - -impl VarLevelsData { - pub fn new() -> Self { - Self::default() - } - - /// Create new variable levels data with the given upper bound and no pending level update. - pub fn new_from_bound(upper_bound: VarLevel) -> Self { - VarLevelsData { - upper_bound, - pending: None, - } - } - - /// Create new variable levels data with an upper bound which indicates that there is no - /// unification variable in the attached type and no pending level update. - pub fn new_no_uvars() -> Self { - Self::new_from_bound(VarLevel::NO_VAR) - } -} - -/// Unification types and variants that store an upper bound on the level of the unification -/// variables they contain, or for which an upper bound can be computed quickly (in constant time). -trait VarLevelUpperBound { - // Return an upper bound on the level of the unification variables contained in `self`. 
- // Depending on the implementer, the level might refer to different kind of unification - // variables (type, record rows or enum rows). - fn var_level_upper_bound(&self) -> VarLevel; -} - -impl VarLevelUpperBound for GenericUnifType { - fn var_level_upper_bound(&self) -> VarLevel { - match self { - GenericUnifType::Concrete { - var_levels_data, .. - } => var_levels_data.upper_bound, - GenericUnifType::UnifVar { init_level, .. } => *init_level, - GenericUnifType::Constant(_) => VarLevel::NO_VAR, - } - } -} - -impl VarLevelUpperBound for GenericUnifTypeUnrolling { - fn var_level_upper_bound(&self) -> VarLevel { - match self { - TypeF::Dyn - | TypeF::Bool - | TypeF::Number - | TypeF::String - | TypeF::ForeignId - | TypeF::Symbol => VarLevel::NO_VAR, - TypeF::Arrow(domain, codomain) => max( - domain.var_level_upper_bound(), - codomain.var_level_upper_bound(), - ), - TypeF::Forall { body, .. } => body.var_level_upper_bound(), - TypeF::Enum(erows) => erows.var_level_upper_bound(), - TypeF::Record(rrows) => rrows.var_level_upper_bound(), - TypeF::Dict { type_fields, .. } => type_fields.var_level_upper_bound(), - TypeF::Array(ty_elts) => ty_elts.var_level_upper_bound(), - TypeF::Wildcard(_) | TypeF::Var(_) | TypeF::Contract(_) => VarLevel::NO_VAR, - } - } -} - -impl VarLevelUpperBound for GenericUnifEnumRows { - fn var_level_upper_bound(&self) -> VarLevel { - match self { - GenericUnifEnumRows::Concrete { - var_levels_data, .. - } => var_levels_data.upper_bound, - GenericUnifEnumRows::UnifVar { init_level, .. 
} => *init_level, - GenericUnifEnumRows::Constant(_) => VarLevel::NO_VAR, - } - } -} - -impl VarLevelUpperBound for GenericUnifEnumRowsUnrolling { - fn var_level_upper_bound(&self) -> VarLevel { - match self { - // A var that hasn't be instantiated yet isn't a unification variable - EnumRowsF::Empty | EnumRowsF::TailVar(_) => VarLevel::NO_VAR, - EnumRowsF::Extend { row: _, tail } => tail.var_level_upper_bound(), - } - } -} - -impl VarLevelUpperBound for GenericUnifRecordRows { - fn var_level_upper_bound(&self) -> VarLevel { - match self { - GenericUnifRecordRows::Concrete { - var_levels_data, .. - } => var_levels_data.upper_bound, - GenericUnifRecordRows::UnifVar { init_level, .. } => *init_level, - GenericUnifRecordRows::Constant(_) => VarLevel::NO_VAR, - } - } -} - -impl VarLevelUpperBound for GenericUnifRecordRowsUnrolling { - fn var_level_upper_bound(&self) -> VarLevel { - match self { - // A var that hasn't be instantiated yet isn't a unification variable - RecordRowsF::Empty | RecordRowsF::TailVar(_) | RecordRowsF::TailDyn => VarLevel::NO_VAR, - RecordRowsF::Extend { - row: RecordRowF { id: _, typ }, - tail, - } => max(tail.var_level_upper_bound(), typ.var_level_upper_bound()), - } - } -} - -/// The types on which the unification algorithm operates, which may be either a concrete type, a -/// type constant or a unification variable. -/// -/// Contracts store an additional term environment for contract equality checking, which is -/// represented by `E`. The typechecker always uses the same type for `E`. However, the evaluation -/// phase may also resort to checking contract equality, using a different environment -/// representation, hence the parametrization. -/// -/// # Invariants -/// -/// **Important**: the following invariant must always be satisfied: for any free unification -/// variable[^free-unif-var] part of a concrete unification type, the level of this variable must -/// be smaller or equal to `var_levels_data.upper_bound`. 
Otherwise, the typechecking algorithm -/// might not be correct. Be careful when creating new concrete [GenericUnifType] or [UnifType] -/// values manually. All `from` and `try_from` implementations, the `concrete` method as well as -/// builders from the [mk_uniftype] module all correctly compute the upper bound (given that the -/// upper bounds of the subcomponents are correct). -/// -/// The default value for `var_levels_data`, although it can incur more work, is at least always -/// correct (by setting `upper_bound = VarLevel::MAX`). -/// -/// [^free-unif-var]: A free unification variable is a unification variable that isn't assigned to -/// any type yet, i.e. verifying `uty.root_type(..) == uty` (adapt with the corresponding -/// `root_xxx` method for rows). -#[derive(Clone, PartialEq, Debug)] -pub enum GenericUnifType { - /// A concrete type (like `Number` or `String -> String`). - Concrete { - typ: GenericUnifTypeUnrolling, - /// Additional metadata related to unification variable levels update. See [VarLevelsData]. - var_levels_data: VarLevelsData, - }, - /// A rigid type constant which cannot be unified with anything but itself. - Constant(VarId), - /// A unification variable. - UnifVar { - /// The unique identifier of this variable in the unification table. - id: VarId, - /// An upper bound of this variable level, which usually correspond to the initial level at - /// which the variable was allocated, although this value might be bumped for some - /// variables by level updates. - /// - /// In a model where unification variables directly store a mutable level attribute, we - /// wouldn't need to duplicate this level information both here at the variable level and - /// in the unification table. `init_level` is used to compute upper bounds without having - /// to thread the unification table around (in the `from`/`try_from` implementation for - /// unification types, typically). 
- /// - /// Note that the actual level of this variable is stored in the unification table, which - /// is the source of truth. The actual level must satisfy `current_level <= init_level` - /// (the level of a variable can only decrease with time). - init_level: VarLevel, - }, -} - -type GenericUnifTypeUnrolling = - TypeF>, GenericUnifRecordRows, GenericUnifEnumRows, (RichTerm, E)>; - -impl GenericUnifType { - /// Create a concrete generic unification type. Compute the variable levels data from the - /// subcomponents. - pub fn concrete(typ: GenericUnifTypeUnrolling) -> Self { - let upper_bound = typ.var_level_upper_bound(); - - GenericUnifType::Concrete { - typ, - var_levels_data: VarLevelsData::new_from_bound(upper_bound), - } - } -} - -impl GenericUnifRecordRows { - /// Create concrete generic record rows. Compute the variable levels data from the - /// subcomponents. - pub fn concrete(typ: GenericUnifRecordRowsUnrolling) -> Self { - let upper_bound = typ.var_level_upper_bound(); - - GenericUnifRecordRows::Concrete { - rrows: typ, - var_levels_data: VarLevelsData::new_from_bound(upper_bound), - } - } -} - -impl GenericUnifEnumRows { - /// Create concrete generic enum rows. Compute the variable levels data from the subcomponents. - pub fn concrete(typ: GenericUnifEnumRowsUnrolling) -> Self { - let upper_bound = typ.var_level_upper_bound(); - - GenericUnifEnumRows::Concrete { - erows: typ, - var_levels_data: VarLevelsData::new_from_bound(upper_bound), - } - } -} - -impl std::convert::TryInto for GenericUnifRecordRows { - type Error = (); - - fn try_into(self) -> Result { - match self { - GenericUnifRecordRows::Concrete { rrows, .. 
} => { - let converted: RecordRowsF, Box> = rrows.try_map( - |uty| Ok(Box::new(GenericUnifType::try_into(*uty)?)), - |urrows| Ok(Box::new(GenericUnifRecordRows::try_into(*urrows)?)), - )?; - Ok(RecordRows(converted)) - } - _ => Err(()), - } - } -} - -impl std::convert::TryInto for GenericUnifEnumRows { - type Error = (); - - fn try_into(self) -> Result { - match self { - GenericUnifEnumRows::Concrete { erows, .. } => { - let converted: EnumRowsF, Box> = erows.try_map( - |uty| Ok(Box::new(GenericUnifType::try_into(*uty)?)), - |uerows| Ok(Box::new(GenericUnifEnumRows::try_into(*uerows)?)), - )?; - Ok(EnumRows(converted)) - } - _ => Err(()), - } - } -} - -impl std::convert::TryInto for GenericUnifType { - type Error = (); - - fn try_into(self) -> Result { - match self { - GenericUnifType::Concrete { typ, .. } => { - let converted: TypeF, RecordRows, EnumRows, RichTerm> = typ.try_map( - |uty_boxed| { - let ty: Type = (*uty_boxed).try_into()?; - Ok(Box::new(ty)) - }, - GenericUnifRecordRows::try_into, - GenericUnifEnumRows::try_into, - |(term, _env)| Ok(term), - )?; - Ok(Type::from(converted)) - } - _ => Err(()), - } - } -} - -impl GenericUnifEnumRows { - pub fn from_enum_rows(erows: EnumRows, env: &E) -> Self { - let f_erow = |ty: Box| Box::new(GenericUnifType::from_type(*ty, env)); - let f_erows = - |erows: Box| Box::new(GenericUnifEnumRows::from_enum_rows(*erows, env)); - - GenericUnifEnumRows::concrete(erows.0.map(f_erow, f_erows)) - } -} - -impl GenericUnifEnumRows { - /// Return an iterator producing immutable references to individual rows. - pub(super) fn iter(&self) -> EnumRowsIterator, GenericUnifEnumRows> { - EnumRowsIterator { - erows: Some(self), - ty: std::marker::PhantomData, - } - } -} - -impl GenericUnifRecordRows { - /// Create `GenericUnifRecordRows` from `RecordRows`. 
- pub fn from_record_rows(rrows: RecordRows, env: &E) -> Self { - let f_rrow = |ty: Box| Box::new(GenericUnifType::from_type(*ty, env)); - let f_rrows = - |rrows: Box| Box::new(GenericUnifRecordRows::from_record_rows(*rrows, env)); - - GenericUnifRecordRows::concrete(rrows.0.map(f_rrow, f_rrows)) - } -} - -impl GenericUnifRecordRows { - pub(super) fn iter(&self) -> RecordRowsIterator, GenericUnifRecordRows> { - RecordRowsIterator { - rrows: Some(self), - ty: std::marker::PhantomData, - } - } -} - -/// A type which contains variables that can be substituted with values of type `T`. -trait Subst: Sized { - /// Substitute all variables of identifier `id` with `to`. - fn subst(self, id: &LocIdent, to: &T) -> Self { - self.subst_levels(id, to).0 - } - - /// Must be filled by implementers of this trait. - /// In addition to performing substitution, this method threads variable levels upper bounds to - /// compute new upper bounds efficiently. - fn subst_levels(self, id: &LocIdent, to: &T) -> (Self, VarLevel); -} - -impl Subst> for GenericUnifType { - fn subst_levels(self, id: &LocIdent, to: &GenericUnifType) -> (Self, VarLevel) { - match self { - GenericUnifType::Concrete { - typ: TypeF::Var(var_id), - var_levels_data, - } if var_id == id.ident() => { - // A free type variable isn't (yet) a unification variable, so it shouldn't have a - // level set at this point. During instantiation, it might be substituted for a - // unification variable by this very function, and will then inherit this level. 
- debug_assert!(var_levels_data.upper_bound == VarLevel::NO_VAR); - - (to.clone(), to.var_level_upper_bound()) - } - GenericUnifType::Concrete { - typ, - var_levels_data, - } => { - let mut upper_bound = VarLevel::NO_VAR; - - let new_ty = GenericUnifType::Concrete { - typ: typ.map_state( - |ty, upper_bound| { - let (new_type, new_ub) = ty.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - Box::new(new_type) - }, - |rrows, upper_bound| { - let (new_rrows, new_ub) = rrows.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - new_rrows - }, - |erows, upper_bound| { - let (new_erows, new_ub) = erows.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - new_erows - }, - // Substitution doesn't cross the contract boundaries - |ctr, _upper_bound| ctr, - &mut upper_bound, - ), - var_levels_data: VarLevelsData { - upper_bound, - ..var_levels_data - }, - }; - - (new_ty, upper_bound) - } - _ => { - let upper_bound = self.var_level_upper_bound(); - (self, upper_bound) - } - } - } -} - -impl Subst> for GenericUnifRecordRows { - fn subst_levels(self, id: &LocIdent, to: &GenericUnifType) -> (Self, VarLevel) { - match self { - GenericUnifRecordRows::Concrete { - rrows, - var_levels_data, - } => { - let mut upper_bound = VarLevel::NO_VAR; - - let new_rrows = rrows.map_state( - |ty, upper_bound| { - let (new_ty, new_ub) = ty.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - Box::new(new_ty) - }, - |rrows, upper_bound| { - let (new_rrows, new_ub) = rrows.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - Box::new(new_rrows) - }, - &mut upper_bound, - ); - - let new_urrows = GenericUnifRecordRows::Concrete { - rrows: new_rrows, - var_levels_data: VarLevelsData { - upper_bound, - ..var_levels_data - }, - }; - - (new_urrows, upper_bound) - } - _ => { - let upper_bound = self.var_level_upper_bound(); - (self, upper_bound) - } - } - } -} - -impl Subst> for GenericUnifEnumRows { - fn subst_levels(self, id: 
&LocIdent, to: &GenericUnifType) -> (Self, VarLevel) { - match self { - GenericUnifEnumRows::Concrete { - erows, - var_levels_data, - } => { - let mut upper_bound = VarLevel::NO_VAR; - - let new_erows = erows.map_state( - |ty, upper_bound| { - let (new_ty, new_ub) = ty.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - Box::new(new_ty) - }, - |erows, upper_bound| { - let (new_erows, new_ub) = erows.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - Box::new(new_erows) - }, - &mut upper_bound, - ); - - let new_uerows = GenericUnifEnumRows::Concrete { - erows: new_erows, - var_levels_data: VarLevelsData { - upper_bound, - ..var_levels_data - }, - }; - - (new_uerows, upper_bound) - } - _ => { - let upper_bound = self.var_level_upper_bound(); - (self, upper_bound) - } - } - } -} - -impl Subst> for GenericUnifType { - fn subst_levels(self, id: &LocIdent, to: &GenericUnifRecordRows) -> (Self, VarLevel) { - match self { - GenericUnifType::Concrete { - typ, - var_levels_data, - } => { - let mut upper_bound = VarLevel::NO_VAR; - - let new_ty = typ.map_state( - |ty, upper_bound| { - let (new_ty, new_ub) = ty.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - Box::new(new_ty) - }, - |rrows, upper_bound| { - let (new_rrows, new_ub) = rrows.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - new_rrows - }, - |erows, upper_bound| { - let (new_erows, new_ub) = erows.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - new_erows - }, - |ctr, _upper_bound| ctr, - &mut upper_bound, - ); - - let new_uty = GenericUnifType::Concrete { - typ: new_ty, - var_levels_data, - }; - - (new_uty, upper_bound) - } - _ => { - let upper_bound = self.var_level_upper_bound(); - (self, upper_bound) - } - } - } -} - -impl Subst> for GenericUnifRecordRows { - fn subst_levels(self, id: &LocIdent, to: &GenericUnifRecordRows) -> (Self, VarLevel) { - match self { - GenericUnifRecordRows::Concrete { - rrows: 
RecordRowsF::TailVar(var_id), - var_levels_data, - } if var_id == *id => { - debug_assert!(var_levels_data.upper_bound == VarLevel::NO_VAR); - (to.clone(), to.var_level_upper_bound()) - } - GenericUnifRecordRows::Concrete { - rrows, - var_levels_data, - } => { - let mut upper_bound = VarLevel::NO_VAR; - - let new_rrows = rrows.map_state( - |ty, upper_bound| { - let (new_ty, new_ub) = ty.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - Box::new(new_ty) - }, - |rrows, upper_bound| { - let (new_rrows, new_ub) = rrows.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - Box::new(new_rrows) - }, - &mut upper_bound, - ); - - let new_urrows = GenericUnifRecordRows::Concrete { - rrows: new_rrows, - var_levels_data: VarLevelsData { - upper_bound, - ..var_levels_data - }, - }; - - (new_urrows, upper_bound) - } - _ => { - let upper_bound = self.var_level_upper_bound(); - (self, upper_bound) - } - } - } -} - -impl Subst> for GenericUnifEnumRows { - fn subst_levels(self, id: &LocIdent, to: &GenericUnifRecordRows) -> (Self, VarLevel) { - match self { - GenericUnifEnumRows::Concrete { - erows, - var_levels_data, - } => { - let mut upper_bound = VarLevel::NO_VAR; - - let new_erows = erows.map_state( - |ty, upper_bound| { - let (new_ty, new_ub) = ty.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - Box::new(new_ty) - }, - |erows, upper_bound| { - let (new_erows, new_ub) = erows.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - Box::new(new_erows) - }, - &mut upper_bound, - ); - - let new_uerows = GenericUnifEnumRows::Concrete { - erows: new_erows, - var_levels_data, - }; - - (new_uerows, upper_bound) - } - _ => { - let upper_bound = self.var_level_upper_bound(); - (self, upper_bound) - } - } - } -} - -impl Subst> for GenericUnifType { - fn subst_levels(self, id: &LocIdent, to: &GenericUnifEnumRows) -> (Self, VarLevel) { - match self { - GenericUnifType::Concrete { - typ, - var_levels_data, - } => { - let mut 
upper_bound = VarLevel::NO_VAR; - - let new_ty = typ.map_state( - |ty, upper_bound| { - let (new_ty, new_ub) = ty.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - Box::new(new_ty) - }, - |rrows, upper_bound| { - let (new_rrows, new_ub) = rrows.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - new_rrows - }, - |erows, upper_bound| { - let (new_erows, new_ub) = erows.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - new_erows - }, - |ctr, _upper_bound| ctr, - &mut upper_bound, - ); - - let new_uty = GenericUnifType::Concrete { - typ: new_ty, - var_levels_data: VarLevelsData { - upper_bound, - ..var_levels_data - }, - }; - - (new_uty, upper_bound) - } - _ => { - let upper_bound = self.var_level_upper_bound(); - (self, upper_bound) - } - } - } -} - -impl Subst> for GenericUnifRecordRows { - fn subst_levels(self, id: &LocIdent, to: &GenericUnifEnumRows) -> (Self, VarLevel) { - match self { - GenericUnifRecordRows::Concrete { - rrows, - var_levels_data, - } => { - let mut upper_bound = VarLevel::NO_VAR; - - let new_rrows = rrows.map_state( - |ty, upper_bound| { - let (new_ty, new_ub) = ty.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - Box::new(new_ty) - }, - |rrows, upper_bound| { - let (new_rrows, new_ub) = rrows.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - Box::new(new_rrows) - }, - &mut upper_bound, - ); - - let new_urrows = GenericUnifRecordRows::Concrete { - rrows: new_rrows, - var_levels_data: VarLevelsData { - upper_bound, - ..var_levels_data - }, - }; - - (new_urrows, upper_bound) - } - - _ => { - let upper_bound = self.var_level_upper_bound(); - (self, upper_bound) - } - } - } -} - -impl Subst> for GenericUnifEnumRows { - fn subst_levels(self, id: &LocIdent, to: &GenericUnifEnumRows) -> (Self, VarLevel) { - match self { - GenericUnifEnumRows::Concrete { - erows: EnumRowsF::TailVar(var_id), - var_levels_data, - } if var_id == *id => { - 
debug_assert!(var_levels_data.upper_bound == VarLevel::NO_VAR); - - (to.clone(), to.var_level_upper_bound()) - } - GenericUnifEnumRows::Concrete { - erows, - var_levels_data, - } => { - let mut upper_bound = VarLevel::NO_VAR; - - let new_erows = erows.map_state( - |ty, upper_bound| { - let (new_ty, new_ub) = ty.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - Box::new(new_ty) - }, - |erows, upper_bound| { - let (new_erows, new_ub) = erows.subst_levels(id, to); - *upper_bound = max(*upper_bound, new_ub); - Box::new(new_erows) - }, - &mut upper_bound, - ); - - let new_uerows = GenericUnifEnumRows::Concrete { - erows: new_erows, - var_levels_data: VarLevelsData { - upper_bound, - ..var_levels_data - }, - }; - - (new_uerows, upper_bound) - } - - _ => { - let upper_bound = self.var_level_upper_bound(); - (self, upper_bound) - } - } - } -} - -impl GenericUnifType { - /// Create a [`GenericUnifType`] from a [`Type`]. - pub fn from_type(ty: Type, env: &E) -> Self { - GenericUnifType::concrete(ty.typ.map( - |ty_| Box::new(GenericUnifType::from_type(*ty_, env)), - |rrows| GenericUnifRecordRows::from_record_rows(rrows, env), - |erows| GenericUnifEnumRows::from_enum_rows(erows, env), - |term| (term, env.clone()), - )) - } -} - -type UnifTypeUnrolling = GenericUnifTypeUnrolling; -type UnifRecordRowsUnrolling = GenericUnifRecordRowsUnrolling; -type UnifEnumRowsUnrolling = GenericUnifEnumRowsUnrolling; - -pub type UnifType = GenericUnifType; - -pub type UnifRecordRow = GenericUnifRecordRow; -pub type UnifRecordRows = GenericUnifRecordRows; - -pub type UnifEnumRow = GenericUnifEnumRow; -pub type UnifEnumRows = GenericUnifEnumRows; - -impl UnifRecordRows { - /// Extract the concrete [`RecordRows`] corresponding to a [`UnifRecordRows`]. Free unification - /// variables as well as type constants are replaced with the empty row. 
- fn into_rrows(self, table: &UnifTable) -> RecordRows { - match self { - UnifRecordRows::UnifVar { id, init_level } => match table.root_rrows(id, init_level) { - t @ UnifRecordRows::Concrete { .. } => t.into_rrows(table), - _ => RecordRows(RecordRowsF::Empty), - }, - UnifRecordRows::Constant(_) => RecordRows(RecordRowsF::Empty), - UnifRecordRows::Concrete { rrows, .. } => { - let mapped = rrows.map( - |ty| Box::new(ty.into_type(table)), - |rrows| Box::new(rrows.into_rrows(table)), - ); - RecordRows(mapped) - } - } - } - - /// Return the unification root associated with these record rows. If the rows are a unification - /// variable, return the result of `table.root_rrows`. Return `self` otherwise. - fn into_root(self, table: &UnifTable) -> Self { - match self { - UnifRecordRows::UnifVar { id, init_level } => table.root_rrows(id, init_level), - urrows => urrows, - } - } -} - -impl UnifEnumRows { - /// Extract the concrete [`EnumRows`] corresponding to a [`UnifEnumRows`]. Free unification - /// variables as well as type constants are replaced with the empty row. - fn into_erows(self, table: &UnifTable) -> EnumRows { - match self { - UnifEnumRows::UnifVar { id, init_level } => match table.root_erows(id, init_level) { - t @ UnifEnumRows::Concrete { .. } => t.into_erows(table), - _ => EnumRows(EnumRowsF::Empty), - }, - UnifEnumRows::Constant(_) => EnumRows(EnumRowsF::Empty), - UnifEnumRows::Concrete { erows, .. } => { - let mapped = erows.map( - |ty| Box::new(ty.into_type(table)), - |erows| Box::new(erows.into_erows(table)), - ); - EnumRows(mapped) - } - } - } - - /// Return the unification root associated with these enum rows. If the rows are a unification - /// variable, return the result of `table.root_erows`. Return `self` otherwise. 
- fn into_root(self, table: &UnifTable) -> Self { - match self { - UnifEnumRows::UnifVar { id, init_level } => table.root_erows(id, init_level), - uerows => uerows, - } - } -} - -impl UnifType { - /// Create a [`UnifType`] from an [`ApparentType`]. As for [`GenericUnifType::from_type`], this - /// function requires the current term environment. - pub fn from_apparent_type(at: ApparentType, env: &SimpleTermEnvironment) -> Self { - match at { - ApparentType::Annotated(ty) if has_wildcards(&ty) => { - GenericUnifType::concrete(TypeF::Dyn) - } - ApparentType::Annotated(ty) - | ApparentType::Inferred(ty) - | ApparentType::Approximated(ty) => GenericUnifType::from_type(ty, env), - ApparentType::FromEnv(uty) => uty, - } - } - - pub fn from_constant_of_kind(c: usize, k: VarKindDiscriminant) -> Self { - match k { - VarKindDiscriminant::Type => UnifType::Constant(c), - VarKindDiscriminant::EnumRows => UnifType::Concrete { - typ: TypeF::Enum(UnifEnumRows::Constant(c)), - var_levels_data: VarLevelsData::new_no_uvars(), - }, - VarKindDiscriminant::RecordRows => UnifType::Concrete { - typ: TypeF::Record(UnifRecordRows::Constant(c)), - var_levels_data: VarLevelsData::new_no_uvars(), - }, - } - } - - /// Extract the concrete type corresponding to a unifiable type. Free unification variables as - /// well as type constants are replaced with the type `Dyn`. - fn into_type(self, table: &UnifTable) -> Type { - match self { - UnifType::UnifVar { id, init_level } => match table.root_type(id, init_level) { - t @ UnifType::Concrete { .. } => t.into_type(table), - _ => Type::from(TypeF::Dyn), - }, - UnifType::Constant(_) => Type::from(TypeF::Dyn), - UnifType::Concrete { typ, .. } => { - let mapped = typ.map( - |btyp| Box::new(btyp.into_type(table)), - |urrows| urrows.into_rrows(table), - |uerows| uerows.into_erows(table), - |(term, _env)| term, - ); - Type::from(mapped) - } - } - } - - /// Return the unification root associated with this type. 
If the type is a unification - /// variable, return the result of `table.root_type`. Return `self` otherwise. - fn into_root(self, table: &UnifTable) -> Self { - match self { - UnifType::UnifVar { id, init_level } => table.root_type(id, init_level), - uty => uty, - } - } -} - -impl From for UnifType { - fn from(typ: UnifTypeUnrolling) -> Self { - let var_level_max = typ.var_level_upper_bound(); - - UnifType::Concrete { - typ, - var_levels_data: VarLevelsData::new_from_bound(var_level_max), - } - } -} - -impl From, Box>> for UnifRecordRows { - fn from(rrows: RecordRowsF, Box>) -> Self { - let var_level_max = rrows.var_level_upper_bound(); - - UnifRecordRows::Concrete { - rrows, - var_levels_data: VarLevelsData::new_from_bound(var_level_max), - } - } -} - -impl From, Box>> for UnifEnumRows { - fn from(erows: EnumRowsF, Box>) -> Self { - UnifEnumRows::concrete(erows) - } -} - -/// Iterator items produced by [RecordRowsIterator] on [GenericUnifRecordRows]. -pub enum GenericUnifRecordRowsIteratorItem<'a, E: TermEnvironment> { - TailDyn, - TailVar(&'a LocIdent), - TailUnifVar { id: VarId, init_level: VarLevel }, - TailConstant(VarId), - Row(RecordRowF<&'a GenericUnifType>), -} - -pub type UnifRecordRowsIteratorItem<'a> = - GenericUnifRecordRowsIteratorItem<'a, SimpleTermEnvironment>; - -impl<'a, E: TermEnvironment> Iterator - for RecordRowsIterator<'a, GenericUnifType, GenericUnifRecordRows> -{ - type Item = GenericUnifRecordRowsIteratorItem<'a, E>; - - fn next(&mut self) -> Option { - self.rrows.and_then(|next| match next { - GenericUnifRecordRows::Concrete { rrows, .. 
} => match rrows { - RecordRowsF::Empty => { - self.rrows = None; - None - } - RecordRowsF::TailDyn => { - self.rrows = None; - Some(GenericUnifRecordRowsIteratorItem::TailDyn) - } - RecordRowsF::TailVar(id) => { - self.rrows = None; - Some(GenericUnifRecordRowsIteratorItem::TailVar(id)) - } - RecordRowsF::Extend { row, tail } => { - self.rrows = Some(tail); - Some(GenericUnifRecordRowsIteratorItem::Row(RecordRowF { - id: row.id, - typ: row.typ.as_ref(), - })) - } - }, - GenericUnifRecordRows::UnifVar { id, init_level } => { - self.rrows = None; - Some(GenericUnifRecordRowsIteratorItem::TailUnifVar { - id: *id, - init_level: *init_level, - }) - } - GenericUnifRecordRows::Constant(var_id) => { - self.rrows = None; - Some(GenericUnifRecordRowsIteratorItem::TailConstant(*var_id)) - } - }) - } -} - -/// Iterator items produced by [`EnumRowsIterator`]. -pub enum GenericUnifEnumRowsIteratorItem<'a, E: TermEnvironment> { - TailVar(&'a LocIdent), - TailUnifVar { id: VarId, init_level: VarLevel }, - TailConstant(VarId), - Row(EnumRowF<&'a GenericUnifType>), -} - -impl<'a, E: TermEnvironment> Iterator - for EnumRowsIterator<'a, GenericUnifType, GenericUnifEnumRows> -{ - type Item = GenericUnifEnumRowsIteratorItem<'a, E>; - - fn next(&mut self) -> Option { - self.erows.and_then(|next| match next { - GenericUnifEnumRows::Concrete { erows, .. 
} => match erows { - EnumRowsF::Empty => { - self.erows = None; - None - } - EnumRowsF::TailVar(id) => { - self.erows = None; - Some(GenericUnifEnumRowsIteratorItem::TailVar(id)) - } - EnumRowsF::Extend { row, tail } => { - self.erows = Some(tail); - Some(GenericUnifEnumRowsIteratorItem::Row(EnumRowF { - id: row.id, - typ: row.typ.as_ref().map(|ty| ty.as_ref()), - })) - } - }, - GenericUnifEnumRows::UnifVar { id, init_level } => { - self.erows = None; - Some(GenericUnifEnumRowsIteratorItem::TailUnifVar { - id: *id, - init_level: *init_level, - }) - } - GenericUnifEnumRows::Constant(var_id) => { - self.erows = None; - Some(GenericUnifEnumRowsIteratorItem::TailConstant(*var_id)) - } - }) - } -} - -pub trait ReifyAsUnifType { - fn unif_type() -> UnifType; -} - -/// The typing context is a structure holding the scoped, environment-like data structures required -/// to perform typechecking. -/// -#[derive(Debug, PartialEq, Clone)] -pub struct Context { - /// The typing environment, counterpart of the eval environment for typechecking - pub type_env: Environment, - /// The term environment, used to decide type equality over contracts. - pub term_env: SimpleTermEnvironment, - /// The current variable level, incremented each time we instantiate a polymorphic type and - /// thus introduce a new block of variables (either unification variables or rigid type - /// variables). - pub var_level: VarLevel, -} - -impl Context { - pub fn new() -> Self { - Context { - type_env: Environment::new(), - term_env: SimpleTermEnvironment::new(), - var_level: VarLevel::MIN_LEVEL, - } - } -} - -impl Default for Context { - fn default() -> Self { - Self::new() - } -} - -#[derive(Clone, Debug)] -pub enum EnvBuildError { - NotARecord(RichTerm), -} - -/// Populate the initial typing environment from a `Vec` of parsed files. 
-pub fn mk_initial_ctxt( - initial_env: &[(nickel_stdlib::StdlibModule, RichTerm)], -) -> Result { - // Collect the bindings for each module, clone them and flatten the result to a single list. - let mut bindings = Vec::new(); - - for (module, rt) in initial_env { - match (module, rt.as_ref()) { - // The internals module is special: it is required to be syntactically a record, - // and is added directly to the top-level environment. - (nickel_stdlib::StdlibModule::Internals, Term::RecRecord(record, ..)) => { - // We reject fields without a value (that would be a stdlib module without - // defintion) - bindings.extend(record.fields.iter().map(|(id, field)| { - ( - *id, - field - .value - .as_ref() - .unwrap_or_else(|| { - panic!("expected stdlib module {id} to have a definition") - }) - .clone(), - ) - })); - } - (nickel_stdlib::StdlibModule::Internals, _) => { - return Err(EnvBuildError::NotARecord(rt.clone())); - } - // Otherwise, we insert a value in the environment bound to the name of the module - (module, _) => bindings.push((module.name().into(), rt.clone())), - } - } - - let term_env = bindings - .iter() - .cloned() - .map(|(id, rt)| (id.ident(), (rt, SimpleTermEnvironment::new()))) - .collect(); - - let type_env = bindings - .into_iter() - .map(|(id, rt)| { - ( - id.ident(), - infer_record_type(&rt, &term_env, INFER_RECORD_MAX_DEPTH), - ) - }) - .collect(); - - Ok(Context { - type_env, - term_env, - var_level: VarLevel::MIN_LEVEL, - }) -} - -/// Add the bindings of a record to a typing environment. Ignore fields whose name are defined -/// through interpolation. -//TODO: support the case of a record with a type annotation. -pub fn env_add_term( - env: &mut Environment, - rt: &RichTerm, - term_env: &SimpleTermEnvironment, - resolver: &dyn ImportResolver, -) -> Result<(), EnvBuildError> { - let RichTerm { term, pos } = rt; - - match term.as_ref() { - Term::Record(record) | Term::RecRecord(record, ..) 
=> { - for (id, field) in &record.fields { - let uty = UnifType::from_apparent_type( - field_apparent_type(field, Some(env), Some(resolver)), - term_env, - ); - env.insert(id.ident(), uty); - } - - Ok(()) - } - t => Err(EnvBuildError::NotARecord(RichTerm::new(t.clone(), *pos))), - } -} - -/// Bind one term in a typing environment. -pub fn env_add( - env: &mut Environment, - id: LocIdent, - rt: &RichTerm, - term_env: &SimpleTermEnvironment, - resolver: &dyn ImportResolver, -) { - env.insert( - id.ident(), - UnifType::from_apparent_type( - apparent_type(rt.as_ref(), Some(env), Some(resolver)), - term_env, - ), - ); -} - -/// The shared state of unification. -pub struct State<'a> { - /// The import resolver, to retrieve and typecheck imports. - resolver: &'a dyn ImportResolver, - /// The unification table. - table: &'a mut UnifTable, - /// Row constraints. - constr: &'a mut RowConstrs, - /// A mapping from unification variables or constants together with their - /// kind to the name of the corresponding type variable which introduced it, - /// if any. - /// - /// Used for error reporting. - names: &'a mut NameTable, - /// A mapping from wildcard ID to unification variable. - wildcard_vars: &'a mut Vec, -} - -/// Immutable and owned data, required by the LSP to carry out specific analysis. -/// It is basically an owned-subset of the typechecking state. -pub struct TypeTables { - pub table: UnifTable, - pub names: NameTable, - pub wildcards: Vec, -} - -/// Typecheck a term. -/// -/// Return the inferred type in case of success. This is just a wrapper that calls -/// `type_check_with_visitor` with a blanket implementation for the visitor. -/// -/// Note that this function doesn't recursively typecheck imports (anymore), but just the current -/// file. It however still needs the resolver to get the apparent type of imports. -/// -/// Return the type inferred for type wildcards. 
-pub fn type_check( - t: &RichTerm, - initial_ctxt: Context, - resolver: &impl ImportResolver, - initial_mode: TypecheckMode, -) -> Result { - type_check_with_visitor(t, initial_ctxt, resolver, &mut (), initial_mode) - .map(|tables| tables.wildcards) -} - -/// Typecheck a term while providing the type information to a visitor. -pub fn type_check_with_visitor( - t: &RichTerm, - initial_ctxt: Context, - resolver: &impl ImportResolver, - visitor: &mut V, - initial_mode: TypecheckMode, -) -> Result -where - V: TypecheckVisitor, -{ - let (mut table, mut names) = (UnifTable::new(), HashMap::new()); - let mut wildcard_vars = Vec::new(); - - { - let mut state: State = State { - resolver, - table: &mut table, - constr: &mut RowConstrs::new(), - names: &mut names, - wildcard_vars: &mut wildcard_vars, - }; - - if initial_mode == TypecheckMode::Enforce { - let uty = state.table.fresh_type_uvar(initial_ctxt.var_level); - check(&mut state, initial_ctxt, visitor, t, uty)?; - } else { - walk(&mut state, initial_ctxt, visitor, t)?; - } - } - - let result = wildcard_vars_to_type(wildcard_vars.clone(), &table); - Ok(TypeTables { - table, - names, - wildcards: result, - }) -} - -/// Walk the AST of a term looking for statically typed block to check. Fill the linearization -/// alongside and store the apparent type of variable inside the typing environment. -fn walk( - state: &mut State, - mut ctxt: Context, - visitor: &mut V, - rt: &RichTerm, -) -> Result<(), TypecheckError> { - let RichTerm { term: t, pos } = rt; - visitor.visit_term( - rt, - UnifType::from_apparent_type( - apparent_type(t, Some(&ctxt.type_env), Some(state.resolver)), - &ctxt.term_env, - ), - ); - - match t.as_ref() { - Term::ParseError(_) - | Term::RuntimeError(_) - | Term::Null - | Term::Bool(_) - | Term::Num(_) - | Term::Str(_) - | Term::Lbl(_) - | Term::Enum(_) - | Term::ForeignId(_) - | Term::SealingKey(_) - // This function doesn't recursively typecheck imports: this is the responsibility of the - // caller. 
- | Term::Import(_) - | Term::ResolvedImport(_) => Ok(()), - Term::Var(x) => ctxt.type_env - .get(&x.ident()) - .ok_or(TypecheckError::UnboundIdentifier { id: *x, pos: *pos }) - .map(|_| ()), - Term::StrChunks(chunks) => { - chunks - .iter() - .try_for_each(|chunk| -> Result<(), TypecheckError> { - match chunk { - StrChunk::Literal(_) => Ok(()), - StrChunk::Expr(t, _) => { - walk(state, ctxt.clone(), visitor, t) - } - } - }) - } - Term::Fun(id, t) => { - // The parameter of an unannotated function is always assigned type `Dyn`, unless the - // function is directly annotated with a function contract (see the special casing in - // `walk_with_annot`). - ctxt.type_env.insert(id.ident(), mk_uniftype::dynamic()); - walk(state, ctxt, visitor, t) - } - Term::FunPattern(pat, t) => { - let PatternTypeData { bindings: pat_bindings, ..} = pat.pattern_types(state, &ctxt, TypecheckMode::Walk)?; - ctxt.type_env.extend(pat_bindings.into_iter().map(|(id, typ)| (id.ident(), typ))); - - walk(state, ctxt, visitor, t) - } - Term::Array(terms, _) => terms - .iter() - .try_for_each(|t| -> Result<(), TypecheckError> { - walk(state, ctxt.clone(), visitor, t) - }), - Term::Let(bindings, rt, attrs) => { - // For a recursive let block, shadow all the names we're about to bind, so - // we aren't influenced by variables defined in an outer scope. - if attrs.rec { - for (x, _re) in bindings { - ctxt.type_env - .insert(x.ident(), state.table.fresh_type_uvar(ctxt.var_level)); - } - } - - let start_ctxt = ctxt.clone(); - for (x, re) in bindings { - let ty_let = binding_type(state, re.as_ref(), &start_ctxt, false); - - // We don't support recursive binding when checking for contract equality. - // - // This would quickly lead to cycles, which are hard to deal with without leaking - // memory. 
In order to deal with recursive bindings, the best way is probably to - // allocate all the term environments inside an arena, local to each statically typed - // block, and use bare references to represent cycles. Then everything would be cleaned - // at the end of the block. - ctxt.term_env - .0 - .insert(x.ident(), (re.clone(), ctxt.term_env.clone())); - - ctxt.type_env.insert(x.ident(), ty_let.clone()); - visitor.visit_ident(x, ty_let.clone()); - } - - let re_ctxt = if attrs.rec { ctxt.clone() } else { start_ctxt.clone() }; - for (_x, re) in bindings { - walk(state, re_ctxt.clone(), visitor, re)?; - } - - walk(state, ctxt, visitor, rt) - } - Term::LetPattern(bindings, rt, attrs) => { - // For a recursive let block, shadow all the names we're about to bind, so - // we aren't influenced by variables defined in an outer scope. - if attrs.rec { - for (pat, _re) in bindings { - for (_path, id, _fld) in pat.bindings() { - ctxt.type_env - .insert(id.ident(), state.table.fresh_type_uvar(ctxt.var_level)); - } - } - } - - let start_ctxt = ctxt.clone(); - - for (pat, re) in bindings { - let ty_let = binding_type(state, re.as_ref(), &start_ctxt, false); - - // In the case of a let-binding, we want to guess a better type than `Dyn` when we can - // do so cheaply for the whole pattern. - if let Some(alias) = &pat.alias { - visitor.visit_ident(alias, ty_let.clone()); - ctxt.type_env.insert(alias.ident(), ty_let); - } - - // [^separate-alias-treatment]: Note that we call `pattern_types` on the inner pattern - // data, which doesn't take into account the potential heading alias `x @ `. - // This is on purpose, as the alias has been treated separately, so we don't want to - // shadow it with a less precise type. - // - // The use of start_ctxt here looks like it might be wrong for let rec, but in fact - // it's unused in TypecheckMode::Walk anyway. 
- let PatternTypeData {bindings: pat_bindings, ..} = pat.data.pattern_types(state, &start_ctxt, TypecheckMode::Walk)?; - - for (id, typ) in pat_bindings { - visitor.visit_ident(&id, typ.clone()); - ctxt.type_env.insert(id.ident(), typ); - } - } - - let re_ctxt = if attrs.rec { ctxt.clone() } else { start_ctxt.clone() }; - for (_pat, re) in bindings { - walk(state, re_ctxt.clone(), visitor, re)?; - } - - walk(state, ctxt, visitor, rt) - } - Term::App(e, t) => { - walk(state, ctxt.clone(), visitor, e)?; - walk(state, ctxt, visitor, t) - } - Term::Match(data) => { - data.branches.iter().try_for_each(|MatchBranch { pattern, guard, body }| { - let mut local_ctxt = ctxt.clone(); - let PatternTypeData { bindings: pat_bindings, .. } = pattern.data.pattern_types(state, &ctxt, TypecheckMode::Walk)?; - - if let Some(alias) = &pattern.alias { - visitor.visit_ident(alias, mk_uniftype::dynamic()); - local_ctxt.type_env.insert(alias.ident(), mk_uniftype::dynamic()); - } - - for (id, typ) in pat_bindings { - visitor.visit_ident(&id, typ.clone()); - local_ctxt.type_env.insert(id.ident(), typ); - } - - if let Some(guard) = guard { - walk(state, local_ctxt.clone(), visitor, guard)?; - } - - walk(state, local_ctxt, visitor, body) - })?; - - Ok(()) - } - Term::RecRecord(record, dynamic, ..) => { - for (id, field) in record.fields.iter() { - let field_type = field_type( - state, - field, - &ctxt, - false, - ); - ctxt.type_env.insert(id.ident(), field_type.clone()); - visitor.visit_ident(id, field_type); - } - - // Walk the type and contract annotations - - // We don't bind the fields in the term environment used to check for contract - // equality. See the `Let` case above for more details on why such recursive bindings - // are currently ignored. 
- record.fields - .values() - .try_for_each(|field| -> Result<(), TypecheckError> { - walk_field(state, ctxt.clone(), visitor, field) - })?; - - dynamic.iter().map(|(_, field)| field) - .try_for_each(|field| -> Result<(), TypecheckError> { - walk_field(state, ctxt.clone(), visitor, field) - }) - } - Term::Record(record) => { - record.fields - .values() - .filter_map(|field| field.value.as_ref()) - .try_for_each(|t| -> Result<(), TypecheckError> { - walk(state, ctxt.clone(), visitor, t) - }) - } - Term::EnumVariant { arg: t, ..} - | Term::Sealed(_, t, _) - | Term::Op1(_, t) - | Term::CustomContract(t) => walk(state, ctxt, visitor, t), - Term::Op2(_, t1, t2) => { - walk(state, ctxt.clone(), visitor, t1)?; - walk(state, ctxt, visitor, t2) - } - Term::OpN(_, args) => { - args.iter().try_for_each(|t| -> Result<(), TypecheckError> { - walk( - state, - ctxt.clone(), - visitor, - t, - ) - }, - ) - } - Term::Annotated(annot, rt) => { - walk_annotated(state, ctxt, visitor, annot, rt) - } - // The contract field is just a caching mechanism, and should be set to `None` at this - // point anyway. We can safely ignore it. - Term::Type { typ, contract: _ } => walk_type(state, ctxt, visitor, typ), - Term::Closure(_) => unreachable!("should never see a closure at typechecking time"), - } -} - -/// Same as [`walk`] but operate on a type, which can contain terms as contracts -/// ([crate::typ::TypeF::Contract]), instead of a term. -fn walk_type( - state: &mut State, - ctxt: Context, - visitor: &mut V, - ty: &Type, -) -> Result<(), TypecheckError> { - match &ty.typ { - TypeF::Dyn - | TypeF::Number - | TypeF::Bool - | TypeF::String - | TypeF::ForeignId - | TypeF::Symbol - // Currently, the parser can't generate unbound type variables by construction. Thus we - // don't check here for unbound type variables again. - | TypeF::Var(_) - // An enum type can't contain a contract. - // TODO: the assertion above isn't true anymore (ADTs). Need fixing? 
- | TypeF::Enum(_) - | TypeF::Wildcard(_) => Ok(()), - TypeF::Arrow(ty1, ty2) => { - walk_type(state, ctxt.clone(), visitor, ty1.as_ref())?; - walk_type(state, ctxt, visitor, ty2.as_ref()) - } - TypeF::Record(rrows) => walk_rrows(state, ctxt, visitor, rrows), - TypeF::Contract(t) => walk(state, ctxt, visitor, t), - TypeF::Dict { type_fields: ty2, .. } - | TypeF::Array(ty2) - | TypeF::Forall {body: ty2, ..} => walk_type(state, ctxt, visitor, ty2), - } -} - -/// Same as [`walk_type`] but operate on record rows. -fn walk_rrows( - state: &mut State, - ctxt: Context, - visitor: &mut V, - rrows: &RecordRows, -) -> Result<(), TypecheckError> { - match rrows.0 { - RecordRowsF::Empty - // Currently, the parser can't generate unbound type variables by construction. Thus we - // don't check here for unbound type variables again. - | RecordRowsF::TailVar(_) - | RecordRowsF::TailDyn => Ok(()), - RecordRowsF::Extend { ref row, ref tail } => { - walk_type(state, ctxt.clone(), visitor, &row.typ)?; - walk_rrows(state, ctxt, visitor, tail) - } - } -} - -fn walk_field( - state: &mut State, - ctxt: Context, - visitor: &mut V, - field: &Field, -) -> Result<(), TypecheckError> { - walk_with_annot( - state, - ctxt, - visitor, - &field.metadata.annotation, - field.value.as_ref(), - ) -} - -fn walk_annotated( - state: &mut State, - ctxt: Context, - visitor: &mut V, - annot: &TypeAnnotation, - rt: &RichTerm, -) -> Result<(), TypecheckError> { - walk_with_annot(state, ctxt, visitor, annot, Some(rt)) -} - -/// Walk an annotated term, either via [crate::term::record::FieldMetadata], or via a standalone -/// type or contract annotation. A type annotation switches the typechecking mode to _enforce_. 
-fn walk_with_annot( - state: &mut State, - mut ctxt: Context, - visitor: &mut V, - annot: &TypeAnnotation, - value: Option<&RichTerm>, -) -> Result<(), TypecheckError> { - annot - .iter() - .try_for_each(|ty| walk_type(state, ctxt.clone(), visitor, &ty.typ))?; - - match (annot, value) { - ( - TypeAnnotation { - typ: Some(LabeledType { typ: ty2, .. }), - .. - }, - Some(value), - ) => { - let uty2 = UnifType::from_type(ty2.clone(), &ctxt.term_env); - check(state, ctxt, visitor, value, uty2) - } - ( - TypeAnnotation { - typ: None, - contracts, - }, - Some(value), - ) => { - // If we see a function annotated with a function contract, we can get the type of the - // argument for free. We use this information both for typechecking (you could see it - // as an extension of the philosophy of apparent types, but for function arguments - // instead of let-bindings) and for the LSP, to provide better type information and - // completion. - if let Term::Fun(id, body) = value.as_ref() { - // We look for the first contract of the list that is a function contract. - let fst_domain = contracts.iter().find_map(|c| { - if let TypeF::Arrow(domain, _) = &c.typ.typ { - Some(UnifType::from_type(domain.as_ref().clone(), &ctxt.term_env)) - } else { - None - } - }); - - if let Some(domain) = fst_domain { - // Because the normal code path in `walk` sets the function argument to `Dyn`, - // we need to short-circuit it. We manually visit the argument, augment the - // typing environment and walk the body of the function. - visitor.visit_ident(id, domain.clone()); - ctxt.type_env.insert(id.ident(), domain); - return walk(state, ctxt, visitor, body); - } - } - - walk(state, ctxt, visitor, value) - } - _ => Ok(()), - } -} - -/// Check a term against a given type. Although this method mostly corresponds to checking mode in -/// the classical bidirectional framework, it combines both checking and inference modes in -/// practice, to avoid duplicating rules (that is, code) as detailed below. 
-/// -/// # Literals -/// -/// Checking a literal (a number, a string, a boolean, etc.) unifies the checked type with the -/// corresponding primitive type (`Number`, `String`, `Bool`, etc.). If the checked type is a -/// unification variable, `check` acts as an inference rule. If the type is concrete, unification -/// enforces equality, and `check` acts as a checking rule. -/// -/// # Introduction rules -/// -/// Following Pfenning's recipe (see [Bidirectional Typing][bidirectional-typing]), introduction -/// rules (e.g. typechecking a record) are checking. `check` follows the same logic here: it uses -/// unification to "match" on the expected type (for example in the case of records, a record type -/// or a dictionary type) and pushes typechecking down the record fields. -/// -/// # Elimination rules -/// -/// Elimination rules (such as function application or primitive operator application) only exist in -/// inference mode (still following Pfenning's recipe). `check` follows the inference mode here -/// (typically on function application, where we first call to `infer` on the function part, and -/// then check the argument). -/// -/// Still, `check` is supposed to be implementing checking mode from the outside. We thus also -/// apply the typing rule which switches from inference to checking mode. Currently, subtyping -/// isn't supported yet in Nickel but is planned as part of RFC004. When subtyping lands, as the -/// name suggests, [`subsumption`] will be the place where we apply subsumption, as customary in -/// bidirectional type systems with subtyping. -/// -/// To sum up, elimination rules inside `check` correspond to an inference rule composed with the -/// switching/subsumption rule, resulting in a composite checking rule. -/// -/// # Parameters -/// -/// - `state`: the unification state (see [`State`]). -/// - `env`: the typing environment, mapping free variable to types. 
-/// - `lin`: The current building linearization of building state `S` -/// - `visitor`: A visitor that can modify the linearization -/// - `t`: the term to check. -/// - `ty`: the type to check the term against. -/// -/// # Linearization (LSP) -/// -/// `check` is in charge of registering every term with the `visitor` and makes sure to scope -/// the visitor accordingly -/// -/// [bidirectional-typing]: (https://arxiv.org/abs/1908.05839) -fn check( - state: &mut State, - mut ctxt: Context, - visitor: &mut V, - rt: &RichTerm, - ty: UnifType, -) -> Result<(), TypecheckError> { - let RichTerm { term: t, pos } = rt; - - visitor.visit_term(rt, ty.clone()); - - // When checking against a polymorphic type, we immediatly instantiate potential heading - // foralls. Otherwise, this polymorphic type wouldn't unify much with other types. If we infer - // a polymorphic type for `rt`, the subsumption rule will take care of instantiating this type - // with unification variables, such that terms like `(fun x => x : forall a. a -> a) : forall - // b. b -> b` typecheck correctly. 
- let ty = instantiate_foralls(state, &mut ctxt, ty, ForallInst::Constant); - - match t.as_ref() { - Term::ParseError(_) => Ok(()), - Term::RuntimeError(_) => panic!("unexpected RuntimeError term during typechecking"), - // null is inferred to be of type Dyn - Term::Null => ty - .unify(mk_uniftype::dynamic(), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos)), - Term::Bool(_) => ty - .unify(mk_uniftype::bool(), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos)), - Term::Num(_) => ty - .unify(mk_uniftype::num(), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos)), - Term::Str(_) => ty - .unify(mk_uniftype::str(), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos)), - Term::StrChunks(chunks) => { - ty.unify(mk_uniftype::str(), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos))?; - - chunks - .iter() - .try_for_each(|chunk| -> Result<(), TypecheckError> { - match chunk { - StrChunk::Literal(_) => Ok(()), - StrChunk::Expr(t, _) => { - check(state, ctxt.clone(), visitor, t, mk_uniftype::str()) - } - } - }) - } - // Fun is an introduction rule for the arrow type. The target type is thus expected to be - // `T -> U`, which is enforced by unification, and we then check the body of the function - // against `U`, after adding `x : T` in the environment. - Term::Fun(x, t) => { - let src = state.table.fresh_type_uvar(ctxt.var_level); - let trg = state.table.fresh_type_uvar(ctxt.var_level); - let arr = mk_uty_arrow!(src.clone(), trg.clone()); - - visitor.visit_ident(x, src.clone()); - - ty.unify(arr, state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos))?; - - ctxt.type_env.insert(x.ident(), src); - check(state, ctxt, visitor, t, trg) - } - Term::FunPattern(pat, t) => { - // See [^separate-alias-treatment]. 
- let pat_types = pat - .data - .pattern_types(state, &ctxt, TypecheckMode::Enforce)?; - // In the destructuring case, there's no alternative pattern, and we must thus - // immediately close all the row types. - pattern::close_all_enums(pat_types.enum_open_tails, state); - - let src = pat_types.typ; - let trg = state.table.fresh_type_uvar(ctxt.var_level); - let arr = mk_uty_arrow!(src.clone(), trg.clone()); - - if let Some(alias) = &pat.alias { - visitor.visit_ident(alias, src.clone()); - ctxt.type_env.insert(alias.ident(), src); - } - - for (id, typ) in pat_types.bindings { - visitor.visit_ident(&id, typ.clone()); - ctxt.type_env.insert(id.ident(), typ); - } - - ty.unify(arr, state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos))?; - check(state, ctxt, visitor, t, trg) - } - // [^custom-contract-is-check]: [crate::term::CustomContract] isn't supposed to be used in - // Nickel source code directly, but we can typecheck it. A custom contract is a - // datastructure holding a function of a specific type. - // - // Whether seen as a type constructor, or as equivalent to a standalone function, it's an - // introduction rule and thus it should be check. - // - // This rule can't produce a polymorphic type (it produces a `Dyn`, or morally a `Contract` - // type, if we had one), so we don't lose much by making it a check rule anyway, as for - // e.g. literals. - Term::CustomContract(t) => { - // The overall type of a custom contract is currently `Dyn`, as we don't have a better - // one. 
- ty.unify(mk_uniftype::dynamic(), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos))?; - - check( - state, - ctxt.clone(), - visitor, - t, - operation::custom_contract_ret_type(), - ) - } - Term::Array(terms, _) => { - let ty_elts = state.table.fresh_type_uvar(ctxt.var_level); - - ty.unify(mk_uniftype::array(ty_elts.clone()), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos))?; - - terms - .iter() - .try_for_each(|t| -> Result<(), TypecheckError> { - check(state, ctxt.clone(), visitor, t, ty_elts.clone()) - }) - } - Term::Lbl(_) => { - // TODO implement lbl type - ty.unify(mk_uniftype::dynamic(), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos)) - } - Term::Let(bindings, rt, attrs) => { - // For a recursive let block, shadow all the names we're about to bind, so - // we aren't influenced by variables defined in an outer scope. - if attrs.rec { - for (x, _re) in bindings { - ctxt.type_env - .insert(x.ident(), state.table.fresh_type_uvar(ctxt.var_level)); - } - } - - let mut tys = Vec::new(); - let start_ctxt = ctxt.clone(); - for (x, re) in bindings { - let ty_let = binding_type(state, re.as_ref(), &start_ctxt, true); - - // We don't support recursive binding when checking for contract equality. See the - // `Let` case in `walk`. - ctxt.term_env - .0 - .insert(x.ident(), (re.clone(), ctxt.term_env.clone())); - - ctxt.type_env.insert(x.ident(), ty_let.clone()); - visitor.visit_ident(x, ty_let.clone()); - tys.push((re, ty_let)); - } - - let re_ctxt = if attrs.rec { &ctxt } else { &start_ctxt }; - for (re, ty_let) in tys { - check(state, re_ctxt.clone(), visitor, re, ty_let)?; - } - - // FIXME: if we're recursive, do we need to do unify the fresh - // type variables with the (modified by the recursive check) - // binding_type? I feel like we should, but it doesn't seem to make - // a difference. 
- - check(state, ctxt.clone(), visitor, rt, ty) - } - Term::LetPattern(bindings, rt, attrs) => { - // For a recursive let block, shadow all the names we're about to bind, so - // we aren't influenced by variables defined in an outer scope. - if attrs.rec { - for (pat, _re) in bindings { - for (_path, id, _fld) in pat.bindings() { - ctxt.type_env - .insert(id.ident(), state.table.fresh_type_uvar(ctxt.var_level)); - } - } - } - - let mut tys = Vec::new(); - let start_ctxt = ctxt.clone(); - for (pat, re) in bindings { - // See [^separate-alias-treatment]. - let pat_types = pat.pattern_types(state, &start_ctxt, TypecheckMode::Enforce)?; - - // In the destructuring case, there's no alternative pattern, and we must thus - // immediatly close all the row types. - pattern::close_all_enums(pat_types.enum_open_tails, state); - - // The inferred type of the expr being bound - let ty_let = binding_type(state, re.as_ref(), &start_ctxt, true); - - pat_types - .typ - .unify(ty_let.clone(), state, &start_ctxt) - .map_err(|e| e.into_typecheck_err(state, re.pos))?; - - if let Some(alias) = &pat.alias { - visitor.visit_ident(alias, ty_let.clone()); - ctxt.type_env.insert(alias.ident(), ty_let.clone()); - } - - for (id, typ) in pat_types.bindings { - visitor.visit_ident(&id, typ.clone()); - ctxt.type_env.insert(id.ident(), typ); - } - tys.push((re, ty_let)); - } - - let re_ctxt = if attrs.rec { &ctxt } else { &start_ctxt }; - for (re, ty_let) in tys { - check(state, re_ctxt.clone(), visitor, re, ty_let)?; - } - - check(state, ctxt, visitor, rt, ty) - } - Term::Match(data) => { - // [^typechecking-match-expression]: We can associate a type to each pattern of each - // case of the match expression. From there, the type of a valid argument for the match - // expression is ideally the union of each pattern type. - // - // For record types, we don't have a good way to express union: for example, what could - // be the type of something that is either `{x : a}` or `{y : a}`? 
In the case of - // record types, we thus just take the intersection of the types, which amounts to - // unify all pattern types together. While it might fail most of the time (including - // for the `{x}` and `{y}` example), it can still typecheck interesting expressions - // when the record pattern are similar enough: - // - // ```nickel - // x |> match { - // {foo, bar: 'Baz} => - // {foo, bar: 'Qux} => - // } - // ``` - // - // We can definitely find a type for `x`: `{foo: a, bar: [| 'Baz, 'Qux |]}`. - // - // For enum types, we can express union: for example, the union of `[|'Foo, 'Bar|]` and - // `[|'Bar, 'Baz|]` is `[|'Foo, 'Bar, 'Baz|]`. We can even turn this into a unification - // problem: "open" the initial row types as `[| 'Foo, 'Bar; ?a |]` and `[|'Bar, 'Baz; - // ?b |]`, unify them together, and close the result (unify the tail with an empty row - // tail). The advantage of this approach is that unification takes care of descending - // into record types and sub-patterns to perform this operation, and we're back to the - // same procedure (almost) than for record patterns: simply unify all pattern types. - // Although we have additional bookkeeping to perform (remember the tail variables - // introduced to open enum rows and close the corresponding rows at the end of the - // procedure). - - // We zip the pattern types with each branch - let with_pat_types = data - .branches - .iter() - .map(|branch| -> Result<_, TypecheckError> { - Ok(( - branch, - branch - .pattern - .pattern_types(state, &ctxt, TypecheckMode::Enforce)?, - )) - }) - .collect::)>, _>>()?; - - // A match expression is a special kind of function. Thus it's typed as `a -> b`, where - // `a` is a type determined by the patterns and `b` is the type of each match arm. 
- let arg_type = state.table.fresh_type_uvar(ctxt.var_level); - let return_type = state.table.fresh_type_uvar(ctxt.var_level); - - // Express the constraint that all the arms of the match expression should have a - // compatible type and that each guard must be a boolean. - for ( - MatchBranch { - pattern, - guard, - body, - }, - pat_types, - ) in with_pat_types.iter() - { - if let Some(alias) = &pattern.alias { - visitor.visit_ident(alias, return_type.clone()); - ctxt.type_env.insert(alias.ident(), return_type.clone()); - } - - for (id, typ) in pat_types.bindings.iter() { - visitor.visit_ident(id, typ.clone()); - ctxt.type_env.insert(id.ident(), typ.clone()); - } - - if let Some(guard) = guard { - check(state, ctxt.clone(), visitor, guard, mk_uniftype::bool())?; - } - - check(state, ctxt.clone(), visitor, body, return_type.clone())?; - } - - let pat_types = with_pat_types.into_iter().map(|(_, pat_types)| pat_types); - - // Unify all the pattern types with the argument's type, and build the list of all open - // tail vars - let mut enum_open_tails = Vec::with_capacity( - pat_types - .clone() - .map(|pat_type| pat_type.enum_open_tails.len()) - .sum(), - ); - - // Build the list of all wildcard pattern occurrences - let mut wildcard_occurrences = HashSet::with_capacity( - pat_types - .clone() - .map(|pat_type| pat_type.wildcard_occurrences.len()) - .sum(), - ); - - // We don't immediately return if an error occurs while unifying the patterns together. - // For error reporting purposes, it's best to first close the tail variables (if - // needed), to avoid cluttering the reported types with free unification variables - // which are mostly an artifact of our implementation of typechecking pattern matching. 
- let pat_unif_result: Result<(), UnifError> = - pat_types.into_iter().try_for_each(|pat_type| { - arg_type.clone().unify(pat_type.typ, state, &ctxt)?; - - for (id, typ) in pat_type.bindings { - visitor.visit_ident(&id, typ.clone()); - ctxt.type_env.insert(id.ident(), typ); - } - - enum_open_tails.extend(pat_type.enum_open_tails); - wildcard_occurrences.extend(pat_type.wildcard_occurrences); - - Ok(()) - }); - - // Once we have accumulated all the information about enum rows and wildcard - // occurrences, we can finally close the tails that need to be. - pattern::close_enums(enum_open_tails, &wildcard_occurrences, state); - - // And finally fail if there was an error. - pat_unif_result.map_err(|err| err.into_typecheck_err(state, rt.pos))?; - - // We unify the expected type of the match expression with `arg_type -> return_type`. - // - // This must happen last, or at least after having closed the tails: otherwise, the - // enum type inferred for the argument could be unduly generalized. For example, take: - // - // ``` - // let exp : forall r. [| 'Foo; r |] -> Dyn = match { 'Foo => null } - // ``` - // - // This must not typecheck, as the match expression doesn't have a default case, and - // its type is thus `[| 'Foo |] -> Dyn`. However, during the typechecking of the match - // expression, before tails are closed, the working type is `[| 'Foo; _erows_a |]`, - // which can definitely unify with `[| 'Foo; r |]` while the tail is still open. If we - // close the tail first, then the type becomes [| 'Foo |] and the generalization fails - // as desired. - // - // As a safety net, the tail closing code panics (in debug mode) if it finds a rigid - // type variable at the end of the tail of a pattern type, which would happen if we - // somehow generalized an enum row type variable before properly closing the tails - // before. 
- ty.unify( - mk_uty_arrow!(arg_type.clone(), return_type.clone()), - state, - &ctxt, - ) - .map_err(|err| err.into_typecheck_err(state, rt.pos))?; - - Ok(()) - } - // Elimination forms (variable, function application and primitive operator application) - // follow the inference discipline, following the Pfennig recipe and the current type - // system specification (as far as typechecking is concerned, primitive operator - // application is the same as function application). - Term::Var(_) - | Term::App(..) - | Term::Op1(..) - | Term::Op2(..) - | Term::OpN(..) - | Term::Annotated(..) => { - let inferred = infer(state, ctxt.clone(), visitor, rt)?; - - // We apply the subsumption rule when switching from infer mode to checking mode. - inferred - .subsumed_by(ty, state, ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos)) - } - Term::Enum(id) => { - let row = state.table.fresh_erows_uvar(ctxt.var_level); - ty.unify(mk_uty_enum!(*id; row), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos)) - } - Term::EnumVariant { tag, arg, .. } => { - let row_tail = state.table.fresh_erows_uvar(ctxt.var_level); - let ty_arg = state.table.fresh_type_uvar(ctxt.var_level); - - // We match the expected type against `[| 'id ty_arg; row_tail |]`, where `row_tail` is - // a free unification variable, to ensure it has the right shape and extract the - // components. - ty.unify(mk_uty_enum!((*tag, ty_arg.clone()); row_tail), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos))?; - - // Once we have a type for the argument, we check the variant's data against it. - check(state, ctxt, visitor, arg, ty_arg) - } - // If some fields are defined dynamically, the only potential type that works is `{_ : a}` - // for some `a`. In other words, the checking rule is not the same depending on the target - // type: if the target type is a dictionary type, we simply check each field against the - // element type. - Term::RecRecord(record, dynamic, ..) 
if !dynamic.is_empty() => { - let ty_dict = state.table.fresh_type_uvar(ctxt.var_level); - ty.unify(mk_uniftype::dict(ty_dict.clone()), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos))?; - - for id in record.fields.keys() { - ctxt.type_env.insert(id.ident(), ty_dict.clone()); - visitor.visit_ident(id, ty_dict.clone()) - } - - // We don't bind recursive fields in the term environment used to check for contract. - // See the recursive let case in `walk`. - record - .fields - .iter() - .try_for_each(|(id, field)| -> Result<(), TypecheckError> { - check_field(state, ctxt.clone(), visitor, *id, field, ty_dict.clone()) - }) - } - Term::Record(record) | Term::RecRecord(record, ..) => { - // For recursive records, we look at the apparent type of each field and bind it in - // ctxt before actually typechecking the content of fields. - // - // Fields defined by interpolation are ignored, because they can't be referred to - // recursively. - - // When we build the recursive environment, there are two different possibilities for - // each field: - // - // 1. The field is annotated. In this case, we use this type to build the type - // environment. We don't need to do any additional check that the field respects - // this annotation: this will be handled by `check_field` when processing the field. - // 2. The field isn't annotated. We are going to infer a concrete type later, but for - // now, we allocate a fresh unification variable in the type environment. In this - // case, once we have inferred an actual type for this field, we need to unify - // what's inside the environment with the actual type to ensure that they agree. - // - // `need_unif_step` stores the list of fields corresponding to the case 2, which - // require this additional unification step. Note that performing the additional - // unification in case 1. 
should be harmless, but it's wasteful, and is also not - // entirely trivial because of polymorphism (we need to make sure to instantiate - // polymorphic type annotations). So it's simpler to just skip it in this case. - let mut need_unif_step = HashSet::new(); - if let Term::RecRecord(..) = t.as_ref() { - for (id, field) in &record.fields { - let uty_apprt = - field_apparent_type(field, Some(&ctxt.type_env), Some(state.resolver)); - - // `Approximated` corresponds to the case where the type isn't obvious - // (annotation or constant), and thus to case 2. above - if matches!(uty_apprt, ApparentType::Approximated(_)) { - need_unif_step.insert(*id); - } - - let uty = apparent_or_infer(state, uty_apprt, &ctxt, true); - ctxt.type_env.insert(id.ident(), uty.clone()); - visitor.visit_ident(id, uty); - } - } - - let root_ty = ty.clone().into_root(state.table); - - if let UnifType::Concrete { - typ: - TypeF::Dict { - type_fields: rec_ty, - .. - }, - .. - } = root_ty - { - // Checking for a dictionary - record - .fields - .iter() - .try_for_each(|(id, field)| -> Result<(), TypecheckError> { - check_field(state, ctxt.clone(), visitor, *id, field, (*rec_ty).clone()) - }) - } else { - // Building the type {id1 : ?a1, id2: ?a2, .., idn: ?an} - let mut field_types: IndexMap = record - .fields - .keys() - .map(|id| (*id, state.table.fresh_type_uvar(ctxt.var_level))) - .collect(); - - let rows = field_types.iter().fold( - mk_uty_record_row!(), - |acc, (id, row_ty)| mk_uty_record_row!((*id, row_ty.clone()); acc), - ); - - ty.unify(mk_uty_record!(; rows), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos))?; - - for (id, field) in record.fields.iter() { - // For a recursive record and a field which requires the additional unification - // step (whose type wasn't known when building the recursive environment), we - // unify the actual type with the type affected in the typing environment - // (which started as a fresh unification variable, but might have been 
unified - // with a more concrete type if the current field has been used recursively - // from other fields). - if matches!(t.as_ref(), Term::RecRecord(..)) && need_unif_step.contains(id) { - let affected_type = ctxt.type_env.get(&id.ident()).cloned().unwrap(); - - field_types - .get(id) - .cloned() - .unwrap() - .unify(affected_type, state, &ctxt) - .map_err(|err| { - err.into_typecheck_err( - state, - field.value.as_ref().map(|v| v.pos).unwrap_or_default(), - ) - })?; - } - - check_field( - state, - ctxt.clone(), - visitor, - *id, - field, - // expect(): we've built `rows` in this very function from - // record.fields.keys(), so it must contain `id` - field_types.remove(id).expect( - "inserted `id` inside the `field_types` hashmap previously; \ - expected it to be there", - ), - )?; - } - - Ok(()) - } - } - - Term::ForeignId(_) => ty - .unify(mk_uniftype::foreign_id(), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos)), - Term::SealingKey(_) => ty - .unify(mk_uniftype::sym(), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos)), - Term::Sealed(_, t, _) => check(state, ctxt, visitor, t, ty), - Term::Import(_) => ty - .unify(mk_uniftype::dynamic(), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos)), - // We use the apparent type of the import for checking. This function doesn't recursively - // typecheck imports: this is the responsibility of the caller. 
- Term::ResolvedImport(file_id) => { - let t = state - .resolver - .get(*file_id) - .expect("Internal error: resolved import not found during typechecking."); - let ty_import: UnifType = UnifType::from_apparent_type( - apparent_type(t.as_ref(), Some(&ctxt.type_env), Some(state.resolver)), - &ctxt.term_env, - ); - ty.unify(ty_import, state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, rt.pos)) - } - Term::Type { typ, contract: _ } => { - if let Some(contract) = typ.find_contract() { - Err(TypecheckError::CtrTypeInTermPos { - contract, - pos: *pos, - }) - } else { - Ok(()) - } - } - Term::Closure(_) => unreachable!("should never see a closure at typechecking time"), - } -} - -fn check_field( - state: &mut State, - ctxt: Context, - visitor: &mut V, - id: LocIdent, - field: &Field, - ty: UnifType, -) -> Result<(), TypecheckError> { - // If there's no annotation, we simply check the underlying value, if any. - if field.metadata.annotation.is_empty() { - if let Some(value) = field.value.as_ref() { - check(state, ctxt, visitor, value, ty) - } else { - // It might make sense to accept any type for a value without definition (which would - // act a bit like a function parameter). 
But for now, we play safe and implement a more - // restrictive rule, which is that a value without a definition has type `Dyn` - ty.unify(mk_uniftype::dynamic(), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, id.pos)) - } - } else { - let pos = field.value.as_ref().map(|v| v.pos).unwrap_or(id.pos); - - let inferred = infer_with_annot( - state, - ctxt.clone(), - visitor, - &field.metadata.annotation, - field.value.as_ref(), - )?; - - inferred - .subsumed_by(ty, state, ctxt) - .map_err(|err| err.into_typecheck_err(state, pos)) - } -} - -fn infer_annotated( - state: &mut State, - ctxt: Context, - visitor: &mut V, - annot: &TypeAnnotation, - rt: &RichTerm, -) -> Result { - infer_with_annot(state, ctxt, visitor, annot, Some(rt)) -} - -/// Function handling the common part of inferring the type of terms with type or contract -/// annotation, with or without definitions. This encompasses both standalone type annotation -/// (where `value` is always `Some(_)`) as well as field definitions (where `value` may or may not -/// be defined). -/// -/// As for [check_visited] and [infer_visited], the additional `item_id` is provided when the term -/// has been added to the visitor before but can still benefit from updating its information -/// with the inferred type. -fn infer_with_annot( - state: &mut State, - ctxt: Context, - visitor: &mut V, - annot: &TypeAnnotation, - value: Option<&RichTerm>, -) -> Result { - annot - .iter() - .try_for_each(|ty| walk_type(state, ctxt.clone(), visitor, &ty.typ))?; - - match (annot, value) { - ( - TypeAnnotation { - typ: Some(LabeledType { typ: ty2, .. }), - .. - }, - Some(value), - ) => { - let uty2 = UnifType::from_type(ty2.clone(), &ctxt.term_env); - - visitor.visit_term(value, uty2.clone()); - - check(state, ctxt, visitor, value, uty2.clone())?; - Ok(uty2) - } - // An annotation without a type but with a contract switches the typechecker back to walk - // mode. 
If there are several contracts, we arbitrarily chose the first one as the apparent - // type (the most precise type would be the intersection of all contracts, but Nickel's - // type system doesn't feature intersection types). - ( - TypeAnnotation { - typ: None, - contracts, - }, - value_opt, - ) if !contracts.is_empty() => { - let ctr = contracts.first().unwrap(); - let LabeledType { typ: ty2, .. } = ctr; - - let uty2 = UnifType::from_type(ty2.clone(), &ctxt.term_env); - - if let Some(value) = &value_opt { - visitor.visit_term(value, uty2.clone()); - } - - // If there's an inner value, we have to walk it, as it may contain statically typed - // blocks. - if let Some(value) = value_opt { - walk(state, ctxt, visitor, value)?; - } - - Ok(uty2) - } - // A non-empty value without a type or a contract annotation is typechecked in the same way - // as its inner value. This case should only happen for record fields, as the parser can't - // produce an annotated term without an actual annotation. Still, such terms could be - // produced programmatically, and aren't necessarily an issue. - (_, Some(value)) => infer(state, ctxt, visitor, value), - // An empty value is a record field without definition. We don't check anything, and infer - // its type to be either the first annotation defined if any, or `Dyn` otherwise. - // We can only hit this case for record fields. - _ => { - let inferred = annot - .first() - .map(|labeled_ty| UnifType::from_type(labeled_ty.typ.clone(), &ctxt.term_env)) - .unwrap_or_else(mk_uniftype::dynamic); - Ok(inferred) - } - } -} - -/// Infer a type for an expression. -/// -/// `infer` corresponds to the inference mode of bidirectional typechecking. Nickel uses a mix of -/// bidirectional typechecking and traditional ML-like unification. 
-fn infer( - state: &mut State, - mut ctxt: Context, - visitor: &mut V, - rt: &RichTerm, -) -> Result { - let RichTerm { term, pos } = rt; - - match term.as_ref() { - Term::Var(x) => { - let x_ty = ctxt - .type_env - .get(&x.ident()) - .cloned() - .ok_or(TypecheckError::UnboundIdentifier { id: *x, pos: *pos })?; - - visitor.visit_term(rt, x_ty.clone()); - - Ok(x_ty) - } - // Theoretically, we need to instantiate the type of the head of the primop application, - // that is, the primop itself. In practice, `get_uop_type`,`get_bop_type` and - // `get_nop_type` return types that are already instantiated with free unification - // variables, to save building a polymorphic type to only instantiate it immediately. Thus, - // the type of a primop is currently always monomorphic. - Term::Op1(op, t) => { - let (ty_arg, ty_res) = get_uop_type(state, ctxt.var_level, op)?; - - visitor.visit_term(rt, ty_res.clone()); - - check(state, ctxt.clone(), visitor, t, ty_arg)?; - - Ok(ty_res) - } - Term::Op2(op, t1, t2) => { - let (ty_arg1, ty_arg2, ty_res) = get_bop_type(state, ctxt.var_level, op)?; - - visitor.visit_term(rt, ty_res.clone()); - - check(state, ctxt.clone(), visitor, t1, ty_arg1)?; - check(state, ctxt.clone(), visitor, t2, ty_arg2)?; - - Ok(ty_res) - } - Term::OpN(op, args) => { - let (tys_args, ty_res) = get_nop_type(state, ctxt.var_level, op)?; - - visitor.visit_term(rt, ty_res.clone()); - - tys_args.into_iter().zip(args.iter()).try_for_each( - |(ty_arg, arg)| -> Result<_, TypecheckError> { - check(state, ctxt.clone(), visitor, arg, ty_arg)?; - Ok(()) - }, - )?; - - Ok(ty_res) - } - Term::App(e, t) => { - // If we go the full Quick Look route (cf [quick-look] and the Nickel type system - // specification), we will have a more advanced and specific rule to guess the - // instantiation of the potentially polymorphic type of the head of the application. 
- // Currently, we limit ourselves to predicative instantiation, and we can get away - // with eagerly instantiating heading `foralls` with fresh unification variables. - let head_poly = infer(state, ctxt.clone(), visitor, e)?; - let head = instantiate_foralls(state, &mut ctxt, head_poly, ForallInst::UnifVar); - - let dom = state.table.fresh_type_uvar(ctxt.var_level); - let codom = state.table.fresh_type_uvar(ctxt.var_level); - let arrow = mk_uty_arrow!(dom.clone(), codom.clone()); - - // "Match" the type of the head with `dom -> codom` - arrow - .unify(head, state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, e.pos))?; - - visitor.visit_term(rt, codom.clone()); - - check(state, ctxt.clone(), visitor, t, dom)?; - Ok(codom) - } - Term::Annotated(annot, rt) => infer_annotated(state, ctxt, visitor, annot, rt), - _ => { - // The remaining cases can't produce polymorphic types, and thus we can reuse the - // checking code. Inferring the type for those rules is equivalent to checking against - // a free unification variable. This saves use from duplicating all the remaining - // cases. - let inferred = state.table.fresh_type_uvar(ctxt.var_level); - - visitor.visit_term(rt, inferred.clone()); - - check(state, ctxt, visitor, rt, inferred.clone())?; - Ok(inferred.into_root(state.table)) - } - } -} - -/// Determine the type of a let-bound expression. -/// -/// Call [`apparent_type`] to see if the binding is annotated. If it is, return this type as a -/// [`UnifType`]. Otherwise: -/// -/// - in walk mode, we won't (and possibly can't) infer the type of `bound_exp`: just return `Dyn`. -/// - in typecheck mode, we will typecheck `bound_exp`: return a new unification variable to be -/// associated to `bound_exp`. -/// -/// As this function is always called in a context where an `ImportResolver` is present, expect it -/// passed in arguments. -/// -/// If the annotated type contains any wildcard: -/// -/// - in non strict mode, wildcards are assigned `Dyn`. 
-/// - in strict mode, the wildcard is typechecked, and we return the unification variable -/// corresponding to it. -fn binding_type(state: &mut State, t: &Term, ctxt: &Context, strict: bool) -> UnifType { - apparent_or_infer( - state, - apparent_type(t, Some(&ctxt.type_env), Some(state.resolver)), - ctxt, - strict, - ) -} - -/// Same as `binding_type` but for record field definition. -fn field_type(state: &mut State, field: &Field, ctxt: &Context, strict: bool) -> UnifType { - apparent_or_infer( - state, - field_apparent_type(field, Some(&ctxt.type_env), Some(state.resolver)), - ctxt, - strict, - ) -} - -/// Either returns the exact type annotation extracted as an apparent type, or return a fresh -/// unification variable, for the type to be inferred by the typechecker, in enforce mode. -/// -/// In walk mode, returns the type as approximated by [`apparent_type`]. -fn apparent_or_infer( - state: &mut State, - aty: ApparentType, - ctxt: &Context, - strict: bool, -) -> UnifType { - match aty { - ApparentType::Annotated(ty) if strict => { - replace_wildcards_with_var(state.table, ctxt, state.wildcard_vars, ty) - } - ApparentType::Approximated(_) if strict => state.table.fresh_type_uvar(ctxt.var_level), - ty_apt => UnifType::from_apparent_type(ty_apt, &ctxt.term_env), - } -} - -/// Substitute wildcards in a type for their unification variable. 
-fn replace_wildcards_with_var( - table: &mut UnifTable, - ctxt: &Context, - wildcard_vars: &mut Vec, - ty: Type, -) -> UnifType { - fn replace_rrows( - table: &mut UnifTable, - ctxt: &Context, - wildcard_vars: &mut Vec, - rrows: RecordRows, - ) -> UnifRecordRows { - UnifRecordRows::concrete(rrows.0.map_state( - |ty, (table, wildcard_vars)| { - Box::new(replace_wildcards_with_var(table, ctxt, wildcard_vars, *ty)) - }, - |rrows, (table, wildcard_vars)| { - Box::new(replace_rrows(table, ctxt, wildcard_vars, *rrows)) - }, - &mut (table, wildcard_vars), - )) - } - - fn replace_erows( - table: &mut UnifTable, - ctxt: &Context, - wildcard_vars: &mut Vec, - erows: EnumRows, - ) -> UnifEnumRows { - UnifEnumRows::concrete(erows.0.map_state( - |ty, (table, wildcard_vars)| { - Box::new(replace_wildcards_with_var(table, ctxt, wildcard_vars, *ty)) - }, - |erows, (table, wildcard_vars)| { - Box::new(replace_erows(table, ctxt, wildcard_vars, *erows)) - }, - &mut (table, wildcard_vars), - )) - } - - match ty.typ { - TypeF::Wildcard(i) => get_wildcard_var(table, ctxt.var_level, wildcard_vars, i), - _ => UnifType::concrete(ty.typ.map_state( - |ty, (table, wildcard_vars)| { - Box::new(replace_wildcards_with_var(table, ctxt, wildcard_vars, *ty)) - }, - |rrows, (table, wildcard_vars)| replace_rrows(table, ctxt, wildcard_vars, rrows), - // Enum rows contain neither wildcards nor contracts - |erows, (table, wildcard_vars)| replace_erows(table, ctxt, wildcard_vars, erows), - |ctr, _| (ctr, ctxt.term_env.clone()), - &mut (table, wildcard_vars), - )), - } -} - -/// Different kinds of apparent types (see [`apparent_type`]). -/// -/// Indicate the nature of an apparent type. In particular, when in enforce mode, the typechecker -/// throws away approximations as it can do better and infer the actual type of an expression. In -/// walk mode, however, the approximation is the best we can do. This type allows the caller of -/// `apparent_type` to determine which situation it is. 
-#[derive(Debug)] -pub enum ApparentType { - /// The apparent type is given by a user-provided annotation. - Annotated(Type), - /// The apparent type has been inferred from a simple expression. - Inferred(Type), - /// The term is a variable and its type was retrieved from the typing environment. - FromEnv(UnifType), - /// The apparent type wasn't trivial to determine, and an approximation (most of the time, - /// `Dyn`) has been returned. - Approximated(Type), -} - -impl From for Type { - fn from(at: ApparentType) -> Self { - match at { - ApparentType::Annotated(ty) if has_wildcards(&ty) => Type::from(TypeF::Dyn), - ApparentType::Annotated(ty) - | ApparentType::Inferred(ty) - | ApparentType::Approximated(ty) => ty, - ApparentType::FromEnv(uty) => uty.try_into().ok().unwrap_or(Type::from(TypeF::Dyn)), - } - } -} - -/// Return the apparent type of a field, by first looking at the type annotation, if any, then at -/// the contracts annotation, and if there is none, fall back to the apparent type of the value. If -/// there is no value, `Approximated(Dyn)` is returned. -fn field_apparent_type( - field: &Field, - env: Option<&Environment>, - resolver: Option<&dyn ImportResolver>, -) -> ApparentType { - field - .metadata - .annotation - .first() - .cloned() - .map(|labeled_ty| ApparentType::Annotated(labeled_ty.typ)) - .or_else(|| { - field - .value - .as_ref() - .map(|v| apparent_type(v.as_ref(), env, resolver)) - }) - .unwrap_or(ApparentType::Approximated(Type::from(TypeF::Dyn))) -} - -/// Determine the apparent type of a let-bound expression. -/// -/// When a let-binding `let x = bound_exp in body` is processed, the type of `bound_exp` must be -/// determined in order to be bound to the variable `x` in the typing environment. -/// Then, future occurrences of `x` can be given this type when used in a statically typed block. 
-/// -/// The role of `apparent_type` is precisely to determine the type of `bound_exp`: -/// - if `bound_exp` is annotated by a type or contract annotation, return the user-provided type, -/// unless that type is a wildcard. -/// - if `bound_exp` is a constant (string, number, boolean or symbol) which type can be deduced -/// directly without unfolding the expression further, return the corresponding exact type. -/// - if `bound_exp` is an array, return `Array Dyn`. -/// - if `bound_exp` is a resolved import, return the apparent type of the imported term. Returns -/// `Dyn` if the resolver is not passed as a parameter to the function. -/// - Otherwise, return an approximation of the type (currently `Dyn`, but could be more precise in -/// the future, such as `Dyn -> Dyn` for functions, `{ | Dyn}` for records, and so on). -pub fn apparent_type( - t: &Term, - env: Option<&Environment>, - resolver: Option<&dyn ImportResolver>, -) -> ApparentType { - use crate::files::FileId; - - // Check the apparent type while avoiding cycling through direct imports loops. Indeed, - // `apparent_type` tries to see through imported terms. But doing so can lead to an infinite - // loop, for example with the trivial program which imports itself: - // - // ```nickel - // # foo.ncl - // import "foo.ncl" - // ``` - // - // The following function thus remembers what imports have been seen already, and simply - // returns `Dyn` if it detects a cycle. 
- fn apparent_type_check_cycle( - t: &Term, - env: Option<&Environment>, - resolver: Option<&dyn ImportResolver>, - mut imports_seen: HashSet, - ) -> ApparentType { - match t { - Term::Annotated(annot, value) => annot - .first() - .map(|labeled_ty| ApparentType::Annotated(labeled_ty.typ.clone())) - .unwrap_or_else(|| apparent_type(value.as_ref(), env, resolver)), - Term::Num(_) => ApparentType::Inferred(Type::from(TypeF::Number)), - Term::Bool(_) => ApparentType::Inferred(Type::from(TypeF::Bool)), - Term::SealingKey(_) => ApparentType::Inferred(Type::from(TypeF::Symbol)), - Term::Str(_) | Term::StrChunks(_) => ApparentType::Inferred(Type::from(TypeF::String)), - Term::Array(..) => ApparentType::Approximated(Type::from(TypeF::Array(Box::new( - Type::from(TypeF::Dyn), - )))), - Term::Var(id) => env - .and_then(|envs| envs.get(&id.ident()).cloned()) - .map(ApparentType::FromEnv) - .unwrap_or(ApparentType::Approximated(Type::from(TypeF::Dyn))), - Term::ResolvedImport(file_id) => match resolver { - Some(r) if !imports_seen.contains(file_id) => { - imports_seen.insert(*file_id); - - let t = r - .get(*file_id) - .expect("Internal error: resolved import not found during typechecking."); - apparent_type_check_cycle(&t.term, env, Some(r), imports_seen) - } - _ => ApparentType::Approximated(Type::from(TypeF::Dyn)), - }, - _ => ApparentType::Approximated(Type::from(TypeF::Dyn)), - } - } - - apparent_type_check_cycle(t, env, resolver, HashSet::new()) -} - -/// Infer the type of a non-annotated record by recursing inside gathering the apparent type of the -/// fields. It's currently used essentially to type the stdlib. -/// -/// # Parameters -/// -/// - `rt`: the term to infer a type for -/// - `term_env`: the current term environment, used for contracts equality -/// - `max_depth`: the max recursion depth. `infer_record_type` descends into sub-records, as long -/// as it only encounters nested record literals. 
`max_depth` is used to control this behavior -/// and cap the work that `infer_record_type` might do. -pub fn infer_record_type( - rt: &RichTerm, - term_env: &SimpleTermEnvironment, - max_depth: u8, -) -> UnifType { - match rt.as_ref() { - Term::Record(record) | Term::RecRecord(record, ..) if max_depth > 0 => UnifType::from( - TypeF::Record(UnifRecordRows::concrete(record.fields.iter().fold( - RecordRowsF::Empty, - |r, (id, field)| { - let uty = match field_apparent_type(field, None, None) { - ApparentType::Annotated(ty) => UnifType::from_type(ty, term_env), - ApparentType::FromEnv(uty) => uty, - // If we haven't reached max_depth yet, and the type is only approximated, - // we try to recursively infer a better type. - ApparentType::Inferred(ty) | ApparentType::Approximated(ty) - if max_depth > 0 => - { - field - .value - .as_ref() - .map(|v| infer_record_type(v, term_env, max_depth - 1)) - .unwrap_or(UnifType::from_type(ty, term_env)) - } - ApparentType::Inferred(ty) | ApparentType::Approximated(ty) => { - UnifType::from_type(ty, term_env) - } - }; - - RecordRowsF::Extend { - row: UnifRecordRow { - id: *id, - typ: Box::new(uty), - }, - tail: Box::new(r.into()), - } - }, - ))), - ), - t => UnifType::from_apparent_type( - apparent_type(t, None, None), - &SimpleTermEnvironment::new(), - ), - } -} - -/// Deeply check whether a type contains a wildcard. -fn has_wildcards(ty: &Type) -> bool { - let mut has_wildcard = false; - ty.clone() - .traverse( - &mut |ty: Type| { - if ty.typ.is_wildcard() { - has_wildcard = true; - } - Ok::<_, Infallible>(ty) - }, - TraverseOrder::TopDown, - ) - .unwrap(); - has_wildcard -} - -/// Type of the parameter controlling instantiation of foralls. -/// -/// See [`instantiate_foralls`]. -#[derive(Copy, Clone, Debug, PartialEq)] -enum ForallInst { - Constant, - UnifVar, -} - -/// Instantiate the type variables which are quantified in head position with either unification -/// variables or type constants. 
-/// -/// For example, if `inst` is `Constant`, `forall a. forall b. a -> (forall c. b -> c)` is -/// transformed to `cst1 -> (forall c. cst2 -> c)` where `cst1` and `cst2` are fresh type -/// constants. This is used when typechecking `forall`s: all quantified type variables in head -/// position are replaced by rigid type constants, and the term is then typechecked normally. As -/// these constants cannot be unified with anything, this forces all the occurrences of a type -/// variable to be the same type. -/// -/// # Parameters -/// -/// - `state`: the unification state -/// - `ty`: the polymorphic type to instantiate -/// - `inst`: the type of instantiation, either by a type constant or by a unification variable -fn instantiate_foralls( - state: &mut State, - ctxt: &mut Context, - mut ty: UnifType, - inst: ForallInst, -) -> UnifType { - ty = ty.into_root(state.table); - - // We are instantiating a polymorphic type: it's precisely the place where we have to increment - // the variable level, to prevent already existing unification variables to unify with the - // rigid type variables introduced here. - // - // As this function can be called on monomorphic types, we only increment the level when we - // really introduce a new block of rigid type variables. - if matches!( - ty, - UnifType::Concrete { - typ: TypeF::Forall { .. }, - .. - } - ) { - ctxt.var_level.incr(); - } - - while let UnifType::Concrete { - typ: TypeF::Forall { - var, - var_kind, - body, - }, - .. 
- } = ty - { - let kind: VarKindDiscriminant = (&var_kind).into(); - - match var_kind { - VarKind::Type => { - let fresh_uid = state.table.fresh_type_var_id(ctxt.var_level); - let uvar = match inst { - ForallInst::Constant => UnifType::Constant(fresh_uid), - ForallInst::UnifVar => UnifType::UnifVar { - id: fresh_uid, - init_level: ctxt.var_level, - }, - }; - state.names.insert((fresh_uid, kind), var.ident()); - ty = body.subst(&var, &uvar); - } - VarKind::RecordRows { excluded } => { - let fresh_uid = state.table.fresh_rrows_var_id(ctxt.var_level); - let uvar = match inst { - ForallInst::Constant => UnifRecordRows::Constant(fresh_uid), - ForallInst::UnifVar => UnifRecordRows::UnifVar { - id: fresh_uid, - init_level: ctxt.var_level, - }, - }; - state.names.insert((fresh_uid, kind), var.ident()); - ty = body.subst(&var, &uvar); - - if inst == ForallInst::UnifVar { - state.constr.insert(fresh_uid, excluded); - } - } - VarKind::EnumRows { excluded } => { - let fresh_uid = state.table.fresh_erows_var_id(ctxt.var_level); - let uvar = match inst { - ForallInst::Constant => UnifEnumRows::Constant(fresh_uid), - ForallInst::UnifVar => UnifEnumRows::UnifVar { - id: fresh_uid, - init_level: ctxt.var_level, - }, - }; - state.names.insert((fresh_uid, kind), var.ident()); - ty = body.subst(&var, &uvar); - - if inst == ForallInst::UnifVar { - state.constr.insert(fresh_uid, excluded); - } - } - }; - } - - ty -} - -/// Get the type unification variable associated with a given wildcard ID. -fn get_wildcard_var( - table: &mut UnifTable, - var_level: VarLevel, - wildcard_vars: &mut Vec, - id: VarId, -) -> UnifType { - // If `id` is not in `wildcard_vars`, populate it with fresh vars up to `id` - if id >= wildcard_vars.len() { - wildcard_vars.extend((wildcard_vars.len()..=id).map(|_| table.fresh_type_uvar(var_level))); - } - wildcard_vars[id].clone() -} - -/// Convert a mapping from wildcard ID to type var, into a mapping from wildcard ID to concrete -/// type. 
-fn wildcard_vars_to_type(wildcard_vars: Vec, table: &UnifTable) -> Wildcards { - wildcard_vars - .into_iter() - .map(|var| var.into_type(table)) - .collect() -} - -/// A visitor trait for receiving callbacks during typechecking. -pub trait TypecheckVisitor { - /// Record the type of a term. - /// - /// It's possible for a single term to be visited multiple times, for example, if type - /// inference kicks in. - fn visit_term(&mut self, _term: &RichTerm, _ty: UnifType) {} - - /// Record the type of a bound identifier. - fn visit_ident(&mut self, _ident: &LocIdent, _new_type: UnifType) {} -} - -/// A do-nothing `TypeCheckVisitor` for when you don't want one. -impl TypecheckVisitor for () {} diff --git a/core/src/typecheck/operation.rs b/core/src/typecheck/operation.rs deleted file mode 100644 index d8cd5b3354..0000000000 --- a/core/src/typecheck/operation.rs +++ /dev/null @@ -1,688 +0,0 @@ -//! Typing of primitive operations. -use super::*; -use crate::position::TermPos; -use crate::{ - error::TypecheckError, - label::{Polarity, TypeVarData}, - term::{BinaryOp, NAryOp, RecordExtKind, UnaryOp}, - typ::TypeF, -}; -use crate::{mk_uty_arrow, mk_uty_enum, mk_uty_record}; - -/// Type of unary operations. -pub fn get_uop_type( - state: &mut State, - var_level: VarLevel, - op: &UnaryOp, -) -> Result<(UnifType, UnifType), TypecheckError> { - Ok(match op { - // forall a. 
bool -> a -> a -> a - UnaryOp::IfThenElse => { - let branches = state.table.fresh_type_uvar(var_level); - - ( - mk_uniftype::bool(), - mk_uty_arrow!(branches.clone(), branches.clone(), branches), - ) - } - // Dyn -> [| 'Number, 'Bool, 'String, 'Enum, 'Function, 'Array, 'Record, 'Label, - // 'ForeignId, 'Type, 'Other |] - UnaryOp::Typeof => ( - mk_uniftype::dynamic(), - mk_uty_enum!( - "Number", - "Bool", - "String", - "Enum", - "Function", - "CustomContract", - "Array", - "Record", - "Label", - "ForeignId", - "Type", - "Other" - ), - ), - // Bool -> Bool -> Bool - UnaryOp::BoolAnd | UnaryOp::BoolOr => { - (mk_uniftype::bool(), mk_uty_arrow!(TypeF::Bool, TypeF::Bool)) - } - // Bool -> Bool - UnaryOp::BoolNot => (mk_uniftype::bool(), mk_uniftype::bool()), - // forall a. Dyn -> a - UnaryOp::Blame => { - let res = state.table.fresh_type_uvar(var_level); - - (mk_uniftype::dynamic(), res) - } - // Dyn -> Polarity - UnaryOp::LabelPol => (mk_uniftype::dynamic(), mk_uty_enum!("Positive", "Negative")), - // forall rows. [| ; rows |] -> [| id ; rows |] - UnaryOp::EnumEmbed(id) => { - let row_var_id = state.table.fresh_erows_var_id(var_level); - let row = UnifEnumRows::UnifVar { - id: row_var_id, - init_level: var_level, - }; - - let domain = mk_uty_enum!(; row.clone()); - let codomain = mk_uty_enum!(*id; row); - - (domain, codomain) - } - // This should not happen, as a match primop is only produced during evaluation. - UnaryOp::TagsOnlyMatch { .. } => panic!("cannot typecheck match primop"), - // Morally, Label -> Label - // Dyn -> Dyn - UnaryOp::LabelFlipPol - | UnaryOp::LabelGoDom - | UnaryOp::LabelGoCodom - | UnaryOp::LabelGoArray - | UnaryOp::LabelGoDict => (mk_uniftype::dynamic(), mk_uniftype::dynamic()), - // forall rows a. { id: a | rows} -> a - UnaryOp::RecordAccess(id) => { - let rows = state.table.fresh_rrows_uvar(var_level); - let res = state.table.fresh_type_uvar(var_level); - - (mk_uty_record!((*id, res.clone()); rows), res) - } - // forall a b. 
Array a -> (a -> b) -> Array b - UnaryOp::ArrayMap => { - let a = state.table.fresh_type_uvar(var_level); - let b = state.table.fresh_type_uvar(var_level); - - let f_type = mk_uty_arrow!(a.clone(), b.clone()); - ( - mk_uniftype::array(a), - mk_uty_arrow!(f_type, mk_uniftype::array(b)), - ) - } - // forall a. Num -> (Num -> a) -> Array a - UnaryOp::ArrayGen => { - let a = state.table.fresh_type_uvar(var_level); - - let f_type = mk_uty_arrow!(TypeF::Number, a.clone()); - ( - mk_uniftype::num(), - mk_uty_arrow!(f_type, mk_uniftype::array(a)), - ) - } - // forall a b. { _ : a} -> (Str -> a -> b) -> { _ : b } - UnaryOp::RecordMap => { - // Assuming f has type Str -> a -> b, - // this has type Dict(a) -> Dict(b) - - let a = state.table.fresh_type_uvar(var_level); - let b = state.table.fresh_type_uvar(var_level); - - let f_type = mk_uty_arrow!(TypeF::String, a.clone(), b.clone()); - ( - mk_uniftype::dict(a), - mk_uty_arrow!(f_type, mk_uniftype::dict(b)), - ) - } - // forall a b. a -> b -> b - UnaryOp::Seq | UnaryOp::DeepSeq => { - let fst = state.table.fresh_type_uvar(var_level); - let snd = state.table.fresh_type_uvar(var_level); - - (fst, mk_uty_arrow!(snd.clone(), snd)) - } - // forall a. Array a -> Num - UnaryOp::ArrayLength => { - let ty_elt = state.table.fresh_type_uvar(var_level); - (mk_uniftype::array(ty_elt), mk_uniftype::num()) - } - // This should not happen, as ChunksConcat() is only produced during evaluation. - UnaryOp::ChunksConcat => panic!("cannot type ChunksConcat()"), - // forall a. { _: a } -> Array Str - UnaryOp::RecordFields(_) => { - let ty_a = state.table.fresh_type_uvar(var_level); - - ( - mk_uniftype::dict(ty_a), - mk_uniftype::array(mk_uniftype::str()), - ) - } - // forall a. 
{ _: a } -> Array a - UnaryOp::RecordValues => { - let ty_a = state.table.fresh_type_uvar(var_level); - - (mk_uniftype::dict(ty_a.clone()), mk_uniftype::array(ty_a)) - } - // Str -> Str - UnaryOp::StringTrim => (mk_uniftype::str(), mk_uniftype::str()), - // Str -> Array Str - UnaryOp::StringChars => (mk_uniftype::str(), mk_uniftype::array(mk_uniftype::str())), - // Str -> Str - UnaryOp::StringUppercase => (mk_uniftype::str(), mk_uniftype::str()), - // Str -> Str - UnaryOp::StringLowercase => (mk_uniftype::str(), mk_uniftype::str()), - // Str -> Num - UnaryOp::StringLength => (mk_uniftype::str(), mk_uniftype::num()), - // Dyn -> Str - UnaryOp::ToString => (mk_uniftype::dynamic(), mk_uniftype::str()), - // Str -> Num - UnaryOp::NumberFromString => (mk_uniftype::str(), mk_uniftype::num()), - // Str -> < | a> for a rigid type variable a - UnaryOp::EnumFromString => ( - mk_uniftype::str(), - mk_uty_enum!(; state.table.fresh_erows_const(var_level)), - ), - // Str -> Str -> Bool - UnaryOp::StringIsMatch => ( - mk_uniftype::str(), - mk_uty_arrow!(mk_uniftype::str(), mk_uniftype::bool()), - ), - // Str -> Str -> {matched: Str, index: Num, groups: Array Str} - UnaryOp::StringFind => ( - mk_uniftype::str(), - mk_uty_arrow!( - mk_uniftype::str(), - mk_uty_record!( - ("matched", TypeF::String), - ("index", TypeF::Number), - ("groups", mk_uniftype::array(TypeF::String)) - ) - ), - ), - // String -> String -> Array { matched: String, index: Number, groups: Array String } - UnaryOp::StringFindAll => ( - mk_uniftype::str(), - mk_uty_arrow!( - mk_uniftype::str(), - mk_uniftype::array(mk_uty_record!( - ("matched", TypeF::String), - ("index", TypeF::Number), - ("groups", mk_uniftype::array(TypeF::String)) - )) - ), - ), - // Str -> Bool - UnaryOp::StringIsMatchCompiled(_) => (mk_uniftype::str(), mk_uniftype::bool()), - // Str -> {matched: Str, index: Num, groups: Array Str} - UnaryOp::StringFindCompiled(_) => ( - mk_uniftype::str(), - mk_uty_record!( - ("matched", TypeF::String), - 
("index", TypeF::Number), - ("groups", mk_uniftype::array(TypeF::String)) - ), - ), - UnaryOp::StringFindAllCompiled(_) => ( - mk_uniftype::str(), - mk_uniftype::array(mk_uty_record!( - ("matched", TypeF::String), - ("index", TypeF::Number), - ("groups", mk_uniftype::array(TypeF::String)) - )), - ), - // Dyn -> Dyn - UnaryOp::Force { .. } => (mk_uniftype::dynamic(), mk_uniftype::dynamic()), - // forall a. a -> a - UnaryOp::RecDefault => { - let ty = state.table.fresh_type_uvar(var_level); - (ty.clone(), ty) - } - // forall a. a -> a - UnaryOp::RecForce => { - let ty = state.table.fresh_type_uvar(var_level); - (ty.clone(), ty) - } - UnaryOp::RecordEmptyWithTail => (mk_uniftype::dynamic(), mk_uniftype::dynamic()), - // forall a. { _ : a} -> { _ : a } - UnaryOp::RecordFreeze => { - let dict = mk_uniftype::dict(state.table.fresh_type_uvar(var_level)); - (dict.clone(), dict) - } - // forall a. Str -> a -> a - UnaryOp::Trace => { - let ty = state.table.fresh_type_uvar(var_level); - (mk_uniftype::str(), mk_uty_arrow!(ty.clone(), ty)) - } - // Morally: Lbl -> Lbl - // Actual: Dyn -> Dyn - UnaryOp::LabelPushDiag => (mk_uniftype::dynamic(), mk_uniftype::dynamic()), - // Str -> Dyn - #[cfg(feature = "nix-experimental")] - UnaryOp::EvalNix => (mk_uniftype::str(), mk_uniftype::dynamic()), - // Because the tag isn't fixed, we can't really provide a proper static type for this - // primop. - // This isn't a problem, as this operator is mostly internal and pattern matching should be - // used to destructure enum variants. - // Dyn -> Dyn - UnaryOp::EnumGetArg => (mk_uniftype::dynamic(), mk_uniftype::dynamic()), - // String -> (Dyn -> Dyn) - UnaryOp::EnumMakeVariant => ( - mk_uniftype::str(), - mk_uniftype::arrow(mk_uniftype::dynamic(), mk_uniftype::dynamic()), - ), - // Same as `EnumGetArg` just above. - // Dyn -> Dyn - UnaryOp::EnumGetTag => (mk_uniftype::dynamic(), mk_uniftype::dynamic()), - // Note that is_variant breaks parametricity, so it can't get a polymorphic type. 
- // Dyn -> Bool - UnaryOp::EnumIsVariant => (mk_uniftype::dynamic(), mk_uniftype::bool()), - // [crate::term::UnaryOp::PatternBranch] shouldn't appear anywhere in actual code, because its - // second argument can't be properly typechecked: it has unbound variables. However, it's - // not hard to come up with a vague working type for it, so we do. - // forall a. {_ : a} -> Dyn -> Dyn - UnaryOp::PatternBranch => { - let ty_elt = state.table.fresh_type_uvar(var_level); - ( - mk_uniftype::dict(ty_elt), - mk_uty_arrow!(mk_uniftype::dynamic(), mk_uniftype::dynamic()), - ) - } - // -> Dyn - UnaryOp::ContractCustom => (custom_contract_type(), mk_uniftype::dynamic()), - // -> Dyn -> Dyn - UnaryOp::ContractPostprocessResult => ( - custom_contract_ret_type(), - mk_uty_arrow!(mk_uniftype::dynamic(), mk_uniftype::dynamic()), - ), - // Number -> Number - UnaryOp::NumberCos - | UnaryOp::NumberSin - | UnaryOp::NumberTan - | UnaryOp::NumberArcCos - | UnaryOp::NumberArcSin - | UnaryOp::NumberArcTan => (mk_uniftype::num(), mk_uniftype::num()), - }) -} - -/// Type of a binary operation. 
-pub fn get_bop_type( - state: &mut State, - var_level: VarLevel, - op: &BinaryOp, -) -> Result<(UnifType, UnifType, UnifType), TypecheckError> { - Ok(match op { - // Num -> Num -> Num - BinaryOp::Plus | BinaryOp::Sub | BinaryOp::Mult | BinaryOp::Div | BinaryOp::Modulo => { - (mk_uniftype::num(), mk_uniftype::num(), mk_uniftype::num()) - } - // Sym -> Dyn -> Dyn -> Dyn - BinaryOp::Seal => ( - mk_uniftype::sym(), - mk_uniftype::dynamic(), - mk_uty_arrow!(TypeF::Dyn, TypeF::Dyn), - ), - // Str -> Str -> Str - BinaryOp::StringConcat => (mk_uniftype::str(), mk_uniftype::str(), mk_uniftype::str()), - // Ideally: Contract -> Label -> Dyn -> Dyn - // Currently: Dyn -> Dyn -> (Dyn -> Dyn) - BinaryOp::ContractApply => ( - mk_uniftype::dynamic(), - mk_uniftype::dynamic(), - mk_uty_arrow!(mk_uniftype::dynamic(), mk_uniftype::dynamic()), - ), - // Ideally: Contract -> Label -> Dyn -> - // Currently: Dyn -> Dyn -> (Dyn -> ) - BinaryOp::ContractCheck => ( - mk_uniftype::dynamic(), - mk_uniftype::dynamic(), - mk_uty_arrow!(mk_uniftype::dynamic(), custom_contract_ret_type()), - ), - // Ideally: -> Label -> Dyn - // Currently: -> Dyn -> Dyn - BinaryOp::LabelWithErrorData => ( - error_data_type(), - mk_uniftype::dynamic(), - mk_uniftype::dynamic(), - ), - // Sym -> Dyn -> Dyn -> Dyn - BinaryOp::Unseal => ( - mk_uniftype::sym(), - mk_uniftype::dynamic(), - mk_uty_arrow!(TypeF::Dyn, TypeF::Dyn), - ), - // forall a b. a -> b -> Bool - BinaryOp::Eq => ( - state.table.fresh_type_uvar(var_level), - state.table.fresh_type_uvar(var_level), - mk_uniftype::bool(), - ), - // Num -> Num -> Bool - BinaryOp::LessThan | BinaryOp::LessOrEq | BinaryOp::GreaterThan | BinaryOp::GreaterOrEq => { - (mk_uniftype::num(), mk_uniftype::num(), mk_uniftype::bool()) - } - // Str -> Dyn -> Dyn - BinaryOp::LabelGoField => ( - mk_uniftype::str(), - mk_uniftype::dynamic(), - mk_uniftype::dynamic(), - ), - // forall a. 
Str -> { _ : a} -> a - BinaryOp::RecordGet => { - let res = state.table.fresh_type_uvar(var_level); - - (mk_uniftype::str(), mk_uniftype::dict(res.clone()), res) - } - // forall a. Str -> {_ : a} -> a -> {_ : a} - BinaryOp::RecordInsert { - ext_kind: RecordExtKind::WithValue, - .. - } => { - let res = state.table.fresh_type_uvar(var_level); - ( - mk_uniftype::str(), - mk_uniftype::dict(res.clone()), - mk_uty_arrow!(res.clone(), mk_uniftype::dict(res)), - ) - } - // forall a. Str -> {_ : a} -> {_ : a} - BinaryOp::RecordInsert { - ext_kind: RecordExtKind::WithoutValue, - .. - } => { - let res = state.table.fresh_type_uvar(var_level); - ( - mk_uniftype::str(), - mk_uniftype::dict(res.clone()), - mk_uty_arrow!(res.clone(), mk_uniftype::dict(res)), - ) - } - // forall a. Str -> { _ : a } -> { _ : a} - BinaryOp::RecordRemove(_) => { - let res = state.table.fresh_type_uvar(var_level); - ( - mk_uniftype::str(), - mk_uniftype::dict(res.clone()), - mk_uniftype::dict(res), - ) - } - // forall a. Str -> {_: a} -> Bool - BinaryOp::RecordHasField(_) => { - let ty_elt = state.table.fresh_type_uvar(var_level); - ( - mk_uniftype::str(), - mk_uniftype::dict(ty_elt), - mk_uniftype::bool(), - ) - } - // forall a. Str -> {_: a} -> Bool - BinaryOp::RecordFieldIsDefined(_) => { - let ty_elt = state.table.fresh_type_uvar(var_level); - ( - mk_uniftype::str(), - mk_uniftype::dict(ty_elt), - mk_uniftype::bool(), - ) - } - // forall a. Array a -> Array a -> Array a - BinaryOp::ArrayConcat => { - let ty_elt = state.table.fresh_type_uvar(var_level); - let ty_array = mk_uniftype::array(ty_elt); - (ty_array.clone(), ty_array.clone(), ty_array) - } - // forall a. 
Array a -> Num -> a - BinaryOp::ArrayAt => { - let ty_elt = state.table.fresh_type_uvar(var_level); - ( - mk_uniftype::array(ty_elt.clone()), - mk_uniftype::num(), - ty_elt, - ) - } - // Dyn -> Dyn -> Dyn - BinaryOp::Merge(_) => ( - mk_uniftype::dynamic(), - mk_uniftype::dynamic(), - mk_uniftype::dynamic(), - ), - // -> Str -> Str - BinaryOp::Hash => ( - mk_uty_enum!("Md5", "Sha1", "Sha256", "Sha512"), - mk_uniftype::str(), - mk_uniftype::str(), - ), - // forall a. -> a -> Str - BinaryOp::Serialize => { - let ty_input = state.table.fresh_type_uvar(var_level); - ( - mk_uty_enum!("Json", "Yaml", "Toml"), - ty_input, - mk_uniftype::str(), - ) - } - // -> Str -> Dyn - BinaryOp::Deserialize => ( - mk_uty_enum!("Json", "Yaml", "Toml"), - mk_uniftype::str(), - mk_uniftype::dynamic(), - ), - // Num -> Num -> Num - BinaryOp::NumberArcTan2 | BinaryOp::NumberLog | BinaryOp::Pow => { - (mk_uniftype::num(), mk_uniftype::num(), mk_uniftype::num()) - } - // Str -> Str -> Bool - BinaryOp::StringContains => (mk_uniftype::str(), mk_uniftype::str(), mk_uniftype::bool()), - // Str -> Str -> - BinaryOp::StringCompare => ( - mk_uniftype::str(), - mk_uniftype::str(), - mk_uty_enum!("Lesser", "Equal", "Greater"), - ), - // Str -> Str -> Array Str - BinaryOp::StringSplit => ( - mk_uniftype::str(), - mk_uniftype::str(), - mk_uniftype::array(TypeF::String), - ), - // The first argument is a contract, the second is a label. - // forall a. Dyn -> Dyn -> Array a -> Array a - BinaryOp::ContractArrayLazyApp => { - let ty_elt = state.table.fresh_type_uvar(var_level); - let ty_array = mk_uniftype::array(ty_elt); - ( - mk_uniftype::dynamic(), - mk_uniftype::dynamic(), - mk_uty_arrow!(ty_array.clone(), ty_array), - ) - } - // The first argument is a label, the third is a contract. - // forall a. 
Dyn -> {_: a} -> Dyn -> {_: a} - BinaryOp::ContractRecordLazyApp => { - let ty_field = state.table.fresh_type_uvar(var_level); - let ty_dict = mk_uniftype::dict(ty_field); - ( - mk_uniftype::dynamic(), - ty_dict.clone(), - mk_uty_arrow!(mk_uniftype::dynamic(), ty_dict), - ) - } - // Morally: Str -> Lbl -> Lbl - // Actual: Str -> Dyn -> Dyn - BinaryOp::LabelWithMessage => ( - mk_uniftype::str(), - mk_uniftype::dynamic(), - mk_uniftype::dynamic(), - ), - // Morally: Array Str -> Lbl -> Lbl - // Actual: Array Str -> Dyn -> Dyn - BinaryOp::LabelWithNotes => ( - mk_uniftype::array(TypeF::String), - mk_uniftype::dynamic(), - mk_uniftype::dynamic(), - ), - // Morally: Str -> Lbl -> Lbl - // Actual: Str -> Dyn -> Dyn - BinaryOp::LabelAppendNote => ( - mk_uniftype::str(), - mk_uniftype::dynamic(), - mk_uniftype::dynamic(), - ), - // Morally: Sym -> Lbl -> TypeVarData - // Actual: Sym -> Dyn -> TypeVarData - BinaryOp::LabelLookupTypeVar => ( - mk_uniftype::sym(), - mk_uniftype::dynamic(), - TypeVarData::unif_type(), - ), - // {_ : a} -> {_ : a} - // -> { - // left_only: {_ : a}, - // right_only: {_ : a}, - // left_center: {_ : a}, - // right_center: {_ : a}, - // } - BinaryOp::RecordSplitPair => { - let elt = state.table.fresh_type_uvar(var_level); - let dict = mk_uniftype::dict(elt.clone()); - - let split_result = mk_uty_record!( - ("left_only", dict.clone()), - ("right_only", dict.clone()), - ("left_center", dict.clone()), - ("right_center", dict.clone()) - ); - - (dict.clone(), dict, split_result) - } - // {_ : a} -> {_ : a} -> {_ : a} - BinaryOp::RecordDisjointMerge => { - let elt = state.table.fresh_type_uvar(var_level); - let dict = mk_uniftype::dict(elt.clone()); - - (dict.clone(), dict.clone(), dict) - } - }) -} - -pub fn get_nop_type( - state: &mut State, - var_level: VarLevel, - op: &NAryOp, -) -> Result<(Vec, UnifType), TypecheckError> { - Ok(match op { - // Str -> Str -> Str -> Str - NAryOp::StringReplace | NAryOp::StringReplaceRegex => ( - 
vec![mk_uniftype::str(), mk_uniftype::str(), mk_uniftype::str()], - mk_uniftype::str(), - ), - // Str -> Num -> Num -> Str - NAryOp::StringSubstr => ( - vec![mk_uniftype::str(), mk_uniftype::num(), mk_uniftype::num()], - mk_uniftype::str(), - ), - // Dyn -> Dyn -> Dyn -> Dyn -> Dyn - NAryOp::RecordSealTail => ( - vec![ - mk_uniftype::dynamic(), - mk_uniftype::dynamic(), - mk_uniftype::dict(mk_uniftype::dynamic()), - mk_uniftype::dict(mk_uniftype::dynamic()), - ], - mk_uniftype::dynamic(), - ), - // Dyn -> Dyn -> Dyn -> Dyn - NAryOp::RecordUnsealTail => ( - vec![ - mk_uniftype::dynamic(), - mk_uniftype::dynamic(), - mk_uniftype::dict(mk_uniftype::dynamic()), - ], - mk_uniftype::dynamic(), - ), - // Num -> Num -> Array a -> Array a - NAryOp::ArraySlice => { - let element_type = state.table.fresh_type_uvar(var_level); - - ( - vec![ - mk_uniftype::num(), - mk_uniftype::num(), - mk_uniftype::array(element_type.clone()), - ], - mk_uniftype::array(element_type), - ) - } - // Morally: Label -> Record -> Record -> Record - // Actual: Dyn -> Dyn -> Dyn -> Dyn - NAryOp::MergeContract => ( - vec![ - mk_uniftype::dynamic(), - mk_uniftype::dynamic(), - mk_uniftype::dynamic(), - ], - mk_uniftype::dynamic(), - ), - // Morally: Sym -> Polarity -> Lbl -> Lbl - // Actual: Sym -> Polarity -> Dyn -> Dyn - NAryOp::LabelInsertTypeVar => ( - vec![ - mk_uniftype::sym(), - Polarity::unif_type(), - mk_uniftype::dynamic(), - ], - mk_uniftype::dynamic(), - ), - }) -} - -/// The type of a custom contract. In nickel syntax, the returned type is: -/// -/// ```nickel -/// Dyn -> Dyn -> [| -/// 'Ok Dyn, -/// 'Error { message | String | optional, notes | Array String | optional } -/// |] -/// ``` -pub fn custom_contract_type() -> UnifType { - mk_uty_arrow!( - mk_uniftype::dynamic(), - mk_uniftype::dynamic(), - custom_contract_ret_type() - ) -} - -/// The return type of a custom contract. See [custom_contract_type]. 
-/// -/// ```nickel -/// [| -/// 'Ok Dyn, -/// 'Error { message | String | optional, notes | Array String | optional } -/// |] -/// ``` -pub fn custom_contract_ret_type() -> UnifType { - mk_uty_enum!(("Ok", mk_uniftype::dynamic()), ("Error", error_data_type())) -} - -/// The type of error data that can be returned by a custom contract: -/// -/// ```nickel -/// { -/// message -/// | String -/// | optional, -/// notes -/// | Array String -/// | optional -/// } -/// ``` -fn error_data_type() -> UnifType { - use crate::term::make::builder; - - let error_data = builder::Record::new() - .field("message") - .optional() - .contract(TypeF::String) - .no_value() - .field("notes") - .contract(Type { - typ: TypeF::Array(Box::new(Type { - typ: TypeF::String, - pos: TermPos::None, - })), - pos: TermPos::None, - }) - .optional() - .no_value(); - - UnifType::concrete(TypeF::Contract(( - error_data.build(), - SimpleTermEnvironment::new(), - ))) -} diff --git a/core/src/typecheck/pattern.rs b/core/src/typecheck/pattern.rs deleted file mode 100644 index 5dbfd54439..0000000000 --- a/core/src/typecheck/pattern.rs +++ /dev/null @@ -1,649 +0,0 @@ -use crate::{ - error::TypecheckError, - identifier::{Ident, LocIdent}, - mk_uty_record_row, - term::pattern::*, - typ::{EnumRowsF, RecordRowsF, TypeF}, -}; - -use super::*; - -/// A list of pattern variables and their associated type. -pub type TypeBindings = Vec<(LocIdent, UnifType)>; - -/// An element of a pattern path. A pattern path is a sequence of steps that can be used to -/// uniquely locate a sub-pattern within a pattern. -/// -/// For example, in the pattern `{foo={bar='Baz arg}}`: -/// -/// - The path of the full pattern within itself is the empty path. -/// - The path of the `arg` pattern is `[Field("foo"), Field("bar"), Variant]`. 
-#[derive(Debug, Clone, PartialEq, Eq, Copy, Hash)] -pub enum PatternPathElem { - Field(Ident), - Array(usize), - Variant, -} - -pub type PatternPath = Vec; - -/// The working state of [PatternType::pattern_types_inj]. -pub(super) struct PatTypeState<'a> { - /// The list of pattern variables introduced so far and their inferred type. - bindings: &'a mut TypeBindings, - /// The list of enum row tail variables that are left open when typechecking a match expression. - enum_open_tails: &'a mut Vec<(PatternPath, UnifEnumRows)>, - /// Record, as a field path, the position of wildcard pattern encountered in a record. This - /// impact the final type of the pattern, as a wildcard pattern makes the corresponding row - /// open. - wildcard_pat_paths: &'a mut HashSet, -} - -/// Return value of [PatternTypes::pattern_types], which stores the overall type of a pattern, -/// together with the type of its bindings and additional information for the typechecking of match -/// expressions. -#[derive(Debug, Clone)] -pub struct PatternTypeData { - /// The type of the pattern. - pub typ: T, - /// A list of pattern variables and their associated type. - pub bindings: Vec<(LocIdent, UnifType)>, - /// A list of enum row tail variables that are left open when typechecking a match expression. - /// - /// Those variables (or their descendent in a row type) might need to be closed after the type - /// of all the patterns of a match expression have been unified, depending on the presence of a - /// wildcard pattern. The path of the corresponding sub-pattern is stored as well, since enum - /// patterns in different positions might need different treatment. For example: - /// - /// ```nickel - /// match { - /// 'Foo ('Bar x) => , - /// 'Foo ('Qux x) => , - /// _ => - /// } - /// ``` - /// - /// The presence of a default case means that the row variables of top-level enum patterns - /// might stay open. 
However, the type corresponding to the sub-patterns `'Bar x` and `'Qux x` - /// must be closed, because this match expression can't handle `'Foo ('Other 0)`. The type of - /// the match expression is thus `[| 'Foo [| 'Bar: a, 'Qux: b |]; c|] -> d`. - /// - /// Wildcard can occur anywhere, so the previous case can also happen within a record pattern: - /// - /// ```nickel - /// match { - /// {foo = 'Bar x} => , - /// {foo = 'Qux x} => , - /// {foo = _} => , - /// } - /// ``` - /// - /// Similarly, the type of the match expression is `{ foo: [| 'Bar: a, 'Qux: b; c |] } -> e`. - /// - /// See [^typechecking-match-expression] in [typecheck] for more details. - pub enum_open_tails: Vec<(PatternPath, UnifEnumRows)>, - /// Paths of the occurrence of wildcard patterns encountered. This is used to determine which - /// tails in [Self::enum_open_tails] should be left open. - pub wildcard_occurrences: HashSet, -} -/// Close all the enum row types left open when typechecking a match expression. Special case of -/// `close_enums` for a single destructuring pattern (thus, where wildcard occurrences are not -/// relevant). -pub fn close_all_enums(enum_open_tails: Vec<(PatternPath, UnifEnumRows)>, state: &mut State) { - close_enums(enum_open_tails, &HashSet::new(), state); -} - -/// Close all the enum row types left open when typechecking a match expression, unless we recorded -/// a wildcard pattern somewhere in the same position. -pub fn close_enums( - enum_open_tails: Vec<(PatternPath, UnifEnumRows)>, - wildcard_occurrences: &HashSet, - state: &mut State, -) { - // Note: both for this function and for `close_enums`, for a given pattern path, all the tail - // variables should ultimately be part of the same enum type, and we just need to close it - // once. We might thus save a bit of work if we kept equivalence classes of tuples (path, tail) - // (equality being given by the equality of paths). Closing one arbitrary member per class - // should then be enough. 
It's not obvious that this would make any difference in practice, - // though. - for tail in enum_open_tails - .into_iter() - .filter_map(|(path, tail)| (!wildcard_occurrences.contains(&path)).then_some(tail)) - { - close_enum(tail, state); - } -} - -/// Take an enum row, find its final tail (in case of multiple indirection through unification -/// variables) and close it if it's a free unification variable. -fn close_enum(tail: UnifEnumRows, state: &mut State) { - let root = tail.into_root(state.table); - - if let UnifEnumRows::UnifVar { id, .. } = root { - // We don't need to perform any variable level checks when unifying a free - // unification variable with a ground type - state - .table - .assign_erows(id, UnifEnumRows::concrete(EnumRowsF::Empty)); - } else { - let tail = root.iter().find_map(|row_item| { - match row_item { - GenericUnifEnumRowsIteratorItem::TailUnifVar { id, init_level } => { - Some(UnifEnumRows::UnifVar { id, init_level }) - } - GenericUnifEnumRowsIteratorItem::TailVar(_) - | GenericUnifEnumRowsIteratorItem::TailConstant(_) => { - // While unifying open enum rows coming from a pattern, we expect to always - // extend the enum row with other open rows such that the result should always - // stay open. So we expect to find a unification variable at the end of the - // enum row. - // - // But in fact, all the tails for a given pattern path will point to the same - // enum row, so it might have been closed already by a previous call to - // `close_enum`, and that's fine. On the other hand, we should never encounter - // a rigid type variable here (or a non-substituted type variable, although it - // has nothing to do with patterns), so if we reach this point, something is - // wrong with the typechecking of match expression. - debug_assert!(false); - - None - } - _ => None, - } - }); - - if let Some(tail) = tail { - close_enum(tail, state) - } - } -} - -pub(super) trait PatternTypes { - /// The type produced by the pattern. 
Depending on the nature of the pattern, this type may - /// vary: for example, a record pattern will produce record rows, while a general pattern will - /// produce a general [super::UnifType] - type PatType; - - /// Builds the type associated to the whole pattern, as well as the types associated to each - /// binding introduced by this pattern. When matching a value against a pattern in a statically - /// typed code, either by destructuring or by applying a match expression, the type of the - /// value will be checked against the type generated by `pattern_type` and the bindings will be - /// added to the type environment. - /// - /// The type of each "leaf" identifier will be assigned based on the `mode` argument. The - /// current possibilities are for each leaf to have type `Dyn`, to use an explicit type - /// annotation, or to be assigned a fresh unification variable. - fn pattern_types( - &self, - state: &mut State, - ctxt: &Context, - mode: TypecheckMode, - ) -> Result, TypecheckError> { - let mut bindings = Vec::new(); - let mut enum_open_tails = Vec::new(); - let mut wildcard_pat_paths = HashSet::new(); - - let typ = self.pattern_types_inj( - &mut PatTypeState { - bindings: &mut bindings, - enum_open_tails: &mut enum_open_tails, - wildcard_pat_paths: &mut wildcard_pat_paths, - }, - Vec::new(), - state, - ctxt, - mode, - )?; - - Ok(PatternTypeData { - typ, - bindings, - enum_open_tails, - wildcard_occurrences: wildcard_pat_paths, - }) - } - - /// Same as `pattern_types`, but inject the bindings in a working vector instead of returning - /// them. Implementors should implement this method whose signature avoids creating and - /// combining many short-lived vectors when walking recursively through a pattern. 
- fn pattern_types_inj( - &self, - pt_state: &mut PatTypeState, - path: PatternPath, - state: &mut State, - ctxt: &Context, - mode: TypecheckMode, - ) -> Result; -} - -impl PatternTypes for RecordPattern { - type PatType = UnifRecordRows; - - fn pattern_types_inj( - &self, - pt_state: &mut PatTypeState, - path: PatternPath, - state: &mut State, - ctxt: &Context, - mode: TypecheckMode, - ) -> Result { - let tail = if self.is_open() { - match mode { - // We use a dynamic tail here since we're in walk mode, - // but if/when we remove dynamic record tails this could - // likely be made an empty tail with no impact. - TypecheckMode::Walk => mk_uty_record_row!(; RecordRowsF::TailDyn), - TypecheckMode::Enforce => state.table.fresh_rrows_uvar(ctxt.var_level), - } - } else { - UnifRecordRows::Concrete { - rrows: RecordRowsF::Empty, - var_levels_data: VarLevelsData::new_no_uvars(), - } - }; - - if let TailPattern::Capture(rest) = self.tail { - pt_state - .bindings - .push((rest, UnifType::concrete(TypeF::Record(tail.clone())))); - } - - self.patterns - .iter() - .map(|field_pat| field_pat.pattern_types_inj(pt_state, path.clone(), state, ctxt, mode)) - .try_fold(tail, |tail, row: Result| { - Ok(UnifRecordRows::concrete(RecordRowsF::Extend { - row: row?, - tail: Box::new(tail), - })) - }) - } -} - -impl PatternTypes for ArrayPattern { - type PatType = UnifType; - - fn pattern_types_inj( - &self, - pt_state: &mut PatTypeState, - path: PatternPath, - state: &mut State, - ctxt: &Context, - mode: TypecheckMode, - ) -> Result { - // We allocate a fresh unification variable and unify it with the type of each element - // pattern in enforce mode. - // - // In walk mode, we still iterate through the sub patterns to populate the bindings, but we - // eschew unification, which might fail if the elements are heterogeneous (say two record - // patterns with different shapes). In this case, we just return `Dyn` as the element type. 
- let elem_type = match mode { - TypecheckMode::Enforce => state.table.fresh_type_uvar(ctxt.var_level), - TypecheckMode::Walk => mk_uniftype::dynamic(), - }; - - for (idx, subpat) in self.patterns.iter().enumerate() { - let mut path = path.clone(); - path.push(PatternPathElem::Array(idx)); - - let subpat_type = subpat.pattern_types_inj(pt_state, path, state, ctxt, mode)?; - - if let TypecheckMode::Enforce = mode { - elem_type - .clone() - .unify(subpat_type, state, ctxt) - .map_err(|e| e.into_typecheck_err(state, self.pos))?; - } - } - - if let TailPattern::Capture(rest) = &self.tail { - pt_state - .bindings - .push((*rest, mk_uniftype::array(elem_type.clone()))); - } - - Ok(elem_type) - } -} - -impl PatternTypes for Pattern { - type PatType = UnifType; - - fn pattern_types_inj( - &self, - pt_state: &mut PatTypeState, - path: PatternPath, - state: &mut State, - ctxt: &Context, - mode: TypecheckMode, - ) -> Result { - let typ = self - .data - .pattern_types_inj(pt_state, path, state, ctxt, mode)?; - - if let Some(alias) = self.alias { - pt_state.bindings.push((alias, typ.clone())); - } - - Ok(typ) - } -} - -// Depending on the mode, returns the type affected to patterns that match any value (`Any` and -// `Wildcard`): `Dyn` in walk mode, a fresh unification variable in enforce mode. 
-fn any_type(mode: TypecheckMode, state: &mut State, ctxt: &Context) -> UnifType { - match mode { - TypecheckMode::Walk => mk_uniftype::dynamic(), - TypecheckMode::Enforce => state.table.fresh_type_uvar(ctxt.var_level), - } -} - -impl PatternTypes for PatternData { - type PatType = UnifType; - - fn pattern_types_inj( - &self, - pt_state: &mut PatTypeState, - path: PatternPath, - state: &mut State, - ctxt: &Context, - mode: TypecheckMode, - ) -> Result { - match self { - PatternData::Wildcard => { - pt_state.wildcard_pat_paths.insert(path); - Ok(any_type(mode, state, ctxt)) - } - PatternData::Any(id) => { - let typ = any_type(mode, state, ctxt); - pt_state.bindings.push((*id, typ.clone())); - - Ok(typ) - } - PatternData::Record(record_pat) => Ok(UnifType::concrete(TypeF::Record( - record_pat.pattern_types_inj(pt_state, path, state, ctxt, mode)?, - ))), - PatternData::Array(array_pat) => Ok(mk_uniftype::array( - array_pat.pattern_types_inj(pt_state, path, state, ctxt, mode)?, - )), - PatternData::Enum(enum_pat) => { - let row = enum_pat.pattern_types_inj(pt_state, path.clone(), state, ctxt, mode)?; - // We elaborate the type `[| row; a |]` where `a` is a fresh enum rows unification - // variable registered in `enum_open_tails`. 
- let tail = state.table.fresh_erows_uvar(ctxt.var_level); - pt_state.enum_open_tails.push((path, tail.clone())); - - Ok(UnifType::concrete(TypeF::Enum(UnifEnumRows::concrete( - EnumRowsF::Extend { - row, - tail: Box::new(tail), - }, - )))) - } - PatternData::Constant(constant_pat) => { - constant_pat.pattern_types_inj(pt_state, path, state, ctxt, mode) - } - PatternData::Or(or_pat) => or_pat.pattern_types_inj(pt_state, path, state, ctxt, mode), - } - } -} - -impl PatternTypes for ConstantPattern { - type PatType = UnifType; - - fn pattern_types_inj( - &self, - pt_state: &mut PatTypeState, - path: PatternPath, - state: &mut State, - ctxt: &Context, - mode: TypecheckMode, - ) -> Result { - self.data - .pattern_types_inj(pt_state, path, state, ctxt, mode) - } -} - -impl PatternTypes for ConstantPatternData { - type PatType = UnifType; - - fn pattern_types_inj( - &self, - _pt_state: &mut PatTypeState, - _path: PatternPath, - _state: &mut State, - _ctxt: &Context, - _mode: TypecheckMode, - ) -> Result { - Ok(match self { - ConstantPatternData::Bool(_) => UnifType::concrete(TypeF::Bool), - ConstantPatternData::Number(_) => UnifType::concrete(TypeF::Number), - ConstantPatternData::String(_) => UnifType::concrete(TypeF::String), - ConstantPatternData::Null => UnifType::concrete(TypeF::Dyn), - }) - } -} - -impl PatternTypes for FieldPattern { - type PatType = UnifRecordRow; - - fn pattern_types_inj( - &self, - pt_state: &mut PatTypeState, - mut path: PatternPath, - state: &mut State, - ctxt: &Context, - mode: TypecheckMode, - ) -> Result { - path.push(PatternPathElem::Field(self.matched_id.ident())); - - // If there is a static type annotation in a nested record patterns then we need to unify - // them with the pattern type we've built to ensure (1) that they're mutually compatible - // and (2) that we assign the annotated types to the right unification variables. 
- let ty_row = match (&self.annotation.typ, &self.pattern.data, mode) { - // However, in walk mode, we only do that when the nested pattern isn't a leaf (i.e. - // `Any` or `Wildcard`) for backward-compatibility reasons. - // - // Before this function was refactored, Nickel has been allowing things like `let {foo - // : Number} = {foo = 1} in foo` in walk mode, which would fail to typecheck with the - // generic approach: the pattern is parsed as `{foo : Number = foo}`, the second - // occurrence of `foo` gets type `Dyn` in walk mode, but `Dyn` fails to unify with - // `Number`. In this case, we don't recursively call `pattern_types_inj` in the first - // place and just declare that the type of `foo` is `Number`. - // - // This special case should probably be ruled out, requiring the users to use `let {foo - // | Number}` instead, at least outside of a statically typed code block. But before - // this happens, we special case the old behavior and eschew unification. - (Some(annot_ty), PatternData::Any(id), TypecheckMode::Walk) => { - let ty_row = UnifType::from_type(annot_ty.typ.clone(), &ctxt.term_env); - pt_state.bindings.push((*id, ty_row.clone())); - ty_row - } - (Some(annot_ty), PatternData::Wildcard, TypecheckMode::Walk) => { - UnifType::from_type(annot_ty.typ.clone(), &ctxt.term_env) - } - (Some(annot_ty), _, _) => { - let pos = annot_ty.typ.pos; - let annot_uty = UnifType::from_type(annot_ty.typ.clone(), &ctxt.term_env); - - let ty_row = self - .pattern - .pattern_types_inj(pt_state, path, state, ctxt, mode)?; - - ty_row - .clone() - .unify(annot_uty, state, ctxt) - .map_err(|e| e.into_typecheck_err(state, pos))?; - - ty_row - } - _ => self - .pattern - .pattern_types_inj(pt_state, path, state, ctxt, mode)?, - }; - - Ok(UnifRecordRow { - id: self.matched_id, - typ: Box::new(ty_row), - }) - } -} - -impl PatternTypes for EnumPattern { - type PatType = UnifEnumRow; - - fn pattern_types_inj( - &self, - pt_state: &mut PatTypeState, - mut path: PatternPath, - 
state: &mut State, - ctxt: &Context, - mode: TypecheckMode, - ) -> Result { - let typ_arg = self - .pattern - .as_ref() - .map(|pat| { - path.push(PatternPathElem::Variant); - pat.pattern_types_inj(pt_state, path, state, ctxt, mode) - }) - .transpose()? - .map(Box::new); - - Ok(UnifEnumRow { - id: self.tag, - typ: typ_arg, - }) - } -} - -impl PatternTypes for OrPattern { - type PatType = UnifType; - - fn pattern_types_inj( - &self, - pt_state: &mut PatTypeState, - path: PatternPath, - state: &mut State, - ctxt: &Context, - mode: TypecheckMode, - ) -> Result { - // When checking a sequence of or-patterns, we must combine their open tails and wildcard - // pattern positions - in fact, when typechecking a whole match expression, this is exactly - // what the typechecker is doing: it merges all those data. And a match expression is, - // similarly to an or-pattern, a disjunction of patterns. - // - // However, the treatment of bindings is different. If any of the branch in an or-pattern - // matches, the same code path (the match branch) will be run, and thus they must agree on - // pattern variables. Which means: - // - // 1. All pattern branches must have the same set of variables - // 2. Each variable must have a compatible type across all or-pattern branches - // - // To do so, we call to `pattern_types_inj` with a fresh vector of bindings, so that we can - // post-process them afterward (enforcing 1. and 2. above) before actually adding them to - // the original overall bindings. - // - // `bindings` stores, for each or-pattern branch, the inferred type of the whole branch, - // the generated bindings and the position (the latter for error reporting). 
- let bindings: Result, _> = self - .patterns - .iter() - .map(|pat| -> Result<_, TypecheckError> { - let mut fresh_bindings = Vec::new(); - - let mut local_state = PatTypeState { - bindings: &mut fresh_bindings, - enum_open_tails: pt_state.enum_open_tails, - wildcard_pat_paths: pt_state.wildcard_pat_paths, - }; - - let typ = - pat.pattern_types_inj(&mut local_state, path.clone(), state, ctxt, mode)?; - - // We sort the bindings to check later that they are the same in all branches - fresh_bindings.sort_by_key(|(id, _typ)| *id); - - Ok((typ, fresh_bindings, pat.pos)) - }) - .collect(); - - let mut it = bindings?.into_iter(); - - // We need a reference set of variables (and their types for unification). We just pick the - // first bindings of the list. - let Some((model_typ, model, _pos)) = it.next() else { - // We should never generate empty `or` sequences (it's not possible to write them in - // the source language, at least). However, it doesn't cost much to support them: such - // a pattern never matches anything. Thus, we return the bottom type encoded as `forall - // a. a`. - let free_var = Ident::from("a"); - - return Ok(UnifType::concrete(TypeF::Forall { - var: free_var.into(), - var_kind: VarKind::Type, - body: Box::new(UnifType::concrete(TypeF::Var(free_var))), - })); - }; - - for (typ, pat_bindings, pos) in it { - if model.len() != pat_bindings.len() { - // We need to arbitrary choose a variable to report. 
We take the last one of the - // longest list, which is guaranteed to not be present in all branches - let witness = if model.len() > pat_bindings.len() { - // unwrap(): model.len() > pat_bindings.len() >= 0 - model.last().unwrap().0 - } else { - // unwrap(): model.len() <= pat_bindings.len() and (by the outer-if) - // pat_bindings.len() != mode.len(), so: - // 0 <= model.len() < pat_bindings.len() - pat_bindings.last().unwrap().0 - }; - - return Err(TypecheckError::OrPatternVarsMismatch { - var: witness, - pos: self.pos, - }); - } - - // We unify the type of the first or-branch with the current or-branch, to make sure - // all the subpatterns are matching values of the same type - if let TypecheckMode::Enforce = mode { - model_typ - .clone() - .unify(typ, state, ctxt) - .map_err(|e| e.into_typecheck_err(state, pos))?; - } - - // Finally, we unify the type of the bindings - for (idx, (id, typ)) in pat_bindings.into_iter().enumerate() { - let (model_id, model_ty) = &model[idx]; - - if *model_id != id { - // Once again, we must arbitrarily pick a variable to report. 
We take the - // smaller one, which is guaranteed to be missing (indeed, the greater one - // could still appear later in the other list, but the smaller is necessarily - // missing in the list with the greater one) - return Err(TypecheckError::OrPatternVarsMismatch { - var: std::cmp::min(*model_id, id), - pos: self.pos, - }); - } - - if let TypecheckMode::Enforce = mode { - model_ty - .clone() - .unify(typ, state, ctxt) - .map_err(|e| e.into_typecheck_err(state, id.pos))?; - } - } - } - - // Once we have checked that all the bound variables are the same and we have unified their - // types, we can add them to the overall bindings (since they are unified, it doesn't - // matter which type we use - so we just reuse the model, which is still around) - pt_state.bindings.extend(model); - - Ok(model_typ) - } -} diff --git a/core/src/typecheck/reporting.rs b/core/src/typecheck/reporting.rs deleted file mode 100644 index e95f7599c5..0000000000 --- a/core/src/typecheck/reporting.rs +++ /dev/null @@ -1,238 +0,0 @@ -//! Helpers to convert a `TypeWrapper` to a human-readable `Type` representation for error -//! reporting. -use super::*; - -/// A name registry used to replace unification variables and type constants with human-readable -/// and distinct names. -pub struct NameReg { - /// Currently allocated names, including both variables written by the user and generated - /// names. - names: NameTable, - /// A reverse name table, always kept in sync with `names`, in order to efficiently check if a - /// name is already taken. - taken: HashSet, - /// Counter used to generate fresh letters for unification variables. - var_count: usize, - /// Counter used to generate fresh letters for type constants. - cst_count: usize, -} - -impl NameReg { - /// Create a new registry from an initial table corresponding to user-written type constants. 
- pub fn new(names: NameTable) -> Self { - let taken = names.values().copied().collect(); - - NameReg { - names, - taken, - var_count: 0, - cst_count: 0, - } - } - - pub fn taken(&self, name: &str) -> bool { - self.taken.contains(&name.into()) - } - - fn insert(&mut self, var_id: VarId, discriminant: VarKindDiscriminant, name: Ident) { - self.names.insert((var_id, discriminant), name); - self.taken.insert(name); - } - - /// Create a fresh name candidate for a type variable or a type constant. - /// - /// Used to convert a unification type to a human-readable representation. - /// - /// To select a candidate, first check in `names` if the variable or the constant corresponds - /// to a type variable written by the user. If it is, return the name of the variable. - /// Otherwise, use the given counter to generate a new single letter. - /// - /// A generated name is clearly not necessarily unique. [`select_uniq`] must then be applied. - fn gen_candidate_name( - names: &NameTable, - counter: &mut usize, - id: VarId, - kind: VarKindDiscriminant, - ) -> String { - match names.get(&(id, kind)) { - // First check if that constant or variable was introduced by a forall. If it was, try - // to use the same name. - Some(orig) => format!("{orig}"), - None => { - //Otherwise, generate a new character - let next = *counter; - *counter += 1; - - let prefix = match kind { - VarKindDiscriminant::Type => "", - VarKindDiscriminant::EnumRows => "erows_", - VarKindDiscriminant::RecordRows => "rrows_", - }; - let character = std::char::from_u32(('a' as u32) + ((next % 26) as u32)).unwrap(); - format!("{prefix}{character}") - } - } - } - - /// Select a name distinct from all the others, starting from a candidate name for a type - /// variable or a type constant. Insert the corresponding name in the name table. - /// - /// If the name is already taken, it just iterates by adding a numeric suffix `1`, `2`, .., and - /// so on until a free name is found. See `var_to_type` and `cst_to_type`. 
- fn select_uniq(&mut self, mut name: String, id: VarId, kind: VarKindDiscriminant) -> Ident { - // To avoid clashing with already picked names, we add a numeric suffix to the picked - // letter. - if self.taken(&name) { - let mut suffix = 1; - - name = format!("{name}{suffix}"); - while self.taken(&name) { - suffix += 1; - } - } - - let sym = Ident::from(name); - self.insert(id, kind, sym); - sym - } - - /// Either retrieve or generate a new fresh name for a unification variable for error reporting, - /// and wrap it as an identifier. Unification variables are named `_a`, `_b`, .., `_a1`, `_b1`, - /// .. and so on. - pub fn gen_var_name(&mut self, id: VarId, kind: VarKindDiscriminant) -> Ident { - self.names.get(&(id, kind)).cloned().unwrap_or_else(|| { - // Select a candidate name and add a "_" prefix - let candidate = format!( - "_{}", - Self::gen_candidate_name(&self.names, &mut self.var_count, id, kind) - ); - // Add a suffix to make it unique if it has already been picked - self.select_uniq(candidate, id, kind) - }) - } - - /// Either retrieve or generate a new fresh name for a constant for error reporting, and wrap it - /// as type variable. Constant are named `a`, `b`, .., `a1`, `b1`, .. and so on. - pub fn gen_cst_name(&mut self, id: VarId, kind: VarKindDiscriminant) -> Ident { - self.names.get(&(id, kind)).cloned().unwrap_or_else(|| { - // Select a candidate name - let candidate = Self::gen_candidate_name(&self.names, &mut self.cst_count, id, kind); - // Add a suffix to make it unique if it has already been picked - self.select_uniq(candidate, id, kind) - }) - } -} - -pub trait ToType { - /// The target type to convert to. If `Self` is `UnifXXX`, then `Target` is `XXX`. - type Target; - - /// Extract a concrete type corresponding to a unification type for error reporting purpose, - /// given a registry of currently allocated names. 
- /// - /// As opposed to [`crate::typ::Type::from`], free unification variables and type constants are - /// replaced by type variables which names are determined by this name registry. - /// - /// When reporting error, we want to distinguish occurrences of unification variables and type - /// constants in a human-readable way. - fn to_type(self, reg: &mut NameReg, table: &UnifTable) -> Self::Target; -} - -impl ToType for UnifType { - type Target = Type; - - fn to_type(self, reg: &mut NameReg, table: &UnifTable) -> Self::Target { - let ty = self.into_root(table); - - match ty { - UnifType::UnifVar { id, .. } => { - Type::from(TypeF::Var(reg.gen_var_name(id, VarKindDiscriminant::Type))) - } - UnifType::Constant(id) => { - Type::from(TypeF::Var(reg.gen_cst_name(id, VarKindDiscriminant::Type))) - } - UnifType::Concrete { typ, .. } => { - let mapped = typ.map_state( - |btyp, reg| Box::new(btyp.to_type(reg, table)), - |rrows, reg| rrows.to_type(reg, table), - |erows, reg| erows.to_type(reg, table), - |(ctr, _env), _reg| ctr, - reg, - ); - Type::from(mapped) - } - } - } -} - -impl ToType for UnifRecordRows { - type Target = RecordRows; - - fn to_type(self, reg: &mut NameReg, table: &UnifTable) -> Self::Target { - let rrows = self.into_root(table); - - match rrows { - UnifRecordRows::UnifVar { id, .. } => RecordRows(RecordRowsF::TailVar( - reg.gen_var_name(id, VarKindDiscriminant::RecordRows).into(), - )), - UnifRecordRows::Constant(id) => RecordRows(RecordRowsF::TailVar( - reg.gen_cst_name(id, VarKindDiscriminant::RecordRows).into(), - )), - UnifRecordRows::Concrete { rrows, .. 
} => { - let mapped = rrows.map_state( - |btyp, reg| Box::new(btyp.to_type(reg, table)), - |rrows, reg| Box::new(rrows.to_type(reg, table)), - reg, - ); - RecordRows(mapped) - } - } - } -} - -impl ToType for UnifEnumRows { - type Target = EnumRows; - - fn to_type(self, reg: &mut NameReg, table: &UnifTable) -> Self::Target { - let erows = self.into_root(table); - - match erows { - UnifEnumRows::UnifVar { id, .. } => EnumRows(EnumRowsF::TailVar( - reg.gen_var_name(id, VarKindDiscriminant::EnumRows).into(), - )), - UnifEnumRows::Constant(id) => EnumRows(EnumRowsF::TailVar( - reg.gen_cst_name(id, VarKindDiscriminant::EnumRows).into(), - )), - UnifEnumRows::Concrete { erows, .. } => { - let mapped = erows.map_state( - |btyp, reg| Box::new(btyp.to_type(reg, table)), - |erows, reg| Box::new(erows.to_type(reg, table)), - reg, - ); - EnumRows(mapped) - } - } - } -} - -impl ToType for UnifEnumRow { - type Target = EnumRow; - - fn to_type(self, reg: &mut NameReg, table: &UnifTable) -> Self::Target { - EnumRow { - id: self.id, - typ: self.typ.map(|typ| Box::new(typ.to_type(reg, table))), - } - } -} - -impl ToType for UnifRecordRow { - type Target = RecordRow; - - fn to_type(self, reg: &mut NameReg, table: &UnifTable) -> Self::Target { - RecordRow { - id: self.id, - typ: Box::new(self.typ.to_type(reg, table)), - } - } -} diff --git a/core/src/typecheck/subtyping.rs b/core/src/typecheck/subtyping.rs deleted file mode 100644 index bbcaa86669..0000000000 --- a/core/src/typecheck/subtyping.rs +++ /dev/null @@ -1,254 +0,0 @@ -//! Type subsumption (subtyping) -//! -//! Subtyping is a relation between types that allows a value of one type to be used at a place -//! where another type is expected, because the value's actual type is subsumed by the expected -//! type. -//! -//! The subsumption rule is applied when from inference mode to checking mode, as customary in -//! bidirectional type checking. -//! -//! Currently, there is one core subtyping axiom: -//! -//! 
- Record / Dictionary : `{a1 : T1,...,an : Tn} <: {_ : U}` if for every n `Tn <: U` -//! -//! The subtyping relation is extended to a congruence on other type constructors in the obvious -//! way: -//! -//! - `Array T <: Array U` if `T <: U` -//! - `{_ : T} <: {_ : U}` if `T <: U` -//! - `{a1 : T1,...,an : Tn} <: {b1 : U1,...,bn : Un}` if for every n `Tn <: Un` -//! -//! In all other cases, we fallback to unification (although we instantiate polymorphic types as -//! needed before). That is, we try to apply reflexivity: `T <: U` if `T = U`. -//! -//! The type instantiation corresponds to the zero-ary case of application in the current -//! specification (which is based on [A Quick Look at Impredicativity][quick-look], although we -//! currently don't support impredicative polymorphism). -//! -//! [quick-look]: https://www.microsoft.com/en-us/research/uploads/prod/2020/01/quick-look-icfp20-fixed.pdf -use super::*; - -pub(super) trait SubsumedBy { - type Error; - - /// Checks if `self` is subsumed by `t2`, that is if `self <: t2`. Returns an error otherwise. - fn subsumed_by(self, t2: Self, state: &mut State, ctxt: Context) -> Result<(), Self::Error>; -} - -impl SubsumedBy for UnifType { - type Error = UnifError; - - fn subsumed_by( - self, - t2: Self, - state: &mut State, - mut ctxt: Context, - ) -> Result<(), Self::Error> { - let inferred = instantiate_foralls(state, &mut ctxt, self, ForallInst::UnifVar); - let checked = t2.into_root(state.table); - - match (inferred, checked) { - // {a1 : T1,...,an : Tn} <: {_ : U} if for every n `Tn <: U` - ( - UnifType::Concrete { - typ: TypeF::Record(rrows), - .. - }, - UnifType::Concrete { - typ: - TypeF::Dict { - type_fields, - flavour, - }, - var_levels_data, - }, - ) => { - for row in rrows.iter() { - match row { - GenericUnifRecordRowsIteratorItem::Row(a) => { - a.typ - .clone() - .subsumed_by(*type_fields.clone(), state, ctxt.clone())? - } - GenericUnifRecordRowsIteratorItem::TailUnifVar { id, .. 
} => - // We don't need to perform any variable level checks when unifying a free - // unification variable with a ground type - // We close the tail because there is no guarantee that - // { a : Number, b : Number, _ : a?} <= { _ : Number} - { - state - .table - .assign_rrows(id, UnifRecordRows::concrete(RecordRowsF::Empty)) - } - GenericUnifRecordRowsIteratorItem::TailConstant(id) => { - let checked = UnifType::Concrete { - typ: TypeF::Dict { - type_fields: type_fields.clone(), - flavour, - }, - var_levels_data, - }; - Err(UnifError::WithConst { - var_kind: VarKindDiscriminant::RecordRows, - expected_const_id: id, - inferred: checked, - })? - } - _ => (), - } - } - Ok(()) - } - // Array T <: Array U if T <: U - ( - UnifType::Concrete { - typ: TypeF::Array(a), - .. - }, - UnifType::Concrete { - typ: TypeF::Array(b), - .. - }, - ) - // Dict T <: Dict U if T <: U - | ( - UnifType::Concrete { - typ: TypeF::Dict { type_fields: a, .. }, - .. - }, - UnifType::Concrete { - typ: TypeF::Dict { type_fields: b, .. }, - .. - }, - ) => a.subsumed_by(*b, state, ctxt), - // {a1 : T1,...,an : Tn} <: {b1 : U1,...,bn : Un} if for every n `Tn <: Un` - ( - UnifType::Concrete { - typ: TypeF::Record(rrows1), - .. - }, - UnifType::Concrete { - typ: TypeF::Record(rrows2), - .. - }, - ) => rrows1 - .clone() - .subsumed_by(rrows2.clone(), state, ctxt) - .map_err(|err| err.into_unif_err(mk_uty_record!(;rrows2), mk_uty_record!(;rrows1))), - // T <: U if T = U - (inferred, checked) => checked.unify(inferred, state, &ctxt), - } - } -} - -impl SubsumedBy for UnifRecordRows { - type Error = RowUnifError; - - fn subsumed_by(self, t2: Self, state: &mut State, ctxt: Context) -> Result<(), Self::Error> { - // This code is almost taken verbatim fro `unify`, but where some recursive calls are - // changed to be `subsumed_by` instead of `unify`. We can surely factorize both into a - // generic function, but this is left for future work. 
- let inferred = self.into_root(state.table); - let checked = t2.into_root(state.table); - - match (inferred, checked) { - ( - UnifRecordRows::Concrete { rrows: rrows1, .. }, - UnifRecordRows::Concrete { - rrows: rrows2, - var_levels_data: levels2, - }, - ) => match (rrows1, rrows2) { - (RecordRowsF::Extend { row, tail }, rrows2 @ RecordRowsF::Extend { .. }) => { - let urrows2 = UnifRecordRows::Concrete { - rrows: rrows2, - var_levels_data: levels2, - }; - let (ty_res, urrows_without_ty_res) = urrows2 - .remove_row(&row.id, &row.typ, state, ctxt.var_level) - .map_err(|err| match err { - RemoveRowError::Missing => RowUnifError::MissingRow(row.id), - RemoveRowError::Conflict => { - RowUnifError::RecordRowConflict(row.clone()) - } - })?; - if let RemoveRowResult::Extracted(ty) = ty_res { - row.typ - .subsumed_by(ty, state, ctxt.clone()) - .map_err(|err| RowUnifError::RecordRowMismatch { - id: row.id, - cause: Box::new(err), - })?; - } - tail.subsumed_by(urrows_without_ty_res, state, ctxt) - } - (RecordRowsF::TailVar(id), _) | (_, RecordRowsF::TailVar(id)) => { - Err(RowUnifError::UnboundTypeVariable(id)) - } - (RecordRowsF::Empty, RecordRowsF::Empty) - | (RecordRowsF::TailDyn, RecordRowsF::TailDyn) => Ok(()), - (RecordRowsF::Empty, RecordRowsF::TailDyn) - | (RecordRowsF::TailDyn, RecordRowsF::Empty) => Err(RowUnifError::ExtraDynTail), - ( - RecordRowsF::Empty, - RecordRowsF::Extend { - row: UnifRecordRow { id, .. }, - .. - }, - ) - | ( - RecordRowsF::TailDyn, - RecordRowsF::Extend { - row: UnifRecordRow { id, .. }, - .. - }, - ) => Err(RowUnifError::MissingRow(id)), - ( - RecordRowsF::Extend { - row: UnifRecordRow { id, .. }, - .. - }, - RecordRowsF::TailDyn, - ) - | ( - RecordRowsF::Extend { - row: UnifRecordRow { id, .. }, - .. - }, - RecordRowsF::Empty, - ) => Err(RowUnifError::ExtraRow(id)), - }, - (UnifRecordRows::UnifVar { id, .. }, urrows) - | (urrows, UnifRecordRows::UnifVar { id, .. 
}) => { - if let UnifRecordRows::Constant(cst_id) = urrows { - let constant_level = state.table.get_rrows_level(cst_id); - state.table.force_rrows_updates(constant_level); - if state.table.get_rrows_level(id) < constant_level { - return Err(RowUnifError::VarLevelMismatch { - constant_id: cst_id, - var_kind: VarKindDiscriminant::RecordRows, - }); - } - } - urrows.propagate_constrs(state.constr, id)?; - state.table.assign_rrows(id, urrows); - Ok(()) - } - (UnifRecordRows::Constant(i1), UnifRecordRows::Constant(i2)) if i1 == i2 => Ok(()), - (UnifRecordRows::Constant(i1), UnifRecordRows::Constant(i2)) => { - Err(RowUnifError::ConstMismatch { - var_kind: VarKindDiscriminant::RecordRows, - expected_const_id: i2, - inferred_const_id: i1, - }) - } - (urrows, UnifRecordRows::Constant(i)) | (UnifRecordRows::Constant(i), urrows) => { - Err(RowUnifError::WithConst { - var_kind: VarKindDiscriminant::RecordRows, - expected_const_id: i, - inferred: UnifType::concrete(TypeF::Record(urrows)), - }) - } - } - } -} diff --git a/core/src/typecheck/unif.rs b/core/src/typecheck/unif.rs deleted file mode 100644 index 2f2b2ad41c..0000000000 --- a/core/src/typecheck/unif.rs +++ /dev/null @@ -1,1814 +0,0 @@ -//! Types unification. - -use super::*; - -/// Unification variable or type constants unique identifier. -pub type VarId = usize; - -/// Variable levels. Levels are used in order to implement polymorphism in a sound way: we need to -/// associate to each unification variable and rigid type variable a level, which depends on when -/// those variables were introduced, and to forbid some unifications if a condition on levels is -/// not met. -#[derive(Clone, Copy, Ord, Eq, PartialEq, PartialOrd, Debug)] -pub struct VarLevel(NonZeroU16); - -impl VarLevel { - /// Special constant used for level upper bound to indicate that a type doesn't contain any - /// unification variable. 
It's equal to `1` and strictly smaller than [VarLevel::MIN_LEVEL], so - /// it's strictly smaller than any concrete variable level. - pub const NO_VAR: Self = VarLevel(NonZeroU16::MIN); - /// The first available variable level, `2`. - // unsafe is required because `unwrap()` is not usable in `const fn` code as of today in stable - // Rust. - // unsafe(): we must enforce the invariant that the argument `n` of `new_unchecked(n)` verifies - // `0 < n`. Indeed `0 < 2`. - pub const MIN_LEVEL: Self = unsafe { VarLevel(NonZeroU16::new_unchecked(2)) }; - /// The maximum level. Used as an upper bound to indicate that nothing can be said about the - /// levels of the unification variables contained in a type. - pub const MAX_LEVEL: Self = VarLevel(NonZeroU16::MAX); - - /// Increment the variable level by one. Panic if the maximum capacity of the underlying - /// numeric type is reached (currently, `u16::MAX`). - pub fn incr(&mut self) { - let new_value = self - .0 - .checked_add(1) - .expect("reached the maxium unification variable level"); - self.0 = new_value; - } -} - -/// An element of the unification table. Contains the potential type this variable points to (or -/// `None` if the variable hasn't been unified with something yet), and the variable's level. -pub struct UnifSlot { - value: Option, - level: VarLevel, -} - -impl UnifSlot { - pub fn new(level: VarLevel) -> Self { - UnifSlot { value: None, level } - } -} - -/// The unification table. -/// -/// Map each unification variable to either another type variable or a concrete type it has been -/// unified with. Each binding `(ty, var)` in this map should be thought of an edge in a -/// unification graph. -/// -/// The unification table is really three separate tables, corresponding to the different kinds of -/// types: standard types, record rows, and enum rows. -/// -/// The unification table is a relatively low-level data structure, whose consumer has to ensure -/// specific invariants. 
It is used by the `unify` function and its variants, but you should avoid -/// using it directly, unless you know what you're doing. -#[derive(Default)] -pub struct UnifTable { - types: Vec>, - rrows: Vec>, - erows: Vec>, - pending_type_updates: Vec, - pending_rrows_updates: Vec, - pending_erows_updates: Vec, -} - -impl UnifTable { - pub fn new() -> Self { - UnifTable::default() - } - - /// Assign a type to a type unification variable. - /// - /// This method updates variables level, at least lazily, by pushing them to a stack of pending - /// traversals. - /// - /// # Preconditions - /// - /// - This method doesn't check for the variable level conditions. This is the responsibility - /// of the caller. - /// - If the target type is a unification variable as well, it must not be assigned to another - /// unification type. That is, `assign` should always be passed a root type. Otherwise, the - /// handling of variable levels will be messed up. - /// - This method doesn't force pending level updates when needed (calling to - /// `force_type_updates`), i.e. when `uty` is a rigid type variable. Having pending variable - /// level updates and using `assign_type` might make typechecking incorrect in some situation - /// by unduely allowing unsound generalization. This is the responsibility of the caller. - pub fn assign_type(&mut self, var: VarId, uty: UnifType) { - // Unifying a free variable with itself is a no-op. - if matches!(uty, UnifType::UnifVar { id, ..} if id == var) { - return; - } - - debug_assert!({ - if let UnifType::UnifVar { id, init_level: _ } = &uty { - self.types[*id].value.is_none() - } else { - true - } - }); - debug_assert!(self.types[var].value.is_none()); - - let uty_lvl_updated = self.update_type_level(var, uty, self.types[var].level); - self.types[var].value = Some(uty_lvl_updated); - } - - // Lazily propagate a variable level to the unification variables contained in `uty`. 
Either do - // a direct update in constant time when possible, or push a stack of delayed updates for - // composite types. - fn update_type_level(&mut self, var: VarId, uty: UnifType, new_level: VarLevel) -> UnifType { - match uty { - // We can do the update right away - UnifType::UnifVar { id, init_level } => { - if new_level < self.types[id].level { - self.types[id].level = new_level; - } - - UnifType::UnifVar { id, init_level } - } - // If a concrete type is a candidate for update, we push the pending update on the - // stack - UnifType::Concrete { - typ, - var_levels_data, - } if var_levels_data.upper_bound >= new_level => { - self.pending_type_updates.push(var); - - UnifType::Concrete { - typ, - var_levels_data: VarLevelsData { - pending: Some(new_level), - ..var_levels_data - }, - } - } - // The remaining types either don't contain unification variables or have all their - // level greater than the updated level - _ => uty, - } - } - - /// Assign record rows to a record rows unification variable. - /// - /// This method updates variables level, at least lazily, by pushing them to a stack of pending - /// traversals. - /// - /// # Preconditions - /// - /// - This method doesn't check for the variable level conditions. This is the responsibility - /// of the caller. - /// - If the target type is a unification variable as well, it must not be assigned to another - /// unification type. That is, `assign` should always be passed a root type. Otherwise, the - /// handling of variable levels will be messed up. - /// - This method doesn't force pending level updates when needed (calling to - /// `force_rrows_updates`), i.e. when `uty` is a rigid type variable. Having pending variable - /// level updates and using `assign_type` might make typechecking incorrect in some situation - /// by unduly allowing unsound generalization. This is the responsibility of the caller. 
- pub fn assign_rrows(&mut self, var: VarId, rrows: UnifRecordRows) { - // Unifying a free variable with itself is a no-op. - if matches!(rrows, UnifRecordRows::UnifVar { id, ..} if id == var) { - return; - } - - self.update_rrows_level(var, &rrows, self.rrows[var].level); - debug_assert!(self.rrows[var].value.is_none()); - self.rrows[var].value = Some(rrows); - } - - // cf `update_type_level()` - fn update_rrows_level(&mut self, var: VarId, uty: &UnifRecordRows, new_level: VarLevel) { - match uty { - // We can do the update right away - UnifRecordRows::UnifVar { - id: var_id, - init_level: _, - } => { - if new_level < self.rrows[*var_id].level { - self.rrows[*var_id].level = new_level; - } - } - // If concrete rows are a candidate for update, we push the pending update on the stack - UnifRecordRows::Concrete { - var_levels_data, .. - } if var_levels_data.upper_bound >= new_level => self.pending_rrows_updates.push(var), - // The remaining rows either don't contain unification variables or have all their - // level greater than the updated level - _ => (), - } - } - - /// Assign enum rows to an enum rows unification variable. - /// - /// This method updates variables level, at least lazily, by pushing them to a stack of pending - /// traversals. - /// - /// # Preconditions - /// - /// - This method doesn't check for the variable level conditions. This is the responsibility - /// of the caller. - /// - If the target type is a unification variable as well, it must not be assigned to another - /// unification type. That is, `assign` should always be passed a root type. Otherwise, the - /// handling of variable levels will be messed up. - /// - This method doesn't force pending level updates when needed (calling to - /// `force_erows_updates`), i.e. when `uty` is a rigid type variable. Having pending variable - /// level updates and using `assign_type` might make typechecking incorrect in some situation - /// by unduly allowing unsound generalization. 
This is the responsibility of the caller. - pub fn assign_erows(&mut self, var: VarId, erows: UnifEnumRows) { - // Unifying a free variable with itself is a no-op. - if matches!(erows, UnifEnumRows::UnifVar { id, .. } if id == var) { - return; - } - - self.update_erows_level(var, &erows, self.erows[var].level); - debug_assert!(self.erows[var].value.is_none()); - self.erows[var].value = Some(erows); - } - - // cf `update_type_level()` - fn update_erows_level(&mut self, var: VarId, uty: &UnifEnumRows, new_level: VarLevel) { - match uty { - // We can do the update right away - UnifEnumRows::UnifVar { - id: var_id, - init_level: _, - } => { - if new_level < self.erows[*var_id].level { - self.erows[*var_id].level = new_level; - } - } - // If concrete rows are a candidate for update, we push the pending update on the stack - UnifEnumRows::Concrete { - var_levels_data, .. - } if var_levels_data.upper_bound >= new_level => self.pending_erows_updates.push(var), - // The remaining rows either don't contain unification variables or have all their - // level greater than the updated level - _ => (), - } - } - - /// Retrieve the current assignment of a type unification variable. - pub fn get_type(&self, var: VarId) -> Option<&UnifType> { - self.types[var].value.as_ref() - } - - /// Retrieve the current level of a unification variable or a rigid type variable. - pub fn get_level(&self, var: VarId) -> VarLevel { - self.types[var].level - } - - /// Retrieve the current assignment of a record rows unification variable. - pub fn get_rrows(&self, var: VarId) -> Option<&UnifRecordRows> { - self.rrows[var].value.as_ref() - } - - /// Retrieve the current level of a record rows unification variable or a record rows rigid - /// type variable. - pub fn get_rrows_level(&self, var: VarId) -> VarLevel { - self.rrows[var].level - } - - /// Retrieve the current assignment of an enum rows unification variable. 
- pub fn get_erows(&self, var: VarId) -> Option<&UnifEnumRows> { - self.erows[var].value.as_ref() - } - - /// Retrieve the current level of an enu rows unification variable or a record rows rigid type - /// variable. - pub fn get_erows_level(&self, var: VarId) -> VarLevel { - self.erows[var].level - } - - /// Create a fresh type unification variable (or constant) identifier and allocate a - /// corresponding slot in the table. - pub fn fresh_type_var_id(&mut self, current_level: VarLevel) -> VarId { - let next = self.types.len(); - self.types.push(UnifSlot::new(current_level)); - next - } - - /// Create a fresh record rows variable (or constant) identifier and allocate a corresponding - /// slot in the table. - pub fn fresh_rrows_var_id(&mut self, current_level: VarLevel) -> VarId { - let next = self.rrows.len(); - self.rrows.push(UnifSlot::new(current_level)); - next - } - - /// Create a fresh enum rows variable (or constant) identifier and allocate a corresponding - /// slot in the table. - pub fn fresh_erows_var_id(&mut self, current_level: VarLevel) -> VarId { - let next = self.erows.len(); - self.erows.push(UnifSlot::new(current_level)); - next - } - - /// Create a fresh type unification variable and allocate a corresponding slot in the table. - pub fn fresh_type_uvar(&mut self, current_level: VarLevel) -> UnifType { - UnifType::UnifVar { - id: self.fresh_type_var_id(current_level), - init_level: current_level, - } - } - - /// Create a fresh record rows unification variable and allocate a corresponding slot in the - /// table. - pub fn fresh_rrows_uvar(&mut self, current_level: VarLevel) -> UnifRecordRows { - UnifRecordRows::UnifVar { - id: self.fresh_rrows_var_id(current_level), - init_level: current_level, - } - } - - /// Create a fresh enum rows unification variable and allocate a corresponding slot in the - /// table. 
- pub fn fresh_erows_uvar(&mut self, current_level: VarLevel) -> UnifEnumRows { - UnifEnumRows::UnifVar { - id: self.fresh_erows_var_id(current_level), - init_level: current_level, - } - } - - /// Create a fresh type constant and allocate a corresponding slot in the table. - pub fn fresh_type_const(&mut self, current_level: VarLevel) -> UnifType { - UnifType::Constant(self.fresh_type_var_id(current_level)) - } - - /// Create a fresh record rows constant and allocate a corresponding slot in the table. - pub fn fresh_rrows_const(&mut self, current_level: VarLevel) -> UnifRecordRows { - UnifRecordRows::Constant(self.fresh_rrows_var_id(current_level)) - } - - /// Create a fresh enum rows constant and allocate a corresponding slot in the table. - pub fn fresh_erows_const(&mut self, current_level: VarLevel) -> UnifEnumRows { - UnifEnumRows::Constant(self.fresh_erows_var_id(current_level)) - } - - /// Follow the links in the unification table to find the representative of the equivalence - /// class of the type unification variable `x`. - /// - /// This corresponds to the find in union-find. - // TODO This should be a union find like algorithm - pub fn root_type(&self, var_id: VarId, init_level: VarLevel) -> UnifType { - // All queried variable must have been introduced by `new_var` and thus a corresponding - // entry must always exist in `state`. If not, the typechecking algorithm is not correct, - // and we panic. - match self.types[var_id].value.as_ref() { - None => UnifType::UnifVar { - id: var_id, - init_level, - }, - Some(UnifType::UnifVar { id, init_level }) => self.root_type(*id, *init_level), - Some(ty) => ty.clone(), - } - } - - /// Follow the links in the unification table to find the representative of the equivalence - /// class of the record rows unification variable `x`. - /// - /// This corresponds to the find in union-find. 
- // TODO This should be a union find like algorithm - pub fn root_rrows(&self, var_id: VarId, init_level: VarLevel) -> UnifRecordRows { - // All queried variable must have been introduced by `new_var` and thus a corresponding - // entry must always exist in `state`. If not, the typechecking algorithm is not correct, - // and we panic. - match self.rrows[var_id].value.as_ref() { - None => UnifRecordRows::UnifVar { - id: var_id, - init_level, - }, - Some(UnifRecordRows::UnifVar { id, init_level }) => self.root_rrows(*id, *init_level), - Some(ty) => ty.clone(), - } - } - - /// Follow the links in the unification table to find the representative of the equivalence - /// class of the enum rows unification variable `x`. - /// - /// This corresponds to the find in union-find. - // TODO This should be a union find like algorithm - pub fn root_erows(&self, var_id: VarId, init_level: VarLevel) -> UnifEnumRows { - // All queried variable must have been introduced by `new_var` and thus a corresponding - // entry must always exist in `state`. If not, the typechecking algorithm is not correct, - // and we panic. - match self.erows[var_id].value.as_ref() { - None => UnifEnumRows::UnifVar { - id: var_id, - init_level, - }, - Some(UnifEnumRows::UnifVar { id, init_level }) => self.root_erows(*id, *init_level), - Some(ty) => ty.clone(), - } - } - - /// Return a `VarId` greater than all of the variables currently allocated (unification and - /// rigid type variables, of all kinds, rows or types). The returned UID is guaranteed to be - /// different from all the currently live variables. This is currently simply the max of the - /// length of the various unification tables. - /// - /// Used inside [self::eq] to generate temporary rigid type variables that are guaranteed to - /// not conflict with existing variables. 
- pub fn max_uvars_count(&self) -> VarId { - max(self.types.len(), max(self.rrows.len(), self.erows.len())) - } - - /// This function forces pending type updates prior to unifying a variable with a rigid type - /// variable of level `constant_level`. Updates that wouldn't change the outcome of such a - /// unification are delayed further. - /// - /// The whole point of variable levels is to forbid some unsound unifications of a unification - /// variable with a rigid type variable. For performance reasons, those levels aren't - /// propagated immediatly when unifying a variable with a concrete type, but lazily stored at - /// the level of types (see [VarLevel]). - /// - /// However, unifying with a rigid type variable is an instance that requires levels to be up - /// to date. In this case, this function must be called before checking variable levels. - /// - /// # Parameters - /// - /// - `constant_level`: the level of the rigid type variable we're unifying with. While not - /// strictly required to propagate levels, it is used to eschew variable level updates that - /// wouldn't change the outcome of the unfication, which we can keep for later forced - /// updates. 
- fn force_type_updates(&mut self, constant_level: VarLevel) { - fn update_unr_with_lvl( - table: &mut UnifTable, - uty: UnifTypeUnrolling, - level: VarLevel, - ) -> UnifTypeUnrolling { - uty.map_state( - |uty, table| Box::new(update_utype_with_lvl(table, *uty, level)), - |rrows, table| update_rrows_with_lvl(table, rrows, level), - |erows, table| update_erows_with_lvl(table, erows, level), - |ctr, _| ctr, - table, - ) - } - - fn update_rrows_with_lvl( - table: &mut UnifTable, - rrows: UnifRecordRows, - level: VarLevel, - ) -> UnifRecordRows { - let rrows = rrows.into_root(table); - - match rrows { - UnifRecordRows::Concrete { - rrows, - var_levels_data, - } => { - let rrows = rrows.map_state( - |uty, table| Box::new(update_utype_with_lvl(table, *uty, level)), - |rrows, table| Box::new(update_rrows_with_lvl(table, *rrows, level)), - table, - ); - - // [^var-level-kinds]: Note that for `UnifRecordRows` (and for enum rows as - // well), the variable levels data are concerned with record rows unification - // variables, not type unification variable. We thus let them untouched, as - // updating record rows variable levels is an orthogonal concern. - UnifRecordRows::Concrete { - rrows, - var_levels_data, - } - } - UnifRecordRows::UnifVar { .. } | UnifRecordRows::Constant(_) => rrows, - } - } - - fn update_erows_with_lvl( - table: &mut UnifTable, - erows: UnifEnumRows, - level: VarLevel, - ) -> UnifEnumRows { - let erows = erows.into_root(table); - - match erows { - UnifEnumRows::Concrete { - erows, - var_levels_data, - } => { - let erows = erows.map_state( - |uty, table| Box::new(update_utype_with_lvl(table, *uty, level)), - |erows, table| Box::new(update_erows_with_lvl(table, *erows, level)), - table, - ); - - // see [^var-level-kinds] - UnifEnumRows::Concrete { - erows, - var_levels_data, - } - } - UnifEnumRows::UnifVar { .. 
} | UnifEnumRows::Constant(_) => erows, - } - } - - fn update_utype_with_lvl( - table: &mut UnifTable, - uty: UnifType, - level: VarLevel, - ) -> UnifType { - let uty = uty.into_root(table); - - match uty { - UnifType::UnifVar { id, init_level } => { - if table.types[id].level > level { - table.types[id].level = level; - } - - UnifType::UnifVar { id, init_level } - } - UnifType::Concrete { - typ, - var_levels_data, - } if var_levels_data.upper_bound > level => { - let level = var_levels_data - .pending - .map(|pending_level| max(pending_level, level)) - .unwrap_or(level); - let typ = update_unr_with_lvl(table, typ, level); - - UnifType::Concrete { - typ, - var_levels_data: VarLevelsData { - upper_bound: level, - pending: None, - }, - } - } - UnifType::Constant(_) | UnifType::Concrete { .. } => uty, - } - } - - fn update_utype( - table: &mut UnifTable, - uty: UnifType, - constant_level: VarLevel, - ) -> (UnifType, bool) { - match uty { - UnifType::UnifVar { .. } => { - // We should never end up updating the level of a type variable, as this update - // is done on the spot. - debug_assert!(false); - - (uty, false) - } - UnifType::Concrete { - typ, - var_levels_data: - VarLevelsData { - pending: Some(pending_level), - upper_bound, - }, - } => { - // [^irrelevant-level-update]: A level update where the if-condition below is - // true wouldn't change the outcome of unifying a variable with a constant of - // level `constant_level`. - // - // Impactful updates are updates that might change the level of a variable from - // a value greater than or equals to `constant_level` to a new level strictly - // smaller, but: - // - // 1. If `upper_bound` < `constant_level`, then all unification variable levels - // are already strictly smaller than `constant_level`. An update won't change - // this inequality (level update can only decrease levels) - // 2. 
If `pending_level` >= `constant_level`, then the update might only - // decrease a level that was greater than `constant_level` to a - // `pending_level` which is still greater than `constant_level`. Once again, - // the update doesn't change the inequality with respect to constant_level. - // - // Thus, such updates might be delayed even more. - if upper_bound < constant_level || pending_level >= constant_level { - return ( - UnifType::Concrete { - typ, - var_levels_data: VarLevelsData { - upper_bound: pending_level, - pending: Some(pending_level), - }, - }, - true, - ); - } - - let typ = if upper_bound > pending_level { - update_unr_with_lvl(table, typ, pending_level) - } else { - typ - }; - - ( - UnifType::Concrete { - typ, - var_levels_data: VarLevelsData { - upper_bound: pending_level, - pending: None, - }, - }, - false, - ) - } - // [^ignore-no-pending-level] If there is no pending level, then this update has - // already been handled (possibly by a forced update on an enclosing type), and - // there's nothing to do. - // - // Note that this type might still contain other pending updates deeper inside, but - // those are registered as pending updates and will be processed in any case. - UnifType::Constant(_) | UnifType::Concrete { .. } => (uty, false), - } - } - - let rest = std::mem::take(&mut self.pending_type_updates) - .into_iter() - .filter(|id| { - // unwrap(): if a unification variable has been push on the update stack, it - // has been been by `assign_type`, and thus MUST have been assigned to - // something. - let typ = self.types[*id].value.take().unwrap(); - let (new_type, delayed) = update_utype(self, typ, constant_level); - self.types[*id].value = Some(new_type); - - delayed - }) - .collect(); - - self.pending_type_updates = rest; - } - - /// See `force_type_updates`. Same as `force_type_updates`, but when unifying a record row - /// unification variable. 
- pub fn force_rrows_updates(&mut self, constant_level: VarLevel) { - fn update_rrows_unr_with_lvl( - table: &mut UnifTable, - rrows: UnifRecordRowsUnrolling, - level: VarLevel, - ) -> UnifRecordRowsUnrolling { - rrows.map_state( - |uty, table| Box::new(update_utype_with_lvl(table, *uty, level)), - |rrows, table| Box::new(update_rrows_with_lvl(table, *rrows, level)), - table, - ) - } - - fn update_erows_unr_with_lvl( - table: &mut UnifTable, - erows: UnifEnumRowsUnrolling, - level: VarLevel, - ) -> UnifEnumRowsUnrolling { - erows.map_state( - |uty, table| Box::new(update_utype_with_lvl(table, *uty, level)), - |erows, table| Box::new(update_erows_with_lvl(table, *erows, level)), - table, - ) - } - - fn update_utype_with_lvl( - table: &mut UnifTable, - utype: UnifType, - level: VarLevel, - ) -> UnifType { - let utype = utype.into_root(table); - - match utype { - UnifType::Concrete { - typ, - var_levels_data, - } => { - let typ = typ.map_state( - |uty, table| Box::new(update_utype_with_lvl(table, *uty, level)), - |rrows, table| update_rrows_with_lvl(table, rrows, level), - |erows, table| update_erows_with_lvl(table, erows, level), - |ctr, _| ctr, - table, - ); - - // See [^var-level-kinds] - UnifType::Concrete { - typ, - var_levels_data, - } - } - UnifType::UnifVar { .. 
} | UnifType::Constant(_) => utype, - } - } - - fn update_rrows_with_lvl( - table: &mut UnifTable, - rrows: UnifRecordRows, - level: VarLevel, - ) -> UnifRecordRows { - let rrows = rrows.into_root(table); - - match rrows { - UnifRecordRows::UnifVar { id, init_level } => { - if table.rrows[id].level > level { - table.rrows[id].level = level; - } - - UnifRecordRows::UnifVar { id, init_level } - } - UnifRecordRows::Concrete { - rrows, - var_levels_data, - } if var_levels_data.upper_bound > level => { - let level = var_levels_data - .pending - .map(|pending_level| max(pending_level, level)) - .unwrap_or(level); - let rrows = update_rrows_unr_with_lvl(table, rrows, level); - - UnifRecordRows::Concrete { - rrows, - var_levels_data: VarLevelsData { - upper_bound: level, - pending: None, - }, - } - } - UnifRecordRows::Constant(_) | UnifRecordRows::Concrete { .. } => rrows, - } - } - - fn update_erows_with_lvl( - table: &mut UnifTable, - erows: UnifEnumRows, - level: VarLevel, - ) -> UnifEnumRows { - let erows = erows.into_root(table); - - match erows { - UnifEnumRows::Concrete { - erows, - var_levels_data, - } => { - let erows = update_erows_unr_with_lvl(table, erows, level); - - // See [^var-level-kinds] - UnifEnumRows::Concrete { - erows, - var_levels_data, - } - } - UnifEnumRows::UnifVar { .. } | UnifEnumRows::Constant(_) => erows, - } - } - - fn update_rrows( - table: &mut UnifTable, - rrows: UnifRecordRows, - constant_level: VarLevel, - ) -> (UnifRecordRows, bool) { - match rrows { - UnifRecordRows::UnifVar { .. } => { - // We should never end up updating the level of a unification variable, as this - // update is done on the spot. 
- debug_assert!(false); - - (rrows, false) - } - UnifRecordRows::Concrete { - rrows, - var_levels_data: - VarLevelsData { - pending: Some(pending_level), - upper_bound, - }, - } => { - // See [^irrelevant-level-update] - if upper_bound < constant_level || pending_level >= constant_level { - return ( - UnifRecordRows::Concrete { - rrows, - var_levels_data: VarLevelsData { - upper_bound: pending_level, - pending: Some(pending_level), - }, - }, - true, - ); - } - - let rrows = if upper_bound > pending_level { - update_rrows_unr_with_lvl(table, rrows, pending_level) - } else { - rrows - }; - - ( - UnifRecordRows::Concrete { - rrows, - var_levels_data: VarLevelsData { - upper_bound: pending_level, - pending: None, - }, - }, - false, - ) - } - // See [^ignore-no-pending-level] - UnifRecordRows::Constant(_) | UnifRecordRows::Concrete { .. } => (rrows, false), - } - } - - let rest = std::mem::take(&mut self.pending_rrows_updates) - .into_iter() - .filter(|id| { - // unwrap(): if a unification variable has been push on the update stack, it - // has been been by `assign_rrows`, and thus MUST have been assigned to - // something. - let rrows = self.rrows[*id].value.take().unwrap(); - let (new_rrows, delay) = update_rrows(self, rrows, constant_level); - self.rrows[*id].value = Some(new_rrows); - - delay - }) - .collect(); - - self.pending_rrows_updates = rest; - } - - /// See `force_type_updates`. Same as `force_type_updates`, but when unifying an enum row - /// unification variable. 
- pub fn force_erows_updates(&mut self, constant_level: VarLevel) { - fn update_rrows_unr_with_lvl( - table: &mut UnifTable, - rrows: UnifRecordRowsUnrolling, - level: VarLevel, - ) -> UnifRecordRowsUnrolling { - rrows.map_state( - |uty, table| Box::new(update_utype_with_lvl(table, *uty, level)), - |rrows, table| Box::new(update_rrows_with_lvl(table, *rrows, level)), - table, - ) - } - - fn update_erows_unr_with_lvl( - table: &mut UnifTable, - erows: UnifEnumRowsUnrolling, - level: VarLevel, - ) -> UnifEnumRowsUnrolling { - erows.map_state( - |uty, table| Box::new(update_utype_with_lvl(table, *uty, level)), - |erows, table| Box::new(update_erows_with_lvl(table, *erows, level)), - table, - ) - } - - fn update_utype_with_lvl( - table: &mut UnifTable, - utype: UnifType, - level: VarLevel, - ) -> UnifType { - let utype = utype.into_root(table); - - match utype { - UnifType::Concrete { - typ, - var_levels_data, - } => { - let typ = typ.map_state( - |uty, table| Box::new(update_utype_with_lvl(table, *uty, level)), - |rrows, table| update_rrows_with_lvl(table, rrows, level), - |erows, table| update_erows_with_lvl(table, erows, level), - |ctr, _| ctr, - table, - ); - - // See [^var-level-kinds] - UnifType::Concrete { - typ, - var_levels_data, - } - } - UnifType::UnifVar { .. } | UnifType::Constant(_) => utype, - } - } - - fn update_rrows_with_lvl( - table: &mut UnifTable, - rrows: UnifRecordRows, - level: VarLevel, - ) -> UnifRecordRows { - let rrows = rrows.into_root(table); - - match rrows { - UnifRecordRows::Concrete { - rrows, - var_levels_data, - } => { - let rrows = update_rrows_unr_with_lvl(table, rrows, level); - - // See [^var-level-kinds] - UnifRecordRows::Concrete { - rrows, - var_levels_data, - } - } - UnifRecordRows::UnifVar { .. 
} | UnifRecordRows::Constant(_) => rrows, - } - } - - fn update_erows_with_lvl( - table: &mut UnifTable, - erows: UnifEnumRows, - level: VarLevel, - ) -> UnifEnumRows { - let erows = erows.into_root(table); - - match erows { - UnifEnumRows::UnifVar { id, init_level } => { - if table.erows[id].level > level { - table.erows[id].level = level; - } - - UnifEnumRows::UnifVar { id, init_level } - } - UnifEnumRows::Concrete { - erows, - var_levels_data, - } if var_levels_data.upper_bound > level => { - let level = var_levels_data - .pending - .map(|pending_level| max(pending_level, level)) - .unwrap_or(level); - let erows = update_erows_unr_with_lvl(table, erows, level); - - UnifEnumRows::Concrete { - erows, - var_levels_data: VarLevelsData { - upper_bound: level, - pending: None, - }, - } - } - UnifEnumRows::Constant(_) | UnifEnumRows::Concrete { .. } => erows, - } - } - - fn update_erows( - table: &mut UnifTable, - erows: UnifEnumRows, - constant_level: VarLevel, - ) -> (UnifEnumRows, bool) { - match erows { - UnifEnumRows::UnifVar { .. } => { - // We should never end up updating the level of a unification variable, as this - // update is done on the spot. 
- debug_assert!(false); - - (erows, false) - } - UnifEnumRows::Concrete { - erows, - var_levels_data: - VarLevelsData { - pending: Some(pending_level), - upper_bound, - }, - } => { - // See [^irrelevant-level-update] - if upper_bound < constant_level || pending_level >= constant_level { - return ( - UnifEnumRows::Concrete { - erows, - var_levels_data: VarLevelsData { - upper_bound: pending_level, - pending: Some(pending_level), - }, - }, - true, - ); - } - - let erows = if upper_bound > pending_level { - update_erows_unr_with_lvl(table, erows, pending_level) - } else { - erows - }; - - ( - UnifEnumRows::Concrete { - erows, - var_levels_data: VarLevelsData { - upper_bound: pending_level, - pending: None, - }, - }, - false, - ) - } - // See [^ignore-no-pending-level] - UnifEnumRows::Constant(_) | UnifEnumRows::Concrete { .. } => (erows, false), - } - } - - let rest = std::mem::take(&mut self.pending_erows_updates) - .into_iter() - .filter(|id| { - // unwrap(): if a unification variable has been pushed on the update stack, it must - // have been done by `assign_erows`, and thus MUST have been assigned to something. - let erows = self.erows[*id].value.take().unwrap(); - let (new_erows, delay) = update_erows(self, erows, constant_level); - self.erows[*id].value = Some(new_erows); - - delay - }) - .collect(); - - self.pending_erows_updates = rest; - } -} - -/// Row constraints. -/// -/// A row constraint applies to a unification variable appearing inside a row type (such as `r` in -/// `{ someId: SomeType ; r }` or `[| 'Foo Number, 'Baz; r |]`). A row constraint is a set of -/// identifiers that said row must NOT contain, to forbid ill-formed types with multiple -/// declaration of the same id, for example `{ a: Number, a: String}` or `[| 'Foo String, 'Foo -/// Number |]`. -/// -/// Note that because the syntax (and pattern matching likewise) distinguishes between `'Foo` and -/// `'Foo some_arg`, the type `[| 'Foo, 'Foo SomeType |]` is unproblematic for typechecking. 
In -/// some sense, enum tags and enum variants live in a different dimension. It looks like we should -/// use separate sets of constraints for enum tag constraints and enum variants constraints. But a -/// set just for enum tag constraints is useless, because enum tags can never conflict, as they -/// don't have any argument: `'Foo` always "agrees with" another `'Foo` definition. In consequence, -/// we simply record enum variants constraints and ignore enum tags. -/// -/// Note that a `VarId` always refer to either a type unification variable, a record row -/// unification variable or an enum row unification variable. Thus, we can use a single constraint -/// set per variable id (which isn't used at all for type unification variables). Because we expect -/// the map to be rather sparse, we use a `HashMap` instead of a `Vec`. -pub type RowConstrs = HashMap>; - -pub(super) trait PropagateConstrs { - /// Check that unifying a variable with a type doesn't violate rows constraints, and update the - /// row constraints of the unified type accordingly if needed. - /// - /// When a unification variable `UnifVar(p)` is unified with a type `uty` which is either a row type - /// or another unification variable which could be later unified with a row type itself, the - /// following operations are required: - /// - /// 1. If `uty` is a concrete row, check that it doesn't contain an identifier which is forbidden by - /// a row constraint on `p`. - /// 2. If `uty` is either a unification variable `u` or a row type ending with a unification - /// variable `u`, we must add the constraints of `p` to the constraints of `u`. Indeed, take the - /// following situation: `p` appears in a row type `{a: Number ; p}`, hence has a constraint that - /// it must not contain a field `a`. Then `p` is unified with a fresh type variable `u`. If we - /// don't constrain `u`, `u` could be unified later with a row type `{a : String}` which violates - /// the original constraint on `p`. 
Thus, when unifying `p` with `u` or a row ending with `u`, - /// `u` must inherit all the constraints of `p`. - fn propagate_constrs(&self, constr: &mut RowConstrs, var_id: VarId) - -> Result<(), RowUnifError>; -} - -impl PropagateConstrs for UnifRecordRows { - fn propagate_constrs( - &self, - constr: &mut RowConstrs, - var_id: VarId, - ) -> Result<(), RowUnifError> { - fn propagate( - constr: &mut RowConstrs, - var_id: VarId, - var_constr: HashSet, - rrows: &UnifRecordRows, - ) -> Result<(), RowUnifError> { - match rrows { - UnifRecordRows::Concrete { - rrows: RecordRowsF::Extend { row, .. }, - .. - } if var_constr.contains(&row.id.ident()) => { - Err(RowUnifError::RecordRowConflict(row.clone())) - } - UnifRecordRows::Concrete { - rrows: RecordRowsF::Extend { tail, .. }, - .. - } => propagate(constr, var_id, var_constr, tail), - UnifRecordRows::UnifVar { id, .. } if *id != var_id => { - if let Some(tail_constr) = constr.get_mut(id) { - tail_constr.extend(var_constr); - } else { - constr.insert(*id, var_constr); - } - - Ok(()) - } - _ => Ok(()), - } - } - - if let Some(var_constr) = constr.remove(&var_id) { - propagate(constr, var_id, var_constr, self) - } else { - Ok(()) - } - } -} - -impl PropagateConstrs for UnifEnumRows { - fn propagate_constrs( - &self, - constr: &mut RowConstrs, - var_id: VarId, - ) -> Result<(), RowUnifError> { - fn propagate( - constr: &mut RowConstrs, - var_id: VarId, - var_constr: HashSet, - erows: &UnifEnumRows, - ) -> Result<(), RowUnifError> { - match erows { - UnifEnumRows::Concrete { - // If the row is an enum tag (ie `typ` is `None`), it can't cause any conflict. - // See [RowConstrs] for more details. - erows: - EnumRowsF::Extend { - row: - row @ UnifEnumRow { - id: _, - typ: Some(_), - }, - .. - }, - .. - } if var_constr.contains(&row.id.ident()) => { - Err(RowUnifError::EnumRowConflict(row.clone())) - } - UnifEnumRows::Concrete { - erows: EnumRowsF::Extend { tail, .. }, - .. 
- } => propagate(constr, var_id, var_constr, tail), - UnifEnumRows::UnifVar { id, .. } if *id != var_id => { - if let Some(tail_constr) = constr.get_mut(id) { - tail_constr.extend(var_constr); - } else { - constr.insert(*id, var_constr); - } - - Ok(()) - } - _ => Ok(()), - } - } - - if let Some(var_constr) = constr.remove(&var_id) { - propagate(constr, var_id, var_constr, self) - } else { - Ok(()) - } - } -} - -/// Types which can be unified. -pub(super) trait Unify { - type Error; - - /// Try to unify two types. Unification corresponds to imposing an equality constraints on - /// those types. This can fail if the types can't be matched. - fn unify(self, t2: Self, state: &mut State, ctxt: &Context) -> Result<(), Self::Error>; -} - -impl Unify for UnifType { - type Error = UnifError; - - fn unify(self, t2: UnifType, state: &mut State, ctxt: &Context) -> Result<(), UnifError> { - let t1 = self.into_root(state.table); - let t2 = t2.into_root(state.table); - - // t1 and t2 are roots of the type - match (t1, t2) { - // If either type is a wildcard, unify with the associated type var - ( - UnifType::Concrete { - typ: TypeF::Wildcard(id), - .. - }, - ty2, - ) - | ( - ty2, - UnifType::Concrete { - typ: TypeF::Wildcard(id), - .. 
- }, - ) => { - let ty1 = get_wildcard_var(state.table, ctxt.var_level, state.wildcard_vars, id); - ty1.unify(ty2, state, ctxt) - } - ( - UnifType::Concrete { - typ: s1, - var_levels_data: _, - }, - UnifType::Concrete { - typ: s2, - var_levels_data: _, - }, - ) => match (s1, s2) { - (TypeF::Dyn, TypeF::Dyn) - | (TypeF::Number, TypeF::Number) - | (TypeF::Bool, TypeF::Bool) - | (TypeF::String, TypeF::String) - | (TypeF::Symbol, TypeF::Symbol) => Ok(()), - (TypeF::Array(uty1), TypeF::Array(uty2)) => uty1.unify(*uty2, state, ctxt), - (TypeF::Arrow(s1s, s1t), TypeF::Arrow(s2s, s2t)) => { - s1s.clone() - .unify((*s2s).clone(), state, ctxt) - .map_err(|err| UnifError::DomainMismatch { - expected: UnifType::concrete(TypeF::Arrow(s1s.clone(), s1t.clone())), - inferred: UnifType::concrete(TypeF::Arrow(s2s.clone(), s2t.clone())), - cause: Box::new(err), - })?; - s1t.clone() - .unify((*s2t).clone(), state, ctxt) - .map_err(|err| UnifError::CodomainMismatch { - expected: UnifType::concrete(TypeF::Arrow(s1s, s1t)), - inferred: UnifType::concrete(TypeF::Arrow(s2s, s2t)), - cause: Box::new(err), - }) - } - (TypeF::Contract((t1, env1)), TypeF::Contract((t2, env2))) - if eq::contract_eq(state.table.max_uvars_count(), &t1, &env1, &t2, &env2) => - { - Ok(()) - } - (TypeF::Enum(erows1), TypeF::Enum(erows2)) => erows1 - .clone() - .unify(erows2.clone(), state, ctxt) - .map_err(|err| { - err.into_unif_err(mk_uty_enum!(; erows1), mk_uty_enum!(; erows2)) - }), - (TypeF::Record(rrows1), TypeF::Record(rrows2)) => rrows1 - .clone() - .unify(rrows2.clone(), state, ctxt) - .map_err(|err| { - err.into_unif_err(mk_uty_record!(; rrows1), mk_uty_record!(; rrows2)) - }), - ( - TypeF::Dict { - type_fields: uty1, .. - }, - TypeF::Dict { - type_fields: uty2, .. 
- }, - ) => uty1.unify(*uty2, state, ctxt), - ( - TypeF::Forall { - var: var1, - var_kind: var_kind1, - body: body1, - }, - TypeF::Forall { - var: var2, - var_kind: var_kind2, - body: body2, - }, - ) if var_kind1 == var_kind2 => { - // Very stupid (slow) implementation - let (substd1, substd2) = match var_kind1 { - VarKind::Type => { - let constant_type = state.table.fresh_type_const(ctxt.var_level); - ( - body1.subst(&var1, &constant_type), - body2.subst(&var2, &constant_type), - ) - } - VarKind::RecordRows { .. } => { - let constant_type = state.table.fresh_rrows_const(ctxt.var_level); - ( - body1.subst(&var1, &constant_type), - body2.subst(&var2, &constant_type), - ) - } - VarKind::EnumRows { .. } => { - let constant_type = state.table.fresh_erows_const(ctxt.var_level); - ( - body1.subst(&var1, &constant_type), - body2.subst(&var2, &constant_type), - ) - } - }; - - substd1.unify(substd2, state, ctxt) - } - (TypeF::Var(ident), _) | (_, TypeF::Var(ident)) => { - Err(UnifError::UnboundTypeVariable(ident.into())) - } - (ty1, ty2) => Err(UnifError::TypeMismatch { - expected: UnifType::concrete(ty1), - inferred: UnifType::concrete(ty2), - }), - }, - (UnifType::UnifVar { id, .. }, uty) | (uty, UnifType::UnifVar { id, .. }) => { - // [^check-unif-var-level]: If we are unifying a variable with a rigid type - // variable, force potential unification variable level updates and check that the - // level of the unification variable is greater or equals to the constant: that is, - // that the variable doesn't "escape its scope". This is required to handle - // polymorphism soundly, and is the whole point of all the machinery around variable - // levels. 
- if let UnifType::Constant(cst_id) = uty { - let constant_level = state.table.get_level(cst_id); - state.table.force_type_updates(constant_level); - - if state.table.get_level(id) < constant_level { - return Err(UnifError::VarLevelMismatch { - constant_id: cst_id, - var_kind: VarKindDiscriminant::Type, - }); - } - } - - state.table.assign_type(id, uty); - Ok(()) - } - (UnifType::Constant(i1), UnifType::Constant(i2)) if i1 == i2 => Ok(()), - (UnifType::Constant(i1), UnifType::Constant(i2)) => Err(UnifError::ConstMismatch { - var_kind: VarKindDiscriminant::Type, - expected_const_id: i1, - inferred_const_id: i2, - }), - (ty, UnifType::Constant(i)) | (UnifType::Constant(i), ty) => { - Err(UnifError::WithConst { - var_kind: VarKindDiscriminant::Type, - expected_const_id: i, - inferred: ty, - }) - } - } - } -} - -impl Unify for UnifEnumRows { - type Error = RowUnifError; - - fn unify( - self, - uerows2: UnifEnumRows, - state: &mut State, - ctxt: &Context, - ) -> Result<(), RowUnifError> { - let uerows1 = self.into_root(state.table); - let uerows2 = uerows2.into_root(state.table); - - match (uerows1, uerows2) { - ( - UnifEnumRows::Concrete { - erows: erows1, - var_levels_data: _, - }, - UnifEnumRows::Concrete { - erows: erows2, - var_levels_data: var_levels2, - }, - ) => match (erows1, erows2) { - (EnumRowsF::TailVar(id), _) | (_, EnumRowsF::TailVar(id)) => { - Err(RowUnifError::UnboundTypeVariable(id)) - } - (EnumRowsF::Empty, EnumRowsF::Empty) => Ok(()), - ( - EnumRowsF::Empty, - EnumRowsF::Extend { - row: UnifEnumRow { id, .. }, - .. - }, - ) => Err(RowUnifError::ExtraRow(id)), - ( - EnumRowsF::Extend { - row: UnifEnumRow { id, .. }, - .. - }, - EnumRowsF::Empty, - ) => Err(RowUnifError::MissingRow(id)), - (EnumRowsF::Extend { row, tail }, erows2 @ EnumRowsF::Extend { .. 
}) => { - let uerows2 = UnifEnumRows::Concrete { - erows: erows2, - var_levels_data: var_levels2, - }; - - let (ty2_result, t2_without_row) = - //TODO[adts]: it's ugly to create a temporary Option just to please the - //Box/Nobox types, we should find a better signature for remove_row - uerows2.remove_row(&row.id, &row.typ.clone().map(|typ| *typ), state, ctxt.var_level).map_err(|err| match err { - RemoveRowError::Missing => RowUnifError::MissingRow(row.id), - RemoveRowError::Conflict => RowUnifError::EnumRowConflict(row.clone()), - })?; - - // The alternative to this if-condition is `RemoveRowResult::Extended`, which - // means that `t2` could be successfully extended with the row `id typ`, in - // which case we don't have to perform additional unification for this specific - // row - if let RemoveRowResult::Extracted(ty2) = ty2_result { - match (row.typ, ty2) { - (Some(typ), Some(ty2)) => { - typ.unify(ty2, state, ctxt).map_err(|err| { - RowUnifError::EnumRowMismatch { - id: row.id, - cause: Some(Box::new(err)), - } - })?; - } - (Some(_), None) | (None, Some(_)) => { - return Err(RowUnifError::EnumRowMismatch { - id: row.id, - cause: None, - }); - } - (None, None) => (), - } - } - - tail.unify(t2_without_row, state, ctxt) - } - }, - (UnifEnumRows::UnifVar { id, init_level: _ }, uerows) - | (uerows, UnifEnumRows::UnifVar { id, init_level: _ }) => { - // see [^check-unif-var-level] - if let UnifEnumRows::Constant(cst_id) = uerows { - let constant_level = state.table.get_erows_level(cst_id); - state.table.force_erows_updates(constant_level); - - if state.table.get_erows_level(id) < constant_level { - return Err(RowUnifError::VarLevelMismatch { - constant_id: cst_id, - var_kind: VarKindDiscriminant::EnumRows, - }); - } - } - - uerows.propagate_constrs(state.constr, id)?; - state.table.assign_erows(id, uerows); - Ok(()) - } - (UnifEnumRows::Constant(i1), UnifEnumRows::Constant(i2)) if i1 == i2 => Ok(()), - (UnifEnumRows::Constant(i1), UnifEnumRows::Constant(i2)) => { - 
Err(RowUnifError::ConstMismatch { - var_kind: VarKindDiscriminant::EnumRows, - expected_const_id: i1, - inferred_const_id: i2, - }) - } - (uerows, UnifEnumRows::Constant(i)) | (UnifEnumRows::Constant(i), uerows) => { - //TODO ROWS: should we refactor RowUnifError as well? - Err(RowUnifError::WithConst { - var_kind: VarKindDiscriminant::EnumRows, - expected_const_id: i, - inferred: UnifType::concrete(TypeF::Enum(uerows)), - }) - } - } - } -} - -impl Unify for UnifRecordRows { - type Error = RowUnifError; - - fn unify( - self, - urrows2: UnifRecordRows, - state: &mut State, - ctxt: &Context, - ) -> Result<(), RowUnifError> { - let urrows1 = self.into_root(state.table); - let urrows2 = urrows2.into_root(state.table); - - match (urrows1, urrows2) { - ( - UnifRecordRows::Concrete { - rrows: rrows1, - var_levels_data: _, - }, - UnifRecordRows::Concrete { - rrows: rrows2, - var_levels_data: var_levels2, - }, - ) => match (rrows1, rrows2) { - (RecordRowsF::TailVar(id), _) | (_, RecordRowsF::TailVar(id)) => { - Err(RowUnifError::UnboundTypeVariable(id)) - } - (RecordRowsF::Empty, RecordRowsF::Empty) - | (RecordRowsF::TailDyn, RecordRowsF::TailDyn) => Ok(()), - (RecordRowsF::Empty, RecordRowsF::TailDyn) => Err(RowUnifError::ExtraDynTail), - (RecordRowsF::TailDyn, RecordRowsF::Empty) => Err(RowUnifError::MissingDynTail), - ( - RecordRowsF::Empty, - RecordRowsF::Extend { - row: UnifRecordRow { id, .. }, - .. - }, - ) - | ( - RecordRowsF::TailDyn, - RecordRowsF::Extend { - row: UnifRecordRow { id, .. }, - .. - }, - ) => Err(RowUnifError::ExtraRow(id)), - ( - RecordRowsF::Extend { - row: UnifRecordRow { id, .. }, - .. - }, - RecordRowsF::TailDyn, - ) - | ( - RecordRowsF::Extend { - row: UnifRecordRow { id, .. }, - .. - }, - RecordRowsF::Empty, - ) => Err(RowUnifError::MissingRow(id)), - (RecordRowsF::Extend { row, tail }, rrows2 @ RecordRowsF::Extend { .. 
}) => { - let urrows2 = UnifRecordRows::Concrete { - rrows: rrows2, - var_levels_data: var_levels2, - }; - - let (ty2_result, urrows2_without_ty2) = urrows2 - .remove_row(&row.id, &row.typ, state, ctxt.var_level) - .map_err(|err| match err { - RemoveRowError::Missing => RowUnifError::MissingRow(row.id), - RemoveRowError::Conflict => { - RowUnifError::RecordRowConflict(row.clone()) - } - })?; - - // The alternative to this if-condition is `RemoveRowResult::Extended`, which - // means that `t2` could be successfully extended with the row `id typ`, in - // which case we don't have to perform additional unification for this specific - // row - if let RemoveRowResult::Extracted(ty2) = ty2_result { - row.typ.unify(ty2, state, ctxt).map_err(|err| { - RowUnifError::RecordRowMismatch { - id: row.id, - cause: Box::new(err), - } - })?; - } - - tail.unify(urrows2_without_ty2, state, ctxt) - } - }, - (UnifRecordRows::UnifVar { id, init_level: _ }, urrows) - | (urrows, UnifRecordRows::UnifVar { id, init_level: _ }) => { - // see [^check-unif-var-level] - if let UnifRecordRows::Constant(cst_id) = urrows { - let constant_level = state.table.get_rrows_level(cst_id); - state.table.force_rrows_updates(constant_level); - - if state.table.get_rrows_level(id) < constant_level { - return Err(RowUnifError::VarLevelMismatch { - constant_id: cst_id, - var_kind: VarKindDiscriminant::RecordRows, - }); - } - } - - urrows.propagate_constrs(state.constr, id)?; - state.table.assign_rrows(id, urrows); - Ok(()) - } - (UnifRecordRows::Constant(i1), UnifRecordRows::Constant(i2)) if i1 == i2 => Ok(()), - (UnifRecordRows::Constant(i1), UnifRecordRows::Constant(i2)) => { - Err(RowUnifError::ConstMismatch { - var_kind: VarKindDiscriminant::RecordRows, - expected_const_id: i1, - inferred_const_id: i2, - }) - } - (urrows, UnifRecordRows::Constant(i)) | (UnifRecordRows::Constant(i), urrows) => { - Err(RowUnifError::WithConst { - var_kind: VarKindDiscriminant::RecordRows, - expected_const_id: i, - inferred: 
UnifType::concrete(TypeF::Record(urrows)), - }) - } - } - } -} - -#[derive(Clone, Copy, Debug)] -pub(super) enum RemoveRowError { - // The row to add was missing and the row type was closed (no free unification variable in tail - // position). - Missing, - // The row to add was missing and the row type couldn't be extended because of row constraints. - Conflict, -} - -#[derive(Clone, Debug)] -pub enum RemoveRowResult { - Extracted(RowContent), - Extended, -} - -pub(super) trait RemoveRow: Sized { - /// The row data minus the identifier. - type RowContent: Clone; - - /// Fetch a specific `row_id` from a row type, and return the content of the row together with - /// the original row type without the found row. - /// - /// If the searched row isn't found: - /// - /// - If the row type is extensible, i.e. it ends with a free unification variable in tail - /// position, this function adds the missing row (with `row.types` as a type for record rows, - /// if allowed by row constraints) and then acts as if `remove_row` was called again on - /// this extended row type. That is, `remove_row` returns the new row and the extended type - /// without the added row). - /// - Otherwise, raise a missing row error. - /// - /// # Motivation - /// - /// This method is used as part of row unification: let's say we want to unify `{ r1, ..tail1 - /// }` with `{ ..tail2 }` where `r1` is a row (the head of the left hand side rows), and - /// `tail1` and `tail2` are sequences of rows. - /// - /// For those to unify, we must have either: - /// - /// - `r1` is somewhere in `tail2`, and `tail1` unifies with `{..tail2'}` where `tail2'` is - /// `tail2` without `r1`. - /// - `tail2` is extensible, in which case we can extend `tail2` with `r1`, assuming that - /// `tail1` unifies with `{..tail2'}`, where `tail2'` is `tail2` after extending with `r1` - /// and then removing it. Modulo fresh unification variable shuffling, `tail2'` is in fact - /// isomorphic to `tail2` before it was extended. 
- /// - /// When we unify two row types, we destructure the left hand side to extract the head `r1` and - /// the tail `tail1`. Then, we try to find and extract `r1` from `tail2`. If `r1` was found, we - /// additionally unify the extracted type found in `tail2` (returned as part of - /// [RemoveRowResult::Extracted]) with `r1.typ` to make sure they agree. In case of extension, - /// we were free to chose the type of the new added row, which we set to be `r1.typ` (the - /// `row_content` parameter of `remove_row`), and there's no additional check to perform (and - /// indeed [RemoveRowResult::Extended] doesn't carry any information). - /// - /// Finally, since `remove_row` returns the initial row type minus the extracted row, we can go - /// on recursively and unify `tail1` with this rest. - /// - /// # Parameters - /// - /// - `row_id`: the identifier of the row to extract - /// - `row_content`: as explained above, `remove_row` is used in the context of unifying two row - /// types. If `self` doesn't contain `row_id` but is extensible, we must add a corresponding - /// new row: we fill it with `row_content`. In the context of unification, the is the content of - /// the row coming from the other row type. - /// - `state`: the unification state - /// - `var_level`: the ambient variable level - fn remove_row( - self, - row_id: &LocIdent, - row_content: &Self::RowContent, - state: &mut State, - var_level: VarLevel, - ) -> Result<(RemoveRowResult, Self), RemoveRowError>; -} - -impl RemoveRow for UnifRecordRows { - type RowContent = UnifType; - - fn remove_row( - self, - target: &LocIdent, - target_content: &Self::RowContent, - state: &mut State, - var_level: VarLevel, - ) -> Result<(RemoveRowResult, UnifRecordRows), RemoveRowError> { - let rrows = self.into_root(state.table); - - match rrows { - UnifRecordRows::Concrete { rrows, .. 
} => match rrows { - RecordRowsF::Empty | RecordRowsF::TailDyn | RecordRowsF::TailVar(_) => { - Err(RemoveRowError::Missing) - } - RecordRowsF::Extend { - row: next_row, - tail, - } => { - if target.ident() == next_row.id.ident() { - Ok((RemoveRowResult::Extracted(*next_row.typ), *tail)) - } else { - let (extracted_row, rest) = - tail.remove_row(target, target_content, state, var_level)?; - Ok(( - extracted_row, - UnifRecordRows::concrete(RecordRowsF::Extend { - row: next_row, - tail: Box::new(rest), - }), - )) - } - } - }, - UnifRecordRows::UnifVar { id: var_id, .. } => { - let tail_var_id = state.table.fresh_rrows_var_id(var_level); - // We have to manually insert the constraint that `tail_var_id` can't contain a row - // `target`, to avoid producing ill-formed record rows later - state - .constr - .insert(tail_var_id, HashSet::from([target.ident()])); - - let row_to_insert = UnifRecordRow { - id: *target, - typ: Box::new(target_content.clone()), - }; - - let tail_var = UnifRecordRows::UnifVar { - id: tail_var_id, - init_level: var_level, - }; - - let tail_extended = UnifRecordRows::concrete(RecordRowsF::Extend { - row: row_to_insert, - tail: Box::new(tail_var.clone()), - }); - - tail_extended - .propagate_constrs(state.constr, var_id) - .map_err(|_| RemoveRowError::Conflict)?; - state.table.assign_rrows(var_id, tail_extended); - - Ok((RemoveRowResult::Extended, tail_var)) - } - UnifRecordRows::Constant(_) => Err(RemoveRowError::Missing), - } - } -} - -impl RemoveRow for UnifEnumRows { - type RowContent = Option; - - fn remove_row( - self, - target: &LocIdent, - target_content: &Self::RowContent, - state: &mut State, - var_level: VarLevel, - ) -> Result<(RemoveRowResult, UnifEnumRows), RemoveRowError> { - let uerows = self.into_root(state.table); - - match uerows { - UnifEnumRows::Concrete { erows, .. 
} => match erows { - EnumRowsF::Empty | EnumRowsF::TailVar(_) => Err(RemoveRowError::Missing), - EnumRowsF::Extend { - row: next_row, - tail, - } => { - // Enum variants and enum tags don't conflict, and can thus coexist in the same - // row type (for example, [| 'Foo Number, 'Foo |]). In some sense, they live - // inside different dimensions. Thus, when matching rows, we don't only compare - // the tag but also the nature of the enum row (tag vs variant) - if target.ident() == next_row.id.ident() - && target_content.is_some() == next_row.typ.is_some() - { - Ok(( - RemoveRowResult::Extracted(next_row.typ.map(|typ| *typ)), - *tail, - )) - } else { - let (extracted_row, rest) = - tail.remove_row(target, target_content, state, var_level)?; - Ok(( - extracted_row, - UnifEnumRows::concrete(EnumRowsF::Extend { - row: next_row, - tail: Box::new(rest), - }), - )) - } - } - }, - UnifEnumRows::UnifVar { id: var_id, .. } => { - let tail_var_id = state.table.fresh_erows_var_id(var_level); - - // Enum tag are ignored for row conflict. 
See [RowConstrs] - if target_content.is_some() { - state - .constr - .insert(tail_var_id, HashSet::from([target.ident()])); - } - - let row_to_insert = UnifEnumRow { - id: *target, - typ: target_content.clone().map(Box::new), - }; - - let tail_var = UnifEnumRows::UnifVar { - id: tail_var_id, - init_level: var_level, - }; - - let tail_extended = UnifEnumRows::concrete(EnumRowsF::Extend { - row: row_to_insert, - tail: Box::new(tail_var.clone()), - }); - - tail_extended - .propagate_constrs(state.constr, var_id) - .map_err(|_| RemoveRowError::Conflict)?; - state.table.assign_erows(var_id, tail_extended); - - Ok((RemoveRowResult::Extended, tail_var)) - } - UnifEnumRows::Constant(_) => Err(RemoveRowError::Missing), - } - } -} From a7f5174844ef69813037b1da5efd7b2bf74893c1 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 23 Dec 2024 16:34:03 +0100 Subject: [PATCH 2/6] Promote bytecode::typecheck to typecheck --- core/src/bytecode/mod.rs | 1 - core/src/{bytecode => }/typecheck/eq.rs | 0 core/src/{bytecode => }/typecheck/error.rs | 0 core/src/{bytecode => }/typecheck/mk_uniftype.rs | 0 core/src/{bytecode => }/typecheck/mod.rs | 0 core/src/{bytecode => }/typecheck/operation.rs | 0 core/src/{bytecode => }/typecheck/pattern.rs | 0 core/src/{bytecode => }/typecheck/record.rs | 0 core/src/{bytecode => }/typecheck/reporting.rs | 0 core/src/{bytecode => }/typecheck/subtyping.rs | 0 core/src/{bytecode => }/typecheck/unif.rs | 0 11 files changed, 1 deletion(-) rename core/src/{bytecode => }/typecheck/eq.rs (100%) rename core/src/{bytecode => }/typecheck/error.rs (100%) rename core/src/{bytecode => }/typecheck/mk_uniftype.rs (100%) rename core/src/{bytecode => }/typecheck/mod.rs (100%) rename core/src/{bytecode => }/typecheck/operation.rs (100%) rename core/src/{bytecode => }/typecheck/pattern.rs (100%) rename core/src/{bytecode => }/typecheck/record.rs (100%) rename core/src/{bytecode => }/typecheck/reporting.rs (100%) rename core/src/{bytecode => }/typecheck/subtyping.rs 
(100%) rename core/src/{bytecode => }/typecheck/unif.rs (100%) diff --git a/core/src/bytecode/mod.rs b/core/src/bytecode/mod.rs index be69349a14..b65bb35b55 100644 --- a/core/src/bytecode/mod.rs +++ b/core/src/bytecode/mod.rs @@ -4,4 +4,3 @@ //! default in mainline Nickel. pub mod ast; -pub mod typecheck; diff --git a/core/src/bytecode/typecheck/eq.rs b/core/src/typecheck/eq.rs similarity index 100% rename from core/src/bytecode/typecheck/eq.rs rename to core/src/typecheck/eq.rs diff --git a/core/src/bytecode/typecheck/error.rs b/core/src/typecheck/error.rs similarity index 100% rename from core/src/bytecode/typecheck/error.rs rename to core/src/typecheck/error.rs diff --git a/core/src/bytecode/typecheck/mk_uniftype.rs b/core/src/typecheck/mk_uniftype.rs similarity index 100% rename from core/src/bytecode/typecheck/mk_uniftype.rs rename to core/src/typecheck/mk_uniftype.rs diff --git a/core/src/bytecode/typecheck/mod.rs b/core/src/typecheck/mod.rs similarity index 100% rename from core/src/bytecode/typecheck/mod.rs rename to core/src/typecheck/mod.rs diff --git a/core/src/bytecode/typecheck/operation.rs b/core/src/typecheck/operation.rs similarity index 100% rename from core/src/bytecode/typecheck/operation.rs rename to core/src/typecheck/operation.rs diff --git a/core/src/bytecode/typecheck/pattern.rs b/core/src/typecheck/pattern.rs similarity index 100% rename from core/src/bytecode/typecheck/pattern.rs rename to core/src/typecheck/pattern.rs diff --git a/core/src/bytecode/typecheck/record.rs b/core/src/typecheck/record.rs similarity index 100% rename from core/src/bytecode/typecheck/record.rs rename to core/src/typecheck/record.rs diff --git a/core/src/bytecode/typecheck/reporting.rs b/core/src/typecheck/reporting.rs similarity index 100% rename from core/src/bytecode/typecheck/reporting.rs rename to core/src/typecheck/reporting.rs diff --git a/core/src/bytecode/typecheck/subtyping.rs b/core/src/typecheck/subtyping.rs similarity index 100% rename from 
core/src/bytecode/typecheck/subtyping.rs rename to core/src/typecheck/subtyping.rs diff --git a/core/src/bytecode/typecheck/unif.rs b/core/src/typecheck/unif.rs similarity index 100% rename from core/src/bytecode/typecheck/unif.rs rename to core/src/typecheck/unif.rs From 6946838b2cb8db71f446cfeb3302ef3e6eae26dc Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 23 Dec 2024 16:41:28 +0100 Subject: [PATCH 3/6] Fix compilation of newly plugged typecheck --- core/src/typecheck/mk_uniftype.rs | 62 +++++++++++++-------------- core/src/typecheck/mod.rs | 25 ++++++----- core/src/typecheck/operation.rs | 70 +++++++++++++++---------------- core/src/typecheck/pattern.rs | 4 +- core/src/typecheck/record.rs | 6 +-- core/src/typecheck/subtyping.rs | 2 +- core/src/typecheck/unif.rs | 4 +- 7 files changed, 86 insertions(+), 87 deletions(-) diff --git a/core/src/typecheck/mk_uniftype.rs b/core/src/typecheck/mk_uniftype.rs index a315f4cb33..8dbcc9b27d 100644 --- a/core/src/typecheck/mk_uniftype.rs +++ b/core/src/typecheck/mk_uniftype.rs @@ -4,106 +4,106 @@ use crate::typ::{DictTypeFlavour, TypeF}; /// Multi-ary arrow constructor for types implementing `Into`. #[macro_export] -macro_rules! mk_buty_arrow { +macro_rules! mk_uty_arrow { ($left:expr, $right:expr) => { - $crate::bytecode::typecheck::UnifType::concrete( + $crate::typecheck::UnifType::concrete( $crate::typ::TypeF::Arrow( - Box::new($crate::bytecode::typecheck::UnifType::from($left)), - Box::new($crate::bytecode::typecheck::UnifType::from($right)) + Box::new($crate::typecheck::UnifType::from($left)), + Box::new($crate::typecheck::UnifType::from($right)) ) ) }; ( $fst:expr, $snd:expr , $( $types:expr ),+ ) => { - $crate::mk_buty_arrow!($fst, $crate::mk_buty_arrow!($snd, $( $types ),+)) + $crate::mk_uty_arrow!($fst, $crate::mk_uty_arrow!($snd, $( $types ),+)) }; } /// Multi-ary enum row constructor for types implementing `Into`. -/// `mk_buty_enum_row!(id1, .., idn; tail)` correspond to `[| 'id1, .., 'idn; tail |]. 
With the +/// `mk_uty_enum_row!(id1, .., idn; tail)` correspond to `[| 'id1, .., 'idn; tail |]. With the /// addition of algebraic data types (enum variants), individual rows can also take an additional -/// type parameter, specified as a tuple: for example, `mk_buty_enum_row!(id1, (id2, ty2); tail)` +/// type parameter, specified as a tuple: for example, `mk_uty_enum_row!(id1, (id2, ty2); tail)` /// is `[| 'id1, 'id2 ty2; tail |]`. #[macro_export] -macro_rules! mk_buty_enum_row { +macro_rules! mk_uty_enum_row { () => { - $crate::bytecode::typecheck::UnifEnumRows::Concrete { + $crate::typecheck::UnifEnumRows::Concrete { erows: $crate::typ::EnumRowsF::Empty, - var_levels_data: $crate::bytecode::typecheck::VarLevelsData::new_no_uvars(), + var_levels_data: $crate::typecheck::VarLevelsData::new_no_uvars(), } }; (; $tail:expr) => { - $crate::bytecode::typecheck::UnifEnumRows::from($tail) + $crate::typecheck::UnifEnumRows::from($tail) }; ( ($id:expr, $ty:expr) $(, $rest:tt )* $(; $tail:expr)? ) => { - $crate::bytecode::typecheck::UnifEnumRows::concrete( + $crate::typecheck::UnifEnumRows::concrete( $crate::typ::EnumRowsF::Extend { row: $crate::typ::EnumRowF { id: $crate::identifier::LocIdent::from($id), typ: Some(Box::new($ty.into())), }, - tail: Box::new($crate::mk_buty_enum_row!($( $rest ),* $(; $tail)?)) + tail: Box::new($crate::mk_uty_enum_row!($( $rest ),* $(; $tail)?)) } ) }; ( $id:expr $(, $rest:tt )* $(; $tail:expr)? ) => { - $crate::bytecode::typecheck::UnifEnumRows::concrete( + $crate::typecheck::UnifEnumRows::concrete( $crate::typ::EnumRowsF::Extend { row: $crate::typ::EnumRowF { id: $crate::identifier::LocIdent::from($id), typ: None, }, - tail: Box::new($crate::mk_buty_enum_row!($( $rest ),* $(; $tail)?)) + tail: Box::new($crate::mk_uty_enum_row!($( $rest ),* $(; $tail)?)) } ) }; } -/// Multi-ary record row constructor for types implementing `Into`. `mk_buty_row!((id1, +/// Multi-ary record row constructor for types implementing `Into`. 
`mk_uty_row!((id1, /// ty1), .., (idn, tyn); tail)` correspond to `{id1: ty1, .., idn: tyn; tail}`. The tail can be /// omitted, in which case the empty row is uses as a tail instead. #[macro_export] -macro_rules! mk_buty_record_row { +macro_rules! mk_uty_record_row { () => { - $crate::bytecode::typecheck::UnifRecordRows::Concrete { + $crate::typecheck::UnifRecordRows::Concrete { rrows: $crate::typ::RecordRowsF::Empty, - var_levels_data: $crate::bytecode::typecheck::VarLevelsData::new_no_uvars() + var_levels_data: $crate::typecheck::VarLevelsData::new_no_uvars() } }; (; $tail:expr) => { - $crate::bytecode::typecheck::UnifRecordRows::from($tail) + $crate::typecheck::UnifRecordRows::from($tail) }; (($id:expr, $ty:expr) $(,($ids:expr, $tys:expr))* $(; $tail:expr)?) => { - $crate::bytecode::typecheck::UnifRecordRows::concrete( + $crate::typecheck::UnifRecordRows::concrete( $crate::typ::RecordRowsF::Extend { row: $crate::typ::RecordRowF { id: $crate::identifier::LocIdent::from($id), typ: Box::new($ty.into()), }, - tail: Box::new($crate::mk_buty_record_row!($(($ids, $tys)),* $(; $tail)?)), + tail: Box::new($crate::mk_uty_record_row!($(($ids, $tys)),* $(; $tail)?)), } ) }; } -/// Wrapper around `mk_buty_enum_row!` to build an enum type from an enum row. +/// Wrapper around `mk_uty_enum_row!` to build an enum type from an enum row. #[macro_export] -macro_rules! mk_buty_enum { +macro_rules! mk_uty_enum { ($( $args:tt )*) => { - $crate::bytecode::typecheck::UnifType::concrete( + $crate::typecheck::UnifType::concrete( $crate::typ::TypeF::Enum( - $crate::mk_buty_enum_row!($( $args )*) + $crate::mk_uty_enum_row!($( $args )*) ) ) }; } -/// Wrapper around `mk_buty_record!` to build a record type from a record row. +/// Wrapper around `mk_uty_record!` to build a record type from a record row. #[macro_export] -macro_rules! mk_buty_record { +macro_rules! mk_uty_record { ($(($ids:expr, $tys:expr)),* $(; $tail:expr)?) 
=> { - $crate::bytecode::typecheck::UnifType::concrete( + $crate::typecheck::UnifType::concrete( $crate::typ::TypeF::Record( - $crate::mk_buty_record_row!($(($ids, $tys)),* $(; $tail)?) + $crate::mk_uty_record_row!($(($ids, $tys)),* $(; $tail)?) ) ) }; @@ -155,7 +155,7 @@ where { args.into_iter() .rev() - .fold(codomain.into(), |acc, ty| mk_buty_arrow!(ty.into(), acc)) + .fold(codomain.into(), |acc, ty| mk_uty_arrow!(ty.into(), acc)) } // dyn is a reserved keyword diff --git a/core/src/typecheck/mod.rs b/core/src/typecheck/mod.rs index 26d4d498f2..a55a325f4e 100644 --- a/core/src/typecheck/mod.rs +++ b/core/src/typecheck/mod.rs @@ -54,17 +54,16 @@ //! //! In walk mode, the type of let-bound expressions is inferred in a shallow way (see //! [HasApparentType]). -use super::ast::{ - pattern::bindings::Bindings as _, record::FieldDef, typ::*, Annotation, Ast, AstAlloc, - MatchBranch, Node, StringChunk, TryConvert, -}; - use crate::{ + bytecode::ast::{ + pattern::bindings::Bindings as _, record::FieldDef, typ::*, Annotation, Ast, AstAlloc, + MatchBranch, Node, StringChunk, TryConvert, + }, cache::ImportResolver, environment::Environment, error::TypecheckError, identifier::{Ident, LocIdent}, - mk_buty_arrow, mk_buty_enum, mk_buty_record, mk_buty_record_row, stdlib as nickel_stdlib, + mk_uty_arrow, mk_uty_enum, mk_uty_record, mk_uty_record_row, stdlib as nickel_stdlib, traverse::TraverseAlloc, typ::{EnumRowsIterator, RecordRowsIterator, VarKind, VarKindDiscriminant}, }; @@ -1210,13 +1209,13 @@ pub trait ReifyAsUnifType<'ast> { impl<'ast> ReifyAsUnifType<'ast> for crate::label::TypeVarData { fn unif_type() -> UnifType<'ast> { - mk_buty_record!(("polarity", crate::label::Polarity::unif_type())) + mk_uty_record!(("polarity", crate::label::Polarity::unif_type())) } } impl<'ast> ReifyAsUnifType<'ast> for crate::label::Polarity { fn unif_type() -> UnifType<'ast> { - mk_buty_enum!("Positive", "Negative") + mk_uty_enum!("Positive", "Negative") } } @@ -1980,7 +1979,7 @@ impl<'ast> 
Check<'ast> for Ast<'ast> { ctxt.type_env.insert(id.ident(), typ); } - Ok(mk_buty_arrow!(arg_type, fun_type)) + Ok(mk_uty_arrow!(arg_type, fun_type)) }, )?; @@ -2220,7 +2219,7 @@ impl<'ast> Check<'ast> for Ast<'ast> { // somehow generalized an enum row type variable before properly closing the tails // before. ty.unify( - mk_buty_arrow!(arg_type.clone(), return_type.clone()), + mk_uty_arrow!(arg_type.clone(), return_type.clone()), state, &ctxt, ) @@ -2242,7 +2241,7 @@ impl<'ast> Check<'ast> for Ast<'ast> { } Node::EnumVariant { tag, arg: None } => { let row = state.table.fresh_erows_uvar(ctxt.var_level); - ty.unify(mk_buty_enum!(*tag; row), state, &ctxt) + ty.unify(mk_uty_enum!(*tag; row), state, &ctxt) .map_err(|err| err.into_typecheck_err(state, self.pos)) } Node::EnumVariant { @@ -2255,7 +2254,7 @@ impl<'ast> Check<'ast> for Ast<'ast> { // We match the expected type against `[| 'id ty_arg; row_tail |]`, where `row_tail` is // a free unification variable, to ensure it has the right shape and extract the // components. - ty.unify(mk_buty_enum!((*tag, ty_arg.clone()); tail), state, &ctxt) + ty.unify(mk_uty_enum!((*tag, ty_arg.clone()); tail), state, &ctxt) .map_err(|err| err.into_typecheck_err(state, self.pos))?; // Once we have a type for the argument, we check the variant's data against it. @@ -2447,7 +2446,7 @@ impl<'ast> Infer<'ast> for Ast<'ast> { } // Theoretically, we need to instantiate the type of the head of the primop application, // that is, the primop itself. In practice, - // [crate::bytecode::typecheck::operation::PrimOpType::primop_type] returns types that are + // [crate::typecheck::operation::PrimOpType::primop_type] returns types that are // already instantiated with free unification variables, to save building a polymorphic // type that would be instantiated immediately. Thus, the type of a primop is currently // always monomorphic. 
diff --git a/core/src/typecheck/operation.rs b/core/src/typecheck/operation.rs index d651a356de..945111d5a8 100644 --- a/core/src/typecheck/operation.rs +++ b/core/src/typecheck/operation.rs @@ -7,7 +7,7 @@ use crate::{ typ::TypeF, }; -use crate::{mk_buty_arrow, mk_buty_enum, mk_buty_record}; +use crate::{mk_uty_arrow, mk_uty_enum, mk_uty_record}; pub trait PrimOpType { fn primop_type<'ast>( @@ -28,7 +28,7 @@ impl PrimOpType for PrimOp { // 'ForeignId, 'Type, 'Other |] PrimOp::Typeof => ( vec![mk_uniftype::dynamic()], - mk_buty_enum!( + mk_uty_enum!( "Number", "Bool", "String", @@ -46,7 +46,7 @@ impl PrimOpType for PrimOp { // Bool -> Bool -> Bool PrimOp::BoolAnd | PrimOp::BoolOr => ( vec![mk_uniftype::bool()], - mk_buty_arrow!(TypeF::Bool, TypeF::Bool), + mk_uty_arrow!(TypeF::Bool, TypeF::Bool), ), // Bool -> Bool PrimOp::BoolNot => (vec![mk_uniftype::bool()], mk_uniftype::bool()), @@ -59,7 +59,7 @@ impl PrimOpType for PrimOp { // Dyn -> Polarity PrimOp::LabelPol => ( vec![mk_uniftype::dynamic()], - mk_buty_enum!("Positive", "Negative"), + mk_uty_enum!("Positive", "Negative"), ), // forall rows. [| ; rows |] -> [| id ; rows |] PrimOp::EnumEmbed(id) => { @@ -69,8 +69,8 @@ impl PrimOpType for PrimOp { init_level: var_level, }; - let domain = mk_buty_enum!(; row.clone()); - let codomain = mk_buty_enum!(*id; row); + let domain = mk_uty_enum!(; row.clone()); + let codomain = mk_uty_enum!(*id; row); (vec![domain], codomain) } @@ -86,27 +86,27 @@ impl PrimOpType for PrimOp { let rows = state.table.fresh_rrows_uvar(var_level); let res = state.table.fresh_type_uvar(var_level); - (vec![mk_buty_record!((*id, res.clone()); rows)], res) + (vec![mk_uty_record!((*id, res.clone()); rows)], res) } // forall a b. 
Array a -> (a -> b) -> Array b PrimOp::ArrayMap => { let a = state.table.fresh_type_uvar(var_level); let b = state.table.fresh_type_uvar(var_level); - let f_type = mk_buty_arrow!(a.clone(), b.clone()); + let f_type = mk_uty_arrow!(a.clone(), b.clone()); ( vec![mk_uniftype::array(a)], - mk_buty_arrow!(f_type, mk_uniftype::array(b)), + mk_uty_arrow!(f_type, mk_uniftype::array(b)), ) } // forall a. Num -> (Num -> a) -> Array a PrimOp::ArrayGen => { let a = state.table.fresh_type_uvar(var_level); - let f_type = mk_buty_arrow!(TypeF::Number, a.clone()); + let f_type = mk_uty_arrow!(TypeF::Number, a.clone()); ( vec![mk_uniftype::num()], - mk_buty_arrow!(f_type, mk_uniftype::array(a)), + mk_uty_arrow!(f_type, mk_uniftype::array(a)), ) } // forall a b. { _ : a} -> (Str -> a -> b) -> { _ : b } @@ -117,10 +117,10 @@ impl PrimOpType for PrimOp { let a = state.table.fresh_type_uvar(var_level); let b = state.table.fresh_type_uvar(var_level); - let f_type = mk_buty_arrow!(TypeF::String, a.clone(), b.clone()); + let f_type = mk_uty_arrow!(TypeF::String, a.clone(), b.clone()); ( vec![mk_uniftype::dict(a)], - mk_buty_arrow!(f_type, mk_uniftype::dict(b)), + mk_uty_arrow!(f_type, mk_uniftype::dict(b)), ) } // forall a b. a -> b -> b @@ -128,7 +128,7 @@ impl PrimOpType for PrimOp { let fst = state.table.fresh_type_uvar(var_level); let snd = state.table.fresh_type_uvar(var_level); - (vec![fst], mk_buty_arrow!(snd.clone(), snd)) + (vec![fst], mk_uty_arrow!(snd.clone(), snd)) } // forall a. 
Array a -> Num PrimOp::ArrayLength => { @@ -173,19 +173,19 @@ impl PrimOpType for PrimOp { // Str -> < | a> for a rigid type variable a PrimOp::EnumFromString => ( vec![mk_uniftype::str()], - mk_buty_enum!(; state.table.fresh_erows_const(var_level)), + mk_uty_enum!(; state.table.fresh_erows_const(var_level)), ), // Str -> Str -> Bool PrimOp::StringIsMatch => ( vec![mk_uniftype::str()], - mk_buty_arrow!(mk_uniftype::str(), mk_uniftype::bool()), + mk_uty_arrow!(mk_uniftype::str(), mk_uniftype::bool()), ), // Str -> Str -> {matched: Str, index: Num, groups: Array Str} PrimOp::StringFind => ( vec![mk_uniftype::str()], - mk_buty_arrow!( + mk_uty_arrow!( mk_uniftype::str(), - mk_buty_record!( + mk_uty_record!( ("matched", TypeF::String), ("index", TypeF::Number), ("groups", mk_uniftype::array(TypeF::String)) @@ -195,9 +195,9 @@ impl PrimOpType for PrimOp { // String -> String -> Array { matched: String, index: Number, groups: Array String } PrimOp::StringFindAll => ( vec![mk_uniftype::str()], - mk_buty_arrow!( + mk_uty_arrow!( mk_uniftype::str(), - mk_uniftype::array(mk_buty_record!( + mk_uniftype::array(mk_uty_record!( ("matched", TypeF::String), ("index", TypeF::Number), ("groups", mk_uniftype::array(TypeF::String)) @@ -215,7 +215,7 @@ impl PrimOpType for PrimOp { // forall a. 
Str -> a -> a PrimOp::Trace => { let ty = state.table.fresh_type_uvar(var_level); - (vec![mk_uniftype::str()], mk_buty_arrow!(ty.clone(), ty)) + (vec![mk_uniftype::str()], mk_uty_arrow!(ty.clone(), ty)) } // Morally: Lbl -> Lbl // Actual: Dyn -> Dyn @@ -274,7 +274,7 @@ impl PrimOpType for PrimOp { // Sym -> Dyn -> Dyn -> Dyn PrimOp::Seal => ( vec![mk_uniftype::sym(), mk_uniftype::dynamic()], - mk_buty_arrow!(TypeF::Dyn, TypeF::Dyn), + mk_uty_arrow!(TypeF::Dyn, TypeF::Dyn), ), // String -> String -> String PrimOp::StringConcat => ( @@ -285,13 +285,13 @@ impl PrimOpType for PrimOp { // Currently: Dyn -> Dyn -> (Dyn -> Dyn) PrimOp::ContractApply => ( vec![mk_uniftype::dynamic(), mk_uniftype::dynamic()], - mk_buty_arrow!(mk_uniftype::dynamic(), mk_uniftype::dynamic()), + mk_uty_arrow!(mk_uniftype::dynamic(), mk_uniftype::dynamic()), ), // Ideally: Contract -> Label -> Dyn -> // Currently: Dyn -> Dyn -> (Dyn -> ) PrimOp::ContractCheck => ( vec![mk_uniftype::dynamic(), mk_uniftype::dynamic()], - mk_buty_arrow!( + mk_uty_arrow!( mk_uniftype::dynamic(), custom_contract_ret_type(state.ast_alloc) ), @@ -305,7 +305,7 @@ impl PrimOpType for PrimOp { // Sym -> Dyn -> Dyn -> Dyn PrimOp::Unseal => ( vec![mk_uniftype::sym(), mk_uniftype::dynamic()], - mk_buty_arrow!(TypeF::Dyn, TypeF::Dyn), + mk_uty_arrow!(TypeF::Dyn, TypeF::Dyn), ), // forall a b. a -> b -> Bool PrimOp::Eq => ( @@ -339,7 +339,7 @@ impl PrimOpType for PrimOp { let res = state.table.fresh_type_uvar(var_level); ( vec![mk_uniftype::str(), mk_uniftype::dict(res.clone())], - mk_buty_arrow!(res.clone(), mk_uniftype::dict(res)), + mk_uty_arrow!(res.clone(), mk_uniftype::dict(res)), ) } // forall a. 
Str -> { _ : a } -> { _ : a} @@ -388,7 +388,7 @@ impl PrimOpType for PrimOp { // -> Str -> Str PrimOp::Hash => ( vec![ - mk_buty_enum!("Md5", "Sha1", "Sha256", "Sha512"), + mk_uty_enum!("Md5", "Sha1", "Sha256", "Sha512"), mk_uniftype::str(), ], mk_uniftype::str(), @@ -397,13 +397,13 @@ impl PrimOpType for PrimOp { PrimOp::Serialize => { let ty_input = state.table.fresh_type_uvar(var_level); ( - vec![mk_buty_enum!("Json", "Yaml", "Toml"), ty_input], + vec![mk_uty_enum!("Json", "Yaml", "Toml"), ty_input], mk_uniftype::str(), ) } // -> Str -> Dyn PrimOp::Deserialize => ( - vec![mk_buty_enum!("Json", "Yaml", "Toml"), mk_uniftype::str()], + vec![mk_uty_enum!("Json", "Yaml", "Toml"), mk_uniftype::str()], mk_uniftype::dynamic(), ), // Num -> Num -> Num @@ -419,7 +419,7 @@ impl PrimOpType for PrimOp { // Str -> Str -> PrimOp::StringCompare => ( vec![mk_uniftype::str(), mk_uniftype::str()], - mk_buty_enum!("Lesser", "Equal", "Greater"), + mk_uty_enum!("Lesser", "Equal", "Greater"), ), // Str -> Str -> Array Str PrimOp::StringSplit => ( @@ -433,7 +433,7 @@ impl PrimOpType for PrimOp { let ty_array = mk_uniftype::array(ty_elt); ( vec![mk_uniftype::dynamic(), mk_uniftype::dynamic()], - mk_buty_arrow!(ty_array.clone(), ty_array), + mk_uty_arrow!(ty_array.clone(), ty_array), ) } // The first argument is a label, the third is a contract. 
@@ -443,7 +443,7 @@ impl PrimOpType for PrimOp { let ty_dict = mk_uniftype::dict(ty_field); ( vec![mk_uniftype::dynamic(), ty_dict.clone()], - mk_buty_arrow!(mk_uniftype::dynamic(), ty_dict), + mk_uty_arrow!(mk_uniftype::dynamic(), ty_dict), ) } // Morally: Str -> Lbl -> Lbl @@ -481,7 +481,7 @@ impl PrimOpType for PrimOp { let elt = state.table.fresh_type_uvar(var_level); let dict = mk_uniftype::dict(elt.clone()); - let split_result = mk_buty_record!( + let split_result = mk_uty_record!( ("left_only", dict.clone()), ("right_only", dict.clone()), ("left_center", dict.clone()), @@ -581,7 +581,7 @@ impl PrimOpType for PrimOp { /// |] /// ``` pub fn custom_contract_type(alloc: &AstAlloc) -> UnifType<'_> { - mk_buty_arrow!( + mk_uty_arrow!( mk_uniftype::dynamic(), mk_uniftype::dynamic(), custom_contract_ret_type(alloc) @@ -597,7 +597,7 @@ pub fn custom_contract_type(alloc: &AstAlloc) -> UnifType<'_> { /// |] /// ``` pub fn custom_contract_ret_type(alloc: &AstAlloc) -> UnifType<'_> { - mk_buty_enum!( + mk_uty_enum!( ("Ok", mk_uniftype::dynamic()), ("Error", error_data_type(alloc)) ) diff --git a/core/src/typecheck/pattern.rs b/core/src/typecheck/pattern.rs index 87982c401b..241ec1615b 100644 --- a/core/src/typecheck/pattern.rs +++ b/core/src/typecheck/pattern.rs @@ -2,7 +2,7 @@ use crate::{ bytecode::ast::pattern::*, error::TypecheckError, identifier::{Ident, LocIdent}, - mk_buty_record_row, + mk_uty_record_row, typ::{EnumRowsF, RecordRowsF, TypeF}, }; @@ -234,7 +234,7 @@ impl<'ast> PatternTypes<'ast> for RecordPattern<'ast> { // We use a dynamic tail here since we're in walk mode, // but if/when we remove dynamic record tails this could // likely be made an empty tail with no impact. 
- TypecheckMode::Walk => mk_buty_record_row!(; RecordRowsF::TailDyn), + TypecheckMode::Walk => mk_uty_record_row!(; RecordRowsF::TailDyn), TypecheckMode::Enforce => state.table.fresh_rrows_uvar(ctxt.var_level), } } else { diff --git a/core/src/typecheck/record.rs b/core/src/typecheck/record.rs index 8b23883f09..2539982d9c 100644 --- a/core/src/typecheck/record.rs +++ b/core/src/typecheck/record.rs @@ -167,11 +167,11 @@ impl<'ast> ResolvedRecord<'ast> { // Build the type {id1 : ?a1, id2: ?a2, .., idn: ?an}, which is the type of the whole // record. let rows = self.stat_fields.keys().zip(field_types.iter()).fold( - mk_buty_record_row!(), - |acc, (id, row_ty)| mk_buty_record_row!((*id, row_ty.clone()); acc), + mk_uty_record_row!(), + |acc, (id, row_ty)| mk_uty_record_row!((*id, row_ty.clone()); acc), ); - ty.unify(mk_buty_record!(; rows), state, &ctxt) + ty.unify(mk_uty_record!(; rows), state, &ctxt) .map_err(|err| err.into_typecheck_err(state, self.pos))?; // We reverse the order of `field_types`. 
The idea is that we can then pop each diff --git a/core/src/typecheck/subtyping.rs b/core/src/typecheck/subtyping.rs index 67f93b03cc..8df0c0ad77 100644 --- a/core/src/typecheck/subtyping.rs +++ b/core/src/typecheck/subtyping.rs @@ -139,7 +139,7 @@ impl<'ast> SubsumedBy<'ast> for UnifType<'ast> { ) => rrows1 .clone() .subsumed_by(rrows2.clone(), state, ctxt) - .map_err(|err| err.into_unif_err(mk_buty_record!(;rrows2), mk_buty_record!(;rrows1))), + .map_err(|err| err.into_unif_err(mk_uty_record!(;rrows2), mk_uty_record!(;rrows1))), // T <: U if T = U (inferred, checked) => checked.unify(inferred, state, &ctxt), } diff --git a/core/src/typecheck/unif.rs b/core/src/typecheck/unif.rs index 68df32d0f7..b2e67fdd35 100644 --- a/core/src/typecheck/unif.rs +++ b/core/src/typecheck/unif.rs @@ -1255,13 +1255,13 @@ impl<'ast> Unify<'ast> for UnifType<'ast> { .clone() .unify(erows2.clone(), state, ctxt) .map_err(|err| { - err.into_unif_err(mk_buty_enum!(; erows1), mk_buty_enum!(; erows2)) + err.into_unif_err(mk_uty_enum!(; erows1), mk_uty_enum!(; erows2)) }), (TypeF::Record(rrows1), TypeF::Record(rrows2)) => rrows1 .clone() .unify(rrows2.clone(), state, ctxt) .map_err(|err| { - err.into_unif_err(mk_buty_record!(; rrows1), mk_buty_record!(; rrows2)) + err.into_unif_err(mk_uty_record!(; rrows1), mk_uty_record!(; rrows2)) }), ( TypeF::Dict { From 345ab9e93c582dbf7da44ac3788171638a3004aa Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Mon, 23 Dec 2024 18:33:58 +0100 Subject: [PATCH 4/6] (File) cache migration - WIP --- core/src/cache.rs | 95 +++++++++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 37 deletions(-) diff --git a/core/src/cache.rs b/core/src/cache.rs index 52f872d352..3d6060d47a 100644 --- a/core/src/cache.rs +++ b/core/src/cache.rs @@ -1,5 +1,6 @@ //! Source cache. 
+use crate::bytecode::ast::{self, Ast, AstAlloc}; use crate::closurize::Closurize as _; use crate::error::{Error, ImportError, ParseError, ParseErrors, TypecheckError}; use crate::eval::cache::Cache as EvalCache; @@ -17,7 +18,7 @@ use crate::term::record::{Field, RecordData}; use crate::term::{Import, RichTerm, SharedTerm, Term}; use crate::transform::import_resolution; use crate::typ::UnboundTypeVariableError; -use crate::typecheck::{self, type_check, TypecheckMode, Wildcards}; +use crate::typecheck::{self, typecheck, TypecheckMode, Wildcards}; use crate::{eval, parser, transform}; use io::Read; @@ -106,12 +107,19 @@ impl InputFormat { /// for files, to `FileId`s. /// - The term cache, holding parsed terms indexed by `FileId`s. /// -/// Terms possibly undergo typechecking and program transformation. The state of each entry (that +/// Terms possibly undergo typechecking and program transformations. The state of each entry (that /// is, the operations that have been performed on this term) is stored in an [EntryState]. +/// +/// # RFC007 +/// +/// As part of the migration to a new AST required by RFC007, as long as we don't have a fully +/// working bytecode virtual machine, the cache needs to keep both term under the old +/// representation (dubbed "mainline" in many places) and the new representation. #[derive(Debug, Clone)] -pub struct Cache { +pub struct Cache<'ast> { /// The content of the program sources plus imports. files: Files, + /// Reverse map from file ids to source paths. file_paths: HashMap, /// The name-id table, holding file ids stored in the database indexed by source names. file_ids: HashMap, @@ -121,19 +129,23 @@ pub struct Cache { rev_imports: HashMap>, /// The table storing parsed terms corresponding to the entries of the file database. terms: HashMap, + /// The allocator for the asts. + alloc: &'ast AstAlloc, + /// The table storing parsed terms in the new AST format. 
+ asts: HashMap>, /// A table mapping FileIds to the package that they belong to. /// /// Path dependencies have already been canonicalized to absolute paths. packages: HashMap, /// The inferred type of wildcards for each `FileId`. - wildcards: HashMap, + wildcards: HashMap>, /// Whether processing should try to continue even in case of errors. Needed by the NLS. error_tolerance: ErrorTolerance, + /// Paths where to look for imports, as included by the user through either the CLI argument + /// `--import-path` or the environment variable `$NICKEL_IMPORT_PATH`. import_paths: Vec, - /// The map used to resolve package imports. package_map: Option, - #[cfg(debug_assertions)] /// Skip loading the stdlib, used for debugging purpose pub skip_stdlib: bool, @@ -149,14 +161,14 @@ pub enum ErrorTolerance { /// The different environments maintained during the REPL session for evaluation and typechecking. #[derive(Debug, Clone)] -pub struct Envs { +pub struct Envs<'ast> { /// The eval environment. pub eval_env: eval::Environment, /// The typing context. - pub type_ctxt: typecheck::Context, + pub type_ctxt: typecheck::Context<'ast>, } -impl Envs { +impl Envs<'_> { pub fn new() -> Self { Envs { eval_env: eval::Environment::new(), @@ -165,7 +177,7 @@ impl Envs { } } -impl Default for Envs { +impl Default for Envs<'_> { fn default() -> Self { Self::new() } @@ -370,13 +382,15 @@ pub enum SourceState { Stale(SystemTime), } -impl Cache { - pub fn new(error_tolerance: ErrorTolerance) -> Self { +impl<'ast> Cache<'ast> { + pub fn new(error_tolerance: ErrorTolerance, alloc: &'ast AstAlloc) -> Self { Cache { files: Files::new(), file_ids: HashMap::new(), file_paths: HashMap::new(), terms: HashMap::new(), + asts: HashMap::new(), + alloc, wildcards: HashMap::new(), imports: HashMap::new(), rev_imports: HashMap::new(), @@ -650,39 +664,46 @@ impl Cache { /// Typecheck an entry of the cache and update its state accordingly, or do nothing if the /// entry has already been typechecked. 
Require that the corresponding source has been parsed. /// If the source contains imports, recursively typecheck on the imports too. + /// + /// # RFC007 + /// + /// During the transition period between the old VM and the new bytecode VM, this method + /// performs typechecking on the new representation [crate::bytecode::ast::Ast], and is also + /// responsible for then converting the term to the legacy representation and populate the + /// corresponding term cache. pub fn typecheck( &mut self, file_id: FileId, - initial_ctxt: &typecheck::Context, + initial_ctxt: &typecheck::Context<'ast>, initial_mode: TypecheckMode, ) -> Result, CacheError> { - match self.terms.get(&file_id) { - Some(TermEntry { state, .. }) if *state >= EntryState::Typechecked => { - Ok(CacheOp::Cached(())) - } - Some(TermEntry { term, state, .. }) if *state >= EntryState::Parsed => { - if *state < EntryState::Typechecking { - let wildcards = measure_runtime!( - "runtime:type_check", - type_check(term, initial_ctxt.clone(), self, initial_mode)? - ); - self.update_state(file_id, EntryState::Typechecking); - self.wildcards.insert(file_id, wildcards); + // If the term cache is populated, given the current split of the pipeline between the old + // and the new AST, the term MUST have been typechecked. + if self.terms.get(&file_id).is_some() { + return Ok(CacheOp::Cached(())); + } - if let Some(imports) = self.imports.get(&file_id).cloned() { - for f in imports.into_iter() { - self.typecheck(f, initial_ctxt, initial_mode)?; - } - } + let Some(ast) = self.asts.get(&file_id) else { + return Err(CacheError::NotParsed); + }; - self.update_state(file_id, EntryState::Typechecked); - } - // The else case correponds to `EntryState::Typechecking`. There is nothing to do: - // cf (grep for) [transitory_entry_state] - Ok(CacheOp::Done(())) + let wildcards = measure_runtime!( + "runtime:type_check", + typecheck(self.alloc, ast, initial_ctxt.clone(), self, initial_mode)? 
+ ); + + self.update_state(file_id, EntryState::Typechecking); + self.wildcards.insert(file_id, wildcards); + + if let Some(imports) = self.imports.get(&file_id).cloned() { + for f in imports.into_iter() { + self.typecheck(f, initial_ctxt, initial_mode)?; } - _ => Err(CacheError::NotParsed), } + + self.update_state(file_id, EntryState::Typechecked); + + Ok(CacheOp::Done(())) } /// Apply program transformations to an entry of the cache, and update its state accordingly, @@ -1440,7 +1461,7 @@ pub trait ImportResolver { fn get_path(&self, file_id: FileId) -> Option<&OsStr>; } -impl ImportResolver for Cache { +impl<'ast> ImportResolver for Cache<'ast> { fn resolve( &mut self, import: &Import, From 3e264fda9f88926926dded375c818c08f03ccf84 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Sat, 28 Dec 2024 16:18:42 +0100 Subject: [PATCH 5/6] Continue cache migration - realizing it's the wrong direction --- core/src/cache.rs | 192 +++++++++++++++++++++++--------------- core/src/label.rs | 8 +- core/src/program.rs | 2 +- core/src/repl/mod.rs | 2 +- lsp/nls/src/incomplete.rs | 2 +- 5 files changed, 123 insertions(+), 83 deletions(-) diff --git a/core/src/cache.rs b/core/src/cache.rs index 3d6060d47a..be926cbc1b 100644 --- a/core/src/cache.rs +++ b/core/src/cache.rs @@ -1,36 +1,41 @@ //! Source cache. 
-use crate::bytecode::ast::{self, Ast, AstAlloc}; -use crate::closurize::Closurize as _; -use crate::error::{Error, ImportError, ParseError, ParseErrors, TypecheckError}; -use crate::eval::cache::Cache as EvalCache; -use crate::eval::Closure; -use crate::files::{FileId, Files}; -use crate::metrics::measure_runtime; +use crate::{ + bytecode::ast::{compat::ToMainline, Ast, AstAlloc}, + closurize::Closurize as _, + error::{Error, ImportError, ParseError, ParseErrors, TypecheckError}, + eval::cache::Cache as EvalCache, + eval::Closure, + files::{FileId, Files}, + metrics::measure_runtime, + package::PackageMap, + parser::{lexer::Lexer, ErrorTolerantParser}, + position::TermPos, + program::FieldPath, + stdlib::{self as nickel_stdlib, StdlibModule}, + term::record::{Field, RecordData}, + term::{Import, RichTerm, SharedTerm, Term}, + transform::import_resolution, + typ::UnboundTypeVariableError, + typecheck::{self, typecheck, TypecheckMode, Wildcards}, + {eval, parser, transform}, +}; + #[cfg(feature = "nix-experimental")] use crate::nix_ffi; -use crate::package::PackageMap; -use crate::parser::{lexer::Lexer, ErrorTolerantParserCompat}; -use crate::position::TermPos; -use crate::program::FieldPath; -use crate::stdlib::{self as nickel_stdlib, StdlibModule}; -use crate::term::record::{Field, RecordData}; -use crate::term::{Import, RichTerm, SharedTerm, Term}; -use crate::transform::import_resolution; -use crate::typ::UnboundTypeVariableError; -use crate::typecheck::{self, typecheck, TypecheckMode, Wildcards}; -use crate::{eval, parser, transform}; - -use io::Read; + +use std::{ + collections::hash_map, + collections::{HashMap, HashSet}, + ffi::{OsStr, OsString}, + fs, io, + io::Read, + path::{Path, PathBuf}, + result::Result, + time::SystemTime, +}; + use serde::Deserialize; -use std::collections::hash_map; -use std::collections::{HashMap, HashSet}; -use std::ffi::{OsStr, OsString}; -use std::fs; -use std::io; -use std::path::{Path, PathBuf}; -use std::result::Result; -use 
std::time::SystemTime; use void::Void; /// Supported input formats. @@ -537,7 +542,7 @@ impl<'ast> Cache<'ast> { /// Parse a source and populate the corresponding entry in the cache, or do /// nothing if the entry has already been parsed. Support multiple formats. /// This function is always error tolerant, independently from `self.error_tolerant`. - fn parse_lax( + fn parse_tolerant( &mut self, file_id: FileId, format: InputFormat, @@ -545,16 +550,26 @@ impl<'ast> Cache<'ast> { if let Some(TermEntry { parse_errs, .. }) = self.terms.get(&file_id) { Ok(CacheOp::Cached(parse_errs.clone())) } else { - let (term, parse_errs) = self.parse_nocache_multi(file_id, format)?; - self.terms.insert( - file_id, - TermEntry { - term, - state: EntryState::Parsed, - parse_errs: parse_errs.clone(), - }, - ); - Ok(CacheOp::Done(parse_errs)) + if let InputFormat::Nickel = format { + let (ast, parse_errs) = self.parse_nickel_nocache(file_id)?; + + self.asts.insert(file_id, ast); + + Ok(CacheOp::Done(parse_errs)) + } else { + let (term, parse_errs) = self.parse_other_nocache(file_id, format)?; + + self.terms.insert( + file_id, + TermEntry { + term, + state: EntryState::Parsed, + parse_errs: parse_errs.clone(), + }, + ); + + Ok(CacheOp::Done(parse_errs)) + } } } @@ -566,7 +581,7 @@ impl<'ast> Cache<'ast> { file_id: FileId, format: InputFormat, ) -> Result, ParseErrors> { - let result = self.parse_lax(file_id, format); + let result = self.parse_tolerant(file_id, format); match self.error_tolerance { ErrorTolerance::Tolerant => result.map_err(|err| err.into()), @@ -578,13 +593,31 @@ impl<'ast> Cache<'ast> { } } - /// Parse a source without querying nor populating the cache. - pub fn parse_nocache(&self, file_id: FileId) -> Result<(RichTerm, ParseErrors), ParseError> { - self.parse_nocache_multi(file_id, InputFormat::default()) + /// Parse a Nickel source without querying nor populating the cache. 
+ pub fn parse_nickel_nocache( + &self, + file_id: FileId, + ) -> Result<(Ast<'ast>, ParseErrors), ParseError> { + let (t, parse_errs) = measure_runtime!( + "runtime:parse:nickel", + parser::grammar::TermParser::new().parse_tolerant( + self.alloc, + file_id, + Lexer::new(self.files.source(file_id)) + )? + ); + + Ok((t, parse_errs)) } - /// Parse a source without querying nor populating the cache. Support multiple formats. - pub fn parse_nocache_multi( + /// Parse a source that isn't Nickel without querying nor populating the cache. Support + /// multiple formats. + /// + /// The Nickel/non Nickel distinction is a bit artificial at the moment, due to the fact that + /// parsing Nickel returns the new [crate::bytecode::ast::Ast], while parsing other formats + /// don't go through the new AST first but directly deserialize to the legacy + /// [crate::term::Term] for simplicity and performance reasons. + pub fn parse_other_nocache( &self, file_id: FileId, format: InputFormat, @@ -594,26 +627,22 @@ impl<'ast> Cache<'ast> { t.with_pos(pos) }; - let buf = self.files.source(file_id); + let source = self.files.source(file_id); match format { InputFormat::Nickel => { - let (t, parse_errs) = measure_runtime!( - "runtime:parse:nickel", - parser::grammar::TermParser::new() - .parse_tolerant_compat(file_id, Lexer::new(buf))? - ); - - Ok((t, parse_errs)) + // Panicking isn't great, but we expect this to be temporary, until RFC007 is fully + // implemented. + panic!("error: trying to parse a Nickel source with parse_other_nocache") } - InputFormat::Json => serde_json::from_str(self.files.source(file_id)) + InputFormat::Json => serde_json::from_str(source) .map(|t| (attach_pos(t), ParseErrors::default())) .map_err(|err| ParseError::from_serde_json(err, file_id, &self.files)), InputFormat::Yaml => { // YAML files can contain multiple documents. If there is only // one we transparently deserialize it. If there are multiple, // we deserialize the file as an array. 
- let de = serde_yaml::Deserializer::from_str(self.files.source(file_id)); + let de = serde_yaml::Deserializer::from_str(source); let mut terms = de .map(|de| { RichTerm::deserialize(de) @@ -641,21 +670,19 @@ impl<'ast> Cache<'ast> { )) } } - InputFormat::Toml => { - crate::serialize::toml_deser::from_str(self.files.source(file_id), file_id) - .map(|t| (attach_pos(t), ParseErrors::default())) - .map_err(|err| (ParseError::from_toml(err, file_id))) - } + InputFormat::Toml => crate::serialize::toml_deser::from_str(source, file_id) + .map(|t| (attach_pos(t), ParseErrors::default())) + .map_err(|err| (ParseError::from_toml(err, file_id))), #[cfg(feature = "nix-experimental")] InputFormat::Nix => { - let json = nix_ffi::eval_to_json(self.files.source(file_id)) + let json = nix_ffi::eval_to_json(source) .map_err(|e| ParseError::from_nix(e.what(), file_id))?; serde_json::from_str(&json) .map(|t| (attach_pos(t), ParseErrors::default())) .map_err(|err| ParseError::from_serde_json(err, file_id, &self.files)) } InputFormat::Text => Ok(( - attach_pos(Term::Str(self.files.source(file_id).into()).into()), + attach_pos(Term::Str(source.into()).into()), ParseErrors::default(), )), } @@ -1024,7 +1051,7 @@ impl<'ast> Cache<'ast> { pub fn prepare( &mut self, file_id: FileId, - initial_ctxt: &typecheck::Context, + initial_ctxt: &typecheck::Context<'ast>, ) -> Result, Error> { let mut result = CacheOp::Cached(()); @@ -1084,9 +1111,10 @@ impl<'ast> Cache<'ast> { pub fn prepare_nocache( &mut self, file_id: FileId, - initial_ctxt: &typecheck::Context, + initial_ctxt: &typecheck::Context<'ast>, ) -> Result<(RichTerm, Vec), Error> { - let (term, errs) = self.parse_nocache(file_id)?; + let (ast, errs) = self.parse_nickel_nocache(file_id)?; + if !errs.no_errors() { return Err(Error::ParseErrors(errs)); } @@ -1094,15 +1122,22 @@ impl<'ast> Cache<'ast> { let import_resolution::strict::ResolveResult { transformed_term: term, resolved_ids: pending, - } = 
import_resolution::strict::resolve_imports(term, self)?; + } = import_resolution::strict::resolve_imports(ast.to_mainline(), self)?; let wildcards = measure_runtime!( "runtime:type_check", - type_check(&term, initial_ctxt.clone(), self, TypecheckMode::Walk)? + typecheck( + self.alloc, + &ast, + initial_ctxt.clone(), + self, + TypecheckMode::Walk + )? ); let term = transform::transform(term, Some(&wildcards)) .map_err(|err| Error::ParseErrors(err.into()))?; + Ok((term, pending)) } @@ -1310,7 +1345,7 @@ impl<'ast> Cache<'ast> { /// it's used in benches. It probably does not have to be used for something else. pub fn typecheck_stdlib_( &mut self, - initial_ctxt: &typecheck::Context, + initial_ctxt: &typecheck::Context<'ast>, ) -> Result, CacheError> { self.files .stdlib_modules() @@ -1365,19 +1400,21 @@ impl<'ast> Cache<'ast> { /// Generate the initial typing context from the list of `file_ids` corresponding to the /// standard library parts. - pub fn mk_type_ctxt(&self) -> Result> { - let stdlib_terms_vec: Vec<(StdlibModule, RichTerm)> = self + pub fn mk_type_ctxt(&self) -> Result, CacheError> { + let stdlib_terms_vec: Vec<(StdlibModule, Ast<'ast>)> = self .files .stdlib_modules() .map(|(module, file_id)| { ( module, - self.get_owned(file_id) - .expect("cache::mk_type_env(): can't build environment, stdlib not parsed"), + self.asts + .get(&file_id) + .expect("cache::mk_type_env(): can't build environment, stdlib not parsed") + .clone(), ) }) .collect(); - Ok(typecheck::mk_initial_ctxt(&stdlib_terms_vec).unwrap()) + Ok(typecheck::mk_initial_ctxt(self.alloc, &stdlib_terms_vec).unwrap()) } /// Generate the initial evaluation environment from the list of `file_ids` corresponding to the @@ -1738,10 +1775,13 @@ pub mod resolvers { if let hash_map::Entry::Vacant(e) = self.term_cache.entry(file_id) { let buf = self.files.source(file_id); - let term = parser::grammar::TermParser::new() - .parse_strict_compat(file_id, Lexer::new(buf)) + let alloc = AstAlloc::new(); + + let ast 
= parser::grammar::TermParser::new() + .parse_strict(&alloc, file_id, Lexer::new(buf)) .map_err(|e| ImportError::ParseErrors(e, *pos))?; - e.insert(term); + e.insert(ast.to_mainline()); + Ok(( ResolvedTerm::FromFile { path: PathBuf::new(), diff --git a/core/src/label.rs b/core/src/label.rs index 2956e89d8b..5f8691036c 100644 --- a/core/src/label.rs +++ b/core/src/label.rs @@ -329,14 +329,14 @@ impl From<&TypeVarData> for Term { } } -impl ReifyAsUnifType for TypeVarData { - fn unif_type() -> UnifType { +impl<'ast> ReifyAsUnifType<'ast> for TypeVarData { + fn unif_type() -> UnifType<'ast> { mk_uty_record!(("polarity", Polarity::unif_type())) } } -impl ReifyAsUnifType for Polarity { - fn unif_type() -> UnifType { +impl<'ast> ReifyAsUnifType<'ast> for Polarity { + fn unif_type() -> UnifType<'ast> { mk_uty_enum!("Positive", "Negative") } } diff --git a/core/src/program.rs b/core/src/program.rs index 2f364b5d5b..a3cf9d3172 100644 --- a/core/src/program.rs +++ b/core/src/program.rs @@ -844,7 +844,7 @@ impl Program { } = self; let allocator = Allocator::default(); - let rt = vm.import_resolver().parse_nocache(*main_id)?.0; + let rt = vm.import_resolver().parse_nickel_nocache(*main_id)?.0; let rt = if apply_transforms { transform(rt, None).map_err(EvalError::from)? } else { diff --git a/core/src/repl/mod.rs b/core/src/repl/mod.rs index c6feb1b471..de802bbba5 100644 --- a/core/src/repl/mod.rs +++ b/core/src/repl/mod.rs @@ -295,7 +295,7 @@ impl Repl for ReplImpl { .import_resolver_mut() .replace_string(SourcePath::ReplTypecheck, String::from(exp)); // We ignore non fatal errors while type checking. 
- let (term, _) = self.vm.import_resolver().parse_nocache(file_id)?; + let (term, _) = self.vm.import_resolver().parse_nickel_nocache(file_id)?; let import_resolution::strict::ResolveResult { transformed_term: term, resolved_ids: pending, diff --git a/lsp/nls/src/incomplete.rs b/lsp/nls/src/incomplete.rs index 92f4af2feb..d282bf3a4d 100644 --- a/lsp/nls/src/incomplete.rs +++ b/lsp/nls/src/incomplete.rs @@ -155,7 +155,7 @@ pub fn parse_path_from_incomplete_input( .cache .replace_string(SourcePath::Snippet(path), to_parse); - match world.cache.parse_nocache(file_id) { + match world.cache.parse_nickel_nocache(file_id) { Ok((rt, _errors)) if !matches!(rt.as_ref(), Term::ParseError(_)) => { world.analysis.insert_usage(file_id, &rt, env); Some(resolve_imports(rt, world)) From c0f758b730c3478bb7de945a0ba7d74c02d1b054 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Sat, 28 Dec 2024 18:06:22 +0100 Subject: [PATCH 6/6] Painful realization: I'll need a profound restructuration of the cache, as it's getting rigged with lifetime issues (multiple indirect borrowing of self) --- core/src/cache.rs | 216 ++++++++++++++++++++++++++++++++------ core/src/transform/mod.rs | 5 +- 2 files changed, 185 insertions(+), 36 deletions(-) diff --git a/core/src/cache.rs b/core/src/cache.rs index be926cbc1b..dc6b7a47a8 100644 --- a/core/src/cache.rs +++ b/core/src/cache.rs @@ -1,4 +1,5 @@ //! Source cache. 
+use ast_cache::AstCache; use crate::{ bytecode::ast::{compat::ToMainline, Ast, AstAlloc}, @@ -15,9 +16,9 @@ use crate::{ stdlib::{self as nickel_stdlib, StdlibModule}, term::record::{Field, RecordData}, term::{Import, RichTerm, SharedTerm, Term}, - transform::import_resolution, + transform::{import_resolution, Wildcards}, typ::UnboundTypeVariableError, - typecheck::{self, typecheck, TypecheckMode, Wildcards}, + typecheck::{self, typecheck, TypecheckMode}, {eval, parser, transform}, }; @@ -121,7 +122,7 @@ impl InputFormat { /// working bytecode virtual machine, the cache needs to keep both term under the old /// representation (dubbed "mainline" in many places) and the new representation. #[derive(Debug, Clone)] -pub struct Cache<'ast> { +pub struct Cache { /// The content of the program sources plus imports. files: Files, /// Reverse map from file ids to source paths. @@ -134,16 +135,14 @@ pub struct Cache<'ast> { rev_imports: HashMap>, /// The table storing parsed terms corresponding to the entries of the file database. terms: HashMap, - /// The allocator for the asts. - alloc: &'ast AstAlloc, - /// The table storing parsed terms in the new AST format. - asts: HashMap>, + /// The cache for the new AST. + asts: AstCache, /// A table mapping FileIds to the package that they belong to. /// /// Path dependencies have already been canonicalized to absolute paths. packages: HashMap, /// The inferred type of wildcards for each `FileId`. - wildcards: HashMap>, + wildcards: HashMap, /// Whether processing should try to continue even in case of errors. Needed by the NLS. 
error_tolerance: ErrorTolerance, /// Paths where to look for imports, as included by the user through either the CLI argument @@ -387,15 +386,14 @@ pub enum SourceState { Stale(SystemTime), } -impl<'ast> Cache<'ast> { - pub fn new(error_tolerance: ErrorTolerance, alloc: &'ast AstAlloc) -> Self { +impl Cache { + pub fn new(error_tolerance: ErrorTolerance) -> Self { Cache { files: Files::new(), file_ids: HashMap::new(), file_paths: HashMap::new(), terms: HashMap::new(), - asts: HashMap::new(), - alloc, + asts: AstCache::new(), wildcards: HashMap::new(), imports: HashMap::new(), rev_imports: HashMap::new(), @@ -551,11 +549,10 @@ impl<'ast> Cache<'ast> { Ok(CacheOp::Cached(parse_errs.clone())) } else { if let InputFormat::Nickel = format { - let (ast, parse_errs) = self.parse_nickel_nocache(file_id)?; - - self.asts.insert(file_id, ast); - - Ok(CacheOp::Done(parse_errs)) + self.asts.insert_with_result(file_id, |alloc| { + let (ast, parse_errs) = self.parse_nickel_nocache(alloc, file_id)?; + Ok((ast, CacheOp::Done(parse_errs))) + }) } else { let (term, parse_errs) = self.parse_other_nocache(file_id, format)?; @@ -594,14 +591,17 @@ impl<'ast> Cache<'ast> { } /// Parse a Nickel source without querying nor populating the cache. - pub fn parse_nickel_nocache( - &self, + pub fn parse_nickel_nocache<'a, 'ast>( + &'a self, + // We take the allocator explicitly, to make sure `self.asts` is properly initialized + // before calling this function, and won't be dropped . + alloc: &'ast AstAlloc, file_id: FileId, ) -> Result<(Ast<'ast>, ParseErrors), ParseError> { let (t, parse_errs) = measure_runtime!( "runtime:parse:nickel", parser::grammar::TermParser::new().parse_tolerant( - self.alloc, + alloc, file_id, Lexer::new(self.files.source(file_id)) )? 
@@ -701,7 +701,7 @@ impl<'ast> Cache<'ast> { pub fn typecheck( &mut self, file_id: FileId, - initial_ctxt: &typecheck::Context<'ast>, + initial_ctxt: &typecheck::Context<'_>, initial_mode: TypecheckMode, ) -> Result, CacheError> { // If the term cache is populated, given the current split of the pipeline between the old @@ -710,17 +710,17 @@ impl<'ast> Cache<'ast> { return Ok(CacheOp::Cached(())); } - let Some(ast) = self.asts.get(&file_id) else { + let (Some(ast), Some(alloc)) = (self.asts.get(&file_id), self.asts.get_alloc()) else { return Err(CacheError::NotParsed); }; let wildcards = measure_runtime!( "runtime:type_check", - typecheck(self.alloc, ast, initial_ctxt.clone(), self, initial_mode)? + typecheck(alloc, &ast, initial_ctxt.clone(), self, initial_mode)? ); self.update_state(file_id, EntryState::Typechecking); - self.wildcards.insert(file_id, wildcards); + self.wildcards.insert(file_id, wildcards.iter().map(ToMainline::to_mainline).collect()); if let Some(imports) = self.imports.get(&file_id).cloned() { for f in imports.into_iter() { @@ -1051,7 +1051,7 @@ impl<'ast> Cache<'ast> { pub fn prepare( &mut self, file_id: FileId, - initial_ctxt: &typecheck::Context<'ast>, + initial_ctxt: &typecheck::Context<'_>, ) -> Result, Error> { let mut result = CacheOp::Cached(()); @@ -1108,12 +1108,13 @@ impl<'ast> Cache<'ast> { /// - typechecking /// - resolve imports performed inside these imports. /// - apply program transformations. 
- pub fn prepare_nocache( + pub fn prepare_nocache<'ast>( &mut self, + alloc: &'ast AstAlloc, file_id: FileId, initial_ctxt: &typecheck::Context<'ast>, ) -> Result<(RichTerm, Vec), Error> { - let (ast, errs) = self.parse_nickel_nocache(file_id)?; + let (ast, errs) = self.parse_nickel_nocache(alloc, file_id)?; if !errs.no_errors() { return Err(Error::ParseErrors(errs)); @@ -1127,7 +1128,7 @@ impl<'ast> Cache<'ast> { let wildcards = measure_runtime!( "runtime:type_check", typecheck( - self.alloc, + alloc, &ast, initial_ctxt.clone(), self, @@ -1135,6 +1136,8 @@ impl<'ast> Cache<'ast> { )? ); + let wildcards: Vec<_> = wildcards.iter().map(ToMainline::to_mainline).collect(); + let term = transform::transform(term, Some(&wildcards)) .map_err(|err| Error::ParseErrors(err.into()))?; @@ -1329,11 +1332,11 @@ impl<'ast> Cache<'ast> { // We have a small bootstraping problem: to typecheck the initial environment, we already // need an initial evaluation environment, since stdlib parts may reference each other. But // typechecking is performed before program transformations, so this environment is not - // final one. We have create a temporary initial environment just for typechecking, which is - // dropped right after. However: + // the final one. We have to create a temporary initial environment just for typechecking, + // which is dropped right after. However: // 1. The stdlib is meant to stay relatively light. - // 2. Typechecking the standard library ought to occur only during development. Once the - // stdlib is stable, we won't have typecheck it at every execution. + // 2. Typechecking the standard library ought to occur only during development. We + // currently don't typecheck it for normal execution. let initial_env = self.mk_type_ctxt().map_err(|err| match err { CacheError::NotParsed => CacheError::NotParsed, CacheError::Error(_) => unreachable!(), @@ -1345,7 +1348,7 @@ impl<'ast> Cache<'ast> { /// it's used in benches.
It probably does not have to be used for something else. pub fn typecheck_stdlib_( &mut self, - initial_ctxt: &typecheck::Context<'ast>, + initial_ctxt: &typecheck::Context<'_>, ) -> Result, CacheError> { self.files .stdlib_modules() @@ -1400,8 +1403,8 @@ impl<'ast> Cache<'ast> { /// Generate the initial typing context from the list of `file_ids` corresponding to the /// standard library parts. - pub fn mk_type_ctxt(&self) -> Result, CacheError> { - let stdlib_terms_vec: Vec<(StdlibModule, Ast<'ast>)> = self + pub fn mk_type_ctxt(&self) -> Result, CacheError> { + let stdlib_terms_vec: Vec<(StdlibModule, Ast<'_>)> = self .files .stdlib_modules() .map(|(module, file_id)| { @@ -1807,6 +1810,149 @@ pub mod resolvers { } } +/// Temporary AST cache (for the new [crate::bytecode::ast::Ast]) that holds the owned allocator of +/// the AST nodes. +/// +/// [ast_cache::AstCache] has a self-referential flavour and requires unsafe code (which is why +/// it's been put in its own module). Please do not mess with [ast_cache] unless you know what +/// you're doing. +mod ast_cache { + use super::{Ast, AstAlloc, FileId, HashMap}; + + #[derive(Debug)] + struct InnerAstCache { + alloc: AstAlloc, + /// **Caution**: the ASTs stored here are surely _not_ static, they are pointing to inside + /// `alloc`. We just use `'static` as a place-holder. However, we can't currently express + /// such self-referential structure in safe Rust (well, the AST nodes are actually stored + /// in the heap by the allocator, so it's not strictly speaking self-referential, but the + /// lifetime of `Ast` is still tied to `self`) + asts: HashMap>, + } + + impl InnerAstCache { + fn new() -> Self { + InnerAstCache { + alloc: AstAlloc::new(), + asts: HashMap::new(), + } + } + + /// Returns the underlying allocator, which might be required to call various helpers. + fn get_alloc(&self) -> &AstAlloc { + &self.alloc + } + + /// Retrieve the AST associated with a file id. 
+ fn get<'ast>(&'ast self, file_id: &FileId) -> Option<Ast<'ast>> { + self.asts.get(file_id).cloned() + } + + /// Takes a file ID and a closure that builds an AST node from an allocator, and populates the + /// corresponding entry in the cache with the AST. Returns the previously cached AST, if + /// any. + fn insert_with_alloc<'ast, F>(&'ast mut self, file_id: FileId, f: F) -> Option<Ast<'ast>> + where + F: for<'a> FnOnce(&'ast AstAlloc) -> Ast<'ast>, + { + let ast = f(&self.alloc); + // Safety: we are transmuting the lifetime of the AST from `'ast` to `'static`. This is + // unsafe in general, but we never use or leak any `'static` reference. It's just a + // placeholder. We only store such `Ast<'static>` in `asts`, and return them as `'a` + // references where `self: 'a` in `get()`. + // + // Thus, the `'static` lifetime isn't observable from outside of `AstCache`. + let promoted_ast = unsafe { std::mem::transmute::<Ast<'ast>, Ast<'static>>(ast) }; + self.asts.insert(file_id, promoted_ast) + } + + pub(super) fn insert_with_result<'ast, F, T, E>(&'ast mut self, file_id: FileId, f: F) -> Result<T, E> + where + F: for<'a> FnOnce(&'ast AstAlloc) -> Result<(Ast<'ast>, T), E>, + { + let (ast, result) = f(&self.alloc)?; + // Safety: we are transmuting the lifetime of the AST from `'ast` to `'static`. This is + // unsafe in general, but we never use or leak any `'static` reference. It's just a + // placeholder. We only store such `Ast<'static>` in `asts`, and return them as `'a` + // references where `self: 'a` in `get()`. + // + // Thus, the `'static` lifetime isn't observable from outside of `AstCache`. + let promoted_ast = unsafe { std::mem::transmute::<Ast<'ast>, Ast<'static>>(ast) }; + let _ = self.asts.insert(file_id, promoted_ast); + + Ok(result) + } + } + + /// A cache for [bytecode::ast::Ast] nodes. + /// + /// To make it possible to drop the AST nodes once typechecking has been performed, [AstCache] + /// is a wrapper around an optional [InnerAstCache]. Dropping the cache resets the option to + /// `None`.
If one tries to insert into a dropped cache, the cache will automatically be + /// reinitialized, such that getting from and inserting into the cache are transparent + /// operations, whether the cache is actually live or not. + #[derive(Debug)] + pub(super) struct AstCache(Option<InnerAstCache>); + + impl AstCache { + pub(super) fn new() -> Self { + AstCache(Some(InnerAstCache::new())) + } + + /// Clear the cache and drop all the allocated AST nodes. + pub(super) fn clear(&mut self) { + self.0 = None; + } + + pub(super) fn get_alloc(&self) -> Option<&AstAlloc> { + self.0.as_ref().map(InnerAstCache::get_alloc) + } + + pub(super) fn get<'ast>(&'ast self, file_id: &FileId) -> Option<Ast<'ast>> { + self.0.as_ref().and_then(|cache| cache.get(file_id)) + } + + pub(super) fn insert_with_alloc<'ast, F>( + &'ast mut self, + file_id: FileId, + f: F, + ) -> Option<Ast<'ast>> + where + F: for<'a> FnOnce(&'ast AstAlloc) -> Ast<'ast>, + { + if self.0.is_none() { + self.0 = Some(InnerAstCache::new()); + } + + // unwrap(): we just initialized the cache if it was `None` in the if above. + self.0.as_mut().unwrap().insert_with_alloc(file_id, f) + } + + pub(super) fn insert_with_result<'ast, F, T, E>(&'ast mut self, file_id: FileId, f: F) -> Result<T, E> + where + F: for<'a> FnOnce(&'ast AstAlloc) -> Result<(Ast<'ast>, T), E>, + { + if self.0.is_none() { + self.0 = Some(InnerAstCache::new()); + } + + // unwrap(): we just initialized the cache if it was `None` in the if above. + self.0.as_mut().unwrap().insert_with_result(file_id, f) + } + } + + /// [AstCache] can't realistically and safely be cloned (especially since the pointers in the + /// cache will still refer to the original arena). However, [super::Cache] needs to be + /// clonable, and [AstCache] is just a cache - whether it's live or not shouldn't change the + /// correctness of [super::Cache]. Thus, we implement a `clone` that just returns a new empty + /// cache.
+ impl Clone for AstCache { + fn clone(&self) -> Self { + AstCache(None) + } + } +} + #[cfg(test)] mod tests { use std::path::Path; diff --git a/core/src/transform/mod.rs b/core/src/transform/mod.rs index 872a526c79..68e5d7bbd5 100644 --- a/core/src/transform/mod.rs +++ b/core/src/transform/mod.rs @@ -4,7 +4,6 @@ use crate::{ term::RichTerm, traverse::{Traverse, TraverseOrder}, typ::UnboundTypeVariableError, - typecheck::Wildcards, }; pub mod desugar_destructuring; @@ -13,6 +12,10 @@ pub mod gen_pending_contracts; pub mod import_resolution; pub mod substitute_wildcards; +/// RFC007: we can't yet use the new `typecheck::Wildcards` type, as it follows the new AST. We +/// temporarily redefine a `Wildcards` type that matches the old definition. +pub(crate) type Wildcards = Vec; + /// Apply all program transformations, excepted import resolution that is currently performed /// earlier, as it needs to be done before typechecking. ///