diff --git a/design/wasi_unstable/typenames.witx b/design/wasi_unstable/typenames.witx index 1d33a35f0..046ed0bf6 100644 --- a/design/wasi_unstable/typenames.witx +++ b/design/wasi_unstable/typenames.witx @@ -718,7 +718,7 @@ ) ;; Identifiers for preopened capabilities. -(typename preopentype_t +(typename $preopentype_t (enum u8 ;; A pre-opened directory. $PREOPENTYPE_DIR diff --git a/tools/witx/.gitignore b/tools/witx/.gitignore new file mode 100644 index 000000000..a9d37c560 --- /dev/null +++ b/tools/witx/.gitignore @@ -0,0 +1,2 @@ +target +Cargo.lock diff --git a/tools/witx/Cargo.toml b/tools/witx/Cargo.toml new file mode 100644 index 000000000..f24cb7183 --- /dev/null +++ b/tools/witx/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "witx" +version = "0.1.0" +description = "Parse and validate witx file format" +homepage = "https://github.com/WebAssembly/WASI" +repository = "https://github.com/WebAssembly/WASI" +license = "Apache-2.0" +categories = ["wasm"] +authors = ["Pat Hickey "] +edition = "2018" + +[lib] +crate-type=["rlib"] + +[[bin]] +name = "witx" +path = "src/main.rs" + +[dependencies] +clap = "2" +failure = "0.1" diff --git a/tools/witx/LICENSE b/tools/witx/LICENSE new file mode 100644 index 000000000..e061f56ab --- /dev/null +++ b/tools/witx/LICENSE @@ -0,0 +1,13 @@ +Copyright 2019 WebAssembly Community Group participants + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/tools/witx/src/ast.rs b/tools/witx/src/ast.rs new file mode 100644 index 000000000..4df4d1e24 --- /dev/null +++ b/tools/witx/src/ast.rs @@ -0,0 +1,163 @@ +#![allow(dead_code)] +use std::collections::HashMap; +use std::rc::{Rc, Weak}; + +pub use crate::parser::BuiltinType; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Id(String); + +impl Id { + pub fn new>(s: S) -> Self { + Id(s.as_ref().to_string()) + } + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +#[derive(Debug, Clone)] +pub struct Document { + pub definitions: Vec, + pub entries: HashMap, +} + +#[derive(Debug, Clone)] +pub enum Definition { + Datatype(Rc), + Module(Rc), +} + +#[derive(Debug, Clone)] +pub enum Entry { + Datatype(Weak), + Module(Weak), +} + +impl Entry { + pub fn kind(&self) -> &'static str { + match self { + Entry::Datatype { .. } => "datatype", + Entry::Module { .. } => "module", + } + } +} + +#[derive(Debug, Clone)] +pub enum DatatypeIdent { + Builtin(BuiltinType), + Array(Box), + Pointer(Box), + ConstPointer(Box), + Ident(Rc), +} + +#[derive(Debug, Clone)] +pub struct Datatype { + pub name: Id, + pub variant: DatatypeVariant, +} + +#[derive(Debug, Clone)] +pub enum DatatypeVariant { + Alias(AliasDatatype), + Enum(EnumDatatype), + Flags(FlagsDatatype), + Struct(StructDatatype), + Union(UnionDatatype), +} + +#[derive(Debug, Clone)] +pub struct AliasDatatype { + pub name: Id, + pub to: DatatypeIdent, +} + +#[derive(Debug, Clone)] +pub enum IntRepr { + I8, + I16, + I32, + I64, +} + +#[derive(Debug, Clone)] +pub struct EnumDatatype { + pub name: Id, + pub repr: IntRepr, + pub variants: Vec, +} + +#[derive(Debug, Clone)] +pub struct FlagsDatatype { + pub name: Id, + pub repr: IntRepr, + pub flags: Vec, +} + +#[derive(Debug, Clone)] +pub struct StructDatatype { + pub name: Id, + pub members: Vec, +} + +#[derive(Debug, Clone)] +pub struct StructMember { + pub name: Id, + pub type_: DatatypeIdent, +} + +#[derive(Debug, Clone)] +pub struct UnionDatatype { + pub 
name: Id, + pub variants: Vec, +} + +#[derive(Debug, Clone)] +pub struct UnionVariant { + pub name: Id, + pub type_: DatatypeIdent, +} + +#[derive(Debug, Clone)] +pub struct Module { + pub name: Id, + pub definitions: Vec, + pub entries: HashMap, +} + +#[derive(Debug, Clone)] +pub enum ModuleDefinition { + Import(Rc), + Func(Rc), +} + +#[derive(Debug, Clone)] +pub enum ModuleEntry { + Import(Weak), + Func(Weak), +} + +#[derive(Debug, Clone)] +pub struct ModuleImport { + pub name: Id, + pub variant: ModuleImportVariant, +} + +#[derive(Debug, Clone)] +pub enum ModuleImportVariant { + Memory, +} + +#[derive(Debug, Clone)] +pub struct InterfaceFunc { + pub name: Id, + pub params: Vec, + pub results: Vec, +} + +#[derive(Debug, Clone)] +pub struct InterfaceFuncParam { + pub name: Id, + pub type_: DatatypeIdent, +} diff --git a/tools/witx/src/lexer.rs b/tools/witx/src/lexer.rs new file mode 100644 index 000000000..b004acf07 --- /dev/null +++ b/tools/witx/src/lexer.rs @@ -0,0 +1,354 @@ +use crate::Location; +use failure::Fail; +use std::path::{Path, PathBuf}; +use std::str::CharIndices; + +///! The lexer turns a string into a stream of located tokens. +///! The tokens are meant for consumption by the s-expression parser. +///! +///! Comments in source text look like `;; rest of line ...`. +///! Words look like `abcde_` +///! Idents look like `$abcde_` +///! Annotations look like `@abcde_` +///! Quotes look like `"a b cde 123 @#$%^&*() _"` +///! +///! This implementation was heavily influenced by `cranelift-reader` + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum Token<'a> { + LPar, // ( + RPar, // ) + Word(&'a str), // Bare word + Ident(&'a str), // Starts with $ + Annot(&'a str), // Starts with @. short for annotation. + Quote(&'a str), // Found between balanced "". No escaping. 
+} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct LocatedToken<'a> { + pub token: Token<'a>, + pub location: Location, +} + +fn token(token: Token<'_>, location: Location) -> Result, LocatedError> { + Ok(LocatedToken { token, location }) +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy, Fail)] +pub enum LexError { + #[fail(display = "Invalid character '{}'", _0)] + InvalidChar(char), + #[fail(display = "Empty identifier '$'")] + EmptyIdentifier, + #[fail(display = "Empty annotation '@'")] + EmptyAnnotation, + #[fail(display = "Unterminated quote")] + UnterminatedQuote, +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct LocatedError { + pub error: LexError, + pub location: Location, +} + +fn error<'a>(error: LexError, location: Location) -> Result, LocatedError> { + Err(LocatedError { error, location }) +} + +pub struct Lexer<'a> { + source: &'a str, + chars: CharIndices<'a>, + lookahead: Option, + pos: usize, + line_number: usize, + column_start: usize, + tab_compensation: usize, + path: PathBuf, +} + +impl<'a> Lexer<'a> { + pub fn new>(s: &'a str, path: P) -> Lexer<'_> { + let mut lex = Lexer { + source: s, + chars: s.char_indices(), + lookahead: None, + pos: 0, + line_number: 1, + column_start: 0, + tab_compensation: 0, + path: path.as_ref().into(), + }; + lex.next_ch(); + lex + } + + fn next_ch(&mut self) -> Option { + if self.lookahead == Some('\n') { + self.line_number += 1; + self.column_start = self.pos + 1; // Next column starts a fresh line + self.tab_compensation = 0; + } else if self.lookahead == Some('\t') { + self.tab_compensation += 7; // One column for the position of the char itself, add 7 more for a tabwidth of 8 + } + match self.chars.next() { + Some((idx, ch)) => { + self.pos = idx; + self.lookahead = Some(ch); + } + None => { + self.pos = self.source.len(); + self.lookahead = None; + } + } + self.lookahead + } + + fn loc(&self) -> Location { + Location { + path: self.path.clone(), + line: self.line_number, + column: self.pos - 
self.column_start + self.tab_compensation, + } + } + + fn looking_at(&self, prefix: &str) -> bool { + self.source[self.pos..].starts_with(prefix) + } + + fn scan_char(&mut self, tok: Token<'a>) -> Result, LocatedError> { + assert!(self.lookahead.is_some()); + let loc = self.loc(); + self.next_ch(); + token(tok, loc) + } + + pub fn rest_of_line(&mut self) -> &'a str { + let begin = self.pos; + loop { + match self.next_ch() { + None | Some('\n') => return &self.source[begin..self.pos], + _ => {} + } + } + } + + fn scan_word(&mut self) -> Result, LocatedError> { + let begin = self.pos; + let loc = self.loc(); + assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_alphabetic()); + loop { + match self.next_ch() { + Some('_') | Some('-') => {} + Some(ch) if ch.is_alphanumeric() => {} + _ => break, + } + } + let text = &self.source[begin..self.pos]; + token(Token::Word(text), loc) + } + + fn scan_ident(&mut self) -> Result, LocatedError> { + let loc = self.loc(); + assert!(self.lookahead == Some('$')); + match self.next_ch() { + Some(ch) if ch.is_alphanumeric() || ch == '_' => {} + _ => Err(LocatedError { + error: LexError::EmptyIdentifier, + location: loc.clone(), + })?, + } + let begin = self.pos; + + loop { + match self.next_ch() { + Some('_') | Some('-') => {} + Some(ch) if ch.is_alphanumeric() => {} + _ => break, + } + } + + let text = &self.source[begin..self.pos]; + token(Token::Ident(text), loc) + } + + fn scan_annotation(&mut self) -> Result, LocatedError> { + let loc = self.loc(); + assert!(self.lookahead == Some('@')); + match self.next_ch() { + Some(ch) if ch.is_alphanumeric() || ch == '_' => {} + _ => Err(LocatedError { + error: LexError::EmptyAnnotation, + location: loc.clone(), + })?, + } + let begin = self.pos; + + loop { + match self.next_ch() { + Some('_') | Some('-') => {} + Some(ch) if ch.is_alphanumeric() => {} + _ => break, + } + } + + let text = &self.source[begin..self.pos]; + token(Token::Annot(text), loc) + } + + fn scan_quote(&mut 
self) -> Result, LocatedError> { + let begin = self.pos; + let loc = self.loc(); + assert!(self.lookahead == Some('"')); + loop { + match self.next_ch() { + None => Err(LocatedError { + error: LexError::UnterminatedQuote, + location: loc.clone(), + })?, + Some('"') => { + self.next_ch(); + break; + } + _ => {} + } + } + let text = &self.source[(begin + 1)..(self.pos - 1)]; + token(Token::Quote(text), loc) + } + + #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> Option, LocatedError>> { + loop { + let loc = self.loc(); + return match self.lookahead { + None => None, + Some(c) => Some(match c { + '(' => self.scan_char(Token::LPar), + ')' => self.scan_char(Token::RPar), + '$' => self.scan_ident(), + '@' => self.scan_annotation(), + ';' => { + if self.looking_at(";;") { + self.rest_of_line(); + continue; + } else { + self.next_ch(); + error(LexError::InvalidChar(';'), loc) + } + } + '"' => self.scan_quote(), + '_' => self.scan_word(), + ch if ch.is_alphabetic() => self.scan_word(), + ch if ch.is_whitespace() => { + self.next_ch(); + continue; + } + _ => { + self.next_ch(); + error(LexError::InvalidChar(c), loc) + } + }), + }; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::path::{Path, PathBuf}; + + fn testlexer(input: &str) -> Lexer { + Lexer::new(input, Path::new("/test")) + } + + fn token( + token: Token<'_>, + line: usize, + column: usize, + ) -> Option, LocatedError>> { + Some(super::token( + token, + Location { + path: PathBuf::from("/test"), + line, + column, + }, + )) + } + + fn error<'a>( + err: LexError, + line: usize, + column: usize, + ) -> Option, LocatedError>> { + Some(super::error( + err, + Location { + path: PathBuf::from("/test"), + line, + column, + }, + )) + } + #[test] + fn words_and_idents() { + let mut lex = testlexer("$gussie is a good $dog"); + // ruler 0 5 10 15 20 + assert_eq!(lex.next(), token(Token::Ident("gussie"), 1, 0)); + assert_eq!(lex.next(), token(Token::Word("is"), 1, 8)); + 
assert_eq!(lex.next(), token(Token::Word("a"), 1, 11)); + assert_eq!(lex.next(), token(Token::Word("good"), 1, 13)); + assert_eq!(lex.next(), token(Token::Ident("dog"), 1, 18)); + assert_eq!(lex.next(), None); + + let mut lex = + testlexer("$ok $a $_ $ _\nkebab-case\nsnake_case\n$kebab-ident\n$snake_ident"); + assert_eq!(lex.next(), token(Token::Ident("ok"), 1, 0)); + assert_eq!(lex.next(), token(Token::Ident("a"), 1, 4)); + assert_eq!(lex.next(), token(Token::Ident("_"), 1, 7)); + assert_eq!(lex.next(), error(LexError::EmptyIdentifier, 1, 10)); + assert_eq!(lex.next(), token(Token::Word("_"), 1, 12)); + assert_eq!(lex.next(), token(Token::Word("kebab-case"), 2, 0)); + assert_eq!(lex.next(), token(Token::Word("snake_case"), 3, 0)); + assert_eq!(lex.next(), token(Token::Ident("kebab-ident"), 4, 0)); + assert_eq!(lex.next(), token(Token::Ident("snake_ident"), 5, 0)); + assert_eq!(lex.next(), None); + } + + #[test] + fn comments() { + let mut lex = testlexer("the quick ;; brown fox\njumped\n;;over the three\nlazy;;dogs"); + assert_eq!(lex.next(), token(Token::Word("the"), 1, 0)); + assert_eq!(lex.next(), token(Token::Word("quick"), 1, 4)); + assert_eq!(lex.next(), token(Token::Word("jumped"), 2, 0)); + assert_eq!(lex.next(), token(Token::Word("lazy"), 4, 0)); + assert_eq!(lex.next(), None); + + let mut lex = testlexer("line1 ;;\n$sym_2;\n\t\tl3;;;333"); + assert_eq!(lex.next(), token(Token::Word("line1"), 1, 0)); + assert_eq!(lex.next(), token(Token::Ident("sym_2"), 2, 0)); + assert_eq!(lex.next(), error(LexError::InvalidChar(';'), 2, 6)); + assert_eq!(lex.next(), token(Token::Word("l3"), 3, 16)); // Two tabs = 16 columns + assert_eq!(lex.next(), None); + } + + #[test] + fn quotes() { + let mut lex = testlexer("a \"bc\" d"); + assert_eq!(lex.next(), token(Token::Word("a"), 1, 0)); + assert_eq!(lex.next(), token(Token::Quote("bc"), 1, 2)); + assert_eq!(lex.next(), token(Token::Word("d"), 1, 7)); + + let mut lex = testlexer("a \"b\nc\" d"); + assert_eq!(lex.next(), 
token(Token::Word("a"), 1, 0)); + assert_eq!(lex.next(), token(Token::Quote("b\nc"), 1, 2)); + assert_eq!(lex.next(), token(Token::Word("d"), 2, 3)); + + let mut lex = testlexer("a \"b"); + assert_eq!(lex.next(), token(Token::Word("a"), 1, 0)); + assert_eq!(lex.next(), error(LexError::UnterminatedQuote, 1, 2)); + } +} diff --git a/tools/witx/src/lib.rs b/tools/witx/src/lib.rs new file mode 100644 index 000000000..7d5040d00 --- /dev/null +++ b/tools/witx/src/lib.rs @@ -0,0 +1,93 @@ +/// Types describing a validated witx document +mod ast; +/// Lexer text into tokens +mod lexer; +/// Witx syntax parsing from SExprs +mod parser; +/// SExpr parsing from tokens +mod sexpr; +/// Resolve toplevel `use` declarations across files +mod toplevel; +/// Validate declarations into ast +mod validate; + +pub use ast::{ + AliasDatatype, BuiltinType, Datatype, DatatypeIdent, DatatypeVariant, Definition, Document, + Entry, EnumDatatype, FlagsDatatype, Id, IntRepr, InterfaceFunc, InterfaceFuncParam, Module, + ModuleDefinition, ModuleEntry, ModuleImport, ModuleImportVariant, StructDatatype, StructMember, + UnionDatatype, UnionVariant, +}; +pub use lexer::LexError; +pub use parser::{DeclSyntax, ParseError}; +pub use sexpr::SExprParseError; +pub use validate::ValidationError; + +use failure::Fail; +use std::io; +use std::path::{Path, PathBuf}; + +pub fn load>(path: P) -> Result { + use toplevel::parse_witx; + use validate::validate_document; + let parsed_decls = parse_witx(path)?; + validate_document(&parsed_decls).map_err(WitxError::Validation) +} + +/// Location in the source text +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Location { + pub path: PathBuf, + pub line: usize, + pub column: usize, +} + +#[derive(Debug, Fail)] +pub enum WitxError { + #[fail(display = "{}", _0)] + SExpr(#[cause] SExprParseError), + #[fail(display = "when resolving use declaration for {:?}: {}", _0, _1)] + UseResolution(PathBuf, #[cause] io::Error), + #[fail(display = "{}", _0)] + Parse(#[cause] 
ParseError), + #[fail(display = "{}", _0)] + Validation(#[cause] ValidationError), +} + +impl WitxError { + pub fn report(&self) -> String { + use WitxError::*; + match self { + SExpr(sexpr) => sexpr.report(), + UseResolution(path, ioerr) => format!("when resolving `use {:?}`: {}", path, ioerr), + Parse(parse) => parse.report(), + Validation(validation) => validation.report(), + } + } +} +impl Location { + pub fn highlight_source(&self) -> String { + let mut msg = format!("in {:?}:\n", self.path); + if let Ok(src_line) = self.source_line() { + msg += &format!( + "{line_num: >5} | {src_line}\n{blank: >5} {caret: >column$}", + line_num = self.line, + src_line = src_line, + blank = " ", + caret = "^", + column = self.column, + ); + } + msg + } + pub fn source_line(&self) -> Result { + use std::fs::File; + use std::io::{BufRead, BufReader}; + let f = BufReader::new(File::open(&self.path)?); + let l = f + .lines() + .skip(self.line - 1) + .next() + .unwrap_or_else(|| Err(io::Error::new(io::ErrorKind::Other, "TODO")))?; + Ok(l) + } +} diff --git a/tools/witx/src/main.rs b/tools/witx/src/main.rs new file mode 100644 index 000000000..c2ad70322 --- /dev/null +++ b/tools/witx/src/main.rs @@ -0,0 +1,38 @@ +use clap::{App, Arg}; +use std::path::Path; +use std::process; +use witx::load; + +pub fn main() { + let app = App::new("witx") + .version(env!("CARGO_PKG_VERSION")) + .about("Validate witx file format") + .arg( + Arg::with_name("input") + .required(true) + .help("path to root of witx document"), + ) + .arg( + Arg::with_name("verbose") + .short("v") + .long("verbose") + .takes_value(false) + .required(false), + ) + .get_matches(); + + match load(Path::new(app.value_of("input").expect("required arg"))) { + Ok(doc) => { + if app.is_present("verbose") { + println!("{:?}", doc) + } + } + Err(e) => { + println!("{}", e.report()); + if app.is_present("verbose") { + println!("{:?}", e); + } + process::exit(1) + } + } +} diff --git a/tools/witx/src/parser.rs 
b/tools/witx/src/parser.rs new file mode 100644 index 000000000..a99df5fa4 --- /dev/null +++ b/tools/witx/src/parser.rs @@ -0,0 +1,583 @@ +use crate::sexpr::SExpr; +use crate::Location; +use failure::Fail; + +///! Parser turns s-expressions into unvalidated syntax constructs. +///! conventions: +///! `Type::starts_parsing(s-expr) -> bool` is for look-ahead: we use +///! this predicate to combine parsers for different `Type`s where both +///! alternatives are accepted. +///! `Type::parse(sexpr: &SExpr) -> Result` takes a single +///! s-expression and parses it into a `Self`. +///! for parsers that take a subset of a vector s-expression, the signature +///! `Type::parse(sexprs: &[SExpr], location: Location) -> Result` +///! has an additional `Location` argument, which should point to the parent SExpr::Vec. +///! This is used for error reporting in case the slice doesn't have the number of elements +///! expected. + +#[derive(Debug, Fail)] +#[fail(display = "{} at {:?}", _0, _1)] +pub struct ParseError { + pub message: String, + pub location: Location, +} + +impl ParseError { + pub fn report(&self) -> String { + format!("{}\n{}", self.location.highlight_source(), self.message) + } +} + +macro_rules! parse_err { + ($loc:expr, $msg:expr) => { + ParseError { message: $msg.to_string(), location: $loc.clone() } + }; + ($loc:expr, $fmt:expr, $( $arg:expr ),+ ) => { + ParseError { message: format!($fmt, $( $arg ),+), location: $loc.clone() } + }; +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct IdentSyntax { + pub name: String, + pub location: Location, +} + +macro_rules! 
id { + ($s:expr, $loc: expr) => { + IdentSyntax { + name: $s.to_string(), + location: $loc.clone(), + } + }; +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BuiltinType { + String, + Data, + U8, + U16, + U32, + U64, + S8, + S16, + S32, + S64, + F32, + F64, +} + +impl BuiltinType { + pub fn starts_parsing(sexpr: &SExpr) -> bool { + match sexpr { + SExpr::Word("string", _) + | SExpr::Word("data", _) + | SExpr::Word("u8", _) + | SExpr::Word("u16", _) + | SExpr::Word("u32", _) + | SExpr::Word("u64", _) + | SExpr::Word("s8", _) + | SExpr::Word("s16", _) + | SExpr::Word("s32", _) + | SExpr::Word("s64", _) + | SExpr::Word("f32", _) + | SExpr::Word("f64", _) => true, + _ => false, + } + } + pub fn parse(sexpr: &SExpr) -> Result { + match sexpr { + SExpr::Word("string", _loc) => Ok(BuiltinType::String), + SExpr::Word("data", _loc) => Ok(BuiltinType::Data), + SExpr::Word("u8", _loc) => Ok(BuiltinType::U8), + SExpr::Word("u16", _loc) => Ok(BuiltinType::U16), + SExpr::Word("u32", _loc) => Ok(BuiltinType::U32), + SExpr::Word("u64", _loc) => Ok(BuiltinType::U64), + SExpr::Word("s8", _loc) => Ok(BuiltinType::S8), + SExpr::Word("s16", _loc) => Ok(BuiltinType::S16), + SExpr::Word("s32", _loc) => Ok(BuiltinType::S32), + SExpr::Word("s64", _loc) => Ok(BuiltinType::S64), + SExpr::Word("f32", _loc) => Ok(BuiltinType::F32), + SExpr::Word("f64", _loc) => Ok(BuiltinType::F64), + _ => Err(parse_err!(sexpr.location(), "invalid builtin type")), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DatatypeIdentSyntax { + Builtin(BuiltinType), + Array(Box), + Pointer(Box), + ConstPointer(Box), + Ident(IdentSyntax), +} + +impl DatatypeIdentSyntax { + pub fn starts_parsing(sexpr: &SExpr) -> bool { + BuiltinType::starts_parsing(sexpr) + || match sexpr { + SExpr::Ident(_, _) => true, + SExpr::Vec(v, _) => match (v.get(0), v.get(1)) { + (Some(SExpr::Word("array", _)), Some(_)) => true, + (Some(SExpr::Annot("witx", _)), Some(SExpr::Word("pointer", _))) => true, + 
(Some(SExpr::Annot("witx", _)), Some(SExpr::Word("const_pointer", _))) => true, + _ => false, + }, + _ => false, + } + } + pub fn parse(sexpr: &SExpr) -> Result { + if BuiltinType::starts_parsing(sexpr) { + let builtin = BuiltinType::parse(sexpr)?; + Ok(DatatypeIdentSyntax::Builtin(builtin)) + } else { + match sexpr { + SExpr::Ident(i, loc) => Ok(DatatypeIdentSyntax::Ident(id!(i, loc))), + SExpr::Vec(v, loc) => match (v.get(0), v.get(1), v.get(2)) { + (Some(SExpr::Word("array", _)), Some(expr), None) => Ok( + DatatypeIdentSyntax::Array(Box::new(DatatypeIdentSyntax::parse(expr)?)), + ), + ( + Some(SExpr::Annot("witx", _)), + Some(SExpr::Word("pointer", _)), + Some(expr), + ) => Ok(DatatypeIdentSyntax::Pointer(Box::new( + DatatypeIdentSyntax::parse(expr)?, + ))), + ( + Some(SExpr::Annot("witx", _)), + Some(SExpr::Word("const_pointer", _)), + Some(expr), + ) => Ok(DatatypeIdentSyntax::ConstPointer(Box::new( + DatatypeIdentSyntax::parse(expr)?, + ))), + _ => Err(parse_err!(loc, "expected type identifier")), + }, + _ => Err(parse_err!(sexpr.location(), "expected type identifier")), + } + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum TopLevelSyntax { + Decl(DeclSyntax), + Use(IdentSyntax), +} + +impl TopLevelSyntax { + pub fn parse(sexpr: &SExpr) -> Result { + if DeclSyntax::starts_parsing(sexpr) { + let decl = DeclSyntax::parse(sexpr)?; + Ok(TopLevelSyntax::Decl(decl)) + } else { + match sexpr { + SExpr::Vec(v, vec_loc) => match v.get(0) { + Some(SExpr::Word("use", loc)) => match v.get(1) { + Some(SExpr::Quote(u, loc)) => Ok(TopLevelSyntax::Use(id!(u, loc))), + _ => Err(parse_err!(loc, "invalid use declaration")), + }, + _ => Err(parse_err!(vec_loc, "expected top level declaration")), + }, + _ => Err(parse_err!( + sexpr.location(), + "expected top level declaration" + )), + } + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DeclSyntax { + Typename(TypenameSyntax), + Module(ModuleSyntax), +} + +impl DeclSyntax { + pub fn starts_parsing(sexpr: 
&SExpr) -> bool { + match sexpr { + SExpr::Vec(v, _) => match v.get(0) { + Some(SExpr::Word("typename", _)) => true, + Some(SExpr::Word("module", _)) => true, + _ => false, + }, + _ => false, + } + } + pub fn parse(sexpr: &SExpr) -> Result { + match sexpr { + SExpr::Vec(v, loc) => match v.get(0) { + Some(SExpr::Word("typename", loc)) => { + Ok(DeclSyntax::Typename(TypenameSyntax::parse(&v[1..], loc)?)) + } + Some(SExpr::Word("module", loc)) => { + Ok(DeclSyntax::Module(ModuleSyntax::parse(&v[1..], loc)?)) + } + _ => Err(parse_err!(loc, "invalid declaration")), + }, + _ => Err(parse_err!(sexpr.location(), "expected vec")), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TypenameSyntax { + pub ident: IdentSyntax, + pub def: TypedefSyntax, +} + +impl TypenameSyntax { + pub fn parse(sexpr: &[SExpr], loc: &Location) -> Result { + let ident = match sexpr.get(0) { + Some(SExpr::Ident(i, loc)) => id!(i, loc), + Some(s) => Err(parse_err!(s.location(), "expected typename identifier"))?, + None => Err(parse_err!(loc, "expected typename identifier"))?, + }; + let def = match sexpr.get(1) { + Some(expr) => TypedefSyntax::parse(expr)?, + _ => Err(parse_err!(loc, "expected type definition"))?, + }; + Ok(TypenameSyntax { ident, def }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum TypedefSyntax { + Ident(DatatypeIdentSyntax), + Enum(EnumSyntax), + Flags(FlagsSyntax), + Struct(StructSyntax), + Union(UnionSyntax), +} + +impl TypedefSyntax { + pub fn parse(sexpr: &SExpr) -> Result { + if DatatypeIdentSyntax::starts_parsing(sexpr) { + let ident = DatatypeIdentSyntax::parse(sexpr)?; + Ok(TypedefSyntax::Ident(ident)) + } else { + match sexpr { + SExpr::Vec(vs, loc) => match vs.get(0) { + Some(SExpr::Word("enum", loc)) => { + Ok(TypedefSyntax::Enum(EnumSyntax::parse(&vs[1..], loc)?)) + } + Some(SExpr::Word("flags", loc)) => { + Ok(TypedefSyntax::Flags(FlagsSyntax::parse(&vs[1..], loc)?)) + } + Some(SExpr::Word("struct", loc)) => { + 
Ok(TypedefSyntax::Struct(StructSyntax::parse(&vs[1..], loc)?)) + } + Some(SExpr::Word("union", loc)) => { + Ok(TypedefSyntax::Union(UnionSyntax::parse(&vs[1..], loc)?)) + } + _ => Err(parse_err!( + loc, + "expected type identifier or type definition" + )), + }, + _ => Err(parse_err!( + sexpr.location(), + "expected type identifier or type definition" + )), + } + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct EnumSyntax { + pub repr: BuiltinType, + pub members: Vec, +} + +impl EnumSyntax { + pub fn parse(sexpr: &[SExpr], loc: &Location) -> Result { + let repr = match sexpr.get(0) { + Some(e) => BuiltinType::parse(e)?, + _ => Err(parse_err!(loc, "no enum repr"))?, + }; + let members = sexpr[1..] + .iter() + .map(|m| match m { + SExpr::Ident(i, loc) => Ok(id!(i, loc)), + s => Err(parse_err!(s.location(), "expected enum member identifier")), + }) + .collect::, ParseError>>()?; + if members.is_empty() { + Err(parse_err!(loc, "expected at least one enum member"))? + } + Ok(EnumSyntax { repr, members }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FlagsSyntax { + pub repr: BuiltinType, + pub flags: Vec, +} + +impl FlagsSyntax { + pub fn parse(sexpr: &[SExpr], loc: &Location) -> Result { + let repr = BuiltinType::parse( + sexpr + .get(0) + .ok_or_else(|| parse_err!(loc, "expected flag repr type"))?, + )?; + let flags = sexpr[1..] 
+ .iter() + .map(|f| match f { + SExpr::Vec(vs, loc) => match (vs.get(0), vs.get(1)) { + (Some(SExpr::Word("flag", _)), Some(SExpr::Ident(i, loc))) => Ok(id!(i, loc)), + _ => Err(parse_err!(loc, "expected flag specifier")), + }, + s => Err(parse_err!(s.location(), "expected flag specifier")), + }) + .collect::, ParseError>>()?; + Ok(FlagsSyntax { repr, flags }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct StructSyntax { + pub fields: Vec, +} + +impl StructSyntax { + pub fn parse(sexpr: &[SExpr], loc: &Location) -> Result { + if sexpr.is_empty() { + Err(parse_err!(loc, "expected at least one struct member"))? + } + let fields = sexpr + .iter() + .map(|f| FieldSyntax::parse(f, "field")) + .collect::, ParseError>>()?; + Ok(StructSyntax { fields }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FieldSyntax { + pub name: IdentSyntax, + pub type_: DatatypeIdentSyntax, +} + +impl FieldSyntax { + pub fn starts_parsing(sexpr: &SExpr, constructor: &str) -> bool { + match sexpr { + SExpr::Vec(v, _) => match v.get(0) { + Some(SExpr::Word(c, _)) => *c == constructor, + _ => false, + }, + _ => false, + } + } + pub fn parse(sexpr: &SExpr, constructor: &str) -> Result { + match sexpr { + SExpr::Vec(v, loc) => match v.get(0) { + Some(SExpr::Word(c, _)) if *c == constructor => { + let name = match v.get(1) { + Some(SExpr::Ident(i, loc)) => id!(i, loc), + _ => Err(parse_err!(loc, "expected {} name identifier", constructor))?, + }; + let type_ = DatatypeIdentSyntax::parse(v.get(2).ok_or_else(|| { + parse_err!(loc, "expected {} type identifier", constructor) + })?)?; + Ok(FieldSyntax { name, type_ }) + } + _ => Err(parse_err!(loc, "expected {}", constructor)), + }, + _ => Err(parse_err!(sexpr.location(), "expected {}", constructor)), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct UnionSyntax { + pub fields: Vec, +} + +impl UnionSyntax { + pub fn parse(sexpr: &[SExpr], loc: &Location) -> Result { + if sexpr.is_empty() { + 
Err(parse_err!(loc, "expected at least one union member"))? + } + let fields = sexpr + .iter() + .map(|f| FieldSyntax::parse(f, "field")) + .collect::, ParseError>>()?; + Ok(UnionSyntax { fields }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ModuleSyntax { + pub name: IdentSyntax, + pub decls: Vec, +} + +impl ModuleSyntax { + pub fn parse(sexprs: &[SExpr], loc: &Location) -> Result { + let name = match sexprs.get(0) { + Some(SExpr::Ident(i, loc)) => id!(i, loc), + _ => Err(parse_err!(loc, "expected module name"))?, + }; + let decls = sexprs[1..] + .iter() + .map(|s| ModuleDeclSyntax::parse(s)) + .collect::, _>>()?; + Ok(ModuleSyntax { name, decls }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ModuleDeclSyntax { + Import(ModuleImportSyntax), + Func(InterfaceFuncSyntax), +} + +impl ModuleDeclSyntax { + pub fn parse(sexpr: &SExpr) -> Result { + if ModuleImportSyntax::starts_parsing(sexpr) { + Ok(ModuleDeclSyntax::Import(ModuleImportSyntax::parse(sexpr)?)) + } else if InterfaceFuncSyntax::starts_parsing(sexpr) { + Ok(ModuleDeclSyntax::Func(InterfaceFuncSyntax::parse(sexpr)?)) + } else { + Err(parse_err!(sexpr.location(), "expected import or function")) + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ModuleImportSyntax { + pub name: IdentSyntax, + pub type_: ImportTypeSyntax, +} + +impl ModuleImportSyntax { + pub fn starts_parsing(sexpr: &SExpr) -> bool { + match sexpr { + SExpr::Vec(vs, _) => match vs.get(0) { + Some(SExpr::Word("import", _)) => true, + _ => false, + }, + _ => false, + } + } + pub fn parse(sexpr: &SExpr) -> Result { + match sexpr { + SExpr::Vec(vs, vec_loc) => match (vs.get(0), vs.get(1)) { + (Some(SExpr::Word("import", _)), Some(SExpr::Quote(name, loc))) => { + let name = id!(name, loc); + let type_ = ImportTypeSyntax::parse(&vs[2..], vec_loc)?; + Ok(ModuleImportSyntax { name, type_ }) + } + _ => Err(parse_err!(vec_loc, "expected module import")), + }, + _ => Err(parse_err!(sexpr.location(), "expected 
module import")), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ImportTypeSyntax { + Memory, +} + +impl ImportTypeSyntax { + pub fn parse(sexpr: &[SExpr], loc: &Location) -> Result { + if sexpr.len() > 1 { + Err(parse_err!(loc, "too many elements for an import type"))?; + } + match sexpr.get(0) { + Some(SExpr::Vec(vs, loc)) => match vs.get(0) { + Some(SExpr::Word("memory", _)) => { + if vs.len() == 1 { + Ok(ImportTypeSyntax::Memory) + } else { + Err(parse_err!(loc, "too many elements for memory declaration")) + } + } + _ => Err(parse_err!(loc, "expected import type")), + }, + _ => Err(parse_err!(loc, "expected import type")), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct InterfaceFuncSyntax { + pub export: IdentSyntax, + pub params: Vec, + pub results: Vec, +} + +impl InterfaceFuncSyntax { + pub fn starts_parsing(sexpr: &SExpr) -> bool { + match sexpr { + SExpr::Vec(vs, _) => match (vs.get(0), vs.get(1)) { + (Some(SExpr::Annot("interface", _)), Some(SExpr::Word("func", _))) => true, + _ => false, + }, + _ => false, + } + } + pub fn parse(sexpr: &SExpr) -> Result { + match sexpr { + SExpr::Vec(vs, loc) => match (vs.get(0), vs.get(1)) { + (Some(SExpr::Annot("interface", _)), Some(SExpr::Word("func", _))) => { + let export = match vs.get(2) { + Some(SExpr::Vec(es, loc)) => match (es.get(0), es.get(1)) { + ( + Some(SExpr::Word("export", _)), + Some(SExpr::Quote(name, name_loc)), + ) => { + if es.len() == 2 { + id!(name, name_loc) + } else { + Err(parse_err!( + loc, + "too many elements for export declaration" + ))? + } + } + _ => Err(parse_err!(loc, "expected export declaration"))?, + }, + _ => Err(parse_err!(loc, "expected export declaration"))?, + }; + let mut params = Vec::new(); + let mut results = Vec::new(); + + for sexpr in &vs[3..] 
{ + if FieldSyntax::starts_parsing(sexpr, "param") { + let param = FieldSyntax::parse(sexpr, "param")?; + params.push(param); + } else if FieldSyntax::starts_parsing(sexpr, "result") { + let result = FieldSyntax::parse(sexpr, "result")?; + results.push(result); + } else { + Err(parse_err!( + sexpr.location(), + "expected param or result field" + ))?; + } + } + + Ok(InterfaceFuncSyntax { + export, + params, + results, + }) + } + _ => Err(parse_err!(loc, "expected interface func declaration")), + }, + + _ => Err(parse_err!( + sexpr.location(), + "expected interface func declaration" + )), + } + } +} diff --git a/tools/witx/src/sexpr.rs b/tools/witx/src/sexpr.rs new file mode 100644 index 000000000..6a34cf83b --- /dev/null +++ b/tools/witx/src/sexpr.rs @@ -0,0 +1,236 @@ +pub use crate::lexer::LexError; +use crate::lexer::{Lexer, LocatedError, LocatedToken, Token}; +use crate::Location; +use failure::Fail; +use std::path::{Path, PathBuf}; + +///! The s-expression parser turns a string into a stream of SExprs. +///! It uses the `Lexer` under the hood. +///! 
This implementation was heavily influenced by `cranelift-reader` + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum SExpr<'a> { + Vec(Vec>, Location), + Word(&'a str, Location), + Ident(&'a str, Location), + Quote(&'a str, Location), + /// Short for Annotation + Annot(&'a str, Location), +} + +impl<'a> SExpr<'a> { + pub fn location(&self) -> Location { + match self { + SExpr::Vec(_, loc) => loc.clone(), + SExpr::Word(_, loc) => loc.clone(), + SExpr::Ident(_, loc) => loc.clone(), + SExpr::Quote(_, loc) => loc.clone(), + SExpr::Annot(_, loc) => loc.clone(), + } + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Fail)] +pub enum SExprParseError { + #[fail(display = "Lexical error: {}", _0)] + Lex(LexError, Location), + #[fail(display = "Unexpected ')'")] + UnexpectedCloseParen(Location), + #[fail(display = "Unexpected end of input in {:?}", _0)] + UnexpectedEof(PathBuf), +} + +impl SExprParseError { + pub fn report(&self) -> String { + use SExprParseError::*; + match self { + Lex(lex_err, loc) => format!("{}\n{}", loc.highlight_source(), lex_err), + UnexpectedCloseParen(loc) => format!("{}\n{}", loc.highlight_source(), self), + UnexpectedEof(_path) => format!("{}", self), + } + } +} + +pub struct SExprParser<'a> { + lex: Lexer<'a>, + lookahead: Option>, + location: Location, +} + +impl<'a> SExprParser<'a> { + pub fn new>(text: &'a str, path: P) -> SExprParser<'_> { + SExprParser { + lex: Lexer::new(text, path.as_ref()), + lookahead: None, + location: Location { + path: path.as_ref().into(), + line: 0, + column: 0, + }, + } + } + fn consume(&mut self) -> Token<'a> { + self.lookahead.take().expect("no token to consume") + } + fn token(&mut self) -> Result>, SExprParseError> { + while self.lookahead == None { + match self.lex.next() { + Some(Ok(LocatedToken { token, location })) => { + self.location = location; + self.lookahead = Some(token) + } + Some(Err(LocatedError { error, location })) => { + self.location = location.clone(); + Err(SExprParseError::Lex(error, 
location))?; + } + None => break, + } + } + Ok(self.lookahead) + } + + pub fn match_sexpr(&mut self) -> Result, SExprParseError> { + let location = self.location.clone(); + match self.token()? { + Some(Token::LPar) => { + self.consume(); + let mut members = Vec::new(); + loop { + match self.token()? { + Some(Token::RPar) => { + self.consume(); + break; + } + _ => { + members.push(self.match_sexpr()?); + } + } + } + Ok(SExpr::Vec(members, location)) + } + Some(Token::Word(word)) => { + self.consume(); + Ok(SExpr::Word(word, location)) + } + Some(Token::Ident(id)) => { + self.consume(); + Ok(SExpr::Ident(id, location)) + } + Some(Token::Annot(id)) => { + self.consume(); + Ok(SExpr::Annot(id, location)) + } + Some(Token::Quote(q)) => { + self.consume(); + Ok(SExpr::Quote(q, location)) + } + Some(Token::RPar) => Err(SExprParseError::UnexpectedCloseParen(location)), + None => Err(SExprParseError::UnexpectedEof(self.location.path.clone())), + } + } + + pub fn match_sexprs(&mut self) -> Result>, SExprParseError> { + let mut sexprs = Vec::new(); + while self.token()?.is_some() { + sexprs.push(self.match_sexpr()?); + } + Ok(sexprs) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + fn loc(line: usize, col: usize) -> Location { + Location { + path: PathBuf::from("/test"), + line: line, + column: col, + } + } + + fn testparser(input: &str) -> SExprParser { + SExprParser::new(input, Path::new("/test")) + } + + #[test] + fn empty() { + let mut parser = testparser(""); + assert_eq!(parser.match_sexprs().expect("valid parse"), Vec::new()); + let mut parser = testparser(" ;; just a comment\n;;another"); + assert_eq!(parser.match_sexprs().expect("valid parse"), Vec::new()); + } + + #[test] + fn atoms() { + let mut parser = testparser("hello\n$world\n\"a quotation\""); + assert_eq!( + parser.match_sexprs().expect("valid parse"), + vec![ + SExpr::Word("hello", loc(1, 0)), + SExpr::Ident("world", loc(2, 0)), + SExpr::Quote("a quotation", loc(3, 0)), + ] 
+ ); + } + + #[test] + fn lists() { + let mut parser = testparser("()"); + assert_eq!( + parser.match_sexprs().expect("valid parse"), + vec![SExpr::Vec(vec![], loc(1, 0))] + ); + + let mut parser = testparser("(hello\n$world\n\"a quotation\")"); + assert_eq!( + parser.match_sexprs().expect("valid parse"), + vec![SExpr::Vec( + vec![ + SExpr::Word("hello", loc(1, 1)), + SExpr::Ident("world", loc(2, 0)), + SExpr::Quote("a quotation", loc(3, 0)), + ], + loc(1, 0) + )] + ); + + let mut parser = testparser("((($deep)))"); + assert_eq!( + parser.match_sexprs().expect("valid parse"), + vec![SExpr::Vec( + vec![SExpr::Vec( + vec![SExpr::Vec(vec![SExpr::Ident("deep", loc(1, 3))], loc(1, 2))], + loc(1, 1) + )], + loc(1, 0) + )] + ); + } + + #[test] + fn errors() { + let mut parser = testparser("("); + assert_eq!( + parser.match_sexprs().err().expect("dies"), + SExprParseError::UnexpectedEof(PathBuf::from("/test")) + ); + let mut parser = testparser(")"); + assert_eq!( + parser.match_sexprs().err().expect("dies"), + SExprParseError::UnexpectedCloseParen(loc(1, 0)) + ); + let mut parser = testparser("())"); + assert_eq!( + parser.match_sexprs().err().expect("dies"), + SExprParseError::UnexpectedCloseParen(loc(1, 2)) + ); + let mut parser = testparser("$ ;; should be a lex error"); + assert_eq!( + parser.match_sexprs().err().expect("dies"), + SExprParseError::Lex(LexError::EmptyIdentifier, loc(1, 0),), + ); + } +} diff --git a/tools/witx/src/toplevel.rs b/tools/witx/src/toplevel.rs new file mode 100644 index 000000000..cf3a5e377 --- /dev/null +++ b/tools/witx/src/toplevel.rs @@ -0,0 +1,243 @@ +use crate::parser::{DeclSyntax, ParseError, TopLevelSyntax}; +use crate::sexpr::SExprParser; +use crate::WitxError; +use std::collections::HashSet; +use std::fs; +use std::path::{Path, PathBuf}; + +trait WitxIo { + fn fgets(&self, path: &Path) -> Result; + fn canonicalize(&self, path: &Path) -> Result; +} + +struct Filesystem; + +impl WitxIo for Filesystem { + fn fgets(&self, path: &Path) 
-> Result { + fs::read_to_string(path).map_err(|e| WitxError::UseResolution(path.to_path_buf(), e)) + } + fn canonicalize(&self, path: &Path) -> Result { + path.canonicalize() + .map_err(|e| WitxError::UseResolution(path.to_path_buf(), e)) + } +} + +pub fn parse_witx>(i: P) -> Result, WitxError> { + parse_witx_with(i, &Filesystem) +} + +fn parse_witx_with>( + i: P, + witxio: &dyn WitxIo, +) -> Result, WitxError> { + let input_path = witxio.canonicalize(&i.as_ref())?; + + let input = witxio.fgets(&input_path)?; + + let toplevel = parse_toplevel(&input, &input_path)?; + let mut resolved = HashSet::new(); + resolved.insert(input_path.clone()); + let search_path = input_path.parent().unwrap_or(Path::new(".")); + resolve_uses(toplevel, &search_path, &mut resolved, witxio) +} + +fn parse_toplevel(source_text: &str, file_path: &Path) -> Result, WitxError> { + let mut sexpr_parser = SExprParser::new(source_text, file_path); + let sexprs = sexpr_parser.match_sexprs().map_err(WitxError::SExpr)?; + let top_levels = sexprs + .iter() + .map(|s| TopLevelSyntax::parse(s)) + .collect::, ParseError>>() + .map_err(WitxError::Parse)?; + Ok(top_levels) +} + +fn resolve_uses( + toplevel: Vec, + search_path: &Path, + used: &mut HashSet, + witxio: &dyn WitxIo, +) -> Result, WitxError> { + let mut decls = Vec::new(); + + for t in toplevel { + match t { + TopLevelSyntax::Decl(d) => decls.push(d), + TopLevelSyntax::Use(u) => { + let abs_path = witxio.canonicalize(&search_path.join(u.name))?; + // Include the decls from a use declaration only once + // in a given toplevel. Same idea as #pragma once. 
+ if !used.contains(&abs_path) { + used.insert(abs_path.clone()); + + let source_text = witxio.fgets(&abs_path)?; + let inner_toplevels = parse_toplevel(&source_text, &abs_path)?; + + let inner_decls = resolve_uses(inner_toplevels, search_path, used, witxio)?; + decls.extend(inner_decls) + } + } + } + } + + Ok(decls) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::parser::*; + use crate::Location; + use std::collections::HashMap; + + struct MockFs { + map: HashMap<&'static str, &'static str>, + } + + impl MockFs { + pub fn new(strings: Vec<(&'static str, &'static str)>) -> Self { + MockFs { + map: strings.into_iter().collect(), + } + } + } + + impl WitxIo for MockFs { + fn fgets(&self, path: &Path) -> Result { + if let Some(entry) = self.map.get(path.to_str().unwrap()) { + Ok(entry.to_string()) + } else { + use std::io::{Error, ErrorKind}; + Err(WitxError::UseResolution( + path.to_path_buf(), + Error::new(ErrorKind::Other, "mock fs: file not found"), + )) + } + } + fn canonicalize(&self, path: &Path) -> Result { + Ok(PathBuf::from(path)) + } + } + + #[test] + fn empty() { + assert_eq!( + parse_witx_with(&Path::new("/a"), &MockFs::new(vec![("/a", ";; empty")])) + .expect("parse"), + Vec::new(), + ); + } + + #[test] + fn one_use() { + assert_eq!( + parse_witx_with( + &Path::new("/a"), + &MockFs::new(vec![("/a", "(use \"b\")"), ("/b", ";; empty")]) + ) + .expect("parse"), + Vec::new(), + ); + } + + #[test] + fn multi_use() { + assert_eq!( + parse_witx_with( + &Path::new("/a"), + &MockFs::new(vec![ + ("/a", "(use \"b\")"), + ("/b", "(use \"c\")\n(typename $b_float f64)"), + ("/c", "(typename $c_int u32)") + ]) + ) + .expect("parse"), + vec![ + DeclSyntax::Typename(TypenameSyntax { + ident: IdentSyntax { + name: "c_int".to_owned(), + location: Location { + path: PathBuf::from("/c"), + line: 1, + column: 10, + } + }, + def: TypedefSyntax::Ident(DatatypeIdentSyntax::Builtin(BuiltinType::U32)) + }), + DeclSyntax::Typename(TypenameSyntax { + ident: IdentSyntax { 
+ name: "b_float".to_owned(), + location: Location { + path: PathBuf::from("/b"), + line: 2, + column: 10, + } + }, + def: TypedefSyntax::Ident(DatatypeIdentSyntax::Builtin(BuiltinType::F64)) + }) + ], + ); + } + + #[test] + fn diamond_dependency() { + assert_eq!( + parse_witx_with( + &Path::new("/a"), + &MockFs::new(vec![ + ("/a", "(use \"b\")\n(use \"c\")"), + ("/b", "(use \"d\")"), + ("/c", "(use \"d\")"), + ("/d", "(typename $d_char u8)") + ]) + ) + .expect("parse"), + vec![DeclSyntax::Typename(TypenameSyntax { + ident: IdentSyntax { + name: "d_char".to_owned(), + location: Location { + path: PathBuf::from("/d"), + line: 1, + column: 10, + } + }, + def: TypedefSyntax::Ident(DatatypeIdentSyntax::Builtin(BuiltinType::U8)) + })], + ); + } + + #[test] + fn use_not_found() { + match parse_witx_with(&Path::new("/a"), &MockFs::new(vec![("/a", "(use \"b\")")])) + .err() + .unwrap() + { + WitxError::UseResolution(path, _error) => assert_eq!(path, PathBuf::from("/b")), + e => panic!("wrong error: {:?}", e), + } + } + + #[test] + fn use_invalid() { + match parse_witx_with( + &Path::new("/a"), + &MockFs::new(vec![("/a", "(use bbbbbbb)")]), + ) + .err() + .unwrap() + { + WitxError::Parse(e) => { + assert_eq!(e.message, "invalid use declaration"); + assert_eq!( + e.location, + Location { + path: PathBuf::from("/a"), + line: 1, + column: 1 + } + ); + } + e => panic!("wrong error: {:?}", e), + } + } +} diff --git a/tools/witx/src/validate.rs b/tools/witx/src/validate.rs new file mode 100644 index 000000000..7041fca98 --- /dev/null +++ b/tools/witx/src/validate.rs @@ -0,0 +1,390 @@ +use crate::{ + parser::{ + DatatypeIdentSyntax, DeclSyntax, EnumSyntax, FlagsSyntax, IdentSyntax, ImportTypeSyntax, + ModuleDeclSyntax, StructSyntax, TypedefSyntax, UnionSyntax, + }, + AliasDatatype, BuiltinType, Datatype, DatatypeIdent, DatatypeVariant, Definition, Document, + Entry, EnumDatatype, FlagsDatatype, Id, IntRepr, InterfaceFunc, InterfaceFuncParam, Location, + Module, ModuleDefinition, 
ModuleEntry, ModuleImport, ModuleImportVariant, StructDatatype, + StructMember, UnionDatatype, UnionVariant, +}; +use failure::Fail; +use std::collections::HashMap; +use std::rc::Rc; + +#[derive(Debug, Fail)] +pub enum ValidationError { + #[fail(display = "Unknown name `{}`", name)] + UnknownName { name: String, location: Location }, + #[fail(display = "Redefinition of name `{}`", name)] + NameAlreadyExists { + name: String, + at_location: Location, + previous_location: Location, + }, + #[fail( + display = "Wrong kind of name `{}`: expected {}, got {}", + name, expected, got + )] + WrongKindName { + name: String, + location: Location, + expected: &'static str, + got: &'static str, + }, + #[fail(display = "Recursive definition of name `{}`", name)] + Recursive { name: String, location: Location }, + #[fail(display = "Invalid representation `{:?}`", repr)] + InvalidRepr { + repr: BuiltinType, + location: Location, + }, +} + +impl ValidationError { + pub fn report(&self) -> String { + use ValidationError::*; + match self { + UnknownName { location, .. } + | WrongKindName { location, .. } + | Recursive { location, .. } + | InvalidRepr { location, .. } => format!("{}\n{}", location.highlight_source(), &self), + NameAlreadyExists { + at_location, + previous_location, + .. 
+ } => format!( + "{}\n{}\nOriginally defined at:\n{}", + at_location.highlight_source(), + &self, + previous_location.highlight_source(), + ), + } + } +} + +pub fn validate_document(decls: &[DeclSyntax]) -> Result { + let mut validator = DocValidation::new(); + let mut definitions = Vec::new(); + for d in decls { + definitions.push(validator.validate_decl(&d)?); + } + + Ok(Document { + entries: validator.entries, + definitions, + }) +} + +struct IdentValidation { + names: HashMap, +} + +impl IdentValidation { + fn new() -> Self { + Self { + names: HashMap::new(), + } + } + fn introduce(&mut self, syntax: &IdentSyntax) -> Result { + if let Some(introduced) = self.names.get(&syntax.name) { + Err(ValidationError::NameAlreadyExists { + name: syntax.name.clone(), + at_location: syntax.location.clone(), + previous_location: introduced.clone(), + }) + } else { + self.names + .insert(syntax.name.clone(), syntax.location.clone()); + Ok(Id::new(&syntax.name)) + } + } + + fn get(&self, syntax: &IdentSyntax) -> Result { + if self.names.get(&syntax.name).is_some() { + Ok(Id::new(&syntax.name)) + } else { + Err(ValidationError::UnknownName { + name: syntax.name.clone(), + location: syntax.location.clone(), + }) + } + } +} + +struct DocValidation { + scope: IdentValidation, + pub entries: HashMap, +} + +impl DocValidation { + fn new() -> Self { + Self { + scope: IdentValidation::new(), + entries: HashMap::new(), + } + } + + fn validate_decl(&mut self, decl: &DeclSyntax) -> Result { + match decl { + DeclSyntax::Typename(decl) => { + let name = self.scope.introduce(&decl.ident)?; + let variant = + match &decl.def { + TypedefSyntax::Ident(syntax) => DatatypeVariant::Alias(AliasDatatype { + name: name.clone(), + to: self.validate_datatype_ident(&syntax)?, + }), + TypedefSyntax::Enum(syntax) => DatatypeVariant::Enum(self.validate_enum( + &name, + &syntax, + &decl.ident.location, + )?), + TypedefSyntax::Flags(syntax) => DatatypeVariant::Flags( + self.validate_flags(&name, &syntax, 
&decl.ident.location)?, + ), + TypedefSyntax::Struct(syntax) => DatatypeVariant::Struct( + self.validate_struct(&name, &syntax, &decl.ident.location)?, + ), + TypedefSyntax::Union(syntax) => DatatypeVariant::Union( + self.validate_union(&name, &syntax, &decl.ident.location)?, + ), + }; + let rc_datatype = Rc::new(Datatype { + name: name.clone(), + variant, + }); + self.entries + .insert(name, Entry::Datatype(Rc::downgrade(&rc_datatype))); + Ok(Definition::Datatype(rc_datatype)) + } + DeclSyntax::Module(syntax) => { + let name = self.scope.introduce(&syntax.name)?; + let mut module_validator = ModuleValidation::new(self); + let definitions = syntax + .decls + .iter() + .map(|d| module_validator.validate_decl(&d)) + .collect::, _>>()?; + + let rc_module = Rc::new(Module { + name: name.clone(), + definitions, + entries: module_validator.entries, + }); + self.entries + .insert(name, Entry::Module(Rc::downgrade(&rc_module))); + Ok(Definition::Module(rc_module)) + } + } + } + + fn validate_datatype_ident( + &self, + syntax: &DatatypeIdentSyntax, + ) -> Result { + match syntax { + DatatypeIdentSyntax::Builtin(b) => Ok(DatatypeIdent::Builtin(*b)), + DatatypeIdentSyntax::Array(a) => Ok(DatatypeIdent::Array(Box::new( + self.validate_datatype_ident(&a)?, + ))), + DatatypeIdentSyntax::Pointer(a) => Ok(DatatypeIdent::Pointer(Box::new( + self.validate_datatype_ident(&a)?, + ))), + DatatypeIdentSyntax::ConstPointer(a) => Ok(DatatypeIdent::ConstPointer(Box::new( + self.validate_datatype_ident(&a)?, + ))), + DatatypeIdentSyntax::Ident(i) => { + let id = self.scope.get(i)?; + match self.entries.get(&id) { + Some(Entry::Datatype(weak_d)) => Ok(DatatypeIdent::Ident( + weak_d.upgrade().expect("weak backref to defined type"), + )), + Some(e) => Err(ValidationError::WrongKindName { + name: i.name.clone(), + location: i.location.clone(), + expected: "datatype", + got: e.kind(), + }), + None => Err(ValidationError::Recursive { + name: i.name.clone(), + location: i.location.clone(), + }), + 
} + } + } + } + + fn validate_enum( + &self, + name: &Id, + syntax: &EnumSyntax, + location: &Location, + ) -> Result { + let mut enum_scope = IdentValidation::new(); + let repr = validate_int_repr(&syntax.repr, location)?; + let variants = syntax + .members + .iter() + .map(|i| enum_scope.introduce(i)) + .collect::, _>>()?; + + Ok(EnumDatatype { + name: name.clone(), + repr, + variants, + }) + } + + fn validate_flags( + &self, + name: &Id, + syntax: &FlagsSyntax, + location: &Location, + ) -> Result { + let mut flags_scope = IdentValidation::new(); + let repr = validate_int_repr(&syntax.repr, location)?; + let flags = syntax + .flags + .iter() + .map(|i| flags_scope.introduce(i)) + .collect::, _>>()?; + + Ok(FlagsDatatype { + name: name.clone(), + repr, + flags, + }) + } + + fn validate_struct( + &self, + name: &Id, + syntax: &StructSyntax, + _location: &Location, + ) -> Result { + let mut member_scope = IdentValidation::new(); + let members = syntax + .fields + .iter() + .map(|f| { + Ok(StructMember { + name: member_scope.introduce(&f.name)?, + type_: self.validate_datatype_ident(&f.type_)?, + }) + }) + .collect::, _>>()?; + + Ok(StructDatatype { + name: name.clone(), + members, + }) + } + + fn validate_union( + &self, + name: &Id, + syntax: &UnionSyntax, + _location: &Location, + ) -> Result { + let mut variant_scope = IdentValidation::new(); + let variants = syntax + .fields + .iter() + .map(|f| { + Ok(UnionVariant { + name: variant_scope.introduce(&f.name)?, + type_: self.validate_datatype_ident(&f.type_)?, + }) + }) + .collect::, _>>()?; + + Ok(UnionDatatype { + name: name.clone(), + variants, + }) + } +} + +fn validate_int_repr(type_: &BuiltinType, location: &Location) -> Result { + match type_ { + BuiltinType::U8 => Ok(IntRepr::I8), + BuiltinType::U16 => Ok(IntRepr::I16), + BuiltinType::U32 => Ok(IntRepr::I32), + BuiltinType::U64 => Ok(IntRepr::I64), + _ => Err(ValidationError::InvalidRepr { + repr: type_.clone(), + location: location.clone(), + }), + } +} 
+
+struct ModuleValidation<'a> {
+    doc: &'a DocValidation,
+    scope: IdentValidation,
+    pub entries: HashMap<Id, ModuleEntry>,
+}
+
+impl<'a> ModuleValidation<'a> {
+    fn new(doc: &'a DocValidation) -> Self {
+        Self {
+            doc,
+            scope: IdentValidation::new(),
+            entries: HashMap::new(),
+        }
+    }
+
+    fn validate_decl(
+        &mut self,
+        decl: &ModuleDeclSyntax,
+    ) -> Result<ModuleDefinition, ValidationError> {
+        match decl {
+            ModuleDeclSyntax::Import(syntax) => {
+                let name = self.scope.introduce(&syntax.name)?;
+                let variant = match syntax.type_ {
+                    ImportTypeSyntax::Memory => ModuleImportVariant::Memory,
+                };
+                let rc_import = Rc::new(ModuleImport {
+                    name: name.clone(),
+                    variant,
+                });
+                self.entries
+                    .insert(name, ModuleEntry::Import(Rc::downgrade(&rc_import)));
+                Ok(ModuleDefinition::Import(rc_import))
+            }
+            ModuleDeclSyntax::Func(syntax) => {
+                let name = self.scope.introduce(&syntax.export)?;
+                let mut argnames = IdentValidation::new();
+                let params = syntax
+                    .params
+                    .iter()
+                    .map(|f| {
+                        Ok(InterfaceFuncParam {
+                            name: argnames.introduce(&f.name)?,
+                            type_: self.doc.validate_datatype_ident(&f.type_)?,
+                        })
+                    })
+                    .collect::<Result<Vec<_>, _>>()?;
+                let results = syntax
+                    .results
+                    .iter()
+                    .map(|f| {
+                        Ok(InterfaceFuncParam {
+                            name: argnames.introduce(&f.name)?,
+                            type_: self.doc.validate_datatype_ident(&f.type_)?,
+                        })
+                    })
+                    .collect::<Result<Vec<_>, _>>()?;
+
+                let rc_func = Rc::new(InterfaceFunc {
+                    name: name.clone(),
+                    params,
+                    results,
+                });
+                self.entries
+                    .insert(name, ModuleEntry::Func(Rc::downgrade(&rc_func)));
+                Ok(ModuleDefinition::Func(rc_func))
+            }
+        }
+    }
+}
diff --git a/tools/witx/tests/wasi_unstable.rs b/tools/witx/tests/wasi_unstable.rs
new file mode 100644
index 000000000..e9188db67
--- /dev/null
+++ b/tools/witx/tests/wasi_unstable.rs
@@ -0,0 +1,7 @@
+use std::path::Path;
+use witx;
+
+#[test]
+fn validate_wasi_unstable() {
+    witx::load(Path::new("../../design/wasi_unstable/wasi_unstable.witx")).unwrap();
+}