diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 9550ee3..e12fb18 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -8,12 +8,14 @@ on:
 
 jobs:
   test:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
-      - uses: swift-actions/setup-swift@v2
-        with:
-          swift-version: "5.10.1"
       - uses: actions/checkout@v4
+
+      - run: sudo $GITHUB_WORKSPACE/ci/install-swift.bash
+        working-directory: /home/runner
+      - run: swift --version
+
       - run: swift package resolve
       - run: swift build
       - run: swift test
diff --git a/Package.swift b/Package.swift
index 88059a4..80f308d 100644
--- a/Package.swift
+++ b/Package.swift
@@ -39,7 +39,10 @@ let package = Package(
         ),
         .testTarget(
             name: "TypeScriptASTTests",
-            dependencies: ["TypeScriptAST"]
+            dependencies: ["TypeScriptAST"],
+            swiftSettings: [
+                .enableUpcomingFeature("BareSlashRegexLiterals")
+            ]
         ),
     ]
 )
diff --git a/Sources/TypeScriptAST/Parser/CharacterEx.swift b/Sources/TypeScriptAST/Parser/CharacterEx.swift
new file mode 100644
index 0000000..cf9c4e4
--- /dev/null
+++ b/Sources/TypeScriptAST/Parser/CharacterEx.swift
@@ -0,0 +1,46 @@
+// Named constants for the characters the tokenizer matches on.
+extension Character {
+    static let tab = Character("\t")
+    static let lf = Character("\n")
+    static let cr = Character("\r")
+    // "\r\n" is a single grapheme cluster, so it fits in one Character.
+    static let crLf = Character("\r\n")
+    static let space = Character(" ")
+    static let exclamation = Character("!")
+    static let doubleQuote = Character("\"")
+    static let dollar = Character("$")
+    static let percent = Character("%")
+    static let ampersand = Character("&")
+    static let singleQuote = Character("'")
+    static let leftParen = Character("(")
+    static let rightParen = Character(")")
+    static let asterisk = Character("*")
+    static let plus = Character("+")
+    static let comma = Character(",")
+    static let minus = Character("-")
+    static let dot = Character(".")
+    static let slash = Character("/")
+    static let _0 = Character("0")
+    static let _9 = Character("9")
+    static let colon = Character(":")
+    static let semicolon = Character(";")
+    static let leftAngleBracket = Character("<")
+    static let equal = Character("=")
+    static let rightAngleBracket = Character(">")
+    static let question = Character("?")
+    static let A = Character("A")
+    static let Z = Character("Z")
+    static let leftSquareBracket = Character("[")
+    static let backslash = Character("\\")
+    static let rightSquareBracket = Character("]")
+    static let underscore = Character("_")
+    static let backQuote = Character("`")
+    static let a = Character("a")
+    static let n = Character("n")
+    static let r = Character("r")
+    static let t = Character("t")
+    static let z = Character("z")
+    static let leftBrace = Character("{")
+    static let pipe = Character("|")
+    static let rightBrace = Character("}")
+}
diff --git a/Sources/TypeScriptAST/Parser/Keyword.swift b/Sources/TypeScriptAST/Parser/Keyword.swift
new file mode 100644
index 0000000..b1cce11
--- /dev/null
+++ b/Sources/TypeScriptAST/Parser/Keyword.swift
@@ -0,0 +1,9 @@
+/// Reserved words the tokenizer distinguishes from plain identifiers.
+public enum Keyword: String, Equatable & CustomStringConvertible {
+    case `import`
+    case from
+
+    public var description: String {
+        rawValue
+    }
+}
diff --git a/Sources/TypeScriptAST/Parser/Parser.swift b/Sources/TypeScriptAST/Parser/Parser.swift
new file mode 100644
index 0000000..065bda2
--- /dev/null
+++ b/Sources/TypeScriptAST/Parser/Parser.swift
@@ -0,0 +1,83 @@
+/// Builds a `TSSourceFile` from TypeScript source text read through a `Tokenizer`.
+public struct Parser {
+    /// Creates a parser over `string`.
+    public init(string: String) {
+        self.tokenizer = Tokenizer(string: string)
+    }
+
+    private var tokenizer: Tokenizer
+
+    /// Parses elements until `parseElement()` returns `nil` and wraps them in a source file.
+    public mutating func parse() -> TSSourceFile {
+        var es: [any ASTNode] = []
+        while let e = parseElement() {
+            es.append(e)
+        }
+        return TSSourceFile(es)
+    }
+
+    /// Returns the next recognized element, or `nil` at end of input.
+    /// Only `import` declarations are recognized so far; other tokens are skipped.
+    public mutating func parseElement() -> (any ASTNode)? {
+        while true {
+            switch nextToken {
+            case nil: return nil
+            case .keyword(let k):
+                switch k {
+                case .import:
+                    if let x = parseImport() {
+                        return x
+                    }
+                default:
+                    print("skip keyword: \(k)")
+                    readToken()
+                }
+            default:
+                if let x = parseExpression() {
+                    return x
+                }
+            }
+        }
+    }
+
+    /// Parses `import { a, b } from "path"`; returns `nil` when the tokens do not match.
+    private mutating func parseImport() -> TSImportDecl? {
+        let imp = readToken()
+        guard imp == .keyword(.import) else { return nil }
+        let lb = readToken()
+        guard lb == .symbol(.leftBrace) else { return nil }
+        var names: [String] = []
+        loop: while true {
+            guard let tk = readToken() else { return nil }
+            switch tk {
+            case .symbol(.rightBrace):
+                break loop
+            case .symbol(.comma):
+                // A comma separates specifiers; it is not an imported name.
+                continue loop
+            default:
+                names.append(tk.description)
+            }
+        }
+        let from = readToken()
+        guard from == .keyword(.from) else { return nil }
+        let pathToken = readToken()
+        guard case .stringLiteral(let path) = pathToken else { return nil }
+        return TSImportDecl(names: names, from: path)
+    }
+
+    /// Placeholder: consumes one token and produces no node.
+    private mutating func parseExpression() -> (any ASTNode)? {
+        readToken()
+        return nil
+    }
+
+    @discardableResult
+    private mutating func readToken() -> Token? {
+        tokenizer.read()
+    }
+
+    private var nextToken: Token? {
+        tokenizer.nextToken
+    }
+}
diff --git a/Sources/TypeScriptAST/Parser/Symbol.swift b/Sources/TypeScriptAST/Parser/Symbol.swift
new file mode 100644
index 0000000..797a0ba
--- /dev/null
+++ b/Sources/TypeScriptAST/Parser/Symbol.swift
@@ -0,0 +1,56 @@
+/// Punctuators and operators produced by the tokenizer; the raw value is the source spelling.
+public enum Symbol: String, Hashable & CustomStringConvertible {
+    // Trailing comments give the ASCII code (hex) of each group's leading character.
+    case exclamation = "!" // 21
+    case exclamationEqual = "!="
+    case exclamationEqualEqual = "!=="
+    case percent = "%" // 25
+    case percentEqual = "%="
+    case ampersand = "&" // 26
+    case ampersandAmpersand = "&&"
+    case ampersandAmpersandEqual = "&&="
+    case ampersandEqual = "&="
+    case leftParen = "(" // 28
+    case rightParen = ")" // 29
+    case asterisk = "*" // 2a
+    case asteriskEqual = "*="
+    case plus = "+" // 2b
+    case plusPlus = "++"
+    case plusEqual = "+="
+    case comma = "," // 2c
+    case minus = "-" // 2d
+    case minusMinus = "--"
+    case minusEqual = "-="
+    case dot = "." // 2e
+    case slash = "/" // 2f
+    case slashEqual = "/="
+    case colon = ":" // 3a
+    case semicolon = ";" // 3b
+    case leftAngleBracket = "<" // 3c
+    case leftAngleBracketLeftAngleBracket = "<<"
+    case leftAngleBracketLeftAngleBracketEqual = "<<="
+    case equal = "=" // 3d
+    case equalEqual = "=="
+    case equalEqualEqual = "==="
+    case equalRightAngleBracket = "=>"
+    case rightAngleBracket = ">" // 3e
+    case rightAngleBracketRightAngleBracket = ">>"
+    case rightAngleBracketRightAngleBracketEqual = ">>="
+    case question = "?" // 3f
+    case questionDot = "?."
+    case questionQuestion = "??"
+    case questionQuestionEqual = "??="
+    case leftSquareBracket = "[" // 5b
+    case backslash = "\\" // 5c
+    case rightSquareBracket = "]" // 5d
+    case leftBrace = "{" // 7b
+    case pipe = "|" // 7c
+    case pipeEqual = "|="
+    case pipePipe = "||"
+    case pipePipeEqual = "||="
+    case rightBrace = "}" // 7d
+
+    public var description: String {
+        rawValue
+    }
+}
diff --git a/Sources/TypeScriptAST/Parser/Token.swift b/Sources/TypeScriptAST/Parser/Token.swift
new file mode 100644
index 0000000..8cf7318
--- /dev/null
+++ b/Sources/TypeScriptAST/Parser/Token.swift
@@ -0,0 +1,47 @@
+/// A lexical unit produced by `Tokenizer`.
+public enum Token: Equatable & CustomStringConvertible & CustomDebugStringConvertible {
+    case keyword(Keyword)
+    case identifier(String)
+    case symbol(Symbol)
+    case stringLiteral(String)
+
+    /// The token's source spelling; string literals are re-quoted and escaped.
+    public var description: String {
+        switch self {
+        case .keyword(let x): return x.description
+        case .identifier(let x): return x
+        case .symbol(let x): return x.description
+        case .stringLiteral(let x): return "\"" + Self.escapeStringLiteralContent(x) + "\""
+        }
+    }
+
+    public var debugDescription: String {
+        switch self {
+        case .keyword(let x): return "keyword(\(x))"
+        case .identifier(let x): return "identifier(\(x))"
+        case .symbol(let x): return "symbol(\(x))"
+        case .stringLiteral(let x): return "stringLiteral(\(x))"
+        }
+    }
+
+    /// Escapes `string` so it can be placed between double quotes.
+    ///
+    /// Covers every escape `Tokenizer` decodes (`\\`, `\"`, `\n`, `\r`, `\t`), so a raw
+    /// line break never ends up inside the printed literal.
+    public static func escapeStringLiteralContent(_ string: String) -> String {
+        var escaped = ""
+        // Iterate scalars, not Characters: "\r\n" is one Character but two scalars.
+        for scalar in string.unicodeScalars {
+            switch scalar {
+            case "\\": escaped += "\\\\"
+            case "\"": escaped += "\\\""
+            case "\n": escaped += "\\n"
+            case "\r": escaped += "\\r"
+            case "\t": escaped += "\\t"
+            default: escaped.unicodeScalars.append(scalar)
+            }
+        }
+        return escaped
+    }
+}
+
diff --git a/Sources/TypeScriptAST/Parser/Tokenizer.swift b/Sources/TypeScriptAST/Parser/Tokenizer.swift
new file mode 100644
index 0000000..54945cf
--- /dev/null
+++ b/Sources/TypeScriptAST/Parser/Tokenizer.swift
@@ -0,0 +1,441 @@
+/// Splits TypeScript source text into `Token`s, keeping one token of lookahead.
+public struct Tokenizer {
+    /// Creates a tokenizer over `string`, starting at `position` (default: the start).
+    public init(
+        string: String,
+        position: String.Index? = nil
+    ) {
+        self.string = string
+        self.pos = position ?? string.startIndex
+        self.nextToken = nil
+        _ = self.read()
+    }
+
+    /// The source text being tokenized.
+    public let string: String
+
+    /// The scan position, which lies past the lookahead token. Setting it re-reads the lookahead.
+    public var position: String.Index {
+        get { pos }
+        set {
+            pos = newValue
+            nextToken = nil
+            _ = read()
+        }
+    }
+    private var pos: String.Index
+
+    /// The token the next `read()` will return, or `nil` at end of input.
+    public private(set) var nextToken: Token?
+
+    /// Returns the lookahead token and scans the one after it.
+    public mutating func read() -> Token? {
+        let result = nextToken
+        nextToken = readNextToken()
+        return result
+    }
+
+    /// Reads every remaining token.
+    public mutating func readAll() -> [Token] {
+        var result: [Token] = []
+        while let token = read() {
+            result.append(token)
+        }
+        return result
+    }
+
+    /// Scans one token, skipping whitespace, comments, and unrecognized characters.
+    private mutating func readNextToken() -> Token? {
+        while true {
+            guard let c = char() else { return nil }
+
+            if readWhitespace() {
+                continue
+            }
+
+            if readComment() {
+                continue
+            }
+
+            if let x = readStringLiteral() {
+                return .stringLiteral(x)
+            }
+
+            if let x = readSymbol() {
+                return .symbol(x)
+            }
+
+            if let x = readKeyword() {
+                return x
+            }
+
+            print("skip character: \(c)")
+            advance()
+        }
+    }
+
+    private mutating func readWhitespace() -> Bool {
+        return readString(where: isWhitespace) != nil
+    }
+
+    /// Consumes a `//` line comment or `/* */` block comment at the current position.
+    /// Nothing is consumed when no comment starts here.
+    private mutating func readComment() -> Bool {
+        var p = self.pos
+        switch char(at: p) {
+        case .slash:
+            advance(position: &p)
+            switch char(at: p) {
+            case .slash:
+                advance(position: &p)
+                loop: while let c = char(at: p) {
+                    switch c {
+                    case .lf, .cr, .crLf:
+                        advance(position: &p)
+                        break loop
+                    default:
+                        advance(position: &p)
+                    }
+                }
+                self.pos = p
+                return true
+            case .asterisk:
+                advance(position: &p)
+                loop: while let c = char(at: p) {
+                    switch c {
+                    case .asterisk:
+                        advance(position: &p)
+                        switch char(at: p) {
+                        case .slash:
+                            advance(position: &p)
+                            break loop
+                        case nil:
+                            break loop
+                        default:
+                            continue
+                        }
+                    default:
+                        advance(position: &p)
+                    }
+                }
+                self.pos = p
+                return true
+            default:
+                return false
+            }
+        default:
+            return false
+        }
+    }
+
+    /// Reads a double-quoted literal and returns its unescaped content.
+    /// The literal also ends at a line break or at end of input.
+    private mutating func readStringLiteral() -> String? {
+        switch char() {
+        case .doubleQuote:
+            advance()
+            var content = ""
+            loop: while let c = char() {
+                switch c {
+                case .doubleQuote:
+                    advance()
+                    break loop
+                case .lf, .cr, .crLf:
+                    advance()
+                    break loop
+                case .backslash:
+                    advance()
+                    guard let c = char() else {
+                        content.append("\\")
+                        break loop
+                    }
+                    advance()
+                    switch c {
+                    case .n: content.append("\n")
+                    case .r: content.append("\r")
+                    case .t: content.append("\t")
+                    default: content.append(c)
+                    }
+                default:
+                    advance()
+                    content.append(c)
+                }
+            }
+            return content
+        default:
+            return nil
+        }
+    }
+
+    /// Reads the longest known symbol starting at the current position.
+    private mutating func readSymbol() -> Symbol? {
+        switch char() {
+        case .exclamation:
+            advance()
+            switch char() {
+            case .equal:
+                advance()
+                switch char() {
+                case .equal:
+                    advance()
+                    return .exclamationEqualEqual
+                default:
+                    return .exclamationEqual
+                }
+            default:
+                return .exclamation
+            }
+        case .percent:
+            advance()
+            switch char() {
+            case .equal:
+                advance()
+                return .percentEqual
+            default:
+                return .percent
+            }
+        case .ampersand:
+            advance()
+            switch char() {
+            case .ampersand:
+                advance()
+                switch char() {
+                case .equal:
+                    advance()
+                    return .ampersandAmpersandEqual
+                default:
+                    return .ampersandAmpersand
+                }
+            case .equal:
+                advance()
+                return .ampersandEqual
+            default:
+                return .ampersand
+            }
+        case .leftParen:
+            advance()
+            return .leftParen
+        case .rightParen:
+            advance()
+            return .rightParen
+        case .asterisk:
+            advance()
+            switch char() {
+            case .equal:
+                advance()
+                return .asteriskEqual
+            default:
+                return .asterisk
+            }
+        case .plus:
+            advance()
+            switch char() {
+            case .plus:
+                advance()
+                return .plusPlus
+            case .equal:
+                advance()
+                return .plusEqual
+            default:
+                return .plus
+            }
+        case .comma:
+            advance()
+            return .comma
+        case .minus:
+            advance()
+            switch char() {
+            case .minus:
+                advance()
+                return .minusMinus
+            case .equal:
+                advance()
+                return .minusEqual
+            default:
+                return .minus
+            }
+        case .dot:
+            advance()
+            return .dot
+        case .slash:
+            advance()
+            switch char() {
+            case .equal:
+                advance()
+                return .slashEqual
+            default:
+                return .slash
+            }
+        case .colon:
+            advance()
+            return .colon
+        case .semicolon:
+            advance()
+            return .semicolon
+        case .leftAngleBracket:
+            advance()
+            switch char() {
+            case .leftAngleBracket:
+                advance()
+                switch char() {
+                case .equal:
+                    advance()
+                    return .leftAngleBracketLeftAngleBracketEqual
+                default:
+                    return .leftAngleBracketLeftAngleBracket
+                }
+            default:
+                return .leftAngleBracket
+            }
+        case .equal:
+            advance()
+            switch char() {
+            case .equal:
+                advance()
+                switch char() {
+                case .equal:
+                    advance()
+                    return .equalEqualEqual
+                default:
+                    return .equalEqual
+                }
+            case .rightAngleBracket:
+                advance()
+                return .equalRightAngleBracket
+            default:
+                return .equal
+            }
+        case .rightAngleBracket:
+            advance()
+            switch char() {
+            case .rightAngleBracket:
+                advance()
+                switch char() {
+                case .equal:
+                    advance()
+                    return .rightAngleBracketRightAngleBracketEqual
+                default:
+                    return .rightAngleBracketRightAngleBracket
+                }
+            default:
+                return .rightAngleBracket
+            }
+        case .question:
+            advance()
+            switch char() {
+            case .dot:
+                advance()
+                return .questionDot
+            case .question:
+                advance()
+                switch char() {
+                case .equal:
+                    advance()
+                    return .questionQuestionEqual
+                default:
+                    return .questionQuestion
+                }
+            default:
+                return .question
+            }
+        case .leftSquareBracket:
+            advance()
+            return .leftSquareBracket
+        case .backslash:
+            advance()
+            return .backslash
+        case .rightSquareBracket:
+            advance()
+            return .rightSquareBracket
+        case .leftBrace:
+            advance()
+            return .leftBrace
+        case .pipe:
+            advance()
+            switch char() {
+            case .equal:
+                advance()
+                return .pipeEqual
+            case .pipe:
+                advance()
+                switch char() {
+                case .equal:
+                    advance()
+                    return .pipePipeEqual
+                default:
+                    return .pipePipe
+                }
+            default:
+                return .pipe
+            }
+        case .rightBrace:
+            advance()
+            return .rightBrace
+        default:
+            return nil
+        }
+    }
+
+    /// Reads an identifier-like word, classifying it as a keyword when it matches one.
+    private mutating func readKeyword() -> Token? {
+        guard let s = readKeywordString() else { return nil }
+
+        if let k = Keyword(rawValue: s) {
+            return .keyword(k)
+        }
+        return .identifier(s)
+    }
+
+    private mutating func readKeywordString() -> String? {
+        readString(where: isKeyword)
+    }
+
+    /// Consumes the longest run of characters satisfying `predicate`; `nil` if there is none.
+    private mutating func readString(where predicate: (Character) -> Bool) -> String? {
+        let start = pos
+        guard let c = char(at: pos), predicate(c) else {
+            return nil
+        }
+        advance()
+
+        while let c = char(at: pos), predicate(c) {
+            advance()
+        }
+
+        return String(string[start..<pos])
+    }
+
+    private func isWhitespace(_ c: Character) -> Bool {
+        switch c {
+        case .space, .tab, .lf, .cr, .crLf: return true
+        default: return false
+        }
+    }
+
+    private func isKeyword(_ c: Character) -> Bool {
+        switch c {
+        case .a ... .z,
+            .A ... .Z,
+            ._0 ... ._9,
+            .underscore, .dollar: return true
+        default: return false
+        }
+    }
+
+    private func advance(position: inout String.Index) {
+        position = string.index(after: position)
+    }
+
+    private mutating func advance() {
+        advance(position: &pos)
+    }
+
+    private func char(at index: String.Index) -> Character? {
+        if index == string.endIndex { return nil }
+        return string[index]
+    }
+
+    private func char() -> Character? {
+        char(at: pos)
+    }
+}
diff --git a/Tests/TypeScriptASTTests/ParserTests.swift b/Tests/TypeScriptASTTests/ParserTests.swift
new file mode 100644
index 0000000..976d788
--- /dev/null
+++ b/Tests/TypeScriptASTTests/ParserTests.swift
@@ -0,0 +1,19 @@
+import Testing
+import TypeScriptAST
+
+@Suite struct ParserTests {
+    static var data: [String] {
+        ["""
+        import { foo } from "./lib";
+
+        """]
+    }
+
+    @Test(arguments: Self.data) func roundTrip(string: String) {
+        var p = Parser(string: string)
+        let ast = p.parse()
+        let parsed = ast.print()
+        let expected = string
+        #expect(parsed == expected)
+    }
+}
diff --git a/Tests/TypeScriptASTTests/TokenizerTests.swift b/Tests/TypeScriptASTTests/TokenizerTests.swift
new file mode 100644
index 0000000..7aff9d3
--- /dev/null
+++ b/Tests/TypeScriptASTTests/TokenizerTests.swift
@@ -0,0 +1,124 @@
+import Testing
+import TypeScriptAST
+
+@Suite struct TokenizerTests {
+    static func symbols(_ s: [Symbol]) -> [Token] {
+        s.map { .symbol($0) }
+    }
+
+    static var tokensData: [(String, [Token])] {
+        let result: [(String, [Token])] = [
+            ("", []),
+            ("import from", [.keyword(.import), .keyword(.from)]),
+            ("! % & ( ) * + , - . / : ; < = > ? [ \\ ] { | }", symbols([
+                .exclamation,
+                .percent,
+                .ampersand,
+                .leftParen,
+                .rightParen,
+                .asterisk,
+                .plus,
+                .comma,
+                .minus,
+                .dot,
+                .slash,
+                .colon,
+                .semicolon,
+                .leftAngleBracket,
+                .equal,
+                .rightAngleBracket,
+                .question,
+                .leftSquareBracket,
+                .backslash,
+                .rightSquareBracket,
+                .leftBrace,
+                .pipe,
+                .rightBrace,
+            ])),
+            ("! != !== % %= & && &&= &= * *= + ++ += - -- -= / /=", symbols([
+                .exclamation,
+                .exclamationEqual,
+                .exclamationEqualEqual,
+                .percent,
+                .percentEqual,
+                .ampersand,
+                .ampersandAmpersand,
+                .ampersandAmpersandEqual,
+                .ampersandEqual,
+                .asterisk,
+                .asteriskEqual,
+                .plus,
+                .plusPlus,
+                .plusEqual,
+                .minus,
+                .minusMinus,
+                .minusEqual,
+                .slash,
+                .slashEqual,
+
+            ])),
+            ("< << <<= = == === => > >> >>= ? ?. ?? ??= | |= || ||=", symbols([
+                .leftAngleBracket,
+                .leftAngleBracketLeftAngleBracket,
+                .leftAngleBracketLeftAngleBracketEqual,
+                .equal,
+                .equalEqual,
+                .equalEqualEqual,
+                .equalRightAngleBracket,
+                .rightAngleBracket,
+                .rightAngleBracketRightAngleBracket,
+                .rightAngleBracketRightAngleBracketEqual,
+                .question,
+                .questionDot,
+                .questionQuestion,
+                .questionQuestionEqual,
+                .pipe,
+                .pipeEqual,
+                .pipePipe,
+                .pipePipeEqual,
+            ])),
+            ("""
+            foo // comment
+            bar
+            """, [.identifier("foo"), .identifier("bar")]),
+            ("""
+            foo /*
+            comment */ bar /**/ baz /* * */ qux
+            """, [
+                .identifier("foo"), .identifier("bar"), .identifier("baz"), .identifier("qux")
+            ]),
+            ("foo /* bar", [.identifier("foo")]),
+            ("foo /* bar *", [.identifier("foo")]),
+            ("""
+            foo // /*
+            bar
+            """, [.identifier("foo"), .identifier("bar")]),
+            ("""
+            foo /*
+            // */ bar
+            """, [.identifier("foo"), .identifier("bar")]),
+            ("""
+            "abc"
+            """, [.stringLiteral("abc")]),
+            (#"""
+            "a\"b\nc\\d"
+            """#, [.stringLiteral(#"""
+            a"b
+            c\d
+            """#)])
+        ]
+        return result
+    }
+
+    @Test(arguments: Self.tokensData) func tokens(string: String, expected: [Token]) {
+        var k = Tokenizer(string: string)
+        let tokens = k.readAll()
+        #expect(tokens == expected)
+    }
+
+    @Test func stringLiteralDescriptionRoundTrips() {
+        let token = Token.stringLiteral("a\"b\\c\r\n\td")
+        var k = Tokenizer(string: token.description)
+        #expect(k.readAll() == [token])
+    }
+}
diff --git a/ci/install-swift.bash b/ci/install-swift.bash
new file mode 100755
index 0000000..16801b5
--- /dev/null
+++ b/ci/install-swift.bash
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Downloads the Swift 6.0.2 toolchain for Ubuntu 22.04 and unpacks it into /usr.
+# Needs write access to /usr; the tarball is fetched into the current directory.
+set -ue
+
+dir=swift-6.0.2-release/ubuntu2204
+version=swift-6.0.2-RELEASE
+tar_platform=ubuntu22.04
+
+if [[ $(uname -m) == "aarch64" ]]; then
+    dir=${dir}-aarch64
+    tar_platform=${tar_platform}-aarch64
+fi
+
+url="https://download.swift.org/${dir}/${version}/${version}-${tar_platform}.tar.gz"
+
+set -x
+curl -fLo swift.tar.gz "${url}"
+tar -xf swift.tar.gz --strip-components=2 -C /usr
+rm swift.tar.gz