Author | Jakob Wakeling <[email protected]> |
Date | 2023-06-24 03:05:05 |
Commit | 33e34d4e2e25b4ea33dc2c84ec05967a59a42254 |
Parent | 09e2c5c89f7c6842d18b03f11e2be6f0d859a54b |
Spec lexemes, constants, variables, and types
Diffstat
A | doc/.markdownlint.json | | | 5 | +++++ |
M | doc/g.ebnf | | | 7 | ++++--- |
A | doc/spec.md | | | 173 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | doc/types.md | | | 22 | +++++++++++----------- |
4 files changed, 193 insertions, 14 deletions
diff --git a/doc/.markdownlint.json b/doc/.markdownlint.json new file mode 100644 index 0000000..bdc623a --- /dev/null +++ b/doc/.markdownlint.json @@ -0,0 +1,5 @@ +{ + "MD007": false, + "MD010": false, + "MD013": { "code_blocks": false } +} diff --git a/doc/g.ebnf b/doc/g.ebnf index 91d8fad..862f5e1 100644 --- a/doc/g.ebnf +++ b/doc/g.ebnf @@ -21,7 +21,7 @@ stmt_compound = "{", { stmt }, "}" ; stmt_decl = iden, ":", ( stmt_decl_constant | stmt_decl_variable ) ; stmt_decl_constant = [ type ], ":", expr ; -stmt_decl_variable = [ type ], "=", expr | type ; +stmt_decl_variable = [ type ], "=", expr ; (* Expressions *) expr = iden | literal @@ -67,12 +67,12 @@ type = iden ; literal_null = "null" ; literal_bool = "true" | "false" ; +literal_int = literal_bin | literal_oct | literal_dec | literal_hex ; literal_bin = "0b", digit_bin, { char_bin } ; literal_oct = "0o", digit_oct, { char_oct } ; literal_dec = [ "0d" ], digit_dec, { char_dec } ; literal_doz = "0z", digit_doz, { char_doz } ; literal_hex = "0x", digit_hex, { char_hex } ; -literal_int = literal_bin | literal_oct | literal_dec | literal_hex ; literal_flt = digit_dec, { char_dec }, ".", digit_dec, { char_dec } ; @@ -83,7 +83,7 @@ literal = literal_null | literal_bool | literal_int | literal_flt | literal_chr (* General *) escape = escape_std | escape_oct | escape_hex | escape_utf ; -escape_std = "\0" | "\a" | "\b" | "\t" | "\n" | "\v" | "\f" | "\r" | '\"' | "\'" | "\\" ; +escape_std = "\0" | "\a" | "\b" | "\t" | "\n" | "\v" | "\f" | "\r" | '\'' | "\"" | "\\" ; escape_oct = "\o", digit_oct, digit_oct, digit_oct ; escape_hex = "\x", digit_hex, digit_hex ; escape_utf = "\u", quadd_hex, [ quadd_hex ] ; diff --git a/doc/spec.md b/doc/spec.md new file mode 100644 index 0000000..8b7260c --- /dev/null +++ b/doc/spec.md @@ -0,0 +1,173 @@ +# G Programming Language Specification + +## Table of Contents + +- [Lexical Elements](#lexical-elements) + - [Comments](#comments) + - [Integer Literals](#integer-literals) + - 
[Floating-Point Literals](#floating-point-literals) + - [Character/Rune Literals](#characterrune-literals) + - [String Literals](#string-literals) +- [Constants](#constants) + - [Constant Declarations](#constant-declarations) +- [Variables](#variables) +- [Types](#types) + - [Basic Types](#basic-types) + +## Lexical Elements + +### Comments + +There are two forms of comments in G: + +- Single-line comments begin with `//` and go to the end of the line. +- Multi-line comments begin with `/*` and end with `*/`, and may be nested. + +Comments can start anywhere other than inside a character or string literal. + +### Integer Literals + +An integer literal is a sequence of digits that represent an integer constant. +An integer literal may optionally have a prefix indicating its base: `0b` for +binary, `0o` for octal, `0d` for decimal, `0z` for duodecimal, and `0x` for +hexadecimal. Without a prefix, literals will be interpreted as decimal. + +An underscore character `_` may appear after a prefix or between digits; these +underscores are solely for readability and do not change the literal's value. 
+ +```ebnf +literal_int = literal_bin | literal_oct | literal_dec | literal_hex ; +literal_bin = "0b", [ "_" ], digit_bin, { "_", digit_bin } ; +literal_oct = "0o", [ "_" ], digit_oct, { "_", digit_oct } ; +literal_dec = [ "0d", [ "_" ] ], digit_dec, { "_", digit_dec } ; +literal_doz = "0z", [ "_" ], digit_doz, { "_", digit_doz } ; +literal_hex = "0x", [ "_" ], digit_hex, { "_", digit_hex } ; + +digit_bin = "0" | "1" ; +digit_oct = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" ; +digit_dec = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; +digit_doz = digit_dec | "A" | "B" ; +digit_hex = digit_dec | "A" | "B" | "C" | "D" | "E" | "F" ; +``` + +### Floating-Point Literals + +A floating-point literal consists of an integer part (decimal digits), a decimal +point, a fractional part (decimal digits), and an optional exponent part (`e` followed by +a sign and decimal digits) and represents a floating-point constant. + +An underscore character `_` may appear between digits in each part; these +underscores are solely for readability and do not change the literal's value. + +```ebnf +literal_flt = integer_seq, ".", integer_seq, [ "e", "+" | "-", integer_seq ] ; +integer_seq = digit_dec, { "_", digit_dec } ; +``` + +### Character/Rune Literals + +A character/rune literal is a character sequence between single quotes (e.g. +`'A'`) that represents a character/rune constant. Any character may appear +within the quotes except for a newline or unescaped single quotes. A single +quoted character represents the Unicode integer value of the character. Escape +sequences starting with a backslash represent a variety of values. + +Beyond single-character escape sequences, there are also escape sequences to +represent arbitrary octal, hexadecimal, and Unicode values. These escape +sequences may have range restrictions depending on their type, i.e. octal +escapes must represent a value between `0` and `255` inclusive and Unicode +escapes must conform to Unicode rules. 
+ +```text +\0 U+0000 nul character +\a U+0007 alert or bell +\b U+0008 backspace +\t U+0009 horizontal tab +\n U+000A line feed or newline +\v U+000B vertical tab +\f U+000C form feed +\r U+000D carriage return +\' U+0027 single quote (not necessary inside string literals) +\" U+0022 double quote (not necessary inside character literals) +\\ U+005C backslash +``` + +```ebnf +literal_chr = "'", ANY_CHARACTER_EXCEPT_NEWLINE_SQUOTE_BACKSLASH | escape, "'" ; + +escape = escape_std | escape_oct | escape_hex | escape_utf ; +escape_std = "\0" | "\a" | "\b" | "\t" | "\n" | "\v" | "\f" | "\r" | '\'' | "\"" | "\\" ; +escape_oct = "\o", digit_oct, digit_oct, digit_oct ; +escape_hex = "\x", digit_hex, digit_hex ; +escape_utf = "\u", quadd_hex, [ quadd_hex ] ; +``` + +### String Literals + +A string literal is a character sequence between double quotes (e.g. `"string"`) +that represents a string constant. Any character may appear within the quotes +except for a newline or unescaped double quotes. The same selection of escape +sequences is valid in string literals as in character/rune literals. + +```ebnf +literal_str = '"', { ANY_CHARACTER_EXCEPT_NEWLINE_DQUOTE_BACKSLASH | escape }, '"' ; +``` + +## Constants + +There are **boolean**, **integer**, **floating-point**, **character**/**rune**, +and **string** constants in G. Integer, floating-point, and character/rune +constants are collectively referred to as **numeric** constants. + +A boolean constant is represented by the keywords `true` and `false`. Numeric +constants represent exact values of an arbitrary precision and do not overflow. + +If a constant's type is not specified, then the default type for that constant +kind will be used. The default type of a constant is `bool`, `s32`, `f64`, +`rune`, or `str` depending on whether it is a boolean, integer, floating-point, +character/rune, or string respectively. 
+ +### Constant Declarations + +Constant declarations have a value that cannot be changed, and must be evaluated +at compile time. + +```ebnf +stmt_decl_constant = iden, ":", [ type ], ":", expr ; +``` + +## Variables + +Variable declarations are initialised to a zero value by default. + +```ebnf +stmt_decl_variable = iden, ":", [ type ], "=", expr ; +``` + +## Types + +### Basic Types + +```text +/* Boolean Types */ +bool b8 b16 b32 b64 + +/* Integer Types */ +uint u8 u16 u32 u64 u128 // unsigned +sint s8 s16 s32 s64 s128 // signed + +/* Endian Specific Integer Types */ +u16le u32le u64le u128le s16le s32le s64le s128le // little endian +u16be u32be u64be u128be s16be s32be s64be s128be // big endian + +/* Floating-Point Types */ +f16 f32 f64 f128 + +/* Endian Specific Floating-Point Types */ +f16le f32le f64le f128le // little endian +f16be f32be f64be f128be // big endian + +/* Character Types */ +char // alias of u8 +rune // alias of u32 +``` diff --git a/doc/types.md b/doc/types.md index 7ade28a..5d4c429 100644 --- a/doc/types.md +++ b/doc/types.md @@ -2,14 +2,14 @@ ## General Types -``` +```text ptr pointer type type type information meta type ``` ## Boolean Types -``` +```text b8 8-bit boolean type b16 16-bit boolean type b32 32-bit boolean type @@ -20,7 +20,7 @@ bool alias for b8 ## Integer Types -``` +```text u8 8-bit unsigned integer type u16 16-bit unsigned integer type u32 32-bit unsigned integer type @@ -43,21 +43,21 @@ rune alias for u32 representing a Unicode code point ## Floating Point Types -``` -f16 16-bit IEEE 754 floating-point type -f32 32-bit IEEE 754 floating-point type -f64 64-bit IEEE 754 floating-point type +```text +f16 16-bit IEEE 754 floating-point type +f32 32-bit IEEE 754 floating-point type +f64 64-bit IEEE 754 floating-point type f128 128-bit IEEE 754 floating-point type -c32 32-bit complex floating-point type -c64 64-bit complex floating-point type +c32 32-bit complex floating-point type +c64 64-bit complex floating-point type 
c128 128-bit complex floating-point type c256 256-bit complex floating-point type ``` ## Endian Specific Types -``` +```text u16le 16-bit unsigned little endian integer type u32le 32-bit unsigned little endian integer type u64le 64-bit unsigned little endian integer type @@ -91,6 +91,6 @@ f128be 128-bit big endian floating-point type ## String Types -``` +```text str string type ```