Back Contents Next

16. Formal Syntax

This section covers various corners of the language which weren't explicitly stated.

16.1 Lexical Structure

The lexical structure of Opal can be described using an augmented form of regular expressions. Note that it is not possible to describe this structure with regular expressions alone, because block comments may nest. However, this exception is easily added to a scanner based on an FSA by the use of a counter variable.

The lexical structure is specified by a set of rules. Each rule begins with the state or states in which it is to be used. If no state is specified it is assumed to be used only from the start state. States are written in bold inside angle brackets. If more than one state is used, the states are separated by commas.

The second part of each rule is a regular expression which may match the input. If it matches the input then the action associated with the rule is carried out. If multiple regular expressions match, the longest match is used. Also, if a single expression could match in multiple ways, the longest match is used. When two expressions match the same number of characters the one listed first is used. Whenever an expression is matched, the matched characters are appended to the value buffer. A regular expression may contain:

In addition, special names may be defined for certain expressions before the rules. This is done by assigning the names to regular expressions. These may then be used in place of the regular expressions they name. Regular expression names are written in italics.

The third part of a rule is the action to be carried out. Actions may be of three forms. They are:

Multiple actions may be separated by semicolons. Actions are listed to the right of the regular expression in curly braces.

In Opal the character stream is broken into tokens. There are six types of tokens. They are keywords, identifiers, operators, whitespace, literals and comments. Whitespace and comments are discarded after being recognized and before the token stream is given to the parser.

The named regular expressions are:

line-terminator = \r | \n | \r\n
input-character = [^\r\n]
whitespace = line-terminator | [ \t\f]

end-of-line-comment = "//" input-character* line-terminator
start-comment = "/*"
end-comment = \*+ "/"
comment-content = ( [^*/] | \*+[^*/] | "/"+[^*/])*

identifier = [a-zA-Z_] [a-zA-Z_0-9]* ("?" | "!")?

dec-literal = 0 | dec-num
dec-num = [1-9][0-9]*
hex-literal = "0x" 0* hex-digit{1,16}
hex-digit = [0-9a-fA-F]

float-literal = f-lit1 | f-lit2 | f-lit3
f-lit1 = dec-literal frac-part exponent?
f-lit2 = frac-part exponent?
f-lit3 = dec-literal exponent
frac-part = \. [0-9]+
exponent = e [+\-]? dec-num

string-lit-char = [^\r\n\"\\]
char-lit-char = [^\r\n\'\\]

The start state is start. In addition to the normal actions the following actions are used:

The rules are:

States: Regular Expressions: Actions
// Keywords
"abstract" { ->keyword }
"break" { ->keyword }
"case" { ->keyword }
"catch" { ->keyword }
"class" { ->keyword }
"const" { ->keyword }
"continue" { ->keyword }
"create" { ->keyword }
"default" { ->keyword }
"destroy" { ->keyword }
"do" { ->keyword }
"else" { ->keyword }
"for" { ->keyword }
"final" { ->keyword }
"if" { ->keyword }
"import" { ->keyword }
"interface" { ->keyword }
"mutable" { ->keyword }
"operator" { ->keyword }
"outer" { ->keyword }
"personal" { ->keyword }
"public" { ->keyword }
"private" { ->keyword }
"protected" { ->keyword }
"return" { ->keyword }
"self" { ->keyword }
"scope" { ->keyword }
"super" { ->keyword }
"switch" { ->keyword }
"this" { ->keyword }
"throw" { ->keyword }
"throws" { ->keyword }
"try" { ->keyword }
"while" { ->keyword }
 
// Reserved Words
"new" { ->reserved }
"delete" { ->reserved }
"resize" { ->reserved }
"dim" { ->reserved }
"sizeof" { ->reserved }
"deprecated" { ->reserved }
"inner" { ->reserved }
"get" { ->reserved }
"set" { ->reserved }
"signal" { ->reserved }
"signals" { ->reserved }
"receive" { ->reserved }
"interrupt" { ->reserved }
 
// Separators
"(" { ->operator }
")" { ->operator }
"{" { ->operator }
"}" { ->operator }
"[" { ->operator }
"]" { ->operator }
"|" { ->operator }
";" { ->operator }
":" { ->operator }
"," { ->operator }
"." { ->operator }
".." { ->operator }
 
// Operators
"==" { ->operator }
"<" { ->operator }
">" { ->operator }
"<=" { ->operator }
">=" { ->operator }
"!=" { ->operator }
"!" { ->operator }
"bit_and" { ->operator }
"bit_or" { ->operator }
"complement" { ->operator }
"+" { ->operator }
"++" { ->operator }
"-" { ->operator }
"--" { ->operator }
"*" { ->operator }
"/" { ->operator }
"%" { ->operator }
"^" { ->operator }
"<<" { ->operator }
">>" { ->operator }
"<-" { ->operator }
"->" { ->operator }
"=" { ->operator }
"+=" { ->operator }
"-=" { ->operator }
"*=" { ->operator }
"/=" { ->operator }
"%=" { ->operator }
"shift_left=" { ->operator }
"shift_right=" { ->operator }
"bit_and=" { ->operator }
"bit_or=" { ->operator }
"bit_xor=" { ->operator }
"and" { ->operator }
"or" { ->operator }
"xor" { ->operator }
 
// Boolean Literals
"true" { ->literal }
"false" { ->literal }
 
// String Literal
\" { state=string-lit }
\" \\U { state=string-lit }
\" \\S { state=string-lit }
 
// Character Literal
\' { state=char-lit }
\' \\U { state=char-lit }
\' \\S { state=char-lit }
 
// Numeric Literals
dec-literal { ->literal }
hex-literal { ->literal }
float-literal { ->literal }
 
// Comments
end-of-line-comment { ->comment }
start-comment { ++commentDepth; state=comment }
 
// Whitespace
whitespace { ->whitespace }
 
// Identifiers
identifier { ->identifier }
 
// Char and string literal states
<char-lit, string-lit> "\\0"
<char-lit, string-lit> "\\t"
<char-lit, string-lit> "\\n"
<char-lit, string-lit> "\\f"
<char-lit, string-lit> "\\r"
<char-lit, string-lit> "\\b"
<char-lit, string-lit> "\\\""
<char-lit, string-lit> "\\\'"
<char-lit, string-lit> "\\\\"
<char-lit, string-lit> "\\x" hex-digit {2}
<char-lit, string-lit> "\\u" hex-digit {4}
<char-lit, string-lit> \\ . { error }
<char-lit, string-lit> line-terminator { error }
 
<char-lit> char-lit-char
<char-lit> \' { ->literal }
 
<string-lit> string-lit-char
<string-lit> \" { ->literal }
 
// Comment state
<comment> "/"* start-comment { ++commentDepth }
<comment> comment-content
<comment> end-comment { --commentDepth; if(commentDepth==0) ->comment }

16.2 Syntactic Structure

This section formally describes the grammar of Opal using EBNF.

α-list::= α (, α)*

The grammar:

compilation-unit::= module
| object-content

module::= access-modifier identifier object-body

access-modifier::= public
| scope
| scope ( identifier-list )
| protected
| private
| personal
| ε

identifier::= id (. id)*

object-body::= { object-content }
| { }

object-content::= (named-entity | statement)*

named-entity::=  access-modifier basic-modifier* id parents? entity-value
| access-modifier basic-modifier* id : type (= expression)? ;
| access-modifier basic-modifier* class id pattern-params? parents? object-body
| access-modifier basic-modifier* interface id parents? object-body
| access-modifier basic-modifier* operator (infix-op | prefix-op) pattern-body
| destroy object-body

basic-modifier::= const | final | mutable | abstract

parents::= : object-type (: object-type-list)?
| :: object-type-list

type::= object-type | pattern-type

object-type::= basic-modifier* identifier

param-type::= type | this

pattern-type::= ( param-type-list? ) -> return-type throws-clause

throws-clause::= (throws type-list)?

pattern-body::= pattern-params -> return-type throws-clause object-body

pattern-param::= access-modifier basic-modifier* id? : type (= expression)? | this

pattern-params::= ( pattern-param-list? )

return-type::= type
| self

entity-value::= object-body
| pattern-body

statement::= if-stmt
| while-stmt
| do-while-stmt
| for-stmt
| switch-stmt
| try-stmt
| import-stmt
| block-stmt
| expression;
| throw expression ;
| return expression? ;
| break;
| continue;

if-stmt::= if(decls? expression) statement
| if(decls? expression) statement else statement

while-stmt::= while(decls? expression) statement

do-while-stmt::= do statement while(expression);
| do ( decls ) statement while(expression);

for-stmt::= for(decls? (expression ;)? expression?; expression?) statement

switch-stmt::= switch(decls? expression) {   switch-stmt-group-list }

switch-stmt-group::= switch-label object-content

switch-label::= case( expression ) | default

try-stmt::= try statement catch-clauses

catch-clauses::= catch-clauses catch-clause
| catch-clause

catch-clause::= catch ( id : type ) statement

import-stmt::= import identifier;
| import identifier (.*)+ ;
| import identifier.**;

block-stmt::= object-body

decls::= named-entity+

expression::= expression . id
| expression infix-op expression
| prefix-op expression
| expression ( expression-list )
| expression [ expression ]
| ( expression )
| : type = expression
| literal
| id
| self
| this
| outer
| super

literal::= literal
| array-literal
| object-literal
| pattern-literal

array-literal::= [ expression-list ]

object-literal::= (parents | :) object-body

pattern-literal::= (parents | :) pattern-body

infix-op::= , | .. | and | or | xor
| == | < | > | <= | >= | != | & | | | ^
| + | - | * | / | % | << | >> | <- | ->
| = | += | -= | *= | /= | %= | &= | |= | ^= | <<= | >>=

prefix-op::= ++ | + | -- | - | ! | ~


Back Contents Next

jwalker@cs.oberlin.edu