/* dylan-phrases.y -- Dylan phrase grammar */ /* * Paul Haahr * 27 July 1996 * * This grammar was produced by applying obvious transformations * to the phrase grammar from Appendix A: BNF (pages 414-424) of * the Dylan Reference Manual. * * 13 August 1996: Allowed operators to be both unary and binary, * to allow for "-" and potential future operators. Noted by * Keith Playford . * * This YACC program is in the public domain. */ /* names and related tokens */ /* * The BNF grammar from the Dylan Reference Manual categorizes * words in six ways (plus special handling of ``method''): * * unreserved-word * begin-word * function-word * define-body-word * define-list-word * core-word * * Unfortunately, the language isn't that simple, because the * categories can overlap. For example, the same word can be * both a define-body-word and a function-word. Similarly, * the define words are really ``unreserved'' words, except * following DEFINE. So, instead, I have categories covering * all possible permutations for the lexical analyzer to use, * and the parser synthesizes them into the versions actually * used in the grammar. It's ugly, but it seems to be the only * feasible approach. * * Effectively, each word has two kinds of properties, its * meaning as a kind of definition and its meaning inside * an expression. Thus, I've broken the notion of ``unreserved'' * into ``nondefining'' and ``nonexpression,'' which are * separable properties. Thus the types of word tokens * I'm using are the cartesian product of the sets: * * { nondefining, define-body, define-list } * { nonexpression, begin, function } */ %token NONDEFINING_NONEXPRESSION_WORD %token DEFINE_BODY_NONEXPRESSION_WORD %token DEFINE_LIST_NONEXPRESSION_WORD %token NONDEFINING_BEGIN_WORD %token DEFINE_BODY_BEGIN_WORD %token DEFINE_LIST_BEGIN_WORD %token NONDEFINING_FUNCTION_WORD %token DEFINE_BODY_FUNCTION_WORD %token DEFINE_LIST_FUNCTION_WORD %token BINARY_OPERATOR_ONLY %token UNARY_OPERATOR_ONLY %token UNARY_AND_BINARY_OPERATOR %token ESCAPED_WORD /* \word */ %token OPERATOR_NAME /* \operator */ %token CONSTRAINED_NAME /* core words */ %token DEFINE %token END %token HANDLER %token LET %token LOCAL %token MACRO %token OTHERWISE /* * The word ``method'' calls for special handling. According * to the default exports of the Dylan module, it should be * classified as a DEFINE_BODY_BEGIN_WORD, but, in other modules, * its classification could be different. I've handled it * by not detecting ``method'' in the lexical analyzer and * requiring a check at the semantic level. The only other * solution I could come up with required splitting every kind * of word token in two categories -- one which was spelled * ``method'' and one which wasn't; that seemed worse. * * See the non-terminal method_name for details and the comment * that precedes it for details. */ /* literals */ %token SYMBOL %token NUMBER %token CHARACTER_LITERAL %token STRING /* punctuation */ %token '(' ')' %token ',' %token '.' %token ';' %token '[' ']' %token '{' '}' %token COLON_COLON /* "::" */ %token '-' %token '=' %token EQUAL_EQUAL /* "==" */ %token EQUAL_ARROW /* "=>" */ %token HASH_PAREN /* "#(" */ %token HASH_BRACKET /* "#[" */ %token HASH_HASH /* "##" */ %token '?' %token QUERY_QUERY /* "??" */ %token QUERY_EQUAL /* "?=" */ %token ELLIPSIS /* "..." */ /* hash words */ %token HASH_T /* "#t" */ %token HASH_F /* "#f" */ %token HASH_NEXT /* "#next" */ %token HASH_REST /* "#rest" */ %token HASH_KEY /* "#key" */ %token HASH_ALL_KEYS /* "#all-keys" */ %token HASH_INCLUDE /* "#include" */ /* parsed fragments, from page 424 */ %token PARSED_DEFINITION %token PARSED_LOCAL_DECLARATION %token PARSED_FUNCTION_CALL %token PARSED_MACRO_CALL %token PARSED_LIST_CONSTANT %token PARSED_VECTOR_CONSTANT /* entry points */ %start start %token ENTRY_SOURCE_RECORD /* main entry point */ %token ENTRY_EXPRESSION /* pattern variable constraint :expression */ %token ENTRY_VARIABLE /* pattern variable constraint :variable */ %token ENTRY_NAME /* pattern variable constraint :name */ %token ENTRY_TOKEN /* pattern variable constraint :token */ %token ENTRY_BODY /* pattern variable constraint :body */ %token ENTRY_CASE_BODY /* pattern variable constraint :case-body */ %token ENTRY_MACRO /* pattern variable constraint :macro */ %token STOP /* end of current token stream */ %% /* parsing entry points */ /* * The Dylan grammar can be entered at several places, based on * how it is invoked. The top level entry point parses source * records, which are the units of programs. Each of the pattern * variable constraints (other than :*, for wildcards) can * needs to do test parsing on a token stream, so each requires * its own parser entry. (That's not entirely true, as some are * so simple that there's no need to really start up the parser * for :name or :token, and :body is no different inside the * parser for dealing with source records, but the easiest thing * to do for the grammar was provide all the entry points, and * leave up to a client of this grammar to choose which ones to * use in practice.) * * The STOP token is used to provide a portable way of indicating * that a token stream is over. STOP for source records should be * communicated based on the development environment, whereas STOP * for body or case-body comes from intermediate word detection. * * The pattern variable constraints and what syntax they accept * are listed on pages 157-158. */ start : ENTRY_SOURCE_RECORD source_record STOP | ENTRY_VARIABLE variable STOP | ENTRY_EXPRESSION expression STOP | ENTRY_NAME name STOP | ENTRY_TOKEN token STOP | ENTRY_BODY body_opt STOP | ENTRY_CASE_BODY case_body_opt STOP | ENTRY_MACRO macro STOP /* imports from lexical grammar, pages 408-413 */ token : name | SYMBOL | NUMBER | CHARACTER_LITERAL | STRING | operator | punctuation | hash_word punctuation : bracketing_punctuation | non_bracketing_punctuation bracketing_punctuation : '(' | ')' | '[' | ']' | '{' | '}' | HASH_PAREN | HASH_BRACKET non_bracketing_punctuation : ',' | '.' | ';' | COLON_COLON | '-' | '=' | EQUAL_EQUAL | EQUAL_ARROW | HASH_HASH | '?' | QUERY_QUERY | QUERY_EQUAL | ELLIPSIS core_word : non_end_core_word | END non_end_core_word : DEFINE | HANDLER | LET | LOCAL | MACRO | OTHERWISE nondefining_word : NONDEFINING_BEGIN_WORD | NONDEFINING_FUNCTION_WORD | NONDEFINING_NONEXPRESSION_WORD define_body_word : DEFINE_BODY_NONEXPRESSION_WORD | DEFINE_BODY_BEGIN_WORD | DEFINE_BODY_FUNCTION_WORD define_list_word : DEFINE_LIST_NONEXPRESSION_WORD | DEFINE_LIST_BEGIN_WORD | DEFINE_LIST_FUNCTION_WORD nonexpression_word : NONDEFINING_NONEXPRESSION_WORD | DEFINE_BODY_NONEXPRESSION_WORD | DEFINE_LIST_NONEXPRESSION_WORD begin_word : NONDEFINING_BEGIN_WORD | DEFINE_BODY_BEGIN_WORD | DEFINE_LIST_BEGIN_WORD function_word : NONDEFINING_FUNCTION_WORD | DEFINE_BODY_FUNCTION_WORD | DEFINE_LIST_FUNCTION_WORD reserved_word : core_word | NONDEFINING_FUNCTION_WORD | NONDEFINING_BEGIN_WORD | define_body_word | define_list_word unreserved_word : NONDEFINING_NONEXPRESSION_WORD name : reserved_word | unreserved_name escaped_name : ESCAPED_WORD | OPERATOR_NAME unreserved_name : unreserved_word | escaped_name nondefining_name : nondefining_word | escaped_name ordinary_name : nonexpression_word | escaped_name macro_name : nondefining_name | define_body_word | define_list_word name_not_end : macro_name | non_end_core_word hash_word : HASH_T | HASH_F | HASH_NEXT | HASH_REST | HASH_KEY | HASH_ALL_KEYS | HASH_INCLUDE operator : BINARY_OPERATOR_ONLY | UNARY_OPERATOR_ONLY | UNARY_AND_BINARY_OPERATOR unary_operator : UNARY_OPERATOR_ONLY | UNARY_AND_BINARY_OPERATOR binary_operator : BINARY_OPERATOR_ONLY | UNARY_AND_BINARY_OPERATOR /* Program Structure, page 414 */ source_record : body_opt body : constituents semicolon_opt constituents : constituent | constituents ';' constituent constituent : non_expression_constituent | expression non_expression_constituent : definition | local_declaration macro : definition_macro_call | statement | function_macro_call | PARSED_MACRO_CALL /* Property Lists, page 414 */ property_list : property | property_list ',' property property : SYMBOL value value : basic_fragment /* Fragments, pages 415-416 */ body_fragment : non_statement_body_fragment | statement non_statement_body_fragment_opt list_fragment : non_statement_list_fragment | statement non_statement_list_fragment_opt basic_fragment : non_statement_basic_fragment | statement non_statement_basic_fragment_opt non_statement_body_fragment : definition semicolon_fragment_opt | local_declaration semicolon_fragment_opt | simple_fragment body_fragment_opt | ',' body_fragment_opt | ';' body_fragment_opt semicolon_fragment : ';' body_fragment_opt non_statement_list_fragment : simple_fragment list_fragment_opt | ',' list_fragment_opt non_statement_basic_fragment : simple_fragment basic_fragment_opt simple_fragment : variable_name | constant_fragment | operator | bracketed_fragment | function_macro_call | hash_word | '.' | COLON_COLON | EQUAL_ARROW | '?' | QUERY_QUERY | QUERY_EQUAL | ELLIPSIS | HASH_HASH | OTHERWISE | PARSED_FUNCTION_CALL | PARSED_MACRO_CALL bracketed_fragment : '(' body_fragment_opt ')' | '[' body_fragment_opt ']' | '{' body_fragment_opt '}' constant_fragment : NUMBER | CHARACTER_LITERAL | STRING | SYMBOL | HASH_PAREN constants '.' constant ')' | HASH_PAREN constants_opt ')' | HASH_BRACKET constants_opt ']' | PARSED_LIST_CONSTANT | PARSED_VECTOR_CONSTANT /* Definitions, page 416 */ definition : definition_macro_call | DEFINE MACRO macro_definition | PARSED_DEFINITION definition_macro_call : DEFINE modifiers_opt define_body_word body_fragment_opt definition_tail | DEFINE modifiers_opt define_list_word list_fragment_opt modifier : nondefining_name modifiers : modifier | modifiers modifier definition_tail : END | END macro_name | END define_body_word macro_name /* Local Declarations, page 417 */ local_declaration : LET bindings | LET HANDLER condition '=' handler | LOCAL local_methods | PARSED_LOCAL_DECLARATION condition : unparenthesized_operand | '(' operand ')' operand_tails | '(' operand ',' property_list ')' handler : expression local_methods : method_definition | local_methods ',' method_definition bindings : variable '=' expression | '(' variable_list ')' '=' expression variable_list : variables | variables ',' HASH_REST variable_name | HASH_REST variable_name variables : variable | variables ',' variable variable : variable_name | variable_name COLON_COLON operand variable_name : ordinary_name /* Expression, page 418-419 */ expressions : expression | expressions ',' expression expression : binary_operand | expression binary_operator binary_operand expression_no_symbol : binary_operand_no_symbol | binary_operand_no_symbol binary_operator binary_operand unparenthesized_expression : unparenthesized_binary_operand | unparenthesized_expression binary_operator binary_operand binary_operand_no_symbol : unary_operator_opt operand binary_operand : SYMBOL | binary_operand_no_symbol unparenthesized_binary_operand : SYMBOL | unparenthesized_operand | unary_operator operand operand : operand operand_tail | leaf unparenthesized_operand : unparenthesized_operand operand_tail | unparenthesized_leaf operand_tail : '(' arguments_opt ')' | '[' arguments_opt ']' | '.' variable_name operand_tails : | operand_tails operand_tail function_macro_call : function_word '(' body_fragment_opt ')' leaf : '(' expression ')' | unparenthesized_leaf unparenthesized_leaf : literal | variable_name | function_macro_call | statement | PARSED_FUNCTION_CALL | PARSED_MACRO_CALL arguments : argument | arguments ',' argument argument : SYMBOL expression | expression_no_symbol | SYMBOL literal : NUMBER | CHARACTER_LITERAL | string_literal | HASH_T | HASH_F | HASH_PAREN constants '.' constant ')' | HASH_PAREN constants_opt ')' | HASH_BRACKET constants_opt ']' | PARSED_LIST_CONSTANT | PARSED_VECTOR_CONSTANT string_literal : STRING | string_literal STRING constants : constant | constants ',' constant constant : literal | SYMBOL /* Statements, page 419 */ statement : begin_word body_fragment_opt end_clause end_clause : END BEGIN_WORD_opt /* * The parsing of case bodies is changed in structure * from the official BNF grammar, because that version * leads to inherent shift/reduce conflicts between * the expression used to introduce a case and the * constituents of the case body. This grammar should * accept exactly the same set of input sequences, just * parse LALR(1). (I believe the grammar as presented * is probably LALR(2), but don't ask me to prove it.) */ case_body : case_label case_constiuents case_label : unparenthesized_expression EQUAL_ARROW | unparenthesized_expression ',' expressions EQUAL_ARROW | '(' expression ')' operand_tails EQUAL_ARROW | '(' expression ')' operand_tails ',' expressions EQUAL_ARROW | '(' expression ',' expressions ')' EQUAL_ARROW | OTHERWISE EQUAL_ARROW_opt case_constiuents : case_tail | '(' expression ')' case_tail | unparenthesized_expression case_tail | non_expression_constituent case_tail case_tail : | ';' case_constiuents | ';' case_label case_constiuents /* Methods, pages 420-421 */ method_definition : method_name parameter_list body_opt END method_name_opt /* * The hack for dealing with the word ``method'' is in * the production for method_name. If the optional * variable_name is present, it is the name of the * local method and the macro_name must be the word * ``method.'' If the variable_name is not present, * the name of the local method can be found in the * macro_name; it must be checked that it isn't a * core word, etc. I'm not sure that the syntactic * category macro_name is the best possible, but it * is workable. * * I feel unclean. */ method_name : macro_name variable_name_opt parameter_list : '(' parameters_opt ')' semicolon_opt | '(' parameters_opt ')' EQUAL_ARROW variable ';' | '(' parameters_opt ')' EQUAL_ARROW '(' values_list_opt ')' semicolon_opt parameters : required_parameters | required_parameters ',' next_rest_key_parameter_list | next_rest_key_parameter_list next_rest_key_parameter_list : HASH_NEXT variable_name | HASH_NEXT variable_name ',' rest_key_parameter_list | rest_key_parameter_list rest_key_parameter_list : HASH_REST variable_name | HASH_REST variable_name ',' key_parameter_list | key_parameter_list key_parameter_list : HASH_KEY | HASH_KEY ',' HASH_ALL_KEYS | HASH_KEY keyword_parameters | HASH_KEY keyword_parameters ',' HASH_ALL_KEYS required_parameters : required_parameter | required_parameters ',' required_parameter required_parameter : variable | variable_name EQUAL_EQUAL expression keyword_parameters : keyword_parameter | keyword_parameters ',' keyword_parameter keyword_parameter : SYMBOL_opt variable default_opt default : '=' expression values_list : variables | variables ',' HASH_REST variable | HASH_REST variable /* Macro Definitions, page 421 */ macro_definition : macro_name main_rule_set aux_rule_sets_opt END MACRO_opt macro_name_opt main_rule_set : body_style_definition_rules | list_style_definition_rules | statement_rules | function_rules body_style_definition_rules : body_style_definition_rule | body_style_definition_rules body_style_definition_rule list_style_definition_rules : list_style_definition_rule | list_style_definition_rules list_style_definition_rule statement_rules : statement_rule | statement_rules statement_rule function_rules : function_rule | function_rules function_rule /* * The following two rules are altered in substance from * what appears in the BNF grammar, because that's inherently * ambiguous in this area. The original rules follow the * word DEFINE with (roughly) * * definition_head_opt macro_name pattern_opt * * but there's no way a stupid parser can tell which word * in a sequence corresponds to the macro_name, unless some * extra context-sensitive information is introduced. Rather * than mandating such odd behavior, I've simply decided to * offload that work to the semantic analysis phase, and * to leave these rules as simple as possible. */ body_style_definition_rule : '{' DEFINE pattern semicolon_opt END '}' EQUAL_ARROW rhs list_style_definition_rule : '{' DEFINE pattern '}' EQUAL_ARROW rhs rhs : '{' template_opt '}' semicolon_opt statement_rule : '{' macro_name maybe_pattern_and_semicolon END '}' EQUAL_ARROW rhs function_rule : '{' macro_name '(' pattern_opt ')' '}' EQUAL_ARROW rhs /* definition_head : modifier_pattern | definition_head modifier_pattern */ /* modifier_pattern : modifier | pattern_variable */ /* Patterns, pages 421-423 */ maybe_pattern_and_semicolon : pattern semicolon_opt | semicolon_opt pattern : pattern_list | pattern ';' pattern_list pattern_list : pattern_sequence | pattern_sequence ',' pattern_list | property_list_pattern pattern_sequence : simple_pattern | pattern_sequence simple_pattern simple_pattern : name_not_end | EQUAL_ARROW | bracketed_pattern | binding_pattern | pattern_variable bracketed_pattern : '(' pattern_opt ')' | '[' pattern_opt ']' | '{' pattern_opt '}' binding_pattern : pattern_variable COLON_COLON pattern_variable | pattern_variable '=' pattern_variable | pattern_variable COLON_COLON pattern_variable '=' pattern_variable pattern_variable : '?' name | '?' CONSTRAINED_NAME | ELLIPSIS property_list_pattern : HASH_REST pattern_variable | HASH_KEY pattern_keywords_opt | HASH_REST pattern_variable ',' HASH_KEY pattern_keywords_opt pattern_keywords : HASH_ALL_KEYS | pattern_keyword | pattern_keyword ',' pattern_keywords pattern_keyword : '?' name default_opt | '?' CONSTRAINED_NAME default_opt | QUERY_QUERY name default_opt | QUERY_QUERY CONSTRAINED_NAME default_opt /* Templates, pages 423-424 */ template : template_element | template template_element template_element : name | SYMBOL | NUMBER | CHARACTER_LITERAL | STRING | UNARY_OPERATOR_ONLY | separator | hash_word | '.' | COLON_COLON | EQUAL_ARROW | '(' template_opt ')' | '[' template_opt ']' | '{' template_opt '}' | HASH_PAREN template_opt ')' | HASH_BRACKET template_opt ']' | PARSED_LIST_CONSTANT | PARSED_VECTOR_CONSTANT | substitution separator : ';' | ',' | binary_operator /* idle thought: what about moving '.' to here? */ substitution : name_prefix_opt '?' name_string_or_symbol name_suffix_opt | QUERY_QUERY name separator_opt ELLIPSIS | ELLIPSIS | QUERY_EQUAL name name_prefix : STRING HASH_HASH name_suffix : HASH_HASH STRING name_string_or_symbol : name | STRING | SYMBOL /* Auxiliary Rule Sets, page 424 */ aux_rule_sets : aux_rule_set | aux_rule_sets aux_rule_set aux_rule_set : SYMBOL aux_rules aux_rules : aux_rule | aux_rules aux_rule aux_rule : '{' pattern_opt '}' EQUAL_ARROW rhs /* optional items -- yacc should be able to handle this automatically */ BEGIN_WORD_opt : | begin_word EQUAL_ARROW_opt : | EQUAL_ARROW MACRO_opt : | MACRO macro_name_opt : | macro_name SYMBOL_opt : | SYMBOL unary_operator_opt : | unary_operator arguments_opt : | arguments aux_rule_sets_opt : | aux_rule_sets basic_fragment_opt : | basic_fragment body_opt : | body body_fragment_opt : | body_fragment case_body_opt : | case_body constants_opt : | constants default_opt : | default list_fragment_opt : | list_fragment method_name_opt : | method_name modifiers_opt : | modifiers name_prefix_opt : | name_prefix name_suffix_opt : | name_suffix non_statement_basic_fragment_opt : | non_statement_basic_fragment non_statement_body_fragment_opt : | non_statement_body_fragment non_statement_list_fragment_opt : | non_statement_list_fragment parameters_opt : | parameters pattern_opt : | pattern pattern_keywords_opt : | pattern_keywords semicolon_opt : | ';' semicolon_fragment_opt : | semicolon_fragment separator_opt : | separator template_opt : | template values_list_opt : | values_list variable_name_opt : | variable_name