2 * Copyright (C) 2005 Thadeu Lima de Souza Cascardo <cascardo@holoscopio.com>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28 GHashTable* terminals;
33 first_set_t* first_set_new ()
35 first_set_t* first_set;
36 first_set = g_malloc (sizeof (first_set_t));
37 first_set->has_empty = FALSE;
38 first_set->terminals = g_hash_table_new_full (symbol_hash, symbol_equal,
40 first_set->rules = NULL;
41 first_set->recursive = NULL;
45 void first_set_delete (first_set_t* first_set)
48 g_hash_table_destroy (first_set->terminals);
52 rule_delete (l->data);
55 g_list_free (first_set->rules);
56 l = first_set->recursive;
59 rule_delete (l->data);
62 g_list_free (first_set->recursive);
66 void first_set_insert_terminal (first_set_t* first_set, symbol_t* symbol)
68 g_hash_table_insert (first_set->terminals, symbol, NULL);
71 void first_set_insert (gpointer key, gpointer val, gpointer data)
73 g_hash_table_insert (data, symbol_copy (key), NULL);
76 GHashTable* first_set_union (GHashTable* a, GHashTable* b)
78 g_hash_table_foreach (b, first_set_insert, a);
82 void first_advance_recursive (first_set_t* first_set, symbol_t* symbol)
86 rules = first_set->recursive;
89 * For each recursive rule, remove recursiveness and insert any new
90 * rule to list of non-recursive rules.
98 symbol_t* first_symbol;
101 symbols = grammar_get_rule (rule);
103 assert (symbols != NULL);
105 first_symbol = symbols->data;
107 assert (first_symbol->value == symbol->value);
109 /* Remove any leading symbols equal to the recursive nonterminal */
110 while (first_symbol->value == symbol->value)
112 first_symbol = rule_pop (rule);
114 /* If no symbol remains after removing leading recursive
115 * nonterminal, it may produce the empty string */
116 if (first_symbol == NULL)
118 first_set->has_empty = TRUE;
122 /* If after removing leading recursive nonterminal, there
123 * is a terminal, it is in the first set of the nonterminal */
124 else if (first_symbol->terminal)
126 first_set_insert_terminal (first_set,
127 symbol_copy (first_symbol));
132 * If after removing leading recursive nonterminal, there
133 * is a nonterminal, the first of the remaining sequence
134 * of symbols is in the first set of the recursive
137 else if (first_symbol->value != symbol->value)
139 first_set->rules = g_list_append (first_set->rules, rule);
144 rules = g_list_next (rules);
148 g_list_free (first_set->recursive);
149 first_set->recursive = NULL;
153 void first_prepare (gpointer key, gpointer val, gpointer data)
160 first_set_t* first_set;
162 symbol = (symbol_t*) key;
163 rules = *(GList**) val;
164 first = (GHashTable*) data;
166 assert (symbol->terminal == FALSE);
168 /* For each nonterminal in the grammar, there is a first set */
169 first_set = first_set_new ();
170 g_hash_table_insert (first, symbol_copy (symbol), first_set);
173 * For each rule for the nonterminal in the grammar, it will either
174 * produce the empty string directly, start with a terminal that will
175 * be included in the first set, start with a different nonterminal or
176 * be a recursive rule.
178 while (rules != NULL)
183 symbol_t* first_symbol;
186 symbols = grammar_get_rule (rule);
190 first_set->has_empty = TRUE;
194 first_symbol = symbols->data;
195 if (first_symbol->terminal)
197 first_set_insert_terminal (first_set,
198 symbol_copy (first_symbol));
200 else if (first_symbol->value == symbol->value)
202 first_set->recursive = g_list_prepend (first_set->recursive,
207 first_set->rules = g_list_prepend (first_set->rules,
212 rules = g_list_next (rules);
217 * If there any no non-recursive rules starting with a nonterminal
218 * recursiveness may be easily eliminated.
220 if (first_set->rules == NULL && first_set->recursive != NULL)
223 * If the nonterminal may produce the empty string, each recursive
224 * rule will lead to a new rule with the leading recursive nonterminal
227 if (first_set->has_empty)
229 first_advance_recursive (first_set, symbol);
231 /* If it does not produce the empty string, every recursive rule
232 * may be simply removed. All terminals in first set are known at
238 l = first_set->recursive;
241 rule_delete (l->data);
244 g_list_free (first_set->recursive);
245 first_set->recursive = NULL;
247 assert (first_set->recursive == NULL);
252 void first_iterate (gpointer key, gpointer val, gpointer data)
256 first_set_t* first_set;
262 symbol = (symbol_t*) key;
263 first_set = (first_set_t*) val;
264 first = (GHashTable*) data;
266 assert (symbol->terminal == FALSE);
268 /* If there are no non-recursive rules starting with a
269 * nonterminal, there's not much to do. Only remove all
270 * the recursive rules or advance them as described in
273 if (first_set->rules == NULL)
275 if (first_set->recursive != NULL)
277 if (first_set->has_empty)
279 first_advance_recursive (first_set, symbol);
284 l = first_set->recursive;
287 rule_delete (l->data);
290 g_list_free (first_set->recursive);
291 first_set->recursive = NULL;
293 assert (first_set->recursive == NULL);
300 rules = first_set->rules;
304 * For each rule, try to eliminate it by checking the first set of
305 * the nonterminal with starts it. Rules may have been added by
306 * previous iterations or the advancement of recursive rules.
307 * So, trivial checks like empty rules and rules starting with
308 * terminals should be checked too.
310 while (rules != NULL)
315 symbol_t* first_symbol;
318 symbols = grammar_get_rule (rule);
320 /* If it is an empty rule, nonterminal may produce the empty
324 first_set->has_empty = TRUE;
330 first_set_t* next_set;
332 first_symbol = symbols->data;
334 /* If it starts with a terminal, include it in the first
335 * set and remove the rule. */
336 if (first_symbol->terminal)
338 first_set_insert_terminal (first_set,
339 symbol_copy (first_symbol));
342 /* Get the first set of the nonterminal which starts the rule */
343 else if (g_hash_table_lookup_extended (first, first_symbol, NULL,
344 (gpointer*) &next_set))
347 * If nonterminal produces the empty string, the first set
348 * of the sequence of symbols next to it is in the first
349 * set of the nonterminal we are treating. Add a new rule
350 * corresponding to this sequence of symbols. This is the
351 * rule which says: if there is a production A -> y1 y2 yk
352 * and the empty string is in the first of y1, then first
353 * of y2 yk is in first of A.
355 if (next_set->has_empty)
358 new_rule = rule_copy (rule);
360 new_rules = g_list_prepend (new_rules, new_rule);
362 /* The terminals in the first of the leading nonterminal in
363 * the rule are in the first of our current nonterminal */
364 first_set_union (first_set->terminals,
365 next_set->terminals);
367 * If there are other rules starting with a nonterminal
368 * for this nonterminal, they should be copied to the
369 * current nonterminal, so we can detect and treat indirect
370 * recursiveness properly.
372 if (next_set->rules != NULL)
374 /* FIXME: not much of an advance here, watch for
375 * this in a possible unstopabble loop */
376 /* FIXME: copy recursive rules too, with care */
378 next_rules = next_set->rules;
379 while (next_rules != NULL)
381 rule_t* next_rule = rule_copy (next_rules->data);
383 next_symbols = grammar_get_rule (next_rule);
385 * If the rule is empty, both terminals produce the
386 * empty set. This will be detected for the other
387 * nonterminal later. Optimization could be done
390 if (next_symbols == NULL)
392 first_set->has_empty = TRUE;
393 rule_delete (next_rule);
397 symbol_t* next_symbol;
398 next_symbol = next_symbols->data;
400 * This rule starts with a terminal. Ignore it.
401 * We could add it as an optimization. TODO
403 if (next_symbol->terminal)
405 rule_delete (next_rule);
407 /* This is an indirect recursive rule. */
408 else if (next_symbol->value == symbol->value)
410 first_set->recursive =
411 g_list_prepend (first_set->recursive,
414 /* Next time we treat current nonterminal,
415 * we will check this rule directly. */
418 new_rules = g_list_prepend (new_rules, next_rule);
421 next_rules = g_list_next (next_rules);
425 * If nonterminal has recursive rules, we should expect
426 * more rules to add here and so we will check it again
427 * in the next iteration. Keep the rule. If there is only
428 * indirect recursiveness, we will detect it through the
429 * rules we have added before. We should remove it in this
430 * case so we make advance.
432 if (next_set->recursive != NULL)
434 new_rules = g_list_prepend (new_rules, rule);
448 rules = g_list_next (rules);
452 g_list_free (first_set->rules);
453 first_set->rules = new_rules;
459 void first_check (gpointer key, gpointer val, gpointer data)
463 first_set_t* first_set;
466 symbol = (symbol_t*) key;
467 first_set = (first_set_t*) val;
468 stop = (gboolean*) data;
470 assert (symbol->terminal == FALSE);
472 /* If there is anything besides terminals in the first set,
473 * we must iterate more and remove all of them. */
474 if (first_set->rules != NULL || first_set->recursive != NULL)
482 * Calculate the first set for every nonterminal symbol in the grammar.
483 * We should iterate through the rules for each nonterminal until only
484 * terminals are known to be in the first set of it.
486 GHashTable* grammar_first (grammar_t* grammar)
490 first = g_hash_table_new_full (symbol_hash, symbol_equal,
493 g_hash_table_foreach (grammar->grammar, first_prepare, first);
496 g_hash_table_foreach (first, first_iterate, first);
498 g_hash_table_foreach (first, first_check, &stop);
499 } while (stop == FALSE);
503 static void put_key_on_list (gpointer key, gpointer val, gpointer data)
507 *l = g_list_prepend (*l, symbol_copy (key));
510 GList* first_get (GHashTable* first, symbol_t* symbol)
513 first_set_t* first_set;
516 if (g_hash_table_lookup_extended (first, symbol, NULL,
517 (gpointer*) &first_set))
519 if (first_set->has_empty)
520 l = g_list_prepend (l, symbol_new (TRUE, 0));
521 g_hash_table_foreach (first_set->terminals, put_key_on_list, &l);
527 GList* first_rule (GHashTable* first, rule_t* rule)
530 GHashTable* terminals;
536 symbols = grammar_get_rule (rule);
538 terminals = g_hash_table_new_full (symbol_hash, symbol_equal, g_free, NULL);
540 while (symbols != NULL)
542 first_set_t* first_set;
544 symbol = (symbol_t*) symbols->data;
545 if (symbol->terminal)
547 g_hash_table_insert (terminals, symbol_copy (symbol), NULL);
550 if (!g_hash_table_lookup_extended (first, symbol, NULL,
551 (gpointer*) &first_set))
553 g_hash_table_destroy (terminals);
556 first_set_union (terminals, first_set->terminals);
557 if (first_set->has_empty == FALSE)
559 symbols = g_list_next (symbols);
563 l = g_list_prepend (l, symbol_new (TRUE, 0));
564 g_hash_table_foreach (terminals, put_key_on_list, &l);
566 g_hash_table_destroy (terminals);
572 void first_print_symbol (gpointer key, gpointer val, gpointer data)
575 symbol = (symbol_t*) key;
576 fprintf (stdout, "%s\n", g_quark_to_string (symbol->value));
579 void first_print_set (gpointer key, gpointer val, gpointer data)
582 first_set_t* first_set;
583 symbol = (symbol_t*) key;
584 first_set = (first_set_t*) val;
585 fprintf (stdout, "FIRST of %s:\n", g_quark_to_string (symbol->value));
586 if (first_set->has_empty)
587 fprintf (stdout, "empty symbol\n");
588 g_hash_table_foreach (first_set->terminals, first_print_symbol, NULL);
591 void first_print (GHashTable* first)
593 g_hash_table_foreach (first, first_print_set, NULL);