--- /dev/null
+#include <grammar.h>
+#include <stdio.h>
+#include <assert.h>
+
+typedef struct
+{
+ gboolean has_empty;
+ GHashTable* terminals;
+ GList* rules;
+ GList* recursive;
+} first_set_t;
+
+first_set_t* first_set_new ()
+{
+ first_set_t* first_set;
+ first_set = g_malloc (sizeof (first_set_t));
+ first_set->has_empty = FALSE;
+ first_set->terminals = g_hash_table_new_full (symbol_hash, symbol_equal,
+ g_free, NULL);
+ first_set->rules = NULL;
+ first_set->recursive = NULL;
+ return first_set;
+}
+
+void first_set_delete (first_set_t* first_set)
+{
+ GList* l;
+ g_hash_table_destroy (first_set->terminals);
+ l = first_set->rules;
+ while (l != NULL)
+ {
+ rule_delete (l->data);
+ l = g_list_next (l);
+ }
+ g_list_free (first_set->rules);
+ l = first_set->recursive;
+ while (l != NULL)
+ {
+ rule_delete (l->data);
+ l = g_list_next (l);
+ }
+ g_list_free (first_set->recursive);
+ g_free (first_set);
+}
+
+void first_set_insert_terminal (first_set_t* first_set, symbol_t* symbol)
+{
+ g_hash_table_insert (first_set->terminals, symbol, NULL);
+}
+
+void first_set_insert (gpointer key, gpointer val, gpointer data)
+{
+ g_hash_table_insert (data, symbol_copy (key), NULL);
+}
+
+GHashTable* first_set_union (GHashTable* a, GHashTable* b)
+{
+ g_hash_table_foreach (b, first_set_insert, a);
+ return a;
+}
+
+void first_advance_recursive (first_set_t* first_set, symbol_t* symbol)
+{
+
+ GList* rules;
+ rules = first_set->recursive;
+
+ /*
+ * For each recursive rule, remove recursiveness and insert any new
+ * rule to list of non-recursive rules.
+ */
+
+ while (rules != NULL)
+ {
+
+ rule_t* rule;
+ GList* symbols;
+ symbol_t* first_symbol;
+
+ rule = rules->data;
+ symbols = grammar_get_rule (rule);
+
+ assert (symbols != NULL);
+
+ first_symbol = symbols->data;
+
+ assert (first_symbol->value == symbol->value);
+
+ /* Remove any leading symbols equal to the recursive nonterminal */
+ while (first_symbol->value == symbol->value)
+ {
+ first_symbol = rule_pop (rule);
+
+ /* If no symbol remains after removing leading recursive
+ * nonterminal, it may produce the empty string */
+ if (first_symbol == NULL)
+ {
+ first_set->has_empty = TRUE;
+ rule_delete (rule);
+ break;
+ }
+ /* If after removing leading recursive nonterminal, there
+ * is a terminal, it is in the first set of the nonterminal */
+ else if (first_symbol->terminal)
+ {
+ first_set_insert_terminal (first_set,
+ symbol_copy (first_symbol));
+ rule_delete (rule);
+ break;
+ }
+ /*
+ * If after removing leading recursive nonterminal, there
+ * is a nonterminal, the first of the remaining sequence
+ * of symbols is in the first set of the recursive
+ * nonterminal
+ */
+ else if (first_symbol->value != symbol->value)
+ {
+ first_set->rules = g_list_append (first_set->rules, rule);
+ break;
+ }
+ }
+
+ rules = g_list_next (rules);
+
+ }
+
+ g_list_free (first_set->recursive);
+ first_set->recursive = NULL;
+
+}
+
+void first_prepare (gpointer key, gpointer val, gpointer data)
+{
+
+ symbol_t* symbol;
+ GList* rules;
+ GHashTable* first;
+
+ first_set_t* first_set;
+
+ symbol = (symbol_t*) key;
+ rules = *(GList**) val;
+ first = (GHashTable*) data;
+
+ assert (symbol->terminal == FALSE);
+
+ /* For each nonterminal in the grammar, there is a first set */
+ first_set = first_set_new ();
+ g_hash_table_insert (first, symbol_copy (symbol), first_set);
+
+ /*
+ * For each rule for the nonterminal in the grammar, it will either
+ * produce the empty string directly, start with a terminal that will
+ * be included in the first set, start with a different nonterminal or
+ * be a recursive rule.
+ */
+ while (rules != NULL)
+ {
+
+ rule_t* rule;
+ GList* symbols;
+ symbol_t* first_symbol;
+
+ rule = rules->data;
+ symbols = grammar_get_rule (rule);
+
+ if (symbols == NULL)
+ {
+ first_set->has_empty = TRUE;
+ }
+ else
+ {
+ first_symbol = symbols->data;
+ if (first_symbol->terminal)
+ {
+ first_set_insert_terminal (first_set,
+ symbol_copy (first_symbol));
+ }
+ else if (first_symbol->value == symbol->value)
+ {
+ first_set->recursive = g_list_prepend (first_set->recursive,
+ rule_copy (rule));
+ }
+ else
+ {
+ first_set->rules = g_list_prepend (first_set->rules,
+ rule_copy (rule));
+ }
+ }
+
+ rules = g_list_next (rules);
+
+ }
+
+ /*
+ * If there any no non-recursive rules starting with a nonterminal
+ * recursiveness may be easily eliminated.
+ */
+ if (first_set->rules == NULL && first_set->recursive != NULL)
+ {
+ /*
+ * If the nonterminal may produce the empty string, each recursive
+ * rule will lead to a new rule with the leading recursive nonterminal
+ * removed.
+ */
+ if (first_set->has_empty)
+ {
+ first_advance_recursive (first_set, symbol);
+ }
+ /* If it does not produce the empty string, every recursive rule
+ * may be simply removed. All terminals in first set are known at
+ * this point.
+ */
+ else
+ {
+ GList* l;
+ l = first_set->recursive;
+ while (l != NULL)
+ {
+ rule_delete (l->data);
+ l = g_list_next (l);
+ }
+ g_list_free (first_set->recursive);
+ first_set->recursive = NULL;
+ }
+ assert (first_set->recursive == NULL);
+ }
+
+}
+
+void first_iterate (gpointer key, gpointer val, gpointer data)
+{
+
+ symbol_t* symbol;
+ first_set_t* first_set;
+ GHashTable* first;
+
+ GList* rules;
+ GList* new_rules;
+
+ symbol = (symbol_t*) key;
+ first_set = (first_set_t*) val;
+ first = (GHashTable*) data;
+
+ assert (symbol->terminal == FALSE);
+
+ /* If there are no non-recursive rules starting with a
+ * nonterminal, there's not much to do. Only remove all
+ * the recursive rules or advance them as described in
+ * the first_prepare.
+ */
+ if (first_set->rules == NULL)
+ {
+ if (first_set->recursive != NULL)
+ {
+ if (first_set->has_empty)
+ {
+ first_advance_recursive (first_set, symbol);
+ }
+ else
+ {
+ GList* l;
+ l = first_set->recursive;
+ while (l != NULL)
+ {
+ rule_delete (l->data);
+ l = g_list_next (l);
+ }
+ g_list_free (first_set->recursive);
+ first_set->recursive = NULL;
+ }
+ assert (first_set->recursive == NULL);
+ }
+ return;
+ }
+ else
+ {
+
+ rules = first_set->rules;
+ new_rules = NULL;
+
+ /*
+ * For each rule, try to eliminate it by checking the first set of
+ * the nonterminal with starts it. Rules may have been added by
+ * previous iterations or the advancement of recursive rules.
+ * So, trivial checks like empty rules and rules starting with
+ * terminals should be checked too.
+ */
+ while (rules != NULL)
+ {
+
+ rule_t* rule;
+ GList* symbols;
+ symbol_t* first_symbol;
+
+ rule = rules->data;
+ symbols = grammar_get_rule (rule);
+
+ /* If it is an empty rule, nonterminal may produce the empty
+ * string */
+ if (symbols == NULL)
+ {
+ first_set->has_empty = TRUE;
+ rule_delete (rule);
+ }
+ else
+ {
+
+ first_set_t* next_set;
+
+ first_symbol = symbols->data;
+
+ /* If it starts with a terminal, include it in the first
+ * set and remove the rule. */
+ if (first_symbol->terminal)
+ {
+ first_set_insert_terminal (first_set,
+ symbol_copy (first_symbol));
+ rule_delete (rule);
+ }
+ /* Get the first set of the nonterminal which starts the rule */
+ else if (g_hash_table_lookup_extended (first, first_symbol, NULL,
+ (gpointer*) &next_set))
+ {
+ /*
+ * If nonterminal produces the empty string, the first set
+ * of the sequence of symbols next to it is in the first
+ * set of the nonterminal we are treating. Add a new rule
+ * corresponding to this sequence of symbols. This is the
+ * rule which says: if there is a production A -> y1 y2 yk
+ * and the empty string is in the first of y1, then first
+ * of y2 yk is in first of A.
+ */
+ if (next_set->has_empty)
+ {
+ rule_t* new_rule;
+ new_rule = rule_copy (rule);
+ rule_pop (new_rule);
+ new_rules = g_list_prepend (new_rules, new_rule);
+ }
+ /* The terminals in the first of the leading nonterminal in
+ * the rule are in the first of our current nonterminal */
+ first_set_union (first_set->terminals,
+ next_set->terminals);
+ /*
+ * If there are other rules starting with a nonterminal
+ * for this nonterminal, they should be copied to the
+ * current nonterminal, so we can detect and treat indirect
+ * recursiveness properly.
+ */
+ if (next_set->rules != NULL)
+ {
+ /* FIXME: not much of an advance here, watch for
+ * this in a possible unstopabble loop */
+ /* FIXME: copy recursive rules too, with care */
+ GList* next_rules;
+ next_rules = next_set->rules;
+ while (next_rules != NULL)
+ {
+ rule_t* next_rule = rule_copy (next_rules->data);
+ GList* next_symbols;
+ next_symbols = grammar_get_rule (next_rule);
+ /*
+ * If the rule is empty, both terminals produce the
+ * empty set. This will be detected for the other
+ * nonterminal later. Optimization could be done
+ * here. TODO
+ */
+ if (next_symbols == NULL)
+ {
+ first_set->has_empty = TRUE;
+ rule_delete (next_rule);
+ }
+ else
+ {
+ symbol_t* next_symbol;
+ next_symbol = next_symbols->data;
+ /* TODO: Check this assertion is correct. */
+ assert (next_symbol->terminal == FALSE);
+ /* This is an indirect recursive rule. */
+ if (next_symbol->value == symbol->value)
+ {
+ first_set->recursive =
+ g_list_prepend (first_set->recursive,
+ next_rule);
+ }
+ /* Next time we treat current nonterminal,
+ * we will check this rule directly. */
+ else
+ {
+ new_rules = g_list_prepend (new_rules, next_rule);
+ }
+ }
+ next_rules = g_list_next (next_rules);
+ }
+ }
+ /*
+ * If nonterminal has recursive rules, we should expect
+ * more rules to add here and so we will check it again
+ * in the next iteration. Keep the rule. If there is only
+ * indirect recursiveness, we will detect it through the
+ * rules we have added before. We should remove it in this
+ * case so we make advance.
+ */
+ if (next_set->recursive != NULL)
+ {
+ new_rules = g_list_prepend (new_rules, rule);
+ }
+ else
+ {
+ rule_delete (rule);
+ }
+ }
+ else
+ {
+ assert (TRUE);
+ }
+
+ }
+
+ rules = g_list_next (rules);
+
+ }
+
+ g_list_free (first_set->rules);
+ first_set->rules = new_rules;
+
+ }
+
+}
+
+void first_check (gpointer key, gpointer val, gpointer data)
+{
+
+ symbol_t* symbol;
+ first_set_t* first_set;
+ gboolean* stop;
+
+ symbol = (symbol_t*) key;
+ first_set = (first_set_t*) val;
+ stop = (gboolean*) data;
+
+ assert (symbol->terminal == FALSE);
+
+ /* If there is anything besides terminals in the first set,
+ * we must iterate more and remove all of them. */
+ if (first_set->rules != NULL || first_set->recursive != NULL)
+ {
+ *stop = FALSE;
+ }
+
+}
+
+/*
+ * Calculate the first set for every nonterminal symbol in the grammar.
+ * We should iterate through the rules for each nonterminal until only
+ * terminals are known to be in the first set of it.
+ */
+GHashTable* grammar_first (Grammar* grammar)
+{
+ GHashTable* first;
+ gboolean stop;
+ first = g_hash_table_new_full (symbol_hash, symbol_equal,
+ g_free,
+ first_set_delete);
+ g_hash_table_foreach (grammar->grammar, first_prepare, first);
+ do
+ {
+ g_hash_table_foreach (first, first_iterate, first);
+ stop = TRUE;
+ g_hash_table_foreach (first, first_check, &stop);
+ } while (stop == FALSE);
+ return first;
+}
+
+void put_key_on_list (gpointer key, gpointer val, gpointer data)
+{
+ GList** l;
+ l = (GList**) l;
+ *l = g_list_prepend (*l, key);
+}
+
+GList* first_get (GHashTable* first, symbol_t* symbol)
+{
+
+ first_set_t* first_set;
+ GList* l;
+ l = NULL;
+ if (g_hash_table_lookup_extended (first, symbol, NULL,
+ (gpointer*) &first_set))
+ {
+ g_hash_table_foreach (first->terminals, put_key_on_list, &l);
+ }
+ return l;
+
+}