Laboratory 2

A list of all words from Romanian language can be downloaded from here

Assignment: make an utility class that has a method which has as parameter a string (indicating a text written in Romanian language) and which computes the frequency of the words in the text and updates a table with the obtained values.  The goal is to obtain the frequencies of the words in a natural language. 

Simulating weighted NFA

First of all let us discuss the simulation of a "standard" NFA M=(Q,E,d,q0,F) with an input string w. At each step we keep two lists (S1 and S2): the first one (S1) keeps all the states in which M can be after reading a prefix x of w; the second one (S2) keeps all the states in which M will be after reading the symbol following x in w. Initially, the first list (S1) contains q0 and the second list (S2) is empty. The simulation proceeds as follows: let "a" be the current symbol from the input string.  Then for each state q in S1 one computes d(q,a) and gets the set of all states in which M can be after reading "a"; all the states from this set are added to S2. After all the states from S1 are considered then one sets S1=S2, proceeds to the next symbol of w, and repeats the process. After analyzing entirely w, then in order to check if w belongs to L(M) , one has to see if S1 contains a final state from F.     

The idea is to represent the transition function d in a way that allows a fast computation of d(q,a) (recall that our NFA will be "huge").  So we might consider that the transition function d is represented as a  HashMap<Pair,ArrayList<Tuple>>  where the keys are pairs (state, symbol) and the values are lists of tuples  (nextstate, probability, wordindex); recall that in case of a weighted NFA we also have to store the probability of the transition. In addition, recall that each branch of M corresponds to a word (from Romanian language, for instance), hence we also have to maintain and keep the word index.  

package nlp;

import java.io.*;

public class Tuple implements Serializable{

private Integer nextstate;

private Double probability;

private Short wordindex=0;

Tuple(Integer nextstate, Double probability)

    {

    this.nextstate=nextstate;

    this.probability=probability;

    }

  

    public Integer getNextState() {

        return nextstate;

    }

    public Double getProbability() {

        return probability;

    }

    public Short getWordIndex() {

        return wordindex;

    }

    

    public void setWordIndex(short word_index)

        {

        this.wordindex=word_index;

        }

    

    

    @Override

    public String toString()

        {

        return nextstate+", probab="+probability+", wordindex="+wordindex;    

        }

}

/****************************************************************************************************/

/****************************************************************************************************/

package nlp;

import java.io.*;

/*the class is used to model the pair (state,symbol)*/

public class Pair implements Serializable {

private Integer state;

private String symbol;

Pair(Integer state, String symbol)

    {

    this.state=state;

    this.symbol=symbol;

    }

    public Integer getState() {

        return state;

    }

    public void setState(Integer state) {

        this.state = state;

    }

    public String getSymbol() {

        return symbol;

    }

    public void setSymbol(String symbol) {

        this.symbol = symbol;

    }

    @Override

    public String toString()

        {

        return state+","+symbol; 

        }

    

    

    @Override

    public boolean equals(Object obj) {

        if (obj == null) {

            return false;

        }

        if (getClass() != obj.getClass()) {

            return false;

        }

        final Pair other = (Pair) obj;

        if (this.state != other.state && (this.state == null || !this.state.equals(other.state))) {

            return false;

        }

        if ((this.symbol == null) ? (other.symbol != null) : !this.symbol.equals(other.symbol)) {

            return false;

        }

        return true;

    }

    @Override

    public int hashCode() {

        int hash = 3;

        hash = 31 * hash + (this.state != null ? this.state.hashCode() : 0);

        hash = 31 * hash + (this.symbol != null ? this.symbol.hashCode() : 0);

        return hash;

    }   

}

/****************************************************************************************************/

/****************************************************************************************************/

package weightednfa;

import java.util.*;

import java.io.*;

    

public class TransitionFunction implements Serializable{

   private HashMap<Pair,ArrayList<Tuple>>  transition_table;   

   

   public TransitionFunction()

        {

        this.transition_table=new HashMap<Pair,ArrayList<Tuple>>();

        }

   

   public TransitionFunction(int initial_size)

        {

        this.transition_table=new HashMap<Pair,ArrayList<Tuple>>(initial_size);

        }  

   

   boolean addTransition(Pair p, Tuple t)

        {    

        ArrayList<Tuple> temp=this.transition_table.get(p);

        if(temp!=null)

            {

            //check if the transition already exists    

            for(Tuple tuple:temp)

                    {

                    if(tuple.getNextState().equals(t.getNextState()))

                        return false; //break;    

                    }

                //if the transition does not exists then it will be added 

            temp.add(t);

            this.transition_table.put(p, temp); //nu stiu daca mai este cazul!

            return true;

            }     

        else

            {

            temp=new ArrayList<Tuple>(2);

            temp.add(t);

            this.transition_table.put(p, temp);

            return true;

            }

        }

   

      ArrayList<Tuple> getValue(Integer state, String symbol)

        {    

        return transition_table.get(new Pair(state,symbol));

        }

   

   

   ArrayList<Tuple> getValue(Pair p)

        {    

        return transition_table.get(p);

        }

   

   

  @Override         

  public String toString()

        {

        String out="";

        Set<Pair> pairs=this.transition_table.keySet();

        Iterator<Pair> it=pairs.iterator();

        while(it.hasNext())

            {

            Pair p=it.next();    

            ArrayList<Tuple> arr=this.transition_table.get(p);

            for(Tuple t:arr)

                out+="d("+p.toString()+")="+t.toString()+"\n";

            }

        

        return out;

        }

   

  

 HashMap<Integer,Integer> getTheNodesThatAppearInLTransitions()

    {

    int i=0;    

    HashMap<Integer,Integer> renamed_nodes=new HashMap<Integer,Integer>();

    

    Set<Pair> trs=this.transition_table.keySet();

    Iterator<Pair> it=trs.iterator();

    while(it.hasNext())

        {

        Pair p=it.next();

        Integer node=p.getState();

        if(p.getSymbol().equals("#"))

            {

            if(renamed_nodes.get(node)==null)

                {

                renamed_nodes.put(node,i);

                i++;

                }

            ArrayList<Tuple> alt=this.transition_table.get(p);

            for(Tuple t:alt)

                {

                node=t.getNextState();    

                if(renamed_nodes.get(node)==null)

                    {

                    renamed_nodes.put(node,i);

                    i++;

                    }

                }

            }

        }

    return renamed_nodes;

    }    

HashMap<Integer,Integer> getTheLTransitions()

    {

    return null;

    } 

}