Laboratory 2
A list of all words from Romanian language can be downloaded from here
Assignment: make an utility class that has a method which has as parameter a string (indicating a text written in Romanian language) and which computes the frequency of the words in the text and updates a table with the obtained values. The goal is to obtain the frequencies of the words in a natural language.
Simulating weighted NFA
First of all let us discuss the simulation of a "standard" NFA M=(Q,E,d,q0,F) with an input string w. At each step we keep two lists (S1 and S2): the first one (S1) keeps all the states in which M can be after reading a prefix x of w; the second one (S2) keeps all the states in which M will be after reading the symbol following x in w. Initially, the first list (S1) contains q0 and the second list (S2) is empty. The simulation proceeds as follows: let "a" be the current symbol from the input string. Then for each state q in S1 one computes d(q,a) and gets the set of all states in which M can be after reading "a"; all the states from this set are added to S2. After all the states from S1 are considered then one sets S1=S2, proceeds to the next symbol of w, and repeats the process. After analyzing entirely w, then in order to check if w belongs to L(M) , one has to see if S1 contains a final state from F.
The idea is to represent the transition function d in a way that allows a fast computation of d(q,a) (recall that our NFA will be "huge"). So we might consider that the transition function d is represented as a HashMap<Pair,ArrayList<Tuple>> where the keys are pairs (state, symbol) and the values are lists of tuples (nextstate, probability, wordindex); recall that in case of a weighted NFA we also have to store the probability of the transition. In addition, recall that each branch of M corresponds to a word (from Romanian language, for instance), hence we also have to maintain and keep the word index.
package nlp;
import java.io.*;
public class Tuple implements Serializable{
private Integer nextstate;
private Double probability;
private Short wordindex=0;
Tuple(Integer nextstate, Double probability)
{
this.nextstate=nextstate;
this.probability=probability;
}
public Integer getNextState() {
return nextstate;
}
public Double getProbability() {
return probability;
}
public Short getWordIndex() {
return wordindex;
}
public void setWordIndex(short word_index)
{
this.wordindex=word_index;
}
@Override
public String toString()
{
return nextstate+", probab="+probability+", wordindex="+wordindex;
}
}
/****************************************************************************************************/
/****************************************************************************************************/
package nlp;
import java.io.*;
/*the class is used to model the pair (state,symbol)*/
public class Pair implements Serializable {
private Integer state;
private String symbol;
Pair(Integer state, String symbol)
{
this.state=state;
this.symbol=symbol;
}
public Integer getState() {
return state;
}
public void setState(Integer state) {
this.state = state;
}
public String getSymbol() {
return symbol;
}
public void setSymbol(String symbol) {
this.symbol = symbol;
}
@Override
public String toString()
{
return state+","+symbol;
}
@Override
public boolean equals(Object obj) {
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
final Pair other = (Pair) obj;
if (this.state != other.state && (this.state == null || !this.state.equals(other.state))) {
return false;
}
if ((this.symbol == null) ? (other.symbol != null) : !this.symbol.equals(other.symbol)) {
return false;
}
return true;
}
@Override
public int hashCode() {
int hash = 3;
hash = 31 * hash + (this.state != null ? this.state.hashCode() : 0);
hash = 31 * hash + (this.symbol != null ? this.symbol.hashCode() : 0);
return hash;
}
}
/****************************************************************************************************/
/****************************************************************************************************/
package weightednfa;
import java.util.*;
import java.io.*;
public class TransitionFunction implements Serializable{
private HashMap<Pair,ArrayList<Tuple>> transition_table;
public TransitionFunction()
{
this.transition_table=new HashMap<Pair,ArrayList<Tuple>>();
}
public TransitionFunction(int initial_size)
{
this.transition_table=new HashMap<Pair,ArrayList<Tuple>>(initial_size);
}
boolean addTransition(Pair p, Tuple t)
{
ArrayList<Tuple> temp=this.transition_table.get(p);
if(temp!=null)
{
//check if the transition already exists
for(Tuple tuple:temp)
{
if(tuple.getNextState().equals(t.getNextState()))
return false; //break;
}
//if the transition does not exists then it will be added
temp.add(t);
this.transition_table.put(p, temp); //nu stiu daca mai este cazul!
return true;
}
else
{
temp=new ArrayList<Tuple>(2);
temp.add(t);
this.transition_table.put(p, temp);
return true;
}
}
ArrayList<Tuple> getValue(Integer state, String symbol)
{
return transition_table.get(new Pair(state,symbol));
}
ArrayList<Tuple> getValue(Pair p)
{
return transition_table.get(p);
}
@Override
public String toString()
{
String out="";
Set<Pair> pairs=this.transition_table.keySet();
Iterator<Pair> it=pairs.iterator();
while(it.hasNext())
{
Pair p=it.next();
ArrayList<Tuple> arr=this.transition_table.get(p);
for(Tuple t:arr)
out+="d("+p.toString()+")="+t.toString()+"\n";
}
return out;
}
HashMap<Integer,Integer> getTheNodesThatAppearInLTransitions()
{
int i=0;
HashMap<Integer,Integer> renamed_nodes=new HashMap<Integer,Integer>();
Set<Pair> trs=this.transition_table.keySet();
Iterator<Pair> it=trs.iterator();
while(it.hasNext())
{
Pair p=it.next();
Integer node=p.getState();
if(p.getSymbol().equals("#"))
{
if(renamed_nodes.get(node)==null)
{
renamed_nodes.put(node,i);
i++;
}
ArrayList<Tuple> alt=this.transition_table.get(p);
for(Tuple t:alt)
{
node=t.getNextState();
if(renamed_nodes.get(node)==null)
{
renamed_nodes.put(node,i);
i++;
}
}
}
}
return renamed_nodes;
}
HashMap<Integer,Integer> getTheLTransitions()
{
return null;
}
}