Finding and storing the multiplicity of words from a file

One needs:

A MySQL database schema "NLP" with a table "words". The table has the following structure:

string: varchar(45), nr: int(11), frequency: decimal(10,7)

- the column "string" contains the words discovered in the input file

- the column "nr" contains the number of times the word appears in the input file

- the column "frequency" contains the weight of the word in the input file, computed as the natural logarithm of (number of occurrences of the word / total number of words)  

In order to compile, do not forget to add "mysql-connector-java..." into the "Project properties/Libraries/Compile" (for NetBeans).  

The input file contains a large text in a given language. It is read as UTF-16, so it must be saved in that encoding.

/***************************************************************************************/

//author: Dragos Sburlan

//description: the class handles a database connection

package nlp;

import java.sql.DriverManager;

import java.sql.Connection;

import java.sql.ResultSetMetaData;

import java.sql.Statement;

import java.sql.ResultSet;

import java.util.ArrayList;

/**
 * Small helper that owns one JDBC connection and runs SQL statements on it.
 *
 * <p>The connection is opened in the constructor. If it cannot be opened, the
 * error is reported on standard error and the instance stays usable:
 * {@link #runSQL(String)} then returns {@code null} and {@link #stop()} does
 * nothing.
 */
public final class DBAccessController {

    /** The open connection, or {@code null} when the constructor could not connect. */
    private Connection connection = null;

    /**
     * Loads the MySQL JDBC driver and opens a connection.
     *
     * @param url      JDBC URL of the database
     * @param userId   database user name
     * @param password database password
     */
    public DBAccessController(String url, String userId, String password) {
        try {
            // Same effect as the deprecated Class.newInstance(), without the deprecation.
            Class.forName("com.mysql.jdbc.Driver").getDeclaredConstructor().newInstance();
            connection = DriverManager.getConnection(url, userId, password);
        } catch (ReflectiveOperationException | java.sql.SQLException e) {
            // Report instead of swallowing; 'connection' stays null and is checked later.
            System.err.println("DBAccessController: cannot connect: " + e);
        }
    }

    /**
     * Executes one SQL statement with auto-commit enabled.
     *
     * <p>The method is {@code synchronized}, so calls on the same instance are
     * serialized.
     *
     * @param queryString the SQL text to execute
     * @return one inner list per result row, each holding the column values in
     *         column order; {@code null} when the statement produced no result
     *         set, when there is no connection, or when an SQL error occurred
     */
    public final synchronized ArrayList<ArrayList<Object>> runSQL(String queryString) {
        if (connection == null) {
            System.err.println("DBAccessController: no database connection, statement not executed");
            return null;
        }

        // try-with-resources closes the statement (and result set) on every path.
        try (Statement statement = connection.createStatement()) {
            connection.setAutoCommit(true);

            // execute() returns false for statements without a result set (e.g. updates).
            if (!statement.execute(queryString)) {
                return null;
            }

            try (ResultSet res = statement.getResultSet()) {
                ResultSetMetaData rsmd = res.getMetaData();
                int numberOfColumns = rsmd.getColumnCount();

                ArrayList<ArrayList<Object>> rows = new ArrayList<>();
                while (res.next()) {
                    ArrayList<Object> row = new ArrayList<>(numberOfColumns);
                    // JDBC column indices are 1-based.
                    for (int i = 1; i <= numberOfColumns; i++) {
                        row.add(res.getObject(i));
                    }
                    rows.add(row);
                }
                return rows;
            }
        } catch (java.sql.SQLException sqle) {
            System.out.println(sqle.toString());
            return null;
        }
    }

    /**
     * Closes the connection. Does nothing when no connection was opened; a
     * failure to close is reported on standard error.
     */
    public final void stop() {
        if (connection == null) {
            return;
        }
        try {
            connection.close();
        } catch (java.sql.SQLException e) {
            System.err.println("DBAccessController: error while closing the connection: " + e);
        }
    }

}

/***************************************************************************************/

//author: Dragos Sburlan

//description: the utility class WordsCounter is responsible for counting the occurrences of each word in a file

package nlp;

import java.io.*;

import java.util.*;

/**
 * Reads a UTF-16 text file and counts how many times each word occurs.
 *
 * <p>A word is a maximal run of word characters (letters, digits, underscore);
 * everything else is treated as a separator. Words are compared exactly, so
 * different capitalizations are counted as different words.
 */
public class WordsCounter {

    /**
     * Separator between words: one or more non-word characters. The (?U) flag
     * makes \w Unicode-aware so words containing non-ASCII letters are not
     * split in the middle.
     */
    private static final java.util.regex.Pattern WORD_SEPARATOR =
            java.util.regex.Pattern.compile("(?U)[^\\w]+");

    /** Total number of words read, counting repetitions. */
    private int words_total_number = 0;

    /** Maps each distinct word to its number of occurrences. */
    private HashMap<String, Integer> words_ht = new HashMap<>();

    /**
     * Reads the whole file and fills the counters. An I/O error is printed to
     * standard output; whatever was counted before the error is kept.
     *
     * @param file_name path of the UTF-16 encoded input file
     */
    WordsCounter(String file_name) {
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(file_name), "UTF-16"))) {
            String line;
            while ((line = br.readLine()) != null) {
                for (String word : WORD_SEPARATOR.split(line)) {
                    // split() yields "" for a blank line and for a line that
                    // starts with a separator; that is not a word.
                    if (word.isEmpty()) {
                        continue;
                    }
                    Integer seen = words_ht.get(word);
                    words_ht.put(word, seen == null ? 1 : seen + 1);
                    this.words_total_number++;
                }
            }
        } catch (IOException e) {
            System.out.println(e.toString());
        }
    }

    /** @return the total number of words read, counting repetitions */
    int getWordsNumber() {
        return this.words_total_number;
    }

    /** @return the internal word-to-count map (not a copy) */
    HashMap<String, Integer> getWordsHashmap() {
        return this.words_ht;
    }

}

/***************************************************************************************/

//author: Dragos Sburlan

package nlp;

import java.util.*;

import java.io.*;

public class NLP {

    public static void main(String[] args) {

WordsCounter wc = new WordsCounter("c:\\NLP\\input_file.txt");

HashMap<String,Integer> words_hm=wc.getWordsHashmap();

//reading from hashtable and printing output_tmp.txt

                //do this for speeding up (instead of adding each record from the hashmap

                //we build a string/file containing the data and we load the file as a big chunk 


                Set<String> set = words_hm.keySet();

                Iterator<String> it =set.iterator();

                StringBuilder sb=new StringBuilder();

                while (it.hasNext())

                    {    

                    String word_tmp = it.next();

                    int nr=words_hm.get(word_tmp);

                    double frequency=Math.log(((double) nr)/wc.getWordsNumber());

                    sb.append(word_tmp).append(", ").append(nr).append(", ").append(frequency).append("\r\n");

                    }

                try

                    {

                    BufferedWriter out = new BufferedWriter (new FileWriter ("c:\\NLP\\output_tmp.txt"));

                    out.write(sb.toString());

                    out.close();

                    }

                catch(Exception exc){System.err.println("Error: "+exc.getMessage());}

                

                String querydb = "LOAD DATA LOCAL INFILE '/NLP/output_tmp.txt' INTO TABLE words FIELDS TERMINATED BY ',' LINES TERMINATED BY '\r\n'";

                

                //connecting to DB and loading the file

                String address="jdbc:mysql://localhost/NLP"; 

                String user="root";

                String pass="root";

                DBAccessController dba=new  DBAccessController(address,user, pass);

                dba.runSQL(querydb);

                

                //getting a record for testing

                String request="select * from words where string='the'";

                ArrayList result=dba.runSQL(request);

                ArrayList first_record=(ArrayList)result.get(0);

                System.out.println(first_record.get(0));

                

dba.stop();

}

}