Finding and storing the multiplicity of words from a file

One needs:

A MySQL database schema "NLP" containing a table "words" with the following structure:

string: varchar(45), nr: int(11), frequency: decimal(10,7)

- the column "string" contains the words discovered in the input file

- the column "nr" contains the number of times the word appears in the input file

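- the column "frequency" contains the weight of the word in the input file, computed as the natural logarithm of its relative frequency (nr divided by the total number of words)  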

In order to compile, do not forget to add "mysql-connector-java..." into the "Project properties/Libraries/Compile" (for NetBeans).  

The input file contains a large text in a given language; the program reads it as UTF-16. 


//author: Dragos Sburlan

//description: the class handles a database connection

package nlp;

import java.sql.DriverManager;

import java.sql.Connection;

import java.sql.ResultSetMetaData;

import java.sql.Statement;

import java.sql.ResultSet;

import java.util.ArrayList;

/**
 * Thin wrapper around a single JDBC {@link Connection}.
 *
 * <p>The connection is opened in the constructor and released with {@link #stop()}.
 * Failures are reported on the console instead of being thrown, so callers must be
 * prepared for {@link #runSQL(String)} to return {@code null}.
 */
public final class DBAccessController {

    /** The open connection, or {@code null} when the constructor could not connect. */
    private Connection connection = null;

    /**
     * Opens a connection to the given database.
     *
     * <p>No explicit {@code Class.forName(...)} call is made: {@link DriverManager}
     * locates JDBC 4 drivers that are on the classpath by itself.
     *
     * @param url      JDBC URL of the database
     * @param userId   database user
     * @param password password of {@code userId}
     */
    public DBAccessController(String url, String userId, String password) {
        try {
            connection = DriverManager.getConnection(url, userId, password);
        } catch (java.sql.SQLException sqle) {
            // Report instead of swallowing; the password is deliberately not printed.
            System.err.println("Cannot connect to " + url + ": " + sqle);
        }
    }

    /**
     * Executes one SQL statement.
     *
     * @param queryString the SQL text to execute
     * @return for a statement that produces a result set, a list with one
     *         {@code ArrayList<Object>} per row (column values in column order);
     *         {@code null} when the statement produces no result set, when no
     *         connection is available, or when the database reports an error
     */
    // The raw return type is kept so that existing callers compile unchanged.
    public final synchronized ArrayList runSQL(String queryString) {
        if (connection == null) {
            System.err.println("runSQL: no open database connection");
            return null;
        }
        // try-with-resources closes the statement (and its result set) on every path.
        try (Statement statement = connection.createStatement()) {
            boolean hasResultSet = statement.execute(queryString);
            if (!hasResultSet) {
                return null;
            }
            try (ResultSet res = statement.getResultSet()) {
                ResultSetMetaData rsmd = res.getMetaData();
                int numberOfColumns = rsmd.getColumnCount();
                ArrayList<ArrayList<Object>> general = new ArrayList<>();
                while (res.next()) {
                    ArrayList<Object> row = new ArrayList<>(numberOfColumns);
                    // JDBC column indexes are 1-based.
                    for (int i = 1; i <= numberOfColumns; i++) {
                        row.add(res.getObject(i));
                    }
                    general.add(row);
                }
                return general;
            }
        } catch (java.sql.SQLException sqle) {
            System.out.println(sqle.toString());
            return null;
        }
    }

    /** Closes the connection if one was opened; does nothing otherwise. */
    public final void stop() {
        if (connection == null) {
            return;
        }
        try {
            connection.close();
        } catch (java.sql.SQLException e) {
            System.err.println("Error while closing the database connection: " + e);
        }
    }
}

//author: Dragos Sburlan

//description: the utility class WordsCounter is responsible for counting the occurrences of each word in a file 

package nlp;


import java.util.*;

/**
 * Counts how many times each word occurs in a UTF-16 encoded text file.
 *
 * <p>A word is a maximal run of word characters (letters, digits, underscore);
 * everything else separates words. Counting is case-sensitive.
 */
public class WordsCounter {

    /**
     * Separator between words. UNICODE_CHARACTER_CLASS makes {@code \w} match
     * letters of any script; without it only ASCII letters count as word
     * characters and words such as "café" are cut apart.
     */
    private static final java.util.regex.Pattern WORD_SEPARATOR =
            java.util.regex.Pattern.compile("[^\\w]+", java.util.regex.Pattern.UNICODE_CHARACTER_CLASS);

    /** Total number of words read, repetitions included. */
    private int words_total_number = 0;

    /** Maps each distinct word to its number of occurrences. */
    private HashMap<String,Integer> words_ht = new HashMap<>();

    /**
     * Reads the file and counts its words. An I/O error (for instance a missing
     * file) is printed to standard output; the counts gathered so far are kept.
     *
     * @param file_name path of the UTF-16 encoded input file
     */
    WordsCounter(String file_name) {
        // java.io types are fully qualified because the import block visible for
        // this class only covers java.util.
        try (java.io.BufferedReader br = new java.io.BufferedReader(
                new java.io.InputStreamReader(new java.io.FileInputStream(file_name), "UTF-16"))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] line_words = WORD_SEPARATOR.split(line);
                for (int i = 0; i < line_words.length; i++) {
                    String word = line_words[i];
                    // split() yields an empty token for a blank line or a line
                    // that starts with a separator; that is not a word.
                    if (word.isEmpty()) {
                        continue;
                    }
                    Integer seen = words_ht.get(word);
                    words_ht.put(word, seen == null ? 1 : seen + 1);
                    words_total_number++;
                }
            }
        } catch (java.io.IOException e) {
            System.out.println(e.toString());
        }
    }

    /** @return the total number of words read, repetitions included */
    int getWordsNumber() {
        return this.words_total_number;
    }

    /** @return the live map from each distinct word to its number of occurrences */
    HashMap<String,Integer> getWordsHashmap() {
        return this.words_ht;
    }
}

//author: Dragos Sburlan

package nlp;

import java.util.*;


public class NLP {

    public static void main(String[] args) {

WordsCounter wc = new WordsCounter("c:\\NLP\\input_file.txt");

HashMap<String,Integer> words_hm=wc.getWordsHashmap();

//reading from hashtable and printing output_tmp.txt

                //do this for speeding up (instead of adding each record from the hashmap

                //we build a string/file containing the data and we load the file as a big chunk 

                Set<String> set = words_hm.keySet();

                Iterator<String> it =set.iterator();

                StringBuilder sb=new StringBuilder();

                while (it.hasNext())


                    String word_tmp =;

                    int nr=words_hm.get(word_tmp);

                    double frequency=Math.log(((double) nr)/wc.getWordsNumber());

                    sb.append(word_tmp).append(", ").append(nr).append(", ").append(frequency).append("\r\n");




                    BufferedWriter out = new BufferedWriter (new FileWriter ("c:\\NLP\\output_tmp.txt"));




                catch(Exception exc){System.err.println("Error: "+exc.getMessage());}


                String querydb = "LOAD DATA LOCAL INFILE '/NLP/output_tmp.txt' INTO TABLE words FIELDS TERMINATED BY ',' LINES TERMINATED BY '\r\n'";


                //connecting to DB and loading the file

                String address="jdbc:mysql://localhost/NLP"; 

                String user="root";

                String pass="root";

                DBAccessController dba=new  DBAccessController(address,user, pass);



                //getting a record for testing

                String request="select * from words where string='the'";

                ArrayList result=dba.runSQL(request);

                ArrayList first_record=(ArrayList)result.get(0);




