opennlp2

Apache Solr 7.6 + SolrJ で形態素解析の詳しい結果を取得する(英語)

概要

Solr の自然言語処理(解析)結果を管理コンソールで確認する方法は多く紹介されていますが、Java(SolrJ)から呼び出して結果を取得する方法は見つからなかったため、調べてみました。

コード

package hello.solr;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.DocumentAnalysisRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;

/**
 * Sends one English sentence to a Solr core through a
 * {@link DocumentAnalysisRequest} and prints, for every token listed under the
 * OpenNLP part-of-speech filter stage, the token's {@code text} and
 * {@code type} attributes.
 *
 * <p>The core name, field name and Solr URL are hard-coded in {@link #main}.
 */
public class HelloAnalysisEnglishSimple {

    /** Key under which the analysis response lists the tokens emitted by the POS filter. */
    private static final String POS_FILTER_STAGE =
            "org.apache.lucene.analysis.opennlp.OpenNLPPOSFilter";

    /**
     * Runs the analysis request and writes one {@code text='…',type='…'} line
     * per token to standard error.
     *
     * @param args ignored
     * @throws Exception if the request fails or the response does not have the
     *         expected structure
     */
    public static void main(String[] args) throws Exception {
        String fieldName = "field_text_opennlp";
        String coreName = "core_nlp";
        String text = "Hello, I'm a data engineer of Nissan Motor.";

        // Document
        Map<String, SolrInputField> fields = new HashMap<>();
        SolrInputDocument doc = new SolrInputDocument(fields);
        // Document Field
        doc.setField("id", "0");
        doc.setField(fieldName, text);

        // Request
        DocumentAnalysisRequest request = new DocumentAnalysisRequest();
        request.addDocument(doc);

        String solrLocation = "http://localhost:8983/solr/" + coreName;

        // NLP Client: closed automatically so its HTTP resources are released
        // even when the request throws.
        NamedList<Object> response;
        try (SolrClient client = new HttpSolrClient.Builder(solrLocation).build()) {
            // NLP Response
            response = client.request(request);
        }

        // Get analysis response. Walk: "analysis" -> first entry (only one
        // document was sent) -> fieldName -> "index" -> first entry -> POS stage.
        NamedList<?> analysis = asNamedList(response.get("analysis"), "'analysis' section");
        NamedList<?> document = asNamedList(analysis.getVal(0), "analysed document");
        NamedList<?> field = asNamedList(document.get(fieldName), "field '" + fieldName + "'");
        NamedList<?> index = asNamedList(field.get("index"), "'index' section");
        NamedList<?> stages = asNamedList(index.getVal(0), "analysis stage list");

        List<?> posTokens = (List<?>) Objects.requireNonNull(
                stages.get(POS_FILTER_STAGE),
                "no tokens reported for stage " + POS_FILTER_STAGE);

        for (Object token : posTokens) {
            NamedList<?> attributes = (NamedList<?>) token;
            System.err.println("text='" + attributes.get("text") + "',type='"
                    + attributes.get("type") + "'");
        }
    }

    /**
     * Casts one node of the analysis response to a {@link NamedList}.
     *
     * @param value the node taken from the response, possibly {@code null}
     * @param description human-readable name of the node, used in the error message
     * @return {@code value} as a {@code NamedList}
     * @throws NullPointerException if {@code value} is {@code null}
     * @throws ClassCastException if {@code value} is not a {@code NamedList}
     */
    private static NamedList<?> asNamedList(Object value, String description) {
        return (NamedList<?>) Objects.requireNonNull(
                value, description + " is missing from the analysis response");
    }
}

結果

text='Hello',type='UH'
text=',',type=','
text='I',type='PRP'
text=''m',type='VBP'
text='a',type='DT'
text='data',type='NN'
text='engineer',type='NN'
text='of',type='IN'
text='Nissan',type='NNP'
text='Motor',type='NNP'
text='.',type='.'