#!/usr/local/bin/ruby
#coding: utf-8
# Point the process at a specific JDK install.
# NOTE(review): presumably Rjb consults JAVA_HOME to locate the JVM, which is
# why this is assigned before rjb is required -- confirm against the Rjb docs.
ENV['JAVA_HOME'] = '/usr/lib/jvm/java-6-sun'
require 'rjb/list'
require 'cgi'

# Make the kuromoji jar (resolved relative to the current working directory)
# available to the JVM before importing any of its classes.
Rjb.add_jar(File.expand_path('./kuromoji-0.7.7.jar'))

# Ruby-side handles for the kuromoji Java classes used by this script.
Token = Rjb.import('org/atilika/kuromoji/Token')
Tokenizer = Rjb.import('org/atilika/kuromoji/Tokenizer')
Mode = Rjb.import('org.atilika.kuromoji.Tokenizer$Mode')
# Handle one CGI request: tokenize the submitted `text` parameter with kuromoji
# using the segmentation mode named by the `mode` parameter, then reply with an
# HTML page that lists each token as "surface:features".
cgi = CGI.new

# Java's Enum.valueOf throws IllegalArgumentException for a blank or unknown
# name, so the raw parameter is validated first and anything unrecognised falls
# back to NORMAL. Mode names are accepted case-insensitively.
SUPPORTED_MODES = %w[NORMAL SEARCH EXTENDED].freeze
DEFAULT_MODE = 'NORMAL'
requested_mode = cgi['mode'].to_s.strip.upcase
mode_name = SUPPORTED_MODES.include?(requested_mode) ? requested_mode : DEFAULT_MODE

tokenizer = Tokenizer.builder.mode(Mode.valueOf(mode_name)).build
tokens = tokenizer.tokenize(cgi['text'])

# Both the surface form and the feature string end up inside HTML, so both are
# escaped; the surface form in particular is taken from the request text.
rows = []
tokens.each do |token|
  surface = CGI.escapeHTML(token.surface_form)
  features = CGI.escapeHTML(token.all_features.to_s)
  rows << "#{surface}:#{features}<br/>"
end
body = rows.join

cgi.out('status' => 'OK',
        'type' => 'text/html',
        'charset' => 'utf-8',
        'language' => 'ja') {
<<HTML
<!DOCTYPE HTML>
<html>
<head>
<title>tokenize text with kuromoji</title>
</head>
<body>
#{body}
<hr/>
<input id="bb" type="button" value="back"/>
<script type="text/javascript">
(function () {
document.getElementById('bb').addEventListener('click', function() {
document.location.href='tokenize.html';
});
})();
</script>
</body>
</html>
HTML
}