
Desperately seeking: how exactly do I use the Chinese Academy of Sciences ICTCLAS word segmenter with the Demo package bundled with Lucene 3.0.1?

The code below uses the SmartChineseAnalyzer from Lucene's smartcn contrib module. Note that it is written against the Lucene 3.5 API; on Lucene 3.0.1 you would use Version.LUCENE_30 and TermAttribute in place of CharTermAttribute.

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class Indexer {

    private IndexWriter writer;
    private Analyzer analyzer;

    public Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(new File(indexDir));
        analyzer = new SmartChineseAnalyzer(Version.LUCENE_35, true); // Chinese word segmentation
        writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
    }

    // Build the index for a single file
    public void indexFile(File f) throws Exception {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }

    // Print the tokens produced for a piece of text
    public void getTermText(String fieldName, String text) throws IOException {
        // Reuse the token stream to improve performance
        TokenStream stream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
        CharTermAttribute charTerm = stream.addAttribute(CharTermAttribute.class); // term text of each token
        stream.reset();
        while (stream.incrementToken()) {
            System.out.print(charTerm.toString() + " ");
        }
        stream.end();
        stream.close();
    }

    protected Document getDocument(File f) throws Exception {
        Document doc = new Document();
        doc.add(new Field("contents", new FileReader(f)));
        doc.add(new Field("filename", f.getName(),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("fullpath", f.getCanonicalPath(),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    // Commit and release the index writer when indexing is finished
    public void close() throws IOException {
        writer.close();
    }
}
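For reference, a minimal sketch of how you might drive the class above; the index path, the sample file, and the Chinese test sentence are placeholders, so substitute your own.

import java.io.File;

public class IndexerDemo {
    public static void main(String[] args) throws Exception {
        // hypothetical paths, replace with your own
        Indexer indexer = new Indexer("/tmp/lucene-index");
        indexer.indexFile(new File("/tmp/docs/sample.txt"));

        // print how SmartChineseAnalyzer segments a Chinese sentence
        indexer.getTermText("contents", "我是中国人");

        indexer.close();
    }
}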

imdict-chinese-analyzer – the intelligent Chinese word segmentation program used by the imdict smart dictionary; the SmartChineseAnalyzer used above was contributed to Lucene from this project, and its segmentation algorithm is based on ICTCLAS.

/p/imdict-chinese-analyzer/

These should be useful to you.
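If you also want the search side of the Demo to segment queries the same way, the key point is to pass the same analyzer to the QueryParser that was used at index time. A rough sketch under the same Lucene 3.5 assumptions; the index path and the query string are placeholders.

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class Searcher {
    public static void main(String[] args) throws Exception {
        // placeholder index path, must match the one used by Indexer
        Directory dir = FSDirectory.open(new File("/tmp/lucene-index"));
        IndexReader reader = IndexReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);

        // use the same analyzer at query time as at index time
        Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_35, true);
        QueryParser parser = new QueryParser(Version.LUCENE_35, "contents", analyzer);
        Query query = parser.parse("中国");

        // print the stored full path of the top 10 matching documents
        TopDocs hits = searcher.search(query, 10);
        for (ScoreDoc sd : hits.scoreDocs) {
            System.out.println(searcher.doc(sd.doc).get("fullpath"));
        }

        searcher.close();
        reader.close();
        dir.close();
    }
}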