除了最常用的 TermQuery 與 NumericRangeQuery 外,Lucene 還提供了許多查詢方式:
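- 所有索引查詢 (MatchAllDocsQuery)
- 字首查詢 (PrefixQuery)
- 字首範圍查詢 (TermRangeQuery)
- 布林查詢 (BooleanQuery)
- 片語查詢 (PhraseQuery)
- 鬼牌查詢 (WildcardQuery)
- 模糊查詢 (FuzzyQuery)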
/**
 * JUnit 3 test case that demonstrates several Lucene query types
 * (match-all, prefix, term range, boolean, phrase, wildcard and fuzzy)
 * against a small in-memory index of book titles.
 *
 * <p>Each test opens its reader/searcher through {@link #createSearcher()},
 * which records them in instance fields so that {@link #tearDown()} can
 * release them together with the writer and the directory.
 *
 * <p>Query terms are written in lower case throughout; presumably the
 * {@code StandardAnalyzer} used at index time lower-cases the titles
 * (NOTE(review): confirm against the analyzer documentation).
 */
public class SearchTestCase extends TestCase {
  private static final Version VERSION = Version.LUCENE_36;
  private static final String F_TITLE = "title";

  /** Titles indexed by {@link #setUp()}, one document per entry. */
  private static final String[] TITLES = {
    "Spring Core",
    "SpringMVC",
    "Spring AOP",
    "Hibernate Core",
    "Hibernate Search",
    "Hibernator",
    "Lucene Core",
    "jQuery in action",
    "Java in a nutshell, edition",
    "Java in a nutshell, 2edition",
    "Java in a nutshell, 3edition",
    "Java in a nutshell, 4edition",
  };

  private final Directory directory = new RAMDirectory();
  private IndexWriter writer;
  private IndexReader reader;
  private IndexSearcher searcher;

  /**
   * Builds the index: creates the writer, adds one document per title and
   * commits so that readers opened afterwards can see the documents.
   */
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    this.writer = this.createWriter();
    // To trace index writes while debugging:
    // this.writer.setInfoStream(System.out);
    System.out.println("addDocument...");
    for (String title : TITLES) {
      this.writer.addDocument(this.createDocument(title));
    }
    System.out.println("commit...");
    // With large data sets the commit can be done in batches.
    // After a commit (without closing the writer) an IndexReader can be
    // opened; without a commit or close it cannot be used.
    this.writer.commit();
  }

  /**
   * Creates a document holding the given title in the stored, analyzed
   * {@code title} field.
   *
   * @param title the title text to index
   * @return a new document containing only the title field
   */
  private Document createDocument(String title) {
    Document doc = new Document();
    doc.add(new Field(SearchTestCase.F_TITLE, title, Field.Store.YES,
        Field.Index.ANALYZED));
    return doc;
  }

  /**
   * Releases the searcher, reader, writer and directory opened for the
   * test, then delegates to the superclass.
   */
  @Override
  protected void tearDown() throws Exception {
    try {
      this.closeSearcher();
      this.closeWriter();
      this.directory.close();
    }
    finally {
      super.tearDown();
    }
  }

  /**
   * Matches every document in the index; the score is not meaningful here.
   *
   * @throws IOException if the index cannot be read
   */
  public void testMatchAllDocsQuery() throws IOException {
    System.out.println("testMatchAllDocsQuery...");
    IndexSearcher searcher = this.createSearcher();
    TopDocs hits = searcher.search(new MatchAllDocsQuery(), 100);
    this.showDocuments(searcher, hits);
    assertEquals(TITLES.length, hits.totalHits);
  }

  /**
   * Contrasts an exact term query with a prefix query on the same text.
   *
   * @throws IOException if the index cannot be read
   */
  public void testPrefixQuery() throws IOException {
    System.out.println("testPrefixQuery...");
    IndexSearcher searcher = this.createSearcher();
    // TermQuery: only the exact term "spring".
    TopDocs hits = searcher.search(new TermQuery(new Term(
        SearchTestCase.F_TITLE, "spring")), 100);
    this.showDocuments(searcher, hits);
    assertEquals(2, hits.totalHits);
    // PrefixQuery: every term starting with "spring", so "SpringMVC" too.
    hits = searcher.search(new PrefixQuery(new Term(SearchTestCase.F_TITLE,
        "spring")), 100);
    this.showDocuments(searcher, hits);
    assertEquals(3, hits.totalHits);
  }

  /**
   * Term range query. The original author notes that it has to scan the
   * index and can therefore be a performance concern.
   *
   * @throws IOException if the index cannot be read
   */
  public void testTermRangeQuery() throws IOException {
    System.out.println("testTermRangeQuery...");
    IndexSearcher searcher = this.createSearcher();
    // The original author observed that an upper bound of lower-case "l"
    // does not find "Lucene"; presumably this is because the term "lucene"
    // sorts after "l", so the upper bound has to lie beyond it ("m" here).
    TopDocs hits = searcher.search(new TermRangeQuery(SearchTestCase.F_TITLE,
        "h", "m", true, true), 100);
    this.showDocuments(searcher, hits);
    assertEquals(9, hits.totalHits);
  }

  /**
   * Boolean queries combining MUST, SHOULD and MUST_NOT clauses, including
   * boolean queries nested inside another boolean query.
   *
   * @throws IOException if the index cannot be read
   */
  public void testBooleanQuery() throws IOException {
    System.out.println("testBooleanQuery...");
    IndexSearcher searcher = this.createSearcher();

    // +spring core
    BooleanQuery bq = new BooleanQuery();
    // MUST - the term is required.
    bq.add(new TermQuery(new Term(SearchTestCase.F_TITLE, "spring")),
        BooleanClause.Occur.MUST);
    // SHOULD - optional; per the original note, a match raises the score.
    bq.add(new TermQuery(new Term(SearchTestCase.F_TITLE, "core")),
        BooleanClause.Occur.SHOULD);
    TopDocs hits = searcher.search(bq, 100);
    this.showDocuments(searcher, hits);
    assertEquals(2, hits.totalHits);

    // -spring core lucene
    bq = new BooleanQuery();
    // MUST_NOT - the term must be absent.
    bq.add(new TermQuery(new Term(SearchTestCase.F_TITLE, "spring")),
        BooleanClause.Occur.MUST_NOT);
    bq.add(new TermQuery(new Term(SearchTestCase.F_TITLE, "core")),
        BooleanClause.Occur.SHOULD);
    bq.add(new TermQuery(new Term(SearchTestCase.F_TITLE, "lucene")),
        BooleanClause.Occur.SHOULD);
    hits = searcher.search(bq, 100);
    this.showDocuments(searcher, hits);
    assertEquals(2, hits.totalHits);

    // (+spring -core) OR (+hibernate -core): both sub-queries are added
    // as SHOULD clauses. A BooleanQuery accepts any Query as a clause,
    // including another BooleanQuery.
    bq = new BooleanQuery();
    bq.add(new TermQuery(new Term(SearchTestCase.F_TITLE, "spring")),
        BooleanClause.Occur.MUST);
    bq.add(new TermQuery(new Term(SearchTestCase.F_TITLE, "core")),
        BooleanClause.Occur.MUST_NOT);
    BooleanQuery bq2 = new BooleanQuery();
    bq2.add(new TermQuery(new Term(SearchTestCase.F_TITLE, "hibernate")),
        BooleanClause.Occur.MUST);
    bq2.add(new TermQuery(new Term(SearchTestCase.F_TITLE, "core")),
        BooleanClause.Occur.MUST_NOT);
    BooleanQuery bq3 = new BooleanQuery();
    bq3.add(bq, BooleanClause.Occur.SHOULD);
    bq3.add(bq2, BooleanClause.Occur.SHOULD);
    hits = searcher.search(bq3, 100);
    this.showDocuments(searcher, hits);
    assertEquals(2, hits.totalHits);
  }

  /**
   * Phrase queries with different slop values.
   *
   * @throws IOException if the index cannot be read
   */
  public void testPhraseQuery() throws IOException {
    System.out.println("testPhraseQuery...");
    IndexSearcher searcher = this.createSearcher();
    // "hibernate" and "core" are adjacent, so a slop of 0 is enough.
    TopDocs hits = searcher.search(this.createPhraseQuery(0, new String[] {
        "hibernate", "core"
    }), 100);
    this.showDocuments(searcher, hits);
    assertEquals(1, hits.totalHits);
    // One word sits between "jquery" and "action" in the title, so the
    // slop is 1.
    hits = searcher.search(this.createPhraseQuery(1, new String[] {
        "jquery", "action"
    }), 100);
    this.showDocuments(searcher, hits);
    assertEquals(1, hits.totalHits);
  }

  /**
   * Builds a phrase query on the title field.
   *
   * @param slop the slop value to set on the query
   * @param terms the phrase terms, added in the given order
   * @return the configured phrase query
   */
  private PhraseQuery createPhraseQuery(int slop, String[] terms) {
    PhraseQuery q = new PhraseQuery();
    q.setSlop(slop);
    for (String s : terms) {
      q.add(new Term(SearchTestCase.F_TITLE, s));
    }
    return q;
  }

  /**
   * Wildcard queries. The original author notes that they have to scan the
   * index and can therefore be a performance concern.
   *
   * @throws IOException if the index cannot be read
   */
  public void testWildcardQuery() throws IOException {
    System.out.println("testWildcardQuery...");
    IndexSearcher searcher = this.createSearcher();
    // '*' stands for zero or more characters: everything starting with
    // "spring".
    TopDocs hits = searcher.search(new WildcardQuery(new Term(
        SearchTestCase.F_TITLE, "spring*")), 100);
    this.showDocuments(searcher, hits);
    assertEquals(3, hits.totalHits);
    // '*' may also lead the pattern: everything ending with "core".
    hits = searcher.search(new WildcardQuery(new Term(SearchTestCase.F_TITLE,
        "*core")), 100);
    this.showDocuments(searcher, hits);
    assertEquals(3, hits.totalHits);
    // '?' stands for exactly one character: every "<x>edition" term.
    hits = searcher.search(new WildcardQuery(new Term(SearchTestCase.F_TITLE,
        "?edition")), 100);
    this.showDocuments(searcher, hits);
    assertEquals(3, hits.totalHits);
  }

  /**
   * Fuzzy query. The original author notes that it has to scan the index
   * and can therefore be a performance concern.
   *
   * @throws IOException if the index cannot be read
   */
  public void testFuzzyQuery() throws IOException {
    System.out.println("testFuzzyQuery...");
    IndexSearcher searcher = this.createSearcher();
    // "Hibernator" is matched as well.
    TopDocs hits = searcher.search(new FuzzyQuery(new Term(
        SearchTestCase.F_TITLE, "hibernate")), 100);
    this.showDocuments(searcher, hits);
    assertEquals(3, hits.totalHits);
  }

  /**
   * Prints the title and score of every hit to standard output.
   *
   * @param searcher the searcher used to load the hit documents
   * @param hits the search result to print
   */
  private void showDocuments(IndexSearcher searcher, TopDocs hits)
      throws CorruptIndexException, IOException {
    for (ScoreDoc sd : hits.scoreDocs) {
      Document d = searcher.doc(sd.doc);
      System.out.println(d.get(SearchTestCase.F_TITLE) + " - " + sd.score);
    }
  }

  /**
   * Opens a writer on the test directory in CREATE mode using a
   * {@code StandardAnalyzer}.
   */
  private IndexWriter createWriter() throws CorruptIndexException,
      LockObtainFailedException, IOException {
    IndexWriterConfig config = new IndexWriterConfig(SearchTestCase.VERSION,
        new StandardAnalyzer(SearchTestCase.VERSION));
    config.setOpenMode(OpenMode.CREATE);
    return new IndexWriter(this.directory, config);
  }

  /**
   * Opens a reader and a searcher on the test directory and remembers both
   * in the instance fields so that {@link #tearDown()} can close them.
   * Any searcher/reader left over from an earlier call is closed first.
   *
   * @return the newly opened searcher
   */
  private IndexSearcher createSearcher() throws CorruptIndexException,
      IOException {
    this.closeSearcher();
    this.reader = this.createReader();
    this.searcher = new IndexSearcher(this.reader);
    return this.searcher;
  }

  /** Opens a new reader on the test directory; the caller owns it. */
  private IndexReader createReader() throws CorruptIndexException, IOException {
    return IndexReader.open(this.directory);
  }

  /** Closes the writer if one is open; failures are only reported. */
  private void closeWriter() {
    if (this.writer != null) {
      try {
        this.writer.close();
      }
      catch (IOException e) {
        // Best-effort cleanup: report and carry on closing the rest.
        e.printStackTrace();
      }
      this.writer = null;
    }
  }

  /**
   * Closes the tracked searcher, if any, and then the tracked reader;
   * failures are only reported.
   */
  private void closeSearcher() {
    if (this.searcher != null) {
      try {
        this.searcher.close();
      }
      catch (IOException e) {
        // Best-effort cleanup: report and carry on closing the rest.
        e.printStackTrace();
      }
      this.searcher = null;
    }
    this.closeReader();
  }

  /** Closes the tracked reader if one is open; failures are only reported. */
  private void closeReader() {
    if (this.reader != null) {
      try {
        this.reader.close();
      }
      catch (IOException e) {
        // Best-effort cleanup: report and carry on closing the rest.
        e.printStackTrace();
      }
      this.reader = null;
    }
  }
}
---
沒有留言:
張貼留言