IV. Span Query
SpanQuery 是 Lucene 1.4 中新增的一种比较有用的 Query,它可以保存更多有用的信息。它有如下几个子类:
|
SpanQuery type |
Description |
|
SpanTermQuery |
Used in conjunction with the other span query types. On its own, it is functionally equivalent to TermQuery. |
|
SpanFirstQuery |
Matches spans that occur within the first part of a field. |
|
SpanNearQuery |
Matches spans that occur near one another. |
|
SpanNotQuery |
Matches spans that don't overlap one another. |
|
SpanOrQuery |
Aggregates matches of span queries. |
下面看一个例子:
001 package lia.advsearching; 002 003 import junit.framework.TestCase; 004 import lia.analysis.AnalyzerUtils; 005 import org.apache.lucene.analysis.Analyzer; 006 import org.apache.lucene.analysis.Token; 007 import org.apache.lucene.analysis.WhitespaceAnalyzer; 008 import org.apache.lucene.document.Document; 009 import org.apache.lucene.document.Field; 010 import org.apache.lucene.index.IndexReader; 011 import org.apache.lucene.index.IndexWriter; 012 import org.apache.lucene.index.Term; 013 import org.apache.lucene.search.Hits; 014 import org.apache.lucene.search.IndexSearcher; 015 import org.apache.lucene.search.PhraseQuery; 016 import org.apache.lucene.search.Query; 017 import org.apache.lucene.search.spans.SpanFirstQuery; 018 import org.apache.lucene.search.spans.SpanNearQuery; 019 import org.apache.lucene.search.spans.SpanNotQuery; 020 import org.apache.lucene.search.spans.SpanOrQuery; 021 import org.apache.lucene.search.spans.SpanQuery; 022 import org.apache.lucene.search.spans.SpanTermQuery; 023 import org.apache.lucene.search.spans.Spans; 024 import org.apache.lucene.store.RAMDirectory; 025 026 import java.io.IOException; 027 028 public class SpanQueryTest extends TestCase { 029 private RAMDirectory directory; 030 private IndexSearcher searcher; 031 private IndexReader reader; 032 033 private SpanTermQuery quick; 034 private SpanTermQuery brown; 035 private SpanTermQuery red; 036 private SpanTermQuery fox; 037 private SpanTermQuery lazy; 038 private SpanTermQuery sleepy; 039 private SpanTermQuery dog; 040 private SpanTermQuery cat; 041 private Analyzer analyzer; 042 043 protected void setUp() throws Exception { 044 directory = new RAMDirectory(); 045 046 analyzer = new WhitespaceAnalyzer(); 047 IndexWriter writer = new IndexWriter(directory, 048 analyzer, true); 049 050 Document doc = new Document(); 051 doc.add(Field.Text("f", 052 "the quick brown fox jumps over the lazy dog")); // 添加doc1 053 writer.addDocument(doc); 054 055 doc = new Document(); 056 
doc.add(Field.Text("f", 057 "the quick red fox jumps over the sleepy cat"));// 添加toc2 058 writer.addDocument(doc); 059 060 writer.close(); 061 062 searcher = new IndexSearcher(directory); 063 reader = IndexReader.open(directory); 064 065 quick = new SpanTermQuery(new Term("f", "quick")); //构造SpanTermQuery 该类是其他几个类的基础 066 brown = new SpanTermQuery(new Term("f", "brown")); 067 red = new SpanTermQuery(new Term("f", "red")); 068 fox = new SpanTermQuery(new Term("f", "fox")); 069 lazy = new SpanTermQuery(new Term("f", "lazy")); 070 sleepy = new SpanTermQuery(new Term("f", "sleepy")); 071 dog = new SpanTermQuery(new Term("f", "dog")); 072 cat = new SpanTermQuery(new Term("f", "cat")); 073 } 074 // 下面是3个帮助测试的函数 075 private void assertOnlyBrownFox(Query query)throws Exception { 076 Hits hits = searcher.search(query); 077 assertEquals(1, hits.length()); 078 assertEquals("wrong doc", 0, hits.id(0)); 079 } 080 081 private void assertBothFoxes(Query query) throws Exception { 082 Hits hits = searcher.search(query); 083 assertEquals(2, hits.length()); 084 } 085 086 private void assertNoMatches(Query query) throws Exception { 087 Hits hits = searcher.search(query); 088 assertEquals(0, hits.length()); 089 } 090 091 public void testSpanTermQuery() throws Exception { // 单个TermQuery在功能上和TermQuery相似 092 assertOnlyBrownFox(brown); 093 dumpSpans(brown); /// 参考结果(1) 094 } 095 096 public void testSpanFirstQuery() throws Exception { // 在给定的范围搜索 097 SpanFirstQuery sfq = new SpanFirstQuery(brown, 2); // 前两个 "the quick brown fox jumps over the lazy dog" 098 assertNoMatches(sfq); 099 100 dumpSpans(sfq); 101 102 sfq = new SpanFirstQuery(brown, 3); // 前3个 "the quick brown fox jumps over the lazy dog" 103 dumpSpans(sfq); 104 assertOnlyBrownFox(sfq); 105 } 106 107 public void testSpanNearQuery() throws Exception { 108 SpanQuery[] quick_brown_dog = 109 new SpanQuery[]{quick, brown, dog}; 110 SpanNearQuery snq = 111 new SpanNearQuery(quick_brown_dog, 0, true); // 没有匹配的结果 112 assertNoMatches(snq); 
113 dumpSpans(snq); 114 115 snq = new SpanNearQuery(quick_brown_dog, 4, true);// 没有匹配的结果 116 assertNoMatches(snq); 117 dumpSpans(snq); 118 119 snq = new SpanNearQuery(quick_brown_dog, 5, true); // 120 assertOnlyBrownFox(snq); 121 dumpSpans(snq); 122 123 // interesting - even a sloppy phrase query would require 124 // more slop to match 125 snq = new SpanNearQuery(new SpanQuery[]{lazy, fox}, 3, false); // 注意这个 126 assertOnlyBrownFox(snq); 127 dumpSpans(snq); 128 129 PhraseQuery pq = new PhraseQuery(); 130 pq.add(new Term("f", "lazy")); 131 pq.add(new Term("f", "fox")); 132 pq.setSlop(4); 133 assertNoMatches(pq); 134 135 pq.setSlop(5); 136 assertOnlyBrownFox(pq); 137 } 138 139 public void testSpanNotQuery() throws Exception { 140 SpanNearQuery quick_fox = 141 new SpanNearQuery(new SpanQuery[]{quick, fox}, 1, true); 142 assertBothFoxes(quick_fox); 143 dumpSpans(quick_fox); // 结果看下面(2) 144 145 SpanNotQuery quick_fox_dog = new SpanNotQuery(quick_fox, dog); 146 assertBothFoxes(quick_fox_dog); 147 dumpSpans(quick_fox_dog); 148 149 SpanNotQuery no_quick_red_fox = 150 new SpanNotQuery(quick_fox, red); 151 assertOnlyBrownFox(no_quick_red_fox); 152 dumpSpans(no_quick_red_fox); 153 } 154 155 public void testSpanOrQuery() throws Exception { // 参考结果(3) 156 SpanNearQuery quick_fox = 157 new SpanNearQuery(new SpanQuery[]{quick, fox}, 1, true); 158 159 SpanNearQuery lazy_dog = 160 new SpanNearQuery(new SpanQuery[]{lazy, dog}, 0, true); 161 162 SpanNearQuery sleepy_cat = 163 new SpanNearQuery(new SpanQuery[]{sleepy, cat}, 0, true); 164 165 SpanNearQuery qf_near_ld = 166 new SpanNearQuery( 167 new SpanQuery[]{quick_fox, lazy_dog}, 3, true); 168 assertOnlyBrownFox(qf_near_ld); 169 dumpSpans(qf_near_ld); 170 171 SpanNearQuery qf_near_sc = 172 new SpanNearQuery( 173 new SpanQuery[]{quick_fox, sleepy_cat}, 3, true); 174 dumpSpans(qf_near_sc); 175 176 SpanOrQuery or = new SpanOrQuery( 177 new SpanQuery[]{qf_near_ld, qf_near_sc}); 178 assertBothFoxes(or); 179 dumpSpans(or); 180 } 181 182 
public void testPlay() throws Exception { 183 SpanOrQuery or = new SpanOrQuery(new SpanQuery[]{quick, fox}); 184 dumpSpans(or); 185 186 SpanNearQuery quick_fox = 187 new SpanNearQuery(new SpanQuery[]{quick, fox}, 1, true); 188 SpanFirstQuery sfq = new SpanFirstQuery(quick_fox, 4); 189 dumpSpans(sfq); 190 191 dumpSpans(new SpanTermQuery(new Term("f", "the"))); 192 193 SpanNearQuery quick_brown = 194 new SpanNearQuery(new SpanQuery[]{quick, brown}, 0, false); 195 dumpSpans(quick_brown); 196 197 } 198 199 private void dumpSpans(SpanQuery query) throws IOException { 200 Spans spans = query.getSpans(reader); 201 System.out.println(query + ":"); 202 int numSpans = 0; 203 204 Hits hits = searcher.search(query); 205 float[] scores = new float[2]; 206 for (int i = 0; i < hits.length(); i++) { 207 scores[hits.id(i)] = hits.score(i); 208 } 209 210 while (spans.next()) { 211 numSpans++; 212 213 int id = spans.doc(); 214 Document doc = reader.document(id); 215 216 // for simplicity - assume tokens are in sequential, 217 // positions, starting from 0 218 Token[] tokens = AnalyzerUtils.tokensFromAnalysis( 219 analyzer, doc.get("f")); 220 StringBuffer buffer = new StringBuffer(); 221 buffer.append(" "); 222 for (int i = 0; i < tokens.length; i++) { 223 if (i == spans.start()) { 224 buffer.append("<"); 225 } 226 buffer.append(tokens[i].termText()); 227 if (i + 1 == spans.end()) { 228 buffer.append(">"); 229 } 230 buffer.append(" "); 231 } 232 buffer.append("(" + scores[id] + ") "); 233 System.out.println(buffer); 234 // System.out.println(searcher.explain(query, id)); 235 } 236 237 if (numSpans == 0) { 238 System.out.println(" No spans"); 239 } 240 System.out.println(); 241 } 242 }
共4页 1 2 3 4
|
来源:
Java爱好者
| 作者:
| 发表时间:
2006-11-13 11:13:00
|
|
|