Can Whoosh search words like "C++" or "wi-fi"?

129 views
Skip to first unread message

Fred Ivanov

unread,
Mar 22, 2018, 2:32:41 PM (3/22/18)
to Whoosh
I cannot find documents that contain "C++".

Tao Wang

unread,
Dec 22, 2020, 11:48:44 PM (12/22/20)
to Whoosh

```
import os

from whoosh.analysis import StopFilter, CompositeAnalyzer, RegexTokenizer, LowercaseFilter
from whoosh.fields import Schema, TEXT, ID
from whoosh.index import create_in, open_dir
from whoosh.qparser import QueryParser

# Demo: index one document and search it word by word, showing that tokens
# such as "c++" and "wi-fi" are searchable when the field is analyzed with a
# custom regex tokenizer.

# Verbose-mode token pattern. Alternatives are tried in order, so the more
# specific forms (abbreviations, "word++") are listed before plain words.
expression = r'''(?x) # set flag to allow verbose regexps
(?:[A-Z]\.)+ # abbreviations, e.g. U.S.A.
|\w+\+\+ # words followed by "++", e.g. C++
|\$?\d+(?:\.\d+)?%? # currency and percentages, e.g. $12.40, 82%
|\w+(?:-\w+)* # words with optional internal hyphens, e.g. wi-fi
|\.\.\. # ellipsis
|(?:[\]\[.,;"'?():\-_`]) # these are separate tokens; includes ], [
'''
# NOTE: the punctuation class above escapes "-", "[" and "]" explicitly.
# Written as ":-_" the hyphen forms a character range (":" through "_")
# rather than matching a literal "-".

# Empty stop list and minsize=1 so that no token is discarded; a default
# StopFilter() removes common words and every one-character token, which
# would throw away the punctuation tokens produced by the pattern.
stop_filter = StopFilter(stoplist=[], minsize=1)
analyzer = CompositeAnalyzer(
    RegexTokenizer(expression=expression),
    LowercaseFilter(),
    stop_filter,
)
schema = Schema(content=TEXT(stored=True, analyzer=analyzer))

# create_in() needs the index directory to exist before it is called.
index_dir = "match_cpp"
os.makedirs(index_dir, exist_ok=True)
ix = create_in(index_dir, schema)

writer = ix.writer()
content = 'c++ wi-fi hello world'
writer.add_document(content=content)
writer.commit()

# The parser does not depend on the word being looked up, so build it once.
parser = QueryParser('content', ix.schema)
with ix.searcher() as searcher:
    for word in content.split():
        query = parser.parse(word)
        print(query)
        for result in searcher.search(query):
            print(result)
        print()
```
Reply all
Reply to author
Forward
0 new messages