#!/usr/bin/env python
#
# Simple command-line search script.
#
# Copyright (C) 2003 James Aylett
# Copyright (C) 2004,2007,2009,2013 Olly Betts
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
# USA

import sys
import xapian

# A database path and at least one query word must be given.
if len(sys.argv) < 3:
    print("Usage: %s PATH_TO_DATABASE QUERY" % sys.argv[0], file=sys.stderr)
    sys.exit(1)

try:
    # Open the database read-only and start an enquire session on it.
    db = xapian.Database(sys.argv[1])
    session = xapian.Enquire(db)

    # Everything after the database path is the query; joining the words
    # with spaces means simple queries need no quoting at the shell level.
    raw_query = ' '.join(sys.argv[2:])

    # Build a parser which stems with the English stemmer (STEM_SOME
    # strategy) and parse the query string into a Xapian::Query object.
    english = xapian.Stem("english")
    parser = xapian.QueryParser()
    parser.set_stemmer(english)
    parser.set_database(db)
    parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    parsed = parser.parse_query(raw_query)
    print("Parsed query is: %s" % str(parsed))

    # Ask for the top 10 results.
    session.set_query(parsed)
    results = session.get_mset(0, 10)

    # Report the estimated total, then each returned match.
    print("%i results found." % results.get_matches_estimated())
    print("Results 1-%i:" % results.size())

    for hit in results:
        # The document data is decoded as UTF-8 before printing.
        text = hit.document.get_data().decode('utf-8')
        print("%i: %i%% docid=%i [%s]" % (hit.rank + 1, hit.percent, hit.docid, text))

except Exception as err:
    print("Exception: %s" % str(err), file=sys.stderr)
    sys.exit(1)
#!/usr/bin/env python
#
# Index each paragraph of a text file as a Xapian document.
#
# Copyright (C) 2003 James Aylett
# Copyright (C) 2004,2007,2013,2014 Olly Betts
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
# USA

import sys
import xapian
import string

if len(sys.argv) != 2:
    print("Usage: %s PATH_TO_DATABASE" % sys.argv[0], file=sys.stderr)
    sys.exit(1)


def index_paragraph(database, indexer, para):
    """Add `para` to `database` as one new document.

    The paragraph text is stored as the document data and is also indexed
    through `indexer` (a xapian.TermGenerator).
    """
    doc = xapian.Document()
    doc.set_data(para)

    indexer.set_document(doc)
    indexer.index_text(para)

    # Add the document to the database.
    database.add_document(doc)


try:
    # Open the database for update, creating a new database if necessary.
    database = xapian.WritableDatabase(sys.argv[1], xapian.DB_CREATE_OR_OPEN)

    indexer = xapian.TermGenerator()
    stemmer = xapian.Stem("english")
    indexer.set_stemmer(stemmer)

    # Stripped, non-blank lines of the paragraph currently being read.
    para_lines = []
    for line in sys.stdin:
        line = line.strip()
        if line:
            para_lines.append(line)
        elif para_lines:
            # We've reached the end of a paragraph, so index it.
            index_paragraph(database, indexer, ' '.join(para_lines))
            para_lines = []

    # Input which does not end with a blank line leaves a final paragraph
    # pending; index it too rather than silently dropping it.
    if para_lines:
        index_paragraph(database, indexer, ' '.join(para_lines))

    # Commit explicitly so that a failure is reported by the handler below
    # instead of happening implicitly when the database object is destroyed.
    database.commit()

except Exception as e:
    print("Exception: %s" % str(e), file=sys.stderr)
    sys.exit(1)
#!/usr/bin/env python
#
# Simple example script demonstrating query expansion.
#
# Copyright (C) 2003 James Aylett
# Copyright (C) 2004,2006,2007,2012,2013,2014 Olly Betts
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
# USA

import sys
import xapian

# We require at least two command line arguments.
if len(sys.argv) < 3:
    print("Usage: %s PATH_TO_DATABASE QUERY [-- [DOCID...]]" % sys.argv[0], file=sys.stderr)
    sys.exit(1)

try:
    # Open the database for searching.
    database = xapian.Database(sys.argv[1])

    # Start an enquire session.
    enquire = xapian.Enquire(database)

    # Combine command line arguments up to "--" with spaces between
    # them, so that simple queries don't have to be quoted at the shell
    # level.  The first query argument is always taken as query text;
    # the "--" marker is only looked for in the arguments after it.
    trailing = sys.argv[3:]
    if '--' in trailing:
        marker = trailing.index('--')
        query_words = trailing[:marker]
        docid_args = trailing[marker + 1:]
    else:
        query_words = trailing
        docid_args = []
    query_string = ' '.join([sys.argv[2]] + query_words)

    # Create an RSet with the listed docids in.
    reldocs = xapian.RSet()
    for docid_arg in docid_args:
        reldocs.add_document(int(docid_arg))

    # Parse the query string to produce a Xapian::Query object.
    qp = xapian.QueryParser()
    stemmer = xapian.Stem("english")
    qp.set_stemmer(stemmer)
    qp.set_database(database)
    qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    query = qp.parse_query(query_string)

    # Stays empty when the query is empty, so the fallback loop below has
    # something to iterate over instead of hitting an unbound name.
    # NOTE(review): the original indentation was lost; this assumes the
    # search below ran only for a non-empty query -- confirm.
    matches = []

    if not query.empty():
        print("Parsed query is: %s" % str(query))

        # Find the top 10 results for the query.
        enquire.set_query(query)
        matches = enquire.get_mset(0, 10, reldocs)

        # Display the results.
        print("%i results found." % matches.get_matches_estimated())
        print("Results 1-%i:" % matches.size())

        for m in matches:
            # Decode the document data so it prints as text rather than
            # as a bytes repr (b'...').
            data = m.document.get_data().decode('utf-8')
            print("%i: %i%% docid=%i [%s]" % (m.rank + 1, m.percent, m.docid, data))

    # Put the top 5 (at most) docs into the rset if rset is empty
    if reldocs.empty():
        for rel_count, m in enumerate(matches, 1):
            reldocs.add_document(m.docid)
            if rel_count == 5:
                break

    # Get the suggested expand terms
    eterms = enquire.get_eset(10, reldocs)
    print("%i suggested additional terms" % eterms.size())
    for k in eterms:
        # Terms are decoded for the same reason as the document data above.
        print("%s: %f" % (k.term.decode('utf-8'), k.weight))

except Exception as e:
    print("Exception: %s" % str(e), file=sys.stderr)
    sys.exit(1)
#!/usr/bin/env python
#
# Simple command-line match decider example
#
# Copyright (C) 2003 James Aylett
# Copyright (C) 2004,2007,2009,2013 Olly Betts
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
# USA

import sys
import xapian

# This example runs a query like simplesearch does, but uses a MatchDecider
# (mymatchdecider) to discard any document for which value 0 is equal to
# the string passed as the second command line argument.

if len(sys.argv) < 4:
    print("Usage: %s PATH_TO_DATABASE AVOID_VALUE QUERY" % sys.argv[0], file=sys.stderr)
    sys.exit(1)


class mymatchdecider(xapian.MatchDecider):
    """Match decider which rejects documents whose value slot 0 equals
    a given value."""

    def __init__(self, avoidvalue):
        """Remember the value to avoid.

        `avoidvalue` may be str or bytes.  A str is encoded to bytes here
        because the decider compares it with what get_value() returns, and
        a bytes object never compares equal to a str (which would make the
        decider accept every document).
        """
        xapian.MatchDecider.__init__(self)
        if isinstance(avoidvalue, str):
            # surrogateescape round-trips command line arguments which
            # were not valid UTF-8 in the first place.
            avoidvalue = avoidvalue.encode('utf-8', 'surrogateescape')
        self.avoidvalue = avoidvalue

    def __call__(self, doc):
        """Return True to keep `doc`, False to discard it."""
        return doc.get_value(0) != self.avoidvalue


try:
    # Open the database for searching.
    database = xapian.Database(sys.argv[1])

    # Start an enquire session.
    enquire = xapian.Enquire(database)

    # Combine the rest of the command line arguments with spaces between
    # them, so that simple queries don't have to be quoted at the shell
    # level.
    avoid_value = sys.argv[2]
    query_string = ' '.join(sys.argv[3:])

    # Parse the query string to produce a Xapian::Query object.
    qp = xapian.QueryParser()
    stemmer = xapian.Stem("english")
    qp.set_stemmer(stemmer)
    qp.set_database(database)
    qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    query = qp.parse_query(query_string)
    print("Parsed query is: %s" % str(query))

    # Find the top 10 results for the query.
    enquire.set_query(query)
    mdecider = mymatchdecider(avoid_value)
    matches = enquire.get_mset(0, 10, None, mdecider)

    # Display the results.
    print("%i results found." % matches.get_matches_estimated())
    print("Results 1-%i:" % matches.size())

    for m in matches:
        # Decode the document data so it prints as text rather than as a
        # bytes repr (b'...').
        data = m.document.get_data().decode('utf-8')
        print("%i: %i%% docid=%i [%s]" % (m.rank + 1, m.percent, m.docid, data))

except Exception as e:
    print("Exception: %s" % str(e), file=sys.stderr)
    sys.exit(1)