#!/usr/bin/python

import sys
# From here on anything written to stderr (tracebacks included) goes to
# stdout, i.e. into the response body, instead of being lost.
sys.stderr = sys.stdout

# CGI response header, followed by the empty line that ends the headers.
sys.stdout.write('Content-Type:text/html\n')
sys.stdout.write('\n')

import cgi
import types
from urllib import urlencode
import urllib2
from xml.sax import make_parser


# configuration options:

# encoding used to decode the incoming query and to encode the output:
ENC = 'iso-8859-1'
# XML parser: 'DOM' or 'SAX'
PARSER = 'DOM'
# attributes to be requested:
ATTRS_SHOWN = 'author title'

# You may either specify the search engine URL directly, or have it
# compiled from the collectionID field sent along with the query.
# If you use collectionID, leave the DEF_engine variable blank
# (i.e. equal to the empty string, '').
# CollectionID will be used to compile a URL like this:
# http://www.insuma.de/cgi-bin/[collectionID]/web2xml.py
# DEF_collectionID is the default collectionID value (so that the
# script does not fail in case collectionID is absent for some reason).
# If you use collectionID, your script will be able to handle
# queries to different collections (depending on what collectionID
# is specified). Usually, collectionID is an integer, but the sample
# collection is identified by the string 'xml':
# DEF_engine = ''
DEF_collectionID = 'xml'

# On the other hand, this feature is rarely useful
# and may lead to some problems. So, if you have no reason for
# using collectionID, you'd better specify the search engine URL here:
DEF_engine = 'http://www.insuma.de/cgi-bin/xml/web2xml.py'


# DEF_collectionID is always defined, so blanking DEF_engine can never
# leave the script without a usable default. While DEF_engine is
# non-empty, DEF_collectionID doesn't matter at all.


#default values:
DEF_entries = '10'
DEF_start_after = '1'
# (they're strings and not integers due to the way we use them.)
DEF_hilight = 'on'
DEF_ranking = 'on'


try:
    data = cgi.parse()
except:
    print "Can't parse sent data!"
    sys.exit()

query        = unicode(data.get('query', [''])[0], ENC)
entries      = int(data.get('show_max', [DEF_entries])[0])
start_after  = int(data.get('start_after', [DEF_start_after])[0])
hilight      = data.get('highlighting', [DEF_hilight])[0]
ranking      = data.get('ranking', [DEF_ranking])[0]

if hilight <> 'on': hilight = 'off'
if ranking <> 'on': ranking = 'off'
# (they're either 'on' or 'off', nothing else.)

# the search engine URL isn't hardcoded in configuration:
if not DEF_engine:
    collectionID = data.get('collectionID', [DEF_collectionID])[0]
    if collectionID <> DEF_collectionID:
        try:
	    # is it integer?
	    collectionID = int(collectionID)
	except:
	    collectionID = DEF_collectionID
        if type(collectionID) == type(1):
	    collectionID = str(collectionID)
    ENGINE = 'http://www.insuma.de/cgi-bin/' + collectionID + '/web2xml.py'
else: ENGINE = DEF_engine
    

print '''<html>
<head>
<title>Test search page</title>
</head>
<body>
<h1>Insuma found you!</h1>
<form action="client.py">
Enter your query here:
<input name="query" size="20" ''',
if query <> '':
    print 'value="' + query.encode(ENC) + '" ',
print '''/>
<input type="hidden" name="entries" value="%i" />
<input type="hidden" name="highlighting" value="%s" />
<input type="hidden" name="ranking" value="%s" />
<input type="submit" value="Go!" />
</form>''' % (entries, hilight, ranking)


########## Here the query to the server is compiled: #################
# If you want to use the script for complicated queries,
# this block should be certainly replaced with something else
# (it may be an example from the tutorial or code you write yourself).
# And don't forget, that you will probably use not only single
# QUERY variable, but a number of variables, depending on your
# search form. So you will need also make changes in other parts
# of the script:
# 1) Getting CGI-variables; 
# 2) Printing search form;
# 3) Printing page navigation (print_pager() function).
if query == '':
    print '<strong>Enter a search query, please!</strong>'
    print '</body></html>'
    sys.exit()

if ranking == 'on':
    ATTRS_SHOWN += ' score'    
if hilight == 'on':
    ATTRS_SHOWN += ' summary'
xml_query = """<query max_results="%s" start_from="%s" show_attrs="%s">
<or>
<condition attr="body" predicate="match" value="%s" />
	<and>
	<condition attr="body" predicate="bmatch" value="&apos;%s&apos;" />
	<condition attr="body" predicate="match" value="%s" />
	</and>
</or>
</query>""" %(entries, start_after, ATTRS_SHOWN, query, query, query)
########################################################################


##### Here the POST message is formed:
data = """--TeRMiNaToR
Content-Disposition: form-data; name="xml_query"
Content-Type: text/xml

<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE query>
""" + xml_query.encode(ENC) + """

--TeRMiNaToR--"""

##### Now the message is sent and the answer is retrieved:
request = urllib2.Request(ENGINE)
request.add_header('Content-type', 'multipart/form-data; boundary="TeRMiNaToR"')
request.add_data(data)

# If you go via a proxy, uncomment this
# request.set_proxy('www-cache.informatik.uni-tuebingen.de:3128', 'http')

try:
    answer = urllib2.urlopen(request)
    # uncomment the next two lines to see raw output
    # print answer.read()
    # sys.exit(0)
except urllib2.HTTPError, a:
    print a.read()
    sys.exit()
except urllib2.URLError:
    print "<strong>Can't connect to the search engine!</strong></body></html>"
    sys.exit() 



##### A few helper functions used by the result-printing code below:

def print_pager():
    """ This function prints page navigation:
    <<Previous | 1 | 2 | 3 | 4 | Next>>"""
    
    # if there's only one page of results:
    if total_hits <= entries:
        return ''
    
    map = {'query': query.encode(ENC), 'show_max': str(entries)}
    
    # if these are equal to default values, no need to send them:
    if hilight <> DEF_hilight:
        map['highlighting'] = hilight
    if ranking <> DEF_ranking:
        map['score'] = ranking
    if not DEF_engine and (collectionID <> DEF_collectionID):
        map['collectionID'] = collectionID

    link = 'client.py?' + urlencode(map)
    num_of_pages = total_hits/entries + 1
    if start_after > 1:
        print '<a href="%s&start_after=%i">&lt;&lt;Previous</a> |' % (link, start_after - entries)
    for i in range(num_of_pages):
    # same as "for (i=0; i<num_of_pages, i++)" 
        if start_after <> i*entries + 1:
            print '<a href="%s&start_after=%i">%i</a> ' % (link, i*entries + 1, i + 1)
        else:
            print str(i + 1) + ' '
        #if not last page, print "pipe" character:
        if i < num_of_pages - 1:
            print '| '
        
    if total_hits >= entries + start_after:
        print '| <a href="%s&start_after=%i">Next&gt;&gt;</a>' % (link, entries + start_after)


def print_error(error_class, error_message):
    """Print an error report based on an ERROR element of the XML result.

    error_class   -- class of the error; the empty string means no ERROR
                     element was involved and the failure came from the
                     XML parser
    error_message -- text of the ERROR element; only used when
                     error_class is not empty
    """
    if error_class == '':
        # error_class isn't set. It means the exception was triggered
        # by an xml error.
        print "XML parser encountered an error!"
        return

    # error_class is set. It means the exception was triggered
    # by an ERROR element in the Insuma's XML.
    print '<strong>'
    if error_class == 'internal':
        print 'The query wasn\'t processed due to an internal error:'
    else:
        print 'Insuma search engine encountered an error in your query:'
    print '</strong><pre>'
    # The message comes from the server's answer, so it is HTML-escaped
    # before it is written into the page. Characters that ENC cannot
    # represent are replaced instead of raising an encoding error.
    print cgi.escape(error_message).encode(ENC, 'replace')
    print '</pre>'


######## Here starts result parsing & printing. ##########

################# Using SAX version ######################
if PARSER == 'SAX':
    import tut_handler

    parser = make_parser()
    handler = tut_handler.insuma_handler()
    parser.setContentHandler(handler)

    try:
        parser.parse(answer)
        total_hits = handler.total_hits
        if not total_hits:
            print "<strong>No matching documents found!</strong>"
        print_pager()
    except:
        print_error(tut_handler.error_class, tut_handler.error_message)


################# Using DOM version ######################

elif PARSER == 'DOM':
	from xml.dom.minidom import parseString
	from cDoc import *

	s = answer.read()
	try:
		result = parseString(s)
		result_element = result.getElementsByTagName('result')[0]

		error = result.getElementsByTagName('error')
		if len(error) == 0:
			documents = result.getElementsByTagName('document')
		        total_hits = int(result_element.attributes['total_hits'].nodeValue)
			if len(documents) == 0:
				print "<strong>No matching documents found!</strong>"
			for entry in documents:
				doc = cDocument(entry)
				doc.print_doc()
			print_pager()
		else:
			error = error[0]
			error_class = error.attributes['class'].nodeValue
			error_message = getElementContent(error)
			print_error(error_class, error_message)

	except:
		print s
	
################# No parser chosen ######################

else:
	print 'XML parser not chosen! Can\'t proceed! Bye-bye!'

######### Here ends result parsing & printing ##########


print '</body></html>'

