#!/usr/bin/env python3

"""Experimentation of the Internet-Draft defining a HTTP QUERY method
draft-ietf-httpbis-safe-method-w-body
<https://datatracker.ietf.org/doc/draft-ietf-httpbis-safe-method-w-body/>. It
is a search engine for my blog <https://www.bortzmeyer.org/>.

A public version is installed at
<https://www.bortzmeyer.org/methodquery>. You can send the body of the
query as plain text, encoded Web query or JSON. For plain text, the
body is the search term. For encoded query and JSON, the parameters
are "query" (the term you searxch) and "limit" (the maximum number of
results). You can request the result in plain text, HTML or JSON. Here
are examples with curl (note that the output format is requested with
the HTTP Accept header):

# Encoded Web query
curl -X QUERY -d query=foobar https://www.bortzmeyer.org/methodquery
# Plain text query
curl -X QUERY -H "Content-Type: text/plain" -d foobar https://www.bortzmeyer.org/methodquery
# JSON query
curl -X QUERY -H "Content-Type: application/json" -d ' {"query": "foobar", "limit": 3} '   https://www.bortzmeyer.org/methodquery
# Encoded Web query, HTML output
curl -X QUERY -H "Accept: text/html" -d 'query=foobar&limit=5'  https://www.bortzmeyer.org/methodquery
# Encoded Web query, JSON output
curl -X QUERY -H "Accept: application/json" -d 'query=foobar&limit=5'  https://www.bortzmeyer.org/methodquery

If you want to test it locally, you'll probably have to replace the
query() function and the initialisation code (at the end) with
something querying a local data source (this version queries my
database).

You can run it with 'python method-query.py' or through a WSGI server.

TODO: implement HEAD (useful with the return of Accept-Query)

"""

# The media types we accept in requests. The list is also sent back in
# the Accept-Query header of every response.
ACCEPTED = ["text/plain", "application/x-www-form-urlencoded", "application/json"]
# The media types we can produce in responses. The last one is the
# default, used when the request has no Accept header.
PRODUCED = ["application/json", "text/html", "text/plain"] # Must be sorted in the order
                                       # of increasing preference.
# Maximum number of URLs in the response, when the request does not
# set its own "limit"
MAX = 10
# Time to live of the answer, used for the Expires and Cache-Control
# headers of the successful responses
TTL = 3600 # seconds
# Locale given to locale.setlocale()
LOCALE = "fr_FR.UTF-8"

# Do not touch: the path that requests must use (it is compared with
# PATH_INFO). It is changed to "/" when the program runs its own HTTP
# server.
PATH = ""

# Libraries we need:

# https://github.com/falconry/python-mimeparse. Also in PyPi, under
# the name python-mimeparse (NOT mimeparse).  WARNING: the
# python3-mimeparse package in Debian "trixie" (stable, as of
# 2025-12-17) is not the right one.
import mimeparse

# http://www.yattag.org/. Also in PyPi
from yattag import Doc

# https://www.psycopg.org/. Also in PyPi
import psycopg2

# Standard library :

import locale
import re
import urllib.parse
import json
import datetime

def pack(start_response, status, data, ctype=PRODUCED[-1]):
    """Send the status and the headers, and return the encoded body.

    start_response -- the WSGI callable; it is called once, with the
                      status and the list of response headers.
    status -- the complete status line, for instance "200 OK".
    data -- the body, as a string. It is always sent encoded in UTF-8.
    ctype -- the media type announced in Content-type (by default the
             last element of PRODUCED).

    Returns a list with one element, the encoded body, which is the
    iterable a WSGI application has to return.

    Every response carries Accept-Query and Content-Length. Only the
    responses whose status starts with "200" also carry Expires and
    Cache-Control, both derived from TTL.
    """
    # Imported here because nothing else in the program needs it.
    from email.utils import format_datetime

    datae = data.encode() # Always encode to UTF-8
    response_headers = [("Content-type", "%s; charset=UTF-8" % ctype),
                        ("Accept-Query", ", ".join(ACCEPTED)),
                        ("Content-Length", str(len(datae)))]
    if status.startswith("200"):
        expires = (datetime.datetime.now(datetime.timezone.utc)
                   + datetime.timedelta(seconds=TTL))
        # format_datetime() always writes the English day and month
        # names that HTTP dates need, whatever the current locale is.
        # So there is no need to switch the locale of the whole process
        # to "C" and back, which was unsafe as soon as two requests
        # were served at the same time by different threads.
        response_headers.append(("Expires", format_datetime(expires, usegmt=True)))
        response_headers.append(("Cache-Control", "max-age=%i" % TTL))
    start_response(status, response_headers)
    return [datae]

def query(term, max=MAX):
    """Search the database for a term.

    term -- the search term. It is given to the PostgreSQL function
            to_tsquery() and to Blog.search().
    max -- the maximum number of results; it is converted with int().

    Returns an array of tuples (URL, title). If no result, return an
    empty array.

    Raises psycopg2.Error when the database refuses a statement. The
    transaction is rolled back before the exception is propagated.
    """
    try:
        # The result of this first statement is never read. Presumably
        # it is here to make PostgreSQL reject early a term that is not
        # a valid tsquery -- TODO confirm.
        cursor.execute("SELECT to_tsquery('french', %(query)s)", {"query": term})
        cursor.execute("SELECT filename,title,excerpt,published,updated FROM Blog.search(%(query)s) LIMIT %(limit)s;",
                       {"query": term, "limit": int(max)})
        rows = cursor.fetchall()
    except psycopg2.Error:
        # After a failed statement, the connection refuses everything
        # until the transaction is rolled back. Since the connection is
        # shared by all the requests, without this rollback one bad
        # search term would make every later search fail.
        conn.rollback()
        raise
    # NOTE(review): the first "." of the pattern is not escaped, so it
    # matches any character, not only a dot -- confirm this is intended.
    return [("https://www.bortzmeyer.org/" + re.sub(r"(.*?).((entry|rfc|fiche)|)xml$", "\\1.html", row[0]), row[1])
            for row in rows]

def format(start_response, term, limit, urls, ctype=PRODUCED[-1]):
    """Render the results of a search and hand them over to pack().

    start_response -- the WSGI callable, given unchanged to pack().
    term -- the search term, echoed in the output.
    limit -- the limit of the search. Only the JSON output reports it,
             after conversion with int().
    urls -- the results, a list of tuples (URL, title).
    ctype -- the media type to produce: "text/plain", "text/html" or
             "application/json".

    Returns what pack() returns; the status is always "200 OK".
    Raises Exception for any other media type.
    """
    if ctype == "text/plain":
        # The four spaces after each newline are part of the output.
        pieces = ["\n    Query of \"%s\" OK\n\n    " % term]
        pieces.extend("%s \"%s\"\n    " % (hit[0], hit[1]) for hit in urls)
        if not urls:
            pieces.append("No result found\n    ")
        rendered = "".join(pieces)
    elif ctype == "text/html":
        page, tag, text = Doc().tagtext()
        page.asis('<?xml version="1.0" ?>\n')
        page.asis('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">')
        # The heading depends on whether there is something to list
        if urls:
            heading = "Query of \"%s\" OK\n" % term
        else:
            heading = "Query of \"%s\" gave no result\n" % term
        with tag('html', ('xml:lang', 'fr'), lang = 'fr', xmlns = 'http://www.w3.org/1999/xhtml'):
            text("\n")
            with tag('head'):
                with tag('title'):
                    text("Query of \"%s\" OK" % term)
                with tag('meta', ('http-equiv', "Content-Type"), content = "text/html; charset=UTF-8"):
                    pass
            text("\n")
            with tag('body'):
                with tag('h1'):
                    text(heading)
                if urls:
                    with tag('ul'):
                        for hit in urls:
                            with tag('li'):
                                with tag('a', href = hit[0]):
                                    text(hit[1])
                            text("\n")
            text("\n")
        text("\n")
        rendered = page.getvalue()
    elif ctype == "application/json":
        payload = {"query": term,
                   "limit": int(limit),
                   "results": [{"url": hit[0], "title": hit[1]} for hit in urls]}
        rendered = json.dumps(payload)
    else:
        raise Exception("Should never happen")
    return pack(start_response, "200 OK", rendered, ctype)

def unsup(start_response, method):
    """Build the 405 response saying that the given method is unsupported.

    Returns what pack() returns, in the default media type.
    """
    message = "\nMethod %s is unsupported.\n" % method
    return pack(start_response, "405 Unsupported method", message)

def noterm(start_response):
    """Build the 400 response for an empty search term.

    start_response -- the WSGI callable, given to pack(). It has to be
                      a parameter, like in the other error helpers:
                      there is no global of that name to fall back on.

    Returns what pack() returns, in the default media type.
    """
    status = "400 Empty term"
    output = "Empty query term"
    return pack(start_response, status, output)

def wrongtype(start_response, type):
    """Build the 415 response for a request media type we do not accept.

    The message names the refused type and lists the ones in ACCEPTED.
    Returns what pack() returns, in the default media type.
    """
    accepted = ",".join(ACCEPTED)
    message = "\nWrong media type %s, I only accept %s\n" % (type, accepted)
    return pack(start_response, "415 Wrong type", message)

def wrongsyntax(start_response, mediatype, content):
    """Build the 400 response for a body that is not valid for its media type.

    The message quotes both the media type and the offending content.
    Returns what pack() returns, in the default media type.
    """
    message = "\nWrong syntax for %s: \"%s\"\n" % (mediatype, content)
    return pack(start_response, "400 Wrong syntax", message)

def notype(start_response):
    """Build the 400 response for a request without a content type.

    The message lists the media types in ACCEPTED.
    Returns what pack() returns, in the default media type.
    """
    accepted = ",".join(ACCEPTED)
    message = "\nContent type must be set, and I only accept %s\n" % accepted
    return pack(start_response, "400 No type", message)

def wrongcharset(start_response, charset):
    """Build the 400 response for a charset that is not known.

    Returns what pack() returns, in the default media type.
    """
    message = "\nUnknown charset %s\n" % charset
    return pack(start_response, "400 Wrong charset", message)

def noaccept(start_response):
    """Build the response sent when none of the types we produce is accepted.

    Returns what pack() returns, in the default media type.
    """
    # NOTE(review): HTTP has a specific status for a failed content
    # negotiation, 406 (Not Acceptable) -- confirm that 422 is intended.
    message = "\nCannot produce the types you accept\n"
    return pack(start_response, "422 No accepted type", message)

def notfound(start_response, path):
    """Build the 404 response for a request on a path other than PATH.

    The message gives both the requested path and the expected one.
    Returns what pack() returns, in the default media type.
    """
    message = "\nPath %s not found, query must use %s\n" % (path, PATH)
    return pack(start_response, "404 Not found", message)

def _printable(raw):
    """Return a request body as text that can always be put in an error message."""
    return raw.decode("utf-8", errors="replace")


def _parse_limit(raw):
    """Return the "limit" parameter as a non-negative int, or raise ValueError."""
    # bool is a subclass of int, but "true" is not a sensible limit
    if isinstance(raw, bool) or not isinstance(raw, (int, float, str)):
        raise ValueError("limit must be a number")
    try:
        value = int(raw)
    except OverflowError as exc: # An infinite float, which json.loads() accepts
        raise ValueError("limit must be finite") from exc
    if value < 0:
        raise ValueError("limit must not be negative")
    return value


def _missing_query(start_response, fields):
    """Build the 422 response for a parsed body without the "query" parameter."""
    output = "\n                 Missing parameter \"query\" in \"%s\"\n" % str(fields)
    return pack(start_response, "422 Missing query", output)


def application(environ, start_response):
    """WSGI entry point: answer one request.

    environ -- the WSGI environment of the request.
    start_response -- the WSGI callable used to send status and headers.

    Returns the iterable with the body of the response.

    Only the QUERY method, on the path PATH, is served. The body is
    read according to its media type (one of ACCEPTED), the search is
    run with query() and the result is rendered with format() in the
    media type negotiated from the Accept header. Everything that is
    wrong in the request is answered with a 4xx response: unknown path
    (404), other method (405), missing content type (400), media type
    not accepted or unparsable (415), no acceptable output type (422),
    body that cannot be decoded or parsed (400), missing "query" (422),
    invalid "limit" (422), empty search term (400). Exceptions raised
    by query() are not caught here.
    """
    # PEP 3333 lets the server leave out the variables whose value is
    # the empty string, and the empty string is the default of PATH.
    path = environ.get("PATH_INFO", "")
    if path != PATH:
        return notfound(start_response, path)
    if environ["REQUEST_METHOD"] != "QUERY":
        return unsup(start_response, environ["REQUEST_METHOD"])

    try:
        body_size = int(environ.get("CONTENT_LENGTH", 0))
    except ValueError:
        body_size = 0
    if body_size < 0:
        body_size = 0 # A negative size would ask read() for everything
    body = environ["wsgi.input"].read(body_size)

    content_type = environ.get("CONTENT_TYPE")
    if not content_type: # Absent or empty
        return notype(start_response)
    try:
        (maintype, subtype, params) = mimeparse.parse_mime_type(content_type)
    except mimeparse.MimeTypeParseException:
        return wrongtype(start_response, content_type)
    mtype = "%s/%s" % (maintype, subtype)
    if mtype not in ACCEPTED:
        return wrongtype(start_response, mtype)

    best = PRODUCED[-1]
    if "HTTP_ACCEPT" in environ:
        try:
            best = mimeparse.best_match(PRODUCED, environ["HTTP_ACCEPT"])
        except mimeparse.MimeTypeParseException:
            pass # Ignore broken Accept
    if not best:
        return noaccept(start_response)

    # Extract the search term and, if there is one, the raw limit
    raw_limit = None
    if mtype == "application/x-www-form-urlencoded":
        try:
            # Decode before parsing: depending on the version of Python,
            # parse_qs() on bytes refuses percent-encoded non-ASCII
            # characters. UnicodeDecodeError is a ValueError too.
            fields = urllib.parse.parse_qs(body.decode("utf-8"), strict_parsing=True)
        except ValueError:
            return wrongsyntax(start_response, mtype, _printable(body))
        if "query" not in fields:
            return _missing_query(start_response, fields)
        search_term = fields["query"][0]
        if "limit" in fields:
            raw_limit = fields["limit"][0]
    elif mtype == "text/plain":
        charset = params.get("charset", "UTF-8")
        try:
            search_term = body.decode(charset)
        except LookupError:
            return wrongcharset(start_response, charset)
        except UnicodeDecodeError: # Known charset, but the body is not in it
            return wrongsyntax(start_response, mtype, _printable(body))
    elif mtype == "application/json":
        try:
            # JSONDecodeError and UnicodeDecodeError are both ValueError
            document = json.loads(body.decode("utf-8"))
        except ValueError:
            return wrongsyntax(start_response, mtype, _printable(body))
        if not isinstance(document, dict): # We need an object, not a list or a scalar
            return wrongsyntax(start_response, mtype, _printable(body))
        if "query" not in document:
            return _missing_query(start_response, document)
        search_term = document["query"]
        if search_term is not None and not isinstance(search_term, str):
            return wrongsyntax(start_response, mtype, _printable(body))
        raw_limit = document.get("limit") # A null limit means the default one
    else:
        raise Exception("Should never happen")

    limit = MAX
    if raw_limit is not None:
        try:
            limit = _parse_limit(raw_limit)
        except ValueError:
            output = "\nParameter \"limit\" must be a non-negative integer, not \"%s\"\n" % (raw_limit,)
            return pack(start_response, "422 Invalid limit", output)

    if not search_term: # None (JSON null) or empty string
        return pack(start_response, "400 Empty term", "Empty query term")
    return format(start_response, search_term, limit,
                  query(search_term, limit), best)
# Initialize. These statements are at the top level of the module, so
# they run every time it is loaded, not only when it is run as a script.
locale.setlocale(locale.LC_ALL, LOCALE)
# A single connection and a single cursor, created once; query() uses
# this cursor for all the searches.
conn = psycopg2.connect("dbname=blog")
cursor = conn.cursor()
cursor.execute("SET client_encoding TO 'UTF-8'")

if __name__ == "__main__":
    import wsgiref.simple_server as server
    PORT = 8081
    PATH = "/"
    httpd = server.make_server("", PORT, application)
    print("Serving HTTP on port %i..." % PORT)
    print("You can test, for instance, with 'curl -X QUERY -d query=foobar localhost:%s/'." % PORT)
    print("See more examples at the beginning of the source code.")
    # Respond to requests until process is killed
    httpd.serve_forever()

