<?php

/*
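 * Repairs user-entered Lucene/Elasticsearch query strings: syntax that can be
 * parsed according to the grammar below is kept, anything else is
 * backslash-escaped so that it is treated as literal text.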
 *
 * BNF:
 *
 *   Query  ::= ( Clause )*
 *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
 *
 * See: https://github.com/apache/lucene-solr/blob/ab793e7ab568b40e8a8621f1ce526c0766582640/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
 *
 * Another useful resource: https://github.com/polyfractal/elasticsearch-inquisitor/blob/005f8190390ca31a626d0133703efa8888e03d79/_site/js/vendor/lucene/lucene-query.grammar
 */
class Crossroads_Elasticsearch_Model_QueryParser {
    const WHITESPACE = " \t\v\n\r\f";
    // "{" and "}" are included because they delimit exclusive ranges; without
    // them a range such as "{1 TO 5}" could never find its closing brace and
    // stray braces would be emitted unescaped.
    const RESERVED   = ": \t\r\n\f\\()\"+-/^~[]{}";
    const NUMBER     = "0123456789.";
    const DIGITS     = "0123456789";

    /** The string most recently passed to setBuffer(), untouched. */
    protected $origBuffer = "";
    /** The not-yet-consumed remainder of the query. Always a string. */
    protected $buffer     = "";
    /** Reset by setBuffer(); not read anywhere in this class. */
    protected $stack      = [];

    /**
     * @param string $str Optional query to load into the parser.
     */
    public function __construct($str = "") {
        $this->setBuffer($str);
    }

    /**
     * Loads a new query into the parser, discarding any previous state.
     *
     * @param string $str
     */
    public function setBuffer($str) {
        // Cast so that null/false never reach the str* functions and so that
        // the "buffer is a string" invariant holds for all helpers.
        $str = (string)$str;

        $this->origBuffer = $str;
        $this->buffer     = $str;
        $this->stack      = [];
    }

    /**
     * Repairs a complete query string.
     *
     * @param  string $str Raw query as typed by the user.
     * @return string Query where unparseable syntax has been backslash-escaped.
     */
    public function repair($str) {
        $out = "";

        $this->setBuffer($str);

        // Loop on the remaining input, not on the truthiness of the produced
        // node: a node such as "0" is falsy but perfectly valid.
        while(strlen($this->buffer) > 0) {
            $out .= $this->takeNode();
        }

        return $out;
    }

    /**
     * Repairs the next node: either a run of whitespace (collapsed to one
     * space) or a chain of operands joined by AND/OR/NOT/&&/|| operators.
     *
     * @return string The repaired text, "" when the buffer is exhausted.
     */
    public function repairNode() {
        if($this->takeWhile(self::WHITESPACE) !== "") {
            return " ";
        }

        $result = $this->repairOperand().$this->takeWhile(self::WHITESPACE);

        // Operand (Operator Operand)*
        while(($op = $this->tokenOperator()) !== false) {
            $op   .= $this->takeWhile(self::WHITESPACE);
            $right = $this->repairOperand();

            if($right === "") {
                // Dangling operator at the end of the input
                return $result."\\".$op;
            }

            $result .= $op.$right.$this->takeWhile(self::WHITESPACE);
        }

        return $result;
    }

    /**
     * Repairs one operand of an operator chain: an optional NOT followed by a
     * group/field/term expression. An operator keyword which cannot act as an
     * operand (e.g. the second AND in "a AND AND b") is escaped.
     *
     * @return string "" only when the buffer is exhausted.
     */
    private function repairOperand() {
        $op = $this->tokenOperator();

        if($op === false) {
            return $this->repairGroupExp();
        }

        if($op !== "NOT") {
            return "\\".$op;
        }

        $exp = $this->repairGroupExp();

        // NOT needs something to negate; whitespace alone does not count
        if(trim($exp, self::WHITESPACE) === "") {
            return "\\".$op.$exp;
        }

        return $op.$exp;
    }

    /**
     * Runs repairNode() and guarantees that at least one byte of input is
     * consumed, so callers looping on the buffer always terminate.
     *
     * @return string
     */
    private function takeNode() {
        $remaining = strlen($this->buffer);
        $node      = $this->repairNode();

        if(strlen($this->buffer) >= $remaining) {
            // Defensive: nothing was consumed, escape one byte and move on
            $stuck = $this->next();

            if($stuck !== false) {
                $node .= "\\".$stuck;
            }
        }

        return $node;
    }

    /**
     * Repairs a parenthesised group or, failing that, a field expression.
     * Leading whitespace is preserved in the result.
     *
     * @return string
     */
    protected function repairGroupExp() {
        $start = $this->takeWhile(self::WHITESPACE);
        $paren = $this->repairParenExp();

        if($paren !== false) {
            return $start.$paren;
        }

        return $start.$this->repairFieldExp();
    }

    /**
     * Repairs "(" Query ")".
     *
     * @return string|false False if the buffer does not start with "(",
     *                      otherwise the group; the parentheses are escaped
     *                      when the group is empty or never closed.
     */
    protected function repairParenExp() {
        $this->takeWhile(self::WHITESPACE);

        if($this->peek() !== "(") {
            return false;
        }

        $this->next();

        $inner = "";

        // A group contains a whole query, i.e. any number of nodes
        while(strlen($this->buffer) > 0 && $this->peek() !== ")") {
            $inner .= $this->takeNode();
        }

        if($this->peek() !== ")") {
            // Unbalanced, treat the opening parenthesis as literal text
            return "\\(".$inner;
        }

        // Consume the closing parenthesis so that it is not parsed (and
        // escaped) a second time by the caller.
        $this->next();

        if(trim($inner, self::WHITESPACE) === "") {
            // Empty groups are escaped rather than emitted as "()"
            return "\\(".$inner."\\)";
        }

        return "(".$inner.")";
    }

    /**
     * Repairs [field ":"] ( range | group | term ).
     *
     * @return string
     */
    protected function repairFieldExp() {
        $this->takeWhile(self::WHITESPACE);

        $name = $this->tokenFieldName();

        if($name !== false) {
            $range = $this->repairRangeOperatorExp();

            if($range !== false) {
                return $name.":".$range;
            }

            $group = $this->repairParenExp();

            if($group !== false) {
                return $name.":".$group;
            }

            $token = $this->repairTerm();

            // A field without a value: make the colon literal
            return $token !== "" ? $name.":".$token : $name."\\:";
        }

        $range = $this->repairRangeOperatorExp();

        if($range !== false) {
            return $range;
        }

        return $this->repairTerm();
    }

    /**
     * Repairs a range expression such as "[a TO b]" or "{a TO b}".
     *
     * @return string|false False if the buffer does not start with "[" or
     *                      "{". Otherwise the normalised range, or - when the
     *                      range is malformed - everything consumed so far
     *                      with the opening bracket escaped.
     */
    protected function repairRangeOperatorExp() {
        $this->takeWhile(self::WHITESPACE);

        if( ! in_array($this->peek(), ["[", "{"], true)) {
            return false;
        }

        $open = $this->next();
        $min  = $this->takeWhileNot(self::RESERVED);
        $gap  = $this->takeWhile(self::WHITESPACE);

        // The lower bound is mandatory; only look for TO when we have one so
        // that nothing is consumed which we do not also emit.
        if($min === "" || $this->takeToken("TO") === false) {
            return "\\".$open.$min.$gap;
        }

        $consumed = $open.$min.$gap."TO".$this->takeWhile(self::WHITESPACE);

        $max = $this->takeWhileNot(self::RESERVED);

        if($max === "") {
            return "\\".$consumed;
        }

        $tail = $this->takeWhile(self::WHITESPACE);

        if(in_array($this->peek(), ["]", "}"], true)) {
            return $open.$min." TO ".$max.$this->next();
        }

        // Never closed
        return "\\".$consumed.$max.$tail;
    }

    /**
     * Repairs a single term: optional "+"/"-" prefix, a quoted or unquoted
     * string and optional fuzzy/proximity and boost modifiers.
     *
     * @return string "" only when the buffer is exhausted.
     */
    protected function repairTerm() {
        $this->takeWhile(self::WHITESPACE);

        $op     = $this->repairPrefixOperator();
        $quoted = $this->repairQuotedTerm();

        if($quoted !== false) {
            $mod   = $this->repairProximityModifier();
            $boost = $this->repairBoostModifier();

            return $op.$quoted.$mod.$boost;
        }

        $str   = $this->repairUnquotedTerm();
        $mod   = $this->repairFuzzyModifier();
        $boost = $this->repairBoostModifier();

        if($str !== "") {
            return $op.$str.$mod.$boost;
        }

        // No term to attach to: escape whatever is not already escaped
        return $this->escapeOnce($op).
               $this->escapeOnce($mod).
               $this->escapeOnce($boost);
    }

    /**
     * Consumes a leading "+" or "-".
     *
     * @return string The operator, or "" if there is none.
     */
    protected function repairPrefixOperator() {
        switch($this->peek()) {
        case "-":
        case "+":
            return $this->next();
        }

        return "";
    }

    /**
     * Repairs a double-quoted phrase. A missing closing quote is added.
     *
     * @return string|false False if the buffer does not start with a quote.
     */
    protected function repairQuotedTerm() {
        $this->takeWhile(self::WHITESPACE);

        if($this->peek() !== "\"") {
            return false;
        }

        $this->next();

        $str = $this->takeWhileNot("\"");

        if($this->peek() === "\"") {
            $this->next();
        }

        return "\"".$str."\"";
    }

    /**
     * Consumes a run of non-reserved characters. If the buffer instead starts
     * with a reserved character, that single character is consumed and
     * returned escaped.
     *
     * @return string "" only when the buffer is exhausted.
     */
    protected function repairUnquotedTerm() {
        $str = $this->takeWhileNot(self::RESERVED);

        if($str !== "") {
            return $str;
        }

        $t = $this->next();

        if($t !== false) {
            // Reserved token which does not make sense, escape
            return "\\".$t;
        }

        return "";
    }

    /**
     * Repairs a fuzzy modifier: "~", "~<integer>" or "~0.<digits>".
     *
     * @return string "" if there is no modifier; the modifier (including any
     *                whitespace preceding it) when it is followed by
     *                whitespace or end of input; otherwise the modifier with
     *                the "~" escaped.
     */
    protected function repairFuzzyModifier() {
        $exp = $this->takeTokenWs("~", self::RESERVED . self::NUMBER);

        if($exp === false) {
            return "";
        }

        // Fractions are only accepted below one
        $exp .= $this->takePattern('/^(?:0\.[0-9]+|[0-9]+)/');

        if($this->atTermBoundary()) {
            return $exp;
        }

        // Failed, we have to escape it
        return $this->escapeToken($exp);
    }

    /**
     * Repairs a proximity modifier on a quoted phrase: "~<integer>".
     *
     * @return string "" if there is no modifier; the modifier including its
     *                number when well-formed; otherwise the consumed text
     *                with the "~" escaped.
     */
    protected function repairProximityModifier() {
        $exp = $this->takeTokenWs("~", self::RESERVED . self::NUMBER);

        if($exp === false) {
            return "";
        }

        $num = $this->takeWhile(self::DIGITS);

        if($num === "") {
            // Proximity requires a number
            return $this->escapeToken($exp);
        }

        $exp .= $num;

        // A proximity modifier needs to be followed by whitespace or EOF
        if($this->atTermBoundary()) {
            return $exp;
        }

        // Failed, we have to escape it
        return $this->escapeToken($exp);
    }

    /**
     * Repairs a boost modifier: "^<number>" where number is an integer or a
     * decimal.
     *
     * @return string "" if there is no modifier; the modifier including its
     *                number when well-formed; otherwise the consumed text
     *                with the "^" escaped.
     */
    protected function repairBoostModifier() {
        $exp = $this->takeTokenWs("^", self::RESERVED . self::NUMBER);

        if($exp === false) {
            return "";
        }

        $num = $this->takePattern('/^[0-9]+(?:\.[0-9]+)?/');

        if($num === "") {
            // Boost requires a number
            return $this->escapeToken($exp);
        }

        $exp .= $num;

        // Trailing whitespace is left in the buffer for the caller to emit
        if($this->atTermBoundary()) {
            return $exp;
        }

        // Failed, we have to escape it
        return $this->escapeToken($exp);
    }

    /**
     * Consumes "name[WHITESPACE]:" from the start of the buffer, including
     * the colon.
     *
     * @return string|false The field name, false if the buffer does not
     *                      start with a field name followed by a colon.
     */
    protected function tokenFieldName() {
        // Length of the leading run of non-reserved characters
        $nameLen = strcspn($this->buffer, self::RESERVED);

        if($nameLen === 0) {
            return false;
        }

        $colonAt = $nameLen + strspn($this->buffer, self::WHITESPACE, $nameLen);

        if(substr($this->buffer, $colonAt, 1) !== ":") {
            return false;
        }

        $name = substr($this->buffer, 0, $nameLen);

        $this->buffer = (string)substr($this->buffer, $colonAt + 1);

        return $name;
    }

    /**
     * Consumes a boolean operator keyword (NOT, AND, &&, ||, OR), including
     * any whitespace preceding it.
     *
     * @return string|false The consumed text, false if none matches.
     */
    protected function tokenOperator() {
        foreach(["NOT", "AND", "&&", "||", "OR"] as $keyword) {
            $t = $this->takeTokenWs($keyword);

            if($t !== false) {
                return $t;
            }
        }

        return false;
    }

    /**
     * Consumes $token if the buffer starts with it and it is followed by end
     * of input or one of the characters in $terminators.
     *
     * @param  string $token
     * @param  string $terminators
     * @return string|false The token, false if it does not match.
     */
    protected function takeToken($token, $terminators = self::RESERVED) {
        return $this->takeTokenAt(0, $token, $terminators);
    }

    /**
     * Like takeToken() but skips leading whitespace; the skipped whitespace
     * is part of the returned string.
     *
     * @param  string $token
     * @param  string $terminators
     * @return string|false Whitespace plus token, false if it does not match.
     */
    protected function takeTokenWs($token, $terminators = self::RESERVED) {
        $wsL = strspn($this->buffer, self::WHITESPACE);

        return $this->takeTokenAt($wsL, $token, $terminators);
    }

    /**
     * Shared implementation of takeToken() and takeTokenWs().
     *
     * @param  int    $offset      Number of leading bytes to skip before the token.
     * @param  string $token
     * @param  string $terminators
     * @return string|false Everything up to and including the token.
     */
    private function takeTokenAt($offset, $token, $terminators) {
        $l      = strlen($token);
        $window = (string)substr($this->buffer, $offset, $l + 1);

        // Token must match exactly
        if((string)substr($window, 0, $l) !== $token) {
            return false;
        }

        $after = (string)substr($window, $l);

        if($after !== "" && strspn($after, $terminators) === 0) {
            return false;
        }

        return $this->next($offset + $l);
    }

    /**
     * Consumes the text matched by an anchored regular expression.
     *
     * @param  string $pattern PCRE pattern anchored with "^".
     * @return string The matched text, "" if the pattern does not match.
     */
    private function takePattern($pattern) {
        if(preg_match($pattern, $this->buffer, $m) === 1 && $m[0] !== "") {
            return $this->next(strlen($m[0]));
        }

        return "";
    }

    /**
     * @return bool True if the buffer is exhausted or starts with whitespace.
     */
    private function atTermBoundary() {
        return strlen($this->buffer) === 0
            || strspn($this->buffer, self::WHITESPACE) > 0;
    }

    /**
     * Inserts a backslash in front of the first non-whitespace character.
     *
     * @param  string $raw
     * @return string
     */
    private function escapeToken($raw) {
        $lead = strspn($raw, self::WHITESPACE);

        return substr($raw, 0, $lead)."\\".substr($raw, $lead);
    }

    /**
     * Escapes $part unless it is empty or already contains a backslash.
     *
     * @param  string $part
     * @return string
     */
    private function escapeOnce($part) {
        $part = (string)$part;

        if($part === "" || strpos($part, "\\") !== false) {
            return $part;
        }

        return $this->escapeToken($part);
    }

    /**
     * Returns the first $i bytes of the buffer without consuming them.
     *
     * @param  int $i
     * @return string|false False if fewer than $i bytes remain.
     */
    protected function peek($i = 1) {
        if(strlen($this->buffer) < $i) {
            return false;
        }

        return substr($this->buffer, 0, $i);
    }

    /**
     * Consumes and returns the first $i bytes of the buffer.
     *
     * @param  int $i
     * @return string|false False if fewer than $i bytes remain.
     */
    protected function next($i = 1) {
        if(strlen($this->buffer) < $i) {
            return false;
        }

        $t = (string)substr($this->buffer, 0, $i);

        $this->buffer = (string)substr($this->buffer, $i);

        return $t;
    }

    /**
     * Consumes the leading run of bytes which are all in $set.
     *
     * @param  string $set
     * @return string Possibly empty.
     */
    protected function takeWhile($set) {
        $tokenLen = strspn($this->buffer, $set);
        $token    = (string)substr($this->buffer, 0, $tokenLen);

        $this->buffer = (string)substr($this->buffer, $tokenLen);

        return $token;
    }

    /**
     * Consumes the leading run of bytes of which none is in $set.
     *
     * @param  string $set
     * @return string Possibly empty.
     */
    protected function takeWhileNot($set) {
        $tokenLen = strcspn($this->buffer, $set);
        $token    = (string)substr($this->buffer, 0, $tokenLen);

        $this->buffer = (string)substr($this->buffer, $tokenLen);

        return $token;
    }
}
