//! VEXPR.CPP   (25/07/2020)
//. J.L.Villar
//. Math expresion interpreter and evaluator.
//. The evaluation is performed using an operand stack and an execurion stack, as in RPN notation.
//
// History:
//
// [08/03/2019] History tracking starts.
// [24/02/2020] Error messages are made more informative.
// [25/07/2020] Added comparison functions gt, lt, eq, ge, le, ne.
// [31/07/2020] The operator parsing mechanism has been changed to allow multiletter operators.
//              The comparison functions were removed and replaced by the standard operators.
//              Also logical operators &&, || and ! were added, and the corresponding precedence
//              levels were also added.
//              Unary operators are now also fetched in the right operand of binary operators
//              expressions, like 2+-3, 3*-2 or x>y && !y>x.
//              Operator levels are now stored into the operator objects by the constructor.

extern "C" {
	#include <stdlib.h>
	#include <time.h>
	#include <math.h>
	#include <string.h>
	#include <stdio.h>
	#include <ctype.h>
}
#include "stack.h"
#include "hash.h"
#include "vexpr.h"

const int errbuflen=32;
char errbuf[errbuflen+1];
const char *subject;

static void error(const char *p1,const char *p2=0) {
	if (p2) {
		strncpy(errbuf,p2,errbuflen);
		strcpy(errbuf+errbuflen-3,"...");
		throw x__VExpr(p1,errbuf);
	}
	throw x__VExpr(p1);
}

//: Random number generation and initialization.

static double vvrand() {return (double(rand())+1)/(double(RAND_MAX)+2);}

static class randinitializer{
	static bool isinit;
public:
	randinitializer() {
		if (!isinit) srand(time(NULL));
		isinit = true;
	}
} rinit;

bool randinitializer::isinit = false;

//: Operator precedence levels
enum {L_MIN = 0,L_EOS = L_MIN,L_INIT,L_OR,L_AND,L_NEG,L_CMP,L_ADD,L_SUB,L_MUL,L_MOD,L_DIV,L_POW,L_ATOMIC,L_MAX = L_ATOMIC};

static const char *ch_sep = "+-*%/^<>=!()&|, \n\r\t\v"; // whitespaces, operator starting chars and group delimiters
static const char *ch_skip = " \t\n\r\v"; // whitespaces
static const char *ch_op1 = "+-!"; // unary operators
static const char *ch_op2 = "+-*%/^<>=!&|"; // binary operators (starting chars)
static const char *ch_assignop = "+-*%/^"; // assignment oerators (single char)

//: RPN stack item object

class RPNAtom {
public:
	virtual void operator () () const = 0;
	virtual int level() const = 0;
//	virtual void write() const = 0;
	virtual void clear() {};
	virtual ~RPNAtom() {clear();}; //this must be repeated in every derived class!!
};

//: RPN operand stack

DynStack<double> opstk("RPN Operand Stack");

//: RPN operator stack

DynStack<const RPNAtom *> rpnstk("RPN Exec Stack");

//: Math variable object

class MathVar : public RPNAtom {
public:
//	char *name;
	mutable double value;
	int level() const {return L_ATOMIC;}
	MathVar(double v = 0) : value(v) {}
//	MathVar(const char *s,double v = 0) {
//		name = new char[strlen(s)+1];
//		strcpy(name,s);
//		value = v;
//	}
	MathVar(const MathVar &m) {
//		name = new char[strlen(m.name)+1];
//		strcpy(name,m.name);
		value = m.value;
	}
	const MathVar & operator = (const MathVar &m) {
//		if (name) delete[] name;
//		name = new char[strlen(m.name)+1];
//		strcpy(name,m.name);
		value = m.value;
		return *this;
	}
	void operator () () const {opstk.push(value);}
	~MathVar() {clear();}
	void clear() {
//		if (name) delete[] name;
//		name = 0;
	}
//	void write() const {printf("%s ",name);}
private:
//	MathVar() : name(0) {}
};

//. RPN stack mark static objects
static class MathNull : public RPNAtom {
public:
	int level() const {return L_INIT;}
	void operator() () const {error("A syntax error is causing the execution of a non-executable separator (MathNull)",subject);}
//	void write() const {printf("<NULL>");}
} ArgSep,ClPar,Eos;

class MathConst : public RPNAtom {
public:
	const char *name;
	double value;
	MathConst(const char *n = 0,double v = 0) : name(n), value(v) {}
	int level() const {return L_ATOMIC;}
	void operator() () const {opstk.push(value);}
//	void write() const {printf("%s ",name);}
};

class MathFunc0 : public RPNAtom {
public:
	const char *name;
	double (*func)();
	MathFunc0(const char *n = 0,double (*f)() = 0) : name(n), func(f) {}
	int level() const {return L_ATOMIC;}
	void operator() () const {opstk.push(func());}
//	void write() const {printf("%s ",name);}
};

class MathFunc1 : public RPNAtom {
public:
	const char *name;
	double (*func)(double);
	MathFunc1(const char *n = 0,double (*f)(double) = 0) : name(n), func(f) {}
	int level() const {return L_ATOMIC;}
	void operator() () const {
		double x = opstk.pop();
		opstk.push(func(x));
	}
//	void write() const {printf("%s ",name);}
};

class MathOp1 : public MathFunc1 {
public:
	int oplevel;
	MathOp1(const char *n = 0,double (*f)(double) = 0,int l = L_ATOMIC) : MathFunc1(n,f), oplevel(l) {}
	int level() const {return oplevel;}
//	void write() const {printf("%s_1 ",name);}
};

class MathFunc2 : public RPNAtom {
public:
	const char *name;
	double (*func)(double,double);
	MathFunc2(const char *n = 0,double (*f)(double,double) = 0) : name(n), func(f) {}
	int level() const {return L_ATOMIC;}
	void operator() () const {
		double y = opstk.pop();
		double x = opstk.pop();
		opstk.push(func(x,y));
	}
//	void write() const {printf("%s ",name);}
};

class MathOp2 : public MathFunc2 {
public:
	int oplevel;
	MathOp2(const char *n = 0,double (*f)(double,double) = 0,int l = L_ATOMIC) : MathFunc2(n,f), oplevel(l) {}
	int level() const {return oplevel;}
};

static double frac(double x) {return fmod(x,1);}
static double fsgn(double x) {return x >= 0 ? 1 : -1;}
static double sqr(double x) {return x*x;}
static double heaviside(double x) {return (x >= 0) ? 1 : 0;}
static double plus(double x) {return x;}
static double minus(double x) {return -x;}
static double neg(double x) {return x?0:1;}
static double add(double x,double y) {return x+y;}
static double sub(double x,double y) {return x-y;}
static double mul(double x,double y) {return x*y;}
static double mod(double x,double y) {return fmod(x,y);}
static double div(double x,double y) {return x/y;}
static double max(double x,double y) {return x>y?x:y;}
static double min(double x,double y) {return x>y?y:x;}
static double gt(double x,double y) {return x>y?1:0;}
static double lt(double x,double y) {return x<y?1:0;}
static double ge(double x,double y) {return x>=y?1:0;}
static double le(double x,double y) {return x<=y?1:0;}
static double eq(double x,double y) {return x==y?1:0;}
static double ne(double x,double y) {return x!=y?1:0;}
static double op_and(double x,double y) {return x&&y?1:0;}
static double op_or(double x,double y) {return x||y?1:0;}

static MathConst theconstarray[] = {
	MathConst("pi",M_PI),
	MathConst("e",M_E),
	MathConst()
};

static MathFunc0 thefn0array[] = {
	MathFunc0("rand",vvrand),
	MathFunc0()
};

static MathFunc1 thefn1array[] = {
	MathFunc1("abs",fabs),MathFunc1("acos",acos),MathFunc1("asin",asin),
	MathFunc1("atan",atan),MathFunc1("ceil",ceil),MathFunc1("ch",cosh),
	MathFunc1("cos",cos),MathFunc1("exp",exp),MathFunc1("floor",floor),
	MathFunc1("frac",frac),MathFunc1("log",log),MathFunc1("log10",log10),
	MathFunc1("sh",sinh),MathFunc1("sin",sin),MathFunc1("sqr",sqr),
	MathFunc1("sqrt",sqrt),MathFunc1("sgn",fsgn),MathFunc1("tan",tan),
	MathFunc1("th",tanh),MathFunc1("u",heaviside),
//	MathFunc1("+",plus),MathFunc1("-",minus),
	MathFunc1()
};

static MathOp1 theop1array[] = {
	MathOp1("+",plus,L_ADD),MathOp1("-",minus,L_SUB),MathOp1("!",neg,L_NEG),
	MathOp1()
};

static MathFunc2 thefn2array[] = {
	MathFunc2("atan2",atan2),MathFunc2("hypot",hypot),MathFunc2("pow",pow),
//	MathFunc2("+",add),MathFunc2("-",sub),MathFunc2("*",mul),
//	MathFunc2("%",mod),MathFunc2("/",div),MathFunc2("^",pow),
	MathFunc2("max",max),MathFunc2("min",min),
//	MathFunc2("gt",gt),MathFunc2("lt",lt),
//	MathFunc2("ge",ge),MathFunc2("le",le),
//	MathFunc2("eq",eq),MathFunc2("ne",ne),
	MathFunc2("mod",fmod),
	MathFunc2()
};

static MathOp2 theop2array[] = {
	MathOp2("&&",op_and,L_AND),MathOp2("||",op_or,L_OR),
	MathOp2("<",lt,L_CMP),MathOp2(">",gt,L_CMP),
	MathOp2("<=",le,L_CMP),MathOp2(">=",ge,L_CMP),
	MathOp2("==",eq,L_CMP),MathOp2("!=",ne,L_CMP),
	MathOp2("+",add,L_ADD),MathOp2("-",sub,L_SUB),MathOp2("*",mul,L_MUL),
	MathOp2("%",mod,L_MOD),MathOp2("/",div,L_DIV),MathOp2("^",pow,L_POW),
	MathOp2()
};

//: RPN object tables

class NamedMathObj : public BasicId {
	bool owns_object;
// owns_object = false: do not duplicate or delete objects (just copy the pointers) (this is for static objects containing the ID string).
// owns_object = true: duplicate the ID and delete everything (name buffer can now be reused, and *pa will be deleted by the destructor).
// fill() can modify ownership of the object pointed by pa, but not of the underlying ID!!!
// Constructor call    fill() call               Result
//  own = true          own = true      ID buffer is duplicated. ID and *pa will be deleted by the destructor
//  own = true          own = false     ID buffer is duplicated. The destructor will only delete the ID buffer
//  own = false         own = true      ID buffer is neither duplicated nor deleted by the destructor, but it still deletes *pa.
//  own = false         own = false     Nothing is duplicated nor deleted by the destructor (DEFAULT BEHAVIOR).
        RPNAtom *pa;
public:
        NamedMathObj(const char *name,bool own = false) : BasicId(name,own), pa(0), owns_object(own) {
//printf("\nCreating NamedMathObj object associated to \"%s\" at %p (%s)\n",operator const char *(),this,own?"owns objects":"does not own objects");fflush(stdout);
        }
        ~NamedMathObj() {
//printf("\nDestroying NamedMathObj object associated to \"%s\" at %p (%s)\n",operator const char *(),this,owns_object?"owns object":"does not own object");fflush(stdout);
                if (owns_object && pa) delete pa;
		pa = 0;
        }
        void fill(RPNAtom *_pa, bool own = false) { // True if *_pa must be deleted by the destructor. 
//printf("\nTransferring RPNAtom object (%p) associated to \"%s\" into %p (%s)\n",_pa,operator const char *(),this,own?"object is owned":"object is not owned");fflush(stdout);
                if (pa) error("Attempting to set twice a NamedMathObj object",subject);
		pa = _pa;
		owns_object = own;
        }
	void operator () () const {pa->operator () ();} // Never call it before using fill()!!!
	int level() const {return pa->level();} // Never call it before using fill()!!!
//	void write() const {pa->write();} // Never call it before using fill()!!!
	void clear() {pa->clear();} // Never call it before using fill()!!!
	RPNAtom *getobjpointer() {return pa;}
};

//: Math object tables initializer

template <class T>
static void fill_math_table(HashTable<NamedMathObj,BasicId> &ht,T t[]) {
	for (int i = 0; t[i].name; i++) {
		NamedMathObj *po = ht.insert(BasicId(t[i].name),false); // Does not own the ID buffer
		po->fill(&(t[i]),false); // Object ownership not transferred to ht!!!
	}
}

HashTable<NamedMathObj,BasicId> ConstTable(16);
HashTable<NamedMathObj,BasicId> Fn0Table(16);
HashTable<NamedMathObj,BasicId> Fn1Table(64);
HashTable<NamedMathObj,BasicId> Fn2Table(32);
HashTable<NamedMathObj,BasicId> Op1Table(16);
HashTable<NamedMathObj,BasicId> Op2Table(16);
HashTable<NamedMathObj,BasicId> VarTable(256);

static class MathInitializer{
	static bool inited;
public:
	MathInitializer() {
		if (!inited) {
			inited = true;
			fill_math_table(ConstTable,theconstarray);
			fill_math_table(Fn0Table,thefn0array);
			fill_math_table(Fn1Table,thefn1array);
			fill_math_table(Fn2Table,thefn2array);
			fill_math_table(Op1Table,theop1array);
			fill_math_table(Op2Table,theop2array);
		}
	}
} mathinit;

bool MathInitializer::inited = false;

//: Look at table functions

MathConst *findconst(const BasicId &id) {
	NamedMathObj *p = ConstTable.find(id);
	return p ? (MathConst *)(p->getobjpointer()) : 0;
}

MathFunc0 *findfn0(const BasicId &id) {
	NamedMathObj *p = Fn0Table.find(id);
	return p ? (MathFunc0 *)(p->getobjpointer()) : 0;
}

MathFunc1 *findfn1(const BasicId &id) {
	NamedMathObj *p = Fn1Table.find(id);
	return p ? (MathFunc1 *)(p->getobjpointer()) : 0;
}

MathFunc2 *findfn2(const BasicId &id) {
	NamedMathObj *p = Fn2Table.find(id);
	return p ? (MathFunc2 *)(p->getobjpointer()) : 0;
}

MathOp1 *findop1(const BasicId &id) {
	NamedMathObj *p = Op1Table.find(id);
	return p ? (MathOp1 *)(p->getobjpointer()) : 0;
}

MathOp2 *findop2(const BasicId &id) {
	NamedMathObj *p = Op2Table.find(id);
	return p ? (MathOp2 *)(p->getobjpointer()) : 0;
}

MathVar *findvar(const BasicId &id) {
	NamedMathObj *p = VarTable.find(id);
	return p ? (MathVar *)(p->getobjpointer()) : 0;
}

MathVar *definevar(const BasicId &id, double v) {
//printf("Defining math variable %s\n",(const char *) id);fflush(stdout);
	NamedMathObj *p = VarTable.insert(id,true); // Duplicate ID buffer on actual insertion 
	if (!p->getobjpointer()) {
//printf("Inserting a new MathVar object %s\n",(const char *) id);fflush(stdout);
		p->fill(new MathVar,true); // Object is owned by *p
	}
	MathVar *pv = (MathVar *)(p->getobjpointer());
	pv->value = v;
	return pv;
}

//: Parser functions

// Parser buffers and pointers
const static char *buf;
const static char *str;

// Skipping whitespaces
static int skip() {
	int i = strspn(str,ch_skip);
	str += i;
	return i;
}

static char *skip(char *&rs) {
	if (rs) {
		int i = strspn(rs,ch_skip);
		rs += i;
	}
	return rs;
}

// Parsing numeric constants
static bool getnumber(double &x) {
	const char *oldstr = str;
	char *p;
	x = strtod(str,&p);
	str = p;
	if (str > oldstr)
		if (!*str || strchr(ch_sep,*str)) return true;
		else error("Bad number termination: separator needed",str);
	str = oldstr;
	return false;
}

// Auxiliary buffer for function, operator, constant and variable names.
enum {MAXNAMELEN = 64};
static char namebuf[MAXNAMELEN + 1]; // STATIC SIZE LIMITATION

// Parsing identifiers
// Identifiers longer than MAXNAMELEN are truncated (without causing any error)
static bool getname(char *namebuf) {
	int j = 0;
	const char *initialstr=str;
	for (;;) {
		char c = *str++;
		if (!isalpha(c) && !isdigit(c) && c != '_' && c != '@') break;
		if (j < MAXNAMELEN) namebuf[j++] = c;
	}
	--str;
	namebuf[j]=0;
	if (!j) error("Identifier expected",initialstr);
	if (*str && !strchr(ch_sep,*str)) error("Illegal character in identifier",initialstr);
	return true;
}

//: The two flushto() functions implement the precedence of the binary operators
//. and the parsing of function arguments and parenthesized expressions.
//. Flushing to a level or mark object is performing the execution of the RPN stack
//. until the level is reached or the mark object is found.
void flushto(int level) {
	try {
		for (;;) {
			const RPNAtom &at = *rpnstk.pop();
			if (at.level() < level) {
				const RPNAtom *pat=&at;
				rpnstk.push(pat);
				break;
			}
//			at.write();
			at();
		}
	} catch (x_underflow) {
		error("RPN Stack is corrupted (RPN stack underflow)",subject);
	} catch (x_overflow) {
		error("RPN Stack exhausted (RPN stack overflow)",subject);
	}
}

void flushto(const RPNAtom &a) {
	try {
		for (;;) {
			const RPNAtom &at = *rpnstk.pop();
			if (&at == &a) break;
//			at.write();
			at();
		}
	} catch (x_underflow) {
		error("RPN Stack is corrupted (RPN stack underflow)",subject);
	} catch (x_overflow) {
		error("RPN Stack exhausted (RPN stack overflow)",subject);
	}
}

//. Parse unary operator and return the corresponding object
const RPNAtom *getop1() {
	namebuf[0] = *str++; // all unary operators are single-letter
	namebuf[1] = 0;
	return findop1(namebuf);
}

//. Parse unary operator and return the corresponding object
const RPNAtom *getop2() {
	switch (namebuf[0] = *str++) {
	case '<': // operators < and <=
	case '>': // operators > and >=
		if (*str == '=') {
			namebuf[1] = *str++;
			namebuf[2] = 0;
		} else namebuf[1] = 0;
		break;
	case '!': // operator !=
	case '=': // operator ==
		if (*str == '=') {
			namebuf[1] = *str++;
			namebuf[2] = 0;
		} else {
			namebuf[1] = 0;
			return 0;
		}
		break;
	case '|': // operator ||
	case '&': // operator &&
		if (*str == namebuf[0]) {
			namebuf[1] = *str++;
			namebuf[2] = 0;
		} else {
			namebuf[1] = 0;
			return 0;
		}
		break;
	default: namebuf[1] = 0; // other sigle-letter operators
	}
	return findop2(namebuf);
}

//. Read a token form the expression buffer and partially evaluate
//. the contents of the stacks when necessary, via the flush() function.
//. The boolean isbeg indicates whether the parser is at the beginning 
//. of a group (full expression, parenthesis or function argument). This is necessary
//. to detect unary operators. Also unary operators are fetched in theright argument
//. in a binary operator expression (like 2+-2 or 2*-1 or x<y && !x>y).
//. The return value is true when there exist more tokens to be parsed.
//. Some special atoms mark groups in the RPN stack (ClPar, ArgSep, Eos).
bool token(bool initbeg = false) {
	static bool isbeg = true;
	if (initbeg) isbeg = true;
	skip();
	if (!*str) {
		flushto(Eos);
		isbeg = false;
		return false;
	}
	if (isbeg && strchr(ch_op1,*str)) {
		const RPNAtom *pat=getop1();
		if (!pat) error("INTERNAL: unimplemented unary operator",namebuf);
		rpnstk.push(pat);
		isbeg = false;
		return true;
	}
	isbeg = false;
	if (strchr(ch_op2,*str)) {
		const RPNAtom *pat=getop2();
		if (!pat) error("INTERNAL: unimplemented binary operator",namebuf);
		flushto(pat->level());
		rpnstk.push(pat);
		isbeg = true;
		return true;
	}
	if (*str == '(') {
		str++;
		const RPNAtom *pat=&ClPar;
		rpnstk.push(pat);
		isbeg = true;
		return true; 
	}
	const RPNAtom *at = 0;
	double x;
	if (getnumber(x)) {
		flushto(L_ATOMIC);
//		printf("%g ",x);
		opstk.push(x);
		return true; 
	}
	if (*str == ')') {
		str++;
		flushto(ClPar);
		return true;
	}
	if (*str == ',') {
		str++;
		flushto(ArgSep);
		isbeg = true;
		return true;
	}
	if (!getname(namebuf)) error("INTERNAL: unhandled error condition (missing name)",namebuf);
	BasicId id = BasicId(namebuf);
	skip();
	if (*str == '(') {
		str++;
		at = findfn0(id);
		if (at) {
			rpnstk.push(at);
			skip();
			if (*str++ != ')') error("Syntax error: missing ')' in function with zero arguments",str);
			return true; 
		}
		at = findfn1(id);
		if (at) {
			rpnstk.push(at);
			const RPNAtom *pat=&ClPar;
			rpnstk.push(pat);
			isbeg = true;
			return true; 
		}
		at = findfn2(id);
		if (at) {
			rpnstk.push(at);
			const RPNAtom *pat=&ClPar;
			rpnstk.push(pat);
			pat=&ArgSep;
			rpnstk.push(pat);
			isbeg = true;
			return true; 
		}
		error("Undefined function",namebuf);
		return false;
	}
	at = findconst(id);
	if (at) {
		flushto(L_ATOMIC);
//		at->write();
		(*at)();
		return true; 
	}
	at = findvar(id);
	if (at) {
		flushto(L_ATOMIC);
//		at->write();
		(*at)();
		return true; 
	}
	if (!at) error("Undefined variable or constant name",namebuf);
	return false;
}

//. Perform the variable assignment.
//. The variable name can contain a binary operator in the end.
//. In this case, the variable must be already created.
double decl_var(const char *n,double v) {
	if (!n) error("Null string in lvalue",subject);
	if (!*n) error("Empty string in lvalue",subject);
	buf = str = n;
	skip();
	if (isalpha(*str) || *str == '_' || *str == '@') {
		if (getname(namebuf)) {
			skip();
			if (!*str) {
				definevar(BasicId(namebuf),v);
				return v;
			} else if (strchr(ch_assignop,*str)) {
				MathVar *pv = findvar(BasicId(namebuf));
				if (!pv) error("Undefined variable in assignment expression",namebuf);
				if (str[1]) error("Syntax error in assignment expression",n);
				MathOp2 *po = findop2(BasicId(str));
				if (!po) error("INTERNAL: Unimplemented assignment operator",str);
				return pv->value = po->func(pv->value,v);
			}
		}
	}
	error("Bad lvalue syntax",n);
}

//. Perform the expression evaluation.
//. Parsing and evaluation are done at once by means
//. of the two stacks (operand and RPN stacks).
//. In the end the RPN stack must be empty and 
//. the operand stack must contain a single element
//. that is the result of the evaluation.
double eval_expr(const char *s) {
	if (!s) error("No expression to evaluate (null string)");
	buf = str = s;
	double res = 0;
	opstk.flush();
	rpnstk.flush();
	try {
		const RPNAtom *pat=&Eos;
		rpnstk.push(pat);
		if (token(true)) while(token());
		res = opstk.pop();
	} catch (x_underflow) {
		error("Syntax error: to few operands in the stack",s);
	} catch (x_overflow) {
		error("RPN Stack exhausted (RPN stack overflow)",s);
	}
	if (!opstk.empty()) error("Syntax error: unused operands in the stack",s);
	if (!rpnstk.empty()) error("Syntax error: RPN stack is corrupted",s);
	return res;
}

//. Process the assignment and the evaluation parts separately.
double process_assignment(char *s) {
	skip(s);
	if (!s || !*s) return 0;
	char *p = strchr(s,'=');
	if (!p) return eval_expr(s);
	if (p != s && strchr("!<>",p[-1])) return eval_expr(s);
        if (p[1] == '=') return eval_expr(s);
	*p++ = 0;
	double v = process_assignment(p);
	v = decl_var(s,v);
	return v;
}

//. Evaluate a VEXPR query
//. The evaluator works on a copy of the query string.
double vexpr(const char *q) {
	if (!q) return 0;
	subject=q;
	char *p=0;
	try {
		p = new char[strlen(q)+1];
		strcpy(p,q);
		double v = process_assignment(p);
		if (p) delete[] p;
		p = 0;
		return v;
	} catch (...) {
		if (p) delete[] p;
		p = 0;
		throw;
	}
	return 0;
}

//@
