#! python

import subprocess
import sys

def compare (input, pairs):
	"""
	Compare metrics between pairs of hosts by running `./tesca` once per pair.

	For each (candidate, origin) pair the buffered input is piped to
	`./tesca -i csv:headers -o csv` with an expression that outputs the
	dimension columns followed by, for every metric,
	(sum for candidate - sum for origin) / sum for origin.

	input -- iterable of text chunks (an open file, a list of lines, ...) that
	         is fed verbatim to tesca's stdin; it is read once and replayed for
	         every pair.
	pairs -- iterable of (candidate, origin) host names substituted into the
	         expression.

	Returns a list of (candidate, origin, dimension_values, metric_name, value)
	tuples, one per non-empty metric field of every output row. Returns []
	as soon as one tesca run exits with a non-zero status or prints nothing.
	"""
	dimensions = ['rtb_type', 'participation']
	metrics = ['ecpm_avg', 'bid_avg', 'paid_avg']

	columns = ['$' + name for name in dimensions]
	columns += ['(sum(if($host = "{0}", $' + name + ')) - sum(if($host = "{1}", $' + name + '))) / sum(if($host = "{1}", $' + name + '))' for name in metrics]
	template = ','.join (columns).strip ()

	# Buffer the input once: a file or iterator would otherwise be exhausted
	# by the first pair, leaving every following pair with empty data.
	data = ''.join (input)
	values = []

	for (candidate, origin) in pairs:
		expression = template.format (candidate, origin)
		process = subprocess.Popen (['./tesca', '-i', 'csv:headers', '-o', 'csv', '-e', expression], stdin = subprocess.PIPE, stdout = subprocess.PIPE)

		# communicate() feeds stdin and drains stdout together, so neither
		# pipe can fill up and stall the exchange.
		out = process.communicate (data)[0].strip ()

		if process.returncode != 0 or len (out) == 0:
			return []

		for line in out.split ('\n'):
			fields = line.strip ().split (',')

			fields_dimensions = fields[0:len (dimensions)]
			fields_metrics = fields[len (dimensions):]

			# Empty metric fields are skipped rather than converted.
			for (key, value) in zip (metrics, fields_metrics):
				if len (value) > 0:
					values.append ((candidate, origin, fields_dimensions, key, float (value)))

	return values

def prepare (input):
	"""
	Pipe `input` through `./tesca -o csv:headers` and return what it prints.

	The expression names field $5 as rtb_type and field $11 as participation,
	maps the last two characters of field $2 to a host label ("test1",
	"test2", "ref1", "ref2", otherwise "prod"), and averages fields $8, $9
	and $10 (only values >= 0) as ecpm_avg, bid_avg and paid_avg.
	NOTE(review): this reading assumes the usual meaning of tesca's
	case/slice/avg/if functions -- confirm against the tesca documentation.

	input -- iterable of text chunks written verbatim to tesca's stdin.

	Returns tesca's stripped standard output, or False when tesca exits with
	a non-zero status.
	"""
	expression = '''
		$5:						$rtb_type,
		$11:					$participation,
		case(slice($2,-2),
			".1",	"test1",
			".2",	"test2",
			".4",	"ref1",
			".5",	"ref2",
					"prod"):	$host,
		avg(if($8 >= 0, $8)):	$ecpm_avg,
		avg(if($9 >= 0, $9)):	$bid_avg,
		avg(if($10 >= 0, $10)):	$paid_avg
	'''

	process = subprocess.Popen (['./tesca', '-o', 'csv:headers', '-e', expression], stdin = subprocess.PIPE, stdout = subprocess.PIPE)

	# Stream the input chunk by chunk instead of buffering it, since it may be large.
	process.stdin.writelines (input)

	# communicate() closes stdin, collects stdout and waits for the exit status.
	out = process.communicate ()[0].strip ()

	if process.returncode != 0:
		return False

	return out

# One-off step: uncomment the two lines below to rebuild the '.tesca-data'
# cache from stdin before running the comparison.
#source = prepare (sys.stdin)
#open ('.tesca-data', 'wb').write (source)

# Read the cached data inside a `with` block so the file handle is closed
# as soon as the comparison is done.
with open ('.tesca-data', 'rb') as source:
	values = compare (source, [('ref1', 'ref2')])

# Keep only the entries whose computed value (last tuple element) exceeds 0.1.
# NOTE(review): negative differences are dropped by this test -- confirm that
# only increases are of interest.
values = [metric for metric in values if metric[4] > 0.1]

print (str (values))
