//=============================================================================
//
//   Copyright (c) 2006-2007, Carnegie Mellon University.  
//   All rights reserved.
//
//   Redistribution and use in source and binary forms, with or without
//   modification, are permitted provided that the following conditions
//   are met:
//
//   1. Redistributions of source code must retain the above copyright
//      notice, this list of conditions and the following disclaimer. 
//
//   2. Redistributions in binary form must reproduce the above copyright
//      notice, this list of conditions and the following disclaimer in
//      the documentation and/or other materials provided with the
//      distribution.
//
//   This work was supported in part by funding from the Defense Advanced 
//   Research Projects Agency and the National Science Foundation of the 
//   United States of America, and the CMU Sphinx Speech Consortium.
//
//   THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
//   ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
//   THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
//   PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
//   NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
//   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
//   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
//   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
//   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
//   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
//   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//=============================================================================
 
//-----------------------------------------------------------------------------
// 
//	LIVEMODE.CPP  -  Functions maintaining the state of the server when
//                   running in live mode and getting partial and final
//                   recognition results from the engines.
// 
// ----------------------------------------------------------------------------
// 
// BEFORE MAKING CHANGES TO THIS CODE, please read the appropriate 
// documentation, available in the Documentation folder. 
//
// ANY SIGNIFICANT CHANGES made should be reflected back in the documentation
// file(s)
//
// ANY CHANGES made (even small bug fixes) should be reflected in the history
// below, in reverse chronological order
// 
// HISTORY --------------------------------------------------------------------
//
//   [2005-08-05] (antoine) : started this
//
//-----------------------------------------------------------------------------

#include <libOlympusUtility.h>
using namespace Olympus;

#include "GalaxyInterface.h"
#include "EngineInterface.h"

#include "LiveMode.h"


#include "ad.h"			/* CMU A2D routines/data structures */

#include "BufferedAD.h"
#include "VAD.h"
#include "PowerVAD.h"
#include "GMMVAD.h"

// Maximum amount of audio requested from the BufferedAD at each iteration.
// NOTE(review): originally documented as a number of bytes, but the value is
// used as the element count of an int16 buffer and as the count passed to
// BufferedADRead() in ProcessAudio(), so the unit appears to be samples
// rather than bytes - confirm.
#define AUDIO_BUFFER_SIZE 16384

// Current audio processing state (ST_IDLE, ST_INSIDE_UTT or ST_OUTSIDE_UTT)
int iAudioState = ST_IDLE;
// Speech state recorded by notifyStateChange at the last speech/non-speech
// transition
TSpeechState ssState = ssSilence;

// Identifier of the current utterance (set by StartUtterance)
char *sUttID = NULL;
// Raw audio log file of the current utterance (opened by StartUtterance)
FILE *fpUttRawFile = NULL;

// Per-engine buffer and timestamp for the last partial hypothesis
// (allocated by InitLiveMode)
char **asEngineLastHyp = NULL;
int *aiEngineLastHypTimestamp = NULL;

// Provides an id for each engine_proc_partial request 
// (mostly for debug purposes, to synchronize AudioServer 
// and engine logs)
int iPartialRequestCounter = 0;
// Session timestamp at which partial hypotheses were last collected
long iLastPartialHypTimestamp = 0;
// Minimum difference between two session timestamps before partial
// hypotheses are collected again (same unit as GetSessionTimestamp(),
// presumably milliseconds - confirm)
long iPartialHypMinDelay = 10;

// Hub connections, defined elsewhere; used to send partial results
extern Olympus::Hubs hubs;
// Performs initializations specific to Live mode
void InitLiveMode() {
	// Allocate the array storing the last partial hyp for each engine
	asEngineLastHyp = (char**)malloc(iNumEngines*sizeof(char*));
	aiEngineLastHypTimestamp = (int*)malloc(iNumEngines*sizeof(int));
	for (int i = 0; i < iNumEngines; i++) {
		asEngineLastHyp[i] = (char*)malloc(MAX_STRING_SIZE*sizeof(char));
		aiEngineLastHypTimestamp[i] = -1;
	}
}

// Sends a message to the hub when the speech state switches between speech
// (main speech or DTMF) and non-speech.
//   ssNewState: the newly detected speech state
// When the speech/non-speech status of ssNewState is the same as that of the
// recorded state (ssState), nothing is sent and ssState is left untouched.
void notifyStateChange(TSpeechState ssNewState) {

//	Log(STD_STREAM, "State=%d", ssNewState);

	bool bInSpeech = (ssState == ssMainSpeech) || (ssState == ssDTMF);
	bool bNewInSpeech = (ssNewState == ssMainSpeech) || (ssNewState == ssDTMF);

	// The state did not change since last time, do nothing
	if (bNewInSpeech == bInSpeech)
		return;

	// String representation of the state
	string sState;
	if (ssNewState==ssMainSpeech)
		sState =  "speech";
	else if (ssNewState==ssDTMF)
		sState = "dtmf";
	else
		sState = "pause";

	// sUttID is NULL until an utterance has been started; never hand a NULL
	// pointer to a %s conversion
	const char *sUser = sUttID ? sUttID : "(null)";
	Log(STD_STREAM, "Start of %s segment detected [User:%s]", sState.c_str(), sUser);

	// Fills in the properties frame
	Gal_Frame gfProperties = Gal_ReadFrameFromString(FormatString(
		"{c properties \
				:state	\"%s\"\
			}", sState.c_str()).c_str());

	// Computes the timestamp of the event
	int iTimestamp = (iABStartPointer + iABReadPointer)/iSampleRateMS + 
		iTimestampOffset;
	// Start of speech detection typically introduces a delay
	if (ssNewState == ssMainSpeech)
		iTimestamp -= pvVAD->GetSilenceToSpeechDelay();

	// Creates the result frame
	Gal_Frame gfStateTransition = Gal_MakeFrame("main", GAL_CLAUSE);
	Gal_SetProp(gfStateTransition, ":event_type", Gal_StringObject("vad"));
	Gal_SetProp(gfStateTransition, ":event_timestamp", 
		Gal_IntObject(iTimestamp));
	Gal_SetProp(gfStateTransition, ":properties", Gal_FrameObject(gfProperties));

	// Logs the notification; the printed frame is released right after
	// logging, as the other functions of this file do
	int iLength;
	char *sFrame = Gal_PPFrameToString(gfStateTransition, NULL, &iLength);
	Log( STD_STREAM, "Sending VAD state change notification to the hub:\n%s", sFrame);
	free(sFrame);

	// sends the frame to the hub
	GalIO_CommWriteFrame(pCommStruct, gfStateTransition, GAL_FALSE);
	Gal_FreeFrame(gfStateTransition);

	ssState = ssNewState;
}

// A: Main loop
void ProcessAudio() {
	char sToEngine[MAX_MESSAGE_SIZE];
	int16 pBuffer[AUDIO_BUFFER_SIZE];

	if (iAudioState == ST_IDLE) {
		
		FlushAllEngineSockets();
		
		return;
	} else if (iAudioState == ST_INSIDE_UTT) {
		// Gets the next block of audio data
		int iNumRead = BufferedADRead(pBuffer, AUDIO_BUFFER_SIZE);
		if (iNumRead > 0) {
			
			// Log the audio to the utterance raw file
			if (fpUttRawFile) {
				fwrite(pBuffer, iNumRead, sizeof(short), fpUttRawFile);
			}

			// Check whether we're in a speech or silence/noise segment
			TSpeechState ssNewState = pvVAD->GetCurrentSpeechState(pBuffer, iNumRead);
			notifyStateChange(ssNewState);

			// Announces the upcoming data to all engines
			sprintf_s( sToEngine, MAX_MESSAGE_SIZE, "engine_proc_raw %d \n", iNumRead * (int)sizeof(short));
			SendMessageToAllEngines( sToEngine);

			// Sends the actual data
			SendDataToAllEngines((char*)pBuffer, iNumRead * (int)sizeof(short));

		}

		long iTimestamp = GetSessionTimestamp();
		if (iTimestamp - iLastPartialHypTimestamp > iPartialHypMinDelay) {		
			iPartialRequestCounter++;

			Gal_Frame gfPartial = CollectPartialHypotheses();

			// if we got some results, sends the frame to the hub
      if (gfPartial) {
        //if we're connected to multiple hubs, share this parse with all of them
        hubs.send(gfPartial);

        Gal_FreeFrame(gfPartial);
      }

			iLastPartialHypTimestamp = iTimestamp;
		}
	} else if (iAudioState == ST_OUTSIDE_UTT) {
		
		FlushAllEngineSockets();

		// Gets the next block of audio data
		int iNumRead = BufferedADRead(pBuffer, AUDIO_BUFFER_SIZE);
		if (iNumRead > 0) {
			
			// Check whether we're in a speech or silence/noise segment
			TSpeechState ssNewState = pvVAD->GetCurrentSpeechState(pBuffer, iNumRead); 
			notifyStateChange(ssNewState);
		}
	}
}

// A: Signals the beginning of an utterance
//    iTimestamp: timestamp passed to SetBufferedADPointer and sent to the
//                engines in the engine_begin_utt message
//    sID:        utterance id; "DUMMY_ID" is used when NULL
//    Opens the utterance raw audio log file, notifies all engines, clears the
//    last partial hypothesis of each engine and switches the audio state to
//    ST_INSIDE_UTT.
void StartUtterance(int iTimestamp, char *sID) {
	SetBufferedADPointer(iTimestamp);

	// Duplicates the new id BEFORE releasing the previous one (sID may alias
	// the current sUttID), then frees the id of the previous utterance, which
	// would otherwise be leaked at each new utterance
	char *sPreviousUttID = sUttID;
	sUttID = _strdup(sID ? sID : "DUMMY_ID");
	free(sPreviousUttID);

	// Opens the utterance raw audio log file
	char sRawUttFileName[MAX_FILENAME_SIZE];
	sprintf_s(sRawUttFileName, MAX_FILENAME_SIZE, "%s%s.raw", hub_log_dir, sUttID);
	if (fopen_s(&fpUttRawFile, sRawUttFileName, "wb") != 0) {
		// Makes sure no attempt is made to write to or close the file
		fpUttRawFile = NULL;
		Log(ERR_STREAM, "Failed to open utterance log file %s", sRawUttFileName);
	} else {
		Log(STD_STREAM, "Opened utterance log file %s", sRawUttFileName);
	}

	// Announces the new utterance to all engines
	char sToEngine[MAX_MESSAGE_SIZE];
	sprintf_s(sToEngine, MAX_MESSAGE_SIZE, "engine_begin_utt %s %d \n", sUttID, iTimestamp);
	SendMessageToAllEngines( sToEngine);

	// Resets the last partial hypothesis of each engine to the empty string
	for (int i = 0; i < iNumEngines; i++) {
		strncpy_s(asEngineLastHyp[i], MAX_STRING_SIZE, "", 1);
	}

	iLastPartialHypTimestamp = 0;

	// (antoine) the following is just for debugging purposes
//	SendMessageToAllEngines("engine_end_utt \nengine_proc_result \n");

	iAudioState = ST_INSIDE_UTT;
}

// Requests and gathers the current partial hypothesis for each engine
Gal_Frame CollectPartialHypotheses() {

	int iTimestamp = 0;

	// Gets the current partial hypothesis from each engine
	Gal_Frame *gfResults = (Gal_Frame*)GetPartialResultFromAllEngines();

	// Collect results from all engines
	bool bGotResult = false;
	Gal_Object *pgoUtterances = (Gal_Object *)malloc(sizeof(Gal_Object)*iNumEngines);
	for (int i = 0; i < iNumEngines; i++) {
		if (gfResults[i]) {
			// Only send if there is a partial hypothesis and it is different from 
			// the last one we sent
			// extracts the list of hypothesis returned by this engine
			int iNumHyps = 0;
			Gal_Object *pgoHyps = Gal_GetList(gfResults[i], ":partial_results", &iNumHyps);

			// for now we only use the first hypothesis
			// THIS SHOULD BE CHANGED TO ALLOW N-BEST LISTS
			pgoUtterances[i] = pgoHyps[0];
			for (int j = 1; j < iNumHyps; j++) {
				Gal_FreeObject(pgoHyps[j]);
			}

			// this tests whether at least one hypothesis is non-null
			Gal_Frame gfHyp = Gal_FrameValue(pgoUtterances[i]);
			// check whether this is an emptyhyp or not
			char *lpszConfHyp = Gal_GetString(gfHyp, ":hyp");
			if (lpszConfHyp && (strlen(lpszConfHyp)>0)) {
				if (Gal_GetObject(gfHyp, ":end_timestamp")) {
					iTimestamp = Gal_GetInt(gfHyp, ":end_timestamp");
				}				

				if (iTimestamp > aiEngineLastHypTimestamp[i]) {
					Log( STD_STREAM, "%s: %s", asEngineName[i], lpszConfHyp);

					bGotResult = true;
				} else {

					// we didn't get any new result from this engine
					Gal_FreeObject(pgoUtterances[i]);

					Gal_Frame gfEmptyHypFrame = Gal_MakeFrame("hypothesis", GAL_CLAUSE);
					Gal_SetProp(gfEmptyHypFrame, ":engine_name", Gal_StringObject(asEngineName[i]));
					Gal_SetProp(gfEmptyHypFrame, ":uttid", Gal_StringObject(sUttID));
					Gal_SetProp(gfEmptyHypFrame, ":emptyhyp", 
						Gal_StringObject("This space intentionally left blank"));
					pgoUtterances[i] = Gal_FrameObject(gfEmptyHypFrame);
				}
			} else {

				// we didn't get any new result from this engine
				Gal_FreeObject(pgoUtterances[i]);

				Gal_Frame gfEmptyHypFrame = Gal_MakeFrame("hypothesis", GAL_CLAUSE);
				Gal_SetProp(gfEmptyHypFrame, ":engine_name", Gal_StringObject(asEngineName[i]));
				Gal_SetProp(gfEmptyHypFrame, ":uttid", Gal_StringObject(sUttID));
				Gal_SetProp(gfEmptyHypFrame, ":emptyhyp", 
					Gal_StringObject("This space intentionally left blank"));
				pgoUtterances[i] = Gal_FrameObject(gfEmptyHypFrame);
			}
		} else {
			// we didn't get any result from this engine
			Gal_Frame gfEmptyHypFrame = Gal_MakeFrame("hypothesis", GAL_CLAUSE);
			Gal_SetProp(gfEmptyHypFrame, ":engine_name", Gal_StringObject(asEngineName[i]));
			Gal_SetProp(gfEmptyHypFrame, ":uttid", Gal_StringObject(sUttID));
			Gal_SetProp(gfEmptyHypFrame, ":emptyhyp", 
				Gal_StringObject("This space intentionally left blank"));
			pgoUtterances[i] = Gal_FrameObject(gfEmptyHypFrame);
		}
	}

	if (bGotResult) {
		Gal_Frame gfRepl = Gal_MakeFrame("main", GAL_CLAUSE);
		Gal_SetProp(gfRepl, ":timestamp", 
			Gal_IntObject(iTimestamp));
		Gal_SetProp(gfRepl, ":partialhyps", 
			Gal_ListObject(pgoUtterances, iNumEngines));

		int iLength;
		char *sFrame = Gal_PPFrameToString(gfRepl, NULL, &iLength);
		Log( STD_STREAM, "Sending partial results [User:%s]\n%s", sUttID, sFrame);
		free(sFrame);
		return gfRepl;
	} else {
		Log( STD_STREAM, "No partial hypothesis yet [User:%s]", sUttID);
		return NULL;
	}
}

// A: Signals the end of an utterance and gets 
//    the final result from the engines
//    Switches the audio state to ST_OUTSIDE_UTT, closes the utterance raw
//    audio file and returns a "main" frame holding :timestamp and either
//    :confhyps (asEngineNBestSize[i] entries per engine, placeholders filling
//    the slots for which an engine returned no hypothesis) or :emptyhyp when
//    no engine returned a non-empty hypothesis.
Gal_Frame FinalizeUtterance() {

	int iTimestamp = 0;

	iAudioState = ST_OUTSIDE_UTT;

	// Close the utterance file and forget the handle, so that it cannot be
	// closed a second time by a later FinalizeUtterance/CancelUtterance
	if (fpUttRawFile) {
		fclose(fpUttRawFile);
		fpUttRawFile = NULL;
	}

	// Asks and wait for the final hypothesis from each engine
	Gal_Frame *gfResults = (Gal_Frame*)GetFinalResultFromAllEngines();

	// Extracts the hypothesis from each engine
	bool bGotResult = false;

	Gal_Object *pgoUtterances = (Gal_Object *)malloc(sizeof(Gal_Object)*asTotalNBestSize); // (bthomson) Changed for NBest support. Previously * iNumEngines

	int curOutputN = 0;
	for (int i = 0; i < iNumEngines; i++) {
		// extracts the list of hypothesis returned by this engine (if any)
		int iNumHyps = 0;
		Gal_Object *pgoHyps = NULL;
		if (gfResults[i])
			pgoHyps = Gal_GetList(gfResults[i], ":results", &iNumHyps);
		if (!pgoHyps)
			iNumHyps = 0;

		// (bthomson) CHANGED TO ALLOW N-BEST LISTS
		// Every engine fills exactly asEngineNBestSize[i] consecutive slots
		for (int j = 0; j < asEngineNBestSize[i]; j++) {
			if (j < iNumHyps) {
				pgoUtterances[curOutputN] = pgoHyps[j];
				// this tests whether at least one engine returned a non-null
				// hypothesis
				Gal_Frame gfHyp = Gal_FrameValue(pgoUtterances[curOutputN]);
				// check whether this is an emptyhyp or not
				char *lpszConfHyp = Gal_GetString(gfHyp, ":hyp");
				if (lpszConfHyp && (strlen(lpszConfHyp)>0)) {
					if (Gal_GetObject(gfHyp, ":end_timestamp")) {
						iTimestamp = Gal_GetInt(gfHyp, ":end_timestamp");
					}
					// this is a real hyp (non-empty)
					bGotResult = true;
				}
				// never hand a NULL pointer to a %s conversion
				Log( STD_STREAM, "%s: %s", asEngineName[i], 
					lpszConfHyp ? lpszConfHyp : "(null)");
			} else {
				// the engine returned no result, or fewer hypotheses than its
				// n-best size: fill the slot with a placeholder rather than
				// reading past the end of the returned list
				Gal_Frame gfEmptyHypFrame = Gal_MakeFrame("hypothesis", GAL_CLAUSE);
				Gal_SetProp(gfEmptyHypFrame, ":engine_name", Gal_StringObject(asEngineName[i]));
				Gal_SetProp(gfEmptyHypFrame, ":uttid", Gal_StringObject(sUttID));
				Gal_SetProp(gfEmptyHypFrame, ":emptyhyp", 
					Gal_StringObject("This space intentionally left blank"));
				pgoUtterances[curOutputN] = Gal_FrameObject(gfEmptyHypFrame);
				if (iNumHyps == 0) {
					Log( STD_STREAM, "%s: **NO RESULTS**", asEngineName[i]);
				}
			}
			// the output slot advances for placeholders too, so that they do
			// not overwrite the hypotheses of the previous engines
			curOutputN ++;
		}

		// Free any hypotheses not used
		for (int j = asEngineNBestSize[i]; j < iNumHyps; j++) {
			Gal_FreeObject(pgoHyps[j]);
		}
	}

	Gal_Frame gfRepl = Gal_MakeFrame("main", GAL_CLAUSE);
	if (bGotResult) {
		Gal_SetProp(gfRepl, ":timestamp", 
			Gal_IntObject(iTimestamp));
		// curOutputN is the number of slots actually filled; it equals
		// asTotalNBestSize when that total is the sum of asEngineNBestSize
		Gal_SetProp(gfRepl, ":confhyps", 
			Gal_ListObject(pgoUtterances, curOutputN )); // (bthomson) Change for NBest handling
	}
	else {
		// The hypothesis array is not attached to the reply: release it.
		// NOTE(review): the objects it points to are left untouched because
		// the ownership of the engine-provided hypotheses is not visible from
		// this file - confirm whether they should be freed here as well.
		free(pgoUtterances);

		// We didn't get any results from the engines, use current timestamp
		iTimestamp = GetSessionTimestamp(GetCurrentAbsoluteTimestamp());
		Gal_SetProp(gfRepl, ":timestamp", 
			Gal_IntObject(iTimestamp));
		Gal_SetProp(gfRepl, ":emptyhyp", 
			Gal_StringObject("This space intentionally left blank"));
	}

	int iLength;
	char *sFrame = Gal_PPFrameToString(gfRepl, NULL, &iLength);
	Log( STD_STREAM, "Sending final results [User:%s]\n%s", sUttID, sFrame);
	free(sFrame);

	return gfRepl;
}

// A: Cancels the current utterance
//    Switches the audio state to ST_OUTSIDE_UTT, closes the utterance raw
//    audio file and tells all engines to cancel the utterance.
void CancelUtterance() {

	iAudioState = ST_OUTSIDE_UTT;

	// Close the utterance file and forget the handle, so that it cannot be
	// closed a second time by a later FinalizeUtterance/CancelUtterance
	if (fpUttRawFile) {
		fclose(fpUttRawFile);
		fpUttRawFile = NULL;
	}

	// Notifies all engines to cancel the utterance
	SendMessageToAllEngines("engine_cancel_utt \n");
	FlushAllEngineSockets();
}