//-------------------------------------------------------------------------------------
//
// Copyright 2009 Intel Corporation
// All Rights Reserved
//
// Permission is granted to use, copy, distribute and prepare derivative works of this
// software for any purpose and without fee, provided, that the above copyright notice
// and this statement appear in all copies.  Intel makes no representations about the
// suitability of this software for any purpose.  THIS SOFTWARE IS PROVIDED "AS IS."
// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  Intel does not
// assume any responsibility for any errors which may appear in this software nor any
// responsibility to update it.
//

#include "StdAfx.h"
#include "Mutex.h"
#include "TaskScheduler.h"
#include "ParallelFor.h"

class CInternalForTask : public CInternalTask
{
public:
	CInternalForTask(CTaskCompletion* pCompletion, const IForTask* pForTask, const CRange &Range):
		CInternalTask(pCompletion),
		m_pForTask(pForTask),
		m_Range(Range)
	{}

	virtual bool Run(CWorkerThread* pThread) 
	{
		bool bOk;
		
		bOk = m_pForTask->DoRange(pThread, m_Range);
		
		delete this;
		return bOk;
	}
	
	virtual bool Split(CWorkerThread* pThread, CInternalTask** ppTask)
	{
		CInternalForTask*	pNew;
		int					CutIndex;
		
		*ppTask = NULL;

		/* check we can split */ 
		if (m_Range.end-m_Range.begin < 2*m_Range.granularity)
			return false;
		
		/* decide on cut point */ 
		CutIndex = (m_Range.begin + m_Range.end)/2;
		
		/* create new task and reduce our range*/ 
		pNew = new CInternalForTask(m_pCompletion, m_pForTask, CRange(CutIndex, m_Range.end, m_Range.granularity));
		m_Range.end = CutIndex;

		/* done */ 
		*ppTask = pNew;
		return true;
	}
	
	virtual bool PartialPop(CWorkerThread* pThread, CInternalTask** ppTask) 
	{
		CInternalForTask*	pNew;
		int					CutIndex;
		
		*ppTask = NULL;

		/* check we can split */ 
		if (m_Range.end-m_Range.begin < 2*m_Range.granularity)
			return false;
		
		/* decide on cut point */ 
		CutIndex = m_Range.begin + m_Range.granularity;
		
		/* create new task and reduce our range*/ 
		pNew = new CInternalForTask(m_pCompletion, m_pForTask, CRange(m_Range.begin, CutIndex, m_Range.granularity));
		m_Range.begin = CutIndex;

		/* done */ 
		*ppTask = pNew;
		return true;
	}
	
	virtual bool Spread(CTaskPool* pPool) 
	{
		CWorkerThread*		pThread;
		CInternalForTask*	pLastTask;
		int					begin, end, granularity;
		int					PartSize;
		unsigned			iThread, Count;
		
		/* count parts */ 
		begin		= m_Range.begin;
		end			= m_Range.end;
		granularity	= m_Range.granularity;
		
		Count = (m_Range.end-m_Range.begin)/m_Range.granularity;
		if (Count > pPool->m_ThreadCount)
			Count = pPool->m_ThreadCount;
			
		PartSize = (m_Range.end-m_Range.begin)/Count;

		/*  */ 
		m_Range = CRange(begin, begin+PartSize, m_Range.granularity);
		
		pLastTask = this;
		pThread   = &pPool->m_Thread[0];
		
		{
			CSpinMutexLock L(&pThread->m_TaskMutex);	
			
			m_pCompletion->MarkBusy(true);
			pThread->m_Task[0]		= this;
			pThread->m_TaskCount	= 1;
		}
		
		begin += PartSize;
		
		for(iThread=1; iThread<Count; iThread++)
		{
			pLastTask = new CInternalForTask(m_pCompletion, m_pForTask, CRange(begin, begin+PartSize, m_Range.granularity));
			pThread   = &pPool->m_Thread[iThread];
			
			{
				CSpinMutexLock L(&pThread->m_TaskMutex);
			
				m_pCompletion->MarkBusy(true);
				pThread->m_Task[0]		= pLastTask;
				pThread->m_TaskCount	= 1;
			}

			begin += PartSize;
		}
		
		pLastTask->m_Range.end = end;
		
		return true;
	}


public:
	const IForTask*			m_pForTask;
	CRange					m_Range;
};

// Runs pTask over Range using the given worker thread as the entry point.
//
// A CInternalForTask covering the whole range is allocated and pushed on
// pThread, then pThread->WorkUntilDone() is called with the completion object
// the task was created with. Always returns true; the results of PushTask()
// and WorkUntilDone(), if any, are not inspected.
//
// NOTE(review): the completion object lives on this stack frame and its
// address is handed to the task, so this relies on WorkUntilDone() not
// returning while any task still refers to it - confirm in CWorkerThread.
bool ParallelFor(CWorkerThread* pThread, const IForTask *pTask, const CRange &Range)
{
	CTaskCompletion Done;

	/* the task is heap-allocated; CInternalForTask::Run() deletes it */ 
	pThread->PushTask(new CInternalForTask(&Done, pTask, Range));
	pThread->WorkUntilDone(&Done);

	return true;
}

bool ParallelFor(const IForTask *pTask, const CRange &Range)
{
	return ParallelFor(CWorkerThread::GetCurrent(), pTask, Range);
}

