#ifdef __GNUC__
#include <cstdio>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <map>
#include <string>
#include <vector>

#include <cupti_version.h>
#include <cupti_events.h>
#include <cupti_metrics.h>
#include <cuda_runtime_api.h>

/*
 * Specific errors from CUDA lib.
 *
 * Reports a failed CUDA Driver API call.
 *
 *   err    - status value produced by the call; evaluated exactly once, so it
 *            is safe to pass the call expression itself.
 *   cufunc - C string naming the function that produced err (printed via %s).
 *
 * When err differs from CUDA_SUCCESS a diagnostic line is printed to stdout.
 * The macro only reports: it neither aborts nor returns, so execution
 * continues after it.
 *
 * The expansion is one brace-enclosed block. This keeps the temporary local,
 * works with or without a trailing semicolon at the call site, and prevents a
 * following `else` from silently binding to the macro's internal `if`.
 */
#define CHECK_CU_ERROR(err, cufunc) \
{ \
const int tau_cu_err_ = (int)(err); \
if (tau_cu_err_ != CUDA_SUCCESS) \
{ \
printf ("[%s:%d] Error %d for CUDA Driver API function '%s'. cuptiQuery failed\n", __FILE__, __LINE__, tau_cu_err_, (cufunc)); \
} \
}

/*
 * Specific errors from CuPTI lib.
 *
 * Reports a failed CUPTI API call.
 *
 *   err       - status value produced by the call; evaluated exactly once, so
 *               it is safe to pass the call expression itself.
 *   cuptifunc - C string naming the function that produced err (printed via
 *               %s).
 *
 * When err differs from CUPTI_SUCCESS a diagnostic line is printed to stdout.
 * The macro only reports: it neither aborts nor returns, so execution
 * continues after it.
 *
 * The expansion is one brace-enclosed block. This keeps the temporary local,
 * works with or without a trailing semicolon at the call site, and prevents a
 * following `else` from silently binding to the macro's internal `if`.
 */
#define CHECK_CUPTI_ERROR(err, cuptifunc) \
{ \
const int tau_cupti_err_ = (int)(err); \
if (tau_cupti_err_ != CUPTI_SUCCESS) \
{ \
printf ("[%s:%d] Error %d for CUPTI API function '%s'. cuptiQuery failed\n", __FILE__, __LINE__, tau_cupti_err_, (cuptifunc)); \
} \
}

// Size limits for CUPTI names and descriptions.
// NOTE(review): presumably used as buffer lengths when querying event/domain
// names and descriptions from CUPTI -- the uses are not in this header; confirm.
#define TAU_CUPTI_MAX_NAME 40
#define TAU_CUPTI_MAX_DESCRIPTION 480

// Counter kind selectors.
// NOTE(review): presumably the values accepted by the `type` argument of
// Tau_CuptiLayer_set_event_name() -- confirm against the implementation.
#define TAU_CUPTI_COUNTER_ACTUAL 0
#define TAU_CUPTI_COUNTER_BOUNDED 1
#define TAU_CUPTI_COUNTER_AVERAGED 2

//This setting aggregates the event values collected across all event
//domains, so the event results are reported as if every SM had an event
//domain available to collect the value. WARNING: If the kernel being measured
//is not large enough to utilize all the available SMs on a device, this
//aggregation will result in skewed data.
#define TAU_CUPTI_NORMALIZE_EVENTS_ACROSS_ALL_SMS

//#define DISABLE_CUPTI


// These declarations really should come from TauInit.h and TauAPI.h, but
// including those headers here causes unexplained compile errors. As a
// workaround, the two functions this header calls (from the constructors and
// destructors of the container wrappers below) are redeclared locally.
extern "C" int Tau_init_initializeTAU();
extern "C" void Tau_destructor_trigger();


/*
 * Describes one CUPTI counter: the CUDA device, CUPTI event domain and CUPTI
 * event handles, plus the human-readable strings that go with them.
 * Only declarations live here; the member functions are defined elsewhere.
 */
struct CuptiCounterEvent
{
    // Prints a header for a listing of counters.
    // NOTE(review): presumably pairs with print() below -- confirm in the definition.
    static void printHeader();

    // NOTE(review): the three arguments look like zero-based indices into the
    // device / domain / event enumerations -- confirm against the definition.
    CuptiCounterEvent(int device_n, int domain_n, int event_n);

    // CUDA / CUPTI handles identifying the counter.
    CUdevice device;
	CUpti_EventDomainID domain;
	CUpti_EventID event;

	// Human-readable names and description for the handles above.
	std::string device_name;
	std::string domain_name;
	std::string event_name;
	std::string event_description;
	std::string tag; // string presented to the user.

	// Prints this counter.
	void print();
};

struct CuptiCounterMap: public std::map<std::string, CuptiCounterEvent*>
{
    CuptiCounterMap() {
        Tau_init_initializeTAU();
    }
    ~CuptiCounterMap() {
        Tau_destructor_trigger();
    }
};
typedef CuptiCounterMap counter_map_t;
typedef CuptiCounterMap::iterator counter_map_it;

/*
 * Vector of CuptiCounterEvent pointers whose lifetime is tied to TAU: default
 * construction calls Tau_init_initializeTAU() and destruction calls
 * Tau_destructor_trigger().
 *
 * NOTE(review): the implicitly generated copy constructor does not call
 * Tau_init_initializeTAU(), yet every copy still runs the destructor --
 * confirm that copies are either avoided or harmless.
 */
struct CuptiCounterVector : std::vector<CuptiCounterEvent*>
{
    CuptiCounterVector()
    {
        // The returned status is deliberately ignored.
        (void) Tau_init_initializeTAU();
    }

    ~CuptiCounterVector()
    {
        Tau_destructor_trigger();
    }
};
typedef CuptiCounterVector counter_vec_t;

/*
 * int-to-int map whose lifetime is tied to TAU: default construction calls
 * Tau_init_initializeTAU() and destruction calls Tau_destructor_trigger().
 *
 * NOTE(review): the implicitly generated copy constructor does not call
 * Tau_init_initializeTAU(), yet every copy still runs the destructor. Some
 * functions declared in this header return this type by value -- confirm that
 * the resulting temporaries are harmless.
 */
struct CuptiCounterIdMap : std::map<int, int>
{
    CuptiCounterIdMap()
    {
        // The returned status is deliberately ignored.
        (void) Tau_init_initializeTAU();
    }

    ~CuptiCounterIdMap()
    {
        Tau_destructor_trigger();
    }
};
typedef CuptiCounterIdMap counter_id_map_t;

#ifdef DISABLE_CUPTI

/*
 * No-op stand-ins for the CuptiLayer entry points, compiled only when
 * DISABLE_CUPTI is defined.
 *
 * NOTE(review): these are non-inline definitions in a header, so in this
 * configuration the header can be included from only one translation unit
 * without duplicate-symbol link errors. That matches the original layout and
 * is left unchanged here.
 */

// Defined with C linkage so that it agrees with the extern "C" declaration of
// the same function further down in this header. Reports zero events because
// nothing can be registered while CUPTI is disabled.
extern "C" int Tau_CuptiLayer_get_num_events() { return 0; }

extern bool Tau_CuptiLayer_is_initialized() { return false;}

extern void Tau_CuptiLayer_init() {}

extern void Tau_CuptiLayer_finalize() {}

extern void Tau_CuptiLayer_enable() {}

extern void Tau_CuptiLayer_disable() {}

extern void Tau_CuptiLayer_register_counter(CuptiCounterEvent* ev) {}

// NOTE(review): declared as returning int here but void in the CUPTI-enabled
// branch of this header -- confirm which signature the definition uses.
extern int Tau_CuptiLayer_Initialize_callbacks();

extern void Tau_CuptiLayer_Initialize_Map();

counter_map_t Counter_Map;

/* mapping the metric number to the cupti metric number */
// A namespace-scope variable and a function cannot share one name, so each
// accessor owns its backing map as a function-local static. Both return a
// copy of that map (return by value).
extern counter_id_map_t internal_id_map()
{
    static counter_id_map_t id_map;
    return id_map;
}

extern counter_id_map_t internal_id_map_backwards()
{
    static counter_id_map_t id_map_backwards;
    return id_map_backwards;
}
#else

/*
 * C++-linkage entry points of the CUPTI layer. Only prototypes appear in this
 * header; the definitions are provided elsewhere.
 */

// Reports whether the CUPTI layer has been initialized.
extern bool Tau_CuptiLayer_is_initialized();

extern void Tau_CuptiLayer_enable();

extern void Tau_CuptiLayer_disable();

extern void Tau_CuptiLayer_init();

extern void Tau_CuptiLayer_finalize();

extern void Tau_CuptiLayer_register_all_counters();

extern void Tau_CuptiLayer_register_counter(CuptiCounterEvent* ev);

// NOTE(review): the DISABLE_CUPTI branch of this header declares this function
// as returning int -- confirm which signature is intended.
extern void Tau_CuptiLayer_Initialize_callbacks();

extern void Tau_CuptiLayer_Initialize_Map();

// Returns a reference to the counter map.
extern counter_map_t& Tau_CuptiLayer_Counter_Map();

// NOTE(review): "interal_id_map" looks like a misspelling of "internal_id_map"
// (the spelling used in the DISABLE_CUPTI branch). The name is left as-is
// because the definition is not visible from this header -- verify before
// renaming. Also note the map is returned by value, so each call yields a
// temporary whose destructor calls Tau_destructor_trigger().
extern counter_id_map_t interal_id_map();
#endif

#endif //__GNUC__

/*
 * C interface between TauMetrics, TauReadMetrics, and CuptiLayer. A C interface
 * is needed because, while TauMetrics and TauReadMetrics (along with the rest
 * of TAU) may be compiled with any compiler, CuptiLayer must be compiled by g++.
*/

#include <stdint.h>



// C-linkage entry points. These prototypes sit outside the __GNUC__ guard, so
// they are visible to every compiler that includes this header. Because they
// use `extern "C"` and `bool`, the including translation unit must still be
// compiled as C++.
// NOTE(review): the parameter meanings below are inferred from names only;
// confirm against the definitions.

// Returns the number of events.
extern "C" int Tau_CuptiLayer_get_num_events();

// NOTE(review): `type` is presumably one of the TAU_CUPTI_COUNTER_* values.
extern "C" void Tau_CuptiLayer_set_event_name(int metric_n, int type);

extern "C" char const * Tau_CuptiLayer_get_event_name(int metric_n);

extern "C" int Tau_CuptiLayer_get_cupti_event_id(int metric_n);

extern "C" int Tau_CuptiLayer_get_metric_event_id(int metric_n);

// NOTE(review): `cb` is presumably a caller-provided buffer that receives the
// counter values and `d` a device index -- required buffer size unknown here.
extern "C" void Tau_CuptiLayer_read_counters(int d, uint64_t *cb);

extern "C" uint64_t Tau_CuptiLayer_read_counter(int metric_n);

extern "C" bool Tau_CuptiLayer_is_cupti_counter(char const * str);

extern "C" void Tau_CuptiLayer_register_string(char const * str, int metric_n);

// The spelling "Synchonize" is part of the exported symbol name; do not
// correct it here without updating every definition and caller.
extern "C" void Tau_cuda_Event_Synchonize();
