Table of Contents
The database schema in TAUdb is designed to flexibly and efficiently store multidimensional parallel performance data. There are 5 dimensions to the actual timer measurements, and 4 dimensions to the counter measurements
Timer dimensions
- 
                  
Process and thread of execution
 - 
                  
Timer source code location (i.e. foo())
 - 
                  
Metric of interest (i.e. FP_OPS, TIME)
 - 
                  
Phase of execution (i.e. iteration number, timestamp)
 - 
                  
Dynamic timer context (i.e. parameter values)
 
Counter dimensions
- 
                  
Process and thread of execution
 - 
                  
Timer source code location (i.e. foo())
 - 
                  
Phase of execution (i.e. iteration number, timestamp)
 - 
                  
Dynamic timer context (i.e. parameter values)
 
Below is the SQL schema definition for TAUdb.
/****************************/
/* CREATE THE STATIC TABLES */
/****************************/
CREATE TABLE schema_version (
 version     INT NOT NULL,
 description VARCHAR NOT NULL
);
/* IF THE SCHEMA IS MODIFIED, INCREMENT THIS VALUE */
/* 0 = PERFDMF (ORIGINAL) */
/* 1 = TAUDB (APRIL, 2012) */
/*VALUES (1, 'TAUdb redesign from Spring, 2012');*/
INSERT INTO schema_version (version, description) 
  VALUES (2, 'Changes after Nov. 9, 2012 release');
/* These are our supported parsers. */
CREATE TABLE data_source (
 id          INT UNIQUE NOT NULL,
 name        VARCHAR NOT NULL,
 description VARCHAR
);
INSERT INTO data_source (name,id,description) 
  VALUES ('ppk',0,'TAU Packed profiles (TAU)');
INSERT INTO data_source (name,id,description) 
  VALUES ('TAU profiles',1,'TAU profiles (TAU)');
INSERT INTO data_source (name,id,description) 
  VALUES ('DynaProf',2,'PAPI DynaProf profiles (UTK)');
INSERT INTO data_source (name,id,description) 
  VALUES ('mpiP',3,'mpiP: Lightweight, Scalable MPI Profiling (Vetter, Chambreau)');
INSERT INTO data_source (name,id,description) 
  VALUES ('HPM',4,'HPM Toolkit profiles (IBM)');
INSERT INTO data_source (name,id,description) 
  VALUES ('gprof',5,'gprof profiles (GNU)');
INSERT INTO data_source (name,id,description) 
  VALUES ('psrun',6,'PerfSuite psrun profiles (NCSA)');
INSERT INTO data_source (name,id,description) 
  VALUES ('pprof',7,'TAU pprof.dat output (TAU)');
INSERT INTO data_source (name,id,description) 
  VALUES ('Cube',8,'Cube data (FZJ)');
INSERT INTO data_source (name,id,description) 
  VALUES ('HPCToolkit',9,'HPC Toolkit profiles (Rice Univ.)');
INSERT INTO data_source (name,id,description) 
  VALUES ('SNAP',10,'TAU Snapshot profiles (TAU)');
INSERT INTO data_source (name,id,description) 
  VALUES ('OMPP',11,'OpenMP Profiler profiles (Fuerlinger)');
INSERT INTO data_source (name,id,description) 
  VALUES ('PERIXML',12,'Data Exchange Format (PERI)');
INSERT INTO data_source (name,id,description) 
  VALUES ('GPTL',13,'General Purpose Timing Library (ORNL)');
INSERT INTO data_source (name,id,description) 
  VALUES ('Paraver',14,'Paraver profiles (BSC)');
INSERT INTO data_source (name,id,description) 
  VALUES ('IPM',15,'Integrated Performance Monitoring (NERSC)');
INSERT INTO data_source (name,id,description) 
  VALUES ('Google',16,'Google profiles (Google)');
INSERT INTO data_source (name,id,description) 
  VALUES ('Cube3',17,'Cube 3D profiles (FZJ)');
INSERT INTO data_source (name,id,description) 
  VALUES ('Gyro',100,'Self-timing profiles from Gyro application');
INSERT INTO data_source (name,id,description) 
  VALUES ('GAMESS',101,'Self-timing profiles from GAMESS application');
INSERT INTO data_source (name,id,description) 
  VALUES ('Other',999,'Other profiles');
/* threads make it convenient to identify timer values.
   Special values for thread_index:
   -1 mean (nulls ignored)
   -2 total
   -3 stddev (nulls ignored)
   -4 min
   -5 max
   -6 mean (nulls are 0 value)
   -7 stddev (nulls are 0 value)
*/
CREATE TABLE derived_thread_type (
 id INT NOT NULL,
 name VARCHAR NOT NULL,
 description VARCHAR NOT NULL
);
INSERT INTO derived_thread_type (id, name, description) 
  VALUES (-1, 'MEAN', 'MEAN (nulls ignored)');
INSERT INTO derived_thread_type (id, name, description) 
  VALUES (-2, 'TOTAL', 'TOTAL');
INSERT INTO derived_thread_type (id, name, description) 
  VALUES (-3, 'STDDEV', 'STDDEV (nulls ignored)');
INSERT INTO derived_thread_type (id, name, description) 
  VALUES (-4, 'MIN', 'MIN');
INSERT INTO derived_thread_type (id, name, description) 
  VALUES (-5, 'MAX', 'MAX');
INSERT INTO derived_thread_type (id, name, description) 
  VALUES (-6, 'MEAN', 'MEAN (nulls are 0 value)');
INSERT INTO derived_thread_type (id, name, description) 
  VALUES (-7, 'STDDEV', 'STDDEV (nulls are 0 value)');
/**************************/
/* CREATE THE TRIAL TABLE */
/**************************/
/* trials are the top level table */
CREATE TABLE trial (
 id                  SERIAL NOT NULL PRIMARY KEY,
 name                VARCHAR,
 /* where did this data come from? */
 data_source         INT,
 /* number of processes */
 node_count          INT,
 /* legacy values - these are actually "max" values - i.e. not all nodes have
  * this many threads */
 contexts_per_node   INT,
 /* how many threads per node? */
 threads_per_context INT,
 /* total number of threads */
 total_threads       INT,
 /* reference to the data source table. */
 FOREIGN KEY(data_source) REFERENCES data_source(id) 
   ON DELETE NO ACTION ON UPDATE NO ACTION
);
/******************************/
/* CREATE THE DATA DIMENSIONS */
/******************************/
/* threads are the "location" dimension */
CREATE TABLE thread (
 id           SERIAL NOT NULL PRIMARY KEY,
 /* trial this thread belongs to */
 trial        INT NOT NULL,
 /* process rank, really */
 node_rank    INT NOT NULL,
 /* legacy value */
 context_rank INT NOT NULL,
 /* thread rank relative to the process */
 thread_rank  INT NOT NULL,
 /* thread index from 0 to N-1 */
 thread_index INT NOT NULL,
 FOREIGN KEY(trial) REFERENCES trial(id) ON DELETE 
   NO ACTION ON UPDATE NO ACTION
);
/* metrics are things like num_calls, num_subroutines, TIME, PAPI
   counters, and derived metrics. */
CREATE TABLE metric (
 id      SERIAL NOT NULL PRIMARY KEY,
 /* trial this value belongs to */
 trial   INT NOT NULL,
 /* name of the metric */
 name    VARCHAR NOT NULL,
 /* if this metric is derived by one of the tools */
 derived BOOLEAN NOT NULL DEFAULT FALSE,
 FOREIGN KEY(trial) REFERENCES trial(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION
);
/* timers are timers, capturing some interval value.  For callpath or
   phase profiles, the parent refers to the calling function or phase. */
CREATE TABLE timer (
 id                SERIAL NOT NULL PRIMARY KEY,
 /* trial this value belongs to */
 trial             INT NOT NULL,
 /* name of the timer */
 name              VARCHAR NOT NULL,
 /* short name of the timer - without source or parameter info */
 short_name        VARCHAR NOT NULL,
 /* filename */
 source_file       VARCHAR,
 /* line number of the start of the block of code */
 line_number       INT,
 /* line number of the end of the block of code */
 line_number_end   INT,
 /* column number of the start of the block of code */
 column_number     INT,
 /* column number of the end of the block of code */
 column_number_end INT,
 FOREIGN KEY(trial) REFERENCES trial(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION
);
/* timer index on the trial and name columns */
CREATE INDEX timer_trial_index on timer (trial, name);
/***********************************/
/* CREATE THE TIMER RELATED TABLES */
/***********************************/
/* timer groups are the groups such as TAU_DEFAULT,
   MPI, OPENMP, TAU_PHASE, TAU_CALLPATH, TAU_PARAM, etc. 
   This mapping table allows for NxN mappings between timers
   and groups */
CREATE TABLE timer_group (
 timer INT,
 group_name  VARCHAR NOT NULL,
 FOREIGN KEY(timer) REFERENCES timer(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION
);
/* index for faster queries into groups */
CREATE INDEX timer_group_index on timer_group (timer, group_name);
/* timer parameters are parameter based profile values. 
 * an example is foo (x,y) where x=4 and y=10. In that example,
 * timer would be the index of the timer with the
 * name 'foo (x,y) <x>=<4> <y>=<10>'. This table would have two
 * entries, one for the x value and one for the y value. */
CREATE TABLE timer_parameter (
 timer     INT,
 parameter_name  VARCHAR NOT NULL,
 parameter_value VARCHAR NOT NULL,
 FOREIGN KEY(timer) REFERENCES timer(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION
);
/* timer callpath have the information about the call graph in a trial.
 * If the profile is "flat", these will all have no parents. Otherwise,
 * the parent points to a node in the callgraph, the calling timer 
 * (function). */
CREATE TABLE timer_callpath (
 id        SERIAL NOT NULL PRIMARY KEY,
 /* what timer is this? */
 timer     INT NOT NULL,
 /* what is the parent timer? */
 parent    INT,
 FOREIGN KEY(timer) REFERENCES timer(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION,
 FOREIGN KEY(parent) REFERENCES timer_callpath(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION
);
/* By definition, profiles have no time data. However, there are a few
 * examples where time ranges make sense, such as tracking call stacks
 * or associating metadata to a particular phase. The time_range table
 * is used to give other measurements a time context. The iteration
 * start and end can be used to indicate which loop iterations or 
 * calls to a function are relevant for this time range. */
CREATE TABLE time_range (
 id SERIAL NOT NULL PRIMARY KEY,
 /* starting iteration */
 iteration_start INT NOT NULL,
 /* ending iteration. */
 iteration_end INT,
 /* starting timestamp */
 time_start BIGINT NOT NULL,
 /* ending timestamp. */
 time_end BIGINT
);
/* timer_call_data records have the dynamic information for when a node
 * in the callgraph is visited by a thread. If you are tracking dynamic
 * callstacks, you would use the time_range field. If you are storing
 * snapshot data, you would use the time_range field. */
CREATE TABLE timer_call_data (
 id          SERIAL NOT NULL PRIMARY KEY,
 /* what callgraph node is this? */
 timer_callpath       INT NOT NULL,
 /* what thread is this? */
 thread      INT NOT NULL,
 /* how many times this timer was called */
 calls       INT,
 /* how many subroutines this timer called */
 subroutines INT,
 /* what is the time_range? this is for supporting snapshots */
 time_range  INT,
 FOREIGN KEY(timer_callpath) REFERENCES timer_callpath(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION,
 FOREIGN KEY(thread) REFERENCES thread(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION,
 FOREIGN KEY(time_range) REFERENCES time_range(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION
);
/* timer values have the timer of one timer
   on one thread for one metric, at one location on the callgraph. */
CREATE TABLE timer_value (
 /* what node in the callgraph and thread is this? */
 timer_call_data       INT NOT NULL,
 /* what metric is this? */
 metric                INT NOT NULL,
 /* The inclusive value for this timer */
 inclusive_value       DOUBLE PRECISION,
 /* The exclusive value for this timer */
 exclusive_value       DOUBLE PRECISION,
 /* The inclusive percent for this timer */
 inclusive_percent     DOUBLE PRECISION,
 /* The exclusive percent for this timer */
 exclusive_percent     DOUBLE PRECISION,
 /* The variance for this timer */
 sum_exclusive_squared DOUBLE PRECISION,
 FOREIGN KEY(timer_call_data) REFERENCES timer_call_data(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION,
 FOREIGN KEY(metric) REFERENCES metric(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION
);
/* one metric, one thread, one timer */
CREATE INDEX timer_value_index on timer_value (timer_call_data, metric);
/*************************************/
/* CREATE THE COUNTER RELATED TABLES */
/*************************************/
/* counters measure some counted value. */
CREATE TABLE counter (
 id          SERIAL      NOT NULL PRIMARY KEY,
 trial       INT         NOT NULL,
 name        VARCHAR        NOT NULL,
 FOREIGN KEY(trial) REFERENCES trial(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION
);
/* counter index on the trial and name columns */
CREATE INDEX counter_trial_index on counter (trial, name);
CREATE TABLE counter_value (
 /* what counter is this? */
 counter            INT NOT NULL,
 /* where in the callgraph? */
 timer_callpath     INT,
 /* what thread is this? */
 thread             INT NOT NULL,
 /* The total number of samples */
 sample_count       INT,         
 /* The maximum value seen */
 maximum_value      DOUBLE PRECISION,
 /* The minimum value seen */
 minimum_value      DOUBLE PRECISION,
 /* The mean value seen */
 mean_value         DOUBLE PRECISION,
 /* The variance for this counter */
 standard_deviation DOUBLE PRECISION,
 FOREIGN KEY(counter) REFERENCES counter(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION,
 FOREIGN KEY(timer_callpath) REFERENCES timer_callpath(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION,
 FOREIGN KEY(thread) REFERENCES thread(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION
);
/* one thread, one counter */
CREATE INDEX counter_value_index on counter_value (counter, thread);
/**************************************/
/* CREATE THE METADATA RELATED TABLES */
/**************************************/
/* primary metadata is metadata that is not nested, does not
   contain unique data for each thread. */
CREATE TABLE primary_metadata (
 trial    INT NOT NULL,
 name     VARCHAR NOT NULL,
 value    VARCHAR,
 FOREIGN KEY(trial) REFERENCES trial(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION
);
/* create an index for faster queries against the primary_metadata table */
CREATE INDEX primary_metadata_index on primary_metadata (trial, name);
/* secondary metadata is metadata that could be nested, could
   contain unique data for each thread, and could be an array. */
CREATE TABLE secondary_metadata (
 id       VARCHAR NOT NULL PRIMARY KEY,
 /* trial this value belongs to */
 trial    INT NOT NULL,
 /* this metadata value could be associated with a thread */
 thread   INT,
 /* this metadata value could be associated with a timer that happened */
 timer_callpath    INT,
 /* which call to the context timer was this? */
 time_range    INT,
 /* this metadata value could be a nested structure */
 parent   VARCHAR,
 /* the name of the metadata field */
 name     VARCHAR NOT NULL,
 /* the value of the metadata field */
 value    VARCHAR,
 /* this metadata value could be an array - so tokenize it */
 is_array BOOLEAN DEFAULT FALSE,
 FOREIGN KEY(trial) REFERENCES trial(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION,
 FOREIGN KEY(thread) REFERENCES thread(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION,
 FOREIGN KEY(timer_callpath) REFERENCES timer_callpath(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION,
 FOREIGN KEY(parent) REFERENCES secondary_metadata(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION,
 FOREIGN KEY(time_range) REFERENCES time_range(id)
   ON DELETE NO ACTION ON UPDATE NO ACTION
);
/* create an index for faster queries against the secondary_metadata table */
CREATE INDEX secondary_metadata_index on secondary_metadata 
   (trial, name, thread, parent);
/**************************************/
/* CREATE THE METADATA RELATED TABLES */
/**************************************/
/* this is the view table, which organizes and filters trials */
create table taudb_view (
    id                    SERIAL            NOT NULL    PRIMARY KEY,
    /* views can be nested */
    parent                INTEGER            NULL,
    /* name of the view */
    name                VARCHAR    NOT NULL,
    /* view conjoin type for parameters */
    conjoin                VARCHAR    NOT NULL,
    FOREIGN KEY (parent) REFERENCES taudb_view(id)
      ON DELETE CASCADE ON UPDATE CASCADE
);
create table taudb_view_parameter (
    /* the view ID */
    taudb_view            INTEGER    NOT NULL,
    /* the table name for the where clause */
    table_name            VARCHAR    NOT NULL,
    /* the column name for the where clause.
       If the table_name is one of the metadata tables, this is the 
       value of the "name" column */
    column_name            VARCHAR    NOT NULL,
    /* the operator for the where clause */
    operator            VARCHAR    NOT NULL,
    /* the value for the where clause */
    value                VARCHAR    NOT NULL,
    FOREIGN KEY (taudb_view) REFERENCES taudb_view(id)
      ON DELETE CASCADE ON UPDATE CASCADE
);
/* simple view of all trials */
INSERT INTO taudb_view (parent, name, conjoin) 
    VALUES (NULL, 'All Trials', 'and');
/* must have a parameter or else the sub views for this view 
   do not work correctly*/
INSERT INTO taudb_view_parameter 
    (taudb_view, table_name, column_name, operator, value) 
	VALUES (1, 'trial', 'total_threads', '>', '-1');
/* the application and experiment columns are not used in the 
   latest schema, but keeping them makes the code in 
   PerfExplorer simpler. */
create table analysis_settings (
    id                  SERIAL          NOT NULL    PRIMARY KEY,
    taudb_view          INTEGER         NULL,
    application         INTEGER         NULL,
    experiment          INTEGER         NULL,
    trial               INTEGER         NULL,
    metric              INTEGER         NULL,
    method              VARCHAR(255)    NOT NULL,
    dimension_reduction VARCHAR(255)    NOT NULL,
    normalization       VARCHAR(255)    NOT NULL,
    FOREIGN KEY (taudb_view) REFERENCES taudb_view(id)
        ON DELETE CASCADE ON UPDATE CASCADE,
    FOREIGN KEY (trial) REFERENCES trial(id)
        ON DELETE CASCADE ON UPDATE CASCADE,
    FOREIGN KEY (metric) REFERENCES metric(id)
        ON DELETE CASCADE ON UPDATE CASCADE
);
create table analysis_result (
    id                  SERIAL          NOT NULL    PRIMARY KEY,
    analysis_settings   INTEGER         NOT NULL,
    description         VARCHAR(255)    NOT NULL,
    thumbnail_size      INTEGER         NULL,
    image_size          INTEGER         NULL,
    thumbnail           BYTEA           NULL,
    image               BYTEA           NULL,
    result_type         INTEGER         NOT NULL
);
 /* Performance indexes! */
create index trial_name_index on trial(name);
create index timer_name_index on timer(name);
CREATE INDEX timer_callpath_parent on timer_callpath(parent);
CREATE INDEX thread_trial on thread(trial);
CREATE INDEX timer_call_data_timer_callpath on 
    timer_call_data(timer_callpath);
CREATE INDEX counter_name_index on counter(name);
CREATE INDEX timer_call_data_thread on timer_call_data(thread);
/* SHORT TERM FIX! These views make sure that charts 
   (mostly) work... for now. */
DROP VIEW IF EXISTS interval_location_profile;
DROP VIEW IF EXISTS interval_mean_summary;
DROP VIEW IF EXISTS interval_total_summary;
DROP VIEW IF EXISTS interval_event_value;
DROP VIEW IF EXISTS interval_event;
DROP VIEW IF EXISTS atomic_location_profile;
DROP VIEW IF EXISTS atomic_mean_summary;
DROP VIEW IF EXISTS atomic_total_summary;
DROP VIEW IF EXISTS atomic_event_value;
DROP VIEW IF EXISTS atomic_event;
CREATE OR REPLACE VIEW interval_event 
(id, trial, name, group_name, source_file, line_number, line_number_end) 
AS  
SELECT tcp.id, t.trial, t.name, tg.group_name,  
t.source_file, t.line_number, t.line_number_end  
FROM timer_callpath tcp  
INNER JOIN timer t ON tcp.timer = t.id  
INNER JOIN timer_group tg ON tg.timer = t.id; 
CREATE OR REPLACE VIEW interval_event_value 
(interval_event, node, context, thread, metric, inclusive_percentage,  
inclusive, exclusive_percentage, exclusive, call, subroutines,  
inclusive_per_call, sum_exclusive_squared) 
AS SELECT tcd.timer_callpath, t.node_rank, t.context_rank,  
t.thread_rank, tv.metric, tv.inclusive_percent,  
tv.inclusive_value, tv.exclusive_percent, tv.exclusive_value, tcd.calls, 
tcd.subroutines, tv.inclusive_value / tcd.calls, tv.sum_exclusive_squared 
FROM timer_value tv 
INNER JOIN timer_call_data tcd on tv.timer_call_data = tcd.id 
INNER JOIN thread t on tcd.thread = t.id; 
CREATE OR REPLACE VIEW interval_location_profile 
AS SELECT * from interval_event_value WHERE thread >= 0; 
 
CREATE OR REPLACE VIEW interval_total_summary 
AS SELECT * from interval_event_value WHERE thread = -2; 
 
CREATE OR REPLACE VIEW interval_mean_summary 
AS SELECT * from interval_event_value WHERE thread = -1; 
 
 
CREATE OR REPLACE VIEW atomic_event  
(id, trial, name, group_name, source_file, line_number) 
AS SELECT c.id, c.trial, c.name, NULL, NULL, NULL 
FROM counter c; 
CREATE OR REPLACE VIEW atomic_event_value 
(atomic_event, node, context, thread, sample_count, 
maximum_value, minimum_value, mean_value, standard_deviation) 
AS SELECT cv.counter, t.node_rank, t.context_rank, t.thread_rank, 
cv.sample_count, cv.maximum_value, cv.minimum_value, cv.mean_value, 
cv.standard_deviation FROM counter_value cv 
INNER JOIN thread t ON cv.thread = t.id;
 
CREATE OR REPLACE VIEW atomic_location_profile 
AS SELECT * FROM atomic_event_value WHERE thread >= 0; 
 
CREATE OR REPLACE VIEW atomic_total_summary 
AS SELECT * FROM atomic_event_value WHERE thread = -2; 
 
CREATE OR REPLACE VIEW atomic_mean_summary 
AS SELECT * FROM atomic_event_value WHERE thread >= -1;