Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ CMakeCache.txt
doc

apps/tensor_times_vector/tensor_times_vector

.cache
.vscode
compile_commands.json
50 changes: 47 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,25 @@ project(taco
LANGUAGES C CXX
)
option(CUDA "Build for NVIDIA GPU (CUDA must be preinstalled)" OFF)
option(ISPC "Build for Intel ISPC Compiler (ISPC Compiler must be preinstalled)" OFF)
option(PYTHON "Build TACO for python environment" OFF)
option(OPENMP "Build with OpenMP execution support" OFF)
option(OPENMP "Build with OpenMP execution support" ON)
option(COVERAGE "Build with code coverage analysis" OFF)
set(TACO_FEATURE_CUDA 0)
set(TACO_FEATURE_OPENMP 0)
set(TACO_FEATURE_ISPC 0)
set(TACO_FEATURE_OPENMP 1)
set(TACO_FEATURE_PYTHON 0)
if(CUDA)
message("-- Searching for CUDA Installation")
find_package(CUDA REQUIRED)
add_definitions(-DCUDA_BUILT)
set(TACO_FEATURE_CUDA 1)
endif(CUDA)
if(ISPC)
message("-- Searching for ISPC Installation")
add_definitions(-DISPC_BUILT)
set(TACO_FEATURE_ISPC 1)
endif(ISPC)
if(OPENMP)
message("-- Will use OpenMP for parallel execution")
add_definitions(-DUSE_OPENMP)
Expand Down Expand Up @@ -88,6 +95,39 @@ if(OPENMP)
set(C_CXX_FLAGS "-fopenmp ${C_CXX_FLAGS}")
endif(OPENMP)

set(PAPI_DIR "/home/min/a/kadhitha/workspace/my_taco/papi/src/install/")

find_path(PAPI_DIR
NAMES include/papi.h
)

find_library(PAPI_LIBRARIES
# Pick the static library first for easier run-time linking.
NAMES libpapi.a papi
HINTS ${PAPI_DIR}/lib ${HILTIDEPS}/lib
)

find_path(PAPI_INCLUDE_DIRS
NAMES papi.h
HINTS ${PAPI_DIR}/include ${HILTIDEPS}/include
)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(PAPI DEFAULT_MSG
PAPI_LIBRARIES
PAPI_INCLUDE_DIRS
)

mark_as_advanced(
PAPI_PREFIX_DIRS
PAPI_LIBRARIES
PAPI_INCLUDE_DIRS
)

include_directories(${PAPI_INCLUDE_DIRS})

# project (ValgrindExample)

if(COVERAGE)
find_program(PATH_TO_GCOVR gcovr REQUIRED)
# add coverage tooling to build flags
Expand All @@ -97,7 +137,8 @@ if(COVERAGE)
message("-- Code coverage analysis (gcovr) enabled")
endif(COVERAGE)

set(C_CXX_FLAGS "${C_CXX_FLAGS}")
set(C_CXX_FLAGS "${C_CXX_FLAGS} -I/${PAPI_DIR}/include -L/${PAPI_DIR}/lib")
# set(C_CXX_FLAGS "${C_CXX_FLAGS}")
set(CMAKE_C_FLAGS "${C_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${C_CXX_FLAGS} -std=c++14")

Expand All @@ -110,6 +151,9 @@ set(TACO_INCLUDE_DIR ${TACO_PROJECT_DIR}/include)

enable_testing()
include_directories(${TACO_INCLUDE_DIR})
# include_directories("/home/min/a/kadhitha/workspace/my_taco/valgrind")
# project (ValgrindExample)
# include (CTest)

set(TACO_LIBRARY_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})

Expand Down
13 changes: 12 additions & 1 deletion include/taco/codegen/module.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class Module {
public:
/// Create a module for some target
Module(Target target=getTargetFromEnvironment())
: lib_handle(nullptr), moduleFromUserSource(false), target(target) {
: lib_handle(nullptr), so_lib_handle(nullptr), moduleFromUserSource(false), target(target) {
setJITLibname();
setJITTmpdir();
}
Expand All @@ -44,11 +44,16 @@ class Module {
/// before calling. If there's no function of this name then a nullptr is
/// returned.
void* getFuncPtr(std::string name);
void* getFuncPtr(std::string& sofile, std::string name);

/// Call a raw function in this module and return the result
int callFuncPackedRaw(std::string name, std::string& sofile, void** args);
int callFuncPackedRaw(std::string name, void** args);

/// Call a raw function in this module and return the result
int callFuncPackedRaw(std::string name, std::string& sofile, std::vector<void*> args) {
return callFuncPackedRaw(name, sofile, args.data());
}
int callFuncPackedRaw(std::string name, std::vector<void*> args) {
return callFuncPackedRaw(name, args.data());
}
Expand All @@ -57,6 +62,10 @@ class Module {
int callFuncPacked(std::string name, void** args) {
return callFuncPackedRaw("_shim_"+name, args);
}

int callFuncPacked(std::string name, std::string& sofile, void** args) {
return callFuncPackedRaw("_shim_"+name, sofile,args);
}

/// Call a function using the taco_tensor_t interface and return the result
int callFuncPacked(std::string name, std::vector<void*> args) {
Expand All @@ -68,10 +77,12 @@ class Module {

private:
std::stringstream source;
std::stringstream additional_source;
std::stringstream header;
std::string libname;
std::string tmpdir;
void* lib_handle;
void* so_lib_handle;
std::vector<Stmt> funcs;

// true iff the module was created from user-provided source
Expand Down
12 changes: 12 additions & 0 deletions include/taco/cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,19 @@
#define CUDA_BUILT false
#endif

#ifndef ISPC_BUILT
#define ISPC_BUILT false
#endif

namespace taco {

/// Functions used by taco to interface with ISPC
bool should_use_ISPC_codegen();
void set_ISPC_codegen_enabled(bool enabled);
bool is_ISPC_code_stream_enabled();
void set_ISPC_code_stream_enabled(bool enabled);


/// Functions used by taco to interface with CUDA (especially unified memory)
/// Check if should use CUDA codegen
bool should_use_CUDA_codegen();
Expand Down
3 changes: 3 additions & 0 deletions include/taco/index_notation/transformations.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ IndexStmt parallelizeOuterLoop(IndexStmt stmt);
*/
IndexStmt reorderLoopsTopologically(IndexStmt stmt);

IndexStmt loopFusionOverFission(IndexStmt stmt, Assignment assignment,
std::string side, int iters);

/**
* Performs scalar promotion so that reductions are done by accumulating into
* scalar temporaries whenever possible.
Expand Down
2 changes: 1 addition & 1 deletion include/taco/ir/ir.h
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ struct Switch : public StmtNode<Switch> {
static const IRNodeType _type_info = IRNodeType::Switch;
};

enum class LoopKind {Serial, Static, Dynamic, Runtime, Vectorized, Static_Chunked};
enum class LoopKind {Serial, Static, Dynamic, Runtime, Vectorized, Static_Chunked, Foreach, Mul_Thread, Init};

/** A for loop from start to end by increment.
* A vectorized loop will require the increment to be 1 and the
Expand Down
3 changes: 3 additions & 0 deletions include/taco/ir/ir_printer.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class IRPrinter : public IRVisitorStrict {
public:
IRPrinter(std::ostream& stream);
IRPrinter(std::ostream& stream, bool color, bool simplify);
IRPrinter(std::ostream& stream, std::ostream& stream2, bool color, bool simplify);
virtual ~IRPrinter();

void setColor(bool color);
Expand Down Expand Up @@ -72,6 +73,7 @@ class IRPrinter : public IRVisitorStrict {
virtual void visit(const Break*);

std::ostream &stream;
std::ostream &stream2;
int indent;
bool color;
bool simplify;
Expand Down Expand Up @@ -109,6 +111,7 @@ class IRPrinter : public IRVisitorStrict {
void doIndent();
void printBinOp(Expr a, Expr b, std::string op, Precedence precedence);
bool needsParentheses(Precedence precedence);
void sendToStream(std::stringstream &stream);

std::string keywordString(std::string);
std::string commentString(std::string);
Expand Down
2 changes: 1 addition & 1 deletion include/taco/ir_tags.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace taco {
/// ParallelUnit::GPUWarp can be optionally used to allow for GPU warp-level primitives
/// ParallelUnit::GPUThread causes for every iteration to be executed on a separate GPU thread
enum class ParallelUnit {
NotParallel, DefaultUnit, GPUBlock, GPUWarp, GPUThread, CPUThread, CPUVector, CPUThreadGroupReduction, GPUBlockReduction, GPUWarpReduction
NotParallel, DefaultUnit, GPUBlock, GPUWarp, GPUThread, CPUThread, CPUVector, CPUThreadGroupReduction, GPUBlockReduction, GPUWarpReduction, CPUSimd, CPUSpmd
};
extern const char *ParallelUnit_NAMES[];

Expand Down
3 changes: 3 additions & 0 deletions include/taco/lower/lowerer_impl_imperative.h
Original file line number Diff line number Diff line change
Expand Up @@ -499,10 +499,13 @@ class LowererImplImperative : public LowererImpl {

bool emitUnderivedGuards = true;

int loopDepth = 0;
int inParallelLoopDepth = 0;

std::map<ParallelUnit, ir::Expr> parallelUnitSizes;
std::map<ParallelUnit, IndexVar> parallelUnitIndexVars;
std::map<int, ParallelUnit> forUnits; // <loopdepth, ParallelUnit>
std::map<TensorVar,int> whereTempsWithLoopDepth;

/// Keep track of what IndexVars have already been defined
std::set<IndexVar> definedIndexVars;
Expand Down
1 change: 1 addition & 0 deletions include/taco/taco_tensor_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#ifndef TACO_TENSOR_T_DEFINED
#define TACO_TENSOR_T_DEFINED

#include <cstdint>
#include <stdint.h>

typedef enum { taco_mode_dense, taco_mode_sparse } taco_mode_t;
Expand Down
2 changes: 2 additions & 0 deletions include/taco/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,8 @@ class TensorBase {

/// Compile the tensor expression.
void compile();
void compute(std::ofstream& statfile);
void compute(std::ofstream& statfile, std::string& sofile);

void compile(IndexStmt stmt, bool assembleWhileCompute=false);

Expand Down
22 changes: 22 additions & 0 deletions include/taco/util/strings.h
Original file line number Diff line number Diff line change
@@ -1,20 +1,42 @@
#ifndef TACO_UTIL_STRINGS_H
#define TACO_UTIL_STRINGS_H

#include "taco/cuda.h"
#include <string>
#include <sstream>
#include <vector>
#include <map>
#include <iomanip>
#include <limits>

#include "taco/type.h"

// To get the value of a compiler macro variable
#define STRINGIFY(x) #x
#define TO_STRING(x) STRINGIFY(x)

namespace taco {
namespace util {

// /// Turn anything except floating points that can be written to a stream
// /// into a string.
// template <class T>
// typename std::enable_if<!std::is_floating_point<T>::value, std::string>::type
// toStringISPC(const T &val) {

// std::stringstream sstream;
// if (val == Int32) {
// sstream << "int32";
// }
// else if (val == Int64) {
// sstream << "int64";
// }
// else {
// sstream << val;
// }
// return sstream.str();
// }

/// Turn anything except floating points that can be written to a stream
/// into a string.
template <class T>
Expand Down
1 change: 1 addition & 0 deletions include/taco/version.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@
#define TACO_FEATURE_OPENMP @TACO_FEATURE_OPENMP@
#define TACO_FEATURE_PYTHON @TACO_FEATURE_PYTHON@
#define TACO_FEATURE_CUDA @TACO_FEATURE_CUDA@
#define TACO_FEATURE_ISPC @TACO_FEATURE_ISPC@

#endif /* TACO_VERSION_H */
Binary file added out/taco-uml/._taco.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading