Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions BitmapFileHeader.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#pragma once
#include <cstdint>

#pragma pack(2)
#pragma pack(push, 2)

namespace Mandelbrot{

Expand All @@ -12,4 +12,6 @@ struct BitmapFileHeader {
int32_t dataOffset;
};

}
}

#pragma pack(pop)
6 changes: 4 additions & 2 deletions BitmapInfoHeader.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

using namespace std;

#pragma pack(2)
#pragma pack(push, 2)

namespace Mandelbrot {

Expand All @@ -23,4 +23,6 @@ struct BitmapInfoHeader{

};

}
}

#pragma pack(pop)
68 changes: 66 additions & 2 deletions Mandelbrot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,51 @@
#include <iostream>
#include <vector>
#include <math.h>
#include <cstdint>
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#endif
#include "Mandelbrot.h"

using namespace std;

namespace Mandelbrot{

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
/**
 * Returns true when at least one 32-bit lane of `mask` is non-zero.
 *
 * vmaxvq_u32 (across-vector max) only exists on AArch64. The surrounding
 * guard also admits 32-bit ARM NEON targets, so those get an equivalent
 * OR-fold built from intrinsics available on both architectures.
 */
static inline bool anyLaneSet(uint32x4_t mask){
#if defined(__aarch64__) || defined(_M_ARM64)
    return vmaxvq_u32(mask) != 0;
#else
    const uint32x2_t folded = vorr_u32(vget_low_u32(mask), vget_high_u32(mask));
    return (vget_lane_u32(folded, 0) | vget_lane_u32(folded, 1)) != 0;
#endif
}

/**
 * Computes Mandelbrot iteration counts for four points at once.
 *
 * Each lane iterates z = z*z + c starting from z = 0, in single precision.
 * A lane's count is incremented for every step after which |z|^2 <= 4; once
 * a lane exceeds that bound it stops counting for good. Counts therefore
 * range from 0 to Mandelbrot::MAX_ITERATIONS.
 *
 * @param c_re      real parts of the four points
 * @param c_im      imaginary parts of the four points
 * @param out_iters receives the four iteration counts, in lane order
 */
static inline void getIterationsNeon4(float32x4_t c_re, float32x4_t c_im, int out_iters[4]){
    const float32x4_t threshold2 = vdupq_n_f32(4.0f);   // escape radius 2, squared
    float32x4_t z_re = vdupq_n_f32(0.0f);
    float32x4_t z_im = vdupq_n_f32(0.0f);
    int32x4_t iters = vdupq_n_s32(0);
    uint32x4_t active = vdupq_n_u32(0xFFFFFFFFu);       // all-ones = lane still counting

    for (int i = 0; i < Mandelbrot::MAX_ITERATIONS; i++){
        float32x4_t z_re2 = vmulq_f32(z_re, z_re);
        float32x4_t z_im2 = vmulq_f32(z_im, z_im);
        float32x4_t z_re_im = vmulq_f32(z_re, z_im);

        // z*z + c  ==  (re^2 - im^2 + c_re) + i(2*re*im + c_im)
        float32x4_t z_re_new = vaddq_f32(vsubq_f32(z_re2, z_im2), c_re);
        float32x4_t z_im_new = vaddq_f32(vaddq_f32(z_re_im, z_re_im), c_im);

        float32x4_t mag2 = vaddq_f32(vmulq_f32(z_re_new, z_re_new), vmulq_f32(z_im_new, z_im_new));
        uint32x4_t still_in = vcleq_f32(mag2, threshold2);

        // Only lanes that have never escaped and are still bounded get +1.
        uint32x4_t inc_mask = vandq_u32(active, still_in);
        iters = vaddq_s32(iters, vreinterpretq_s32_u32(vandq_u32(inc_mask, vdupq_n_u32(1))));

        z_re = z_re_new;
        z_im = z_im_new;
        active = inc_mask;

        // Every lane has escaped: further iterations cannot change the counts.
        if (!anyLaneSet(active)){
            break;
        }
    }

    vst1q_s32(out_iters, iters);
}
#endif

Mandelbrot::Mandelbrot(int width, int height, const int N_THREADS):
_bitmap(width, height),
_width(width),
Expand Down Expand Up @@ -50,7 +89,33 @@ void Mandelbrot::draw(string fileName, drawColor colourSelection ){

auto work = [&](int thread_id){
for (int y = thread_id; y < _height; y+= NUM_THREADS){
for (int x = 0; x < _width; x++){
int x = 0;
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
const float32x4_t x_step = vdupq_n_f32(2.0f/_width);
const float32x4_t x_base = vdupq_n_f32((-_width/2.0f - 150.0f) * 2.0f/_width);
const float32x4_t y_fractal = vdupq_n_f32((y - _height/2.0f) * 2.0f/_width);
for (; x + 3 < _width; x += 4){
int32_t x_vals[4] = {x, x + 1, x + 2, x + 3};
float32x4_t x_offsets = vcvtq_f32_s32(vld1q_s32(x_vals));
float32x4_t c_re = vaddq_f32(x_base, vmulq_f32(x_offsets, x_step));
float32x4_t c_im = y_fractal;

int iters[4];
getIterationsNeon4(c_re, c_im, iters);

unique_lock<mutex> l(histMutex);
for (int lane = 0; lane < 4; lane++){
int idx = y*_width + (x + lane);
int num_iters = iters[lane];
pfractalData[idx] = num_iters;
if (num_iters != MAX_ITERATIONS){
p[num_iters]++;
}
}
l.unlock();
}
#endif
for (; x < _width; x++){
double xFractal = (x - _width/2 - 150) * 2.0/_width;
double yFractal = (y - _height/2) * 2.0/_width;

Expand Down Expand Up @@ -153,4 +218,3 @@ bool Mandelbrot::_validHistogram(){
return false;
}
}

35 changes: 34 additions & 1 deletion build.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,36 @@
#!/bin/bash
#
# Builds the parallel Mandelbrot renderer into ./builds.
#
# Usage: ./build.sh [debug|release]        (default: release)
#
# Environment overrides:
#   CXX         C++ compiler to invoke (default: g++)
#   ARCH_FLAGS  Target-CPU flags appended to release builds
#               (default: -mcpu=neoverse-n1+crc+crypto). Set it to an empty
#               string, or to e.g. -march=native, when the build host is not
#               a Neoverse N1 machine.

set -euo pipefail

MODE="${1:-release}"
CXX="${CXX:-g++}"
# "-" (not ":-") so that an explicitly empty ARCH_FLAGS disables the CPU flag.
ARCH_FLAGS="${ARCH_FLAGS--mcpu=neoverse-n1+crc+crypto}"
SRC=(main.cpp Mandelbrot.cpp bitmap.cpp)
OUT_DIR="./builds"

mkdir -p "$OUT_DIR"

case "$MODE" in
    debug)
        OUT="$OUT_DIR/mandelbrot-parallel-debug"
        # -pthread: the renderer uses threads and a mutex.
        CXXFLAGS=(--std=c++11 -g -O0 -pthread)
        ;;
    release)
        OUT="$OUT_DIR/mandelbrot-parallel"
        CXXFLAGS=(
            --std=c++11
            -O3
            -ffast-math
            -funroll-loops
            -flto
            -DNDEBUG
            -pthread
        )
        if [[ -n "$ARCH_FLAGS" ]]; then
            read -r -a arch_flags <<< "$ARCH_FLAGS"
            CXXFLAGS+=("${arch_flags[@]}")
        fi
        ;;
    *)
        # Usage errors go to stderr so they are not mistaken for build output.
        echo "Usage: $0 [debug|release]" >&2
        exit 1
        ;;
esac

echo "Building $MODE -> $OUT"
"$CXX" "${CXXFLAGS[@]}" "${SRC[@]}" -o "$OUT"
4 changes: 2 additions & 2 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ using namespace std;

/**
 * Renders the Mandelbrot set at 1920x1080 and writes it as a BMP file.
 *
 * Usage: <program> [num_threads]
 *
 * With exactly one argument it is parsed as the worker-thread count;
 * otherwise a single thread is used. std::stoi throws if the argument is
 * not a number.
 *
 * @return 0 on success, 1 when the thread count is not positive.
 */
int main(int argc, char* argv[]){

    // Read argv[1] only when it was actually supplied; indexing it
    // unconditionally hands std::stoi a null pointer when run without arguments.
    const int NUM_THREADS = argc == 2 ? std::stoi(argv[1]) : 1;
    if (NUM_THREADS < 1){
        // The row loop advances by the thread count, so it must be positive.
        std::cerr << "Number of threads must be a positive integer" << std::endl;
        return 1;
    }
    std::cout << "Number of Threads = " << NUM_THREADS << std::endl;

    Mandelbrot::Mandelbrot myplot(1920, 1080, NUM_THREADS);
    // Relative path: the image lands in the current working directory
    // instead of a directory that only exists on one machine.
    myplot.draw("Green-Parallel-512.bmp", Mandelbrot::Mandelbrot::GREEN);

    return 0;
}