-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmakefile
More file actions
64 lines (53 loc) · 1.73 KB
/
makefile
File metadata and controls
64 lines (53 loc) · 1.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Run every recipe line with bash instead of make's default /bin/sh.
SHELL := /bin/bash

# Do not echo recipe commands before executing them (same as `make -s`).
MAKEFLAGS += --silent

# Tag passed to `docker build -t` by the build-image target.
image_name := pyspark_base
# Every target in this file names a task, not a file it produces. Declaring
# them phony keeps a stray file or directory called e.g. `build`, `test` or
# `clean` from making the target look up to date and skipping its recipe.
.PHONY: all clean raw-data-prep test build run build-image

# Full pipeline: remove old artifacts, run get_data.py, run the unit tests,
# package the application and start the cluster, then submit the Spark jobs.
# NOTE: the stages run in this order only because make processes the
# prerequisites left to right in a serial build; do not run `make -j all`.
all: clean raw-data-prep test build run
# Submit the `movies` job and then the `movie_genres` job with spark-submit,
# executed inside the already running container
# pyspark-container_spark-master_1.
#
# Both submissions ship app/jobs.zip, app/shared.zip and app/libs.zip via
# --py-files and app/config.json via --files. The second submission only
# starts if the first one exits successfully, because make aborts the recipe
# on the first failing command.
run:
	docker exec -it pyspark-container_spark-master_1 bash -c "\
	bin/spark-submit \
	--master spark://spark-master:7077 \
	--py-files app/jobs.zip,app/shared.zip,app/libs.zip \
	--files app/config.json \
	app/main.py --job movies \
	"
	docker exec -it pyspark-container_spark-master_1 bash -c "\
	bin/spark-submit \
	--master spark://spark-master:7077 \
	--py-files app/jobs.zip,app/shared.zip,app/libs.zip \
	--files app/config.json \
	app/main.py --job movie_genres \
	"
# Run the pytest suite. pytest is launched from inside pipeline/ so that the
# `tests` argument is resolved relative to that directory.
test:
	cd pipeline && \
		python -m pytest tests
# Package the PySpark application into ./dist and start the Spark cluster.
#
# Steps:
#   1. copy dataset/* into data/ (best effort),
#   2. copy main.py and config.json into dist/,
#   3. zip the `jobs` and `shared` packages into dist/jobs.zip and
#      dist/shared.zip,
#   4. pip-install pipeline/requirements.txt into dist/libs inside the
#      lambci/lambda:build-python3.7 image and zip it to dist/libs.zip,
#   5. bring the docker-compose stack up with two spark-worker containers.
build:
# Copy data. Deliberately best effort: a failed copy does not abort the build.
	sudo cp -R dataset/* data/ || true
# Compile Job
	mkdir -p ./dist
	cp pipeline/main.py ./dist
	cp pipeline/config.json ./dist
# One archive per package. The `&&` between the two zip commands is required:
# without it the second command is swallowed as arguments of the first and
# shared.zip is never written.
	cd pipeline && \
	sudo zip -r ../dist/jobs.zip jobs && \
	sudo zip -r ../dist/shared.zip shared
# $(CURDIR) is make's own working directory, so the mount stays correct when
# make is invoked with -C; the quotes protect paths containing spaces.
	docker run --rm -v "$(CURDIR)":/foo -w /foo lambci/lambda:build-python3.7 \
	pip install -r pipeline/requirements.txt -t ./dist/libs
	cd ./dist/libs && sudo zip -r -D ../libs.zip .
# Build cluster for running spark application
	docker-compose up -d --build --scale spark-worker=2
# Remove everything inside dataset/, dist/, apps/ and data/ (the directories
# themselves are kept), then stop the docker-compose stack. A failure of
# `docker-compose down` is ignored on purpose so that clean still succeeds
# when no stack is running.
clean:
	sudo rm -rf \
		dataset/* \
		dist/* \
		apps/* \
		data/*
# Delete image from docker and clear running containers for the image
	docker-compose down --remove-orphans || true
# Disabled: would force-remove ALL containers and volumes on the host.
# docker rm -f $(docker ps -a -q) || true
# docker volume rm $(docker volume ls -q) || true
# Run get_data.py from the directory make is working in.
# NOTE(review): presumably this populates dataset/, which the build target
# later copies into data/ -- confirm against get_data.py.
raw-data-prep:
	python get_data.py
# Build the Docker image from the Dockerfile in this directory, tag it
# $(image_name), then start a container from that same image which runs
# /start-spark.sh and submits examples/src/main/python/pi.py with
# spark-submit.
#
# The run step uses $(image_name) rather than a hard-coded tag so that
# overriding the variable (e.g. `make build-image image_name=foo`) builds and
# runs the same image.
build-image:
	docker build -t $(image_name) .
	docker run -it $(image_name) bash -c "\
	/bin/bash /start-spark.sh; \
	./bin/spark-submit \
	--master spark://spark-master:7077 \
	examples/src/main/python/pi.py 1000 \
	"