build spark

参考 Building Spark

https://github.com/apache/spark/blob/master/build/mvn

docker image

以社区提供的 apache/spark:v3.3.0 镜像为例，其 Dockerfile 位于 GitHub 上 Spark 仓库的 kubernetes/dockerfiles/spark/Dockerfile，内容如下：

# Tag of the openjdk base image; override with --build-arg java_image_tag=<tag>.
ARG java_image_tag=11-jre-slim  
  
FROM openjdk:${java_image_tag}  
  
# Numeric UID handed to the final USER instruction (defaults to 185).
ARG spark_uid=185  
  
# Before building the docker image, first build and make a Spark distribution following  
# the instructions in https://spark.apache.org/docs/latest/building-spark.html.  
# If this docker file is being used in the context of building your images from a Spark  
# distribution, the docker build command should be invoked from the top level directory  
# of the Spark distribution. E.g.:  
# docker build -t spark:latest -f kubernetes/dockerfiles/spark/Dockerfile .  
  
# Prepare the base OS for Spark in a single layer:
#   - rewrite the deb.* entries in /etc/apt/sources.list from http to https
#   - symlink /lib64 -> /lib
#   - install the runtime packages (bash, tini, libc6, PAM modules, Kerberos
#     client, NSS, procps)
#   - create the /opt/spark directory skeleton and an empty RELEASE marker
#   - replace /bin/sh with a symlink to /bin/bash
#   - add a pam_wheel rule to /etc/pam.d/su
#   - make /etc/passwd group-owned by root and group-writable
#     (NOTE(review): presumably so the entrypoint can register an arbitrary
#     runtime UID - confirm against entrypoint.sh)
#   - drop apt caches and package lists so they do not stay in the layer
# Each command sits on its own continued line; a backslash must be the last
# character on the line, otherwise the shell treats it as an escaped space.
RUN set -ex && \
    sed -i 's/http:\/\/deb.\(.*\)/https:\/\/deb.\1/g' /etc/apt/sources.list && \
    apt-get update && \
    ln -s /lib /lib64 && \
    apt-get install -y bash tini libc6 libpam-modules krb5-user libnss3 procps && \
    mkdir -p /opt/spark && \
    mkdir -p /opt/spark/examples && \
    mkdir -p /opt/spark/work-dir && \
    touch /opt/spark/RELEASE && \
    rm /bin/sh && \
    ln -sv /bin/bash /bin/sh && \
    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
    chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
    rm -rf /var/cache/apt/* && \
    rm -rf /var/lib/apt/lists/*
# Copy the Spark distribution from the build context into the image.
# Source paths are relative to the build context, which is expected to be the
# top-level directory of a built Spark distribution.
# Everything lands under /opt/spark except entrypoint.sh and decom.sh,
# which are placed directly in /opt/.
COPY jars /opt/spark/jars  
COPY bin /opt/spark/bin  
COPY sbin /opt/spark/sbin  
COPY kubernetes/dockerfiles/spark/entrypoint.sh /opt/  
COPY kubernetes/dockerfiles/spark/decom.sh /opt/  
COPY examples /opt/spark/examples  
COPY kubernetes/tests /opt/spark/tests  
COPY data /opt/spark/data  
  
# Spark installation root, exported to the running container.
# Uses the key=value form; the space-separated ENV form is deprecated.
ENV SPARK_HOME=/opt/spark

# Default working directory for processes started in the container.
WORKDIR /opt/spark/work-dir

# Make the work directory group-writable and /opt/decom.sh executable by all
# users. Both permission changes share one RUN so they produce a single layer.
RUN chmod g+w /opt/spark/work-dir && \
    chmod a+x /opt/decom.sh
  
# Exec-form entrypoint: the container starts /opt/entrypoint.sh directly,
# without an intermediate "sh -c" wrapper.
ENTRYPOINT [ "/opt/entrypoint.sh" ]  
  
# Specify the User that the actual main process will run as  
# (spark_uid is the build argument declared after FROM; default 185).
USER ${spark_uid}  

spark-shell

pyspark

pyspark

spark-submit

https://mallikarjuna_g.gitbooks.io/spark/content/spark-submit.html

spark programming

https://aiyanbo.gitbooks.io/spark-programming-guide-zh-cn/content/quick-start/using-spark-shell.html

How it works