-
Notifications
You must be signed in to change notification settings - Fork 2
/
Dockerfile
139 lines (118 loc) · 4.22 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Base image for the Airflow container.
FROM ubuntu:18.04
LABEL author="Allan Batista <[email protected]>"
# Ports documented for this image (EXPOSE does not publish them).
# NOTE(review): presumably Airflow webserver / Flower / worker log server — confirm.
EXPOSE 8080 5555 8793
# Run every RUN step under bash with pipefail enabled, so a failure on the
# left-hand side of a pipe (e.g. `curl ... | apt-key add -`) fails the build
# instead of being masked by the exit status of the last command.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# no interaction
# Written in key=value form; the space-separated `ENV key value` syntax is a
# deprecated legacy format.
# NOTE(review): DEBIAN_FRONTEND set through ENV also persists into running
# containers; it is kept as ENV here so the runtime environment is unchanged.
ENV DEBIAN_FRONTEND=noninteractive \
    TERM=linux \
    SLUGIFY_USES_TEXT_UNIDECODE=yes
# airflow
# AIRFLOW is declared on its own: the paths below expand it, and an ENV
# instruction only sees values set by earlier instructions.
ENV AIRFLOW=/opt/airflow
# Directory layout derived from the Airflow root.
ENV AIRFLOW_HOME=${AIRFLOW}/home \
    AIRFLOW__CORE__DAGS_FOLDER=${AIRFLOW}/dags \
    AIRFLOW__CORE__PLUGINS_FOLDER=${AIRFLOW}/plugins \
    AIRFLOW__CORE__BASE_LOG_FOLDER=${AIRFLOW}/logs \
    AIRFLOW_KEYS=${AIRFLOW}/keys
# Version and extras used by the apache-airflow pip install, plus related switches.
ENV AIRFLOW_VERSION=1.10.10 \
    AIRFLOW_COMPONENTS=all_dbs,async,celery,cloudant,crypto,gcp_api,google_auth,hdfs,hive,jdbc,mysql,oracle,password,postgres,rabbitmq,redis,s3,samba,slack,ssh,github_enterprise \
    AIRFLOW_GPL_UNIDECODE=yes \
    C_FORCE_ROOT=true
# language
# Locale settings for the image, written in key=value form (the
# space-separated `ENV key value` syntax is a deprecated legacy format).
# The en_US.UTF-8 locale itself is generated by the locale-gen call in the
# apt-get install step.
ENV LANGUAGE=en_US.UTF-8 \
    LANG=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8 \
    LC_CTYPE=en_US.UTF-8 \
    LC_MESSAGES=en_US.UTF-8
# pip install extensions
# PYTHON_PACKAGES defaults to empty.
# NOTE(review): presumably read by entrypoint.sh to install extra packages at
# container start — confirm against the script.
ENV PYTHON_PACKAGES="" \
    PYTHONDONTWRITEBYTECODE=true
# google cloud sdk
# Adds the SDK bin directory to PATH, selects the interpreter the SDK runs
# under, declares empty credential placeholders, and names the apt repository
# used when installing google-cloud-sdk.
ENV PATH=${PATH}:/usr/local/gcloud/google-cloud-sdk/bin \
    CLOUDSDK_PYTHON="python2.7" \
    GOOGLE_APPLICATION_CREDENTIALS_JSON="" \
    GOOGLE_APPLICATION_ACCOUNT="" \
    CLOUD_SDK_REPO=cloud-sdk-bionic
# oracle driver
# Install location of the Oracle Instant Client libraries.
ENV ORACLE_HOME=/opt/cx_oracle
# Append ORACLE_HOME to the dynamic loader search path. The ${VAR:+...} form
# emits the "existing value + colon" prefix only when LD_LIBRARY_PATH is
# already set; a plain "$LD_LIBRARY_PATH:$ORACLE_HOME" would start with an
# empty entry when the variable is unset, and the loader treats an empty
# entry as the current working directory.
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$ORACLE_HOME
# base
# Create the Airflow directory layout (home, keys, dags, logs, plugins).
RUN mkdir -p \
      "$AIRFLOW_HOME" \
      "$AIRFLOW_KEYS" \
      "$AIRFLOW__CORE__DAGS_FOLDER" \
      "$AIRFLOW__CORE__BASE_LOG_FOLDER" \
      "$AIRFLOW__CORE__PLUGINS_FOLDER"
# COPY rather than ADD: both sources are plain local files and none of ADD's
# extra behaviours are needed (ADD only auto-extracts tar archives, so the
# zip is copied verbatim either way and unpacked by a later RUN step).
COPY airflow/home /opt/airflow/home
COPY instantclient-basic-linux.x64-19.3.0.0.0dbru.zip /tmp/
WORKDIR /opt/airflow
# OS-level packages: Python 2 (minimal) and Python 3 toolchains, build
# dependencies for database/auth client libraries, Java, and general tooling.
# Packages are listed alphabetically to keep diffs small.
#  - unzip is listed explicitly because the Oracle Instant Client step runs
#    `unzip`; it must not depend on being pulled in indirectly.
#  - The apt package lists are removed in this same layer; deleting them in a
#    later RUN would not shrink the image. The Cloud SDK step runs its own
#    `apt-get update`, so it is unaffected.
# NOTE(review): --no-install-recommends is intentionally NOT added. Later
# steps (e.g. `apt-key add`) may rely on tools that arrive only as recommended
# packages (such as gnupg) — verify before enabling it.
RUN apt-get update -y \
 && apt-get install -y \
      build-essential \
      curl \
      default-libmysqlclient-dev \
      freetds-dev \
      git \
      libaio1 \
      libffi-dev \
      libkrb5-dev \
      libpq-dev \
      libsasl2-dev \
      libssl-dev \
      locales \
      openjdk-8-jdk \
      openjdk-8-jre \
      python-minimal \
      python3-dev \
      python3-pip \
      python3-setuptools \
      unzip \
      vim \
      wget \
      zip \
 # Enable and generate the en_US.UTF-8 locale referenced by the LANG/LC_* variables.
 && sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen \
 && locale-gen \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*
# Install the Google Cloud SDK from Google's apt repository.
#  - `set -o pipefail` makes a failed download fail the build instead of
#    piping an empty or HTML error body into `apt-key add`.
#  - `curl -fsSL` returns a non-zero status on HTTP errors and follows redirects.
#  - The repository is addressed over https, like the key download.
#  - The apt package lists are removed in the same layer that created them.
RUN set -o pipefail \
 && echo "deb https://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" \
      | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \
 && curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - \
 && apt-get update -y \
 && apt-get install -y google-cloud-sdk \
 && rm -rf /var/lib/apt/lists/*
# Point the unversioned `pip` and `python` commands at their Python 3
# counterparts. The command substitutions are quoted so that a missing
# pip3/python3 yields an empty link target and fails the build; unquoted, the
# empty expansion would vanish and `ln` would silently create a stray link in
# the working directory instead.
RUN ln -sf "$(which pip3)" /usr/bin/pip \
 && ln -sf "$(which python3)" /usr/bin/python
## install oracle driver
# Unpack the Instant Client archive inside /tmp, move the extracted
# instantclient_19_3 directory to $ORACLE_HOME, then delete the archive.
# Absolute paths are used throughout, so no `cd` is needed.
RUN unzip /tmp/instantclient-basic-linux.x64-19.3.0.0.0dbru.zip -d /tmp \
 && mv /tmp/instantclient_19_3 "$ORACLE_HOME" \
 && rm /tmp/instantclient-basic-linux.x64-19.3.0.0.0dbru.zip
## Install Airflow
# Pinned Airflow release with the extras listed in AIRFLOW_COMPONENTS.
RUN pip install --no-cache-dir "apache-airflow[${AIRFLOW_COMPONENTS}]==${AIRFLOW_VERSION}"
# Project-local package. COPY (not ADD) because it is a plain local directory;
# COPY creates /airflow_custom itself, so no separate mkdir is needed.
COPY airflow/airflow_custom /airflow_custom
# Editable install: the package is imported from /airflow_custom, so that
# directory must remain in the image. --no-cache-dir keeps pip's download
# cache out of the layer.
RUN python -m pip install --no-cache-dir -e /airflow_custom
## Install additional packages
# Extra Python libraries layered on top of Airflow. Flags come first:
#   -U              upgrade any of these that are already installed
#   --no-cache-dir  keep pip's download cache out of the image layer
# The package order is preserved exactly as given.
# NOTE(review): psycopg2 and psycopg2-binary are both requested, and only
# google-cloud-bigquery is version-pinned — confirm both are intended.
RUN pip install -U --no-cache-dir \
      boto3 \
      pandas \
      psycopg2 \
      psycopg2-binary \
      py-postgresql \
      numpy \
      matplotlib \
      scikit-learn \
      google-cloud-bigquery==1.11.3 \
      google-cloud-storage \
      google-cloud-pubsub \
      tensorflow \
      sasl \
      thrift_sasl \
      setuptools \
      wheel \
      pika \
      pymongo \
      unidecode \
      cx_Oracle \
      nltk \
      git+https://github.com/facebookresearch/fastText
# Fetch the NLTK 'rslp' resource at build time. nltk.download() reports
# failure through its return value rather than by raising, so a falsy result
# is converted into a non-zero exit status to fail the build instead of
# producing an image without the resource.
RUN python -c "import sys, nltk; sys.exit(0 if nltk.download('rslp') else 1)"
# remove apt cache
# Actually perform the cleanup: with `--dry-run`, apt-get only simulates the
# operation and removes nothing. The downloaded package lists are removed too.
# Note: this tidies the final filesystem, but data written by earlier layers
# still counts toward the image size unless it is removed in the layer that
# created it.
RUN apt-get clean \
 && rm -rf /var/lib/apt/lists/*
# Install the container entrypoint script and make it executable.
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
# CMD supplies the default arguments appended to ENTRYPOINT. Repeating the
# script path here would invoke `/entrypoint.sh /entrypoint.sh webserver`,
# handing the script its own path as the first argument; only the default
# sub-command belongs in CMD.
# NOTE(review): assumes entrypoint.sh dispatches on its first argument
# (e.g. "webserver") — confirm against the script.
CMD ["webserver"]