From 3145984e888b2b38e3eec5c536ca0f2b8a366258 Mon Sep 17 00:00:00 2001
From: Gertjan van den Burg
Date: Fri, 15 May 2020 21:01:07 +0100
Subject: Add docker image for tcpd

---
 Dockerfile | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 README.md  | 18 +++++++++++++++++-
 2 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 Dockerfile

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..b242e8c
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,47 @@
+FROM ubuntu:20.04
+
+# Install the build toolchain, git and Python 3, and make "python" and "pip"
+# resolve to their Python 3 versions.
+#  - DEBIAN_FRONTEND is set inline on the install step so that package
+#    configuration cannot stall the build on a prompt, without leaking the
+#    setting into the runtime environment.
+#  - ca-certificates is listed explicitly because the repository is cloned
+#    over HTTPS further down.
+#  - The apt package lists are deleted in the layer that created them, so
+#    they never end up in the image.
+#  - /usr/local/bin precedes /usr/bin on PATH, so the symlinks decide what
+#    "python" and "pip" mean for non-interactive commands such as make; the
+#    aliases are for interactive root shells.
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        git \
+        python3 \
+        python3-dev \
+        python3-pip && \
+    rm -rf /var/lib/apt/lists/* && \
+    echo "alias python='python3'" >> /root/.bash_aliases && \
+    echo "alias pip='pip3'" >> /root/.bash_aliases && \
+    ln -s /usr/bin/python3 /usr/local/bin/python && \
+    ln -s /usr/bin/pip3 /usr/local/bin/pip && \
+    pip install --no-cache-dir virtualenv
+
+# Make bash the default shell; /bin/sh is the shell make runs recipes with.
+# NOTE(review): presumably the TCPD Makefile needs bash-only syntax - confirm
+# before simplifying this step.
+RUN mv /bin/sh /bin/sh.old && cp /bin/bash /bin/sh
+
+# Clone the dataset repo to a fixed absolute path; the README mounts the
+# output directory at /TCPD/export.
+RUN git clone https://github.com/alan-turing-institute/TCPD /TCPD
+
+# Change working dir (absolute, so it does not depend on the working
+# directory inherited from the base image)
+WORKDIR /TCPD
+
+# Create virtualenv
+RUN make venv
+
+# Build the dataset when container is run.
+CMD ["make", "export"]
diff --git a/README.md b/README.md
index bb9da64..f0b2543 100644
--- a/README.md
+++ b/README.md
@@ -51,8 +51,24 @@ paper](https://arxiv.org/abs/2003.06222):
 Many of the time series in the dataset are included in this repository.
 However, due to licensing restrictions, some series can not be redistributed
 and need to be downloaded locally. We've added a Python script and a Makefile
-to make this process as easy as possible.
+to make this process as easy as possible. There is also a Dockerfile to
+facilitate reproducibility.
 
+### Using Docker
+
+To build the dataset using Docker, first build the Docker image:
+
+```
+$ docker build -t tcpd https://github.com/alan-turing-institute/TCPD.git
+```
+
+then build the dataset:
+
+```
+$ docker run -i -t -v /path/to/where/you/want/the/dataset:/TCPD/export tcpd
+```
+
+### Using the command line
 
 To obtain the dataset, please run the following steps:
 
-- 
cgit v1.2.3