aboutsummaryrefslogtreecommitdiff
path: root/examples/python
diff options
context:
space:
mode:
Diffstat (limited to 'examples/python')
-rw-r--r--examples/python/README.md59
-rw-r--r--examples/python/load_dataset.py113
2 files changed, 172 insertions, 0 deletions
diff --git a/examples/python/README.md b/examples/python/README.md
new file mode 100644
index 0000000..157ba9a
--- /dev/null
+++ b/examples/python/README.md
@@ -0,0 +1,59 @@
+# Loading a TCPD time series in Python
+
+The ``load_dataset.py`` file contains example code to load a time series as a
+``TimeSeries`` object.
+
+```python
+>>> from load_dataset import TimeSeries
+>>> ts = TimeSeries.from_json('../../datasets/ozone/ozone.json')
+```
+
+To export the time series as a [pandas
+DataFrame](https://pandas.pydata.org/pandas-docs/stable/getting_started/dsintro.html#dataframe),
+simply use:
+
+```python
+>>> ts.df
+ t Total Emissions
+0 0 380000.0
+1 1 400000.0
+2 2 440000.0
+3 3 480000.0
+4 4 510000.0
+5 5 540000.0
+...
+```
+
+The ``TimeSeries`` instance ``ts`` has an integer time axis at ``ts.t`` and
+the observations at ``ts.y``. The time axis is zero-based by default. If you
+prefer one-based indexing, simply run:
+
+```python
+>>> ts.make_one_based()
+>>> ts.df
+ t Total Emissions
+0 1 380000.0
+1 2 400000.0
+2 3 440000.0
+3 4 480000.0
+4 5 510000.0
+5 6 540000.0
+...
+```
+
+Many of the time series in TCPD have date or datetime labels for the time
+axis. This axis can be retrieved using:
+
+```python
+>>> ts.datestr
+array(['1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968',
+ ...
+ '2009', '2010', '2011', '2012', '2013', '2014'], dtype='<U4')
+```
+
+which uses the date format stored in ``ts.datefmt``.
+
+```python
+>>> ts.datefmt
+'%Y'
+```
diff --git a/examples/python/load_dataset.py b/examples/python/load_dataset.py
new file mode 100644
index 0000000..59cbb1a
--- /dev/null
+++ b/examples/python/load_dataset.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Example code for loading a dataset to a TimeSeries object.
+
+Note that this code requires Pandas to be available.
+
+Author: Gertjan van den Burg
+Copyright: The Alan Turing Institute, 2019
+License: See LICENSE file.
+
+"""
+
+import json
+import numpy as np
+import pandas as pd
+
+
class TimeSeries:
    """Container for a time series with an integer time axis.

    Attributes
    ----------
    t : sequence of int
        Time index, one entry per observation.
    y : numpy.ndarray
        Observations; ``y.shape[1]`` is used as the number of dimensions,
        so a two-dimensional array is expected.
    name, longname : str or None
        Short and long name of the series.
    datestr : array-like of str or None
        Raw date labels for the time axis, if any.
    datefmt : str or None
        Format string belonging to ``datestr``, if any.
    columns : list of str or None
        Labels for the columns of ``y``.
    zero_based : bool
        True until :meth:`make_one_based` has shifted the time index.
    """

    def __init__(
        self,
        t,
        y,
        name=None,
        longname=None,
        datestr=None,
        datefmt=None,
        columns=None,
    ):
        self.t = t
        self.y = y

        self.name = name
        self.longname = longname
        self.datestr = datestr
        self.datefmt = datefmt
        self.columns = columns

        # whether the series is stored as zero-based or one-based
        self.zero_based = True

    @property
    def n_obs(self):
        """ Number of observations (length of the time index). """
        return len(self.t)

    @property
    def n_dim(self):
        """ Number of columns in the observation matrix. """
        return self.y.shape[1]

    @property
    def shape(self):
        """ Tuple ``(n_obs, n_dim)``. """
        return (self.n_obs, self.n_dim)

    @classmethod
    def from_json(cls, filename):
        """ Load a TimeSeries from a JSON file.

        The file must provide the keys ``name``, ``longname``, ``n_obs``,
        ``n_dim``, ``time`` (with ``index`` and, optionally, ``format`` and
        ``raw``) and ``series`` (a list of objects with ``type``, ``raw``
        and an optional ``label``). Series without a label are named
        ``V1``, ``V2``, ... according to their position.
        """
        with open(filename, "rb") as fp:
            data = json.load(fp)

        time_info = data["time"]
        # atleast_1d keeps a single-observation index one-dimensional, so
        # that len() keeps working on it after the squeeze.
        tidx = np.atleast_1d(np.squeeze(np.array(time_info["index"])))

        if "format" in time_info:
            datefmt = time_info["format"]
            datestr = np.array(time_info["raw"])
        else:
            datefmt = None
            datestr = None

        y = np.zeros((data["n_obs"], data["n_dim"]))
        columns = []

        for idx, series in enumerate(data["series"]):
            columns.append(series.get("label", "V%i" % (idx + 1)))
            # The ``np.int`` alias was removed in NumPy 1.24; the builtin
            # ``int`` is the type it used to refer to.
            thetype = int if series["type"] == "integer" else np.float64
            y[:, idx] = np.array(series["raw"], dtype=thetype)

        return cls(
            tidx,
            y,
            name=data["name"],
            longname=data["longname"],
            datefmt=datefmt,
            datestr=datestr,
            columns=columns,
        )

    @property
    def df(self):
        """ The series as a pandas DataFrame.

        The first column is ``t``, followed by one column per entry of
        ``columns``. When no column labels were given, ``V1``, ``V2``, ...
        are used.
        """
        columns = self.columns
        if columns is None:
            columns = ["V%i" % (i + 1) for i in range(self.n_dim)]

        d = {"t": self.t}
        for col, val in zip(columns, self.y.T):
            d[col] = val
        return pd.DataFrame(d)

    def make_one_based(self):
        """ Convert the time index to a one-based time index.

        Calling this more than once has no further effect. The time index
        is stored as a NumPy array afterwards.
        """
        if self.zero_based:
            self.t = np.asarray(self.t) + 1
            self.zero_based = False

    def __repr__(self):
        return "TimeSeries(name=%s, n_obs=%s, n_dim=%s)" % (
            self.name,
            self.n_obs,
            self.n_dim,
        )

    def __str__(self):
        return repr(self)