Commit dc616e5a authored by Christoph Lange

presentation for htbd seminar April

parent 1b796829
Merge request !11: Try fitting randomly generated data
Pipeline #11128 failed
@@ -11,6 +11,8 @@ class VariableLSTM(tf.keras.Model):
units,
label_length,
total_length,
norm_std=1.,
norm_mean=0.,
lstm_regularization=0.,
dense_regularization=0.,
lstm_dropout=0.,
@@ -21,6 +23,8 @@ class VariableLSTM(tf.keras.Model):
self.units = units
self.label_length = label_length
self.total_length = total_length
self.norm_std = tf.convert_to_tensor(norm_std)
self.norm_mean = tf.convert_to_tensor(norm_mean)
self.lstm_cell = tf.keras.layers.LSTMCell(
units,
@@ -137,3 +141,20 @@ class VariableLSTM(tf.keras.Model):
predictions = predictions_array.stack()
return tf.transpose(predictions, [1, 0, 2])
# def test_step(self, data):
# # Unpack the data
# x, y = data
# # Compute predictions
# y_pred = self(x, training=False)
#
# # redo normalization
# original_y = y * tf.cast(self.norm_std, dtype=y.dtype) + tf.cast(self.norm_mean, dtype=y.dtype)
# original_y_pred = y_pred * tf.cast(self.norm_std, dtype=y_pred.dtype) + tf.cast(self.norm_mean, dtype=y_pred.dtype)
# # Updates the metrics tracking the loss
# self.compiled_loss(original_y, original_y_pred, regularization_losses=self.losses)
# # Update the metrics.
# self.compiled_metrics.update_state(original_y, original_y_pred)
# # Return a dict mapping metric names to current value.
# # Note that it will include the loss (tracked in self.metrics).
# return {m.name: m.result() for m in self.metrics}
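The new `norm_std` and `norm_mean` arguments carry the normalization statistics into the model so that metrics can be reported on the original voltage scale, as the commented-out `test_step` above sketches. A hypothetical construction call (the hyperparameter values and the `train_voltages` array are illustrative, not taken from the repository):

``` python
import numpy as np

# assumed: train_voltages holds the raw training voltages before normalization
train_mean = np.mean(train_voltages)
train_std = np.std(train_voltages)

model = VariableLSTM(
    units=32,              # illustrative hyperparameters
    label_length=30,
    total_length=50,
    norm_std=train_std,    # rescales predictions back to volts
    norm_mean=train_mean,  # shifts predictions back to volts
)
```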
%% Cell type:markdown id: tags:
## Setup
%% Cell type:code id: tags:
``` python
%load_ext autoreload
%autoreload 2
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import os
import datetime
import functools
import IPython
import IPython.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import glucose_ts
# plot layouts
sns.set_style("whitegrid")
sns.set_context("talk", font_scale=2, rc={
    "lines.linewidth": 3.5,
    "lines.markersize": 20,
})
# 'figsize' is not a context rc key; set the global figure size explicitly
plt.rcParams['figure.figsize'] = (20, 20)
sns.color_palette("dark")
plt.subplots_adjust(wspace=1.5)
DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(glucose_ts.__file__)), 'data')
```
%% Output
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
%% Cell type:markdown id: tags:
## Load the Data and Convert Units
%% Cell type:code id: tags:
``` python
calibration_df = pd.read_csv(os.path.join(DATA_DIR, 'regression.csv'))
calibration_df['voltage (V)'] = calibration_df['Voltage (mV)'] / 1000.
```
%% Cell type:markdown id: tags:
## Fit a Linear Regression Model
%% Cell type:code id: tags:
``` python
import sklearn.linear_model
lr_model = sklearn.linear_model.LinearRegression().fit(calibration_df['voltage (V)'].values.reshape(-1, 1), calibration_df['concentration (g/l)'])
lr_model.predict(np.linspace(.95, 1.02, 100).reshape(-1, 1))
```
%% Output
array([ 5.18595998, 5.1251253 , 5.06429063, 5.00345595, 4.94262128,
4.8817866 , 4.82095193, 4.76011725, 4.69928258, 4.63844791,
4.57761323, 4.51677856, 4.45594388, 4.39510921, 4.33427453,
4.27343986, 4.21260518, 4.15177051, 4.09093583, 4.03010116,
3.96926649, 3.90843181, 3.84759714, 3.78676246, 3.72592779,
3.66509311, 3.60425844, 3.54342376, 3.48258909, 3.42175441,
3.36091974, 3.30008507, 3.23925039, 3.17841572, 3.11758104,
3.05674637, 2.99591169, 2.93507702, 2.87424234, 2.81340767,
2.75257299, 2.69173832, 2.63090365, 2.57006897, 2.5092343 ,
2.44839962, 2.38756495, 2.32673027, 2.2658956 , 2.20506092,
2.14422625, 2.08339157, 2.0225569 , 1.96172223, 1.90088755,
1.84005288, 1.7792182 , 1.71838353, 1.65754885, 1.59671418,
1.5358795 , 1.47504483, 1.41421016, 1.35337548, 1.29254081,
1.23170613, 1.17087146, 1.11003678, 1.04920211, 0.98836743,
0.92753276, 0.86669808, 0.80586341, 0.74502874, 0.68419406,
0.62335939, 0.56252471, 0.50169004, 0.44085536, 0.38002069,
0.31918601, 0.25835134, 0.19751666, 0.13668199, 0.07584732,
0.01501264, -0.04582203, -0.10665671, -0.16749138, -0.22832606,
-0.28916073, -0.34999541, -0.41083008, -0.47166476, -0.53249943,
-0.5933341 , -0.65416878, -0.71500345, -0.77583813, -0.8366728 ])
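%% Cell type:markdown id: tags:
As a quick sanity check (a sketch reusing the `lr_model` fitted above), we can inspect the slope, the intercept, and the coefficient of determination on the calibration data.
%% Cell type:code id: tags:
``` python
# slope (g/l per V), intercept (g/l), and R^2 on the calibration data
print(lr_model.coef_, lr_model.intercept_)
print(lr_model.score(
    calibration_df['voltage (V)'].values.reshape(-1, 1),
    calibration_df['concentration (g/l)'],
))
```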
%% Cell type:code id: tags:
``` python
fig, axises = plt.subplots(
1,
1,
figsize=(15, 15)
)
sns.scatterplot(
data=calibration_df,
x='voltage (V)',
y='concentration (g/l)',
ax=axises,
alpha=.2
)
sns.lineplot(
    x=np.linspace(.95, 1.01, 50),
    y=lr_model.predict(np.linspace(.95, 1.01, 50).reshape(-1, 1)),
    ax=axises,
)
axises.set_title('fitting regression model', pad=20)
fig.savefig(
os.path.join(os.path.dirname(os.path.dirname(glucose_ts.__file__)), 'presentations', 'images', 'regression.png'),
facecolor='w',
edgecolor='w',
transparent=True,
)
```
%% Output
%% Cell type:code id: tags:
``` python
from sklearn.metrics import mean_squared_error
mean_squared_error(
    calibration_df['concentration (g/l)'],
    lr_model.predict(calibration_df['voltage (V)'].values.reshape(-1, 1)),
)
```
%% Output
0.2134445901945714
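%% Cell type:markdown id: tags:
The RMSE is often easier to interpret because it is in the units of the target; a small follow-up sketch based on the MSE above:
%% Cell type:code id: tags:
``` python
import math
# root-mean-squared error in g/l
math.sqrt(mean_squared_error(
    calibration_df['concentration (g/l)'],
    lr_model.predict(calibration_df['voltage (V)'].values.reshape(-1, 1)),
))
```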
@@ -7,6 +7,7 @@
\usepackage[english]{babel} % 'babel' is necessary to automatically translate certain elements
\usepackage[utf8]{inputenc}
\usepackage{mathtools}
% insert a slide with the section head at the beginning of each section
\AtBeginSection[]{
@@ -29,7 +30,7 @@
% parameters
\title{TF3 Improving signal quality}
\subtitle{Getting glucose concentrations closer to online}
\author[Christoph Lange]{Bo Kern, Christoph Lange, Katharina Paulick, Meijiao Wan, Niko Dejonge}
\institute{Technische Universität Berlin}
% footer logo
@@ -91,19 +92,41 @@
\begin{frame}{Microscopic Image Analysis}
\smallHead
\begin{columns}
\column{0.47 \textwidth}
We aim for:
\begin{itemize}
\item analyzing cell cultures offline
\item staining the membranes
\item automatically cutting one picture into multiple images
\item retrieving parameters that describe the cell morphology
\item training a model that predicts the at-line quantities
\end{itemize}
\begin{figure}[h!]
\includegraphics[scale=0.1]{images/cell_parameter.png}
\end{figure}
\column{0.5 \textwidth}
\begin{figure}[h!]
\includegraphics[scale=0.16]{images/old_new_image.png}
\end{figure}
\end{columns}
\end{frame}
\begin{frame}{Vision: Sensor Fusion}
\smallHead
We want to
\begin{itemize}
\item combine different sensor sources to predict at-line parameters
\item integrate signal processing algorithms into a model evaluation framework with the supervised models from TF1
\item compare different signal processing mechanisms on the respective sensor signals
\end{itemize}
\begin{figure}[h!]
\includegraphics[scale=0.2]{images/microscopic_images.png}
\end{figure}
\end{frame}
@@ -124,7 +147,7 @@ We aim for:
\item one measurement at a time
\item enzyme based amperometric sensors
\item enables at-line measurements
\item integrated via SiLA
\end{itemize}
\column{0.5 \textwidth}
@@ -163,8 +186,40 @@ We aim for:
\subsection{Time Series Forecasting}
\begin{frame}{Time Series Dataset}
\smallHead
\begin{columns}
\column{0.4 \textwidth}
The dataset consists of $43$ time series from different sensors and runs
\begin{itemize}
\item $1000$ voltage measurements $V_i$
\item equidistant points in time $t_i$ for $9$ minutes
\item mostly starting at $1.02\,\mathrm{V}$
\item mostly decreasing
\item different noise levels
\end{itemize}
\emph{Goal:} Get the final value earlier
\column{0.55 \textwidth}
\begin{figure}[h!]
\includegraphics[width=\linewidth, height=5.2cm]{images/time_series_dataset.png}
\end{figure}
\end{columns}
\end{frame}
\begin{frame}{Time Series as Generalized Logistic Curve}
\smallHead
\begin{columns}
@@ -194,7 +249,7 @@ We observe a fit for the whole time horizon.
\end{frame}
\begin{frame}{Predicting Time Series: Maximum Likelihood}
\smallHead
\begin{columns}
@@ -220,6 +275,118 @@ model overfits to first part
\end{figure}
\end{columns}
\end{frame}
\begin{frame}{Predicting Time Series: Maximum a Posteriori}
\smallHead
\begin{columns}
\column{0.4 \textwidth}
\begin{itemize}
\item cut the time series after $t_c = 2$ minutes
\item we have time--voltage pairs $(t_i, V_i)$
\item try to estimate $A$, $K$, $B$, $M$, and $\nu$
\item use maximum a posteriori estimation
\item Gaussian priors for $A$, $K$, $B$, $M$, and $\nu$
\item assumption that all parameters are independent
\end{itemize}
The model looks fine for this one sample.
\column{0.5 \textwidth}
\begin{figure}[h!]
\includegraphics[width=\linewidth, height=5.2cm]{images/cut_off_map.png}
\end{figure}
\end{columns}
\end{frame}
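A minimal sketch of such a MAP fit, assuming the common parameterization of the generalized logistic (Richards) curve; the function names, the noise level, and the prior arrays are illustrative, not the actual glucose_ts implementation:

``` python
import numpy as np
import scipy.optimize

def generalized_logistic(t, A, K, B, M, nu):
    # Richards / generalized logistic curve; parameter names follow the slides
    return A + (K - A) / (1. + np.exp(-B * (t - M))) ** (1. / nu)

def neg_log_posterior(params, t, V, prior_mean, prior_std, noise_std=0.01):
    # Gaussian likelihood plus independent Gaussian priors, as assumed on the slide
    params = np.asarray(params)
    residuals = V - generalized_logistic(t, *params)
    neg_log_likelihood = 0.5 * np.sum((residuals / noise_std) ** 2)
    neg_log_prior = 0.5 * np.sum(((params - prior_mean) / prior_std) ** 2)
    return neg_log_likelihood + neg_log_prior

# fit on the observations with t_i <= t_c, starting from the prior means:
# result = scipy.optimize.minimize(
#     neg_log_posterior, x0=prior_mean, args=(t_cut, V_cut, prior_mean, prior_std))
```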
\begin{frame}{Predicting Time Series: LSTM}
\smallHead
\begin{columns}
\column{0.4 \textwidth}
\begin{itemize}
\item reduce the number of data points by a factor of $20$
\item cut the time series after $t_c = 1.8$ minutes
\item the model only sees the voltages $\{ V_i \mid t_i \leq t_c \}$
\item predict one step at a time
\item feed the last prediction back as input for the next one
\end{itemize}
Even for the shorter input horizon, the fit looks acceptable.
\column{0.5 \textwidth}
\begin{figure}[h!]
\includegraphics[scale=0.16]{images/lstm_examples.png}
\end{figure}
\end{columns}
\end{frame}
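A schematic sketch of this one-step-ahead rollout, under the assumption that the model pairs an LSTM cell with a Dense(1) projection back to a voltage; the actual VariableLSTM implements this loop internally, so the names here are illustrative:

``` python
import tensorflow as tf

def autoregressive_rollout(lstm_cell, dense, warmup_voltages, n_future_steps):
    # warmup_voltages: (batch, warmup_length, 1), the voltages with t_i <= t_c
    batch_size = tf.shape(warmup_voltages)[0]
    state = lstm_cell.get_initial_state(batch_size=batch_size, dtype=warmup_voltages.dtype)
    # warm up the cell state on the observed prefix
    for i in range(warmup_voltages.shape[1]):
        output, state = lstm_cell(warmup_voltages[:, i, :], states=state)
    x = dense(output)                               # first prediction
    predictions = [x]
    for _ in range(n_future_steps - 1):
        output, state = lstm_cell(x, states=state)  # one step at a time
        x = dense(output)                           # feed the prediction back in
        predictions.append(x)
    return tf.stack(predictions, axis=1)            # (batch, n_future_steps, 1)
```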
\begin{frame}{Evaluating the LSTM}
\smallHead
\begin{columns}
\column{0.4 \textwidth}
Define a ``final'' voltage as the mean over all measurements after $t_f = 7$ minutes
\begin{equation*}
V_f \coloneqq \frac{1}{\left| \{ i \in \mathbb{N} \mid t_i \geq t_f \} \right|} \sum_{i \,:\, t_i \geq t_f} V_i
\end{equation*}
and take a look at the distribution of the residuals
\begin{equation*}
\hat{V}_f - V_f
\end{equation*}
and the mean squared error over the $N$ time series amounts to
\begin{equation*}
\frac{1}{N} \sum \bigl( \hat{V}_f - V_f \bigr)^2 \approx 10^{-4}
\end{equation*}
\column{0.5 \textwidth}
\begin{figure}[h!]
\includegraphics[scale=0.27]{images/residuals.png}
\end{figure}
\end{columns}
\end{frame}
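The metric above is straightforward to compute; a minimal numpy sketch (array names are illustrative):

``` python
import numpy as np

def final_voltage(t, V, t_f=7.0):
    # V_f: mean voltage over all measurements taken after t_f minutes
    return V[t >= t_f].mean()

# residual for one series: predicted final voltage minus the measured V_f
# residuals = predicted_finals - np.array([final_voltage(t, V) for t, V in series])
# np.mean(residuals ** 2)  # approximately 1e-4 on this dataset
```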
\subsection{Open Questions}
\begin{frame}{Open Questions}
\smallHead
\begin{itemize}
\item Can we make the voltage curves more consistent?
\item Is a straight line the right model family for the glucose/voltage regression problem?
\item Two separate models vs. a joint model predicting glucose directly?
\item Can we use a physics informed neural network?
\item What is the performance of the Generalized Logistic Curve model?
\item Can we predict sooner?
\item Do the curves change when we have a real batch fermentation?
\item More parallel measurements?
\end{itemize}
\end{frame}
@@ -239,8 +406,8 @@ model overfits to first part
\item \href{http://git-workshop-py-packaging.rtfd.io/}{Python Packaging}
\end{itemize}
\item \href{https://git.tu-berlin.de/bvt-htbd/kiwi/cookiecutter-python-package-template}{Python Package Template} and \href{https://kiwi-python-package-template.readthedocs.io/en/latest/index.html}{documentation} how to use it
\item Continuous Integration for HTBD
\item \href{https://glucose-ts.readthedocs.io/en/latest/}{Sphinx documentation} for the glucose models to ease their usage
\end{itemize}
\column{0.5 \textwidth}
@@ -253,5 +420,12 @@ model overfits to first part
\end{frame}
\begin{frame}{}
\centering \Huge
\emph{Questions?}
\end{frame}
\end{document}