Commit f556fd73 authored by ch.lange's avatar ch.lange
Browse files

synthesizing experiments

parent e23ae1ee
Pipeline #176221 failed with stage
in 9 minutes and 58 seconds
......@@ -8,3 +8,5 @@ img/ filter=lfs diff=lfs merge=lfs -text
*.ipynb filter=lfs diff=lfs merge=lfs -text
*.json filter=lfs diff=lfs merge=lfs -text
*.txt filter=lfs diff=lfs merge=lfs -text
*.mat filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
This source diff could not be displayed because it is stored in LFS. You can view the blob instead.
presentations/images/4_water_mean_std.png

130 Bytes | W: | H:

presentations/images/4_water_mean_std.png

130 Bytes | W: | H:

presentations/images/4_water_mean_std.png
presentations/images/4_water_mean_std.png
presentations/images/4_water_mean_std.png
presentations/images/4_water_mean_std.png
  • 2-up
  • Swipe
  • Onion skin
presentations/images/4_water_means.png

130 Bytes | W: | H:

presentations/images/4_water_means.png

130 Bytes | W: | H:

presentations/images/4_water_means.png
presentations/images/4_water_means.png
presentations/images/4_water_means.png
presentations/images/4_water_means.png
  • 2-up
  • Swipe
  • Onion skin
presentations/images/all_glucose_means.png

131 Bytes | W: | H:

presentations/images/all_glucose_means.png

130 Bytes | W: | H:

presentations/images/all_glucose_means.png
presentations/images/all_glucose_means.png
presentations/images/all_glucose_means.png
presentations/images/all_glucose_means.png
  • 2-up
  • Swipe
  • Onion skin
presentations/images/baseline_correction_nn.png

130 Bytes | W: | H:

presentations/images/baseline_correction_nn.png

130 Bytes | W: | H:

presentations/images/baseline_correction_nn.png
presentations/images/baseline_correction_nn.png
presentations/images/baseline_correction_nn.png
presentations/images/baseline_correction_nn.png
  • 2-up
  • Swipe
  • Onion skin
presentations/images/bin_correlations.png

130 Bytes | W: | H:

presentations/images/bin_correlations.png

130 Bytes | W: | H:

presentations/images/bin_correlations.png
presentations/images/bin_correlations.png
presentations/images/bin_correlations.png
presentations/images/bin_correlations.png
  • 2-up
  • Swipe
  • Onion skin
import ast
import click
import functools
import pandas as pd
from ray import tune
from ray.tune import CLIReporter
import torch
from raman_and_nir.data.utils import mix_concentrations
from raman_and_nir.models.cnn import ray_tune_wrapper_mock_data
......@@ -19,7 +22,7 @@ class PythonLiteralOption(click.Option):
raise click.BadParameter(value)
@click.group(name='tune-hyper')
@click.group(name='raman')
def cli_group():
pass
......@@ -94,7 +97,7 @@ def cli_group():
@click.option('--wave-noise-std', cls=PythonLiteralOption, default='[0.]')
@click.option('--random-scale-std', cls=PythonLiteralOption, default='[0.]')
@click.option('--max-random-shift', cls=PythonLiteralOption, default='[0]')
def raman_cnn_grid_search(
def cnn_grid_search(
results_dir,
well_type,
wavelength,
......@@ -166,3 +169,51 @@ def raman_cnn_grid_search(
click.echo(
'Finished! \n Results can be found here: %s' % results_dir
)
@cli_group.command(
    help='Create a csv for mixing standards'
)
@click.option(
    '--results-path',
    help='where to leave the results',
    type=str
)
@click.option(
    '--substance-names',
    cls=PythonLiteralOption,
    default="['pilsener', 'ale']"
)
@click.option(
    '--target-concentrations',
    cls=PythonLiteralOption,
    default="[(1, 2), (0, 1)]"
)
@click.option(
    '--source-concentrations',
    cls=PythonLiteralOption,
    default="[5, 5]"
)
@click.option(
    '--sample-volume',
    help='the volume each sample is supposed to have',
    type=float
)
def mix_solutions(
    results_path,
    substance_names,
    target_concentrations,
    source_concentrations,
    sample_volume,
):
    """Compute every mixing combination for the given standards and dump it as csv."""
    mixes = mix_concentrations(
        substance_names,
        target_concentrations,
        source_concentrations,
        sample_volume,
    )
    click.echo('Obtained %s different combinations' % len(mixes))
    # one row per combination; pandas handles the header/index bookkeeping
    frame = pd.DataFrame(mixes)
    frame.to_csv(results_path)
    click.echo('Wrote all combinations to path %s' % results_path)
\ No newline at end of file
import datetime
import multiprocessing
import numpy as np
import os
import pandas as pd
import raman_and_nir
def read_kaiser_txt(path, lower_wn=None, upper_wn=None):
    """Read one Kaiser spectrometer export file.

    Each line of the file holds one tab-separated
    ``wavenumber<TAB>intensity`` pair.

    :param path: path of the .txt file to parse
    :param lower_wn: inclusive lower wavenumber bound; None keeps all
    :param upper_wn: inclusive upper wavenumber bound; None keeps all
    :return: dict with 'wavelength', 'timestamp', 'wavenumbers',
        'intensities' (the two arrays restricted to the bounds)
    """
    # explicit None checks: the previous `lower_wn or -np.inf` silently
    # replaced a legitimate bound of 0 with -inf
    lower_wn = -np.inf if lower_wn is None else lower_wn
    upper_wn = np.inf if upper_wn is None else upper_wn
    with open(path, 'r') as file_reader:
        wavenumbers, intensities = zip(*[
            [float(one) for one in one_line.split('\t')]
            for one_line in file_reader
        ])
    wavenumbers = np.array(wavenumbers)
    intensities = np.array(intensities)
    selection = np.logical_and(
        lower_wn <= wavenumbers,
        wavenumbers <= upper_wn
    )
    return {
        # wavelength is fixed at 785 nm for this reader (not stored in file)
        'wavelength': 785.,
        # the file carries no acquisition time; fall back to the mtime
        'timestamp': datetime.datetime.fromtimestamp(
            os.path.getmtime(path)
        ),
        'wavenumbers': wavenumbers[selection],
        'intensities': intensities[selection],
    }
def get_kaiser_subset(start_sequence=''):
    """Load all Kaiser mock-experiment spectra whose file name starts
    with *start_sequence* into one time-indexed DataFrame.

    :param start_sequence: filename prefix filter; '' selects every file
    :return: DataFrame indexed by acquisition timestamp, columns taken
        from the first spectrum's wavenumbers, values are intensities
    """
    # next() materializes only the top directory level instead of
    # walking (and buffering) the entire tree
    root, _dirs, files = next(
        os.walk(
            os.path.join(
                raman_and_nir.data.data_dir,
                'mock_experiments',
                'kaiser_rxn1',
            )
        )
    )
    all_paths = [
        os.path.join(root, one)
        for one in files
        if one.startswith(start_sequence)
    ]
    # context manager terminates the worker processes; the bare
    # multiprocessing.Pool() was previously never closed
    with multiprocessing.Pool() as pool:
        all_spectra = sorted(
            pool.map(read_kaiser_txt, all_paths),
            key=lambda one: one['timestamp'],
        )
    # NOTE(review): assumes every file shares the wavenumber axis of the
    # first spectrum — verify against the acquisition setup
    return pd.DataFrame(
        index=[
            one['timestamp']
            for one in all_spectra
        ],
        columns=all_spectra[0]['wavenumbers'],
        data=np.array([
            one['intensities']
            for one in all_spectra
        ])
    )
import datetime
import functools
import multiprocessing
import numpy as np
import os
import pandas as pd
import re
import raman_and_nir
def read_timegate_txt(path, lower_wn=None, upper_wn=None):
    """Parse one Timegate PicoRaman export file.

    File layout (0-based line indices): line 2 carries the acquisition
    date, line 10 the laser wavelength, lines 18+ hold comma-separated
    ``wavenumber, intensity`` pairs; everything else is header noise.

    :param path: path of the .txt file to parse
    :param lower_wn: inclusive lower wavenumber bound; None keeps all
    :param upper_wn: inclusive upper wavenumber bound; None keeps all
    :return: dict with 'timestamp', 'wavelength', 'wavenumbers',
        'intensities' (the two arrays restricted to the bounds)
    """
    # explicit None checks: the previous `lower_wn or -np.inf` silently
    # replaced a legitimate bound of 0 with -inf
    lower_wn = -np.inf if lower_wn is None else lower_wn
    upper_wn = np.inf if upper_wn is None else upper_wn
    with open(path, 'r') as file_reader:
        # keep lines 2, 10 and 18+; split('\n') strips the trailing newline
        pure_content = [
            one_line.split('\n')[0]
            for idx, one_line in enumerate(file_reader.readlines())
            if idx not in [0, 1] + list(range(3, 10)) + list(range(11, 18))
        ]
    wavenumbers, intensities = zip(*[
        [float(one) for one in one_line.split(', ')]
        for one_line in pure_content[2:]
    ])
    wavenumbers = np.array(wavenumbers)
    intensities = np.array(intensities)
    selection = np.logical_and(
        lower_wn <= wavenumbers,
        wavenumbers <= upper_wn
    )
    return {
        'timestamp': datetime.datetime.strptime(
            pure_content[0].split('Date = ')[-1],
            '%d-%b-%Y %H:%M:%S'
        ),
        # raw string with the dot escaped: the original '= (\d+.\d+)nm'
        # let the dot match any character
        'wavelength': float(
            re.compile(r'= (\d+\.\d+)nm').findall(pure_content[1])[0]
        ),
        'wavenumbers': wavenumbers[selection],
        'intensities': intensities[selection],
    }
def read_timegate_mock_1d(lower_wn=None, upper_wn=None):
    """Load every Timegate mock-experiment spectrum, annotated with its
    plate position and the concentrations from ``concentrations.csv``.

    :param lower_wn: inclusive lower wavenumber bound; None keeps all
    :param upper_wn: inclusive upper wavenumber bound; None keeps all
    :return: list of dicts — spectrum fields merged with plate info and
        the annotation values named in ``raman_and_nir.data.annotation_names``
    """
    # only the top directory level is needed
    root, dirs, files = next(
        os.walk(
            os.path.join(
                raman_and_nir.data.data_dir,
                'mock_experiments',
                'timegate_picoraman_m2',
            )
        )
    )
    # each '<name>_pp' directory holds the post-processed file '<name>.txt'
    # ('one_dir' instead of 'dir', which shadowed the builtin)
    all_paths = [
        os.path.join(root, one_dir, '%s.txt' % one_dir.split('_pp')[0])
        for one_dir in dirs
        if one_dir.endswith('_pp')
    ]
    # well position is encoded in the file name, e.g. 'P1A12.txt';
    # raw strings with escaped dots replace the old '\d+.txt' patterns
    path_info = [
        {
            'plate_number': int(
                re.compile(r'P(\d)[A-Z]\d+\.txt').findall(one)[0]
            ),
            'plate_row': str(
                re.compile(r'P\d([A-Z])\d+\.txt').findall(one)[0]
            ),
            'plate_col': int(
                re.compile(r'P\d[A-Z](\d+)\.txt').findall(one)[0]
            ),
        }
        for one in all_paths
    ]
    # context manager terminates the worker processes; the bare
    # multiprocessing.Pool() was previously never closed
    with multiprocessing.Pool() as pool:
        all_spectra = [
            {**spectra, **info}
            for spectra, info in zip(
                pool.map(
                    functools.partial(
                        read_timegate_txt,
                        lower_wn=lower_wn,
                        upper_wn=upper_wn
                    ),
                    all_paths
                ),
                path_info
            )
        ]
    # (plate, row, column) -> concentration tuple, read from the csv
    annotations = {
        (int(one['plate']), one['row'], int(one['column'])):
        (
            one['glucose_concentration'],
            one['Na_acetate_concentration'],
            one['Mg_SO4_concentration'],
            int(one['MSM_present'])
        )
        for (idx, one) in pd.read_csv(
            os.path.join(
                raman_and_nir.data.data_dir,
                'mock_experiments',
                'concentrations.csv'
            )
        ).iterrows()
    }
    # merge each spectrum with its named annotation values
    return [
        {
            **one,
            **dict(
                zip(
                    raman_and_nir.data.annotation_names,
                    annotations[
                        (
                            one['plate_number'],
                            one['plate_row'],
                            one['plate_col'],
                        )
                    ]
                )
            )
        }
        for one in all_spectra
    ]
def get_timegate_water_locations_1d():
    """Load the Timegate 'location_variance' water spectra into one
    time-indexed DataFrame.

    :return: DataFrame indexed by acquisition timestamp, columns taken
        from the first spectrum's wavenumbers, values are intensities
    """
    # only the top directory level is needed
    root, dirs, files = next(
        os.walk(
            os.path.join(
                raman_and_nir.data.data_dir,
                'mock_experiments',
                'timegate_picoraman_m2',
                'location_variance'
            )
        )
    )
    # each '<name>_pp' directory holds the post-processed file '<name>.txt'
    # ('one_dir' instead of 'dir', which shadowed the builtin)
    all_paths = [
        os.path.join(root, one_dir, '%s.txt' % one_dir.split('_pp')[0])
        for one_dir in dirs
        if one_dir.endswith('_pp')
    ]
    # context manager terminates the worker processes; the bare
    # multiprocessing.Pool() was previously never closed
    with multiprocessing.Pool() as pool:
        all_spectra = sorted(
            pool.map(read_timegate_txt, all_paths),
            key=lambda one: one['timestamp'],
        )
    # NOTE(review): assumes every file shares the wavenumber axis of the
    # first spectrum — verify against the acquisition setup
    return pd.DataFrame(
        index=[
            one['timestamp']
            for one in all_spectra
        ],
        columns=all_spectra[0]['wavenumbers'],
        data=np.array([
            one['intensities']
            for one in all_spectra
        ])
    )
......@@ -11,6 +11,7 @@ class SpectrumMeansConcentrations(torch.utils.data.Dataset):
lower_bounds,
upper_bounds,
dtype=None,
ref_spectrum=None,
sf_mean=None,
sf_std=None,
concentration_means=None,
......@@ -26,7 +27,10 @@ class SpectrumMeansConcentrations(torch.utils.data.Dataset):
],
dim=0,
)
self.reference = torch.mean(spectra, dim=0)
if ref_spectrum is None:
self.reference = torch.mean(spectra, dim=0)
else:
self.reference = ref_spectrum
spectra_factors = torch.tensor(spectra / self.reference)
# normalize spectra factors
......
import numpy as np
from sklearn.cluster import KMeans
import torch
from raman_and_nir.data.torch.concentrations_spectra import predict_baseline_usecase, baseline_for_peaks
class ResidualSpectra(torch.utils.data.Dataset):
    """Dataset of residuals between measured spectra and the baselines
    predicted by a trained baseline model.

    Each item is ``(inputs, residual)`` where *inputs* are the sample's
    concentrations (optionally concatenated with its normalized spectrum
    means) and *residual* is the 0-1-normalized difference between the
    de-normalized label spectrum and the predicted baseline.
    """

    def __init__(
        self,
        dataset,
        baseline_model,
        norm_max=None,
        add_means=False,
    ):
        """Run the baseline model over *dataset* and store the residuals.

        :param dataset: batch iterable with a ``.dataset`` exposing
            ``sf_mean``, ``sf_std`` and ``reference`` (the normalization
            constants used to undo the scaling)
        :param baseline_model: torch module predicting baselines
        :param norm_max: fixed normalization maximum; None derives it
            from the residuals (use a fixed value for validation sets)
        :param add_means: if True, append the normalized spectrum means
            to the concentration inputs
        """
        self.add_means = add_means
        device = (
            torch.device("cuda") if torch.cuda.is_available()
            else torch.device("cpu")
        )
        baseline_predictions = list()
        labels = list()
        concentrations = list()
        normalized_means = list()
        # doing predictions batch by batch
        for one_batch in dataset:
            batch_size = one_batch['concentrations'].shape[0]
            base_input, base_label = baseline_for_peaks(one_batch)
            # undo the factor normalization and rescale by the reference
            # spectrum so residuals live in intensity space
            baseline_predictions.append(
                (
                    baseline_model.to(device)(
                        base_input.to(device)
                    ).reshape(batch_size, -1)
                    * dataset.dataset.sf_std.to(device)
                    + dataset.dataset.sf_mean.to(device)
                ) * dataset.dataset.reference.to(device)
            )
            labels.append(
                (
                    base_label.to(device).reshape(batch_size, -1)
                    * dataset.dataset.sf_std.to(device)
                    + dataset.dataset.sf_mean.to(device)
                ) * dataset.dataset.reference.to(device)
            )
            normalized_means.append(one_batch['spectra_means'])
            concentrations.append(one_batch['concentrations'])
            # all batches share the wavenumber axis; keep the last seen
            wavenumbers = one_batch['wavenumbers'][0, :]
        # fixed: 'torch.torch.subtract' only worked because 'torch.torch'
        # accidentally aliases the torch module itself
        residuals = torch.subtract(
            torch.concat(labels, dim=0),
            torch.concat(baseline_predictions, dim=0),
        )
        # normalize to 0-1
        if norm_max is None:
            self.norm_max = torch.max(residuals)
        else:
            self.norm_max = norm_max
        self.normed_residuals = (
            residuals / self.norm_max
        ).cpu().detach().numpy()
        self.concentrations = torch.concat(
            concentrations
        ).cpu().detach().numpy()
        self.wavenumbers = wavenumbers.cpu().detach().numpy()
        self.normed_means = torch.concat(
            normalized_means
        ).cpu().detach().numpy()

    def get_rmse_centers(self, num_centers, error_fraction):
        """Cluster the worst-predicted wavenumbers.

        :param num_centers: number of KMeans cluster centers to return
        :param error_fraction: fraction of wavenumbers (those with the
            highest RMSE over all samples) fed to the clustering
        :return: sorted array of cluster-center wavenumbers
        """
        rmse_per_wn = np.sqrt(
            np.mean(
                np.square(
                    self.normed_residuals,
                ),
                axis=0,
            )
        )
        wn_rmse = [
            (
                one_wn,
                rmse,
            )
            for one_wn, rmse in zip(self.wavenumbers, rmse_per_wn)
        ]
        # keep the top error_fraction wavenumbers (fixed typo: 'erros')
        bad_wavenumbers, errors = zip(
            *sorted(wn_rmse, key=lambda w_r: w_r[1])[
                -int(len(wn_rmse) * error_fraction):
            ]
        )
        return np.array(
            sorted([
                float(one_center)
                for one_center in KMeans(n_clusters=num_centers).fit(
                    np.array(bad_wavenumbers)[:, None]
                ).cluster_centers_
            ])
        )

    def __len__(self):
        """Number of samples (rows of the residual matrix)."""
        return self.normed_residuals.shape[0]

    def __getitem__(self, idx):
        """Return ``(inputs, residual)`` for sample *idx*."""
        if self.add_means:
            inputs = np.concatenate([
                self.concentrations[idx],
                self.normed_means[idx]
            ])
        else:
            inputs = self.concentrations[idx]
        return (
            inputs,
            self.normed_residuals[idx],
        )
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment