"""
Module for producing testing data for resistics and helper functions to compare
instances of the same object.
This includes testing data for:
- Record
- History
- TimeMetadata
- TimeData
- DecimatedData
- SpectraData
- Evaluation frequency SpectraData
- RegressionInputMetadata
- Solution
"""
from typing import List, Dict, Optional, Type, Union
import numpy as np
import pandas as pd
from resistics.common import Record, History, get_record, known_chan
from resistics.time import get_time_metadata, TimeMetadata, TimeData
from resistics.decimate import get_eval_freqs_size, DecimationParameters
from resistics.decimate import DecimatedMetadata, DecimatedData
from resistics.spectra import SpectraLevelMetadata, SpectraMetadata, SpectraData
from resistics.gather import SiteCombinedMetadata
from resistics.transfunc import Component, TransferFunction, ImpedanceTensor
from resistics.regression import RegressionInputMetadata, Solution
# Data type applied to generated time data values when the caller leaves the
# dtype argument of a time_data_* function as None
DEFAULT_TIME_DATA_DTYPE = np.float32
def record_example1() -> Record:
    """
    Get an example Record

    Returns
    -------
    Record
        A record built with get_record from a creator named "example1" and two
        messages
    """
    creator = {"name": "example1", "a": 5, "b": -7.0}
    messages = ["Message 1", "Message 2"]
    return get_record(creator=creator, messages=messages)
def record_example2() -> Record:
    """
    Get a second example Record with different creator values and messages

    Returns
    -------
    Record
        A record built with get_record from a creator named "example2" and two
        messages
    """
    creator = {"name": "example2", "a": "parzen", "b": -21}
    messages = ["Message 5", "Message 6"]
    return get_record(creator=creator, messages=messages)
def history_example() -> History:
    """
    Get a History example

    Returns
    -------
    History
        A history holding the records from record_example1 and
        record_example2, in that order
    """
    example_records = [record_example1(), record_example2()]
    return History(records=example_records)
def time_data_ones(
    fs: float = 10,
    first_time: str = "2020-01-01 00:00:00",
    n_samples: int = 10,
    dtype: Optional[Type] = None,
) -> TimeData:
    """
    TimeData where every value is one

    The metadata comes from time_metadata_mt and there is one row of data for
    each channel in that metadata.

    Parameters
    ----------
    fs : float, optional
        The sampling frequency, by default 10
    first_time : str, optional
        The time of the first sample, by default "2020-01-01 00:00:00"
    n_samples : int, optional
        The number of samples, by default 10
    dtype : Optional[Type], optional
        The data type for the values, by default None. If None,
        DEFAULT_TIME_DATA_DTYPE is used.

    Returns
    -------
    TimeData
        The TimeData
    """
    value_type = DEFAULT_TIME_DATA_DTYPE if dtype is None else dtype
    metadata = time_metadata_mt(fs, first_time, n_samples)
    n_chans = len(metadata.chans)
    ones = np.ones(shape=(n_chans, n_samples), dtype=value_type)
    # note how the data was generated in the metadata history
    record = get_record(
        {
            "name": "time_data_ones",
            "fs": fs,
            "first_time": first_time,
            "n_samples": n_samples,
        },
        ["Generated time data with fixed values"],
    )
    metadata.history.add_record(record)
    return TimeData(metadata, ones)
def time_data_simple(
    fs: float = 10,
    first_time: str = "2020-01-01 00:00:00",
    dtype: Optional[Type] = None,
) -> TimeData:
    """
    Time data with 16 samples of fixed, hand-written values

    The data array has four rows of 16 samples each. The metadata comes from
    time_metadata_mt.

    Parameters
    ----------
    fs : float, optional
        The sampling frequency, by default 10
    first_time : str, optional
        The time of the first sample, by default "2020-01-01 00:00:00"
    dtype : Optional[Type], optional
        The data type for the values, by default None. If None,
        DEFAULT_TIME_DATA_DTYPE is used.

    Returns
    -------
    TimeData
        The TimeData
    """
    value_type = DEFAULT_TIME_DATA_DTYPE if dtype is None else dtype
    rows = [
        [1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1],
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7],
        [1, 2, 3, 5, 1, 2, 3, 4, 2, 6, 7, 6, 5, 4, 3, 2],
        [2, 0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 1, 3, 3, 3, 3],
    ]
    values = np.array(rows, dtype=value_type)
    # the number of samples is taken from the data rather than passed in
    metadata = time_metadata_mt(fs, first_time, values.shape[1])
    record = get_record(
        {"name": "time_data_simple", "fs": fs, "first_time": first_time},
        ["Generated time data with simple values"],
    )
    metadata.history.add_record(record)
    return TimeData(metadata, values)
def time_data_with_nans(
    fs: float = 10,
    first_time: str = "2020-01-01 00:00:00",
    dtype: Optional[Type] = None,
) -> TimeData:
    """
    TimeData with 16 samples and some nan values

    The data array has four rows of 16 samples each, with nan values scattered
    through every row. The metadata comes from time_metadata_mt.

    Parameters
    ----------
    fs : float, optional
        Sampling frequency, by default 10
    first_time : str, optional
        The time of the first sample, by default "2020-01-01 00:00:00"
    dtype : Optional[Type], optional
        The data type for the values, by default None. If None,
        DEFAULT_TIME_DATA_DTYPE is used.

    Returns
    -------
    TimeData
        The TimeData
    """
    value_type = DEFAULT_TIME_DATA_DTYPE if dtype is None else dtype
    nan = np.nan
    rows = [
        [1, 1, 1, 0, nan, 0, 1, 1, 1, nan, 0, 0, 0, 0, 1, 1],
        [1, 2, nan, nan, 5, 6, nan, 8, 9, 1, 2, 3, 4, 5, 6, 7],
        [nan, 2, 3, 5, 1, 2, 3, 4, 2, 6, 7, nan, nan, 4, 3, 2],
        [2, 0, 0, 1, 2, 3, nan, nan, nan, 0, 0, 1, 3, 3, 3, 3],
    ]
    values = np.array(rows, dtype=value_type)
    # the number of samples is taken from the data rather than passed in
    metadata = time_metadata_mt(fs, first_time, values.shape[1])
    record = get_record(
        {"name": "time_data_with_nans", "fs": fs, "first_time": first_time},
        ["Generated time data with some nan values"],
    )
    metadata.history.add_record(record)
    return TimeData(metadata, values)
def time_data_linear(
    fs: float = 10,
    first_time: str = "2020-01-01 00:00:00",
    n_samples: int = 10,
    dtype: Optional[Type] = None,
) -> TimeData:
    """
    Get TimeData with linear data

    Every channel holds the same values, 0, 1, ..., n_samples - 1. The
    metadata comes from time_metadata_mt.

    Parameters
    ----------
    fs : float, optional
        The sampling frequency, by default 10
    first_time : str, optional
        Time of first sample, by default "2020-01-01 00:00:00"
    n_samples : int, optional
        The number of samples, by default 10
    dtype : Optional[Type], optional
        The data type for the values, by default None. If None,
        DEFAULT_TIME_DATA_DTYPE is used.

    Returns
    -------
    TimeData
        TimeData with linear values
    """
    value_type = DEFAULT_TIME_DATA_DTYPE if dtype is None else dtype
    metadata = time_metadata_mt(fs, first_time, n_samples)
    values = np.empty(shape=(metadata.n_chans, n_samples), dtype=value_type)
    # broadcasting copies the same ramp into every channel row
    values[...] = np.arange(n_samples)
    record = get_record(
        {
            "name": "time_data_linear",
            "fs": fs,
            "first_time": first_time,
            "n_samples": n_samples,
        },
        ["Generated time data with linear values"],
    )
    metadata.history.add_record(record)
    return TimeData(metadata, values)
def time_data_random(
    fs: float = 10,
    first_time: str = "2020-01-01 00:00:00",
    n_samples: int = 10,
    dtype: Optional[Type] = None,
) -> TimeData:
    """
    TimeData with random values and specifiable number of samples

    Values are drawn with np.random.normal using a mean of 0 and a standard
    deviation of 3, then cast to the requested data type. The metadata comes
    from time_metadata_mt.

    Parameters
    ----------
    fs : float, optional
        The sampling frequency, by default 10
    first_time : str, optional
        Time of first sample, by default "2020-01-01 00:00:00"
    n_samples : int, optional
        The number of samples, by default 10
    dtype : Optional[Type], optional
        The data type for the values, by default None. If None,
        DEFAULT_TIME_DATA_DTYPE is used.

    Returns
    -------
    TimeData
        The TimeData
    """
    value_type = DEFAULT_TIME_DATA_DTYPE if dtype is None else dtype
    metadata = time_metadata_mt(fs, first_time, n_samples)
    draws = np.random.normal(0, 3, size=(metadata.n_chans, n_samples))
    values = draws.astype(value_type)
    record = get_record(
        {
            "name": "time_data_random",
            "fs": fs,
            "first_time": first_time,
            "n_samples": n_samples,
        },
        ["Generated time data with random values"],
    )
    metadata.history.add_record(record)
    return TimeData(metadata, values)
def time_data_periodic(
    frequencies: List[float],
    fs: float = 50,
    first_time: str = "2020-01-01 00:00:00",
    n_samples: int = 100,
    dtype: Optional[Type] = None,
) -> TimeData:
    """
    Get periodic TimeData

    The data is a single row holding the sum of one sine wave per requested
    frequency. The metadata comes from time_metadata_1chan.

    Parameters
    ----------
    frequencies : List[float]
        Frequencies to include
    fs : float, optional
        Sampling frequency, by default 50
    first_time : str, optional
        The first time, by default "2020-01-01 00:00:00"
    n_samples : int, optional
        The number of samples, by default 100
    dtype : Optional[Type], optional
        The data type for the values, by default None. If None,
        DEFAULT_TIME_DATA_DTYPE is used.

    Returns
    -------
    TimeData
        Periodic TimeData
    """
    value_type = DEFAULT_TIME_DATA_DTYPE if dtype is None else dtype
    metadata = time_metadata_1chan(fs, first_time, n_samples)
    # sample times in seconds relative to the first sample
    sample_times = np.arange(0, n_samples) * (1 / fs)
    signal = np.zeros(shape=(1, n_samples), dtype=value_type)
    for frequency in frequencies:
        # accumulate in place so the result stays in the requested data type
        signal += np.sin(sample_times * 2 * np.pi * frequency)
    record = get_record(
        {
            "name": "time_data_periodic",
            "frequencies": frequencies,
            "fs": fs,
            "first_time": first_time,
            "n_samples": n_samples,
        },
        ["Generated time data with periodic values"],
    )
    metadata.history.add_record(record)
    return TimeData(metadata, signal)
def time_data_with_offset(
    offset: float = 0.05,
    fs: float = 10,
    first_time: str = "2020-01-01 00:00:00",
    n_samples: int = 11,
    dtype: Optional[Type] = None,
) -> TimeData:
    """
    Get TimeData with an offset on the sampling

    The offset is added to first_time before the metadata is created with
    time_metadata_1chan. The data is a single row holding the values
    0, 1, ..., n_samples - 1 in the requested data type.

    Parameters
    ----------
    offset : float, optional
        The offset on the sampling in seconds, by default 0.05
    fs : float, optional
        The sampling frequency, by default 10
    first_time : str, optional
        The first time of the TimeData before the offset is applied, by default
        "2020-01-01 00:00:00"
    n_samples : int, optional
        The number of samples, by default 11
    dtype : Optional[Type], optional
        The data type for the values, by default None. If None,
        DEFAULT_TIME_DATA_DTYPE is used.

    Returns
    -------
    TimeData
        The TimeData
    """
    if dtype is None:
        dtype = DEFAULT_TIME_DATA_DTYPE
    # shift the first sample time by the offset and keep it as an ISO string
    first_time = (pd.to_datetime(first_time) + pd.Timedelta(offset, "s")).isoformat()
    metadata = time_metadata_1chan(fs, first_time, n_samples)
    # apply the requested dtype, which was previously resolved but never used
    data = np.arange(0, n_samples, dtype=dtype).reshape(1, n_samples)
    creator = {
        "name": "time_data_with_offset",
        "offset": offset,
        "fs": fs,
        # this is the shifted first time, not the value passed in
        "first_time": first_time,
        "n_samples": n_samples,
    }
    messages = ["Generated time data with an offset"]
    record = get_record(creator, messages)
    metadata.history.add_record(record)
    return TimeData(metadata, data)
def decimated_data_random(
    fs: float = 0.25,
    first_time: str = "2021-01-01 00:00:00",
    n_samples: int = 1024,
    n_levels: int = 3,
    factor: int = 4,
) -> DecimatedData:
    """
    Get random decimated data

    The metadata comes from decimated_metadata. For every level, values are
    drawn with np.random.normal using a mean of 0 and a standard deviation of
    3, with one row per channel and the number of samples given by the level
    metadata.

    Parameters
    ----------
    fs : float, optional
        Sampling frequency, by default 0.25
    first_time : str, optional
        The time of the first sample, by default "2021-01-01 00:00:00"
    n_samples : int, optional
        The number of samples, by default 1024
    n_levels : int, optional
        The number of levels, by default 3
    factor : int, optional
        The decimation factor for each level, by default 4

    Returns
    -------
    DecimatedData
        The decimated data
    """
    metadata = decimated_metadata(
        fs, first_time, n_samples=n_samples, n_levels=n_levels, factor=factor
    )
    # levels are generated in ascending order, one array per level
    levels_data = {
        ilevel: np.random.normal(
            0,
            3,
            size=(metadata.n_chans, metadata.levels_metadata[ilevel].n_samples),
        )
        for ilevel in range(metadata.n_levels)
    }
    record = get_record(
        {
            "name": "decimated_data_random",
            "fs": fs,
            "first_time": first_time,
            "n_levels": n_levels,
        },
        "Generated random decimated data",
    )
    metadata.history.add_record(record)
    return DecimatedData(metadata, levels_data)
def decimated_data_linear(
    fs: float = 0.25,
    first_time: str = "2021-01-01 00:00:00",
    n_samples: int = 1024,
    n_levels: int = 3,
    factor: int = 4,
) -> DecimatedData:
    """
    Get linear decimated data

    The metadata comes from decimated_metadata. Every level holds exactly two
    rows: the first counts up from 0 and the second is its negation.

    Parameters
    ----------
    fs : float, optional
        Sampling frequency, by default 0.25
    first_time : str, optional
        The time of the first sample, by default "2021-01-01 00:00:00"
    n_samples : int, optional
        The number of samples, by default 1024
    n_levels : int, optional
        The number of levels, by default 3
    factor : int, optional
        The decimation factor for each level, by default 4

    Returns
    -------
    DecimatedData
        The decimated data
    """
    metadata = decimated_metadata(
        fs, first_time, n_samples=n_samples, n_levels=n_levels, factor=factor
    )
    data = {}
    for ilevel in range(metadata.n_levels):
        ramp = np.arange(metadata.levels_metadata[ilevel].n_samples)
        # NOTE(review): always two rows; presumably decimated_metadata
        # describes two channels - confirm against that helper
        data[ilevel] = np.array([ramp, -1 * ramp])
    creator = {
        "name": "decimated_data_linear",
        "fs": fs,
        "first_time": first_time,
        "n_levels": n_levels,
    }
    record = get_record(creator, "Generated linear decimated data")
    metadata.history.add_record(record)
    return DecimatedData(metadata, data)
def decimated_data_periodic(
    frequencies: Dict[str, List[float]],
    fs: float = 0.25,
    first_time: str = "2021-01-01 00:00:00",
    n_samples: int = 1024,
    n_levels: int = 3,
    factor: int = 4,
) -> DecimatedData:
    """
    Get periodic decimated data

    The metadata comes from decimated_metadata. For every level and channel,
    the data is the sum of one sine wave per frequency listed for that channel.
    Frequencies above half the level sampling frequency are left out of that
    level.

    Parameters
    ----------
    frequencies : Dict[str, List[float]]
        Mapping from channel to list of frequencies to add. There must be an
        entry for every channel in the metadata, otherwise the lookup fails
        with a KeyError.
    fs : float, optional
        Sampling frequency, by default 0.25
    first_time : str, optional
        The time of the first sample, by default "2021-01-01 00:00:00"
    n_samples : int, optional
        The number of samples, by default 1024
    n_levels : int, optional
        The number of levels, by default 3
    factor : int, optional
        The decimation factor for each level, by default 4

    Returns
    -------
    DecimatedData
        The decimated data
    """
    metadata = decimated_metadata(
        fs, first_time, n_samples=n_samples, n_levels=n_levels, factor=factor
    )
    data = {}
    for ilevel in range(metadata.n_levels):
        level_metadata = metadata.levels_metadata[ilevel]
        level_samples = level_metadata.n_samples
        level_fs = level_metadata.fs
        # sample times in seconds for this level
        times = np.arange(0, level_samples) * (1 / level_fs)
        level_data = []
        for chan in metadata.chans:
            chan_data = np.zeros(shape=(level_samples))
            for freq in frequencies[chan]:
                # skip frequencies above half the level sampling frequency
                if freq > level_fs / 2:
                    continue
                chan_data += np.sin(times * 2 * np.pi * freq)
            level_data.append(chan_data)
        data[ilevel] = np.array(level_data)
    creator = {
        "name": "decimated_data_periodic",
        "fs": fs,
        "first_time": first_time,
        "n_levels": n_levels,
    }
    record = get_record(creator, "Generated periodic decimated data")
    metadata.history.add_record(record)
    return DecimatedData(metadata, data)
def spectra_data_basic() -> SpectraData:
    """
    Spectra data with a single decimation level

    The level data has two windows, one channel and ten frequencies. The
    metadata starts from the dictionary form of time_metadata_1chan() and is
    extended with the spectra specific entries.

    Returns
    -------
    SpectraData
        Spectra data with a single level, a single channel and two windows
    """
    n_freqs = 10
    # first window is k + kj, second window is (k - 1) + (k + 1)j
    first_win = [complex(k, k) for k in range(n_freqs)]
    second_win = [complex(k - 1, k + 1) for k in range(n_freqs)]
    data = {0: np.array([[first_win], [second_win]])}
    level_metadata = SpectraLevelMetadata(
        fs=180,
        n_wins=2,
        win_size=20,
        olap_size=5,
        index_offset=0,
        n_freqs=10,
        freqs=list(range(0, 100, 10)),
    )
    metadata_dict = time_metadata_1chan().dict()
    metadata_dict.update(
        {
            "fs": [180],
            "n_levels": 1,
            "levels_metadata": [level_metadata],
            "ref_time": metadata_dict["first_time"],
        }
    )
    metadata = SpectraMetadata(**metadata_dict)
    record = get_record(
        {"name": "spec_data_basic"},
        "Generated spectra data with 1 channel and 1 level",
    )
    metadata.history.add_record(record)
    return SpectraData(metadata, data)
def generate_evaluation_data(
    chans: List[str], soln: Solution, n_wins: int
) -> np.ndarray:
    """
    Generate evaluation frequency data that satisfies a provided solution

    The returned array has the shape:

    n_wins x n_chans x n_evals

    Input channel values are drawn with np.random.randn, meaning they are
    sampled from a standard normal distribution. Output channel values are
    then calculated from the input values and the solution tensor at each
    evaluation frequency.

    An output channel that is also an input channel keeps its randomly
    generated input values and is not recalculated.

    Parameters
    ----------
    chans : List[str]
        The channels in the data. The channel axis of the returned array
        follows this order.
    soln : Solution
        The Solution that needs to be satisfied
    n_wins : int
        The number of windows to generate

    Returns
    -------
    np.ndarray
        The evaluation frequency data array
    """
    tf_inputs = soln.tf.in_chans
    tf_outputs = soln.tf.out_chans
    n_evals = len(soln.freqs)
    # filled as n_evals x n_chans x n_wins and reversed on return
    eval_data = np.empty((n_evals, len(chans), n_wins), dtype=np.complex128)
    for eval_idx in range(n_evals):
        tensor = soln.get_tensor(eval_idx)
        # random values for every input channel
        chan_values = {chan: np.random.randn(n_wins) for chan in tf_inputs}
        # outputs are the tensor row applied to the input values
        for row, out_chan in enumerate(tf_outputs):
            if out_chan not in tf_inputs:
                terms = [
                    tensor[row, col] * chan_values[in_chan]
                    for col, in_chan in enumerate(tf_inputs)
                ]
                chan_values[out_chan] = np.sum(terms, axis=0)
        # copy the values across in the requested channel order
        for chan_idx, chan in enumerate(chans):
            eval_data[eval_idx, chan_idx, ...] = chan_values[chan]
    return eval_data.T
def evaluation_data(
    fs: float, dec_params: DecimationParameters, n_wins: int, soln: Solution
) -> SpectraData:
    """
    Generate evaluation frequency data that will satisfy a given solution

    The data is generated randomly by generate_evaluation_data and then split
    between the decimation levels, with per_level consecutive evaluation
    frequencies assigned to each level.

    The channels are the transfer function input channels followed by any
    output channels that are not already inputs, so the channel order is the
    same on every run.

    Parameters
    ----------
    fs : float
        The sampling frequency of the original data. This is not used in the
        function body; level sampling frequencies are read from dec_params.
    dec_params : DecimationParameters
        The data decimation information
    n_wins : int
        The number of windows to generate
    soln : Solution
        The solution that the generated data should satisfy

    Returns
    -------
    SpectraData
        The evaluation frequency data

    Notes
    -----
    The solution is expected to provide n_levels * per_level evaluation
    frequencies. This is not validated here.
    """
    # get information about the decimation levels
    n_levels = dec_params.n_levels
    per_level = dec_params.per_level
    levels_fs = dec_params.dec_fs
    eval_freqs_for_levels = {
        ilevel: dec_params.get_eval_freqs(ilevel) for ilevel in range(n_levels)
    }
    # create the data, using an order preserving de-duplication of the
    # channels rather than a set, whose iteration order varies between runs
    chans = list(dict.fromkeys(soln.tf.in_chans + soln.tf.out_chans))
    data_array = generate_evaluation_data(chans, soln, n_wins)
    data = {}
    for ilevel in range(n_levels):
        istart = ilevel * per_level
        iend = istart + per_level
        # the evaluation frequencies are on the last axis
        data[ilevel] = data_array[..., istart:iend]
    # create the metadata
    levels_metadata = []
    for ilevel, level_fs in enumerate(levels_fs):
        levels_metadata.append(
            SpectraLevelMetadata(
                fs=level_fs,
                n_wins=n_wins,
                win_size=20,
                olap_size=5,
                index_offset=0,
                n_freqs=per_level,
                freqs=eval_freqs_for_levels[ilevel],
            )
        )
        # summarise the level that was just added
        levels_metadata[-1].summary()
    metadata_dict = time_metadata_general(chans).dict()
    metadata_dict["chans"] = chans
    metadata_dict["fs"] = levels_fs
    metadata_dict["n_levels"] = len(levels_metadata)
    metadata_dict["levels_metadata"] = levels_metadata
    metadata_dict["ref_time"] = metadata_dict["first_time"]
    spec_metadata = SpectraMetadata(**metadata_dict)
    return SpectraData(spec_metadata, data)
def transfer_function_random(n_in: int, n_out: int) -> TransferFunction:
    """
    Generate a random transfer function

    Input channel names are sampled without replacement from the lowercase
    alphabet and output channel names from the uppercase alphabet, so n_in and
    n_out must each be less than or equal to 26.

    Parameters
    ----------
    n_in : int
        Number of input channels
    n_out : int
        Number of output channels

    Returns
    -------
    TransferFunction
        A randomly generated transfer function

    Raises
    ------
    ValueError
        If any of the channel names is duplicated. random.sample also raises
        a ValueError when asked for more names than there are letters.
    """
    import random
    import string

    in_chans = random.sample(string.ascii_lowercase, n_in)
    out_chans = random.sample(string.ascii_uppercase, n_out)
    # check the sampled channel names, not the alphabets they were drawn from
    all_chans = in_chans + out_chans
    if len(set(all_chans)) < len(all_chans):
        raise ValueError(
            f"There is a duplicate somewhere, {in_chans=}, {out_chans=}"
        )
    return TransferFunction(
        name="testing", variation="random", in_chans=in_chans, out_chans=out_chans
    )
def components_mt() -> Dict[str, Component]:
    """
    Get example components for the Impedance Tensor

    Each component has six real and six imaginary values.

    Returns
    -------
    Dict[str, Component]
        Dictionary of component values (ExHx, ExHy, EyHx, EyHy)
    """
    # (component name, real values, imaginary values)
    values = (
        ("ExHx", [1, 1, 2, 2, 3, 3], [5, 5, 4, 4, 3, 3]),
        ("ExHy", [1, 2, 3, 4, 5, 6], [-5, -4, -3, -2, -1, 1]),
        ("EyHx", [-1, -2, -3, -4, -5, -6], [5, 4, 3, 2, 1, 2]),
        ("EyHy", [-1, -1, -2, -2, -3, -2], [-5, -5, -4, -4, -3, -4]),
    )
    return {name: Component(real=real, imag=imag) for name, real, imag in values}
def solution_mt() -> Solution:
    """
    Get an example impedance tensor solution

    The solution uses the components from components_mt at six evaluation
    frequencies, an empty History, and the contributors from
    regression_input_metadata_single_site.

    Returns
    -------
    Solution
        The solution for an MT dataset
    """
    impedance = ImpedanceTensor()
    sampling_freq = 256
    eval_freqs = [100, 80, 60, 40, 20, 10]
    components = components_mt()
    reg_metadata = regression_input_metadata_single_site(
        sampling_freq, eval_freqs, impedance
    )
    solution_kwargs = {
        "tf": impedance,
        "freqs": eval_freqs,
        "components": components,
        "history": History(),
        "contributors": reg_metadata.contributors,
    }
    return Solution(**solution_kwargs)
def solution_general(
    fs: float, tf: TransferFunction, n_evals: int, components: Dict[str, Component]
) -> Solution:
    """
    Create a Solution instance from the specified components

    The evaluation frequencies come from get_eval_freqs_size and the
    contributors from regression_input_metadata_single_site. The solution is
    given an empty History.

    Parameters
    ----------
    fs : float
        The sampling frequency of the original data
    tf : TransferFunction
        The transfer function to be solved
    n_evals : int
        The number of evaluation frequencies
    components : Dict[str, Component]
        The components of the solution

    Returns
    -------
    Solution
        The Solution instance
    """
    eval_freqs = get_eval_freqs_size(fs, n_evals).tolist()
    reg_metadata = regression_input_metadata_single_site(fs, eval_freqs, tf)
    solution_kwargs = {
        "tf": tf,
        "freqs": eval_freqs,
        "components": components,
        "history": History(),
        "contributors": reg_metadata.contributors,
    }
    return Solution(**solution_kwargs)
def solution_random_int(
    fs: float, tf: TransferFunction, n_evals=10, low: int = -10, high: int = 10
) -> Solution:
    """
    Generate a set of random integer components for a solution

    For every solution component of the transfer function, the real and
    imaginary values are drawn with np.random.randint(low, high), one value for
    each evaluation frequency.

    Parameters
    ----------
    fs : float
        The original sampling frequency of the data
    tf : TransferFunction
        The transfer function
    n_evals : int, optional
        The number of evaluation frequencies, by default 10
    low : int, optional
        A low value for the integers, by default -10
    high : int, optional
        A high value for the integers, by default 10

    Returns
    -------
    Solution
        A randomly generated solution for the transfer function
    """
    components = {}
    for comp in tf.solution_components():
        # real values are drawn before imaginary values
        real_values = np.random.randint(low, high, size=n_evals).tolist()
        imag_values = np.random.randint(low, high, size=n_evals).tolist()
        components[comp] = Component(real=real_values, imag=imag_values)
    return solution_general(fs, tf, n_evals, components)
def solution_random_float(fs: float, tf: TransferFunction, n_evals=10) -> Solution:
    """
    Generate a set of random float components for a solution

    For every solution component of the transfer function, the real and
    imaginary values are standard normal samples from np.random.randn, scaled
    by a single random integer from np.random.randint(0, 10). The real and
    imaginary parts each get their own scaling integer.

    Parameters
    ----------
    fs : float
        The original sampling frequency of the data
    tf : TransferFunction
        The transfer function
    n_evals : int, optional
        The number of evaluation frequencies, by default 10

    Returns
    -------
    Solution
        A randomly generated solution for the transfer function
    """
    components = {}
    for comp in tf.solution_components():
        # real values are drawn before imaginary values
        real_values = (np.random.randn(n_evals) * np.random.randint(0, 10)).tolist()
        imag_values = (np.random.randn(n_evals) * np.random.randint(0, 10)).tolist()
        components[comp] = Component(real=real_values, imag=imag_values)
    return solution_general(fs, tf, n_evals, components)
def remove_record_times(records: List[Dict]) -> List[Dict]:
    """
    Remove timestamps from records

    Timestamps can make comparison of two data objects harder as processes need
    to have been run at exactly the same time for equality, which is unlikely to
    be the case in tests.

    The record dictionaries are modified in place and the same list is
    returned. Records that have no timestamp entries are left as they are.

    Parameters
    ----------
    records : List[Dict]
        The history records, one dictionary per record

    Returns
    -------
    List[Dict]
        The history records with timestamps removed
    """
    for rec in records:
        # a default makes removal safe for records already without timestamps
        rec.pop("time_local", None)
        rec.pop("time_utc", None)
    return records
def assert_time_data_equal(
    time_data1: TimeData, time_data2: TimeData, history_times: bool = True
):
    """
    Assert that two time data instances are equal

    The comparison is done in three steps: the metadata without the history,
    then the history, and finally the data arrays.

    Parameters
    ----------
    time_data1 : TimeData
        Time data 1
    time_data2 : TimeData
        Time data 2
    history_times : bool, optional
        Flag to include history timestamps in the comparison, by default True.
        Including timestamps will cause a failure if processes were not run at
        exactly the same time.
    """
    first_meta = time_data1.metadata.dict()
    second_meta = time_data2.metadata.dict()
    # the history is separated out so it can be compared on its own
    first_history = first_meta.pop("history")
    second_history = second_meta.pop("history")
    # compare core metadata
    assert first_meta == second_meta
    # compare histories, dropping timestamps first if requested
    if not history_times:
        first_history["records"] = remove_record_times(first_history["records"])
        second_history["records"] = remove_record_times(second_history["records"])
    assert first_history == second_history
    # compare data
    np.testing.assert_array_equal(time_data1.data, time_data2.data)
def assert_soln_equal(soln1: Solution, soln2: Solution):
    """
    Check that two solutions are nearly the same

    Both solutions are converted with to_dataframe and the results are
    compared using pd.testing.assert_frame_equal with its default settings.

    Parameters
    ----------
    soln1 : Solution
        The first solution
    soln2 : Solution
        The second solution
    """
    pd.testing.assert_frame_equal(soln1.to_dataframe(), soln2.to_dataframe())