Resolve outstanding issues so that the tests pass.

berland · berland · commit b4f5e871cb00 · 2021-03-09T12:53:40.000+01:00
Mostly related to realization.get_smry() now returning a dummy index, with the dates in a DATE column instead of in the index.
diff --git a/src/fmu/ensemble/observations.py b/src/fmu/ensemble/observations.py
@@ -172,7 +172,7 @@ def load_smry(self, realization, smryvector, time_index="yearly", smryerror=None
         """
         dataseries = realization.get_smry(
             column_keys=[smryvector], time_index=time_index
-        )[smryvector]
+        )[["DATE", smryvector]].set_index("DATE")[smryvector]
 
         # In the context of this function, datetimes are not supported. Ensure dates:
         if isinstance(dataseries.index, pd.DatetimeIndex):
diff --git a/src/fmu/ensemble/virtualensemble.py b/src/fmu/ensemble/virtualensemble.py
@@ -872,11 +872,6 @@ def get_smry(self, column_keys=None, time_index="monthly"):
 
             # Now ask the VirtualRealization to do interpolation
             interp = vreal.get_smry(column_keys=column_keys, time_index=time_index)
-            # Assume we get back a dataframe indexed by the dates from vreal
-            # We must reset that index, and ensure the index column
-            # gets a correct name
-            interp.index = interp.index.set_names(["DATE"])
-            interp = interp.reset_index()
             interp["REAL"] = realidx
             smry_interpolated.append(interp)
         return pd.concat(smry_interpolated, ignore_index=True, sort=False)
diff --git a/src/fmu/ensemble/virtualrealization.py b/src/fmu/ensemble/virtualrealization.py
@@ -287,6 +287,10 @@ def get_smry(self, column_keys=None, time_index="monthly"):
         Returns data for those columns that are known, unknown
         columns will be issued a warning for.
 
+        The returned dataframe will have a dummy index, and the dates in
+        the column DATE. The DATE column will contain either datetime.datetime
+        or pandas.Timestamp objects.
+
         BUG: If some columns are available only in certain dataframes,
         we might miss them (e.g. we ask for yearly FOPT, and we have
         yearly smry with only WOPT data, and FOPT is only in daily
@@ -359,9 +363,10 @@ def get_smry(self, column_keys=None, time_index="monthly"):
         )
 
         smry = self.get_df("unsmry--" + chosen_smry)[["DATE"] + column_keys]
+        # The incoming index is a dummy; the dates are in the DATE column.
+        smry.set_index("DATE", inplace=True)
 
         # Add the extra datetimes to interpolate at.
-        smry.set_index("DATE", inplace=True)
         smry.index = pd.to_datetime(smry.index)
         smry = smry.append(
             pd.DataFrame(index=pd.to_datetime(time_index_dt)), sort=False
@@ -390,8 +395,9 @@ def get_smry(self, column_keys=None, time_index="monthly"):
                 smry[noncum_columns].fillna(method="bfill").fillna(value=0)
             )
 
-        smry.index = smry.index.set_names(["DATE"])
-        return smry.loc[pd.to_datetime(time_index_dt)]
+        smry = smry.loc[pd.to_datetime(time_index_dt)]
+        smry.index.name = "DATE"
+        return smry.reset_index()
 
     def get_smry_dates(self, freq="monthly", normalize=False):
         """Return list of datetimes available in the realization
diff --git a/tests/test_observations.py b/tests/test_observations.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """Testing observations in fmu-ensemble."""
 
 import os
@@ -66,24 +65,26 @@ def test_real_mismatch():
     )
     realmis = obs.mismatch(real)
 
-    # Check layout of returned data
-    assert isinstance(realmis, pd.DataFrame)
-    assert len(realmis) == 1
+    pd.testing.assert_frame_equal(
+        realmis,
+        pd.DataFrame(
+            [
+                {
+                    "OBSTYPE": "txt",
+                    "OBSKEY": "parameters.txt/FWL",
+                    "MISMATCH": -2.0,
+                    "L1": 2.0,
+                    "L2": 4.0,
+                    "SIMVALUE": 1700,
+                    "OBSVALUE": 1702,
+                    "MEASERROR": 1,
+                    "SIGN": -1,
+                }
+            ]
+        ),
+    )
     assert "REAL" not in realmis.columns  # should only be there for ensembles.
-    assert "OBSTYPE" in realmis.columns
-    assert "OBSKEY" in realmis.columns
     assert "DATE" not in realmis.columns  # date is not relevant
-    assert "MISMATCH" in realmis.columns
-    assert "L1" in realmis.columns
-    assert "L2" in realmis.columns
-
-    # Check actually computed values, there should only be one row with data:
-    assert realmis.loc[0, "OBSTYPE"] == "txt"
-    assert realmis.loc[0, "OBSKEY"] == "parameters.txt/FWL"
-    assert realmis.loc[0, "MISMATCH"] == -2
-    assert realmis.loc[0, "SIGN"] == -1
-    assert realmis.loc[0, "L1"] == 2
-    assert realmis.loc[0, "L2"] == 4
 
     # Another observation set:
     obs2 = Observations(
@@ -96,12 +97,46 @@ def test_real_mismatch():
         }
     )
     realmis2 = obs2.mismatch(real)
-    assert len(realmis2) == 3
-    assert "parameters.txt/RMS_SEED" in realmis2["OBSKEY"].values
-    assert "outputs.txt/top_structure" in realmis2["OBSKEY"].values
-    assert "npv.txt" in realmis2["OBSKEY"].values
-
-    # assert much more!
+    pd.testing.assert_frame_equal(
+        realmis2,
+        pd.DataFrame(
+            [
+                {
+                    "OBSTYPE": "txt",
+                    "OBSKEY": "parameters.txt/RMS_SEED",
+                    "MISMATCH": -177148215.0,
+                    "L1": 177148215.0,
+                    "L2": 3.1381490077686224e16,
+                    "SIMVALUE": 422851785,
+                    "OBSVALUE": 600000000,
+                    "MEASERROR": 1,
+                    "SIGN": -1,
+                },
+                {
+                    "OBSTYPE": "txt",
+                    "OBSKEY": "outputs.txt/top_structure",
+                    "MISMATCH": 24.0,
+                    "L1": 24.0,
+                    "L2": 576.0,
+                    "SIMVALUE": 3224,
+                    "OBSVALUE": 3200,
+                    "MEASERROR": 1,
+                    "SIGN": 1,
+                },
+                {
+                    "OBSTYPE": "scalar",
+                    "OBSKEY": "npv.txt",
+                    "MISMATCH": 44.0,
+                    "L1": 44.0,
+                    "L2": 1936.0,
+                    "SIMVALUE": 3444,
+                    "OBSVALUE": 3400,
+                    "MEASERROR": 1,
+                    "SIGN": 1,
+                },
+            ]
+        ),
+    )
 
     # Test that we can write the observations to yaml
     # and verify that the exported yaml can be reimported
@@ -215,6 +250,26 @@ def test_smry():
     # loaded realization.
     mismatch = obs.mismatch(real)
 
+    # Assert the first row exactly:
+    pd.testing.assert_frame_equal(
+        mismatch.head(1),
+        pd.DataFrame(
+            [
+                {
+                    "OBSTYPE": "smry",
+                    "OBSKEY": "WBP4:OP_1",
+                    "DATE": datetime.date(2001, 1, 1),
+                    "MEASERROR": 4.0,
+                    "MISMATCH": -2.159454345703125,
+                    "OBSVALUE": 251.0,
+                    "SIMVALUE": 248.84054565429688,
+                    "L1": 2.159454345703125,
+                    "L2": 4.663243071176112,
+                    "SIGN": -1,
+                }
+            ]
+        ),
+    )
     assert len(mismatch) == 21  # later: implement counting in the obs object
     assert mismatch.L1.sum() > 0
     assert mismatch.L2.sum() > 0
@@ -537,7 +592,6 @@ def test_ensset_mismatch():
         == mismatch[mismatch.ENSEMBLE == "iter-1"].L1.sum()
     )
 
-    # This is quite hard to input in dict-format. Better via YAML..
     obs_pr = Observations(
         {
             "smry": [
diff --git a/tests/test_virtualrealization.py b/tests/test_virtualrealization.py
@@ -155,10 +155,10 @@ def test_get_smry():
     assert all(vfopt == fopt)
     # But note that the dtype of the index in each dataframe differs
     # vfopt.index.dtype == datetime, while fopt.index.dtype == object
-    assert len(fopt.columns) == 1  # DATE is index (unlabeled)
+    assert len(fopt.columns) == 2  # DATE is the first column
 
     dvfopt = vreal.get_smry(column_keys="FOPT", time_index="daily")
-    assert all(dvfopt.diff() >= 0)
+    assert all(dvfopt["FOPT"].diff().dropna() >= 0)
     # Linear interpolation should give many unique values:
     assert len(dvfopt["FOPT"].unique()) == 1462
     # Length is here 1462 while daily smry for the scratchrealization
@@ -256,7 +256,7 @@ def test_get_smry2():
 
     alldefaults = vreal.get_smry()
     assert len(alldefaults) == monthly_length
-    assert len(alldefaults.columns) == 49
+    assert len(alldefaults.columns) == 50
 
 
 def test_get_smry_cumulative():