diff --git a/swat/tests/cas/test_bygroups.py b/swat/tests/cas/test_bygroups.py index df5a573f..53284e1c 100644 --- a/swat/tests/cas/test_bygroups.py +++ b/swat/tests/cas/test_bygroups.py @@ -112,8 +112,17 @@ def assertTablesEqual(self, a, b, fillna=-999999, sortby=SORT_KEYS, a = a.sort_values(sortby) b = b.sort_values(sortby) self.assertEqual(list(a.columns), list(b.columns)) - a = a.fillna(value=fillna) - b = b.fillna(value=fillna) + + if pd_version >= (2, 2, 0) and pd_version < (3, 0, 0): + # fix 2.2 and 2.3 FutureWarning: + # Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated + with pd.option_context('future.no_silent_downcasting', True): + a = a.fillna(value=fillna) + b = b.fillna(value=fillna) + else: + a = a.fillna(value=fillna) + b = b.fillna(value=fillna) + for lista, listb in zip(list(a.to_records(index=include_index)), list(b.to_records(index=include_index))): lista = list(lista) @@ -701,7 +710,8 @@ def test_column_max(self): sortby=['Origin', 'EngineSize']) @unittest.skipIf(pd_version < (0, 16, 0), 'Need newer version of Pandas') - @unittest.skipIf(pd_version >= (1, 0, 0), 'Raises AssertionError in Pandas 1') + @unittest.skipIf(pd_version >= (1, 0, 0) and pd_version < (2, 0, 0), + 'Raises AssertionError in Pandas 1') def test_max(self): df = self.get_cars_df().sort_values(SORT_KEYS) tbl = self.table.sort_values(SORT_KEYS) diff --git a/swat/tests/cas/test_datamsg.py b/swat/tests/cas/test_datamsg.py index f7ec5103..e6e37276 100644 --- a/swat/tests/cas/test_datamsg.py +++ b/swat/tests/cas/test_datamsg.py @@ -350,10 +350,13 @@ def test_text(self): self.assertTablesEqual(f, s, sortby=SORT_KEYS) def test_json(self): + import io df = self.table.to_frame() jsondf = df.to_json() - dmh = swat.datamsghandlers.JSON(jsondf) + # Pandas 3 no longer supports passing a json text string. + # You must pass a file path or object with a read method + dmh = swat.datamsghandlers.JSON(io.StringIO(jsondf)) tbl = self.s.addtable(table='cars', **dmh.args.addtable).casTable diff --git a/swat/tests/cas/test_table.py b/swat/tests/cas/test_table.py index b04e0af4..992a4f01 100644 --- a/swat/tests/cas/test_table.py +++ b/swat/tests/cas/test_table.py @@ -1162,13 +1162,16 @@ def test_describe(self): self.assertEqual(desc.loc['freq'].tolist(), dfdesc.loc['freq'].tolist()) # Percentiles + # Pandas < 3 always includes percentile 0.5 even if you don't ask for it + # Starting with Pandas 3, percentile 0.5 is not included unless you ask for it + # CASTable always includes 0.5, regardless of pandas version. desc = self.table.describe(percentiles=[0.3, 0.7]) - dfdesc = df.describe(percentiles=[0.3, 0.7]) + dfdesc = df.describe(percentiles=[0.3, 0.5, 0.7]) self.assertEqual(desc.index.tolist(), dfdesc.index.tolist()) self.assertEqual(desc.columns.tolist(), dfdesc.columns.tolist()) desc = self.table.describe(percentiles=0.4) - dfdesc = df.describe(percentiles=[0.4]) + dfdesc = df.describe(percentiles=[0.4, 0.5]) self.assertEqual(desc.index.tolist(), dfdesc.index.tolist()) self.assertEqual(desc.columns.tolist(), dfdesc.columns.tolist()) @@ -1536,17 +1539,39 @@ def test_mode(self): tblgrp = tbl[['Make', 'Type']].groupby(['Make']) # TODO: Pandas mode sets columns with all unique values to NaN - self.assertEqual( - dfgrp.get_group('Acura').mode()[['Type']].to_csv(index=False), - tblgrp.mode().loc['Acura', ['Type']].dropna(how='all').to_csv(index=False)) + if pd_version >= (2, 2, 0): + # Syntax Change in pandas 3. + # Future Warning in Pandas 2.2+ + # When grouping with a length-1 list-like, + # you will need to pass a length-1 tuple to get_group + self.assertEqual( + dfgrp.get_group(('Acura',)).mode()[['Type']].to_csv(index=False), + tblgrp.mode().loc['Acura', ['Type']].dropna(how='all') + .to_csv(index=False)) + else: + self.assertEqual( + dfgrp.get_group('Acura').mode()[['Type']].to_csv(index=False), + tblgrp.mode().loc['Acura', ['Type']].dropna(how='all') + .to_csv(index=False)) dfgrp = df[['Cylinders', 'MPG_City']].groupby(['Cylinders']) tblgrp = tbl[['Cylinders', 'MPG_City']].groupby(['Cylinders']) # TODO: Pandas mode sets columns with all unique values to NaN - self.assertEqual( - dfgrp.get_group(6.0).mode()[['MPG_City']].to_csv(index=False), - tblgrp.mode().loc[6.0, ['MPG_City']].dropna(how='all').to_csv(index=False)) + if pd_version >= (2, 2, 0): + # Syntax Change in pandas 3. + # Future Warning in Pandas 2.2+ + # When grouping with a length-1 list-like, + # you will need to pass a length-1 tuple to get_group + self.assertEqual( + dfgrp.get_group((6.0,)).mode()[['MPG_City']].to_csv(index=False), + tblgrp.mode().loc[6.0, ['MPG_City']].dropna(how='all') + .to_csv(index=False)) + else: + self.assertEqual( + dfgrp.get_group(6.0).mode()[['MPG_City']].to_csv(index=False), + tblgrp.mode().loc[6.0, ['MPG_City']].dropna(how='all') + .to_csv(index=False)) def test_median(self): df = self.get_cars_df() @@ -4652,7 +4677,9 @@ def test_to_html(self): html = tbl.to_html(index=False) - df2 = pd.read_html(html)[0] + # Starting with Pandas 3 you can no longer + # pass an html string to pandas read_html. + df2 = pd.read_html(io.StringIO(html))[0] df['Model'] = df['Model'].str.strip() diff --git a/swat/utils/testing.py b/swat/utils/testing.py index 99da1ea5..485ebb6d 100644 --- a/swat/utils/testing.py +++ b/swat/utils/testing.py @@ -37,6 +37,8 @@ RE_TYPE = type(re.compile(r'')) +pd_version = tuple([int(x) for x in re.match(r'^(\d+)\.(\d+)\.(\d+)', + pd.__version__).groups()]) warnings.filterwarnings('ignore', category=OptionWarning) warnings.filterwarnings('ignore', category=RuntimeWarning) @@ -93,8 +95,17 @@ def assertTablesEqual(self, a, b, fillna=-999999, sortby=None, precision=None): a = a.sort_values(sortby, na_position='first') b = b.sort_values(sortby, na_position='first') self.assertEqual(list(a.columns), list(b.columns)) - a = a.fillna(value=fillna) - b = b.fillna(value=fillna) + + if pd_version >= (2, 2, 0) and pd_version < (3, 0, 0): + # fix 2.2 and 2.3 FutureWarning: + # Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated + with pd.option_context('future.no_silent_downcasting', True): + a = a.fillna(value=fillna) + b = b.fillna(value=fillna) + else: + a = a.fillna(value=fillna) + b = b.fillna(value=fillna) + if precision is not None: a = a.round(decimals=precision) b = b.round(decimals=precision) @@ -108,8 +119,17 @@ def assertColsEqual(self, a, b, fillna=-999999, sort=False, precision=None): a = a.to_series() if hasattr(b, 'to_series'): b = b.to_series() - a = a.fillna(value=fillna) - b = b.fillna(value=fillna) + + if pd_version >= (2, 2, 0) and pd_version < (3, 0, 0): + # fix 2.2 and 2.3 FutureWarning: + # Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated + with pd.option_context('future.no_silent_downcasting', True): + a = a.fillna(value=fillna) + b = b.fillna(value=fillna) + else: + a = a.fillna(value=fillna) + b = b.fillna(value=fillna) + if precision is not None: a = a.round(decimals=precision) b = b.round(decimals=precision)