From 4fc15f555ef1f69151806226e9d2b04f391882d6 Mon Sep 17 00:00:00 2001 From: Melissa DeLucchi <113376043+delucchi-cmu@users.noreply.github.com> Date: Fri, 19 Apr 2024 15:41:02 -0400 Subject: [PATCH] Unpin pandas and numpy versions. (#284) --- pyproject.toml | 4 +- .../catalog/test_run_round_trip.py | 91 ------------------ tests/hipscat_import/conftest.py | 5 - .../data/test_formats/multiindex.parquet | Bin 4036 -> 0 bytes 4 files changed, 2 insertions(+), 98 deletions(-) delete mode 100644 tests/hipscat_import/data/test_formats/multiindex.parquet diff --git a/pyproject.toml b/pyproject.toml index a95973f0..c7fc02ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,11 +21,11 @@ dependencies = [ "healpy", "hipscat >= 0.2.9", "ipykernel", # Support for Jupyter notebooks - "pandas < 2.1.0", + "pandas", "pyarrow", "pyyaml", "tqdm", - "numpy < 1.25", + "numpy", "fsspec <= 2024.2.0", # Remove when pyarrow updates to reflect api changes ] diff --git a/tests/hipscat_import/catalog/test_run_round_trip.py b/tests/hipscat_import/catalog/test_run_round_trip.py index bb363088..82621e2e 100644 --- a/tests/hipscat_import/catalog/test_run_round_trip.py +++ b/tests/hipscat_import/catalog/test_run_round_trip.py @@ -205,97 +205,6 @@ def test_import_preserve_index( assert_parquet_file_ids(output_file, "obs_id", expected_indexes) -@pytest.mark.dask -def test_import_multiindex( - dask_client, - formats_multiindex, - assert_parquet_file_ids, - assert_parquet_file_index, - tmp_path, -): - """Test basic execution, with input with pandas metadata - - this is *similar* to the above test - - the input file is a parquet file with a multi-level pandas index. - this verifies that the parquet file at the end also has pandas - metadata, and the user's preferred id is retained as the index, - when requested. - """ - - index_arrays = [ - [ - "star1", - "star1", - "star1", - "star1", - "galaxy1", - "galaxy1", - "galaxy2", - "galaxy2", - ], - ["r", "r", "i", "i", "r", "r", "r", "r"], - ] - expected_indexes = list(zip(index_arrays[0], index_arrays[1])) - assert_parquet_file_index(formats_multiindex, expected_indexes) - data_frame = pd.read_parquet(formats_multiindex, engine="pyarrow") - assert data_frame.index.names == ["obj_id", "band"] - npt.assert_array_equal( - data_frame.columns, - ["ra", "dec", "mag"], - ) - - ## Don't generate a hipscat index. Verify that the original index remains. - args = ImportArguments( - output_artifact_name="multiindex", - input_file_list=[formats_multiindex], - file_reader="parquet", - sort_columns="obj_id,band", - add_hipscat_index=False, - output_path=tmp_path, - dask_tmp=tmp_path, - highest_healpix_order=1, - progress_bar=False, - ) - - runner.run(args, dask_client) - - # Check that the catalog parquet file exists - output_file = os.path.join(args.catalog_path, "Norder=0", "Dir=0", "Npix=11.parquet") - - assert_parquet_file_index(output_file, expected_indexes) - data_frame = pd.read_parquet(output_file, engine="pyarrow") - assert data_frame.index.names == ["obj_id", "band"] - npt.assert_array_equal( - data_frame.columns, - ["ra", "dec", "mag", "Norder", "Dir", "Npix"], - ) - - ## DO generate a hipscat index. Verify that the original index is preserved in a column. - args = ImportArguments( - output_artifact_name="multiindex_preserve", - input_file_list=[formats_multiindex], - file_reader="parquet", - sort_columns="obj_id,band", - add_hipscat_index=True, - output_path=tmp_path, - dask_tmp=tmp_path, - highest_healpix_order=1, - progress_bar=False, - ) - - runner.run(args, dask_client) - - # Check that the catalog parquet file exists - output_file = os.path.join(args.catalog_path, "Norder=0", "Dir=0", "Npix=11.parquet") - - data_frame = pd.read_parquet(output_file, engine="pyarrow") - assert data_frame.index.name == "_hipscat_index" - npt.assert_array_equal( - data_frame.columns, - ["obj_id", "band", "ra", "dec", "mag", "Norder", "Dir", "Npix"], - ) - assert_parquet_file_ids(output_file, "obj_id", index_arrays[0]) - - @pytest.mark.dask def test_import_constant_healpix_order( dask_client, diff --git a/tests/hipscat_import/conftest.py b/tests/hipscat_import/conftest.py index bccb9afe..d36eaa66 100644 --- a/tests/hipscat_import/conftest.py +++ b/tests/hipscat_import/conftest.py @@ -120,11 +120,6 @@ def formats_pandasindex(test_data_dir): return os.path.join(test_data_dir, "test_formats", "pandasindex.parquet") -@pytest.fixture -def formats_multiindex(test_data_dir): - return os.path.join(test_data_dir, "test_formats", "multiindex.parquet") - - @pytest.fixture def small_sky_parts_dir(test_data_dir): return os.path.join(test_data_dir, "small_sky_parts") diff --git a/tests/hipscat_import/data/test_formats/multiindex.parquet b/tests/hipscat_import/data/test_formats/multiindex.parquet deleted file mode 100644 index 4c5444ec770ab83b76829e48cf059b3d08fb05b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4036 zcmcInZ)_7~7{7K<_D^jH*R*B{vp_SG4cl%7$_MQAM!VLoY@O@%3cB1~|J`5k4@i1@otm83xY1NLF8X{`BM*Udzq{@Gp|LO+iN)y^JZ%k^N$U6=52{2 z*FgR4^|pW8fO~V6RdU}F_iIM|8FP&D{Qc@j?`*lkdghMJ{#jr-&-~R3Z!Cm3&qeF& z=RHQwv*)d^AO9u5c^3Tni|_u-dY;%nH~CYPYti*u26KBYC&JfkZQ_2avw^$Fq#L*& z@38%~0k}73StWN%?tAc|5J(g0eRJOisGnv8rd)AYy0$vk!)w04$PQ@cCfrecRXvwiEaC;N6;94Ow=2iccueKhc@yb+*sCZNGK(w8+vDB9smx z#RA06tvaYAqZ<#PHsYpbpmefX$Ahn!zZbz! z*R?8c5nPzrkmoWz|eZd0pF6)HItXe~-S zmrW#5VyP(RDoYM1nB)q`E+z|1f+;W%e%P9nAe@)dXzk>qzJXoVBS-3BmZPxB3|hYo zg=SlUA|+&*!j7S46b+6B7>M4Hen6v9v%)E9uA~|7aw#~5&80NsT`pDc`SLBS+gB94 zy=FG;qN1#(hSjWhUQ5x5oMZ}FS<#Zfvoa%Bkd$&2MwYWBw6n8GInQS$WOZ-1Z?{{a z(^jo`gU_k@FmpE%iR`0xs{_=0vil))xrn_4L5#yMx*j2jA;Oy`d<2mu$h2bfdYAc2 z$OvmRzkhsKeTTb}Mezp+qC~(t_*9a>r?Lp{H2iu?H07G1W1|E?mSDYiXFwE+aes6u z<_yoWBhdmYjq>|=FB=fuJR`VsaWaq(kU<`;3kY`fJk2`G4wiDI7_h}-8G)+=b_9F{ z$mPng)FfJ8Oi<-~h$Q&9C zx|c5OBZKZ#bQ$(TD4rV3^GmqMV1=GgxQYxF&BuN2B27626c>eH1o2A8^Ko4p?y`?=0f;QCn7ClqX57P8n;nDQXiBagKyP} z^NJuU=oUcp2PPO?FkRq60bUM`I`rU;Ae%CT(0r<0Xa*9r2s&0Q!!(M|jKK}18=q9F zj3_>}AbM1)__g_j-jxxD{A3BOS4?HN6T~FCHPk~hlVB;ILBl z)Y52%5QN1a5$MveU&~LTq!0)5m4>ORQU)?LkX}Nq&%w&Z`XqYiM1?lup{`fwy*9E5 s0yA&UG4jD;vM|8qas$fuPOtXG(yM&qSPsA+){Q^ddtDed2LHeR2Y~9Ty#N3J