geopandas · theroggy · Oct 20, 2024 · Oct 20, 2024
diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py
@@ -93,8 +93,20 @@ def spatialite_available(path):
  return False
 
 
-@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
-def test_read_csv_encoding(tmp_path, encoding):
+@pytest.mark.parametrize(
+ "encoding, arrow",
+ [
+ ("utf-8", False),
+ pytest.param("utf-8", True, marks=requires_pyarrow_api),
+ ("cp1252", False),
+ (None, False),
+ ],
+)
+def test_read_csv_encoding(tmp_path, encoding, arrow):
+ """ "Test reading CSV files with different encodings.
+
+ Arrow only supports utf-8 encoding.
+ """
  # Write csv test file. Depending on the os this will be written in a different
  # encoding: for linux and macos this is utf-8, for windows it is cp1252.
  csv_path = tmp_path / "test.csv"
@@ -105,7 +117,7 @@ def test_read_csv_encoding(tmp_path, encoding):
  # Read csv. The data should be read with the same default encoding as the csv file
  # was written in, but should have been converted to utf-8 in the dataframe returned.
  # Hence, the asserts below, with strings in utf-8, be OK.
- df = read_dataframe(csv_path, encoding=encoding)
+ df = read_dataframe(csv_path, encoding=encoding, use_arrow=arrow)
 
  assert len(df) == 1
  assert df.columns.tolist() == ["näme", "city"]
@@ -117,14 +129,14 @@ def test_read_csv_encoding(tmp_path, encoding):
  locale.getpreferredencoding().upper() == "UTF-8",
  reason="test requires non-UTF-8 default platform",
 )
-def test_read_csv_platform_encoding(tmp_path):
+def test_read_csv_platform_encoding(tmp_path, use_arrow):
  """verify that read defaults to platform encoding; only works on Windows (CP1252)"""
  csv_path = tmp_path / "test.csv"
  with open(csv_path, "w", encoding=locale.getpreferredencoding()) as csv:
  csv.write("näme,city\n")
  csv.write("Wilhelm Röntgen,Zürich\n")
 
- df = read_dataframe(csv_path)
+ df = read_dataframe(csv_path, use_arrow=use_arrow)
 
  assert len(df) == 1
  assert df.columns.tolist() == ["näme", "city"]
@@ -943,9 +955,20 @@ def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres, use_arrow):
  assert df.iloc[0].geometry.area > area_canada
 
 
-@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
-def test_write_csv_encoding(tmp_path, encoding):
- """Test if write_dataframe uses the default encoding correctly."""
+@pytest.mark.parametrize(
+ "encoding, arrow",
+ [
+ ("utf-8", False),
+ pytest.param("utf-8", True, marks=requires_arrow_write_api),
+ ("cp1252", False),
+ (None, False),
+ ],
+)
+def test_write_csv_encoding(tmp_path, encoding, arrow):
+ """Test if write_dataframe uses the default encoding correctly.
+
+ Arrow only supports utf-8 encoding.
+ """
  # Write csv test file. Depending on the os this will be written in a different
  # encoding: for linux and macos this is utf-8, for windows it is cp1252.
  csv_path = tmp_path / "test.csv"
@@ -958,7 +981,7 @@ def test_write_csv_encoding(tmp_path, encoding):
  # same encoding as above.
  df = pd.DataFrame({"näme": ["Wilhelm Röntgen"], "city": ["Zürich"]})
  csv_pyogrio_path = tmp_path / "test_pyogrio.csv"
- write_dataframe(df, csv_pyogrio_path, encoding=encoding)
+ write_dataframe(df, csv_pyogrio_path, encoding=encoding, use_arrow=arrow)
 
  # Check if the text files written both ways can be read again and give same result.
  with open(csv_path, encoding=encoding) as csv: