diff --git a/.github/workflows/pypi-build-artifacts.yml b/.github/workflows/pypi-build-artifacts.yml index da282c6b46..230209a988 100644 --- a/.github/workflows/pypi-build-artifacts.yml +++ b/.github/workflows/pypi-build-artifacts.yml @@ -71,6 +71,8 @@ jobs: # Ignore 32 bit architectures CIBW_ARCHS: "auto64" CIBW_PROJECT_REQUIRES_PYTHON: ">=3.10,<3.14" + # Keep these in sync with Python CI job `cibw-dev-env-smoke-test` + # in .github/workflows/python-ci.yml to catch import-time regressions early. CIBW_BEFORE_TEST: "uv sync --directory {project} --only-group dev --no-install-project" CIBW_TEST_COMMAND: "uv run --directory {project} pytest tests/avro/test_decoder.py" # Skip free-threaded (PEP 703) builds until we evaluate decoder_fast support diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index ddc71d174b..fadce41b11 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -200,3 +200,26 @@ jobs: merge-multiple: true - name: Generate coverage report (75%) # Coverage threshold should only increase over time — never decrease it! run: COVERAGE_FAIL_UNDER=75 make coverage-report + + cibw-dev-env-smoke-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 + with: + python-version: '3.12' + - name: Install UV + uses: astral-sh/setup-uv@v7 + with: + enable-cache: true + # Why this exists: + # Catch import-time regressions (e.g., global conftest optional deps) + # in the same dev-only environment used by cibuildwheel wheel tests. + # Keep this in sync with wheel build test setup in + # .github/workflows/pypi-build-artifacts.yml: + # CIBW_BEFORE_TEST: uv sync --directory {project} --only-group dev --no-install-project + # CIBW_TEST_COMMAND: uv run --directory {project} pytest tests/avro/test_decoder.py + - name: Mirror wheel CIBW_BEFORE_TEST + run: uv sync --directory . --only-group dev --no-install-project + - name: Mirror wheel CIBW_TEST_COMMAND + run: uv run --directory . pytest tests/avro/test_decoder.py diff --git a/pyproject.toml b/pyproject.toml index 00d4a9c56e..3c6624dc7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,6 +118,7 @@ dev = [ "docutils!=0.21.post1", "mypy-boto3-glue>=1.28.18", "mypy-boto3-dynamodb>=1.28.18", + "google-cloud-bigquery>=3.33.0,<4", "pyarrow-stubs>=20.0.0.20251107", # Remove when pyarrow >= 23.0.0 https://github.com/apache/arrow/pull/47609 "sqlalchemy>=2.0.18,<3", ] diff --git a/tests/conftest.py b/tests/conftest.py index 5c85f49a77..cd839e5064 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -49,14 +49,6 @@ from pytest_lazy_fixtures import lf from pyiceberg.catalog import Catalog, load_catalog -from pyiceberg.catalog.bigquery_metastore import BigQueryMetastoreCatalog -from pyiceberg.catalog.dynamodb import DynamoDbCatalog -from pyiceberg.catalog.glue import GlueCatalog -from pyiceberg.catalog.hive import HiveCatalog -from pyiceberg.catalog.memory import InMemoryCatalog -from pyiceberg.catalog.noop import NoopCatalog -from pyiceberg.catalog.rest import RestCatalog -from pyiceberg.catalog.sql import SqlCatalog from pyiceberg.expressions import BoundReference from pyiceberg.io import ( ADLS_ACCOUNT_KEY, @@ -2497,6 +2489,8 @@ def warehouse(tmp_path_factory: pytest.TempPathFactory) -> Path: @pytest.fixture def table_v1(example_table_metadata_v1: dict[str, Any]) -> Table: + from pyiceberg.catalog.noop import NoopCatalog + table_metadata = TableMetadataV1(**example_table_metadata_v1) return Table( identifier=("database", "table"), @@ -2509,6 +2503,8 @@ def table_v1(example_table_metadata_v1: dict[str, Any]) -> Table: @pytest.fixture def table_v2(example_table_metadata_v2: dict[str, Any]) -> Table: + from pyiceberg.catalog.noop import NoopCatalog + table_metadata = TableMetadataV2(**example_table_metadata_v2) return Table( identifier=("database", "table"), @@ -2521,6 +2517,8 @@ def table_v2(example_table_metadata_v2: dict[str, Any]) -> Table: @pytest.fixture def table_v3(example_table_metadata_v3: dict[str, Any]) -> Table: + from pyiceberg.catalog.noop import NoopCatalog + table_metadata = TableMetadataV3(**example_table_metadata_v3) return Table( identifier=("database", "table"), @@ -2535,6 +2533,8 @@ def table_v3(example_table_metadata_v3: dict[str, Any]) -> Table: def table_v2_orc(example_table_metadata_v2: dict[str, Any]) -> Table: import copy + from pyiceberg.catalog.noop import NoopCatalog + metadata_dict = copy.deepcopy(example_table_metadata_v2) if not metadata_dict["properties"]: metadata_dict["properties"] = {} @@ -2553,6 +2553,8 @@ def table_v2_orc(example_table_metadata_v2: dict[str, Any]) -> Table: def table_v2_with_fixed_and_decimal_types( table_metadata_v2_with_fixed_and_decimal_types: dict[str, Any], ) -> Table: + from pyiceberg.catalog.noop import NoopCatalog + table_metadata = TableMetadataV2( **table_metadata_v2_with_fixed_and_decimal_types, ) @@ -2567,6 +2569,8 @@ def table_v2_with_fixed_and_decimal_types( @pytest.fixture def table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_snapshots: dict[str, Any]) -> Table: + from pyiceberg.catalog.noop import NoopCatalog + table_metadata = TableMetadataV2(**example_table_metadata_v2_with_extensive_snapshots) return Table( identifier=("database", "table"), @@ -2579,6 +2583,8 @@ def table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_s @pytest.fixture def table_v2_with_statistics(table_metadata_v2_with_statistics: dict[str, Any]) -> Table: + from pyiceberg.catalog.noop import NoopCatalog + table_metadata = TableMetadataV2(**table_metadata_v2_with_statistics) return Table( identifier=("database", "table"), @@ -3000,11 +3006,15 @@ def ray_session() -> Generator[Any, None, None]: # Catalog fixtures -def _create_memory_catalog(name: str, warehouse: Path) -> InMemoryCatalog: +def _create_memory_catalog(name: str, warehouse: Path) -> Catalog: + from pyiceberg.catalog.memory import InMemoryCatalog + return InMemoryCatalog(name, warehouse=f"file://{warehouse}") -def _create_sql_catalog(name: str, warehouse: Path) -> SqlCatalog: +def _create_sql_catalog(name: str, warehouse: Path) -> Catalog: + from pyiceberg.catalog.sql import SqlCatalog + catalog = SqlCatalog( name, uri="sqlite:///:memory:", @@ -3014,7 +3024,9 @@ def _create_sql_catalog(name: str, warehouse: Path) -> SqlCatalog: return catalog -def _create_sql_without_rowcount_catalog(name: str, warehouse: Path) -> SqlCatalog: +def _create_sql_without_rowcount_catalog(name: str, warehouse: Path) -> Catalog: + from pyiceberg.catalog.sql import SqlCatalog + props = { "uri": f"sqlite:////{warehouse}/sql-catalog", "warehouse": f"file://{warehouse}", @@ -3152,48 +3164,83 @@ def test_table_properties() -> dict[str, str]: def does_support_purge_table(catalog: Catalog) -> bool: + from pyiceberg.catalog.noop import NoopCatalog + from pyiceberg.catalog.rest import RestCatalog + if isinstance(catalog, RestCatalog): return property_as_bool(catalog.properties, "supports_purge_table", True) + from pyiceberg.catalog.hive import HiveCatalog + if isinstance(catalog, (HiveCatalog, NoopCatalog)): return False return True def does_support_atomic_concurrent_updates(catalog: Catalog) -> bool: + from pyiceberg.catalog.noop import NoopCatalog + from pyiceberg.catalog.rest import RestCatalog + if isinstance(catalog, RestCatalog): return property_as_bool(catalog.properties, "supports_atomic_concurrent_updates", True) + from pyiceberg.catalog.hive import HiveCatalog + if isinstance(catalog, (HiveCatalog, NoopCatalog)): return False return True def does_support_nested_namespaces(catalog: Catalog) -> bool: + from pyiceberg.catalog.dynamodb import DynamoDbCatalog + from pyiceberg.catalog.glue import GlueCatalog + from pyiceberg.catalog.noop import NoopCatalog + from pyiceberg.catalog.rest import RestCatalog + if isinstance(catalog, RestCatalog): return property_as_bool(catalog.properties, "supports_nested_namespaces", True) - if isinstance(catalog, (HiveCatalog, NoopCatalog, GlueCatalog, BigQueryMetastoreCatalog, DynamoDbCatalog)): + from pyiceberg.catalog.bigquery_metastore import BigQueryMetastoreCatalog + from pyiceberg.catalog.hive import HiveCatalog + + if isinstance(catalog, (HiveCatalog, BigQueryMetastoreCatalog, NoopCatalog, GlueCatalog, DynamoDbCatalog)): return False return True def does_support_schema_evolution(catalog: Catalog) -> bool: + from pyiceberg.catalog.noop import NoopCatalog + from pyiceberg.catalog.rest import RestCatalog + if isinstance(catalog, RestCatalog): return property_as_bool(catalog.properties, "supports_schema_evolution", True) + from pyiceberg.catalog.hive import HiveCatalog + if isinstance(catalog, (HiveCatalog, NoopCatalog)): return False return True def does_support_slash_in_identifier(catalog: Catalog) -> bool: + from pyiceberg.catalog.noop import NoopCatalog + from pyiceberg.catalog.rest import RestCatalog + from pyiceberg.catalog.sql import SqlCatalog + if isinstance(catalog, RestCatalog): return property_as_bool(catalog.properties, "supports_slash_in_identifier", True) + from pyiceberg.catalog.hive import HiveCatalog + if isinstance(catalog, (HiveCatalog, NoopCatalog, SqlCatalog)): return False return True def does_support_dot_in_identifier(catalog: Catalog) -> bool: + from pyiceberg.catalog.noop import NoopCatalog + from pyiceberg.catalog.rest import RestCatalog + from pyiceberg.catalog.sql import SqlCatalog + if isinstance(catalog, RestCatalog): return property_as_bool(catalog.properties, "supports_dot_in_identifier", True) + from pyiceberg.catalog.hive import HiveCatalog + if isinstance(catalog, (HiveCatalog, NoopCatalog, SqlCatalog)): return False return True diff --git a/uv.lock b/uv.lock index 46fbc8002d..01da16cfff 100644 --- a/uv.lock +++ b/uv.lock @@ -4539,6 +4539,7 @@ dev = [ { name = "deptry" }, { name = "docutils" }, { name = "fastavro" }, + { name = "google-cloud-bigquery" }, { name = "moto", extra = ["server"] }, { name = "mypy-boto3-dynamodb" }, { name = "mypy-boto3-glue" }, @@ -4628,6 +4629,7 @@ dev = [ { name = "deptry", specifier = ">=0.14,<0.25" }, { name = "docutils", specifier = "!=0.21.post1" }, { name = "fastavro", specifier = "==1.12.1" }, + { name = "google-cloud-bigquery", specifier = ">=3.33.0,<4" }, { name = "moto", extras = ["server"], specifier = ">=5.0.2,<6" }, { name = "mypy-boto3-dynamodb", specifier = ">=1.28.18" }, { name = "mypy-boto3-glue", specifier = ">=1.28.18" },