Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove redundant channel column #1010

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,6 @@ yarn-error.log*
conda-store.sqlite

*.lockb

# generated test assets
conda-store-server/tests/alembic.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Copyright (c) conda-store development team. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
# Copyright (c) conda-store development team. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

"""${message}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# Copyright (c) conda-store development team. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

"""remove conda package build channel

Revision ID: 89637f546129
Revises: bf065abf375b
Create Date: 2024-12-04 13:09:25.562450

"""
from alembic import op
from sqlalchemy import Column, INTEGER, String, ForeignKey, table, select


# revision identifiers, used by Alembic.
revision = '89637f546129'
down_revision = 'bf065abf375b'
branch_labels = None
depends_on = None

# Due to the issue fixed in https://github.com/conda-incubator/conda-store/pull/961
# many conda_package_build entries have the wrong package entry (but the right channel).
# Because the packages are duplicated, we can not recreate the _conda_package_build_uc
# constraint without the channel_id.
# So, this function will go thru each conda_package_build and re-associate it with the
# correct conda_package based on the channel id.
def fix_misrepresented_packages(conn):
    """Re-point each conda_package_build at the conda_package that matches
    its channel_id, so the unique constraint can later be rebuilt without
    the channel column.

    :param conn: a live SQLAlchemy connection supplied by the migration
        (``op.get_bind()``); rows are read, updated, and committed on it.
    """
    # Memoization cache: "{channel_id}-{name}-{version}" -> conda_package.id,
    # so each distinct package is looked up in the DB at most once.
    conda_packages = {}

    # Lightweight table stubs (only the columns this migration touches) so we
    # can build queries without importing the full ORM models, whose current
    # definitions no longer match this historical schema.
    conda_package_build_table = table(
        "conda_package_build",
        Column("id", INTEGER),
        Column("channel_id", INTEGER),
        Column("package_id", INTEGER, ForeignKey("conda_package.id")),
    )
    conda_package_table = table(
        "conda_package",
        Column("id", INTEGER),
        Column("channel_id", INTEGER),
        Column("name", String),
        Column("version", String),
    )

    def get_conda_package_id(conn, channel_id, name, version):
        # Resolve (channel_id, name, version) to the id of the conda_package
        # row that actually belongs to that channel.
        hashed_name = f"{channel_id}-{name}-{version}"

        # if package was already resolved, return the cached id
        if hashed_name in conda_packages:
            return conda_packages[hashed_name]

        # otherwise query the db for the package
        package = conn.execute(
            select(conda_package_table).where(
                conda_package_table.c.channel_id == channel_id,
                conda_package_table.c.name == name,
                conda_package_table.c.version == version,
            )
        ).first()

        # NOTE(review): assumes a matching conda_package row always exists for
        # this channel/name/version — if .first() returned None this would
        # raise AttributeError. Confirm data integrity guarantees this.
        # cache the resolved id for subsequent rows
        conda_packages[hashed_name] = package.id
        return package.id

    # Walk every build joined to its *currently associated* package; the
    # package row supplies the name/version used to find the correct package
    # for the build's channel.
    for row in conn.execute(
        select(
            conda_package_build_table.c.id,
            conda_package_build_table.c.package_id,
            conda_package_build_table.c.channel_id,
            conda_package_table.c.name,
            conda_package_table.c.version
        ).join(
            conda_package_build_table,
            conda_package_build_table.c.package_id == conda_package_table.c.id
        )
    ):
        # the channel_id might already be empty; nothing to fix for that row
        if row[2] is None:
            continue

        package_id = get_conda_package_id(conn, row[2], row[3], row[4])
        # if the correct package id differs from the currently stored
        # package_id, rewrite the association
        if package_id != row[1]:
            update_package_query = conda_package_build_table.update().where(
                conda_package_build_table.c.id == op.inline_literal(row[0])
            ).values(
                {"package_id": op.inline_literal(package_id)}
            )
            conn.execute(update_package_query)
            conn.commit()

def upgrade():
    """Drop the redundant ``channel_id`` column from ``conda_package_build``.

    First repairs build/package associations corrupted by the bug fixed in
    conda-store PR #961 (otherwise the narrower unique constraint would hit
    duplicate keys), then rebuilds ``_conda_package_build_uc`` without the
    channel column and drops the column itself.
    """
    bind = op.get_bind()

    # So, go thru each conda_package_build and re-associate it with the correct conda_package
    # based on the channel id.
    fix_misrepresented_packages(bind)

    # batch_alter_table is required for SQLite, which cannot ALTER constraints
    # in place; it recreates the table behind the scenes.
    with op.batch_alter_table("conda_package_build") as batch_op:
        # remove channel column from constraints
        # (keyword arguments used throughout for consistency with downgrade())
        batch_op.drop_constraint(
            constraint_name="_conda_package_build_uc",
        )

        # re-add the constraint without the channel column
        batch_op.create_unique_constraint(
            constraint_name="_conda_package_build_uc",
            columns=[
                "package_id",
                "subdir",
                "build",
                "build_number",
                "sha256",
            ],
        )

        # remove channel column
        batch_op.drop_column(
            column_name="channel_id",
        )


def downgrade():
    """Restore the ``channel_id`` column on ``conda_package_build``.

    Re-creates the column, its foreign key to ``conda_channel``, and the
    original unique constraint that included ``channel_id``. The values the
    dropped column used to hold are not recoverable, so the restored column
    starts out empty.
    """
    with op.batch_alter_table("conda_package_build") as batch:
        # the unique constraint must be rebuilt, so drop the current one first
        batch.drop_constraint("_conda_package_build_uc")

        # bring back the channel column and its foreign key
        batch.add_column(Column("channel_id", INTEGER))
        batch.create_foreign_key(
            "fk_channel_id", "conda_channel", ["channel_id"], ["id"]
        )

        # recreate the unique constraint with the channel column included
        batch.create_unique_constraint(
            "_conda_package_build_uc",
            [
                "channel_id",
                "package_id",
                "subdir",
                "build",
                "build_number",
                "sha256",
            ],
        )
9 changes: 0 additions & 9 deletions conda-store-server/conda_store_server/_internal/orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,7 +756,6 @@ class CondaPackageBuild(Base):

__table_args__ = (
UniqueConstraint(
"channel_id",
"package_id",
"subdir",
"build",
Expand All @@ -771,14 +770,6 @@ class CondaPackageBuild(Base):
package_id: Mapped[int] = mapped_column(ForeignKey("conda_package.id"))
package: Mapped["CondaPackage"] = relationship(back_populates="builds")

"""
Some package builds have the exact same data from different channels.
Thus, when adding a channel, populating CondaPackageBuild can encounter
duplicate keys errors. That's why we need to distinguish them by channel_id.
"""
channel_id: Mapped[int] = mapped_column(ForeignKey("conda_channel.id"))
channel: Mapped["CondaChannel"] = relationship(CondaChannel)

build: Mapped[str] = mapped_column(Unicode(64), index=True)
build_number: Mapped[int]
constrains: Mapped[dict] = mapped_column(JSON)
Expand Down
1 change: 1 addition & 0 deletions conda-store-server/environment-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dependencies:
- pytest-celery
- pytest-mock
- pytest-cov
- pytest-alembic
- docker-py<=7
- docker-compose
# build dependencies
Expand Down
1 change: 1 addition & 0 deletions conda-store-server/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ dependencies = [
"pytest",
"pytest-celery",
"pytest-playwright",
"pytest-alembic",
"twine>=5.0.0",
"pkginfo>=1.10", # Needed to support metadata 2.3
"pytest-cov",
Expand Down
3 changes: 3 additions & 0 deletions conda-store-server/tests/_internal/alembic/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Copyright (c) conda-store development team. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Copyright (c) conda-store development team. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# Copyright (c) conda-store development team. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

from sqlalchemy import text

from conda_store_server import api
from conda_store_server._internal import orm


def setup_bad_data_db(conda_store):
    """Seed the test database with pre-migration "bad" data.

    Creates 2 channels, 2 conda packages, and 5 conda package builds, then
    forces channel_id values onto the builds via raw SQL (the current ORM no
    longer has that column) so the migration's repair logic has work to do.
    """
    with conda_store.session_factory() as db:
        # create test channels
        api.create_conda_channel(db, "test-channel-1")
        api.create_conda_channel(db, "test-channel-2")
        db.commit()

        # Two copies of the same package, one per channel. For simplicity each
        # conda_package's channel_id equals its id, so after the migration a
        # build's package_id should equal whatever its channel_id was before.
        for package_id in (1, 2):
            db.add(
                orm.CondaPackage(
                    id=package_id,
                    channel_id=package_id,
                    name="test-package-1",
                    version="1.0.0",
                )
            )
            db.commit()

        # (id, build string, package_id, sha256) for each build row
        build_rows = [
            (1, "py310h06a4308_0", 1, "one"),
            (2, "py311h06a4308_0", 1, "two"),
            (3, "py38h06a4308_0", 1, "three"),
            (4, "py39h06a4308_0", 2, "four"),
            (5, "py310h06a4308_0", 2, "five"),
        ]
        # columns shared by every build, with placeholder values
        shared_values = {
            "build_number": 0,
            "subdir": "linux-64",
            "depends": "",
            "md5": "",
            "timestamp": 0,
            "constrains": "",
            "size": 0,
        }
        for build_id, build, package_id, sha256 in build_rows:
            db.add(
                orm.CondaPackageBuild(
                    id=build_id,
                    build=build,
                    package_id=package_id,
                    sha256=sha256,
                    **shared_values,
                )
            )
            db.commit()

        # Force in some channel data via raw SQL (the ORM model no longer has
        # a channel_id attribute).
        # conda_package_build 1 should have package_id 2 after migration
        db.execute(text("UPDATE conda_package_build SET channel_id=2 WHERE id=1"))
        # conda_package_build 2 should have package_id 1 after migration
        db.execute(text("UPDATE conda_package_build SET channel_id=1 WHERE id=2"))
        # conda_package_build 3 should have package_id 1 after migration
        db.execute(text("UPDATE conda_package_build SET channel_id=1 WHERE id=3"))
        # conda_package_build 4 should have package_id 2 after migration
        db.execute(text("UPDATE conda_package_build SET channel_id=2 WHERE id=4"))

        # don't set conda_package_build 5 channel_id as a test case
        # conda_package_build 5 package_id should be unchanged (2) after migration

        db.commit()


def test_remove_conda_package_build_channel_basic(
    conda_store, alembic_config, alembic_engine, alembic_runner
):
    """Round-trip this migration: upgrade to it, downgrade, upgrade again."""
    target_revision = "89637f546129"

    # bring the schema up to the revision under test
    alembic_runner.migrate_up_to(target_revision)

    # step back one revision to exercise downgrade()
    alembic_runner.migrate_down_one()

    # and re-apply the migration to exercise upgrade() once more
    alembic_runner.migrate_up_one()


def test_remove_conda_package_build_bad_data(
    conda_store, alembic_config, alembic_engine, alembic_runner
):
    """Verify the migration re-associates builds with the correct package.

    Downgrades to the previous revision (which still has the channel_id
    column), seeds the database with builds whose package_id does not match
    their channel_id — the corruption this migration repairs — then runs the
    migration and checks each build ends up pointing at the package whose id
    equals the build's former channel_id.
    """
    # migrate all the way to the target revision
    alembic_runner.migrate_up_to("89637f546129")

    # downgrade so the schema has the channel_id column the fixture needs
    alembic_runner.migrate_down_one()

    # seed db with broken data
    setup_bad_data_db(conda_store)

    # run the migration under test
    alembic_runner.migrate_up_one()

    # build id -> expected package_id after migration. Expected values equal
    # the channel_id forced onto each build by the fixture; build 5 had no
    # channel_id and must keep its original package_id of 2.
    expected_package_ids = {1: 2, 2: 1, 3: 1, 4: 2, 5: 2}

    # ensure all package builds have the right package associated
    with conda_store.session_factory() as db:
        for build_id, expected_package_id in expected_package_ids.items():
            build = (
                db.query(orm.CondaPackageBuild)
                .filter(orm.CondaPackageBuild.id == build_id)
                .first()
            )
            assert build.package_id == expected_package_id
Loading
Loading