From 2cd24a197dd88f71ef323e63979d3c10ac9a28ba Mon Sep 17 00:00:00 2001 From: Daksh Bhayana Date: Sat, 12 Apr 2025 23:51:38 +0530 Subject: [PATCH 1/5] Normalized email column in Email --- warehouse/accounts/forms.py | 3 +- warehouse/accounts/models.py | 6 +-- warehouse/accounts/services.py | 5 ++ warehouse/accounts/views.py | 2 +- warehouse/filters.py | 15 ++++++ ...c99e8775603d_new_columns_in_email_model.py | 54 +++++++++++++++++++ 6 files changed, 79 insertions(+), 6 deletions(-) create mode 100644 warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py diff --git a/warehouse/accounts/forms.py b/warehouse/accounts/forms.py index 0d3eb0b16959..34a28fb5e07c 100644 --- a/warehouse/accounts/forms.py +++ b/warehouse/accounts/forms.py @@ -37,6 +37,7 @@ NoRecoveryCodes, TooManyFailedLogins, ) +from warehouse.filters import get_normalized_email from warehouse.accounts.models import DisableReason, ProhibitedEmailDomain from warehouse.accounts.services import RECOVERY_CODE_BYTES from warehouse.captcha import recaptcha @@ -346,7 +347,7 @@ def validate_email(self, field): ) # Check if this email address is already in use - userid = self.user_service.find_userid_by_email(field.data) + userid = self.user_service.find_userid_by_email(get_normalized_email(field.data)) if userid and userid == self.user_id: self.request.metrics.increment( diff --git a/warehouse/accounts/models.py b/warehouse/accounts/models.py index 55e93a8c65e2..2c8c7e6ee702 100644 --- a/warehouse/accounts/models.py +++ b/warehouse/accounts/models.py @@ -416,6 +416,8 @@ class Email(db.ModelBase): ) user: Mapped[User] = orm.relationship(back_populates="emails") email: Mapped[str] = mapped_column(String(length=254)) + normalized_email: Mapped[CITEXT] = mapped_column(CITEXT) + domain: Mapped[CITEXT] = mapped_column(CITEXT) primary: Mapped[bool] verified: Mapped[bool] public: Mapped[bool_false] @@ -433,10 +435,6 @@ class Email(db.ModelBase): comment="Status strings returned by the domain validation service.", ) - @property - def domain(self): - return self.email.split("@")[-1].lower() - class ProhibitedEmailDomain(db.Model): __tablename__ = "prohibited_email_domains" diff --git a/warehouse/accounts/services.py b/warehouse/accounts/services.py index efb9904c4a7a..8e832c25e6b7 100644 --- a/warehouse/accounts/services.py +++ b/warehouse/accounts/services.py @@ -61,6 +61,7 @@ UserTermsOfServiceEngagement, WebAuthn, ) +from warehouse.filters import get_normalized_email, get_email_domain from warehouse.events.tags import EventTag from warehouse.metrics import IMetricsService from warehouse.rate_limiting import DummyRateLimiter, IRateLimiter @@ -300,8 +301,12 @@ def add_email( if primary is None: primary = True if user.primary_email is None else False + normalized_email = get_normalized_email(email_address) + domain = get_email_domain(email_address) email = Email( email=email_address, + normalized_email=normalized_email, + domain=domain, user=user, primary=primary, verified=verified, diff --git a/warehouse/accounts/views.py b/warehouse/accounts/views.py index 38c186dc9c62..c49750865743 100644 --- a/warehouse/accounts/views.py +++ b/warehouse/accounts/views.py @@ -763,7 +763,7 @@ def register(request, _form_class=RegistrationForm): request.registry.settings.get("terms.revision"), TermsOfServiceEngagement.Agreed, ) - email = user_service.add_email(user.id, form.email.data, primary=True) + email = user_service.add_email(user.id, form.email.data, primary=True, ratelimit=False) user.record_event( tag=EventTag.Account.AccountCreate, request=request, diff --git a/warehouse/filters.py b/warehouse/filters.py index d5bae6aee89c..3276ebd7b5c8 100644 --- a/warehouse/filters.py +++ b/warehouse/filters.py @@ -192,6 +192,21 @@ def format_email(metadata_email: str) -> tuple[str, str]: emails.append((name, email)) return emails[0][0], emails[0][1] +def get_normalized_email(email: str) -> str: + """ + Normalize the email address by lowercasing it and stripping whitespace. + """ + return email.lower().strip() + + +def get_email_domain(email: str) -> str: + """ + Extract the domain from the email address. + """ + try: + return email.split("@")[1].lower() + except IndexError: + return "" def remove_invalid_xml_unicode(value: str | None) -> str | None: """ diff --git a/warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py b/warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py new file mode 100644 index 000000000000..651d8c0fba92 --- /dev/null +++ b/warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py @@ -0,0 +1,54 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""New Columns in Email Model + +Revision ID: c99e8775603d +Revises: 4f8982e60deb +Create Date: 2025-04-12 18:45:40.713109 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "c99e8775603d" +down_revision = "4f8982e60deb" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # Add columns + op.add_column('user_emails', sa.Column('normalized_email', CITEXT())) + op.add_column('user_emails', sa.Column('domain', CITEXT())) + + # Populate data + op.execute(""" + UPDATE user_emails + SET normalized_email = LOWER(email), + domain = LOWER(SPLIT_PART(email, '@', 2)) + """) + + # Add constraints + op.alter_column('user_emails', 'normalized_email', nullable=False) + op.alter_column('user_emails', 'domain', nullable=False) + + +def downgrade() -> None: + """Downgrade schema.""" + # Drop columns + op.drop_column('user_emails', 'domain') + op.drop_column('user_emails', 'normalized_email') From 865abbf1ae34210722847746406cf0bbbf7234be Mon Sep 17 00:00:00 2001 From: Daksh Bhayana Date: Sat, 12 Apr 2025 23:57:19 +0530 Subject: [PATCH 2/5] WIP --- .../versions/c99e8775603d_new_columns_in_email_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py b/warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py index 651d8c0fba92..80e7ff14da68 100644 --- a/warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py +++ b/warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py @@ -21,6 +21,7 @@ from alembic import op import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import CITEXT # revision identifiers, used by Alembic. revision = "c99e8775603d" From 45a6b5c2bf172cdddd4216285821c3ccc143dccf Mon Sep 17 00:00:00 2001 From: Daksh Bhayana Date: Sun, 13 Apr 2025 01:56:43 +0530 Subject: [PATCH 3/5] Formatter and linter --- tests/common/db/accounts.py | 5 ++++ tests/unit/accounts/test_models.py | 10 ++++++- warehouse/accounts/forms.py | 6 +++-- warehouse/accounts/services.py | 2 +- warehouse/accounts/views.py | 2 +- warehouse/admin/views/users.py | 5 ++++ warehouse/filters.py | 2 ++ ...c99e8775603d_new_columns_in_email_model.py | 27 ++++++++++--------- 8 files changed, 42 insertions(+), 17 deletions(-) diff --git a/tests/common/db/accounts.py b/tests/common/db/accounts.py index 0b392a0064e2..386619eb6a52 100644 --- a/tests/common/db/accounts.py +++ b/tests/common/db/accounts.py @@ -25,6 +25,7 @@ User, UserTermsOfServiceEngagement, ) +from warehouse.filters import get_email_domain, get_normalized_email from .base import WarehouseFactory @@ -116,6 +117,10 @@ class Meta: # TODO: Replace when factory_boy supports `unique`. # See https://github.com/FactoryBoy/factory_boy/pull/997 email = factory.Sequence(lambda _: fake.unique.safe_email()) + normalized_email = factory.LazyAttribute( + lambda obj: get_normalized_email(obj.email) + ) + domain = factory.LazyAttribute(lambda obj: get_email_domain(obj.email)) verified = True primary = True diff --git a/tests/unit/accounts/test_models.py b/tests/unit/accounts/test_models.py index a6981e69debd..322612ac1b77 100644 --- a/tests/unit/accounts/test_models.py +++ b/tests/unit/accounts/test_models.py @@ -19,6 +19,7 @@ from warehouse.accounts.models import Email, RecoveryCode, User, UserFactory, WebAuthn from warehouse.authnz import Permissions +from warehouse.filters import get_email_domain, get_normalized_email from warehouse.utils.security_policy import principals_for from ...common.db.accounts import ( @@ -61,7 +62,14 @@ def test_has_primary_verified_email(self, db_session, email, verified, allowed): user = DBUserFactory.create() if email: - e = Email(email=email, user=user, primary=True, verified=verified) + e = Email( + email=email, + user=user, + normalized_email=get_normalized_email(email), + domain=get_email_domain(email), + primary=True, + verified=verified, + ) db_session.add(e) db_session.flush() diff --git a/warehouse/accounts/forms.py b/warehouse/accounts/forms.py index 34a28fb5e07c..51749374c446 100644 --- a/warehouse/accounts/forms.py +++ b/warehouse/accounts/forms.py @@ -37,7 +37,6 @@ NoRecoveryCodes, TooManyFailedLogins, ) -from warehouse.filters import get_normalized_email from warehouse.accounts.models import DisableReason, ProhibitedEmailDomain from warehouse.accounts.services import RECOVERY_CODE_BYTES from warehouse.captcha import recaptcha @@ -47,6 +46,7 @@ send_recovery_code_used_email, ) from warehouse.events.tags import EventTag +from warehouse.filters import get_normalized_email from warehouse.i18n import localize as _ # Common messages, set as constants to keep them from drifting. @@ -347,7 +347,9 @@ def validate_email(self, field): ) # Check if this email address is already in use - userid = self.user_service.find_userid_by_email(get_normalized_email(field.data)) + userid = self.user_service.find_userid_by_email( + get_normalized_email(field.data) + ) if userid and userid == self.user_id: self.request.metrics.increment( diff --git a/warehouse/accounts/services.py b/warehouse/accounts/services.py index 8e832c25e6b7..438fadb0d4a1 100644 --- a/warehouse/accounts/services.py +++ b/warehouse/accounts/services.py @@ -61,8 +61,8 @@ UserTermsOfServiceEngagement, WebAuthn, ) -from warehouse.filters import get_normalized_email, get_email_domain from warehouse.events.tags import EventTag +from warehouse.filters import get_email_domain, get_normalized_email from warehouse.metrics import IMetricsService from warehouse.rate_limiting import DummyRateLimiter, IRateLimiter from warehouse.utils.crypto import BadData, SignatureExpired, URLSafeTimedSerializer diff --git a/warehouse/accounts/views.py b/warehouse/accounts/views.py index c49750865743..38c186dc9c62 100644 --- a/warehouse/accounts/views.py +++ b/warehouse/accounts/views.py @@ -763,7 +763,7 @@ def register(request, _form_class=RegistrationForm): request.registry.settings.get("terms.revision"), TermsOfServiceEngagement.Agreed, ) - email = user_service.add_email(user.id, form.email.data, primary=True, ratelimit=False) + email = user_service.add_email(user.id, form.email.data, primary=True) user.record_event( tag=EventTag.Account.AccountCreate, request=request, diff --git a/warehouse/admin/views/users.py b/warehouse/admin/views/users.py index 4524bcc5e074..e298c4eae7da 100644 --- a/warehouse/admin/views/users.py +++ b/warehouse/admin/views/users.py @@ -45,6 +45,7 @@ send_account_recovery_initiated_email, send_password_reset_by_admin_email, ) +from warehouse.filters import get_email_domain, get_normalized_email from warehouse.observations.models import ObservationKind from warehouse.packaging.models import JournalEntry, Project, Release, Role from warehouse.utils.paginate import paginate_url_factory @@ -101,6 +102,8 @@ class EmailForm(wtforms.Form): unverify_reason = wtforms.fields.StringField(render_kw={"readonly": True}) domain_last_checked = wtforms.fields.DateTimeField(render_kw={"readonly": True}) domain_last_status = wtforms.fields.StringField(render_kw={"readonly": True}) + normalized_email = wtforms.fields.StringField(render_kw={"readonly": True}) + domain = wtforms.fields.StringField(render_kw={"readonly": True}) class EmailsForm(wtforms.Form): @@ -288,6 +291,8 @@ def user_add_email(user, request): email = Email( email=form.email.data, + normalized_email=get_normalized_email(form.email.data), + domain=get_email_domain(form.email.data), user=user, primary=form.primary.data, verified=form.verified.data, diff --git a/warehouse/filters.py b/warehouse/filters.py index 3276ebd7b5c8..5279cb866df6 100644 --- a/warehouse/filters.py +++ b/warehouse/filters.py @@ -192,6 +192,7 @@ def format_email(metadata_email: str) -> tuple[str, str]: emails.append((name, email)) return emails[0][0], emails[0][1] + def get_normalized_email(email: str) -> str: """ Normalize the email address by lowercasing it and stripping whitespace. @@ -208,6 +209,7 @@ def get_email_domain(email: str) -> str: except IndexError: return "" + def remove_invalid_xml_unicode(value: str | None) -> str | None: """ Remove invalid unicode characters from a string. diff --git a/warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py b/warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py index 80e7ff14da68..904ae80592ed 100644 --- a/warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py +++ b/warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py @@ -16,40 +16,43 @@ Create Date: 2025-04-12 18:45:40.713109 """ -from typing import Sequence, Union +from collections.abc import Sequence +from typing import Union -from alembic import op import sqlalchemy as sa +from alembic import op from sqlalchemy.dialects.postgresql import CITEXT # revision identifiers, used by Alembic. revision = "c99e8775603d" down_revision = "4f8982e60deb" -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None def upgrade() -> None: """Upgrade schema.""" # Add columns - op.add_column('user_emails', sa.Column('normalized_email', CITEXT())) - op.add_column('user_emails', sa.Column('domain', CITEXT())) + op.add_column("user_emails", sa.Column("normalized_email", CITEXT())) + op.add_column("user_emails", sa.Column("domain", CITEXT())) # Populate data - op.execute(""" + op.execute( + """ UPDATE user_emails SET normalized_email = LOWER(email), domain = LOWER(SPLIT_PART(email, '@', 2)) - """) + """ + ) # Add constraints - op.alter_column('user_emails', 'normalized_email', nullable=False) - op.alter_column('user_emails', 'domain', nullable=False) + op.alter_column("user_emails", "normalized_email", nullable=False) + op.alter_column("user_emails", "domain", nullable=False) def downgrade() -> None: """Downgrade schema.""" # Drop columns - op.drop_column('user_emails', 'domain') - op.drop_column('user_emails', 'normalized_email') + op.drop_column("user_emails", "domain") + op.drop_column("user_emails", "normalized_email") From 1bbc123b43896490e1c951b6d37abd26a7e331df Mon Sep 17 00:00:00 2001 From: Daksh Bhayana Date: Sun, 13 Apr 2025 02:02:50 +0530 Subject: [PATCH 4/5] Ran make translations --- warehouse/locale/messages.pot | 48 +++++++++++++++++------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/warehouse/locale/messages.pot b/warehouse/locale/messages.pot index 3819c3d9ddec..83f09272d7bc 100644 --- a/warehouse/locale/messages.pot +++ b/warehouse/locale/messages.pot @@ -14,111 +14,111 @@ msgstr "" msgid "Locale updated" msgstr "" -#: warehouse/accounts/forms.py:52 warehouse/accounts/forms.py:290 +#: warehouse/accounts/forms.py:53 warehouse/accounts/forms.py:291 msgid "The email address isn't valid. Try again." msgstr "" -#: warehouse/accounts/forms.py:53 +#: warehouse/accounts/forms.py:54 msgid "The password is invalid. Try again." msgstr "" -#: warehouse/accounts/forms.py:55 +#: warehouse/accounts/forms.py:56 msgid "" "The username is invalid. Usernames must be composed of letters, numbers, " "dots, hyphens and underscores. And must also start and finish with a " "letter or number. Choose a different username." msgstr "" -#: warehouse/accounts/forms.py:72 +#: warehouse/accounts/forms.py:73 msgid "Null bytes are not allowed." msgstr "" -#: warehouse/accounts/forms.py:86 +#: warehouse/accounts/forms.py:87 msgid "No user found with that username" msgstr "" -#: warehouse/accounts/forms.py:107 +#: warehouse/accounts/forms.py:108 #, python-brace-format msgid "TOTP code must be ${totp_length} digits." msgstr "" -#: warehouse/accounts/forms.py:127 +#: warehouse/accounts/forms.py:128 #, python-brace-format msgid "Recovery Codes must be ${recovery_code_length} characters." msgstr "" -#: warehouse/accounts/forms.py:141 +#: warehouse/accounts/forms.py:142 msgid "Choose a username with 50 characters or less." msgstr "" -#: warehouse/accounts/forms.py:159 +#: warehouse/accounts/forms.py:160 msgid "" "This username is already being used by another account. Choose a " "different username." msgstr "" -#: warehouse/accounts/forms.py:172 warehouse/accounts/forms.py:221 -#: warehouse/accounts/forms.py:234 +#: warehouse/accounts/forms.py:173 warehouse/accounts/forms.py:222 +#: warehouse/accounts/forms.py:235 msgid "Password too long." msgstr "" -#: warehouse/accounts/forms.py:204 +#: warehouse/accounts/forms.py:205 #, python-brace-format msgid "" "There have been too many unsuccessful login attempts. You have been " "locked out for ${time}. Please try again later." msgstr "" -#: warehouse/accounts/forms.py:237 +#: warehouse/accounts/forms.py:238 msgid "Your passwords don't match. Try again." msgstr "" -#: warehouse/accounts/forms.py:271 +#: warehouse/accounts/forms.py:272 msgid "The email address is too long. Try again." msgstr "" -#: warehouse/accounts/forms.py:343 +#: warehouse/accounts/forms.py:344 msgid "You can't use an email address from this domain. Use a different email." msgstr "" -#: warehouse/accounts/forms.py:358 +#: warehouse/accounts/forms.py:361 msgid "" "This email address is already being used by this account. Use a different" " email." msgstr "" -#: warehouse/accounts/forms.py:369 +#: warehouse/accounts/forms.py:372 msgid "" "This email address is already being used by another account. Use a " "different email." msgstr "" -#: warehouse/accounts/forms.py:409 warehouse/manage/forms.py:141 +#: warehouse/accounts/forms.py:412 warehouse/manage/forms.py:141 #: warehouse/manage/forms.py:783 msgid "The name is too long. Choose a name with 100 characters or less." msgstr "" -#: warehouse/accounts/forms.py:415 +#: warehouse/accounts/forms.py:418 msgid "URLs are not allowed in the name field." msgstr "" -#: warehouse/accounts/forms.py:504 +#: warehouse/accounts/forms.py:507 msgid "Invalid TOTP code." msgstr "" -#: warehouse/accounts/forms.py:521 +#: warehouse/accounts/forms.py:524 msgid "Invalid WebAuthn assertion: Bad payload" msgstr "" -#: warehouse/accounts/forms.py:590 +#: warehouse/accounts/forms.py:593 msgid "Invalid recovery code." msgstr "" -#: warehouse/accounts/forms.py:599 +#: warehouse/accounts/forms.py:602 msgid "Recovery code has been previously used." msgstr "" -#: warehouse/accounts/forms.py:629 +#: warehouse/accounts/forms.py:632 msgid "The username isn't valid. Try again." msgstr "" From da1135d25404fee4d96ceca22b1245a50275cd76 Mon Sep 17 00:00:00 2001 From: Daksh Bhayana Date: Sun, 13 Apr 2025 02:17:06 +0530 Subject: [PATCH 5/5] FMT fix --- .../versions/c99e8775603d_new_columns_in_email_model.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py b/warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py index 904ae80592ed..ea7b1a89b332 100644 --- a/warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py +++ b/warehouse/migrations/versions/c99e8775603d_new_columns_in_email_model.py @@ -17,7 +17,6 @@ """ from collections.abc import Sequence -from typing import Union import sqlalchemy as sa @@ -40,7 +39,7 @@ def upgrade() -> None: # Populate data op.execute( """ - UPDATE user_emails + UPDATE user_emails SET normalized_email = LOWER(email), domain = LOWER(SPLIT_PART(email, '@', 2)) """