Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENG-6704] [ENG-6705] Reference PR #10857

Merged
merged 27 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
292dca2
[ENG-6364] Migrate Preprint Affilations (#10787)
Johnetordoff Nov 5, 2024
f832e5e
[ENG-4438] Add OOPSpam and Akismet metrics to spam report (#10783)
uditijmehta Nov 5, 2024
f67b86f
Add PrivateSpamMetricsReport (#10791)
mfraezz Nov 7, 2024
eadb41f
[ENG-6435] Fix: duplicate reports when run for past years (#10800)
aaxelb Nov 15, 2024
913889d
[ENG-6506] Fix: counted-usage clobbers (#10799)
aaxelb Nov 15, 2024
6cef157
Merge branch 'hotfix/24.09.3'
mfraezz Nov 27, 2024
0ec9101
Avoid Sequence Scans on BFN
mfraezz Dec 2, 2024
0a510f5
Use low queue for metric reporters
mfraezz Dec 2, 2024
ecd96ec
Merge branch 'hotfix/24.09.4'
mfraezz Dec 2, 2024
869c146
Merge branch 'hotfix/24.09.4' into develop
mfraezz Dec 2, 2024
663db9b
Merge remote-tracking branch 'upstream/develop' into feature/b-and-i-…
cslzchen Dec 4, 2024
d34cac0
Fix failures caused by base class MonthlyReporter update
cslzchen Dec 4, 2024
8997814
Follow-up fix for target/next (start/end) month
cslzchen Dec 5, 2024
ed9bac7
Merge pull request #10822 from cslzchen/feature/b-and-i-with-dashboar…
cslzchen Dec 5, 2024
cadb79b
Merge branch 'feature/b-and-i-24-22-release' into release/24.10.0
cslzchen Dec 5, 2024
40e7f26
Update changelog and bump versions
cslzchen Dec 5, 2024
279245a
Merge branch 'release/24.10.0'
cslzchen Dec 5, 2024
c025346
Merge tag '24.10.0' into develop
cslzchen Dec 5, 2024
d9b4598
Fix backfill, report
mfraezz Dec 6, 2024
86dae50
Merge branch 'hotfix/24.10.1'
mfraezz Dec 6, 2024
68c84ce
Merge branch 'hotfix/24.10.1' into develop
mfraezz Dec 6, 2024
c966fac
[Feature] Dashboard B&I (#10843)
mfraezz Dec 11, 2024
4d1708f
Update CHANGELOG, bump version
mfraezz Dec 11, 2024
d68a07b
Merge branch 'release/24.11.0'
mfraezz Dec 11, 2024
c72f3c6
Merge branch 'release/24.11.0' into develop
mfraezz Dec 11, 2024
6dce520
Assume default for global_ notifications
mfraezz Dec 3, 2024
8643e3f
Merge branch 'hotfix/24.11.1' into develop
mfraezz Dec 11, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,20 @@

We follow the CalVer (https://calver.org/) versioning scheme: YY.MINOR.MICRO.

24.11.0 (2024-12-11)
====================
- Institutional Dashboard Project Bugfix Release

24.10.0 (2024-12-05)
====================

- Migrate Preprint Affiliations
- Add OOPSpam and Akismet metrics to spam report
- Add PrivateSpamMetricsReport
- Update PrivateSpamMetricsReporter to work with refactored MonthlyReporter
- Fix duplicate reports when run for past years
- Fix counted-usage clobbers

24.09.0 (2024-11-14)
====================

Expand Down
2 changes: 2 additions & 0 deletions api/institutions/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ class Meta:
})

id = IDField(source='meta.id', read_only=True)
report_yearmonth = YearmonthField(read_only=True)
user_name = ser.CharField(read_only=True)
department = ser.CharField(read_only=True, source='department_name')
orcid_id = ser.CharField(read_only=True)
Expand Down Expand Up @@ -372,6 +373,7 @@ class Meta:

id = IDField(read_only=True)

report_yearmonth = YearmonthField(read_only=True)
user_count = ser.IntegerField(read_only=True)
public_project_count = ser.IntegerField(read_only=True)
private_project_count = ser.IntegerField(read_only=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ def test_get_report(self, app, url, institutional_admin, institution, reports, u
assert data['type'] == 'institution-summary-metrics'

attributes = data['attributes']
assert attributes['report_yearmonth'] == '2024-08'
assert attributes['user_count'] == 200
assert attributes['public_project_count'] == 150
assert attributes['private_project_count'] == 125
Expand Down Expand Up @@ -254,6 +255,7 @@ def test_get_report_with_multiple_months_and_institutions(

attributes = data['attributes']

assert attributes['report_yearmonth'] == '2024-09'
assert attributes['user_count'] == 250
assert attributes['public_project_count'] == 200
assert attributes['private_project_count'] == 150
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,7 @@ def test_get_report_formats_csv_tsv(self, app, url, institutional_admin, institu
response_body = resp.text
expected_response = [
[
'report_yearmonth',
'account_creation_date',
'department',
'embargoed_registration_count',
Expand All @@ -460,6 +461,7 @@ def test_get_report_formats_csv_tsv(self, app, url, institutional_admin, institu
'user_name'
],
[
'2024-08',
'2018-02',
'Center, \t Greatest Ever',
'1',
Expand Down Expand Up @@ -512,6 +514,7 @@ def test_csv_tsv_ignores_pagination(self, app, url, institutional_admin, institu
month_last_login='2018-02',
)
expected_data.append([
'2024-08',
'2018-02',
'QBatman',
'1',
Expand Down Expand Up @@ -552,6 +555,7 @@ def test_csv_tsv_ignores_pagination(self, app, url, institutional_admin, institu
response_rows = list(reader)
# Validate header row
expected_header = [
'report_yearmonth',
'account_creation_date',
'department',
'embargoed_registration_count',
Expand Down Expand Up @@ -606,6 +610,7 @@ def test_get_report_format_table_json(self, app, url, institutional_admin, insti
response_data = json.loads(resp.body)
expected_data = [
{
'report_yearmonth': '2024-08',
'account_creation_date': '2018-02',
'department': 'Safety "The Wolverine" Weapon X',
'embargoed_registration_count': 1,
Expand Down
16 changes: 8 additions & 8 deletions api_tests/metrics/test_counted_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ def test_by_client_session_id(self, app, mock_save, user):
assert resp.status_code == 201
assert_saved_with(
mock_save,
# doc_id: sha256(b'http://example.foo/|http://example.foo/blahblah/blee|5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34|1981-01-01|3').hexdigest()
expected_doc_id='55fffffdc0d674d15a5e8763d14e4ae90f658fbfb6fbf94f88a5d24978f02e72',
# doc_id: sha256(b'http://example.foo/|http://example.foo/blahblah/blee|5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34|1981-01-01|3|api,view').hexdigest()
expected_doc_id='3239044c7462dd318edd0522a0ed7d84b9c6502ef16cb40dfcae6c1f456d57a2',
expected_attrs={
'platform_iri': 'http://example.foo/',
'item_guid': 'zyxwv',
Expand Down Expand Up @@ -132,8 +132,8 @@ def test_by_client_session_id_anon(self, app, mock_save):
assert resp.status_code == 201
assert_saved_with(
mock_save,
# doc_id: sha256(b'http://example.foo/|http://example.foo/bliz/|5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34|1981-01-01|3').hexdigest()
expected_doc_id='e559ffbc4bd3e3e69252d34c273f0e771ec89ee455ec9b60fbbadf3944e4af4e',
# doc_id: sha256(b'http://example.foo/|http://example.foo/bliz/|5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34|1981-01-01|3|view,web').hexdigest()
expected_doc_id='d01759e963893f9dc9b2ccf016a5ef29135673779802b5578f31449543677e82',
expected_attrs={
'platform_iri': 'http://example.foo/',
'item_guid': 'zyxwv',
Expand Down Expand Up @@ -166,8 +166,8 @@ def test_by_user_auth(self, app, mock_save, user):
assert resp.status_code == 201
assert_saved_with(
mock_save,
# doc_id: sha256(b'http://example.foo/|http://osf.io/mst3k|ec768abb16c3411570af99b9d635c2c32d1ca31d1b25eec8ee73759e7242e74a|1981-01-01|3').hexdigest()
expected_doc_id='743494d8a55079b91e202da1dbdfce5aea72e310c57a34b36df2c2af5ed4d362',
# doc_id: sha256(b'http://example.foo/|http://osf.io/mst3k|ec768abb16c3411570af99b9d635c2c32d1ca31d1b25eec8ee73759e7242e74a|1981-01-01|3|view,web').hexdigest()
expected_doc_id='7b8bc27c6d90fb45aa5bbd02deceba9f7384ed61b9a6e7253317c262020b94c2',
expected_attrs={
'platform_iri': 'http://example.foo/',
'item_guid': 'yxwvu',
Expand Down Expand Up @@ -196,8 +196,8 @@ def test_by_useragent_header(self, app, mock_save):
assert resp.status_code == 201
assert_saved_with(
mock_save,
# doc_id: sha256(b'http://example.foo/|yxwvu|97098dd3f7cd26053c0d0264d1c84eaeea8e08d2c55ca34017ffbe53c749ba5a|1981-01-01|3').hexdigest()
expected_doc_id='a50ac1b2dc1c918cdea7be50b005117fdb6ee00ea069ca3aa4aaf03c0f905fa0',
# doc_id: sha256(b'http://example.foo/|yxwvu|97098dd3f7cd26053c0d0264d1c84eaeea8e08d2c55ca34017ffbe53c749ba5a|1981-01-01|3|api,view').hexdigest()
expected_doc_id='d669528b30f443ffe506e183537af9624ef290090e90a200ecce7b7ca19c77f7',
expected_attrs={
'platform_iri': 'http://example.foo/',
'item_guid': 'yxwvu',
Expand Down
34 changes: 34 additions & 0 deletions osf/external/askismet/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,37 @@ def submit_ham(self, user_ip, user_agent, **kwargs):
)
if res.status_code != requests.codes.ok:
raise AkismetClientError(reason=res.text)

def get_flagged_count(self, start_date, end_date, category='node'):
    """Count FLAG_SPAM log entries attributed to Akismet inside a date window.

    A log row is counted when its ``created`` value is strictly greater than
    ``start_date`` and strictly less than ``end_date``, and the
    ``<category>__spam_data__who_flagged`` lookup is ``'akismet'`` or ``'both'``.

    :param start_date: exclusive lower bound for the log's ``created`` value
    :param end_date: exclusive upper bound for the log's ``created`` value
    :param category: ``'node'`` (query ``NodeLog``) or ``'preprint'`` (query ``PreprintLog``)
    :raises ValueError: when ``category`` is neither ``'node'`` nor ``'preprint'``
    :return: the number of matching log rows
    """
    from osf.models import NodeLog, PreprintLog

    if category not in ['node', 'preprint']:
        raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.")

    # TODO (carried over from PR review): PreprintLog fixes/updates in DOI may
    # not have been covered yet -- needs a look.
    if category == 'node':
        log_model = NodeLog
    else:
        log_model = PreprintLog

    # The relation name in the last lookup depends on the category, so the
    # whole filter is assembled as a dict and expanded in one go.
    criteria = {
        'action': log_model.FLAG_SPAM,
        'created__gt': start_date,
        'created__lt': end_date,
        f'{category}__spam_data__who_flagged__in': ['akismet', 'both'],
    }
    return log_model.objects.filter(**criteria).count()

def get_hammed_count(self, start_date, end_date, category='node'):
    """Count CONFIRM_HAM log entries for items that Akismet had flagged.

    Only rows whose ``created`` value lies strictly between ``start_date``
    and ``end_date`` are counted, restricted to objects whose
    ``<category>__spam_data__who_flagged`` lookup is ``'akismet'`` or ``'both'``.

    :param start_date: exclusive lower bound for the log's ``created`` value
    :param end_date: exclusive upper bound for the log's ``created`` value
    :param category: ``'node'`` (query ``NodeLog``) or ``'preprint'`` (query ``PreprintLog``)
    :raises ValueError: when ``category`` is neither ``'node'`` nor ``'preprint'``
    :return: the number of matching log rows
    """
    from osf.models import NodeLog, PreprintLog

    if category not in ['node', 'preprint']:
        raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.")

    # Category has been validated above, so this lookup cannot miss.
    models_by_category = {'node': NodeLog, 'preprint': PreprintLog}
    log_model = models_by_category[category]

    flagged_by_lookup = f'{category}__spam_data__who_flagged__in'
    matching_logs = log_model.objects.filter(
        action=log_model.CONFIRM_HAM,
        created__gt=start_date,
        created__lt=end_date,
        **{flagged_by_lookup: ['akismet', 'both']}
    )
    return matching_logs.count()
34 changes: 34 additions & 0 deletions osf/external/oopspam/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,37 @@ def check_content(self, user_ip, content, **kwargs):

# OOPSpam returns a spam score out of 6. 3 or higher indicates spam
return spam_score >= settings.OOPSPAM_SPAM_LEVEL, resp_json

def get_flagged_count(self, start_date, end_date, category='node'):
    """Count FLAG_SPAM log entries attributed to OOPSpam inside a date window.

    A log row is counted when its ``created`` value is strictly greater than
    ``start_date`` and strictly less than ``end_date``, and the
    ``<category>__spam_data__who_flagged`` lookup is ``'oopspam'`` or ``'both'``.

    :param start_date: exclusive lower bound for the log's ``created`` value
    :param end_date: exclusive upper bound for the log's ``created`` value
    :param category: ``'node'`` (query ``NodeLog``) or ``'preprint'`` (query ``PreprintLog``)
    :raises ValueError: when ``category`` is neither ``'node'`` nor ``'preprint'``
    :return: the number of matching log rows
    """
    from osf.models import NodeLog, PreprintLog

    if category not in ['node', 'preprint']:
        raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.")

    if category == 'node':
        log_model = NodeLog
    else:
        log_model = PreprintLog

    # The relation name in the last lookup depends on the category, so the
    # whole filter is assembled as a dict and expanded in one go.
    criteria = {
        'action': log_model.FLAG_SPAM,
        'created__gt': start_date,
        'created__lt': end_date,
        f'{category}__spam_data__who_flagged__in': ['oopspam', 'both'],
    }
    return log_model.objects.filter(**criteria).count()

def get_hammed_count(self, start_date, end_date, category='node'):
    """Count CONFIRM_HAM log entries for items that OOPSpam had flagged.

    Only rows whose ``created`` value lies strictly between ``start_date``
    and ``end_date`` are counted, restricted to objects whose
    ``<category>__spam_data__who_flagged`` lookup is ``'oopspam'`` or ``'both'``.

    :param start_date: exclusive lower bound for the log's ``created`` value
    :param end_date: exclusive upper bound for the log's ``created`` value
    :param category: ``'node'`` (query ``NodeLog``) or ``'preprint'`` (query ``PreprintLog``)
    :raises ValueError: when ``category`` is neither ``'node'`` nor ``'preprint'``
    :return: the number of matching log rows
    """
    from osf.models import NodeLog, PreprintLog

    if category not in ['node', 'preprint']:
        raise ValueError(f"Invalid category '{category}'. Expected 'node' or 'preprint'.")

    # Category has been validated above, so this lookup cannot miss.
    models_by_category = {'node': NodeLog, 'preprint': PreprintLog}
    log_model = models_by_category[category]

    flagged_by_lookup = f'{category}__spam_data__who_flagged__in'
    matching_logs = log_model.objects.filter(
        action=log_model.CONFIRM_HAM,
        created__gt=start_date,
        created__lt=end_date,
        **{flagged_by_lookup: ['oopspam', 'both']}
    )
    return matching_logs.count()
118 changes: 118 additions & 0 deletions osf/management/commands/migrate_preprint_affiliation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import datetime
import logging

from django.core.management.base import BaseCommand
from django.db import transaction
from django.db.models import F, Exists, OuterRef

from osf.models import PreprintContributor, InstitutionAffiliation

logger = logging.getLogger(__name__)

AFFILIATION_TARGET_DATE = datetime.datetime(2024, 9, 19, 14, 37, 48, tzinfo=datetime.timezone.utc)


class Command(BaseCommand):
    """Management command wrapper around ``assign_affiliations_to_preprints``.

    Parses the command-line options (``--exclude-guids``, ``--dry-run``,
    ``--batch-size``), delegates the actual work, and logs timing plus the
    processed/updated totals.
    """

    help = 'Assign affiliations from users to preprints where they have write or admin permissions.'

    def add_arguments(self, parser):
        """Register the command's options on ``parser`` in a fixed order."""
        option_specs = (
            ('--exclude-guids', {
                'nargs': '+',
                'dest': 'exclude_guids',
                'help': 'List of user GUIDs to exclude from affiliation assignment',
            }),
            ('--dry-run', {
                'action': 'store_true',
                'dest': 'dry_run',
                'help': 'If true, performs a dry run without making changes',
            }),
            ('--batch-size', {
                'type': int,
                'default': 1000,
                'dest': 'batch_size',
                'help': 'Number of contributors to process in each batch',
            }),
        )
        for flag, spec in option_specs:
            parser.add_argument(flag, **spec)

    def handle(self, *args, **options):
        """Run the affiliation assignment and log start/finish times and totals."""
        start_time = datetime.datetime.now()
        logger.info(f'Script started at: {start_time}')

        run_kwargs = {
            # A missing/None option collapses to an empty set.
            'exclude_guids': set(options.get('exclude_guids') or []),
            'dry_run': options.get('dry_run', False),
            'batch_size': options.get('batch_size', 1000),
        }

        if run_kwargs['dry_run']:
            logger.info('Dry run mode activated.')

        processed_count, updated_count = assign_affiliations_to_preprints(**run_kwargs)

        finish_time = datetime.datetime.now()
        logger.info(f'Script finished at: {finish_time}')
        logger.info(f'Total processed: {processed_count}, Updated: {updated_count}')
        logger.info(f'Total run time: {finish_time - start_time}')


def assign_affiliations_to_preprints(exclude_guids=None, dry_run=True, batch_size=1000):
    """Add each qualifying contributor's institutions to their preprint.

    Contributors are selected when the preprint has a group object permission
    with codename ``write_preprint`` whose group contains the contributor's
    user, and the user has at least one ``InstitutionAffiliation``. Users whose
    GUID is in ``exclude_guids`` are left out. Preprints created after
    ``AFFILIATION_TARGET_DATE`` are skipped and not counted.

    :param exclude_guids: collection of user GUIDs to exclude; falsy means none
    :param dry_run: when true (the default) only log what would be assigned
    :param batch_size: number of contributor rows fetched per slice
    :return: ``(processed_count, updated_count)``; ``updated_count`` stays 0 in a dry run
    """
    if not exclude_guids:
        exclude_guids = set()
    processed_count = 0
    updated_count = 0

    # Correlated subquery: true when the contributor's user has any affiliation row.
    user_has_affiliations = Exists(InstitutionAffiliation.objects.filter(user=OuterRef('user')))

    # Both permission conditions are kept in ONE filter() call so they apply to
    # the same related permission row.
    with_write_permission = PreprintContributor.objects.filter(
        preprint__preprintgroupobjectpermission__permission__codename__in=['write_preprint'],
        preprint__preprintgroupobjectpermission__group__user=F('user'),
    )
    affiliated_only = with_write_permission.filter(user_has_affiliations)
    with_relations = affiliated_only.select_related('user', 'preprint')
    not_excluded = with_relations.exclude(user__guids___id__in=exclude_guids)
    contributors_qs = not_excluded.order_by('pk')  # stable ordering so offset slices don't shift

    total_contributors = contributors_qs.count()
    logger.info(f'Total contributors to process: {total_contributors}')

    with transaction.atomic():
        for offset in range(0, total_contributors, batch_size):
            # select_for_update() is used to ensure the query hits the primary database
            current_batch = contributors_qs[offset:offset + batch_size].select_for_update()

            logger.info(f'Processing contributors {offset + 1} to {min(offset + batch_size, total_contributors)}')

            for contributor in current_batch:
                user = contributor.user
                preprint = contributor.preprint

                # Preprints created after the cutoff are out of scope for this migration.
                if preprint.created > AFFILIATION_TARGET_DATE:
                    continue

                user_institutions = user.get_affiliated_institutions()
                processed_count += 1

                if dry_run:
                    logger.info(
                        f'Dry run: Would assign {len(user_institutions)} affiliations from user <{user._id}> to preprint <{preprint._id}>.'
                    )
                    continue

                preprint.affiliated_institutions.add(*user_institutions)
                updated_count += 1
                logger.info(
                    f'Assigned {len(user_institutions)} affiliations from user <{user._id}> to preprint <{preprint._id}>.'
                )

    return processed_count, updated_count
1 change: 1 addition & 0 deletions osf/metrics/counted_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def _fill_document_id(counted_usage):
counted_usage.session_id,
counted_usage.timestamp.date(),
time_window,
','.join(sorted(counted_usage.action_labels)),
)


Expand Down
2 changes: 2 additions & 0 deletions osf/metrics/reporters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .public_item_usage import PublicItemUsageReporter
from .user_count import UserCountReporter
from .spam_count import SpamCountReporter
from .private_spam_metrics import PrivateSpamMetricsReporter


class AllDailyReporters(enum.Enum):
Expand All @@ -32,3 +33,4 @@ class AllMonthlyReporters(enum.Enum):
INSTITUTIONAL_USERS = InstitutionalUsersReporter
INSTITUTIONAL_SUMMARY = InstitutionalSummaryMonthlyReporter
ITEM_USAGE = PublicItemUsageReporter
PRIVATE_SPAM_METRICS = PrivateSpamMetricsReporter
Loading
Loading