Skip to content

Commit

Permalink
allow more first name searches when alternate identities exist but ar…
Browse files Browse the repository at this point in the history
…e all stanford or blank institution
  • Loading branch information
peetucket committed Nov 24, 2020
1 parent e7f0b2c commit 2580879
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 11 deletions.
10 changes: 7 additions & 3 deletions app/models/author.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,15 @@ def institution
Settings.HARVESTER.INSTITUTION.name
end

# indicates if the LastName, FirstInitial form for this user is ambiguous within our author database and there are no alternate identities
# indicates if the LastName, FirstInitial form for this user is ambiguous within our author database (including any alternate identities that include Stanford as an institution)
# also checks to see if there are alternate identities with institutions other than Stanford, which is problematic, and should be considered ambiguous
# @return [Boolean] true when searching by LastName, FirstInitial could match someone other than this author
def ambiguous_first_initial?
  # without both name parts no first-initial form can be built, so report it as ambiguous
  return true unless first_name && last_name

  # this author's own row is part of the match, so more than one row means somebody else shares the form
  same_form_authors = self.class.where('preferred_first_name like ? and preferred_last_name = ?', "#{first_name[0]}%", last_name)
  return true if same_form_authors.size > 1

  # any? stops at the first ambiguous identity instead of querying for every one of them
  author_identities.any? do |author_identity|
    institution = author_identity.institution
    # an alternate identity at a named, non-Stanford institution is always treated as ambiguous
    next true if institution.present? && !institution.include?('Stanford')

    # the owning author is excluded by id here, so a single remaining match is already a clash
    self.class.where('preferred_first_name like ? and preferred_last_name = ? and id != ?',
                     "#{author_identity.first_name[0]}%", author_identity.last_name, author_identity.author_id).exists?
  end
end

# @return [Array<Integer>] ScienceWireIds for approved publications
Expand Down
20 changes: 20 additions & 0 deletions spec/factories/author.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,26 @@
end
end

# Author named 'Albert Edler' (official and preferred), built on top of the :author factory.
factory :author_duped_last_name, parent: :author do
  # names
  preferred_first_name { 'Albert' }
  preferred_middle_name { '' }
  preferred_last_name { 'Edler' }
  official_first_name { 'Albert' }
  official_middle_name { '' }
  official_last_name { 'Edler' }

  # contact details
  email { '[email protected]' }
  emails_for_harvest { '[email protected]' }

  # generated identifiers (kept in their original relative order)
  sunetid { FactoryBot.generate(:random_id) }
  cap_profile_id { FactoryBot.generate(:random_id) }
  university_id { FactoryBot.generate(:random_id) }
  california_physician_license { FactoryBot.generate(:random_string) }

  active_in_cap { true }
end

# Same as :author, except active_in_cap is set to false.
factory(:inactive_author, parent: :author) do
  active_in_cap { false }
end

factory :author_with_alternate_identities, parent: :author do
transient do
alt_count { 1 } # default number of alternate identities to create
Expand Down
4 changes: 2 additions & 2 deletions spec/lib/web_of_science/query_author_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@

it 'ignores the bad alternate identity data' do
  expect(author_one_identity.author_identities.first.first_name).to eq '.' # bad first name
  # we do not get the name variant with the period for a first name (we would have more if we allowed the bad name variant)
  # a single expectation on the names: two different eq expectations on the same call can never both pass
  expect(described_class.new(author_one_identity).send(:names)).to eq %w[Edler,Alice Edler,A Edler,Alice,Jim Edler,Alice,J Edler,AJ Edler,A,J]
end
end
end
Expand Down
24 changes: 18 additions & 6 deletions spec/models/author_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,28 @@
end

describe '#ambiguous_first_initial?' do
  it 'confirms non-ambiguous first initial within stanford with no alternate identities' do
    odd_name = create :odd_name
    expect(odd_name.author_identities.size).to eq(0) # has no alternate identities
    expect(odd_name.ambiguous_first_initial?).to eq(false) # and no other odd names likes this at stanford, so ok to search with first initial
  end

  it 'confirms non-ambiguous first initial within stanford with stanford only alternate identities' do
    subject.update_from_cap_authorship_profile_hash(auth_hash)
    expect(subject.author_identities.size).to eq(2) # has alternate identities
    expect(subject.ambiguous_first_initial?).to eq(false) # ok, because all of the alternate identities are stanford or no institution, and no other first name ambiguity
  end

  it 'confirms ambiguous first initial within stanford with no alternate identities' do
    create :author_duped_last_name
    expect(subject.author_identities.size).to eq(0) # no alternate identities
    expect(subject.ambiguous_first_initial?).to eq(true) # yes, because we now have another stanford author with the same last name and same first initial
  end

  it 'confirms ambiguous first initial even when non ambiguous within Stanford due to a non-Stanford alternate identity existing' do
    author_with_alternate_identities = create :author_with_alternate_identities
    alternate_institution = author_with_alternate_identities.author_identities.first.institution
    expect(author_with_alternate_identities.author_identities.size).to eq(1) # alternate identities for primary author
    # be_blank is the predicate matcher, so blank? is evaluated on the institution itself
    expect(alternate_institution).not_to be_blank # alternate institution is not empty
    expect(alternate_institution).not_to include('Stanford') # alternate institution is not Stanford
    expect(author_with_alternate_identities.ambiguous_first_initial?).to eq(true) # yes, because even though there are no other stanford authors with similar names, they have a non-Stanford alternate identity
  end
end

describe '#first_name' do
Expand Down

0 comments on commit 2580879

Please sign in to comment.