Skip to content

Commit

Permalink
experiments in only using first initial when not ambiguous
Browse files Browse the repository at this point in the history
  • Loading branch information
peetucket committed Apr 24, 2018
1 parent c86208b commit 2705bc5
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 12 deletions.
7 changes: 7 additions & 0 deletions app/models/author.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ def institution
Settings.HARVESTER.INSTITUTION.name
end

# Tells callers whether the "LastName,FirstInitial" form of this author's name is unsafe to search on.
# It is treated as ambiguous when any of the following holds:
#   - the first or last name is missing,
#   - more than one active, import-enabled author row has the same last name and a preferred
#     first name starting with the same letter,
#   - this author has any alternate identities.
# @return [Boolean] true when the first-initial form should not be used
def ambiguous_first_initial?
  return true if !first_name || !last_name

  initial_pattern = "#{first_name[0]}%"
  same_initial_authors = self.class
                             .where('preferred_first_name like ? and preferred_last_name = ?', initial_pattern, last_name)
                             .where(active_in_cap: true, cap_import_enabled: true)
  # a count above one means at least two matching rows share the initial and last name
  return true if same_initial_authors.size > 1

  !author_identities.empty?
end

# @return [Array<Integer>] ScienceWireIds for approved publications
def approved_sciencewire_ids
publications.where("contributions.status = 'approved'")
Expand Down
14 changes: 7 additions & 7 deletions lib/agent/author_name.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ def text_search_query
text_search_terms.map { |x| "\"#{x}\"" }.join(' or ')
end

# Builds the unique, non-empty name variants used for text searches.
#
# @param options [Hash]
# @option options [Boolean] :use_first_initial (true) pass false to leave out the
#   'Lastname,FirstInitial' variant
# @return [Array] unique, non-empty terms from first_name_query and middle_name_query
def text_search_terms(options = {})
  # `options[:use_first_initial] || true` is always true, so an explicit false was ignored.
  # Only an explicit false disables the initial; a missing or nil value keeps the default.
  use_first_initial = options[:use_first_initial] != false
  # Memoize per flag value so a call with one setting never returns terms built for the other.
  @text_search_terms ||= {}
  @text_search_terms[use_first_initial] ||=
    [first_name_query(use_first_initial), middle_name_query].flatten.reject(&:empty?).uniq
end

def ==(other)
Expand All @@ -68,12 +69,11 @@ def ==(other)
# 'Lastname,Firstname' or
# 'Lastname,FirstInitial'
# @return [Array<String>|String] names
# @param use_first_initial [Boolean] when true (the default) the 'Lastname,FirstInitial'
#   variant is appended after the 'Lastname,Firstname' variant
def first_name_query(use_first_initial = true)
  return '' if last.empty? && first.empty?

  # The default keeps zero-argument callers working as they did before the flag existed.
  variants = ["#{last_name},#{first_name}"]
  variants << "#{last_name},#{first_initial}" if use_first_initial
  variants
end

# Name variants for:
Expand Down
2 changes: 1 addition & 1 deletion lib/web_of_science/query_author.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def names
ident.last_name,
ident.first_name,
Settings.HARVESTER.USE_MIDDLE_NAME ? ident.middle_name : ''
).text_search_terms
).text_search_terms(use_first_initial: !author.ambiguous_first_initial?)
end.flatten.uniq
end

Expand Down
13 changes: 13 additions & 0 deletions spec/factories/author.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,19 @@
end
end

# Author fixture with a deliberately unusual name, flagged as active and import-enabled.
# spec/models/author_spec.rb creates it to exercise the non-ambiguous branch of
# Author#ambiguous_first_initial?.
factory :odd_name, parent: :author do
# both flags are part of the scope that Author#ambiguous_first_initial? queries
active_in_cap true
cap_import_enabled true
official_first_name 'Somebody'
official_last_name 'WithReallyUnusualName'
official_middle_name ''
# the preferred_* columns are the ones Author#ambiguous_first_initial? matches against
preferred_first_name 'Somebody'
preferred_last_name 'WithReallyUnusualName'
preferred_middle_name ''
# NOTE(review): the two address literals below look like scrape-time e-mail redaction — confirm the real fixture values
email '[email protected]'
emails_for_harvest '[email protected]'
end

# Public data from
# - https://stanfordwho.stanford.edu
# - https://med.stanford.edu/profiles/russ-altman
Expand Down
31 changes: 27 additions & 4 deletions spec/lib/agent/author_name_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -117,18 +117,18 @@

describe '#text_search_terms' do
  # With default options the terms must contain every first-name and middle-name variant.
  it 'includes first_name_query and middle_name_query elements' do
    # first_name_query is reached via send and takes the use_first_initial flag;
    # the stale zero-argument call was dropped because its result was overwritten anyway
    fnames = all_names.send(:first_name_query, true)
    mnames = all_names.send(:middle_name_query)
    expect(all_names.text_search_terms).to include(*fnames, *mnames)
  end
end

describe '#first_name_query' do
# With no names there is nothing to search on, so an empty String is expected.
# The stale zero-argument call was dropped; the method is called with its flag.
it 'when no names are present returns an empty String' do
  expect(no_names.send(:first_name_query, true)).to eq ''
end
context 'when all names are present' do
let(:fn_query) { all_names.send(:first_name_query) }
context 'when all names are present with middle initial' do
let(:fn_query) { all_names.send(:first_name_query, true) }
it 'is Array<String> with non-empty unique values' do
expect(fn_query).to be_an Array
expect(fn_query).to all(be_a(String))
Expand All @@ -150,6 +150,29 @@
expect(fn_query).to all(exclude(",#{all_names.middle_initial}"))
end
end
# Exercises first_name_query(false): the flag switches off the FIRST-initial variant.
# The previous description said "middle initial", which is not what this context toggles.
context 'when all names are present without first initial' do
  let(:fn_query) { all_names.send(:first_name_query, false) }
  it 'is Array<String> with non-empty unique values' do
    expect(fn_query).to be_an Array
    expect(fn_query).to all(be_a(String))
    expect(fn_query).not_to include(be_empty)
    expect(fn_query.size).to eq(fn_query.uniq.size)
  end
  it 'includes name with first_name' do
    expect(fn_query).to include "#{all_names.last_name},#{all_names.first_name}"
  end
  it 'does not include name with first_initial' do
    expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_initial}"
  end
  # middle-name variants are checked to be absent from this query
  it 'does not include name with middle_name' do
    expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_name},#{all_names.middle_name}"
    expect(fn_query).to all(exclude(",#{all_names.middle_name}"))
  end
  it 'does not include name with middle_initial' do
    expect(fn_query).not_to include "#{all_names.last_name},#{all_names.first_name},#{all_names.middle_initial}"
    expect(fn_query).to all(exclude(",#{all_names.middle_initial}"))
  end
end
end

describe '#middle_name_query' do
Expand Down
13 changes: 13 additions & 0 deletions spec/models/author_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,19 @@
end
end

describe '#ambiguous_first_initial?' do
  it 'confirms ambiguous first initial' do
    subject.update_from_cap_authorship_profile_hash(auth_hash)
    # the profile hash gives the author two alternate identities...
    expect(subject.author_identities.size).to eq 2
    # ...so the first-initial form must not be used for searching
    expect(subject.ambiguous_first_initial?).to be true
  end

  it 'confirms non-ambiguous first initial' do
    unique_author = create(:odd_name)
    # no alternate identities, and the expectation below relies on no other author sharing this name
    expect(unique_author.author_identities.size).to eq 0
    # so searching with the first initial is acceptable
    expect(unique_author.ambiguous_first_initial?).to be false
  end
end

describe '#first_name' do
it 'is the preferred_first_name' do
subject.update_from_cap_authorship_profile_hash(auth_hash)
Expand Down

0 comments on commit 2705bc5

Please sign in to comment.