diff --git a/iis-wf/iis-wf-affmatching/src/main/java/eu/dnetlib/iis/wf/affmatching/match/FirstWordsHashBucketMatcherFactory.java b/iis-wf/iis-wf-affmatching/src/main/java/eu/dnetlib/iis/wf/affmatching/match/FirstWordsHashBucketMatcherFactory.java index b89ed49cf..032beb5c0 100644 --- a/iis-wf/iis-wf-affmatching/src/main/java/eu/dnetlib/iis/wf/affmatching/match/FirstWordsHashBucketMatcherFactory.java +++ b/iis-wf/iis-wf-affmatching/src/main/java/eu/dnetlib/iis/wf/affmatching/match/FirstWordsHashBucketMatcherFactory.java @@ -79,9 +79,9 @@ public static ImmutableList createNameFirstWordsHashBucketMatc return ImmutableList.of( createNameCountryStrictMatchVoter(0.981f, new GetOrgNameFunction()), createNameStrictCountryLooseMatchVoter(0.966f, new GetOrgNameFunction()), - createSectionedNameStrictCountryLooseMatchVoter(0.917f, new GetOrgNameFunction()), - createSectionedNameLevenshteinCountryLooseMatchVoter(0.910f, new GetOrgNameFunction()), - createSectionedNameStrictCountryLooseMatchVoter(0.882f, new GetOrgShortNameFunction()), + createSectionedNameStrictCountryLooseMatchVoter(0.815f, new GetOrgNameFunction()), + createSectionedNameLevenshteinCountryLooseMatchVoter(0.816f, new GetOrgNameFunction()), + createSectionedNameStrictCountryLooseMatchVoter(0.682f, new GetOrgShortNameFunction()), commonAffOrgNameWordsVoter); } diff --git a/iis-wf/iis-wf-affmatching/src/main/java/eu/dnetlib/iis/wf/affmatching/match/MainSectionHashBucketMatcherFactory.java b/iis-wf/iis-wf-affmatching/src/main/java/eu/dnetlib/iis/wf/affmatching/match/MainSectionHashBucketMatcherFactory.java index 0b0c095c9..950720831 100644 --- a/iis-wf/iis-wf-affmatching/src/main/java/eu/dnetlib/iis/wf/affmatching/match/MainSectionHashBucketMatcherFactory.java +++ b/iis-wf/iis-wf-affmatching/src/main/java/eu/dnetlib/iis/wf/affmatching/match/MainSectionHashBucketMatcherFactory.java @@ -114,8 +114,8 @@ public static ImmutableList createAlternativeNameMainSectionHa return ImmutableList.of( createNameCountryStrictMatchVoter(1f, new GetOrgAlternativeNamesFunction()), createNameStrictCountryLooseMatchVoter(1f, new GetOrgAlternativeNamesFunction()), - createSectionedNameStrictCountryLooseMatchVoter(1f, new GetOrgAlternativeNamesFunction()), - createSectionedNameLevenshteinCountryLooseMatchVoter(1f, new GetOrgAlternativeNamesFunction()) + createSectionedNameStrictCountryLooseMatchVoter(0.987f, new GetOrgAlternativeNamesFunction()), + createSectionedNameLevenshteinCountryLooseMatchVoter(0.988f, new GetOrgAlternativeNamesFunction()) ); } diff --git a/iis-wf/iis-wf-affmatching/src/test/resources/experimentalData/input/all_organizations.json b/iis-wf/iis-wf-affmatching/src/test/resources/experimentalData/input/all_organizations.json index 4da2ff9cc..7ec8e4fee 100644 --- a/iis-wf/iis-wf-affmatching/src/test/resources/experimentalData/input/all_organizations.json +++ b/iis-wf/iis-wf-affmatching/src/test/resources/experimentalData/input/all_organizations.json @@ -61086,4 +61086,19 @@ {"id":"20|dedup_wf_001::fd702fb85b83582332dfdc6c8cdeff18","name":"Norsk LandbruksrÄdgiving","shortName":"NLR","countryName":"Norway","countryCode":"NO","websiteUrl":"http://www.nlr.no/"} {"id":"20|dedup_wf_001::fa02c455756943e58e7fb08e5a7576d9","name":"National Aerospace Laboratory","shortName":"NLR","countryName":"Netherlands","countryCode":"NL","websiteUrl":"http://www.nlr.nl/"} {"id":"20|dedup_wf_001::8541f5855b79a9a508bab2d16d3c9a5e","name":"North Little Rock Health Department","shortName":"NLR","countryName":"United States","countryCode":"US","websiteUrl":"http://nlr.ar.gov/"} -{"id":"20|dedup_wf_001::351db7072f44f66a3d8ee47b88dac2ee","name":"Netherlands Leprosy Relief","shortName":"NLR","countryName":"Netherlands","countryCode":"NL","websiteUrl":"http://leprosyrelief.org/"} \ No newline at end of file +{"id":"20|dedup_wf_001::351db7072f44f66a3d8ee47b88dac2ee","name":"Netherlands Leprosy Relief","shortName":"NLR","countryName":"Netherlands","countryCode":"NL","websiteUrl":"http://leprosyrelief.org/"} + +{"id":"20|openorgs____::f5f8a6f2df34e11a00157845bdba14f1","name":"Department of Education","shortName":"Department of Education","countryName":"Philippines","countryCode":"PH","websiteUrl":"http://www.deped.gov.ph/"} +{"id":"20|pending_org_::74114f0833bfb23f09d6fcd263b77d5a","name":"Department of Education","shortName":"TAFE","countryName":"Australia","countryCode":"AU","websiteUrl":"https://www.education.gov.au"} +{"id":"20|openorgs____::22c14af7928c73f46f00abe2f33a5d84","name":"Department of Education","shortName":"Department of Education","countryName":"Ireland","countryCode":"IE","websiteUrl":null} +{"id":"20|ror_________::dfde3ffe0c274d69dd15506b8ab26545","name":"Department of Education","shortName":"DENI","countryName":"United Kingdom","countryCode":"GB","websiteUrl":"https://www.deni.gov.uk/"} +{"id":"20|pending_org_::ae2099bec3e47f7567554a0148083543","name":"DEPARTMENT OF EDUCATION","shortName":"DEPARTMENT OF EDUCATION","countryName":"UNKNOWN","countryCode":"UNKNOWN","websiteUrl":null} +{"id":"20|openorgs____::9d7fa3f561ff71105cbd355db80771ce","name":"Department of Education","shortName":"DENI","countryName":"United Kingdom","countryCode":"GB","websiteUrl":"https://www.deni.gov.uk/"} +{"id":"20|pending_org_::bcc20762d34a86a99848c6168100f784","name":"Department of Education","shortName":"Department of Education","countryName":"UNKNOWN","countryCode":"UNKNOWN","websiteUrl":null} +{"id":"20|openorgs____::535ab088eeda944e72c195b359142ad9","name":"Department of Education","shortName":"Department of Education","countryName":"Spain","countryCode":"ES","websiteUrl":"http://www.hezkuntza.ejgv.euskadi.eus/r43-2722/en"} + + + + + + diff --git a/iis-wf/iis-wf-affmatching/src/test/resources/experimentalData/input/docs_with_aff_real_data.json b/iis-wf/iis-wf-affmatching/src/test/resources/experimentalData/input/docs_with_aff_real_data.json index 5d1db9607..ac269e18d 100644 --- a/iis-wf/iis-wf-affmatching/src/test/resources/experimentalData/input/docs_with_aff_real_data.json +++ b/iis-wf/iis-wf-affmatching/src/test/resources/experimentalData/input/docs_with_aff_real_data.json @@ -159,4 +159,6 @@ {"id":"50|od_______908::77e4ff0ae0a493b0a64acbdead7dc5d8","affiliations":[{"organization":"Center of Excellence in Biotechnology Research, King Saud University","countryName":"Saudi Arabia","countryCode":"SA","address":"Riyadh 11451, Kingdom of","rawText":"Center of Excellence in Biotechnology Research, King Saud University, Riyadh 11451, Kingdom of Saudi Arabia"},{"organization":"Biomarkers Research Program, Biochemistry Department, College of Science, King Saud University","countryName":"Saudi Arabia","countryCode":"SA","address":"PO Box, 2455, Riyadh 11451, Kingdom of","rawText":"Biomarkers Research Program, Biochemistry Department, College of Science, King Saud University, PO Box, 2455, Riyadh 11451, Kingdom of Saudi Arabia"},{"organization":"Prince Mutaib Chair for Biomarkers of Osteoporosis, Biochemistry Department, College of Science, King Saud University","countryName":"Saudi Arabia","countryCode":"SA","address":"Riyadh 11451, Kingdom of","rawText":"Prince Mutaib Chair for Biomarkers of Osteoporosis, Biochemistry Department, College of Science, King Saud University, Riyadh 11451, Kingdom of Saudi Arabia"},{"organization":"Clinical Pharmacy Department, College of Pharmacy, King Saud University","countryName":"Saudi Arabia","countryCode":"SA","address":"Riyadh 11451, Kingdom of","rawText":"Clinical Pharmacy Department, College of Pharmacy, King Saud University, Riyadh 11451, Kingdom of Saudi Arabia"},{"organization":"College of Food Science & Agriculture, Department of Food Science & Nutrition, King Saud University","countryName":"Saudi Arabia","countryCode":"SA","address":"Riyadh, Kingdom of","rawText":"College of Food Science & Agriculture, Department of Food Science & Nutrition, King Saud University, Riyadh, Kingdom of Saudi Arabia"},{"organization":"Department of Health Administration, King Saud University","countryName":"Saudi Arabia","countryCode":"SA","address":"Riyadh, Kingdom of","rawText":"Department of Health Administration, King Saud University, Riyadh, Kingdom of Saudi Arabia"},{"organization":"Health Affairs for Riyadh Region, Ministry of Health","countryName":"Saudi Arabia","countryCode":"SA","address":"Riyadh, Kingdom of","rawText":"Health Affairs for Riyadh Region, Ministry of Health, Riyadh, Kingdom of Saudi Arabia"},{"organization":"Department of Clinical Lab Sciences, College of Applied Medical Sciences, King Saud University","countryName":"Saudi Arabia","countryCode":"SA","address":"Riyadh 11451, Kingdom of","rawText":"Department of Clinical Lab Sciences, College of Applied Medical Sciences, King Saud University, Riyadh 11451, Kingdom of Saudi Arabia"}],"title": null,"abstract": null,"language": null,"keywords": null,"externalIdentifiers": null,"journal": null,"year": null,"publisher": null,"references": null,"authors": null,"volume": null,"issue": null,"pages": null,"publicationTypeName": null,"text": ""} {"id":"50|od_______908::77e52f5d2ecf1ad4e1fcd5d2ab6b79c2","affiliations":[{"organization":"INRA","countryName":"France","countryCode":"FR","address":null,"rawText":"INRA, France"},{"organization":"Northeastern University","countryName":"United States of America","countryCode":"US","address":null,"rawText":"Northeastern University, United States of America"}],"title": null,"abstract": null,"language": null,"keywords": null,"externalIdentifiers": null,"journal": null,"year": null,"publisher": null,"references": null,"authors": null,"volume": null,"issue": null,"pages": null,"publicationTypeName": null,"text": ""} {"id":"50|dedup_wf_001::6e9c8dd0ff2c33c49a207b43a3bdcae6","affiliations":[{"organization":"Department of Medical Oncology, The s econd a ffiliated h ospital of Zhejiang University s chool of Medicine","countryName":null,"countryCode":null,"address":"h angzhou, Zhejiang Province, People's republic of china","rawText":"Department of Medical Oncology, The s econd a ffiliated h ospital of Zhejiang University s chool of Medicine, h angzhou, Zhejiang Province, People's republic of china"},{"organization":"cancer institute, Key laboratory of cancer Prevention and intervention, chinese n ational Ministry of education","countryName":null,"countryCode":null,"address":"h angzhou, Zhejiang Province, People's republic of china","rawText":"cancer institute, Key laboratory of cancer Prevention and intervention, chinese n ational Ministry of education, h angzhou, Zhejiang Province, People's republic of china"},{"organization":"nlr","countryName":null,"countryCode":null,"address":"Mlr, WBc, neutrophil, monocyte","rawText":"nlr, Mlr, WBc, neutrophil, monocyte"}],"title":null,"abstract":null,"language":null,"keywords":null,"externalIdentifiers":null,"journal":null,"year":null,"publisher":null,"references":null,"authors":null,"volume":null,"issue":null,"pages":null,"publicationTypeName":null,"text":""} -{"id":"50|dedup_wf_001::42330b7e2ea76d9fff4c9556fcb57f88","affiliations":[{"organization":"Institute of Physics, M. Curie-Sklodowska University","countryName":null,"countryCode":null,"address":"Radziszewskiego 10, Pl 20-031 Lublin","rawText":"Institute of Physics, M. Curie-Sklodowska University, Radziszewskiego 10, Pl 20-031 Lublin"}],"title":null,"abstract":null,"language":null,"keywords":null,"externalIdentifiers":null,"journal":null,"year":null,"publisher":null,"references":null,"authors":null,"volume":null,"issue":null,"pages":null,"publicationTypeName":null,"text":""} \ No newline at end of file +{"id":"50|dedup_wf_001::42330b7e2ea76d9fff4c9556fcb57f88","affiliations":[{"organization":"Institute of Physics, M. Curie-Sklodowska University","countryName":null,"countryCode":null,"address":"Radziszewskiego 10, Pl 20-031 Lublin","rawText":"Institute of Physics, M. Curie-Sklodowska University, Radziszewskiego 10, Pl 20-031 Lublin"}],"title":null,"abstract":null,"language":null,"keywords":null,"externalIdentifiers":null,"journal":null,"year":null,"publisher":null,"references":null,"authors":null,"volume":null,"issue":null,"pages":null,"publicationTypeName":null,"text":""} + +{"id":"50|doi_dedup___::f15c0d796f8825294e2dacb172bb87f5","affiliations":[{"organization":"Department of Education, University of Catania","countryName":null,"countryCode":null,"address":"I-95124 Catania, Italy; g.chisari@tin.it","rawText":"Department of Education, University of Catania, I-95124 Catania, Italy; g.chisari@tin.it"}],"title":null,"abstract":null,"language":null,"keywords":null,"externalIdentifiers":null,"journal":null,"year":null,"publisher":null,"references":null,"authors":null,"volume":null,"issue":null,"pages":null,"publicationTypeName":null,"text":""}