# # Step 6 - inflating the predictions with the Newfoundland data
using DelimitedFiles
using DataFrames
using CSV: CSV
using GBIF
using NCBITaxonomy: NCBITaxonomy
using EcologicalNetworks
# We start by reading the Newfoundland food web, and then check which of its
# names are mammals. The code to read the network looks exactly like the code
# used to read the European metaweb.
sl_raw = readdlm("data/NLfoodweb.csv", ',')
sl_sp = replace.(sl_raw[1, 2:end], "." => " ")
sl_A = Bool.(sl_raw[2:end, 2:end])
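# As a quick sanity check (illustrative only, and assuming the file is a square
# adjacency matrix with matching row and column headers), the number of species
# should match both dimensions of the interaction matrix:
@assert size(sl_A, 1) == size(sl_A, 2) == length(sl_sp)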
# Because the original data use a mix of scientific and vernacular names, we
# rely on the synonym-matching abilities of `NCBITaxonomy.jl` to first get the
# taxonomic names, and then pass those to GBIF. Please do keep in mind that
# unless the `NCBITAXONOMY_PATH` environment variable is set, the raw taxonomy
# dump will be stored in the project folder (and this is a rather big file).
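# As a minimal sketch (the path below is an assumption, adjust as needed), the
# variable can be set from Julia, but it must be in place before the package is
# built and loaded, which is why the line is left commented out here:
# ENV["NCBITAXONOMY_PATH"] = joinpath(homedir(), ".ncbitaxonomy")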
scinames = Dict{String,String}()
# Note that we do *not* restrict the name matching to only mammals, as there are
# non-mammal species in the Newfoundland metaweb.
for s in sl_sp
    try
        t = NCBITaxonomy.taxon(s; strict=false)
        scinames[s] = t.name
    catch
        @info "Newfoundland taxon $(s) unmatched on NCBI"
        continue
    end
end
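# How much of the node list was matched (an illustrative check, not required
# for the rest of the pipeline):
@info "Matched $(length(scinames)) of $(length(sl_sp)) Newfoundland names on NCBI"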
# The next step is to get the names from NCBI, and match them to the GBIF
# backbone. We ended up relying on this two-step solution because using the GBIF
# name matching directly missed a handful of species, and the Newfoundland
# dataset is relatively small.
# This loop will go through all nodes in the Newfoundland metaweb, match them at
# the species level, and only return them if they are part of the *Mammalia*
# class. There may be a few info messages about unmatched taxa, which are nodes
# from the original data that are at a higher rank than species.
valnames = Dict{String,String}()
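# Note that in GBIF.jl, taxonomic levels such as `class` and `species` are
# returned as name/identifier pairs, which is why `.first` is used below to
# extract the name part of the match.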
for (s, t) in scinames
    gbifmatch = GBIF.taxon(t; strict=false)
    if !isnothing(gbifmatch)
        if !ismissing(gbifmatch.species)
            if gbifmatch.class.first == "Mammalia"
                valnames[s] = gbifmatch.species.first
            end
        end
    end
end
# With the two dictionaries, we can get the positions of species from the
# Newfoundland metaweb that are mammals:
idxmatch = findall(x -> x in keys(valnames), sl_sp)
# And we can now assemble the network:
spnames = [valnames[s] for s in sl_sp[idxmatch]]
A = sl_A[idxmatch, idxmatch]'
NL = UnipartiteNetwork(A, spnames)
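# A quick summary of the assembled network (illustrative only, not needed for
# the rest of the pipeline):
@info "Newfoundland mammal web: $(richness(NL)) species, $(links(NL)) interactions"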
# We finally save the network as a CSV - note that we do not add these
# interactions to the predicted metaweb here, as this will be done as part of
# the thresholding step, which is the very last in the pipeline.
df = DataFrame(; from=String[], to=String[])
for i in interactions(NL)
    push!(df, (i.from, i.to))
end
CSV.write("artifacts/newfoundland.csv", df)