Skip to content

Commit

Permalink
expanded readme
Browse files Browse the repository at this point in the history
  • Loading branch information
thieled committed Feb 12, 2024
1 parent 57b99b0 commit 4d7f7bd
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 96 deletions.
51 changes: 38 additions & 13 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -41,38 +41,63 @@ library(meteoR)
countries <- call_meteor(method = "get",
ressource = "query",
type = "Country",
format = "dataframe",
n_max = Inf
format = "dataframe"
)
channel <- call_meteor(method = "get",
ressource = "query",
type = "Channel",
format = "dataframe"
)
channel_website <- dplyr::filter(channel, `_unique_name` == "website")
countries_selection <- countries |> dplyr::filter(name %in% c("Austria",
"Germany"))
countries_sel <- countries |> dplyr::filter(name %in% c("Austria",
"Germany",
"France",
"Italy"))
# Query the API for news sources from these countries
news_sources <- call_meteor(method = "get",
ressource = "query",
type = "NewsSource",
countries = countries_sel$uid,
geographic_scope = c("national", "multinational"),
countries = countries_selection$uid,
channel = channel_website$uid,
geographic_scope = c("national"),
format = "dataframe",
publication_kind = c("newspaper", "magazine"),
n_max = 50
n_max = 10, n = 10
)
dplyr::glimpse(news_sources)
# Get more insights
view_df <- view_uid(uid = head(news_sources)$uid,
view_df <- view_uid(uid = news_sources$uid,
format = "dataframe",
unnest_cutoff = 1)
dplyr::glimpse(view_df)
# Get the current audience size (e.g. daily visitors) of each website via an external API call:
# Look up a single website through the "external" resource of the API.
#
# `website` is forwarded unchanged to call_meteor() together with
# method = "post", option = "website" and format = "raw".
# Returns whatever call_meteor() returns for that request.
# NOTE(review): presumably `website` is one URL string (the example passes
# values of view_df$identifier) -- confirm against the call_meteor() docs.
call_ws <- function(website) {
response <- call_meteor(
method = "post",
ressource = "external",
option = "website",
website = website,
format = "raw"
)
response
}
# Apply
res <- pbapply::pblapply(head(view_df, 3)$identifier, FUN = call_ws)
res <- fleece::rectangularize(res)
dplyr::glimpse(res)
```


183 changes: 100 additions & 83 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,110 +32,127 @@ library(meteoR)
countries <- call_meteor(method = "get",
ressource = "query",
type = "Country",
format = "dataframe",
n_max = Inf
format = "dataframe"
)
#> 50 obs. + 50 obs. + 50 obs. + 50 obs. + 50 obs. + 2 obs.

channel <- call_meteor(method = "get",
ressource = "query",
type = "Channel",
format = "dataframe"
)
#> 22 obs.

channel_website <- dplyr::filter(channel, `_unique_name` == "website")

countries_selection <- countries |> dplyr::filter(name %in% c("Austria",
"Germany"))

countries_sel <- countries |> dplyr::filter(name %in% c("Austria",
"Germany",
"France",
"Italy"))
# Query the API for news sources from these countries
news_sources <- call_meteor(method = "get",
ressource = "query",
type = "NewsSource",
countries = countries_sel$uid,
geographic_scope = c("national", "multinational"),
countries = countries_selection$uid,
channel = channel_website$uid,
geographic_scope = c("national"),
format = "dataframe",
publication_kind = c("newspaper", "magazine"),
n_max = 50
n_max = 10, n = 10
)
#> 50 obs.


dplyr::glimpse(news_sources)
#> Rows: 50
#> Columns: 20
#> $ `_unique_name` <chr> "newssource_it_20minuti_print", "newssource_f…
#> $ geographic_scope <chr> "national", "national", "national", "national…
#> $ name <chr> "20 Minuti", "Alternatives Économiques", "Alt…
#> $ uid <chr> "0x249fc", "0x22321", "0x22323", "0x4201f", "…
#> $ wikidata_id <chr> "Q7245532", "Q2840427", NA, "Q860331", NA, NA…
#> $ dgraph.type <list> "NewsSource", "NewsSource", "NewsSource", "N…
#> $ alternate_names_1 <chr> "20minutes.fr", "Alter éco", "Alternatives éc…
#> $ alternate_names_2 <chr> "vingt minutes", "Alternatives economiques", …
#> $ alternate_names_3 <chr> NA, "Alternatives Economiques", NA, NA, NA, N…
#> $ channel__unique_name <chr> "print", "print", "facebook", "print", "print…
#> $ channel_name <chr> "Print", "Print", "Facebook", "Print", "Print…
#> $ channel_uid <chr> "0x11", "0x11", "0xd", "0x11", "0x11", "0x11"…
#> $ publication_kind_1 <chr> "newspaper", "magazine", "magazine", "magazin…
#> $ publication_kind_2 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ countries__unique_name_1 <chr> "country_italy", "country_france", "country_f…
#> $ countries__unique_name_2 <chr> NA, NA, NA, "country_switzerland", NA, NA, NA…
#> $ countries_name_1 <chr> "Italy", "France", "France", "Germany", "Germ…
#> $ countries_name_2 <chr> NA, NA, NA, "Switzerland", NA, NA, NA, NA, NA…
#> $ countries_uid_1 <chr> "0x2d", "0x2f", "0x2f", "0x1b", "0x1b", "0x1b…
#> $ countries_uid_2 <chr> NA, NA, NA, "0x32", NA, NA, NA, NA, NA, "0x32…
#> 10 obs.


# Get more insights
view_df <- view_uid(uid = head(news_sources)$uid,
view_df <- view_uid(uid = news_sources$uid,
format = "dataframe",
unnest_cutoff = 1)
#> 26 obs. 31 obs. 23 obs. 26 obs. 28 obs. 27 obs.
#> 32 obs. 30 obs. 29 obs. 30 obs. 30 obs. 27 obs. 29 obs. 23 obs. 23 obs. 30 obs.


dplyr::glimpse(view_df)
#> Rows: 6
#> Columns: 49
#> $ `_date_created` <chr> "2022-06-30T14:44:00.682505+…
#> $ `_unique_name` <chr> "newssource_it_20minuti_prin…
#> $ contains_ads <chr> "yes", "yes", "NA", "yes", "…
#> $ date_founded <chr> "2011-01-01T00:00:00+00:00",…
#> $ defunct <lgl> FALSE, FALSE, FALSE, TRUE, N…
#> Rows: 10
#> Columns: 48
#> $ `_date_created` <chr> "2023-02-08T14:16:02.771084+…
#> $ `_unique_name` <chr> "newssource_de_bilanzde_webs…
#> $ audience_size_recent <int> 3033, 11875, 4096, 42832, 16…
#> $ `audience_size_recent|timestamp` <chr> "2023-02-08T00:00:00+00:00",…
#> $ `audience_size_recent|unit` <chr> "daily visitors", "daily vis…
#> $ contains_ads <chr> "yes", "yes", "no", "yes", "…
#> $ defunct <lgl> TRUE, FALSE, NA, NA, FALSE, …
#> $ entry_review_status <chr> "accepted", "accepted", "acc…
#> $ geographic_scope <chr> "national", "national", "nat…
#> $ name <chr> "20 Minuti", "Alternatives É
#> $ party_affiliated <chr> "NA", "NA", "NA", "NA", NA, …
#> $ payment_model <chr> "not free", "not free", NA,
#> $ publication_cycle <chr> "daily", "monthly", "continu
#> $ special_interest <lgl> FALSE, FALSE, FALSE, TRUE, F
#> $ uid <chr> "0x249fc", "0x22321", "0x223
#> $ wikidata_id <chr> "Q7245532", "Q2840427", NA,
#> $ audience_size_recent <int> NA, 74057, NA, NA, 1161910,
#> $ `audience_size_recent|timestamp` <chr> NA, "2020-01-01T00:00:00+00:
#> $ `audience_size_recent|unit` <chr> NA, "subscribers", NA, NA, "…
#> $ identifier <chr> NA, NA, "AlternativesEconomi
#> $ verified_account <lgl> NA, NA, NA, FALSE, NA, NA
#> $ audience_size <list> "2022-06-30T00:00:00+00:00",
#> $ publication_kind <list> "newspaper", "magazine", "m
#> $ alternate_names <list> <NULL>, <NULL>, "Alternativ
#> $ topical_focus <list> <NULL>, <NULL>, <NULL>, "ec
#> $ `_edited_by__edited_by|timestamp` <chr> "2022-12-19T11:35:52.197084
#> $ `_edited_by_display_name` <chr> "Celina Dinhopl", "Celina Di
#> $ `_edited_by_uid` <chr> "0x4e57", "0x4e57", "0x4e57"
#> $ countries__unique_name <chr> "country_italy", "country_fr
#> $ identifier <chr> "https://bilanz.de/", "https
#> $ name <chr> "bilanz.de", "die-bank.de", …
#> $ party_affiliated <chr> "NA", NA, "no", "NA", "yes",
#> $ payment_model <chr> "NA", "partly free", "partly
#> $ publication_cycle <chr> "NA", "continuous", "continu
#> $ special_interest <lgl> TRUE, TRUE, FALSE, FALSE, FA
#> $ uid <chr> "0x42020", "0x445c6", "0x187
#> $ verified_account <lgl> FALSE, FALSE, NA, NA, NA, NA
#> $ date_founded <chr> NA, NA, NA, "1996-01-01T00:0
#> $ audience_size <list> "2023-02-08T00:00:00+00:00"…
#> $ publication_kind <list> "magazine", "magazine", <NU
#> $ topical_focus <list> "economy", "economy", <NULL…
#> $ alternate_names <list> <NULL>, <NULL>, <NULL>, <NU
#> $ `_edited_by__edited_by|timestamp` <chr> "2023-02-08T14:16:15.015135+
#> $ `_edited_by_display_name` <chr> "Paul Balluff", "Paul Balluf
#> $ `_edited_by_uid` <chr> "0x2711", "0x2711", "0x2711"
#> $ `audience_size|count_0` <int> 3033, 11875, 4096, 42832, 16
#> $ `audience_size|data_from_0` <chr> "https://siterankdata.com/bi
#> $ `audience_size|unit_0` <chr> "daily visitors", "daily vis
#> $ countries__unique_name <chr> "country_germany", "country_
#> $ countries_entry_review_status <chr> "accepted", "accepted", "acc…
#> $ countries_name <chr> "Italy", "France", "France",
#> $ countries_uid <chr> "0x2d", "0x2f", "0x2f", NA,
#> $ languages__unique_name <chr> "language_italian", "languag
#> $ countries_name <chr> "Germany", "Germany", "Germa
#> $ countries_uid <chr> "0x1b", "0x1b", "0x1b", "0x1
#> $ languages__unique_name <chr> "language_german", "language
#> $ languages_entry_review_status <chr> "accepted", "accepted", "acc…
#> $ languages_name <chr> "Italian", "French", "French…
#> $ languages_uid <chr> "0x37e2ed", "0x37e2bc", "0x3…
#> $ `audience_size|data_from_0` <chr> NA, "https://en.wikipedia.or…
#> $ `audience_size|unit_0` <chr> NA, "subscribers", NA, NA, "…
#> $ related_news_sources__unique_name <chr> NA, NA, NA, "newssource_de_b…
#> $ related_news_sources_entry_review_status <chr> NA, NA, NA, "accepted", NA, …
#> $ related_news_sources_name <chr> NA, NA, NA, "bilanz.de", NA,…
#> $ related_news_sources_uid <chr> NA, NA, NA, "0x42020", NA, NA
#> $ `_edited_by_dgraph.type` <list> ["User"], ["User"], ["User"]…
#> $ countries_dgraph.type <list> ["Entry", "Country"], ["Entr…
#> $ languages_name <chr> "German", "German", "German"…
#> $ languages_uid <chr> "0x37e338", "0x37e338", "0x3…
#> $ related_news_sources__unique_name <chr> "newssource_de_bilanz_print"…
#> $ related_news_sources_entry_review_status <chr> "accepted", NA, NA, NA, NA, …
#> $ related_news_sources_name <chr> "Bilanz", NA, NA, NA, NA, NA…
#> $ related_news_sources_uid <chr> "0x4201f", NA, NA, NA, NA, N…
#> $ `_edited_by_dgraph.type` <list> ["User"], ["User"], ["User"…
#> $ countries_dgraph.type <list> ["Entry", "Country"], ["Ent…
#> $ languages_dgraph.type <list> ["Language", "Entry"], ["La…
#> $ `audience_size|count_0` <list> <NULL>, "74057", <NULL>, <N…
#> $ related_news_sources_countries <list<tibble[,5]>> <NULL>, <NULL>, <NULL>, [<t…
#> $ related_news_sources_dgraph.type <list> <NULL>, <NULL>, <NULL>, ["E…
#> $ related_news_sources_channel__unique_name <chr> NA, NA, NA, "website", NA, NA
#> $ related_news_sources_channel_name <chr> NA, NA, NA, "Se…
#> $ related_news_sources_channel_uid <chr> NA, NA, NA, "0x10", NA, NA
#> $ related_news_sources_countries <list<tibble[,5]>> [<tbl_df[2 x 5]…
#> $ related_news_sources_dgraph.type <list> ["Entry", "NewsSource"], <N…
#> $ related_news_sources_channel__unique_name <chr> "print", NA, NA, NA, NA, NA,…
#> $ related_news_sources_channel_name <chr> "Print", NA, NA, NA, NA, NA,…
#> $ related_news_sources_channel_uid <chr> "0x11", NA, NA, NA, NA, NA, …



# Get the current audience size (e.g. daily visitors) of each website via an external API call:
# Look up a single website through the "external" resource of the API.
#
# `website` is forwarded unchanged to call_meteor() together with
# method = "post", option = "website" and format = "raw".
# Returns whatever call_meteor() returns for that request.
# NOTE(review): presumably `website` is one URL string (the example passes
# values of view_df$identifier) -- confirm against the call_meteor() docs.
call_ws <- function(website) {
response <- call_meteor(
method = "post",
ressource = "external",
option = "website",
website = website,
format = "raw"
)
response
}

# Apply
res <- pbapply::pblapply(head(view_df, 3)$identifier, FUN = call_ws)

res <- fleece::rectangularize(res)

dplyr::glimpse(res)
#> Rows: 3
#> Columns: 12
#> $ audience_size <chr> "2024-02-12", "2024-02-12", "2024-02-12"
#> $ `audience_size|count` <int> 1896530, 11875, 13338
#> $ `audience_size|data_from` <chr> "https://siterankdata.com/bilanz.de", "https…
#> $ `audience_size|unit` <chr> "daily visitors", "daily visitors", "daily v…
#> $ identifier <chr> "https://www.welt.de/wirtschaft", "https://d…
#> $ name <chr> "bilanz.de", "die-bank.de", "ef-magazin.de"
#> $ alternate_names_1 <chr> "Wirtschaft - News & Aktuelle Nachrichten - …
#> $ alternate_names_2 <chr> "https://www.welt.de/wirtschaft/", NA, "http…
#> $ channel_feeds_1 <chr> NA, NA, "https://ef-magazin.de/feed/atom/"
#> $ channel_feeds_2 <chr> NA, NA, "https://ef-magazin.de/feed/rss/"
#> $ `channel_feeds|kind_0` <chr> NA, NA, "rss"
#> $ `channel_feeds|kind_1` <chr> NA, NA, "rss"
```

0 comments on commit 4d7f7bd

Please sign in to comment.