expanded readme

thieled · Feb 12, 2024 · 4d7f7bd · 4d7f7bd
1 parent 57b99b0
commit 4d7f7bd
Show file tree

Hide file tree

Showing 2 changed files with 138 additions and 96 deletions.
diff --git a/README.Rmd b/README.Rmd
@@ -41,38 +41,63 @@ library(meteoR)
 countries <- call_meteor(method = "get",
                             ressource = "query",
                             type = "Country",
-                            format = "dataframe",
-                            n_max = Inf
+                            format = "dataframe"
+)
+
+channel <- call_meteor(method = "get",
+                            ressource = "query",
+                            type = "Channel",
+                            format = "dataframe"
 )
 
+channel_website <-  dplyr::filter(channel, `_unique_name` == "website")
+
+countries_selection <- countries |> dplyr::filter(name %in% c("Austria",
+                                                        "Germany"))
 
-countries_sel <- countries |> dplyr::filter(name %in% c("Austria",
-                                                        "Germany",
-                                                        "France",
-                                                        "Italy"))
 # Query the API for news sources from these countries
 news_sources <- call_meteor(method = "get",
                             ressource = "query",
                             type = "NewsSource",
-                            countries = countries_sel$uid,
-                            geographic_scope = c("national", "multinational"),
+                            countries = countries_selection$uid,
+                            channel = channel_website$uid,
+                            geographic_scope = c("national"),
                             format = "dataframe",
                             publication_kind = c("newspaper", "magazine"),
-                            n_max = 50
+                            n_max = 10, n = 10
 )
 
 
-dplyr::glimpse(news_sources)
-
-
 # Get more insights
-view_df <- view_uid(uid = head(news_sources)$uid, 
+view_df <- view_uid(uid = news_sources$uid, 
                     format = "dataframe",
                     unnest_cutoff = 1)
 
+
 dplyr::glimpse(view_df)
 
 
+
+# Get audience size (e.g. daily visitors) for a website from the external API call:
+call_ws <- function(website
+  ){
+    r <- call_meteor(method = "post",
+                     ressource = "external",
+                     option = "website", 
+                     website = website,
+                     format = "raw"
+    )
+    return(r)
+  }
+
+# Apply call_ws to the identifiers of the first three news sources
+res <- pbapply::pblapply(head(view_df, 3)$identifier, FUN = call_ws)
+  
+res <- fleece::rectangularize(res)
+  
+dplyr::glimpse(res)
+
+
 ```
 
 
diff --git a/README.md b/README.md
@@ -32,110 +32,127 @@ library(meteoR)
 countries <- call_meteor(method = "get",
                             ressource = "query",
                             type = "Country",
-                            format = "dataframe",
-                            n_max = Inf
+                            format = "dataframe"
 )
 #> 50 obs. + 50 obs. + 50 obs. + 50 obs. + 50 obs. + 2 obs.
 
+channel <- call_meteor(method = "get",
+                            ressource = "query",
+                            type = "Channel",
+                            format = "dataframe"
+)
+#> 22 obs.
+
+channel_website <-  dplyr::filter(channel, `_unique_name` == "website")
+
+countries_selection <- countries |> dplyr::filter(name %in% c("Austria",
+                                                        "Germany"))
 
-countries_sel <- countries |> dplyr::filter(name %in% c("Austria",
-                                                        "Germany",
-                                                        "France",
-                                                        "Italy"))
 # Query the API for news sources from these countries
 news_sources <- call_meteor(method = "get",
                             ressource = "query",
                             type = "NewsSource",
-                            countries = countries_sel$uid,
-                            geographic_scope = c("national", "multinational"),
+                            countries = countries_selection$uid,
+                            channel = channel_website$uid,
+                            geographic_scope = c("national"),
                             format = "dataframe",
                             publication_kind = c("newspaper", "magazine"),
-                            n_max = 50
+                            n_max = 10, n = 10
 )
-#> 50 obs.
-
-
-dplyr::glimpse(news_sources)
-#> Rows: 50
-#> Columns: 20
-#> $ `_unique_name`           <chr> "newssource_it_20minuti_print", "newssource_f…
-#> $ geographic_scope         <chr> "national", "national", "national", "national…
-#> $ name                     <chr> "20 Minuti", "Alternatives Économiques", "Alt…
-#> $ uid                      <chr> "0x249fc", "0x22321", "0x22323", "0x4201f", "…
-#> $ wikidata_id              <chr> "Q7245532", "Q2840427", NA, "Q860331", NA, NA…
-#> $ dgraph.type              <list> "NewsSource", "NewsSource", "NewsSource", "N…
-#> $ alternate_names_1        <chr> "20minutes.fr", "Alter éco", "Alternatives éc…
-#> $ alternate_names_2        <chr> "vingt minutes", "Alternatives economiques", …
-#> $ alternate_names_3        <chr> NA, "Alternatives Economiques", NA, NA, NA, N…
-#> $ channel__unique_name     <chr> "print", "print", "facebook", "print", "print…
-#> $ channel_name             <chr> "Print", "Print", "Facebook", "Print", "Print…
-#> $ channel_uid              <chr> "0x11", "0x11", "0xd", "0x11", "0x11", "0x11"…
-#> $ publication_kind_1       <chr> "newspaper", "magazine", "magazine", "magazin…
-#> $ publication_kind_2       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
-#> $ countries__unique_name_1 <chr> "country_italy", "country_france", "country_f…
-#> $ countries__unique_name_2 <chr> NA, NA, NA, "country_switzerland", NA, NA, NA…
-#> $ countries_name_1         <chr> "Italy", "France", "France", "Germany", "Germ…
-#> $ countries_name_2         <chr> NA, NA, NA, "Switzerland", NA, NA, NA, NA, NA…
-#> $ countries_uid_1          <chr> "0x2d", "0x2f", "0x2f", "0x1b", "0x1b", "0x1b…
-#> $ countries_uid_2          <chr> NA, NA, NA, "0x32", NA, NA, NA, NA, NA, "0x32…
+#> 10 obs.
 
 
 # Get more insights
-view_df <- view_uid(uid = head(news_sources)$uid, 
+view_df <- view_uid(uid = news_sources$uid, 
                     format = "dataframe",
                     unnest_cutoff = 1)
-#> 26 obs. 31 obs. 23 obs. 26 obs. 28 obs. 27 obs.
+#> 32 obs. 30 obs. 29 obs. 30 obs. 30 obs. 27 obs. 29 obs. 23 obs. 23 obs. 30 obs.
+
 
 dplyr::glimpse(view_df)
-#> Rows: 6
-#> Columns: 49
-#> $ `_date_created`                           <chr> "2022-06-30T14:44:00.682505+…
-#> $ `_unique_name`                            <chr> "newssource_it_20minuti_prin…
-#> $ contains_ads                              <chr> "yes", "yes", "NA", "yes", "…
-#> $ date_founded                              <chr> "2011-01-01T00:00:00+00:00",…
-#> $ defunct                                   <lgl> FALSE, FALSE, FALSE, TRUE, N…
+#> Rows: 10
+#> Columns: 48
+#> $ `_date_created`                           <chr> "2023-02-08T14:16:02.771084+…
+#> $ `_unique_name`                            <chr> "newssource_de_bilanzde_webs…
+#> $ audience_size_recent                      <int> 3033, 11875, 4096, 42832, 16…
+#> $ `audience_size_recent|timestamp`          <chr> "2023-02-08T00:00:00+00:00",…
+#> $ `audience_size_recent|unit`               <chr> "daily visitors", "daily vis…
+#> $ contains_ads                              <chr> "yes", "yes", "no", "yes", "…
+#> $ defunct                                   <lgl> TRUE, FALSE, NA, NA, FALSE, …
 #> $ entry_review_status                       <chr> "accepted", "accepted", "acc…
 #> $ geographic_scope                          <chr> "national", "national", "nat…
-#> $ name                                      <chr> "20 Minuti", "Alternatives É…
-#> $ party_affiliated                          <chr> "NA", "NA", "NA", "NA", NA, …
-#> $ payment_model                             <chr> "not free", "not free", NA, …
-#> $ publication_cycle                         <chr> "daily", "monthly", "continu…
-#> $ special_interest                          <lgl> FALSE, FALSE, FALSE, TRUE, F…
-#> $ uid                                       <chr> "0x249fc", "0x22321", "0x223…
-#> $ wikidata_id                               <chr> "Q7245532", "Q2840427", NA, …
-#> $ audience_size_recent                      <int> NA, 74057, NA, NA, 1161910, …
-#> $ `audience_size_recent|timestamp`          <chr> NA, "2020-01-01T00:00:00+00:…
-#> $ `audience_size_recent|unit`               <chr> NA, "subscribers", NA, NA, "…
-#> $ identifier                                <chr> NA, NA, "AlternativesEconomi…
-#> $ verified_account                          <lgl> NA, NA, NA, FALSE, NA, NA
-#> $ audience_size                             <list> "2022-06-30T00:00:00+00:00",…
-#> $ publication_kind                          <list> "newspaper", "magazine", "m…
-#> $ alternate_names                           <list> <NULL>, <NULL>, "Alternativ…
-#> $ topical_focus                             <list> <NULL>, <NULL>, <NULL>, "ec…
-#> $ `_edited_by__edited_by|timestamp`         <chr> "2022-12-19T11:35:52.197084…
-#> $ `_edited_by_display_name`                 <chr> "Celina Dinhopl", "Celina Di…
-#> $ `_edited_by_uid`                          <chr> "0x4e57", "0x4e57", "0x4e57"…
-#> $ countries__unique_name                    <chr> "country_italy", "country_fr…
+#> $ identifier                                <chr> "https://bilanz.de/", "https…
+#> $ name                                      <chr> "bilanz.de", "die-bank.de", …
+#> $ party_affiliated                          <chr> "NA", NA, "no", "NA", "yes",…
+#> $ payment_model                             <chr> "NA", "partly free", "partly…
+#> $ publication_cycle                         <chr> "NA", "continuous", "continu…
+#> $ special_interest                          <lgl> TRUE, TRUE, FALSE, FALSE, FA…
+#> $ uid                                       <chr> "0x42020", "0x445c6", "0x187…
+#> $ verified_account                          <lgl> FALSE, FALSE, NA, NA, NA, NA…
+#> $ date_founded                              <chr> NA, NA, NA, "1996-01-01T00:0…
+#> $ audience_size                             <list> "2023-02-08T00:00:00+00:00"…
+#> $ publication_kind                          <list> "magazine", "magazine", <NU…
+#> $ topical_focus                             <list> "economy", "economy", <NULL…
+#> $ alternate_names                           <list> <NULL>, <NULL>, <NULL>, <NU…
+#> $ `_edited_by__edited_by|timestamp`         <chr> "2023-02-08T14:16:15.015135+…
+#> $ `_edited_by_display_name`                 <chr> "Paul Balluff", "Paul Balluf…
+#> $ `_edited_by_uid`                          <chr> "0x2711", "0x2711", "0x2711"…
+#> $ `audience_size|count_0`                   <int> 3033, 11875, 4096, 42832, 16…
+#> $ `audience_size|data_from_0`               <chr> "https://siterankdata.com/bi…
+#> $ `audience_size|unit_0`                    <chr> "daily visitors", "daily vis…
+#> $ countries__unique_name                    <chr> "country_germany", "country_…
 #> $ countries_entry_review_status             <chr> "accepted", "accepted", "acc…
-#> $ countries_name                            <chr> "Italy", "France", "France",…
-#> $ countries_uid                             <chr> "0x2d", "0x2f", "0x2f", NA, …
-#> $ languages__unique_name                    <chr> "language_italian", "languag…
+#> $ countries_name                            <chr> "Germany", "Germany", "Germa…
+#> $ countries_uid                             <chr> "0x1b", "0x1b", "0x1b", "0x1…
+#> $ languages__unique_name                    <chr> "language_german", "language…
 #> $ languages_entry_review_status             <chr> "accepted", "accepted", "acc…
-#> $ languages_name                            <chr> "Italian", "French", "French…
-#> $ languages_uid                             <chr> "0x37e2ed", "0x37e2bc", "0x3…
-#> $ `audience_size|data_from_0`               <chr> NA, "https://en.wikipedia.or…
-#> $ `audience_size|unit_0`                    <chr> NA, "subscribers", NA, NA, "…
-#> $ related_news_sources__unique_name         <chr> NA, NA, NA, "newssource_de_b…
-#> $ related_news_sources_entry_review_status  <chr> NA, NA, NA, "accepted", NA, …
-#> $ related_news_sources_name                 <chr> NA, NA, NA, "bilanz.de", NA,…
-#> $ related_news_sources_uid                  <chr> NA, NA, NA, "0x42020", NA, NA
-#> $ `_edited_by_dgraph.type`                  <list> ["User"], ["User"], ["User"]…
-#> $ countries_dgraph.type                     <list> ["Entry", "Country"], ["Entr…
+#> $ languages_name                            <chr> "German", "German", "German"…
+#> $ languages_uid                             <chr> "0x37e338", "0x37e338", "0x3…
+#> $ related_news_sources__unique_name         <chr> "newssource_de_bilanz_print"…
+#> $ related_news_sources_entry_review_status  <chr> "accepted", NA, NA, NA, NA, …
+#> $ related_news_sources_name                 <chr> "Bilanz", NA, NA, NA, NA, NA…
+#> $ related_news_sources_uid                  <chr> "0x4201f", NA, NA, NA, NA, N…
+#> $ `_edited_by_dgraph.type`                  <list> ["User"], ["User"], ["User"…
+#> $ countries_dgraph.type                     <list> ["Entry", "Country"], ["Ent…
 #> $ languages_dgraph.type                     <list> ["Language", "Entry"], ["La…
-#> $ `audience_size|count_0`                   <list> <NULL>, "74057", <NULL>, <N…
-#> $ related_news_sources_countries            <list<tibble[,5]>> <NULL>, <NULL>, <NULL>, [<t…
-#> $ related_news_sources_dgraph.type          <list> <NULL>, <NULL>, <NULL>, ["E…
-#> $ related_news_sources_channel__unique_name <chr> NA, NA, NA, "website", NA, NA
-#> $ related_news_sources_channel_name         <chr> NA, NA, NA, "Se…
-#> $ related_news_sources_channel_uid          <chr> NA, NA, NA, "0x10", NA, NA
+#> $ related_news_sources_countries            <list<tibble[,5]>> [<tbl_df[2 x 5]…
+#> $ related_news_sources_dgraph.type          <list> ["Entry", "NewsSource"], <N…
+#> $ related_news_sources_channel__unique_name <chr> "print", NA, NA, NA, NA, NA,…
+#> $ related_news_sources_channel_name         <chr> "Print", NA, NA, NA, NA, NA,…
+#> $ related_news_sources_channel_uid          <chr> "0x11", NA, NA, NA, NA, NA, …
+
+
+
+# Get audience size (e.g. daily visitors) for a website from the external API call:
+call_ws <- function(website
+  ){
+    r <- call_meteor(method = "post",
+                     ressource = "external",
+                     option = "website", 
+                     website = website,
+                     format = "raw"
+    )
+    return(r)
+  }
+
+# Apply call_ws to the identifiers of the first three news sources
+res <- pbapply::pblapply(head(view_df, 3)$identifier, FUN = call_ws)
+
+res <- fleece::rectangularize(res)
+
+dplyr::glimpse(res)
+#> Rows: 3
+#> Columns: 12
+#> $ audience_size             <chr> "2024-02-12", "2024-02-12", "2024-02-12"
+#> $ `audience_size|count`     <int> 1896530, 11875, 13338
+#> $ `audience_size|data_from` <chr> "https://siterankdata.com/bilanz.de", "https…
+#> $ `audience_size|unit`      <chr> "daily visitors", "daily visitors", "daily v…
+#> $ identifier                <chr> "https://www.welt.de/wirtschaft", "https://d…
+#> $ name                      <chr> "bilanz.de", "die-bank.de", "ef-magazin.de"
+#> $ alternate_names_1         <chr> "Wirtschaft - News & Aktuelle Nachrichten - …
+#> $ alternate_names_2         <chr> "https://www.welt.de/wirtschaft/", NA, "http…
+#> $ channel_feeds_1           <chr> NA, NA, "https://ef-magazin.de/feed/atom/"
+#> $ channel_feeds_2           <chr> NA, NA, "https://ef-magazin.de/feed/rss/"
+#> $ `channel_feeds|kind_0`    <chr> NA, NA, "rss"
+#> $ `channel_feeds|kind_1`    <chr> NA, NA, "rss"
 ```