CORDIS project data available on the EU Open Data Portal. More details in README.md
.
library(tidyverse)
library(ggplot2)
library(ggrepel)
library(scales)
library(janitor)
library(countrycode)
library(wbstats)
library(igraph)
library(yaml)
library(here)
library(knitr)
library(DT)
library(conflicted)
conflict_prefer("filter", "dplyr")
# credits: https://colorhunt.co/palette/180404
<- c("#fcbf1e", "#40bad5", "#035aa6")
colorhunt source(here("R", "global.R"))
Below, we transform the raw data to calculate an adjacency matrix containing pairwise counts for the number of times countries participate in projects jointly, either as project coordinators or participants.
<-
h2020 here("data", "h2020.csv") %>%
read_csv()
# country participation size
<- h2020 %>%
country_participation count(country, sort = TRUE)
<- h2020 %>%
df_collab # removes third-countries and single-country projects
filter(group %in% c("eu15", "eu13", "ac"), nunique > 1)
# building the graph ------------------------------------------------------
# adjacency matrix
<- df_collab %>%
pivot group_by(rcn, country) %>%
summarise(n = n(), .groups = "drop") %>%
pivot_wider(names_from = country,
values_from = n,
values_fill = 0)
<- pivot %>%
collaborations select(-rcn) %>%
as.matrix() %>%
crossprod()
<- collaborations %>%
g_collab ::graph_from_adjacency_matrix(mode = "undirected",
igraphweighted = TRUE)
<- igraph::simplify(g_collab)
g ## remove same-country collaboration
## same as removing diagonal 2x
<- igraph::strength(g)
node_size <- igraph::E(g)$weight
edge_weight
<- igraph::layout_with_fr(g, weights = edge_weight)
point_coordinates colnames(point_coordinates) <- c("x", "y")
<-
nodes %>%
point_coordinates as_tibble() %>%
mutate(country = names(igraph::V(g))) %>%
left_join(info_country, by = "country") %>%
mutate(
status = str_to_upper(group),
country_name = countrycode(country, "eurostat", "country.name"),
s = node_size
%>%
) arrange(s)
<- get.data.frame(g) %>%
edges as_tibble() %>%
left_join(select(nodes, x:country), by = c("from" = "country")) %>%
rename(from_x = x, from_y = y) %>%
left_join(select(nodes, x:country), by = c("to" = "country")) %>%
rename(to_x = x, to_y = y) %>%
mutate(s = weight) %>%
arrange(s)
<- eigen_centrality(g, directed = FALSE) %>%
df_eigen pluck("vector") %>%
list_to_df(c("eigen", "country"))
<- strength(g) %>%
df_degree list_to_df(c("degree", "country"))
<- df_eigen %>%
df_centrality left_join(df_degree, by = "country") %>%
select(country, everything()) %>%
mutate(
eigen = round(1000 * eigen, 3),
degree = round(degree / 1000, 3),
eigen_rank = rank(eigen),
eigen_rank = 1 + max(eigen_rank) - eigen_rank,
degree_rank = rank(degree),
degree_rank = 1 + max(degree_rank) - degree_rank
%>%
) arrange(country)
datatable(df_centrality, rownames = FALSE)
ggplot() +
geom_segment(
data = edges,
aes(
x = from_x,
xend = to_x,
y = from_y,
yend = to_y,
size = s,
colour = s
),show.legend = FALSE
+
) scale_color_gradient(low = rgb(0, 0, 0, .05),
high = rgb(0, 0, 0, .45)) +
geom_point(
data = nodes,
aes(x, y, size = s, fill = status),
pch = 21,
colour = "white"
+
) scale_fill_manual(values = colorhunt) +
scale_size(range = c(.25, 5)) +
guides(
fill = guide_legend(override.aes = list(size = 7, shape = 21)),
color = "none",
size = "none") +
geom_text_repel(
data = nodes,
aes(x, y, label = country_name),
size = 5.5,
segment.color = NA,
bg.color = "white",
bg.r = 0.15
+
) labs(title = "Force-Directed Graph of H2020 Collaborations",
subtitle = "EU + Associated Countries, 2014-2020",
caption = "Source: CORDIS") +
theme_bw() +
theme(
plot.title.position = "plot",
plot.title = element_text(size = 25, face = "bold"),
plot.subtitle = element_text(size = 18),
plot.caption = element_text(size = 10),
legend.position = c(.95, .95),
legend.title = element_blank(),
legend.text = element_text(size = 16),
legend.key.size = unit(2, "lines"),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()
)
Difficult to perceive differences between the core countries.
# recalculating point coordinates using "log(edge weights)"
<- igraph::layout_with_fr(g, weights = log(edge_weight))
point_coordinates colnames(point_coordinates) <- c("x", "y")
<-
nodes %>%
point_coordinates as_tibble() %>%
mutate(country = names(igraph::V(g))) %>%
left_join(info_country, by = "country") %>%
mutate(
status = str_to_upper(group),
country_name = countrycode(country, "eurostat", "country.name"),
s = node_size
%>%
) arrange(s)
<- get.data.frame(g) %>%
edges as_tibble() %>%
left_join(select(nodes, x:country), by = c("from" = "country")) %>%
rename(from_x = x, from_y = y) %>%
left_join(select(nodes, x:country), by = c("to" = "country")) %>%
rename(to_x = x, to_y = y) %>%
mutate(s = weight) %>%
arrange(s)
ggplot() +
geom_segment(
data = edges,
aes(
x = from_x,
xend = to_x,
y = from_y,
yend = to_y,
size = s,
colour = s
),show.legend = FALSE
+
) scale_color_gradient(low = rgb(0, 0, 0, .05),
high = rgb(0, 0, 0, .45)) +
geom_point(
data = nodes,
aes(x, y, size = s, fill = status),
pch = 21,
colour = "white"
+
) scale_fill_manual(values = colorhunt) +
scale_size(range = c(.25, 15)) +
guides(
fill = guide_legend(override.aes = list(size = 7, shape = 21)),
color = "none",
size = "none") +
geom_text_repel(
data = nodes,
aes(x, y, label = country_name),
size = 5.5,
segment.color = NA,
bg.color = "white",
bg.r = 0.15
+
) labs(title = "Log-Force-Directed Graph of H2020 Collaborations",
subtitle = "EU + Associated Countries, 2014-2020",
caption = "Source: CORDIS") +
theme_bw() +
theme(
plot.title.position = "plot",
plot.title = element_text(size = 25, face = "bold"),
plot.subtitle = element_text(size = 18),
plot.caption = element_text(size = 10),
legend.position = c(.95, .95),
legend.title = element_blank(),
legend.text = element_text(size = 16),
legend.key.size = unit(2, "lines"),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()
)
%>%
df_collab ggplot(aes(role, contribution)) +
geom_boxplot() +
scale_y_log10(labels = label_number(accuracy = 1)) +
coord_flip() +
theme_bw()
Re-calculating weights using country-specific financial contributions:
<- df_collab %>%
pivot group_by(rcn, country) %>%
summarise(n = sum(contribution, na.rm = TRUE),
.groups = "drop") %>%
pivot_wider(names_from = country,
values_from = n,
values_fill = 0)
<- pivot %>%
collaborations select(-rcn) %>%
as.matrix() %>%
crossprod()
<- collaborations %>%
g_collab ::graph_from_adjacency_matrix(mode = "undirected",
igraphweighted = TRUE)
# remove same-country collaboration (diagonal 2x)
<- igraph::simplify(g_collab)
g <- igraph::strength(g) # ignore same-country collaboration
node_size <- igraph::E(g)$weight
edge_weight
<- igraph::layout_with_fr(g, weights = log(edge_weight))
point_coordinates colnames(point_coordinates) <- c("x", "y")
<-
nodes %>%
point_coordinates as_tibble() %>%
mutate(country = names(igraph::V(g))) %>%
left_join(info_country, by = "country") %>%
mutate(
status = str_to_upper(group),
country_name = countrycode(country, "eurostat", "country.name"),
s = node_size
%>%
) arrange(s)
<- get.data.frame(g) %>%
edges as_tibble() %>%
left_join(select(nodes, x:country), by = c("from" = "country")) %>%
rename(from_x = x, from_y = y) %>%
left_join(select(nodes, x:country), by = c("to" = "country")) %>%
rename(to_x = x, to_y = y) %>%
mutate(s = weight) %>%
arrange(s)
ggplot() +
geom_segment(
data = edges,
aes(
x = from_x,
xend = to_x,
y = from_y,
yend = to_y,
size = s,
colour = s
),show.legend = FALSE
+
) scale_color_gradient(low = rgb(0, 0, 0, .05),
high = rgb(0, 0, 0, .45)) +
geom_point(
data = nodes,
aes(x, y, size = s, fill = status),
pch = 21,
colour = "white"
+
) scale_fill_manual(values = colorhunt) +
scale_size(range = c(.25, 15)) +
guides(
fill = guide_legend(override.aes = list(size = 7, shape = 21)),
color = "none",
size = "none") +
geom_text_repel(
data = nodes,
aes(x, y, label = country_name),
size = 5.5,
segment.color = NA,
bg.color = "white",
bg.r = 0.15
+
) labs(title = "Log-Force-Directed Graph of H2020 Collaborations",
subtitle = "EU + Associated Countries, 2014-2020",
caption = "Source: CORDIS") +
theme_bw() +
theme(
plot.title.position = "plot",
plot.title = element_text(size = 25, face = "bold"),
plot.subtitle = element_text(size = 18),
plot.caption = element_text(size = 10),
legend.position = c(.95, .95),
legend.title = element_blank(),
legend.text = element_text(size = 16),
legend.key.size = unit(2, "lines"),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()
)
# re-calculate layout on log-scale of edge weight
# spread core:
<- g %>%
point_coordinates # edge weight
::layout_with_fr(weights = log(edge_weight))
igraphcolnames(point_coordinates) <- c("x", "y")
<-
nodes %>%
point_coordinates as_tibble() %>%
mutate(country = names(igraph::V(g))) %>%
left_join(info_country, by = "country") %>%
mutate(
status = str_to_upper(group),
country_name = countrycode(country, "eurostat", "country.name"),
s = node_size
%>%
) arrange(s)
<- g %>%
edges get.data.frame() %>%
as_tibble() %>%
left_join(select(nodes, x:country), by = c("from" = "country")) %>%
rename(from_x = x, from_y = y) %>%
left_join(select(nodes, x:country), by = c("to" = "country")) %>%
rename(to_x = x, to_y = y) %>%
mutate(s = (weight / 1000) ^ 2.5) %>% # ad-hoc
arrange(s)
# separate rules to approach Nature's representation
<- 1 # ad-hoc
threshold <- edges %>%
core_edges filter(s > threshold)
<- edges %>%
outer_edges filter(s <= threshold)
<- outer_edges %>%
outer_edges_reversed rename(from = to, to = from) %>%
select(from, everything())
<- outer_edges %>%
outer_edges bind_rows(outer_edges_reversed) %>%
anti_join(core_edges, by = "from") %>%
anti_join(core_edges, by = c("from" = "to")) %>%
group_by(from) %>%
top_n(1, s) %>%
mutate(s = 1)
<- core_edges %>%
edges bind_rows(outer_edges) %>%
arrange(s) # plot stronger edges last
Below, a log-log scale updated representation of Figure 2 from Network dynamics in collaborative research in the EU, 2003–2017.
<- wbstats::wb_data(
population_table indicator = "SP.POP.TOTL",
country = countrycode(pull(info_country, country), "eurostat", "iso2c"),
start_date = 2017,
end_date = 2017
%>%
) clean_names() %>%
select(iso2c, population = sp_pop_totl)
<- df_collab %>%
funding group_by(country) %>%
summarise(contrib = sum(contribution, na.rm = TRUE),
.groups = "drop")
<- df_collab %>%
collaborations # keep group information
group_by(group, country) %>%
summarise(n = n(),
.groups = "drop") %>%
# for world bank compatibility
mutate(
status = str_to_upper(group),
iso2c = countrycode(country, "eurostat", "iso2c"),
country_name = countrycode(country, "eurostat", "country.name")) %>%
left_join(population_table, by = "iso2c") %>%
left_join(funding, by = "country")
ggplot(aes(population, n, label = country_name, col = status),
data = collaborations) +
geom_smooth(
method = "lm",
se = FALSE,
col = "gray75",
linetype = "dotted",
size = .5
+
) geom_point(size = 3, alpha = .8) +
geom_text_repel(
size = 5,
force = 2,
segment.alpha = 0.5,
bg.color = rgb(1, 1, 1, .5),
bg.r = 0.15,
show.legend = FALSE
+
) scale_color_manual(values = colorhunt) +
scale_x_log10(labels = label_number(accuracy = 1, scale = 1e-6)) +
scale_y_log10(labels = label_number(accuracy = 1, scale = 1e-3)) +
labs(
title = "H2020 Collaborations and Population",
subtitle = "Shown in log-log scale",
caption = "Source: CORDIS, World Bank",
color = element_blank(),
x = "Population (mio)",
y = "No. Collaborations (k)"
+
) theme_minimal() +
theme(
plot.title.position = "plot",
legend.position = c(.1, .96),
legend.direction = "horizontal",
legend.key.size = unit(4, "mm"),
legend.background = element_rect(
linetype = "dotted",
fill = alpha("white", 1),
color = alpha("black", 0.25)
) )
Related plots:
ggplot(aes(population, contrib, label = country_name, col = status),
data = collaborations) +
geom_smooth(
method = "lm",
se = FALSE,
col = "gray75",
linetype = "dotted",
size = .5
+
) geom_point(size = 3, alpha = .8) +
geom_text_repel(
size = 5,
force = 2,
segment.alpha = 0.5,
bg.color = rgb(1, 1, 1, .5),
bg.r = 0.15,
show.legend = FALSE
+
) scale_color_manual(values = colorhunt) +
scale_x_log10(labels = label_number(accuracy = 1, scale = 1e-6)) +
scale_y_log10(labels = label_number(accuracy = 1, scale = 1e-6)) +
labs(
title = "H2020 Contributions and Population",
subtitle = "Shown in log-log scale",
caption = "Source: CORDIS, World Bank",
color = element_blank(),
x = "Population (mio)",
y = "Total Contribution (mioEUR)"
+
) theme_minimal() +
theme(
plot.title.position = "plot",
legend.position = c(.1, .96),
legend.direction = "horizontal",
legend.key.size = unit(4, "mm"),
legend.background = element_rect(
linetype = "dotted",
fill = alpha("white", 1),
color = alpha("black", 0.25)
) )
ggplot(aes(n, contrib, label = country_name, col = status),
data = collaborations) +
geom_smooth(
method = "lm",
se = FALSE,
col = "gray75",
linetype = "dotted",
size = .5
+
) geom_point(size = 3, alpha = .8) +
geom_text_repel(
size = 5,
force = 2,
segment.alpha = 0.5,
bg.color = rgb(1, 1, 1, .5),
bg.r = 0.15,
show.legend = FALSE
+
) scale_color_manual(values = colorhunt) +
scale_x_log10(labels = label_number(accuracy = 1, scale = 1e-3)) +
scale_y_log10(labels = label_number(accuracy = 1, scale = 1e-6)) +
labs(
title = "H2020 Contributions and Collaborations",
subtitle = "Shown in log-log scale",
caption = "Source: CORDIS, World Bank",
color = element_blank(),
x = "No. Collaborations (k)",
y = "Total Contribution (mioEUR)"
+
) theme_minimal() +
theme(
plot.title.position = "plot",
legend.position = c(.1, .96),
legend.direction = "horizontal",
legend.key.size = unit(4, "mm"),
legend.background = element_rect(
linetype = "dotted",
fill = alpha("white", 1),
color = alpha("black", 0.25)
) )