library(dplyr)
# Working subset of the starwars data. The columns are chosen by name rather
# than by position, so the selection does not silently change if the column
# order of the starwars table differs between dplyr versions.
df <- starwars[, c("name", "height", "mass", "eye_color", "species")]
df
# A tibble: 87 × 5
name height mass eye_color species
<chr> <int> <dbl> <chr> <chr>
1 Luke Skywalker 172 77 blue Human
2 C-3PO 167 75 yellow Droid
3 R2-D2 96 32 red Droid
4 Darth Vader 202 136 yellow Human
5 Leia Organa 150 49 brown Human
6 Owen Lars 178 120 blue Human
7 Beru Whitesun lars 165 75 blue Human
8 R5-D4 97 32 red Droid
9 Biggs Darklighter 183 84 brown Human
10 Obi-Wan Kenobi 182 77 blue-gray Human
# … with 77 more rows
select() Επιλογή στηλών
filter() Επιλογή γραμμών
arrange() Αναδιοργάνωση, ιεράρχηση
mutate() Κατασκευή νέων στηλών
group_by() Ομαδοποίηση (‘split-apply-combine’ concept)
summarise() Σύνοψη τιμών
# Keep only the name, mass and eye_color columns.
df %>%
  select(name, mass, eye_color)
# A tibble: 87 × 3
name mass eye_color
<chr> <dbl> <chr>
1 Luke Skywalker 77 blue
2 C-3PO 75 yellow
3 R2-D2 32 red
4 Darth Vader 136 yellow
5 Leia Organa 49 brown
6 Owen Lars 120 blue
7 Beru Whitesun lars 75 blue
8 R5-D4 32 red
9 Biggs Darklighter 84 brown
10 Obi-Wan Kenobi 77 blue-gray
# … with 77 more rows
# Keep the rows whose height is below 100.
df %>%
  filter(height < 100)
# A tibble: 7 × 5
name height mass eye_color species
<chr> <int> <dbl> <chr> <chr>
1 R2-D2 96 32 red Droid
2 R5-D4 97 32 red Droid
3 Yoda 66 17 brown Yoda's species
4 Wicket Systri Warrick 88 20 brown Ewok
5 Dud Bolt 94 45 yellow Vulptereen
6 Ratts Tyerell 79 15 unknown Aleena
7 R4-P17 96 NA red, blue Droid
# Keep the rows that satisfy both conditions: height below 100 and red eyes.
df %>%
  filter(
    height < 100,
    eye_color == "red"
  )
# A tibble: 2 × 5
name height mass eye_color species
<chr> <int> <dbl> <chr> <chr>
1 R2-D2 96 32 red Droid
2 R5-D4 97 32 red Droid
# Sort the rows by increasing height.
df %>%
  arrange(height)
# A tibble: 87 × 5
name height mass eye_color species
<chr> <int> <dbl> <chr> <chr>
1 Yoda 66 17 brown Yoda's species
2 Ratts Tyerell 79 15 unknown Aleena
3 Wicket Systri Warrick 88 20 brown Ewok
4 Dud Bolt 94 45 yellow Vulptereen
5 R2-D2 96 32 red Droid
6 R4-P17 96 NA red, blue Droid
7 R5-D4 97 32 red Droid
8 Sebulba 112 40 orange Dug
9 Gasgano 122 NA black Xexto
10 Watto 137 NA yellow Toydarian
# … with 77 more rows
# Sort the rows by decreasing height.
df %>%
  arrange(desc(height))
# A tibble: 87 × 5
name height mass eye_color species
<chr> <int> <dbl> <chr> <chr>
1 Yarael Poof 264 NA yellow Quermian
2 Tarfful 234 136 blue Wookiee
3 Lama Su 229 88 black Kaminoan
4 Chewbacca 228 112 blue Wookiee
5 Roos Tarpals 224 82 orange Gungan
6 Grievous 216 159 green, yellow Kaleesh
7 Taun We 213 NA black Kaminoan
8 Rugor Nass 206 NA orange Gungan
9 Tion Medon 206 80 black Pau'an
10 Darth Vader 202 136 yellow Human
# … with 77 more rows
# Add a column NEASTILI holding the mass-to-height ratio.
df %>%
  mutate(NEASTILI = mass / height)
# A tibble: 87 × 6
name height mass eye_color species NEASTILI
<chr> <int> <dbl> <chr> <chr> <dbl>
1 Luke Skywalker 172 77 blue Human 0.448
2 C-3PO 167 75 yellow Droid 0.449
3 R2-D2 96 32 red Droid 0.333
4 Darth Vader 202 136 yellow Human 0.673
5 Leia Organa 150 49 brown Human 0.327
6 Owen Lars 178 120 blue Human 0.674
7 Beru Whitesun lars 165 75 blue Human 0.455
8 R5-D4 97 32 red Droid 0.330
9 Biggs Darklighter 183 84 brown Human 0.459
10 Obi-Wan Kenobi 182 77 blue-gray Human 0.423
# … with 77 more rows
# Compute the mass-to-height ratio, then list the largest ratios first.
df %>%
  mutate(NEASTILI = mass / height) %>%
  arrange(desc(NEASTILI))
# A tibble: 87 × 6
name height mass eye_color species NEASTILI
<chr> <int> <dbl> <chr> <chr> <dbl>
1 Jabba Desilijic Tiure 175 1358 orange Hutt 7.76
2 Grievous 216 159 green, yellow Kaleesh 0.736
3 IG-88 200 140 red Droid 0.7
4 Owen Lars 178 120 blue Human 0.674
5 Darth Vader 202 136 yellow Human 0.673
6 Jek Tono Porkins 180 110 blue Human 0.611
7 Bossk 190 113 red Trandoshan 0.595
8 Tarfful 234 136 blue Wookiee 0.581
9 Dexter Jettster 198 102 yellow Besalisk 0.515
10 Chewbacca 228 112 blue Wookiee 0.491
# … with 77 more rows
# Group the rows by eye colour; the printed rows are the same as in df,
# with the grouping reported in the header.
df %>%
  group_by(eye_color)
# A tibble: 87 × 5
# Groups: eye_color [15]
name height mass eye_color species
<chr> <int> <dbl> <chr> <chr>
1 Luke Skywalker 172 77 blue Human
2 C-3PO 167 75 yellow Droid
3 R2-D2 96 32 red Droid
4 Darth Vader 202 136 yellow Human
5 Leia Organa 150 49 brown Human
6 Owen Lars 178 120 blue Human
7 Beru Whitesun lars 165 75 blue Human
8 R5-D4 97 32 red Droid
9 Biggs Darklighter 183 84 brown Human
10 Obi-Wan Kenobi 182 77 blue-gray Human
# … with 77 more rows
# Per eye colour: number of rows (Plithos) and mean mass (MO), ignoring
# missing masses. TRUE is spelled out because T is an ordinary variable that
# can be reassigned.
df %>%
  group_by(eye_color) %>%
  summarise(
    Plithos = n(),
    MO = mean(mass, na.rm = TRUE)
  )
# A tibble: 15 × 3
eye_color Plithos MO
<chr> <int> <dbl>
1 black 10 76.3
2 blue 19 86.5
3 blue-gray 1 77
4 brown 21 66.1
5 dark 1 NaN
6 gold 1 NaN
7 green, yellow 1 159
8 hazel 3 66
9 orange 8 282.
10 pink 1 NaN
11 red 5 81.4
12 red, blue 1 NaN
13 unknown 3 31.5
14 white 1 48
15 yellow 11 81.1
# Per eye colour: number of rows (Plithos) and mean mass (MO), ignoring
# missing masses; then keep only the groups with more than one row and a
# mean mass above 50. TRUE is spelled out because T can be reassigned.
df %>%
  group_by(eye_color) %>%
  summarise(
    Plithos = n(),
    MO = mean(mass, na.rm = TRUE)
  ) %>%
  filter(Plithos > 1, MO > 50)
# A tibble: 7 × 3
eye_color Plithos MO
<chr> <int> <dbl>
1 black 10 76.3
2 blue 19 86.5
3 brown 21 66.1
4 hazel 3 66
5 orange 8 282.
6 red 5 81.4
7 yellow 11 81.1
Λήψη δεδομένων από την παγκόσμια τράπεζα δεδομένων για χώρες.
library(wbstats)
# World Bank indicator codes to download; the vector names become the column
# names of the returned table.
myvar <- c(
  gdp_capita = "NY.GDP.PCAP.CD",
  unemployment = "SL.UEM.TOTL.ZS",
  pop = "SP.POP.TOTL",
  birth_rate = "SP.DYN.CBRT.IN"
)
data <- wb_data(myvar, start_date = 2015, end_date = 2022)

# Keep only the observations for the year 2018.
ena <- data %>%
  filter(date == 2018)

# Select a single country.
# Show the row of ena whose country is Albania.
ena %>%
  filter(country == "Albania")
# A tibble: 1 × 8
iso2c iso3c country date gdp_capita unemployment birth_rate pop
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AL ALB Albania 2018 5288. 12.3 11.8 2866376
Διαχρονική εξέλιξη του μέσου παγκόσμιου ΑΕΠ
# Mean GDP per capita across all downloaded rows for each year, ignoring
# missing values. TRUE is spelled out because T can be reassigned.
duo <- data %>%
  group_by(date) %>%
  summarise(MO_gdp = mean(gdp_capita, na.rm = TRUE))
duo
# A tibble: 7 × 2
date MO_gdp
<dbl> <dbl>
1 2015 16688.
2 2016 16901.
3 2017 17763.
4 2018 18816.
5 2019 18691.
6 2020 16304.
7 2021 16047.
Διαχρονική εξέλιξη της μέσης παγκόσμιας ανεργίας
# Mean unemployment rate across all downloaded rows for each year, ignoring
# missing values. TRUE is spelled out because T can be reassigned.
tria <- data %>%
  group_by(date) %>%
  summarise(MO_unempl = mean(unemployment, na.rm = TRUE))
tria
# A tibble: 7 × 2
date MO_unempl
<dbl> <dbl>
1 2015 8.13
2 2016 7.99
3 2017 7.72
4 2018 7.48
5 2019 7.31
6 2020 8.51
7 2021 8.49
Διαχρονική εξέλιξη του συνολικού πληθυσμού (σε εκατομμύρια)
# Total population per year, summed over all downloaded rows and expressed
# in millions; missing values are ignored. TRUE is spelled out because T can
# be reassigned.
tes <- data %>%
  group_by(date) %>%
  summarise(pop_ekatom = sum(pop, na.rm = TRUE) / 1000000)
tes
# A tibble: 7 × 2
date pop_ekatom
<dbl> <dbl>
1 2015 7321.
2 2016 7407.
3 2017 7492.
4 2018 7576.
5 2019 7657.
6 2020 7737.
7 2021 7810.
Οπτικοποίηση του συνολικού πληθυσμού (σε εκατομμύρια)
library(ggplot2)
# Line chart of the total population (millions) by year; the line colour is
# mapped to the same population value.
ggplot(tes) +
  geom_line(aes(x = date, y = pop_ekatom, colour = pop_ekatom)) +
  labs(title = "Population (millions)", x = "Year")

# Select a subset of countries.
# Countries of interest, and the rows of the downloaded data that belong to
# them.
mycountries <- c("Greece", "France", "Italy", "Germany", "Turkiye")
mysel <- data %>%
  filter(country %in% mycountries)

# Visualise unemployment.
# Unemployment by year, one coloured line per selected country.
ggplot(mysel) +
  geom_line(aes(x = date, y = unemployment, colour = country)) +
  labs(title = "unemployment", x = "Year")

# Mean unemployment for the subset of countries.
# Mean unemployment per selected country over the downloaded years, ignoring
# missing values. TRUE is spelled out because T can be reassigned.
mysel %>%
  group_by(country) %>%
  summarise(MO_unempl = mean(unemployment, na.rm = TRUE))
# A tibble: 5 × 2
country MO_unempl
<chr> <dbl>
1 France 9.04
2 Germany 3.77
3 Greece 19.7
4 Italy 10.6
5 Turkiye 11.9
Λήψη δεδομένων για ευρύτερες γεωγραφικές περιοχές
# Same indicators and years as before, requested for the "regions_only"
# country selection instead of individual countries.
data2 <- wb_data(
  myvar,
  start_date = 2015,
  end_date = 2022,
  country = "regions_only"
)
head(data2)
# A tibble: 6 × 8
iso2c iso3c country date gdp_capita unemployment birth_rate pop
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Z4 EAS East Asia & Pacific 2015 9599. 4.07 13.5 2291503725
2 Z4 EAS East Asia & Pacific 2016 9868. 3.98 14.4 2307707227
3 Z4 EAS East Asia & Pacific 2017 10466. 3.86 13.7 2324120551
4 Z4 EAS East Asia & Pacific 2018 11324. 3.73 12.5 2338485387
5 Z4 EAS East Asia & Pacific 2019 11494. 3.82 12.1 2351127942
6 Z4 EAS East Asia & Pacific 2020 11484. 4.32 10.9 2361517682
Γραφήματα για τις ευρύτερες περιοχές
# One line chart per indicator, with one coloured line per region. Each plot
# is its own statement; they were previously run together on shared lines,
# which does not parse.

# GDP per capita.
ggplot(data2) +
  geom_line(aes(x = date, y = gdp_capita, colour = country)) +
  labs(title = "gdp_capita", x = "Year")

# Unemployment.
ggplot(data2) +
  geom_line(aes(x = date, y = unemployment, colour = country)) +
  labs(title = "unemployment", x = "Year")

# Birth rate.
ggplot(data2) +
  geom_line(aes(x = date, y = birth_rate, colour = country)) +
  labs(title = "birth_rate", x = "Year")

# Population in millions.
ggplot(data2) +
  geom_line(aes(x = date, y = pop / 1000000, colour = country)) +
  labs(title = "pop (millions)", x = "Year")