1 Δεδομένα

https://dplyr.tidyverse.org/

library(dplyr)
# Keep five columns of the starwars dataset. They are chosen by name rather
# than by position, so the selection cannot silently shift if the column
# order of the dataset differs between dplyr versions.
df <- starwars[, c("name", "height", "mass", "eye_color", "species")]
df
# A tibble: 87 × 5
   name               height  mass eye_color species
   <chr>               <int> <dbl> <chr>     <chr>  
 1 Luke Skywalker        172    77 blue      Human  
 2 C-3PO                 167    75 yellow    Droid  
 3 R2-D2                  96    32 red       Droid  
 4 Darth Vader           202   136 yellow    Human  
 5 Leia Organa           150    49 brown     Human  
 6 Owen Lars             178   120 blue      Human  
 7 Beru Whitesun lars    165    75 blue      Human  
 8 R5-D4                  97    32 red       Droid  
 9 Biggs Darklighter     183    84 brown     Human  
10 Obi-Wan Kenobi        182    77 blue-gray Human  
# … with 77 more rows

2 Λεξιλόγιο ‘dplyr’

  • select() Επιλογή στηλών

  • filter() Επιλογή γραμμών

  • arrange() Αναδιοργάνωση, ιεράρχηση

  • mutate() Κατασκευή νέων στηλών

  • group_by() Ομαδοποίηση ‘split-apply-combine’ concept

  • summarise() Σύνοψη τιμών

2.1 select

# Keep only the name, mass and eye_color columns of df.
df %>%
  select(name, mass, eye_color)
# A tibble: 87 × 3
   name                mass eye_color
   <chr>              <dbl> <chr>    
 1 Luke Skywalker        77 blue     
 2 C-3PO                 75 yellow   
 3 R2-D2                 32 red      
 4 Darth Vader          136 yellow   
 5 Leia Organa           49 brown    
 6 Owen Lars            120 blue     
 7 Beru Whitesun lars    75 blue     
 8 R5-D4                 32 red      
 9 Biggs Darklighter     84 brown    
10 Obi-Wan Kenobi        77 blue-gray
# … with 77 more rows

2.2 filter (φιλτράρισμα)

# Keep the rows whose height is below 100.
df %>%
  filter(height < 100)
# A tibble: 7 × 5
  name                  height  mass eye_color species       
  <chr>                  <int> <dbl> <chr>     <chr>         
1 R2-D2                     96    32 red       Droid         
2 R5-D4                     97    32 red       Droid         
3 Yoda                      66    17 brown     Yoda's species
4 Wicket Systri Warrick     88    20 brown     Ewok          
5 Dud Bolt                  94    45 yellow    Vulptereen    
6 Ratts Tyerell             79    15 unknown   Aleena        
7 R4-P17                    96    NA red, blue Droid         
# Conditions separated by commas must all hold:
# height below 100 and red eye colour.
df %>%
  filter(
    height < 100,
    eye_color == "red"
  )
# A tibble: 2 × 5
  name  height  mass eye_color species
  <chr>  <int> <dbl> <chr>     <chr>  
1 R2-D2     96    32 red       Droid  
2 R5-D4     97    32 red       Droid  

2.3 arrange (διάταξη)

# Sort the rows by height, smallest first.
df %>%
  arrange(height)
# A tibble: 87 × 5
   name                  height  mass eye_color species       
   <chr>                  <int> <dbl> <chr>     <chr>         
 1 Yoda                      66    17 brown     Yoda's species
 2 Ratts Tyerell             79    15 unknown   Aleena        
 3 Wicket Systri Warrick     88    20 brown     Ewok          
 4 Dud Bolt                  94    45 yellow    Vulptereen    
 5 R2-D2                     96    32 red       Droid         
 6 R4-P17                    96    NA red, blue Droid         
 7 R5-D4                     97    32 red       Droid         
 8 Sebulba                  112    40 orange    Dug           
 9 Gasgano                  122    NA black     Xexto         
10 Watto                    137    NA yellow    Toydarian     
# … with 77 more rows
# Sort the rows by height, largest first.
df %>%
  arrange(desc(height))
# A tibble: 87 × 5
   name         height  mass eye_color     species 
   <chr>         <int> <dbl> <chr>         <chr>   
 1 Yarael Poof     264    NA yellow        Quermian
 2 Tarfful         234   136 blue          Wookiee 
 3 Lama Su         229    88 black         Kaminoan
 4 Chewbacca       228   112 blue          Wookiee 
 5 Roos Tarpals    224    82 orange        Gungan  
 6 Grievous        216   159 green, yellow Kaleesh 
 7 Taun We         213    NA black         Kaminoan
 8 Rugor Nass      206    NA orange        Gungan  
 9 Tion Medon      206    80 black         Pau'an  
10 Darth Vader     202   136 yellow        Human   
# … with 77 more rows

2.4 mutate

# Add a column NEASTILI holding the mass-to-height ratio.
df %>%
  mutate(NEASTILI = mass / height)
# A tibble: 87 × 6
   name               height  mass eye_color species NEASTILI
   <chr>               <int> <dbl> <chr>     <chr>      <dbl>
 1 Luke Skywalker        172    77 blue      Human      0.448
 2 C-3PO                 167    75 yellow    Droid      0.449
 3 R2-D2                  96    32 red       Droid      0.333
 4 Darth Vader           202   136 yellow    Human      0.673
 5 Leia Organa           150    49 brown     Human      0.327
 6 Owen Lars             178   120 blue      Human      0.674
 7 Beru Whitesun lars    165    75 blue      Human      0.455
 8 R5-D4                  97    32 red       Droid      0.330
 9 Biggs Darklighter     183    84 brown     Human      0.459
10 Obi-Wan Kenobi        182    77 blue-gray Human      0.423
# … with 77 more rows
# Compute the mass-to-height ratio, then list the largest ratios first.
df %>%
  mutate(NEASTILI = mass / height) %>%
  arrange(desc(NEASTILI))
# A tibble: 87 × 6
   name                  height  mass eye_color     species    NEASTILI
   <chr>                  <int> <dbl> <chr>         <chr>         <dbl>
 1 Jabba Desilijic Tiure    175  1358 orange        Hutt          7.76 
 2 Grievous                 216   159 green, yellow Kaleesh       0.736
 3 IG-88                    200   140 red           Droid         0.7  
 4 Owen Lars                178   120 blue          Human         0.674
 5 Darth Vader              202   136 yellow        Human         0.673
 6 Jek Tono Porkins         180   110 blue          Human         0.611
 7 Bossk                    190   113 red           Trandoshan    0.595
 8 Tarfful                  234   136 blue          Wookiee       0.581
 9 Dexter Jettster          198   102 yellow        Besalisk      0.515
10 Chewbacca                228   112 blue          Wookiee       0.491
# … with 77 more rows

2.5 group_by (ομαδοποίηση)

# Mark df as grouped by eye_color; the rows themselves are unchanged.
df %>%
  group_by(eye_color)
# A tibble: 87 × 5
# Groups:   eye_color [15]
   name               height  mass eye_color species
   <chr>               <int> <dbl> <chr>     <chr>  
 1 Luke Skywalker        172    77 blue      Human  
 2 C-3PO                 167    75 yellow    Droid  
 3 R2-D2                  96    32 red       Droid  
 4 Darth Vader           202   136 yellow    Human  
 5 Leia Organa           150    49 brown     Human  
 6 Owen Lars             178   120 blue      Human  
 7 Beru Whitesun lars    165    75 blue      Human  
 8 R5-D4                  97    32 red       Droid  
 9 Biggs Darklighter     183    84 brown     Human  
10 Obi-Wan Kenobi        182    77 blue-gray Human  
# … with 77 more rows

2.6 summarise (σύνοψη)

# Per eye colour: number of rows (Plithos) and mean mass (MO), ignoring
# missing masses. A group with no recorded mass yields NaN.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
df %>%
  group_by(eye_color) %>%
  summarise(Plithos = n(), MO = mean(mass, na.rm = TRUE))
# A tibble: 15 × 3
   eye_color     Plithos    MO
   <chr>           <int> <dbl>
 1 black              10  76.3
 2 blue               19  86.5
 3 blue-gray           1  77  
 4 brown              21  66.1
 5 dark                1 NaN  
 6 gold                1 NaN  
 7 green, yellow       1 159  
 8 hazel               3  66  
 9 orange              8 282. 
10 pink                1 NaN  
11 red                 5  81.4
12 red, blue           1 NaN  
13 unknown             3  31.5
14 white               1  48  
15 yellow             11  81.1
# Per eye colour: number of rows (Plithos) and mean mass (MO), ignoring
# missing masses; then keep only the groups with more than one row and a
# mean mass above 50.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
df %>%
  group_by(eye_color) %>%
  summarise(Plithos = n(), MO = mean(mass, na.rm = TRUE)) %>%
  filter(Plithos > 1, MO > 50)
# A tibble: 7 × 3
  eye_color Plithos    MO
  <chr>       <int> <dbl>
1 black          10  76.3
2 blue           19  86.5
3 brown          21  66.1
4 hazel           3  66  
5 orange          8 282. 
6 red             5  81.4
7 yellow         11  81.1

3 Δεδομένα Χωρών

Λήψη δεδομένων χωρών από τη βάση δεδομένων της Παγκόσμιας Τράπεζας.

library(wbstats)

# World Bank indicator codes; each name becomes the column name of that
# indicator in the downloaded table.
myvar <- c(
  gdp_capita = "NY.GDP.PCAP.CD",
  unemployment = "SL.UEM.TOTL.ZS",
  pop = "SP.POP.TOTL",
  birth_rate = "SP.DYN.CBRT.IN"
)
# Request the indicators for the years 2015 through 2022.
data <- wb_data(
  myvar,
  start_date = 2015,
  end_date = 2022
)

Επιλογή μόνο των δεδομένων του έτους 2018

# Rows of the downloaded data that belong to the year 2018.
ena <- data %>%
  filter(date == 2018)

Επιλογή μόνο μιας χώρας

# Show the 2018 record for Albania.
ena %>%
  filter(country == "Albania")
# A tibble: 1 × 8
  iso2c iso3c country  date gdp_capita unemployment birth_rate     pop
  <chr> <chr> <chr>   <dbl>      <dbl>        <dbl>      <dbl>   <dbl>
1 AL    ALB   Albania  2018      5288.         12.3       11.8 2866376

Διαχρονική εξέλιξη του μέσου παγκόσμιου ΑΕΠ

# Yearly mean of GDP per capita over all rows of `data` (an unweighted
# mean of the per-row values), skipping missing values.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
duo <- data %>%
  group_by(date) %>%
  summarise(MO_gdp = mean(gdp_capita, na.rm = TRUE))
duo
# A tibble: 7 × 2
   date MO_gdp
  <dbl>  <dbl>
1  2015 16688.
2  2016 16901.
3  2017 17763.
4  2018 18816.
5  2019 18691.
6  2020 16304.
7  2021 16047.

Διαχρονική εξέλιξη της μέσης παγκόσμιας ανεργίας

# Yearly mean of the unemployment indicator over all rows of `data`
# (an unweighted mean of the per-row values), skipping missing values.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
tria <- data %>%
  group_by(date) %>%
  summarise(MO_unempl = mean(unemployment, na.rm = TRUE))
tria
# A tibble: 7 × 2
   date MO_unempl
  <dbl>     <dbl>
1  2015      8.13
2  2016      7.99
3  2017      7.72
4  2018      7.48
5  2019      7.31
6  2020      8.51
7  2021      8.49

Διαχρονική εξέλιξη του συνολικού πληθυσμού (σε εκατομμύρια)

# Yearly total population over all rows of `data`, expressed in millions;
# missing values are skipped.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
tes <- data %>%
  group_by(date) %>%
  summarise(pop_ekatom = sum(pop, na.rm = TRUE) / 1e6)
tes
# A tibble: 7 × 2
   date pop_ekatom
  <dbl>      <dbl>
1  2015      7321.
2  2016      7407.
3  2017      7492.
4  2018      7576.
5  2019      7657.
6  2020      7737.
7  2021      7810.

Οπτικοποίηση του συνολικού πληθυσμού (σε εκατομμύρια)

library(ggplot2)
# Line chart of the yearly total population (millions); the line colour is
# mapped to the same population value.
ggplot(tes) +
  geom_line(
    aes(x = date, y = pop_ekatom, colour = pop_ekatom)
  ) +
  labs(title = "Population (millions)", x = "Year")

3.1 Σύγκριση μεταξύ χωρών

Επιλογή ενός υποσυνόλου χωρών

# Countries to compare, matched against the `country` column.
mycountries <- c("Greece", "France", "Italy", "Germany", "Turkiye")
# Rows of `data` that belong to one of those countries.
mysel <- data %>%
  filter(country %in% mycountries)

Οπτικοποίηση της ανεργίας

# Unemployment over time, one coloured line per selected country.
ggplot(mysel) +
  geom_line(
    aes(x = date, y = unemployment, colour = country)
  ) +
  labs(title = "unemployment", x = "Year")

Μέσος όρος ανεργίας για το υποσύνολο των χωρών

# Mean unemployment per selected country over all downloaded years,
# skipping missing values.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
mysel %>%
  group_by(country) %>%
  summarise(MO_unempl = mean(unemployment, na.rm = TRUE))
# A tibble: 5 × 2
  country MO_unempl
  <chr>       <dbl>
1 France       9.04
2 Germany      3.77
3 Greece      19.7 
4 Italy       10.6 
5 Turkiye     11.9 

3.2 Ευρύτερες περιοχές

Λήψη δεδομένων για ευρύτερες γεωγραφικές περιοχές

# Same indicators and years as before, requested with
# country = "regions_only" instead of the default.
data2 <- wb_data(
  myvar,
  start_date = 2015,
  end_date = 2022,
  country = "regions_only"
)
# Preview the first rows.
head(data2)
# A tibble: 6 × 8
  iso2c iso3c country              date gdp_capita unemployment birth_rate        pop
  <chr> <chr> <chr>               <dbl>      <dbl>        <dbl>      <dbl>      <dbl>
1 Z4    EAS   East Asia & Pacific  2015      9599.         4.07       13.5 2291503725
2 Z4    EAS   East Asia & Pacific  2016      9868.         3.98       14.4 2307707227
3 Z4    EAS   East Asia & Pacific  2017     10466.         3.86       13.7 2324120551
4 Z4    EAS   East Asia & Pacific  2018     11324.         3.73       12.5 2338485387
5 Z4    EAS   East Asia & Pacific  2019     11494.         3.82       12.1 2351127942
6 Z4    EAS   East Asia & Pacific  2020     11484.         4.32       10.9 2361517682

Γραφήματα για τις ευρύτερες περιοχές

# GDP per capita over time, one coloured line per region.
ggplot(data2) +
  geom_line(
    aes(x = date, y = gdp_capita, colour = country)
  ) +
  labs(title = "gdp_capita", x = "Year")

# Unemployment over time, one coloured line per region.
ggplot(data2) +
  geom_line(
    aes(x = date, y = unemployment, colour = country)
  ) +
  labs(title = "unemployment", x = "Year")

# Birth rate over time, one coloured line per region.
ggplot(data2) +
  geom_line(
    aes(x = date, y = birth_rate, colour = country)
  ) +
  labs(title = "birth_rate", x = "Year")

# Population in millions over time, one coloured line per region.
ggplot(data2) +
  geom_line(
    aes(x = date, y = pop / 1000000, colour = country)
  ) +
  labs(title = "pop (millions)", x = "Year")