dplyr is an R package that provides a grammar of data manipulation. It makes access
to information in a dataset easier and faster. It provides a set of verbs (functions) used to transform data. Here, we will discuss some of its key functions using a COVID-19 dataset:
Examples
filter( )
# Load dplyr, which supplies filter() and the %>% pipe operator.
library("dplyr")
# Keep only the rows that hold information for a single country (Pakistan).
# The pipe (%>%) hands COVID_19 to filter() as its first argument.
COVID_19 %>%
  filter(location == "Pakistan")
![](https://static.wixstatic.com/media/56e222_3e8b3e66f45e4a429234c0fd6fa123a1~mv2.png/v1/fill/w_980,h_287,al_c,q_85,usm_0.66_1.00_0.01,enc_auto/56e222_3e8b3e66f45e4a429234c0fd6fa123a1~mv2.png)
mutate( )
# Create a new column holding cases minus deaths, i.e. the individuals
# counted as alive, and store the widened data set under a new name.
# NOTE(review): "Alive_individulas" looks like a misspelling of
# "Alive_individuals"; the column name is left unchanged here.
Alive_data <- COVID_19 %>%
  mutate(Alive_individulas = cases - deaths)
Alive_data
![](https://static.wixstatic.com/media/56e222_0dd25718d13347d8ad840d96068a451f~mv2.png/v1/fill/w_980,h_217,al_c,q_85,usm_0.66_1.00_0.01,enc_auto/56e222_0dd25718d13347d8ad840d96068a451f~mv2.png)
select( )
# Form a new, smaller data set made of just three columns:
# location, cases and deaths.
Data_location_cases_deaths <- COVID_19 %>%
  select(location, cases, deaths)
Data_location_cases_deaths
![](https://static.wixstatic.com/media/56e222_c288d1ff9c2b4be986d4c3dff9c531b5~mv2.png/v1/fill/w_977,h_287,al_c,q_85,enc_auto/56e222_c288d1ff9c2b4be986d4c3dff9c531b5~mv2.png)
summarize( )
# Collapse the whole data set into a single row holding the sum of cases.
# NOTE(review): sum() returns NA when cases contains missing values;
# confirm the column is complete, or pass na.rm = TRUE.
Total_cases <- COVID_19 %>%
  summarize(Total_cases = sum(cases))
Total_cases
![](https://static.wixstatic.com/media/56e222_73929ab2e1c747ecad20966c781d7153~mv2.jpg/v1/fill/w_911,h_161,al_c,q_80,enc_auto/56e222_73929ab2e1c747ecad20966c781d7153~mv2.jpg)
group_by( )
# Sum deaths separately for every location: group_by() defines the groups,
# and summarize() then returns one row per location.
Total_deaths <- COVID_19 %>%
  group_by(location) %>%
  summarize(Total_deaths = sum(deaths))
Total_deaths
![](https://static.wixstatic.com/media/56e222_af4922d6f5b74b8f868a4fdad6ececf2~mv2.png/v1/fill/w_980,h_286,al_c,q_85,usm_0.66_1.00_0.01,enc_auto/56e222_af4922d6f5b74b8f868a4fdad6ececf2~mv2.png)
Comments