# Load the built-in mtcars data set into the workspace.
data("mtcars")

# Preview the first six rows.
head(mtcars, n = 6L)

# Per-column summary statistics (min, quartiles, median, mean, max).
summary(object = mtcars)

      mpg             cyl             disp             hp       
 Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
 1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
 Median :19.20   Median :6.000   Median :196.3   Median :123.0  
 Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
 3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
 Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
      drat             wt             qsec             vs        
 Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
 1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
 Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
 Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
 3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
 Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
       am              gear            carb      
 Min.   :0.0000   Min.   :3.000   Min.   :1.000  
 1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
 Median :0.0000   Median :4.000   Median :2.000  
 Mean   :0.4062   Mean   :3.688   Mean   :2.812  
 3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
 Max.   :1.0000   Max.   :5.000   Max.   :8.000

# Frequency table of the distinct values in every column.
# lapply() iterates over the data-frame columns directly and always returns a
# named list. apply() would first coerce the whole data frame to a matrix
# (losing column types) and could simplify the result to a matrix whenever
# all tables happen to have the same length.
lapply(mtcars, table)

$mpg

10.4 13.3 14.3 14.7   15 15.2 15.5 15.8 16.4 17.3 17.8 18.1 18.7 19.2 19.7   21 
   2    1    1    1    1    2    1    1    1    1    1    1    1    2    1    2 
21.4 21.5 22.8 24.4   26 27.3 30.4 32.4 33.9 
   2    1    2    1    1    1    2    1    1 

$cyl

 4  6  8 
11  7 14 

$disp

 71.1  75.7  78.7    79  95.1   108 120.1 120.3   121 140.8   145 146.7   160 
    1     1     1     1     1     1     1     1     1     1     1     1     2 
167.6   225   258 275.8   301   304   318   350   351   360   400   440   460 
    2     1     1     3     1     1     1     1     1     2     1     1     1 
  472 
    1 

$hp

 52  62  65  66  91  93  95  97 105 109 110 113 123 150 175 180 205 215 230 245 
  1   1   1   2   1   1   1   1   1   1   3   1   2   2   3   3   1   1   1   2 
264 335 
  1   1 

$drat

2.76 2.93    3 3.07 3.08 3.15 3.21 3.23 3.54 3.62 3.69  3.7 3.73 3.77 3.85  3.9 
   2    1    1    3    2    2    1    1    1    1    1    1    1    1    1    2 
3.92 4.08 4.11 4.22 4.43 4.93 
   3    2    1    2    1    1 

$wt

1.513 1.615 1.835 1.935  2.14   2.2  2.32 2.465  2.62  2.77  2.78 2.875  3.15 
    1     1     1     1     1     1     1     1     1     1     1     1     1 
 3.17  3.19 3.215 3.435  3.44  3.46  3.52  3.57  3.73  3.78  3.84 3.845  4.07 
    1     1     1     1     3     1     1     2     1     1     1     1     1 
 5.25 5.345 5.424 
    1     1     1 

$qsec

 14.5  14.6 15.41  15.5 15.84 16.46  16.7 16.87  16.9 17.02 17.05  17.3  17.4 
    1     1     1     1     1     1     1     1     1     2     1     1     1 
17.42  17.6 17.82 17.98    18  18.3 18.52  18.6 18.61  18.9 19.44 19.47  19.9 
    1     1     1     1     1     1     1     1     1     2     1     1     1 
   20 20.01 20.22  22.9 
    1     1     1     1 

$vs

 0  1 
18 14 

$am

 0  1 
19 13 

$gear

 3  4  5 
15 12  5 

$carb

 1  2  3  4  6  8 
 7 10  3 10  1  1

# Columns whose numeric codes can serve directly as factor levels.
mtcars[c("cyl", "carb", "gear")] <- lapply(
  mtcars[c("cyl", "carb", "gear")],
  as.factor
)

# Binary 0/1 columns get descriptive labels (first label = 0, second = 1).
mtcars$am <- factor(mtcars$am, labels = c("automatic", "manual"))
mtcars$vs <- factor(mtcars$vs, labels = c("V-shaped", "straight"))

# Summarise again: factor columns are now reported as level counts.
summary(object = mtcars)

      mpg        cyl         disp             hp             drat      
 Min.   :10.40   4:11   Min.   : 71.1   Min.   : 52.0   Min.   :2.760  
 1st Qu.:15.43   6: 7   1st Qu.:120.8   1st Qu.: 96.5   1st Qu.:3.080  
 Median :19.20   8:14   Median :196.3   Median :123.0   Median :3.695  
 Mean   :20.09          Mean   :230.7   Mean   :146.7   Mean   :3.597  
 3rd Qu.:22.80          3rd Qu.:326.0   3rd Qu.:180.0   3rd Qu.:3.920  
 Max.   :33.90          Max.   :472.0   Max.   :335.0   Max.   :4.930  
       wt             qsec              vs             am     gear   carb  
 Min.   :1.513   Min.   :14.50   V-shaped:18   automatic:19   3:15   1: 7  
 1st Qu.:2.581   1st Qu.:16.89   straight:14   manual   :13   4:12   2:10  
 Median :3.325   Median :17.71                                5: 5   3: 3  
 Mean   :3.217   Mean   :17.85                                       4:10  
 3rd Qu.:3.610   3rd Qu.:18.90                                       6: 1  
 Max.   :5.424   Max.   :22.90                                       8: 1

# install.packages('dplyr') # if you didn't do that earlier
library(dplyr)

Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

# Nested form of the pipeline; read it from the innermost call outwards:
#   1. keep cars with more than one carburetor,
#   2. group them by cylinder count,
#   3. compute the mean mpg per group,
#   4. sort the groups by that mean, highest first.
# carb may be a factor at this point; as.numeric() on a factor returns the
# internal level codes, so convert via as.character() to compare real values.
arrange(
  summarize(
    group_by(
      filter(mtcars, as.numeric(as.character(carb)) > 1),
      cyl
    ),
    Avg_mpg = mean(mpg)
  ),
  desc(Avg_mpg)
)

# Same analysis, storing every intermediate result in its own object.
# carb may be a factor at this point; as.numeric() on a factor returns the
# internal level codes, so convert via as.character() to compare real values.
a <- filter(mtcars, as.numeric(as.character(carb)) > 1)  # more than one carburetor
b <- group_by(a, cyl)                                    # group by cylinder count
c <- summarise(b, Avg_mpg = mean(mpg))                   # mean mpg per group
d <- arrange(c, desc(Avg_mpg))                           # highest mean first
print(d)

# A tibble: 3 x 2
  cyl   Avg_mpg
  <fct>   <dbl>
1 4        25.9
2 6        19.7
3 8        15.1

# Same analysis written as a pipe chain, one step per line.
# carb may be a factor at this point; as.numeric() on a factor returns the
# internal level codes, so convert via as.character() to compare real values.
mtcars %>%
  filter(as.numeric(as.character(carb)) > 1) %>%
  group_by(cyl) %>%
  summarise(Avg_mpg = mean(mpg)) %>%
  arrange(desc(Avg_mpg))

library(dplyr)

# Keep two columns: direct call first, then the equivalent piped call.
select(mtcars, mpg, cyl)

mtcars %>%
  select(mpg, cyl)

# Keep a contiguous range of columns, mpg through qsec.
mtcars %>%
  select(mpg:qsec)

# Drop that same range instead of keeping it.
mtcars %>%
  select(-(mpg:qsec))

# Store the reduced data frame under a new name and print it.
mtcars2 <- mtcars %>%
  select(-(mpg:qsec))
mtcars2

# Selection helpers that match on column names.
mtcars %>%
  select(starts_with("c"))

mtcars %>%
  select(ends_with("t"))

mtcars %>%
  select(contains("a"))

# Regular expression: names starting with either c or m.
mtcars %>%
  select(matches("^(c|m)"))

# select() can rename while selecting (only the named column is kept) ...
mtcars %>%
  select(MilesPerGallon = mpg)

# ... whereas rename() renames and keeps every other column.
mtcars %>%
  rename(MilesPerGallon = mpg)

# Rows with four cylinders.
mtcars %>%
  filter(cyl == 4)

# Several conditions separated by commas are combined with AND.
mtcars %>%
  filter(cyl == 4, hp > 90)

# Four or six cylinders, and more than 110 hp.
mtcars %>%
  filter((cyl == 4 | cyl == 6) & hp > 110)

# Same rows, using %in% for the set membership test.
mtcars %>%
  filter(cyl %in% c(4, 6), hp > 110)

# Grouping alone only attaches the grouping variable; the rows are unchanged.
group_by(mtcars, cyl)

# Without groups, summarise() collapses the whole table to a single row.
summarise(mtcars, mean = mean(hp))

# With groups, it produces one row per cyl value.
mtcars %>%
  group_by(cyl) %>%
  summarise(mean = mean(hp))

# Count, mean and standard deviation of mpg and hp for the 4- and 6-cylinder
# cars, one row per cylinder group.
# na.rm is spelled TRUE rather than T: T is an ordinary variable that can be
# reassigned, TRUE is a reserved constant.
mtcars %>%
  select(cyl, mpg, hp) %>%
  filter(cyl %in% c(4, 6)) %>%
  group_by(cyl) %>%
  summarise(
    count = n(),
    mean_mpg = mean(mpg, na.rm = TRUE),
    sd_mpg = sd(mpg, na.rm = TRUE),
    mean_hp = mean(hp, na.rm = TRUE),
    sd_hp = sd(hp, na.rm = TRUE)
  )

# Sort ascending by cyl.
arrange(mtcars, cyl)

# Sort descending by mpg.
arrange(mtcars, desc(mpg))

# Sort by cyl first, then by descending mpg within each cyl value.
arrange(mtcars, cyl, desc(mpg))

# mutate() adds g100m = 100 / mpg (rounded to 3 decimals) and keeps all
# existing columns; rows are then sorted by the new column.
mtcars %>%
  mutate(g100m = round(1 / mpg * 100, 3)) %>%
  arrange(g100m)

# transmute() keeps only the newly computed column; round() is then applied
# to the resulting one-column data frame before sorting.
mtcars %>%
  transmute(g100m = 1 / mpg * 100) %>%
  round(digits = 3) %>%
  arrange(g100m)

# install.packages("ggplot2")  # If you did not install it previously
library(ggplot2)

Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang

# Scatter plot of disp against mpg.
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point()

# Histogram of mpg with the default number of bins.
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with a fixed fill colour (set outside aes()).
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(fill = "blue")

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Bar chart: number of rows per cyl value.
ggplot(mtcars, aes(x = cyl)) +
  geom_bar()

# Single box plot of mpg.
ggplot(mtcars, aes(y = mpg)) +
  geom_boxplot()

# Box plots of mpg split by cyl.
ggplot(mtcars, aes(x = cyl, y = mpg)) +
  geom_boxplot()

# Histogram of mpg with a blue fill.
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(fill = "blue")

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Each aesthetic is shown twice: first set to a constant (outside aes()),
# then mapped to a variable (inside aes()).

# colour: constant, mapped to cyl, mapped to hp
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point(colour = "blue")

ggplot(mtcars, aes(x = mpg, y = disp, colour = cyl)) +
  geom_point()

ggplot(mtcars, aes(x = mpg, y = disp, colour = hp)) +
  geom_point()

# size: constant, then mapped to hp
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point(size = 10)

ggplot(mtcars, aes(x = mpg, y = disp, size = hp)) +
  geom_point()

# alpha (transparency): constant, then mapped to hp
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point(alpha = 0.3)

ggplot(mtcars, aes(x = mpg, y = disp, alpha = hp)) +
  geom_point()

# shape: constant symbol code, then mapped to cyl
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point(shape = 25)

ggplot(mtcars, aes(x = mpg, y = disp, shape = cyl)) +
  geom_point()

# One panel per cyl value, laid out in a single row.
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point() +
  facet_wrap(~ cyl, nrow = 1)

# One panel per am/cyl combination, laid out in two rows.
ggplot(mtcars, aes(x = mpg, y = disp)) +
  geom_point() +
  facet_wrap(am ~ cyl, nrow = 2)

# geom_bar() counts the rows for each x value itself.
ggplot(mtcars, aes(x = cyl)) +
  geom_bar()

# geom_col() uses the supplied y values as bar heights.
ggplot(mtcars, aes(x = as.factor(cyl), y = mpg)) +
  geom_col()

library(dplyr)

# Compute the mean mpg per cyl value first, then draw one bar per group.
mtcars %>%
  group_by(cyl) %>%
  summarise(mean_mpg = mean(mpg)) %>%
  ggplot(aes(x = as.factor(cyl), y = mean_mpg)) +
  geom_col() +
  labs(x = "Number of Cylinders", y = "Mean mpg")

# For bars, colour affects only the outline ...
ggplot(mtcars, aes(x = cyl, colour = cyl)) +
  geom_bar()

# ... while fill colours the bar itself.
ggplot(mtcars, aes(x = cyl, fill = cyl)) +
  geom_bar()

# Same filled bar chart with a title, axis labels and a minimal theme.
ggplot(mtcars, aes(x = cyl, fill = cyl)) +
  geom_bar() +
  labs(
    title = "Barplot for the number of cylinders",
    x = "Number of cylinders",
    y = "frequency"
  ) +
  theme_minimal()

# Compact overview of each column's type and first values.
str(object = mtcars)

'data.frame':	32 obs. of  11 variables:
 $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
 $ cyl : Factor w/ 3 levels "4","6","8": 2 2 1 2 3 2 3 1 1 2 ...
 $ disp: num  160 160 108 258 360 ...
 $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
 $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
 $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
 $ qsec: num  16.5 17 18.6 19.4 17 ...
 $ vs  : Factor w/ 2 levels "V-shaped","straight": 1 1 2 2 1 2 1 2 2 2 ...
 $ am  : Factor w/ 2 levels "automatic","manual": 2 2 2 1 1 1 1 1 1 1 ...
 $ gear: Factor w/ 3 levels "3","4","5": 2 2 2 1 1 1 1 2 2 2 ...
 $ carb: Factor w/ 6 levels "1","2","3","4",..: 4 4 1 1 2 1 4 2 2 4 ...

# Scatter-plot matrix of the continuous columns (columns 1 and 3 to 7).
pairs(mtcars[, c("mpg", "disp", "hp", "drat", "wt", "qsec")])

# Read the Pima data from a CSV file in the working directory.
# stringsAsFactors = TRUE makes text columns (here Diabetes) factors on every
# R version; since R 4.0.0 read.csv() would otherwise return them as
# character, which would not match the str() output recorded below.
pima <- read.csv("pima.csv", stringsAsFactors = TRUE)

# Compact overview of each column's type and first values.
str(pima)

'data.frame':	768 obs. of  9 variables:
 $ NTP     : int  6 1 8 1 0 5 3 10 2 8 ...
 $ PGC     : int  148 85 183 89 137 116 78 115 197 125 ...
 $ DBP     : int  72 66 64 66 40 74 50 0 70 96 ...
 $ TSFT    : int  35 29 0 23 35 0 32 0 45 0 ...
 $ SI      : int  0 0 0 94 168 0 88 0 543 0 ...
 $ BMI     : num  33.6 26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 0 ...
 $ DPF     : num  0.627 0.351 0.672 0.167 2.288 ...
 $ Age     : int  50 31 32 21 33 30 26 29 53 54 ...
 $ Diabetes: Factor w/ 2 levels "negative","positive": 2 1 2 1 2 1 2 1 2 2 ...

# Preview the first six rows.
head(pima, n = 6L)

# Per-column summary statistics.
summary(object = pima)

      NTP              PGC             DBP              TSFT      
 Min.   : 0.000   Min.   :  0.0   Min.   :  0.00   Min.   : 0.00  
 1st Qu.: 1.000   1st Qu.: 99.0   1st Qu.: 62.00   1st Qu.: 0.00  
 Median : 3.000   Median :117.0   Median : 72.00   Median :23.00  
 Mean   : 3.845   Mean   :120.9   Mean   : 69.11   Mean   :20.54  
 3rd Qu.: 6.000   3rd Qu.:140.2   3rd Qu.: 80.00   3rd Qu.:32.00  
 Max.   :17.000   Max.   :199.0   Max.   :122.00   Max.   :99.00  
       SI             BMI             DPF              Age       
 Min.   :  0.0   Min.   : 0.00   Min.   :0.0780   Min.   :21.00  
 1st Qu.:  0.0   1st Qu.:27.30   1st Qu.:0.2437   1st Qu.:24.00  
 Median : 30.5   Median :32.00   Median :0.3725   Median :29.00  
 Mean   : 79.8   Mean   :31.99   Mean   :0.4719   Mean   :33.24  
 3rd Qu.:127.2   3rd Qu.:36.60   3rd Qu.:0.6262   3rd Qu.:41.00  
 Max.   :846.0   Max.   :67.10   Max.   :2.4200   Max.   :81.00  
     Diabetes  
 negative:500  
 positive:268

# Bin Age into the right-closed intervals (20,40], (40,60] and (60,81].
pima[["AgeGroups"]] <- cut(x = pima[["Age"]], breaks = c(20, 40, 60, 81))

# Number of rows falling into each age group.
table(pima[["AgeGroups"]])

(20,40] (40,60] (60,81] 
    574     167      27

# Row counts per age group, one fill colour per group.
ggplot(pima, aes(x = AgeGroups, fill = AgeGroups)) +
  geom_bar()

# Same counts, each bar stacked by Diabetes.
ggplot(pima, aes(x = AgeGroups, fill = Diabetes)) +
  geom_bar()

# Diabetes levels side by side instead of stacked.
ggplot(pima, aes(x = AgeGroups, fill = Diabetes)) +
  geom_bar(position = "dodge")

# geom_col() with the raw BMI values as bar heights, stacked ...
ggplot(pima, aes(x = AgeGroups, y = BMI, fill = Diabetes)) +
  geom_col()

# ... and with the Diabetes levels side by side.
ggplot(pima, aes(x = AgeGroups, y = BMI, fill = Diabetes)) +
  geom_col(position = "dodge")

# Mean BMI for every combination of age group and Diabetes.
pima %>%
  group_by(AgeGroups, Diabetes) %>%
  summarise(mean_BMI = mean(BMI))

# Plot those means: one bar per age group, one panel per Diabetes level.
# show.legend is spelled FALSE rather than F: F is an ordinary variable that
# can be reassigned, FALSE is a reserved constant.
ggplot(
  data = pima %>%
    group_by(AgeGroups, Diabetes) %>%
    summarise(mean_BMI = mean(BMI))
) +
  geom_col(aes(x = AgeGroups, y = mean_BMI, fill = AgeGroups),
           show.legend = FALSE) +
  facet_wrap(~Diabetes) +
  ylab("Mean BMI") +
  xlab("Age Groups") +
  theme_bw()

# BMI histogram with bins of width 5.
ggplot(pima) +
  geom_histogram(aes(x = BMI), binwidth = 5, fill = "blue") +
  theme_bw()

# Wider bins (width 10) give a coarser picture.
ggplot(pima) +
  geom_histogram(aes(x = BMI), binwidth = 10, fill = "green") +
  theme_bw()

# One BMI histogram panel per Diabetes level.
ggplot(pima) +
  geom_histogram(aes(x = BMI), binwidth = 10, fill = "red") +
  theme_bw() +
  facet_grid(. ~ Diabetes)

# NTP histogram per age group, bars stacked by Diabetes.
ggplot(pima) +
  geom_histogram(aes(x = NTP, fill = Diabetes), binwidth = 3) +
  theme_bw() +
  facet_grid(. ~ AgeGroups)

# Same, with the Diabetes levels side by side.
ggplot(pima) +
  geom_histogram(
    aes(x = NTP, fill = Diabetes),
    binwidth = 3,
    position = "dodge"
  ) +
  theme_bw() +
  facet_grid(. ~ AgeGroups)

# Single box plot of NTP.
ggplot(pima, aes(y = NTP)) +
  geom_boxplot() +
  labs(title = "Boxplot of NTP")

# NTP box plots split by age group.
ggplot(pima, aes(x = AgeGroups, y = NTP)) +
  geom_boxplot()

# Same split, with one panel per Diabetes level.
ggplot(pima, aes(x = AgeGroups, y = NTP)) +
  geom_boxplot() +
  facet_grid(. ~ Diabetes)

# Scatter plot of DBP against PGC.
ggplot(pima, aes(x = PGC, y = DBP)) +
  geom_point()

# Same plot with points coloured by Diabetes.
ggplot(pima, aes(x = PGC, y = DBP, colour = Diabetes)) +
  geom_point()

# Structure of the data frame, now including the AgeGroups column.
str(object = pima)

'data.frame':	768 obs. of  10 variables:
 $ NTP      : int  6 1 8 1 0 5 3 10 2 8 ...
 $ PGC      : int  148 85 183 89 137 116 78 115 197 125 ...
 $ DBP      : int  72 66 64 66 40 74 50 0 70 96 ...
 $ TSFT     : int  35 29 0 23 35 0 32 0 45 0 ...
 $ SI       : int  0 0 0 94 168 0 88 0 543 0 ...
 $ BMI      : num  33.6 26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 0 ...
 $ DPF      : num  0.627 0.351 0.672 0.167 2.288 ...
 $ Age      : int  50 31 32 21 33 30 26 29 53 54 ...
 $ Diabetes : Factor w/ 2 levels "negative","positive": 2 1 2 1 2 1 2 1 2 2 ...
 $ AgeGroups: Factor w/ 3 levels "(20,40]","(40,60]",..: 2 1 1 1 1 1 1 1 2 2 ...

# Scatter-plot matrix of the first eight columns, points coloured by Diabetes.
pairs(pima[, seq_len(8L)], col = as.factor(pima$Diabetes))

cyl	Avg_mpg
4	25.90
6	19.74
8	15.10

cyl	Avg_mpg
4	25.90
6	19.74
8	15.10

	cyl	carb
Mazda RX4	6	4
Mazda RX4 Wag	6	4
Datsun 710	4	1
Hornet 4 Drive	6	1
Hornet Sportabout	8	2
Valiant	6	1
Duster 360	8	4
Merc 240D	4	2
Merc 230	4	2
Merc 280	6	4
Merc 280C	6	4
Merc 450SE	8	3
Merc 450SL	8	3
Merc 450SLC	8	3
Cadillac Fleetwood	8	4
Lincoln Continental	8	4
Chrysler Imperial	8	4
Fiat 128	4	1
Honda Civic	4	2
Toyota Corolla	4	1
Toyota Corona	4	1
Dodge Challenger	8	2
AMC Javelin	8	2
Camaro Z28	8	4
Pontiac Firebird	8	2
Fiat X1-9	4	1
Porsche 914-2	4	2
Lotus Europa	4	2
Ford Pantera L	8	4
Ferrari Dino	6	6
Maserati Bora	8	8
Volvo 142E	4	2

	drat	wt
Mazda RX4	3.90	2.620
Mazda RX4 Wag	3.90	2.875
Datsun 710	3.85	2.320
Hornet 4 Drive	3.08	3.215
Hornet Sportabout	3.15	3.440
Valiant	2.76	3.460
Duster 360	3.21	3.570
Merc 240D	3.69	3.190
Merc 230	3.92	3.150
Merc 280	3.92	3.440
Merc 280C	3.92	3.440
Merc 450SE	3.07	4.070
Merc 450SL	3.07	3.730
Merc 450SLC	3.07	3.780
Cadillac Fleetwood	2.93	5.250
Lincoln Continental	3.00	5.424
Chrysler Imperial	3.23	5.345
Fiat 128	4.08	2.200
Honda Civic	4.93	1.615
Toyota Corolla	4.22	1.835
Toyota Corona	3.70	2.465
Dodge Challenger	2.76	3.520
AMC Javelin	3.15	3.435
Camaro Z28	3.73	3.840
Pontiac Firebird	3.08	3.845
Fiat X1-9	4.08	1.935
Porsche 914-2	4.43	2.140
Lotus Europa	3.77	1.513
Ford Pantera L	4.22	3.170
Ferrari Dino	3.62	2.770
Maserati Bora	3.54	3.570
Volvo 142E	4.11	2.780

	MilesPerGallon
Mazda RX4	21.0
Mazda RX4 Wag	21.0
Datsun 710	22.8
Hornet 4 Drive	21.4
Hornet Sportabout	18.7
Valiant	18.1
Duster 360	14.3
Merc 240D	24.4
Merc 230	22.8
Merc 280	19.2
Merc 280C	17.8
Merc 450SE	16.4
Merc 450SL	17.3
Merc 450SLC	15.2
Cadillac Fleetwood	10.4
Lincoln Continental	10.4
Chrysler Imperial	14.7
Fiat 128	32.4
Honda Civic	30.4
Toyota Corolla	33.9
Toyota Corona	21.5
Dodge Challenger	15.5
AMC Javelin	15.2
Camaro Z28	13.3
Pontiac Firebird	19.2
Fiat X1-9	27.3
Porsche 914-2	26.0
Lotus Europa	30.4
Ford Pantera L	15.8
Ferrari Dino	19.7
Maserati Bora	15.0
Volvo 142E	21.4

Week 11 - Exploratory Data Analysis (EDA), data manipulation (dplyr) and data visualization (ggplot2) with R.¶

Exploratory Data Analysis (EDA)¶

Example mtcars dataset¶

Data Manipulation with dplyr¶

The Pipe Operator (%>%)¶

Nested option¶

Multiple Object Option¶

%>% (Pipe) Option¶

Using dplyr to manipulate data¶

select()¶

filter()¶

group_by() and summarise()¶

arrange()¶

mutate()¶

Data Visualization in R¶

ggplot2¶

Aesthetics¶

Geometries¶

Install ggplot2¶

Some basic graphs with ggplot¶

Some Aesthetic Properties¶

color¶

size¶

alpha¶

shape¶

Facets¶

geom_bar(), geom_col() and statistical transformations¶

A few improvements in ggplot¶

Some other graphs¶

Pima Dataset Example¶

Pima Data Set¶

Barplot¶

Histogram¶

Boxplot¶

Scatterplots¶

Multiple Scatter Plots¶

	mpg	cyl	disp	hp	drat	wt	qsec
Mazda RX4	21.0	6	160.0	110	3.90	2.620	16.46
Mazda RX4 Wag	21.0	6	160.0	110	3.90	2.875	17.02
Datsun 710	22.8	4	108.0	93	3.85	2.320	18.61
Hornet 4 Drive	21.4	6	258.0	110	3.08	3.215	19.44
Hornet Sportabout	18.7	8	360.0	175	3.15	3.440	17.02
Valiant	18.1	6	225.0	105	2.76	3.460	20.22
Duster 360	14.3	8	360.0	245	3.21	3.570	15.84
Merc 240D	24.4	4	146.7	62	3.69	3.190	20.00
Merc 230	22.8	4	140.8	95	3.92	3.150	22.90
Merc 280	19.2	6	167.6	123	3.92	3.440	18.30
Merc 280C	17.8	6	167.6	123	3.92	3.440	18.90
Merc 450SE	16.4	8	275.8	180	3.07	4.070	17.40
Merc 450SL	17.3	8	275.8	180	3.07	3.730	17.60
Merc 450SLC	15.2	8	275.8	180	3.07	3.780	18.00
Cadillac Fleetwood	10.4	8	472.0	205	2.93	5.250	17.98
Lincoln Continental	10.4	8	460.0	215	3.00	5.424	17.82
Chrysler Imperial	14.7	8	440.0	230	3.23	5.345	17.42
Fiat 128	32.4	4	78.7	66	4.08	2.200	19.47
Honda Civic	30.4	4	75.7	52	4.93	1.615	18.52
Toyota Corolla	33.9	4	71.1	65	4.22	1.835	19.90
Toyota Corona	21.5	4	120.1	97	3.70	2.465	20.01
Dodge Challenger	15.5	8	318.0	150	2.76	3.520	16.87
AMC Javelin	15.2	8	304.0	150	3.15	3.435	17.30
Camaro Z28	13.3	8	350.0	245	3.73	3.840	15.41
Pontiac Firebird	19.2	8	400.0	175	3.08	3.845	17.05
Fiat X1-9	27.3	4	79.0	66	4.08	1.935	18.90
Porsche 914-2	26.0	4	120.3	91	4.43	2.140	16.70
Lotus Europa	30.4	4	95.1	113	3.77	1.513	16.90
Ford Pantera L	15.8	8	351.0	264	4.22	3.170	14.50
Ferrari Dino	19.7	6	145.0	175	3.62	2.770	15.50
Maserati Bora	15.0	8	301.0	335	3.54	3.570	14.60
Volvo 142E	21.4	4	121.0	109	4.11	2.780	18.60

	vs	am	gear	carb
Mazda RX4	V-shaped	manual	4	4
Mazda RX4 Wag	V-shaped	manual	4	4
Datsun 710	straight	manual	4	1
Hornet 4 Drive	straight	automatic	3	1
Hornet Sportabout	V-shaped	automatic	3	2
Valiant	straight	automatic	3	1
Duster 360	V-shaped	automatic	3	4
Merc 240D	straight	automatic	4	2
Merc 230	straight	automatic	4	2
Merc 280	straight	automatic	4	4
Merc 280C	straight	automatic	4	4
Merc 450SE	V-shaped	automatic	3	3
Merc 450SL	V-shaped	automatic	3	3
Merc 450SLC	V-shaped	automatic	3	3
Cadillac Fleetwood	V-shaped	automatic	3	4
Lincoln Continental	V-shaped	automatic	3	4
Chrysler Imperial	V-shaped	automatic	3	4
Fiat 128	straight	manual	4	1
Honda Civic	straight	manual	4	2
Toyota Corolla	straight	manual	4	1
Toyota Corona	straight	automatic	3	1
Dodge Challenger	V-shaped	automatic	3	2
AMC Javelin	V-shaped	automatic	3	2
Camaro Z28	V-shaped	automatic	3	4
Pontiac Firebird	V-shaped	automatic	3	2
Fiat X1-9	straight	manual	4	1
Porsche 914-2	V-shaped	manual	5	2
Lotus Europa	straight	manual	5	2
Ford Pantera L	V-shaped	manual	5	4
Ferrari Dino	V-shaped	manual	5	6
Maserati Bora	V-shaped	manual	5	8
Volvo 142E	straight	manual	4	2

cyl	count	mean_mpg	sd_mpg	mean_hp	sd_hp
4	11	26.66364	4.509828	82.63636	20.93453
6	7	19.74286	1.453567	122.28571	24.26049

NTP	PGC	DBP	TSFT	SI	BMI	DPF	Age	Diabetes
6	148	72	35	0	33.6	0.627	50	positive
1	85	66	29	0	26.6	0.351	31	negative
8	183	64	0	0	23.3	0.672	32	positive
1	89	66	23	94	28.1	0.167	21	negative
0	137	40	35	168	43.1	2.288	33	positive
5	116	74	0	0	25.6	0.201	30	negative

AgeGroups	Diabetes	mean_BMI
(20,40]	negative	30.09363
(20,40]	positive	35.82108
(40,60]	negative	32.48472
(40,60]	positive	34.10526
(60,81]	negative	26.75000
(60,81]	positive	33.12857

g100m
2.950
3.086
3.289
3.289
3.663
3.846
4.098
4.386
4.386
4.651
4.673
4.673
4.762
4.762
5.076
5.208
5.208
5.348
5.525
5.618
5.780
6.098
6.329
6.452
6.579
6.579
6.667
6.803
6.993
7.519
9.615
9.615