R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Registered S3 methods overwritten by 'stars':
##   method             from
##   st_bbox.SpatRaster sf  
##   st_crs.SpatRaster  sf
# Ingests data into R. Putting nothing in for the file separator command makes a
# "space" the file separator. This will prevent your data from turning in to a 
# giant text string.

OMG2021 = read.csv("c:/work/mast467/data/2021/2021.dat",sep="")
OMG2020 = read.csv("c:/work/mast467/data/2020/2020.dat",sep="")
OMG2019 = read.csv("c:/work/mast467/data/2019/2019.dat",sep="")
OMG2018 = read.csv("c:/work/mast467/data/2018/2018.dat",sep="")
OMG2017 = read.csv("c:/work/mast467/data/2017/2017.dat",sep="")

# Establishes names for the data frames. I decided having a separate frame with all
# of the profiles present would be useful for multi-year trend analysis. The native
# R functions for combining data frames were combining the data in a way I did not
# like, so I decided to just do it manually. Very inefficient, but I know the data is 
# correct in the final "OMGALL". A better way would be to just write all of the data
# to a single file once using gawk as opposed to building it in R. 

var_names = c("CAST","DATE","LAT","LON","D","T","S")
colnames(OMG2021) = var_names
colnames(OMG2020) = var_names
colnames(OMG2019) = var_names
colnames(OMG2018) = var_names
colnames(OMG2017) = var_names

# Additional digits were added to the cast numbers to handle the issue of 
# repeated cast numbers in the OMGALL data set. 

a1 = c(OMG2021$CAST+0.1)
a2 = c(OMG2020$CAST+0.2)
a3 = c(OMG2019$CAST+0.3)
a4 = c(OMG2018$CAST+0.4)
a5 = c(OMG2017$CAST+0.5)

CASTALL = c(a1,a2,a3,a4,a5)

b1 = c(OMG2021$DATE)
b2 = c(OMG2020$DATE)
b3 = c(OMG2019$DATE)
b4 = c(OMG2018$DATE)
b5 = c(OMG2017$DATE)

DATEALL = c(b1,b2,b3,b4,b5)

c1 = c(OMG2021$LAT)
c2 = c(OMG2020$LAT)
c3 = c(OMG2019$LAT)
c4 = c(OMG2018$LAT)
c5 = c(OMG2017$LAT)

LATALL = c(c1,c2,c3,c4,c5)

d1 = c(OMG2021$LON)
d2 = c(OMG2020$LON)
d3 = c(OMG2019$LON)
d4 = c(OMG2018$LON)
d5 = c(OMG2017$LON)

LONALL = c(d1,d2,d3,d4,d5)

e1 = c(OMG2021$D)
e2 = c(OMG2020$D)
e3 = c(OMG2019$D)
e4 = c(OMG2018$D)
e5 = c(OMG2017$D)

PALL = c(e1,e2,e3,e4,e5)

f1 = c(OMG2021$T)
f2 = c(OMG2020$T)
f3 = c(OMG2019$T)
f4 = c(OMG2018$T)
f5 = c(OMG2017$T)

TALL = c(f1,f2,f3,f4,f5)

g1 = c(OMG2021$S)
g2 = c(OMG2020$S)
g3 = c(OMG2019$S)
g4 = c(OMG2018$S)
g5 = c(OMG2017$S)

SALL = c(g1,g2,g3,g4,g5)

OMGALL = data.frame(CASTALL,DATEALL,LATALL,LONALL,PALL,TALL,SALL)
colnames(OMGALL) = var_names

rm(a1,a2,a3,a4,a5,b1,b2,b3,b4,b5,c1,c2,c3,c4,c5,d1,d2,d3,d4,d5,e1,e2,e3,e4,e5)
rm(f1,f2,f3,f4,f5,g1,g2,g3,g4,g5,CASTALL,DATEALL,LATALL,LONALL,PALL,TALL,SALL)
rm(var_names)
# Date Conversion to R's preferred date format.

OMGALL$DATE = as.Date(OMGALL$DATE,"%m/%d/%Y")
OMG2021$DATE = as.Date(OMG2021$DATE,"%m/%d/%Y")
OMG2020$DATE = as.Date(OMG2020$DATE,"%m/%d/%Y")
OMG2019$DATE = as.Date(OMG2019$DATE,"%m/%d/%Y")
OMG2018$DATE = as.Date(OMG2018$DATE,"%m/%d/%Y")
OMG2017$DATE = as.Date(OMG2017$DATE,"%m/%d/%Y")
# Quality Control Code 

# Removes all bad Temperature and Salinity data. Removes all
# data with a depth of less than 2m. Replaces all -99s with R's native missing data "NA".
# Finally rebuilds the OMG "year" data.frames with QA checked values. 

# Current thresholds for bad data are, any -99, T <= -2, and S >= 50. 

QA = 1  # SET TO 1 IF QA PROCESSING IS DESIRED.

if (QA == 1) {

OMGALL["D"][OMGALL["D"] <= 2] <- -99
TOSSALL_idx = which(OMGALL$D == -99,OMGALL$T <= -2 && OMGALL$T >= -98,OMGALL$S >= 50)
OMGALL <- OMGALL[-c(TOSSALL_idx), ]

OMGALL["T"][OMGALL["T"] == -99] <- NA
OMGALL["S"][OMGALL["S"] == -99] <- NA

OMG2021 = OMGALL[OMGALL$DATE > "2021-01-01" & OMGALL$DATE < "2021-12-31",]
OMG2020 = OMGALL[OMGALL$DATE > "2020-01-01" & OMGALL$DATE < "2020-12-31",]
OMG2019 = OMGALL[OMGALL$DATE > "2019-01-01" & OMGALL$DATE < "2019-12-31",]
OMG2018 = OMGALL[OMGALL$DATE > "2018-01-01" & OMGALL$DATE < "2018-12-31",]
OMG2017 = OMGALL[OMGALL$DATE > "2017-01-01" & OMGALL$DATE < "2017-12-31",]

rm(TOSSALL_idx)
}
# Deepest Depth Code

# Finds the difference between each value in the the D column and returns the
# index number. If the difference is negative, that means the depth went from 
# a high number to a low number. This method does not include the last "deepest"
# depth, so it must be appended to the index.

a = diff(OMG2021$D)
b = which(a < 0)
BTM2021_idx = append(b,length(a))

a = diff(OMG2020$D)
b = which(a < 0)
BTM2020_idx = append(b,length(a))

a = diff(OMG2019$D)
b = which(a < 0)
BTM2019_idx = append(b,length(a))

a = diff(OMG2018$D)
b = which(a < 0)
BTM2018_idx = append(b,length(a))

a = diff(OMG2017$D)
b = which(a < 0)
BTM2017_idx = append(b,length(a))

a = diff(OMGALL$D)
b = which(a < 0)
BTMALL_idx = append(b,length(a))

BTM2021 = OMG2021[BTM2021_idx,]
BTM2020 = OMG2020[BTM2020_idx,]
BTM2019 = OMG2019[BTM2019_idx,]
BTM2018 = OMG2018[BTM2018_idx,]
BTM2017 = OMG2017[BTM2017_idx,]
BTMALL = OMGALL[BTMALL_idx,]



rm(a,b)
# Statistics Code

TotalCasts = length(unique(OMGALL$CAST))

CastDistALL = as.numeric(format(OMGALL$DATE[BTMALL_idx],'%Y'))

CastHistYEAR = ggplot(BTMALL,aes(x = DATE)) +
  geom_histogram(bins = 9, color="darkblue", fill="lightblue") +
  xlab("Year") +
  ylab("Number of Casts") +
  ggtitle("Annual Distribution of Casts in Region 3","Total Number of Casts = 310") +
  theme(plot.title = element_text(hjust = 0.5))
  
CastHistYEAR

dt = BTMALL
dt$DATE = as.numeric(format(dt$DATE,'%m'))
CastHistMONTH = ggplot(dt,aes(x = DATE)) +
  geom_histogram(bins = 30, color="darkblue", fill="lightblue") +
  xlab("Month") +
  ylab("Number of Casts") +
  ggtitle("Seasonal Distribution of Casts in Region 3 (2017 - 2021)","Total Number of Casts = 310") +
  theme(plot.title = element_text(hjust = 0.5))+
  xlim(0.5,11.5)

CastHistMONTH
## Warning: Removed 2 rows containing missing values (geom_bar).

BTMALL_vec = OMGALL$D[BTMALL_idx]

DeepDistAll = ggplot(NULL) +
  geom_histogram(bins = 40, color="darkblue", fill="lightblue") +
  aes(BTMALL_vec) + 
  xlab("Deepest Depth of CTD") +
  ylab("Number of Casts") +
  ggtitle("Depth Distribution of all Casts","Total Number of Casts = 310") +
  theme(plot.title = element_text(hjust = 0.5))

DeepDistAll

# Single Plot Temperature, Salinity, and T(S). For the T(S) profile, I used the 
# code structure provided by Dr. Muenchow in his TEST document.

cc1_idx = which(OMG2021$CAST == 186.1)
cc1 = OMG2021[cc1_idx,]

ggplot(cc1,aes(x=T,y=-D)) +
  geom_path() +
  ggtitle("Temperature Profile",cc1$CAST) +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("Temp, C") +
  ylab("Depth, m")  

ggplot(cc1,aes(x=S,y=-D)) +
  geom_path() +
  ggtitle("Salinity Profile",cc1$CAST) +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("Temp, C") +
  ylab("Depth, m") 

ggplot(cc1,mapping=aes(x=S,y=T,color=D)) +
  geom_point() +
  ggtitle("T(S) Profile",cc1$CAST) +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("Salinity, psu") +
  ylab("Temperature, C") +
  scale_color_viridis_c(guide=guide_colorbar(barheight=15))

# Convert cc1 values to Pressure, Absolute Salinity, and Potential Temperature.

cc1_Convert = cc1
cc1_Convert$D = gsw_p_from_z(cc1_Convert$D*-1,latitude = cc1_Convert$LAT)
cc1_Convert$S = gsw_SA_from_SP(cc1_Convert$S, cc1_Convert$D, cc1_Convert$LON, cc1_Convert$LAT)
cc1_Convert$T = gsw_pt_from_t(cc1_Convert$S,cc1_Convert$T,cc1_Convert$D,0)

newnames = c("CAST","DATE","LAT","LON","P","PT","SA")
colnames(cc1_Convert) = newnames
rm(newnames)

cc1_CT = gsw_CT_from_t(cc1_Convert$SA,cc1$T,cc1_Convert$P)
cc1_r0 = gsw_sigma0(cc1_Convert$SA,cc1_CT)
cc1_r1 = gsw_sigma1(cc1_Convert$SA,cc1_CT)

# cc1_tf = gsw_CT_freezing_poly(cc1_Convert$SA,cc1_Convert$P,1)

ggplot(NULL) +
  geom_path(data = cc1, aes(x = T, color = "Insitu Temp")) +
  geom_path(data = cc1_Convert, aes(x = PT, color = "Potential Temp")) +
  aes(y = cc1$D*-1) +
  ggtitle("In-situ Temp vs Potential Temperature",cc1$CAST) +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("T and PT, C") +
  ylab("Depth, m") +
  labs(colour = "Unit") 
## Warning: Use of `cc1$D` is discouraged. Use `D` instead.

ggplot(NULL) +
  geom_path(data = cc1_Convert, aes(x = SA, y = cc1_r0)) +
  ggtitle("Absolute Salinity and Potential Density Profile",cc1$CAST) +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("SA g/kg") +
  ylab("Potential Density kg/m^3, ref = 0")

# rm(cc1,cc1_idx)
# Multi-Plot +- 300m

# Uses the previously made bottom vectors with logic to filter +- 300m. Then 
# re-indexes a new data set consisting of only +- 300m values respectively. Plot
# structures are nearly identical to the single plots above, but I had to add limits to the 
# x and y values to handle the -99's present in the data. These "missing" -99's are
# what account for the "Missing Values" error in the geom_path() function. Also, 
# geom_path() allows you to group the line that is drawn and I chose to group by cast number
# this means that the end of cast (n) won't be connected to the beginning of cast (n+1).

a1 = OMG2021[BTM2021_idx,]
b1 = which(a1$D <= 300)
c1 = a1$CAST[b1]
SHALLOW2021_idx = which(OMG2021$CAST == c1)
SHALLOW2021 = OMG2021[SHALLOW2021_idx,]

ggplot(SHALLOW2021,aes(x=T,y=-D)) +
  xlim(-2,6) +
  geom_path(group = SHALLOW2021$CAST) +
  ggtitle("2021 Shallow Temperature Profiles (<300m)") +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("Temp, C") +
  ylab("Depth, m") 
## Warning: Removed 2 row(s) containing missing values (geom_path).

ggplot(SHALLOW2021,aes(x=S,y=-D)) +
  xlim(26,35) +
  geom_path(group = SHALLOW2021$CAST) +
  ggtitle("2021 Shallow Salinity Profiles (<300m)") +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("Salinity, PSU") +
  ylab("Depth, m") 
## Warning: Removed 2 row(s) containing missing values (geom_path).

a1 = OMG2021[BTM2021_idx,]
b2 = which(a1$D >= 300)
c2 = a1$CAST[b2]
DEEP2021_idx = which(OMG2021$CAST == c2)
## Warning in OMG2021$CAST == c2: longer object length is not a multiple of shorter
## object length
DEEP2021 = OMG2021[DEEP2021_idx,]

ggplot(DEEP2021,aes(x=T,y=-D)) +
  #xlim(-2,6) +
  geom_path(group = DEEP2021$CAST) +
  ggtitle("2021 Deep Temperature Profiles (>300m)") +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("Temp, C") +
  ylab("Depth, m") 
## Warning: Removed 14 row(s) containing missing values (geom_path).

ggplot(DEEP2021,aes(x=S,y=-D)) +
  xlim(26,35) +
  geom_path(group = DEEP2021$CAST) +
  ggtitle("2021 Deep Salinity Profiles (>300m)") +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("Salinity, PSU") +
  ylab("Depth, m") 
## Warning: Removed 14 row(s) containing missing values (geom_path).

ggplot(SHALLOW2021,mapping=aes(x=S,y=T,color=D)) +
  xlim(26,35) +
  ylim(-2,6) +
  geom_point() +
  ggtitle("2021 Shallow T(S) Profiles (<300m)") +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("Salinity, psu") +
  ylab("Temperature, C") +
  scale_color_viridis_c(guide=guide_colorbar(barheight=15))
## Warning: Removed 2 rows containing missing values (geom_point).

ggplot(DEEP2021,mapping=aes(x=S,y=T,color=D)) +
  xlim(26,35) +
  ylim(-2,6) +
  geom_point() +
  ggtitle("2021 Deep T(S) Profiles (>300m)") +
  theme(plot.title = element_text(hjust = 0.5)) +
  xlab("Salinity, psu") +
  ylab("Temperature, C") +
  scale_color_viridis_c(guide=guide_colorbar(barheight=15))
## Warning: Removed 119 rows containing missing values (geom_point).

rm(a1,b1,c1,b2,c2)
#MAPS. I am using the ggOceanMaps package, which requires the ggOceanMapsData package to run.
# I recommend making Bathymetry and Glaciers = "FALSE" when experimenting as they take up
# a lot of computing resources.

# TO DO: Zoom map in on study area. Make the legend better. Make the contour colors different
# from the plots. 

study_region = data.frame(lon = c(-90, -90, -50, -50), lat = c(76, 72, 72, 76))

GREENLAND = basemap(limits = c(-70,-20,58,85),rotate = TRUE, bathymetry = TRUE ,glaciers = TRUE) +
  ggtitle("Greenland","Disko Island to Melville Bay (Region 3)") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_spatial_polygon(data = study_region, aes(x = lon, y = lat), alpha = 0.5, fill = "red", color = "red")
## Warning in sp::proj4string(shapefiles$land): CRS object has comment, which is
## lost in output
## Using lon and lat as longitude and latitude columns, respectively.
## projection transformed from +init=epsg:4326 to +proj=stere +lat_0=90 +lat_ts=71 +lon_0=-45 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs
GREENLAND
## Assuming `crs = 4326` in stat_spatial_identity()

OMGALL_SINGULAR = OMGALL[BTMALL_idx,]

REGION3 = basemap(limits = c(-71,-51,71,77),rotate = TRUE,bathymetry = TRUE, glaciers = TRUE) +
  ggtitle("Spatial Distribution of CTD Measurements in Region 3 (2017 - 2021)") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_spatial_point(data = OMGALL_SINGULAR, aes(x = LON, y = LAT, color = D))
## Warning in sp::proj4string(shapefiles$land): CRS object has comment, which is
## lost in output
## Using lon and lat as longitude and latitude columns, respectively.
## projection transformed from +init=epsg:4326 to +proj=stere +lat_0=90 +lat_ts=71 +lon_0=-61 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs
REGION3
## Assuming `crs = 4326` in stat_spatial_identity()