Commit 979b28db authored by Digp's avatar Digp
Browse files

Updated Figure names and directories according with the

resubmission of the paper.
parent 5ab28e09
Pipeline #3090 skipped
## "cache" saves trinucleotide backbones in RAM for latter RMSD calculations
cache <-
function(ntID, name, ntinfo) {
## Trim trinucleotide backbone
pdb <- trimByID(ntID=ntID, ntinfo=ntinfo, prev=1, post=1,
alt=c("uniq"), cutoff=0, justbackbone=TRUE,
verbose=FALSE)
## Save as an independent pdb file in RAM
assign(name, pdb, envir=.GlobalEnv)
}
## "pwrmsd" calculates pair wise RMSD between nucleotides presaved in RAM
pwrmsd <-
function(ntIDs) {
pdb1 <- get(paste("nt", ntIDs[1], sep=""))
pdb2 <- get(paste("nt", ntIDs[2], sep=""))
sel1 <- atom.select(pdb1)
sel2 <- atom.select(pdb2)
fit=fit.xyz(pdb1$xyz, pdb2$xyz,
fixed.inds=sel1$xyz, mobile.inds=sel2$xyz)
return(rmsd(pdb1$xyz[,sel1$xyz],fit[,sel2$xyz]))
}
## "filter_pyle" reproduces the iterative process of RMSD comparison to chose
## an helical nucleotide of reference
filter_pyle <-
function(ntinfo, helicalntID, RMSD, bandwidths=c(40,40), cutoff=0.85) {
twideMatrix <- t(acast(RMSD, nt1~nt2, value.var="rmsd"))
twideMatrix<-rbind(rep(NA,length(helicalntID)-1),twideMatrix)
twideMatrix<-cbind(twideMatrix,rep(NA,length(helicalntID)))
rownames(twideMatrix)[1]<-helicalntID[1]
colnames(twideMatrix)[length(helicalntID)]<-helicalntID[length(helicalntID)]
Matrix <- twideMatrix
while (length(helicalntID) > 1) {
Matrix <- Matrix[which(rownames(Matrix) %in% helicalntID),
which(colnames(Matrix) %in% helicalntID)]
column_sums <- colSums(Matrix, na.rm=TRUE)
row_sums <- rowSums(Matrix, na.rm=TRUE)
RMSDaverages <- (column_sums + row_sums)/(length(row_sums) - 1)
index <- which.min(RMSDaverages)
lowestaverage <- RMSDaverages[index]
reference_nt <- helicalntID[index]
A <- Matrix[which(rownames(Matrix) == reference_nt), ]
names(A) <- colnames(Matrix)
A <- A[complete.cases(A)]
B <- Matrix[, which(colnames(Matrix) == reference_nt)]
names(B) <- colnames(Matrix)
B <- B[complete.cases(B)]
reference_nt_RMSD <- append(A,B); rm(list=c("A","B"))
helicalntID <- sort(append(reference_nt, as.numeric(
names(reference_nt_RMSD)[which(
reference_nt_RMSD < lowestaverage)])))
}
return(helicalntID)
}
#!/usr/bin/Rscript
## EXAMPLE A
## Description: From Leontis non-redundant RNA list, get eta-theta plot
## following Pyle's method.
## Author: Diego Gallego
## ----------------------------------------------------------------------------
## Preprocess
## ----------------------------------------------------------------------------
## Load necessary packages for the example
library(veriNA3d)
library(bio3d)
library(reshape2) ## Specific dependency for this example!
## Load specific functions for the example
source("dependencies.R")
## ----------------------------------------------------------------------------
## Get Raw data
cat("Getting data\n")
## ----------------------------------------------------------------------------
## Get latest Leontist non-redundant list of RNA structures
infochains <- getLeontisList(threshold="2.5A", as.df=TRUE)
## Get structural data
ntinfo <- pipeNucData(pdbID=infochains$pdb, model=infochains$model,
chain=infochains$chain, cores=1)
## ----------------------------------------------------------------------------
## Clean data. Raw -> Tidy
cat("Cleaning data\n")
## ----------------------------------------------------------------------------
## Subset north nucleotides
north <- ntinfo[cleanByPucker(ntinfo, surenorth=TRUE), ]
## Find non-broken canonical nucleotides
id <- which(north$puc_valid==TRUE & north$Break==FALSE & north$big_b == FALSE &
north$eta_valid == TRUE & north$theta_valid == TRUE &
grepl("^[GAUC]-[GAUC]-[GAUC]$", north$localenv, perl=TRUE))
usefulnt <- north$ntID[id]
## Filter helical nucleotides before ploting =================================
## Create all the necessary trinucleotides for pair-wise RMSD calculations
invisible(mapply(FUN=cache, ntID=usefulnt, name=paste("nt", usefulnt, sep=""),
MoreArgs=list(ntinfo=ntinfo)))
## Find helical nucleotides
HDR <- findHDR(ntID=usefulnt, ntinfo=ntinfo, x="eta", y="theta", SD_DENS=15)
## Compute pair-wise RMSD between helical nucleotides
pairwise <- t(combn(HDR[[1]], 2))
RMSD_calc <- apply(FUN=pwrmsd, MARGIN=1, X=pairwise)
RMSD <- data.frame(nt1=pairwise[, 1], nt2=pairwise[, 2], rmsd=RMSD_calc)
## Apply Pyle method
ref <- filter_pyle(ntinfo=ntinfo, helicalntID=HDR[[1]], RMSD=RMSD)
pairs <- data.frame(usefulnt=usefulnt, reference=rep(ref, length(usefulnt)))
RMSD2 <- apply(FUN=pwrmsd, MARGIN=1, X=pairs)
## ===========================================================================
## ----------------------------------------------------------------------------
## Exploratory analysis. Plot
cat("Plotting data\n")
## ----------------------------------------------------------------------------
tiff("exampleA.tiff", height=10.5, width=16.5, units="cm", res=300)
plot2D(ntinfo=ntinfo, x="eta", y="theta", etatheta=TRUE,
ntID=pairs[which(RMSD2 > 0.85), 1])
dev.off()
This diff is collapsed.
real 125m49.951s
user 113m31.407s
sys 0m21.852s
#!/usr/bin/Rscript
## EXAMPLE B
## Description: From the whole PDB, create a dataset of RNA structures with Mg
## and a threshold resolution of 2 Angstroms. Then, measure and
## plot the distances between Mg and other atom types.
## Author: Diego Gallego
## ----------------------------------------------------------------------------
## Preprocess
## ----------------------------------------------------------------------------
## Load necessary packages for the example
library(veriNA3d)
library(ggplot2) ## Specific dependency for this example!
library(reshape2) ## Specific dependency for this example!
## ----------------------------------------------------------------------------
## Get Raw data
cat("Getting data\n")
## ----------------------------------------------------------------------------
## From the whole PDB, prepare dataset of interest ============================
## Retrieve ALL entries in the PDB (including latest entries)
pdblist <- queryEntryList(justIDs=FALSE)
## Keep only Nucleic Acid structures solved by diffraction techniques
nalist <- pdblist[which(grepl("nuc", pdblist$type, perl=TRUE) &
pdblist$technique == "diffraction"), "pdbID"]
## Query which structures contain MG
mg <- toupper(unlist(queryAPI(ID="MG", API="ebi", verbose=TRUE,
string1="pdb/compound/in_pdb/", string2="")))
mgnalist <- nalist[which(nalist %in% mg)]
## Query resolution and keep only structures under 2 A
resol <- applyToPDB(mgnalist, queryResol)
mgnalist <- resol[resol[, 2] < 2, 1]
## Check entities in the structures and keep only PDBs with RNA
naent <- applyToPDB(mgnalist, countEntities, as.df=FALSE)
naent <- as.data.frame(do.call(rbind, naent), stringsAsFactors=FALSE)
mgrnalist <- mgnalist[which(naent$RNA > 0)]
## ============================================================================
## Loop over list of PDB IDs
distances <- do.call(rbind, lapply(seq_along(mgrnalist), function(i) {
## Download mmCIF and get entity of Mg
cif <- cifParser(mgrnalist[i])
MG <- cifEntity(cif)[grepl("MAGNE", cifEntity(cif)$pdbx_description), "id"]
## Measure distances between Mg and other entities
return(measureEntityDist(cif, refent=MG, entities="all", cutoff=4, n=NULL))
}))
## ----------------------------------------------------------------------------
## Clean data. Raw -> Tidy
cat("Cleaning data\n")
## ----------------------------------------------------------------------------
Ow <- distances$distance[distances$resid_B == "HOH" & distances$elety_B == "O"]
distances <- distances[distances$resid_B %in% c("A", "C", "G", "U"), ]
Ob <- distances$distance[distances$elety_B %in% c("O2", "O4", "O6")]
Or <- distances$distance[distances$elety_B %in% c("O2'", "O3'", "O4'", "O5'")]
Op <- distances$distance[distances$elety_B %in% c("OP1", "OP2", "OP3")]
Nb <- distances$distance[distances$elety_B %in% c("N1", "N2", "N3", "N7")]
## Data frame with distances per type of aotm
dist <- list(Nb=Nb, Ob=Ob, Or=Or, Op=Op, Ow=Ow)
df <- melt(dist, value.name="Distance"); names(df)[2] <- "atom"
df$atom <- factor(df$atom, levels=c("Ow", "Op", "Or", "Ob", "Nb"))
## ----------------------------------------------------------------------------
## Exploratory analysis. Plot
cat("Plotting data\n")
## ----------------------------------------------------------------------------
tiff("exampleB.tiff", height=10.5, width=16.5, units="cm", res=300)
colors <- c("#87CEEB", "#6A5ACD", "#F08080", "#DC143C", "#4169E1")
ggplot(df, aes(x=Distance, fill=atom)) + ylab("Frequency") +
scale_fill_manual(values=colors) + scale_y_continuous(breaks=NULL) +
scale_x_continuous(breaks=seq(1.2, 4, 0.4)) +
geom_histogram(binwidth=0.03, color="white") +
theme(panel.grid.minor=element_blank())
dev.off()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment