本文主要是介绍G4Hunter 计算G4 score脚本,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
使用G4Hunter函数计算G4 Score
输入序列格式如下:
Id Sequence
G2_TraesCS1A03G0141400.1 GGTTAGCTAGCGGCCGTAGGAATAGG
G2_TraesCS1B03G0192900.1 GGTTAGATAGCGGCCGTAGGAATAGG
G2_TraesCS1D03G0129100.2 GGTTAGCTAGCGGCCGTAGGAATAGG
G2_TraesCS1A03G0162100.2 GGCGCCGGCTCCCCCTGGGCGG
G2_TraesCS1B03G0213800.3 GGCGCCGGCTCCCCCTGGGCGG
G2_TraesCS1D03G0150100.3 GGCGCCGGCTCCCCCTGGGCGG
G2_TraesCS1D03G0613500.1 GGGAGGGAGGGAGGGGGGATTCGG
G3_TraesCS1D03G0613500.1 GGGAGGGAGGGAGGGGGG
G3B1_TraesCS1D03G0613500.1 GGGCTCCTGCCGGAGGACTCGGCGGGCGGG
G3B2_TraesCS1D03G0613500.1 GGGAGGGAGGGAGGGGGG
G3V1_TraesCS1D03G0613500.1 GGGAGGGAGGGAGGGGGG
G3V2_TraesCS1D03G0613500.1 GGGAGGGAGGGAGGGGGGATTCGG
G2_TraesCS1A03G0673200.1 GGGAGGAGGAGTCTTCCGGGTCTTGGG
所用代码如下:
################################################################################
################# Function used for the G4Hunter paper #########################
################################################################################
#### L. Lacroix, laurent.lacroix@inserm.fr , 20150928################################################################################
###### G4translate change the DNA code into G4Hunter code.
###### Only G or C are taken into account. non G/C bases are translated in 0
###### It is OK if N or U are in the sequence
###### but might be a problem if other letters or numbers are present
###### lowercase ARE not welcome
G4translate <- function(x) # x is a Rle of a sequence
{xres=xrunValue(xres)[runValue(x)=='C' & runLength(x)>3] <- -4runValue(xres)[runValue(x)=='C' & runLength(x)==3] <- -3runValue(xres)[runValue(x)=='C' & runLength(x)==2] <- -2runValue(xres)[runValue(x)=='C' & runLength(x)==1] <- -1runValue(xres)[runValue(x)=='G' & runLength(x)>3] <- 4runValue(xres)[runValue(x)=='G' & runLength(x)==3] <- 3runValue(xres)[runValue(x)=='G' & runLength(x)==2] <- 2runValue(xres)[runValue(x)=='G' & runLength(x)==1] <- 1runValue(xres)[runValue(x)!='C' & runValue(x)!='G'] <- 0Rle(as.numeric(xres))
}
################################################################################################################################################################
###### G4Hscore return the G4Hscore of a sequence
###### The output is a number
G4Hscore <- function(y) # y can be DNAString or a DNAStringSet or a simple char string.
{require(S4Vectors)y2 <- Rle(strsplit(as.character(y),NULL)[[1]])y3 <- G4translate(y2)mean(y3)
}
################################################################################################################################################################
###### G4runmean return the G4Hscore in a window of 25 using runmean.
###### The output is a Rle of the runmeans
G4runmean <- function(y,k=25) #y is a DNAString or a DNAStringSet, k is the window for the runmean
{require(S4Vectors)y2 <- Rle(strsplit(as.character(y),NULL)[[1]])y3 <- G4translate(y2)runmean(y3,k)
}
################################################################################################################################################################
#### function to extract sequences with abs(G4Hscore)>=hl (threshold)
#### from a chromosome (i) of a genome (gen)
#### return a GRanges
#### use masked=5 for unmasked genome
#### need a genome to be in the memory in the genome variable
#### k is the window size for the runmean
#### i is the chromosome number
#### hl is the thresholdG4hunt <- function(i,k=25,hl=1.5,gen=genome,masked=5)
{require(GenomicRanges)chr <- gen[[i]]if (masked==2) {chr=injectHardMask(chr)}if (masked==3) {active(masks(chr))['RM']=T;chr=injectHardMask(chr)}if (masked==0) {active(masks(chr))=F;chr=injectHardMask(chr)}if (masked==4) {active(masks(chr))=T;chr=injectHardMask(chr)}chr_G4hk <- G4runmean(chr,k)tgenome <- G4translate(Rle(strsplit(as.character(chr),NULL)[[1]]))if (class(gen)=="DNAStringSet"){seqname <- names(gen)[i]}else{seqname <- seqnames(gen)[i]} j <- hlchrCh <- Views(chr_G4hk, chr_G4hk<=(-j))chrGh <- Views(chr_G4hk, chr_G4hk>=j)IRC <- IRanges(start=start(chrCh),end=(end(chrCh)+k-1))IRG <- IRanges(start=start(chrGh),end=(end(chrGh)+k-1))nIRC <- reduce(IRC)nIRG <- reduce(IRG)# overlapping results on the same strand are fusednchrCh <- Views(chr_G4hk,start=start(nIRC),end=(end(nIRC)-k+1))nchrGh <- Views
这篇关于G4Hunter 计算G4 score脚本的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!