[glu-genetics] 5 new revisions pushed by bioinformed@gmail.com on 2012-08-30 20:51 GMT

2 views
Skip to first unread message

glu-ge...@googlecode.com

unread,
Aug 30, 2012, 4:51:24 PM8/30/12
to glu...@googlegroups.com
5 new revisions:

Revision: 5c81e0ebe0e0
Author: Kevin B. Jacobs <jac...@bioinformed.com>
Date: Thu Aug 16 08:32:38 2012
Log: Fix doc strings. These are gapped alignments, not ungapped.
http://code.google.com/p/glu-genetics/source/detail?r=5c81e0ebe0e0

Revision: dd16e94e7cf1
Author: Kevin B. Jacobs <jac...@bioinformed.com>
Date: Thu Aug 30 13:38:22 2012
Log: Update supported strand conventions
http://code.google.com/p/glu-genetics/source/detail?r=dd16e94e7cf1

Revision: 3076ad9e2d53
Author: Kevin B. Jacobs <jac...@bioinformed.com>
Date: Thu Aug 30 13:40:07 2012
Log: Add BAF-deviation to cnv.stats output
http://code.google.com/p/glu-genetics/source/detail?r=3076ad9e2d53

Revision: 07889938c14c
Author: Kevin B. Jacobs <jac...@bioinformed.com>
Date: Thu Aug 30 13:41:06 2012
Log: estimate_ld is not smart enough to compute from diplotypes
http://code.google.com/p/glu-genetics/source/detail?r=07889938c14c

Revision: 496d0f50ef1f
Author: Kevin B. Jacobs <jac...@bioinformed.com>
Date: Thu Aug 30 13:42:05 2012
Log: Add support for plotting zoomed event end-points and regions
http://code.google.com/p/glu-genetics/source/detail?r=496d0f50ef1f

==============================================================================
Revision: 5c81e0ebe0e0
Author: Kevin B. Jacobs <jac...@bioinformed.com>
Date: Thu Aug 16 08:32:38 2012
Log: Fix doc strings. These are gapped alignments, not ungapped.
http://code.google.com/p/glu-genetics/source/detail?r=5c81e0ebe0e0

Modified:
/glu/lib/seqlib/_edits.pyx
/glu/lib/seqlib/edits.py

=======================================
--- /glu/lib/seqlib/_edits.pyx Tue Jun 7 06:11:30 2011
+++ /glu/lib/seqlib/_edits.pyx Thu Aug 16 08:32:38 2012
@@ -524,7 +524,7 @@
def smith_waterman(s1, s2, match_score=10, mismatch_score=-9,
gap_score=-12,

anchor_left=False,anchor_right=False,local_align=None):
'''
- Align s1 to s2 using the Smith-Waterman algorithm for local ungapped
+ Align s1 to s2 using the Smith-Waterman algorithm for local gapped
alignment. An alignment score and sequence of alignment operations are
returned.

The operations to align s1 to s2 are returned as a sequence, represented
@@ -776,7 +776,7 @@
gap_open_score=-15, gap_extend_score=-6,

anchor_left=False,anchor_right=False,local_align=None):
'''
- Align s1 to s2 using the Smith-Waterman algorithm for local ungapped
+ Align s1 to s2 using the Smith-Waterman algorithm for local gapped
alignment. An alignment score and sequence of alignment operations are
returned.

The operations to align s1 to s2 are returned as a sequence, represented
@@ -1041,7 +1041,7 @@
def smith_waterman_gotoh2_align(s1, s2, match_score=10, mismatch_score=-9,
gap_open_score=-15,
gap_extend_score=-6):
'''
- Align s1 to s2 using the Smith-Waterman algorithm for local ungapped
+ Align s1 to s2 using the Smith-Waterman algorithm for local gapped
alignment. An alignment score and sequence of alignment operations are
returned.

The operations to align s1 to s2 are returned as a sequence, represented
@@ -1250,7 +1250,7 @@
def smith_waterman_gotoh2_score(s1, s2, match_score=10, mismatch_score=-9,
gap_open_score=-15,
gap_extend_score=-6):
'''
- Align s1 to s2 using the Smith-Waterman algorithm for local ungapped
+ Align s1 to s2 using the Smith-Waterman algorithm for local gapped
alignment. An alignment score and sequence of alignment operations are
returned.

The operations to align s1 to s2 are returned as a sequence, represented
=======================================
--- /glu/lib/seqlib/edits.py Tue Jun 7 06:17:44 2011
+++ /glu/lib/seqlib/edits.py Thu Aug 16 08:32:38 2012
@@ -898,7 +898,7 @@
def smith_waterman(s1, s2, match_score=10, mismatch_score=-9,
gap_score=-12,

anchor_left=False,anchor_right=False,local_align=None):
'''
- Align s1 to s2 using the Smith-Waterman algorithm for local ungapped
+ Align s1 to s2 using the Smith-Waterman algorithm for local gapped
alignment. An alignment score and sequence of alignment operations are
returned.

The operations to align s1 to s2 are returned as a sequence, represented
@@ -1125,7 +1125,7 @@
s2_homopolymer_penalty=None,

anchor_left=False,anchor_right=False,local_align=None):
'''
- Align s1 to s2 using the Smith-Waterman algorithm for local ungapped
+ Align s1 to s2 using the Smith-Waterman algorithm for local gapped
alignment. An alignment score and sequence of alignment operations are
returned.

The operations to align s1 to s2 are returned as a sequence, represented

==============================================================================
Revision: dd16e94e7cf1
Author: Kevin B. Jacobs <jac...@bioinformed.com>
Date: Thu Aug 30 13:38:22 2012
Log: Update supported strand conventions
http://code.google.com/p/glu-genetics/source/detail?r=dd16e94e7cf1

Modified:
/glu/modules/convert/from_gfr.py

=======================================
--- /glu/modules/convert/from_gfr.py Wed Aug 15 08:14:16 2012
+++ /glu/modules/convert/from_gfr.py Thu Aug 30 13:38:22 2012
@@ -29,9 +29,8 @@
# HEADER strand
GFR_GENOTYPE_FORMATS = [ ( 'AB', 'ab'),
('Forward','forward'),
- ('Reverse','reverse'),
( 'Top', 'top'),
- ('Bottom', 'bottom') ]
+ ('Design', 'design') ]


GENO_TYPE = 'S2'

==============================================================================
Revision: 3076ad9e2d53
Author: Kevin B. Jacobs <jac...@bioinformed.com>
Date: Thu Aug 30 13:40:07 2012
Log: Add BAF-deviation to cnv.stats output
http://code.google.com/p/glu-genetics/source/detail?r=3076ad9e2d53

Modified:
/glu/modules/cnv/gdat.py
/glu/modules/cnv/stats.py

=======================================
--- /glu/modules/cnv/gdat.py Sun Aug 5 11:27:40 2012
+++ /glu/modules/cnv/gdat.py Thu Aug 30 13:40:07 2012
@@ -538,3 +538,21 @@
lrr_adj -= alpha

return lrr_adj
+
+
+def baf_deviation(baf,genos):
+ bdev = np.empty_like(baf)
+ bdev.fill(np.nan)
+
+ hom_mask = (genos=='AA')|(genos=='BB')
+ het_mask = genos=='AB'
+ mis_mask = genos==' '
+
+ hom_dev = np.minimum(baf,1-baf)
+ het_dev = np.abs(baf-0.5)
+
+ bdev[hom_mask] = hom_dev[hom_mask]
+ bdev[het_mask] = het_dev[het_mask]
+ bdev[mis_mask] = np.minimum(hom_dev[mis_mask], het_dev[mis_mask])
+
+ return bdev
=======================================
--- /glu/modules/cnv/stats.py Mon Aug 13 11:26:42 2012
+++ /glu/modules/cnv/stats.py Thu Aug 30 13:40:07 2012
@@ -20,7 +20,8 @@

from glu.lib.progressbar import progress_loop
from glu.lib.fileutils import table_writer
-from glu.modules.cnv.gdat import GDATFile, BatchTableWriter,
parallel_gdat_iter, create_gdat_qn
+from glu.modules.cnv.gdat import GDATFile, BatchTableWriter,
parallel_gdat_iter, \
+ create_gdat_qn, baf_deviation


def skewtest(s):
@@ -121,7 +122,6 @@
regions = parse_chromosome_regions(regions)

for chrom,start,stop in regions:
- print '!!!',chrom,start,stop
pos,indices = chrom_index[chrom]
region_mask = (pos>=start)&(pos<stop)
region_indices = indices[region_mask]
@@ -138,13 +138,17 @@
out = table_writer(options.output,hyphen=sys.stdout)
out.writerow(['ASSAY_ID','GDAT',
'COUNT_ORIG','LRR_ORIG_MEAN','LRR_ORIG_SD','BAF_ORIG_SKEW',
+ 'BDEV_ORIG_MEAN', 'BDEV_ORIG_SD',
'BAF_ORIG_AA_MEAN','BAF_ORIG_AA_SD',
'BAF_ORIG_AB_MEAN','BAF_ORIG_AB_SD',
- 'BAF_ORIG_BB_MEAN','BAF_ORIG_BB_SD','MISSING_RATE_ORIG',
+ 'BAF_ORIG_BB_MEAN','BAF_ORIG_BB_SD',
+ 'MISSING_RATE_ORIG',
'COUNT_NORM','LRR_NORM_MEAN','LRR_NORM_SD','BAF_NORM_SKEW',
+ 'BDEV_NORM_MEAN', 'BDEV_NORM_SD',
'BAF_NORM_AA_MEAN','BAF_NORM_AA_SD',
'BAF_NORM_AB_MEAN','BAF_NORM_AB_SD',
- 'BAF_NORM_BB_MEAN','BAF_NORM_BB_SD','MISSING_RATE_NORM'])
+ 'BAF_NORM_BB_MEAN','BAF_NORM_BB_SD',
+ 'MISSING_RATE_NORM'])

for i,(lrr_orig,lrr_norm,baf_orig,baf_norm,genos) in enumerate(data):
if not options.progress and options.output!='-':
@@ -163,16 +167,21 @@
BB = genos=='BB'
NN = genos==' '

+ bdev_orig = baf_deviation(baf_orig,genos)
+ bdev_norm = baf_deviation(baf_norm,genos)
+
valid = np.isfinite(baf_orig)&np.isfinite(lrr_orig)
count_orig = valid.sum()
baf_orig_skew = skewtest(baf_orig[valid&AB])
- lrr_orig_u = lrr_orig[valid ].mean()
+ lrr_orig_mu = lrr_orig[valid ].mean()
lrr_orig_sd = lrr_orig[valid ].std(ddof=1)
- baf_orig_aa_u = baf_orig[valid&AA].mean()
+ bdev_orig_mu = bdev_orig[valid ].mean()
+ bdev_orig_sd = bdev_orig[valid ].std(ddof=1)
+ baf_orig_aa_mu = baf_orig[valid&AA].mean()
baf_orig_aa_sd = baf_orig[valid&AA].std(ddof=1)
- baf_orig_ab_u = baf_orig[valid&AB].mean()
+ baf_orig_ab_mu = baf_orig[valid&AB].mean()
baf_orig_ab_sd = baf_orig[valid&AB].std(ddof=1)
- baf_orig_bb_u = baf_orig[valid&BB].mean()
+ baf_orig_bb_mu = baf_orig[valid&BB].mean()
baf_orig_bb_sd = baf_orig[valid&BB].std(ddof=1)
missing_orig = 1-valid.sum()/valid_snps
#missing_orig = (valid&NN).sum()/valid_snps
@@ -180,26 +189,30 @@
valid = np.isfinite(baf_norm)&np.isfinite(lrr_norm)
count_norm = valid.sum()
baf_norm_skew = skewtest(baf_norm[valid&AB])
- lrr_norm_u = lrr_norm[valid ].mean()
+ lrr_norm_mu = lrr_norm[valid ].mean()
lrr_norm_sd = lrr_norm[valid ].std(ddof=1)
- baf_norm_aa_u = baf_norm[valid&AA].mean()
+ bdev_norm_mu = bdev_norm[valid ].mean()
+ bdev_norm_sd = bdev_norm[valid ].std(ddof=1)
+ baf_norm_aa_mu = baf_norm[valid&AA].mean()
baf_norm_aa_sd = baf_norm[valid&AA].std(ddof=1)
- baf_norm_ab_u = baf_norm[valid&AB].mean()
+ baf_norm_ab_mu = baf_norm[valid&AB].mean()
baf_norm_ab_sd = baf_norm[valid&AB].std(ddof=1)
- baf_norm_bb_u = baf_norm[valid&BB].mean()
+ baf_norm_bb_mu = baf_norm[valid&BB].mean()
baf_norm_bb_sd = baf_norm[valid&BB].std(ddof=1)
missing_norm = 1-valid.sum()/valid_snps
#missing_norm = (valid&NN).sum()/valid_snps

out.writerow([assay,name,
- count_orig,lrr_orig_u,lrr_orig_sd,baf_orig_skew,
- baf_orig_aa_u,baf_orig_aa_sd,
- baf_orig_ab_u,baf_orig_ab_sd,
- baf_orig_bb_u,baf_orig_bb_sd,missing_orig,
- count_norm,lrr_norm_u,lrr_norm_sd,baf_norm_skew,
- baf_norm_aa_u,baf_norm_aa_sd,
- baf_norm_ab_u,baf_norm_ab_sd,
- baf_norm_bb_u,baf_norm_bb_sd,missing_norm,
+ count_orig,lrr_orig_mu,lrr_orig_sd,baf_orig_skew,
+ bdev_orig_mu,bdev_orig_sd,
+ baf_orig_aa_mu,baf_orig_aa_sd,
+ baf_orig_ab_mu,baf_orig_ab_sd,
+ baf_orig_bb_mu,baf_orig_bb_sd,missing_orig,
+ count_norm,lrr_norm_mu,lrr_norm_sd,baf_norm_skew,
+ bdev_norm_mu,bdev_norm_sd,
+ baf_norm_aa_mu,baf_norm_aa_sd,
+ baf_norm_ab_mu,baf_norm_ab_sd,
+ baf_norm_bb_mu,baf_norm_bb_sd,missing_norm,
])

gdat.close()

==============================================================================
Revision: 07889938c14c
Author: Kevin B. Jacobs <jac...@bioinformed.com>
Date: Thu Aug 30 13:41:06 2012
Log: estimate_ld is not smart enough to compute from diplotypes
http://code.google.com/p/glu-genetics/source/detail?r=07889938c14c

Modified:
/glu/modules/qc/correlation.py

=======================================
--- /glu/modules/qc/correlation.py Mon Aug 8 05:03:50 2011
+++ /glu/modules/qc/correlation.py Thu Aug 30 13:41:06 2012
@@ -290,7 +290,8 @@

dips = count_diplotypes(locus1, locus2)

- r2_em,dp = estimate_ld(dips)
+ #r2_em,dp = estimate_ld(dips)
+ r2_em,dp = estimate_ld(locus1,locus2)
#r2_trend= trend_r2(locus1,locus2)
r2_trend = trend_r2_dips(dips)

@@ -303,7 +304,7 @@

concord = concord/comps if comps else 1

-
out.writerow([lname1,len(locus1),missing1,maf1,missing2,maf2,concord,comps,r2_em,
r2_trend])
+
out.writerow([lname1,len(locus1),missing1,maf1,missing2,maf2,concord,comps,r2_em,r2_trend])


def correlation_dosage(options,filename1,filename2):

==============================================================================
Revision: 496d0f50ef1f
Author: Kevin B. Jacobs <jac...@bioinformed.com>
Date: Thu Aug 30 13:42:05 2012
Log: Add support for plotting zoomed event end-points and regions
http://code.google.com/p/glu-genetics/source/detail?r=496d0f50ef1f

Modified:
/glu/modules/cnv/plot_events.py

=======================================
--- /glu/modules/cnv/plot_events.py Mon Aug 13 11:26:42 2012
+++ /glu/modules/cnv/plot_events.py Thu Aug 30 13:42:05 2012
@@ -18,14 +18,11 @@
from itertools import groupby
from operator import attrgetter

-from collections import namedtuple
-
from glu.lib.fileutils import table_reader, table_writer,
cook_table, table_options
from glu.lib.recordtype import recordtype

from glu.modules.cnv.gdat import GDATIndex, get_gcmodel, gc_correct
from glu.modules.cnv.plot import plot_chromosome
-from glu.modules.cnv.normalize import quantile

from glu.lib.genolib.transform import GenoTransform

@@ -33,6 +30,25 @@
Event = recordtype('Event', 'chrom start stop fields lrr baf baf_model
state pmosaic')


+def load_events(options):
+ add_fields =
['Probes','STATE','PMosaic','LRR_mean','LRR_sd','BAF_BANDS','BAF_means','BAF_sds']
+
+ events = table_reader(options.events)
+ events = cook_table(events,options)
+
+ header = next(events)
+ extra = [ h for h in add_fields if h not in header ]
+ header += extra
+ blanks = ['']*len(extra)
+
+ Event = recordtype('Event', header)._make
+ events = [ Event(e+blanks) for e in events ]
+
+ events.sort(key=attrgetter(options.assayid,options.chromid))
+
+ return header,events
+
+
def fit_gmm1(components,x):
from scikits.learn.mixture import GMM, DPGMM, VBGMM

@@ -469,6 +485,17 @@
return chrmask


+def mask_event(start,stop,pos,chrom_genos,chrom_baf,chrom_lrr):
+ chrom_mask = pos>=start
+ chrom_mask &= pos< stop
+ pos = pos[chrom_mask]
+ chrom_genos = chrom_genos[chrom_mask]
+ chrom_baf = chrom_baf[chrom_mask]
+ chrom_lrr = chrom_lrr[chrom_mask]
+
+ return pos,chrom_genos,chrom_baf,chrom_lrr
+
+
def option_parser():
from glu.lib.glu_argparse import GLUArgumentParser

@@ -499,6 +526,8 @@
help='List of samples to exclude,
only samples not present will be kept')
parser.add_argument('--chrmask', action='append',
help='Show only chrom:start-stop
region for events on chrom. May be specified multiple times.')
+ parser.add_argument('--zoomends', metavar='BP', type=float,
default=None,
+ help='Zoom in on event start and end
regions with +- BP window.')

table_options(parser)

@@ -515,25 +544,13 @@
gcmodels = {}
chip_indices = {}

- add_fields =
['Probes','STATE','PMosaic','LRR_mean','LRR_sd','BAF_BANDS','BAF_means','BAF_sds']
+ header,events = load_events(options)

- events = table_reader(options.events)
- events = cook_table(events,options)
-
- header = next(events)
- extra = [ h for h in add_fields if h not in header ]
- header += extra
- blanks = ['']*len(extra)
-
- Event = recordtype('Event', header)._make
- events = [ Event(e+blanks) for e in events ]
-
- events.sort(key=attrgetter(options.assayid,options.chromid))
-
- out = table_writer(options.outevents) if options.outevents else
None
-
- if out:
+ if options.outevents:
+ out = table_writer(options.outevents)
out.writerow(header)
+ else:
+ out = None

transform = GenoTransform.from_object(options)

@@ -633,8 +650,8 @@

for chrom,chrom_events in groupby(eventrecs,key=attrgetter('chrom')):
chrom = norm_chromosome(chrom)
- chrom_events = list(chrom_events)
pos,index = chrom_indices[chrom]
+ chrom_events = list(chrom_events)

#chrom_valid = valid_mask[index]
#chrom_normal = normal_mask[index]
@@ -652,15 +669,43 @@

if chrom in chrmask:
start,stop = chrmask[chrom]
- chrom_mask = pos>=start
- chrom_mask &= pos< stop
- pos = pos[chrom_mask]
- chrom_genos = chrom_genos[chrom_mask]
- chrom_baf = chrom_baf[chrom_mask]
- chrom_lrr = chrom_lrr[chrom_mask]
+ pos,chrom_genos,chrom_baf,chrom_lrr =
mask_event(start,stop,pos,chrom_genos,
+
chrom_baf,chrom_lrr)

- plot_chromosome(plotname, pos, chrom_lrr, chrom_baf,
genos=chrom_genos,
- title=title, events=chrom_events)
+ if not options.zoomends:
+ plot_chromosome(plotname, pos, chrom_lrr, chrom_baf,
genos=chrom_genos,
+ title=title, events=chrom_events)
+
+ else:
+ for i,event in enumerate(chrom_events):
+ if event.stop-event.start<2*options.zoomends:
+ start,stop =
event.start-options.zoomends,event.stop+options.zoomends
+ end_pos,end_genos,end_baf,end_lrr =
mask_event(start,stop,pos,chrom_genos,
+
chrom_baf,chrom_lrr)
+
+ end_plotname =
plotname.replace('.png','_event%d_region.png' % (i+1))
+ end_title = title + ' EVENT %d REGION' % (i+1)
+ plot_chromosome(end_plotname, end_pos, end_lrr, end_baf,
genos=end_genos,
+ title=end_title, events=[event])
+
+ else:
+ start,stop =
event.start-options.zoomends,event.start+options.zoomends
+ end_pos,end_genos,end_baf,end_lrr =
mask_event(start,stop,pos,chrom_genos,
+
chrom_baf,chrom_lrr)
+
+ end_plotname =
plotname.replace('.png','_event%d_begin.png' % (i+1))
+ end_title = title + ' EVENT %d START' % (i+1)
+ plot_chromosome(end_plotname, end_pos, end_lrr, end_baf,
genos=end_genos,
+ title=end_title, events=[event])
+
+ start,stop =
event.stop-options.zoomends,event.stop+options.zoomends
+ end_pos,end_genos,end_baf,end_lrr =
mask_event(start,stop,pos,chrom_genos,
+
chrom_baf,chrom_lrr)
+
+ end_plotname = plotname.replace('.png','_event%d_end.png' %
(i+1))
+ end_title = title + ' EVENT %d END' % (i+1)
+ plot_chromosome(end_plotname, end_pos, end_lrr, end_baf,
genos=end_genos,
+ title=end_title, events=[event])


if __name__=='__main__':
Reply all
Reply to author
Forward
0 new messages