[glu-genetics] 9 new revisions pushed by bioinformed@gmail.com on 2013-06-30 14:37 GMT

2 views
Skip to first unread message

glu-ge...@googlecode.com

unread,
Jun 30, 2013, 10:38:02 AM (6/30/13)
to glu...@googlegroups.com
9 new revisions:

Revision: 05a8fc5e2c7f
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:22:01 2013
Log: Fix str->bytes to make recent Cython version happy
http://code.google.com/p/glu-genetics/source/detail?r=05a8fc5e2c7f

Revision: c8146cde4be4
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:23:11 2013
Log: struct.admix: Perform 10 E-M iterations before bounded optimization
http://code.google.com/p/glu-genetics/source/detail?r=c8146cde4be4

Revision: 7cff1a5acb07
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:24:19 2013
Log: Output [start,end) coordinates
http://code.google.com/p/glu-genetics/source/detail?r=7cff1a5acb07

Revision: ef043d746850
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:24:52 2013
Log: Add a locus quality filter
http://code.google.com/p/glu-genetics/source/detail?r=ef043d746850

Revision: 420f1d981f8a
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:25:12 2013
Log: Convert locus quality to float or None
http://code.google.com/p/glu-genetics/source/detail?r=420f1d981f8a

Revision: fb932986fda1
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:25:34 2013
Log: Remove spurious ESP name annotation
http://code.google.com/p/glu-genetics/source/detail?r=fb932986fda1

Revision: f235aebb7e9a
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:26:19 2013
Log: Add short chromosome names to cytoband map
http://code.google.com/p/glu-genetics/source/detail?r=f235aebb7e9a

Revision: ec32b9042eb4
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:45:24 2013
Log: Disable tabix cache, since that feature isn't in the mainline release
http://code.google.com/p/glu-genetics/source/detail?r=ec32b9042eb4

Revision: ce990b033708
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:46:21 2013
Log: Add BAM MAPQ filter
http://code.google.com/p/glu-genetics/source/detail?r=ce990b033708

==============================================================================
Revision: 05a8fc5e2c7f
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:22:01 2013
Log: Fix str->bytes to make recent Cython version happy
http://code.google.com/p/glu-genetics/source/detail?r=05a8fc5e2c7f

Modified:
/glu/lib/seqlib/_cigar.pyx

=======================================
--- /glu/lib/seqlib/_cigar.pyx Tue Feb 8 08:17:28 2011
+++ /glu/lib/seqlib/_cigar.pyx Sun Jun 30 06:22:01 2013
@@ -82,7 +82,7 @@
result[pos] = 0

# Convert result to a Python string, free memory, and return
- cdef str retval = result
+ cdef bytes retval = result
free(result)

return retval
@@ -154,7 +154,7 @@
ndiff[end] = 0

# Copy result ndiff to a Python string and free buffer
- cdef str md = ndiff
+ cdef bytes md = ndiff
free(ndiff)

# Return count of mismatches and ndiff string (md)

==============================================================================
Revision: c8146cde4be4
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:23:11 2013
Log: struct.admix: Perform 10 E-M iterations before bounded optimization
http://code.google.com/p/glu-genetics/source/detail?r=c8146cde4be4

Modified:
/glu/modules/struct/admix.py

=======================================
--- /glu/modules/struct/admix.py Wed Jan 25 17:53:47 2012
+++ /glu/modules/struct/admix.py Sun Jun 30 06:23:11 2013
@@ -619,7 +619,7 @@
f = individual_frequencies(pops,genotype_indices(genos))

# Find feasible starting values
- x0 = estimate_admixture_em(f,iters=0)
+ x0 = estimate_admixture_em(f,iters=10)

# Estimate admixture
x,l,it = estimate_admixture_sqp(f, x0)

==============================================================================
Revision: 7cff1a5acb07
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:24:19 2013
Log: Output [start,end) coordinates
http://code.google.com/p/glu-genetics/source/detail?r=7cff1a5acb07

Modified:
/glu/modules/seq/vcf2table.py

=======================================
--- /glu/modules/seq/vcf2table.py Fri Jul 20 09:48:49 2012
+++ /glu/modules/seq/vcf2table.py Sun Jun 30 06:24:19 2013
@@ -48,7 +48,7 @@
inf = inf[3:]
info.append(inf)

- header = ( ['CHROM','LOCATION','IDS','REF','VAR','QUAL']
+ header = ( ['CHROM','REF_START','REF_STOP','IDS','REF','VAR','QUAL']
+ [ f.upper() for f in filters ]
+ [ i.upper() for i in info ]
+ samples
@@ -71,7 +71,7 @@

infomap[key] = value

- row = ( [ v.chrom, str(v.end), ','.join(v.names),
v.ref, ','.join(v.var), v.qual ]
+ row = ( [ v.chrom, str(v.start), str(v.end), ','.join(v.names),
v.ref, ','.join(v.var), v.qual ]
+ [ 'Y' if f in v.filter else '' for f in filters ]
+ [ infomap.get(i,'') for i in info ]
+ [ g[0] for g in v.genos ]

==============================================================================
Revision: ef043d746850
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:24:52 2013
Log: Add a locus quality filter
http://code.google.com/p/glu-genetics/source/detail?r=ef043d746850

Modified:
/glu/modules/seq/vcf2ldat.py

=======================================
--- /glu/modules/seq/vcf2ldat.py Sun Aug 5 04:36:17 2012
+++ /glu/modules/seq/vcf2ldat.py Sun Jun 30 06:24:52 2013
@@ -25,6 +25,8 @@

parser.add_argument('variants', help='Input VCF variant file')

+ parser.add_argument('--quality-filter', default=None, type=float,
+ help='Apply a site quality filter')
parser.add_argument('-o', '--output', metavar='FILE', default='-',
help='Output variant file')
return parser
@@ -48,9 +50,13 @@
non_autosomes = set(['chrX','chrY','chrM','X','Y','M','Mt','MT'])

for v in vcf:
+ # Bialleleic for now
if v.chrom in non_autosomes or not v.names or len(v.var)!=1:
continue

+ if options.quality_filter and v.qual < options.quality_filter:
+ continue
+
a,b = ab = v.ref,v.var[0]

# Count SNPs only for now

==============================================================================
Revision: 420f1d981f8a
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:25:12 2013
Log: Convert locus quality to float or None
http://code.google.com/p/glu-genetics/source/detail?r=420f1d981f8a

Modified:
/glu/lib/seqlib/vcf.py

=======================================
--- /glu/lib/seqlib/vcf.py Sun Aug 5 11:24:41 2012
+++ /glu/lib/seqlib/vcf.py Sun Jun 30 06:25:12 2013
@@ -62,7 +62,7 @@
ref = _intern(fields[3])
end = start+len(ref)
var = [ _intern(v) for v in fields[4].split(',') ]
- qual = fields[5]
+ qual = float(fields[5]) if fields[5]!='.' else None
filter = [ _intern(f) for f in fields[6].split(';') ] if fields[6]!='.' else []
info = fields[7].split(';') if fields[7]!='.' else []
format = _intern(fields[8]) if n>8 else None

==============================================================================
Revision: fb932986fda1
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:25:34 2013
Log: Remove spurious ESP name annotation
http://code.google.com/p/glu-genetics/source/detail?r=fb932986fda1

Modified:
/glu/modules/seq/annotate.py

=======================================
--- /glu/modules/seq/annotate.py Sun Aug 5 04:31:51 2012
+++ /glu/modules/seq/annotate.py Sun Jun 30 06:25:34 2013
@@ -333,6 +333,10 @@
v.names.remove('tgp')
v.filter.append('1000G')

+ if 'esp' in v.names:
+ v.names.remove('esp')
+ v.filter.append('ESP')
+
if not v.ref or v.var==['']:
v.filter.append('Indel')


==============================================================================
Revision: f235aebb7e9a
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:26:19 2013
Log: Add short chromosome names to cytoband map
http://code.google.com/p/glu-genetics/source/detail?r=f235aebb7e9a

Modified:
/glu/lib/seqlib/vannotator.py

=======================================
--- /glu/lib/seqlib/vannotator.py Mon Jul 30 10:58:19 2012
+++ /glu/lib/seqlib/vannotator.py Sun Jun 30 06:26:19 2013
@@ -194,6 +194,8 @@
self.band_map = band_map = defaultdict(IntervalTree)
for band in get_cytobands(self.con):
band_map[band.chrom].insert(band.start,band.end,band)
+ if band.chrom.startswith('chr') and band.chrom[3:] not in band_map:
+ band_map[band.chrom[3:]] = band_map[band.chrom]

trans = get_transcripts(self.con)
trans = progress_loop(trans, label='Loading transcripts: ',
units='transcripts')

==============================================================================
Revision: ec32b9042eb4
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:45:24 2013
Log: Disable tabix cache, since that feature isn't in the mainline release
http://code.google.com/p/glu-genetics/source/detail?r=ec32b9042eb4

Modified:
/glu/lib/seqlib/cgfvariants.py

=======================================
--- /glu/lib/seqlib/cgfvariants.py Mon Jan 21 11:41:31 2013
+++ /glu/lib/seqlib/cgfvariants.py Sun Jun 30 06:45:24 2013
@@ -19,7 +19,8 @@

class CGFVariants(object):
def __init__(self, cgfvariant, reference_fasta):
- self.vars = pysam.Tabixfile(cgfvariant,cache_size=128*1024*1024)
+ #self.vars = pysam.Tabixfile(cgfvariant,cache_size=128*1024*1024)
+ self.vars = pysam.Tabixfile(cgfvariant)
self.reference = pysam.Fastafile(reference_fasta)

def query_variants(self, chromosome, start, stop):

==============================================================================
Revision: ce990b033708
Branch: default
Author: Kevin Jacobs <kevin....@invitae.com>
Date: Sun Jun 30 06:46:21 2013
Log: Add BAM MAPQ filter
http://code.google.com/p/glu-genetics/source/detail?r=ce990b033708

Modified:
/glu/lib/seqlib/filter.py
/glu/modules/seq/amplicon_coverage.py
/glu/modules/seq/coverage.py
/glu/modules/seq/filter.py

=======================================
--- /glu/lib/seqlib/filter.py Sat Jul 30 20:35:40 2011
+++ /glu/lib/seqlib/filter.py Sun Jun 30 06:46:21 2013
@@ -50,16 +50,22 @@


# Internal helper generator function for filter_alignments
-def _filter_alignments(alignments,positive_flags,negative_flags):
+def _filter_alignments(alignments, positive_flags, negative_flags, min_mapq):
for alignment in alignments:
- flags = alignment.flag
- pos_match = (flags&positive_flags)==positive_flags
- neg_match = (flags&negative_flags)==0
- if pos_match and neg_match:
- yield alignment
+ if min_mapq and alignment.mapq < min_mapq:
+ continue
+
+ flags = alignment.flag
+ pos_mismatch = (flags&positive_flags)!=positive_flags
+ neg_mismatch = (flags&negative_flags)!=0
+
+ if pos_mismatch or neg_mismatch:
+ continue
+
+ yield alignment


-def filter_alignments(alignments, include, exclude):
+def filter_alignments(alignments, include, exclude, min_mapq=None):
'''
Filter alignments based on a set of specified options

@@ -128,6 +134,8 @@
group.add_argument('--excludealign', action='append', metavar='OPTS',
help='Exclude alignments that meet the specified comma separated criteria. '
' See --includealign for supported criteria.')
+ group.add_argument('--minmapq', type=int, default=None, metavar='N',
+ help='Exclude alignments with MAPQ < N')


def _test():
=======================================
--- /glu/modules/seq/amplicon_coverage.py Fri Jul 20 09:52:28 2012
+++ /glu/modules/seq/amplicon_coverage.py Sun Jun 30 06:46:21 2013
@@ -54,8 +54,6 @@
contigs.add(rname)

if rname=='unaligned':
- for align in contig_aligns:
- pass
continue

next_start,next_end = ctargets[0][:2] if ctargets else nulltarget
@@ -185,8 +183,7 @@
aligns = iter(samfile)

aligns = progress_loop(aligns, label='Loading BAM file(s): ',
units='alignments')
- aligns = filter_alignments(aligns, options.includealign,
options.excludealign)
-
+ aligns = filter_alignments(aligns, options.includealign, options.excludealign, options.minmapq)

target_stats = {}
for target_name in target_names:
=======================================
--- /glu/modules/seq/coverage.py Sun Apr 22 13:25:43 2012
+++ /glu/modules/seq/coverage.py Sun Jun 30 06:46:21 2013
@@ -198,7 +198,7 @@
contig_lens = inbam.lengths
aligns = inbam.fetch(region=options.region or None)
aligns = progress_loop(aligns, label='Loading BAM file: ',
units='alignments')
- aligns = filter_alignments(aligns, options.includealign,
options.excludealign)
+ aligns = filter_alignments(aligns, options.includealign,
options.excludealign, options.minmapq)

for tid,contig_aligns in groupby(aligns, attrgetter('tid')):
contig_name = contig_names[tid]
=======================================
--- /glu/modules/seq/filter.py Tue Sep 4 05:21:11 2012
+++ /glu/modules/seq/filter.py Sun Jun 30 06:46:21 2013
@@ -585,7 +585,7 @@
if options.progress:
aligns = progress_loop(aligns, label='Loading BAM file(s): ',
units='alignments')

- aligns = filter_alignments(aligns, options.includealign,
options.excludealign)
+ aligns = filter_alignments(aligns, options.includealign,
options.excludealign, options.minmapq)

stats = FilterStats()

Reply all
Reply to author
Forward
0 new messages