| Previous CloneSet | Next CloneSet | Back to Main Report |
| Clone Mass | Clones in CloneSet | Parameter Count | Clone Similarity | Syntax Category [Sequence Length] |
|---|---|---|---|---|
| 230 | 2 | 6 | 0.985 | compound_stmt |
| Clone Abstraction | Parameter Bindings |
| Clone Instance (Click to see clone) | Line Count | Source Line | Source File |
|---|---|---|---|
| 1 | 230 | 218 | Bio/MEME/Parser.py |
| 2 | 230 | 302 | Bio/Motif/Parsers/MEME.py |
| ||||
class _MASTConsumer:
"""
Consumer that can receive events from _MASTScanner.
A _MASTConsumer parses lines from a mast text output file.
The motif match diagrams are parsed using line buffering.
Each of the buffering functions have a dummy variable that is
required for testing using the Bio.ParserSupport.TaggingConsumer.
If this variable isn't there, the TaggingConsumer barfs. In
the _MASTScanner, None is passed in the place of this variable.
"""
def __init__ (self):
self.data = MASTRecord( )
self._current_seq = ""
self._line_buffer = [ ]
self._buffer_size = 0
self._buffered_seq_start = 0
def _version (self,line):
line = line.strip( )
ls = line.split( )
self.data._version(ls[2])
def _database (self,line):
line = line.strip( )
ls = line.split( )
self.data._database(ls[1])
al = ""
if ls[2]=="(nucleotide)":
al = IUPAC.unambiguous_dna
self.data._alphabet(al)
else:
al = IUPAC.protein
self.data._alphabet(al)
def _add_motif (self,line):
line = line.strip( )
ls = line.split( )
m = Motif.MEMEMotif( )
m._alphabet(self.data.alphabet)
m._length(ls[1])
name = ls[0]
m._name(name)
m._consensus(ls[2])
self.data._add_motif(m)
def _add_match_diagram (self,line):
line = line.strip( )
ls = line.split( )
self.data._add_diagram_for_sequence(ls[1],self._current_seq)
ds = ls[1].split("_")
i = 0
start = 0
for i in range(0,len(ds)):
if ds[i].find("[")!= -1 or ds[i].find("<")!= -1:
inst = Motif.Instance( )
inst._seqname (self._current_seq)
inst._start (start)
r = re.compile("\\d+")
mn = r.findall(ds[i])[0]
if ds[i].find("-")!= -1:
inst.strand = "-"
else:
inst.strand = "+"
motif = self.data.get_motif_by_name(mn)
motif.add_instance(inst)
start+=motif.length
else:
start+=int(ds[i])
def _add_sequence_match_with_diagram (self,line):
line = line.strip( )
ls = line.split( )
self.data._add_sequence(ls[0])
self.data._add_diagram_for_sequence(ls[2],ls[0])
ds = ls[2].split("_")
i = 0
start = 0
for i in range(0,len(ds)):
if ds[i].find("+")!= -1 or ds[i].find("-")!= -1:
inst = Motif.Instance( )
inst._seqname (ls[0])
inst._start (start)
r = re.compile("\\d+")
mn = r.findall(ds[i])[0]
if ds[i].find("-")!= -1:
inst.strand = "-"
else:
inst.strand = "+"
motif = self.data.get_motif_by_name(mn)
motif.add_instance(inst)
start+=motif.length
else:
start+=int(ds[i])
def _add_diagram_from_buffer (self,dummy):
line = ""
for l in self._line_buffer:
line+=l.strip( )
ls = line.split( )
self.data._add_diagram_for_sequence(ls[1],self._current_seq)
ds = ls[1].split("_")
i = 0
start = 0
for i in range(0,len(ds)):
if ds[i].find("[")!= -1 or ds[i].find("<")!= -1:
inst = Motif.Instance( )
inst._seqname (self._current_seq)
inst._start (start)
r = re.compile("\\d+")
mn = r.findall(ds[i])[0]
if ds[i].find("-")!= -1:
inst.strand = "-"
else:
inst.strand = "+"
motif = self.data.get_motif_by_name(mn)
motif.add_instance(inst)
start+=motif.length
else:
start+=int(ds[i])
def _set_current_seq (self,line):
line = line.strip( )
self._current_seq = line
if not self.data.sequences.count(line):
self.data.sequences.append(line)
def _add_line_to_buffer (self,line):
line = line.strip( )
if not line.startswith("*****"):
self._line_buffer.append(line)
else:
return -1
def _parse_buffer (self,dummy):
"""Parses the line buffer to get e-values for each instance of a motif.
This buffer parser is the most likely point of failure for the
MASTParser.
"""
insts = self.data.get_motif_matches_for_sequence(self._current_seq)
if len(insts)>0:
fullSeq = self._line_buffer[self._buffer_size-1]
pvals = self._line_buffer[1].split( )
p = 0
lpval = len(pvals)
while p<lpval:
if pvals[p].count("e")>1:
#Break blocks up by e and parse into valid floats. This only
#works if there are no e-values greater than 1e-5.
pvs = [ ]
spe = pvals[p].split("e")
spe.reverse( )
dotind = spe[1].find(".")
if dotind== -1:
thispval = spe[1][ -1]+"e"+spe[0]
else:
thispval = spe[1][dotind-1: ]+"e"+spe[0]
pvs.append(thispval)
for spi in range(2,len(spe)):
dotind = spe[spi].find(".")
prevdotind = spe[spi-1].find(".")
if dotind!= -1:
if prevdotind== -1:
thispval = spe[spi][dotind-1: ]+"e"+spe[spi-1][ : -1]
else:
thispval = spe[spi][dotind-1: ]+"e"+spe[spi-1][0:prevdotind-1]
else:
if prevdotind== -1:
thispval = spe[spi][ -1]+"e"+spe[spi-1][ : -1]
else:
thispval = spe[spi][ -1]+"e"+spe[spi-1][0:prevdotind-1]
pvs.append(thispval)
pvs.reverse( )
if p>0:
pvals = pvals[0:p]+pvs+pvals[p+1: ]
else:
pvals = pvs+pvals[p+1: ]
lpval = len(pvals)
p+=1
i = 0
if len(pvals)!=len(insts):
sys.stderr.write("Failure to parse p-values for "+self._current_seq+": "+self._line_buffer[1]+" to: "+str(pvals)+"""
""" )
pvals = [ ]
# else:
# sys.stderr.write('These are just fine' + self._current_seq + ': ' + self._line_buffer[1] + " to: " + str(pvals) + "\n")
for i in range(0,len(insts)):
inst = insts[i]
start = inst.start-self._buffered_seq_start+1
thisSeq = fullSeq[start:start+inst.length]
thisSeq = Seq.Seq(thisSeq,self.data.alphabet)
inst._sequence(thisSeq)
if pvals:
inst._pvalue(float(pvals[i]))
def _blank_buffer (self,dummy):
self._line_buffer = [ ]
self._buffer_size = 0
def _collapse_buffer(self,dummy):
if self._buffer_size==0:
if len(self._line_buffer)>0:
self._buffer_size = len(self._line_buffer)
ll = self._line_buffer[self._buffer_size-1].split( )
self._line_buffer[self._buffer_size-1] = ll[1]
self._buffered_seq_start = int(ll[0])
else:
i = 0
for i in range(self._buffer_size,len(self._line_buffer)-1):
self._line_buffer[i-self._buffer_size] = self._line_buffer[i-self._buffer_size]+self._line_buffer[i].strip( )
ll = self._line_buffer[len(self._line_buffer)-1].split( )
if int(ll[0])==self._buffered_seq_start+len(self._line_buffer[self._buffer_size-1]):
self._line_buffer[self._buffer_size-1]+=ll[1]
else:
differ = int(ll[0])-(self._buffered_seq_start+len(self._line_buffer[self._buffer_size-1]))
self._line_buffer[self._buffer_size-1]+="N"*differ
self._line_buffer[self._buffer_size-1]+=ll[1]
self._line_buffer = self._line_buffer[0:self._buffer_size]
def _add_motif_match (self,line):
line = line.strip( )
if line.find("[")!= -1 or line.find("<")!= -1:
pass
elif line.find("e")!= -1:
pass
elif line.find("+")!= -1:
pass
def noevent (self,line):
pass
|
| ||||
class _MASTConsumer:
"""
Consumer that can receive events from _MASTScanner.
A _MASTConsumer parses lines from a mast text output file.
The motif match diagrams are parsed using line buffering.
Each of the buffering functions have a dummy variable that is
required for testing using the Bio.ParserSupport.TaggingConsumer.
If this variable isn't there, the TaggingConsumer barfs. In
the _MASTScanner, None is passed in the place of this variable.
"""
def __init__ (self):
self.data = MASTRecord( )
self._current_seq = ""
self._line_buffer = [ ]
self._buffer_size = 0
self._buffered_seq_start = 0
def _version (self,line):
line = line.strip( )
ls = line.split( )
self.data._version(ls[2])
def _database (self,line):
line = line.strip( )
ls = line.split( )
self.data._database(ls[1])
al = ""
if ls[2]=="(nucleotide)":
al = IUPAC.unambiguous_dna
self.data._alphabet(al)
else:
al = IUPAC.protein
self.data._alphabet(al)
def _add_motif (self,line):
line = line.strip( )
ls = line.split( )
m = MEMEMotif( )
m.alphabet = self.data.alphabet
m.length = ls[1]
name = ls[0]
m.name = name
m.add_instance(ls[2])
self.data._add_motif(m)
def _add_match_diagram (self,line):
line = line.strip( )
ls = line.split( )
self.data._add_diagram_for_sequence(ls[1],self._current_seq)
ds = ls[1].split("_")
i = 0
start = 0
for i in range(0,len(ds)):
if ds[i].find("[")!= -1 or ds[i].find("<")!= -1:
inst = MEMEInstance( )
inst._seqname (self._current_seq)
inst._start (start)
r = re.compile("\\d+")
mn = r.findall(ds[i])[0]
if ds[i].find("-")!= -1:
inst.strand = "-"
else:
inst.strand = "+"
motif = self.data.get_motif_by_name(mn)
motif.add_instance(inst)
start+=motif.length
else:
start+=int(ds[i])
def _add_sequence_match_with_diagram (self,line):
line = line.strip( )
ls = line.split( )
self.data._add_sequence(ls[0])
self.data._add_diagram_for_sequence(ls[2],ls[0])
ds = ls[2].split("_")
i = 0
start = 0
for i in range(0,len(ds)):
if ds[i].find("+")!= -1 or ds[i].find("-")!= -1:
inst = MEMEInstance( )
inst._seqname (ls[0])
inst._start (start)
r = re.compile("\\d+")
mn = r.findall(ds[i])[0]
if ds[i].find("-")!= -1:
inst.strand = "-"
else:
inst.strand = "+"
motif = self.data.get_motif_by_name(mn)
motif.add_instance(inst)
start+=motif.length
else:
start+=int(ds[i])
def _add_diagram_from_buffer (self,dummy):
line = ""
for l in self._line_buffer:
line+=l.strip( )
ls = line.split( )
self.data._add_diagram_for_sequence(ls[1],self._current_seq)
ds = ls[1].split("_")
i = 0
start = 0
for i in range(0,len(ds)):
if ds[i].find("[")!= -1 or ds[i].find("<")!= -1:
inst = MEMEInstance( )
inst._seqname (self._current_seq)
inst._start (start)
r = re.compile("\\d+")
mn = r.findall(ds[i])[0]
if ds[i].find("-")!= -1:
inst.strand = "-"
else:
inst.strand = "+"
motif = self.data.get_motif_by_name(mn)
motif.add_instance(inst)
start+=motif.length
else:
start+=int(ds[i])
def _set_current_seq (self,line):
line = line.strip( )
self._current_seq = line
if not self.data.sequences.count(line):
self.data.sequences.append(line)
def _add_line_to_buffer (self,line):
line = line.strip( )
if not line.startswith("*****"):
self._line_buffer.append(line)
else:
return -1
def _parse_buffer (self,dummy):
"""Parses the line buffer to get e-values for each instance of a motif.
This buffer parser is the most likely point of failure for the
MASTParser.
"""
insts = self.data.get_motif_matches_for_sequence(self._current_seq)
if len(insts)>0:
fullSeq = self._line_buffer[self._buffer_size-1]
pvals = self._line_buffer[1].split( )
p = 0
lpval = len(pvals)
while p<lpval:
if pvals[p].count("e")>1:
#Break blocks up by e and parse into valid floats. This only
#works if there are no e-values greater than 1e-5.
pvs = [ ]
spe = pvals[p].split("e")
spe.reverse( )
dotind = spe[1].find(".")
if dotind== -1:
thispval = spe[1][ -1]+"e"+spe[0]
else:
thispval = spe[1][dotind-1: ]+"e"+spe[0]
pvs.append(thispval)
for spi in range(2,len(spe)):
dotind = spe[spi].find(".")
prevdotind = spe[spi-1].find(".")
if dotind!= -1:
if prevdotind== -1:
thispval = spe[spi][dotind-1: ]+"e"+spe[spi-1][ : -1]
else:
thispval = spe[spi][dotind-1: ]+"e"+spe[spi-1][0:prevdotind-1]
else:
if prevdotind== -1:
thispval = spe[spi][ -1]+"e"+spe[spi-1][ : -1]
else:
thispval = spe[spi][ -1]+"e"+spe[spi-1][0:prevdotind-1]
pvs.append(thispval)
pvs.reverse( )
if p>0:
pvals = pvals[0:p]+pvs+pvals[p+1: ]
else:
pvals = pvs+pvals[p+1: ]
lpval = len(pvals)
p+=1
i = 0
if len(pvals)!=len(insts):
sys.stderr.write("Failure to parse p-values for "+self._current_seq+": "+self._line_buffer[1]+" to: "+str(pvals)+"""
""" )
pvals = [ ]
# else:
# sys.stderr.write('These are just fine' + self._current_seq + ': ' + self._line_buffer[1] + " to: " + str(pvals) + "\n")
for i in range(0,len(insts)):
inst = insts[i]
start = inst.start-self._buffered_seq_start+1
thisSeq = fullSeq[start:start+inst.length]
thisSeq = Seq.Seq(thisSeq,self.data.alphabet)
inst._sequence(thisSeq)
if pvals:
inst._pvalue(float(pvals[i]))
def _blank_buffer (self,dummy):
self._line_buffer = [ ]
self._buffer_size = 0
def _collapse_buffer(self,dummy):
if self._buffer_size==0:
if len(self._line_buffer)>0:
self._buffer_size = len(self._line_buffer)
ll = self._line_buffer[self._buffer_size-1].split( )
self._line_buffer[self._buffer_size-1] = ll[1]
self._buffered_seq_start = int(ll[0])
else:
i = 0
for i in range(self._buffer_size,len(self._line_buffer)-1):
self._line_buffer[i-self._buffer_size] = self._line_buffer[i-self._buffer_size]+self._line_buffer[i].strip( )
ll = self._line_buffer[len(self._line_buffer)-1].split( )
if int(ll[0])==self._buffered_seq_start+len(self._line_buffer[self._buffer_size-1]):
self._line_buffer[self._buffer_size-1]+=ll[1]
else:
differ = int(ll[0])-(self._buffered_seq_start+len(self._line_buffer[self._buffer_size-1]))
self._line_buffer[self._buffer_size-1]+="N"*differ
self._line_buffer[self._buffer_size-1]+=ll[1]
self._line_buffer = self._line_buffer[0:self._buffer_size]
def _add_motif_match (self,line):
line = line.strip( )
if line.find("[")!= -1 or line.find("<")!= -1:
pass
elif line.find("e")!= -1:
pass
elif line.find("+")!= -1:
pass
def noevent (self,line):
pass
|
| |||
class _MASTConsumer:
"""
Consumer that can receive events from _MASTScanner.
A _MASTConsumer parses lines from a mast text output file.
The motif match diagrams are parsed using line buffering.
Each of the buffering functions have a dummy variable that is
required for testing using the Bio.ParserSupport.TaggingConsumer.
If this variable isn't there, the TaggingConsumer barfs. In
the _MASTScanner, None is passed in the place of this variable.
"""
def __init__(self):
self.data = MASTRecord( )
self._current_seq = ""
self._line_buffer = [ ]
self._buffer_size = 0
self._buffered_seq_start = 0
def _version(self,line):
line = line.strip( )
ls = line.split( )
self.data._version(ls[2])
def _database(self,line):
line = line.strip( )
ls = line.split( )
self.data._database(ls[1])
al = ""
if ls[2]=="(nucleotide)":
al = IUPAC.unambiguous_dna
self.data._alphabet(al)
else:
al = IUPAC.protein
self.data._alphabet(al)
def _add_motif(self,line):
line = line.strip( )
ls = line.split( )
m = [[#variable2ed1cb40]]( )
[[#variable2ed1ca80]]
[[#variable2ed1c9c0]]
name = ls[0]
[[#variable2ed1c960]]
m. [[#variable2ed1c900]](ls[2])
self.data._add_motif(m)
def _add_match_diagram(self,line):
line = line.strip( )
ls = line.split( )
self.data._add_diagram_for_sequence(ls[1],self._current_seq)
ds = ls[1].split("_")
i = 0
start = 0
for i in range(0,len(ds)):
if ds[i].find("[")!= -1 or ds[i].find("<")!= -1:
inst = [[#variable2ed1c8a0]]( )
inst._seqname(self._current_seq)
inst._start(start)
r = re.compile("\\d+")
mn = r.findall(ds[i])[0]
if ds[i].find("-")!= -1:
inst.strand = "-"
else:
inst.strand = "+"
motif = self.data.get_motif_by_name(mn)
motif.add_instance(inst)
start+=motif.length
else:
start+=int(ds[i])
def _add_sequence_match_with_diagram(self,line):
line = line.strip( )
ls = line.split( )
self.data._add_sequence(ls[0])
self.data._add_diagram_for_sequence(ls[2],ls[0])
ds = ls[2].split("_")
i = 0
start = 0
for i in range(0,len(ds)):
if ds[i].find("+")!= -1 or ds[i].find("-")!= -1:
inst = [[#variable2ed1c8a0]]( )
inst._seqname(ls[0])
inst._start(start)
r = re.compile("\\d+")
mn = r.findall(ds[i])[0]
if ds[i].find("-")!= -1:
inst.strand = "-"
else:
inst.strand = "+"
motif = self.data.get_motif_by_name(mn)
motif.add_instance(inst)
start+=motif.length
else:
start+=int(ds[i])
def _add_diagram_from_buffer(self,dummy):
line = ""
for l in self._line_buffer:
line+=l.strip( )
ls = line.split( )
self.data._add_diagram_for_sequence(ls[1],self._current_seq)
ds = ls[1].split("_")
i = 0
start = 0
for i in range(0,len(ds)):
if ds[i].find("[")!= -1 or ds[i].find("<")!= -1:
inst = [[#variable2ed1c8a0]]( )
inst._seqname(self._current_seq)
inst._start(start)
r = re.compile("\\d+")
mn = r.findall(ds[i])[0]
if ds[i].find("-")!= -1:
inst.strand = "-"
else:
inst.strand = "+"
motif = self.data.get_motif_by_name(mn)
motif.add_instance(inst)
start+=motif.length
else:
start+=int(ds[i])
def _set_current_seq(self,line):
line = line.strip( )
self._current_seq = line
if not self.data.sequences.count(line):
self.data.sequences.append(line)
def _add_line_to_buffer(self,line):
line = line.strip( )
if not line.startswith("*****"):
self._line_buffer.append(line)
else:
return -1
def _parse_buffer(self,dummy):
"""Parses the line buffer to get e-values for each instance of a motif.
This buffer parser is the most likely point of failure for the
MASTParser.
"""
insts = self.data.get_motif_matches_for_sequence(self._current_seq)
if len(insts)>0:
fullSeq = self._line_buffer[self._buffer_size-1]
pvals = self._line_buffer[1].split( )
p = 0
lpval = len(pvals)
while p<lpval:
if pvals[p].count("e")>1:
#Break blocks up by e and parse into valid floats. This only
#works if there are no e-values greater than 1e-5.
pvs = [ ]
spe = pvals[p].split("e")
spe.reverse( )
dotind = spe[1].find(".")
if dotind== -1:
thispval = spe[1][ -1]+"e"+spe[0]
else:
thispval = spe[1][dotind-1: ]+"e"+spe[0]
pvs.append(thispval)
for spi in range(2,len(spe)):
dotind = spe[spi].find(".")
prevdotind = spe[spi-1].find(".")
if dotind!= -1:
if prevdotind== -1:
thispval = spe[spi][dotind-1: ]+"e"+spe[spi-1][ : -1]
else:
thispval = spe[spi][dotind-1: ]+"e"+spe[spi-1][0:prevdotind-1]
else:
if prevdotind== -1:
thispval = spe[spi][ -1]+"e"+spe[spi-1][ : -1]
else:
thispval = spe[spi][ -1]+"e"+spe[spi-1][0:prevdotind-1]
pvs.append(thispval)
pvs.reverse( )
if p>0:
pvals = pvals[0:p]+pvs+pvals[p+1: ]
else:
pvals = pvs+pvals[p+1: ]
lpval = len(pvals)
p+=1
i = 0
if len(pvals)!=len(insts):
sys.stderr.write("Failure to parse p-values for "+self._current_seq+": "+self._line_buffer[1]+" to: "+str(pvals)+"""
""" )
pvals = [ ]
# else:
# sys.stderr.write('These are just fine' + self._current_seq + ': ' + self._line_buffer[1] + " to: " + str(pvals) + "\n")
for i in range(0,len(insts)):
inst = insts[i]
start = inst.start-self._buffered_seq_start+1
thisSeq = fullSeq[start:start+inst.length]
thisSeq = Seq.Seq(thisSeq,self.data.alphabet)
inst._sequence(thisSeq)
if pvals:
inst._pvalue(float(pvals[i]))
def _blank_buffer(self,dummy):
self._line_buffer = [ ]
self._buffer_size = 0
def _collapse_buffer(self,dummy):
if self._buffer_size==0:
if len(self._line_buffer)>0:
self._buffer_size = len(self._line_buffer)
ll = self._line_buffer[self._buffer_size-1].split( )
self._line_buffer[self._buffer_size-1] = ll[1]
self._buffered_seq_start = int(ll[0])
else:
i = 0
for i in range(self._buffer_size,len(self._line_buffer)-1):
self._line_buffer[i-self._buffer_size] = self._line_buffer[i-self._buffer_size]+self._line_buffer[i].strip( )
ll = self._line_buffer[len(self._line_buffer)-1].split( )
if int(ll[0])==self._buffered_seq_start+len(self._line_buffer[self._buffer_size-1]):
self._line_buffer[self._buffer_size-1]+=ll[1]
else:
differ = int(ll[0])-(self._buffered_seq_start+len(self._line_buffer[self._buffer_size-1]))
self._line_buffer[self._buffer_size-1]+="N"*differ
self._line_buffer[self._buffer_size-1]+=ll[1]
self._line_buffer = self._line_buffer[0:self._buffer_size]
def _add_motif_match(self,line):
line = line.strip( )
if line.find("[")!= -1 or line.find("<")!= -1:
pass
elif line.find("e")!= -1:
pass
elif line.find("+")!= -1:
pass
def noevent(self,line):
pass
|
| CloneAbstraction |
| Parameter Index | Clone Instance | Parameter Name | Value |
|---|---|---|---|
| 1 | 1 | [[#2ed1cb40]] | Motif.MEMEMotif |
| 1 | 2 | [[#2ed1cb40]] | MEMEMotif |
| 2 | 1 | [[#2ed1ca80]] | m._alphabet(self.data.alphabet) |
| 2 | 2 | [[#2ed1ca80]] | m.alphabet = self.data.alphabet |
| 3 | 1 | [[#2ed1c9c0]] | m._length(ls[1]) |
| 3 | 2 | [[#2ed1c9c0]] | m.length = ls[1] |
| 4 | 1 | [[#2ed1c960]] | m._name(name) |
| 4 | 2 | [[#2ed1c960]] | m.name = name |
| 5 | 1 | [[#2ed1c900]] | _consensus |
| 5 | 2 | [[#2ed1c900]] | add_instance |
| 6 | 1 | [[#2ed1c8a0]] | Motif.Instance |
| 6 | 2 | [[#2ed1c8a0]] | MEMEInstance |