CloneSet10


Previous CloneSet | Next CloneSet | Back to Main Report
Clone
Mass
Clones in
CloneSet
Parameter
Count
Clone
Similarity
Syntax Category
[Sequence Length]
180 | 2 | 2 | 0.999 | file_input_element_list[4]
Clone Abstraction | Parameter Bindings
Clone Instance
(Click to see clone)
Line Count | Source Line
Source File
1 | 180 | 451
Bio/MEME/Parser.py
2 | 178 | 535
Bio/Motif/Parsers/MEME.py
Clone Instance
1
Line Count
180
Source Line
451
Source File
Bio/MEME/Parser.py

class MASTParser(AbstractParser):
    """Parser for MAST text output (HTML output cannot be parsed yet).

    Given a file handle for a MAST text output file, the parser hands the
    handle to a _MASTScanner, which reports what it reads to a
    _MASTConsumer; the consumer's accumulated ``data`` (a MASTRecord holding
    the hits between motifs and sequences) is what parse() returns.

    The scanner does some unusual line buffering to parse out match
    diagrams. Really complex diagrams often lead to an error message and
    p-values not being parsed for a given line.

    Methods:
    parse (handle): parses the data from the file handle passed to it.

    Example:

    f = open("mast_file.txt")
    parser = MASTParser()
    mast_record = parser.parse(f)
    for motif in mast_record.motifs:
        for instance in motif.instances:
            print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue
    """

    def __init__(self):
        """Create the consumer/scanner pair that parse() works with."""
        self._consumer = _MASTConsumer()
        self._scanner = _MASTScanner()

    def parse(self, handle):
        """Scan ``handle`` and return the data collected by the consumer."""
        sink = self._consumer
        self._scanner.feed(handle, sink)
        return sink.data
     

class _MASTScanner: 
     """
    Scanner for MAST text output. 

    feed() walks the file in three phases, in this order: the header
    (_scan_header), the "SEQUENCE NAME" table (_scan_matches) and the
    "SECTION III:" annotated matches (_scan_annotated_matches). Each phase
    passes lines to callbacks on the supplied consumer object.

    All line handling goes through the read_and_call* / safe_peekline
    helpers, which are defined outside this class.
    NOTE(review): presumably each helper reads line(s) from the handle,
    checks them against the keyword condition (start / contains / blank)
    and passes them to the callback - confirm against their definitions.
    """ 

     def feed (self,handle,consumer):  
         """Scan ``handle`` and report its contents to ``consumer``.

         ``handle`` is wrapped in a File.UndoHandle unless it already is one.
         """
         if isinstance(handle,File.UndoHandle): 
             uhandle = handle 
         else: 
              uhandle = File.UndoHandle(handle) 

         # The phases consume the handle sequentially, so their order matters.
         self._scan_header(uhandle,consumer) 
         self._scan_matches(uhandle,consumer) 
         self._scan_annotated_matches(uhandle,consumer) 

     def _scan_header (self,uhandle,consumer):  
         """Scan from the top of the file up to the "SECTION II:" line.

         Passes the "MAST version" line to consumer._version, the line
         containing "DATABASE" (after the "****" rule) to consumer._database,
         and each line of the motif table to consumer._add_motif.

         Raises ValueError (with a clearer message) when the ValueError from
         the search for a "MAST version" line is caught.
         """
         try :
             
             read_and_call_until(uhandle,consumer.noevent,contains = "MAST version") 
         except ValueError: 
              raise ValueError("Improper input file. Does not begin with a line with 'MAST version'") 
         read_and_call(uhandle,consumer._version,contains = "MAST version") 
         read_and_call_until(uhandle,consumer.noevent,start = "DATABASE AND MOTIFS") 
         read_and_call(uhandle,consumer.noevent,start = "DATABASE") 
         read_and_call(uhandle,consumer.noevent,start = "****") 
         read_and_call(uhandle,consumer._database,contains = "DATABASE") 
         read_and_call_until(uhandle,consumer.noevent,contains = "MOTIF WIDTH") 
         read_and_call(uhandle,consumer.noevent,contains = "MOTIF") 
         read_and_call(uhandle,consumer.noevent,contains = "----") 
         # Motif table rows go to _add_motif until the blank = 1 condition.
         read_and_call_until(uhandle,consumer._add_motif,blank = 1) 
         read_and_call_until(uhandle,consumer.noevent,start = "SECTION II:") 

     def _scan_matches (self,uhandle,consumer):  
         """Scan the "SEQUENCE NAME" table without recording anything.

         Every line in this phase is passed to consumer.noevent.
         """
         read_and_call_until(uhandle,consumer.noevent,start = "SEQUENCE NAME") 
         read_and_call(uhandle,consumer.noevent,start = "SEQUENCE NAME") 
         read_and_call(uhandle,consumer.noevent,start = "---") 
         # Disabled alternative: hand the table rows to the consumer instead
         # of discarding them.
 #        read_and_call_until(uhandle, consumer._add_sequence_match_with_diagram, blank = 1)
         read_and_call_until(uhandle,consumer.noevent,blank = 1) 
         read_and_call(uhandle,consumer.noevent,blank = 1) 

     def _scan_annotated_matches (self,uhandle,consumer):  
         """Scan the "SECTION III:" part and feed it to the consumer's buffer.

         For each sequence entry the first line goes to
         consumer._set_current_seq, the lines from "  DIAGRAM" onwards are
         buffered and turned into a diagram, and the following groups of
         lines are buffered and collapsed until a line starting with
         asterisks ends the section.
         """
         read_and_call_until(uhandle,consumer.noevent,start = "SECTION III:") 
         read_and_call(uhandle,consumer.noevent,start = "SECTION III:") 
         read_and_call_until(uhandle,consumer.noevent,start = "****") 
         read_and_call(uhandle,consumer.noevent,start = "****") 
         read_and_call_until(uhandle,consumer.noevent,start = "*****") 
         read_and_call(uhandle,consumer.noevent) 
         read_and_call_while(uhandle,consumer.noevent,blank = 1) 
         # readMatches is the outer loop flag; the inner loop sets it to 0
         # when it meets the closing asterisk line.
         readMatches = 1 
         while readMatches==1:  
             # A sequence is already current: parse whatever is still
             # buffered (only if the buffer is non-empty), then blank it.
             if consumer._current_seq: 
                  if consumer._buffer_size!=0:  
                      consumer._parse_buffer(None) 
                  consumer._blank_buffer(None) 
             read_and_call(uhandle,consumer._set_current_seq) 
             read_and_call_until(uhandle,consumer.noevent,start = "  DIAGRAM") 
             # Buffer the diagram lines, convert them, then start a fresh buffer.
             read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1) 
             consumer._add_diagram_from_buffer(None) 
             consumer._blank_buffer(None) 
             read_and_call(uhandle,consumer.noevent,blank = 1) 
             while 1: 
                  # Peek first so a closing "****" line is not buffered.
                  line = safe_peekline(uhandle) 
                  if line.startswith("****"): 
                       consumer._parse_buffer(None) 
                       readMatches = 0 
                       break 
                  read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1) 
                  read_and_call(uhandle,consumer.noevent,blank = 1) 
                  consumer._collapse_buffer(None) 
                  # A second blank line leaves the inner loop only (the outer
                  # loop then handles the next entry); a "*****" line stops
                  # both loops. Otherwise keep buffering the current entry.
                  if attempt_read_and_call(uhandle,consumer.noevent,blank = 1):  
                      break 
                  elif attempt_read_and_call(uhandle,consumer.noevent,start = "*****"):  
                      consumer._parse_buffer(None) 
                      consumer._blank_buffer(None) 
                      readMatches = 0 
                      break 
                  

class MASTRecord:
    """The class for holding the results from a MAST run.

    A MASTRecord holds data about matches between motifs and sequences.
    The motifs held by the MASTRecord are objects of the class MEMEMotif.

    Methods:
    get_motif_matches_for_sequence(sequence_name): returns all of the
        motif matches within a given sequence, ordered by start position.
        The matches are objects of the class MEME.Motif.Instance
    get_motif_matches (motif): returns all of the matches for a motif
        in the sequences searched. The matches returned are of class
        MEME.Motif.Instance
    get_motif_by_name (motif_name): returns a MEMEMotif with the given
        name, or None when no motif has that name.
    """

    def __init__(self):
        """Create an empty record."""
        self.sequences = []
        self.version = ""
        self.matches = []
        self.database = ""
        self.diagrams = {}   # sequence -> diagram, see _add_diagram_for_sequence
        self.alphabet = None
        self.motifs = []

    def _version(self, version):
        """Store the MAST version."""
        self.version = version

    def _alphabet(self, alphabet):
        """Store ``alphabet`` if it is one of the accepted IUPAC alphabets.

        Returns None when the alphabet was stored and -1 when it was
        rejected (the record is left unchanged in that case).
        """
        if (alphabet == IUPAC.protein
                or alphabet == IUPAC.ambiguous_dna
                or alphabet == IUPAC.unambiguous_dna):
            self.alphabet = alphabet
        else:
            return -1

    def _database(self, database):
        """Store the database description."""
        self.database = database

    def get_motif_matches_for_sequence(self, seq):
        """Return every motif instance found in sequence ``seq``.

        The instances of all motifs are searched; those whose
        ``sequence_name`` equals ``seq`` are returned as a new list sorted
        by ``start``. An unknown sequence name yields an empty list.
        """
        insts = []
        for m in self.motifs:
            for i in m.instances:
                if i.sequence_name == seq:
                    insts.append(i)
        # Sort with key= rather than a positional cmp function: the cmp form
        # only exists on Python 2, while key= gives the same (stable) order
        # on both Python 2.4+ and Python 3.
        insts.sort(key=lambda inst: inst.start)
        return insts

    def get_motif_matches(self, motif):
        """Return the instances of the stored motif named ``motif.name``.

        Raises AttributeError when no stored motif has that name, because
        get_motif_by_name() returns None in that case.
        """
        m = self.get_motif_by_name(motif.name)
        return m.instances

    def _add_diagram_for_sequence(self, diagram, seq):
        """Record ``diagram`` under the key ``seq``, replacing any earlier one."""
        self.diagrams[seq] = diagram

    def _add_match(self, match):
        """Append ``match`` to the list of matches."""
        self.matches.append(match)

    def _add_sequence(self, sequence):
        """Append ``sequence`` to the list of sequences."""
        self.sequences.append(sequence)

    def _add_motif(self, motif):
        """Append ``motif`` to the list of motifs."""
        self.motifs.append(motif)

    def get_motif_by_name(self, name):
        """Return the first stored motif whose ``name`` equals ``name``.

        Returns None when there is no such motif.
        """
        for m in self.motifs:
            if m.name == name:
                return m
        return None
              


Clone Instance
2
Line Count
178
Source Line
535
Source File
Bio/Motif/Parsers/MEME.py

class MASTParser(AbstractParser):
    """Parser for MAST text output (HTML output cannot be parsed yet).

    A MASTParser takes a file handle for a MAST text output file, runs it
    through a _MASTScanner that reports to a _MASTConsumer, and returns the
    consumer's ``data``: a MASTRecord containing the hits between motifs
    and sequences.

    The scanner does some unusual line buffering to parse out match
    diagrams. Really complex diagrams often lead to an error message and
    p-values not being parsed for a given line.

    Methods:
    parse (handle): parses the data from the file handle passed to it.

    Example:

    >>>f = open("mast_file.txt")
    >>>parser = MASTParser()
    >>>mast_record = parser.parse(f)
    >>>for motif in mast_record.motifs:
    >>>    for instance in motif.instances:
    >>>        print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue
    """

    def __init__(self):
        """Set up the consumer and the scanner used by parse()."""
        self._consumer = _MASTConsumer()
        self._scanner = _MASTScanner()

    def parse(self, handle):
        """Feed ``handle`` to the scanner; return what the consumer collected."""
        collector = self._consumer
        self._scanner.feed(handle, collector)
        return collector.data
     

class _MASTScanner: 
     """
    Scanner for MAST text output. 

    The scan runs in three sequential phases started by feed():
    _scan_header, _scan_matches and _scan_annotated_matches. Lines are
    handed to callback methods of the consumer passed to feed().

    The read_and_call* / attempt_read_and_call / safe_peekline helpers
    used throughout are defined outside this class.
    NOTE(review): presumably they read from the handle, test the line
    against the start / contains / blank condition and invoke the
    callback with it - verify against the helpers' own documentation.
    """ 

     def feed (self,handle,consumer):  
         """Run all three scan phases over ``handle`` for ``consumer``.

         A handle that is not already a File.UndoHandle is wrapped in one.
         """
         if isinstance(handle,File.UndoHandle): 
             uhandle = handle 
         else: 
              uhandle = File.UndoHandle(handle) 

         # Each phase continues where the previous one stopped reading.
         self._scan_header(uhandle,consumer) 
         self._scan_matches(uhandle,consumer) 
         self._scan_annotated_matches(uhandle,consumer) 

     def _scan_header (self,uhandle,consumer):  
         """Scan the header, stopping at the "SECTION II:" line.

         Consumer callbacks used: _version for the "MAST version" line,
         _database for the "DATABASE" line that follows the "****" rule,
         _add_motif for the rows under the "MOTIF WIDTH" heading, and
         noevent for everything else.

         A ValueError caught while looking for the "MAST version" line is
         replaced by a ValueError explaining that the input is improper.
         """
         try :
             
             read_and_call_until(uhandle,consumer.noevent,contains = "MAST version") 
         except ValueError: 
              raise ValueError("Improper input file. Does not begin with a line with 'MAST version'") 
         read_and_call(uhandle,consumer._version,contains = "MAST version") 
         read_and_call_until(uhandle,consumer.noevent,start = "DATABASE AND MOTIFS") 
         read_and_call(uhandle,consumer.noevent,start = "DATABASE") 
         read_and_call(uhandle,consumer.noevent,start = "****") 
         read_and_call(uhandle,consumer._database,contains = "DATABASE") 
         read_and_call_until(uhandle,consumer.noevent,contains = "MOTIF WIDTH") 
         read_and_call(uhandle,consumer.noevent,contains = "MOTIF") 
         read_and_call(uhandle,consumer.noevent,contains = "----") 
         # The rows of the motif table are the only header lines recorded here
         # besides the version and database lines.
         read_and_call_until(uhandle,consumer._add_motif,blank = 1) 
         read_and_call_until(uhandle,consumer.noevent,start = "SECTION II:") 

     def _scan_matches (self,uhandle,consumer):  
         """Scan the "SEQUENCE NAME" table; all lines go to consumer.noevent."""
         read_and_call_until(uhandle,consumer.noevent,start = "SEQUENCE NAME") 
         read_and_call(uhandle,consumer.noevent,start = "SEQUENCE NAME") 
         read_and_call(uhandle,consumer.noevent,start = "---") 
         # Disabled alternative that would pass the table rows to the
         # consumer rather than dropping them.
 #        read_and_call_until(uhandle, consumer._add_sequence_match_with_diagram, blank = 1)
         read_and_call_until(uhandle,consumer.noevent,blank = 1) 
         read_and_call(uhandle,consumer.noevent,blank = 1) 

     def _scan_annotated_matches (self,uhandle,consumer):  
         """Scan the "SECTION III:" entries into the consumer's line buffer.

         Per entry: the first line is given to consumer._set_current_seq;
         the lines from "  DIAGRAM" on are buffered and passed through
         consumer._add_diagram_from_buffer; later line groups are buffered
         and collapsed. A line starting with asterisks ends the scan.
         """
         read_and_call_until(uhandle,consumer.noevent,start = "SECTION III:") 
         read_and_call(uhandle,consumer.noevent,start = "SECTION III:") 
         read_and_call_until(uhandle,consumer.noevent,start = "****") 
         read_and_call(uhandle,consumer.noevent,start = "****") 
         read_and_call_until(uhandle,consumer.noevent,start = "*****") 
         read_and_call(uhandle,consumer.noevent) 
         read_and_call_while(uhandle,consumer.noevent,blank = 1) 
         # Flag for the outer loop: stays 1 until the closing asterisk line
         # is seen inside the inner loop.
         readMatches = 1 
         while readMatches==1:  
             # Finish the previous entry, if any: parse a non-empty buffer
             # and blank the buffer before starting the next entry.
             if consumer._current_seq: 
                  if consumer._buffer_size!=0:  
                      consumer._parse_buffer(None) 
                  consumer._blank_buffer(None) 
             read_and_call(uhandle,consumer._set_current_seq) 
             read_and_call_until(uhandle,consumer.noevent,start = "  DIAGRAM") 
             # Diagram lines are buffered, converted, and the buffer is reset.
             read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1) 
             consumer._add_diagram_from_buffer(None) 
             consumer._blank_buffer(None) 
             read_and_call(uhandle,consumer.noevent,blank = 1) 
             while 1: 
                  # Look ahead without consuming: a "****" line means the
                  # section is over, so parse what is buffered and stop.
                  line = safe_peekline(uhandle) 
                  if line.startswith("****"): 
                       consumer._parse_buffer(None) 
                       readMatches = 0 
                       break 
                  read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1) 
                  read_and_call(uhandle,consumer.noevent,blank = 1) 
                  consumer._collapse_buffer(None) 
                  # Extra blank line: leave the inner loop and let the outer
                  # loop start the next entry. "*****" line: parse, blank and
                  # stop everything. Neither: buffer the next group of lines.
                  if attempt_read_and_call(uhandle,consumer.noevent,blank = 1):  
                      break 
                  elif attempt_read_and_call(uhandle,consumer.noevent,start = "*****"):  
                      consumer._parse_buffer(None) 
                      consumer._blank_buffer(None) 
                      readMatches = 0 
                      break 
                  

class MASTRecord:
    """The class for holding the results from a MAST run.

    A MASTRecord holds data about matches between motifs and sequences.
    The motifs held by the MASTRecord are objects of the class MEMEMotif.

    Methods:
    get_motif_matches_for_sequence(sequence_name): returns all of the
    motif matches within a given sequence, ordered by start position.
    The matches are objects of the class MEMEInstance
    get_motif_matches (motif): returns all of the matches for a motif
    in the sequences searched. The matches returned are of class
    MEMEInstance
    get_motif_by_name (motif_name): returns a MEMEMotif with the given
    name, or None when no motif has that name.
    """

    def __init__(self):
        """Create an empty record."""
        self.sequences = []
        self.version = ""
        self.matches = []
        self.database = ""
        self.diagrams = {}   # sequence -> diagram, see _add_diagram_for_sequence
        self.alphabet = None
        self.motifs = []

    def _version(self, version):
        """Store the MAST version."""
        self.version = version

    def _alphabet(self, alphabet):
        """Store ``alphabet`` if it is one of the accepted IUPAC alphabets.

        Returns None when the alphabet was stored and -1 when it was
        rejected (the record is left unchanged in that case).
        """
        if (alphabet == IUPAC.protein
                or alphabet == IUPAC.ambiguous_dna
                or alphabet == IUPAC.unambiguous_dna):
            self.alphabet = alphabet
        else:
            return -1

    def _database(self, database):
        """Store the database description."""
        self.database = database

    def get_motif_matches_for_sequence(self, seq):
        """Return every motif instance found in sequence ``seq``.

        Instances of all stored motifs are considered; those whose
        ``sequence_name`` equals ``seq`` are returned as a new list sorted
        by ``start``. An unknown sequence name yields an empty list.
        """
        insts = []
        for m in self.motifs:
            for i in m.instances:
                if i.sequence_name == seq:
                    insts.append(i)
        # A positional cmp function is accepted by list.sort() only on
        # Python 2; key= produces the same stable ordering and also works
        # on Python 3.
        insts.sort(key=lambda inst: inst.start)
        return insts

    def get_motif_matches(self, motif):
        """Return the instances of the stored motif named ``motif.name``.

        Raises AttributeError when no stored motif has that name, because
        get_motif_by_name() returns None in that case.
        """
        m = self.get_motif_by_name(motif.name)
        return m.instances

    def _add_diagram_for_sequence(self, diagram, seq):
        """Record ``diagram`` under the key ``seq``, replacing any earlier one."""
        self.diagrams[seq] = diagram

    def _add_match(self, match):
        """Append ``match`` to the list of matches."""
        self.matches.append(match)

    def _add_sequence(self, sequence):
        """Append ``sequence`` to the list of sequences."""
        self.sequences.append(sequence)

    def _add_motif(self, motif):
        """Append ``motif`` to the list of motifs."""
        self.motifs.append(motif)

    def get_motif_by_name(self, name):
        """Return the first stored motif whose ``name`` equals ``name``.

        Returns None when there is no such motif.
        """
        for m in self.motifs:
            if m.name == name:
                return m
        return None
              


Clone Abstraction | Parameter Count: 2 | Parameter Bindings

# Clone abstraction (template): the [[#variable...]] line below is a
# parameter slot that stands for the class docstring, which is the part that
# differs between the two clone instances.
class MASTParser(AbstractParser):
   [[#variable2d9a26c0]]

  # Builds the consumer and the scanner that parse() uses.
  def __init__(self):
  
    self._consumer = _MASTConsumer( ) 
    self._scanner = _MASTScanner( ) 

  # Feeds the handle through the scanner and returns the consumer's data.
  def parse(self,handle):
  
    self._scanner.feed(handle,self._consumer) 
    return self._consumer.data 
  

class _MASTScanner:
  """
    Scanner for MAST text output. 

    feed() runs three sequential phases over the handle: _scan_header,
    _scan_matches and _scan_annotated_matches. The read_and_call* and
    safe_peekline helpers are defined outside this class.
    """ 

  # Wraps the handle in a File.UndoHandle if needed, then runs the phases in
  # order; each phase continues where the previous one stopped reading.
  def feed(self,handle,consumer):
  
    if isinstance(handle,File.UndoHandle):
    
      uhandle = handle 
    else:
    
      uhandle = File.UndoHandle(handle) 
    self._scan_header(uhandle,consumer) 
    self._scan_matches(uhandle,consumer) 
    self._scan_annotated_matches(uhandle,consumer) 

  # Header phase: reports the version line, the database line and the motif
  # table rows to the consumer, then skips ahead to "SECTION II:".
  def _scan_header(self,uhandle,consumer):
  
    try :
    
      read_and_call_until(uhandle,consumer.noevent,contains = "MAST version") 
    except ValueError:
    
      # Re-raised with a message that names the actual problem.
      raise ValueError("Improper input file. Does not begin with a line with 'MAST version'") 
    read_and_call(uhandle,consumer._version,contains = "MAST version") 
    read_and_call_until(uhandle,consumer.noevent,start = "DATABASE AND MOTIFS") 
    read_and_call(uhandle,consumer.noevent,start = "DATABASE") 
    read_and_call(uhandle,consumer.noevent,start = "****") 
    read_and_call(uhandle,consumer._database,contains = "DATABASE") 
    read_and_call_until(uhandle,consumer.noevent,contains = "MOTIF WIDTH") 
    read_and_call(uhandle,consumer.noevent,contains = "MOTIF") 
    read_and_call(uhandle,consumer.noevent,contains = "----") 
    read_and_call_until(uhandle,consumer._add_motif,blank = 1) 
    read_and_call_until(uhandle,consumer.noevent,start = "SECTION II:") 

  # "SEQUENCE NAME" table phase: every line goes to consumer.noevent.
  def _scan_matches(self,uhandle,consumer):
  
    read_and_call_until(uhandle,consumer.noevent,start = "SEQUENCE NAME") 
    read_and_call(uhandle,consumer.noevent,start = "SEQUENCE NAME") 
    read_and_call(uhandle,consumer.noevent,start = "---") 
    # Disabled alternative that would pass the table rows to the consumer:
    #        read_and_call_until(uhandle, consumer._add_sequence_match_with_diagram, blank = 1)
    read_and_call_until(uhandle,consumer.noevent,blank = 1) 
    read_and_call(uhandle,consumer.noevent,blank = 1) 

  # "SECTION III:" phase: buffers the lines of each entry in the consumer
  # until a line starting with asterisks ends the section.
  def _scan_annotated_matches(self,uhandle,consumer):
  
    read_and_call_until(uhandle,consumer.noevent,start = "SECTION III:") 
    read_and_call(uhandle,consumer.noevent,start = "SECTION III:") 
    read_and_call_until(uhandle,consumer.noevent,start = "****") 
    read_and_call(uhandle,consumer.noevent,start = "****") 
    read_and_call_until(uhandle,consumer.noevent,start = "*****") 
    read_and_call(uhandle,consumer.noevent) 
    read_and_call_while(uhandle,consumer.noevent,blank = 1) 
    # Outer loop flag; set to 0 inside the inner loop to stop scanning.
    readMatches = 1 
    while readMatches==1:
    
      # Finish the previous entry: parse a non-empty buffer, then blank it.
      if consumer._current_seq:
      
        if consumer._buffer_size!=0:
        
          consumer._parse_buffer(None) 
        consumer._blank_buffer(None) 
      read_and_call(uhandle,consumer._set_current_seq) 
      read_and_call_until(uhandle,consumer.noevent,start = "  DIAGRAM") 
      read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1) 
      consumer._add_diagram_from_buffer(None) 
      consumer._blank_buffer(None) 
      read_and_call(uhandle,consumer.noevent,blank = 1) 
      while 1:
      
        # Peek so that a closing "****" line is not added to the buffer.
        line = safe_peekline(uhandle) 
        if line.startswith("****"):
        
          consumer._parse_buffer(None) 
          readMatches = 0 
          break 
        read_and_call_until(uhandle,consumer._add_line_to_buffer,blank = 1) 
        read_and_call(uhandle,consumer.noevent,blank = 1) 
        consumer._collapse_buffer(None) 
        # Extra blank line: next entry (outer loop continues).
        # "*****" line: parse, blank and stop both loops.
        if attempt_read_and_call(uhandle,consumer.noevent,blank = 1):
        
          break 
        elif attempt_read_and_call(uhandle,consumer.noevent,start = "*****"):
        
          consumer._parse_buffer(None) 
          consumer._blank_buffer(None) 
          readMatches = 0 
          break 
        

# Clone abstraction (template): the [[#variable...]] line below is a
# parameter slot that stands for the class docstring, which is the part that
# differs between the two clone instances.
class MASTRecord:
   [[#variable2d9a26a0]]

  # Creates an empty record.
  def __init__(self):
  
    self.sequences = [ ] 
    self.version = "" 
    self.matches = [ ] 
    self.database = "" 
    self.diagrams = { } 
    self.alphabet = None 
    self.motifs = [ ] 

  # Stores the MAST version.
  def _version(self,version):
  
    self.version = version 

  # Stores the alphabet when it is one of the three accepted IUPAC
  # alphabets; otherwise returns -1 and leaves the record unchanged.
  def _alphabet(self,alphabet):
  
    if alphabet==IUPAC.protein or alphabet==IUPAC.ambiguous_dna or alphabet==IUPAC.unambiguous_dna:
    
      self.alphabet = alphabet 
    else:
    
      return -1 
    

  # Stores the database description.
  def _database(self,database):
  
    self.database = database 

  # Collects the instances of all motifs whose sequence_name equals seq and
  # returns them sorted by start.
  def get_motif_matches_for_sequence(self,seq):
  
    insts = [ ] 
    for m in self.motifs:
    
      for i in m.instances:
      
        if i.sequence_name==seq:
        
          insts.append(i) 
        
    # NOTE(review): passing a cmp function positionally to list.sort() (and
    # the cmp builtin itself) only works on Python 2.
    insts.sort( lambda x,y:cmp(x.start,y.start)) 
    return insts 

  # Returns the instances of the stored motif named motif.name; fails with
  # AttributeError if get_motif_by_name finds nothing and returns None.
  def get_motif_matches(self,motif):
  
    m = self.get_motif_by_name(motif.name) 
    return m.instances 

  # Records diagram under the key seq.
  def _add_diagram_for_sequence(self,diagram,seq):
  
    self.diagrams[seq] = diagram 

  # Appends match to self.matches.
  def _add_match(self,match):
  
    self.matches.append(match) 

  # Appends sequence to self.sequences.
  def _add_sequence(self,sequence):
  
    self.sequences.append(sequence) 

  # Appends motif to self.motifs.
  def _add_motif(self,motif):
  
    self.motifs.append(motif) 

  # Returns the first motif whose name equals name; falls off the end
  # (implicit None) when there is no match.
  def get_motif_by_name(self,name):
  
    for m in self.motifs:
    
      if m.name==name:
      
        return m 
      
 

CloneAbstraction
Parameter Bindings
Parameter
Index
Clone
Instance
Parameter
Name
Value
1 | 1 | [[#2d9a26c0]]
'''
    Parser for MAST text output. HTML output cannot be parsed, yet. Returns a MASTRecord
    
    A MASTParser takes a file handle for a MAST text output file and 
    returns a MASTRecord, containing the hits between motifs and 
    sequences. The parser does some unusual line buffering to parse out 
    match diagrams. Really complex diagrams often lead to an error message 
    and p-values not being parsed for a given line.
    
    Methods:
    parse (handle): parses the data from the file handle passed to it.
    
    Example:
    
    f = open("mast_file.txt")
    parser = MASTParser()
    mast_record = parser.parse(f)
    for motif in mast_record.motifs:
        for instance in motif.instances:
            print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue
    ''' 
1 | 2 | [[#2d9a26c0]]
'''
    Parser for MAST text output. HTML output cannot be parsed, yet. Returns a MASTRecord
    
    A MASTParser takes a file handle for a MAST text output file and 
    returns a MASTRecord, containing the hits between motifs and 
    sequences. The parser does some unusual line buffering to parse out 
    match diagrams. Really complex diagrams often lead to an error message 
    and p-values not being parsed for a given line.
    
    Methods:
    parse (handle): parses the data from the file handle passed to it.
    
    Example:
    
    >>>f = open("mast_file.txt")
    >>>parser = MASTParser()
    >>>mast_record = parser.parse(f)
    >>>for motif in mast_record.motifs:
    >>>    for instance in motif.instances:
    >>>        print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue
    ''' 
2 | 1 | [[#2d9a26a0]]
"""The class for holding the results from a MAST run.
    
    A MASTRecord holds data about matches between motifs and sequences.
    The motifs held by the MASTRecord are objects of the class MEMEMotif.
    
    Methods:
    get_motif_matches_for_sequence(sequence_name): returns all of the
        motif matches within a given sequence. The matches are objects of
        the class MEME.Motif.Instance
    get_motif_matches (motif_name): returns all of the matches for a motif
        in the sequences searched. The matches returned are of class 
        MEME.Motif.Instance
    get_motif_by_name (motif_name): returns a MEMEMotif with the given
        name.
    """ 
2 | 2 | [[#2d9a26a0]]
"""The class for holding the results from a MAST run.
    
    A MASTRecord holds data about matches between motifs and sequences.
    The motifs held by the MASTRecord are objects of the class MEMEMotif.
    
    Methods:
    get_motif_matches_for_sequence(sequence_name): returns all of the
    motif matches within a given sequence. The matches are objects of
    the class MEMEInstance
    get_motif_matches (motif_name): returns all of the matches for a motif
    in the sequences searched. The matches returned are of class 
    MEMEInstance
    get_motif_by_name (motif_name): returns a MEMEMotif with the given
    name.
    """