Spaces:

ChatterjeeLab
/

SMILES2PEPTIDE

Running

App Files Community

yinuozhang committed on Dec 21, 2024

Commit

71e2885

1 Parent(s): 2b18b2c

adjust several LD aa and GLY bond

Browse files

Files changed (1) hide show

app.py +182 -336

app.py CHANGED Viewed

@@ -72,12 +72,10 @@ class PeptideAnalyzer:
         is_cyclic = len(peptide_cycles) > 0 and not smiles.endswith('C(=O)O')
         return is_cyclic, peptide_cycles, aromatic_cycles
     def split_on_bonds(self, smiles):
-        """Split SMILES into segments with simplified Pro handling"""
         positions = []
         used = set()
         # Find Gly pattern first
         gly_pattern = r'NCC\(=O\)'
         for match in re.finditer(gly_pattern, smiles):
@@ -106,7 +104,6 @@ class PeptideAnalyzer:
         # Create segments
         segments = []
         if positions:
             # First segment
             if positions[0]['start'] > 0:
@@ -114,18 +111,21 @@ class PeptideAnalyzer:
                     'content': smiles[0:positions[0]['start']],
                     'bond_after': positions[0]['pattern']
                 })
             # Process segments
             for i in range(len(positions)-1):
                 current = positions[i]
                 next_pos = positions[i+1]
                 if current['type'] == 'gly':
                     segments.append({
                         'content': 'NCC(=O)',
                         'bond_before': positions[i-1]['pattern'] if i > 0 else None,
                         'bond_after': next_pos['pattern']
                     })
                 else:
                     content = smiles[current['end']:next_pos['start']]
                     if content:
@@ -141,7 +141,6 @@ class PeptideAnalyzer:
                     'content': smiles[positions[-1]['end']:],
                     'bond_before': positions[-1]['pattern']
                 })
         return segments
     def clean_terminal_carboxyl(self, segment):
@@ -168,11 +167,162 @@ class PeptideAnalyzer:
         content = self.clean_terminal_carboxyl(segment)
         mods = self.get_modifications(segment)
-        # UAA pattern matching section - before regular residues
-        # Phenylglycine and derivatives
-        if 'c1ccccc1' in content:
             if '[C@@H](c1ccccc1)' in content or '[C@H](c1ccccc1)' in content:
                 return '4', mods  # Base phenylglycine
         # 4-substituted phenylalanines
         if 'Cc1ccc' in content:
@@ -282,22 +432,6 @@ class PeptideAnalyzer:
             if 'c1ccc(c(c1)O)O' in content:
                 return 'DAH', mods  # 3,4-Dihydroxy-phenylalanine
-        # Cyclic amino acids
-        if 'C1CCCC1' in content:
-            return 'CPA3', mods  # 3-Cyclopentyl-alanine
-        if 'C1CCCCC1' in content:
-            if 'CC1CCCCC1' in content:
-                return 'ALC', mods  # 3-cyclohexyl-alanine
-            else:
-                return 'CHG', mods  # Cyclohexylglycine
-        # Chain-length variants
-        if 'CCC[C@@H]' in content or 'CCC[C@H]' in content:
-            return 'NLE', mods  # Norleucine
-        if 'CC[C@@H]' in content or 'CC[C@H]' in content:
-            if not any(x in content for x in ['CC(C)', 'COC', 'CN(']):
-                return 'ABA', mods  # 2-Aminobutyric acid
         # Modified histidines
         if 'c1cnc' in content:
             if '[C@@H]1CN[C@@H](N1)F' in content:
@@ -307,7 +441,6 @@ class PeptideAnalyzer:
             if 'c1c[nH]c(n1)F' in content:
                 return '2HF2', mods  # 2-fluoro-l-histidine variant
-        # Sulfur and selenium containing
         if '[SeH]' in content:
             return 'CSE', mods  # Selenocysteine
         if 'S' in content:
@@ -318,7 +451,6 @@ class PeptideAnalyzer:
             if 'CCS' in content:
                 return 'HCS', mods  # homocysteine
-        # Additional modifications
         if 'CN=[N]=N' in content:
             return 'AZDA', mods  # azido-alanine
         if '[NH]=[C](=[NH2])=[NH2]' in content:
@@ -326,7 +458,21 @@ class PeptideAnalyzer:
                 return 'AGM', mods  # 5-methyl-arginine
             if 'CC[NH]=' in content:
                 return 'GDPR', mods  # 2-Amino-3-guanidinopropionic acid
         if 'CCON' in content:
             return 'CAN', mods  # canaline
         if '[C@@H]1C=C[C@@H](C=C1)' in content:
@@ -350,7 +496,6 @@ class PeptideAnalyzer:
             if 'c1cccc(c1)[C](=[NH2])=[NH2]' in content:
                 return 'APM', mods  # m-amidinophenyl-3-alanine
-        # Multiple hydroxy patterns
         if 'O' in content:
             if '[C@H]([C@H](C)O)O' in content:
                 return 'ILX', mods  # 4,5-dihydroxy-isoleucine
@@ -365,7 +510,6 @@ class PeptideAnalyzer:
             if '[C@H](c1ccc(c(Cl)c1)O)O' in content:
                 return 'OMY', mods  # (betar)-3-chloro-beta-hydroxy-l-tyrosine
-        # Heterocyclic patterns
         if 'n1' in content:
             if 'n1cccn1' in content:
                 return 'PYZ1', mods  # 3-(1-Pyrazolyl)-alanine
@@ -384,7 +528,6 @@ class PeptideAnalyzer:
             if 'c1cnc2c(n1)cccc2' in content:
                 return 'QX32', mods  # 3-(2-quinoxalyl)-alanine
-        # Multiple nitrogen patterns
         if 'N' in content:
             if '[NH3]CC[C@@H]' in content:
                 return 'DAB', mods  # Diaminobutyric acid
@@ -397,7 +540,6 @@ class PeptideAnalyzer:
             if '[NH]=[C](=S)=[NH2]' in content:
                 return 'THIC', mods  # Thio-citrulline
-        # Chain modified amino acids
         if 'CC' in content:
             if 'CCCC[C@@H]' in content:
                 return 'AHP', mods  # 2-Aminoheptanoic acid
@@ -410,7 +552,6 @@ class PeptideAnalyzer:
             if '[C@@H]([C@@H](C)O)C' in content:
                 return 'HLU', mods  # beta-hydroxyleucine
-        # Modified glutamate/aspartate patterns
         if '[C@@H]' in content:
             if '[C@@H](C[C@@H](F))' in content:
                 return 'FGA4', mods  # 4-Fluoro-glutamic acid
@@ -421,7 +562,6 @@ class PeptideAnalyzer:
             if '[C@@H](CC[C@H](C))' in content:
                 return 'MEG', mods  # (3s)-3-methyl-l-glutamic acid
-        # Sulfur and selenium modifications
         if 'S' in content:
             if 'SCC[C@@H]' in content:
                 return 'HSER', mods  # homoserine
@@ -434,7 +574,6 @@ class PeptideAnalyzer:
             if 'S(=O)(=O)' in content:
                 return 'OMT', mods  # Methionine sulfone
-        # Double bond containing
         if 'C=' in content:
             if 'C=C[C@@H]' in content:
                 return '2AG', mods  # 2-Allyl-glycine
@@ -443,175 +582,16 @@ class PeptideAnalyzer:
             if 'C=Cc1ccccc1' in content:
                 return 'STYA', mods  # Styrylalanine
-        # Special cases
         if '[C@@H]1Cc2c(C1)cccc2' in content:
             return 'IGL', mods  # alpha-amino-2-indanacetic acid
         if '[C](=[C](=O)=O)=O' in content:
             return '26P', mods  # 2-amino-6-oxopimelic acid
         if '[C](=[C](=O)=O)=C' in content:
             return '2NP', mods  # l-2-amino-6-methylene-pimelic acid
-        if 'c2cnc[nH]2' in content:
-            return 'HIS', mods  # histidine core
         if 'c1cccc2c1cc(O)cc2' in content:
             return 'NAO1', mods  # 5-hydroxy-1-naphthalene
         if 'c1ccc2c(c1)cc(O)cc2' in content:
-            return 'NAO2', mods  # 6-hydroxy-2-naphthalene
-        # Proline (P) - flexible ring numbers
-        if any([
-            # Check for any ring number in bond patterns
-            (segment.get('bond_after', '').startswith(f'N{n}C(=O)') and 'CCC' in content and
-            any(f'[C@@H]{n}' in content or f'[C@H]{n}' in content for n in '123456789'))
-            for n in '123456789'
-        ]) or any([
-            # Check ending patterns with any ring number
-            (f'CCCN{n}' in content and content.endswith('=O') and
-            any(f'[C@@H]{n}' in content or f'[C@H]{n}' in content for n in '123456789'))
-            for n in '123456789'
-        ]) or any([
-            # Handle CCC[C@H]n patterns
-            (content == f'CCC[C@H]{n}' and segment.get('bond_before', '').startswith(f'C(=O)N{n}')) or
-            (content == f'CCC[C@@H]{n}' and segment.get('bond_before', '').startswith(f'C(=O)N{n}')) or
-            # N-terminal Pro with any ring number
-            (f'N{n}CCC[C@H]{n}' in content) or
-            (f'N{n}CCC[C@@H]{n}' in content)
-            for n in '123456789'
-        ]):
-            return 'Pro', mods
-        # Tryptophan (W) - more specific indole pattern
-        if re.search(r'c[0-9]c\[nH\]c[0-9]ccccc[0-9][0-9]', content) and \
-        'c[nH]c' in content.replace(' ', ''):
-            return 'Trp', mods
-        # Lysine (K) - both patterns
-        if '[C@@H](CCCCN)' in content or '[C@H](CCCCN)' in content:
-            return 'Lys', mods
-        # Arginine (R) - both patterns
-        if '[C@@H](CCCNC(=N)N)' in content or '[C@H](CCCNC(=N)N)' in content:
-            return 'Arg', mods
-        if ('C[C@H](CCCC)' in content or 'C[C@@H](CCCC)' in content) and 'CC(C)' not in content:
-            return 'Nle', mods
-        # Ornithine (Orn) - 3-carbon chain with NH2
-        if ('C[C@H](CCCN)' in content or 'C[C@@H](CCCN)' in content) and 'CC(C)' not in content:
-            return 'Orn', mods
-        # 2-Naphthylalanine (2Nal) - distinct from Phe pattern
-        if ('Cc3cc2ccccc2c3' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
-            return '2Nal', mods
-        # Cyclohexylalanine (Cha) - already in your code but moved here for clarity
-        if 'N2CCCCC2' in content or 'CCCCC2' in content:
-            return 'Cha', mods
-        # Aminobutyric acid (Abu) - 2-carbon chain
-        if ('C[C@H](CC)' in content or 'C[C@@H](CC)' in content) and not any(p in content for p in ['CC(C)', 'CCCC', 'CCC(C)']):
-            return 'Abu', mods
-        # Pipecolic acid (Pip) - 6-membered ring like Pro
-        if ('N3CCCCC3' in content or 'CCCCC3' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
-            return 'Pip', mods
-        # Cyclohexylglycine (Chg) - direct cyclohexyl without CH2
-        if ('C[C@H](C1CCCCC1)' in content or 'C[C@@H](C1CCCCC1)' in content):
-            return 'Chg', mods
-        # 4-Fluorophenylalanine (4F-Phe)
-        if ('Cc2ccc(F)cc2' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
-            return '4F-Phe', mods
-        # Regular residue identification
-        if ('NCC(=O)' in content) or (content == 'C'):
-            # Middle case - between bonds
-            if segment.get('bond_before') and segment.get('bond_after'):
-                if ('C(=O)N' in segment['bond_before'] or 'C(=O)N(C)' in segment['bond_before']):
-                    return 'Gly', mods
-            # Terminal case - at the end
-            elif segment.get('bond_before') and segment.get('bond_before').startswith('C(=O)N'):
-                return 'Gly', mods
-        if 'CC(C)C[C@H]' in content or 'CC(C)C[C@@H]' in content:
-            return 'Leu', mods
-        if '[C@@H](CC(C)C)' in content or '[C@H](CC(C)C)' in content:
-            return 'Leu', mods
-        if '[C@@H]([C@@H](C)O)' in content or '[C@H]([C@H](C)O)' in content:
-            return 'Thr', mods
-        if '[C@H](Cc2ccccc2)' in content or '[C@@H](Cc2ccccc2)' in content:
-            return 'Phe', mods
-        if ('[C@H](C(C)C)' in content or       # With outer parentheses
-            '[C@@H](C(C)C)' in content or      # With outer parentheses
-            '[C@H]C(C)C' in content or         # Without outer parentheses
-            '[C@@H]C(C)C' in content):         # Without outer parentheses
-            if not any(p in content for p in ['CC(C)C[C@H]', 'CC(C)C[C@@H]']):  # Still check not Leu
-                return 'Val', mods
-        if '[C@H](COC(C)(C)C)' in content or '[C@@H](COC(C)(C)C)' in content:
-            return 'O-tBu', mods
-        if any([
-            'CC[C@H](C)' in content,
-            'CC[C@@H](C)' in content,
-            'C(C)C[C@H]' in content and 'CC(C)C' not in content,
-            'C(C)C[C@@H]' in content and 'CC(C)C' not in content
-        ]):
-            return 'Ile', mods
-        if ('[C@H](C)' in content or '[C@@H](C)' in content):
-            if not any(p in content for p in ['C(C)C', 'COC', 'CN(', 'C(C)O', 'CC[C@H]', 'CC[C@@H]']):
-                return 'Ala', mods
-        # Tyrosine (Tyr) - 4-hydroxybenzyl side chain
-        if re.search(r'Cc[0-9]ccc\(O\)cc[0-9]', content):
-            return 'Tyr', mods
-        # Serine (Ser) - Hydroxymethyl side chain
-        if '[C@H](CO)' in content or '[C@@H](CO)' in content:
-            if not ('C(C)O' in content or 'COC' in content):
-                return 'Ser', mods
-        # Threonine (Thr) - 1-hydroxyethyl side chain
-        if '[C@@H]([C@@H](C)O)' in content or '[C@H]([C@H](C)O)' in content or '[C@@H](C)O' in content or '[C@H](C)O' in content:
-            return 'Thr', mods
-        # Cysteine (Cys) - Thiol side chain
-        if '[C@H](CS)' in content or '[C@@H](CS)' in content:
-            return 'Cys', mods
-        # Methionine (Met) - Methylthioethyl side chain
-        if ('C[C@H](CCSC)' in content or 'C[C@@H](CCSC)' in content):
-            return 'Met', mods
-        # Asparagine (Asn) - Carbamoylmethyl side chain
-        if ('CC(=O)N' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
-            return 'Asn', mods
-        # Glutamine (Gln) - Carbamoylethyl side chain
-        if ('CCC(=O)N' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
-            return 'Gln', mods
-        # Aspartic acid (Asp) - Carboxymethyl side chain
-        if ('CC(=O)O' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
-            return 'Asp', mods
-        # Glutamic acid (Glu) - Carboxyethyl side chain
-        if ('CCC(=O)O' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
-            return 'Glu', mods
-        # Arginine (Arg) - 3-guanidinopropyl side chain
-        if ('CCCNC(=N)N' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
-            return 'Arg', mods
-        # Histidine (His) - Imidazole side chain
-        if ('Cc2cnc[nH]2' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
-            return 'His', mods
         return None, mods
     def get_modifications(self, segment):
@@ -670,109 +650,11 @@ class PeptideAnalyzer:
             'one_letter': one_letter,
             'is_cyclic': is_cyclic
         }
-"""
-def annotate_cyclic_structure(mol, sequence):
-    '''Create annotated 2D structure with clear, non-overlapping residue labels'''
-    # Generate 2D coordinates
-    # Generate 2D coordinates
-    AllChem.Compute2DCoords(mol)
-    # Create drawer with larger size for annotations
-    drawer = Draw.rdMolDraw2D.MolDraw2DCairo(2000, 2000)  # Even larger size
-    # Get residue list and reverse it to match structural representation
-    if sequence.startswith('cyclo('):
-        residues = sequence[6:-1].split('-')
-    else:
-        residues = sequence.split('-')
-    residues = list(reversed(residues))  # Reverse the sequence
-    # Draw molecule first to get its bounds
-    drawer.drawOptions().addAtomIndices = False
-    drawer.DrawMolecule(mol)
-    drawer.FinishDrawing()
-    # Convert to PIL Image
-    img = Image.open(BytesIO(drawer.GetDrawingText()))
-    draw = ImageDraw.Draw(img)
-    try:
-        # Try to use DejaVuSans as it's commonly available on Linux systems
-        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 60)
-        small_font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 60)
-    except OSError:
-        try:
-            # Fallback to Arial if available (common on Windows)
-            font = ImageFont.truetype("arial.ttf", 60)
-            small_font = ImageFont.truetype("arial.ttf", 60)
-        except OSError:
-            # If no TrueType fonts are available, fall back to default
-            print("Warning: TrueType fonts not available, using default font")
-            font = ImageFont.load_default()
-            small_font = ImageFont.load_default()
-    # Get molecule bounds
-    conf = mol.GetConformer()
-    positions = []
-    for i in range(mol.GetNumAtoms()):
-        pos = conf.GetAtomPosition(i)
-        positions.append((pos.x, pos.y))
-    x_coords = [p[0] for p in positions]
-    y_coords = [p[1] for p in positions]
-    min_x, max_x = min(x_coords), max(x_coords)
-    min_y, max_y = min(y_coords), max(y_coords)
-    # Calculate scaling factors
-    scale = 150  # Increased scale factor
-    center_x = 1000  # Image center
-    center_y = 1000
-    # Add residue labels in a circular arrangement around the structure
-    n_residues = len(residues)
-    radius = 700  # Distance of labels from center
-    # Start from the rightmost point (3 o'clock position) and go counterclockwise
-    # Offset by -3 positions to align with structure
-    offset = 0  # Adjust this value to match the structure alignment
-    for i, residue in enumerate(residues):
-        # Calculate position in a circle around the structure
-        # Start from 0 (3 o'clock) and go counterclockwise
-        angle = -(2 * np.pi * ((i + offset) % n_residues) / n_residues)
-        # Calculate label position
-        label_x = center_x + radius * np.cos(angle)
-        label_y = center_y + radius * np.sin(angle)
-        # Draw residue label
-        text = f"{i+1}. {residue}"
-        bbox = draw.textbbox((label_x, label_y), text, font=font)
-        padding = 10
-        draw.rectangle([bbox[0]-padding, bbox[1]-padding,
-                       bbox[2]+padding, bbox[3]+padding],
-                      fill='white', outline='white')
-        draw.text((label_x, label_y), text,
-                 font=font, fill='black', anchor="mm")
-    # Add sequence at the top with white background
-    seq_text = f"Sequence: {sequence}"
-    bbox = draw.textbbox((center_x, 100), seq_text, font=small_font)
-    padding = 10
-    draw.rectangle([bbox[0]-padding, bbox[1]-padding,
-                   bbox[2]+padding, bbox[3]+padding],
-                  fill='white', outline='white')
-    draw.text((center_x, 100), seq_text,
-             font=small_font, fill='black', anchor="mm")
-    return img
-"""
 def annotate_cyclic_structure(mol, sequence):
-    """Create structure visualization with just the sequence header"""
-    # Generate 2D coordinates
     AllChem.Compute2DCoords(mol)
-    # Create drawer with larger size for annotations
     drawer = Draw.rdMolDraw2D.MolDraw2DCairo(2000, 2000)
     # Draw molecule first
@@ -792,7 +674,7 @@ def annotate_cyclic_structure(mol, sequence):
             print("Warning: TrueType fonts not available, using default font")
             small_font = ImageFont.load_default()
-    # Add just the sequence header at the top
     seq_text = f"Sequence: {sequence}"
     bbox = draw.textbbox((1000, 100), seq_text, font=small_font)
     padding = 10
@@ -805,61 +687,50 @@ def annotate_cyclic_structure(mol, sequence):
     return img
 def create_enhanced_linear_viz(sequence, smiles):
-    """Create an enhanced linear representation using PeptideAnalyzer"""
-    analyzer = PeptideAnalyzer()  # Create analyzer instance
-    # Create figure with two subplots
     fig = plt.figure(figsize=(15, 10))
     gs = fig.add_gridspec(2, 1, height_ratios=[1, 2])
     ax_struct = fig.add_subplot(gs[0])
     ax_detail = fig.add_subplot(gs[1])
-    # Parse sequence and get residues
     if sequence.startswith('cyclo('):
         residues = sequence[6:-1].split('-')
     else:
         residues = sequence.split('-')
-    # Get segments using analyzer
     segments = analyzer.split_on_bonds(smiles)
-    # Debug print
     print(f"Number of residues: {len(residues)}")
     print(f"Number of segments: {len(segments)}")
-    # Top subplot - Basic structure
     ax_struct.set_xlim(0, 10)
     ax_struct.set_ylim(0, 2)
     num_residues = len(residues)
     spacing = 9.0 / (num_residues - 1) if num_residues > 1 else 9.0
-    # Draw basic structure
     y_pos = 1.5
     for i in range(num_residues):
         x_pos = 0.5 + i * spacing
-        # Draw amino acid box
         rect = patches.Rectangle((x_pos-0.3, y_pos-0.2), 0.6, 0.4,
                                facecolor='lightblue', edgecolor='black')
         ax_struct.add_patch(rect)
-        # Draw connecting bonds if not the last residue
         if i < num_residues - 1:
             segment = segments[i] if i < len(segments) else None
             if segment:
-                # Determine bond type from segment info
                 bond_type = 'ester' if 'O-linked' in segment.get('bond_after', '') else 'peptide'
                 is_n_methylated = 'N-Me' in segment.get('bond_after', '')
                 bond_color = 'red' if bond_type == 'ester' else 'black'
                 linestyle = '--' if bond_type == 'ester' else '-'
-                # Draw bond line
                 ax_struct.plot([x_pos+0.3, x_pos+spacing-0.3], [y_pos, y_pos],
                              color=bond_color, linestyle=linestyle, linewidth=2)
-                # Add bond type label
                 mid_x = x_pos + spacing/2
                 bond_label = f"{bond_type}"
                 if is_n_methylated:
@@ -868,16 +739,13 @@ def create_enhanced_linear_viz(sequence, smiles):
                              ha='center', va='bottom', fontsize=10,
                              color=bond_color)
-        # Add residue label
         ax_struct.text(x_pos, y_pos-0.5, residues[i],
                       ha='center', va='top', fontsize=14)
-    # Bottom subplot - Detailed breakdown
     ax_detail.set_ylim(0, len(segments)+1)
     ax_detail.set_xlim(0, 1)
-    # Create detailed breakdown
-    segment_y = len(segments)  # Start from top
     for i, segment in enumerate(segments):
         y = segment_y - i
@@ -899,7 +767,6 @@ def create_enhanced_linear_viz(sequence, smiles):
                 text += "peptide"
             color = 'red'
-        # Add segment analysis
         ax_detail.text(0.05, y, text, fontsize=12, color=color)
         ax_detail.text(0.5, y, f"SMILES: {segment.get('content', '')}", fontsize=10, color='gray')
@@ -910,11 +777,9 @@ def create_enhanced_linear_viz(sequence, smiles):
         ax_struct.text(5, y_pos+0.3, 'Cyclic Connection',
                       ha='center', color='red', fontsize=14)
-    # Add titles and adjust layout
     ax_struct.set_title("Peptide Structure Overview", pad=20)
     ax_detail.set_title("Segment Analysis Breakdown", pad=20)
-    # Remove axes
     for ax in [ax_struct, ax_detail]:
         ax.set_xticks([])
         ax.set_yticks([])
@@ -924,7 +789,7 @@ def create_enhanced_linear_viz(sequence, smiles):
     return fig
 class PeptideStructureGenerator:
-    """A class to generate 3D structures of peptides using different embedding methods"""
     @staticmethod
     def prepare_molecule(smiles):
@@ -933,7 +798,6 @@ class PeptideStructureGenerator:
         if mol is None:
             raise ValueError("Failed to create molecule from SMILES")
-        # Calculate valence for each atom
         for atom in mol.GetAtoms():
             atom.UpdatePropertyCache(strict=False)
@@ -951,7 +815,7 @@ class PeptideStructureGenerator:
     @staticmethod
     def get_etkdg_params(attempt=0):
-        """Get ETKDG parameters with optional modifications based on attempt number"""
         params = AllChem.ETKDGv3()
         params.randomSeed = -1
         params.maxIterations = 200
@@ -1025,13 +889,11 @@ class PeptideStructureGenerator:
     @staticmethod
     def mol_to_sdf_bytes(mol):
         """Convert RDKit molecule to SDF file bytes"""
-        # First write to StringIO in text mode
         sio = StringIO()
         writer = Chem.SDWriter(sio)
         writer.write(mol)
         writer.close()
-        # Convert the string to bytes
         return sio.getvalue().encode('utf-8')
 def process_input(smiles_input=None, file_obj=None, show_linear=False,
@@ -1045,17 +907,14 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False,
     if smiles_input:
         smiles = smiles_input.strip()
-        # First check if it's a peptide using analyzer's method
         if not analyzer.is_peptide(smiles):
             return "Error: Input SMILES does not appear to be a peptide structure.", None, None
         try:
-            # Create molecule
             mol = Chem.MolFromSmiles(smiles)
             if mol is None:
                 return "Error: Invalid SMILES notation.", None, None
-            # Generate 3D structures if requested
             if generate_3d:
                 generator = PeptideStructureGenerator()
@@ -1080,10 +939,8 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False,
                 except Exception as e:
                     return f"Error generating 3D structures: {str(e)}", None, None, None
-            # Use analyzer to get sequence
             segments = analyzer.split_on_bonds(smiles)
-            # Process segments and build sequence
             sequence_parts = []
             output_text = ""
@@ -1108,7 +965,6 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False,
                         output_text += f"Warning: Could not identify residue in segment: {segment['content']}\n"
                 output_text += "\n"
             else:
-                # Just build sequence without detailed analysis in output
                 for segment in segments:
                     residue, mods = analyzer.identify_residue(segment)
                     if residue:
@@ -1117,7 +973,6 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False,
                         else:
                             sequence_parts.append(residue)
-            # Check if cyclic using analyzer's method
             is_cyclic, peptide_cycles, aromatic_cycles = analyzer.is_cyclic(smiles)
             three_letter = '-'.join(sequence_parts)
             one_letter = ''.join(analyzer.three_to_one.get(aa.split('(')[0], 'X') for aa in sequence_parts)
@@ -1126,7 +981,6 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False,
                 three_letter = f"cyclo({three_letter})"
                 one_letter = f"cyclo({one_letter})"
-            # Create cyclic structure visualization
             img_cyclic = annotate_cyclic_structure(mol, three_letter)
             # Create linear representation if requested
@@ -1139,7 +993,6 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False,
                 img_linear = Image.open(buf)
                 plt.close(fig_linear)
-            # Add summary to output
             summary = "Summary:\n"
             summary += f"Sequence: {three_letter}\n"
             summary += f"One-letter code: {one_letter}\n"
@@ -1161,7 +1014,6 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False,
     # Handle file input
     if file_obj is not None:
         try:
-            # Handle file content
             if hasattr(file_obj, 'name'):
                 with open(file_obj.name, 'r') as f:
                     content = f.read()
@@ -1172,16 +1024,13 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False,
             for line in content.splitlines():
                 smiles = line.strip()
                 if smiles:
-                    # Check if it's a peptide
                     if not analyzer.is_peptide(smiles):
                         output_text += f"Skipping non-peptide SMILES: {smiles}\n"
                         continue
-                    # Process this SMILES
                     segments = analyzer.split_on_bonds(smiles)
                     sequence_parts = []
-                    # Add segment details if requested
                     if show_segment_details:
                         output_text += f"\nSegment Analysis for SMILES: {smiles}\n"
                         for i, segment in enumerate(segments):
@@ -1206,7 +1055,6 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False,
                                 else:
                                     sequence_parts.append(residue)
-                    # Get cyclicity and create sequence
                     is_cyclic, peptide_cycles, aromatic_cycles = analyzer.is_cyclic(smiles)
                     sequence = f"cyclo({'-'.join(sequence_parts)})" if is_cyclic else '-'.join(sequence_parts)
@@ -1215,7 +1063,6 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False,
                     output_text += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
                     if is_cyclic:
                         output_text += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
-                        #output_text += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
                     output_text += "-" * 50 + "\n"
             return output_text, None, None
@@ -1298,6 +1145,5 @@ iface = gr.Interface(
     flagging_mode="never"
 )
-# Launch the app
 if __name__ == "__main__":
     iface.launch(share=True)

         is_cyclic = len(peptide_cycles) > 0 and not smiles.endswith('C(=O)O')
         return is_cyclic, peptide_cycles, aromatic_cycles
     def split_on_bonds(self, smiles):
         positions = []
         used = set()
         # Find Gly pattern first
         gly_pattern = r'NCC\(=O\)'
         for match in re.finditer(gly_pattern, smiles):
         # Create segments
         segments = []
         if positions:
             # First segment
             if positions[0]['start'] > 0:
                     'content': smiles[0:positions[0]['start']],
                     'bond_after': positions[0]['pattern']
                 })
             # Process segments
             for i in range(len(positions)-1):
                 current = positions[i]
                 next_pos = positions[i+1]
                 if current['type'] == 'gly':
                     segments.append({
                         'content': 'NCC(=O)',
                         'bond_before': positions[i-1]['pattern'] if i > 0 else None,
                         'bond_after': next_pos['pattern']
                     })
+                    segments.append({
+                        'content': smiles[current['start']+7:next_pos['start']],
+                        'bond_before': 'gly_bond',
+                        'bond_after': next_pos['pattern']
+                    })
                 else:
                     content = smiles[current['end']:next_pos['start']]
                     if content:
                     'content': smiles[positions[-1]['end']:],
                     'bond_before': positions[-1]['pattern']
                 })
         return segments
     def clean_terminal_carboxyl(self, segment):
         content = self.clean_terminal_carboxyl(segment)
         mods = self.get_modifications(segment)
+        # Proline (P) - flexible ring numbers
+        if any([
+            # Check for any ring number in bond patterns
+            (segment.get('bond_after', '').startswith(f'N{n}C(=O)') and 'CCC' in content and
+            any(f'[C@@H]{n}' in content or f'[C@H]{n}' in content for n in '123456789'))
+            for n in '123456789'
+        ]) or any([(segment.get('bond_before', '').startswith(f'C(=O)N{n}') and 'CCC' in content and
+            any(f'CCC{n}' for n in '123456789'))
+            for n in '123456789'
+        ]) or any([
+            # Check ending patterns with any ring number
+            (f'CCCN{n}' in content and content.endswith('=O') and
+            any(f'[C@@H]{n}' in content or f'[C@H]{n}' in content for n in '123456789'))
+            for n in '123456789'
+        ]) or any([
+            # Handle CCC[C@H]n patterns
+            (content == f'CCC[C@H]{n}' and segment.get('bond_before', '').startswith(f'C(=O)N{n}')) or
+            (content == f'CCC[C@@H]{n}' and segment.get('bond_before', '').startswith(f'C(=O)N{n}')) or
+            # N-terminal Pro with any ring number
+            (f'N{n}CCC[C@H]{n}' in content) or
+            (f'N{n}CCC[C@@H]{n}' in content)
+            for n in '123456789'
+        ]):
+            return 'Pro', mods
+        # Tryptophan (W) - more specific indole pattern
+        if re.search(r'c[0-9]c\[nH\]c[0-9]ccccc[0-9][0-9]', content) and \
+        'c[nH]c' in content.replace(' ', ''):
+            return 'Trp', mods
+        # Lysine (K)
+        if '[C@@H](CCCCN)' in content or '[C@H](CCCCN)' in content:
+            return 'Lys', mods
+        # Arginine (R)
+        if '[C@@H](CCCNC(=N)N)' in content or '[C@H](CCCNC(=N)N)' in content:
+            return 'Arg', mods
+        if ('NCC(=O)' in content) or (content == 'C'):
+            if segment.get('bond_before') and segment.get('bond_after'):
+                if ('C(=O)N' in segment['bond_before'] or 'C(=O)N(C)' in segment['bond_before']):
+                    return 'Gly', mods
+            elif segment.get('bond_before') and segment.get('bond_before').startswith('C(=O)N'):
+                return 'Gly', mods
+        if 'CC(C)C[C@H]' in content or 'CC(C)C[C@@H]' in content or '[C@@H](CC(C)C)' in content or '[C@H](CC(C)C)' in content or (('N[C@H](CCC(C)C)' in content or 'N[C@@H](CCC(C)C)' in content) and segment.get('bond_before') is None):
+            return 'Leu', mods
+        if '[C@@H]([C@@H](C)O)' in content or '[C@H]([C@H](C)O)' in content:
+            return 'Thr', mods
+        if re.search(r'\[C@H\]\(Cc\d+ccccc\d+\)', content) or re.search(r'\[C@@H\]\(Cc\d+ccccc\d+\)', content):
+            return 'Phe', mods
+        if ('[C@H](C(C)C)' in content or
+            '[C@@H](C(C)C)' in content or
+            '[C@H]C(C)C' in content or
+            '[C@@H]C(C)C' in content
+            ):
+            if not any(p in content for p in ['CC(C)C[C@H]', 'CC(C)C[C@@H]']):  # Still check not Leu
+                return 'Val', mods
+        if any([
+            'CC[C@H](C)' in content,
+            'CC[C@@H](C)' in content,
+            '[C@@H](CC)C' in content,
+            '[C@H](CC)C' in content,
+            'C(C)C[C@H]' in content and 'CC(C)C' not in content,
+            'C(C)C[C@@H]' in content and 'CC(C)C' not in content
+        ]):
+            return 'Ile', mods
+        if ('[C@H](C)' in content or '[C@@H](C)' in content):
+            if not any(p in content for p in ['C(C)C', 'COC', 'CN(', 'C(C)O', 'CC[C@H]', 'CC[C@@H]']):
+                return 'Ala', mods
+        # Tyrosine (Tyr) - 4-hydroxybenzyl side chain
+        if re.search(r'Cc[0-9]ccc\(O\)cc[0-9]', content):
+            return 'Tyr', mods
+        # Serine (Ser) - Hydroxymethyl side chain
+        if '[C@H](CO)' in content or '[C@@H](CO)' in content:
+            if not ('C(C)O' in content or 'COC' in content):
+                return 'Ser', mods
+        # Threonine (Thr) - 1-hydroxyethyl side chain
+        if '[C@@H]([C@@H](C)O)' in content or '[C@H]([C@H](C)O)' in content or '[C@@H](C)O' in content or '[C@H](C)O' in content:
+            return 'Thr', mods
+        # Cysteine (Cys) - Thiol side chain
+        if '[C@H](CS)' in content or '[C@@H](CS)' in content:
+            return 'Cys', mods
+        # Methionine (Met) - Methylthioethyl side chain
+        if ('CCSC' in content):
+            return 'Met', mods
+        # Glutamine (Gln) - Carbamoylethyl side chain
+        if (content == '[C@@H](CC' or content == '[C@H](CC' and segment.get('bond_before')=='C(=O)N' and segment.get('bond_after')=='C(=O)N') or ('CCC(=O)N' in content) or ('CCC(N)=O' in content):
+            return 'Gln', mods
+        # Asparagine (Asn) - Carbamoylmethyl side chain
+        if (content == '[C@@H](C' or content == '[C@H](C' and segment.get('bond_before')=='C(=O)N' and segment.get('bond_after')=='C(=O)N') or ('CC(=O)N' in content) or ('CCN(=O)' in content) or ('CC(N)=O' in content):
+            return 'Asn', mods
+        # Glutamic acid (Glu) - Carboxyethyl side chain
+        if ('CCC(=O)O' in content):
+            return 'Glu', mods
+        # Aspartic acid (Asp) - Carboxymethyl side chain
+        if ('CC(=O)O' in content):
+            return 'Asp', mods
+        # Arginine (Arg) - 3-guanidinopropyl side chain
+        if ('CCCNC(=N)N' in content):
+            return 'Arg', mods
+        # Histidine (His) - Imidazole side chain
+        if re.search(r'Cc\d+c\[nH\]cn\d+', content) or re.search(r'Cc\d+cnc\[nH\]\d+', content):
+            return 'His', mods
+        # ---- Unnatural amino acids (UAA) ----
+        if '[C@H](COC(C)(C)C)' in content or '[C@@H](COC(C)(C)C)' in content:
+            return 'O-tBu', mods
+        if re.search(r'c\d+ccccc\d+', content):
             if '[C@@H](c1ccccc1)' in content or '[C@H](c1ccccc1)' in content:
                 return '4', mods  # Base phenylglycine
+        if ('C[C@H](CCCC)' in content or 'C[C@@H](CCCC)' in content) and 'CC(C)' not in content:
+            return 'Nle', mods
+        # Ornithine (Orn) - 3-carbon chain with NH2
+        if ('C[C@H](CCCN)' in content or 'C[C@@H](CCCN)' in content) and 'CC(C)' not in content:
+            return 'Orn', mods
+        # 2-Naphthylalanine (2Nal)
+        if ('Cc3cc2ccccc2c3' in content):
+            return '2Nal', mods
+        # Cyclohexylalanine (Cha)
+        if 'N2CCCCC2' in content or 'CCCCC2' in content:
+            return 'Cha', mods
+        # Aminobutyric acid (Abu) - 2-carbon chain
+        if ('C[C@H](CC)' in content or 'C[C@@H](CC)' in content) and not any(p in content for p in ['CC(C)', 'CCCC', 'CCC(C)']):
+            return 'Abu', mods
+        # Pipecolic acid (Pip)
+        if ('N3CCCCC3' in content or 'CCCCC3' in content):
+            return 'Pip', mods
+        # Cyclohexylglycine (Chg) - direct cyclohexyl without CH2
+        if ('C[C@H](C1CCCCC1)' in content or 'C[C@@H](C1CCCCC1)' in content):
+            return 'Chg', mods
+        # 4-Fluorophenylalanine (4F-Phe)
+        if ('Cc2ccc(F)cc2' in content):
+            return '4F-Phe', mods
         # 4-substituted phenylalanines
         if 'Cc1ccc' in content:
             if 'c1ccc(c(c1)O)O' in content:
                 return 'DAH', mods  # 3,4-Dihydroxy-phenylalanine
         # Modified histidines
         if 'c1cnc' in content:
             if '[C@@H]1CN[C@@H](N1)F' in content:
             if 'c1c[nH]c(n1)F' in content:
                 return '2HF2', mods  # 2-fluoro-l-histidine variant
         if '[SeH]' in content:
             return 'CSE', mods  # Selenocysteine
         if 'S' in content:
             if 'CCS' in content:
                 return 'HCS', mods  # homocysteine
         if 'CN=[N]=N' in content:
             return 'AZDA', mods  # azido-alanine
         if '[NH]=[C](=[NH2])=[NH2]' in content:
                 return 'AGM', mods  # 5-methyl-arginine
             if 'CC[NH]=' in content:
                 return 'GDPR', mods  # 2-Amino-3-guanidinopropionic acid
+        # Others
+        if 'C1CCCC1' in content:
+            return 'CPA3', mods  # 3-Cyclopentyl-alanine
+        if 'C1CCCCC1' in content:
+            if 'CC1CCCCC1' in content:
+                return 'ALC', mods  # 3-cyclohexyl-alanine
+            else:
+                return 'CHG', mods  # Cyclohexylglycine
+        if 'CCC[C@@H]' in content or 'CCC[C@H]' in content:
+            return 'NLE', mods  # Norleucine
+        if 'CC[C@@H]' in content or 'CC[C@H]' in content:
+            if not any(x in content for x in ['CC(C)', 'COC', 'CN(']):
+                return 'ABA', mods  # 2-Aminobutyric acid
         if 'CCON' in content:
             return 'CAN', mods  # canaline
         if '[C@@H]1C=C[C@@H](C=C1)' in content:
             if 'c1cccc(c1)[C](=[NH2])=[NH2]' in content:
                 return 'APM', mods  # m-amidinophenyl-3-alanine
         if 'O' in content:
             if '[C@H]([C@H](C)O)O' in content:
                 return 'ILX', mods  # 4,5-dihydroxy-isoleucine
             if '[C@H](c1ccc(c(Cl)c1)O)O' in content:
                 return 'OMY', mods  # (betar)-3-chloro-beta-hydroxy-l-tyrosine
         if 'n1' in content:
             if 'n1cccn1' in content:
                 return 'PYZ1', mods  # 3-(1-Pyrazolyl)-alanine
             if 'c1cnc2c(n1)cccc2' in content:
                 return 'QX32', mods  # 3-(2-quinoxalyl)-alanine
         if 'N' in content:
             if '[NH3]CC[C@@H]' in content:
                 return 'DAB', mods  # Diaminobutyric acid
             if '[NH]=[C](=S)=[NH2]' in content:
                 return 'THIC', mods  # Thio-citrulline
         if 'CC' in content:
             if 'CCCC[C@@H]' in content:
                 return 'AHP', mods  # 2-Aminoheptanoic acid
             if '[C@@H]([C@@H](C)O)C' in content:
                 return 'HLU', mods  # beta-hydroxyleucine
         if '[C@@H]' in content:
             if '[C@@H](C[C@@H](F))' in content:
                 return 'FGA4', mods  # 4-Fluoro-glutamic acid
             if '[C@@H](CC[C@H](C))' in content:
                 return 'MEG', mods  # (3s)-3-methyl-l-glutamic acid
         if 'S' in content:
             if 'SCC[C@@H]' in content:
                 return 'HSER', mods  # homoserine
             if 'S(=O)(=O)' in content:
                 return 'OMT', mods  # Methionine sulfone
         if 'C=' in content:
             if 'C=C[C@@H]' in content:
                 return '2AG', mods  # 2-Allyl-glycine
             if 'C=Cc1ccccc1' in content:
                 return 'STYA', mods  # Styrylalanine
         if '[C@@H]1Cc2c(C1)cccc2' in content:
             return 'IGL', mods  # alpha-amino-2-indanacetic acid
         if '[C](=[C](=O)=O)=O' in content:
             return '26P', mods  # 2-amino-6-oxopimelic acid
         if '[C](=[C](=O)=O)=C' in content:
             return '2NP', mods  # l-2-amino-6-methylene-pimelic acid
         if 'c1cccc2c1cc(O)cc2' in content:
             return 'NAO1', mods  # 5-hydroxy-1-naphthalene
         if 'c1ccc2c(c1)cc(O)cc2' in content:
+            return 'NAO2', mods  # 6-hydroxy-2-naphthalene
         return None, mods
     def get_modifications(self, segment):
             'one_letter': one_letter,
             'is_cyclic': is_cyclic
         }
 def annotate_cyclic_structure(mol, sequence):
+    """Create structure visualization"""
     AllChem.Compute2DCoords(mol)
     drawer = Draw.rdMolDraw2D.MolDraw2DCairo(2000, 2000)
     # Draw molecule first
             print("Warning: TrueType fonts not available, using default font")
             small_font = ImageFont.load_default()
+    # Header
     seq_text = f"Sequence: {sequence}"
     bbox = draw.textbbox((1000, 100), seq_text, font=small_font)
     padding = 10
     return img
 def create_enhanced_linear_viz(sequence, smiles):
+    """Linear visualization"""
+    analyzer = PeptideAnalyzer()
     fig = plt.figure(figsize=(15, 10))
     gs = fig.add_gridspec(2, 1, height_ratios=[1, 2])
     ax_struct = fig.add_subplot(gs[0])
     ax_detail = fig.add_subplot(gs[1])
     if sequence.startswith('cyclo('):
         residues = sequence[6:-1].split('-')
     else:
         residues = sequence.split('-')
     segments = analyzer.split_on_bonds(smiles)
     print(f"Number of residues: {len(residues)}")
     print(f"Number of segments: {len(segments)}")
     ax_struct.set_xlim(0, 10)
     ax_struct.set_ylim(0, 2)
     num_residues = len(residues)
     spacing = 9.0 / (num_residues - 1) if num_residues > 1 else 9.0
     y_pos = 1.5
     for i in range(num_residues):
         x_pos = 0.5 + i * spacing
         rect = patches.Rectangle((x_pos-0.3, y_pos-0.2), 0.6, 0.4,
                                facecolor='lightblue', edgecolor='black')
         ax_struct.add_patch(rect)
         if i < num_residues - 1:
             segment = segments[i] if i < len(segments) else None
             if segment:
                 bond_type = 'ester' if 'O-linked' in segment.get('bond_after', '') else 'peptide'
                 is_n_methylated = 'N-Me' in segment.get('bond_after', '')
                 bond_color = 'red' if bond_type == 'ester' else 'black'
                 linestyle = '--' if bond_type == 'ester' else '-'
                 ax_struct.plot([x_pos+0.3, x_pos+spacing-0.3], [y_pos, y_pos],
                              color=bond_color, linestyle=linestyle, linewidth=2)
                 mid_x = x_pos + spacing/2
                 bond_label = f"{bond_type}"
                 if is_n_methylated:
                              ha='center', va='bottom', fontsize=10,
                              color=bond_color)
         ax_struct.text(x_pos, y_pos-0.5, residues[i],
                       ha='center', va='top', fontsize=14)
     ax_detail.set_ylim(0, len(segments)+1)
     ax_detail.set_xlim(0, 1)
+    segment_y = len(segments)
     for i, segment in enumerate(segments):
         y = segment_y - i
                 text += "peptide"
             color = 'red'
         ax_detail.text(0.05, y, text, fontsize=12, color=color)
         ax_detail.text(0.5, y, f"SMILES: {segment.get('content', '')}", fontsize=10, color='gray')
         ax_struct.text(5, y_pos+0.3, 'Cyclic Connection',
                       ha='center', color='red', fontsize=14)
     ax_struct.set_title("Peptide Structure Overview", pad=20)
     ax_detail.set_title("Segment Analysis Breakdown", pad=20)
     for ax in [ax_struct, ax_detail]:
         ax.set_xticks([])
         ax.set_yticks([])
     return fig
 class PeptideStructureGenerator:
+    """Generate 3D structures of peptides using different embedding methods"""
     @staticmethod
     def prepare_molecule(smiles):
         if mol is None:
             raise ValueError("Failed to create molecule from SMILES")
         for atom in mol.GetAtoms():
             atom.UpdatePropertyCache(strict=False)
     @staticmethod
     def get_etkdg_params(attempt=0):
+        """Get ETKDG parameters"""
         params = AllChem.ETKDGv3()
         params.randomSeed = -1
         params.maxIterations = 200
     @staticmethod
     def mol_to_sdf_bytes(mol):
         """Serialize a molecule to SDF text and return it encoded as UTF-8 bytes."""
         # The SD writer is handed an in-memory text buffer to write into.
         text_buffer = StringIO()
         sdf_writer = Chem.SDWriter(text_buffer)
         sdf_writer.write(mol)
         # Close the writer before the buffer contents are read back.
         sdf_writer.close()
         sdf_text = text_buffer.getvalue()
         return sdf_text.encode('utf-8')
 def process_input(smiles_input=None, file_obj=None, show_linear=False,
     if smiles_input:
         smiles = smiles_input.strip()
         if not analyzer.is_peptide(smiles):
             return "Error: Input SMILES does not appear to be a peptide structure.", None, None
         try:
             mol = Chem.MolFromSmiles(smiles)
             if mol is None:
                 return "Error: Invalid SMILES notation.", None, None
             if generate_3d:
                 generator = PeptideStructureGenerator()
                 except Exception as e:
                     return f"Error generating 3D structures: {str(e)}", None, None, None
             segments = analyzer.split_on_bonds(smiles)
             sequence_parts = []
             output_text = ""
                         output_text += f"Warning: Could not identify residue in segment: {segment['content']}\n"
                 output_text += "\n"
             else:
                 for segment in segments:
                     residue, mods = analyzer.identify_residue(segment)
                     if residue:
                         else:
                             sequence_parts.append(residue)
             is_cyclic, peptide_cycles, aromatic_cycles = analyzer.is_cyclic(smiles)
             three_letter = '-'.join(sequence_parts)
             one_letter = ''.join(analyzer.three_to_one.get(aa.split('(')[0], 'X') for aa in sequence_parts)
                 three_letter = f"cyclo({three_letter})"
                 one_letter = f"cyclo({one_letter})"
             img_cyclic = annotate_cyclic_structure(mol, three_letter)
             # Create linear representation if requested
                 img_linear = Image.open(buf)
                 plt.close(fig_linear)
             summary = "Summary:\n"
             summary += f"Sequence: {three_letter}\n"
             summary += f"One-letter code: {one_letter}\n"
     # Handle file input
     if file_obj is not None:
         try:
             if hasattr(file_obj, 'name'):
                 with open(file_obj.name, 'r') as f:
                     content = f.read()
             for line in content.splitlines():
                 smiles = line.strip()
                 if smiles:
                     if not analyzer.is_peptide(smiles):
                         output_text += f"Skipping non-peptide SMILES: {smiles}\n"
                         continue
                     segments = analyzer.split_on_bonds(smiles)
                     sequence_parts = []
                     if show_segment_details:
                         output_text += f"\nSegment Analysis for SMILES: {smiles}\n"
                         for i, segment in enumerate(segments):
                                 else:
                                     sequence_parts.append(residue)
                     is_cyclic, peptide_cycles, aromatic_cycles = analyzer.is_cyclic(smiles)
                     sequence = f"cyclo({'-'.join(sequence_parts)})" if is_cyclic else '-'.join(sequence_parts)
                     output_text += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
                     if is_cyclic:
                         output_text += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
                     output_text += "-" * 50 + "\n"
             return output_text, None, None
     flagging_mode="never"
 )
 # Script entry point: launch the interface only when this file is run directly,
 # not when it is imported as a module.
 # NOTE(review): share=True presumably requests a public Gradio share link —
 # confirm that is intended for this deployment.
 if __name__ == "__main__":
     iface.launch(share=True)