File size: 4,680 Bytes
786340e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import sys
from os import access, R_OK
from os.path import isfile
from collections import defaultdict
import re

from smem_obj import SMEM_Obj, ObjType


def clean_smem_string_token(s):
    """Return the text form of *s* with every '|' quote character dropped."""
    text = str(s)
    # Splitting on the pipe and re-joining removes each occurrence.
    return "".join(text.split("|"))

def removeComments(s):
    """ Cut the given string off at its first '#'.

    The input is returned untouched when it contains no '#'.
    """
    hash_pos = str(s).find('#')
    if hash_pos >= 0:
        # Keep only what precedes the comment marker
        return s[:hash_pos]
    return s

def read_tokens_from_lines(file):
    """Extract the tokens found inside every "smem --add { ... }" command.

    Lines outside such a command, and lines whose first non-blank character
    is '#', are ignored. Within a command, everything from the first '#' on
    a line to its end is dropped before tokenizing.

    Args:
        file: An iterable of text lines (an open text file, a list of
            strings, ...).

    Returns:
        A flat list of token strings in reading order, or None when an error
        occurs while reading or tokenizing (the error is printed to stderr).
    """
    open_marker = "smem --add {"

    # Split on runs of whitespace and on pipe-quoted strings (the capture
    # group keeps the quoted text, without its pipes, as a token), and split
    # right after each '(', ')', '{' and '}' so those characters survive as
    # tokens. Whitespace is matched with \s+ rather than the character class
    # [\s+], which would also swallow literal '+' characters (e.g. "+5").
    # NOTE: brackets are only separated from what FOLLOWS them; a closing
    # bracket glued to an unquoted value (e.g. "foo)") stays in that token.
    splitter = re.compile(r'\s+|\|(.+?)\||(?<=\))|(?<=\()|(?<=\{)|(?<=\})')

    retval = []             # All tokens from the "smem --add {}" contents.
    in_smem_add = False     # Whether the read is inside an "smem --add {}" command

    try:
        for line in file:
            sline = line.lstrip()
            # Whole-line comments carry no tokens
            if sline.startswith('#'):
                continue
            if not in_smem_add:
                if not sline.startswith(open_marker):
                    continue
                in_smem_add = True
                # Only tokenize what comes after the opening '{'
                sline = sline[len(open_marker):]

            # Drop any trailing comment, then split into tokens
            code = sline.partition('#')[0].rstrip()
            # re.split yields None for the unused capture group and '' between
            # adjacent delimiters; neither is a token.
            tokens = [tok for tok in splitter.split(code) if tok]

            # A '}' token closes the command; nothing after it belongs to it
            if '}' in tokens:
                tokens = tokens[:tokens.index('}')]
                in_smem_add = False

            retval.extend(tokens)
    except Exception as e:
        # Boundary handler: callers rely on None (not an exception) on failure
        print("ERROR extracting tokens from the given file: "+str(e), file=sys.stderr)
        return None

    return retval

""" 
This method scans a file that holds an 'smem --add{}' command and returns the relevant tokens from that file
"""
def get_smem_tokens_from_local_file(filename):
    """Scan a local file for 'smem --add {}' commands and return their tokens.

    Args:
        filename: Path of the file to read.

    Returns:
        The result of read_tokens_from_lines() for the file's lines, or None
        (after printing an error to stderr) when the file does not exist, is
        not readable, or cannot be opened.
    """
    # Error check for reading the file
    if not isfile(filename):
        print("ERROR in get_smem_tokens_from_local_file(): File does not exist: '"+str(filename)+"'.", file=sys.stderr)
        return None
    if not access(filename, R_OK):
        print("ERROR in get_smem_tokens_from_local_file(): File is not readable: '"+str(filename)+"'.", file=sys.stderr)
        return None

    # The checks above can be out of date by the time the file is opened, so
    # report an open failure the same way instead of raising.
    try:
        file = open(filename)
    except OSError as e:
        print("ERROR in get_smem_tokens_from_local_file(): Could not open file: '"+str(filename)+"': "+str(e), file=sys.stderr)
        return None

    # Get the file content
    with file:
        return read_tokens_from_lines(file)


class SMEM_Parser():
    """Turn the token stream of an 'smem --add {}' command into SMEM_Obj objects."""

    def __init__(self):
        # Maps the identifier token of each object to its SMEM_Obj; an entry
        # is created automatically the first time an identifier is looked up.
        self.smem_var_obj_map = defaultdict(SMEM_Obj)

    def parse_file(self, smem_tokens):
        """Populate smem_var_obj_map from a sequence of tokens.

        The token following '(' names the current object, a token starting
        with '^' sets the current attribute, and every other token is added
        to the current object as a value of that attribute. Values starting
        with '<' are stored as references to the object with that
        identifier; all others are stored as strings with pipes removed.

        Args:
            smem_tokens: Iterable of token strings. None or an empty
                sequence is accepted and parses nothing.

        Returns:
            None. Parsing stops early, after printing an error to stderr,
            when a value token appears without a current object and attribute.
        """
        # The token readers return None on failure; there is nothing to parse then
        if not smem_tokens:
            return

        current_obj = None
        current_attr = None
        expect_id = False       # True right after '(': the next token is the object's id
        for token in smem_tokens:
            # Skip empty tokens
            if not token:
                continue
            # Get WME ID
            if expect_id:
                current_obj = self.smem_var_obj_map[token]
                current_obj.set_id_var(token)
                expect_id = False
                continue
            # Get start of obj
            if token == '(':
                expect_id = True
                continue
            # Get end of obj
            if token == ')':
                current_obj = None
                current_attr = None
                continue
            # Get attributes
            if token.startswith('^'):
                current_attr = token
                continue
            # Anything else is a value and needs an object and attribute to attach to
            if current_obj is None or current_attr is None:
                print("ERROR: Unexpected token '"+token+"'.", file=sys.stderr)
                return
            if token.startswith('<'):
                # Reference to another object (created on demand)
                token_val = self.smem_var_obj_map[token]
                token_val.set_id_var(token)
            else:
                token_val = clean_smem_string_token(token)
            current_obj.add_wme(current_attr, token_val)

    def get_context_root(self):
        """Return the first object that has a '^context-root' attribute.

        The object found is marked with ObjType.CONTEXT before being
        returned. Returns None when no object has that attribute.
        """
        for obj in self.smem_var_obj_map.values():
            if any(attr == "^context-root" for attr, _ in obj.wme_list):
                obj.obj_type = ObjType.CONTEXT
                return obj
        return None