asigalov61
committed on
Upload 2 files
Browse files- HaystackSearch.py +187 -0
- TMIDIX.py +139 -0
HaystackSearch.py
ADDED
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Needle in a haystack search
|
3 |
+
|
4 |
+
Original source code is located here:
|
5 |
+
https://github.com/agapow/py-gsp/blob/master/gsp/motifsearch.py
|
6 |
+
"""
|
7 |
+
|
8 |
+
"""
|
9 |
+
A modifiable GSP algorithm.
|
10 |
+
"""
|
11 |
+
|
12 |
+
__version__ = '0.1'
|
13 |
+
|
14 |
+
|
15 |
+
### IMPORTS
|
16 |
+
|
17 |
+
### CONSTANTS & DEFINES
|
18 |
+
|
19 |
+
# NOTE(review): PP_INDENT is not referenced by any code visible in this file;
# presumably a pretty-printing indent width carried over from the original
# module -- confirm before relying on or removing it.
PP_INDENT = 3
|
20 |
+
|
21 |
+
|
22 |
+
### CODE ###
|
23 |
+
|
24 |
+
class GspSearch (object):
    """
    A generic GSP (sequential pattern) search whose individual steps can be
    overridden by subclasses.

    The object is built once from a collection of transactions and can then
    be searched repeatedly with different support thresholds. In this generic
    form the transactions are assumed to be plain strings (the containment
    test relies on ``str.find``).
    """

    def __init__ (self, raw_transactions):
        """
        Store the transactions and derive the alphabet from them.
        """
        self.process_transactions (raw_transactions)

    def process_transactions (self, raw_transactions):
        """
        Record every transaction as-is and collect the set of symbols seen.

        Sets ``self.transactions`` (list, input order preserved) and
        ``self.alpha`` (a keys view over the symbols, in first-seen order).
        """
        self.transactions = []
        symbols = {}
        for transaction in raw_transactions:
            # dict keys give de-duplication while keeping first-seen order
            symbols.update (dict.fromkeys (transaction, True))
            self.transactions.append (transaction)
        self.alpha = symbols.keys()

    def generate_init_candidates (self):
        """
        Return the starting candidates: one per alphabet symbol.
        """
        return list (self.alpha)

    def generate_new_candidates (self, freq_pat):
        """
        Build the next generation of candidates, each one symbol longer.

        Every ordered pair of existing patterns is offered to
        ``merge_candidates``; successful merges are kept once, in the order
        they are first produced. ``freq_pat`` must be non-empty because the
        length of its first element is reported.
        """
        print ("Generating new candidates from %s %s-mers ..." % (len (freq_pat), len (freq_pat[0])))

        grown = []
        for left in freq_pat:
            for right in freq_pat:
                merged = self.merge_candidates (left, right)
                if not merged or merged in grown:
                    continue
                grown.append (merged)

        ## Postconditions & return:
        return grown

    def merge_candidates (self, a, b):
        """
        Join two patterns when the tail of ``a`` equals the head of ``b``.

        Returns ``a`` extended by the last element of ``b``, or None when the
        patterns do not overlap.
        """
        return a + b[-1:] if a[1:] == b[:-1] else None

    def filter_candidates (self, trans_min):
        """
        Return ``(candidate, hits)`` pairs for the current candidates that
        occur in at least ``trans_min`` transactions.
        """
        scored = ((cand, self.single_candidate_freq (cand)) for cand in self.candidates)
        return [(cand, hits) for cand, hits in scored if trans_min <= hits]

    def single_candidate_freq (self, c):
        """
        Return the number of transactions in which candidate ``c`` is found.
        """
        return sum (1 for t in self.transactions if self.search_transaction (t, c))

    def search_transaction (self, t, c):
        """
        Tell whether candidate ``c`` occurs inside transaction ``t``.
        """
        return not (t.find (c) == -1)

    def search (self, threshold):
        """
        Run the search and return the longest frequent patterns found.

        ``threshold`` is the minimal support as a fraction in (0, 1]; it is
        multiplied by the number of transactions to get the minimal hit
        count. The result is the last non-empty generation of
        ``(pattern, hits)`` pairs, or an empty list when not even the initial
        candidates reach the required support. Progress is printed.
        """
        ## Preparation:
        assert (0.0 < threshold) and (threshold <= 1.0)
        trans_cnt = len (self.transactions)
        trans_min = trans_cnt * threshold

        print ("The number of transactions is: %s" % trans_cnt)
        print ("The minimal support is: %s" % threshold)
        print ("The minimal transaction support is: %s" % trans_min)

        ## Main:
        # seed with the alphabet and apply the first support filter
        self.candidates = list (self.generate_init_candidates())
        print ("There are %s initial candidates." % len (self.candidates))
        survivors = []
        latest = self.filter_candidates (trans_min)
        print ("The initial candidates have been filtered down to %s." % len (latest))

        # keep growing patterns until a generation has no frequent members
        while latest:
            survivors = latest
            self.candidates = self.generate_new_candidates ([entry[0] for entry in survivors])
            print ("There are %s new candidates." % len (self.candidates))
            latest = self.filter_candidates (trans_min)
            print ("The candidates have been filtered down to %s." % len (latest))

        return survivors
|
140 |
+
|
141 |
+
### END ###
|
142 |
+
|
143 |
+
__version__ = '0.1'
|
144 |
+
|
145 |
+
### CONSTANTS & DEFINES
|
146 |
+
|
147 |
+
# NOTE(review): NULL_SYMBOL is not referenced by any code visible in this
# file; presumably a placeholder symbol inherited from the original module --
# confirm before relying on or removing it.
NULL_SYMBOL = 'X'
|
148 |
+
|
149 |
+
### CODE ###
|
150 |
+
|
151 |
+
def HaystackSearch(needle, haystack):
    """
    Return the index at which needle first occurs inside haystack.

    Parameters:
    needle: the sub-sequence to look for
    haystack: the sequence to look in

    Returns:
    the index of the start of needle, or -1 if it is not found.
    An empty needle is reported at index 0.

    The search is a Boyer-Moore-Horspool scan. Both arguments must support
    len() and integer indexing, and their elements must be hashable because
    they are used as keys of the shift table. Elements are compared one by
    one, so the two arguments may be of different sequence types.

    Example:

    >>> HaystackSearch([1, 2], [1, 1, 2])
    1
    >>> HaystackSearch((1, 2, 3), range(10))
    1
    >>> HaystackSearch('gh', 'abcdefghi')
    6
    >>> HaystackSearch([2, 3], [7, 8, 9])
    -1
    """
    haystack_len = len(haystack)
    needle_len = len(needle)

    # For every needle element except the last: distance from its last
    # occurrence to the end of the needle (later occurrences overwrite).
    shift_table = {}
    for pos in range(needle_len - 1):
        shift_table[needle[pos]] = needle_len - pos - 1

    # `end` is the haystack index aligned with the needle's last element.
    end = needle_len - 1
    while end < haystack_len:
        # Compare right-to-left, stopping at the first difference.
        mismatch = any(haystack[end - back] != needle[-back - 1]
                       for back in range(needle_len))
        if not mismatch:
            return end - needle_len + 1
        # Shift by the table entry for the element under the needle's end,
        # or by the whole needle length when that element is not in it.
        end += shift_table.get(haystack[end], needle_len)

    return -1
|
TMIDIX.py
CHANGED
@@ -6895,6 +6895,145 @@ def binary_matrix_to_original_escore_notes(binary_matrix,
|
|
6895 |
|
6896 |
###################################################################################
|
6897 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6898 |
# This is the end of the TMIDI X Python module
|
6899 |
|
6900 |
###################################################################################
|
|
|
6895 |
|
6896 |
###################################################################################
|
6897 |
|
6898 |
+
def escore_notes_averages(escore_notes,
                          times_index=1,
                          durs_index=2,
                          chans_index=3,
                          ptcs_index=4,
                          vels_index=5,
                          average_drums=False,
                          score_is_delta=False,
                          return_ptcs_and_vels=False
                          ):
    """Return average timing (and optionally pitch/velocity) values of a score.

    Parameters:
      escore_notes: list of indexable note events.
      times_index, durs_index, chans_index, ptcs_index, vels_index:
        positions of the time, duration, channel, pitch and velocity fields
        inside each event.
      average_drums: when False, events whose channel field equals 9 are
        left out of every average; when True the channel field is not
        inspected at all.
      score_is_delta: when True the time fields of `escore_notes` are used
        directly; otherwise the score is first passed through
        `delta_score_notes` and the time fields of its result are used.
      return_ptcs_and_vels: also return average pitch and velocity.

    Returns:
      [avg_time, avg_duration] or, with `return_ptcs_and_vels`,
      [avg_time, avg_duration, avg_pitch, avg_velocity].
      Time values equal to 0 are excluded from the time average. Durations,
      pitches and velocities are always read from `escore_notes` itself.
      Any average over an empty selection is reported as 0 rather than
      raising ZeroDivisionError (e.g. empty score, drums-only score, or a
      score in which every time value is 0).
    """

    def mean(values):
        # Empty selections average to 0 instead of dividing by zero.
        return sum(values) / len(values) if values else 0

    def counted(event):
        # Short-circuit keeps the channel field untouched when drums count.
        return average_drums or event[chans_index] != 9

    timed_notes = escore_notes if score_is_delta else delta_score_notes(escore_notes)

    times = [e[times_index] for e in timed_notes if e[times_index] != 0 and counted(e)]
    durs = [e[durs_index] for e in escore_notes if counted(e)]

    averages = [mean(times), mean(durs)]

    if return_ptcs_and_vels:
        ptcs = [e[ptcs_index] for e in escore_notes if counted(e)]
        vels = [e[vels_index] for e in escore_notes if counted(e)]
        averages += [mean(ptcs), mean(vels)]

    return averages
|
6939 |
+
|
6940 |
+
###################################################################################
|
6941 |
+
|
6942 |
+
def adjust_escore_notes_timings(escore_notes,
                                adj_k=1,
                                times_index=1,
                                durs_index=2,
                                score_is_delta=False,
                                return_delta_scpre=False
                                ):
    """Scale the time and duration fields of a score by the factor `adj_k`.

    Parameters:
      escore_notes: list of indexable, mutable note events.
      adj_k: multiplier applied to times and durations.
      times_index, durs_index: positions of the time and duration fields.
      score_is_delta: when True a deep copy of `escore_notes` is scaled;
        otherwise the result of `delta_score_notes(escore_notes)` is scaled
        (presumably a new list -- confirm against that helper).
      return_delta_scpre: (sic) when True the scaled score is returned as
        is; otherwise it is passed through `delta_score_to_abs_score` first.

    Scaled values are rounded and never drop below 1, except that a time
    field equal to 0 is left at 0. Durations are always scaled.
    """

    if not score_is_delta:
        scaled_notes = delta_score_notes(escore_notes)
    else:
        scaled_notes = copy.deepcopy(escore_notes)

    for note in scaled_notes:
        delta_time = note[times_index]

        # Zero time values stay untouched.
        if delta_time != 0:
            note[times_index] = max(1, round(delta_time * adj_k))

        note[durs_index] = max(1, round(note[durs_index] * adj_k))

    return scaled_notes if return_delta_scpre else delta_score_to_abs_score(scaled_notes)
|
6967 |
+
|
6968 |
+
###################################################################################
|
6969 |
+
|
6970 |
+
def escore_notes_delta_times(escore_notes,
                             times_index=1
                             ):
    """Return the time field of every event after delta conversion.

    The score is passed through `delta_score_notes` and the value at
    `times_index` of each resulting event is collected, in order.
    """

    return [event[times_index] for event in delta_score_notes(escore_notes)]
|
6977 |
+
|
6978 |
+
###################################################################################
|
6979 |
+
|
6980 |
+
def escore_notes_durations(escore_notes,
                           durs_index=1
                           ):
    """Return the field at `durs_index` of every event after delta conversion.

    The score is passed through `delta_score_notes` and the value at
    `durs_index` of each resulting event is collected, in order.

    NOTE(review): the default `durs_index=1` is the position the other
    functions in this file use for the time field (they default
    `durs_index` to 2), so with the default this presumably returns times
    rather than durations -- confirm, and pass `durs_index=2` explicitly
    if durations are wanted.
    """

    return [event[durs_index] for event in delta_score_notes(escore_notes)]
|
6987 |
+
|
6988 |
+
###################################################################################
|
6989 |
+
|
6990 |
+
def ordered_lists_match_ratio(src_list, trg_list):
    """Return the fraction of positions at which the two lists hold equal items.

    Items are compared pairwise in order; the comparison stops at the end
    of the shorter list, so only the common prefix length is considered.

    Returns a float in [0, 1]. When there are no pairs to compare (either
    input is empty) the result is 0.0 rather than a ZeroDivisionError.
    """

    pairs = list(zip(src_list, trg_list))

    if not pairs:
        return 0.0

    return sum(a == b for a, b in pairs) / len(pairs)
|
6995 |
+
|
6996 |
+
###################################################################################
|
6997 |
+
|
6998 |
+
def lists_intersections(src_list, trg_list):
    """Return the distinct items present in both lists.

    Both inputs are converted to sets, so items must be hashable,
    duplicates collapse, and the order of the returned list is whatever
    the resulting set yields.
    """

    src_items = set(src_list)
    trg_items = set(trg_list)

    return list(src_items.intersection(trg_items))
|
7000 |
+
|
7001 |
+
###################################################################################
|
7002 |
+
|
7003 |
+
def transpose_escore_notes(escore_notes,
                           transpose_value=0,
                           channel_index=3,
                           pitches_index=4
                           ):
    """Return a deep copy of the score with pitches shifted by `transpose_value`.

    Events whose channel field equals 9 are copied unchanged. For all other
    events the pitch field is shifted and then clamped to the range 1..127.
    The input score is not modified.
    """

    shifted_notes = copy.deepcopy(escore_notes)

    for note in shifted_notes:
        if note[channel_index] == 9:
            continue

        new_pitch = note[pitches_index] + transpose_value
        note[pitches_index] = max(1, min(127, new_pitch))

    return shifted_notes
|
7016 |
+
|
7017 |
+
###################################################################################
|
7018 |
+
|
7019 |
+
def transpose_escore_notes_to_pitch(escore_notes,
                                    target_pitch_value=60,
                                    channel_index=3,
                                    pitches_index=4
                                    ):
    """Return a deep copy of the score shifted so its mean pitch hits a target.

    The mean is taken over the pitch field of every event whose channel
    field is not 9, using the caller's `channel_index` / `pitches_index`.
    The shift is `round(target_pitch_value) - round(mean_pitch)`; it is
    applied to those same events and each result is clamped to 1..127.
    Events on channel 9 are copied unchanged. The input is not modified.

    The mean pitch is computed here directly, so the function does not
    depend on the score's timing data and honours custom field indices.
    When there are no events outside channel 9 there is nothing to
    transpose and an unchanged deep copy is returned.
    """

    tr_escore_notes = copy.deepcopy(escore_notes)

    pitches = [e[pitches_index] for e in escore_notes if e[channel_index] != 9]

    if not pitches:
        # No pitched events: avoid dividing by zero, return the plain copy.
        return tr_escore_notes

    average_pitch = sum(pitches) / len(pitches)

    transpose_delta = int(round(target_pitch_value)) - int(round(average_pitch))

    for e in tr_escore_notes:
        if e[channel_index] != 9:
            e[pitches_index] = max(1, min(127, e[pitches_index] + transpose_delta))

    return tr_escore_notes
|
7034 |
+
|
7035 |
+
###################################################################################
|
7036 |
+
|
7037 |
# This is the end of the TMIDI X Python module
|
7038 |
|
7039 |
###################################################################################
|