File size: 1,648 Bytes
dab2de2
4c04f50
 
2f6222b
dab2de2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d38118
dab2de2
 
 
3d38118
dab2de2
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
"""Gen triple-set from a  matrix."""
# pylint: disable=unused-import

from typing import List, Tuple, Union  # noqa

import numpy as np
import pandas as pd


# fmt: off
def cmat2tset(
    cmat1: Union[List[List[float]], np.ndarray, pd.DataFrame],
) -> np.ndarray:
    # fmt: on
    """Generate a triple-set from a matrix by greedy one-to-one matching.

    The largest remaining entry is picked repeatedly; after each pick the
    entry's whole row and whole column are taken out of play, so no single
    row (or column) can dominate the result. Ties are resolved in row-major
    order (first occurrence wins).

    Args:
        cmat1: non-empty 2-D list, array or DataFrame holding a correlation
            (or any other "bigger is better") metric matrix. The input is
            never modified.

    Returns:
        Array of shape ``(min(n_rows, n_cols), 3)``. Each row is
        ``[col, row, value]`` (x-axis, y-axis, metric), in order of
        selection.

    Raises:
        SystemError: if the data is empty or not two-dimensional.
    """
    cmat = np.asarray(cmat1)

    # Reject empty input and anything that is not a proper 2-D matrix up
    # front; otherwise the shape unpacking below fails with an unrelated
    # ValueError.
    if cmat.ndim != 2 or not cmat.size:
        raise SystemError("data not 2d...")

    src_len, tgt_len = cmat.shape  # ylim, xlim

    # Matched rows/columns are physically removed rather than overwritten
    # with a "min - 1" sentinel: a sentinel cannot be represented in bool or
    # unsigned matrices and collides with genuine -inf entries. The id
    # vectors map positions in the shrinking matrix back to the original
    # row/column indices.
    remaining = cmat
    row_ids = np.arange(src_len)
    col_ids = np.arange(tgt_len)

    triples = []
    for _ in range(min(src_len, tgt_len)):
        flat = int(remaining.argmax())
        row, col = divmod(flat, remaining.shape[1])
        triples.append(
            [int(col_ids[col]), int(row_ids[row]), remaining[row, col]]
        )  # x-axis, y-axis, value

        # take the matched row and column out of play
        remaining = np.delete(np.delete(remaining, row, axis=0), col, axis=1)
        row_ids = np.delete(row_ids, row)
        col_ids = np.delete(col_ids, col)

    return np.array(triples)