File size: 3,207 Bytes
dab2de2
4c04f50
d5ff673
dab2de2
d5ff673
dab2de2
 
 
 
d5ff673
 
 
dab2de2
 
d5ff673
dab2de2
 
d5ff673
 
 
 
dab2de2
 
 
 
 
d5ff673
dab2de2
d5ff673
 
 
 
 
 
dab2de2
 
 
 
 
 
4c04f50
dab2de2
 
 
 
 
 
 
 
 
 
 
 
 
 
d5ff673
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dab2de2
 
 
 
d5ff673
dab2de2
d5ff673
 
 
 
dab2de2
d5ff673
 
dab2de2
d5ff673
 
 
 
dab2de2
d5ff673
 
dab2de2
 
 
 
 
d5ff673
 
dab2de2
d5ff673
 
 
 
 
 
 
 
 
dab2de2
 
d5ff673
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""Plot pandas.DataFrame with DBSCAN clustering."""
# pylint: disable=invalid-name, too-many-arguments, unused-import
import numpy as np  # noqa
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import DBSCAN

from logzero import logger  # noqa

# from radiobee.cmat2tset import cmat2tset

# Disabled snippet, kept only as an inert string bound to the throwaway name
# `_`: it would enable interactive mode inside an IPython session and switch
# to the 'Agg' backend otherwise. Nothing inside the string is executed.
_ = """
if "get_ipython" in globals():
    plt.ion()
else:
    plt.switch_backend('Agg')
# """
# fastlid.set_languages = ["en", "zh"]


# fmt: off
def plot_df(
        df_: pd.DataFrame,
        # cmat: np.ndarray,
        eps: float = 10,
        min_samples: int = 6,
        xlabel: str = "",
        ylabel: str = "",
        xlim: int = 0,
        ylim: int = 0,
        backend: str = "TkAgg",
) -> plt:
    # fmt: on
    """Scatter-plot a three-column frame, marking DBSCAN outliers.

    The frame is clustered with ``DBSCAN(eps=eps, min_samples=min_samples)``
    fitted on all three columns. Rows assigned to a cluster are drawn
    coloured by their ``cos`` value; rows labelled ``-1`` by DBSCAN are
    drawn as red crosses.

    Args:
        df_: pandas.DataFrame with exactly three columns, treated as
            ``["x", "y", "cos"]`` in that order. The caller's frame is not
            modified; the relabelling is done on a copy.
        eps: forwarded to DBSCAN as ``eps``.
        min_samples: forwarded to DBSCAN as ``min_samples``.
        xlabel: x-axis label; when empty, ``str(xlim)`` is used instead.
        ylabel: y-axis label; when empty, ``str(ylim)`` is used instead.
        xlim: only feeds the fallback x label (axis limits are not set);
            when falsy it becomes ``len(df_)``.
        ylim: only feeds the fallback y label (axis limits are not set);
            when falsy it becomes the largest ``y`` value.
        backend: matplotlib backend to draw with. When it differs from the
            active backend it is switched in for the duration of the call
            and the previous backend is restored afterwards, including when
            plotting raises.

    Returns:
        matplotlib.pyplot: for possible use in gradio

    Raises:
        ValueError: if ``df_`` does not have exactly three columns.

    Usage notes:
        plot_df(pd.DataFrame(cmat2tset(smat), columns=['x', 'y', 'cos']))
        df_ = pd.DataFrame(cmat2tset(smat), columns=['x', 'y', 'cos'])

        # sort 'x', axis 0 changes, index regenerated
        df_s = df_.sort_values('x', axis=0, ignore_index=True)

        # sorting does not seem to impact clustering
        DBSCAN(1.5, min_samples=3).fit(df_).labels_
        DBSCAN(1.5, min_samples=3).fit(df_s).labels_

    """
    if df_.shape[1] != 3:
        logger.error(" shape mismatch: %s, expected (x, 3)", df_.shape)
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError(" df_.shape[1] not equal to 3 ")

    # Relabel a copy: assigning to df_.columns directly would rename the
    # caller's DataFrame as a hidden side effect.
    frame = df_.copy()
    frame.columns = ["x", "y", "cos"]

    if not xlim:
        xlim = len(frame)
    if not ylim:
        ylim = frame["y"].max()

    if not xlabel:
        xlabel = str(xlim)
    if not ylabel:
        ylabel = str(ylim)

    backend_saved = matplotlib.get_backend()
    switch_needed = backend_saved != backend

    # switch if necessary
    if switch_needed:
        plt.switch_backend(backend)

    try:
        sns.set()
        sns.set_style("darkgrid")

        fig = plt.figure(figsize=(13, 8))
        gs = fig.add_gridspec(1, 1, wspace=0.4, hspace=0.58)
        ax0 = fig.add_subplot(gs[0, 0])

        cmap = "viridis_r"

        # DBSCAN marks points that belong to no cluster with the label -1.
        labels = DBSCAN(min_samples=min_samples, eps=eps).fit(frame).labels_
        clustered = labels > -1

        # clustered
        frame[clustered].plot.scatter("x", "y", c="cos", cmap=cmap, ax=ax0)
        # outliers
        frame[~clustered].plot.scatter(
            "x", "y", c="r", marker="x", alpha=0.6, ax=ax0
        )

        ax0.set_xlabel(xlabel)
        ax0.set_ylabel(ylabel)
        ax0.set_title("max cos ('x': outliers)")
    finally:
        # restore if necessary, even when plotting failed, so the process is
        # never left on the temporary backend.
        # NOTE(review): on matplotlib releases where switch_backend closes
        # open figures, this restore also closes the figure drawn above --
        # confirm against the matplotlib version in use.
        if switch_needed:
            plt.switch_backend(backend_saved)

    return plt


# Inert copy of plot_df's keyword defaults, parked in a string bound to the
# throwaway name `_`; the string is never executed.
_ = """
        eps: float = 10
        min_samples: int = 6
        xlabel: str = ""
        ylabel: str = ""
        xlim: int = 0
        ylim: int = 0
"""