

# Entropy

```python
import scipy.stats
import sklearn.preprocessing
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.subplots as subplots
import plotly.graph_objects as go

from tqdm.auto import tqdm
from noise import pnoise1
```

## Generate data

Create a simulated dataset using Perlin noise: 50 signals, each with 1000 time points. The signals are generated independently, but their distributions are similar.

```python
def generate_data(repeat=50, num=1000):
    data = []
    seeds = [e for e in range(num)]
    np.random.shuffle(seeds)
    seeds = seeds[:repeat]
    print(seeds)
    for seed in tqdm(seeds):
        # The 1* / 0* factors toggle between a fixed value and a random draw
        octaves = 1 * 4 + 0 * np.random.randint(3, 7)
        persistence = 0 * 0.8 + 1 * np.random.uniform(0.7, 0.9)
        s = np.array([
            pnoise1(e, persistence=persistence, base=seed, octaves=octaves)
            for e in np.linspace(0, 1, num)
        ])
        data.append(s)
    data = np.array(data)
    # Scale every signal into [0, 1]
    data = sklearn.preprocessing.minmax_scale(data, axis=1)
    print(data.shape)
    return data

data = generate_data()

df = pd.DataFrame()
for j in range(5):
    df['signal-{}'.format(j)] = data[j]
px.scatter(df, title='Signals').show()
px.imshow(data, title='Signal').show()
```

## All-in-One Class

```python
class DataWithProb(object):
    '''
    Data with Probability

    Every entropy you need for a dataset.
    '''

    def __init__(self):
        pass

    def load(self, data):
        '''
        Load new data; the data is a 2-d array whose
        1st dimension is the signal.
        The function computes things automatically:

        - prob: the empirical probability of every signal, a 1-d array per signal
        - joint prob: the empirical joint probability of every two signals, a 2-d array per pair

        Since the data is inherently discrete, the discrete prob is computed.

        :param data: The data to be computed
        '''
        self.data = data
        self.shape = data.shape
        print('Data shape is {}'.format(self.shape))
        bins = self.auto_bins()
        self.compute_prob(bins)
        self.compute_joint_prob(bins)

    def auto_bins(self, num=100):
        '''
        Compute bins with the default setting.

        :param num: How many bin edges to use
        :return bins: The linearly separated bins
        '''
        data = self.data
        bins = np.linspace(np.min(data), np.max(data), num)
        return bins

    def compute_prob(self, bins):
        '''
        Compute prob for every signal.

        :param bins: The bins
        :return prob: The prob for every signal, the shape is n x (m-1),
                      where m is the count of bin edges, n is the count of signals
        '''
        data = self.data
        n = data.shape[0]
        m = bins.shape[0]
        prob = np.zeros((n, m - 1))
        for j, d in enumerate(tqdm(data, 'Prob.')):
            a, _ = np.histogram(d, bins=bins)
            a = a.astype(np.float32)
            a /= np.sum(a)
            prob[j] = a
        self.prob = prob
        self.prob_bins = bins
        return prob

    def compute_joint_prob(self, bins):
        '''
        Compute joint prob for every two signals.

        :param bins: The bins
        :return joint_prob: The joint_prob matrix of every two signals,
                            the shape is n x n x (m-1) x (m-1).
                            The first two n refer to the signal pair;
                            the last two (m-1) refer to the bins grid.
        '''
        data = self.data
        n = data.shape[0]
        m = bins.shape[0]
        joint_prob = np.zeros((n, n, m - 1, m - 1))
        for j in tqdm(range(n), 'Joint prob.'):
            for k in range(n):
                a, _, _ = np.histogram2d(data[j], data[k], bins=bins)
                a = a.astype(np.float32)
                a /= np.sum(a)
                joint_prob[j][k] = a
        self.joint_prob = joint_prob
        self.joint_prob_bins = bins
        return joint_prob

    def shannon_entropy(self):
        '''
        Compute the Shannon entropy for every signal.

        :return entropy: The Shannon entropy for every signal, a 1-d array
        '''
        entropy = np.array([
            scipy.stats.entropy(p)
            for p in tqdm(self.prob, 'Shannon Entropy')
        ])
        return entropy

    def joint_entropy(self):
        '''
        Compute the joint entropy for every two signals.

        :return joint_entropy: The joint entropy for every two signals, a 2-d array
        '''
        joint_prob = self.joint_prob
        n = joint_prob.shape[0]
        joint_entropy = np.zeros((n, n))
        for j in tqdm(range(n), 'Joint Entropy'):
            for k in range(n):
                p = joint_prob[j][k].flatten()
                joint_entropy[j][k] = scipy.stats.entropy(p)
        return joint_entropy

    def mutual_information(self):
        '''
        Compute the mutual information for every two signals.

        :return mutual_information: The mutual information for every two signals, a 2-d array
        '''
        prob = self.prob
        joint_prob = self.joint_prob
        n = joint_prob.shape[0]
        mutual_information = np.zeros((n, n))
        for j in tqdm(range(n), 'Mutual Information'):
            for k in range(n):
                p1 = prob[j][:, np.newaxis]
                p2 = prob[k][np.newaxis, :]
                pxy = np.matmul(p1, p2)  # product of the marginals
                pp = joint_prob[j, k]
                m = pp != 0              # skip empty joint bins
                pxy = pxy[m]
                pp = pp[m]
                s = pp * np.log(pxy / pp)
                mutual_information[j][k] = -np.sum(s)
        return mutual_information

    def cross_entropy(self):
        '''
        Compute the cross entropy for every two signals.

        :return cross_entropy: The cross entropy for every two signals, a 2-d array
        '''
        prob = self.prob
        n = prob.shape[0]
        cross_entropy = np.zeros((n, n))
        for j in tqdm(range(n), 'Cross Entropy'):
            for k in range(n):
                p1 = prob[j]
                p2 = prob[k]
                m = p2 != 0  # skip bins where log(p2) is undefined
                p1 = p1[m]
                p2 = p2[m]
                s = p1 * np.log(p2)
                cross_entropy[j][k] = -np.sum(s)
        return cross_entropy
```

## Compute prob and jointProb

```python
dwp = DataWithProb()
dwp.load(data)
dwp.prob.shape, dwp.joint_prob.shape
```
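As a quick sanity check (not in the original notebook), every row of `dwp.prob` should sum to 1, and summing a joint histogram over either axis should reproduce the corresponding marginal:

```python
# Sanity checks on the empirical probabilities (a sketch; assumes `dwp` from above).
# Each signal's probability vector should sum to 1.
assert np.allclose(dwp.prob.sum(axis=1), 1.0)

# Marginalizing the joint histogram of signals (j, k) over the second axis
# should give back the marginal histogram of signal j (and vice versa).
j, k = 0, 1
assert np.allclose(dwp.joint_prob[j, k].sum(axis=1), dwp.prob[j], atol=1e-6)
assert np.allclose(dwp.joint_prob[j, k].sum(axis=0), dwp.prob[k], atol=1e-6)
```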

### Display prob & jointProb

```python
px.imshow(dwp.prob, title='Prob.', color_continuous_scale='dense').show()
```
```python
d = dwp.joint_prob
bins = dwp.joint_prob_bins

# Joint prob. of signals 0 and 1
px.imshow(d[0, 1],
          title='Joint prob. (0, 1)',
          x=bins[:-1], y=bins[:-1],
          color_continuous_scale='dense').show()

# Joint prob. of every signal against signal 1, averaged over the first signal
px.imshow(np.mean(d[:, 1], axis=0),
          title='Joint prob. (all, 1)',
          x=bins[:-1], y=bins[:-1],
          color_continuous_scale='dense').show()
```

## Compute entropy

### Shannon entropy

```python
shannon_entropy = dwp.shannon_entropy()
px.scatter(shannon_entropy, title='Shannon Entropy').show()

df = pd.DataFrame()
df['Max Ent. ({:0.2f})'.format(np.max(shannon_entropy))] = dwp.data[np.argmax(shannon_entropy)]
df['Min Ent. ({:0.2f})'.format(np.min(shannon_entropy))] = dwp.data[np.argmin(shannon_entropy)]

fig = subplots.make_subplots(rows=1, cols=2)

f = px.line(df)
for d in f.data:
    fig.add_trace(d, row=1, col=2)

f = px.violin(df)
for d in f.data:
    fig.add_trace(d, row=1, col=1)

fig.update_layout(title='Entropy')
fig.show()
```
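Since `scipy.stats.entropy` uses the natural logarithm, every value above is bounded by the log of the number of histogram bins (99 with the default `auto_bins`). A tiny check of that bound (my addition):

```python
# Upper bound check (a sketch): with natural log, H(p) <= log(number of bins).
num_bins = dwp.prob.shape[1]  # 99 bins with the default auto_bins(num=100)
print('Upper bound log({}) = {:.3f} nats'.format(num_bins, np.log(num_bins)))
print('Observed max entropy = {:.3f} nats'.format(shannon_entropy.max()))
assert shannon_entropy.max() <= np.log(num_bins) + 1e-9
```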

### Other entropies

```python
joint_entropy = dwp.joint_entropy()
mutual_information = dwp.mutual_information()
cross_entropy = dwp.cross_entropy()

fig = subplots.make_subplots(rows=1, cols=3,
                             subplot_titles=['Joint entropy', 'Mutual information', 'Cross entropy'])

f = px.imshow(joint_entropy)
fig.add_trace(f.data[0], row=1, col=1)

f = px.imshow(mutual_information)
fig.add_trace(f.data[0], row=1, col=2)

f = px.imshow(cross_entropy)
fig.add_trace(f.data[0], row=1, col=3)

fig.show()
```
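Before using these matrices, a quick consistency check (my addition): on the diagonal, where a signal is paired with itself, both the joint entropy and the cross entropy should reduce to the signal's own Shannon entropy, since H(X, X) = H(X) and H(p, p) = H(p).

```python
# Diagonal consistency checks (a sketch; assumes the variables computed above).
# H(X, X) == H(X): the joint histogram of a signal with itself is concentrated
# on the diagonal, so its entropy equals the marginal entropy.
assert np.allclose(np.diag(joint_entropy), shannon_entropy, atol=1e-5)

# H(p, p) == H(p): the cross entropy of a distribution with itself is its Shannon entropy.
assert np.allclose(np.diag(cross_entropy), shannon_entropy, atol=1e-5)
```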

## Compare entropy

### Select signal with minimal entropy

```python
select = np.argmin(shannon_entropy)
print('Select the {}th signal by min entropy'.format(select))
px.line(dwp.data[select], title='Min entropy signal').show()
```
```python
color_discrete_sequence = px.colors.sequential.PuBu_r

def draw_by_order(data, order, title, color_discrete_sequence=color_discrete_sequence):
    # The selected signal (global `select`) is drawn as order=0,
    # followed by the top-5 signals of the given order.
    n = data.shape[1]
    dfs = [pd.DataFrame(dict(order=0, x=range(n), value=data[select]))]
    for j in range(5):
        dfs.append(
            pd.DataFrame(dict(order=j + 1, x=range(n), value=data[order[j]])))
    df = pd.concat(dfs, axis=0)

    fig = subplots.make_subplots(rows=2, cols=2,
                                 specs=[[{}, {}],
                                        [{"colspan": 2}, None]])

    # Plot waves
    f = px.line(df,
                x='x',
                y='value',
                color='order',
                color_discrete_sequence=color_discrete_sequence,
                title=title)
    for d in f.data:
        fig.add_trace(d, row=1, col=1)

    # Plot violin for prob.
    f = px.violin(df,
                  y='value',
                  x='order',
                  color='order',
                  color_discrete_sequence=color_discrete_sequence,
                  title=title)
    for d in f.data:
        fig.add_trace(d, row=2, col=1)

    # Plot radar waves: wrap the time axis around a circle
    k = (2 * np.pi) / n
    df['corr_x'] = df['value'] * np.cos(df['x'] * k)
    df['corr_y'] = df['value'] * np.sin(df['x'] * k)
    f = px.line(df,
                x='corr_x',
                y='corr_y',
                color='order',
                color_discrete_sequence=color_discrete_sequence,
                title=title)
    f.update_layout(yaxis=dict(scaleanchor='x'))  # note: applies to the temporary figure only
    for d in f.data:
        fig.add_trace(d, row=1, col=2)

    fig.update_layout(height=800, width=800, title=title)
    fig.show()

    return df
```

### Order signals by joint entropy (ascending)

A smaller joint entropy means that, once one signal is known, little additional information is needed to describe the other.
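For the empirical distributions built here, joint entropy, marginal entropy, and mutual information are tied together by I(X; Y) = H(X) + H(Y) - H(X, Y), so ranking by ascending joint entropy is closely related to the mutual-information ranking below. A quick numeric check of that identity (my addition, using the matrices computed above):

```python
# Check I(X; Y) = H(X) + H(Y) - H(X, Y) on the empirical histograms (a sketch).
# Small deviations come only from float32 rounding in the normalized histograms.
h = shannon_entropy
expected_mi = h[:, np.newaxis] + h[np.newaxis, :] - joint_entropy
assert np.allclose(mutual_information, expected_mi, atol=1e-4)
```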


```python
order = np.argsort(joint_entropy[select])
title = 'Joint entropy (top 5)'
data = dwp.data
draw_by_order(data, order, title)
```

### Order signals by mutual information (descending)

A larger mutual information means the two signals are less independent of each other.
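Because each distribution is estimated from only 1000 samples spread over 99 bins, even unrelated signals show a small positive mutual information. One rough way to see that bias floor (my sketch, with a throwaway helper `mi_from_samples`) is to compare the MI of the selected signal with itself against the MI with a shuffled copy of itself:

```python
def mi_from_samples(x, y, bins):
    # Estimate mutual information from two sample arrays (helper for this sketch only).
    pxy, _, _ = np.histogram2d(x, y, bins=bins)
    pxy = pxy / pxy.sum()
    px = pxy.sum(axis=1, keepdims=True)
    py = pxy.sum(axis=0, keepdims=True)
    m = pxy > 0
    return np.sum(pxy[m] * np.log(pxy[m] / (px * py)[m]))

x = dwp.data[select]
bins = dwp.prob_bins
print('MI(x, x)          = {:.3f}'.format(mi_from_samples(x, x, bins)))  # equals H(x)
print('MI(x, shuffled x) = {:.3f}'.format(
    mi_from_samples(x, np.random.permutation(x), bins)))                 # finite-sample floor
```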


```python
order = np.argsort(mutual_information[select])[::-1]
title = 'Mutual Information (top 5)'
data = dwp.data
draw_by_order(data, order, title)
```

### Order signals by cross entropy (ascending)

A smaller cross entropy means the distributions of the two signals are more alike.
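The cross entropy decomposes as H(p, q) = H(p) + D_KL(p||q), so for the fixed selected signal, ordering by cross entropy (ascending) is essentially ordering by KL divergence from the selected signal. A quick numeric check on one pair (my addition; all terms are restricted to bins where both distributions are positive, mirroring the zero-masking in the class):

```python
# Check H(p, q) = H(p) + KL(p || q) for one pair of signals (a sketch).
p = dwp.prob[select].astype(np.float64)
q = dwp.prob[0].astype(np.float64)           # an arbitrary comparison signal
m = (p > 0) & (q > 0)                        # common support only
h_p = -np.sum(p[m] * np.log(p[m]))           # Shannon entropy of p (on the common support)
kl_pq = np.sum(p[m] * np.log(p[m] / q[m]))   # KL divergence D_KL(p || q)
h_pq = -np.sum(p[m] * np.log(q[m]))          # cross entropy H(p, q)
print('H(p) + KL(p||q) = {:.4f}'.format(h_p + kl_pq))
print('H(p, q)         = {:.4f}  (class value: {:.4f})'.format(h_pq, cross_entropy[select, 0]))
assert np.isclose(h_p + kl_pq, h_pq)
```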


```python
order = np.argsort(cross_entropy[select])
title = 'Cross Entropy (top 5)'
data = dwp.data
draw_by_order(data, order, title)
```