首页手记奇异值分解——SVD算法解析与...

奇异值分解——SVD算法解析与简单应用

标签：

机器学习

CH1.奇异值分解原理

CH2.SVD计算举例：

示例一：SVD算法的实现

class SVDReduce(object):


    def __init__(self, data, dimension=500):
        """
        Initialize the class with the parameters.
        :param data: pd.DataFrame, the output data from the class DataPreprocess.
        :param dimension: int, default 500. To specify the output dimension.
        """
        self.data = data
        self.target_dim = dimension
        self.format_data_path = '../../data/format_2/'
        self.field = ['user', 'product', 'context', 'shop']
        # self.field = ['product']

    def judge(self, data):
        """
        Abandon
        方法：判读大领域的维度
        标准维度,判断：不足补零,大于转为svd()
        :return:
        """
        logger.info("judge the dimension...")
        field_matrix_shape = data.shape
        dimension = field_matrix_shape[1]
        if dimension > self.target_dim:
            return True
        else:
            return False

    def svd(self, field_matrix):
        """
        方法：对大的领域数据进行降维
        :param field_matrix: list(2d) or np.array, 每一行(list)表示一条record
        :return: 返回领域的降维矩阵
        """
        logger.info("use svd to reduce the dimension")
        indices = field_matrix.index
        fm = field_matrix
        field_matrix = np.array(field_matrix)
        field_matrix_dim = field_matrix.shape
        print(field_matrix_dim)

        # 对维度进行判断是否需要降维
        if field_matrix_dim[1] <= self.target_dim:
            logger.info('Filed_matrix_dim if smaller than the target, no need to perform reduction, thus we'
                        'only add extra zero element to make up the dimension.')
            dim_make_up = self.target_dim - field_matrix_dim[1]
            matrix_make_up = np.zeros([field_matrix_dim[0], dim_make_up])
            matrix_make_up = pd.DataFrame(matrix_make_up, index=indices)
            return pd.concat([fm, matrix_make_up], axis=1)
        else:
            svd = TruncatedSVD(n_components=self.target_dim)
            return pd.DataFrame(svd.fit_transform(field_matrix), index=indices)

    def run(self):
        """
        1. Extract the one-hot-form data from the self.new_data_one_hot according to the field-instruction.
        2. Based on the given self.target_dimension, judge the field matrix whether satisfy the dimension requirement.
        3. If so, do the svd method, else add extra zero element to achieve the self.target_dimension.
        """
        output_matrix = []
        for i, field_data in enumerate(self.data):
            # field_data = self.split_field(field=item)
            svd_matrix = self.svd(field_matrix=field_data)
            svd_matrix.to_csv(self.format_data_path + 'svd_' + self.field[i] + '.csv')
            output_matrix.append(svd_matrix)
        return output_matrix

示例二：SVD用于图像压缩

# -*- coding: utf-8 -*-

import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
from sklearn import datasets
from skimage import io

def getImgAsMat(index):
    ds = datasets.fetch_olivetti_faces()
    return np.mat(ds.images[index])

def getImgAsMatFromFile(filename):
    img = io.imread(filename, as_grey=True)
    return np.mat(img) 

def plotImg(imgMat):
    plt.imshow(imgMat, cmap=plt.cm.gray)
    plt.show()

def recoverBySVD(imgMat, k):
    # singular value decomposition
    U, s, V = la.svd(imgMat)
    # choose top k important singular values (or eigens)
    Uk = U[:, 0:k]
    #SK:二维数组
    Sk = np.diag(s[0:k])
    Vk = V[0:k, :]
    # recover the image
    imgMat_new = Uk * Sk * Vk
    return imgMat_new


# -------------------- main --------------------- #
#A = getImgAsMat(0)
#plotImg(A)
#A_new = recoverBySVD(A, 20)
#plotImg(A_new)

A = getImgAsMatFromFile('D:/Movie/svd.jpg')
plotImg(A)
A_new = recoverBySVD(A, 20)
plotImg(A_new)

原图：

30：

20：