Model Demo
The following sections present a demo for each model. The source data used in the demos can be found on Google Drive.
LDA
Latent Dirichlet Allocation(LDA)
Latent dirichlet allocation. David M. Blei, Andrew Y. Ng, and Michael I. Jordan. In Advances in Neural Information Processing Systems.
Demo
from pydpm._model import LDA

# NOTE: `sio`, `np` and `ACC` are used below but are not imported in this
# snippet (presumably scipy.io, numpy and pydpm's accuracy metric).

# Load the MNIST .mat file, scale the pixel matrix by 5 and round up.
mnist = sio.loadmat('./data/mnist_gray')
counts = np.array(np.ceil(mnist['train_mnist'] * 5), order='C')
labels = mnist['train_label']

# Both splits are cut from `train_mnist` / `train_label`: columns 0-998 are
# used for training and columns 1000-1998 for testing, with matching labels.
train_cols = slice(0, 999)
test_cols = slice(1000, 1999)
train_data, test_data = counts[:, train_cols], counts[:, test_cols]
train_label, test_label = labels[train_cols], labels[test_cols]

# Create the model and deploy it on gpu or cpu.
model = LDA(128, 'gpu')
model.initial(train_data)  # use the shape of train_data to initialize the params of model

# Train, then run test() on both splits. The value returned by train() is
# replaced straight away by the test() result on the training data.
train_local_params = model.train(100, train_data)
train_local_params = model.test(100, train_data)
test_local_params = model.test(100, test_data)

# Evaluate the model with classification accuracy.
# The demo accuracy can achieve 0.850.
train_theta = train_local_params.Theta
test_theta = test_local_params.Theta
results = ACC(train_theta, test_theta, train_label, test_label, 'SVM')

# Save the model after training.
model.save()
PFA
Poisson Factor Analysis(PFA)
Beta-negative binomial process and poisson factor analysis. Mingyuan Zhou, Lauren Hannah, David B. Dunson, and Lawrence Carin. In International Conference on Artificial Intelligence and Statistics.
Demo
from pydpm._model import PFA

# NOTE: `sio`, `np` and `ACC` are used below but are not imported in this
# snippet (presumably scipy.io, numpy and pydpm's accuracy metric).

# Load the MNIST .mat file, scale the pixel matrix by 5 and round up.
mnist = sio.loadmat('./data/mnist_gray')
counts = np.array(np.ceil(mnist['train_mnist'] * 5), order='C')
labels = mnist['train_label']

# Both splits are cut from `train_mnist` / `train_label`: columns 0-998 are
# used for training and columns 1000-1998 for testing, with matching labels.
train_cols = slice(0, 999)
test_cols = slice(1000, 1999)
train_data, test_data = counts[:, train_cols], counts[:, test_cols]
train_label, test_label = labels[train_cols], labels[test_cols]

# Create the model and deploy it on gpu or cpu.
model = PFA(128, 'gpu')
model.initial(train_data)  # use the shape of train_data to initialize the params of model

# Train, then run test() on both splits. The value returned by train() is
# replaced straight away by the test() result on the training data.
train_local_params = model.train(100, train_data)
train_local_params = model.test(100, train_data)
test_local_params = model.test(100, test_data)

# Evaluate the model with classification accuracy.
# The demo accuracy can achieve 0.8238.
train_theta = train_local_params.Theta
test_theta = test_local_params.Theta
results = ACC(train_theta, test_theta, train_label, test_label, 'SVM')

# Save the model after training.
model.save()
PGBN
Poisson Gamma Belief Network(PGBN)
The Poisson gamma belief network. Mingyuan Zhou, Yulai Cong, and Bo Chen. In Advances in Neural Information Processing Systems.
Demo
from pydpm._model import PGBN

# NOTE: `sio`, `np` and `ACC` are used below but are not imported in this
# snippet (presumably scipy.io, numpy and pydpm's accuracy metric).

# Load the MNIST .mat file, scale the pixel matrix by 5 and round up.
mnist = sio.loadmat('./data/mnist_gray')
counts = np.array(np.ceil(mnist['train_mnist'] * 5), order='C')
labels = mnist['train_label']

# Both splits are cut from `train_mnist` / `train_label`: columns 0-998 are
# used for training and columns 1000-1998 for testing, with matching labels.
train_cols = slice(0, 999)
test_cols = slice(1000, 1999)
train_data, test_data = counts[:, train_cols], counts[:, test_cols]
train_label, test_label = labels[train_cols], labels[test_cols]

# Create the model and deploy it on gpu or cpu.
model = PGBN([128, 64, 32], device='gpu')
model.initial(train_data)

# Train, then run test() on both splits. The value returned by train() is
# replaced straight away by the test() result on the training data.
train_local_params = model.train(100, train_data)
train_local_params = model.test(100, train_data)
test_local_params = model.test(100, test_data)

# Evaluate the model with classification accuracy, using only the first
# entry of Theta from each split.
# The demo accuracy can achieve 0.8549.
train_theta = train_local_params.Theta[0]
test_theta = test_local_params.Theta[0]
results = ACC(train_theta, test_theta, train_label, test_label, 'SVM')

# Save the model after training.
model.save()
DPFA
Deep Poisson Factor Analysis(DPFA)
Scalable deep poisson factor analysis for topic modeling. Zhe Gan, Changyou Chen, Ricardo Henao, David E. Carlson, and Lawrence Carin. In International Conference on Machine Learning.
Demo
from pydpm._model import DPFA

# NOTE: `sio`, `np` and `ACC` are used below but are not imported in this
# snippet (presumably scipy.io, numpy and pydpm's accuracy metric).

# Load the MNIST .mat file, scale the pixel matrix by 5 and round up.
mnist = sio.loadmat('./data/mnist_gray.mat')
counts = np.array(np.ceil(mnist['train_mnist'] * 5), order='C')
labels = mnist['train_label']

# Both splits are cut from `train_mnist` / `train_label`: columns 0-998 are
# used for training and columns 1000-1998 for testing, with matching labels.
train_cols = slice(0, 999)
test_cols = slice(1000, 1999)
train_data, test_data = counts[:, train_cols], counts[:, test_cols]
train_label, test_label = labels[train_cols], labels[test_cols]

# Create the model and deploy it on gpu or cpu.
model = DPFA([128, 64, 32], 'gpu')  # topics of 3 layers
model.initial(train_data)  # use the shape of train_data to initialize the params of model

# The same burn-in and collection settings are passed to train() and test().
burnin = 100
collection = 80

# Train, then run test() on both splits. The value returned by train() is
# replaced straight away by the test() result on the training data.
train_local_params = model.train(burnin, collection, train_data)
train_local_params = model.test(burnin, collection, train_data)
test_local_params = model.test(burnin, collection, test_data)

# Evaluate the model with classification accuracy.
# The demo accuracy can achieve 0.9099.
train_theta = train_local_params.Theta
test_theta = test_local_params.Theta
results = ACC(train_theta, test_theta, train_label, test_label, 'SVM')

# Save the model after training.
model.save()
DirBN
Dirichlet Belief Networks(DirBN)
Dirichlet belief networks for topic structure learning. He Zhao, Lan Du, Wray L. Buntine, and Mingyuan Zhou. In Advances in Neural Information Processing Systems.
Demo
from pydpm._model import DirBN

# NOTE: `sio`, `np` and `ACC` are used below but are not imported in this
# snippet (presumably scipy.io, numpy and pydpm's accuracy metric).

# Load the MNIST .mat file, scale the pixel matrix by 5 and round up.
mnist = sio.loadmat('./data/mnist_gray.mat')
counts = np.array(np.ceil(mnist['train_mnist'] * 5), order='C')
labels = mnist['train_label']

# Both splits are cut from `train_mnist` / `train_label`: columns 0-498 are
# used for training and columns 1000-1498 for testing, with matching labels.
train_cols = slice(0, 499)
test_cols = slice(1000, 1499)
train_data, test_data = counts[:, train_cols], counts[:, test_cols]
train_label, test_label = labels[train_cols], labels[test_cols]

# Create the model and deploy it on gpu or cpu.
model = DirBN([100, 100], 'gpu')  # topics of each layers
model.initial(train_data)  # use the shape of train_data to initialize the params of model

# Train, then run test() on both splits. The value returned by train() is
# replaced straight away by the test() result on the training data.
train_local_params = model.train(90, train_data)
train_local_params = model.test(90, train_data)
test_local_params = model.test(90, test_data)

# Evaluate the model with classification accuracy.
# The demo accuracy can achieve 0.78.
train_theta = train_local_params.Theta
test_theta = test_local_params.Theta
results = ACC(train_theta, test_theta, train_label, test_label, 'SVM')

# Save the model after training.
model.save()
WEDTM
Word Embeddings Deep Topic Model(WEDTM)
Inter and intra topic structure learning with word embeddings. He Zhao, Lan Du, Wray L. Buntine, and Mingyuan Zhou. In International Conference on Machine Learning.
Demo
from pydpm._model import WEDTM

# NOTE: `sio`, `np` and `ACC` are used below but are not imported in this
# snippet (presumably scipy.io, numpy and pydpm's accuracy metric).

# Load the WS dataset.
dataset = sio.loadmat('./data/WS.mat')
dense_docs = dataset['doc'].todense()
labels = dataset['labels']
embeddings = dataset['embeddings']

# The stored index vectors are shifted down by one before use
# (presumably they are 1-based -- confirm against the data file).
train_idx = dataset['train_idx'][0] - 1
test_idx = dataset['test_idx'][0] - 1

# Keep every 10th training document and every 5th test document; labels are
# subsampled with the same strides.
train_data = np.asarray(dense_docs[:, train_idx])[:, ::10].astype(int)
test_data = np.asarray(dense_docs[:, test_idx])[:, ::5].astype(int)
train_label = labels[train_idx][::10, :]
test_label = labels[test_idx][::5, :]

# Params of model.
T = 3          # vertical layers
S = 3          # sub topics
K = [100] * T  # topics in each layers

# Create the model and deploy it on gpu or cpu.
model = WEDTM(K, 'gpu')
# The model is initialized from the full `dataset['doc']` matrix rather than
# from the subsampled train_data.
model.initial(dataset['doc'])

# Train, then run test() on both splits. The value returned by train() is
# replaced straight away by the test() result on the training data.
train_local_params = model.train(embeddings, S, 300, train_data)
train_local_params = model.test(embeddings, S, 300, train_data)
test_local_params = model.test(embeddings, S, 300, test_data)

# Evaluate the model with classification accuracy
# (no reference accuracy is stated for this demo).
train_theta = train_local_params.Theta
test_theta = test_local_params.Theta
results = ACC(train_theta, test_theta, train_label, test_label, 'SVM')

# Save the model after training.
model.save()
CPFA
Convolutional Poisson Factor Analysis(CPFA)
Convolutional poisson gamma belief network. Chaojie Wang, Bo Chen, Sucheng Xiao, and Mingyuan Zhou. In International Conference on Machine Learning.
Demo
from pydpm._model import CPFA

# NOTE: `cPickle`, `np` and `ACC` are used below but are not imported in this
# snippet.

# Load the pickled TREC data. The file is opened in a `with` block so the
# handle is closed as soon as loading finishes.
# pickle can execute arbitrary code while loading: only use a TREC.pkl that
# comes from a trusted source.
with open("./data/TREC.pkl", "rb") as data_file:
    DATA = cPickle.load(data_file, encoding='iso-8859-1')
# ========== details of data process can be found in pydpm/example/CPFA_demo.py =========
# The batch_*_tr / batch_*_te arrays, data_train_list, data_test_list and
# delete_count used below are not defined in this snippet; presumably they are
# produced by that preprocessing step.

# create the model and deploy it on gpu or cpu
model = CPFA(200, 'gpu')
# mode 1, dense input
model.initial([batch_file_index_tr, batch_rows_tr, batch_cols_tr, batch_value_tr], [len(data_train_list) - delete_count, DATA['Vab_Size'], np.max(batch_len_tr)]) # use the shape of train_data to initialize the params of model
# Train, then run test() on both splits. The value returned by train() is
# replaced straight away by the test() result on the training data.
train_local_params = model.train(100, [batch_file_index_tr, batch_rows_tr, batch_cols_tr, batch_value_tr], [len(data_train_list) - delete_count, DATA['Vab_Size'], np.max(batch_len_tr)])
train_local_params = model.test(100, [batch_file_index_tr, batch_rows_tr, batch_cols_tr, batch_value_tr], [len(data_train_list) - delete_count, DATA['Vab_Size'], np.max(batch_len_tr)])
test_local_params = model.test(100, [batch_file_index_te, batch_rows_te, batch_cols_te, batch_value_te], [len(data_test_list) - delete_count, DATA['Vab_Size'], np.max(batch_len_te)])
# Collapse W_nk over its last two axes (3, then 2) and transpose the result to
# get the feature matrices passed to ACC.
train_theta = np.sum(np.sum(train_local_params.W_nk, axis=3), axis=2).T
test_theta = np.sum(np.sum(test_local_params.W_nk, axis=3), axis=2).T
# train_theta[np.where(np.isinf((train_theta)))] = 0
# Score of test dataset's Theta: 0.682
results = ACC(train_theta, test_theta, batch_label_tr, batch_label_te, 'SVM')
# save the model after training
model.save()
CPGBN
Convolutional Poisson Gamma Belief Network(CPGBN)
Convolutional poisson gamma belief network. Chaojie Wang, Bo Chen, Sucheng Xiao, and Mingyuan Zhou. In International Conference on Machine Learning.
Demo
from pydpm._model import CPGBN

# NOTE: `cPickle`, `np` and `ACC` are used below but are not imported in this
# snippet.

# Load the pickled TREC data. The file is opened in a `with` block so the
# handle is closed as soon as loading finishes.
# pickle can execute arbitrary code while loading: only use a TREC.pkl that
# comes from a trusted source.
with open("data/TREC.pkl", "rb") as data_file:
    DATA = cPickle.load(data_file, encoding='iso-8859-1')
# ========== details of data process can be found in pydpm/example/CPFA_demo.py =========
# NOTE(review): the line above points at CPFA_demo.py although this is the
# CPGBN demo -- confirm which example file holds the matching preprocessing.
# The batch_*_tr / batch_*_te arrays, data_train_list, data_test_list and
# delete_count used below are not defined in this snippet; presumably they are
# produced by that preprocessing step.

# create the model and deploy it on gpu or cpu
model = CPGBN([200, 100, 50], 'gpu')
# mode 1, dense input
model.initial([batch_file_index_tr, batch_rows_tr, batch_cols_tr, batch_value_tr], [len(data_train_list) - delete_count, DATA['Vab_Size'], np.max(batch_len_tr)]) # use the shape of train_data to initialize the params of model
# Train, then run test() on both splits. The value returned by train() is
# replaced straight away by the test() result on the training data.
train_local_params = model.train(100, [batch_file_index_tr, batch_rows_tr, batch_cols_tr, batch_value_tr], [len(data_train_list) - delete_count, DATA['Vab_Size'], np.max(batch_len_tr)])
train_local_params = model.test(100, [batch_file_index_tr, batch_rows_tr, batch_cols_tr, batch_value_tr], [len(data_train_list) - delete_count, DATA['Vab_Size'], np.max(batch_len_tr)])
test_local_params = model.test(100, [batch_file_index_te, batch_rows_te, batch_cols_te, batch_value_te], [len(data_test_list) - delete_count, DATA['Vab_Size'], np.max(batch_len_te)])
# Collapse W_nk over its last two axes (3, then 2) and transpose the result to
# get the feature matrices passed to ACC.
train_theta = np.sum(np.sum(train_local_params.W_nk, axis=3), axis=2).T
test_theta = np.sum(np.sum(test_local_params.W_nk, axis=3), axis=2).T
# Score of test dataset's Theta: 0.682
results = ACC(train_theta, test_theta, batch_label_tr, batch_label_te, 'SVM')
# save the model after training
model.save()
PGDS
Poisson Gamma Dynamical Systems(PGDS)
Poisson-gamma dynamical systems. Aaron Schein, Hanna M. Wallach, and Mingyuan Zhou. In Advances in Neural Information Processing Systems.
Demo
from pydpm._model import PGDS

# NOTE: `sio`, `np` and `ACC` are used below but are not imported in this
# snippet (presumably scipy.io, numpy and pydpm's accuracy metric).

# Load the MNIST .mat file, scale the pixel matrix by 5 and round up.
mnist = sio.loadmat('./data/mnist_gray')
counts = np.array(np.ceil(mnist['train_mnist'] * 5), order='C')
labels = mnist['train_label']

# Both splits are cut from `train_mnist` / `train_label`: columns 0-998 are
# used for training and columns 1000-1998 for testing, with matching labels.
train_cols = slice(0, 999)
test_cols = slice(1000, 1999)
train_data, test_data = counts[:, train_cols], counts[:, test_cols]
train_label, test_label = labels[train_cols], labels[test_cols]

# Create the model and deploy it on gpu or cpu.
model = PGDS(100, 'gpu')
model.initial(train_data)

# Train, then run test() on both splits. The value returned by train() is
# replaced straight away by the test() result on the training data.
train_local_params = model.train(200, train_data)
train_local_params = model.test(200, train_data)
test_local_params = model.test(200, test_data)

# Evaluate the model with classification accuracy.
# The demo accuracy can achieve 0.8739.
train_theta = train_local_params.Theta
test_theta = test_local_params.Theta
results = ACC(train_theta, test_theta, train_label, test_label, 'SVM')

# Save the model after training.
model.save()
DPGDS
Deep Poisson Gamma Dynamical Systems(DPGDS)
Deep poisson gamma dynamical systems. Dandan Guo, Bo Chen, Hao Zhang, and Mingyuan Zhou. In Advances in Neural Information Processing Systems.
Demo
from pydpm._model import DPGDS

# NOTE: `sio`, `np` and `ACC` are used below but are not imported in this
# snippet (presumably scipy.io, numpy and pydpm's accuracy metric).

# Load the MNIST .mat file, scale the pixel matrix by 5 and round up.
mnist = sio.loadmat('./data/mnist_gray')
counts = np.array(np.ceil(mnist['train_mnist'] * 5), order='C')
labels = mnist['train_label']

# Both splits are cut from `train_mnist` / `train_label`: columns 0-998 are
# used for training and columns 1000-1998 for testing, with matching labels.
train_cols = slice(0, 999)
test_cols = slice(1000, 1999)
train_data, test_data = counts[:, train_cols], counts[:, test_cols]
train_label, test_label = labels[train_cols], labels[test_cols]

# Create the model and deploy it on gpu or cpu.
model = DPGDS([200, 100, 50], 'gpu')
model.initial(train_data)

# Train, then run test() on both splits. The value returned by train() is
# replaced straight away by the test() result on the training data.
train_local_params = model.train(200, train_data)
train_local_params = model.test(200, train_data)
test_local_params = model.test(200, test_data)

# Evaluate the model with classification accuracy, using only the first
# entry of Theta from each split.
# The demo accuracy can achieve 0.8519.
train_theta = train_local_params.Theta[0]
test_theta = test_local_params.Theta[0]
results = ACC(train_theta, test_theta, train_label, test_label, 'SVM')

# Save the model after training.
model.save()
MPGBN
Multimodal Poisson Gamma Belief Network(MPGBN)
Multimodal poisson gamma belief network. Chaojie Wang, Bo Chen, and Mingyuan Zhou. In AAAI Conference on Artificial Intelligence.
Demo
from pydpm._model import MPGBN

# NOTE: `sio`, `np` and `ACC` are used below but are not imported in this
# snippet (presumably scipy.io, numpy and pydpm's accuracy metric).

# Load the MNIST .mat file, scale the pixel matrix by 5 and round up.
mnist = sio.loadmat('./data/mnist_gray')
counts = np.array(np.ceil(mnist['train_mnist'] * 5), order='C')
labels = mnist['train_label']

# Both splits are cut from `train_mnist` / `train_label`: columns 0-998 are
# used for training and columns 1000-1998 for testing, with matching labels.
train_cols = slice(0, 999)
test_cols = slice(1000, 1999)
train_data, test_data = counts[:, train_cols], counts[:, test_cols]
train_label, test_label = labels[train_cols], labels[test_cols]

# Each split is cut row-wise into two parts (rows 0-359 and rows 360 onward),
# which are handed to the model as two separate inputs.
train_data_1, train_data_2 = train_data[:360, :], train_data[360:, :]
test_data_1, test_data_2 = test_data[:360, :], test_data[360:, :]

# Create the model and deploy it on gpu or cpu.
model = MPGBN([128, 64, 32], device='gpu')
model.initial(train_data_1, train_data_2)

# Train, then run test() on both splits. The value returned by train() is
# replaced straight away by the test() result on the training data.
train_local_params = model.train(100, train_data_1, train_data_2)
train_local_params = model.test(100, train_data_1, train_data_2)
test_local_params = model.test(100, test_data_1, test_data_2)

# Evaluate the model with classification accuracy, using only the first
# entry of Theta from each split.
# The demo accuracy can achieve 0.8659.
train_theta = train_local_params.Theta[0]
test_theta = test_local_params.Theta[0]
results = ACC(train_theta, test_theta, train_label, test_label, 'SVM')

# Save the model after training.
model.save()
GPGBN
Graph Poisson Gamma Belief Network(GPGBN)
Deep relational topic modeling via graph poisson gamma belief network. Chaojie Wang, Hao Zhang, Bo Chen, Dongsheng Wang, Zhengjue Wang, and Mingyuan Zhou. In Advances in Neural Information Processing Systems.
Demo
# NOTE(review): this section is titled GPGBN, but the snippet imports and
# instantiates CPGBN and matches the CPGBN demo line for line. It looks like
# a copy-paste placeholder -- confirm the intended GPGBN import, constructor
# arguments and input data before relying on it.
from pydpm._model import CPGBN

# NOTE: `cPickle`, `np` and `ACC` are used below but are not imported in this
# snippet.

# Load the pickled TREC data. The file is opened in a `with` block so the
# handle is closed as soon as loading finishes.
# pickle can execute arbitrary code while loading: only use a TREC.pkl that
# comes from a trusted source.
with open("data/TREC.pkl", "rb") as data_file:
    DATA = cPickle.load(data_file, encoding='iso-8859-1')
# ========== details of data process can be found in pydpm/example/CPFA_demo.py =========
# The batch_*_tr / batch_*_te arrays, data_train_list, data_test_list and
# delete_count used below are not defined in this snippet; presumably they are
# produced by that preprocessing step.

# create the model and deploy it on gpu or cpu
model = CPGBN([200, 100, 50], 'gpu')
# mode 1, dense input
model.initial([batch_file_index_tr, batch_rows_tr, batch_cols_tr, batch_value_tr], [len(data_train_list) - delete_count, DATA['Vab_Size'], np.max(batch_len_tr)]) # use the shape of train_data to initialize the params of model
# Train, then run test() on both splits. The value returned by train() is
# replaced straight away by the test() result on the training data.
train_local_params = model.train(100, [batch_file_index_tr, batch_rows_tr, batch_cols_tr, batch_value_tr], [len(data_train_list) - delete_count, DATA['Vab_Size'], np.max(batch_len_tr)])
train_local_params = model.test(100, [batch_file_index_tr, batch_rows_tr, batch_cols_tr, batch_value_tr], [len(data_train_list) - delete_count, DATA['Vab_Size'], np.max(batch_len_tr)])
test_local_params = model.test(100, [batch_file_index_te, batch_rows_te, batch_cols_te, batch_value_te], [len(data_test_list) - delete_count, DATA['Vab_Size'], np.max(batch_len_te)])
# Collapse W_nk over its last two axes (3, then 2) and transpose the result to
# get the feature matrices passed to ACC.
train_theta = np.sum(np.sum(train_local_params.W_nk, axis=3), axis=2).T
test_theta = np.sum(np.sum(test_local_params.W_nk, axis=3), axis=2).T
# Score of test dataset's Theta: 0.682
results = ACC(train_theta, test_theta, batch_label_tr, batch_label_te, 'SVM')
# save the model after training
model.save()