# -*- coding: utf-8 -*-
"""Example of using and visualizing ``generate_data_categorical`` function.
"""
# File: generate_data_categorical_example.py
# Author: Yahya Almardeny <almardeny@gmail.com>
# License: BSD 2 clause

from __future__ import division
from __future__ import print_function

import os
import sys
import numpy as np
import matplotlib.pyplot as plt

# Temporary solution for relative imports in case pyod is not installed.
# If pyod is installed, the sys.path tweak below is not needed.
#
# The parent directory is resolved relative to this file (not the current
# working directory) so the example also works when launched from elsewhere.
try:
    _example_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # ``__file__`` is undefined when the code is pasted into an interactive
    # session; fall back to the working directory in that case.
    _example_dir = os.getcwd()

sys.path.append(os.path.abspath(os.path.join(_example_dir, '..')))

from pyod.utils.data import generate_data_categorical


def _plot_categorical(X, y, title):
    """Show a bar chart and a line plot of labels against category values.

    Parameters
    ----------
    X : array-like
        Categorical samples; flattened with ``np.ravel`` and used as the
        x values of both plots.
    y : array-like
        One label per sample, used as the y values of both plots.
    title : str
        Title displayed above the whole figure.
    """
    cats = list(np.ravel(X))
    labels = list(y)

    fig, axs = plt.subplots(1, 2)
    axs[0].bar(cats, labels)
    axs[1].plot(cats, labels)
    # ``plt.title`` would only title the last subplot; ``suptitle`` puts the
    # title on the figure itself.
    fig.suptitle(title)
    plt.show()


def main():
    """Generate synthetic categorical data and plot the train/test splits."""
    contamination = 0.1  # proportion of outliers (0.1 == 10%)

    # Generate synthetic categorical train and test data.
    # A single feature is used because the visualization below can only be
    # done in one dimension.
    X_train, X_test, y_train, y_test = generate_data_categorical(
        n_train=200, n_test=50,
        n_category_in=8, n_category_out=5,
        n_informative=1, n_features=1,
        contamination=contamination,
        shuffle=True, random_state=42)

    _plot_categorical(X_train, y_train, 'Synthetic Categorical Train Data')
    _plot_categorical(X_test, y_test, 'Synthetic Categorical Test Data')


if __name__ == "__main__":
    main()