/ examples / generate_data_categorical_example.py
generate_data_categorical_example.py
 1  # -*- coding: utf-8 -*-
 2  """Example of using and visualizing ``generate_data_categorical`` function.
 3  """
 4  # Author: Yahya Almardeny <almardeny@gmail.com>
 5  # License: BSD 2 clause
 6  
 7  from __future__ import division
 8  from __future__ import print_function
 9  
10  import os
11  import sys
12  import numpy as np
13  import matplotlib.pyplot as plt
14  
# Temporary workaround so ``pyod`` can be imported when it is not installed:
# append the parent of this script's directory (presumably the repository
# root -- confirm against the project layout) to the module search path.
# Not needed once pyod is installed.
try:
    _example_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # ``__file__`` is undefined when this code is pasted into an interactive
    # session; fall back to the current working directory in that case.
    _example_dir = os.getcwd()

sys.path.append(os.path.abspath(os.path.join(_example_dir, '..')))
20  
21  from pyod.utils.data import generate_data_categorical
22  
if __name__ == "__main__":
    contamination = 0.1  # proportion of outliers

    # Generate synthetic categorical data with a single feature.
    X_train, X_test, y_train, y_test = generate_data_categorical(
        n_train=200, n_test=50,
        n_category_in=8, n_category_out=5,
        n_informative=1, n_features=1,
        contamination=contamination,
        shuffle=True, random_state=42)

    # Note that visualizing the data this way is only possible in
    # 1 dimension, which is why ``n_features=1`` is used above.
    splits = (
        ('Synthetic Categorical Train Data', X_train, y_train),
        ('Synthetic Categorical Test Data', X_test, y_test),
    )

    # One figure per split: the same (category, label) pairs drawn as a bar
    # chart on the first axes and as a line plot on the second.
    for title, X, y in splits:
        cats = list(np.ravel(X))
        labels = list(y)
        fig, axs = plt.subplots(1, 2)
        axs[0].bar(cats, labels)
        axs[1].plot(cats, labels)
        # ``plt.title`` would only title the current (last) axes; use a
        # figure-level title so it applies to both subplots.
        fig.suptitle(title)
        plt.show()