{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# LODA: projections & histograms\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Author: Ondrej Kur\u00e1k kurak@gaussalgo.com\n# License: LGPLv3+\nimport matplotlib.pyplot as plt\nimport numpy as np\n\n# gaussian_kde is exported from the public scipy.stats namespace; the\n# scipy.stats.kde submodule path is deprecated in SciPy.\nfrom scipy.stats import gaussian_kde\nfrom sklearn.datasets import make_blobs\n\nfrom anlearn.loda import LODA\n\n# Seeded generator used for the uniform outliers below.\nrng = np.random.RandomState(42)\n\nn_inliers = 900\nn_outliers = 100\nn_samples = n_inliers + n_outliers\n\nn_features = 5\n\n# Inliers: two blobs centred at -2 and +2 in every feature, with standard\n# deviations 1.5 and 0.3. make_blobs returns (X, y); only X is kept.\ndata = make_blobs(\n    centers=[[-2] * n_features, [2] * n_features],\n    cluster_std=[1.5, 0.3],\n    random_state=42,\n    n_samples=n_inliers,\n    n_features=n_features,\n)[0]\n\n# Outliers: uniform samples from [-6, 6) in every feature, appended after\n# the inliers.\ndata = np.concatenate(\n    [data, rng.uniform(low=-6, high=6, size=(n_outliers, n_features))]\n)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Fit LODA on the whole dataset, then label every sample.\nloda = LODA(n_estimators=5, bins=100, random_state=42, q=0.1)\nloda.fit(data)\npredicted = loda.predict(data)\n\n# Two-colour palette indexed by label: (label + 1) // 2 maps -1 -> 0 and\n# 1 -> 1 (assumes predict returns -1/1 labels -- confirm against anlearn).\ncolors = np.array([\"#377eb8\", \"#ff7f00\"])\npoint_colors = colors[(predicted + 1) // 2]\n\nfig, ax = plt.subplots(figsize=(12, 8), subplot_kw={\"aspect\": \"auto\"})\nfig.subplots_adjust(\n    left=0.02, right=0.98, bottom=0.001, top=0.96, wspace=0.05, hspace=0.01\n)\n# Only the first two feature columns are drawn.\nax.scatter(data[:, 0], data[:, 1], s=15, color=point_colors)\nax.set_xticks(())\nax.set_yticks(())\nax.set_title(\"LODA test dataset anomalous points\", fontsize=15)\nplt.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Project the samples onto every LODA projection vector; row i of w_X\n# pairs with loda.projections_[i] (assumes projections_ is laid out as\n# (n_estimators, n_features) -- confirm against anlearn).\nw_X = loda.projections_ @ data.T\n\nlabels = [f\"w={x.round(2)}\" for x in loda.projections_]\nn_points = 500\n# One x-range shared by all projections, spanning every projected value.\nbounds = (np.min(w_X), np.max(w_X))\nxx = np.linspace(*bounds, n_points)\n\nplt.figure(figsize=(12, 10))\nplt.subplot(111, aspect=\"auto\")\nplt.subplots_adjust(\n    left=0.02, right=0.98, bottom=0.001, top=0.96, wspace=0.05, hspace=0.01\n)\nfor i, (points, label) in enumerate(zip(w_X, labels)):\n    # Each projection gets its own baseline so the rows stack vertically.\n    y = i + 0.1\n    pdf = gaussian_kde(points)\n    plt.hist(points, density=True, bottom=y, bins=\"auto\", label=label)\n    # Kernel density estimate drawn on top of the histogram, same baseline.\n    plt.plot(xx, pdf(xx) + y, c=\"black\")\n\nplt.legend(fontsize=13)\nplt.title(\"LODA projections & histograms\", fontsize=15)\nplt.xlim(bounds)\nplt.yticks(())\nplt.show()\n\n\n# sphinx_gallery_thumbnail_number = 2"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8.6"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}