Outlier Detection with Scikit-learn (Isolation Forest)

import  pandas as pd
import  numpy as np

from sklearn.ensemble import IsolationForest

# Flag outliers in the iris measurements with an Isolation Forest.
#
# The script loads the iris CSV, fits the model on the four numeric
# measurement columns, and prints the per-row labels plus how many rows
# received each label.

# Path is relative to the directory the script is run from.
iris = pd.read_csv("../data/iris.csv")
print(iris)

# Only the numeric measurements go into the model; the species label
# column is left out of the feature frame.
xx = iris[["sepal_length", "sepal_width", "petal_length", "petal_width"]]
print(xx)

# Isolation Forest builds randomized trees, so without a fixed seed the set
# of flagged rows changes from run to run. Pinning random_state makes the
# printed result reproducible.
isf = IsolationForest(random_state=42)

# fit_predict fits the model on xx and labels the same rows in one call:
# 1 marks an inlier, -1 marks an outlier.
predik = isf.fit_predict(xx)

print(predik)

# Wrap the label array in a one-column DataFrame so the labels can be
# tallied with value_counts().
df_pre = pd.DataFrame(predik)
print(df_pre)
print(df_pre.value_counts())

Output

/home/mfahri/OneDrive/ml/venv/bin/python /home/mfahri/OneDrive/ml/belajar/iris_eda.py
     sepal_length  sepal_width  petal_length  petal_width    species
0             5.1          3.5           1.4          0.2     setosa
1             4.9          3.0           1.4          0.2     setosa
2             4.7          3.2           1.3          0.2     setosa
3             4.6          3.1           1.5          0.2     setosa
4             5.0          3.6           1.4          0.2     setosa
..            ...          ...           ...          ...        ...
145           6.7          3.0           5.2          2.3  virginica
146           6.3          2.5           5.0          1.9  virginica
147           6.5          3.0           5.2          2.0  virginica
148           6.2          3.4           5.4          2.3  virginica
149           5.9          3.0           5.1          1.8  virginica

[150 rows x 5 columns]
     sepal_length  sepal_width  petal_length  petal_width
0             5.1          3.5           1.4          0.2
1             4.9          3.0           1.4          0.2
2             4.7          3.2           1.3          0.2
3             4.6          3.1           1.5          0.2
4             5.0          3.6           1.4          0.2
..            ...          ...           ...          ...
145           6.7          3.0           5.2          2.3
146           6.3          2.5           5.0          1.9
147           6.5          3.0           5.2          2.0
148           6.2          3.4           5.4          2.3
149           5.9          3.0           5.1          1.8

[150 rows x 4 columns]
[ 1  1  1  1  1 -1  1  1 -1  1  1  1  1 -1 -1 -1 -1  1 -1  1  1  1 -1 -1
  1  1  1  1  1  1  1  1 -1 -1  1  1  1  1  1  1  1 -1  1 -1 -1  1  1  1
  1  1 -1  1  1  1  1  1  1 -1  1  1 -1  1 -1  1  1  1  1  1 -1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1  1
  1  1 -1  1 -1  1  1  1  1 -1 -1 -1 -1 -1  1  1  1 -1 -1  1  1 -1 -1 -1
  1  1 -1  1  1  1  1  1  1 -1 -1 -1  1  1  1 -1 -1  1  1  1  1  1  1  1
 -1  1  1  1 -1  1]
     0
0    1
1    1
2    1
3    1
4    1
..  ..
145  1
146  1
147  1
148 -1
149  1

[150 rows x 1 columns]
 1    110
-1     40
dtype: int64

Process finished with exit code 0

 196 total views

Tinggalkan Balasan

Alamat email Anda tidak akan dipublikasikan. Ruas yang wajib ditandai *