Skip to content

sklearn

Test snippet

python
import numpy as np
from sklearn.naive_bayes import GaussianNB

# Toy training set: three points in the negative quadrant (class 1) and
# three in the positive quadrant (class 2).
X = np.array(
    [
        [-1, -1],
        [-2, -1],
        [-3, -2],
        [1, 1],
        [2, 1],
        [3, 2],
    ]
)
Y = np.array([1] * 3 + [2] * 3)

# fit() returns the estimator itself, so construction and training can be
# chained into a single expression.
clf = GaussianNB().fit(X, Y)

### save model artifact
import joblib

# MODEL and FILENAME are placeholders that this snippet never defines:
# substitute the object to persist (presumably a fitted estimator such as
# `clf`) and the destination path before running, otherwise this line
# raises NameError.
joblib.dump(MODEL, FILENAME)

Visualize dataset clusters via t-SNE

python
from sklearn.manifold import TSNE
import seaborn as sns

import warnings

# NOTE(review): this silences every warning category for the rest of the
# session, not only the noisy ones emitted by the libraries used below.
warnings.simplefilter(action="ignore")

#####################
# Overwrite each categorical column with its integer category codes.
for i in categ_columns:
    df[i] = df[i].astype("category").cat.codes

# Embed with the default TSNE settings; only `numer_columns` are fed to it.
tsne = TSNE()
X_embedded = tsne.fit_transform(df[numer_columns])

#####################
sns.set(rc={"figure.figsize": (11.7, 8.27)})
# x and y must be passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so the positional form raises TypeError there.
sns.scatterplot(
    x=X_embedded[:, 0],
    y=X_embedded[:, 1],
    hue=df["PACKAGENAME"],
    legend="full",
)