.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/ensemble/plot_gradient_boosting_oob.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_ensemble_plot_gradient_boosting_oob.py>`
        to download the full example code, or to run this example in your
        browser via JupyterLite or Binder.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_ensemble_plot_gradient_boosting_oob.py:

=======================================
Gradient Boosting Out-of-Bag estimates
=======================================

Out-of-bag (OOB) estimates can be a useful heuristic to estimate the
"optimal" number of boosting iterations. OOB estimates are almost
identical to cross-validation estimates, but they can be computed
on-the-fly, without the need for repeated model fitting. OOB estimates
are only available for Stochastic Gradient Boosting (i.e.
``subsample < 1.0``); the estimates are derived from the improvement in
loss on the examples not included in the bootstrap sample (the
so-called out-of-bag examples). The OOB estimator is a pessimistic
estimator of the true test loss, but remains a fairly good
approximation for a small number of trees.

The figure shows the cumulative sum of the negative OOB improvements as
a function of the boosting iteration. As you can see, it tracks the
test loss for the first hundred iterations but then diverges in a
pessimistic way. The figure also shows the performance of 3-fold
cross-validation, which usually gives a better estimate of the test
loss but is computationally more demanding.
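The core of the OOB heuristic fits in a few lines. The sketch below is
illustrative only: ``model`` is a hypothetical stand-in for any
``GradientBoostingClassifier`` already fitted with ``subsample < 1.0``
(such as the ``clf`` built in the full script below). The running sum of
the negated ``oob_improvement_`` values approximates the normalized OOB
loss, and its minimum marks the candidate number of iterations.

.. code-block:: Python

    import numpy as np

    # Sketch only: ``model`` stands for any GradientBoostingClassifier
    # fitted with subsample < 1.0, e.g. the ``clf`` from the script below.
    # ``oob_improvement_[i]`` holds the OOB improvement in loss gained by
    # the i-th boosting iteration; cumulating its negation gives a curve
    # whose minimum suggests the "optimal" number of iterations.
    oob_loss = -np.cumsum(model.oob_improvement_)
    best_n_iterations = int(np.argmin(oob_loss)) + 1  # iterations are 1-based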
.. GENERATED FROM PYTHON SOURCE LINES 17-139

.. image-sg:: /auto_examples/ensemble/images/sphx_glr_plot_gradient_boosting_oob_001.png
   :alt: plot gradient boosting oob
   :srcset: /auto_examples/ensemble/images/sphx_glr_plot_gradient_boosting_oob_001.png
   :class: sphx-glr-single-img

.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    Accuracy: 0.6860

.. code-block:: Python


    # Authors: The scikit-learn developers
    # SPDX-License-Identifier: BSD-3-Clause

    import matplotlib.pyplot as plt
    import numpy as np
    from scipy.special import expit

    from sklearn import ensemble
    from sklearn.metrics import log_loss
    from sklearn.model_selection import KFold, train_test_split

    # Generate data (adapted from G. Ridgeway's gbm example)
    n_samples = 1000
    random_state = np.random.RandomState(13)
    x1 = random_state.uniform(size=n_samples)
    x2 = random_state.uniform(size=n_samples)
    x3 = random_state.randint(0, 4, size=n_samples)
    p = expit(np.sin(3 * x1) - 4 * x2 + x3)
    y = random_state.binomial(1, p, size=n_samples)

    X = np.c_[x1, x2, x3]
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=9)

    # Fit classifier with out-of-bag estimates
    params = {
        "n_estimators": 1200,
        "max_depth": 3,
        "subsample": 0.5,
        "learning_rate": 0.01,
        "min_samples_leaf": 1,
        "random_state": 3,
    }
    clf = ensemble.GradientBoostingClassifier(**params)

    clf.fit(X_train, y_train)
    acc = clf.score(X_test, y_test)
    print("Accuracy: {:.4f}".format(acc))

    n_estimators = params["n_estimators"]
    x = np.arange(n_estimators) + 1


    def heldout_score(clf, X_test, y_test):
        """Compute deviance scores on ``X_test`` and ``y_test``."""
        score = np.zeros((n_estimators,), dtype=np.float64)
        for i, y_proba in enumerate(clf.staged_predict_proba(X_test)):
            score[i] = 2 * log_loss(y_test, y_proba[:, 1])
        return score


    def cv_estimate(n_splits=None):
        cv = KFold(n_splits=n_splits)
        cv_clf = ensemble.GradientBoostingClassifier(**params)
        val_scores = np.zeros((n_estimators,), dtype=np.float64)
        for train, test in cv.split(X_train, y_train):
            cv_clf.fit(X_train[train], y_train[train])
            val_scores += heldout_score(cv_clf, X_train[test], y_train[test])
        val_scores /= n_splits
        return val_scores


    # Estimate best n_estimator using cross-validation
    cv_score = cv_estimate(3)

    # Compute best n_estimator for test data
    test_score = heldout_score(clf, X_test, y_test)

    # negative cumulative sum of oob improvements
    cumsum = -np.cumsum(clf.oob_improvement_)

    # min loss according to OOB
    oob_best_iter = x[np.argmin(cumsum)]

    # min loss according to test (normalize such that first loss is 0)
    test_score -= test_score[0]
    test_best_iter = x[np.argmin(test_score)]

    # min loss according to cv (normalize such that first loss is 0)
    cv_score -= cv_score[0]
    cv_best_iter = x[np.argmin(cv_score)]

    # color brew for the three curves
    oob_color = list(map(lambda x: x / 256.0, (190, 174, 212)))
    test_color = list(map(lambda x: x / 256.0, (127, 201, 127)))
    cv_color = list(map(lambda x: x / 256.0, (253, 192, 134)))

    # line type for the three curves
    oob_line = "dashed"
    test_line = "solid"
    cv_line = "dashdot"

    # plot curves and vertical lines for best iterations
    plt.figure(figsize=(8, 4.8))
    plt.plot(x, cumsum, label="OOB loss", color=oob_color, linestyle=oob_line)
    plt.plot(x, test_score, label="Test loss", color=test_color, linestyle=test_line)
    plt.plot(x, cv_score, label="CV loss", color=cv_color, linestyle=cv_line)
    plt.axvline(x=oob_best_iter, color=oob_color, linestyle=oob_line)
    plt.axvline(x=test_best_iter, color=test_color, linestyle=test_line)
    plt.axvline(x=cv_best_iter, color=cv_color, linestyle=cv_line)

    # add three vertical lines to xticks
    xticks = plt.xticks()
    xticks_pos = np.array(
        xticks[0].tolist() + [oob_best_iter, cv_best_iter, test_best_iter]
    )
    xticks_label = np.array(list(map(lambda t: int(t), xticks[0])) + ["OOB", "CV", "Test"])
    ind = np.argsort(xticks_pos)
    xticks_pos = xticks_pos[ind]
    xticks_label = xticks_label[ind]
    plt.xticks(xticks_pos, xticks_label, rotation=90)

    plt.legend(loc="upper center")
    plt.ylabel("normalized loss")
    plt.xlabel("number of iterations")

    plt.show()
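To act on the OOB estimate rather than just plot it, one natural
follow-up (not part of the original example) is to refit the classifier
at the OOB-selected size. A minimal sketch, reusing ``params``,
``oob_best_iter``, and the train/test split from the script above:

.. code-block:: Python

    # Hypothetical follow-up: refit with the OOB-selected number of
    # iterations and check the held-out accuracy. Reuses ``params``,
    # ``oob_best_iter``, ``X_train``, ``y_train``, ``X_test`` and
    # ``y_test`` from the script above.
    refit_params = {**params, "n_estimators": int(oob_best_iter)}
    refit_clf = ensemble.GradientBoostingClassifier(**refit_params)
    refit_clf.fit(X_train, y_train)
    print("Accuracy at OOB-selected size: {:.4f}".format(refit_clf.score(X_test, y_test)))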
.. rst-class:: sphx-glr-timing

   **Total running time of the script:** (0 minutes 11.122 seconds)


.. _sphx_glr_download_auto_examples_ensemble_plot_gradient_boosting_oob.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: binder-badge

      .. image:: images/binder_badge_logo.svg
        :target: https://mybinder.org/v2/gh/scikit-learn/scikit-learn/main?urlpath=lab/tree/notebooks/auto_examples/ensemble/plot_gradient_boosting_oob.ipynb
        :alt: Launch binder
        :width: 150 px

    .. container:: lite-badge

      .. image:: images/jupyterlite_badge_logo.svg
        :target: ../../lite/lab/index.html?path=auto_examples/ensemble/plot_gradient_boosting_oob.ipynb
        :alt: Launch JupyterLite
        :width: 150 px

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: plot_gradient_boosting_oob.ipynb <plot_gradient_boosting_oob.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: plot_gradient_boosting_oob.py <plot_gradient_boosting_oob.py>`

    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: plot_gradient_boosting_oob.zip <plot_gradient_boosting_oob.zip>`

.. include:: plot_gradient_boosting_oob.recommendations

.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_