.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/applications/plot_prediction_latency.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_applications_plot_prediction_latency.py>`
        to download the full example code or to run this example in your browser via JupyterLite or Binder.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_applications_plot_prediction_latency.py:


==================
تأخير التنبؤ
==================

هذا مثال يوضح تأخير التنبؤ لمختلف الخوارزميات في مكتبة ساي كيت ليرن.

الهدف هو قياس التأخير المتوقع عند إجراء التنبؤات إما بالجملة أو بالطريقة الذرية (أي واحدًا تلو الآخر).

تمثل المخططات توزيع تأخير التنبؤ على شكل مخطط صندوقي.

.. GENERATED FROM PYTHON SOURCE LINES 12-37

.. code-block:: Python


    # المؤلفون: مطورو ساي كيت ليرن
    # SPDX-License-Identifier: BSD-3-Clause

    import gc
    import time
    from collections import defaultdict

    import matplotlib.pyplot as plt
    import numpy as np

    from sklearn.datasets import make_regression
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.linear_model import Ridge, SGDRegressor
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVR
    from sklearn.utils import shuffle


    def _not_in_sphinx():
        """Return True when this module's globals define ``__file__``.

        Trick used to detect whether the code is being run by the Sphinx
        builder.
        """
        module_namespace = globals()
        return "__file__" in module_namespace


.. GENERATED FROM PYTHON SOURCE LINES 38-40

وظائف مساعدة للقياس والرسم
-----------------------------------

.. GENERATED FROM PYTHON SOURCE LINES 40-290

.. code-block:: Python


    def atomic_benchmark_estimator(estimator, X_test, verbose=False):
        """Measure the prediction runtime of each test instance individually.

        Parameters
        ----------
        estimator : object exposing `predict()`
            Called once per row of `X_test`.
        X_test : 2D array supporting ``X_test[[i], :]`` indexing
            Test inputs; ``X_test.shape[0]`` is the number of instances.
        verbose : bool, default=False
            If True, print the minimum, median and maximum runtime.

        Returns
        -------
        runtimes : `np.array` of shape (n_instances,)
            Elapsed seconds of each single-instance `predict()` call.

        """
        n_instances = X_test.shape[0]
        runtimes = np.zeros(n_instances, dtype=float)
        for i in range(n_instances):
            # Slice outside the timed region; the list index keeps the row 2D.
            instance = X_test[[i], :]
            # perf_counter() is monotonic and uses the highest-resolution clock
            # available, which matters for calls lasting only microseconds.
            start = time.perf_counter()
            estimator.predict(instance)
            runtimes[i] = time.perf_counter() - start
        if verbose:
            print(
                "atomic_benchmark runtimes:",
                runtimes.min(),
                np.percentile(runtimes, 50),
                runtimes.max(),
            )
        return runtimes


    def bulk_benchmark_estimator(estimator, X_test, n_bulk_repeats, verbose):
        """Measure the per-instance prediction runtime when predicting in bulk.

        Parameters
        ----------
        estimator : object exposing `predict()`
            Called `n_bulk_repeats` times on the whole of `X_test`.
        X_test : array with a `shape` attribute
            Test inputs; ``X_test.shape[0]`` is the number of instances.
        n_bulk_repeats : int
            Number of timed whole-batch predictions.
        verbose : bool
            If True, print the minimum, median and maximum runtime.

        Returns
        -------
        runtimes : `np.array` of shape (n_bulk_repeats,)
            Elapsed seconds of each batch prediction divided by the number of
            instances in the batch.

        """
        n_instances = X_test.shape[0]
        runtimes = np.zeros(n_bulk_repeats, dtype=float)
        for i in range(n_bulk_repeats):
            # perf_counter() is monotonic and uses the highest-resolution clock
            # available, unlike the wall clock returned by time.time().
            start = time.perf_counter()
            estimator.predict(X_test)
            runtimes[i] = time.perf_counter() - start
        # Normalize each whole-batch timing to a per-instance figure.
        runtimes = runtimes / float(n_instances)
        if verbose:
            print(
                "bulk_benchmark runtimes:",
                runtimes.min(),
                np.percentile(runtimes, 50),
                runtimes.max(),
            )
        return runtimes


    def benchmark_estimator(estimator, X_test, n_bulk_repeats=30, verbose=False):
        """
        Measure prediction runtimes in both atomic and bulk mode.

        Parameters
        ----------
        estimator : already trained estimator supporting `predict()`
        X_test : test input
        n_bulk_repeats : number of repetitions when evaluating bulk mode
        verbose : passed through to both benchmark helpers

        Returns
        -------
        atomic_runtimes, bulk_runtimes : a pair of `np.array` which contain the
        runtimes in seconds.

        """
        per_instance_timings = atomic_benchmark_estimator(
            estimator, X_test, verbose=verbose
        )
        whole_batch_timings = bulk_benchmark_estimator(
            estimator, X_test, n_bulk_repeats, verbose
        )
        return per_instance_timings, whole_batch_timings


    def generate_dataset(n_train, n_test, n_features, noise=0.1, verbose=False):
        """Generate a standardized regression dataset with the given parameters.

        Parameters
        ----------
        n_train : int
            Number of training samples.
        n_test : int
            Number of test samples.
        n_features : int
            Number of features passed to `make_regression`.
        noise : float, default=0.1
            Noise level passed to `make_regression`.
        verbose : bool, default=False
            If True, print progress messages.

        Returns
        -------
        X_train, y_train, X_test, y_test : arrays
            Features and targets, both scaled with a `StandardScaler` fitted
            on the training split only.

        Notes
        -----
        Only the train/test split and the shuffle are seeded; the call to
        `make_regression` receives no `random_state`.

        """
        if verbose:
            print("generating dataset...")

        # The ground-truth coefficients were never used, so they are no longer
        # requested; `coef` only changes what make_regression returns.
        X, y = make_regression(
            n_samples=n_train + n_test, n_features=n_features, noise=noise
        )

        random_seed = 13
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=n_train, test_size=n_test, random_state=random_seed
        )
        X_train, y_train = shuffle(X_train, y_train, random_state=random_seed)

        X_scaler = StandardScaler()
        X_train = X_scaler.fit_transform(X_train)
        X_test = X_scaler.transform(X_test)

        # StandardScaler expects 2D input: add a column axis, then drop it again.
        y_scaler = StandardScaler()
        y_train = y_scaler.fit_transform(y_train[:, None])[:, 0]
        y_test = y_scaler.transform(y_test[:, None])[:, 0]

        gc.collect()
        if verbose:
            print("ok")
        return X_train, y_train, X_test, y_test


    def boxplot_runtimes(runtimes, pred_type, configuration):
        """
        Plot a new figure with boxplots of prediction runtimes.

        Parameters
        ----------
        runtimes : list of `np.array` of latencies in micro-seconds
        pred_type : 'bulk' or 'atomic'
        configuration : dict providing "n_features" and "estimators"; each
            estimator entry is read for its "name", "instance",
            "complexity_computer" and "complexity_label" keys

        """
        _, ax1 = plt.subplots(figsize=(10, 6))
        box_artists = ax1.boxplot(runtimes)

        # One tick label per estimator: its name plus a complexity measure.
        tick_labels = []
        for conf in configuration["estimators"]:
            label = "%s\n(%d %s)" % (
                conf["name"],
                conf["complexity_computer"](conf["instance"]),
                conf["complexity_label"],
            )
            tick_labels.append(label)
        ax1.set_xticklabels(tick_labels)

        for part in ("boxes", "whiskers"):
            plt.setp(box_artists[part], color="black")
        plt.setp(box_artists["fliers"], color="red", marker="+")

        ax1.yaxis.grid(True, linestyle="-", which="major", color="lightgrey", alpha=0.5)
        ax1.set_axisbelow(True)

        title = "Prediction Time per Instance - %s, %d feats." % (
            pred_type.capitalize(),
            configuration["n_features"],
        )
        ax1.set_title(title)
        ax1.set_ylabel("Prediction Time (us)")

        plt.show()


    def benchmark(configuration):
        """Run the whole benchmark.

        Fits every estimator in ``configuration["estimators"]`` on a freshly
        generated dataset, times its predictions in atomic and bulk mode, and
        draws one boxplot figure per mode.
        """
        X_train, y_train, X_test, _ = generate_dataset(
            configuration["n_train"], configuration["n_test"], configuration["n_features"]
        )

        timings = {}
        for conf in configuration["estimators"]:
            model = conf["instance"]
            print("Benchmarking", model)
            model.fit(X_train, y_train)
            gc.collect()
            atomic_runtimes, bulk_runtimes = benchmark_estimator(model, X_test)
            timings[conf["name"]] = {"atomic": atomic_runtimes, "bulk": bulk_runtimes}

        names = [conf["name"] for conf in configuration["estimators"]]

        # Runtimes are recorded in seconds; the plots expect microseconds.
        atomic_us = [1e6 * timings[name]["atomic"] for name in names]
        boxplot_runtimes(atomic_us, "atomic", configuration)

        bulk_us = [1e6 * timings[name]["bulk"] for name in names]
        bulk_label = "bulk (%d)" % configuration["n_test"]
        boxplot_runtimes(bulk_us, bulk_label, configuration)


    def n_feature_influence(estimators, n_train, n_test, n_features, percentile):
        """
        Estimate the influence of the number of features on prediction time.

        Parameters
        ----------

        estimators : dict of (name (str), estimator) to benchmark
        n_train : number of training instances (int)
        n_test : number of testing instances (int)
        n_features : list of feature-space dimensionalities to test (int)
        percentile : percentile at which to measure the speed (int [0-100])

        Returns
        -------

        percentiles : dict(estimator_name,
                           dict(n_features, percentile_perf_in_us))

        """
        n_bulk_repeats = 30
        percentiles = defaultdict(defaultdict)
        for n_feats in n_features:
            print("benchmarking with %d features" % n_feats)
            X_train, y_train, X_test, _ = generate_dataset(n_train, n_test, n_feats)
            for name, model in estimators.items():
                model.fit(X_train, y_train)
                gc.collect()
                timings = bulk_benchmark_estimator(model, X_test, n_bulk_repeats, False)
                # Convert the selected percentile from seconds to microseconds.
                latency_us = 1e6 * np.percentile(timings, percentile)
                percentiles[name][n_feats] = latency_us
        return percentiles


    def plot_n_features_influence(percentiles, percentile):
        """Plot prediction time against the number of features.

        Parameters
        ----------
        percentiles : dict(estimator_name, dict(n_features, value))
            One line is drawn per estimator, with the `n_features` keys sorted
            along the x axis.
        percentile : int
            Percentile the values were measured at; only used in the y label.

        """
        _, ax1 = plt.subplots(figsize=(10, 6))
        colors = ["r", "g", "b"]
        for i, per_n_features in enumerate(percentiles.values()):
            x = np.array(sorted(per_n_features))
            y = np.array([per_n_features[n] for n in x])
            # Wrap around the palette so that more than three estimators do
            # not raise an IndexError.
            ax1.plot(x, y, color=colors[i % len(colors)])
        ax1.yaxis.grid(True, linestyle="-", which="major", color="lightgrey", alpha=0.5)
        ax1.set_axisbelow(True)
        ax1.set_title("Evolution of Prediction Time with #Features")
        ax1.set_xlabel("#Features")
        ax1.set_ylabel("Prediction Time at %d%%-ile (us)" % percentile)
        plt.show()
    def benchmark_throughputs(configuration, duration_secs=0.1):
        """Benchmark single-instance prediction throughput for each estimator.

        Parameters
        ----------
        configuration : dict
            Provides "n_train", "n_test", "n_features" and "estimators"; each
            estimator entry is read for its "name" and "instance" keys.
        duration_secs : float, default=0.1
            Minimum time spent predicting with each estimator.

        Returns
        -------
        throughputs : dict(estimator_name, predictions_per_second)

        """
        X_train, y_train, X_test, _ = generate_dataset(
            configuration["n_train"], configuration["n_test"], configuration["n_features"]
        )
        # Slice once, outside the timed loop, so that only predict() is measured.
        single_instance = X_test[[0]]
        throughputs = {}
        for estimator_config in configuration["estimators"]:
            estimator = estimator_config["instance"]
            estimator.fit(X_train, y_train)
            n_predictions = 0
            elapsed = 0.0
            # perf_counter() is monotonic and high-resolution, unlike time.time().
            start_time = time.perf_counter()
            while elapsed < duration_secs:
                estimator.predict(single_instance)
                n_predictions += 1
                elapsed = time.perf_counter() - start_time
            # Divide by the time actually spent: the last predict() overshoots
            # duration_secs, so dividing by the target overstates throughput.
            # No prediction runs when duration_secs <= 0; report zero then.
            throughputs[estimator_config["name"]] = (
                n_predictions / elapsed if n_predictions else 0.0
            )
        return throughputs


    def plot_benchmark_throughput(throughputs, configuration):
        """Draw a bar chart of prediction throughput, one bar per estimator.

        Parameters
        ----------
        throughputs : dict(estimator_name, predictions_per_second)
        configuration : dict
            Provides "n_features" and "estimators"; each estimator entry is
            read for its "name", "instance", "complexity_computer" and
            "complexity_label" keys.

        """
        _, ax = plt.subplots(figsize=(10, 6))
        colors = ["r", "g", "b"]
        cls_infos = [
            "%s\n(%d %s)"
            % (
                estimator_conf["name"],
                estimator_conf["complexity_computer"](estimator_conf["instance"]),
                estimator_conf["complexity_label"],
            )
            for estimator_conf in configuration["estimators"]
        ]
        cls_values = [
            throughputs[estimator_conf["name"]]
            for estimator_conf in configuration["estimators"]
        ]
        # Bars are centred on their x position, so ticks go at those same
        # positions; the previous +0.25 offset pushed labels to the bar edges.
        positions = np.arange(len(cls_values))
        ax.bar(positions, cls_values, width=0.5, color=colors)
        ax.set_xticks(positions)
        ax.set_xticklabels(cls_infos, fontsize=10)
        # Leave 20% headroom above the tallest bar.
        ymax = max(cls_values) * 1.2
        ax.set_ylim((0, ymax))
        ax.set_ylabel("Throughput (predictions/sec)")
        ax.set_title(
            "Prediction Throughput for different estimators (%d features)"
            % configuration["n_features"]
        )
        plt.show()


.. GENERATED FROM PYTHON SOURCE LINES 291-293

قياس سرعة التنبؤ بالجملة/الذرية لمختلف الخوارزميات
-------------------------------------------------------------

.. GENERATED FROM PYTHON SOURCE LINES 293-323

.. code-block:: Python


    # Shared benchmark setup: dataset dimensions plus the estimators to time.
    # Each estimator entry carries a display name, the instance to fit, and a
    # callable/label pair used to report the fitted model's complexity on plots.
    configuration = {
        "n_train": 1000,
        "n_test": 100,
        "n_features": 100,
        "estimators": [
            {
                "name": "Linear Model",
                "instance": SGDRegressor(
                    penalty="elasticnet",
                    alpha=0.01,
                    l1_ratio=0.25,
                    tol=1e-4,
                ),
                "complexity_label": "non-zero coefficients",
                "complexity_computer": lambda model: np.count_nonzero(model.coef_),
            },
            {
                "name": "RandomForest",
                "instance": RandomForestRegressor(),
                "complexity_label": "estimators",
                "complexity_computer": lambda model: model.n_estimators,
            },
            {
                "name": "SVR",
                "instance": SVR(kernel="rbf"),
                "complexity_label": "support vectors",
                "complexity_computer": lambda model: len(model.support_vectors_),
            },
        ],
    }
    benchmark(configuration)


.. rst-class:: sphx-glr-horizontal


    *

      .. image-sg:: /auto_examples/applications/images/sphx_glr_plot_prediction_latency_001.png
         :alt: Prediction Time per Instance - Atomic, 100 feats.
         :srcset: /auto_examples/applications/images/sphx_glr_plot_prediction_latency_001.png
         :class: sphx-glr-multi-img

    *

      .. image-sg:: /auto_examples/applications/images/sphx_glr_plot_prediction_latency_002.png
         :alt: Prediction Time per Instance - Bulk (100), 100 feats.
         :srcset: /auto_examples/applications/images/sphx_glr_plot_prediction_latency_002.png
         :class: sphx-glr-multi-img


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    Benchmarking SGDRegressor(alpha=0.01, l1_ratio=0.25, penalty='elasticnet', tol=0.0001)
    Benchmarking RandomForestRegressor()
    Benchmarking SVR()


.. GENERATED FROM PYTHON SOURCE LINES 324-326

قياس تأثير n_features على سرعة التنبؤ
--------------------------------------------------

.. GENERATED FROM PYTHON SOURCE LINES 326-337

.. code-block:: Python


    # Time a Ridge model in bulk mode for growing feature counts and plot the
    # 90th-percentile latency.
    percentile = 90
    percentiles = n_feature_influence(
        estimators={"ridge": Ridge()},
        n_train=configuration["n_train"],
        n_test=configuration["n_test"],
        n_features=[100, 250, 500],
        percentile=percentile,
    )
    plot_n_features_influence(percentiles, percentile)


.. image-sg:: /auto_examples/applications/images/sphx_glr_plot_prediction_latency_003.png
   :alt: Evolution of Prediction Time with #Features
   :srcset: /auto_examples/applications/images/sphx_glr_plot_prediction_latency_003.png
   :class: sphx-glr-single-img


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    benchmarking with 100 features
    benchmarking with 250 features
    benchmarking with 500 features


.. GENERATED FROM PYTHON SOURCE LINES 338-340

قياس الإنتاجية
--------------------

.. GENERATED FROM PYTHON SOURCE LINES 340-342

.. code-block:: Python


    # Measure prediction throughput for every estimator in `configuration`,
    # then plot the results.
    throughputs = benchmark_throughputs(configuration)
    plot_benchmark_throughput(throughputs, configuration)


.. image-sg:: /auto_examples/applications/images/sphx_glr_plot_prediction_latency_004.png
   :alt: Prediction Throughput for different estimators (100 features)
   :srcset: /auto_examples/applications/images/sphx_glr_plot_prediction_latency_004.png
   :class: sphx-glr-single-img


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** (0 minutes 16.644 seconds)


.. _sphx_glr_download_auto_examples_applications_plot_prediction_latency.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: binder-badge

      .. image:: images/binder_badge_logo.svg
        :target: https://mybinder.org/v2/gh/scikit-learn/scikit-learn/main?urlpath=lab/tree/notebooks/auto_examples/applications/plot_prediction_latency.ipynb
        :alt: Launch binder
        :width: 150 px

    .. container:: lite-badge

      .. image:: images/jupyterlite_badge_logo.svg
        :target: ../../lite/lab/index.html?path=auto_examples/applications/plot_prediction_latency.ipynb
        :alt: Launch JupyterLite
        :width: 150 px

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: plot_prediction_latency.ipynb <plot_prediction_latency.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: plot_prediction_latency.py <plot_prediction_latency.py>`

    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: plot_prediction_latency.zip <plot_prediction_latency.zip>`


.. include:: plot_prediction_latency.recommendations


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_