Source code for byzfl.attacks.attacks

import numpy as np
from byzfl.utils.misc import check_vectors_type, random_tool
from byzfl.aggregators import Average, Median, TrMean, GeometricMedian, Krum, MultiKrum, CenteredClipping, MDA, MoNNA, Meamed, Clipping, NNM, ARC, Bucketing


[docs]
class SignFlipping:
    
    r"""
    Description
    -----------
    
    Send the opposite of the mean vector [1]_.

    .. math::

        \mathrm{SignFlipping} \ (x_1, \dots, x_n) = - \frac{1}{n}\sum_{i=1}^{n} x_i

    where

    - :math:`x_1, \dots, x_n` are the input vectors, which conceptually correspond to correct gradients submitted by honest participants during a training iteration.
    
    Initialization parameters
    --------------------------

    None

    Calling the instance
    --------------------

    Input parameters
    ----------------

    vectors: numpy.ndarray, torch.Tensor, list of numpy.ndarray or list of torch.Tensor
        A set of vectors, matrix or tensors.

    Returns
    -------
    :numpy.ndarray or torch.Tensor
        The data type of the output is the same as the input.

    Examples
    --------

    >>> import byzfl
    >>> attack = byzfl.SignFlipping()

    Using numpy arrays
        
    >>> import numpy as np
    >>> x = np.array([[1., 2., 3.],       # np.ndarray
    >>>               [4., 5., 6.], 
    >>>               [7., 8., 9.]])
    >>> attack(x)
    array([-4. -5. -6.])
            
    Using torch tensors
        
    >>> import torch
    >>> x = torch.tensor([[1., 2., 3.],   # torch.tensor 
    >>>                   [4., 5., 6.], 
    >>>                   [7., 8., 9.]])
    >>> attack(x)
    tensor([-4., -5., -6.])

    Using list of numpy arrays

    >>> import numpy as np
    >>> x = [np.array([1., 2., 3.]),      # list of np.ndarray  
    >>>      np.array([4., 5., 6.]), 
    >>>      np.array([7., 8., 9.])]
    >>> attack(x)
    array([-4., -5., -6.])

    Using list of torch tensors
        
    >>> import torch
    >>> x = [torch.tensor([1., 2., 3.]),  # list of torch.tensor 
    >>>      torch.tensor([4., 5., 6.]), 
    >>>      torch.tensor([7., 8., 9.])]
    >>> attack(x)
    tensor([-4., -5., -6.])


    References
    ----------

    .. [1] Zeyuan Allen-Zhu, Faeze Ebrahimianghazani, Jerry Li, and Dan Alistarh. Byzantine-resilient non-convex stochastic gradient descent.
        In International Conference on Learning Representations, 2020

    """

    def __init__(self):
        pass
    
    def __call__(self, honest_vectors):
        tools, honest_vectors = check_vectors_type(honest_vectors)
        mean_vector = tools.mean(honest_vectors, axis=0)
        return tools.multiply(mean_vector, -1)




[docs]
class InnerProductManipulation:
    
    r"""
    Description
    -----------

    Execute the Inner Product Manipulation (IPM) attack [1]_: multiplicatively scale the mean vector by :math:`- \tau`.

    .. math::

        \text{IPM}_{\tau}(x_1, \dots, x_n) = - \tau \cdot \frac{1}{n} \sum_{i=1}^{n} x_i

    where 

    - :math:`x_1, \dots, x_n` are the input vectors, which conceptually correspond to correct gradients submitted by honest participants during a training iteration.

    - :math:`\tau > 0` is the attack factor.

    Initialization parameters
    --------------------------

    tau : float, optional
        The attack factor :math:`\tau` used to adjust the mean vector. Set to 2.0 by default.

    Calling the instance
    --------------------

    Input parameters
    ----------------

    vectors: numpy.ndarray, torch.Tensor, list of numpy.ndarray or list of torch.Tensor
        A set of vectors, matrix or tensors.
    

    Returns
    -------
    :numpy.ndarray or torch.Tensor
        The data type of the output is the same as the input.
    
    Examples
    --------

    >>> import byzfl
    >>> attack = byzfl.InnerProductManipulation(2.0)

    Using numpy arrays
        
    >>> import numpy as np
    >>> x = np.array([[1., 2., 3.],       # np.ndarray
    >>>               [4., 5., 6.], 
    >>>               [7., 8., 9.]])
    >>> attack(x)
    array([ -8. -10. -12.])
            
    Using torch tensors
        
    >>> import torch
    >>> x = torch.tensor([[1., 2., 3.],   # torch.tensor 
    >>>                   [4., 5., 6.], 
    >>>                   [7., 8., 9.]])
    >>> attack(x)
    tensor([-8., -10., -12.])

    Using list of numpy arrays

    >>> import numpy as np
    >>> x = [np.array([1., 2., 3.]),      # list of np.ndarray  
    >>>      np.array([4., 5., 6.]), 
    >>>      np.array([7., 8., 9.])]
    >>> attack(x)
    array([ -8. -10. -12.])

    Using list of torch tensors
        
    >>> import torch
    >>> x = [torch.tensor([1., 2., 3.]),  # list of torch.tensor 
    >>>      torch.tensor([4., 5., 6.]), 
    >>>      torch.tensor([7., 8., 9.])]
    >>> attack(x)
    tensor([-8., -10., -12.])

    References
    ----------

    .. [1] Cong Xie, Oluwasanmi Koyejo, and Indranil Gupta. Fall of empires: Breaking byzantine-tolerant
            sgd by inner product manipulation. In Ryan P. Adams and Vibhav Gogate (eds.), Proceedings of
            The 35th Uncertainty in Artificial Intelligence Conference, volume 115 of Proceedings of Machine
            Learning Research, pp. 261–270. PMLR, 22–25 Jul 2020. URL https://proceedings.mlr.press/v115/xie20a.html.

    """

    def __init__(self, tau=2.0):
        if not isinstance(tau, float):
            raise TypeError("tau must be a float.")
        self.tau = tau

    def __call__(self, honest_vectors):
        tools, honest_vectors = check_vectors_type(honest_vectors)
        mean_vector = tools.mean(honest_vectors, axis=0)
        return tools.multiply(mean_vector, - self.tau)



[docs]
class Optimal_InnerProductManipulation:
    
    r"""
    Description
    -----------

    Generalization of the :ref:`ipm-label` attack [1]_ by optimizing the attack factor :math:`\tau`.

    .. math::

        \text{Opt-IPM}_{\textit{agg}, \textit{pre_agg_list}, f}(x_1, \dots, x_n) = - \tau_{opt} \cdot \frac{1}{n} \sum_{i=1}^{n} x_i

    where 

    - :math:`x_1, \dots, x_n` are the input vectors, which conceptually correspond to correct gradients submitted by honest participants during a training iteration.

    - :math:`\textit{agg}` is the robust aggregator to be used to aggregate the vectors during the training.

    - :math:`\textit{pre_agg_list}` is the list of robust pre-aggregators to be used to transform the vectors during the training.

    - :math:`f` conceptually represents the expected number of Byzantine vectors.

    - :math:`\tau_{opt} \in \mathbb{R}` is the optimal attack factor found using a line-search optimization method.

    This attack is designed to optimize the attack factor :math:`\tau` of the IPM attack by maximizing a specific function.
    The function quantifies the effect of the attack, in particular the \\(\\ell_2\\)-norm of the distance between the aggregated vectors (including Byzantine vectors) and the average of honest vectors.
    The goal is to find the attack factor that results in the maximum disruption.

    This attack, developed by the ByzFL team, draws inspiration from the IPM attack and has been utilized in [2]_.

    Initialization parameters
    --------------------------

    agg : object, optional (default: Average)
        An instance of a robust aggregator that will be used to aggregate the vectors during the optimization process.

    pre_agg_list : list, optional (default: [Clipping])
        A list of pre-aggregation functions, where each element is an object representing a pre-aggregation method.

    f : int, optional (default: 1)
        The number of Byzantine participants. Must be a non-negative integer.

    evals : int, optional (default: 20)
        The maximum number of evaluations during the optimization process. Must be a positive integer.

    start : float, optional (default: 0.0)
        The initial attack factor to evaluate. Must be a float.

    delta : float, optional (default: 10.0)
        The initial step size for the optimization process. Must be a non-zero float.

    ratio : float, optional (default: 0.8)
        The contraction ratio used to reduce the step size during the contraction phase. Must be between 0.5 and 1 (both excluded).

    Calling the instance
    --------------------

    Input parameters
    ----------------

    vectors: numpy.ndarray, torch.Tensor, list of numpy.ndarray or list of torch.Tensor
        A set of vectors, matrix or tensors.

    Returns
    -------
    :numpy.ndarray or torch.Tensor
        The data type of the output is the same as the input.

    Examples
    --------

    >>> import byzfl
    >>> # Instantiate the robust aggregator
    >>> agg = byzfl.TrMean(f=1)
    >>> # Instantiate the list of pre-aggregators
    >>> pre_agg_list = [byzfl.NNM(f=1), byzfl.Clipping()]
    >>> # Instantiate the attack
    >>> attack = byzfl.Optimal_InnerProductManipulation(agg, pre_agg_list=pre_agg_list, f=1)

    Using numpy arrays
        
    >>> import numpy as np
    >>> x = np.array([[1., 2., 3.],       # np.ndarray
    >>>               [4., 5., 6.], 
    >>>               [7., 8., 9.]])
    >>> attack(x)
    array([-2.98949673 -3.73687091 -4.48424509])
            
    Using torch tensors
        
    >>> import torch
    >>> x = torch.tensor([[1., 2., 3.],   # torch.tensor 
    >>>                   [4., 5., 6.], 
    >>>                   [7., 8., 9.]])
    >>> attack(x)
    tensor([-2.9895, -3.7369, -4.4842])

    Using list of numpy arrays

    >>> import numpy as np
    >>> x = [np.array([1., 2., 3.]),      # list of np.ndarray  
    >>>      np.array([4., 5., 6.]), 
    >>>      np.array([7., 8., 9.])]
    >>> attack(x)
    array([-2.98949673 -3.73687091 -4.48424509])

    Using list of torch tensors
        
    >>> import torch
    >>> x = [torch.tensor([1., 2., 3.]),  # list of torch.tensor 
    >>>      torch.tensor([4., 5., 6.]), 
    >>>      torch.tensor([7., 8., 9.])]
    >>> attack(x)
    tensor([-2.9895, -3.7369, -4.4842])

    References
    ----------

    .. [1] Cong Xie, Oluwasanmi Koyejo, and Indranil Gupta. Fall of empires: Breaking byzantine-tolerant
            sgd by inner product manipulation. In Ryan P. Adams and Vibhav Gogate (eds.), Proceedings of
            The 35th Uncertainty in Artificial Intelligence Conference, volume 115 of Proceedings of Machine
            Learning Research, pp. 261–270. PMLR, 22–25 Jul 2020. URL https://proceedings.mlr.press/v115/xie20a.html.
    
    .. [2] Allouah, Y., Farhadkhani, S., Guerraoui, R., Gupta, N., Pinot, R.,
           & Stephan, J. (2023, April). Fixing by mixing: A recipe for optimal
           byzantine ml under heterogeneity. In International Conference on 
           Artificial Intelligence and Statistics (pp. 1232-1300). PMLR.  

    """

    def __init__(self, agg=Average(), pre_agg_list=[Clipping()], f=1, evals=20, start=0.0, delta=10.0, ratio=0.8):

        # List of valid aggregator classes
        valid_agg_classes = (Average, Median, TrMean, GeometricMedian, Krum, MultiKrum, CenteredClipping, MDA, MoNNA, Meamed)
        if not any(isinstance(agg, valid_agg) for valid_agg in valid_agg_classes):
            raise TypeError("agg must be a valid aggregator.")
        self.agg = agg

        # List of valid pre-aggregator classes
        valid_pre_agg_classes = (NNM, Bucketing, Clipping, ARC)
        if not isinstance(pre_agg_list, list) or not all(any(isinstance(pre_agg, valid_pre_agg) for valid_pre_agg in valid_pre_agg_classes) for pre_agg in pre_agg_list):
            raise TypeError("valid_pre_agg_classes must be a list valid pre-aggregators.")
        self.pre_agg_list = pre_agg_list

        if not isinstance(f, int) or f < 0:
            raise ValueError("f must be a non-negative integer")
        self.f = f
        if not isinstance(evals, int) or evals <= 0:
            raise ValueError("evals must be a positive integer")
        self.evals = evals
        if not isinstance(start, float):
            raise ValueError("start must be a float.")
        self.start = start
        if not isinstance(delta, float) or delta == 0:
            raise ValueError("delta must be a non-zero float.")
        self.delta = delta
        if not isinstance(ratio, float) or ratio <= 0.5 or ratio >= 1:
            raise ValueError("ratio must be a float in (0.5, 1) exclusive.")
        self.ratio = ratio

    def _evaluate(self, honest_vectors, avg_honest_vector, current_tau):
       
        """
        Computes the norm of the distance between the aggregated vector (including Byzantine vectors) and the average of honest vectors.

        Parameters
        ----------
        honest_vectors : ndarray or torch.tensor
            Honest node vectors (2D).
        avg_honest_vector : ndarray or torch.tensor
            Average of honest_vectors (1D).
        current_tau : float
            Current attack factor considered in the attack.

        Returns
        -------
        float
            Norm of the distance.
        """

        tools, honest_vectors = check_vectors_type(honest_vectors)

        #Compute Byzantine vector
        attack = InnerProductManipulation(tau=current_tau)
        byzantine_vector = attack(honest_vectors)
        byzantine_vectors = tools.array([byzantine_vector] * self.f)

        #Aggregate vectors with current Byzantine vectors
        vectors = tools.concatenate((honest_vectors, byzantine_vectors), axis=0)
        for pre_agg in self.pre_agg_list:
            vectors = pre_agg(vectors)
        aggregated_vector = self.agg(vectors)

        #Return distance between aggregate vector and mean of honest vectors
        distance = tools.subtract(aggregated_vector, avg_honest_vector)
        return tools.linalg.norm(distance).item()

    def _expansion_phase(self, honest_vectors, avg_honest_vector):
        
        """
        Performs the expansion phase of the optimization.
        This phase explores the search space aggressively by increasing the step size (delta) when a better value is found.
        """
        best_x = self.start
        best_y = self._evaluate(honest_vectors, avg_honest_vector, best_x)
        delta = self.delta
        remaining_evals = self.evals - 1

        while remaining_evals > 0:
            prop_x = best_x + delta
            prop_y = self._evaluate(honest_vectors, avg_honest_vector, prop_x)
            remaining_evals -= 1

            if prop_y > best_y:
                # If the new value is better: Update best_x, double the step size (delta *= 2), and continue exploring.
                best_x, best_y = prop_x, prop_y
                delta *= 2
            else:
                # If the new value is worse: Stop the expansion phase and proceed to contraction.
                delta *= self.ratio
                break

        return best_x, best_y, delta, remaining_evals

    def _contraction_phase(self, honest_vectors, avg_honest_vector, best_x, best_y, delta, remaining_evals):

        """
        Performs the contraction phase of the optimization.
        This phase refines the search by reducing the step size (delta) and searching around the current best value.
        """

        while remaining_evals > 0:
            # Continue reducing the step size until no significant improvements are found or evaluations are exhausted.
            prop_x = best_x + delta
            prop_y = self._evaluate(honest_vectors, avg_honest_vector, prop_x)
            remaining_evals -= 1

            if prop_y > best_y:
                # If a better value is found: Update best_x and best_y.
                best_x, best_y = prop_x, prop_y

            delta *= self.ratio

        return best_x

    def __call__(self, honest_vectors):

        """
        Iteratively computes the best attack factor tau and executes the corresponding IPM attack.

        Parameters
        ----------
        honest_vectors : ndarray or torch.tensor
            Honest node vectors (2D).
        """
        tools, honest_vectors = check_vectors_type(honest_vectors)
        avg_honest_vector = tools.mean(honest_vectors, axis=0)

        # Expansion Phase
        best_tau, largest_distance, delta, remaining_evals = self._expansion_phase(honest_vectors, avg_honest_vector)

        # Contraction Phase
        best_tau = self._contraction_phase(honest_vectors, avg_honest_vector, best_tau, largest_distance, delta, remaining_evals)

        # Set the best attack factor and execute IPM
        attack = InnerProductManipulation(tau=best_tau)
        return attack(honest_vectors)




[docs]
class ALittleIsEnough:

    r"""
    Description
    -----------

    Execute the A Little Is Enough (ALIE) attack [1]_: perturb the mean vector using the coordinate-wise standard deviation of the vectors multiplicatively scaled with the attack factor :math:`\tau`.

    .. math::

        \text{ALIE}_{\tau}(x_1, \dots, x_n) = \mu_{x_1, ..., x_n} + \tau \cdot \sigma_{x_1, ..., x_n}
    
    where

    - :math:`x_1, \dots, x_n` are the input vectors, which conceptually correspond to correct gradients submitted by honest participants during a training iteration.

    - :math:`\mu_{x_1, \dots, x_n} = \frac{1}{n}\sum_{i=1}^{n}x_i` is the mean vector.

    - \\(\\big[\\cdot\\big]_k\\) refers to the \\(k\\)-th coordinate.

    - :math:`\sigma_{x_1, \dots, x_n}` is the coordinate-wise standard deviation of :math:`x_1, \dots, x_n`, i.e., :math:`\big[\sigma_{x_1, \dots, x_n}\big]_k = \sqrt{\frac{1}{n}\sum_{i=1}^{n}(\big[x_i\big]_k - \big[\mu_{x_1, \dots, x_n}\big]_k)^2}`.

    - :math:`\tau \in \mathbb{R}` is the attack factor.

    Initialization parameters
    --------------------------

    tau : float, optional
        The attack factor :math:`\tau` used to adjust the mean vector. Set to 1.5 by default.

    Calling the instance
    --------------------

    Input parameters
    ----------------

    vectors: numpy.ndarray, torch.Tensor, list of numpy.ndarray or list of torch.Tensor
        A set of vectors, matrix or tensors.

    Returns
    -------
    :numpy.ndarray or torch.Tensor
        The data type of the output is the same as the input.
    
    Examples
    --------

    >>> import byzfl
    >>> attack = byzfl.ALittleIsEnough(1.5)

    Using numpy arrays:

    >>> import numpy as np
    >>> x = np.array([[1., 2., 3.],       # np.ndarray
    >>>               [4., 5., 6.], 
    >>>               [7., 8., 9.]])
    >>> attack(x)
    array([ 8.5  9.5 10.5])
    
    Using torch tensors (Warning: We need the tensor to be either a floating point or complex dtype):
    
    >>> import torch
    >>> x = torch.tensor([[1., 2., 3.],   # torch.tensor 
    >>>                   [4., 5., 6.], 
    >>>                   [7., 8., 9.]])
    >>> attack(x)
    tensor([ 8.5000,  9.5000, 10.5000])

    Using list of numpy arrays

    >>> import numpy as np
    >>> x = [np.array([1., 2., 3.]),      # list of np.ndarray  
    >>>      np.array([4., 5., 6.]), 
    >>>      np.array([7., 8., 9.])]
    >>> attack(x)
    array([ 8.5  9.5 10.5])

    Using list of torch tensors (Warning: We need the tensor to be either a floating point or complex dtype):
        
    >>> import torch
    >>> x = [torch.tensor([1., 2., 3.]),  # list of torch.tensor 
    >>>      torch.tensor([4., 5., 6.]), 
    >>>      torch.tensor([7., 8., 9.])]
    >>> attack(x)
    tensor([ 8.5000,  9.5000, 10.5000])
   
    References
    ----------

    .. [1] Baruch, M., Baruch, G., and Goldberg, Y. A little is enough: Circumventing defenses for distributed learning.
           In Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, 8-14 December 2019, Long Beach, CA, USA, 2019.

    """

    def __init__(self, tau=1.5):
        if not isinstance(tau, float):
            raise TypeError("tau must be a float.")
        self.tau = tau

    def __call__(self, honest_vectors):
        tools, honest_vectors = check_vectors_type(honest_vectors)
        attack_vector = tools.sqrt(tools.var(honest_vectors, axis=0, ddof=1))
        return tools.add(tools.mean(honest_vectors, axis=0), tools.multiply(attack_vector, self.tau))



[docs]
class Optimal_ALittleIsEnough:
    
    r"""
    Description
    -----------

    Generalization of the :ref:`alie-label` attack [1]_ by optimizing the attack factor :math:`\tau`. 

    .. math::

        \text{Opt-ALIE}_{\textit{agg}, \textit{pre_agg_list}, f}(x_1, \dots, x_n) = \mu_{x_1, ..., x_n} + \tau_{opt} \cdot \sigma_{x_1, ..., x_n}

    where 

    - :math:`x_1, \dots, x_n` are the input vectors, which conceptually correspond to correct gradients submitted by honest participants during a training iteration.

    - :math:`\textit{agg}` is the robust aggregator to be used to aggregate the vectors during the training.

    - :math:`\textit{pre_agg_list}` is the list of robust pre-aggregators to be used to transform the vectors during the training.

    - :math:`f` conceptually represents the expected number of Byzantine vectors.

    - :math:`\tau_{opt} \in \mathbb{R}` is the optimal attack factor found using a line-search optimization method.

    - :math:`\mu_{x_1, \dots, x_n} = \frac{1}{n}\sum_{i=1}^{n}x_i` is the mean vector.

    - \\(\\big[\\cdot\\big]_k\\) refers to the \\(k\\)-th coordinate.

    - :math:`\sigma_{x_1, \dots, x_n}` is the coordinate-wise standard deviation of :math:`x_1, \dots, x_n`, i.e., :math:`\big[\sigma_{x_1, \dots, x_n}\big]_k = \sqrt{\frac{1}{n}\sum_{i=1}^{n}(\big[x_i\big]_k - \big[\mu_{x_1, \dots, x_n}\big]_k)^2}`.

    This attack is designed to optimize the attack factor :math:`\tau` of the ALIE attack by maximizing a specific function.
    The function quantifies the effect of the attack, in particular the \\(\\ell_2\\)-norm of the distance between the aggregated vectors (including Byzantine vectors) and the average of honest vectors.
    The goal is to find the attack factor that results in the maximum disruption.

    This attack, developed by the ByzFL team, draws inspiration from the ALIE attack and has been utilized in [2]_.

    Initialization parameters
    --------------------------

    agg : object, optional (default: Average)
        An instance of a robust aggregator that will be used to aggregate the vectors during the optimization process.

    pre_agg_list : list, optional (default: [Clipping])
        A list of pre-aggregation functions, where each element is an object representing a pre-aggregation method.

    f : int, optional (default: 1)
        The number of Byzantine participants. Must be a non-negative integer.

    evals : int, optional (default: 20)
        The maximum number of evaluations during the optimization process. Must be a positive integer.

    start : float, optional (default: 0.0)
        The initial attack factor to evaluate. Must be a float.

    delta : float, optional (default: 10.0)
        The initial step size for the optimization process. Must be a non-zero float.

    ratio : float, optional (default: 0.8)
        The contraction ratio used to reduce the step size during the contraction phase. Must be between 0.5 and 1 (both excluded).

    Calling the instance
    --------------------

    Input parameters
    ----------------

    vectors: numpy.ndarray, torch.Tensor, list of numpy.ndarray or list of torch.Tensor
        A set of vectors, matrix or tensors.

    Returns
    -------
    :numpy.ndarray or torch.Tensor
        The data type of the output is the same as the input.

    Examples
    --------

    >>> import byzfl
    >>> # Instantiate the robust aggregator
    >>> agg = byzfl.TrMean(f=1)
    >>> # Instantiate the list of pre-aggregators
    >>> pre_agg_list = [byzfl.NNM(f=1), byzfl.Clipping()]
    >>> # Instantiate the attack
    >>> attack = byzfl.Optimal_ALittleIsEnough(agg, pre_agg_list=pre_agg_list, f=1)

    Using numpy arrays
        
    >>> import numpy as np
    >>> x = np.array([[1., 2., 3.],       # np.ndarray
    >>>               [4., 5., 6.], 
    >>>               [7., 8., 9.]])
    >>> attack(x)
    array([6.0615843 7.0615843 8.0615843])
            
    Using torch tensors
        
    >>> import torch
    >>> x = torch.tensor([[1., 2., 3.],   # torch.tensor 
    >>>                   [4., 5., 6.], 
    >>>                   [7., 8., 9.]])
    >>> attack(x)
    tensor([6.0616, 7.0616, 8.0616])

    Using list of numpy arrays

    >>> import numpy as np
    >>> x = [np.array([1., 2., 3.]),      # list of np.ndarray  
    >>>      np.array([4., 5., 6.]), 
    >>>      np.array([7., 8., 9.])]
    >>> attack(x)
    array([6.0615843 7.0615843 8.0615843])

    Using list of torch tensors
        
    >>> import torch
    >>> x = [torch.tensor([1., 2., 3.]),  # list of torch.tensor 
    >>>      torch.tensor([4., 5., 6.]), 
    >>>      torch.tensor([7., 8., 9.])]
    >>> attack(x)
    tensor([6.0616, 7.0616, 8.0616])

    References
    ----------

    .. [1] Baruch, M., Baruch, G., and Goldberg, Y. A little is enough: Circumventing defenses for distributed learning.
           In Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, 8-14 December 2019, Long Beach, CA, USA, 2019.
    
    .. [2] Allouah, Y., Farhadkhani, S., Guerraoui, R., Gupta, N., Pinot, R.,
           & Stephan, J. (2023, April). Fixing by mixing: A recipe for optimal
           byzantine ml under heterogeneity. In International Conference on 
           Artificial Intelligence and Statistics (pp. 1232-1300). PMLR.  

    """

    def __init__(self, agg=Average(), pre_agg_list=[Clipping()], f=1, evals=20, start=0.0, delta=10.0, ratio=0.8):

        # List of valid aggregator classes
        valid_agg_classes = (Average, Median, TrMean, GeometricMedian, Krum, MultiKrum, CenteredClipping, MDA, MoNNA, Meamed)
        if not any(isinstance(agg, valid_agg) for valid_agg in valid_agg_classes):
            raise TypeError("agg must be a valid aggregator.")
        self.agg = agg

        # List of valid pre-aggregator classes
        valid_pre_agg_classes = (NNM, Bucketing, Clipping, ARC)
        if not isinstance(pre_agg_list, list) or not all(any(isinstance(pre_agg, valid_pre_agg) for valid_pre_agg in valid_pre_agg_classes) for pre_agg in pre_agg_list):
            raise TypeError("valid_pre_agg_classes must be a list valid pre-aggregators.")
        self.pre_agg_list = pre_agg_list

        if not isinstance(f, int) or f < 0:
            raise ValueError("f must be a non-negative integer")
        self.f = f
        if not isinstance(evals, int) or evals <= 0:
            raise ValueError("evals must be a positive integer")
        self.evals = evals
        if not isinstance(start, float):
            raise ValueError("start must be a float.")
        self.start = start
        if not isinstance(delta, float) or delta == 0:
            raise ValueError("delta must be a non-zero float.")
        self.delta = delta
        if not isinstance(ratio, float) or ratio <= 0.5 or ratio >= 1:
            raise ValueError("ratio must be a float in (0.5, 1) exclusive.")
        self.ratio = ratio

    def _evaluate(self, honest_vectors, avg_honest_vector, current_tau):
       
        """
        Computes the norm of the distance between the aggregated vector (including Byzantine vectors) and the average of honest vectors.

        Parameters
        ----------
        honest_vectors : ndarray or torch.tensor
            Honest node vectors (2D).
        avg_honest_vector : ndarray or torch.tensor
            Average of honest_vectors (1D).
        current_tau : float
            Current attack factor considered in the attack.

        Returns
        -------
        float
            Norm of the distance.
        """

        tools, honest_vectors = check_vectors_type(honest_vectors)

        #Compute Byzantine vector
        attack = ALittleIsEnough(tau=current_tau)
        byzantine_vector = attack(honest_vectors)
        byzantine_vectors = tools.array([byzantine_vector] * self.f)

        #Aggregate vectors with current Byzantine vectors
        vectors = tools.concatenate((honest_vectors, byzantine_vectors), axis=0)
        for pre_agg in self.pre_agg_list:
            vectors = pre_agg(vectors)
        aggregated_vector = self.agg(vectors)

        #Return distance between aggregate vector and mean of honest vectors
        distance = tools.subtract(aggregated_vector, avg_honest_vector)
        return tools.linalg.norm(distance).item()

    def _expansion_phase(self, honest_vectors, avg_honest_vector):
        
        """
        Performs the expansion phase of the optimization.
        This phase explores the search space aggressively by increasing the step size (delta) when a better value is found.
        """
        best_x = self.start
        best_y = self._evaluate(honest_vectors, avg_honest_vector, best_x)
        delta = self.delta
        remaining_evals = self.evals - 1

        while remaining_evals > 0:
            prop_x = best_x + delta
            prop_y = self._evaluate(honest_vectors, avg_honest_vector, prop_x)
            remaining_evals -= 1

            if prop_y > best_y:
                # If the new value is better: Update best_x, double the step size (delta *= 2), and continue exploring.
                best_x, best_y = prop_x, prop_y
                delta *= 2
            else:
                # If the new value is worse: Stop the expansion phase and proceed to contraction.
                delta *= self.ratio
                break

        return best_x, best_y, delta, remaining_evals

    def _contraction_phase(self, honest_vectors, avg_honest_vector, best_x, best_y, delta, remaining_evals):

        """
        Performs the contraction phase of the optimization.
        This phase refines the search by reducing the step size (delta) and searching around the current best value.
        """
        while remaining_evals > 0:
            # Continue reducing the step size until no significant improvements are found or evaluations are exhausted.
            prop_x = best_x + delta
            prop_y = self._evaluate(honest_vectors, avg_honest_vector, prop_x)
            remaining_evals -= 1

            if prop_y > best_y:
                # If a better value is found: Update best_x and best_y.
                best_x, best_y = prop_x, prop_y

            delta *= self.ratio

        return best_x

    def __call__(self, honest_vectors):

        """
        Iteratively computes the best attack factor tau and executes the corresponding IPM attack.

        Parameters
        ----------
        honest_vectors : ndarray or torch.tensor
            Honest node vectors (2D).
        """
        tools, honest_vectors = check_vectors_type(honest_vectors)
        avg_honest_vector = tools.mean(honest_vectors, axis=0)

        # Expansion Phase
        best_tau, largest_distance, delta, remaining_evals = self._expansion_phase(honest_vectors, avg_honest_vector)

        # Contraction Phase
        best_tau = self._contraction_phase(honest_vectors, avg_honest_vector, best_tau, largest_distance, delta, remaining_evals)

        # Set the best attack factor and execute IPM
        attack = ALittleIsEnough(tau=best_tau)
        return attack(honest_vectors)




[docs]
class Mimic:
    
    r"""
    Description
    -----------

    The attacker mimics the behavior of worker with ID :math:`\epsilon` by sending the same vector as that worker [1]_.

    .. math::

        \text{Mimic}_{\epsilon}(x_1, \dots, x_n) = x_{\epsilon+1}
    
    where

    - :math:`x_1, \dots, x_n` are the input vectors, which conceptually correspond to correct gradients submitted by honest participants during a training iteration.

    - :math:`\epsilon \in \{0, \dots, n-1\}` is the ID of the worker to mimic. In other words, :math:`x_{\epsilon+1}` is the vector sent by the worker with ID :math:`\epsilon`.

    Initialization parameters
    --------------------------

    epsilon : int, optional
        ID of the worker whose behavior is to be mimicked. Set to 0 by default.

    Calling the instance
    --------------------

    Input parameters
    ----------------

    vectors: numpy.ndarray, torch.Tensor, list of numpy.ndarray or list of torch.Tensor
        A set of vectors, matrix or tensors.

    Returns
    -------
    :numpy.ndarray or torch.Tensor
        The data type of the output is the same as the input.
    
    Examples
    --------
    
    >>> import byzfl
    >>> attack = byzfl.Mimic(0)

    Using numpy arrays:

    >>> import numpy as np
    >>> x = np.array([[1., 2., 3.],       # np.ndarray
    >>>               [4., 5., 6.], 
    >>>               [7., 8., 9.]])
    >>> attack(x)
    array([1. 2. 3.])

    Using torch tensors:

    >>> import torch
    >>> x = torch.tensor([[1., 2., 3.],   # torch.tensor 
    >>>                   [4., 5., 6.], 
    >>>                   [7., 8., 9.]])
    >>> attack(x)
    tensor([1., 2., 3.])

    Using list of numpy arrays

    >>> import numpy as np
    >>> x = [np.array([1., 2., 3.]),      # list of np.ndarray  
    >>>      np.array([4., 5., 6.]), 
    >>>      np.array([7., 8., 9.])]
    >>> attack(x)
    array([1.  2. 3.])

    Using list of torch tensors:

    >>> import torch
    >>> x = [torch.tensor([1., 2., 3.]),  # list of torch.tensor 
    >>>      torch.tensor([4., 5., 6.]), 
    >>>      torch.tensor([7., 8., 9.])]
    >>> attack(x)
    tensor([1., 2., 3.])

    References
    ----------

    .. [1] Sai Praneeth Karimireddy, Lie He, and Martin Jaggi. Byzantine-robust learning on heterogeneous datasets via bucketing. In International Conference on Learning Representations, 2022.
    
    """

    def __init__(self, epsilon=0):
        if not isinstance(epsilon, float) or epsilon < 0:
            raise ValueError("epsilon must be a non-negative int.")
        self.epsilon = epsilon

    def __call__(self, honest_vectors):
        if not self.epsilon < len(honest_vectors):
            raise ValueError(f"epsilon must be smaller than len(honest_vectors), but got epsilon={self.epsilon} and len(honest_vectors)={len(honest_vectors)}")
        return honest_vectors[self.epsilon]




[docs]
class Inf:

    r"""
    Description
    -----------

    Generate extreme vector comprised of positive infinity values.

    .. math::

        \mathrm{Inf}(x_1, \dots, x_n) = \begin{bmatrix} +\infty \\ +\infty \\ \vdots \\ +\infty \end{bmatrix} \in \mathbb{R}^d

    where

    - :math:`x_1, \dots, x_n` are the input vectors, which conceptually correspond to correct gradients submitted by honest participants during a training iteration.

    - :math:`d` is the dimensionality of the input space, i.e., :math:`d` is the number of coordinates of vectors :math:`x_1, \dots, x_n`.

    Initialization parameters
    --------------------------

    None

    Calling the instance
    --------------------

    Input parameters
    ----------------

    vectors: numpy.ndarray, torch.Tensor, list of numpy.ndarray or list of torch.Tensor
        A set of vectors, matrix or tensors.

    Returns
    -------
    :numpy.ndarray or torch.Tensor
        The data type of the output is the same as the input.

    Examples
    --------

    >>> import byzfl
    >>> attack = byzfl.Inf()

    Using numpy arrays:

    >>> import numpy as np
    >>> x = np.array([[1., 2., 3.],       # np.ndarray
    >>>               [4., 5., 6.], 
    >>>               [7., 8., 9.]])
    >>> attack(x)
    array([inf, inf, inf])
    
    Using torch tensors:

    >>> import torch
    >>> x = torch.tensor([[1., 2., 3.],   # torch.tensor 
    >>>                   [4., 5., 6.], 
    >>>                   [7., 8., 9.]])
    >>> attack(x)
    tensor([inf, inf, inf])

    Using list of numpy arrays

    >>> import numpy as np
    >>> x = [np.array([1., 2., 3.]),      # list of np.ndarray  
    >>>      np.array([4., 5., 6.]), 
    >>>      np.array([7., 8., 9.])]
    >>> attack(x)
    array([inf, inf, inf])

    Using list of torch tensors (Warning: We need the tensor to be either a floating point or complex dtype):
        
    >>> import torch
    >>> x = [torch.tensor([1., 2., 3.]),  # list of torch.tensor 
    >>>      torch.tensor([4., 5., 6.]), 
    >>>      torch.tensor([7., 8., 9.])]
    >>> attack(x)
    tensor([inf, inf, inf])

    """ 

    def __init__(self):
        pass
    
    def __call__(self, honest_vectors):
        tools, honest_vectors = check_vectors_type(honest_vectors)
        return tools.full_like(honest_vectors[0], float('inf'), dtype=np.float64)




[docs]
class Gaussian:

    r"""
    Description
    -----------

    Generate a random vector where each coordinate is independently sampled from a Gaussian distribution.

    .. math::

        \mathrm{Gaussian}_{\mu, \sigma}(x_1, \dots, x_n) = \begin{bmatrix} g_1 \\ g_2 \\ \vdots \\ g_d \end{bmatrix} \in \mathbb{R}^d
        
    where:

    - :math:`x_1, \dots, x_n` are the input vectors, which conceptually correspond to correct gradients submitted by honest participants during a training iteration.

    - :math:`d` is the dimensionality of the input space, i.e., :math:`d` is the number of coordinates of vectors :math:`x_1, \dots, x_n`.

    - :math:`\mathit{N}(\mu, \sigma^2)` is the Gaussian distribution of mean :math:`\mu \in \mathbb{R}` and standard deviation :math:`\sigma \geq 0`.

    - :math:`g_i  \sim \mathit{N}(\mu, \sigma^2)` for all :math:`i  \in \{1, \dots, d\}`.


    Initialization parameters
    --------------------------

    mu: float, optional (default=0.0)
        Mean of the Gaussian distribution.
    sigma: float, optional (default=1.0)
        Standard deviation of the Gaussian distribution.

    Calling the instance
    --------------------

    Input parameters
    ----------------

    vectors: numpy.ndarray, torch.Tensor, list of numpy.ndarray or list of torch.Tensor
        A set of vectors, matrix or tensors.

    Returns
    -------
    :numpy.ndarray or torch.Tensor
        The data type of the output is the same as the input.

    Examples
    --------

    >>> import byzfl
    >>> attack = byzfl.Gaussian(mu=0.0, sigma=1.0)

    Using numpy arrays

    >>> import numpy as np
    >>> x = np.array([[1., 2., 3.],       # np.ndarray
    >>>               [4., 5., 6.], 
    >>>               [7., 8., 9.]])
    >>> attack(x)
    array([-0.08982162  0.07237574  0.55886579])
            
    Using torch tensors

    >>> import torch
    >>> x = torch.tensor([[1., 2., 3.],   # torch.tensor 
    >>>                   [4., 5., 6.], 
    >>>                   [7., 8., 9.]])
    >>> attack(x)
    tensor([ 0.9791,  0.0266, -1.0112])
    
    Using list of numpy arrays

    >>> import numpy as np
    >>> x = [np.array([1., 2., 3.]),      # list of np.ndarray  
    >>>      np.array([4., 5., 6.]), 
    >>>      np.array([7., 8., 9.])]
    >>> attack(x)
    array([-0.08982162  0.07237574  0.55886579])

    Using list of torch tensors
        
    >>> import torch
    >>> x = [torch.tensor([1., 2., 3.]),  # list of torch.tensor 
    >>>      torch.tensor([4., 5., 6.]), 
    >>>      torch.tensor([7., 8., 9.])]
    >>> attack(x)
    tensor([ 0.9791,  0.0266, -1.0112])

    """

    def __init__(self, mu=0.0, sigma=1.0):
        if not isinstance(mu, float):
            raise TypeError("mu must be a float.")
        self.mu = mu
        if not isinstance(sigma, float) or sigma < 0:
            raise ValueError("sigma must be a non-negative float.")
        self.sigma = sigma

    def __call__(self, honest_vectors):
        _, honest_vectors = check_vectors_type(honest_vectors)
        random = random_tool(honest_vectors)
        shape = honest_vectors.shape[1]
        return random.normal(loc=self.mu, scale=self.sigma, size=shape)