from __future__ import annotations

import torch
from torch import Tensor, nn

from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.util import fullname


class PListMLELambdaWeight(nn.Module):
    """Base class for implementing weighting schemes in Position-Aware ListMLE Loss."""

    def __init__(self, rank_discount_fn=None) -> None:
        """
        Initialize a lambda weight for PListMLE loss.

        Args:
            rank_discount_fn: Function that computes a discount for each rank position.
                              If None, uses default discount of 2^(num_docs - rank) - 1.
        N)super__init__rank_discount_fn)selfr   	__class__s     u/home/chris/cleankitchens-env/lib/python3.12/site-packages/sentence_transformers/cross_encoder/losses/PListMLELoss.pyr   zPListMLELambdaWeight.__init__   s     	 0    c                &   | j                   | j                  |      S |j                  dd      }t        j                  |j	                  d      |j
                        j                  |      }t        j                  d||z
        dz
  }||z  }|S )a  
        Calculate position-aware weights for the PListMLE loss.

        Args:
            mask: A boolean mask indicating valid positions [batch_size, num_docs]

        Returns:
            Tensor: Weights for each position [batch_size, num_docs]
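
        Example:
            Assuming the default discount ``2^(num_docs - rank) - 1`` with ranks starting
            at 1: a mask row ``[True, True, True, False]`` has 3 valid documents, so ranks
            ``[1, 2, 3, 4]`` give raw weights ``[3., 1., 0., -0.5]``, and masking the
            padded position yields ``[3., 1., 0., 0.]``.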
        """
        if self.rank_discount_fn is not None:
            return self.rank_discount_fn(mask)

        # Default rank discount: 2^(num_docs - rank) - 1, with ranks starting at 1
        num_docs_per_query = mask.sum(dim=1, keepdim=True)
        ranks = torch.arange(mask.size(1), device=mask.device).expand_as(mask) + 1
        weights = torch.pow(2.0, num_docs_per_query - ranks) - 1.0
        weights = weights * mask
        return weights


class PListMLELoss(nn.Module):
    def __init__(
        self,
        model: CrossEncoder,
        lambda_weight: PListMLELambdaWeight | None = PListMLELambdaWeight(),
        activation_fn: nn.Module | None = nn.Identity(),
        mini_batch_size: int | None = None,
        respect_input_order: bool = True,
    ) -> None:
        """
        PListMLE loss for learning to rank with position-aware weighting. This loss function implements
        the ListMLE ranking algorithm which uses a list-wise approach based on maximum likelihood
        estimation of permutations. It maximizes the likelihood of the permutation induced by the
        ground truth labels with position-aware weighting.

        This loss is also known as Position-Aware ListMLE or p-ListMLE.

        .. note::

            The number of documents per query can vary between samples with the ``PListMLELoss``.

        Args:
            model (CrossEncoder): CrossEncoder model to be trained
            lambda_weight (PListMLELambdaWeight, optional): Weighting scheme to use. When specified,
                implements Position-Aware ListMLE which applies different weights to different rank
                positions. Default is None (standard PListMLE).
            activation_fn (:class:`~torch.nn.Module`): Activation function applied to the logits before computing the
                loss. Defaults to :class:`~torch.nn.Identity`.
            mini_batch_size (int, optional): Number of samples to process in each forward pass. This has a significant
                impact on the memory consumption and speed of the training process. Three cases are possible:

                - If ``mini_batch_size`` is None, the ``mini_batch_size`` is set to the batch size.
                - If ``mini_batch_size`` is greater than 0, the batch is split into mini-batches of size ``mini_batch_size``.
                - If ``mini_batch_size`` is <= 0, the entire batch is processed at once.

                Defaults to None. A worked example follows this parameter list.
            respect_input_order (bool): Whether to respect the original input order of documents.
                If True, assumes the input documents are already ordered by relevance (most relevant first).
                If False, sorts documents by label values. Defaults to True.
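
        For example (a worked sketch of the mini-batching above, assuming two queries
        with 2 and 3 documents): the loss flattens the batch into 5 ``(query, document)``
        pairs, so ``mini_batch_size=2`` scores them in forward passes of size 2, 2, and 1.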

        References:
            - Position-Aware ListMLE: A Sequential Learning Process for Ranking: https://auai.org/uai2014/proceedings/individuals/164.pdf
            - `Cross Encoder > Training Examples > MS MARCO <../../../examples/cross_encoder/training/ms_marco/README.html>`_

        Requirements:
            1. Query with multiple documents (listwise approach)
            2. Documents must have relevance scores/labels. Both binary and continuous labels are supported.
            3. Documents must be sorted in a defined rank order.

        Inputs:
            +----------------------------------------+--------------------------------+-------------------------------+
            | Texts                                  | Labels                         | Number of Model Output Labels |
            +========================================+================================+===============================+
            | (query, [doc1, doc2, ..., docN])       | [score1, score2, ..., scoreN]  | 1                             |
            +----------------------------------------+--------------------------------+-------------------------------+

        Recommendations:
            - Use :class:`~sentence_transformers.util.mine_hard_negatives` with ``output_format="labeled-list"``
              to convert question-answer pairs to the required input format with hard negatives.

        Relations:
            - The :class:`~sentence_transformers.cross_encoder.losses.PListMLELoss` is an extension of the
              :class:`~sentence_transformers.cross_encoder.losses.ListMLELoss` and allows for positional weighting
              of the loss. :class:`~sentence_transformers.cross_encoder.losses.PListMLELoss` generally outperforms
              :class:`~sentence_transformers.cross_encoder.losses.ListMLELoss` and is recommended over it.
            - :class:`~sentence_transformers.cross_encoder.losses.LambdaLoss` takes the same inputs, and generally
              outperforms this loss.

        Example:
            ::

                from sentence_transformers.cross_encoder import CrossEncoder, CrossEncoderTrainer, losses
                from datasets import Dataset

                model = CrossEncoder("microsoft/mpnet-base")
                train_dataset = Dataset.from_dict({
                    "query": ["What are pandas?", "What is the capital of France?"],
                    "docs": [
                        ["Pandas are a kind of bear.", "Pandas are kind of like fish."],
                        ["The capital of France is Paris.", "Paris is the capital of France.", "Paris is quite large."],
                    ],
                    "labels": [[1, 0], [1, 1, 0]],
                })

                # Either: Position-Aware ListMLE with default weighting
                lambda_weight = losses.PListMLELambdaWeight()
                loss = losses.PListMLELoss(model, lambda_weight=lambda_weight)

                # or: Position-Aware ListMLE with custom weighting function
                def custom_discount(ranks): # e.g. ranks: [1, 2, 3, 4, 5]
                    return 1.0 / torch.log1p(ranks)
                lambda_weight = losses.PListMLELambdaWeight(rank_discount_fn=custom_discount)
                loss = losses.PListMLELoss(model, lambda_weight=lambda_weight)

                trainer = CrossEncoderTrainer(
                    model=model,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()
        g|=r   z< supports a model with 1 output label, but got a model with z output labels.N)r   r   modellambda_weightr   Identityactivation_fnmini_batch_sizerespect_input_ordereps
num_labels
ValueErrorr   r&   )r   r/   r0   r2   r3   r4   r   s         r   r   zPListMLELoss.__init__.   s    H 	
**;bkkm.#6 ::  A%>>**+ ,((,

(=(='>oO  &r   c                	   t        |t              rt        d      t        |      dk7  rt        dt        |       d      |\  }}|D cg c]  }t        |       }}t	        |      }t        |      }||D cg c]  }t        |       c}k7  r)t        d| dD cg c]  }t        |       c} d      t        ||      D 	
cg c]  \  }	}|D ]  }
|	|
f  }}}	}
|s,t        j                  d| j                  j                  d	
      S | j                  xs |}|dk  rt        |      }g }t        dt        |      |      D ]  }||||z    }| j                  j                  |d	d	d      }|j                  | j                  j                        } | j                  di |d   j                  d      }|j                  |        t        j                   |d      }| j#                  |      }t        j$                  ||fd| j                  j                        }t        j                   |D cg c]   }t        j&                  t        |            " c}d      }t        j(                  t        j&                  |      t        j                  |            }||||f<   t        j*                  |t        j,                        }d	|||f<   t        j.                  |t1        d             }t        j                   d      j1                         |||f<   t        j2                  |      s,t        j                  d| j                  j                  d	
      S | j4                  s.|j7                  d	d      \  }}t        j8                  |d|      }n|}|j;                         }t        j<                  t        j>                  t        j<                  |dg      d      dg      }|t        j@                  || jB                  z         z
  }| jD                  9| jE                  |      }||jG                  dd	      | jB                  z   z  }||z  }d|| <   t        jF                  |d       }t        j2                  |      s,t        j                  d| j                  j                  d	
      S t        jH                  |      S c c}w c c}w c c}w c c}
}}	w c c}w )a5  
        Compute PListMLE loss for a batch of queries and their documents.

        Args:
            inputs: List of (queries, documents_list)
            labels: Ground truth relevance scores, shape (batch_size, num_documents)

        Returns:
            Tensor: Mean PListMLE loss over the batch
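
        Note:
            A sketch of the computation: the loss is a position-weighted Plackett-Luce
            log-likelihood. For logits ``s_1, ..., s_n`` in the target order, the
            per-query loss is ``-sum_k w_k * (s_k - log(exp(s_k) + ... + exp(s_n)))``,
            where the weights ``w_k`` come from ``lambda_weight`` (when set) and are
            normalized to sum to 1 per query.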
        """
        if isinstance(labels, Tensor):
            raise ValueError(
                "PListMLELoss expects a list of labels for each sample, but got a single value for each sample."
            )
        if len(inputs) != 2:
            raise ValueError(
                f"PListMLELoss expects two inputs (queries, documents_list), but got {len(inputs)} inputs."
            )

        queries, docs_list = inputs
        docs_per_query = [len(docs) for docs in docs_list]
        max_docs = max(docs_per_query)
        batch_size = len(queries)

        if docs_per_query != [len(labels) for labels in labels]:
            raise ValueError(
                f"Number of documents per query in inputs ({docs_per_query}) does not match "
                f"number of labels per query ({[len(labels) for labels in labels]})."
            )

        pairs = [(query, document) for query, docs in zip(queries, docs_list) for document in docs]

        if not pairs:
            # Handle edge case where there are no documents
            return torch.tensor(0.0, device=self.model.device, requires_grad=True)

        mini_batch_size = self.mini_batch_size or batch_size
        if mini_batch_size <= 0:
            mini_batch_size = len(pairs)

        logits_list = []
        for i in range(0, len(pairs), mini_batch_size):
            mini_batch_pairs = pairs[i : i + mini_batch_size]

            tokens = self.model.tokenizer(
                mini_batch_pairs,
                padding=True,
                truncation=True,
                return_tensors="pt",
            )
            tokens = tokens.to(self.model.device)

            logits = self.model(**tokens)[0].view(-1)
            logits_list.append(logits)

        logits = torch.cat(logits_list, dim=0)
        logits = self.activation_fn(logits)

        # Create a padded logits matrix; padding positions keep a tiny placeholder value
        logits_matrix = torch.full((batch_size, max_docs), 1e-16, device=self.model.device)

        # Place the logits at their (query, document) positions
        doc_indices = torch.cat([torch.arange(len(docs)) for docs in docs_list], dim=0)
        batch_indices = torch.repeat_interleave(torch.arange(batch_size), torch.tensor(docs_per_query))
        logits_matrix[batch_indices, doc_indices] = logits

        # Mask marking the valid (non-padding) entries
        mask = torch.zeros_like(logits_matrix, dtype=torch.bool)
        mask[batch_indices, doc_indices] = True

        # Labels matrix with -inf at padding positions so they sort last
        labels_matrix = torch.full_like(logits_matrix, -float("inf"))
        labels_matrix[batch_indices, doc_indices] = torch.cat(labels, dim=0).float()

        if not torch.any(mask):
            return torch.tensor(0.0, device=self.model.device, requires_grad=True)

        if not self.respect_input_order:
            # Sort documents by label in descending order if the input order is not trusted
            sorted_labels, indices = labels_matrix.sort(descending=True, dim=1)
            sorted_logits = torch.gather(logits_matrix, 1, indices)
        else:
            # Use the original input order, assuming it is already ordered by relevance
            sorted_logits = logits_matrix

        # Plackett-Luce log-likelihood: log P(position k) = s_k - log(sum_{j >= k} exp(s_j))
        scores = sorted_logits.exp()
        cumsum_scores = torch.flip(torch.cumsum(torch.flip(scores, [1]), 1), [1])
        log_probs = sorted_logits - torch.log(cumsum_scores + self.eps)

        if self.lambda_weight is not None:
            # Apply position-aware weights, normalized to sum to 1 per query
            position_weights = self.lambda_weight(mask)
            position_weights = position_weights / (position_weights.sum(dim=1, keepdim=True) + self.eps)
            log_probs = log_probs * position_weights

        # Zero out padding positions and sum the log probabilities per query
        log_probs[~mask] = 0.0
        per_query_losses = -torch.sum(log_probs, dim=1)

        if not torch.any(per_query_losses):
            return torch.tensor(0.0, device=self.model.device, requires_grad=True)

        # Average loss over all queries
        return torch.mean(per_query_losses)

    def get_config_dict(self) -> dict[str, float | int | str | None]:
        """
        Get configuration parameters for this loss function.

        Returns:
            Dictionary containing the configuration parameters
        N)r0   r2   r3   r4   )r0   r   r2   r3   r4   r   s    r   get_config_dictzPListMLELoss.get_config_dict  sI     &*%7%7%?TXdN`N`Ea%d&8&89#33#'#;#;	
 	
r   c                     y)Na  
@inproceedings{lan2014position,
  title={Position-Aware ListMLE: A Sequential Learning Process for Ranking},
  author={Lan, Yanyan and Zhu, Yadong and Guo, Jiafeng and Niu, Shuzi and Cheng, Xueqi},
  booktitle={UAI},
  volume={14},
  pages={449--458},
  year={2014}
}
"""