
from __future__ import annotations

import logging
import os
from collections import defaultdict
from typing import TYPE_CHECKING, Any, Callable

import numpy as np

from sentence_transformers.evaluation.NanoBEIREvaluator import NanoBEIREvaluator
from sentence_transformers.sparse_encoder.evaluation.SparseInformationRetrievalEvaluator import (
    SparseInformationRetrievalEvaluator,
)
from sentence_transformers.util import append_to_last_row

if TYPE_CHECKING:
    from torch import Tensor

    from sentence_transformers.evaluation import SimilarityFunction
    from sentence_transformers.evaluation.NanoBEIREvaluator import DatasetNameType
    from sentence_transformers.sparse_encoder import SparseEncoder

logger = logging.getLogger(__name__)


class SparseNanoBEIREvaluator(NanoBEIREvaluator):
    """
    This evaluator extends :class:`~sentence_transformers.evaluation.NanoBEIREvaluator` but is specifically designed for sparse encoder models.

    This class evaluates the performance of a SparseEncoder Model on the NanoBEIR collection of Information Retrieval datasets.

    The collection is a set of datasets based on the BEIR collection, but with a significantly smaller size, so it can
    be used for quickly evaluating the retrieval performance of a model before committing to a full evaluation.
    The datasets are available on Hugging Face in the `NanoBEIR collection <https://huggingface.co/collections/zeta-alpha-ai/nanobeir-66e1a0af21dfd93e620cd9f6>`_.
    This evaluator will return the same metrics as the InformationRetrievalEvaluator (i.e., MRR, nDCG, Recall@k), for each dataset and on average.

    Args:
        dataset_names (List[str]): The names of the datasets to evaluate on. Defaults to all datasets.
        mrr_at_k (List[int]): A list of integers representing the values of k for MRR calculation. Defaults to [10].
        ndcg_at_k (List[int]): A list of integers representing the values of k for NDCG calculation. Defaults to [10].
        accuracy_at_k (List[int]): A list of integers representing the values of k for accuracy calculation. Defaults to [1, 3, 5, 10].
        precision_recall_at_k (List[int]): A list of integers representing the values of k for precision and recall calculation. Defaults to [1, 3, 5, 10].
        map_at_k (List[int]): A list of integers representing the values of k for MAP calculation. Defaults to [100].
        show_progress_bar (bool): Whether to show a progress bar during evaluation. Defaults to False.
        batch_size (int): The batch size for evaluation. Defaults to 32.
        write_csv (bool): Whether to write the evaluation results to a CSV file. Defaults to True.
        max_active_dims (Optional[int], optional): The maximum number of active dimensions to use.
            `None` uses the model's current `max_active_dims`. Defaults to None.
        score_functions (Dict[str, Callable[[Tensor, Tensor], Tensor]]): A dictionary mapping score function names to score functions. Defaults to {SimilarityFunction.COSINE.value: cos_sim, SimilarityFunction.DOT_PRODUCT.value: dot_score}.
        main_score_function (Union[str, SimilarityFunction], optional): The main score function to use for evaluation. Defaults to None.
        aggregate_fn (Callable[[list[float]], float]): The function to aggregate the scores. Defaults to np.mean.
        aggregate_key (str): The key to use for the aggregated score. Defaults to "mean".
        query_prompts (str | dict[str, str], optional): The prompts to add to the queries. If a string, will add the same prompt to all queries. If a dict, expects that all datasets in dataset_names are keys.
        corpus_prompts (str | dict[str, str], optional): The prompts to add to the corpus. If a string, will add the same prompt to all corpus documents. If a dict, expects that all datasets in dataset_names are keys.
        write_predictions (bool): Whether to write the predictions to a JSONL file. Defaults to False.
            This can be useful for downstream evaluation, as the output can be used as input to the :class:`~sentence_transformers.sparse_encoder.evaluation.ReciprocalRankFusionEvaluator`, which accepts precomputed predictions; see the snippet after this list.
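
    For example, a configuration that caps the number of active dimensions, prepends a query prompt,
    and writes the ranked predictions to disk could look like the following (the dataset choice, the
    cap of 192, and the prompt string are illustrative placeholders)::

        from sentence_transformers.sparse_encoder.evaluation import SparseNanoBEIREvaluator

        evaluator = SparseNanoBEIREvaluator(
            dataset_names=["NFCorpus", "SciFact"],
            max_active_dims=192,
            query_prompts="Represent this query for retrieval: ",
            write_predictions=True,
        )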

    Example:
        ::

            import logging

            from sentence_transformers import SparseEncoder
            from sentence_transformers.sparse_encoder.evaluation import SparseNanoBEIREvaluator

            logging.basicConfig(format="%(message)s", level=logging.INFO)

            # Load a model
            model = SparseEncoder("naver/splade-cocondenser-ensembledistil")

            datasets = ["QuoraRetrieval", "MSMARCO"]

            evaluator = SparseNanoBEIREvaluator(
                dataset_names=datasets,
                show_progress_bar=True,
                batch_size=32,
            )

            # Run evaluation
            results = evaluator(model)
            '''
            Evaluating NanoQuoraRetrieval
            Information Retrieval Evaluation of the model on the NanoQuoraRetrieval dataset:
            Queries: 50
            Corpus: 5046

            Score-Function: dot
            Accuracy@1: 92.00%
            Accuracy@3: 96.00%
            Accuracy@5: 98.00%
            Accuracy@10: 100.00%
            Precision@1: 92.00%
            Precision@3: 40.00%
            Precision@5: 24.80%
            Precision@10: 13.20%
            Recall@1: 79.73%
            Recall@3: 92.53%
            Recall@5: 94.93%
            Recall@10: 98.27%
            MRR@10: 0.9439
            NDCG@10: 0.9339
            MAP@100: 0.9070
            Model Query Sparsity: Active Dimensions: 59.4, Sparsity Ratio: 0.9981
            Model Corpus Sparsity: Active Dimensions: 61.9, Sparsity Ratio: 0.9980

            Information Retrieval Evaluation of the model on the NanoMSMARCO dataset:
            Queries: 50
            Corpus: 5043

            Score-Function: dot
            Accuracy@1: 48.00%
            Accuracy@3: 74.00%
            Accuracy@5: 76.00%
            Accuracy@10: 86.00%
            Precision@1: 48.00%
            Precision@3: 24.67%
            Precision@5: 15.20%
            Precision@10: 8.60%
            Recall@1: 48.00%
            Recall@3: 74.00%
            Recall@5: 76.00%
            Recall@10: 86.00%
            MRR@10: 0.6191
            NDCG@10: 0.6780
            MAP@100: 0.6277
            Model Query Sparsity: Active Dimensions: 45.4, Sparsity Ratio: 0.9985
            Model Corpus Sparsity: Active Dimensions: 122.6, Sparsity Ratio: 0.9960

            Average Queries: 50.0
            Average Corpus: 5044.5
            Aggregated for Score Function: dot
            Accuracy@1: 70.00%
            Accuracy@3: 85.00%
            Accuracy@5: 87.00%
            Accuracy@10: 93.00%
            Precision@1: 70.00%
            Recall@1: 63.87%
            Precision@3: 32.33%
            Recall@3: 83.27%
            Precision@5: 20.00%
            Recall@5: 85.47%
            Precision@10: 10.90%
            Recall@10: 92.13%
            MRR@10: 0.7815
            NDCG@10: 0.8060
            Model Query Sparsity: Active Dimensions: 52.4, Sparsity Ratio: 0.9983
            Model Corpus Sparsity: Active Dimensions: 92.2, Sparsity Ratio: 0.9970
            '''
            # Print the results
            print(f"Primary metric: {evaluator.primary_metric}")
            # => Primary metric: NanoBEIR_mean_dot_ndcg@10
            print(f"Primary metric value: {results[evaluator.primary_metric]:.4f}")
            # => Primary metric value: 0.8060
    """

    information_retrieval_class = SparseInformationRetrievalEvaluator

    def __init__(
        self,
        dataset_names: list[DatasetNameType] | None = None,
        mrr_at_k: list[int] = [10],
        ndcg_at_k: list[int] = [10],
        accuracy_at_k: list[int] = [1, 3, 5, 10],
        precision_recall_at_k: list[int] = [1, 3, 5, 10],
        map_at_k: list[int] = [100],
        show_progress_bar: bool = False,
        batch_size: int = 32,
        write_csv: bool = True,
        max_active_dims: int | None = None,
        score_functions: dict[str, Callable[[Tensor, Tensor], Tensor]] | None = None,
        main_score_function: str | SimilarityFunction | None = None,
        aggregate_fn: Callable[[list[float]], float] = np.mean,
        aggregate_key: str = "mean",
        query_prompts: str | dict[str, str] | None = None,
        corpus_prompts: str | dict[str, str] | None = None,
        write_predictions: bool = False,
    ):
        self.max_active_dims = max_active_dims
        self.sparsity_stats = defaultdict(list)
        super().__init__(
            dataset_names=dataset_names, mrr_at_k=mrr_at_k, ndcg_at_k=ndcg_at_k, accuracy_at_k=accuracy_at_k,
            precision_recall_at_k=precision_recall_at_k, map_at_k=map_at_k, show_progress_bar=show_progress_bar,
            batch_size=batch_size, write_csv=write_csv, score_functions=score_functions,
            main_score_function=main_score_function, aggregate_fn=aggregate_fn, aggregate_key=aggregate_key,
            query_prompts=query_prompts, corpus_prompts=corpus_prompts, write_predictions=write_predictions,
        )
        if self.max_active_dims is not None:
            self.name += f"_{self.max_active_dims}"

    def _get_human_readable_name(self, dataset_name: DatasetNameType) -> str:
        human_readable_name = super()._get_human_readable_name(dataset_name)
        if self.max_active_dims is not None:
            human_readable_name += f"_{self.max_active_dims}"
        return human_readable_name

    def _append_csv_headers(self, similarity_fn_names) -> None:
        super()._append_csv_headers(similarity_fn_names)
        self.csv_headers.extend(
            ["query_active_dims", "query_sparsity_ratio", "corpus_active_dims", "corpus_sparsity_ratio"]
        )

    def __call__(
        self, model: SparseEncoder, output_path: str | None = None, epoch: int = -1, steps: int = -1, *args, **kwargs
    ) -> dict[str, float]:
        self.sparsity_stats = defaultdict(list)
        self.lengths = defaultdict(list)
        per_dataset_results = super().__call__(
            model, *args, output_path=output_path, epoch=epoch, steps=steps, **kwargs
        )

        # Collect dataset sizes and sparsity statistics from the per-dataset evaluators
        for evaluator in self.evaluators:
            self.lengths["query"].append(len(evaluator.queries))
            self.lengths["corpus"].append(len(evaluator.corpus))
            for key, value in evaluator.sparsity_stats.items():
                self.sparsity_stats[key].append(value)

        # Average the sparsity statistics, weighted by the number of queries/documents per dataset
        for key, value in self.sparsity_stats.items():
            self.sparsity_stats[key] = sum(
                val * length for val, length in zip(value, self.lengths[key.split("_")[0]])
            ) / sum(self.lengths[key.split("_")[0]])

        per_dataset_results.update(self.prefix_name_to_metrics(self.sparsity_stats, self.name))
        aggregated_results = {
            key: value for key, value in per_dataset_results.items() if key.startswith(self.name)
        }
        self.store_metrics_in_model_card_data(model, aggregated_results, epoch, steps)
        logger.info(
            f"Model Query Sparsity: Active Dimensions: {self.sparsity_stats['query_active_dims']:.1f}, "
            f"Sparsity Ratio: {self.sparsity_stats['query_sparsity_ratio']:.4f}"
        )
        logger.info(
            f"Model Corpus Sparsity: Active Dimensions: {self.sparsity_stats['corpus_active_dims']:.1f}, "
            f"Sparsity Ratio: {self.sparsity_stats['corpus_sparsity_ratio']:.4f}"
        )
        if output_path is not None and self.write_csv:
            # Append the aggregated sparsity statistics to the CSV row that was just written
            append_to_last_row(os.path.join(output_path, self.csv_file), self.sparsity_stats.values())
        return per_dataset_results

    def _load_dataset(
        self, dataset_name: DatasetNameType, **ir_evaluator_kwargs
    ) -> SparseInformationRetrievalEvaluator:
        ir_evaluator_kwargs["max_active_dims"] = self.max_active_dims
        # The dense truncate_dim argument does not apply to sparse evaluators
        ir_evaluator_kwargs.pop("truncate_dim", None)
        return super()._load_dataset(dataset_name, **ir_evaluator_kwargs)

    def get_config_dict(self) -> dict[str, Any]:
        config_dict = super().get_config_dict()
        if self.max_active_dims is not None:
            config_dict["max_active_dims"] = self.max_active_dims
        return config_dict