
    wfh                        d dl mZ d dlZd dlZd dlmZ d dlmZmZ d dl	Z	d dl
mZ d dlmZ erd dlZd dl	mZ d dlmZ  ej&                  e      Z G d	 d
e      Zy)    )annotationsN)defaultdict)TYPE_CHECKINGAny)TranslationEvaluator)append_to_last_row)Tensor)SparseEncoderc                       e Zd ZdZ	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	 d		 	 	 	 	 	 	 	 	 d
 fdZ	 	 	 	 	 	 ddZ	 d	 	 	 	 	 	 	 	 	 ddZd fdZ xZ	S )SparseTranslationEvaluatora  
    This evaluator extends :class:`~sentence_transformers.evaluation.TranslationEvaluator` but is specifically designed for sparse encoder models.

    Given two sets of sentences in different languages, e.g. (en_1, en_2, en_3...) and (fr_1, fr_2, fr_3, ...),
    and assuming that fr_i is the translation of en_i.
    Checks if vec(en_i) has the highest similarity to vec(fr_i). Computes the accuracy in both directions

    The labels need to indicate the similarity between the sentences.

    Args:
        source_sentences (List[str]): List of sentences in the source language.
        target_sentences (List[str]): List of sentences in the target language.
        show_progress_bar (bool): Whether to show a progress bar when computing embeddings. Defaults to False.
        batch_size (int): The batch size to compute sentence embeddings. Defaults to 16.
        name (str): The name of the evaluator. Defaults to an empty string.
        print_wrong_matches (bool): Whether to print incorrect matches. Defaults to False.
        write_csv (bool): Whether to write the evaluation results to a CSV file. Defaults to True.
        max_active_dims (Optional[int], optional): The maximum number of active dimensions to use.
            `None` uses the model's current `max_active_dims`. Defaults to None.

    Example:
        ::

            import logging

            from datasets import load_dataset

            from sentence_transformers import SparseEncoder
            from sentence_transformers.sparse_encoder.evaluation import SparseTranslationEvaluator

            logging.basicConfig(format="%(message)s", level=logging.INFO)

            # Load a model, not mutilingual but hope to see some on the hub soon
            model = SparseEncoder("naver/splade-cocondenser-ensembledistil")

            # Load a parallel sentences dataset
            dataset = load_dataset("sentence-transformers/parallel-sentences-news-commentary", "en-nl", split="train[:1000]")

            # Initialize the TranslationEvaluator using the same texts from two languages
            translation_evaluator = SparseTranslationEvaluator(
                source_sentences=dataset["english"],
                target_sentences=dataset["non_english"],
                name="news-commentary-en-nl",
            )
            results = translation_evaluator(model)
            '''
            Evaluating translation matching Accuracy of the model on the news-commentary-en-nl dataset:
            Accuracy src2trg: 41.40
            Accuracy trg2src: 47.60
            Model Sparsity: Active Dimensions: 112.3, Sparsity Ratio: 0.9963
            '''
            # Print the results
            print(f"Primary metric: {translation_evaluator.primary_metric}")
            # => Primary metric: news-commentary-en-nl_mean_accuracy
            print(f"Primary metric value: {results[translation_evaluator.primary_metric]:.4f}")
            # => Primary metric value: 0.4450

    c	           	         || _         t        t              | _        t        	|   |||||||       | j                  j                  ddg       y )N)show_progress_bar
batch_sizenameprint_wrong_matches	write_csvactive_dimssparsity_ratio)max_active_dimsr   listsparsity_statssuper__init__csv_headersextend)
selfsource_sentencestarget_sentencesr   r   r   r   r   r   	__class__s
            /home/chris/cleankitchens-env/lib/python3.12/site-packages/sentence_transformers/sparse_encoder/evaluation/SparseTranslationEvaluator.pyr   z#SparseTranslationEvaluator.__init__S   s]      /)$//! 3 	 	
 	0@ AB    c                   t        t              | _        t        |   ||||      }| j                  j                         D ])  \  }}t        |      t        |      z  | j                  |<   + |j                  | j                  | j                  | j                               | j                  ||||       t        j                  d| j                  d   dd| j                  d   d       |\| j                  rPt        t         j"                  j%                  || j&                        | j                  d   | j                  d   g       |S )N)modeloutput_pathepochstepsz#Model Sparsity: Active Dimensions: r   z.1fz, Sparsity Ratio: r   z.4f)r   r   r   r   __call__itemssumlenupdateprefix_name_to_metricsr    store_metrics_in_model_card_dataloggerinfor   r   ospathjoincsv_file)	r   r#   r$   r%   r&   metricskeyvaluer   s	           r    r'   z#SparseTranslationEvaluator.__call__k   sE    *$/'"Ku\a"b--335 	?JC'*5zCJ'>D$	? 	t2243F3F		RS--eWeUK1$2E2Em2TUX1YYklpll  AQ  mR  SV  lW  X	
 "t~~[$--8$$]3T5H5HIY5Z[
 r!   c           
     *    |j                   |f| j                  | j                  ddd| j                  d|}|j	                  t        j                  |            }|j                         D ]#  \  }}| j                  |   j                  |       % |S )NFT)r   r   convert_to_tensorconvert_to_sparse_tensorsave_to_cpur   )
encoder   r   r   sparsitytorchstackr(   r   append)r   r#   	sentenceskwargs
embeddingsstatr5   r6   s           r    embed_inputsz'SparseTranslationEvaluator.embed_inputs   s     "U\\	
"44#%) 00	
 	

 ~~ekk*56**, 	3JC$++E2	3r!   c                B    |j                   j                  | |||       y )N)r%   step)model_card_dataset_evaluation_metrics)r   r#   r4   r%   rF   s        r    r-   z;SparseTranslationEvaluator.store_metrics_in_model_card_data   s"     	44T7%VZ4[r!   c                Z    t         |          }| j                  | j                  |d<   |S )Nr   )r   get_config_dictr   )r   config_dictr   s     r    rJ   z*SparseTranslationEvaluator.get_config_dict   s3    g-/+-1-A-AK)*r!   )F    FTN)r   	list[str]r   rN   r   boolr   intr   strr   rO   r   rO   r   z
int | None)NrR   )
r#   r
   r$   z
str | Noner%   rP   r&   rP   returnzdict[str, float])r#   r
   r@   zstr | list[str] | np.ndarrayrS   zlist[Tensor])r   r   )
r#   r
   r4   dict[str, Any]r%   rP   rF   rP   rS   None)rS   rT   )
__name__
__module____qualname____doc__r   r'   rD   r-   rJ   __classcell__)r   s   @r    r   r      s   9~ #($)&*C#C $C  	C
 C C "C C $C2 ce"1;KN\_	* 0
 
, Z[\"\-;\DG\SV\	\
 r!   r   )
__future__r   loggingr0   collectionsr   typingr   r   r=    sentence_transformers.evaluationr   sentence_transformers.utilr   numpynpr	   2sentence_transformers.sparse_encoder.SparseEncoderr
   	getLoggerrV   r.   r    r!   r    <module>rf      sL    "  	 # %  A 9P 
		8	$G!5 Gr!   