
from __future__ import annotations

import logging
import os
from collections import defaultdict
from typing import TYPE_CHECKING, Any, Callable

import torch

from sentence_transformers.evaluation import InformationRetrievalEvaluator
from sentence_transformers.util import append_to_last_row

if TYPE_CHECKING:
    import numpy as np

    from sentence_transformers.similarity_functions import SimilarityFunction
    from sentence_transformers.sparse_encoder.SparseEncoder import SparseEncoder

logger = logging.getLogger(__name__)


class SparseInformationRetrievalEvaluator(InformationRetrievalEvaluator):
    """
    This evaluator extends :class:`~sentence_transformers.evaluation.InformationRetrievalEvaluator` but is specifically designed for sparse encoder models.

    This class evaluates an Information Retrieval (IR) setting.

    Given a set of queries and a large corpus, it retrieves the top-k most similar documents for each query. It measures
    Mean Reciprocal Rank (MRR), Recall@k, and Normalized Discounted Cumulative Gain (NDCG), among other metrics.

    Args:
        queries (Dict[str, str]): A dictionary mapping query IDs to queries.
        corpus (Dict[str, str]): A dictionary mapping document IDs to documents.
        relevant_docs (Dict[str, Set[str]]): A dictionary mapping query IDs to a set of relevant document IDs.
        corpus_chunk_size (int): The size of each chunk of the corpus. Defaults to 50000.
        mrr_at_k (List[int]): A list of integers representing the values of k for MRR calculation. Defaults to [10].
        ndcg_at_k (List[int]): A list of integers representing the values of k for NDCG calculation. Defaults to [10].
        accuracy_at_k (List[int]): A list of integers representing the values of k for accuracy calculation. Defaults to [1, 3, 5, 10].
        precision_recall_at_k (List[int]): A list of integers representing the values of k for precision and recall calculation. Defaults to [1, 3, 5, 10].
        map_at_k (List[int]): A list of integers representing the values of k for MAP calculation. Defaults to [100].
        show_progress_bar (bool): Whether to show a progress bar during evaluation. Defaults to False.
        batch_size (int): The batch size for evaluation. Defaults to 32.
        name (str): A name for the evaluation. Defaults to "".
        write_csv (bool): Whether to write the evaluation results to a CSV file. Defaults to True.
        max_active_dims (Optional[int], optional): The maximum number of active dimensions to use.
            `None` uses the model's current `max_active_dims`. Defaults to None.
        score_functions (Dict[str, Callable[[Tensor, Tensor], Tensor]]): A dictionary mapping score function names to score functions. Defaults to the ``similarity`` function from the ``model``.
        main_score_function (Union[str, SimilarityFunction], optional): The main score function to use for evaluation. Defaults to None.
        query_prompt (str, optional): The prompt to be used when encoding the queries. Defaults to None.
        query_prompt_name (str, optional): The name of the prompt to be used when encoding the queries. Defaults to None.
        corpus_prompt (str, optional): The prompt to be used when encoding the corpus. Defaults to None.
        corpus_prompt_name (str, optional): The name of the prompt to be used when encoding the corpus. Defaults to None.
        write_predictions (bool): Whether to write the predictions to a JSONL file. Defaults to False.
            This can be useful for downstream evaluation, as the predictions can be used as input to the :class:`~sentence_transformers.sparse_encoder.evaluation.ReciprocalRankFusionEvaluator`, which accepts precomputed predictions.

    Example:
        ::

            import logging
            import random

            from datasets import load_dataset

            from sentence_transformers import SparseEncoder
            from sentence_transformers.sparse_encoder.evaluation import SparseInformationRetrievalEvaluator

            logging.basicConfig(format="%(message)s", level=logging.INFO)

            # Load a model
            model = SparseEncoder("naver/splade-cocondenser-ensembledistil")

            # Load the NFcorpus IR dataset (https://huggingface.co/datasets/BeIR/nfcorpus, https://huggingface.co/datasets/BeIR/nfcorpus-qrels)
            corpus = load_dataset("BeIR/nfcorpus", "corpus", split="corpus")
            queries = load_dataset("BeIR/nfcorpus", "queries", split="queries")
            relevant_docs_data = load_dataset("BeIR/nfcorpus-qrels", split="test")

            # For this dataset, we want to concatenate the title and texts for the corpus
            corpus = corpus.map(lambda x: {"text": x["title"] + " " + x["text"]}, remove_columns=["title"])

            # Shrink the corpus size heavily to only the relevant documents + 1,000 random documents
            required_corpus_ids = set(map(str, relevant_docs_data["corpus-id"]))
            required_corpus_ids |= set(random.sample(corpus["_id"], k=1000))
            corpus = corpus.filter(lambda x: x["_id"] in required_corpus_ids)

            # Convert the datasets to dictionaries
            corpus = dict(zip(corpus["_id"], corpus["text"]))  # Our corpus (cid => document)
            queries = dict(zip(queries["_id"], queries["text"]))  # Our queries (qid => question)
            relevant_docs = {}  # Query ID to relevant documents (qid => set([relevant_cids]))
            for qid, corpus_ids in zip(relevant_docs_data["query-id"], relevant_docs_data["corpus-id"]):
                qid = str(qid)
                corpus_ids = str(corpus_ids)
                if qid not in relevant_docs:
                    relevant_docs[qid] = set()
                relevant_docs[qid].add(corpus_ids)

            # Given queries, a corpus, and a mapping of queries to relevant documents, the SparseInformationRetrievalEvaluator computes different IR metrics.
            ir_evaluator = SparseInformationRetrievalEvaluator(
                queries=queries,
                corpus=corpus,
                relevant_docs=relevant_docs,
                name="BeIR-nfcorpus-subset-test",
                show_progress_bar=True,
                batch_size=16,
            )

            # Run evaluation
            results = ir_evaluator(model)
            '''
            Queries: 323
            Corpus: 3269

            Score-Function: dot
            Accuracy@1: 50.77%
            Accuracy@3: 64.40%
            Accuracy@5: 66.87%
            Accuracy@10: 71.83%
            Precision@1: 50.77%
            Precision@3: 40.45%
            Precision@5: 34.06%
            Precision@10: 25.98%
            Recall@1: 6.27%
            Recall@3: 11.69%
            Recall@5: 13.74%
            Recall@10: 17.23%
            MRR@10: 0.5814
            NDCG@10: 0.3621
            MAP@100: 0.1838
            Model Query Sparsity: Active Dimensions: 40.0, Sparsity Ratio: 0.9987
            Model Corpus Sparsity: Active Dimensions: 206.2, Sparsity Ratio: 0.9932
            '''
            # Print the results
            print(f"Primary metric: {ir_evaluator.primary_metric}")
            # => Primary metric: BeIR-nfcorpus-subset-test_dot_ndcg@10
            print(f"Primary metric value: {results[ir_evaluator.primary_metric]:.4f}")
            # => Primary metric value: 0.3621
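
            # Optional variant (a sketch, not part of the original example): the documented
            # `max_active_dims` parameter caps how many non-zero dimensions each sparse
            # embedding keeps during evaluation, trading some retrieval quality for smaller,
            # faster representations. For instance:
            #
            # ir_evaluator = SparseInformationRetrievalEvaluator(
            #     queries=queries,
            #     corpus=corpus,
            #     relevant_docs=relevant_docs,
            #     name="BeIR-nfcorpus-subset-test",
            #     max_active_dims=64,
            # )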

    """

    def __init__(
        self,
        queries: dict[str, str],
        corpus: dict[str, str],
        relevant_docs: dict[str, set[str]],
        corpus_chunk_size: int = 50000,
        mrr_at_k: list[int] = [10],
        ndcg_at_k: list[int] = [10],
        accuracy_at_k: list[int] = [1, 3, 5, 10],
        precision_recall_at_k: list[int] = [1, 3, 5, 10],
        map_at_k: list[int] = [100],
        show_progress_bar: bool = False,
        batch_size: int = 32,
        name: str = "",
        write_csv: bool = True,
        max_active_dims: int | None = None,
        score_functions: dict[str, Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] | None = None,
        main_score_function: str | SimilarityFunction | None = None,
        query_prompt: str | None = None,
        query_prompt_name: str | None = None,
        corpus_prompt: str | None = None,
        corpus_prompt_name: str | None = None,
        write_predictions: bool = False,
    ) -> None:
        self.max_active_dims = max_active_dims
        # Per-split sparsity statistics, filled by `embed_inputs` during evaluation
        self.sparsity_stats = {"query": defaultdict(list), "corpus": defaultdict(list)}
        self.corpus_lengths = []
        return super().__init__(
            queries=queries,
            corpus=corpus,
            relevant_docs=relevant_docs,
            corpus_chunk_size=corpus_chunk_size,
            mrr_at_k=mrr_at_k,
            ndcg_at_k=ndcg_at_k,
            accuracy_at_k=accuracy_at_k,
            precision_recall_at_k=precision_recall_at_k,
            map_at_k=map_at_k,
            show_progress_bar=show_progress_bar,
            batch_size=batch_size,
            name=name,
            write_csv=write_csv,
            score_functions=score_functions,
            main_score_function=main_score_function,
            query_prompt=query_prompt,
            query_prompt_name=query_prompt_name,
            corpus_prompt=corpus_prompt,
            corpus_prompt_name=corpus_prompt_name,
            write_predictions=write_predictions,
        )

    def _append_csv_headers(self, similarity_fn_names) -> None:
        super()._append_csv_headers(similarity_fn_names)
        self.csv_headers.extend(
            ["query_active_dims", "query_sparsity_ratio", "corpus_active_dims", "corpus_sparsity_ratio"]
        )

    def __call__(
        self, model: SparseEncoder, output_path: str | None = None, epoch: int = -1, steps: int = -1, *args, **kwargs
    ) -> dict[str, float]:
        # Reset the sparsity statistics for this evaluation run
        self.sparsity_stats = {"query": defaultdict(list), "corpus": defaultdict(list)}
        self.corpus_lengths = []
        metrics = super().__call__(model=model, output_path=output_path, epoch=epoch, steps=steps)
        for prefix in ["query", "corpus"]:
            for key, value in self.sparsity_stats[prefix].items():
                if prefix == "query":
                    self.sparsity_stats[prefix][key] = sum(value) / len(value)
                else:
                    # The corpus is encoded in chunks, so weigh each chunk's statistic by its size
                    self.sparsity_stats[prefix][key] = sum(
                        val * length for val, length in zip(value, self.corpus_lengths)
                    ) / sum(self.corpus_lengths)
        self.sparsity_stats = {
            f"{prefix}_{key}": value
            for prefix, values in self.sparsity_stats.items()
            for key, value in values.items()
        }
        metrics.update(self.prefix_name_to_metrics(self.sparsity_stats, self.name))
        self.store_metrics_in_model_card_data(model, metrics, epoch, steps)
        logger.info(
            f"Model Query Sparsity: Active Dimensions: {self.sparsity_stats['query_active_dims']:.1f}, "
            f"Sparsity Ratio: {self.sparsity_stats['query_sparsity_ratio']:.4f}"
        )
        logger.info(
            f"Model Corpus Sparsity: Active Dimensions: {self.sparsity_stats['corpus_active_dims']:.1f}, "
            f"Sparsity Ratio: {self.sparsity_stats['corpus_sparsity_ratio']:.4f}"
        )
        if output_path is not None and self.write_csv:
            append_to_last_row(os.path.join(output_path, self.csv_file), self.sparsity_stats.values())

        return metrics

    def compute_metrices(
        self, model: SparseEncoder, corpus_model=None, corpus_embeddings: torch.Tensor | None = None
    ) -> dict[str, float]:
        return super().compute_metrices(model=model, corpus_model=corpus_model, corpus_embeddings=corpus_embeddings)

    def embed_inputs(
        self,
        model: SparseEncoder,
        sentences: str | list[str] | np.ndarray,
        encode_fn_name: str | None = None,
        prompt_name: str | None = None,
        prompt: str | None = None,
        **kwargs,
    ) -> torch.Tensor:
        # Pick the encoding method based on whether queries or documents are being embedded
        if encode_fn_name is None:
            encode_fn = model.encode
        elif encode_fn_name == "query":
            encode_fn = model.encode_query
        elif encode_fn_name == "document":
            encode_fn = model.encode_document
        embeddings = encode_fn(
            sentences,
            prompt_name=prompt_name,
            prompt=prompt,
            batch_size=self.batch_size,
            show_progress_bar=self.show_progress_bar,
            convert_to_sparse_tensor=True,
            save_to_cpu=True,
            max_active_dims=self.max_active_dims,
            **kwargs,
        )
        # Record the sparsity statistics of the embeddings so they can be reported after evaluation
        stat = "query" if encode_fn_name in ["query", None] else "corpus"
        for key, value in model.sparsity(embeddings).items():
            self.sparsity_stats[stat][key].append(value)
        if stat == "corpus":
            self.corpus_lengths.append(len(sentences))
        return embeddings

    def store_metrics_in_model_card_data(
        self, model: SparseEncoder, metrics: dict[str, Any], epoch: int = 0, step: int = 0
    ) -> None:
        model.model_card_data.set_evaluation_metrics(self, metrics, epoch=epoch, step=step)

    def get_config_dict(self) -> dict[str, Any]:
        config_dict = super().get_config_dict()
        if self.max_active_dims is not None:
            config_dict["max_active_dims"] = self.max_active_dims
        return config_dict