diff --git a/haystack/retriever/dpr_utils.py b/haystack/retriever/dpr_utils.py deleted file mode 100644 index 86a66949c..000000000 --- a/haystack/retriever/dpr_utils.py +++ /dev/null @@ -1,726 +0,0 @@ -#!/usr/bin/env python3 -# Utilility functions and classes required for DensePassageRetriever -# -# Building upon the code (https://github.com/facebookresearch/DPR) published by Facebook research under Creative Commons License (https://github.com/facebookresearch/DPR/blob/master/LICENSE) -# It is based on the following research work: -# Karpukhin, Vladimir, et al. "Dense Passage Retrieval for Open-Domain Question Answering." arXiv preprint arXiv:2004.04906 (2020). -# (https://arxiv.org/abs/2004.04906) - -import logging -from typing import Tuple, Union, List - -import gzip -import re - -import torch -from torch import nn, Tensor - -from transformers.modeling_bert import BertModel, BertConfig -from transformers.file_utils import add_start_docstrings_to_callable -from transformers.modeling_utils import PreTrainedModel -from transformers.file_utils import add_start_docstrings -from transformers.tokenization_bert import BertTokenizer, BertTokenizerFast - -import logging -from dataclasses import dataclass -from typing import Optional, Tuple, Union - - - -logger = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) - -_CONFIG_FOR_DOC = "DPRConfig" - -DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST = [ - "facebook/dpr-ctx_encoder-single-nq-base", -] -DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST = [ - "facebook/dpr-question_encoder-single-nq-base", -] -DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST = [ - "facebook/dpr-reader-single-nq-base", -] -# CLASSES -############ -# file_utils -############ - -class ModelOutput: - """ - Base class for all model outputs as dataclass. Has a ``__getitem__`` that allows preprocessor by integer or slice (like - a tuple) or strings (like a dictionnary) that will ignore the ``None`` attributes. - """ - - def to_tuple(self): - """ - Converts :obj:`self` to a tuple. - - Return: A tuple containing all non-:obj:`None` attributes of the :obj:`self`. - """ - return tuple(getattr(self, f) for f in self.__dataclass_fields__.keys() if getattr(self, f, None) is not None) - - def to_dict(self): - """ - Converts :obj:`self` to a Python dictionary. - - Return: A dictionary containing all non-:obj:`None` attributes of the :obj:`self`. - """ - return {f: getattr(self, f) for f in self.__dataclass_fields__.keys() if getattr(self, f, None) is not None} - - def __getitem__(self, i): - return self.to_dict()[i] if isinstance(i, str) else self.to_tuple()[i] - - def __len__(self): - return len(self.to_tuple()) - -RETURN_INTRODUCTION = r""" - Returns: - :class:`~{full_output_type}` or :obj:`tuple(torch.FloatTensor)` (if ``return_tuple=True`` is passed or when ``config.return_tuple=True``) comprising various elements depending on the configuration (:class:`~transformers.{config_class}`) and inputs: -""" - -def _prepare_output_docstrings(output_type, config_class): - """ - Prepares the return part of the docstring using `output_type`. 
- """ - docstrings = output_type.__doc__ - - # Remove the head of the docstring to keep the list of args only - lines = docstrings.split("\n") - i = 0 - while i < len(lines) and re.search(r"^\s*(Args|Parameters):\s*$", lines[i]) is None: - i += 1 - if i < len(lines): - docstrings = "\n".join(lines[(i + 1) :]) - - # Add the return introduction - full_output_type = f"{output_type.__module__}.{output_type.__name__}" - intro = RETURN_INTRODUCTION.format(full_output_type=full_output_type, config_class=config_class) - return intro + docstrings - -def replace_return_docstrings(output_type=None, config_class=None): - def docstring_decorator(fn): - docstrings = fn.__doc__ - lines = docstrings.split("\n") - i = 0 - while i < len(lines) and re.search(r"^\s*Returns?:\s*$", lines[i]) is None: - i += 1 - if i < len(lines): - lines[i] = _prepare_output_docstrings(output_type, config_class) - docstrings = "\n".join(lines) - else: - raise ValueError( - f"The function {fn} should have an empty 'Return:' or 'Returns:' in its docstring as placeholder, current docstring is:\n{docstrings}" - ) - fn.__doc__ = docstrings - return fn - - return docstring_decorator - -########### -# modeling_outputs -########### -@dataclass -class BaseModelOutputWithPooling(ModelOutput): - """ - Base class for model's outputs that also contains a pooling of the last hidden states. - - Args: - last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): - Sequence of hidden-states at the output of the last layer of the model. - pooler_output (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, hidden_size)`): - Last layer hidden-state of the first token of the sequence (classification token) - further processed by a Linear layer and a Tanh activation function. The Linear - layer weights are trained from the next sentence prediction (classification) - objective during pretraining. - - This output is usually *not* a good summary - of the semantic content of the input, you're often better with averaging or pooling - the sequence of hidden-states for the whole input sequence. - hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): - Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) - of shape :obj:`(batch_size, sequence_length, hidden_size)`. - - Hidden-states of the model at the output of each layer plus the initial embedding outputs. - attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): - Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape - :obj:`(batch_size, num_heads, sequence_length, sequence_length)`. - - Attentions weights after the attention softmax, used to compute the weighted average in the self-attention - heads. 
- """ - - last_hidden_state: torch.FloatTensor - pooler_output: torch.FloatTensor - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None - - -########### -#tokenization_dpr -########### - -VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"} - -CONTEXT_ENCODER_PRETRAINED_VOCAB_FILES_MAP = { - "vocab_file": { - "facebook/dpr-ctx_encoder-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", - } -} -QUESTION_ENCODER_PRETRAINED_VOCAB_FILES_MAP = { - "vocab_file": { - "facebook/dpr-question_encoder-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", - } -} -READER_PRETRAINED_VOCAB_FILES_MAP = { - "vocab_file": { - "facebook/dpr-reader-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt", - } -} - -CONTEXT_ENCODER_PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { - "facebook/dpr-ctx_encoder-single-nq-base": 512, -} -QUESTION_ENCODER_PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { - "facebook/dpr-question_encoder-single-nq-base": 512, -} -READER_PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { - "facebook/dpr-reader-single-nq-base": 512, -} - - -CONTEXT_ENCODER_PRETRAINED_INIT_CONFIGURATION = { - "facebook/dpr-ctx_encoder-single-nq-base": {"do_lower_case": True}, -} -QUESTION_ENCODER_PRETRAINED_INIT_CONFIGURATION = { - "facebook/dpr-question_encoder-single-nq-base": {"do_lower_case": True}, -} -READER_PRETRAINED_INIT_CONFIGURATION = { - "facebook/dpr-reader-single-nq-base": {"do_lower_case": True}, -} - - -class DPRContextEncoderTokenizer(BertTokenizer): - r""" - Constructs a DPRContextEncoderTokenizer. - - :class:`~transformers.DPRContextEncoderTokenizer` is identical to :class:`~transformers.BertTokenizer` and runs end-to-end - tokenization: punctuation splitting + wordpiece. - - Refer to superclass :class:`~transformers.BertTokenizer` for usage examples and documentation concerning - parameters. - """ - - vocab_files_names = VOCAB_FILES_NAMES - pretrained_vocab_files_map = CONTEXT_ENCODER_PRETRAINED_VOCAB_FILES_MAP - max_model_input_sizes = CONTEXT_ENCODER_PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES - pretrained_init_configuration = CONTEXT_ENCODER_PRETRAINED_INIT_CONFIGURATION - - -class DPRContextEncoderTokenizerFast(BertTokenizerFast): - r""" - Constructs a "Fast" DPRContextEncoderTokenizer (backed by HuggingFace's `tokenizers` library). - - :class:`~transformers.DPRContextEncoderTokenizerFast` is identical to :class:`~transformers.BertTokenizerFast` and runs end-to-end - tokenization: punctuation splitting + wordpiece. - - Refer to superclass :class:`~transformers.BertTokenizerFast` for usage examples and documentation concerning - parameters. - """ - - vocab_files_names = VOCAB_FILES_NAMES - pretrained_vocab_files_map = CONTEXT_ENCODER_PRETRAINED_VOCAB_FILES_MAP - max_model_input_sizes = CONTEXT_ENCODER_PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES - pretrained_init_configuration = CONTEXT_ENCODER_PRETRAINED_INIT_CONFIGURATION - - -class DPRQuestionEncoderTokenizer(BertTokenizer): - r""" - Constructs a DPRQuestionEncoderTokenizer. - - :class:`~transformers.DPRQuestionEncoderTokenizer` is identical to :class:`~transformers.BertTokenizer` and runs end-to-end - tokenization: punctuation splitting + wordpiece. - - Refer to superclass :class:`~transformers.BertTokenizer` for usage examples and documentation concerning - parameters. 
- """ - - vocab_files_names = VOCAB_FILES_NAMES - pretrained_vocab_files_map = QUESTION_ENCODER_PRETRAINED_VOCAB_FILES_MAP - max_model_input_sizes = QUESTION_ENCODER_PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES - pretrained_init_configuration = QUESTION_ENCODER_PRETRAINED_INIT_CONFIGURATION - - -class DPRQuestionEncoderTokenizerFast(BertTokenizerFast): - r""" - Constructs a "Fast" DPRQuestionEncoderTokenizer (backed by HuggingFace's `tokenizers` library). - - :class:`~transformers.DPRQuestionEncoderTokenizerFast` is identical to :class:`~transformers.BertTokenizerFast` and runs end-to-end - tokenization: punctuation splitting + wordpiece. - - Refer to superclass :class:`~transformers.BertTokenizerFast` for usage examples and documentation concerning - parameters. - """ - - vocab_files_names = VOCAB_FILES_NAMES - pretrained_vocab_files_map = QUESTION_ENCODER_PRETRAINED_VOCAB_FILES_MAP - max_model_input_sizes = QUESTION_ENCODER_PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES - pretrained_init_configuration = QUESTION_ENCODER_PRETRAINED_INIT_CONFIGURATION - - - - -########## -# configuration_dpr -########## - - -DPR_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "facebook/dpr-ctx_encoder-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/dpr-ctx_encoder-single-nq-base/config.json", - "facebook/dpr-question_encoder-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/dpr-question_encoder-single-nq-base/config.json", - "facebook/dpr-reader-single-nq-base": "https://s3.amazonaws.com/models.huggingface.co/bert/facebook/dpr-reader-single-nq-base/config.json", -} - - -class DPRConfig(BertConfig): - r""" - :class:`~transformers.DPRConfig` is the configuration class to store the configuration of a - `DPRModel`. - - This is the configuration class to store the configuration of a `DPRContextEncoder`, `DPRQuestionEncoder`, or a `DPRReader`. - It is used to instantiate the components of the DPR model. - - Args: - projection_dim (:obj:`int`, optional, defaults to 0): - Dimension of the projection for the context and question encoders. - If it is set to zero (default), then no projection is done. - """ - model_type = "dpr" - - def __init__(self, projection_dim: int = 0, **kwargs): # projection of the encoders, 0 for no projection - super().__init__(**kwargs) - self.projection_dim = projection_dim - -########## -# Outputs -########## - - -@dataclass -class DPRContextEncoderOutput(ModelOutput): - """ - Class for outputs of :class:`~transformers.DPRQuestionEncoder`. - - Args: - pooler_output: (:obj:``torch.FloatTensor`` of shape ``(batch_size, embeddings_size)``): - The DPR encoder outputs the `pooler_output` that corresponds to the context representation. - Last layer hidden-state of the first token of the sequence (classification token) - further processed by a Linear layer. This output is to be used to embed contexts for - nearest neighbors queries with questions embeddings. - hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): - Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) - of shape :obj:`(batch_size, sequence_length, hidden_size)`. - - Hidden-states of the model at the output of each layer plus the initial embedding outputs. 
- attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): - Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape - :obj:`(batch_size, num_heads, sequence_length, sequence_length)`. - - Attentions weights after the attention softmax, used to compute the weighted average in the self-attention - heads. - """ - - pooler_output: torch.FloatTensor - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None - -@dataclass -class DPRQuestionEncoderOutput(ModelOutput): - """ - Class for outputs of :class:`~transformers.DPRQuestionEncoder`. - - Args: - pooler_output: (:obj:``torch.FloatTensor`` of shape ``(batch_size, embeddings_size)``): - The DPR encoder outputs the `pooler_output` that corresponds to the question representation. - Last layer hidden-state of the first token of the sequence (classification token) - further processed by a Linear layer. This output is to be used to embed questions for - nearest neighbors queries with context embeddings. - hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): - Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) - of shape :obj:`(batch_size, sequence_length, hidden_size)`. - - Hidden-states of the model at the output of each layer plus the initial embedding outputs. - attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): - Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape - :obj:`(batch_size, num_heads, sequence_length, sequence_length)`. - - Attentions weights after the attention softmax, used to compute the weighted average in the self-attention - heads. - """ - - pooler_output: torch.FloatTensor - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None - - -@dataclass -class DPRReaderOutput(ModelOutput): - """ - Class for outputs of :class:`~transformers.DPRQuestionEncoder`. - - Args: - start_logits: (:obj:``torch.FloatTensor`` of shape ``(n_passages, sequence_length)``): - Logits of the start index of the span for each passage. - end_logits: (:obj:``torch.FloatTensor`` of shape ``(n_passages, sequence_length)``): - Logits of the end index of the span for each passage. - relevance_logits: (:obj:`torch.FloatTensor`` of shape ``(n_passages, )``): - Outputs of the QA classifier of the DPRReader that corresponds to the scores of each passage - to answer the question, compared to all the other passages. - hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): - Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) - of shape :obj:`(batch_size, sequence_length, hidden_size)`. - - Hidden-states of the model at the output of each layer plus the initial embedding outputs. - attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): - Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape - :obj:`(batch_size, num_heads, sequence_length, sequence_length)`. 
- - Attentions weights after the attention softmax, used to compute the weighted average in the self-attention - heads. - """ - - start_logits: torch.FloatTensor - end_logits: torch.FloatTensor - relevance_logits: torch.FloatTensor - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None - -################## -# PreTrainedModel -################## - -class DPREncoder(PreTrainedModel): - - base_model_prefix = "bert_model" - - def __init__(self, config: DPRConfig): - super().__init__(config) - self.bert_model = BertModel(config) - assert self.bert_model.config.hidden_size > 0, "Encoder hidden_size can't be zero" - self.projection_dim = config.projection_dim - if self.projection_dim > 0: - self.encode_proj = nn.Linear(self.bert_model.config.hidden_size, config.projection_dim) - self.init_weights() - - def forward( - self, - input_ids: Tensor, - attention_mask: Optional[Tensor] = None, - token_type_ids: Optional[Tensor] = None, - inputs_embeds: Optional[Tensor] = None, - output_attentions: bool = False, - output_hidden_states: bool = False, - return_tuple: bool = True, - ) -> Union[BaseModelOutputWithPooling, Tuple[Tensor, ...]]: - outputs = self.bert_model( - input_ids=input_ids, - attention_mask=attention_mask, - token_type_ids=token_type_ids, - inputs_embeds=inputs_embeds, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - ) - sequence_output, pooled_output = outputs[:2] - pooled_output = sequence_output[:, 0, :] - if self.projection_dim > 0: - pooled_output = self.encode_proj(pooled_output) - - if return_tuple: - return (sequence_output, pooled_output) + outputs[2:] - - return BaseModelOutputWithPooling( - last_hidden_state=sequence_output, - pooler_output=pooled_output, - hidden_states=outputs.hidden_states, - attentions=outputs.attentions, - ) - - @property - def embeddings_size(self) -> int: - if self.projection_dim > 0: - return self.encode_proj.out_features - return self.bert_model.config.hidden_size - - def init_weights(self): - self.bert_model.init_weights() - if self.projection_dim > 0: - self.encode_proj.apply(self.bert_model._init_weights) - - -class DPRPretrainedContextEncoder(PreTrainedModel): - """ An abstract class to handle weights initialization and - a simple interface for downloading and loading pretrained models. - """ - - config_class = DPRConfig - load_tf_weights = None - base_model_prefix = "ctx_encoder" - - def init_weights(self): - self.ctx_encoder.init_weights() - - -class DPRPretrainedQuestionEncoder(PreTrainedModel): - """ An abstract class to handle weights initialization and - a simple interface for downloading and loading pretrained models. - """ - - config_class = DPRConfig - load_tf_weights = None - base_model_prefix = "question_encoder" - - def init_weights(self): - self.question_encoder.init_weights() - - -DPR_START_DOCSTRING = r""" - - This model is a PyTorch `torch.nn.Module `_ sub-class. - Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general - usage and behavior. - - Parameters: - config (:class:`~transformers.DPRConfig`): Model configuration class with all the parameters of the model. - Initializing with a config file does not load the weights associated with the model, only the configuration. - Check out the :meth:`~transformers.PreTrainedModel.from_pretrained` method to load the model weights. 
-""" - - -DPR_ENCODERS_INPUTS_DOCSTRING = r""" - Args: - input_ids: (:obj:``torch.LongTensor`` of shape ``(batch_size, sequence_length)``): - Indices of input sequence tokens in the vocabulary. - To match pre-training, DPR input sequence should be formatted with [CLS] and [SEP] tokens as follows: - - (a) For sequence pairs (for a pair title+text for example): - - ``tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]`` - - ``token_type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1`` - - (b) For single sequences (for a question for example): - - ``tokens: [CLS] the dog is hairy . [SEP]`` - - ``token_type_ids: 0 0 0 0 0 0 0`` - - DPR is a model with absolute position embeddings so it's usually advised to pad the inputs on - the right rather than the left. - - Indices can be obtained using :class:`transformers.DPRTokenizer`. - See :func:`transformers.PreTrainedTokenizer.encode` and - :func:`transformers.PreTrainedTokenizer.convert_tokens_to_ids` for details. - attention_mask: (:obj:``torch.FloatTensor`` of shape ``(batch_size, sequence_length)``, `optional`, defaults to :obj:`None`): - Mask to avoid performing attention on padding token indices. - Mask values selected in ``[0, 1]``: - ``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens. - token_type_ids: (:obj:``torch.LongTensor`` of shape ``(batch_size, sequence_length)``, `optional`, defaults to :obj:`None`): - Segment token indices to indicate first and second portions of the inputs. - Indices are selected in ``[0, 1]``: ``0`` corresponds to a `sentence A` token, ``1`` - corresponds to a `sentence B` token - inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`, defaults to :obj:`None`): - Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. - This is useful if you want more control over how to convert `input_ids` indices into associated vectors - output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`): - If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under returned tensors for more detail. - output_hidden_states (:obj:`bool`, `optional`, defaults to :obj:`None`): - If set to ``True``, the hidden states tensors of all layers are returned. See ``hidden_states`` under returned tensors for more detail. 
-""" - - -@add_start_docstrings( - "The bare DPRContextEncoder transformer outputting pooler outputs as context representations.", - DPR_START_DOCSTRING, -) -class DPRContextEncoder(DPRPretrainedContextEncoder): - def __init__(self, config: DPRConfig): - super().__init__(config) - self.config = config - self.ctx_encoder = DPREncoder(config) - self.init_weights() - - @add_start_docstrings_to_callable(DPR_ENCODERS_INPUTS_DOCSTRING) - @replace_return_docstrings(output_type=DPRContextEncoderOutput, config_class=_CONFIG_FOR_DOC) - def forward( - self, - input_ids: Optional[Tensor] = None, - attention_mask: Optional[Tensor] = None, - token_type_ids: Optional[Tensor] = None, - inputs_embeds: Optional[Tensor] = None, - output_attentions=None, - output_hidden_states=None, - return_tuple=True, - ) -> Union[DPRContextEncoderOutput, Tuple[Tensor, ...]]: - r""" - Return: - - Examples:: - - from transformers import DPRContextEncoder, DPRContextEncoderTokenizer - tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base') - model = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base') - input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"] - embeddings = model(input_ids)[0] # the embeddings of the given context. - - """ - - output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions - output_hidden_states = ( - output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states - ) - return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple - - if input_ids is not None and inputs_embeds is not None: - raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") - elif input_ids is not None: - input_shape = input_ids.size() - device = input_ids.device - elif inputs_embeds is not None: - input_shape = torch.Size(inputs_embeds.size()[:-1]) - device = inputs_embeds.device - else: - raise ValueError("You have to specify either input_ids or inputs_embeds") - - if attention_mask is None: - attention_mask = ( - torch.ones(input_shape, device=device) - if input_ids is None - else (input_ids != self.config.pad_token_id) - ) - if token_type_ids is None: - token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device) - - outputs = self.ctx_encoder( - input_ids=input_ids, - attention_mask=attention_mask, - token_type_ids=token_type_ids, - inputs_embeds=inputs_embeds, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - ) - - if return_tuple: - return outputs[1:] - return DPRContextEncoderOutput( - pooler_output=outputs.pooler_output, hidden_states=outputs.hidden_states, attentions=outputs.attentions - ) - - -@add_start_docstrings( - "The bare DPRQuestionEncoder transformer outputting pooler outputs as question representations.", - DPR_START_DOCSTRING, -) -class DPRQuestionEncoder(DPRPretrainedQuestionEncoder): - def __init__(self, config: DPRConfig): - super().__init__(config) - self.config = config - self.question_encoder = DPREncoder(config) - self.init_weights() - - @add_start_docstrings_to_callable(DPR_ENCODERS_INPUTS_DOCSTRING) - @replace_return_docstrings(output_type=DPRQuestionEncoderOutput, config_class=_CONFIG_FOR_DOC) - def forward( - self, - input_ids: Optional[Tensor] = None, - attention_mask: Optional[Tensor] = None, - token_type_ids: Optional[Tensor] = None, - inputs_embeds: Optional[Tensor] = None, - output_attentions=None, - 
output_hidden_states=None, - return_tuple=True, - ) -> Union[DPRQuestionEncoderOutput, Tuple[Tensor, ...]]: - r""" - Return: - - Examples:: - - from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer - tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base') - model = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base') - input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"] - embeddings = model(input_ids)[0] # the embeddings of the given question. - """ - output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions - output_hidden_states = ( - output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states - ) - return_tuple = return_tuple if return_tuple is not None else self.config.use_return_tuple - - if input_ids is not None and inputs_embeds is not None: - raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") - elif input_ids is not None: - input_shape = input_ids.size() - device = input_ids.device - elif inputs_embeds is not None: - input_shape = torch.Size(inputs_embeds.size()[:-1]) - device = inputs_embeds.device - else: - raise ValueError("You have to specify either input_ids or inputs_embeds") - - if attention_mask is None: - attention_mask = ( - torch.ones(input_shape, device=device) - if input_ids is None - else (input_ids != self.config.pad_token_id) - ) - if token_type_ids is None: - token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device) - - outputs = self.question_encoder( - input_ids=input_ids, - attention_mask=attention_mask, - token_type_ids=token_type_ids, - inputs_embeds=inputs_embeds, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_tuple=return_tuple, - ) - - if return_tuple: - return outputs[1:] - return DPRQuestionEncoderOutput( - pooler_output=outputs.pooler_output, hidden_states=outputs.hidden_states, attentions=outputs.attentions - ) - - -# UTILS -def move_to_device(sample, device): - if len(sample) == 0: - return {} - - def _move_to_device(maybe_tensor, device): - if torch.is_tensor(maybe_tensor): - return maybe_tensor.to(device) - elif isinstance(maybe_tensor, dict): - return { - key: _move_to_device(value, device) - for key, value in maybe_tensor.items() - } - elif isinstance(maybe_tensor, list): - return [_move_to_device(x, device) for x in maybe_tensor] - elif isinstance(maybe_tensor, tuple): - return [_move_to_device(x, device) for x in maybe_tensor] - else: - return maybe_tensor - - return _move_to_device(sample, device) - -def unpack(gzip_file: str, out_file: str): - print('Uncompressing ', gzip_file) - input = gzip.GzipFile(gzip_file, 'rb') - s = input.read() - input.close() - output = open(out_file, 'wb') - output.write(s) - output.close() - print('Saved to ', out_file) \ No newline at end of file
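
The module deleted above mostly vendored the Hugging Face DPR encoder classes (DPRConfig, DPRContextEncoder, DPRQuestionEncoder and their tokenizers) for use by DensePassageRetriever. A rough sketch of the encode-and-score flow those classes provide is shown below; the checkpoint names and the toy input are taken from the usage examples embedded in the docstrings above, while the final dot-product scoring step is an illustrative assumption (it mirrors how DPR ranks passages against a question, but is not implemented in this file).

    # Sketch: embed a question and a context with the DPR encoders from transformers,
    # then score the pair by inner product (higher score = more relevant passage).
    import torch
    from transformers import (
        DPRContextEncoder, DPRContextEncoderTokenizer,
        DPRQuestionEncoder, DPRQuestionEncoderTokenizer,
    )

    ctx_tokenizer = DPRContextEncoderTokenizer.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
    ctx_encoder = DPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
    q_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
    q_encoder = DPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base")

    # Each encoder returns its pooler output first: one dense vector per input sequence.
    ctx_ids = ctx_tokenizer("Hello, is my dog cute ?", return_tensors="pt")["input_ids"]
    q_ids = q_tokenizer("Hello, is my dog cute ?", return_tensors="pt")["input_ids"]
    ctx_emb = ctx_encoder(ctx_ids)[0]   # context embedding, shape (batch, hidden)
    q_emb = q_encoder(q_ids)[0]         # question embedding, shape (batch, hidden)

    # Assumed scoring step: DPR retrieval ranks passages by question-context similarity.
    scores = torch.matmul(q_emb, ctx_emb.T)

In practice the question embedding is compared against many pre-computed context embeddings at once, so the matmul above generalizes to a (1, hidden) x (hidden, n_passages) product whose row gives the retrieval scores for all candidate passages.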