diff --git a/pdelfin/train/config/qwen2vl-7b-lora.yaml b/pdelfin/train/config/qwen2vl-7b-lora.yaml
index 87a24b7..38d3a09 100644
--- a/pdelfin/train/config/qwen2vl-7b-lora.yaml
+++ b/pdelfin/train/config/qwen2vl-7b-lora.yaml
@@ -7,43 +7,33 @@ wandb:
   project: pdelfin
   entity: ai2-llm
 
-# TODO This is not used
-format:
-  instruction_template: "Original:"
-  response_template: "Rewritten:"
-  # Template from here: https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.py#L30
-  chat_template: |
-    {% for message in messages %}
-      {{'<|im_start|>' + message['role'] + '\n' + message['content']}}
-      {% if loop.last %}
-        {{ '<|im_end|>'}}
-      {% else %}
-        {{ '<|im_end|>\n' }}
-      {% endif %}
-    {% endfor %}
-
 generate:
-  max_length: 4096
+  max_length: 8192
 
 train_data:
   seed: 1337
   sources:
-    # These tend to be really big, so it's only practical to host them as parquets on weka, otherwise you may OOM or just never finish dataloading
     - name: openai_batch_data_v5_1_train
-      parquet_path: /data/jakep/pdfdata/openai_batch_data_v5_1_parquet/*.parquet
-    - name: openai_batch_data_v5_1_train
-      parquet_path: /data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_parquet/*.parquet
+      response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_train_done/*.json
+      target_longest_image_dim: 1024
+      target_anchor_text_len: 6000
+    - name: openai_batch_data_v5_1_iabooks_train
+      response_glob_path: /data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_train_done/*.json
+      target_longest_image_dim: 1024
+      target_anchor_text_len: 6000
 
 valid_data:
   metric_for_best_model: openai_batch_data_v5_1_eval_loss
   sources:
     # These tend to be small, so you can load from s3 it's no big deal
     - name: openai_batch_data_v5_1_eval
-      query_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_data_v5_1_eval/*.jsonl
       response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_eval/*.json
+      target_longest_image_dim: 1024
+      target_anchor_text_len: 6000
     - name: openai_batch_data_v5_1_iabooks_eval
-      query_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_data_v5_1_iabooks_eval/*.jsonl
       response_glob_path: s3://ai2-oe-data/jakep/pdfdata/openai_batch_done_v5_1_iabooks_eval/*.json
+      target_longest_image_dim: 1024
+      target_anchor_text_len: 6000
diff --git a/pdelfin/train/core/config.py b/pdelfin/train/core/config.py
index 6e677fc..c3389ae 100644
--- a/pdelfin/train/core/config.py
+++ b/pdelfin/train/core/config.py
@@ -22,28 +22,6 @@ class ModelConfig:
     model_revision: Optional[str] = field(help="The model revision to use for the model.", default=None)
 
 
-@dataclass
-class FormatConfig:
-    """Configuration for formatting the text that is input to the model."""
-
-    new_line_symbol: str = field(
-        help="The symbol to use for new lines in the text; default is '\\n'.",
-        default="\n",
-    )
-    system_message: Optional[str] = field(
-        help="The system message to use for formatting the text; default is no system message.",
-        default=None,
-    )
-    instruction_template: str = field(
-        help="The template to use for formatting the input text", default="Original:"
-    )
-    response_template: str = field(help="The template to use for formatting the output text", default="Rewrite:")
-    chat_template: Optional[str] = field(
-        help="The template to use for formatting the chat text. If None, the default chat template will be used.",
-        default=None,
-    )
-
-
 @dataclass
 class GenerateConfig:
     max_length: int = field(help="The maximum length of the generated text", default=4096)
@@ -75,9 +53,9 @@ class AwsConfig:
 @dataclass
 class SourceConfig:
     name: str = field(help="The name of the source")
-    parquet_path: Optional[str] = field(help="The s3/glob path to a bunch of parquet files for a preprocessed dataset.", default=None)
-    query_glob_path: Optional[str] = field(help="The s3 bucket pointing to the inputs sent to OpenAI to generate the silver data", default=None)
-    response_glob_path: Optional[str] = field(help="The s3 bucket pointing to the batch api response json's sent back from open ai", default=None)
+    response_glob_path: str = field(help="The s3 bucket pointing to the batch api response json's sent back from open ai")
+    target_longest_image_dim: int = field(help="Dimensions to render the pdf page image to")
+    target_anchor_text_len: int = field(help="Maximum amount of anchor text (aka prompt hint)")
 
 
 @dataclass
@@ -141,7 +119,6 @@ class TrainConfig:
     lora: Optional[LoraConfig] = field(default=None, help="The LoRA configuration")
     aws: AwsConfig = field(default=AwsConfig(), help="Configuration for AWS S3")
     wandb: WandbConfig = field(default=WandbConfig(), help="Configuration for Weights and Biases")
-    format: FormatConfig = field(default=FormatConfig(), help="Configuration for formatting the input/output text")
     train_data: DataConfig = field(default=DataConfig(), help="Configuration for the training data")
     valid_data: DataConfig = field(default=DataConfig(), help="Configuration for the validation data")
     generate: GenerateConfig = field(default=GenerateConfig(), help="Configuration for text generation")
@@ -158,5 +135,4 @@ class DemoConfig:
     share: bool = field(default=False, help="Share the demo publicly.")
 
     model: ModelConfig = field(default=ModelConfig())
-    format: FormatConfig = field(default=FormatConfig())
     generate: GenerateConfig = field(default=GenerateConfig())
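
For context, here is a minimal sketch of how one of the new `train_data` sources maps onto the reworked `SourceConfig`. It is purely illustrative and not part of the change; it assumes `SourceConfig` is instantiated like an ordinary dataclass when the YAML above is parsed.

```python
# Hypothetical illustration only -- mirrors one entry of train_data.sources above.
from pdelfin.train.core.config import SourceConfig

source = SourceConfig(
    name="openai_batch_data_v5_1_train",
    # Glob over the OpenAI batch-API response JSONs (the silver labels).
    response_glob_path="/data/jakep/pdfdata/openai_batch_data_v5_1_train_done/*.json",
    # Longest side, in pixels, at which to render each PDF page image.
    target_longest_image_dim=1024,
    # Maximum length of the anchor text (the prompt hint) per page.
    target_anchor_text_len=6000,
)
```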