Compare revisions

Johann-Ludwig Herzog · Johann-Ludwig Herzog · 06f451f0
--- a/llava/train/train.py
+++ b/llava/train/train.py
@@ -831,6 +831,11 @@ class LazySupervisedDataset(Dataset):
            else:
                image = processor.preprocess(image, return_tensors='pt', input_data_format='channels_first')['pixel_values'][0] # add channels_first because it can only infer channels for 1 / 3 bands
            
+            if self.image_size[0] == 14:
+                # At the moment the image has 12 bands [B01, B02, B03, B04, B05, B06, B07, B08, B8A, B09, B11, B12]; B10 is not available in BEN.
+                # Insert B10 as an all-zero band between B09 and B11.
+                image = torch.cat([image[:9], torch.zeros(1, self.image_size[-1], self.image_size[-1]), image[9:]], dim=0)
+
            # move <image> (DEFAULT_IMAGE_TOKEN) to beginning of each question/instruction
            # [{'id': '004539375', 'image': '00453/004539375.jpg', 'conversations': [{'from': 'human', 'value': 'Render a clear and concise summary of the photo.\n<image>'}, {'from': 'gpt', 'value': 'select luxury furniture 3 - inch gel memory foam mattress topper'}]}]
            # [[{'from': 'human', 'value': '<image>\nRender a clear and concise summary of the photo.'}, {'from': 'gpt', 'value': 'select luxury furniture 3 - inch gel memory foam mattress topper'}]]
No results found