@@ -831,6 +831,11 @@ class LazySupervisedDataset(Dataset):
else:
image=processor.preprocess(image,return_tensors='pt',input_data_format='channels_first')['pixel_values'][0]# add channels_first because it can only infer channels for 1 / 3 bands
ifself.image_size[0]==14:
# Currently the image has 12 bands [B01, B02, B03, B04, B05, B06, B07, B08, B8A, B09, B11, B12]; B10 is not available in BEN.
# move <image> (DEFAULT_IMAGE_TOKEN) to beginning of each question/instruction
# [{'id': '004539375', 'image': '00453/004539375.jpg', 'conversations': [{'from': 'human', 'value': 'Render a clear and concise summary of the photo.\n<image>'}, {'from': 'gpt', 'value': 'select luxury furniture 3 - inch gel memory foam mattress topper'}]}]
# [[{'from': 'human', 'value': '<image>\nRender a clear and concise summary of the photo.'}, {'from': 'gpt', 'value': 'select luxury furniture 3 - inch gel memory foam mattress topper'}]]