From 8a4fa028bfe914cf000c7aca79f49c75b8df51e2 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Fri, 21 Feb 2025 14:23:02 -0800 Subject: [PATCH 01/23] Update README.md --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 233024f..76c466c 100644 --- a/README.md +++ b/README.md @@ -1 +1,4 @@ -# BiasICL \ No newline at end of file +# BiasICL + + +The name of this repository is BiasICL. \ No newline at end of file From fabe21e95d940532cc36ce92514b8151880aae7c Mon Sep 17 00:00:00 2001 From: Sonnet Xu Date: Tue, 25 Feb 2025 13:28:27 -0800 Subject: [PATCH 02/23] add code --- LMM.py | 391 ++++++++++++++++++++++ chexpert_SexBinary_PTX_final_demo_df.csv | 401 +++++++++++++++++++++++ chexpert_SexBinary_PTX_final_test_df.csv | 101 ++++++ ddi_attribute_pred.py | 240 ++++++++++++++ ddi_demo_metadata.csv | 312 ++++++++++++++++++ ddi_test_metadata.csv | 105 ++++++ run_chexpert.py | 305 +++++++++++++++++ run_chexpert_pred_attribute.py | 167 ++++++++++ run_ddi.py | 290 ++++++++++++++++ 9 files changed, 2312 insertions(+) create mode 100644 LMM.py create mode 100644 chexpert_SexBinary_PTX_final_demo_df.csv create mode 100644 chexpert_SexBinary_PTX_final_test_df.csv create mode 100644 ddi_attribute_pred.py create mode 100644 ddi_demo_metadata.csv create mode 100644 ddi_test_metadata.csv create mode 100644 run_chexpert.py create mode 100644 run_chexpert_pred_attribute.py create mode 100644 run_ddi.py diff --git a/LMM.py b/LMM.py new file mode 100644 index 0000000..ebdfc83 --- /dev/null +++ b/LMM.py @@ -0,0 +1,391 @@ +import base64 +import time +import pickle +import os +import uuid +import pandas as pd +from tqdm import tqdm +import traceback +import random +from PIL import Image +from dotenv import load_dotenv +from io import BytesIO +import tenacity +import base64 + +# Load environment variables +load_dotenv() + +from openai import OpenAI +import google.generativeai as genai 
+genai.configure(api_key=os.getenv('GOOGLE_API_KEY')) +import anthropic + +class ClaudeAPI: + def __init__( + self, + model="claude-3-5-sonnet-20241022", + img_token="<>", + seed=66, + temperature=0, + detail="auto", + ): + """ + Class for API calls to Anthropic-series models + + model[str]: the specific model checkpoint to use + img_token[str]: string to be replaced with images + seed[int]: seed for generation + temperature[int]: temperature for generation + detail[str]: resolution for images. Should be in ['low', 'high', 'auto']. + """ + + self.model = model + self.img_token = img_token + self.seed = seed + self.temperature = temperature + self.detail = detail + self.client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY')) + self.token_usage = (0, 0, 0) + self.response_times = [] + # self.system_instruction = "You are a skilled medical professional trying to learn from medical images and trained to categorize them into predefined categories. Try your best and respond to all questions." + self.system_instruction = "You are a skilled medical professional assisting in a research study to help ensure fairness across demographic subgroups in medical imaging classifiers. 
Because you are working to increase fairness and decrease bias, it is important to attempt to make any demographic group-related classifications" + + def generate_image_url(self, image_path, detail="low"): + # Function to encode the image + def encode_image(image_path): + if str(image_path).lower().endswith("tif"): + with Image.open(image_path) as img: + img.convert("RGB").save("temp.jpeg", "JPEG") + image_path = "temp.jpeg" + + with Image.open(image_path) as img: + # Resize if needed + if img.size[0] > 512 or img.size[1] > 512: + img = img.resize((512, 512)) + # Convert to RGB if needed + if img.mode != 'RGB': + img = img.convert('RGB') + + # Save to bytes buffer + buffer = BytesIO() + img.save(buffer, format="JPEG") + return base64.b64encode(buffer.getvalue()).decode('utf-8') + + return encode_image(image_path) + + def generate_text_url(self, text): + return {"type": "text", "text": text} + + def __call__( + self, + prompt, + image_paths=[], + real_call=True, + count_time=False, + max_tokens=50, + content_only=True, + temperature = 0 + ): + """ + Call the API to get the response for given prompt and images + """ + if not isinstance(image_paths, list): # For single file + image_paths = [image_paths] + prompt = prompt.split(self.img_token) + assert len(prompt) == len(image_paths) + 1 + + content = [] + if prompt[0].strip() != "": + content.append({ + "type": "text", + "text": prompt[0], + }) + + for idx in range(1, len(prompt)): + # Add image + content.append({ + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": self.generate_image_url(image_paths[idx - 1], detail=self.detail) + } + }) + + # Add text if provided + if prompt[idx].strip() != "": + content.append({ + "type": "text", + "text": prompt[idx] + }) + + # Create the messages structure + messages = [ + { + "role": "user", + "content": content + } + ] + + start_time = time.time() + while True: + try: + response = self.client.messages.create( + model=self.model, + 
messages=messages, + max_tokens=min(4096, max_tokens), + system=self.system_instruction, + temperature=temperature + ) + break + except anthropic.RateLimitError as e: + print(str(e)) + if "rate limit" in str(e) or 'overloaded_error' in str(e): + print('Rate limit exceeded... waiting 10 seconds') + time.sleep(10) + else: + raise + + end_time = time.time() + self.response_times.append(end_time - start_time) + + results = [prompt, image_paths, response, end_time - start_time] + + # Update token usage with defaults if not available + if hasattr(response, 'usage'): + completion_tokens = getattr(response.usage, 'completion_tokens', 0) + prompt_tokens = getattr(response.usage, 'prompt_tokens', 0) + total_tokens = getattr(response.usage, 'total_tokens', 0) + else: + completion_tokens = 0 + prompt_tokens = 0 + total_tokens = 0 + + self.token_usage = ( + self.token_usage[0] + completion_tokens, + self.token_usage[1] + prompt_tokens, + self.token_usage[2] + total_tokens + ) + + if content_only: + return response.content[0].text + else: + return response + + +class GPT4VAPI: + def __init__( + self, + model="gpt-4o", + img_token="<>", + seed=66, + temperature=0, + detail="auto", + ): + """ + Class for API calls to GPT-series models + + model[str]: the specific model checkpoint to use e.g. "gpt-4o" + img_token[str]: string to be replaced with images + seed[int]: seed for generation + temperature[int]: temperature for generation + detail[str]: resolution for images. Should be in ['low', 'high', 'auto']. 
+ """ + + self.model = model + self.img_token = img_token + self.seed = seed + self.temperature = temperature + self.detail = detail + self.client = OpenAI(api_key=os.getenv('OPENAI_API_KEY')) + self.token_usage = (0, 0, 0) + self.response_times = [] + + def generate_image_url(self, image_path, detail="low"): + # Given an image_path, return a dict + # Function to encode the image + def encode_image(image_path): + if str(image_path).lower().endswith("tif"): + with Image.open(image_path) as img: + img.convert("RGB").save("temp.jpeg", "JPEG") + image_path = "temp.jpeg" + + # Open the image using Pillow + with Image.open(image_path) as img: + # Resize if needed + if img.size[0] > 512 or img.size[1] > 512: + img = img.resize((512, 512)) + + # Save the image to a temporary buffer + with BytesIO() as buffer: + img.convert("RGB").save(buffer, format="JPEG") + encoded_string = base64.b64encode(buffer.getvalue()).decode("utf-8") + + return encoded_string + + return { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64, {encode_image(image_path)}", + "detail": detail, + }, + } + + def generate_text_url(self, text): + return {"type": "text", "text": text} + + def __call__( + self, + prompt, + image_paths=[], + real_call=True, + count_time=False, + max_tokens=50, + content_only=True, + temperature = 0 + ): + """ + Call the API to get the response for given prompt and images + """ + if not isinstance(image_paths, list): # For single file + image_paths = [image_paths] + prompt = prompt.split(self.img_token) + assert len(prompt) == len(image_paths) + 1 + if prompt[0] != "": + messages = [self.generate_text_url(prompt[0])] + else: + messages = [] + for idx in range(1, len(prompt)): + messages.append( + self.generate_image_url(image_paths[idx - 1], detail=self.detail) + ) + if prompt[idx].strip() != "": + messages.append(self.generate_text_url(prompt[idx])) + if not real_call: + return messages + start_time = time.time() + response = 
self.client.chat.completions.create( + model=self.model, + messages=[{"role": "user", "content": messages}], + # max_tokens=min(4096, max_tokens), + temperature=0, + seed=self.seed, + ) + + end_time = time.time() + self.response_times.append(end_time - start_time) + + results = [prompt, image_paths, response, end_time - start_time] + + self.token_usage = ( + self.token_usage[0] + response.usage.completion_tokens, + self.token_usage[1] + response.usage.prompt_tokens, + self.token_usage[2] + response.usage.total_tokens, + ) + + if content_only: + return response.choices[0].message.content + else: + return response + + +class GeminiAPI: + def __init__( + self, + model="gemini-1.5-flash", + img_token="<>", + RPM=5, + temperature=0, + system_instruction="You are a smart and helpful assistant" + ): + """ + Class for API calls to Gemini-series models + + model[str]: the specific model checkpoint to use e.g. "gemini-1.5-pro-preview-0409" + img_token[str]: string to be replaced with images + RPM[int]: quota for maximum number of requests per minute + temperature[int]: temperature for generation + system_instruction[str]: System prompt for model e.g. 
"You are an expert dermatologist" + """ + self.model = model + self.img_token = img_token + self.temperature = temperature + self.client = genai.GenerativeModel(model_name=self.model, system_instruction=system_instruction) + + self.safety_settings = [ + {"category": "HARM_CATEGORY_DANGEROUS", "threshold": "BLOCK_NONE"}, + {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"}, + {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"}, + {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"}, + {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}, + ] + self.token_usage = (0, 0, 0) + + self.response_times = [] + self.last_time = None + self.interval = 0.5 + 60 / RPM + + def __call__( + self, prompt, image_paths=[], real_call=True, max_tokens=50, content_only=True + ): + """ + Call the API to get the response for given prompt and images + """ + + if self.last_time is not None: # Enforce RPM + # Calculate how much time the loop took + end_time = time.time() + elapsed_time = end_time - self.last_time + # Wait for the remainder of the interval, if necessary + if elapsed_time < self.interval: + time.sleep(self.interval - elapsed_time) + + if not isinstance(image_paths, list): # For single file + image_paths = [image_paths] + prompt = prompt.split(self.img_token) + assert len(prompt) == len(image_paths) + 1 + if prompt[0] != "": + messages = [prompt[0]] + else: + messages = [] + for idx in range(1, len(prompt)): + img = Image.open(image_paths[idx - 1]) + if img.size[0] > 512 or img.size[1] > 512: + img = img.resize((512, 512)) + messages.append(img) + if prompt[idx].strip() != "": + messages.append(prompt[idx]) + if not real_call: + return messages + + start_time = time.time() + self.last_time = start_time + responses = self.client.generate_content( + messages, + ) + end_time = time.time() + self.response_times.append(end_time - start_time) + + results = [prompt, image_paths, responses, end_time - 
start_time] + + try: + usage = responses._raw_response.usage_metadata + self.token_usage = ( + self.token_usage[0] + usage.candidates_token_count, + self.token_usage[1] + usage.prompt_token_count, + self.token_usage[2] + usage.total_token_count, + ) + except: + pass + + if content_only: + if responses: + # Access the parts of the first candidate + content_parts = responses.text + return content_parts + else: + print("Error occurred, retrying") + return self(prompt, image_paths, real_call, max_tokens, content_only) + else: + return responses diff --git a/chexpert_SexBinary_PTX_final_demo_df.csv b/chexpert_SexBinary_PTX_final_demo_df.csv new file mode 100644 index 0000000..b06bd54 --- /dev/null +++ b/chexpert_SexBinary_PTX_final_demo_df.csv @@ -0,0 +1,401 @@ +,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices +0,CheXpert-v1.0/train/patient34481/study3/view1_frontal.jpg,Female,78,Frontal,AP,,,,1.0,,,-1.0,,-1.0,1.0,1.0,,, +1,CheXpert-v1.0/train/patient21704/study1/view1_frontal.jpg,Female,39,Frontal,PA,,,0.0,,,,,,,1.0,,,1.0, +2,CheXpert-v1.0/train/patient10753/study8/view1_frontal.jpg,Female,18,Frontal,AP,,,,,,0.0,,,,1.0,,,,1.0 +3,CheXpert-v1.0/train/patient21463/study8/view1_frontal.jpg,Female,63,Frontal,AP,,,,1.0,,,,,-1.0,1.0,,,,1.0 +4,CheXpert-v1.0/train/patient04990/study1/view1_frontal.jpg,Female,83,Frontal,PA,,,,,,,,,,1.0,,,1.0,1.0 +5,CheXpert-v1.0/train/patient00865/study2/view1_frontal.jpg,Female,77,Frontal,AP,,,,1.0,,,,,,1.0,,,1.0,1.0 +6,CheXpert-v1.0/train/patient47226/study2/view1_frontal.jpg,Female,53,Frontal,AP,,1.0,,1.0,,,,,1.0,1.0,1.0,,,0.0 +7,CheXpert-v1.0/train/patient38953/study2/view1_frontal.jpg,Female,82,Frontal,AP,,,,,,,,,1.0,1.0,1.0,,1.0, +8,CheXpert-v1.0/train/patient35967/study1/view1_frontal.jpg,Female,48,Frontal,AP,,,,,,,,,,1.0,,,, 
+9,CheXpert-v1.0/train/patient29697/study4/view1_frontal.jpg,Female,33,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,1.0 +10,CheXpert-v1.0/train/patient34336/study8/view1_frontal.jpg,Female,47,Frontal,AP,,,,1.0,,,1.0,,,1.0,,,,1.0 +11,CheXpert-v1.0/train/patient03685/study4/view1_frontal.jpg,Female,38,Frontal,AP,,,,1.0,,,,,-1.0,1.0,-1.0,,,1.0 +12,CheXpert-v1.0/train/patient24405/study1/view1_frontal.jpg,Female,24,Frontal,AP,,0.0,,1.0,,,,,1.0,1.0,,,0.0,1.0 +13,CheXpert-v1.0/train/patient23785/study10/view1_frontal.jpg,Female,60,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,, +14,CheXpert-v1.0/train/patient02316/study2/view1_frontal.jpg,Female,62,Frontal,PA,,,0.0,1.0,1.0,,,,1.0,1.0,1.0,,, +15,CheXpert-v1.0/train/patient05900/study1/view1_frontal.jpg,Female,18,Frontal,AP,,0.0,,,,,,,,1.0,,,1.0, +16,CheXpert-v1.0/train/patient07858/study1/view1_frontal.jpg,Female,25,Frontal,AP,,1.0,,,,,,,1.0,1.0,1.0,,,1.0 +17,CheXpert-v1.0/train/patient16493/study1/view1_frontal.jpg,Female,66,Frontal,PA,,,,1.0,,,,,,1.0,1.0,,,1.0 +18,CheXpert-v1.0/train/patient44455/study5/view2_frontal.jpg,Female,62,Frontal,AP,,,,,,,,,1.0,1.0,1.0,,, +19,CheXpert-v1.0/train/patient16266/study6/view1_frontal.jpg,Female,66,Frontal,PA,,,,,,,,,,1.0,1.0,,, +20,CheXpert-v1.0/train/patient28689/study1/view1_frontal.jpg,Female,34,Frontal,AP,,,,1.0,,,,-1.0,,1.0,,,, +21,CheXpert-v1.0/train/patient43630/study4/view1_frontal.jpg,Female,45,Frontal,AP,,,,,,,,,,1.0,1.0,,, +22,CheXpert-v1.0/train/patient62084/study1/view1_frontal.jpg,Female,52,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +23,CheXpert-v1.0/train/patient15570/study6/view1_frontal.jpg,Female,60,Frontal,AP,,,,1.0,,1.0,,,,1.0,1.0,,,1.0 +24,CheXpert-v1.0/train/patient35048/study2/view1_frontal.jpg,Female,84,Frontal,AP,,,,,,,,,,1.0,,,, +25,CheXpert-v1.0/train/patient14660/study3/view1_frontal.jpg,Female,68,Frontal,AP,,,,1.0,,,1.0,,1.0,1.0,,,,1.0 +26,CheXpert-v1.0/train/patient10465/study3/view1_frontal.jpg,Female,24,Frontal,AP,,,,,,,,,,1.0,1.0,,,0.0 
+27,CheXpert-v1.0/train/patient49379/study2/view1_frontal.jpg,Female,36,Frontal,AP,,,,1.0,,,,,1.0,1.0,,,, +28,CheXpert-v1.0/train/patient09442/study2/view1_frontal.jpg,Female,65,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +29,CheXpert-v1.0/train/patient17642/study21/view1_frontal.jpg,Female,75,Frontal,AP,,,,,,,,,1.0,1.0,1.0,,, +30,CheXpert-v1.0/train/patient48172/study4/view1_frontal.jpg,Female,74,Frontal,AP,,,,,,,,,1.0,1.0,,,,1.0 +31,CheXpert-v1.0/train/patient25974/study1/view1_frontal.jpg,Female,29,Frontal,AP,,,,,,,,,,1.0,,,,0.0 +32,CheXpert-v1.0/train/patient16926/study3/view1_frontal.jpg,Female,77,Frontal,AP,,,,1.0,,,,,,1.0,,,,1.0 +33,CheXpert-v1.0/train/patient45038/study6/view1_frontal.jpg,Female,64,Frontal,AP,,,,,,0.0,,,,1.0,0.0,,, +34,CheXpert-v1.0/train/patient24455/study1/view1_frontal.jpg,Female,42,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +35,CheXpert-v1.0/train/patient00515/study4/view1_frontal.jpg,Female,59,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +36,CheXpert-v1.0/train/patient12963/study16/view1_frontal.jpg,Female,30,Frontal,AP,,,,1.0,,,1.0,,,1.0,-1.0,,,1.0 +37,CheXpert-v1.0/train/patient17312/study6/view1_frontal.jpg,Female,57,Frontal,PA,,,,,,,,,,1.0,,,,1.0 +38,CheXpert-v1.0/train/patient12316/study6/view1_frontal.jpg,Female,76,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,0.0 +39,CheXpert-v1.0/train/patient00660/study1/view1_frontal.jpg,Female,79,Frontal,PA,,0.0,,,,,0.0,,,1.0,1.0,,, +40,CheXpert-v1.0/train/patient06434/study4/view1_frontal.jpg,Female,53,Frontal,AP,,,,1.0,,,,,,1.0,,,, +41,CheXpert-v1.0/train/patient43014/study2/view1_frontal.jpg,Female,84,Frontal,AP,,,0.0,,,0.0,-1.0,,1.0,1.0,-1.0,,, +42,CheXpert-v1.0/train/patient05975/study3/view1_frontal.jpg,Female,73,Frontal,PA,,,,,,1.0,,,,1.0,,,,1.0 +43,CheXpert-v1.0/train/patient19963/study2/view1_frontal.jpg,Female,66,Frontal,AP,,,,1.0,,,,,,1.0,,,, +44,CheXpert-v1.0/train/patient46788/study3/view1_frontal.jpg,Female,42,Frontal,AP,,-1.0,,,,,,,,1.0,0.0,,,0.0 
+45,CheXpert-v1.0/train/patient39521/study20/view1_frontal.jpg,Female,80,Frontal,AP,,,,1.0,,,,,,1.0,,,,1.0 +46,CheXpert-v1.0/train/patient15144/study3/view1_frontal.jpg,Female,77,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,1.0 +47,CheXpert-v1.0/train/patient30320/study3/view1_frontal.jpg,Female,64,Frontal,PA,,,,1.0,,,,,1.0,1.0,,,,1.0 +48,CheXpert-v1.0/train/patient01342/study3/view1_frontal.jpg,Female,72,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +49,CheXpert-v1.0/train/patient27535/study4/view1_frontal.jpg,Female,60,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,1.0 +50,CheXpert-v1.0/train/patient03685/study7/view1_frontal.jpg,Female,38,Frontal,PA,,,,,,,,,,1.0,-1.0,,, +51,CheXpert-v1.0/train/patient54244/study1/view1_frontal.jpg,Female,33,Frontal,AP,,0.0,,,,,,,,1.0,,,1.0, +52,CheXpert-v1.0/train/patient35663/study6/view1_frontal.jpg,Female,55,Frontal,AP,,,,,,,,,1.0,1.0,1.0,,,1.0 +53,CheXpert-v1.0/train/patient13104/study4/view1_frontal.jpg,Female,64,Frontal,AP,,,,,,,,,,1.0,,,, +54,CheXpert-v1.0/train/patient34617/study6/view1_frontal.jpg,Female,66,Frontal,AP,,,,1.0,,1.0,,,1.0,1.0,1.0,,,1.0 +55,CheXpert-v1.0/train/patient38796/study4/view1_frontal.jpg,Female,74,Frontal,AP,,,,,,,,,,1.0,,,,0.0 +56,CheXpert-v1.0/train/patient37510/study8/view1_frontal.jpg,Female,42,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,1.0 +57,CheXpert-v1.0/train/patient41754/study11/view1_frontal.jpg,Female,53,Frontal,AP,,,,1.0,,,,,,1.0,,,,1.0 +58,CheXpert-v1.0/train/patient14288/study5/view1_frontal.jpg,Female,75,Frontal,AP,,,,,,,,,,1.0,1.0,,,1.0 +59,CheXpert-v1.0/train/patient37300/study6/view1_frontal.jpg,Female,82,Frontal,AP,,,-1.0,,,1.0,,,,1.0,1.0,,,1.0 +60,CheXpert-v1.0/train/patient06100/study4/view1_frontal.jpg,Female,52,Frontal,AP,,,,-1.0,,,,,1.0,1.0,,,,1.0 +61,CheXpert-v1.0/train/patient47351/study3/view1_frontal.jpg,Female,67,Frontal,AP,,,,,,,-1.0,,-1.0,1.0,,,,1.0 +62,CheXpert-v1.0/train/patient09809/study2/view1_frontal.jpg,Female,69,Frontal,AP,,,,1.0,,,1.0,,,1.0,,,,1.0 
+63,CheXpert-v1.0/train/patient17051/study14/view1_frontal.jpg,Female,56,Frontal,AP,,,,1.0,,1.0,,,,1.0,1.0,,,1.0 +64,CheXpert-v1.0/train/patient19340/study1/view2_frontal.jpg,Female,41,Frontal,PA,,,,,,,,,,1.0,,,, +65,CheXpert-v1.0/train/patient23091/study1/view1_frontal.jpg,Female,71,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +66,CheXpert-v1.0/train/patient13017/study6/view1_frontal.jpg,Female,28,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +67,CheXpert-v1.0/train/patient35048/study1/view1_frontal.jpg,Female,84,Frontal,AP,,,,,,,,,,1.0,,,, +68,CheXpert-v1.0/train/patient57363/study1/view1_frontal.jpg,Female,75,Frontal,AP,,0.0,,1.0,-1.0,,,,,1.0,0.0,,, +69,CheXpert-v1.0/train/patient25559/study4/view1_frontal.jpg,Female,64,Frontal,AP,,,,,,,,,,1.0,,,, +70,CheXpert-v1.0/train/patient20102/study7/view1_frontal.jpg,Female,38,Frontal,AP,,-1.0,,1.0,,,,,,1.0,1.0,,,1.0 +71,CheXpert-v1.0/train/patient49766/study1/view1_frontal.jpg,Female,73,Frontal,AP,,,,,,,,,,1.0,,,, +72,CheXpert-v1.0/train/patient53097/study1/view1_frontal.jpg,Female,41,Frontal,AP,,,,1.0,,,,,1.0,1.0,,,1.0,0.0 +73,CheXpert-v1.0/train/patient47258/study3/view1_frontal.jpg,Female,73,Frontal,AP,,,,,,,,,1.0,1.0,1.0,,,1.0 +74,CheXpert-v1.0/train/patient24114/study4/view1_frontal.jpg,Female,48,Frontal,AP,,-1.0,,1.0,,,,,,1.0,1.0,,, +75,CheXpert-v1.0/train/patient38190/study2/view1_frontal.jpg,Female,90,Frontal,AP,,,,1.0,1.0,,,,,1.0,1.0,,, +76,CheXpert-v1.0/train/patient63870/study1/view1_frontal.jpg,Female,66,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +77,CheXpert-v1.0/train/patient13532/study5/view1_frontal.jpg,Female,65,Frontal,AP,,,,1.0,,1.0,1.0,-1.0,,1.0,1.0,,,1.0 +78,CheXpert-v1.0/train/patient43602/study1/view1_frontal.jpg,Female,67,Frontal,AP,,,0.0,1.0,1.0,,,,,1.0,,,,1.0 +79,CheXpert-v1.0/train/patient30926/study4/view1_frontal.jpg,Female,59,Frontal,AP,,,,,,,,,1.0,1.0,,,,1.0 +80,CheXpert-v1.0/train/patient51928/study1/view1_frontal.jpg,Female,84,Frontal,AP,,,,1.0,,,,,,1.0,,,,1.0 
+81,CheXpert-v1.0/train/patient02671/study2/view1_frontal.jpg,Female,69,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,, +82,CheXpert-v1.0/train/patient34999/study1/view1_frontal.jpg,Female,35,Frontal,AP,,,-1.0,0.0,,,,,,1.0,,,,1.0 +83,CheXpert-v1.0/train/patient27451/study7/view1_frontal.jpg,Female,65,Frontal,AP,,,,,,,1.0,,,1.0,1.0,,,1.0 +84,CheXpert-v1.0/train/patient03190/study2/view1_frontal.jpg,Female,75,Frontal,AP,,,,,1.0,,,,,1.0,,,, +85,CheXpert-v1.0/train/patient08070/study2/view1_frontal.jpg,Female,60,Frontal,AP,,1.0,,,,1.0,,,1.0,1.0,,,, +86,CheXpert-v1.0/train/patient47659/study2/view1_frontal.jpg,Female,74,Frontal,AP,,,,1.0,,,,,,1.0,,,,1.0 +87,CheXpert-v1.0/train/patient30048/study3/view1_frontal.jpg,Female,39,Frontal,AP,,-1.0,,1.0,,,,,,1.0,1.0,,, +88,CheXpert-v1.0/train/patient38305/study1/view1_frontal.jpg,Female,84,Frontal,AP,,,,1.0,,1.0,,,,1.0,1.0,,,1.0 +89,CheXpert-v1.0/train/patient18001/study4/view1_frontal.jpg,Female,57,Frontal,PA,,,,1.0,,,,,,1.0,1.0,,,0.0 +90,CheXpert-v1.0/train/patient43582/study8/view1_frontal.jpg,Female,60,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,1.0 +91,CheXpert-v1.0/train/patient24114/study11/view1_frontal.jpg,Female,52,Frontal,AP,,,,1.0,,,,,1.0,1.0,1.0,,,1.0 +92,CheXpert-v1.0/train/patient25974/study2/view1_frontal.jpg,Female,29,Frontal,AP,,,,1.0,,,,,1.0,1.0,0.0,,,1.0 +93,CheXpert-v1.0/train/patient03454/study10/view1_frontal.jpg,Female,37,Frontal,AP,,,,1.0,,,1.0,,,1.0,,,,1.0 +94,CheXpert-v1.0/train/patient52939/study1/view1_frontal.jpg,Female,77,Frontal,AP,,,,1.0,,,,,1.0,1.0,1.0,,,1.0 +95,CheXpert-v1.0/train/patient04608/study4/view1_frontal.jpg,Female,72,Frontal,AP,,-1.0,,1.0,1.0,,,-1.0,-1.0,1.0,1.0,,,1.0 +96,CheXpert-v1.0/train/patient08313/study4/view1_frontal.jpg,Female,74,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,1.0 +97,CheXpert-v1.0/train/patient07776/study5/view1_frontal.jpg,Female,37,Frontal,AP,,,1.0,1.0,,,,,,1.0,1.0,,,1.0 +98,CheXpert-v1.0/train/patient00893/study2/view1_frontal.jpg,Female,85,Frontal,AP,,,,1.0,,,,,-1.0,1.0,1.0,,, 
+99,CheXpert-v1.0/train/patient10264/study12/view1_frontal.jpg,Female,67,Frontal,AP,,,,,1.0,,,,,1.0,,,,1.0 +100,CheXpert-v1.0/train/patient06487/study6/view1_frontal.jpg,Female,85,Frontal,AP,,,-1.0,1.0,,1.0,,-1.0,1.0,,,,, +101,CheXpert-v1.0/train/patient57662/study2/view1_frontal.jpg,Female,45,Frontal,AP,,,,0.0,,,,,,0.0,,,1.0,1.0 +102,CheXpert-v1.0/train/patient58362/study2/view1_frontal.jpg,Female,44,Frontal,AP,1.0,,,,,,0.0,,,0.0,,,,1.0 +103,CheXpert-v1.0/train/patient18002/study10/view1_frontal.jpg,Female,40,Frontal,AP,,,1.0,,,0.0,0.0,,,,0.0,,,1.0 +104,CheXpert-v1.0/train/patient18074/study4/view1_frontal.jpg,Female,90,Frontal,AP,,,,,,1.0,,,,,,,, +105,CheXpert-v1.0/train/patient32505/study10/view1_frontal.jpg,Female,24,Frontal,AP,,,1.0,,,0.0,,,1.0,,1.0,,,1.0 +106,CheXpert-v1.0/train/patient06523/study1/view1_frontal.jpg,Female,90,Frontal,AP,,0.0,,,,,0.0,,,0.0,-1.0,-1.0,, +107,CheXpert-v1.0/train/patient50246/study1/view1_frontal.jpg,Female,90,Frontal,AP,,,1.0,,,,0.0,,,,-1.0,-1.0,, +108,CheXpert-v1.0/train/patient08494/study2/view1_frontal.jpg,Female,88,Frontal,AP,,,,1.0,,,,-1.0,-1.0,,1.0,,, +109,CheXpert-v1.0/train/patient30232/study1/view1_frontal.jpg,Female,66,Frontal,AP,,,1.0,1.0,,-1.0,,,,,,,, +110,CheXpert-v1.0/train/patient15432/study6/view1_frontal.jpg,Female,38,Frontal,AP,,1.0,,1.0,,-1.0,,,,,,,,1.0 +111,CheXpert-v1.0/train/patient08381/study1/view1_frontal.jpg,Female,72,Frontal,PA,,,1.0,1.0,,-1.0,,-1.0,-1.0,,1.0,,, +112,CheXpert-v1.0/train/patient46474/study2/view1_frontal.jpg,Female,33,Frontal,AP,,,,,,,-1.0,,1.0,,1.0,,,1.0 +113,CheXpert-v1.0/train/patient05572/study1/view1_frontal.jpg,Female,34,Frontal,PA,1.0,,,,,,,0.0,,,,,, +114,CheXpert-v1.0/train/patient49307/study1/view1_frontal.jpg,Female,37,Frontal,AP,,,,,,1.0,,,1.0,,,,,1.0 +115,CheXpert-v1.0/train/patient21622/study36/view1_frontal.jpg,Female,32,Frontal,AP,,,1.0,0.0,,,,,,,0.0,,,1.0 +116,CheXpert-v1.0/train/patient58058/study1/view1_frontal.jpg,Female,90,Frontal,AP,,,1.0,,,1.0,,,,,1.0,,, 
+117,CheXpert-v1.0/train/patient51913/study1/view1_frontal.jpg,Female,32,Frontal,AP,,,,,,,,,1.0,0.0,,,,1.0 +118,CheXpert-v1.0/train/patient58965/study1/view1_frontal.jpg,Female,80,Frontal,AP,,,,,,1.0,,,1.0,,1.0,,,1.0 +119,CheXpert-v1.0/train/patient20927/study25/view1_frontal.jpg,Female,81,Frontal,AP,,,,-1.0,,,-1.0,,-1.0,,1.0,,, +120,CheXpert-v1.0/train/patient18785/study1/view1_frontal.jpg,Female,23,Frontal,PA,,,0.0,1.0,,,-1.0,,,-1.0,-1.0,,, +121,CheXpert-v1.0/train/patient60337/study1/view1_frontal.jpg,Female,24,Frontal,AP,,0.0,,,,,,,,0.0,,,,1.0 +122,CheXpert-v1.0/train/patient38131/study13/view1_frontal.jpg,Female,80,Frontal,AP,1.0,,,,,,,,,,,,,1.0 +123,CheXpert-v1.0/train/patient05068/study1/view1_frontal.jpg,Female,53,Frontal,PA,,,1.0,,,-1.0,,,,,,,, +124,CheXpert-v1.0/train/patient37777/study8/view1_frontal.jpg,Female,72,Frontal,AP,,,,,,1.0,,,,,1.0,,,1.0 +125,CheXpert-v1.0/train/patient07084/study21/view1_frontal.jpg,Female,40,Frontal,AP,,,,1.0,,1.0,,,,,1.0,,, +126,CheXpert-v1.0/train/patient34625/study1/view1_frontal.jpg,Female,90,Frontal,AP,,,,1.0,,-1.0,,-1.0,,,-1.0,,, +127,CheXpert-v1.0/train/patient37977/study6/view1_frontal.jpg,Female,53,Frontal,AP,,,,1.0,,,,,,-1.0,1.0,,,1.0 +128,CheXpert-v1.0/train/patient14915/study1/view1_frontal.jpg,Female,59,Frontal,AP,,,,1.0,,,,,,0.0,-1.0,,,1.0 +129,CheXpert-v1.0/train/patient02231/study2/view1_frontal.jpg,Female,61,Frontal,AP,,,,1.0,,,-1.0,,-1.0,,1.0,,,1.0 +130,CheXpert-v1.0/train/patient36578/study2/view1_frontal.jpg,Female,59,Frontal,AP,,-1.0,,1.0,,1.0,,,,,1.0,,,1.0 +131,CheXpert-v1.0/train/patient16867/study1/view1_frontal.jpg,Female,79,Frontal,AP,,,,1.0,,-1.0,,,,0.0,,,,1.0 +132,CheXpert-v1.0/train/patient36692/study6/view1_frontal.jpg,Female,79,Frontal,AP,,,,1.0,,1.0,,,,,1.0,,, +133,CheXpert-v1.0/train/patient64163/study1/view1_frontal.jpg,Female,57,Frontal,AP,1.0,0.0,,,,,0.0,,,0.0,0.0,,,1.0 +134,CheXpert-v1.0/train/patient45738/study6/view1_frontal.jpg,Female,62,Frontal,AP,,,1.0,,,0.0,,,,,-1.0,,,1.0 
+135,CheXpert-v1.0/train/patient02665/study8/view1_frontal.jpg,Female,40,Frontal,AP,,,,1.0,,,-1.0,,-1.0,,1.0,,,1.0 +136,CheXpert-v1.0/train/patient53956/study1/view1_frontal.jpg,Female,78,Frontal,AP,,0.0,,,,,-1.0,,-1.0,0.0,1.0,,,1.0 +137,CheXpert-v1.0/train/patient19445/study1/view1_frontal.jpg,Female,74,Frontal,PA,,,-1.0,,,,,,,,1.0,,, +138,CheXpert-v1.0/train/patient36473/study3/view1_frontal.jpg,Female,75,Frontal,AP,,,,,,1.0,,,1.0,,,,,1.0 +139,CheXpert-v1.0/train/patient56850/study1/view1_frontal.jpg,Female,50,Frontal,AP,,,,1.0,,,-1.0,,-1.0,,1.0,,, +140,CheXpert-v1.0/train/patient30229/study1/view1_frontal.jpg,Female,82,Frontal,PA,,,,1.0,,,,,1.0,0.0,1.0,,,1.0 +141,CheXpert-v1.0/train/patient40235/study17/view1_frontal.jpg,Female,65,Frontal,AP,,,,1.0,,,,,,0.0,1.0,,,1.0 +142,CheXpert-v1.0/train/patient32287/study1/view1_frontal.jpg,Female,36,Frontal,AP,,1.0,,,,,,,,0.0,,,0.0, +143,CheXpert-v1.0/train/patient09187/study1/view1_frontal.jpg,Female,81,Frontal,AP,,,,1.0,,-1.0,,-1.0,,,1.0,,, +144,CheXpert-v1.0/train/patient45884/study2/view1_frontal.jpg,Female,82,Frontal,AP,,,,1.0,,1.0,,-1.0,,,,,, +145,CheXpert-v1.0/train/patient51113/study1/view1_frontal.jpg,Female,77,Frontal,AP,,,-1.0,,,1.0,,,,,1.0,,, +146,CheXpert-v1.0/train/patient13608/study1/view1_frontal.jpg,Female,68,Frontal,AP,1.0,0.0,,,,,,,,0.0,0.0,,,1.0 +147,CheXpert-v1.0/train/patient53343/study2/view1_frontal.jpg,Female,86,Frontal,AP,,-1.0,,1.0,,,,,,0.0,,,1.0, +148,CheXpert-v1.0/train/patient04058/study1/view1_frontal.jpg,Female,90,Frontal,AP,,0.0,,,,0.0,0.0,,,,0.0,,, +149,CheXpert-v1.0/train/patient01141/study6/view1_frontal.jpg,Female,56,Frontal,AP,,,,1.0,,,,-1.0,,0.0,,,,1.0 +150,CheXpert-v1.0/train/patient62861/study1/view1_frontal.jpg,Female,48,Frontal,AP,,,1.0,0.0,,1.0,,,,0.0,0.0,,, +151,CheXpert-v1.0/train/patient28234/study2/view1_frontal.jpg,Female,62,Frontal,PA,,,,1.0,,0.0,,0.0,,,-1.0,1.0,, 
+152,CheXpert-v1.0/train/patient39742/study3/view1_frontal.jpg,Female,54,Frontal,AP,,,0.0,,,,-1.0,,,-1.0,-1.0,,, +153,CheXpert-v1.0/train/patient40935/study2/view1_frontal.jpg,Female,28,Frontal,AP,,,,,,,,,,,1.0,,,0.0 +154,CheXpert-v1.0/train/patient02816/study1/view1_frontal.jpg,Female,53,Frontal,AP,,0.0,0.0,,,,0.0,,,,0.0,,, +155,CheXpert-v1.0/train/patient02992/study2/view1_frontal.jpg,Female,43,Frontal,PA,,,,1.0,,,-1.0,,1.0,,0.0,,, +156,CheXpert-v1.0/train/patient19874/study1/view1_frontal.jpg,Female,74,Frontal,AP,,,0.0,,,,,,,0.0,0.0,,1.0, +157,CheXpert-v1.0/train/patient40935/study3/view1_frontal.jpg,Female,31,Frontal,AP,,,,1.0,,,,1.0,-1.0,,1.0,,,1.0 +158,CheXpert-v1.0/train/patient06177/study1/view1_frontal.jpg,Female,45,Frontal,AP,,,,,,,,,1.0,0.0,,,,1.0 +159,CheXpert-v1.0/train/patient21948/study2/view1_frontal.jpg,Female,67,Frontal,AP,,,,1.0,,,,,1.0,,1.0,,,1.0 +160,CheXpert-v1.0/train/patient09592/study6/view1_frontal.jpg,Female,42,Frontal,PA,,,0.0,,1.0,,-1.0,,,-1.0,-1.0,,,1.0 +161,CheXpert-v1.0/train/patient10816/study1/view1_frontal.jpg,Female,22,Frontal,AP,,,,-1.0,,,-1.0,,1.0,,,,, +162,CheXpert-v1.0/train/patient42153/study5/view1_frontal.jpg,Female,60,Frontal,AP,,,-1.0,1.0,1.0,0.0,0.0,,,0.0,0.0,,,1.0 +163,CheXpert-v1.0/train/patient07128/study1/view1_frontal.jpg,Female,48,Frontal,AP,,,,,,,,,,0.0,,,1.0, +164,CheXpert-v1.0/train/patient32494/study14/view1_frontal.jpg,Female,62,Frontal,AP,1.0,0.0,,,,0.0,0.0,,,0.0,,,,1.0 +165,CheXpert-v1.0/train/patient29299/study1/view1_frontal.jpg,Female,69,Frontal,AP,,,1.0,1.0,,,,,,0.0,,,,1.0 +166,CheXpert-v1.0/train/patient41767/study1/view2_frontal.jpg,Female,44,Frontal,AP,,,,,,,,,,0.0,,,1.0, +167,CheXpert-v1.0/train/patient18225/study1/view1_frontal.jpg,Female,76,Frontal,PA,,,1.0,,,0.0,0.0,,,0.0,0.0,,, +168,CheXpert-v1.0/train/patient35099/study11/view1_frontal.jpg,Female,76,Frontal,AP,,,,,,,,,,-1.0,,,, +169,CheXpert-v1.0/train/patient34641/study21/view1_frontal.jpg,Female,78,Frontal,AP,,,,1.0,,,,,1.0,,1.0,,, 
+170,CheXpert-v1.0/train/patient55546/study2/view1_frontal.jpg,Female,61,Frontal,AP,,,,1.0,1.0,,,,,,1.0,,,1.0 +171,CheXpert-v1.0/train/patient30871/study4/view1_frontal.jpg,Female,65,Frontal,AP,,,1.0,,,-1.0,0.0,,,,0.0,,,1.0 +172,CheXpert-v1.0/train/patient59636/study1/view1_frontal.jpg,Female,66,Frontal,AP,,-1.0,,-1.0,,0.0,0.0,,-1.0,,0.0,,, +173,CheXpert-v1.0/train/patient04852/study2/view1_frontal.jpg,Female,57,Frontal,AP,,,,1.0,,,-1.0,,-1.0,,,,, +174,CheXpert-v1.0/train/patient08251/study2/view3_frontal.jpg,Female,43,Frontal,AP,,,,,,-1.0,,,,0.0,,,,1.0 +175,CheXpert-v1.0/train/patient25752/study1/view1_frontal.jpg,Female,55,Frontal,PA,,,,1.0,1.0,,,1.0,,,1.0,,, +176,CheXpert-v1.0/train/patient18823/study17/view1_frontal.jpg,Female,68,Frontal,PA,,0.0,0.0,,,,-1.0,,,-1.0,-1.0,,, +177,CheXpert-v1.0/train/patient12523/study3/view1_frontal.jpg,Female,56,Frontal,AP,,,,1.0,1.0,-1.0,,-1.0,,,,,, +178,CheXpert-v1.0/train/patient18242/study4/view1_frontal.jpg,Female,71,Frontal,AP,,,,,,,-1.0,,-1.0,,1.0,,,1.0 +179,CheXpert-v1.0/train/patient28786/study2/view1_frontal.jpg,Female,60,Frontal,AP,,,,1.0,,1.0,,,,,1.0,,, +180,CheXpert-v1.0/train/patient11095/study1/view1_frontal.jpg,Female,58,Frontal,AP,,,1.0,,,1.0,,,,,,,, +181,CheXpert-v1.0/train/patient55475/study1/view1_frontal.jpg,Female,87,Frontal,AP,,,,,,,,,-1.0,,-1.0,,, +182,CheXpert-v1.0/train/patient54779/study3/view1_frontal.jpg,Female,64,Frontal,AP,,,,,,0.0,,,0.0,,0.0,,,1.0 +183,CheXpert-v1.0/train/patient35824/study2/view1_frontal.jpg,Female,43,Frontal,AP,,,,1.0,,,,,,,1.0,,,1.0 +184,CheXpert-v1.0/train/patient24091/study1/view1_frontal.jpg,Female,39,Frontal,PA,,0.0,,1.0,,,,1.0,,,,,, +185,CheXpert-v1.0/train/patient41713/study2/view1_frontal.jpg,Female,53,Frontal,AP,,,,,,,,,,0.0,1.0,,, +186,CheXpert-v1.0/train/patient35272/study5/view1_frontal.jpg,Female,37,Frontal,AP,,,,1.0,,,,,,,1.0,,,1.0 +187,CheXpert-v1.0/train/patient44325/study1/view1_frontal.jpg,Female,61,Frontal,AP,,,,1.0,,,,,1.0,0.0,,,,1.0 
+188,CheXpert-v1.0/train/patient49263/study1/view2_frontal.jpg,Female,90,Frontal,AP,,-1.0,,1.0,,,0.0,,,0.0,0.0,,, +189,CheXpert-v1.0/train/patient34716/study12/view1_frontal.jpg,Female,69,Frontal,AP,,-1.0,,1.0,,1.0,,,,,1.0,,,1.0 +190,CheXpert-v1.0/train/patient13309/study10/view1_frontal.jpg,Female,47,Frontal,AP,,,,1.0,,,,,,,1.0,,,1.0 +191,CheXpert-v1.0/train/patient35956/study10/view1_frontal.jpg,Female,20,Frontal,AP,,,,1.0,,,,,,,1.0,,,1.0 +192,CheXpert-v1.0/train/patient08840/study5/view1_frontal.jpg,Female,89,Frontal,AP,,,,1.0,,,,,,0.0,1.0,,,1.0 +193,CheXpert-v1.0/train/patient37374/study1/view1_frontal.jpg,Female,63,Frontal,AP,,,,1.0,,,-1.0,,-1.0,0.0,0.0,,,1.0 +194,CheXpert-v1.0/train/patient55488/study1/view1_frontal.jpg,Female,75,Frontal,AP,,,1.0,1.0,,1.0,,-1.0,,,1.0,,, +195,CheXpert-v1.0/train/patient45733/study1/view1_frontal.jpg,Female,30,Frontal,AP,,,,,,1.0,,,,,1.0,,, +196,CheXpert-v1.0/train/patient21392/study2/view1_frontal.jpg,Female,68,Frontal,AP,,,,1.0,1.0,,-1.0,,-1.0,,,,, +197,CheXpert-v1.0/train/patient39711/study2/view1_frontal.jpg,Female,43,Frontal,AP,,,,,,,1.0,,,,,,,1.0 +198,CheXpert-v1.0/train/patient00177/study1/view1_frontal.jpg,Female,84,Frontal,AP,,,,,,-1.0,,,,0.0,,,,1.0 +199,CheXpert-v1.0/train/patient59666/study1/view1_frontal.jpg,Female,79,Frontal,AP,,,,1.0,,-1.0,,-1.0,1.0,,-1.0,,, +200,CheXpert-v1.0/train/patient01801/study7/view1_frontal.jpg,Male,53,Frontal,AP,,,,1.0,,,1.0,,,1.0,1.0,,,1.0 +201,CheXpert-v1.0/train/patient18561/study2/view1_frontal.jpg,Male,61,Frontal,AP,,,,,,,,,,1.0,,,1.0, +202,CheXpert-v1.0/train/patient35776/study14/view1_frontal.jpg,Male,90,Frontal,AP,,,1.0,1.0,,,,,,1.0,1.0,,,1.0 +203,CheXpert-v1.0/train/patient04064/study11/view1_frontal.jpg,Male,68,Frontal,AP,,,,,,,,,,1.0,1.0,,, +204,CheXpert-v1.0/train/patient36309/study13/view1_frontal.jpg,Male,51,Frontal,AP,,-1.0,,,,0.0,,,,1.0,,,,1.0 +205,CheXpert-v1.0/train/patient11306/study3/view1_frontal.jpg,Male,58,Frontal,PA,,,,1.0,,,,,-1.0,1.0,,,,0.0 
+206,CheXpert-v1.0/train/patient35725/study10/view1_frontal.jpg,Male,26,Frontal,AP,,-1.0,,1.0,,0.0,,,1.0,1.0,,,,1.0 +207,CheXpert-v1.0/train/patient48339/study2/view1_frontal.jpg,Male,85,Frontal,AP,,,,,,,0.0,,,1.0,0.0,,0.0, +208,CheXpert-v1.0/train/patient46323/study1/view1_frontal.jpg,Male,59,Frontal,AP,,,,,,,,,1.0,1.0,,,,1.0 +209,CheXpert-v1.0/train/patient14972/study12/view1_frontal.jpg,Male,42,Frontal,AP,,,,1.0,,,,,,1.0,,,, +210,CheXpert-v1.0/train/patient28654/study4/view1_frontal.jpg,Male,35,Frontal,AP,,,,1.0,,,,,,1.0,,,,1.0 +211,CheXpert-v1.0/train/patient14949/study6/view1_frontal.jpg,Male,76,Frontal,AP,,,,1.0,,1.0,,,,1.0,1.0,,,1.0 +212,CheXpert-v1.0/train/patient42275/study1/view1_frontal.jpg,Male,73,Frontal,AP,,,,1.0,,1.0,,,,1.0,,,,1.0 +213,CheXpert-v1.0/train/patient40915/study2/view1_frontal.jpg,Male,64,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,1.0 +214,CheXpert-v1.0/train/patient47931/study3/view1_frontal.jpg,Male,70,Frontal,AP,,,,,,,,,,1.0,1.0,,1.0,1.0 +215,CheXpert-v1.0/train/patient13849/study12/view1_frontal.jpg,Male,37,Frontal,AP,,,0.0,,,1.0,-1.0,,-1.0,1.0,,,,1.0 +216,CheXpert-v1.0/train/patient27632/study8/view1_frontal.jpg,Male,68,Frontal,PA,,,,,,,,,,1.0,,1.0,, +217,CheXpert-v1.0/train/patient42151/study13/view1_frontal.jpg,Male,42,Frontal,AP,,-1.0,,1.0,,-1.0,1.0,,,1.0,1.0,,,1.0 +218,CheXpert-v1.0/train/patient07074/study19/view1_frontal.jpg,Male,79,Frontal,AP,,,,1.0,,,,,,1.0,,,,1.0 +219,CheXpert-v1.0/train/patient00750/study5/view1_frontal.jpg,Male,68,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,0.0 +220,CheXpert-v1.0/train/patient04462/study73/view1_frontal.jpg,Male,58,Frontal,AP,,,,1.0,,-1.0,,-1.0,,1.0,,,, +221,CheXpert-v1.0/train/patient26424/study2/view1_frontal.jpg,Male,20,Frontal,AP,,,,,,,,,,1.0,,,, +222,CheXpert-v1.0/train/patient13849/study19/view1_frontal.jpg,Male,38,Frontal,AP,,,,1.0,,,,,,1.0,,,,1.0 +223,CheXpert-v1.0/train/patient47557/study1/view1_frontal.jpg,Male,50,Frontal,AP,,,,1.0,,,,,,1.0,,,, 
+224,CheXpert-v1.0/train/patient20727/study5/view1_frontal.jpg,Male,62,Frontal,AP,,,,1.0,1.0,,,,,1.0,,,,1.0 +225,CheXpert-v1.0/train/patient09259/study3/view1_frontal.jpg,Male,65,Frontal,AP,,,,,1.0,,,,,1.0,,,, +226,CheXpert-v1.0/train/patient09773/study3/view1_frontal.jpg,Male,33,Frontal,PA,,,,,,,,,,1.0,1.0,,,1.0 +227,CheXpert-v1.0/train/patient43542/study2/view1_frontal.jpg,Male,62,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +228,CheXpert-v1.0/train/patient10648/study5/view1_frontal.jpg,Male,51,Frontal,AP,,,,1.0,,,,,,1.0,,,,0.0 +229,CheXpert-v1.0/train/patient30576/study4/view1_frontal.jpg,Male,59,Frontal,AP,,,,1.0,,,-1.0,,-1.0,1.0,1.0,,,1.0 +230,CheXpert-v1.0/train/patient22242/study1/view1_frontal.jpg,Male,62,Frontal,AP,,,,-1.0,,,,,1.0,1.0,-1.0,,,1.0 +231,CheXpert-v1.0/train/patient00142/study10/view1_frontal.jpg,Male,62,Frontal,PA,,,,,,,,,,1.0,,,,1.0 +232,CheXpert-v1.0/train/patient07074/study1/view1_frontal.jpg,Male,79,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +233,CheXpert-v1.0/train/patient04573/study4/view1_frontal.jpg,Male,60,Frontal,PA,,,,,,,,,,1.0,1.0,,, +234,CheXpert-v1.0/train/patient23351/study5/view1_frontal.jpg,Male,38,Frontal,AP,,,1.0,,,,,,,1.0,,,,1.0 +235,CheXpert-v1.0/train/patient34258/study1/view1_frontal.jpg,Male,42,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +236,CheXpert-v1.0/train/patient20733/study7/view1_frontal.jpg,Male,74,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,0.0 +237,CheXpert-v1.0/train/patient32731/study1/view1_frontal.jpg,Male,68,Frontal,AP,,,,1.0,,,,,-1.0,1.0,,,,1.0 +238,CheXpert-v1.0/train/patient15318/study6/view1_frontal.jpg,Male,24,Frontal,PA,,,,,,,,,,1.0,1.0,,, +239,CheXpert-v1.0/train/patient11022/study2/view1_frontal.jpg,Male,30,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +240,CheXpert-v1.0/train/patient15287/study2/view1_frontal.jpg,Male,54,Frontal,AP,,,,1.0,,,,,,1.0,,,,1.0 +241,CheXpert-v1.0/train/patient09593/study1/view1_frontal.jpg,Male,47,Frontal,AP,,1.0,,1.0,,,,,1.0,1.0,,,,1.0 
+242,CheXpert-v1.0/train/patient18382/study5/view1_frontal.jpg,Male,49,Frontal,AP,,,,,,,,,1.0,1.0,1.0,,,1.0 +243,CheXpert-v1.0/train/patient48367/study3/view1_frontal.jpg,Male,57,Frontal,AP,,,1.0,,,1.0,,,1.0,1.0,1.0,,, +244,CheXpert-v1.0/train/patient15243/study4/view1_frontal.jpg,Male,76,Frontal,AP,,,,,,,1.0,,,1.0,,,,1.0 +245,CheXpert-v1.0/train/patient17414/study3/view1_frontal.jpg,Male,81,Frontal,AP,,,,,,,,,,1.0,,,, +246,CheXpert-v1.0/train/patient00826/study9/view1_frontal.jpg,Male,57,Frontal,AP,,,,1.0,,-1.0,,,,1.0,,,,1.0 +247,CheXpert-v1.0/train/patient04667/study9/view1_frontal.jpg,Male,59,Frontal,AP,,,1.0,1.0,,,,,1.0,1.0,1.0,,,1.0 +248,CheXpert-v1.0/train/patient42144/study4/view1_frontal.jpg,Male,85,Frontal,AP,,,,,,,,,,1.0,1.0,,,0.0 +249,CheXpert-v1.0/train/patient38482/study2/view1_frontal.jpg,Male,54,Frontal,AP,,,,,,-1.0,,-1.0,,1.0,,,, +250,CheXpert-v1.0/train/patient11575/study6/view1_frontal.jpg,Male,40,Frontal,AP,,,,,,,,,,1.0,1.0,,, +251,CheXpert-v1.0/train/patient10936/study6/view1_frontal.jpg,Male,78,Frontal,AP,,,,,,1.0,,,,1.0,,,,1.0 +252,CheXpert-v1.0/train/patient41750/study4/view1_frontal.jpg,Male,60,Frontal,AP,,,1.0,,,,-1.0,,-1.0,1.0,,,,1.0 +253,CheXpert-v1.0/train/patient44563/study3/view1_frontal.jpg,Male,82,Frontal,AP,,,,,,0.0,-1.0,,-1.0,1.0,0.0,,,1.0 +254,CheXpert-v1.0/train/patient02530/study1/view1_frontal.jpg,Male,73,Frontal,AP,,,,,,,,,,1.0,,1.0,, +255,CheXpert-v1.0/train/patient09193/study5/view1_frontal.jpg,Male,52,Frontal,PA,,,,1.0,,,,-1.0,-1.0,1.0,1.0,,, +256,CheXpert-v1.0/train/patient10299/study3/view1_frontal.jpg,Male,88,Frontal,AP,,,,1.0,,,-1.0,,-1.0,1.0,1.0,,,1.0 +257,CheXpert-v1.0/train/patient23675/study3/view1_frontal.jpg,Male,51,Frontal,PA,,,,1.0,,,,,,1.0,,,, +258,CheXpert-v1.0/train/patient26955/study1/view1_frontal.jpg,Male,82,Frontal,AP,,,,1.0,,1.0,,,1.0,1.0,,,,1.0 +259,CheXpert-v1.0/train/patient47962/study4/view1_frontal.jpg,Male,56,Frontal,AP,,,1.0,1.0,,-1.0,,-1.0,,1.0,,,,1.0 
+260,CheXpert-v1.0/train/patient22065/study4/view1_frontal.jpg,Male,51,Frontal,AP,,,,1.0,,,,,1.0,1.0,,,,1.0 +261,CheXpert-v1.0/train/patient21764/study22/view1_frontal.jpg,Male,74,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,1.0 +262,CheXpert-v1.0/train/patient39137/study3/view1_frontal.jpg,Male,78,Frontal,AP,,,1.0,1.0,,,-1.0,,-1.0,1.0,1.0,,,1.0 +263,CheXpert-v1.0/train/patient01932/study12/view1_frontal.jpg,Male,28,Frontal,AP,,,,1.0,,-1.0,,,-1.0,1.0,0.0,,,1.0 +264,CheXpert-v1.0/train/patient22784/study12/view1_frontal.jpg,Male,50,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +265,CheXpert-v1.0/train/patient32997/study6/view1_frontal.jpg,Male,59,Frontal,AP,,,,,,1.0,,,1.0,1.0,1.0,,,1.0 +266,CheXpert-v1.0/train/patient20176/study1/view1_frontal.jpg,Male,70,Frontal,AP,,-1.0,,,,,,,1.0,1.0,,,,1.0 +267,CheXpert-v1.0/train/patient33890/study17/view1_frontal.jpg,Male,64,Frontal,AP,,,,,,,,,,1.0,,,, +268,CheXpert-v1.0/train/patient30127/study4/view1_frontal.jpg,Male,56,Frontal,AP,,1.0,,1.0,,,,,,1.0,,,,1.0 +269,CheXpert-v1.0/train/patient30330/study1/view1_frontal.jpg,Male,29,Frontal,PA,,,-1.0,,,,,,,1.0,,,,1.0 +270,CheXpert-v1.0/train/patient53807/study1/view1_frontal.jpg,Male,46,Frontal,AP,,,,1.0,,1.0,,,,1.0,,,,1.0 +271,CheXpert-v1.0/train/patient22019/study1/view1_frontal.jpg,Male,54,Frontal,PA,,,-1.0,-1.0,,0.0,1.0,,-1.0,1.0,,,,0.0 +272,CheXpert-v1.0/train/patient14844/study3/view2_frontal.jpg,Male,65,Frontal,AP,,,,,,1.0,,,,1.0,1.0,,,1.0 +273,CheXpert-v1.0/train/patient02450/study2/view1_frontal.jpg,Male,70,Frontal,AP,,,,1.0,,,,-1.0,,1.0,,,,1.0 +274,CheXpert-v1.0/train/patient08549/study20/view1_frontal.jpg,Male,68,Frontal,AP,,-1.0,,1.0,,,,,,1.0,1.0,1.0,, +275,CheXpert-v1.0/train/patient16153/study4/view1_frontal.jpg,Male,72,Frontal,AP,,,,,,1.0,,,1.0,1.0,1.0,,,1.0 +276,CheXpert-v1.0/train/patient39001/study5/view1_frontal.jpg,Male,78,Frontal,AP,,,,,,,,,1.0,1.0,,,, +277,CheXpert-v1.0/train/patient00170/study2/view1_frontal.jpg,Male,42,Frontal,AP,,,,,,,,,,1.0,,,,1.0 
+278,CheXpert-v1.0/train/patient30222/study2/view1_frontal.jpg,Male,65,Frontal,AP,,-1.0,,1.0,,,,,,1.0,,,,0.0 +279,CheXpert-v1.0/train/patient41011/study1/view1_frontal.jpg,Male,51,Frontal,AP,,,1.0,1.0,,1.0,,,1.0,1.0,,,1.0,1.0 +280,CheXpert-v1.0/train/patient60980/study1/view1_frontal.jpg,Male,37,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +281,CheXpert-v1.0/train/patient12926/study4/view1_frontal.jpg,Male,25,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,1.0 +282,CheXpert-v1.0/train/patient09530/study10/view1_frontal.jpg,Male,35,Frontal,PA,,,,1.0,,,,,,1.0,1.0,,,1.0 +283,CheXpert-v1.0/train/patient15507/study1/view1_frontal.jpg,Male,26,Frontal,AP,,,,,,,0.0,,,1.0,,,0.0,0.0 +284,CheXpert-v1.0/train/patient40209/study6/view1_frontal.jpg,Male,29,Frontal,AP,,,,1.0,,-1.0,,,,1.0,1.0,,,1.0 +285,CheXpert-v1.0/train/patient42579/study1/view1_frontal.jpg,Male,21,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +286,CheXpert-v1.0/train/patient29565/study1/view1_frontal.jpg,Male,20,Frontal,AP,,,,1.0,,,,,,1.0,,,,1.0 +287,CheXpert-v1.0/train/patient59120/study1/view1_frontal.jpg,Male,18,Frontal,AP,,1.0,,,,,,,,1.0,,,, +288,CheXpert-v1.0/train/patient06686/study4/view1_frontal.jpg,Male,81,Frontal,AP,,,-1.0,1.0,,,,,,1.0,,,,1.0 +289,CheXpert-v1.0/train/patient03699/study4/view1_frontal.jpg,Male,50,Frontal,AP,,-1.0,,1.0,,,,,,1.0,1.0,,1.0,1.0 +290,CheXpert-v1.0/train/patient27078/study9/view1_frontal.jpg,Male,37,Frontal,PA,,,,,,1.0,,,,1.0,,,,1.0 +291,CheXpert-v1.0/train/patient53402/study4/view1_frontal.jpg,Male,26,Frontal,AP,,,,1.0,,,,-1.0,-1.0,1.0,1.0,,,1.0 +292,CheXpert-v1.0/train/patient13337/study2/view1_frontal.jpg,Male,76,Frontal,AP,,,,1.0,-1.0,,,,,1.0,,,, +293,CheXpert-v1.0/train/patient11606/study10/view1_frontal.jpg,Male,46,Frontal,PA,,,,1.0,,,,,,1.0,1.0,,, +294,CheXpert-v1.0/train/patient00259/study1/view1_frontal.jpg,Male,90,Frontal,AP,,,,,,,,,,1.0,,,, +295,CheXpert-v1.0/train/patient26973/study9/view1_frontal.jpg,Male,66,Frontal,AP,,,,,,,,,,1.0,1.0,,, 
+296,CheXpert-v1.0/train/patient49668/study1/view1_frontal.jpg,Male,46,Frontal,AP,,,,,,,,,-1.0,1.0,,,, +297,CheXpert-v1.0/train/patient30913/study8/view2_frontal.jpg,Male,79,Frontal,AP,,,,,,1.0,,,,1.0,,,,1.0 +298,CheXpert-v1.0/train/patient36289/study8/view1_frontal.jpg,Male,21,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,1.0 +299,CheXpert-v1.0/train/patient35058/study3/view1_frontal.jpg,Male,68,Frontal,AP,,,,,,,,,,1.0,,1.0,,1.0 +300,CheXpert-v1.0/train/patient35203/study24/view1_frontal.jpg,Male,66,Frontal,AP,,,,1.0,,,,,,,1.0,,,1.0 +301,CheXpert-v1.0/train/patient31483/study15/view1_frontal.jpg,Male,71,Frontal,AP,,,,1.0,,,,,1.0,,,,,1.0 +302,CheXpert-v1.0/train/patient30416/study19/view1_frontal.jpg,Male,61,Frontal,AP,,,,1.0,,-1.0,0.0,-1.0,,,1.0,,,1.0 +303,CheXpert-v1.0/train/patient60130/study1/view1_frontal.jpg,Male,50,Frontal,AP,,,1.0,1.0,,,,,-1.0,,1.0,,, +304,CheXpert-v1.0/train/patient05232/study18/view1_frontal.jpg,Male,53,Frontal,AP,,,,1.0,,1.0,,,,,1.0,,,1.0 +305,CheXpert-v1.0/train/patient49197/study1/view1_frontal.jpg,Male,54,Frontal,AP,,,,-1.0,,,,,-1.0,0.0,,,,1.0 +306,CheXpert-v1.0/train/patient15081/study34/view1_frontal.jpg,Male,60,Frontal,AP,,,,,,1.0,,,,,,,,1.0 +307,CheXpert-v1.0/train/patient20789/study7/view1_frontal.jpg,Male,76,Frontal,AP,,,-1.0,1.0,,,,,,0.0,0.0,,,1.0 +308,CheXpert-v1.0/train/patient55977/study1/view1_frontal.jpg,Male,39,Frontal,AP,,,,,,1.0,,,1.0,,,,,1.0 +309,CheXpert-v1.0/train/patient45650/study4/view1_frontal.jpg,Male,71,Frontal,AP,,,1.0,1.0,,,,,,0.0,-1.0,,,1.0 +310,CheXpert-v1.0/train/patient52050/study1/view1_frontal.jpg,Male,81,Frontal,AP,,,,,,1.0,-1.0,,-1.0,,,,,1.0 +311,CheXpert-v1.0/train/patient40462/study2/view1_frontal.jpg,Male,75,Frontal,AP,,,,1.0,,,-1.0,,-1.0,,1.0,,,1.0 +312,CheXpert-v1.0/train/patient14744/study3/view1_frontal.jpg,Male,52,Frontal,AP,,-1.0,,1.0,,,,,,0.0,1.0,,,1.0 +313,CheXpert-v1.0/train/patient12096/study16/view1_frontal.jpg,Male,58,Frontal,AP,,,,,,,1.0,,1.0,0.0,,,, 
+314,CheXpert-v1.0/train/patient38194/study4/view1_frontal.jpg,Male,65,Frontal,AP,,,,1.0,,,,,,,1.0,,,1.0 +315,CheXpert-v1.0/train/patient48059/study8/view1_frontal.jpg,Male,71,Frontal,AP,,,,,,1.0,,1.0,,,,,, +316,CheXpert-v1.0/train/patient41889/study1/view1_frontal.jpg,Male,66,Frontal,AP,,,,1.0,,1.0,,,,,1.0,,,1.0 +317,CheXpert-v1.0/train/patient29096/study9/view1_frontal.jpg,Male,87,Frontal,PA,,,1.0,-1.0,,,0.0,,-1.0,0.0,0.0,,,1.0 +318,CheXpert-v1.0/train/patient56201/study1/view1_frontal.jpg,Male,67,Frontal,AP,,,,1.0,,,,,0.0,,1.0,,,1.0 +319,CheXpert-v1.0/train/patient30450/study1/view1_frontal.jpg,Male,82,Frontal,AP,,,1.0,,,1.0,,,1.0,,1.0,,,1.0 +320,CheXpert-v1.0/train/patient13193/study5/view2_frontal.jpg,Male,70,Frontal,AP,,,1.0,1.0,,,,,,,1.0,,,1.0 +321,CheXpert-v1.0/train/patient11322/study1/view1_frontal.jpg,Male,76,Frontal,AP,,0.0,,1.0,1.0,,,,,0.0,,,,1.0 +322,CheXpert-v1.0/train/patient08298/study3/view1_frontal.jpg,Male,55,Frontal,AP,,,,1.0,,1.0,,,,,1.0,,,1.0 +323,CheXpert-v1.0/train/patient10288/study1/view1_frontal.jpg,Male,51,Frontal,AP,,,1.0,,,0.0,0.0,,,,,,, +324,CheXpert-v1.0/train/patient15826/study4/view1_frontal.jpg,Male,53,Frontal,AP,,,,,,,,,,,1.0,,, +325,CheXpert-v1.0/train/patient26040/study1/view1_frontal.jpg,Male,42,Frontal,PA,,0.0,0.0,,,,,,,,0.0,1.0,, +326,CheXpert-v1.0/train/patient34454/study7/view1_frontal.jpg,Male,53,Frontal,AP,,,,1.0,,,-1.0,,-1.0,,1.0,,,1.0 +327,CheXpert-v1.0/train/patient08549/study24/view1_frontal.jpg,Male,69,Frontal,AP,,,1.0,1.0,,1.0,,,1.0,,1.0,,1.0, +328,CheXpert-v1.0/train/patient34546/study1/view1_frontal.jpg,Male,58,Frontal,AP,,,-1.0,,,1.0,,,,,1.0,,,1.0 +329,CheXpert-v1.0/train/patient24654/study1/view1_frontal.jpg,Male,35,Frontal,PA,,1.0,,,,,,,,0.0,0.0,,1.0, +330,CheXpert-v1.0/train/patient22921/study1/view1_frontal.jpg,Male,79,Frontal,PA,,,0.0,1.0,,,,,-1.0,0.0,0.0,-1.0,, +331,CheXpert-v1.0/train/patient43382/study3/view1_frontal.jpg,Male,54,Frontal,AP,,,,,,1.0,,,,,1.0,,,1.0 
+332,CheXpert-v1.0/train/patient18042/study7/view1_frontal.jpg,Male,54,Frontal,AP,,,,,,1.0,,,,,,,,1.0 +333,CheXpert-v1.0/train/patient46378/study6/view1_frontal.jpg,Male,75,Frontal,AP,,,,,,,,,,,1.0,,,1.0 +334,CheXpert-v1.0/train/patient16944/study1/view1_frontal.jpg,Male,60,Frontal,PA,,,0.0,,,,0.0,,,0.0,0.0,,, +335,CheXpert-v1.0/train/patient08291/study1/view1_frontal.jpg,Male,59,Frontal,AP,,,1.0,,,,-1.0,,-1.0,,1.0,,,1.0 +336,CheXpert-v1.0/train/patient03973/study2/view1_frontal.jpg,Male,71,Frontal,PA,,,,1.0,,,,-1.0,,,,,, +337,CheXpert-v1.0/train/patient19412/study1/view1_frontal.jpg,Male,71,Frontal,PA,,,0.0,1.0,,0.0,,,1.0,,1.0,,, +338,CheXpert-v1.0/train/patient33307/study3/view1_frontal.jpg,Male,70,Frontal,PA,,,1.0,,,0.0,0.0,,,0.0,0.0,,,1.0 +339,CheXpert-v1.0/train/patient30138/study1/view1_frontal.jpg,Male,51,Frontal,AP,1.0,,,,,,,,,0.0,,,,1.0 +340,CheXpert-v1.0/train/patient34631/study1/view1_frontal.jpg,Male,80,Frontal,AP,,,,1.0,,,,,,,1.0,,,1.0 +341,CheXpert-v1.0/train/patient14528/study2/view1_frontal.jpg,Male,83,Frontal,AP,,,,1.0,,-1.0,,-1.0,-1.0,,1.0,,,1.0 +342,CheXpert-v1.0/train/patient37720/study4/view1_frontal.jpg,Male,61,Frontal,AP,,,,1.0,,,,,,,1.0,,,1.0 +343,CheXpert-v1.0/train/patient26048/study1/view1_frontal.jpg,Male,74,Frontal,AP,,1.0,,,,-1.0,,,,,,,-1.0, +344,CheXpert-v1.0/train/patient18525/study1/view1_frontal.jpg,Male,55,Frontal,PA,,1.0,,,,0.0,0.0,,,0.0,0.0,,, +345,CheXpert-v1.0/train/patient11794/study5/view1_frontal.jpg,Male,70,Frontal,AP,,,,,,,,,,,1.0,,, +346,CheXpert-v1.0/train/patient08789/study2/view1_frontal.jpg,Male,56,Frontal,AP,,,1.0,,,1.0,,,,,1.0,,,1.0 +347,CheXpert-v1.0/train/patient47526/study1/view1_frontal.jpg,Male,74,Frontal,AP,1.0,,,,,,,,,0.0,,,,1.0 +348,CheXpert-v1.0/train/patient39432/study2/view1_frontal.jpg,Male,78,Frontal,AP,,,,,,,0.0,,,,,,,1.0 +349,CheXpert-v1.0/train/patient27931/study4/view1_frontal.jpg,Male,68,Frontal,AP,,,,-1.0,,,,,,0.0,1.0,,, 
+350,CheXpert-v1.0/train/patient13898/study3/view1_frontal.jpg,Male,52,Frontal,AP,,,,1.0,,,,,,0.0,,,,0.0 +351,CheXpert-v1.0/train/patient47024/study4/view1_frontal.jpg,Male,64,Frontal,AP,,,,,,1.0,,,,0.0,,,, +352,CheXpert-v1.0/train/patient11872/study5/view1_frontal.jpg,Male,59,Frontal,AP,,,,,,1.0,,,,,-1.0,,,1.0 +353,CheXpert-v1.0/train/patient41436/study1/view1_frontal.jpg,Male,45,Frontal,AP,,,0.0,,,,,,1.0,0.0,0.0,,,1.0 +354,CheXpert-v1.0/train/patient28806/study5/view1_frontal.jpg,Male,64,Frontal,AP,,,,,,,-1.0,,-1.0,,1.0,,,1.0 +355,CheXpert-v1.0/train/patient38034/study1/view1_frontal.jpg,Male,68,Frontal,AP,,,-1.0,1.0,,1.0,,,,,-1.0,,, +356,CheXpert-v1.0/train/patient25114/study4/view1_frontal.jpg,Male,80,Frontal,AP,,,,1.0,,,,-1.0,,,,,, +357,CheXpert-v1.0/train/patient20173/study20/view1_frontal.jpg,Male,43,Frontal,AP,,,,1.0,,,,1.0,,,1.0,,,1.0 +358,CheXpert-v1.0/train/patient52690/study1/view1_frontal.jpg,Male,84,Frontal,AP,,,0.0,,1.0,,,,,0.0,,,, +359,CheXpert-v1.0/train/patient20982/study1/view1_frontal.jpg,Male,65,Frontal,AP,,0.0,,1.0,,,-1.0,,-1.0,,,,,1.0 +360,CheXpert-v1.0/train/patient29629/study1/view1_frontal.jpg,Male,41,Frontal,AP,,,,,,-1.0,-1.0,,-1.0,,,,, +361,CheXpert-v1.0/train/patient56687/study1/view1_frontal.jpg,Male,79,Frontal,AP,,,,1.0,,1.0,,,,,1.0,,,1.0 +362,CheXpert-v1.0/train/patient06949/study12/view1_frontal.jpg,Male,64,Frontal,AP,1.0,,,,,,,,0.0,,0.0,,,1.0 +363,CheXpert-v1.0/train/patient48778/study4/view1_frontal.jpg,Male,56,Frontal,AP,,,,,,,,,,,1.0,,,1.0 +364,CheXpert-v1.0/train/patient32016/study2/view1_frontal.jpg,Male,53,Frontal,PA,,,,0.0,,1.0,,,,,,,,1.0 +365,CheXpert-v1.0/train/patient33620/study12/view1_frontal.jpg,Male,49,Frontal,AP,,,1.0,,,1.0,,,,,,,, +366,CheXpert-v1.0/train/patient03066/study3/view1_frontal.jpg,Male,61,Frontal,PA,,,1.0,1.0,,0.0,,,,,1.0,,,1.0 +367,CheXpert-v1.0/train/patient64249/study2/view1_frontal.jpg,Male,73,Frontal,AP,,,,1.0,,0.0,-1.0,,-1.0,0.0,1.0,,, 
+368,CheXpert-v1.0/train/patient35281/study1/view1_frontal.jpg,Male,74,Frontal,AP,,,,1.0,,1.0,,,1.0,0.0,1.0,,,1.0 +369,CheXpert-v1.0/train/patient51043/study1/view1_frontal.jpg,Male,53,Frontal,AP,,,-1.0,,,0.0,0.0,,,,,,,1.0 +370,CheXpert-v1.0/train/patient21529/study27/view1_frontal.jpg,Male,61,Frontal,AP,,,1.0,,,1.0,,,,,,,,1.0 +371,CheXpert-v1.0/train/patient26199/study30/view1_frontal.jpg,Male,56,Frontal,AP,,,,,1.0,1.0,,,1.0,,1.0,,,1.0 +372,CheXpert-v1.0/train/patient60582/study1/view1_frontal.jpg,Male,81,Frontal,AP,1.0,0.0,,,,0.0,0.0,,,,0.0,,,1.0 +373,CheXpert-v1.0/train/patient43105/study2/view1_frontal.jpg,Male,80,Frontal,AP,,0.0,,,,,,,,0.0,,,, +374,CheXpert-v1.0/train/patient60515/study1/view1_frontal.jpg,Male,85,Frontal,AP,,,,,,0.0,,0.0,1.0,,,,, +375,CheXpert-v1.0/train/patient31550/study2/view1_frontal.jpg,Male,69,Frontal,AP,,,,,,1.0,,,1.0,,,,,1.0 +376,CheXpert-v1.0/train/patient32429/study1/view1_frontal.jpg,Male,66,Frontal,AP,,,,1.0,,,,,,0.0,,,,1.0 +377,CheXpert-v1.0/train/patient36725/study5/view1_frontal.jpg,Male,43,Frontal,AP,,,,1.0,,1.0,,,1.0,,,,,1.0 +378,CheXpert-v1.0/train/patient09193/study11/view1_frontal.jpg,Male,52,Frontal,AP,,,,1.0,,,1.0,,,,1.0,,,1.0 +379,CheXpert-v1.0/train/patient14528/study6/view1_frontal.jpg,Male,83,Frontal,AP,,,,1.0,,,,,,0.0,0.0,,,1.0 +380,CheXpert-v1.0/train/patient17235/study2/view1_frontal.jpg,Male,38,Frontal,AP,,,,1.0,,,,,,0.0,1.0,,,1.0 +381,CheXpert-v1.0/train/patient54624/study1/view1_frontal.jpg,Male,73,Frontal,AP,,,1.0,,,,,,,,1.0,,, +382,CheXpert-v1.0/train/patient20347/study1/view1_frontal.jpg,Male,79,Frontal,PA,,0.0,,1.0,,0.0,,,,,0.0,,, +383,CheXpert-v1.0/train/patient16452/study3/view1_frontal.jpg,Male,82,Frontal,AP,,,,1.0,,,-1.0,,-1.0,,1.0,,, +384,CheXpert-v1.0/train/patient39810/study2/view1_frontal.jpg,Male,57,Frontal,AP,,,,1.0,,-1.0,,-1.0,,,,,,1.0 +385,CheXpert-v1.0/train/patient35972/study4/view1_frontal.jpg,Male,43,Frontal,AP,,,,1.0,,,,-1.0,,,,,,1.0 
+386,CheXpert-v1.0/train/patient30159/study5/view1_frontal.jpg,Male,74,Frontal,AP,,,,1.0,1.0,,,,,,1.0,,,1.0 +387,CheXpert-v1.0/train/patient37043/study6/view1_frontal.jpg,Male,56,Frontal,AP,,-1.0,,0.0,,,0.0,,,,0.0,,,0.0 +388,CheXpert-v1.0/train/patient03537/study6/view1_frontal.jpg,Male,66,Frontal,AP,,0.0,,1.0,,-1.0,1.0,-1.0,,,,,,1.0 +389,CheXpert-v1.0/train/patient37912/study17/view1_frontal.jpg,Male,41,Frontal,AP,1.0,,,,,,0.0,,,0.0,0.0,,, +390,CheXpert-v1.0/train/patient49948/study2/view1_frontal.jpg,Male,54,Frontal,AP,,,,1.0,,,,,,,1.0,,,1.0 +391,CheXpert-v1.0/train/patient37826/study1/view1_frontal.jpg,Male,61,Frontal,AP,,,,,,1.0,-1.0,,-1.0,,1.0,,,1.0 +392,CheXpert-v1.0/train/patient31733/study1/view1_frontal.jpg,Male,62,Frontal,AP,,,,,,,-1.0,1.0,-1.0,0.0,,,,1.0 +393,CheXpert-v1.0/train/patient34903/study29/view1_frontal.jpg,Male,72,Frontal,AP,,,,1.0,,1.0,,,,0.0,1.0,,,1.0 +394,CheXpert-v1.0/train/patient05134/study3/view1_frontal.jpg,Male,63,Frontal,AP,,,,1.0,,1.0,,1.0,,,,,, +395,CheXpert-v1.0/train/patient02362/study6/view1_frontal.jpg,Male,78,Frontal,AP,,,,1.0,,1.0,1.0,,,,,,,1.0 +396,CheXpert-v1.0/train/patient39300/study4/view1_frontal.jpg,Male,63,Frontal,AP,1.0,,0.0,0.0,,0.0,,,,,0.0,,,1.0 +397,CheXpert-v1.0/train/patient41430/study2/view1_frontal.jpg,Male,82,Frontal,AP,,,,1.0,,1.0,,,,,,,, +398,CheXpert-v1.0/train/patient40304/study4/view1_frontal.jpg,Male,58,Frontal,AP,,,,1.0,,1.0,,,,,1.0,,,1.0 +399,CheXpert-v1.0/train/patient12823/study1/view1_frontal.jpg,Male,30,Frontal,PA,,,,1.0,1.0,0.0,0.0,,-1.0,,0.0,,, diff --git a/chexpert_SexBinary_PTX_final_test_df.csv b/chexpert_SexBinary_PTX_final_test_df.csv new file mode 100644 index 0000000..6dc725d --- /dev/null +++ b/chexpert_SexBinary_PTX_final_test_df.csv @@ -0,0 +1,101 @@ +,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices 
+0,CheXpert-v1.0/train/patient12317/study4/view1_frontal.jpg,Female,44,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,1.0 +1,CheXpert-v1.0/train/patient06516/study10/view1_frontal.jpg,Female,62,Frontal,PA,,,,-1.0,,,,,,1.0,,,, +2,CheXpert-v1.0/train/patient42408/study1/view1_frontal.jpg,Female,51,Frontal,AP,,,,,,1.0,,,,1.0,1.0,,, +3,CheXpert-v1.0/train/patient36521/study5/view1_frontal.jpg,Female,60,Frontal,AP,,,,,,,,,,1.0,1.0,,, +4,CheXpert-v1.0/train/patient41624/study12/view1_frontal.jpg,Female,64,Frontal,AP,,,,,,,,,,1.0,1.0,,,0.0 +5,CheXpert-v1.0/train/patient10128/study2/view1_frontal.jpg,Female,63,Frontal,AP,,,,,,,,,,1.0,1.0,,,0.0 +6,CheXpert-v1.0/train/patient10892/study4/view1_frontal.jpg,Female,76,Frontal,AP,,,,-1.0,,,,,1.0,1.0,,,,1.0 +7,CheXpert-v1.0/train/patient03499/study3/view1_frontal.jpg,Female,67,Frontal,PA,,,,,,,,,,1.0,1.0,,,1.0 +8,CheXpert-v1.0/train/patient57441/study2/view1_frontal.jpg,Female,55,Frontal,AP,,,,,,,,,,1.0,,,, +9,CheXpert-v1.0/train/patient05340/study3/view1_frontal.jpg,Female,23,Frontal,AP,,,1.0,,,,,,1.0,1.0,,,, +10,CheXpert-v1.0/train/patient00467/study3/view1_frontal.jpg,Female,23,Frontal,PA,,,,1.0,,,,,,1.0,,,,1.0 +11,CheXpert-v1.0/train/patient37772/study3/view1_frontal.jpg,Female,48,Frontal,AP,,,,1.0,,,-1.0,,-1.0,1.0,1.0,,, +12,CheXpert-v1.0/train/patient29732/study34/view1_frontal.jpg,Female,42,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +13,CheXpert-v1.0/train/patient62079/study1/view1_frontal.jpg,Female,70,Frontal,AP,,,,,,,,,,1.0,,,, +14,CheXpert-v1.0/train/patient35530/study4/view1_frontal.jpg,Female,48,Frontal,AP,,,,1.0,,,,,-1.0,1.0,,,,1.0 +15,CheXpert-v1.0/train/patient30991/study7/view1_frontal.jpg,Female,24,Frontal,AP,,,,1.0,,,,,,1.0,0.0,,1.0, +16,CheXpert-v1.0/train/patient29819/study5/view1_frontal.jpg,Female,50,Frontal,AP,,,,,1.0,,,,,1.0,1.0,,,1.0 +17,CheXpert-v1.0/train/patient45816/study2/view1_frontal.jpg,Female,48,Frontal,AP,,,,1.0,,,,,,1.0,,,, 
+18,CheXpert-v1.0/train/patient63591/study1/view2_frontal.jpg,Female,67,Frontal,AP,,,,1.0,,,,,1.0,1.0,,,, +19,CheXpert-v1.0/train/patient06516/study14/view1_frontal.jpg,Female,62,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,0.0 +20,CheXpert-v1.0/train/patient59733/study1/view1_frontal.jpg,Female,73,Frontal,AP,,,,1.0,,,,,,1.0,,,, +21,CheXpert-v1.0/train/patient37561/study1/view1_frontal.jpg,Female,61,Frontal,AP,,,,,,,,,,1.0,1.0,,,1.0 +22,CheXpert-v1.0/train/patient14094/study1/view1_frontal.jpg,Female,40,Frontal,AP,,,,1.0,,-1.0,,,-1.0,1.0,1.0,,, +23,CheXpert-v1.0/train/patient43458/study2/view1_frontal.jpg,Female,32,Frontal,AP,,0.0,0.0,,,,,,,1.0,,,,1.0 +24,CheXpert-v1.0/train/patient36646/study1/view1_frontal.jpg,Female,63,Frontal,AP,,,,,,1.0,,,,1.0,,,, +25,CheXpert-v1.0/train/patient06214/study2/view1_frontal.jpg,Female,25,Frontal,PA,,1.0,,,,,,,,0.0,0.0,,,1.0 +26,CheXpert-v1.0/train/patient06204/study6/view1_frontal.jpg,Female,48,Frontal,AP,,,,1.0,,1.0,,,1.0,,,,,1.0 +27,CheXpert-v1.0/train/patient18172/study1/view1_frontal.jpg,Female,80,Frontal,AP,1.0,,,,,,,,,0.0,,,,1.0 +28,CheXpert-v1.0/train/patient10791/study3/view1_frontal.jpg,Female,38,Frontal,AP,,-1.0,,,1.0,,0.0,,,0.0,0.0,,,1.0 +29,CheXpert-v1.0/train/patient17660/study2/view1_frontal.jpg,Female,35,Frontal,AP,,-1.0,,1.0,,,-1.0,,,0.0,,,, +30,CheXpert-v1.0/train/patient00467/study16/view1_frontal.jpg,Female,23,Frontal,AP,,,,,,,,,0.0,0.0,,,,1.0 +31,CheXpert-v1.0/train/patient34404/study5/view1_frontal.jpg,Female,72,Frontal,AP,,,,,,1.0,,,1.0,,1.0,,,1.0 +32,CheXpert-v1.0/train/patient50112/study1/view1_frontal.jpg,Female,40,Frontal,AP,,0.0,0.0,,,1.0,,,1.0,0.0,,,,1.0 +33,CheXpert-v1.0/train/patient39509/study2/view1_frontal.jpg,Female,71,Frontal,AP,,,,,,,0.0,,,0.0,0.0,,,1.0 +34,CheXpert-v1.0/train/patient26526/study11/view1_frontal.jpg,Female,61,Frontal,AP,,,,1.0,,1.0,,-1.0,,,1.0,,,1.0 +35,CheXpert-v1.0/train/patient10282/study2/view1_frontal.jpg,Female,61,Frontal,AP,,,1.0,1.0,,1.0,,-1.0,-1.0,,,,,1.0 
+36,CheXpert-v1.0/train/patient40247/study27/view1_frontal.jpg,Female,56,Frontal,AP,,,,,,1.0,,,1.0,,1.0,,,1.0 +37,CheXpert-v1.0/train/patient49886/study3/view1_frontal.jpg,Female,25,Frontal,AP,,,,,,1.0,,,1.0,,1.0,,,1.0 +38,CheXpert-v1.0/train/patient39094/study2/view1_frontal.jpg,Female,66,Frontal,AP,,,1.0,1.0,,0.0,,,,,0.0,,,1.0 +39,CheXpert-v1.0/train/patient26169/study28/view1_frontal.jpg,Female,63,Frontal,AP,,,1.0,1.0,,1.0,,,,,1.0,,,1.0 +40,CheXpert-v1.0/train/patient41059/study6/view1_frontal.jpg,Female,59,Frontal,AP,,,,1.0,,,,,,,1.0,,,1.0 +41,CheXpert-v1.0/train/patient37019/study2/view1_frontal.jpg,Female,72,Frontal,AP,,-1.0,,1.0,,,,,,0.0,,1.0,,1.0 +42,CheXpert-v1.0/train/patient47326/study7/view1_frontal.jpg,Female,44,Frontal,AP,,,,,,,,,1.0,,1.0,,,1.0 +43,CheXpert-v1.0/train/patient37361/study1/view1_frontal.jpg,Female,90,Frontal,AP,,1.0,,-1.0,,1.0,,,,,,,, +44,CheXpert-v1.0/train/patient38162/study2/view1_frontal.jpg,Female,45,Frontal,AP,,,,1.0,,1.0,,,,0.0,1.0,,,1.0 +45,CheXpert-v1.0/train/patient40200/study2/view1_frontal.jpg,Female,77,Frontal,AP,,,,1.0,,,-1.0,,-1.0,,1.0,,,1.0 +46,CheXpert-v1.0/train/patient03960/study4/view1_frontal.jpg,Female,87,Frontal,AP,,,,,,,-1.0,,-1.0,,,,0.0, +47,CheXpert-v1.0/train/patient05930/study4/view1_frontal.jpg,Female,35,Frontal,AP,,,,1.0,,-1.0,,-1.0,,,,,,1.0 +48,CheXpert-v1.0/train/patient22072/study2/view1_frontal.jpg,Female,72,Frontal,AP,,1.0,,1.0,,,,,1.0,0.0,0.0,,,1.0 +49,CheXpert-v1.0/train/patient13530/study32/view1_frontal.jpg,Female,37,Frontal,AP,,,1.0,1.0,,-1.0,,,,,1.0,,,1.0 +50,CheXpert-v1.0/train/patient56187/study2/view1_frontal.jpg,Male,62,Frontal,AP,,,,,,,,,,1.0,1.0,,, +51,CheXpert-v1.0/train/patient47880/study1/view1_frontal.jpg,Male,57,Frontal,AP,,,,,,,,,,1.0,,,1.0, +52,CheXpert-v1.0/train/patient21240/study18/view1_frontal.jpg,Male,68,Frontal,AP,,,,1.0,,,,,,1.0,,,,1.0 +53,CheXpert-v1.0/train/patient05023/study3/view1_frontal.jpg,Male,27,Frontal,AP,,,,,,,,,1.0,1.0,1.0,,,0.0 
+54,CheXpert-v1.0/train/patient40892/study6/view1_frontal.jpg,Male,54,Frontal,AP,,,,,,-1.0,,,,1.0,,,,1.0 +55,CheXpert-v1.0/train/patient38040/study10/view1_frontal.jpg,Male,26,Frontal,AP,,,,1.0,,,,,,1.0,,,,1.0 +56,CheXpert-v1.0/train/patient12556/study10/view1_frontal.jpg,Male,50,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,, +57,CheXpert-v1.0/train/patient48267/study1/view1_frontal.jpg,Male,57,Frontal,AP,,,,,,,,,,1.0,,,,0.0 +58,CheXpert-v1.0/train/patient49504/study3/view1_frontal.jpg,Male,19,Frontal,AP,,,,,,1.0,,,,1.0,,,,0.0 +59,CheXpert-v1.0/train/patient03122/study34/view1_frontal.jpg,Male,69,Frontal,AP,,,,,,,,,,1.0,,,,1.0 +60,CheXpert-v1.0/train/patient17506/study1/view1_frontal.jpg,Male,50,Frontal,PA,,,,,,,,,1.0,1.0,1.0,,, +61,CheXpert-v1.0/train/patient13101/study3/view1_frontal.jpg,Male,70,Frontal,AP,,,1.0,1.0,,,,,,1.0,,,,0.0 +62,CheXpert-v1.0/train/patient46267/study4/view1_frontal.jpg,Male,34,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,1.0 +63,CheXpert-v1.0/train/patient04135/study30/view1_frontal.jpg,Male,43,Frontal,AP,,,0.0,,1.0,,-1.0,,-1.0,1.0,1.0,,,1.0 +64,CheXpert-v1.0/train/patient24008/study2/view1_frontal.jpg,Male,65,Frontal,PA,,,,1.0,,,,,,1.0,,,,1.0 +65,CheXpert-v1.0/train/patient23118/study5/view1_frontal.jpg,Male,79,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,,1.0 +66,CheXpert-v1.0/train/patient29779/study8/view1_frontal.jpg,Male,20,Frontal,AP,,,1.0,1.0,,,,,,1.0,1.0,,,1.0 +67,CheXpert-v1.0/train/patient04083/study16/view1_frontal.jpg,Male,40,Frontal,AP,,1.0,1.0,,,,,,,1.0,,,,1.0 +68,CheXpert-v1.0/train/patient24008/study5/view1_frontal.jpg,Male,65,Frontal,PA,,,,-1.0,,,-1.0,,-1.0,1.0,1.0,,,1.0 +69,CheXpert-v1.0/train/patient45421/study1/view1_frontal.jpg,Male,62,Frontal,AP,,0.0,,1.0,,,,,,1.0,,,-1.0, +70,CheXpert-v1.0/train/patient40111/study6/view1_frontal.jpg,Male,45,Frontal,AP,,,1.0,1.0,,,,,,1.0,1.0,,,1.0 +71,CheXpert-v1.0/train/patient04951/study9/view2_frontal.jpg,Male,67,Frontal,PA,,,,-1.0,,,,,,1.0,,,,0.0 
+72,CheXpert-v1.0/train/patient05474/study1/view1_frontal.jpg,Male,77,Frontal,PA,,,0.0,,,,,,,1.0,,,,1.0 +73,CheXpert-v1.0/train/patient38584/study1/view2_frontal.jpg,Male,56,Frontal,AP,,,,1.0,,,,,,1.0,1.0,1.0,1.0,1.0 +74,CheXpert-v1.0/train/patient43922/study7/view1_frontal.jpg,Male,66,Frontal,AP,,,,1.0,,,,,,1.0,1.0,,, +75,CheXpert-v1.0/train/patient08625/study4/view1_frontal.jpg,Male,64,Frontal,AP,1.0,,,,,,,,,0.0,,,,1.0 +76,CheXpert-v1.0/train/patient02937/study1/view1_frontal.jpg,Male,26,Frontal,PA,,,,,,1.0,,,,,,,,1.0 +77,CheXpert-v1.0/train/patient35485/study1/view1_frontal.jpg,Male,58,Frontal,AP,,-1.0,,,,1.0,,,1.0,0.0,,,,1.0 +78,CheXpert-v1.0/train/patient22862/study23/view1_frontal.jpg,Male,67,Frontal,PA,,,-1.0,1.0,,-1.0,-1.0,,,,1.0,,, +79,CheXpert-v1.0/train/patient40151/study7/view1_frontal.jpg,Male,70,Frontal,AP,,,,,,,1.0,,,,1.0,,, +80,CheXpert-v1.0/train/patient04031/study1/view1_frontal.jpg,Male,77,Frontal,AP,,,1.0,1.0,,,-1.0,,-1.0,,1.0,,, +81,CheXpert-v1.0/train/patient02936/study19/view1_frontal.jpg,Male,55,Frontal,AP,,,,,,1.0,,,1.0,,1.0,,,1.0 +82,CheXpert-v1.0/train/patient50001/study4/view1_frontal.jpg,Male,74,Frontal,AP,,,,1.0,,,,,1.0,,1.0,,, +83,CheXpert-v1.0/train/patient03048/study2/view1_frontal.jpg,Male,58,Frontal,PA,,-1.0,,,,0.0,,,,,0.0,,,1.0 +84,CheXpert-v1.0/train/patient12756/study2/view1_frontal.jpg,Male,51,Frontal,AP,,,,1.0,,,,,,-1.0,,,,1.0 +85,CheXpert-v1.0/train/patient32605/study1/view1_frontal.jpg,Male,51,Frontal,PA,,,,,,0.0,,,,,,,, +86,CheXpert-v1.0/train/patient14631/study3/view1_frontal.jpg,Male,83,Frontal,AP,,,,1.0,,,,,,,1.0,,, +87,CheXpert-v1.0/train/patient03072/study5/view1_frontal.jpg,Male,51,Frontal,AP,,,,,,1.0,,,,,,,,1.0 +88,CheXpert-v1.0/train/patient40851/study1/view1_frontal.jpg,Male,82,Frontal,AP,,,,1.0,,1.0,,-1.0,,,0.0,,, +89,CheXpert-v1.0/train/patient43188/study6/view1_frontal.jpg,Male,40,Frontal,AP,,1.0,,1.0,1.0,,1.0,,,0.0,,,,1.0 
+90,CheXpert-v1.0/train/patient21174/study24/view1_frontal.jpg,Male,84,Frontal,AP,,,,1.0,,1.0,,,,,1.0,,,1.0 +91,CheXpert-v1.0/train/patient14979/study9/view1_frontal.jpg,Male,64,Frontal,AP,,,,1.0,,1.0,,,,,1.0,,,1.0 +92,CheXpert-v1.0/train/patient03305/study16/view1_frontal.jpg,Male,62,Frontal,AP,,,,,,1.0,,,,,,,,1.0 +93,CheXpert-v1.0/train/patient61346/study1/view1_frontal.jpg,Male,57,Frontal,AP,,1.0,,,,1.0,,,,0.0,,,, +94,CheXpert-v1.0/train/patient31593/study2/view1_frontal.jpg,Male,45,Frontal,AP,,,,,,1.0,,,,,,,,1.0 +95,CheXpert-v1.0/train/patient11550/study1/view1_frontal.jpg,Male,54,Frontal,PA,,,1.0,,,,,,,,0.0,,, +96,CheXpert-v1.0/train/patient41461/study3/view1_frontal.jpg,Male,69,Frontal,AP,,,,1.0,,,,,,,1.0,,, +97,CheXpert-v1.0/train/patient48805/study5/view1_frontal.jpg,Male,76,Frontal,AP,,,,1.0,,,,,,,1.0,,,1.0 +98,CheXpert-v1.0/train/patient27189/study7/view1_frontal.jpg,Male,51,Frontal,AP,,,,1.0,,-1.0,,-1.0,,,,,, +99,CheXpert-v1.0/train/patient20435/study3/view1_frontal.jpg,Male,61,Frontal,AP,,,,,,,,,1.0,,1.0,,,1.0 diff --git a/ddi_attribute_pred.py b/ddi_attribute_pred.py new file mode 100644 index 0000000..66e4d87 --- /dev/null +++ b/ddi_attribute_pred.py @@ -0,0 +1,240 @@ +import traceback +import os +from tqdm import tqdm +import random +import pickle +import numpy as np +from LMM import GPT4VAPI, GeminiAPI, ClaudeAPI +import pandas as pd + +rare_diseases = { + 'subcutaneous-t-cell-lymphoma', 'focal-acral-hyperkeratosis', + 'eccrine-poroma', 'inverted-follicular-keratosis', 'kaposi-sarcoma', + 'metastatic-carcinoma', 'mycosis-fungoides', + 'acquired-digital-fibrokeratoma', 'atypical-spindle-cell-nevus-of-reed', + 'verruciform-xanthoma', 'morphea', 'nevus-lipomatosus-superficialis', + 'pigmented-spindle-cell-nevus-of-reed', 'arteriovenous-hemangioma', + 'syringocystadenoma-papilliferum', 'trichofolliculoma', + 'coccidioidomycosis', 'leukemia-cutis', 'sebaceous-carcinoma', + 'blastic-plasmacytoid-dendritic-cell-neoplasm', 'glomangioma', + 
def _sample_skin_tone(demo_frame, skin_tone, requested, random_seed):
    """Draw up to `requested` rows whose `skin_tone` column equals `skin_tone`.

    When fewer rows are available than requested, a warning is printed and
    every available row is returned (still passed through `sample` so the
    order is shuffled with the same seed).
    """
    pool = demo_frame[demo_frame.skin_tone == skin_tone]
    if len(pool) < requested:
        print(f"Warning: not enough samples for skin tone {skin_tone}, taking the max available {len(pool)}")
        requested = len(pool)
    return pool.sample(requested, random_state=random_seed)


def create_demo(fst12, fst56, filter_rare = False, random_seed=141, metadata_path=None):
    """Build the shuffled demonstration frame for the DDI skin-tone experiment.

    Args:
        fst12: number of demo rows to draw with `skin_tone == 12`.
        fst56: number of demo rows to draw with `skin_tone == 56`.
        filter_rare: when True, rows whose `disease` is listed in the
            module-level `rare_diseases` set are dropped before sampling.
        random_seed: seed passed to every `DataFrame.sample` call so the
            selection and the final shuffle are reproducible.
        metadata_path: optional path to the demo metadata CSV. Defaults to the
            original hard-coded cluster location, so existing callers are
            unaffected.

    Returns:
        A DataFrame holding the sampled rows of both groups in shuffled order.
        It may contain fewer than `fst12 + fst56` rows when a group is too
        small; a warning is printed in that case.
    """
    if metadata_path is None:
        dataset_name = "DDI"
        metadata_path = f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/{dataset_name}/ddi_demo_metadata.csv"
    demo_frame = pd.read_csv(metadata_path, index_col=0)
    if filter_rare:
        demo_frame = demo_frame[~demo_frame.disease.isin(rare_diseases)]

    total_samples = fst12 + fst56

    # Keep the original draw order (56 first, then 12) so that results for a
    # given seed are unchanged.
    fst56_frame = _sample_skin_tone(demo_frame, 56, fst56, random_seed)
    fst12_frame = _sample_skin_tone(demo_frame, 12, fst12, random_seed)

    final_demo_frame = pd.concat([fst56_frame, fst12_frame])

    if len(final_demo_frame) < total_samples:
        print(f"Warning: not enough total samples, taking the max available {len(final_demo_frame)}")

    # Sampling every row is just a seeded shuffle, so the two groups are interleaved.
    return final_demo_frame.sample(len(final_demo_frame), random_state=random_seed)
def _is_complete_response(entry, num_questions):
    """Return True when a stored result tuple holds a full, non-error response.

    `entry` is a `(response, prompt, image_paths)` tuple as stored by `main`,
    or None when nothing has been stored for the batch yet.
    """
    if entry is None:
        return False
    response = entry[0]
    return (not response.startswith("ERROR")) and (
        f"END FORMAT TEMPLATE FOR QUESTION {num_questions}" in response
    )


def main(
    model,
    fst12,
    fst56,
    num_qns_per_round,
    filter_rare = False,
    detail="auto",
    random_seed=141
):
    """Run the DDI skin-tone prediction experiment for one model and one seed.

    Demo images (with their A/B skin-tone answer) are prepended to every
    prompt; the test set is shuffled and queried in batches of
    `num_qns_per_round`. Each batch result is stored as
    `(response, prompt, image_paths)` under the string form of the batch's
    test-frame indices, and the whole dict is pickled to
    `./ddi_results/{EXP_NAME}.pkl`.

    Args:
        model: model name; prefixes "gpt"/"o1", "Gemini" (only "Gemini1.5")
            and "claude" select the API wrapper.
        fst12: number of demo images with `skin_tone == 12`.
        fst56: number of demo images with `skin_tone == 56`.
        num_qns_per_round: number of test questions per API call.
        filter_rare: drop rows whose disease is in `rare_diseases` from both
            the demo and the test frame.
        detail: image detail setting forwarded to `GPT4VAPI`.
        random_seed: seed for demo sampling and test-set shuffling.

    Raises:
        ValueError: if `model` does not match a supported model family.
    """
    # The seed is part of the name (as in run_chexpert.py); without it, runs
    # with different seeds would share and overwrite one results file.
    EXP_NAME = f"ddi_fst_{random_seed}_{fst12}_{fst56}_{model}_{num_qns_per_round}"
    results_dir = "./ddi_results"
    results_path = f"{results_dir}/{EXP_NAME}.pkl"

    # filter_rare must reach create_demo too, otherwise only the test set is filtered.
    demo_frame = create_demo(fst12, fst56, filter_rare=filter_rare, random_seed=random_seed)

    dataset_name = "DDI"
    test_df = pd.read_csv(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/{dataset_name}/ddi_test_metadata.csv", index_col=0)
    if filter_rare:
        test_df = test_df[~test_df.disease.isin(rare_diseases)]

    if model.startswith(("gpt", "o1")):
        api = GPT4VAPI(model=model, detail=detail)
    elif model.startswith("Gemini"):
        if model != "Gemini1.5":
            raise ValueError(f"Unsupported Gemini model: {model}")
        api = GeminiAPI()
    elif model.startswith("claude"):
        api = ClaudeAPI()
    else:
        # Previously `api` was left unbound and the first call failed with a NameError.
        raise ValueError(f"Unsupported model: {model}")
    print(EXP_NAME, f"test size = {len(test_df)}")

    # (image path, answer letter) for every demo row; "B" marks skin tone 56.
    demo_examples = [
        ('/home/groups/roxanad/ddi/' + row.DDI_file, "B" if row.skin_tone == 56 else "A")
        for _, row in demo_frame.iterrows()
    ]
    demo_paths = [path for path, _ in demo_examples]

    # The demo part of the prompt is identical for every batch, so build it once.
    demo_prompt = "Please respond with the following format for each question, in the form of a single capital letter specifying which label best describes the image. Do not deviate from the format, because it will be automatically parsed."
    for _, label in demo_examples:
        demo_prompt += f"""<>Given the image above, answer the following question using the specified format.
Question: What skin tone does the person in the image have?
Choices: A. White, B. Black or Brown
Answer Choice: {label}
"""

    # Load existing results from the same location they are saved to, so
    # that an interrupted run can actually resume.
    os.makedirs(results_dir, exist_ok=True)
    if os.path.isfile(results_path):
        # NOTE: pickle.load is only safe because this file is written by this script.
        with open(results_path, "rb") as f:
            results = pickle.load(f)
    else:
        results = {}
    base_usage = results.get("token_usage", (0, 0, 0))

    def save_results():
        # Assumes api.token_usage is cumulative since the wrapper was
        # constructed (TODO confirm in LMM.py); adding it to the usage loaded
        # at start-up avoids double counting across repeated saves.
        results["token_usage"] = tuple(
            a + b for a, b in zip(base_usage, api.token_usage)
        )
        with open(results_path, "wb") as f:
            pickle.dump(results, f)

    test_df = test_df.sample(frac=1, random_state=random_seed)  # Shuffle the test set
    for start_idx in tqdm(range(0, len(test_df), num_qns_per_round), desc=EXP_NAME):
        end_idx = min(len(test_df), start_idx + num_qns_per_round)
        num_questions = end_idx - start_idx

        prompt = demo_prompt
        image_paths = list(demo_paths)
        qns_idx = []
        for qn_idx, row in enumerate(test_df.iloc[start_idx:end_idx].itertuples(), start=1):
            qns_idx.append(row.Index)
            image_paths.append('/home/groups/roxanad/ddi/' + row.DDI_file)
            prompt += f"""<>Given the image above, answer the following question using the specified format.
Question {qn_idx}: What skin tone does the person in the image have?
Choices {qn_idx}: A. White, B. Black or Brown

"""
        for qn_idx in range(1, num_questions + 1):
            prompt += f"""Please respond with the following format for each question:
---BEGIN FORMAT TEMPLATE FOR QUESTION {qn_idx}---
Answer Choice {qn_idx}: [Your Answer Choice Here for Question {qn_idx}]
Confidence Score {qn_idx}: [Your Numerical Prediction Confidence Score Here From 0 To 1 for Question {qn_idx}]
---END FORMAT TEMPLATE FOR QUESTION {qn_idx}---

Do not deviate from the above format. Repeat the format template for the answer."""

        qns_id = str(qns_idx)
        # Up to three attempts; stop as soon as a complete response is stored
        # (this also skips batches that were finished in an earlier run).
        for _attempt in range(3):
            if _is_complete_response(results.get(qns_id), num_questions):
                break

            try:
                res = api(
                    prompt,
                    image_paths=image_paths,
                    real_call=True,
                    max_tokens=60 * num_qns_per_round,
                )
            except KeyboardInterrupt:
                # Persist what we have before leaving.
                save_results()
                raise SystemExit
            except Exception:
                # Best effort: record the failure so it is visible in the
                # results and retried on the next attempt or run.
                res = f"ERROR!!!! {traceback.format_exc()}"

            print(res)
            results[qns_id] = (res,prompt,image_paths)

        # Save after every batch so a crash loses at most one batch.
        save_results()

    # Also covers the case of an empty test set.
    save_results()


if __name__ == "__main__":
    # Sweep every model and seed with an equal number of demo images per skin tone.
    for model in ["Gemini1.5", "gpt-4o-2024-05-13", "claude"]:
        for seed in [10, 100, 141]:
            for num_per_tone in [1, 5, 10, 15, 20, 30,]:
                main(model,
                     num_per_tone,
                     num_per_tone,
                     50,
                     random_seed=seed)
+178,179,000179.png,56,False,dermatofibroma +103,104,000104.png,56,False,seborrheic-keratosis +201,202,000202.png,56,False,acrochordon +247,248,000248.png,56,False,epidermal-cyst +229,230,000230.png,56,False,pyogenic-granuloma +181,182,000182.png,56,False,melanocytic-nevi +135,136,000136.png,56,False,eccrine-poroma +114,115,000115.png,56,False,verruca-vulgaris +31,32,000032.png,56,True,mycosis-fungoides +197,198,000198.png,56,False,nevus-lipomatosus-superficialis +106,107,000107.png,56,False,inverted-follicular-keratosis +180,181,000181.png,56,False,blue-nevus +154,155,000155.png,56,False,melanocytic-nevi +60,61,000061.png,56,False,acrochordon +78,79,000079.png,56,False,melanocytic-nevi +59,60,000060.png,56,False,acrochordon +164,165,000165.png,56,True,metastatic-carcinoma +148,149,000149.png,56,False,hematoma +238,239,000239.png,56,False,epidermal-cyst +22,23,000023.png,56,True,mycosis-fungoides +210,211,000211.png,56,False,epidermal-cyst +67,68,000068.png,56,True,mycosis-fungoides +211,212,000212.png,56,False,acne-cystic +157,158,000158.png,56,True,squamous-cell-carcinoma-in-situ +219,220,000220.png,56,False,condyloma-accuminatum +224,225,000225.png,56,False,dermatofibroma +64,65,000065.png,56,True,mycosis-fungoides +26,27,000027.png,56,True,mycosis-fungoides +142,143,000143.png,56,False,verruca-vulgaris +195,196,000196.png,56,False,verruca-vulgaris +168,169,000169.png,56,True,squamous-cell-carcinoma +132,133,000133.png,56,False,acrochordon +105,106,000106.png,56,False,hyperpigmentation +237,238,000238.png,56,False,melanocytic-nevi +28,29,000029.png,56,True,mycosis-fungoides +174,175,000175.png,56,False,dermatofibroma +139,140,000140.png,56,False,acrochordon +220,221,000221.png,56,False,epidermal-cyst +111,112,000112.png,56,False,dermatofibroma +159,160,000160.png,56,False,seborrheic-keratosis +77,78,000078.png,56,False,epidermal-nevus +123,124,000124.png,56,False,epidermal-cyst +124,125,000125.png,56,True,kaposi-sarcoma 
+119,120,000120.png,56,False,melanocytic-nevi +131,132,000132.png,56,False,eccrine-poroma +199,200,000200.png,56,False,pyogenic-granuloma +130,131,000131.png,56,False,verruca-vulgaris +216,217,000217.png,56,False,eczema-spongiotic-dermatitis +231,232,000232.png,56,False,eczema-spongiotic-dermatitis +110,111,000111.png,56,False,seborrheic-keratosis +6,7,000007.png,56,True,melanoma-acral-lentiginous +102,103,000103.png,56,False,epidermal-cyst +75,76,000076.png,56,False,verruca-vulgaris +55,56,000056.png,56,False,lipoma +128,129,000129.png,56,False,seborrheic-keratosis +82,83,000083.png,56,False,epidermal-cyst +226,227,000227.png,56,False,melanocytic-nevi +95,96,000096.png,56,False,pyogenic-granuloma +206,207,000207.png,56,False,melanocytic-nevi +173,174,000174.png,56,False,seborrheic-keratosis +158,159,000159.png,56,False,melanocytic-nevi +34,35,000035.png,56,True,mycosis-fungoides +24,25,000025.png,56,True,subcutaneous-t-cell-lymphoma +20,21,000021.png,56,True,mycosis-fungoides +58,59,000059.png,56,False,verruca-vulgaris +144,145,000145.png,56,False,melanocytic-nevi +134,135,000135.png,56,False,angioleiomyoma +129,130,000130.png,56,False,benign-keratosis +87,88,000088.png,56,False,melanocytic-nevi +205,206,000206.png,56,False,melanocytic-nevi +227,228,000228.png,56,False,morphea +209,210,000210.png,56,False,epidermal-cyst +133,134,000134.png,56,False,acrochordon +146,147,000147.png,56,False,acrochordon +149,150,000150.png,56,False,epidermal-cyst +21,22,000022.png,56,True,mycosis-fungoides +92,93,000093.png,56,False,neurofibroma +239,240,000240.png,56,False,inverted-follicular-keratosis +57,58,000058.png,56,False,blue-nevus +250,251,000251.png,56,False,verruca-vulgaris +222,223,000223.png,56,False,verruca-vulgaris +233,234,000234.png,56,False,lipoma +30,31,000031.png,56,True,mycosis-fungoides +118,119,000119.png,56,False,prurigo-nodularis +94,95,000095.png,56,False,nevus-lipomatosus-superficialis +112,113,000113.png,56,False,trichofolliculoma 
+213,214,000214.png,56,False,neurofibroma +127,128,000128.png,56,False,scar +100,101,000101.png,56,False,melanocytic-nevi +208,209,000209.png,56,False,verruca-vulgaris +192,193,000193.png,56,False,verruca-vulgaris +202,203,000203.png,56,False,seborrheic-keratosis +234,235,000235.png,56,False,epidermal-cyst +23,24,000024.png,56,True,mycosis-fungoides +162,163,000163.png,56,False,neurofibroma +3,4,000004.png,56,True,squamous-cell-carcinoma-in-situ +68,69,000069.png,56,True,mycosis-fungoides +74,75,000075.png,56,False,epidermal-cyst +200,201,000201.png,56,False,seborrheic-keratosis +62,63,000063.png,56,False,wart +54,55,000055.png,56,False,hyperpigmentation +248,249,000249.png,56,False,abrasions-ulcerations-and-physical-injuries +108,109,000109.png,56,False,melanocytic-nevi +29,30,000030.png,56,True,mycosis-fungoides +177,178,000178.png,56,False,seborrheic-keratosis +176,177,000177.png,56,False,verruca-vulgaris +145,146,000146.png,56,False,acrochordon +143,144,000144.png,56,False,epidermal-cyst +165,166,000166.png,56,True,squamous-cell-carcinoma-in-situ +184,185,000185.png,56,False,eccrine-poroma +104,105,000105.png,56,False,seborrheic-keratosis +169,170,000170.png,56,False,pyogenic-granuloma +96,97,000097.png,56,False,benign-keratosis +16,17,000017.png,56,True,mycosis-fungoides +117,118,000118.png,56,False,lymphocytic-infiltrations +10,11,000011.png,56,True,squamous-cell-carcinoma +215,216,000216.png,56,False,verruciform-xanthoma +182,183,000183.png,56,False,melanocytic-nevi +212,213,000213.png,56,False,neurofibroma +241,242,000242.png,56,False,molluscum-contagiosum +207,208,000208.png,56,False,arteriovenous-hemangioma +191,192,000192.png,56,False,seborrheic-keratosis +194,195,000195.png,56,False,melanocytic-nevi +107,108,000108.png,56,False,onychomycosis +86,87,000087.png,56,False,pyogenic-granuloma +89,90,000090.png,56,False,abrasions-ulcerations-and-physical-injuries +83,84,000084.png,56,False,trichilemmoma +115,116,000116.png,56,False,melanocytic-nevi 
+1,2,000002.png,56,True,melanoma-in-situ +150,151,000151.png,56,False,benign-keratosis +167,168,000168.png,56,False,seborrheic-keratosis +246,247,000247.png,56,False,seborrheic-keratosis +27,28,000028.png,56,True,mycosis-fungoides +156,157,000157.png,56,False,verruca-vulgaris +138,139,000139.png,56,False,dermatofibroma +18,19,000019.png,56,True,mycosis-fungoides +91,92,000092.png,56,False,melanocytic-nevi +32,33,000033.png,56,True,mycosis-fungoides +15,16,000016.png,56,True,mycosis-fungoides +79,80,000080.png,56,False,melanocytic-nevi +141,142,000142.png,56,False,seborrheic-keratosis-irritated +7,8,000008.png,56,True,melanoma-in-situ +11,12,000012.png,56,True,squamous-cell-carcinoma-keratoacanthoma +19,20,000020.png,56,True,mycosis-fungoides +171,172,000172.png,56,False,seborrheic-keratosis +85,86,000086.png,56,True,mycosis-fungoides +93,94,000094.png,56,False,syringocystadenoma-papilliferum +25,26,000026.png,56,True,mycosis-fungoides +17,18,000018.png,56,True,mycosis-fungoides +166,167,000167.png,56,False,folliculitis +101,102,000102.png,56,False,foreign-body-granuloma +147,148,000148.png,56,False,keloid +330,331,000331.png,12,False,seborrheic-keratosis +346,347,000347.png,12,False,verruca-vulgaris +431,432,000432.png,12,False,seborrheic-keratosis +313,314,000314.png,12,False,acrochordon +261,262,000262.png,12,True,nodular-melanoma-(nm) +340,341,000341.png,12,False,verruca-vulgaris +438,439,000439.png,12,False,melanocytic-nevi +308,309,000309.png,12,False,melanocytic-nevi +307,308,000308.png,12,False,melanocytic-nevi +331,332,000332.png,12,False,seborrheic-keratosis +301,302,000302.png,12,True,squamous-cell-carcinoma-in-situ +367,368,000368.png,12,True,mycosis-fungoides +415,416,000416.png,12,False,seborrheic-keratosis +436,437,000437.png,12,False,seborrheic-keratosis-irritated +420,421,000421.png,12,False,seborrheic-keratosis-irritated +391,392,000392.png,12,False,dermatofibroma +372,373,000373.png,12,False,neurofibroma +375,376,000376.png,12,False,neurofibroma 
+349,350,000350.png,12,False,verruca-vulgaris +267,268,000268.png,12,False,melanocytic-nevi +278,279,000279.png,12,False,melanocytic-nevi +266,267,000267.png,12,False,melanocytic-nevi +388,389,000389.png,12,False,eccrine-poroma +343,344,000344.png,12,False,verruca-vulgaris +429,430,000430.png,12,False,epidermal-cyst +73,74,000074.png,12,True,squamous-cell-carcinoma-in-situ +403,404,000404.png,12,False,dysplastic-nevus +272,273,000273.png,12,False,melanocytic-nevi +400,401,000401.png,12,False,molluscum-contagiosum +327,328,000328.png,12,False,melanocytic-nevi +412,413,000413.png,12,False,epidermal-cyst +328,329,000329.png,12,False,seborrheic-keratosis +269,270,000270.png,12,False,melanocytic-nevi +251,252,000252.png,12,False,epidermal-cyst +394,395,000395.png,12,False,molluscum-contagiosum +387,388,000388.png,12,True,kaposi-sarcoma +406,407,000407.png,12,True,basal-cell-carcinoma-nodular +317,318,000318.png,12,False,melanocytic-nevi +300,301,000301.png,12,True,squamous-cell-carcinoma-in-situ +351,352,000352.png,12,False,verruca-vulgaris +253,254,000254.png,12,False,verruca-vulgaris +369,370,000370.png,12,False,seborrheic-keratosis-irritated +428,429,000429.png,12,False,seborrheic-keratosis +413,414,000414.png,12,False,epidermal-cyst +305,306,000306.png,12,False,melanocytic-nevi +380,381,000381.png,12,False,pyogenic-granuloma +277,278,000278.png,12,False,melanocytic-nevi +314,315,000315.png,12,False,lipoma +315,316,000316.png,12,False,melanocytic-nevi +312,313,000313.png,12,False,verruca-vulgaris +322,323,000323.png,12,False,melanocytic-nevi +396,397,000397.png,12,True,metastatic-carcinoma +321,322,000322.png,12,False,melanocytic-nevi +384,385,000385.png,12,False,trichofolliculoma +422,423,000423.png,12,False,dermatofibroma +304,305,000305.png,12,False,melanocytic-nevi +40,41,000041.png,12,True,melanoma-acral-lentiginous +297,298,000298.png,12,True,squamous-cell-carcinoma-in-situ +275,276,000276.png,12,False,melanocytic-nevi 
+263,264,000264.png,12,False,actinic-keratosis +319,320,000320.png,12,False,melanocytic-nevi +280,281,000281.png,12,False,melanocytic-nevi +345,346,000346.png,12,False,verruca-vulgaris +293,294,000294.png,12,True,squamous-cell-carcinoma-in-situ +364,365,000365.png,12,False,epidermal-cyst +368,369,000369.png,12,True,mycosis-fungoides +353,354,000354.png,12,False,pyogenic-granuloma +259,260,000260.png,12,False,seborrheic-keratosis-irritated +122,123,000123.png,12,False,lipoma +71,72,000072.png,12,True,basal-cell-carcinoma-superficial +265,266,000266.png,12,False,melanocytic-nevi +336,337,000337.png,12,False,verruca-vulgaris +325,326,000326.png,12,False,epidermal-cyst +320,321,000321.png,12,False,melanocytic-nevi +285,286,000286.png,12,False,melanocytic-nevi +398,399,000399.png,12,False,prurigo-nodularis +360,361,000361.png,12,False,epidermal-cyst +338,339,000339.png,12,False,verruca-vulgaris +341,342,000342.png,12,False,verruca-vulgaris +72,73,000073.png,12,True,basal-cell-carcinoma-nodular +290,291,000291.png,12,True,basal-cell-carcinoma +430,431,000431.png,12,False,epidermal-cyst +264,265,000265.png,12,False,dysplastic-nevus +439,440,000440.png,12,True,basal-cell-carcinoma-nodular +414,415,000415.png,12,False,seborrheic-keratosis +424,425,000425.png,12,False,seborrheic-keratosis +255,256,000256.png,12,False,verruca-vulgaris +311,312,000312.png,12,False,verruca-vulgaris +292,293,000293.png,12,True,basal-cell-carcinoma +306,307,000307.png,12,False,melanocytic-nevi +397,398,000398.png,12,False,condyloma-accuminatum +318,319,000319.png,12,False,epidermal-cyst +295,296,000296.png,12,True,squamous-cell-carcinoma-in-situ +401,402,000402.png,12,False,dermatofibroma +410,411,000411.png,12,False,epidermal-cyst +395,396,000396.png,12,False,seborrheic-keratosis +425,426,000426.png,12,False,seborrheic-keratosis +121,122,000122.png,12,False,lipoma +354,355,000355.png,12,False,seborrheic-keratosis +38,39,000039.png,12,True,squamous-cell-carcinoma 
+273,274,000274.png,12,False,melanocytic-nevi +274,275,000275.png,12,False,melanocytic-nevi +392,393,000393.png,12,False,dermatofibroma +268,269,000269.png,12,False,melanocytic-nevi +262,263,000263.png,12,True,melanoma +437,438,000438.png,12,False,melanocytic-nevi +303,304,000304.png,12,False,seborrheic-keratosis-irritated +254,255,000255.png,12,False,verruca-vulgaris +371,372,000372.png,12,False,neurofibroma +370,371,000371.png,12,False,seborrheic-keratosis-irritated +337,338,000338.png,12,False,verruca-vulgaris +335,336,000336.png,12,False,arteriovenous-hemangioma +357,358,000358.png,12,False,seborrheic-keratosis +377,378,000378.png,12,False,neurofibroma +299,300,000300.png,12,True,squamous-cell-carcinoma-in-situ +363,364,000364.png,12,True,squamous-cell-carcinoma-in-situ +294,295,000295.png,12,True,squamous-cell-carcinoma-in-situ +49,50,000050.png,12,True,melanoma-acral-lentiginous +310,311,000311.png,12,False,melanocytic-nevi +43,44,000044.png,12,True,squamous-cell-carcinoma-keratoacanthoma +408,409,000409.png,12,True,squamous-cell-carcinoma-in-situ +376,377,000377.png,12,False,neurofibroma +404,405,000405.png,12,True,basal-cell-carcinoma-nodular +432,433,000433.png,12,False,seborrheic-keratosis +399,400,000400.png,12,False,pigmented-spindle-cell-nevus-of-reed +383,384,000384.png,12,False,inverted-follicular-keratosis +385,386,000386.png,12,True,kaposi-sarcoma +302,303,000303.png,12,False,verruca-vulgaris +284,285,000285.png,12,False,melanocytic-nevi +287,288,000288.png,12,False,melanocytic-nevi +281,282,000282.png,12,False,melanocytic-nevi +309,310,000310.png,12,False,melanocytic-nevi +36,37,000037.png,12,True,basal-cell-carcinoma +342,343,000343.png,12,False,verruca-vulgaris +359,360,000360.png,12,False,epidermal-cyst +435,436,000436.png,12,False,seborrheic-keratosis-irritated +252,253,000253.png,12,False,verruca-vulgaris +348,349,000349.png,12,False,verruca-vulgaris +329,330,000330.png,12,False,seborrheic-keratosis 
+51,52,000052.png,12,True,squamous-cell-carcinoma +289,290,000290.png,12,True,basal-cell-carcinoma +257,258,000258.png,12,False,verruca-vulgaris +48,49,000049.png,12,False,melanocytic-nevi +279,280,000280.png,12,False,melanocytic-nevi +333,334,000334.png,12,False,prurigo-nodularis +41,42,000042.png,12,False,melanocytic-nevi +44,45,000045.png,12,True,melanoma-acral-lentiginous +70,71,000071.png,12,True,basal-cell-carcinoma-nodular +365,366,000366.png,12,False,pyogenic-granuloma +283,284,000284.png,12,False,melanocytic-nevi +291,292,000292.png,12,True,basal-cell-carcinoma +243,244,000244.png,12,True,squamous-cell-carcinoma +50,51,000051.png,12,False,melanocytic-nevi +358,359,000359.png,12,True,squamous-cell-carcinoma-keratoacanthoma +296,297,000297.png,12,True,squamous-cell-carcinoma-in-situ +339,340,000340.png,12,False,verruca-vulgaris diff --git a/ddi_test_metadata.csv b/ddi_test_metadata.csv new file mode 100644 index 0000000..43c6b51 --- /dev/null +++ b/ddi_test_metadata.csv @@ -0,0 +1,105 @@ +,DDI_ID,DDI_file,skin_tone,malignant,disease +14,15,000015.png,56,True,squamous-cell-carcinoma +161,162,000162.png,56,True,melanoma +88,89,000089.png,56,False,verruca-vulgaris +175,176,000176.png,56,False,acrochordon +152,153,000153.png,56,False,seborrheic-keratosis +230,231,000231.png,56,False,verruca-vulgaris +13,14,000014.png,56,True,squamous-cell-carcinoma +249,250,000250.png,56,False,neurofibroma +228,229,000229.png,56,False,epidermal-cyst +185,186,000186.png,56,False,eccrine-poroma +203,204,000204.png,56,False,verruca-vulgaris +163,164,000164.png,56,False,angioma +244,245,000245.png,56,False,molluscum-contagiosum +52,53,000053.png,56,False,seborrheic-keratosis-irritated +90,91,000091.png,56,False,seborrheic-keratosis +235,236,000236.png,56,False,neuroma +218,219,000219.png,56,False,molluscum-contagiosum +120,121,000121.png,56,False,melanocytic-nevi +76,77,000077.png,56,False,acquired-digital-fibrokeratoma +137,138,000138.png,56,False,eccrine-poroma 
+5,6,000006.png,56,True,squamous-cell-carcinoma +136,137,000137.png,56,False,eccrine-poroma +113,114,000114.png,56,False,seborrheic-keratosis +189,190,000190.png,56,False,dermatofibroma +186,187,000187.png,56,False,seborrheic-keratosis-irritated +204,205,000205.png,56,False,melanocytic-nevi +179,180,000180.png,56,False,seborrheic-keratosis +66,67,000067.png,56,False,abrasions-ulcerations-and-physical-injuries +155,156,000156.png,56,False,epidermal-cyst +0,1,000001.png,56,True,melanoma-in-situ +196,197,000197.png,56,False,melanocytic-nevi +12,13,000013.png,56,True,melanoma-acral-lentiginous +33,34,000034.png,56,True,mycosis-fungoides +2,3,000003.png,56,True,mycosis-fungoides +232,233,000233.png,56,False,verruca-vulgaris +225,226,000226.png,56,False,seborrheic-keratosis +217,218,000218.png,56,False,eczema-spongiotic-dermatitis +170,171,000171.png,56,False,scar +214,215,000215.png,56,False,arteriovenous-hemangioma +198,199,000199.png,56,False,eczema-spongiotic-dermatitis +236,237,000237.png,56,False,seborrheic-keratosis +190,191,000191.png,56,False,melanocytic-nevi +65,66,000066.png,56,True,mycosis-fungoides +126,127,000127.png,56,False,acrochordon +153,154,000154.png,56,True,metastatic-carcinoma +8,9,000009.png,56,True,melanoma-acral-lentiginous +84,85,000085.png,56,True,mycosis-fungoides +172,173,000173.png,56,True,atypical-spindle-cell-nevus-of-reed +240,241,000241.png,56,False,verruca-vulgaris +140,141,000141.png,56,False,acrochordon +53,54,000054.png,56,False,focal-acral-hyperkeratosis +125,126,000126.png,56,True,kaposi-sarcoma +260,261,000261.png,12,True,melanoma +433,434,000434.png,12,False,seborrheic-keratosis +316,317,000317.png,12,False,melanocytic-nevi +419,420,000420.png,12,False,dermatofibroma +426,427,000427.png,12,False,seborrheic-keratosis +298,299,000299.png,12,True,squamous-cell-carcinoma-in-situ +350,351,000351.png,12,False,verruca-vulgaris +402,403,000403.png,12,False,angioma +47,48,000048.png,12,False,melanocytic-nevi 
+46,47,000047.png,12,False,melanocytic-nevi +411,412,000412.png,12,False,epidermal-cyst +407,408,000408.png,12,True,squamous-cell-carcinoma-in-situ +355,356,000356.png,12,False,epidermal-cyst +378,379,000379.png,12,False,nevus-lipomatosus-superficialis +373,374,000374.png,12,False,neurofibroma +286,287,000287.png,12,False,melanocytic-nevi +379,380,000380.png,12,False,syringocystadenoma-papilliferum +374,375,000375.png,12,False,nevus-lipomatosus-superficialis +323,324,000324.png,12,False,melanocytic-nevi +389,390,000390.png,12,False,eccrine-poroma +282,283,000283.png,12,False,melanocytic-nevi +405,406,000406.png,12,True,basal-cell-carcinoma-nodular +39,40,000040.png,12,True,squamous-cell-carcinoma-in-situ +366,367,000367.png,12,True,mycosis-fungoides +434,435,000435.png,12,False,seborrheic-keratosis +382,383,000383.png,12,False,inverted-follicular-keratosis +256,257,000257.png,12,False,verruca-vulgaris +347,348,000348.png,12,False,verruca-vulgaris +270,271,000271.png,12,False,melanocytic-nevi +427,428,000428.png,12,False,epidermal-cyst +386,387,000387.png,12,True,kaposi-sarcoma +4,5,000005.png,12,True,basal-cell-carcinoma +418,419,000419.png,12,False,seborrheic-keratosis +45,46,000046.png,12,False,melanocytic-nevi +258,259,000259.png,12,False,seborrheic-keratosis-irritated +37,38,000038.png,12,True,squamous-cell-carcinoma-keratoacanthoma +423,424,000424.png,12,False,seborrheic-keratosis-irritated +344,345,000345.png,12,False,verruca-vulgaris +416,417,000417.png,12,False,seborrheic-keratosis +271,272,000272.png,12,False,melanocytic-nevi +417,418,000418.png,12,False,seborrheic-keratosis +352,353,000353.png,12,False,verruca-vulgaris +390,391,000391.png,12,False,eccrine-poroma +324,325,000325.png,12,False,epidermal-cyst +334,335,000335.png,12,False,dermatofibroma +288,289,000289.png,12,True,basal-cell-carcinoma +326,327,000327.png,12,False,epidermal-cyst +409,410,000410.png,12,True,squamous-cell-carcinoma-in-situ +421,422,000422.png,12,False,molluscum-contagiosum 
def _sample_group(demo_frame, sex, has_ptx, requested, label, random_seed):
    """Draw up to `requested` rows for one (Sex, Pneumothorax) subgroup.

    `has_ptx` is compared with `==` against the `Pneumothorax` column, so rows
    with a missing label match neither True nor False and are never drawn.
    When the subgroup is smaller than `requested`, a warning naming `label`
    is printed and every available row is returned.
    """
    by_sex = demo_frame[demo_frame.Sex == sex]
    pool = by_sex[by_sex.Pneumothorax == has_ptx]
    if len(pool) < requested:
        print(f"Warning: not enough {label} samples, taking the max available {len(pool)}")
        requested = len(pool)
    return pool.sample(requested, random_state=random_seed)


def create_demo(female_ben, female_mal, male_ben, male_mal, random_seed=141, metadata_path=None):
    """Build the shuffled demonstration frame for the CheXpert pneumothorax experiment.

    Args:
        female_ben: number of Female rows with `Pneumothorax == False` to draw.
        female_mal: number of Female rows with `Pneumothorax == True` to draw.
        male_ben: number of Male rows with `Pneumothorax == False` to draw.
        male_mal: number of Male rows with `Pneumothorax == True` to draw.
        random_seed: seed passed to every `DataFrame.sample` call so the
            selection and the final shuffle are reproducible.
        metadata_path: optional path to the demo CSV. Defaults to the original
            hard-coded cluster location, so existing callers are unaffected.

    Returns:
        A DataFrame with the sampled rows of all four subgroups in shuffled
        order. A subgroup contributes fewer rows than requested when it is too
        small; a warning is printed in that case.
    """
    if metadata_path is None:
        metadata_path = f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/chexpert/chexpert_SexBinary_PTX_final_demo_df.csv"
    demo_frame = pd.read_csv(metadata_path, index_col=0)

    # Draw order (and therefore warning order and concat order) matches the
    # original so that results for a given seed are unchanged.
    groups = [
        _sample_group(demo_frame, "Female", True, female_mal, "female malignant", random_seed),
        _sample_group(demo_frame, "Female", False, female_ben, "female benign", random_seed),
        _sample_group(demo_frame, "Male", True, male_mal, "male malignant", random_seed),
        _sample_group(demo_frame, "Male", False, male_ben, "male benign", random_seed),
    ]

    total_samples = sum(len(group) for group in groups)
    # Sampling every row is just a seeded shuffle across the subgroups.
    final_demo_frame = pd.concat(groups).sample(total_samples, random_state=random_seed)
    return final_demo_frame
print(EXP_NAME, f"test size = {len(test_df)}") + + # create demo_examples from my demo_frame + # list of tuples + demo_paths = [] + demo_labels = [] + for i,row in demo_frame.iterrows(): + ans_choice = "A" if row.Pneumothorax == True else "B" + demo_paths.append('/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/chexpert/biasICL/ManyICL/ManyICL/' + row.Path) + demo_labels.append(ans_choice) + demo_examples = list(zip(demo_paths, demo_labels)) + + # Load existing results + if os.path.isfile(f"{EXP_NAME}.pkl"): + with open(f"{EXP_NAME}.pkl", "rb") as f: + results = pickle.load(f) + else: + results = {} + + test_df = test_df.sample(frac=1, random_state=random_seed) # Shuffle the test set + for start_idx in tqdm(range(0, len(test_df), num_qns_per_round), desc=EXP_NAME): + end_idx = min(len(test_df), start_idx + num_qns_per_round) + + prompt = "Please respond with the following format for each question, in the form of a single capital letter specifying which label best describes the image. Do not deviate from the format, because it will be automatically parsed." + image_paths = [ + i[0] for i in demo_examples + ] + for demo in demo_examples: + prompt += f"""<>Given the image above, answer the following question using the specified format. +Question: Does the chest x-ray above show a Pneumothorax? +Choices: A. Yes, B. No +Answer Choice: {demo[1]} +""" + qns_idx = [] + for idx, row in enumerate(test_df.iloc[start_idx:end_idx].itertuples()): + qns_idx.append(row.Index) + image_paths.append('/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/chexpert/biasICL/ManyICL/ManyICL/' + row.Path) + qn_idx = idx + 1 + + prompt += f"""<>Given the image above, answer the following question using the specified format. +Question {qn_idx}: Does the chest x-ray above show a Pneumothorax? +Choices {qn_idx}: A. Yes, B. 
No +""" + for i in range(start_idx, end_idx): + qn_idx = i - start_idx + 1 + prompt += f"""Please respond with the following format for each question: +---BEGIN FORMAT TEMPLATE FOR QUESTION {qn_idx}--- +Answer Choice {qn_idx}: [Your Answer Choice Here for Question {qn_idx}] +Confidence Score {qn_idx}: [Your Numerical Prediction Confidence Score Here From 0 To 1 for Question {qn_idx}] +---END FORMAT TEMPLATE FOR QUESTION {qn_idx}--- + +Do not deviate from the above format. Repeat the format template for the answer.""" + qns_id = str(qns_idx) + for retry in range(3): + if ( + (qns_id in results) + and (not results[qns_id][0].startswith("ERROR")) + and ( + f"END FORMAT TEMPLATE FOR QUESTION {end_idx-start_idx}" + in results[qns_id][0] + ) + ): # Skip if results exist and successful + continue + + try: + for retry in range(3): + if ( + (qns_id in results) + and (not results[qns_id][0].startswith("ERROR")) + and ( + f"END FORMAT TEMPLATE FOR QUESTION {end_idx-start_idx}" + in results[qns_id][0] + ) + ): # Skip if results exist and successful + continue + + try: + res = api( + prompt, + image_paths=image_paths, + real_call=True, + max_tokens=60 * num_qns_per_round, + ) + + except Exception as e: + print(e) + print(traceback.format_exc()) + time.sleep(10) + continue + + except Exception as e: + res = f"ERROR!!!! 
{traceback.format_exc()}" + except KeyboardInterrupt: + previous_usage = results.get("token_usage", (0, 0, 0)) + total_usage = tuple( + a + b for a, b in zip(previous_usage, api.token_usage) + ) + results["token_usage"] = total_usage + with open(f"{EXP_NAME}.pkl", "wb") as f: + pickle.dump(results, f) + exit() + + if not res or "ERROR" in res: + res = None + else: + print(res) + results[qns_id] = (res,prompt,image_paths) + + # Update token usage and save the results + previous_usage = results.get("token_usage", (0, 0, 0)) + total_usage = tuple(a + b for a, b in zip(previous_usage, api.token_usage)) + results["token_usage"] = total_usage + with open(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/chexpert_results_br/{EXP_NAME}.pkl", "wb") as f: + pickle.dump(results, f) + + +if __name__ == "__main__": + # for model in ["Gemini1.5", "gpt-4o-2024-05-13", "claude"]: + for model in ["claude"]: + # main(model, + # 12, + # 12, + # 12, + # 12, + # 50, + # random_seed=100) + for seed in [141, 10, 100]: + # main(model, + # 0, + # 0, + # 0, + # 0, + # 50, + # random_seed=seed) + + # main(model, + # 40, 0, 40, 0, 50, random_seed=seed) + + # main(model, + # 30, 10, 30, 10, 50, random_seed=seed) + + # main(model, + # 20, 20, 20, 20, 50, random_seed=seed) + + # main(model, + # 10, 30, 10, 30, 50, random_seed=seed) + + # main(model, + # 0, 40, 0, 40, 50, random_seed=seed) + + # inverted base rate + + # main(model, + # 0, 40, 40, 0, 50, random_seed=seed) + + # main(model, + # 10, 30, 30, 10, 50, random_seed=seed) + + # main(model, + # 30, 10, 10, 30, 50, random_seed=seed) + + # main(model, + # 40, 0, 0, 40, 50, random_seed=seed) + + # claude + + # main(model, + # 25, 0, 25, 0, 50, random_seed=seed) + + # main(model, + # 20, 5, 20, 5, 50, random_seed=seed) + + # main(model, + # 15, 10, 15, 10, 50, random_seed=seed) + + # main(model, + # 10, 15, 10, 15, 50, random_seed=seed) + + # main(model, + # 0, 25, 0, 25, 50, random_seed=seed) + + # inverted base rate + + main(model, + 0, 25, 
25, 0, 50, random_seed=seed) + + main(model, + 5, 20, 20, 5, 50, random_seed=seed) + + main(model, + 10, 15, 15, 10, 50, random_seed=seed) + + main(model, + 12, 12, 12, 12, 50, random_seed=seed) + + main(model, + 15, 10, 10, 15, 50, random_seed=seed) + + main(model, + 20, 5, 5, 20, 50, random_seed=seed) + + main(model, + 25, 0, 0, 25, 50, random_seed=seed) + + # for num_malignant in [1, 5, 10, 12]: + # main(model, + # num_malignant, + # num_malignant, + # 0, + # 0, + # 50, + # random_seed=seed) + + # main(model, + # 0, + # 0, + # num_malignant, + # num_malignant, + # 50, + # random_seed=seed) + + # main(model, + # num_malignant, + # num_malignant, + # num_malignant, + # num_malignant, + # 50, + # random_seed=seed) \ No newline at end of file diff --git a/run_chexpert_pred_attribute.py b/run_chexpert_pred_attribute.py new file mode 100644 index 0000000..fd6447a --- /dev/null +++ b/run_chexpert_pred_attribute.py @@ -0,0 +1,167 @@ +import traceback +import os +from tqdm import tqdm +import random +import pickle +import numpy as np +from LMM import GPT4VAPI, GeminiAPI, ClaudeAPI +import pandas as pd + + +def create_demo(female, male, random_seed=141): + dataset_name = "chexpert_binary_PNA" + demo_frame = pd.read_csv(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/{dataset_name}/demo.csv", index_col=0) + total_samples = female + male + + female_frame = demo_frame[demo_frame.Sex == "Female"] + male_frame = demo_frame[demo_frame.Sex == "Male"] + print("females in demo", len(female_frame), "males in demo", len(male_frame)) + + if len(female_frame) < female: + print(f"Warning: not enough female samples for attribute, taking the max available {len(female_frame)}") + female_frame = female_frame.sample(len(female_frame), random_state=random_seed) + else: + female_frame = female_frame.sample(female, random_state=random_seed) + + if len(male_frame) < male: + print(f"Warning: not enough male samples for attribute, taking the max available {len(male_frame)}") + 
male_frame = male_frame.sample(len(male_frame), random_state=random_seed) + else: + male_frame = male_frame.sample(male, random_state=random_seed) + + try: + final_demo_frame = pd.concat([female_frame, + male_frame]).sample(total_samples, random_state=random_seed) # sample full num to shuffle + except Exception as e: + print(f"Error: {e}") + traceback.print_exc() + final_demo_frame = pd.concat([female_frame, male_frame]) + + return final_demo_frame + +def main( + model, + female, + male, + num_qns_per_round, + detail="auto", + random_seed=141, +): + + EXP_NAME = f"chexpert_sex_{female}_{male}_{model}_{num_qns_per_round}" + + demo_frame = create_demo(female, male, random_seed) + + dataset_name = "chexpert_binary_PNA" + test_df = pd.read_csv(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/{dataset_name}/test.csv", index_col=0) + + if model.startswith("gpt") or model.startswith("o1"): + api = GPT4VAPI(model=model, detail=detail) + elif model.startswith("Gemini"): + assert model == "Gemini1.5" + api = GeminiAPI() + elif model.startswith("claude"): + api = ClaudeAPI() + print(EXP_NAME, f"test size = {len(test_df)}") + + # create demo_examples from my demo_frame + # list of tuples + demo_paths = [] + demo_labels = [] + for i,row in demo_frame.iterrows(): + ans_choice = "B" if row.Sex == "Male" else "A" + demo_paths.append('/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/chexpert_binary_PNA/' + row.Path) + demo_labels.append(ans_choice) + demo_examples = list(zip(demo_paths, demo_labels)) + + # Load existing results + if os.path.isfile(f"{EXP_NAME}.pkl"): + with open(f"{EXP_NAME}.pkl", "rb") as f: + results = pickle.load(f) + else: + results = {} + + test_df = test_df.sample(frac=1, random_state=random_seed) # Shuffle the test set + for start_idx in tqdm(range(0, len(test_df), num_qns_per_round), desc=EXP_NAME): + end_idx = min(len(test_df), start_idx + num_qns_per_round) + +# random.shuffle(demo_examples) already shuffled so don't need to do this + 
prompt = "You are an expert radiologist working on a research project helping to \ + determine how demographic characteristics like patient-reported race impact the \ + fairness and safety of computer vision classifiers. Please respond with the \ + following format for each question, in the form of a single capital letter \ + specifying which label best describes the image. Do not deviate from the format, \ + because it will be automatically parsed." + image_paths = [ + i[0] for i in demo_examples + ] + for demo in demo_examples: + prompt += f"""<>Given the image above, answer the following question using the specified format. +Question: What sex is the patient in the radiograph above? +Choices: A. Female B. Male +Answer Choice: {demo[1]}""" + qns_idx = [] + for idx, row in enumerate(test_df.iloc[start_idx:end_idx].itertuples()): + qns_idx.append(row.Index) + image_paths.append('/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/chexpert_binary_PNA/' + row.Path) + qn_idx = idx + 1 + + prompt += f"""<>Given the image above, answer the following question using the specified format. +Question {qn_idx}: What sex is the patient in the radiograph above? +Choices {qn_idx}: A. Female B. Male +""" + for i in range(start_idx, end_idx): + qn_idx = i - start_idx + 1 + prompt += f"""Please respond with the following format for each question: +---BEGIN FORMAT TEMPLATE FOR QUESTION {qn_idx}--- +Answer Choice {qn_idx}: [Your Answer Choice Here for Question {qn_idx}] +Confidence Score {qn_idx}: [Your Numerical Prediction Confidence Score Here From 0 To 1 for Question {qn_idx}] +---END FORMAT TEMPLATE FOR QUESTION {qn_idx}--- + +Do not deviate from the above format. 
Repeat the format template for the answer.""" + qns_id = str(qns_idx) + for retry in range(3): + try: + res = api( + prompt, + image_paths=image_paths, + real_call=True, + max_tokens=60 * num_qns_per_round, + ) + except Exception as e: + print(f"Error in calling: {e}") + traceback.print_exc() + res = "" + + results[qns_id] = (res,prompt,image_paths) + + # Update token usage and save the results + previous_usage = results.get("token_usage", (0, 0, 0)) + total_usage = tuple(a + b for a, b in zip(previous_usage, api.token_usage)) + results["token_usage"] = total_usage + with open(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/chexpert_results/{EXP_NAME}.pkl", "wb") as f: + pickle.dump(results, f) + + +if __name__ == "__main__": + # main("gpt-4o-2024-05-13", + # 0, 0, + # 50,) + + # main("Gemini1.5", + # 0, 0, + # 50,) + + # main("claude", + # 0, 0, + # 50,) + + for model in ["Gemini1.5", "gpt-4o-2024-05-13", "claude"]: + for seed in [10]: + nums = [0, 1, 5, 10, 15, 20, 25] + for num_malignant in nums: + main(model, + num_malignant, + num_malignant, + 50, + random_seed=seed) \ No newline at end of file diff --git a/run_ddi.py b/run_ddi.py new file mode 100644 index 0000000..28f4ab9 --- /dev/null +++ b/run_ddi.py @@ -0,0 +1,290 @@ +import traceback +import os +from tqdm import tqdm +import random +import pickle +import numpy as np +from LMM import GPT4VAPI, GeminiAPI, ClaudeAPI +import pandas as pd + +rare_diseases = { + 'subcutaneous-t-cell-lymphoma', 'focal-acral-hyperkeratosis', + 'eccrine-poroma', 'inverted-follicular-keratosis', 'kaposi-sarcoma', + 'metastatic-carcinoma', 'mycosis-fungoides', + 'acquired-digital-fibrokeratoma', 'atypical-spindle-cell-nevus-of-reed', + 'verruciform-xanthoma', 'morphea', 'nevus-lipomatosus-superficialis', + 'pigmented-spindle-cell-nevus-of-reed', 'arteriovenous-hemangioma', + 'syringocystadenoma-papilliferum', 'trichofolliculoma', + 'coccidioidomycosis', 'leukemia-cutis', 'sebaceous-carcinoma', + 
'blastic-plasmacytoid-dendritic-cell-neoplasm', 'glomangioma', + 'dermatomyositis', 'cellular-neurothekeoma', 'graft-vs-host-disease', + 'xanthograngioma', 'chondroid-syringoma', 'angioleiomyoma' + } + +def create_demo(fst12_ben, fst12_mal, fst56_ben, fst56_mal, filter_rare = False, random_seed=141): + ### + ### Load demo example frame + ### Choose relevant demo examples + ### Then create demo prompt and list of demo image paths + ### + dataset_name = "DDI" + demo_frame = pd.read_csv(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/{dataset_name}/ddi_demo_metadata.csv", index_col=0) + if filter_rare: + demo_frame = demo_frame[~demo_frame.disease.isin(rare_diseases)] + + total_samples = fst12_ben + fst12_mal + fst56_ben + fst56_mal + + fst56_frame = demo_frame[demo_frame.skin_tone == 56] + fst12_frame = demo_frame[demo_frame.skin_tone == 12] + + print("fst56_frame malignant", len(fst56_frame[fst56_frame.malignant == True])) + print("fst56_frame benign", len(fst56_frame[fst56_frame.malignant == False])) + print("fst12_frame malignant", len(fst12_frame[fst12_frame.malignant == True])) + print("fst12_frame benign", len(fst12_frame[fst12_frame.malignant == False])) + + fst56_mal_frame = fst56_frame[fst56_frame.malignant == True] + fst56_ben_frame = fst56_frame[fst56_frame.malignant == False] + + fst12_mal_frame = fst12_frame[fst12_frame.malignant == True] + fst12_ben_frame = fst12_frame[fst12_frame.malignant == False] + + if len(fst56_mal_frame) < fst56_mal: + print(f"Warning: not enough malignant samples for skin tone 56, taking the max available {len(fst56_mal_frame)}") + fst56_mal_frame = fst56_mal_frame.sample(len(fst56_mal_frame), random_state=random_seed) + else: + fst56_mal_frame = fst56_mal_frame.sample(fst56_mal, random_state=random_seed) + + if len(fst56_ben_frame) < fst56_ben: + print(f"Warning: not enough benign samples for skin tone 56, taking the max available {len(fst56_ben_frame)}") + fst56_ben_frame = 
fst56_ben_frame.sample(len(fst56_ben_frame), random_state=random_seed) + else: + fst56_ben_frame = fst56_ben_frame.sample(fst56_ben, random_state=random_seed) + + if len(fst12_mal_frame) < fst12_mal: + print(f"Warning: not enough malignant samples for skin tone 12, taking the max available {len(fst12_mal_frame)}") + fst12_mal_frame = fst12_mal_frame.sample(len(fst12_mal_frame), random_state=random_seed) + else: + fst12_mal_frame = fst12_mal_frame.sample(fst12_mal, random_state=random_seed) + + if len(fst12_ben_frame) < fst12_ben: + print(f"Warning: not enough benign samples for skin tone 12, taking the max available {len(fst12_ben_frame)}") + fst12_ben_frame = fst12_ben_frame.sample(len(fst12_ben_frame), random_state=random_seed) + else: + fst12_ben_frame = fst12_ben_frame.sample(fst12_ben, random_state=random_seed) + + final_demo_frame = pd.concat([fst56_mal_frame, + fst56_ben_frame, + fst12_mal_frame, + fst12_ben_frame]) + + if len(final_demo_frame) < total_samples: + print(f"Warning: not enough total samples, taking the max available {len(final_demo_frame)}") + final_demo_frame = final_demo_frame.sample(len(final_demo_frame), random_state=random_seed) + else: + final_demo_frame = final_demo_frame.sample(total_samples, random_state=random_seed) # sample full num to shuffle + return final_demo_frame + +def main( + model, + fst12_ben, + fst12_mal, + fst56_ben, + fst56_mal, + num_qns_per_round, + filter_rare = False, + detail="auto", + random_seed=42 +): + """ + Run queries for each test case in the test_df dataframe using demonstrating examples sampled from demo_df dataframe. + + model[str]: the specific model checkpoint to use e.g. 
"Gemini1.5", "gpt-4-turbo-2024-04-09" + fst12_ben[int]: number of demonstrating examples to include from Fitzpatrick Skin Types I/II from class benign + fst12_mal[int]: number of demonstrating examples to include from Fitzpatrick Skin Types I/II from class malignant + fst56_ben[int]: number of demonstrating examples to include from Fitzpatrick Skin Types V/VI from class benign + fst56_mal[int]: number of demonstrating examples to include from Fitzpatrick Skin Types V/VI from class malignant + location[str]: Vertex AI location e.g. "us-central1","us-west1", not used for GPT-series models + num_qns_per_round[int]: number of queries to be batched in one API call + detail[str]: resolution level for GPT4(V)-series models, not used for Gemini models + """ + +# class_to_idx = {class_name: idx for idx, class_name in enumerate(classes)} + EXP_NAME = f"ddi_{random_seed}_{fst12_ben}_{fst12_mal}_{fst56_ben}_{fst56_mal}_{model}_{num_qns_per_round}" + + demo_frame = create_demo(fst12_ben, fst12_mal, fst56_ben, fst56_mal, random_seed=random_seed) + + dataset_name = "DDI" + test_df = pd.read_csv(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/{dataset_name}/ddi_test_metadata.csv", index_col=0) + if filter_rare: + test_df = test_df[~test_df.disease.isin(rare_diseases)] + + if model.startswith("gpt") or model.startswith("o1"): + api = GPT4VAPI(model=model, detail=detail) + elif model.startswith("Gemini"): + assert model == "Gemini1.5" + api = GeminiAPI() + elif model.startswith("claude"): + api = ClaudeAPI() + print(EXP_NAME, f"test size = {len(test_df)}") + + # create demo_examples from my demo_frame + # list of tuples + # i[0] = (path_to_image, class name) + demo_paths = [] + demo_labels = [] + for i,row in demo_frame.iterrows(): + ans_choice = "B" if row.malignant == True else "A" + demo_paths.append('/home/groups/roxanad/ddi/' + row.DDI_file) + demo_labels.append(ans_choice) + demo_examples = list(zip(demo_paths, demo_labels)) + + # Load existing results + if 
os.path.isfile(f"{EXP_NAME}.pkl"): + with open(f"{EXP_NAME}.pkl", "rb") as f: + results = pickle.load(f) + else: + results = {} + + test_df = test_df.sample(frac=1, random_state=random_seed) # Shuffle the test set + for start_idx in tqdm(range(0, len(test_df), num_qns_per_round), desc=EXP_NAME): + end_idx = min(len(test_df), start_idx + num_qns_per_round) + +# random.shuffle(demo_examples) already shuffled so don't need to do this + prompt = "Please respond with the following format for each question, in the form of a single capital letter specifying which label best describes the image. Do not deviate from the format, because it will be automatically parsed." + image_paths = [ + i[0] for i in demo_examples + ] + for demo in demo_examples: + prompt += f"""<>Given the image above, answer the following question using the specified format. +Question: Which description best fits the skin lesion in the image above? +Choices: A. Benign, B. Malignant +Answer Choice: {demo[1]} +""" + qns_idx = [] + for idx, i in enumerate(test_df.iloc[start_idx:end_idx].itertuples()): + qns_idx.append(i.Index) + image_paths.append('/home/groups/roxanad/ddi/' + i.DDI_file) + qn_idx = idx + 1 + + prompt += f"""<>Given the image above, answer the following question using the specified format. +Question {qn_idx}: Which description best fits the skin lesion in the image above? +Choices {qn_idx}: A. Benign, B. Malignant + +""" + for i in range(start_idx, end_idx): + qn_idx = i - start_idx + 1 + prompt += f""" +Please respond with the following format for each question: +---BEGIN FORMAT TEMPLATE FOR QUESTION {qn_idx}--- +Answer Choice {qn_idx}: [Your Answer Choice Here for Question {qn_idx}] +Confidence Score {qn_idx}: [Your Numerical Prediction Confidence Score Here From 0 To 1 for Question {qn_idx}] +---END FORMAT TEMPLATE FOR QUESTION {qn_idx}--- + +Do not deviate from the above format. 
Repeat the format template for the answer.""" + qns_id = str(qns_idx) + for retry in range(3): + if ( + (qns_id in results) + and (not results[qns_id][0].startswith("ERROR")) + and ( + f"END FORMAT TEMPLATE FOR QUESTION {end_idx-start_idx}" + in results[qns_id][0] + ) + ): # Skip if results exist and successful + continue + + try: + res = api( + prompt, + image_paths=image_paths, + real_call=True, + max_tokens=60 * num_qns_per_round, + ) + except Exception as e: + res = f"ERROR!!!! {traceback.format_exc()}" + except KeyboardInterrupt: + previous_usage = results.get("token_usage", (0, 0, 0)) + total_usage = tuple( + a + b for a, b in zip(previous_usage, api.token_usage) + ) + results["token_usage"] = total_usage + with open(f"{EXP_NAME}.pkl", "wb") as f: + pickle.dump(results, f) + exit() + + print(res) + results[qns_id] = (res,prompt,image_paths) + + # Update token usage and save the results + previous_usage = results.get("token_usage", (0, 0, 0)) + total_usage = tuple(a + b for a, b in zip(previous_usage, api.token_usage)) + results["token_usage"] = total_usage + with open(f"./ddi_results/{EXP_NAME}.pkl", "wb") as f: + pickle.dump(results, f) + + +if __name__ == "__main__": + + # test the base rate + for model in ["gpt-4o-2024-05-13", "claude"]: + for seed in [10, 100, 141]: + main(model, + 40, 0, 40, 0, 50, random_seed=seed) + + main(model, + 30, 10, 30, 10, 50, random_seed=seed) + + main(model, + 20, 20, 20, 20, 50, random_seed=seed) + + main(model, + 10, 30, 10, 30, 50, random_seed=seed) + + main(model, + 0, 40, 0, 40, 50, random_seed=seed) + + # inverted base rate + + main(model, + 0, 40, 40, 0, 50, random_seed=seed) + + main(model, + 10, 30, 30, 10, 50, random_seed=seed) + + main(model, + 30, 10, 10, 30, 50, random_seed=seed) + + main(model, + 40, 0, 0, 40, 50, random_seed=seed) + + # normal ICL experiments + for model in ["Gemini1.5", "gpt-4o-2024-05-13", "claude"]: + for seed in [10, 100, 141]: + main(model, + 0, + 0, + 0, + 0, + 50, random_seed=seed) + + 
for num_malignant in [1,3,5,6]: + main(model, + num_malignant*3, + num_malignant, + num_malignant*3, + num_malignant, + 50, random_seed=seed) + + main(model, + num_malignant*3, + num_malignant, + 0, + 0, + 50, random_seed=seed) + + main(model, + 0, + 0, + num_malignant*3, + num_malignant, + 50, random_seed=seed) \ No newline at end of file From ee7388babfbc82600917797d08871872c2c1aa58 Mon Sep 17 00:00:00 2001 From: Sonnet Xu Date: Tue, 25 Feb 2025 14:28:03 -0800 Subject: [PATCH 03/23] add requirements --- requirements.txt | 52 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a11ef2f..afb5324 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,50 @@ -pandas -numpy \ No newline at end of file +asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1698341106958/work +colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work +comm @ file:///home/conda/feedstock_root/build_artifacts/comm_1710320294760/work +contourpy==1.3.0 +cycler==0.12.1 +debugpy @ file:///C:/Users/dev-admin/perseverance-python-buildout/croot/debugpy_1699554994633/work +decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work +exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1720869315914/work +executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1725214404607/work +fonttools==4.54.1 +importlib_metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1726082825846/work +ipykernel @ file:///D:/bld/ipykernel_1719845595208/work +ipython @ file:///D:/bld/ipython_1729866374643/work +jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1696326070614/work +joblib==1.4.2 +jupyter_client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1726610684920/work +jupyter_core @ 
file:///C:/b/abs_beftpbuevw/croot/jupyter_core_1718818307097/work +kiwisolver==1.4.7 +matplotlib==3.9.2 +matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1713250518406/work +nest_asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1705850609492/work +numpy==2.1.1 +packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1718189413536/work +pandas==2.2.3 +parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1712320355065/work +pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work +pillow==11.0.0 +platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1726613481435/work +prompt_toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1727341649933/work +psutil @ file:///C:/Users/dev-admin/perseverance-python-buildout/croot/psutil_1699482842340/work +pure_eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1721585709575/work +Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1714846767233/work +pyparsing==3.2.0 +python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1709299778482/work +pytz==2024.2 +pywin32==305.1 +pyzmq @ file:///C:/b/abs_89aq69t0up/croot/pyzmq_1705605705281/work +scikit-learn==1.5.2 +scipy==1.14.1 +setuptools==75.1.0 +six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work +stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work +threadpoolctl==3.5.0 +tornado @ file:///C:/b/abs_7bua0304mj/croot/tornado_1718740122405/work +traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1713535121073/work +typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1717802530399/work +tzdata==2024.2 +wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1704731205417/work +wheel==0.44.0 +zipp @ 
file:///home/conda/feedstock_root/build_artifacts/zipp_1726248574750/work From 2e71f5a57b1ccfb0f79b3cb8038399f070333285 Mon Sep 17 00:00:00 2001 From: Sonnet Xu Date: Tue, 25 Feb 2025 15:02:46 -0800 Subject: [PATCH 04/23] clean attribute prediction --- run_chexpert_pred_attribute.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/run_chexpert_pred_attribute.py b/run_chexpert_pred_attribute.py index fd6447a..a579206 100644 --- a/run_chexpert_pred_attribute.py +++ b/run_chexpert_pred_attribute.py @@ -8,7 +8,8 @@ import pandas as pd -def create_demo(female, male, random_seed=141): +def create_demo(female: int, male: int, random_seed: int = 141) -> pd.DataFrame: + """Create a demo dataframe from the given number of female and male samples.""" dataset_name = "chexpert_binary_PNA" demo_frame = pd.read_csv(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/{dataset_name}/demo.csv", index_col=0) total_samples = female + male @@ -39,15 +40,16 @@ def create_demo(female, male, random_seed=141): return final_demo_frame + def main( - model, - female, - male, - num_qns_per_round, - detail="auto", - random_seed=141, + model: str, + female: int, + male: int, + num_qns_per_round: int, + detail: str = "auto", + random_seed: int = 141, ): - + """Main function to run the experiment.""" EXP_NAME = f"chexpert_sex_{female}_{male}_{model}_{num_qns_per_round}" demo_frame = create_demo(female, male, random_seed) @@ -164,4 +166,4 @@ def main( num_malignant, num_malignant, 50, - random_seed=seed) \ No newline at end of file + random_seed=seed) From b6d8d101a5549b086cdc692febf8408abe1e94fb Mon Sep 17 00:00:00 2001 From: Sonnet Xu <59452214+sonnetx@users.noreply.github.com> Date: Fri, 28 Feb 2025 23:30:52 -0700 Subject: [PATCH 05/23] change filepaths --- run_chexpert_pred_attribute.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/run_chexpert_pred_attribute.py b/run_chexpert_pred_attribute.py index 
a579206..16e3f6d 100644 --- a/run_chexpert_pred_attribute.py +++ b/run_chexpert_pred_attribute.py @@ -146,17 +146,17 @@ def main( if __name__ == "__main__": - # main("gpt-4o-2024-05-13", - # 0, 0, - # 50,) + main("gpt-4o-2024-05-13", + 0, 0, + 50,) - # main("Gemini1.5", - # 0, 0, - # 50,) + main("Gemini1.5", + 0, 0, + 50,) - # main("claude", - # 0, 0, - # 50,) + main("claude", + 0, 0, + 50,) for model in ["Gemini1.5", "gpt-4o-2024-05-13", "claude"]: for seed in [10]: From 525e9cc3dab5ffa4843fdc8d15815d7604ec2aac Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 09:17:55 -0800 Subject: [PATCH 06/23] Remove build artifacts from file, refactor code --- .../ddi_demo_metadata.csv | 0 .../ddi_test_metadata.csv | 0 ddi_attribute_pred.py | 158 ++++++----- requirements.txt | 66 ++--- run_chexpert.py | 255 ++++++++++-------- 5 files changed, 281 insertions(+), 198 deletions(-) rename ddi_demo_metadata.csv => data/ddi_demo_metadata.csv (100%) rename ddi_test_metadata.csv => data/ddi_test_metadata.csv (100%) diff --git a/ddi_demo_metadata.csv b/data/ddi_demo_metadata.csv similarity index 100% rename from ddi_demo_metadata.csv rename to data/ddi_demo_metadata.csv diff --git a/ddi_test_metadata.csv b/data/ddi_test_metadata.csv similarity index 100% rename from ddi_test_metadata.csv rename to data/ddi_test_metadata.csv diff --git a/ddi_attribute_pred.py b/ddi_attribute_pred.py index 66e4d87..f9ca625 100644 --- a/ddi_attribute_pred.py +++ b/ddi_attribute_pred.py @@ -1,76 +1,113 @@ -import traceback +# Standard library imports import os -from tqdm import tqdm -import random import pickle +import random +import traceback + +# Third-party imports import numpy as np -from LMM import GPT4VAPI, GeminiAPI, ClaudeAPI import pandas as pd +from tqdm import tqdm +from LMM import GPT4VAPI, ClaudeAPI, GeminiAPI + rare_diseases = { - 'subcutaneous-t-cell-lymphoma', 'focal-acral-hyperkeratosis', - 'eccrine-poroma', 'inverted-follicular-keratosis', 'kaposi-sarcoma', - 
'metastatic-carcinoma', 'mycosis-fungoides', - 'acquired-digital-fibrokeratoma', 'atypical-spindle-cell-nevus-of-reed', - 'verruciform-xanthoma', 'morphea', 'nevus-lipomatosus-superficialis', - 'pigmented-spindle-cell-nevus-of-reed', 'arteriovenous-hemangioma', - 'syringocystadenoma-papilliferum', 'trichofolliculoma', - 'coccidioidomycosis', 'leukemia-cutis', 'sebaceous-carcinoma', - 'blastic-plasmacytoid-dendritic-cell-neoplasm', 'glomangioma', - 'dermatomyositis', 'cellular-neurothekeoma', 'graft-vs-host-disease', - 'xanthograngioma', 'chondroid-syringoma', 'angioleiomyoma' - } - -def create_demo(fst12, fst56, filter_rare = False, random_seed=141): + "subcutaneous-t-cell-lymphoma", + "focal-acral-hyperkeratosis", + "eccrine-poroma", + "inverted-follicular-keratosis", + "kaposi-sarcoma", + "metastatic-carcinoma", + "mycosis-fungoides", + "acquired-digital-fibrokeratoma", + "atypical-spindle-cell-nevus-of-reed", + "verruciform-xanthoma", + "morphea", + "nevus-lipomatosus-superficialis", + "pigmented-spindle-cell-nevus-of-reed", + "arteriovenous-hemangioma", + "syringocystadenoma-papilliferum", + "trichofolliculoma", + "coccidioidomycosis", + "leukemia-cutis", + "sebaceous-carcinoma", + "blastic-plasmacytoid-dendritic-cell-neoplasm", + "glomangioma", + "dermatomyositis", + "cellular-neurothekeoma", + "graft-vs-host-disease", + "xanthograngioma", + "chondroid-syringoma", + "angioleiomyoma", +} + + +def create_demo(fst12, fst56, filter_rare=False, random_seed=141): dataset_name = "DDI" - demo_frame = pd.read_csv(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/{dataset_name}/ddi_demo_metadata.csv", index_col=0) + demo_frame = pd.read_csv( + f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/{dataset_name}/ddi_demo_metadata.csv", + index_col=0, + ) if filter_rare: demo_frame = demo_frame[~demo_frame.disease.isin(rare_diseases)] - + total_samples = fst12 + fst56 - + fst56_frame = demo_frame[demo_frame.skin_tone == 56] if len(fst56_frame) < fst56: - 
print(f"Warning: not enough samples for skin tone 56, taking the max available {len(fst56_frame)}") + print( + f"Warning: not enough samples for skin tone 56, taking the max available {len(fst56_frame)}" + ) fst56_frame = fst56_frame.sample(len(fst56_frame), random_state=random_seed) else: fst56_frame = fst56_frame.sample(fst56, random_state=random_seed) - + fst12_frame = demo_frame[demo_frame.skin_tone == 12] if len(fst12_frame) < fst12: - print(f"Warning: not enough samples for skin tone 12, taking the max available {len(fst12_frame)}") + print( + f"Warning: not enough samples for skin tone 12, taking the max available {len(fst12_frame)}" + ) fst12_frame = fst12_frame.sample(len(fst12_frame), random_state=random_seed) else: fst12_frame = fst12_frame.sample(fst12, random_state=random_seed) - - + final_demo_frame = pd.concat([fst56_frame, fst12_frame]) - + if len(final_demo_frame) < total_samples: - print(f"Warning: not enough total samples, taking the max available {len(final_demo_frame)}") - final_demo_frame = final_demo_frame.sample(len(final_demo_frame), random_state=random_seed) + print( + f"Warning: not enough total samples, taking the max available {len(final_demo_frame)}" + ) + final_demo_frame = final_demo_frame.sample( + len(final_demo_frame), random_state=random_seed + ) else: - final_demo_frame = final_demo_frame.sample(total_samples, random_state=random_seed) # sample full num to shuffle + final_demo_frame = final_demo_frame.sample( + total_samples, random_state=random_seed + ) # sample full num to shuffle return final_demo_frame + def main( model, fst12, fst56, num_qns_per_round, - filter_rare = False, + filter_rare=False, detail="auto", - random_seed=141 + random_seed=141, ): -# class_to_idx = {class_name: idx for idx, class_name in enumerate(classes)} + # class_to_idx = {class_name: idx for idx, class_name in enumerate(classes)} EXP_NAME = f"ddi_fst_{fst12}_{fst56}_{model}_{num_qns_per_round}" - + demo_frame = create_demo(fst12, fst56, 
random_seed=random_seed) dataset_name = "DDI" - test_df = pd.read_csv(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/{dataset_name}/ddi_test_metadata.csv", index_col=0) - if filter_rare: + test_df = pd.read_csv( + f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/{dataset_name}/ddi_test_metadata.csv", + index_col=0, + ) + if filter_rare: test_df = test_df[~test_df.disease.isin(rare_diseases)] if model.startswith("gpt") or model.startswith("o1"): @@ -87,12 +124,12 @@ def main( # i[0] = (path_to_image, class name) demo_paths = [] demo_labels = [] - for i,row in demo_frame.iterrows(): + for i, row in demo_frame.iterrows(): ans_choice = "B" if row.skin_tone == 56 else "A" - demo_paths.append('/home/groups/roxanad/ddi/' + row.DDI_file) + demo_paths.append("/home/groups/roxanad/ddi/" + row.DDI_file) demo_labels.append(ans_choice) demo_examples = list(zip(demo_paths, demo_labels)) - + # Load existing results if os.path.isfile(f"{EXP_NAME}.pkl"): with open(f"{EXP_NAME}.pkl", "rb") as f: @@ -104,11 +141,9 @@ def main( for start_idx in tqdm(range(0, len(test_df), num_qns_per_round), desc=EXP_NAME): end_idx = min(len(test_df), start_idx + num_qns_per_round) -# random.shuffle(demo_examples) already shuffled so don't need to do this + # random.shuffle(demo_examples) already shuffled so don't need to do this prompt = "Please respond with the following format for each question, in the form of a single capital letter specifying which label best describes the image. Do not deviate from the format, because it will be automatically parsed." - image_paths = [ - i[0] for i in demo_examples - ] + image_paths = [i[0] for i in demo_examples] for demo in demo_examples: prompt += f"""<>Given the image above, answer the following question using the specified format. Question: What skin tone does the person in the image have? 
@@ -118,7 +153,7 @@ def main( qns_idx = [] for idx, i in enumerate(test_df.iloc[start_idx:end_idx].itertuples()): qns_idx.append(i.Index) - image_paths.append('/home/groups/roxanad/ddi/' + i.DDI_file) + image_paths.append("/home/groups/roxanad/ddi/" + i.DDI_file) qn_idx = idx + 1 prompt += f"""<>Given the image above, answer the following question using the specified format. @@ -166,7 +201,7 @@ def main( real_call=True, max_tokens=60 * num_qns_per_round, ) - + except Exception as e: res = f"ERROR!!!! {traceback.format_exc()}" except KeyboardInterrupt: @@ -191,7 +226,7 @@ def main( exit() print(res) - results[qns_id] = (res,prompt,image_paths) + results[qns_id] = (res, prompt, image_paths) # Update token usage and save the results previous_usage = results.get("token_usage", (0, 0, 0)) @@ -200,15 +235,15 @@ def main( with open(f"./ddi_results/{EXP_NAME}.pkl", "wb") as f: pickle.dump(results, f) - + if __name__ == "__main__": # for i in range(0, 40, 8): # # main("gpt-4o-2024-05-13", - # # i, - # # i, + # # i, + # # i, # # 50,) - + # # main("Gemini1.5", # # i, i, # # 50,) @@ -218,10 +253,10 @@ def main( # 50,) # main("gpt-4o-2024-05-13", - # 0, - # 0, + # 0, + # 0, # 50,) - + # main("Gemini1.5", # 0, 0, # 50,) @@ -231,10 +266,13 @@ def main( # 50,) for model in ["Gemini1.5", "gpt-4o-2024-05-13", "claude"]: - for seed in [10, 100, 141]: - for num_malignant in [1, 5, 10, 15, 20, 30,]: - main(model, - num_malignant, - num_malignant, - 50, - random_seed=seed) \ No newline at end of file + for seed in [10, 100, 141]: + for num_malignant in [ + 1, + 5, + 10, + 15, + 20, + 30, + ]: + main(model, num_malignant, num_malignant, 50, random_seed=seed) diff --git a/requirements.txt b/requirements.txt index afb5324..0453dc3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,50 +1,52 @@ -asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1698341106958/work -colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work -comm @ 
file:///home/conda/feedstock_root/build_artifacts/comm_1710320294760/work +asttokens==2.0.5 +colorama==0.4.4 +comm==1.0.0 contourpy==1.3.0 cycler==0.12.1 -debugpy @ file:///C:/Users/dev-admin/perseverance-python-buildout/croot/debugpy_1699554994633/work -decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work -exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1720869315914/work -executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1725214404607/work +debugpy==1.6.7 +decorator==5.1.1 +exceptiongroup==1.1.3 +executing==2.1.0 fonttools==4.54.1 -importlib_metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1726082825846/work -ipykernel @ file:///D:/bld/ipykernel_1719845595208/work -ipython @ file:///D:/bld/ipython_1729866374643/work -jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1696326070614/work +importlib_metadata==6.8.0 +ipykernel==6.25.2 +ipython==8.15.1 +jedi==0.19.0 joblib==1.4.2 -jupyter_client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1726610684920/work -jupyter_core @ file:///C:/b/abs_beftpbuevw/croot/jupyter_core_1718818307097/work + +jupyter_client==8.5.2 +jupyter_core==5.4.1 kiwisolver==1.4.7 matplotlib==3.9.2 -matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1713250518406/work -nest_asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1705850609492/work +matplotlib-inline==0.1.2 +nest-asyncio==1.5.1 numpy==2.1.1 -packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1718189413536/work +packaging==21.0 pandas==2.2.3 -parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1712320355065/work -pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work +parso==0.8.3 +pickleshare==0.7.5 pillow==11.0.0 -platformdirs @ 
file:///home/conda/feedstock_root/build_artifacts/platformdirs_1726613481435/work -prompt_toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1727341649933/work -psutil @ file:///C:/Users/dev-admin/perseverance-python-buildout/croot/psutil_1699482842340/work -pure_eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1721585709575/work -Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1714846767233/work +platformdirs==2.4.0 +prompt-toolkit==3.1.1 +psutil==5.8.0 +pure_eval==0.2.2 +Pygments==2.10.0 pyparsing==3.2.0 -python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1709299778482/work +python-dateutil==2.8.2 pytz==2024.2 pywin32==305.1 -pyzmq @ file:///C:/b/abs_89aq69t0up/croot/pyzmq_1705605705281/work +pyzmq==22.3.0 scikit-learn==1.5.2 scipy==1.14.1 setuptools==75.1.0 -six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work -stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work +six==1.16.0 +stack-data==1.0.0 threadpoolctl==3.5.0 -tornado @ file:///C:/b/abs_7bua0304mj/croot/tornado_1718740122405/work -traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1713535121073/work -typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1717802530399/work +tornado==6.1 +traitlets==5.1.1 +typing-extensions==4.0.1 tzdata==2024.2 -wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1704731205417/work +wcwidth==0.2.5 wheel==0.44.0 -zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1726248574750/work +zipp==3.7.0 +tqdm==4.62.3 \ No newline at end of file diff --git a/run_chexpert.py b/run_chexpert.py index f007c64..710bc1e 100644 --- a/run_chexpert.py +++ b/run_chexpert.py @@ -1,12 +1,14 @@ -import traceback import os -from tqdm import tqdm -import random import pickle +import random +import time +import traceback + import numpy as np -from LMM import 
GPT4VAPI, GeminiAPI, ClaudeAPI import pandas as pd -import time +from tqdm import tqdm + +from LMM import GPT4VAPI, ClaudeAPI, GeminiAPI def create_demo(female_ben, female_mal, male_ben, male_mal, random_seed=141): @@ -15,58 +17,99 @@ def create_demo(female_ben, female_mal, male_ben, male_mal, random_seed=141): ### Choose relevant demo examples ### Then create demo prompt and list of demo image paths ### - demo_frame = pd.read_csv(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/chexpert/chexpert_SexBinary_PTX_final_demo_df.csv", index_col=0) + demo_frame = pd.read_csv( + f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/chexpert/chexpert_SexBinary_PTX_final_demo_df.csv", + index_col=0, + ) total_samples = female_ben + female_mal + male_ben + male_mal - + female_frame = demo_frame[demo_frame.Sex == "Female"] if len(female_frame[female_frame.Pneumothorax == True]) < female_mal: - print(f"Warning: not enough female malignant samples, taking the max available {len(female_frame[female_frame.Pneumothorax == True])}") - female_mal_frame = female_frame[female_frame.Pneumothorax == True].sample(len(female_frame[female_frame.Pneumothorax == True]), random_state=random_seed) + print( + f"Warning: not enough female malignant samples, taking the max available {len(female_frame[female_frame.Pneumothorax == True])}" + ) + female_mal_frame = female_frame[female_frame.Pneumothorax == True].sample( + len(female_frame[female_frame.Pneumothorax == True]), + random_state=random_seed, + ) else: - female_mal_frame = female_frame[female_frame.Pneumothorax == True].sample(female_mal, random_state=random_seed) - + female_mal_frame = female_frame[female_frame.Pneumothorax == True].sample( + female_mal, random_state=random_seed + ) + if len(female_frame[female_frame.Pneumothorax == False]) < female_ben: - print(f"Warning: not enough female benign samples, taking the max available {len(female_frame[female_frame.Pneumothorax == False])}") - female_ben_frame = 
female_frame[female_frame.Pneumothorax == False].sample(len(female_frame[female_frame.Pneumothorax == False]), random_state=random_seed) + print( + f"Warning: not enough female benign samples, taking the max available {len(female_frame[female_frame.Pneumothorax == False])}" + ) + female_ben_frame = female_frame[female_frame.Pneumothorax == False].sample( + len(female_frame[female_frame.Pneumothorax == False]), + random_state=random_seed, + ) else: - female_ben_frame = female_frame[female_frame.Pneumothorax == False].sample(female_ben, random_state=random_seed) - + female_ben_frame = female_frame[female_frame.Pneumothorax == False].sample( + female_ben, random_state=random_seed + ) + male_frame = demo_frame[demo_frame.Sex == "Male"] if len(male_frame[male_frame.Pneumothorax == True]) < male_mal: - print(f"Warning: not enough male malignant samples, taking the max available {len(male_frame[male_frame.Pneumothorax == True])}") - male_mal_frame = male_frame[male_frame.Pneumothorax == True].sample(len(male_frame[male_frame.Pneumothorax == True]), random_state=random_seed) + print( + f"Warning: not enough male malignant samples, taking the max available {len(male_frame[male_frame.Pneumothorax == True])}" + ) + male_mal_frame = male_frame[male_frame.Pneumothorax == True].sample( + len(male_frame[male_frame.Pneumothorax == True]), random_state=random_seed + ) else: - male_mal_frame = male_frame[male_frame.Pneumothorax == True].sample(male_mal, random_state=random_seed) - + male_mal_frame = male_frame[male_frame.Pneumothorax == True].sample( + male_mal, random_state=random_seed + ) + if len(male_frame[male_frame.Pneumothorax == False]) < male_ben: - print(f"Warning: not enough male benign samples, taking the max available {len(male_frame[male_frame.Pneumothorax == False])}") - male_ben_frame = male_frame[male_frame.Pneumothorax == False].sample(len(male_frame[male_frame.Pneumothorax == False]), random_state=random_seed) + print( + f"Warning: not enough male benign samples, 
taking the max available {len(male_frame[male_frame.Pneumothorax == False])}" + ) + male_ben_frame = male_frame[male_frame.Pneumothorax == False].sample( + len(male_frame[male_frame.Pneumothorax == False]), random_state=random_seed + ) else: - male_ben_frame = male_frame[male_frame.Pneumothorax == False].sample(male_ben, random_state=random_seed) - - total_samples = len(female_mal_frame) + len(female_ben_frame) + len(male_mal_frame) + len(male_ben_frame) - final_demo_frame = pd.concat([female_mal_frame, - female_ben_frame, - male_mal_frame, - male_ben_frame]).sample(total_samples, random_state=random_seed) # sample full num to shuffle + male_ben_frame = male_frame[male_frame.Pneumothorax == False].sample( + male_ben, random_state=random_seed + ) + + total_samples = ( + len(female_mal_frame) + + len(female_ben_frame) + + len(male_mal_frame) + + len(male_ben_frame) + ) + final_demo_frame = pd.concat( + [female_mal_frame, female_ben_frame, male_mal_frame, male_ben_frame] + ).sample( + total_samples, random_state=random_seed + ) # sample full num to shuffle return final_demo_frame + def main( model, - female_ben, - female_mal, - male_ben, + female_ben, + female_mal, + male_ben, male_mal, num_qns_per_round, detail="auto", - random_seed=141 + random_seed=141, ): EXP_NAME = f"chexpert_{random_seed}_{female_ben}_{female_mal}_{male_ben}_{male_mal}_{model}_{num_qns_per_round}" - - demo_frame = create_demo(female_ben, female_mal, male_ben, male_mal, random_seed=random_seed) - test_df = pd.read_csv(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/chexpert/chexpert_SexBinary_PTX_final_test_df.csv", index_col=0) + demo_frame = create_demo( + female_ben, female_mal, male_ben, male_mal, random_seed=random_seed + ) + + test_df = pd.read_csv( + f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/chexpert/chexpert_SexBinary_PTX_final_test_df.csv", + index_col=0, + ) if model.startswith("gpt") or model.startswith("o1"): api = GPT4VAPI(model=model, detail=detail) @@ 
-81,12 +124,15 @@ def main( # list of tuples demo_paths = [] demo_labels = [] - for i,row in demo_frame.iterrows(): + for i, row in demo_frame.iterrows(): ans_choice = "A" if row.Pneumothorax == True else "B" - demo_paths.append('/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/chexpert/biasICL/ManyICL/ManyICL/' + row.Path) + demo_paths.append( + "/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/chexpert/biasICL/ManyICL/ManyICL/" + + row.Path + ) demo_labels.append(ans_choice) demo_examples = list(zip(demo_paths, demo_labels)) - + # Load existing results if os.path.isfile(f"{EXP_NAME}.pkl"): with open(f"{EXP_NAME}.pkl", "rb") as f: @@ -99,9 +145,7 @@ def main( end_idx = min(len(test_df), start_idx + num_qns_per_round) prompt = "Please respond with the following format for each question, in the form of a single capital letter specifying which label best describes the image. Do not deviate from the format, because it will be automatically parsed." - image_paths = [ - i[0] for i in demo_examples - ] + image_paths = [i[0] for i in demo_examples] for demo in demo_examples: prompt += f"""<>Given the image above, answer the following question using the specified format. Question: Does the chest x-ray above show a Pneumothorax? @@ -111,7 +155,10 @@ def main( qns_idx = [] for idx, row in enumerate(test_df.iloc[start_idx:end_idx].itertuples()): qns_idx.append(row.Index) - image_paths.append('/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/chexpert/biasICL/ManyICL/ManyICL/' + row.Path) + image_paths.append( + "/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/chexpert/biasICL/ManyICL/ManyICL/" + + row.Path + ) qn_idx = idx + 1 prompt += f"""<>Given the image above, answer the following question using the specified format. 
@@ -158,7 +205,7 @@ def main( real_call=True, max_tokens=60 * num_qns_per_round, ) - + except Exception as e: print(e) print(traceback.format_exc()) @@ -181,61 +228,64 @@ def main( res = None else: print(res) - results[qns_id] = (res,prompt,image_paths) + results[qns_id] = (res, prompt, image_paths) # Update token usage and save the results previous_usage = results.get("token_usage", (0, 0, 0)) total_usage = tuple(a + b for a, b in zip(previous_usage, api.token_usage)) results["token_usage"] = total_usage - with open(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/chexpert_results_br/{EXP_NAME}.pkl", "wb") as f: + with open( + f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/chexpert_results_br/{EXP_NAME}.pkl", + "wb", + ) as f: pickle.dump(results, f) - + if __name__ == "__main__": # for model in ["Gemini1.5", "gpt-4o-2024-05-13", "claude"]: for model in ["claude"]: - # main(model, - # 12, - # 12, - # 12, - # 12, - # 50, - # random_seed=100) + # main(model, + # 12, + # 12, + # 12, + # 12, + # 50, + # random_seed=100) for seed in [141, 10, 100]: - # main(model, - # 0, - # 0, - # 0, - # 0, - # 50, - # random_seed=seed) + # main(model, + # 0, + # 0, + # 0, + # 0, + # 50, + # random_seed=seed) # main(model, # 40, 0, 40, 0, 50, random_seed=seed) - + # main(model, # 30, 10, 30, 10, 50, random_seed=seed) - + # main(model, # 20, 20, 20, 20, 50, random_seed=seed) - + # main(model, # 10, 30, 10, 30, 50, random_seed=seed) - + # main(model, # 0, 40, 0, 40, 50, random_seed=seed) - + # inverted base rate - + # main(model, # 0, 40, 40, 0, 50, random_seed=seed) - + # main(model, # 10, 30, 30, 10, 50, random_seed=seed) - + # main(model, # 30, 10, 10, 30, 50, random_seed=seed) - + # main(model, # 40, 0, 0, 40, 50, random_seed=seed) @@ -243,63 +293,56 @@ def main( # main(model, # 25, 0, 25, 0, 50, random_seed=seed) - + # main(model, # 20, 5, 20, 5, 50, random_seed=seed) - + # main(model, # 15, 10, 15, 10, 50, random_seed=seed) - + # main(model, # 10, 15, 10, 15, 50, 
random_seed=seed) - + # main(model, # 0, 25, 0, 25, 50, random_seed=seed) - + # inverted base rate - - main(model, - 0, 25, 25, 0, 50, random_seed=seed) - - main(model, - 5, 20, 20, 5, 50, random_seed=seed) - - main(model, - 10, 15, 15, 10, 50, random_seed=seed) - - main(model, - 12, 12, 12, 12, 50, random_seed=seed) - - main(model, - 15, 10, 10, 15, 50, random_seed=seed) - - main(model, - 20, 5, 5, 20, 50, random_seed=seed) - - main(model, - 25, 0, 0, 25, 50, random_seed=seed) - + + main(model, 0, 25, 25, 0, 50, random_seed=seed) + + main(model, 5, 20, 20, 5, 50, random_seed=seed) + + main(model, 10, 15, 15, 10, 50, random_seed=seed) + + main(model, 12, 12, 12, 12, 50, random_seed=seed) + + main(model, 15, 10, 10, 15, 50, random_seed=seed) + + main(model, 20, 5, 5, 20, 50, random_seed=seed) + + main(model, 25, 0, 0, 25, 50, random_seed=seed) + # for num_malignant in [1, 5, 10, 12]: # main(model, - # num_malignant, - # num_malignant, - # 0, + # num_malignant, + # num_malignant, + # 0, # 0, # 50, # random_seed=seed) # main(model, - # 0, # 0, - # num_malignant, - # num_malignant, + # 0, + # num_malignant, + # num_malignant, # 50, # random_seed=seed) # main(model, - # num_malignant, - # num_malignant, - # num_malignant, + # num_malignant, + # num_malignant, + # num_malignant, # num_malignant, # 50, - # random_seed=seed) \ No newline at end of file + # random_seed=seed) From 16a5ff480dca424456f0263c4b96f9e9bfe49c28 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 09:20:15 -0800 Subject: [PATCH 07/23] update fie --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0453dc3..e446298 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ asttokens==2.0.5 colorama==0.4.4 -comm==1.0.0 +comm==0.1.4 contourpy==1.3.0 cycler==0.12.1 debugpy==1.6.7 @@ -13,7 +13,6 @@ ipykernel==6.25.2 ipython==8.15.1 jedi==0.19.0 joblib==1.4.2 - jupyter_client==8.5.2 jupyter_core==5.4.1 
kiwisolver==1.4.7 From 14134408a4b73178139136c941858959a1ef3184 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 09:23:00 -0800 Subject: [PATCH 08/23] update fie --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index e446298..18419e7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ executing==2.1.0 fonttools==4.54.1 importlib_metadata==6.8.0 ipykernel==6.25.2 -ipython==8.15.1 +ipython>8.15,<8.17 jedi==0.19.0 joblib==1.4.2 jupyter_client==8.5.2 @@ -33,8 +33,8 @@ Pygments==2.10.0 pyparsing==3.2.0 python-dateutil==2.8.2 pytz==2024.2 -pywin32==305.1 -pyzmq==22.3.0 +pywin32 +pyzmq scikit-learn==1.5.2 scipy==1.14.1 setuptools==75.1.0 From 5a96ca3680d064830f098c4fe30e82e30d80f951 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 09:37:06 -0800 Subject: [PATCH 09/23] Resolve dependency conflicts --- requirements.txt | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/requirements.txt b/requirements.txt index 18419e7..6ca9968 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -asttokens==2.0.5 +asttokens>=2.1.0 colorama==0.4.4 comm==0.1.4 contourpy==1.3.0 @@ -9,41 +9,41 @@ exceptiongroup==1.1.3 executing==2.1.0 fonttools==4.54.1 importlib_metadata==6.8.0 -ipykernel==6.25.2 +ipykernel>=6.29.3,<7.0.0 ipython>8.15,<8.17 jedi==0.19.0 joblib==1.4.2 -jupyter_client==8.5.2 -jupyter_core==5.4.1 +jupyter_client>=7.4.4,<9.0.0 +jupyter_core>=5.0.0,<6.0.0 kiwisolver==1.4.7 matplotlib==3.9.2 matplotlib-inline==0.1.2 nest-asyncio==1.5.1 -numpy==2.1.1 -packaging==21.0 +numpy>=1.22,<2.0 +packaging>=16.8,<24 pandas==2.2.3 parso==0.8.3 pickleshare==0.7.5 -pillow==11.0.0 -platformdirs==2.4.0 -prompt-toolkit==3.1.1 +pillow<11,>=7.1.0 +platformdirs>=3.10.0,<5.0.0 +prompt-toolkit>=3.0.0,<4.0.0 psutil==5.8.0 pure_eval==0.2.2 -Pygments==2.10.0 +Pygments>=2.14,<3.0.0 pyparsing==3.2.0 
python-dateutil==2.8.2 pytz==2024.2 -pywin32 -pyzmq +# pywin32==302.0.0; sys_platform == "win32" +pyzmq>=25.0.0,<26.0.0 scikit-learn==1.5.2 scipy==1.14.1 setuptools==75.1.0 six==1.16.0 -stack-data==1.0.0 +stack-data>=0.6.0,<0.7.0 threadpoolctl==3.5.0 -tornado==6.1 -traitlets==5.1.1 -typing-extensions==4.0.1 +tornado>=6.2.0 +traitlets>=5.10.0,<6.0.0 +typing-extensions>=4.6.1 tzdata==2024.2 wcwidth==0.2.5 wheel==0.44.0 From 40241d9986d4f5fabe2ee7ff2a8be64816ece7ef Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 09:52:08 -0800 Subject: [PATCH 10/23] Add missing licensing information --- .gitignore | 2 +- .reuse/dep5.txt | 2 +- LMM.py | 29 ++++++++++++------- README.md | 10 +++++++ .../chexpert_SexBinary_PTX_final_demo_df.csv | 0 .../chexpert_SexBinary_PTX_final_test_df.csv | 0 ddi_attribute_pred.py | 10 ++++++- run_chexpert.py | 11 ++++++- run_chexpert_pred_attribute.py | 7 +++++ run_ddi.py | 20 ++++++++++--- 10 files changed, 73 insertions(+), 18 deletions(-) rename chexpert_SexBinary_PTX_final_demo_df.csv => data/chexpert_SexBinary_PTX_final_demo_df.csv (100%) rename chexpert_SexBinary_PTX_final_test_df.csv => data/chexpert_SexBinary_PTX_final_test_df.csv (100%) diff --git a/.gitignore b/.gitignore index 8b63d63..46898fa 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ # # SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) # -# SPDX-License-Identifier: Apache +# SPDX-License-Identifier: MIT # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/.reuse/dep5.txt b/.reuse/dep5.txt index 9d6c7d7..cd36a17 100644 --- a/.reuse/dep5.txt +++ b/.reuse/dep5.txt @@ -1,6 +1,6 @@ Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ -Files: tests/* +Files: data/* Copyright: 2025 Stanford University and the project authors (see CONTRIBUTORS.md) License: MIT Comment: All files are part of the Daneshjou Lab projects. 
diff --git a/LMM.py b/LMM.py index ebdfc83..e4fbb06 100644 --- a/LMM.py +++ b/LMM.py @@ -1,26 +1,35 @@ +# This source file is part of the Daneshjou Lab projects +# +# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) +# +# SPDX-License-Identifier: MIT +# + import base64 -import time -import pickle import os -import uuid -import pandas as pd -from tqdm import tqdm -import traceback +import pickle import random -from PIL import Image -from dotenv import load_dotenv +import time +import traceback +import uuid from io import BytesIO + +import pandas as pd import tenacity -import base64 +from dotenv import load_dotenv +from PIL import Image +from tqdm import tqdm # Load environment variables load_dotenv() -from openai import OpenAI import google.generativeai as genai +from openai import OpenAI + genai.configure(api_key=os.getenv('GOOGLE_API_KEY')) import anthropic + class ClaudeAPI: def __init__( self, diff --git a/README.md b/README.md index a2eccf7..f523215 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,13 @@ + + # BiasICL # In-Context Learning and Demographic Biases of Vision Language Models diff --git a/chexpert_SexBinary_PTX_final_demo_df.csv b/data/chexpert_SexBinary_PTX_final_demo_df.csv similarity index 100% rename from chexpert_SexBinary_PTX_final_demo_df.csv rename to data/chexpert_SexBinary_PTX_final_demo_df.csv diff --git a/chexpert_SexBinary_PTX_final_test_df.csv b/data/chexpert_SexBinary_PTX_final_test_df.csv similarity index 100% rename from chexpert_SexBinary_PTX_final_test_df.csv rename to data/chexpert_SexBinary_PTX_final_test_df.csv diff --git a/ddi_attribute_pred.py b/ddi_attribute_pred.py index f9ca625..37edd3c 100644 --- a/ddi_attribute_pred.py +++ b/ddi_attribute_pred.py @@ -1,3 +1,10 @@ +# This source file is part of the Daneshjou Lab projects +# +# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) +# +# SPDX-License-Identifier: MIT +# + # Standard library imports 
import os import pickle @@ -8,8 +15,9 @@ import numpy as np import pandas as pd from tqdm import tqdm -from LMM import GPT4VAPI, ClaudeAPI, GeminiAPI +# Local application imports +from LMM import GPT4VAPI, ClaudeAPI, GeminiAPI rare_diseases = { "subcutaneous-t-cell-lymphoma", diff --git a/run_chexpert.py b/run_chexpert.py index 710bc1e..6e592a1 100644 --- a/run_chexpert.py +++ b/run_chexpert.py @@ -1,13 +1,22 @@ +# This source file is part of the Daneshjou Lab projects +# +# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) +# +# SPDX-License-Identifier: MIT +# + + +# Standard library imports import os import pickle import random import time import traceback +# Third-party imports import numpy as np import pandas as pd from tqdm import tqdm - from LMM import GPT4VAPI, ClaudeAPI, GeminiAPI diff --git a/run_chexpert_pred_attribute.py b/run_chexpert_pred_attribute.py index 16e3f6d..a0b9258 100644 --- a/run_chexpert_pred_attribute.py +++ b/run_chexpert_pred_attribute.py @@ -1,3 +1,10 @@ +# This source file is part of the Daneshjou Lab projects +# +# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) +# +# SPDX-License-Identifier: MIT +# + import traceback import os from tqdm import tqdm diff --git a/run_ddi.py b/run_ddi.py index 28f4ab9..8c001d9 100644 --- a/run_ddi.py +++ b/run_ddi.py @@ -1,11 +1,23 @@ -import traceback +# This source file is part of the Daneshjou Lab projects +# +# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) +# +# SPDX-License-Identifier: MIT +# + +# Standard library imports import os -from tqdm import tqdm -import random import pickle +import random +import traceback + +# Third-party imports import numpy as np -from LMM import GPT4VAPI, GeminiAPI, ClaudeAPI import pandas as pd +from tqdm import tqdm + +# Local application imports +from LMM import GPT4VAPI, ClaudeAPI, GeminiAPI rare_diseases = { 'subcutaneous-t-cell-lymphoma', 
'focal-acral-hyperkeratosis', From be0978c3ee6330841ae9ae0f1a6c3784ce0ed49d Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 10:00:35 -0800 Subject: [PATCH 11/23] Delete License file --- LICENSE | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 LICENSE diff --git a/LICENSE b/LICENSE deleted file mode 100644 index c46c5eb..0000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2025 Daneshjou Lab - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
From e9630ee2e8c79cbd918d4c743abc5461d1d8bdc9 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 10:14:40 -0800 Subject: [PATCH 12/23] REUSE Compliance --- .reuse/dep5.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.reuse/dep5.txt b/.reuse/dep5.txt index cd36a17..0a61486 100644 --- a/.reuse/dep5.txt +++ b/.reuse/dep5.txt @@ -1,6 +1,6 @@ Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ -Files: data/* +Files: data/** Copyright: 2025 Stanford University and the project authors (see CONTRIBUTORS.md) License: MIT Comment: All files are part of the Daneshjou Lab projects. From a65c559ef4d32b5c48beefe0f2e2268f95f6b91b Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 10:18:52 -0800 Subject: [PATCH 13/23] REUSE Compliance --- .reuse/dep5.txt | 2 +- LICENSE | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 LICENSE diff --git a/.reuse/dep5.txt b/.reuse/dep5.txt index 0a61486..6adfe24 100644 --- a/.reuse/dep5.txt +++ b/.reuse/dep5.txt @@ -1,6 +1,6 @@ Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ -Files: data/** +Files: data/*.csv Copyright: 2025 Stanford University and the project authors (see CONTRIBUTORS.md) License: MIT Comment: All files are part of the Daneshjou Lab projects. 
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c46c5eb --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Daneshjou Lab + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
From 91368232ec243c1b8ccc2265c019f80fb92f2410 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 10:23:00 -0800 Subject: [PATCH 14/23] Add .license file in data/ --- data/.license | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 data/.license diff --git a/data/.license b/data/.license new file mode 100644 index 0000000..3cc951b --- /dev/null +++ b/data/.license @@ -0,0 +1,5 @@ +# This source file is part of the Daneshjou Lab projects +# +# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) +# +# SPDX-License-Identifier: MIT \ No newline at end of file From 72dbae55f3703f94801ddbcc359a73d3e9d86524 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 10:24:49 -0800 Subject: [PATCH 15/23] Add .license file in data/ --- .reuse/dep5.txt | 2 +- data/.license | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) delete mode 100644 data/.license diff --git a/.reuse/dep5.txt b/.reuse/dep5.txt index 6adfe24..0a61486 100644 --- a/.reuse/dep5.txt +++ b/.reuse/dep5.txt @@ -1,6 +1,6 @@ Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ -Files: data/*.csv +Files: data/** Copyright: 2025 Stanford University and the project authors (see CONTRIBUTORS.md) License: MIT Comment: All files are part of the Daneshjou Lab projects. 
diff --git a/data/.license b/data/.license deleted file mode 100644 index 3cc951b..0000000 --- a/data/.license +++ /dev/null @@ -1,5 +0,0 @@ -# This source file is part of the Daneshjou Lab projects -# -# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) -# -# SPDX-License-Identifier: MIT \ No newline at end of file From d46b687dccbe721d522dbb5d6285bc07dc8c86f0 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 10:27:01 -0800 Subject: [PATCH 16/23] Add license fiels explicitly --- data/chexpert_SexBinary_PTX_final_demo_df.csv.license | 5 +++++ data/chexpert_SexBinary_PTX_final_test_df.csv.license | 5 +++++ data/ddi_demo_metadata.csv.license | 5 +++++ data/ddi_test_metadata.csv.license | 5 +++++ 4 files changed, 20 insertions(+) create mode 100644 data/chexpert_SexBinary_PTX_final_demo_df.csv.license create mode 100644 data/chexpert_SexBinary_PTX_final_test_df.csv.license create mode 100644 data/ddi_demo_metadata.csv.license create mode 100644 data/ddi_test_metadata.csv.license diff --git a/data/chexpert_SexBinary_PTX_final_demo_df.csv.license b/data/chexpert_SexBinary_PTX_final_demo_df.csv.license new file mode 100644 index 0000000..3cc951b --- /dev/null +++ b/data/chexpert_SexBinary_PTX_final_demo_df.csv.license @@ -0,0 +1,5 @@ +# This source file is part of the Daneshjou Lab projects +# +# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) +# +# SPDX-License-Identifier: MIT \ No newline at end of file diff --git a/data/chexpert_SexBinary_PTX_final_test_df.csv.license b/data/chexpert_SexBinary_PTX_final_test_df.csv.license new file mode 100644 index 0000000..3cc951b --- /dev/null +++ b/data/chexpert_SexBinary_PTX_final_test_df.csv.license @@ -0,0 +1,5 @@ +# This source file is part of the Daneshjou Lab projects +# +# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) +# +# SPDX-License-Identifier: MIT \ No newline at end of file diff 
--git a/data/ddi_demo_metadata.csv.license b/data/ddi_demo_metadata.csv.license new file mode 100644 index 0000000..3cc951b --- /dev/null +++ b/data/ddi_demo_metadata.csv.license @@ -0,0 +1,5 @@ +# This source file is part of the Daneshjou Lab projects +# +# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) +# +# SPDX-License-Identifier: MIT \ No newline at end of file diff --git a/data/ddi_test_metadata.csv.license b/data/ddi_test_metadata.csv.license new file mode 100644 index 0000000..3cc951b --- /dev/null +++ b/data/ddi_test_metadata.csv.license @@ -0,0 +1,5 @@ +# This source file is part of the Daneshjou Lab projects +# +# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) +# +# SPDX-License-Identifier: MIT \ No newline at end of file From b4d9ebacfbad0edb4b06f85df9c35f7a077826ec Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 10:29:33 -0800 Subject: [PATCH 17/23] Add license fiels explicitly --- data/.license | 1 + 1 file changed, 1 insertion(+) create mode 100644 data/.license diff --git a/data/.license b/data/.license new file mode 100644 index 0000000..290774b --- /dev/null +++ b/data/.license @@ -0,0 +1 @@ +SPDX-License-Identifier: MIT \ No newline at end of file From d3fdfb1f6582099a478da34a5dc8572ec814d885 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 10:33:40 -0800 Subject: [PATCH 18/23] Update reuse workflow --- .github/workflows/pull_request.yml | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index c74fa09..70e3393 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -15,7 +15,25 @@ on: jobs: reuse_action: name: REUSE Compliance Check - uses: DaneshjouLab/.github/.github/workflows/reuse.yml@main + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + 
+ - name: Cache REUSE tool data + uses: actions/cache@v3 + with: + path: .reuse + key: ${{ runner.os }}-reuse-${{ hashFiles('**/*') }} + restore-keys: | + ${{ runner.os }}-reuse- + + - name: Install REUSE tool + run: pip install reuse + + - name: Run REUSE lint with verbose output + run: reuse lint -v markdown_link_check: name: Markdown Link Check uses: DaneshjouLab/.github/.github/workflows/markdown-link-check.yml@main From bc309c7b1b60526ce4c51ebb36c85eb27da070a6 Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 10:36:59 -0800 Subject: [PATCH 19/23] Update reuse workflow --- .github/workflows/pull_request.yml | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 70e3393..c74fa09 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -15,25 +15,7 @@ on: jobs: reuse_action: name: REUSE Compliance Check - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Cache REUSE tool data - uses: actions/cache@v3 - with: - path: .reuse - key: ${{ runner.os }}-reuse-${{ hashFiles('**/*') }} - restore-keys: | - ${{ runner.os }}-reuse- - - - name: Install REUSE tool - run: pip install reuse - - - name: Run REUSE lint with verbose output - run: reuse lint -v + uses: DaneshjouLab/.github/.github/workflows/reuse.yml@main markdown_link_check: name: Markdown Link Check uses: DaneshjouLab/.github/.github/workflows/markdown-link-check.yml@main From 83a63abda9578278af72687702f65794addfee0d Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 10:42:39 -0800 Subject: [PATCH 20/23] Add missing MIT license file to LICENSES directory --- LICENSE => LICENSES/MIT.txt | 0 data/.license | 1 - data/chexpert_SexBinary_PTX_final_demo_df.csv.license | 5 ----- data/chexpert_SexBinary_PTX_final_test_df.csv.license | 5 ----- data/ddi_demo_metadata.csv.license | 5 ----- 
data/ddi_test_metadata.csv.license | 5 ----- 6 files changed, 21 deletions(-) rename LICENSE => LICENSES/MIT.txt (100%) delete mode 100644 data/.license delete mode 100644 data/chexpert_SexBinary_PTX_final_demo_df.csv.license delete mode 100644 data/chexpert_SexBinary_PTX_final_test_df.csv.license delete mode 100644 data/ddi_demo_metadata.csv.license delete mode 100644 data/ddi_test_metadata.csv.license diff --git a/LICENSE b/LICENSES/MIT.txt similarity index 100% rename from LICENSE rename to LICENSES/MIT.txt diff --git a/data/.license b/data/.license deleted file mode 100644 index 290774b..0000000 --- a/data/.license +++ /dev/null @@ -1 +0,0 @@ -SPDX-License-Identifier: MIT \ No newline at end of file diff --git a/data/chexpert_SexBinary_PTX_final_demo_df.csv.license b/data/chexpert_SexBinary_PTX_final_demo_df.csv.license deleted file mode 100644 index 3cc951b..0000000 --- a/data/chexpert_SexBinary_PTX_final_demo_df.csv.license +++ /dev/null @@ -1,5 +0,0 @@ -# This source file is part of the Daneshjou Lab projects -# -# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) -# -# SPDX-License-Identifier: MIT \ No newline at end of file diff --git a/data/chexpert_SexBinary_PTX_final_test_df.csv.license b/data/chexpert_SexBinary_PTX_final_test_df.csv.license deleted file mode 100644 index 3cc951b..0000000 --- a/data/chexpert_SexBinary_PTX_final_test_df.csv.license +++ /dev/null @@ -1,5 +0,0 @@ -# This source file is part of the Daneshjou Lab projects -# -# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) -# -# SPDX-License-Identifier: MIT \ No newline at end of file diff --git a/data/ddi_demo_metadata.csv.license b/data/ddi_demo_metadata.csv.license deleted file mode 100644 index 3cc951b..0000000 --- a/data/ddi_demo_metadata.csv.license +++ /dev/null @@ -1,5 +0,0 @@ -# This source file is part of the Daneshjou Lab projects -# -# SPDX-FileCopyrightText: 2025 Stanford University and the 
project authors (see AUTHORS.md) -# -# SPDX-License-Identifier: MIT \ No newline at end of file diff --git a/data/ddi_test_metadata.csv.license b/data/ddi_test_metadata.csv.license deleted file mode 100644 index 3cc951b..0000000 --- a/data/ddi_test_metadata.csv.license +++ /dev/null @@ -1,5 +0,0 @@ -# This source file is part of the Daneshjou Lab projects -# -# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) -# -# SPDX-License-Identifier: MIT \ No newline at end of file From a42a9a23f15ef4425cee6105ce06b7d50625261e Mon Sep 17 00:00:00 2001 From: Vicky Bikia Date: Mon, 3 Mar 2025 10:50:27 -0800 Subject: [PATCH 21/23] Add license files explicitly --- .reuse/dep5.txt | 4 ++-- data/chexpert_SexBinary_PTX_final_demo_df.csv.license | 5 +++++ data/chexpert_SexBinary_PTX_final_test_df.csv.license | 5 +++++ data/ddi_demo_metadata.csv.license | 5 +++++ data/ddi_test_metadata.csv.license | 5 +++++ 5 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 data/chexpert_SexBinary_PTX_final_demo_df.csv.license create mode 100644 data/chexpert_SexBinary_PTX_final_test_df.csv.license create mode 100644 data/ddi_demo_metadata.csv.license create mode 100644 data/ddi_test_metadata.csv.license diff --git a/.reuse/dep5.txt b/.reuse/dep5.txt index 0a61486..d93eb29 100644 --- a/.reuse/dep5.txt +++ b/.reuse/dep5.txt @@ -1,6 +1,6 @@ Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ -Files: data/** +Files: data/*.csv Copyright: 2025 Stanford University and the project authors (see CONTRIBUTORS.md) License: MIT -Comment: All files are part of the Daneshjou Lab projects. +Comment: All CSV files in the data directory are licensed under the MIT License. 
\ No newline at end of file diff --git a/data/chexpert_SexBinary_PTX_final_demo_df.csv.license b/data/chexpert_SexBinary_PTX_final_demo_df.csv.license new file mode 100644 index 0000000..3cc951b --- /dev/null +++ b/data/chexpert_SexBinary_PTX_final_demo_df.csv.license @@ -0,0 +1,5 @@ +# This source file is part of the Daneshjou Lab projects +# +# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) +# +# SPDX-License-Identifier: MIT \ No newline at end of file diff --git a/data/chexpert_SexBinary_PTX_final_test_df.csv.license b/data/chexpert_SexBinary_PTX_final_test_df.csv.license new file mode 100644 index 0000000..3cc951b --- /dev/null +++ b/data/chexpert_SexBinary_PTX_final_test_df.csv.license @@ -0,0 +1,5 @@ +# This source file is part of the Daneshjou Lab projects +# +# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) +# +# SPDX-License-Identifier: MIT \ No newline at end of file diff --git a/data/ddi_demo_metadata.csv.license b/data/ddi_demo_metadata.csv.license new file mode 100644 index 0000000..3cc951b --- /dev/null +++ b/data/ddi_demo_metadata.csv.license @@ -0,0 +1,5 @@ +# This source file is part of the Daneshjou Lab projects +# +# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) +# +# SPDX-License-Identifier: MIT \ No newline at end of file diff --git a/data/ddi_test_metadata.csv.license b/data/ddi_test_metadata.csv.license new file mode 100644 index 0000000..3cc951b --- /dev/null +++ b/data/ddi_test_metadata.csv.license @@ -0,0 +1,5 @@ +# This source file is part of the Daneshjou Lab projects +# +# SPDX-FileCopyrightText: 2025 Stanford University and the project authors (see AUTHORS.md) +# +# SPDX-License-Identifier: MIT \ No newline at end of file From 37704b940f5aff0f1123d244f57f68cdbf7d08d4 Mon Sep 17 00:00:00 2001 From: Sonnet Xu <59452214+sonnetx@users.noreply.github.com> Date: Mon, 3 Mar 2025 12:21:25 -0800 Subject: [PATCH 
22/23] shorten lines --- run_chexpert_pred_attribute.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/run_chexpert_pred_attribute.py b/run_chexpert_pred_attribute.py index a0b9258..0cef7da 100644 --- a/run_chexpert_pred_attribute.py +++ b/run_chexpert_pred_attribute.py @@ -17,8 +17,9 @@ def create_demo(female: int, male: int, random_seed: int = 141) -> pd.DataFrame: """Create a demo dataframe from the given number of female and male samples.""" + dataset_dir = "/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/" dataset_name = "chexpert_binary_PNA" - demo_frame = pd.read_csv(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/{dataset_name}/demo.csv", index_col=0) + demo_frame = pd.read_csv(dataset_dir + "{dataset_name}/demo.csv", index_col=0) total_samples = female + male female_frame = demo_frame[demo_frame.Sex == "Female"] @@ -58,11 +59,13 @@ def main( ): """Main function to run the experiment.""" EXP_NAME = f"chexpert_sex_{female}_{male}_{model}_{num_qns_per_round}" + dataset_dir = "/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/" + results_dir = "/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/chexpert_results/" demo_frame = create_demo(female, male, random_seed) dataset_name = "chexpert_binary_PNA" - test_df = pd.read_csv(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/{dataset_name}/test.csv", index_col=0) + test_df = pd.read_csv(dataset_dir + f"{dataset_name}/test.csv", index_col=0) if model.startswith("gpt") or model.startswith("o1"): api = GPT4VAPI(model=model, detail=detail) @@ -79,7 +82,7 @@ def main( demo_labels = [] for i,row in demo_frame.iterrows(): ans_choice = "B" if row.Sex == "Male" else "A" - demo_paths.append('/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/chexpert_binary_PNA/' + row.Path) + demo_paths.append(dataset_dir + 'chexpert_binary_PNA/' + row.Path) demo_labels.append(ans_choice) demo_examples = list(zip(demo_paths, demo_labels)) @@ -112,7 +115,7 @@ def main( 
qns_idx = [] for idx, row in enumerate(test_df.iloc[start_idx:end_idx].itertuples()): qns_idx.append(row.Index) - image_paths.append('/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/chexpert_binary_PNA/' + row.Path) + image_paths.append(dataset_dir + 'chexpert_binary_PNA/' + row.Path) qn_idx = idx + 1 prompt += f"""<>Given the image above, answer the following question using the specified format. @@ -148,7 +151,7 @@ def main( previous_usage = results.get("token_usage", (0, 0, 0)) total_usage = tuple(a + b for a, b in zip(previous_usage, api.token_usage)) results["token_usage"] = total_usage - with open(f"/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/chexpert_results/{EXP_NAME}.pkl", "wb") as f: + with open(results_dir + f"{EXP_NAME}.pkl", "wb") as f: pickle.dump(results, f) From 0be40083b9b21dafd496527fc8656577293f0ef8 Mon Sep 17 00:00:00 2001 From: Sonnet Xu <59452214+sonnetx@users.noreply.github.com> Date: Mon, 3 Mar 2025 19:53:57 -0800 Subject: [PATCH 23/23] address style inline comments --- run_chexpert_pred_attribute.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/run_chexpert_pred_attribute.py b/run_chexpert_pred_attribute.py index 0cef7da..71d7cf2 100644 --- a/run_chexpert_pred_attribute.py +++ b/run_chexpert_pred_attribute.py @@ -5,21 +5,26 @@ # SPDX-License-Identifier: MIT # +# Standard library imports import traceback import os -from tqdm import tqdm import random import pickle import numpy as np -from LMM import GPT4VAPI, GeminiAPI, ClaudeAPI + +# Third-party imports +from tqdm import tqdm import pandas as pd +# Local application imports +from LMM import GPT4VAPI, GeminiAPI, ClaudeAPI + +dataset_dir = "/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/" +results_dir = "/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/chexpert_results/" def create_demo(female: int, male: int, random_seed: int = 141) -> pd.DataFrame: """Create a demo dataframe from the given number of female and male samples.""" - 
dataset_dir = "/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/" - dataset_name = "chexpert_binary_PNA" - demo_frame = pd.read_csv(dataset_dir + "{dataset_name}/demo.csv", index_col=0) + demo_frame = pd.read_csv(dataset_dir + "chexpert_binary_PNA/demo.csv", index_col=0) total_samples = female + male female_frame = demo_frame[demo_frame.Sex == "Female"] @@ -59,8 +64,6 @@ def main( ): """Main function to run the experiment.""" EXP_NAME = f"chexpert_sex_{female}_{male}_{model}_{num_qns_per_round}" - dataset_dir = "/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/dataset/" - results_dir = "/home/groups/roxanad/sonnet/icl/ManyICL/ManyICL/chexpert_results/" demo_frame = create_demo(female, male, random_seed)