Skip to content

Project

automil.project.Project handles the setup of the project directory in which results such as trained models and evaluation reports are stored. It makes sure that the directory is created, that the annotations file conforms to Slideflow's expected format, and that a corresponding Slideflow Project instance is created.

Project

Manages the setup of an AutoMIL project.

The Project class is responsible for
  • Modifying the annotation file to conform to the expected slideflow format
  • Creating the project directory structure
  • Creating or loading a Slideflow project instance
  • Exposing project attributes to downstream processes

A Project instance must be prepared before training, evaluation, or prediction can be performed.

Source code in automil/project.py
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
class Project:
    """
    Manages the setup of an AutoMIL project.

    The Project class is responsible for:
        - Modifying the annotation file to conform to the expected slideflow format
        - Creating the project directory structure
        - Creating or loading a Slideflow project instance
        - Exposing project attributes to downstream processes

    A Project instance must be prepared before training, evaluation,
    or prediction can be performed.
    """

    def __init__(
        self,
        project_dir: Path | str,
        annotations_file: Path | str,
        slide_dir: Path | str,
        patient_column: str,
        label_column: str,
        slide_column: str | None = None,
        transform_labels: bool = False,
        verbose: bool = True
    ) -> None:
        """Initializes a Project instance.

        This method itself does not create or modify files or directories. To prepare a directory to house
        a project, call :meth:`prepare_project`

        Args:
            project_dir (Path | str): Directory in which to set up project
            annotations_file (Path | str): annotations file
            slide_dir (Path | str): Slide directory
            patient_column (str): column containing patient identifiers
            label_column (str): column containing labels
            slide_column (str | None, optional): column containing slide identifiers. Defaults to None.
            transform_labels (bool, optional): Whether to transform labels to a float mapping. Defaults to False.
            verbose (bool, optional): Whether to log verbose messages. Defaults to True.
        """
        self.project_dir: Path = Path(project_dir)
        self.annotations_file: Path = Path(annotations_file)
        self.slide_dir: Path = Path(slide_dir)
        # The normalized copy of the annotations always lives inside the project directory.
        self.modified_annotations_file: Path = self.project_dir / "annotations.csv"

        self.patient_column = patient_column
        self.label_column = label_column
        self.slide_column = slide_column

        self.transform_labels = transform_labels
        # Logging callable used for all status messages of this instance.
        self.vlog = get_vlog(verbose)

    # === Properties === #
    @cached_property
    def required_columns(self) -> set[str]:
        """
        Set of required columns expected in the annotation file.

        Includes:
            - Patient identifier column
            - Label column
            - Slide identifier column (if provided)

        The value is cached on first access, so later changes to the column
        attributes are not reflected.

        Returns:
            Set of required columns
        """
        required = {self.patient_column, self.label_column}
        if self.slide_column:
            required.add(self.slide_column)
        return required

    @property
    def label_map(self) -> dict | list[str]:
        """
        Mapping between original labels and model-ready labels.

        The mapping is created during project scaffold setup.

        Returns:
            dict | list[str]:
                Mapping from label to float if ``transform_labels=True`` or a sorted list of
                unique labels (as strings) otherwise.

        Raises:
            AttributeError:
                If the project scaffold has not been set up yet.
        """
        if not hasattr(self, '_label_map'):
            raise AttributeError(
                "Label map has not been set up yet. Call setup_project_scaffold() first."
            )
        return self._label_map

    @property
    def slide_ids(self) -> list[str]:
        """List of unique slide identifiers from the modified annotations file.

        Returns:
            List of unique slide IDs.

        Raises:
            AttributeError:
                If the project scaffold has not been set up yet.
        """
        if not hasattr(self, 'modified_annotations'):
            raise AttributeError(
                "Modified annotations have not been set up yet. Call setup_project_scaffold() first."
            )
        return self.modified_annotations["slide"].astype(str).unique().tolist()

    # === Public Methods === #
    def setup_project_scaffold(self) -> None:
        """
        Creates the project directory and normalizes annotations.

        This method:
            - Creates the project directory if it does not exist
            - Normalizes the annotation file to Slideflow format
            - Generates and stores the label mapping
        """
        self._setup_project_folder()
        # Order matters: the label map is derived from the normalized annotations.
        self.modified_annotations = self._setup_annotations()
        self._label_map = self._setup_label_map()
        self.vlog(f"[{SUCCESS_CLR}]Project scaffold setup complete[/]")

    def prepare_project(self) -> sf.Project:
        """
        Sets up the project directory structure, modifies and stores annotations, and creates or loads
        a Slideflow project.

        This method:
            1. Creates the project folder if necessary.
            2. Normalizes and saves annotations to project_dir/annotations.csv.
            3. Creates a new Slideflow project or loads an existing one.

        Returns:
            sf.Project: A slideflow project instance
        """
        # Setup project folder and annotations
        self.setup_project_scaffold()

        # Load or create project
        if is_project(str(self.project_dir)):
            self.vlog(f"Loading existing project at [{INFO_CLR}]{self.project_dir}[/]")
            self.project = sf.load_project(str(self.project_dir))
        else:
            self.vlog(f"Creating new project at [{INFO_CLR}]{self.project_dir}[/]")
            self.project = sf.create_project(
                name="AutoMIL",
                root=str(self.project_dir),
                slides=str(self.slide_dir),
                annotations=str(self.modified_annotations_file),
            )
        return self.project

    def summary(self) -> None:
        """Prints a simple summary of the Project instance in a tabular format.

        Safe to call at any point of the instance's lifecycle: before
        :meth:`prepare_project` has run, the Slideflow project is reported as
        "Not initialized", and the modified annotations are reported as
        "Not yet created" as long as the file does not exist on disk.
        """
        vlog = self.vlog
        annotations_path = self.modified_annotations_file
        # `project` is only assigned by prepare_project(); fall back to None
        # instead of raising AttributeError when it has not been called yet.
        project = getattr(self, "project", None)
        rows = [
            ("Project Directory:", str(self.project_dir)),
            ("Slide Directory:", str(self.slide_dir)),
            ("Annotations File:", str(self.annotations_file)),
            ("Patient Column:", self.patient_column),
            ("Label Column:", self.label_column),
            ("Slide Column:", self.slide_column or "None (using patient ID)"),
            ("Transform Labels:", str(self.transform_labels)),
            # str(Path) is never empty, so existence on disk is what decides the fallback.
            (
                "Modified Annotations:",
                str(annotations_path) if annotations_path.exists() else "Not yet created",
            ),
            ("Slideflow Project:", "Loaded" if project is not None else "Not initialized"),
        ]

        vlog("[bold underline]Project Summary[/]")
        vlog(render_kv_table(rows, width=256))

    # === Internals === #
    def _setup_project_folder(self) -> None:
        """
        Ensures the project directory exists.

        Creates the directory and parent directories if necessary.
        """
        if not self.project_dir.exists():
            self.project_dir.mkdir(parents=True, exist_ok=True)
            self.vlog(f"Created project directory at [{INFO_CLR}]{self.project_dir}[/]")
        else:
            self.vlog(f"Project directory [{INFO_CLR}]{self.project_dir}[/] already exists")

    def _setup_annotations(self) -> pd.DataFrame:
        """
        Normalizes the input annotations file to the required format.

        This includes:
            - Validating the presence of required columns.
            - Using the patient column as the index, named `patient`.
            - Creating or renaming the `slide` column.
            - Renaming the label column to `label`.
            - Saving the normalized file to project_dir/annotations.csv.

        Label values are written unchanged; the label map is built separately
        by :meth:`_setup_label_map`.

        AutoMIL requires the annotations file to have the following columns:
            - patient | contains patient identifiers
            - slide   | contains slide identifiers
            - label   | contains labels

        Returns:
            pd.DataFrame: The normalized annotations, indexed by `patient`.

        Raises:
            ValueError:
                If required columns are missing.
            IOError:
                If the output annotations file cannot be written.
        """
        # Make sure given columns exist; a truthy result is treated as the missing columns
        if (missing := contains_columns(self.annotations_file, self.required_columns, return_missing=True)):
            raise ValueError(f"Annotations file is missing required columns: {missing}")

        # Load annotations
        annotations = pd.read_csv(self.annotations_file, index_col=self.patient_column)
        annotations.index.name = "patient"

        # Use the patient identifier as slide identifier when no slide column is given.
        # The same applies when the slide column IS the patient column: that column has
        # become the index, so renaming it would be a no-op and leave no `slide` column.
        if not self.slide_column or self.slide_column == self.patient_column:
            annotations["slide"] = annotations.index
        else:
            annotations.rename(columns={self.slide_column: "slide"}, inplace=True)
        # Rename label column
        annotations.rename(columns={self.label_column: "label"}, inplace=True)

        # Save modified annotations
        out_path = self.modified_annotations_file
        annotations.to_csv(out_path, index=True)

        if not out_path.exists():
            raise IOError(f"Failed to write annotations file: {out_path}")

        if annotations.empty:
            self.vlog("Warning: annotation file written but is empty.")

        self.vlog(f"Annotations saved to [{INFO_CLR}]{out_path}[/]")
        return annotations

    def _setup_label_map(self) -> dict | list[str]:
        """Sets up the label map based on the modified annotations.

        Reads the `label` column of ``self.modified_annotations``, so
        :meth:`_setup_annotations` must have run first.

        Returns:
            dict | list[str]: The label map (dict if transform_labels=True, else list of unique labels).
        """
        annotations = self.modified_annotations
        labels = annotations["label"].unique()

        # Transform labels to float values and store the mapping
        if self.transform_labels:
            # Sorting makes the label -> float assignment independent of row order.
            label_map = {label: float(i) for i, label in enumerate(sorted(labels))}
            pretty = ", ".join(f"{k}: {v}" for k, v in label_map.items())
            self.vlog(f"Transformed labels to float values: [{INFO_CLR}]{pretty}[/]")
        else:
            # Store unique labels as sorted list
            label_map = sorted(labels.astype(str).tolist())

        return label_map

label_map property

label_map: dict | list[str]

Mapping between original labels and model-ready labels.

The mapping is created during project scaffold setup.

Returns:

Name Type Description
dict dict | list[str]

Mapping from label to float if transform_labels=True or a list of unique labels otherwise.

Raises:

Type Description
AttributeError

If the project scaffold has not been set up yet.

required_columns cached property

required_columns: set[str]

Set of required columns expected in the annotation file.

Includes
  • Patient identifier column
  • Label column
  • Slide identifier column (if provided)

Returns:

Type Description
set[str]

Set of required columns

slide_ids property

slide_ids: list[str]

List of unique slide identifiers from the modified annotations file.

Returns:

Type Description
list[str]

List of unique slide IDs.

prepare_project

prepare_project() -> sf.Project

Sets up the project directory structure, modifies and stores annotations, and creates or loads a Slideflow project.

This method
  1. Creates the project folder if necessary.
  2. Normalizes and saves annotations to project_dir/annotations.csv.
  3. Creates a new Slideflow project or loads an existing one.

Returns:

Type Description
Project

sf.Project: A slideflow project instance

Source code in automil/project.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
def prepare_project(self) -> sf.Project:
    """
    Builds the project scaffold and returns the matching Slideflow project.

    Steps performed:
        1. Run :meth:`setup_project_scaffold` (project folder, normalized
           annotations, label map).
        2. If the project directory already holds a Slideflow project, load it.
        3. Otherwise create a new Slideflow project named "AutoMIL" that points
           at the slide directory and the normalized annotations file.

    The resulting project is also stored on ``self.project``.

    Returns:
        sf.Project: A slideflow project instance
    """
    self.setup_project_scaffold()

    root = str(self.project_dir)

    # Guard clause: nothing on disk yet, so a fresh project has to be created.
    if not is_project(root):
        self.vlog(f"Creating new project at [{INFO_CLR}]{self.project_dir}[/]")
        self.project = sf.create_project(
            name="AutoMIL",
            root=root,
            slides=str(self.slide_dir),
            annotations=str(self.modified_annotations_file),
        )
        return self.project

    # An existing project was found: reuse it instead of re-creating it.
    self.vlog(f"Loading existing project at [{INFO_CLR}]{self.project_dir}[/]")
    self.project = sf.load_project(root)
    return self.project

setup_project_scaffold

setup_project_scaffold() -> None

Creates the project directory and normalizes annotations.

This method
  • Creates the project directory if it does not exist
  • Normalizes the annotation file to Slideflow format
  • Generates and stores the label mapping
Source code in automil/project.py
175
176
177
178
179
180
181
182
183
184
185
186
187
def setup_project_scaffold(self) -> None:
    """
    Prepares everything a project needs on disk and in memory.

    In order, this:
        - makes sure the project directory exists
        - writes the normalized annotations and keeps them on
          ``self.modified_annotations``
        - derives the label mapping and keeps it on ``self._label_map``
    """
    self._setup_project_folder()

    # The label map is computed from the normalized annotations, so these
    # have to be stored on the instance before the map is built.
    normalized = self._setup_annotations()
    self.modified_annotations = normalized

    mapping = self._setup_label_map()
    self._label_map = mapping

    self.vlog(f"[{SUCCESS_CLR}]Project scaffold setup complete[/]")

summary

summary() -> None

Prints a simple summary of the Project instance in a tabular format.

Source code in automil/project.py
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
def summary(self) -> None:
    """Prints a simple summary of the Project instance in a tabular format.

    Safe to call at any point of the instance's lifecycle: while no
    ``project`` attribute has been assigned, the Slideflow project is reported
    as "Not initialized", and the modified annotations are reported as
    "Not yet created" as long as the file does not exist on disk.
    """
    vlog = self.vlog
    annotations_path = self.modified_annotations_file
    # Fall back to None instead of raising AttributeError when no `project`
    # attribute has been set on the instance yet.
    project = getattr(self, "project", None)
    rows = [
        ("Project Directory:", str(self.project_dir)),
        ("Slide Directory:", str(self.slide_dir)),
        ("Annotations File:", str(self.annotations_file)),
        ("Patient Column:", self.patient_column),
        ("Label Column:", self.label_column),
        ("Slide Column:", self.slide_column or "None (using patient ID)"),
        ("Transform Labels:", str(self.transform_labels)),
        # str(Path) is never empty, so existence on disk is what decides the fallback.
        (
            "Modified Annotations:",
            str(annotations_path) if annotations_path.exists() else "Not yet created",
        ),
        ("Slideflow Project:", "Loaded" if project is not None else "Not initialized"),
    ]

    vlog("[bold underline]Project Summary[/]")
    vlog(render_kv_table(rows, width=256))