Source code for aigarmic.file_handlers

# Filename: 	file_handlers.py
# Author: 	Alessandro Gerada
# Date: 	2023-08-11
# Copyright: 	Alessandro Gerada 2023
# Email: 	alessandro.gerada@liverpool.ac.uk

"""Functions to facilitate working with image data files"""

import pathlib
from pathlib import Path

from aigarmic._img_utils import convert_cv2_to_keras
import csv
import os
from typing import Optional, Union
import cv2  # pylint: disable=import-error
import keras.callbacks
import numpy as np
import tensorflow as tf



[docs]
def create_dataset_from_directory(directory: str,
                                  label_mode: str,
                                  image_width: int,
                                  image_height: int,
                                  seed: int = 12345,
                                  val_split: float = 0.2,
                                  batch_size: int = 32) -> tuple[tf.data.Dataset, tf.data.Dataset]:
    """
    Build a training dataset and a validation dataset from one image directory.

    Both datasets are loaded from the same directory with the same seed and
    validation split; only the requested subset differs.

    :param directory: path containing images, each in a subdirectory corresponding to its class
    :param label_mode: Labelling depending on model type ("binary" for binary or "int" for softmax)
    :param image_width: Image width in pixels
    :param image_height: Image height in pixels
    :param seed: Random seed for dataset splitting
    :param val_split: Proportion of data to use for validation
    :param batch_size: Batch size for datasets
    :return: Tuple of (training dataset, validation dataset)
    """
    load_images = tf.keras.utils.image_dataset_from_directory  # pylint: disable=no-member

    # Everything except `subset` is shared between the two calls.
    # NOTE(review): Keras documents `image_size` as (height, width), whereas
    # width is passed first here - confirm this is intended for non-square images.
    shared_options = {
        "validation_split": val_split,
        "seed": seed,
        "image_size": (image_width, image_height),
        "batch_size": batch_size,
        "label_mode": label_mode,
    }

    train_dataset = load_images(directory, subset='training', **shared_options)
    val_dataset = load_images(directory, subset='validation', **shared_options)
    return train_dataset, val_dataset




[docs]
def predict_colony_images_from_directory(directory: Optional[Union[str, pathlib.Path]],
                                         model: tf.keras.models.Model,  # pylint: disable=no-member
                                         class_names: list[str],
                                         image_width: int,
                                         image_height: int,
                                         model_type: str,
                                         save_path: Optional[Union[str, pathlib.Path]] = None,
                                         binary_threshold: float = 0.5) -> list[dict]:
    """
    Predict the class of images in a directory using a trained model, and compare prediction to
    true class (based on subdirectory in which image is located, which should correspond to class name).

    Only files whose name contains ".jpg" (matched case-insensitively) are processed.

    :param directory: Directory containing one subdirectory of images per class name
    :param model: Model to use for prediction
    :param class_names: List of class names (each must be a subdirectory of `directory`)
    :param image_width: Image width in pixels
    :param image_height: Image height in pixels
    :param model_type: "binary" or "softmax"
    :param save_path: Optional path of a CSV file in which to save the prediction log
    :param binary_threshold: For binary models, predictions above this threshold are assigned to
        class_names[1], all others to class_names[0]
    :return: List of dictionaries containing image, path, prediction, predicted class, and true class
        (for each image)
    :raises ValueError: if `model_type` is neither "binary" nor "softmax" (raised when the first
        image is processed)
    """
    # Resolve every image path up front so that a missing class subdirectory
    # fails before any prediction is run.
    image_paths = {
        class_name: [os.path.join(directory, class_name, file_name)
                     for file_name in os.listdir(os.path.join(directory, class_name))
                     if ".jpg" in file_name.lower()]
        for class_name in class_names
    }

    output = []
    for true_class, paths in image_paths.items():
        for image_path in paths:
            image = cv2.imread(image_path)  # pylint: disable=no-member
            prediction = model.predict(convert_cv2_to_keras(image, image_width, image_height))
            if model_type == "binary":
                # the model is expected to yield exactly one value per image
                [prediction] = prediction.reshape(-1)
                predicted_class = class_names[0] if prediction <= binary_threshold else class_names[1]
            elif model_type == "softmax":
                prediction = tf.nn.softmax(prediction)
                # NOTE(review): this is the index of the winning class, whereas the
                # binary branch stores the class *name* - confirm which one callers expect.
                predicted_class = np.argmax(prediction)
                prediction = np.max(prediction)
            else:
                raise ValueError("Model type not supported")
            output.append({"image": image,
                           "path": image_path,
                           "prediction": prediction,
                           "predicted_class": predicted_class,
                           "true_class": true_class})

    if save_path:
        # newline="" lets the csv module control line endings (avoids blank rows on Windows)
        with open(save_path, "w", encoding="utf-8-sig", newline="") as file:
            writer = csv.DictWriter(file,
                                    fieldnames=['path', 'prediction', 'predicted_class', 'true_class'],
                                    extrasaction='ignore')  # drops the in-memory "image" entry
            writer.writeheader()
            writer.writerows(output)
    return output




[docs]
def save_training_log(model_history: keras.callbacks.History,
                      save_path: Union[str, pathlib.Path]) -> None:
    """
    Save training log to CSV file

    One row is written per epoch with the columns accuracy, val_accuracy,
    loss, val_loss and epoch (zero-based index).

    :param model_history: Training history object; its `history` mapping must contain
        'accuracy', 'val_accuracy', 'loss' and 'val_loss'
    :param save_path: Path of the CSV file to write (overwritten if it exists)
    :raises KeyError: if one of the required metrics is missing from the history
    """
    metrics = ['accuracy', 'val_accuracy', 'loss', 'val_loss']
    history = model_history.history

    # Collect the columns before opening the file, so that a missing metric
    # does not truncate an existing log.
    columns = [history[name] for name in metrics]
    rows = zip(*columns, range(len(history['accuracy'])))

    # newline="" lets the csv module control line endings (avoids blank rows on Windows)
    with open(save_path, "w", encoding="utf-8-sig", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(metrics + ['epoch'])
        writer.writerows(rows)




[docs]
def get_concentration_from_path(path: Union[str, Path]) -> float:
    """
    Parse the concentration encoded in a plate image file name, e.g.
    antibiotic1/0.125.jpg -> 0.125

    The final path component is taken, its last extension is removed, and the
    remainder is converted to a float.

    :param path: Path to plate image
    :return: Concentration
    """
    file_name = os.path.basename(path)
    stem, _extension = os.path.splitext(file_name)
    return float(stem)




[docs]
def get_paths_from_directory(path: Union[str, Path]) -> dict[str, list[str]]:
    """
    Collect plate image paths, grouped by antibiotic subdirectory.

    Every non-hidden subdirectory of `path` is treated as one antibiotic and
    mapped to the image files it contains (file names containing '.jpg' or '.JPG').
    If `path` has no such subdirectories, the images directly inside `path`
    are returned under the single key "" (empty string).

    :param path: Path to directory containing antibiotic subdirectories
    :return: dict of abx_names: [image1_path, image2_path, etc.]
    """
    subdirectories = []
    for entry in os.listdir(path):
        if entry.startswith('.'):
            continue  # skip hidden entries
        if os.path.isdir(os.path.join(path, entry)):
            subdirectories.append(entry)

    # With no subdirectories, fall back to `path` itself: joining with ""
    # leaves the directory unchanged.
    abx_names = subdirectories if subdirectories else [""]

    return {
        abx: [os.path.join(path, abx, file_name)
              for file_name in os.listdir(os.path.join(path, abx))
              if '.jpg' in file_name or '.JPG' in file_name]
        for abx in abx_names
    }