Source code for aigarmic.file_handlers

# Filename: 	file_handlers.py
# Author: 	Alessandro Gerada
# Date: 	2023-08-11
# Copyright: 	Alessandro Gerada 2023
# Email: 	alessandro.gerada@liverpool.ac.uk

"""Functions to facilitate working with image data files"""

import pathlib
from pathlib import Path

from aigarmic._img_utils import convert_cv2_to_keras
import csv
import os
from typing import Optional, Union
import cv2  # pylint: disable=import-error
import keras.callbacks
import numpy as np
import tensorflow as tf


def create_dataset_from_directory(directory: str,
                                  label_mode: str,
                                  image_width: int,
                                  image_height: int,
                                  seed: int = 12345,
                                  val_split: float = 0.2,
                                  batch_size: int = 32) -> tuple[tf.data.Dataset, tf.data.Dataset]:
    """
    Build a training dataset and a validation dataset from a directory whose
    subdirectories each hold the images of one class.

    :param directory: path containing images, each in subdirectory corresponding to class
    :param label_mode: Labelling depending on model type ("binary" for binary or "int" for softmax)
    :param image_width: Image width in pixels
    :param image_height: Image height in pixels
    :param seed: Random seed for dataset splitting
    :param val_split: Proportion of data to use for validation
    :param batch_size: Batch size for datasets
    :return: Tuple containing training and validation datasets
    """
    # Every option except `subset` is identical for the two calls, so it is
    # declared once and reused.
    # NOTE(review): Keras documents image_size as (height, width); here it is
    # passed as (image_width, image_height) -- confirm this is intended for
    # non-square images.
    shared_options = {
        "validation_split": val_split,
        "seed": seed,
        "image_size": (image_width, image_height),
        "batch_size": batch_size,
        "label_mode": label_mode,
    }
    load_images = tf.keras.utils.image_dataset_from_directory  # pylint: disable=no-member

    train_dataset = load_images(directory, subset='training', **shared_options)
    val_dataset = load_images(directory, subset='validation', **shared_options)
    return train_dataset, val_dataset
def predict_colony_images_from_directory(directory: Optional[Union[str, pathlib.Path]],
                                         model: tf.keras.models.Model,  # pylint: disable=no-member
                                         class_names: list[str],
                                         image_width: int,
                                         image_height: int,
                                         model_type: str,
                                         save_path: Optional[Union[str, pathlib.Path]] = None,
                                         binary_threshold: float = 0.5) -> list[dict]:
    """
    Predict the class of images in a directory using a trained model, and
    compare prediction to true class (based on subdirectory in which image is
    located, which should correspond to class name).

    Only files whose extension is ``.jpg`` (case-insensitive) are processed.

    :param directory: Directory containing one subdirectory of images per class name
    :param model: Model to use for prediction
    :param class_names: List of class names (also the subdirectory names)
    :param image_width: Image width in pixels
    :param image_height: Image height in pixels
    :param model_type: "binary" or "softmax"
    :param save_path: Path of CSV file to save prediction log to (skipped if empty/None)
    :param binary_threshold: For binary models, threshold for classifying as positive
    :return: List of dictionaries (one per image) with keys "image", "path",
        "prediction", "predicted_class" and "true_class". For binary models
        "predicted_class" is a class name; for softmax models it is the index
        returned by ``np.argmax`` and "prediction" is the highest softmax score.
    :raises ValueError: if model_type is neither "binary" nor "softmax"
    """
    output = []

    # Collect every image path up front so that a missing class subdirectory
    # fails before any prediction is run.
    image_paths_by_class = {}
    for class_name in class_names:
        class_dir = os.path.join(directory, class_name)
        image_paths_by_class[class_name] = [
            os.path.join(class_dir, file_name)
            for file_name in os.listdir(class_dir)
            # Compare the real extension rather than searching for the
            # substring ".jpg", so "x.jpg.bak" is skipped and "x.JPG" is kept.
            if os.path.splitext(file_name)[1].lower() == ".jpg"
        ]

    for true_class, image_paths in image_paths_by_class.items():
        for image_path in image_paths:
            image = cv2.imread(image_path)  # pylint: disable=no-member
            prediction = model.predict(convert_cv2_to_keras(image, image_width, image_height))

            if model_type == "binary":
                # Unpacking enforces that the model produced exactly one value.
                [prediction] = prediction.reshape(-1)
                predicted_class = class_names[0] if prediction <= binary_threshold else class_names[1]
            elif model_type == "softmax":
                probabilities = tf.nn.softmax(prediction)
                predicted_class = np.argmax(probabilities)
                prediction = np.max(probabilities)
            else:
                raise ValueError("Model type not supported")

            output.append({"image": image,
                           "path": image_path,
                           "prediction": prediction,
                           "predicted_class": predicted_class,
                           "true_class": true_class})

    if save_path:
        # newline="" lets the csv module control line endings; without it
        # an extra blank row appears between records on Windows.
        with open(save_path, "w", encoding="utf-8-sig", newline="") as file:
            writer = csv.DictWriter(file,
                                    fieldnames=['path', 'prediction', 'predicted_class', 'true_class'],
                                    extrasaction='ignore')  # drops the "image" array from each row
            writer.writeheader()
            writer.writerows(output)

    return output
def save_training_log(model_history: keras.callbacks.History,
                      save_path: Union[str, pathlib.Path]) -> None:
    """
    Save training log to CSV file, one row per epoch.

    The columns written are accuracy, val_accuracy, loss, val_loss and epoch
    (zero-based).

    :param model_history: Training history object; its ``history`` dict must
        contain the keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss'
    :param save_path: Path of the CSV file to write (overwritten if it exists)
    """
    history = model_history.history
    epochs = range(len(history['accuracy']))
    rows = zip(history['accuracy'],
               history['val_accuracy'],
               history['loss'],
               history['val_loss'],
               epochs)

    # newline="" lets the csv module control line endings; without it an
    # extra blank row appears between records on Windows.
    with open(save_path, "w", encoding="utf-8-sig", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(['accuracy', 'val_accuracy', 'loss', 'val_loss', 'epoch'])
        writer.writerows(rows)
def get_concentration_from_path(path: Union[str, Path]) -> float:
    """
    Extract the concentration encoded in a plate image file name,
    e.g. antibiotic1/0.125.jpg -> 0.125

    :param path: Path to plate image
    :return: Concentration
    """
    file_name = os.path.basename(path)
    # Only the final extension is removed, so "0.125.jpg" leaves "0.125".
    stem, _extension = os.path.splitext(file_name)
    return float(stem)
def get_paths_from_directory(path: Union[str, Path]) -> dict[str, list[str]]:
    """
    Returns a dict of abx_names: [image1_path, image2_path, etc.]

    Each non-hidden subdirectory of ``path`` is treated as one antibiotic. If
    there are no such subdirectories, images are read from ``path`` itself and
    stored under the empty-string key "" (length = 1).

    Only files whose extension is ``.jpg`` (case-insensitive) are returned.
    Antibiotic names and image paths are sorted by name so the result does not
    depend on the order in which the filesystem lists entries.

    :param path: Path to directory containing antibiotic subdirectories
    :return: dict of abx_names: [image1_path, image2_path, etc.]
    """
    abx_names = sorted(
        entry for entry in os.listdir(path)
        if not entry.startswith('.') and os.path.isdir(os.path.join(path, entry))
    )
    if not abx_names:
        # Flat layout: joining with "" makes the loop below scan `path` itself.
        abx_names = [""]

    plate_images_paths = {}
    for abx in abx_names:
        abx_dir = os.path.join(path, abx)
        plate_images_paths[abx] = [
            os.path.join(abx_dir, file_name)
            for file_name in sorted(os.listdir(abx_dir))
            # Compare the real extension rather than searching for the
            # substring ".jpg", so "notes.jpg.txt" is not mistaken for an image.
            if os.path.splitext(file_name)[1].lower() == ".jpg"
        ]
    return plate_images_paths