Utilities

Utilities to index text.

Main functions

base_hash: a hash generation function for strings; generate_uuid: a UUID representation of a string; generate_random_string: a random string of required length

Behaviour

These functions are intended to be pure (no side effects); note that generate_random_string is necessarily non-deterministic.

base_hash(input_string)

Generate human-readable hash to check changes in strings.

Parameters:

Name Type Description Default
input_string str

an input string

required

Returns:

Type Description
str

a hash string

Source code in lmm/utils/hash.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def base_hash(input_string: str) -> str:
    """
    Generate human-readable hash to check changes in strings.

    Args: 
        input_string: an input string

    Returns: 
        a hash string (empty string for empty input)
    """
    if not input_string:
        return ""

    # MD5 is a deliberate choice: fast, and cryptographic strength
    # is not needed for change detection.
    digest = hashlib.md5(input_string.encode('utf-8')).digest()

    # Base64 keeps the hash compact and human-readable. An MD5
    # digest is 16 bytes, so the base64 form always ends in '==';
    # dropping the last two characters strips that fixed padding.
    encoded = base64.b64encode(digest).decode('utf-8')
    return encoded[:-2]

generate_random_string(length=18)

Generates a random string.

Parameters:

Name Type Description Default
length int

the length of the random string (defaults to 18 characters).

18

Returns:

Type Description
str

a random string of the required length.

Source code in lmm/utils/hash.py
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def generate_random_string(length: int = 18) -> str:
    """Generates a random string.

    Args:
        length: the length of the random string (defaults to 18
            characters).

    Returns:
        a random string of the required length.
    """
    import secrets
    import string

    # Alphanumeric pool: upper/lower-case letters plus digits.
    # secrets (not random) gives cryptographically strong choices.
    pool = string.ascii_letters + string.digits
    picked = [secrets.choice(pool) for _ in range(length)]
    return ''.join(picked)

generate_uuid(text_input, namespace_uuid=uuid.NAMESPACE_URL)

Generates a UUID Version 5 from a given text string using a specified namespace.

UUID v5 is based on SHA-1 hashing, ensuring that the same text input with the same namespace will always produce the same UUID.

Parameters:

Name Type Description Default
text_input str

The string from which to generate the UUID.

required
namespace_uuid UUID object

The namespace UUID. Defaults to uuid.NAMESPACE_URL. You can use other predefined namespaces (e.g., uuid.NAMESPACE_DNS) or define your own.

NAMESPACE_URL

Returns:

Name Type Description
str str

The generated UUID v5 as a hyphenated string (36 chars).

Source code in lmm/utils/hash.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def generate_uuid(
    text_input: str, namespace_uuid: uuid.UUID = uuid.NAMESPACE_URL
) -> str:
    """
    Derive a deterministic UUID Version 5 from a text string within
    a given namespace.

    Because UUID v5 is built on SHA-1 hashing, identical text and
    namespace always yield the identical UUID.

    Args:
        text_input (str): The string from which to generate the UUID.
        namespace_uuid (UUID object, optional): The namespace UUID.
            Defaults to uuid.NAMESPACE_URL. Other predefined
            namespaces (e.g., uuid.NAMESPACE_DNS) or custom ones
            may be used.

    Returns:
        str: The generated UUID v5 as a hyphenated string (36 chars).
    """
    return str(uuid.uuid5(namespace_uuid, text_input))

Utilities to read/write to/from disk and print errors to the console. Errors are not propagated; instead, functions return a null value (None).

append_postfix_to_filename(filename, postfix)

Appends a postfix string to the name of a file.

Parameters:

Name Type Description Default
filename str

The original name of the file (e.g., "my_document.txt").

required
postfix str

The string to append (e.g., "_new").

required

Returns:

Name Type Description
str str

The new filename with the postfix appended.

Source code in lmm/utils/ioutils.py
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
def append_postfix_to_filename(filename: str, postfix: str) -> str:
    """
    Appends a postfix string to the name of a file.

    The postfix is inserted before the (final) extension, e.g.
    ("my_document.txt", "_new") -> "my_document_new.txt".

    Args:
        filename (str): The original name of the file (e.g.,
            "my_document.txt").
        postfix (str): The string to append (e.g., "_new").

    Returns:
        str: The new filename with the postfix appended.
    """
    import os

    # splitext keeps only the last extension, so "a.tar.gz"
    # becomes ("a.tar", ".gz") and the postfix lands before ".gz".
    root, ext = os.path.splitext(filename)
    return f"{root}{postfix}{ext}"

check_allowed_content(input_string, allowed_list)

Extracts strings delimited by single quotes from input_string and checks if any of them are in the allowed_list.

Parameters:

Name Type Description Default
input_string str

The string to extract quoted content from.

required
allowed_list list[str]

List of strings to check against.

required

Returns:

Type Description
bool

True if any extracted string is in allowed_list, False otherwise.

Source code in lmm/utils/ioutils.py
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
def check_allowed_content(
    input_string: str, allowed_list: list[str]
) -> bool:
    """
    Extracts strings delimited by single quotes from input_string and checks
    if any of them are in the allowed_list.

    Args:
        input_string: The string to extract quoted content from.
        allowed_list: List of strings to check against.

    Returns:
        True if any extracted string is in allowed_list, False otherwise.
    """
    import re

    # Fast path: the whole input is itself an allowed value.
    if input_string in allowed_list:
        return True

    # Fallback: examine every substring wrapped in single quotes.
    quoted_parts = re.findall(r"'([^']*)'", input_string)
    return any(part in allowed_list for part in quoted_parts)

clean_text_concat(text_segments)

Concatenates a list of strings, merging overlapping tails/heads if the overlap constitutes at least one whole word.

The merge condition requires: 1. The tail of text A matches the head of text B. 2. The match represents a complete word boundary on both sides: - The character preceding the overlap in A must not be alphanumeric (or A starts with the overlap). - The character following the overlap in B must not be alphanumeric (or B ends with the overlap). 3. The overlap contains at least one alphanumeric character (to ensure it's "at least a word" and not just whitespace/punctuation).

Parameters:

Name Type Description Default
text_segments list[str]

A list of strings to concatenate.

required

Returns:

Type Description
str

A single concatenated string with overlaps merged.

Source code in lmm/utils/ioutils.py
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
def clean_text_concat(text_segments: list[str]) -> str:
    """
    Concatenates a list of strings, merging overlapping tails/heads
    if the overlap constitutes at least one whole word.

    The merge condition requires:
    1. The tail of text A matches the head of text B.
    2. The match represents a complete word boundary on both sides:
       - The character preceding the overlap in A must not be alphanumeric (or A starts with the overlap).
       - The character following the overlap in B must not be alphanumeric (or B ends with the overlap).
    3. The overlap contains at least one alphanumeric character (to ensure it's "at least a word"
       and not just whitespace/punctuation).

    Args:
        text_segments: A list of strings to concatenate.

    Returns:
        A single concatenated string with overlaps merged.
    """
    if not text_segments:
        return ""

    # Fold the segments left-to-right, letting _merge_segments
    # collapse any whole-word overlap at each junction.
    segments = iter(text_segments)
    merged = next(segments)
    for segment in segments:
        merged = _merge_segments(merged, segment)
    return merged

create_interface(f, argv)

Waits for Enter key presses and handles Ctrl-C to enable interactive execution of the function f and for debugging. The first command-line argument is the markdown file on which the module acts. An optional second command-line argument is the file to which changes are saved. A third command line argument, if True, creates a loop for interactive editing.

Source code in lmm/utils/ioutils.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
@validate_call
def create_interface(
    f: Callable[[str, str], list[Block] | None], argv: list[str]
) -> None:
    """Waits for Enter key presses and handles Ctrl-C to
    enable interactive execution of the function f and for debugging.
    The first command-line argument is the markdown file on
    which the module acts. An optional second command-line
    argument is the file to which changes are saved. A third
    command line argument, if True, creates a loop for interactive
    editing.

    Args:
        f: callable invoked as f(source_file, target_file).
        argv: command-line arguments; argv[0] (program name) is
            ignored.
    """
    if len(argv) > 1:
        filename = argv[1]
    else:
        print("Usage: first command line arg is source file")
        print("       second command line arg is save file (opt)")
        print("       third command line 'True' enters loop")
        return
    # Default the save target to the source file itself.
    if len(argv) > 2:
        target = argv[2]
    else:
        target = filename

    if not validate_file(filename):
        return

    # Only the exact string "True" enables the interactive loop.
    if len(argv) > 3:
        interactive = argv[3] == "True"
    else:
        interactive = False

    if not interactive:
        f(filename, target)
        return

    # BUG FIX: the f-string had no placeholder; interpolate the
    # actual source filename into the prompt.
    print(f"Press 'Enter' to execute the function on '{filename}'.")
    print("Press 'Ctrl-C' to exit.")

    try:
        input()
        while True:
            f(filename, target)
            # Waits for the user to press Enter
            input("Press 'Enter' to continue, 'Ctrl-C' to exit")
    except KeyboardInterrupt:
        print("\nCtrl-C detected. Exiting program.")
    except Exception as e:
        print("An unexpected error occurred: " + str(e))
    finally:
        print("Program gracefully terminated.")

list_files_with_extensions(folder_path, extensions)

Lists all files in a given folder that match a set of specified extensions.

Parameters:

Name Type Description Default
folder_path str | Path

The full path to the folder to search.

required
extensions str | list[str]

A single semicolon-separated string of file extensions (e.g., ".txt;.md;py") OR a standard list of strings (e.g., ['.txt', 'md']). Extensions may or may not start with a dot.

required

Returns:

Type Description
list[str]

A list of full paths (as strings) for all matching files. Returns an

list[str]

empty list if no files are found.

Raises:

Type Description
FileNotFoundError

If the specified folder_path does not exist.

NotADirectoryError

If the specified folder_path is not a directory.

ValueError

If the extensions string contains invalid characters for a filename.

Source code in lmm/utils/ioutils.py
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
def list_files_with_extensions(
    folder_path: str | Path, extensions: str | list[str]
) -> list[str]:
    """
    Lists all files in a given folder that match a set of specified extensions.

    Args:
        folder_path (str | Path): The full path to the folder to search.
        extensions (str | list[str]): A single semicolon-separated string of 
            file extensions (e.g., ".txt;.md;py") OR a standard list of strings 
            (e.g., ['.txt', 'md']). Extensions may or may not start with a dot.

    Returns:
        A list of full paths (as strings) for all matching files. Returns an
        empty list if no files are found.

    Raises:
        FileNotFoundError: If the specified folder_path does not exist.
        NotADirectoryError: If the specified folder_path is not a directory.
        ValueError: If the extensions string contains invalid characters for
            a filename.
    """
    # --- 1. Validate folder path ---
    p_folder = Path(folder_path)
    if not p_folder.exists():
        raise FileNotFoundError(
            f"The folder does not exist: '{folder_path}'"
        )
    if not p_folder.is_dir():
        raise NotADirectoryError(
            f"The specified path is not a directory: '{folder_path}'"
        )

    # --- 2. Process and Normalize Extensions ---
    raw_extensions: list[str] = []

    if isinstance(extensions, str):
        # Handle the semicolon-separated string input
        if not extensions:
            return []
        raw_extensions = extensions.split(';')
    elif isinstance(extensions, list):  # type: ignore (always met)
        # Handle the standard list input
        raw_extensions = extensions
    else:
        # Catch unexpected types
        raise TypeError(
            "Unreacheable code reached. Extensions supposed to be " 
            "a string (semicolon-separated) or a list of strings."
        )

    # Define invalid characters for filenames
    # This remains critical for security and robustness.
    invalid_chars = r'<>:"/\|?*' + "".join(map(chr, range(32)))

    processed_extensions: set[str] = set()
    for ext in raw_extensions:
        ext = str(ext).strip() # Ensure it's a string and strip whitespace
        if not ext:
            continue

        # Check for invalid characters
        if any(char in invalid_chars for char in ext):
            raise ValueError(
                f"Invalid character found in extension '{ext}'. Extensions cannot "
                f"contain any of the following: {invalid_chars}"
            )

        # Prepend dot if missing and store in the set
        if not ext.startswith('.'):
            processed_extensions.add('.' + ext.lower()) # Added .lower() for case-insensitivity
        else:
            processed_extensions.add(ext.lower()) # Added .lower() for case-insensitivity

    if not processed_extensions:
        return []

    # --- 3. Find matching files ---
    # Note: Using Path.suffix is case-sensitive, so we lower-case it here 
    # to match the lower-cased processed_extensions set.
    matching_files: list[str] = [
        str(file_path)
        for file_path in p_folder.iterdir()
        if file_path.is_file()
        and file_path.suffix.lower() in processed_extensions
    ]

    return matching_files

parse_external_boolean(value)

Sanitize externally given boolean

Source code in lmm/utils/ioutils.py
216
217
218
219
220
221
222
223
224
225
def parse_external_boolean(value: object) -> bool:
    """Sanitize externally given boolean.

    Strings are matched case-insensitively against common truthy and
    falsy spellings; anything else falls back to Python truthiness.
    """
    if isinstance(value, str):
        lowered = value.lower()
        if lowered in {'true', '1', 'yes'}:
            return True
        if lowered in {'false', '0', 'no', ''}:
            return False
        # Unrecognized strings fall through to default truthiness.
    # Fallback to Python's default truthiness for other types
    return bool(value)

process_string_quotes(input_string)

Processes a string to ensure consistent internal quoting.

Rules: - If the string contains the character ", except for the first and last character, replace it with ' and make sure the string starts and ends with ". - If the string contains the character ', make sure the string starts and ends with ".

In short, the quote should create a string that can internally quote text with a consistent approach, starting from a string that may do so using different ways.

Parameters:

Name Type Description Default
input_string str

The string to be processed.

required

Returns:

Type Description
str

The processed string with consistent quoting.

Source code in lmm/utils/ioutils.py
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
def process_string_quotes(input_string: str) -> str:
    """
    Processes a string to ensure consistent internal quoting.

    Rules:
    - If the string contains the character ", except for the first
    and last character, replace it with ' and make sure the string
    starts and ends with ".
    - If the string contains the character ', make sure the string
    starts and ends with ".

    In short, the quote should create a string that can internally
    quote text with a consistent approach, starting from a string
    that may do so using different ways.

    Args:
        input_string: The string to be processed.

    Returns:
        The processed string with consistent quoting.
    """
    core = input_string

    # Step 1: strip one pair of matching outer quotes (double or
    # single) so we work on the bare content.
    if len(core) >= 2 and core[0] == core[-1] and core[0] in ('"', "'"):
        core = core[1:-1]

    # Step 2: normalize internal quoting — any double quote inside
    # the content becomes a single quote. (replace() is a no-op when
    # there are none.)
    normalized = core.replace('"', "'")

    # Step 3: if the content quotes anything internally (it now only
    # ever uses single quotes), wrap the whole thing in double
    # quotes; otherwise return it bare.
    if "'" in normalized:
        return '"' + normalized + '"'
    return normalized

string_to_path_or_string(input_string)

Takes a string as argument. If the string is one line, checks that the string codes for an existing file. If so, it returns a Path object for that file. Otherwise, it returns the string.

A string is considered one line if it contains no newlines, or if it only has a single trailing newline character.

Parameters:

Name Type Description Default
input_string str

The input string to check

required

Returns:

Type Description
Path | str

Path object if the string represents an existing file,

Path | str

otherwise the original string

Source code in lmm/utils/ioutils.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def string_to_path_or_string(input_string: str) -> Path | str:
    """
    Takes a string as argument. If the string is one line, checks
    that the string codes for an existing file. If so, it returns a
    Path object for that file. Otherwise, it returns the string.

    A string is considered one line if it contains no newlines, or if
    it only has a single trailing newline character.

    Args:
        input_string: The input string to check

    Returns:
        Path object if the string represents an existing file,
        otherwise the original string
    """
    # Check if string is a single line (allowing for trailing \n)
    stripped_string = input_string.rstrip('\n\r')
    if '\n' in stripped_string or '\r' in stripped_string:
        return input_string

    # Try to create a Path object and check if it exists as a file
    try:
        potential_path = Path(stripped_string.strip())
        if potential_path.exists() and potential_path.is_file():
            return potential_path
    except (OSError, ValueError):
        # Invalid path characters or other path-related errors
        pass

    # Return original string if not a valid existing file
    return input_string

validate_file(source, logger=logger)

Returns: None for failure, Path object otherwise

Source code in lmm/utils/ioutils.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def validate_file(
    source: str | Path, logger: LoggerBase = logger
) -> Path | None:
    """Validate that *source* names an existing, non-empty file.

    Failures are logged (not raised).

    Returns: None for failure, Path object otherwise
    """
    if not source:
        logger.warning("No file given")
        return None

    try:
        source_path = Path(source)
        if not source_path.exists():
            logger.error(f"File does not exist: {source}")
            return None
        if not source_path.is_file():
            logger.error(f"Not a file: {source}")
            return None
        # An empty file is treated as a failure, not a valid input.
        if source_path.stat().st_size == 0:
            logger.warning(f"File is empty: {source}")
            return None
        return source_path
    except Exception as e:
        logger.error(f"Error accessing file {source}: {str(e)}")
        return None

Centralized logging configuration for the ML Markdown project.

This module provides a standardized way to configure and use Python's logging module across the entire project. It ensures consistent log formatting, appropriate log levels, and centralized configuration.

Usage
from library.lm_logging import get_logger, ConsoleLogger,
    FileLogger, ExceptionConsoleLogger

# Use the abstract interface implementations
console_logger = ConsoleLogger(__name__)
file_logger = FileLogger(__name__, "app.log")
exception_logger = ExceptionConsoleLogger(__name__)

# Or use the traditional logger
logger = get_logger(__name__)

ConsoleLogger

Bases: LoggerBase

A console logger implementation that uses logging.Logger as a delegate. Logs messages to the console using Python's built-in logging module.

Source code in lmm/utils/logging.py
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
class ConsoleLogger(LoggerBase):
    """
    A console logger implementation that uses logging.Logger as a
    delegate. Logs messages to the console using Python's built-in
    logging module.
    """

    def __init__(self, name: str | None = None) -> None:
        """
        Initialize the ConsoleLogger with a specific logger name,
        typically __name__ to use the module name.

        Args:
            name: logger name; falls back to the root logger when
                None or empty.
        """
        # BUG FIX: the previous condition
        # `if name is not None or not bool(name)` was always True,
        # so the root-logger branch was unreachable. Use the name
        # only when it is a non-empty string.
        if name:
            self.logger = logging.getLogger(name)
        else:
            self.logger = logging.getLogger()
        self.logger.setLevel(logging.INFO)

        # Attach a stdout handler only if none is configured yet,
        # to avoid duplicate log lines on repeated construction.
        if not self.logger.hasHandlers():
            handler = logging.StreamHandler(sys.stdout)
            formatter = logging.Formatter(
                '%(levelname)s - %(message)s'
            )
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

    def set_level(self, level: int) -> None:
        """Set the logging level for the logger."""
        self.logger.setLevel(level)

    def get_level(self) -> int:
        """Get the current logging level."""
        return self.logger.level

    def info(self, msg: str) -> None:
        """Log an informational message."""
        self.logger.info(msg)

    def error(self, msg: str) -> None:
        """Log an error message."""
        self.logger.error(msg)

    def warning(self, msg: str) -> None:
        """Log a warning message."""
        self.logger.warning(msg)

    def critical(self, msg: str) -> None:
        """Log a critical message (with stack trace for debugging)."""
        self.logger.critical(msg, stack_info=True)

__init__(name=None)

Initialize the ConsoleLogger with a specific logger name, typically `__name__` to use the module name

Source code in lmm/utils/logging.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def __init__(self, name: str | None = None) -> None:
    """
    Initialize the ConsoleLogger with a specific logger name,
    typically __name__ to use the module name.

    Args:
        name: logger name; falls back to the root logger when
            None or empty.
    """
    # BUG FIX: `if name is not None or not bool(name)` was always
    # True, so the root-logger branch below was unreachable. Use
    # the name only when it is a non-empty string.
    if name:
        self.logger = logging.getLogger(name)
    else:
        self.logger = logging.getLogger()
    self.logger.setLevel(logging.INFO)

    # Ensure we have a console handler if none exists
    if not self.logger.hasHandlers():
        handler = logging.StreamHandler(sys.stdout)
        formatter = logging.Formatter(
            '%(levelname)s - %(message)s'
        )
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)

critical(msg)

Log a critical message.

Source code in lmm/utils/logging.py
112
113
114
def critical(self, msg: str) -> None:
    """Log a critical message."""
    # stack_info=True appends the current call stack to aid debugging.
    self.logger.critical(msg, stack_info=True)

error(msg)

Log an error message.

Source code in lmm/utils/logging.py
104
105
106
def error(self, msg: str) -> None:
    """Log an error message."""
    # Delegates directly to the wrapped logging.Logger.
    self.logger.error(msg)

get_level()

Get the current logging level

Source code in lmm/utils/logging.py
96
97
98
def get_level(self) -> int:
    """Get the current logging level (as a logging module int)."""
    return self.logger.level

info(msg)

Log an informational message.

Source code in lmm/utils/logging.py
100
101
102
def info(self, msg: str) -> None:
    """Log an informational message."""
    # Delegates directly to the wrapped logging.Logger.
    self.logger.info(msg)

set_level(level)

Set the logging level for the logger.

Source code in lmm/utils/logging.py
92
93
94
def set_level(self, level: int) -> None:
    """Set the logging level for the logger (e.g. logging.DEBUG)."""
    self.logger.setLevel(level)

warning(msg)

Log a warning message.

Source code in lmm/utils/logging.py
108
109
110
def warning(self, msg: str) -> None:
    """Log a warning message."""
    # Delegates directly to the wrapped logging.Logger.
    self.logger.warning(msg)

ExceptionConsoleLogger

Bases: LoggerBase

A console logger implementation that raises exceptions on error and critical calls.

This logger behaves like ConsoleLogger for info, warning, and set_level methods, but raises exceptions when error() or critical() methods are called. The message is still logged before the exception is raised.

Source code in lmm/utils/logging.py
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
class ExceptionConsoleLogger(LoggerBase):
    """
    Console logger that escalates errors to exceptions.

    info(), warning(), set_level() and get_level() behave exactly
    like ConsoleLogger, but error() and critical() raise a
    RuntimeError after the message has been logged.
    """

    def __init__(self, name: str = "") -> None:
        """
        Initialize the ExceptionConsoleLogger with a specific logger
        name.

        Args:
            name: The name of the logger, typically __name__ to use
                the module name
        """
        # The "_exception" suffix keeps this logger distinct from a
        # plain ConsoleLogger created with the same name.
        self.logger = logging.getLogger(f"{name}_exception")
        self.logger.setLevel(logging.INFO)

        # Attach a stdout handler once; repeated construction must
        # not stack duplicate handlers.
        if not self.logger.handlers:
            stream_handler = logging.StreamHandler(sys.stdout)
            stream_handler.setFormatter(
                logging.Formatter('%(levelname)s - %(message)s')
            )
            self.logger.addHandler(stream_handler)

    def set_level(self, level: int) -> None:
        """Set the logging level for the logger."""
        self.logger.setLevel(level)

    def get_level(self) -> int:
        """Get the current logging level."""
        return self.logger.level

    def info(self, msg: str) -> None:
        """Log an informational message."""
        self.logger.info(msg)

    def warning(self, msg: str) -> None:
        """Log a warning message."""
        self.logger.warning(msg)

    def error(self, msg: str) -> None:
        """Log an error message, then raise RuntimeError."""
        self.logger.error(msg)
        raise RuntimeError(f"Error: {msg}")

    def critical(self, msg: str) -> None:
        """Log a critical message, then raise RuntimeError."""
        self.logger.critical(msg)
        raise RuntimeError(f"Critical error: {msg}")

__init__(name='')

Initialize the ExceptionConsoleLogger with a specific logger name.

Parameters:

Name Type Description Default
name str

The name of the logger, typically `__name__` to use the module name

''
Source code in lmm/utils/logging.py
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
def __init__(self, name: str = "") -> None:
    """
    Initialize the ExceptionConsoleLogger with a specific logger
    name.

    Args:
        name: The name of the logger, typically __name__ to use
            the module name
    """
    # The "_exception" suffix keeps this logger distinct from a
    # plain ConsoleLogger created with the same name.
    self.logger = logging.getLogger(f"{name}_exception")
    self.logger.setLevel(logging.INFO)

    # Ensure we have a console handler if none exists
    if not self.logger.handlers:
        handler = logging.StreamHandler(sys.stdout)
        formatter = logging.Formatter(
            '%(levelname)s - %(message)s'
        )
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)

critical(msg)

Log a critical message and raise an exception.

Source code in lmm/utils/logging.py
444
445
446
447
def critical(self, msg: str) -> None:
    """Log a critical message and raise an exception."""
    # Log first so the message is recorded even if the caller
    # swallows the RuntimeError.
    self.logger.critical(msg)
    raise RuntimeError(f"Critical error: {msg}")

error(msg)

Log an error message and raise an exception.

Source code in lmm/utils/logging.py
435
436
437
438
def error(self, msg: str) -> None:
    """Log an error message and raise an exception."""
    # Log first so the message is recorded even if the caller
    # swallows the RuntimeError.
    self.logger.error(msg)
    raise RuntimeError(f"Error: {msg}")

get_level()

Get the current logging level

Source code in lmm/utils/logging.py
427
428
429
def get_level(self) -> int:
    """Get the current logging level (as a logging module int)."""
    return self.logger.level

info(msg)

Log an informational message.

Source code in lmm/utils/logging.py
431
432
433
def info(self, msg: str) -> None:
    """Log an informational message (does not raise)."""
    self.logger.info(msg)

set_level(level)

Set the logging level for the logger.

Source code in lmm/utils/logging.py
423
424
425
def set_level(self, level: int) -> None:
    """Set the logging level for the logger (e.g. logging.DEBUG)."""
    self.logger.setLevel(level)

warning(msg)

Log a warning message.

Source code in lmm/utils/logging.py
440
441
442
def warning(self, msg: str) -> None:
    """Log a warning message (does not raise)."""
    self.logger.warning(msg)

FileConsoleLogger

Bases: LoggerBase

A file logger implementation that uses logging.Logger as a delegate. Logs messages to a specified file using Python's built-in logging module, and relays the messages to the console as well.

This logger allows independent control of logging levels for both file and console outputs.

Source code in lmm/utils/logging.py
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
class FileConsoleLogger(LoggerBase):
    """
    A file logger implementation that uses logging.Logger as a
    delegate. Logs messages to a specified file using Python's
    built-in logging module, and relays the messages to the console
    as well.

    This logger allows independent control of logging levels for
    both file and console outputs.
    """

    console_logger: LoggerBase

    def __init__(
        self,
        name: str = "",
        log_file: str | Path = "app.log",
        console_level: int = logging.INFO,
        file_level: int = logging.INFO,
    ) -> None:
        """
        Initialize the FileConsoleLogger with a specific logger name,
        file path, and separate logging levels for console and file.

        Args:
            name: The name of the logger, typically __name__ to use
                the module name
            log_file: Path to the log file where messages will be
                written
            console_level: The logging level for console output
                (default: logging.INFO)
            file_level: The logging level for file output
                (default: logging.INFO)
        """
        self.logger = logging.getLogger(f"{name}_file")
        self.logger.setLevel(file_level)

        # Clear any existing handlers to avoid duplicates
        self.logger.handlers.clear()

        # Add file handler
        handler = logging.FileHandler(log_file)
        formatter = logging.Formatter(
            '%(asctime)s - %(levelname)s - %(message)s'
        )
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)

        # Prevent propagation to avoid duplicate logs
        self.logger.propagate = False

        # Delegate for console
        self.console_logger = ConsoleLogger(name)
        self.console_logger.set_level(console_level)

    def set_level(self, level: int) -> None:
        """
        Set the logging level for both file and console loggers.

        Args:
            level: The logging level to set for both outputs
        """
        self.logger.setLevel(level)
        self.console_logger.set_level(level)

    def set_console_level(self, level: int) -> None:
        """
        Set the logging level for the console logger only.

        Args:
            level: The logging level for console output
        """
        self.console_logger.set_level(level)

    def set_file_level(self, level: int) -> None:
        """
        Set the logging level for the file logger only.

        Args:
            level: The logging level for file output
        """
        self.logger.setLevel(level)

    def get_level(self) -> int:
        """
        Get the current logging level for the file logger.

        Returns:
            The file logger's current level
        """
        return self.logger.level

    def get_console_level(self) -> int:
        """
        Get the current logging level for the console logger.

        Returns:
            The console logger's current level
        """
        return self.console_logger.get_level()

    def get_file_level(self) -> int:
        """
        Get the current logging level for the file logger.

        Returns:
            The file logger's current level
        """
        return self.logger.level

    def info(self, msg: str) -> None:
        """Log an informational message."""
        self.logger.info(msg)
        self.console_logger.info(msg)

    def error(self, msg: str) -> None:
        """Log an error message."""
        self.logger.error(msg)
        self.console_logger.error(msg)

    def warning(self, msg: str) -> None:
        """Log a warning message."""
        self.logger.warning(msg)
        self.console_logger.warning(msg)

    def critical(self, msg: str) -> None:
        """Log a critical message."""
        self.logger.critical(msg, stack_info=True)
        self.console_logger.critical(msg)

__init__(name='', log_file='app.log', console_level=logging.INFO, file_level=logging.INFO)

Initialize the FileConsoleLogger with a specific logger name, file path, and separate logging levels for console and file.

Parameters:

Name Type Description Default
name str

The name of the logger, typically name to use the module name

''
log_file str | Path

Path to the log file where messages will be written

'app.log'
console_level int

The logging level for console output (default: logging.INFO)

INFO
file_level int

The logging level for file output (default: logging.INFO)

INFO
Source code in lmm/utils/logging.py
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
def __init__(
    self,
    name: str = "",
    log_file: str | Path = "app.log",
    console_level: int = logging.INFO,
    file_level: int = logging.INFO,
) -> None:
    """
    Initialize the FileConsoleLogger with a specific logger name,
    file path, and separate logging levels for console and file.

    Args:
        name: The name of the logger, typically __name__ to use
            the module name
        log_file: Path to the log file where messages will be
            written
        console_level: The logging level for console output
            (default: logging.INFO)
        file_level: The logging level for file output
            (default: logging.INFO)
    """
    self.logger = logging.getLogger(f"{name}_file")
    self.logger.setLevel(file_level)

    # Clear any existing handlers to avoid duplicates
    self.logger.handlers.clear()

    # Add file handler
    handler = logging.FileHandler(log_file)
    formatter = logging.Formatter(
        '%(asctime)s - %(levelname)s - %(message)s'
    )
    handler.setFormatter(formatter)
    self.logger.addHandler(handler)

    # Prevent propagation to avoid duplicate logs
    self.logger.propagate = False

    # Delegate for console
    self.console_logger = ConsoleLogger(name)
    self.console_logger.set_level(console_level)

critical(msg)

Log a critical message.

Source code in lmm/utils/logging.py
304
305
306
307
def critical(self, msg: str) -> None:
    """Log a critical message."""
    self.logger.critical(msg, stack_info=True)
    self.console_logger.critical(msg)

error(msg)

Log an error message.

Source code in lmm/utils/logging.py
294
295
296
297
def error(self, msg: str) -> None:
    """Log an error message."""
    self.logger.error(msg)
    self.console_logger.error(msg)

get_console_level()

Get the current logging level for the console logger.

Returns:

Type Description
int

The console logger's current level

Source code in lmm/utils/logging.py
271
272
273
274
275
276
277
278
def get_console_level(self) -> int:
    """
    Get the current logging level for the console logger.

    Returns:
        The console logger's current level
    """
    return self.console_logger.get_level()

get_file_level()

Get the current logging level for the file logger.

Returns:

Type Description
int

The file logger's current level

Source code in lmm/utils/logging.py
280
281
282
283
284
285
286
287
def get_file_level(self) -> int:
    """
    Get the current logging level for the file logger.

    Returns:
        The file logger's current level
    """
    return self.logger.level

get_level()

Get the current logging level for the file logger.

Returns:

Type Description
int

The file logger's current level

Source code in lmm/utils/logging.py
262
263
264
265
266
267
268
269
def get_level(self) -> int:
    """
    Get the current logging level for the file logger.

    Returns:
        The file logger's current level
    """
    return self.logger.level

info(msg)

Log an informational message.

Source code in lmm/utils/logging.py
289
290
291
292
def info(self, msg: str) -> None:
    """Log an informational message."""
    self.logger.info(msg)
    self.console_logger.info(msg)

set_console_level(level)

Set the logging level for the console logger only.

Parameters:

Name Type Description Default
level int

The logging level for console output

required
Source code in lmm/utils/logging.py
244
245
246
247
248
249
250
251
def set_console_level(self, level: int) -> None:
    """
    Set the logging level for the console logger only.

    Args:
        level: The logging level for console output
    """
    self.console_logger.set_level(level)

set_file_level(level)

Set the logging level for the file logger only.

Parameters:

Name Type Description Default
level int

The logging level for file output

required
Source code in lmm/utils/logging.py
253
254
255
256
257
258
259
260
def set_file_level(self, level: int) -> None:
    """
    Set the logging level for the file logger only.

    Args:
        level: The logging level for file output
    """
    self.logger.setLevel(level)

set_level(level)

Set the logging level for both file and console loggers.

Parameters:

Name Type Description Default
level int

The logging level to set for both outputs

required
Source code in lmm/utils/logging.py
234
235
236
237
238
239
240
241
242
def set_level(self, level: int) -> None:
    """
    Set the logging level for both file and console loggers.

    Args:
        level: The logging level to set for both outputs
    """
    self.logger.setLevel(level)
    self.console_logger.set_level(level)

warning(msg)

Log a warning message.

Source code in lmm/utils/logging.py
299
300
301
302
def warning(self, msg: str) -> None:
    """Log a warning message."""
    self.logger.warning(msg)
    self.console_logger.warning(msg)

FileLogger

Bases: LoggerBase

A file logger implementation that uses logging.Logger as a delegate. Logs messages to a specified file using Python's built-in logging module.

Source code in lmm/utils/logging.py
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
class FileLogger(LoggerBase):
    """
    A file logger implementation that uses logging.Logger as a
    delegate. Logs messages to a specified file using Python's
    built-in logging module.
    """

    def __init__(
        self, name: str = "", log_file: str | Path = "app.log"
    ) -> None:
        """
        Initialize the FileLogger with a specific logger name and
        file path.

        Args:
            name: The name of the logger, typically __name__ to use
                the module name
            log_file: Path to the log file where messages will be
                written
        """
        self.logger = logging.getLogger(f"{name}_file")
        self.logger.setLevel(logging.INFO)

        # Clear any existing handlers to avoid duplicates
        self.logger.handlers.clear()

        # Add file handler
        handler = logging.FileHandler(log_file)
        formatter = logging.Formatter(
            '%(asctime)s - %(levelname)s - %(message)s'
        )
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)

        # Prevent propagation to avoid duplicate logs
        self.logger.propagate = False

    def set_level(self, level: int) -> None:
        """Set the logging level for the logger."""
        self.logger.setLevel(level)

    def get_level(self) -> int:
        """Get the current logging level"""
        return self.logger.level

    def info(self, msg: str) -> None:
        """Log an informational message."""
        self.logger.info(msg)

    def error(self, msg: str) -> None:
        """Log an error message."""
        self.logger.error(msg)

    def warning(self, msg: str) -> None:
        """Log a warning message."""
        self.logger.warning(msg)

    def critical(self, msg: str) -> None:
        """Log a critical message."""
        self.logger.critical(msg, stack_info=True)

__init__(name='', log_file='app.log')

Initialize the FileLogger with a specific logger name and file path.

Parameters:

Name Type Description Default
name str

The name of the logger, typically name to use the module name

''
log_file str | Path

Path to the log file where messages will be written

'app.log'
Source code in lmm/utils/logging.py
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def __init__(
    self, name: str = "", log_file: str | Path = "app.log"
) -> None:
    """
    Initialize the FileLogger with a specific logger name and
    file path.

    Args:
        name: The name of the logger, typically __name__ to use
            the module name
        log_file: Path to the log file where messages will be
            written
    """
    self.logger = logging.getLogger(f"{name}_file")
    self.logger.setLevel(logging.INFO)

    # Clear any existing handlers to avoid duplicates
    self.logger.handlers.clear()

    # Add file handler
    handler = logging.FileHandler(log_file)
    formatter = logging.Formatter(
        '%(asctime)s - %(levelname)s - %(message)s'
    )
    handler.setFormatter(formatter)
    self.logger.addHandler(handler)

    # Prevent propagation to avoid duplicate logs
    self.logger.propagate = False

critical(msg)

Log a critical message.

Source code in lmm/utils/logging.py
174
175
176
def critical(self, msg: str) -> None:
    """Log a critical message."""
    self.logger.critical(msg, stack_info=True)

error(msg)

Log an error message.

Source code in lmm/utils/logging.py
166
167
168
def error(self, msg: str) -> None:
    """Log an error message."""
    self.logger.error(msg)

get_level()

Get the current logging level

Source code in lmm/utils/logging.py
158
159
160
def get_level(self) -> int:
    """Get the current logging level"""
    return self.logger.level

info(msg)

Log an informational message.

Source code in lmm/utils/logging.py
162
163
164
def info(self, msg: str) -> None:
    """Log an informational message."""
    self.logger.info(msg)

set_level(level)

Set the logging level for the logger.

Source code in lmm/utils/logging.py
154
155
156
def set_level(self, level: int) -> None:
    """Set the logging level for the logger."""
    self.logger.setLevel(level)

warning(msg)

Log a warning message.

Source code in lmm/utils/logging.py
170
171
172
def warning(self, msg: str) -> None:
    """Log a warning message."""
    self.logger.warning(msg)

LoggerBase

Bases: ABC

Abstract interface for logging functionality.

Source code in lmm/utils/logging.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
class LoggerBase(ABC):
    """
    Abstract interface for logging functionality.
    """

    @abstractmethod
    def set_level(self, level: int) -> None:
        """Set the logging level for the logger."""
        pass

    @abstractmethod
    def get_level(self) -> int:
        """Get the current logging level"""
        pass

    @abstractmethod
    def info(self, msg: str) -> None:
        """Log an informational message."""
        pass

    @abstractmethod
    def error(self, msg: str) -> None:
        """Log an error message."""
        pass

    @abstractmethod
    def warning(self, msg: str) -> None:
        """Log a warning message."""
        pass

    @abstractmethod
    def critical(self, msg: str) -> None:
        """Log a critical message."""
        pass

critical(msg) abstractmethod

Log a critical message.

Source code in lmm/utils/logging.py
59
60
61
62
@abstractmethod
def critical(self, msg: str) -> None:
    """Log a critical message."""
    pass

error(msg) abstractmethod

Log an error message.

Source code in lmm/utils/logging.py
49
50
51
52
@abstractmethod
def error(self, msg: str) -> None:
    """Log an error message."""
    pass

get_level() abstractmethod

Get the current logging level

Source code in lmm/utils/logging.py
39
40
41
42
@abstractmethod
def get_level(self) -> int:
    """Get the current logging level"""
    pass

info(msg) abstractmethod

Log an informational message.

Source code in lmm/utils/logging.py
44
45
46
47
@abstractmethod
def info(self, msg: str) -> None:
    """Log an informational message."""
    pass

set_level(level) abstractmethod

Set the logging level for the logger.

Source code in lmm/utils/logging.py
34
35
36
37
@abstractmethod
def set_level(self, level: int) -> None:
    """Set the logging level for the logger."""
    pass

warning(msg) abstractmethod

Log a warning message.

Source code in lmm/utils/logging.py
54
55
56
57
@abstractmethod
def warning(self, msg: str) -> None:
    """Log a warning message."""
    pass

LoglistLogger

Bases: LoggerBase

Maintains a list of logged errors and warnings that can be inspected by the object creator.

Source code in lmm/utils/logging.py
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
class LoglistLogger(LoggerBase):
    """
    Maintains a list of logged errors and warnings that can be
    inspected by the object creator.
    """

    def __init__(self) -> None:
        """
        Initialize the logger.
        """
        self.logs: list[dict[str, str]] = []

    def set_level(self, level: int) -> None:
        """Set the logging level for the logger."""
        pass

    def get_level(self) -> int:
        """Get the current logging level"""
        return 0

    def info(self, msg: str) -> None:
        """Log an informational message."""
        self.logs.append({'info': msg})

    def error(self, msg: str) -> None:
        """Log an error message."""
        self.logs.append({'error': msg})

    def warning(self, msg: str) -> None:
        """Log a warning message."""
        self.logs.append({'warning': msg})

    def critical(self, msg: str) -> None:
        """Log a critical message."""
        self.logs.append({'critical': msg})

    def get_logs(self, level: int = 0) -> list[str]:
        """
        Returns a list of strings with the log messages.

        Args:
           level: a filter on the logs. Possible values:
                0 or less: returns all messages
                WARNING or less: omit info
                ERROR or less: omit warning
                CRITICAL or more: only errors and critical
        """
        logs: list[str] = []
        for entry in self.logs:
            match entry:
                case {'info': msg}:
                    if level <= logging.INFO:
                        logs.append("INFO - " + msg)
                case {'warning': msg}:
                    if level <= logging.WARNING:
                        logs.append("WARNING - " + msg)
                case {'error': msg}:
                    if level <= logging.ERROR:
                        logs.append("ERROR - " + msg)
                case {'critical': msg}:
                    logs.append("CRITICAL - " + msg)
                case _:
                    logs.append(str(entry))
        return logs

    def count_logs(self, level: int = 0) -> int:
        """The number of recorded logs. Zero means there
        were no recorded logs."""
        logs = self.get_logs(level)
        return len(logs)

    def clear_logs(self) -> None:
        """Clear the logs from the cache"""
        self.logs.clear()

    def print_logs(self, level: int = 0) -> None:
        logs: list[str] = self.get_logs(level)
        for log in logs:
            print(log)

__init__()

Initialize the logger.

Source code in lmm/utils/logging.py
316
317
318
319
320
def __init__(self) -> None:
    """
    Initialize the logger.
    """
    self.logs: list[dict[str, str]] = []

clear_logs()

Clear the logs from the cache

Source code in lmm/utils/logging.py
381
382
383
def clear_logs(self) -> None:
    """Clear the logs from the cache"""
    self.logs.clear()

count_logs(level=0)

The number of recorded logs. Zero means there were no recorded logs.

Source code in lmm/utils/logging.py
375
376
377
378
379
def count_logs(self, level: int = 0) -> int:
    """The number of recorded logs. Zero means there
    were no recorded logs."""
    logs = self.get_logs(level)
    return len(logs)

critical(msg)

Log a critical message.

Source code in lmm/utils/logging.py
342
343
344
def critical(self, msg: str) -> None:
    """Log a critical message."""
    self.logs.append({'critical': msg})

error(msg)

Log an error message.

Source code in lmm/utils/logging.py
334
335
336
def error(self, msg: str) -> None:
    """Log an error message."""
    self.logs.append({'error': msg})

get_level()

Get the current logging level

Source code in lmm/utils/logging.py
326
327
328
def get_level(self) -> int:
    """Get the current logging level"""
    return 0

get_logs(level=0)

Returns a list of strings with the log messages.

Parameters:

Name Type Description Default
level int

a filter on the logs. Possible values: 0 or less: returns all messages WARNING or less: omit info ERROR or less: omit warning CRITICAL or more: only errors and critical

0
Source code in lmm/utils/logging.py
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
def get_logs(self, level: int = 0) -> list[str]:
    """
    Returns a list of strings with the log messages.

    Args:
       level: a filter on the logs. Possible values:
            0 or less: returns all messages
            WARNING or less: omit info
            ERROR or less: omit warning
            CRITICAL or more: only errors and critical
    """
    logs: list[str] = []
    for entry in self.logs:
        match entry:
            case {'info': msg}:
                if level <= logging.INFO:
                    logs.append("INFO - " + msg)
            case {'warning': msg}:
                if level <= logging.WARNING:
                    logs.append("WARNING - " + msg)
            case {'error': msg}:
                if level <= logging.ERROR:
                    logs.append("ERROR - " + msg)
            case {'critical': msg}:
                logs.append("CRITICAL - " + msg)
            case _:
                logs.append(str(entry))
    return logs

info(msg)

Log an informational message.

Source code in lmm/utils/logging.py
330
331
332
def info(self, msg: str) -> None:
    """Log an informational message."""
    self.logs.append({'info': msg})

set_level(level)

Set the logging level for the logger.

Source code in lmm/utils/logging.py
322
323
324
def set_level(self, level: int) -> None:
    """Set the logging level for the logger."""
    pass

warning(msg)

Log a warning message.

Source code in lmm/utils/logging.py
338
339
340
def warning(self, msg: str) -> None:
    """Log a warning message."""
    self.logs.append({'warning': msg})

add_file_handler(log_file)

Add a file handler to the root logger to write logs to a file.

Parameters:

Name Type Description Default
log_file str | Path

Path to the log file

required
Source code in lmm/utils/logging.py
503
504
505
506
507
508
509
510
511
512
513
514
def add_file_handler(log_file: str | Path) -> None:
    """
    Add a file handler to the root logger to write logs to a file.

    Args:
        log_file: Path to the log file
    """
    file_handler = logging.FileHandler(log_file)
    file_handler.setFormatter(
        logging.Formatter(LOG_FORMAT, DATE_FORMAT)
    )
    logging.getLogger().addHandler(file_handler)

get_logger(name)

Get a logger with the specified name.

Parameters:

Name Type Description Default
name str

The name of the logger, typically name to use the module name

required

Returns:

Type Description
LoggerBase

A configured logger instance

Source code in lmm/utils/logging.py
450
451
452
453
454
455
456
457
458
459
460
461
462
def get_logger(name: str) -> LoggerBase:
    """
    Get a logger with the specified name.

    Args:
        name: The name of the logger, typically __name__ to use the
            module name

    Returns:
        A configured logger instance
    """
    logger = ConsoleLogger(name)
    return logger

get_logging_logger(name)

Get a logger with the specified name.

Parameters:

Name Type Description Default
name str

The name of the logger, typically name to use the module name

required

Returns:

Type Description
Logger

A configured logger instance

Source code in lmm/utils/logging.py
478
479
480
481
482
483
484
485
486
487
488
489
490
def get_logging_logger(name: str) -> logging.Logger:
    """
    Get a logger with the specified name.

    Args:
        name: The name of the logger, typically __name__ to use the
            module name

    Returns:
        A configured logger instance
    """
    logger = logging.Logger(name)
    return logger

set_log_level(level)

Set the log level for all loggers.

Parameters:

Name Type Description Default
level int

The logging level (e.g., logging.DEBUG, logging.INFO)

required
Source code in lmm/utils/logging.py
493
494
495
496
497
498
499
500
def set_log_level(level: int) -> None:
    """
    Set the log level for all loggers.

    Args:
        level: The logging level (e.g., logging.DEBUG, logging.INFO)
    """
    logging.getLogger().setLevel(level)

The utility class LazyLoadingDict stores memoized language model class objects, or indeed objects of any class, produced by a factory function.

The LazyLoadingDict class has three main uses that may be combined.

  • the first is to create objects based on a definition using a dictionary interface. The key of the dictionary is the definition that provides the object instance; different instances may be created based on the definition

  • the second is to memoize the objects created by the definition

  • the third is to enable runtime errors when an invalid definition is given.

The class is instantiated by providing the factory function in the constructor. The factory function takes one argument of the type of the dictionary key, and returns a type that determines the type of the values in the dictionary. To trigger runtime errors when invalid definitions are provided, use keys of StrEnum- or BaseModel-derived types (for example, see the documentation of the class).

LazyLoadingDict

Bases: dict[KeyT, ValueT]

A lazy dictionary class with memoized objects of type ValueT. To restrict the keys used, use a StrEnum key value (see example below). Any object type may be used as key, depending on how the dictionary is used.

Example:

# We define here permissible keys by inheriting from StrEnum
class LMSource(StrEnum):
    Anthropic = 'Anthropic'
    Gemini = 'Gemini'
    OpenAI = 'OpenAI'

# We then define a factory function that creates a model object
# designated by the key, i.e. a function that maps the possible
# keys to instances that are memoized. In the example, ModelClass
# objects are stored in the dictionary (code not included):
def create_model_instance(model_name: LMSource) -> ModelClass:
    print(f"Created instance of {model_name}")
    return ModelClass(model_name=model_name)

# The lazy dictionary is created by giving the factory function
# in the constructor.
lazy_dict = LazyLoadingDict(create_model_instance)

# The objects are created or retrieved as the value of the key:
openai_model = lazy_dict['OpenAI']

# If the argument of the factory is derived from StrEnum, calling
# the dictionary with an invalid key will throw a ValueError:
model = lazy_dict[LMSource('OpenX')]

This is a more elaborate example, where a whole specification is used to create objects and memoize them:

# This defines the supported model sources. Runtime errors
# provided by BaseModel below
from typing import Literal
from pydantic import BaseModel, ConfigDict

LanguageModelSource = Literal[
        'Anthropic',
        'Gemini',
        'Mistral',
        'OpenAI'
    ]

# This defines source + model
class LanguageModelSpecification(BaseModel):
    source_name: LanguageModelSource
    model_name: str

    # This is required to make instances hashable, so that they can
    # be used as keys in the dictionary
    model_config = ConfigDict(frozen=True)


# Langchain model type specified here.
def _create_model_instance(
    model: LanguageModelSpecification,
) -> BaseLM[BaseMsg]:
    # Factory function to create Langchain models while checking
    # permissible sources, provided as key values:

    match model.source_name:
        case LanguageModelSource.OpenAI:
            from langchain_openai.chat_models import ChatOpenAI

            return ChatOpenAI(
                model=model.model_name,
                temperature=0.1,
                max_retries=2,
                use_responses_api=False,
            )
    ...  # (rest of code not shown)

# The memoized dictionary. langchain_models is parametrized like
# a dict[LanguageModelSpecification, BaseLM[BaseMSg]]
langchain_models = LazyLoadingDict(_create_model_instance)

# Example of use
model_spec = {'source_name': "OpenAI", 'model_name': "gpt-4o"}
model = langchain_models[
    LanguageModelSpecification(**model_spec)
]

A Pydantic model class may also be used to create a more flexible dictionary. In the previous example, only the models specified in LanguageModelSource can be specified without raising exceptions. However, a Pydantic model class may be used to constrain the objects saved in the dictionary without limiting them to a finite set, i.e. by a validation that does not constrain the instances to that set. Thus, if source_name were a str in the above example, then a LanguageModelSpecification constructed with any string would be accepted.

In the following example, the runtime error is generated in the factory function, because literals do not give rise to runtime errors in themselves.

ModelSource = Literal["OpenAI", "Cohere"]

def _model_factory(src: ModelSource) -> ModelClass:
    match src:
        case "OpenAI":
            return ModelClass("OpenAI") # code not shown
        case "Cohere":
            return ModelClass("Cohere") # code not shown
        case _:
            # required to raise error
            raise ValueError(f"Invalid model source: {src}")

model_factory = LazyLoadingDict(_model_factory)

It is also possible to assign to the dictionary directly, thus bypassing the factory function. In this case, the only checks are those that are possibly computed by Pydantic when the object is assigned.

Expected behaviour: may raise ValidationError and ValueErrors.

Source code in lmm/utils/lazy_dict.py
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
class LazyLoadingDict(dict[KeyT, ValueT]):
    """A lazy dictionary class with memoized object of type ValueT.
    To restrict the keys used, use a StrEnum key value (see example
    below). Any object type may be used as key, depending on how the
    dictionary is used.

    Example:
    ```python
    # We define here permissible keys by inheriting from StrEnum
    class LMSource(StrEnum):
        Anthropic = 'Anthropic'
        Gemini = 'Gemini'
        OpenAI = 'OpenAI'

    # We then define a factory function that creates a model object
    # designated by the key, i.e. a function that maps the possible
    # keys to instances that are memoized. In the example, ModelClass
    # objects are stored in the dictionary (code not included):
    def create_model_instance(model_name: LMSource) -> ModelClass:
        print(f"Created instance of {model_name}")
        return ModelClass(model_name=model_name)

    # The lazy dictionary is created by giving the factory function
    # in the constructor.
    lazy_dict = LazyLoadingDict(create_model_instance)

    # The objects are created or retrieved as the value of the key:
    openai_model = lazy_dict['OpenAI']

    # If the argument of the factory is derived from StrEnum, calling
    # the dictionary with an invalid key will throw a ValueError
    # (raised by the StrEnum constructor, before the lookup happens):
    model = lazy_dict[LMSource('OpenX')]
    ```

    This is a more elaborate example, where a whole specification is
    used to create objects and memoize them:

    ```python
    # This defines the supported model sources. Runtime errors
    # provided by BaseModel below
    from typing import Literal
    from pydantic import BaseModel, ConfigDict

    LanguageModelSource = Literal[
            'Anthropic',
            'Gemini',
            'Mistral',
            'OpenAI'
        ]

    # This defines source + model
    class LanguageModelSpecification(BaseModel):
        source_name: LanguageModelSource
        model_name: str

        # This is required to make instances hashable, so that they
        # can be used as keys in the dictionary
        model_config = ConfigDict(frozen=True)


    # Langchain model type specified here.
    def _create_model_instance(
        model: LanguageModelSpecification,
    ) -> BaseLM[BaseMsg]:
        # Factory function to create Langchain models while checking
        # permissible sources, provided as key values. Note that
        # LanguageModelSource is a Literal, so the match is done on
        # the plain string value:

        match model.source_name:
            case 'OpenAI':
                from langchain_openai.chat_models import ChatOpenAI

                return ChatOpenAI(
                    model=model.model_name,
                    temperature=0.1,
                    max_retries=2,
                    use_responses_api=False,
                )
        ... (rest of code not shown)

    # The memoized dictionary. langchain_models is parametrized like
    # a dict[LanguageModelSpecification, BaseLM[BaseMsg]]
    langchain_models = LazyLoadingDict(_create_model_instance)

    # Example of use
    model_spec = {'source_name': "OpenAI", 'model_name': "gpt-4o"}
    model = langchain_models[
        LanguageModelSpecification(**model_spec)
    ]
    ```

    A Pydantic model class may also be used to create a more flexible
    dictionary. In the previous example, only the models specified in
    LanguageModelSource can be specified without raising exceptions.
    However, a Pydantic model class may be used to constrain the
    objects saved in the dictionary without limiting them to a finite
    set, i.e. by a validation that does not constrain the instances
    to that set. Thus, if source_name were a str in the above example,
    then any LanguageModelSpecification constructed with any string
    would be accepted.

    In the following example, the runtime error is generated in the
    factory function, because literals do not give rise to runtime
    errors in themselves.

    ```python
    ModelSource = Literal["OpenAI", "Cohere"]

    def _model_factory(src: ModelSource) -> ModelClass:
        match src:
            case "OpenAI":
                return ModelClass("OpenAI") # code not shown
            case "Cohere":
                return ModelClass("Cohere") # code not shown
            case _:
                # required to raise error
                raise ValueError(f"Invalid model source: {src}")

    model_factory = LazyLoadingDict(_model_factory)
    ```

    It is also possible to assign to the dictionary directly, thus
    bypassing the factory function. In this case, the only checks
    are those that are possibly computed by Pydantic when the object
    is assigned.

    Expected behaviour: may raise ValidationError and ValueErrors.
    """

    def __init__(
        self,
        key_creator_func: Callable[[KeyT], ValueT],
        destructor_func: Callable[[ValueT], None] | None = None,
    ):
        """Create an empty lazy dictionary.

        Args:
            key_creator_func: factory called with a missing key to
                build (and memoize) its value.
            destructor_func: optional cleanup called on a value when
                it is removed; when None, a duck-typed close()/
                dispose() is attempted instead (see _destroy_value).
        """
        super().__init__()
        self._key_creator_func = key_creator_func
        self._destructor_func = destructor_func

    def _destroy_value(self, value: ValueT) -> None:
        """Helper to destroy a value using the configured strategy."""
        # Preference order: explicit destructor, then duck-typed
        # close(), then duck-typed dispose(). If none apply, the value
        # is dropped without cleanup and garbage collection takes over.
        if self._destructor_func:
            self._destructor_func(value)
        elif hasattr(value, "close") and callable(value.close): # type: ignore (self-reflection)
            value.close()  # type: ignore (checked)
        elif hasattr(value, "dispose") and callable(value.dispose): # type: ignore (self-reflection)
            value.dispose() # type: ignore (checked)

    def __getitem__(self, key: KeyT) -> ValueT:
        """Return the memoized value for key, creating it on first use
        via the factory function given to the constructor.
        """
        # Check if the value is already cached
        if key in self:
            return super().__getitem__(key)

        # Lazy-load the data, cache it, and return. super() is used so
        # the overwrite check in our own __setitem__ is not triggered.
        value: ValueT = self._key_creator_func(key)
        super().__setitem__(key, value)
        return value

    def __setitem__(self, key: KeyT, value: ValueT) -> None:
        """Allow direct setting of key/value pairs.

        This bypasses the factory function for the given key.
        Once set directly, the factory function will not be called
        for this key unless the key is deleted first.

        Raises:
            ValueError: If the key already exists in the dictionary.
        """
        # NOTE(review): dict.update()/setdefault() in CPython do not
        # route through this method for dict subclasses, so they can
        # bypass the overwrite check — confirm whether that matters
        # for callers.
        if key in self:
            raise ValueError(f"Key '{key}' already exists. Delete it first to overwrite.")
        super().__setitem__(key, value)

    def __delitem__(self, key: KeyT) -> None:
        """Remove key, destroying its value first (see _destroy_value).

        A missing key still raises KeyError from the dict deletion.
        """
        if key in self:
            value: ValueT = super().__getitem__(key)
            self._destroy_value(value)
        super().__delitem__(key)

    def clear(self) -> None:
        """Destroy every stored value, then empty the dictionary."""
        # Iterate over a snapshot since destruction may have side
        # effects; the dict itself is only mutated afterwards.
        for value in list(self.values()):
            self._destroy_value(value)
        super().clear()

    def __del__(self) -> None:
        """Best-effort cleanup of all values on garbage collection."""
        # We need to be careful here during interpreter shutdown
        try:
            self.clear()
        except Exception:
            # Suppress errors during destruction to avoid noise
            pass

__setitem__(key, value)

Allow direct setting of key/value pairs.

This bypasses the factory function for the given key. Once set directly, the factory function will not be called for this key unless the key is deleted first.

Raises:

Type Description
ValueError

If the key already exists in the dictionary.

Source code in lmm/utils/lazy_dict.py
190
191
192
193
194
195
196
197
198
199
200
201
202
def __setitem__(self, key: KeyT, value: ValueT) -> None:
    """Allow direct setting of key/value pairs.

    This bypasses the factory function for the given key.
    Once set directly, the factory function will not be called
    for this key unless the key is deleted first.

    Raises:
        ValueError: If the key already exists in the dictionary.
    """
    # Refuse to overwrite: an existing value (factory-created or
    # directly assigned) must be deleted before the key is reused.
    if key in self:
        raise ValueError(f"Key '{key}' already exists. Delete it first to overwrite.")
    super().__setitem__(key, value)

apply_markdown_heuristics(page_text)

Applies simple heuristics to convert extracted raw text into basic Markdown format.

This function attempts to: 1. Clean up excessive whitespace. 2. Ensure proper paragraph separation (Markdown requires two newlines). 3. (Placeholder for advanced logic) Detect headings or lists based on patterns.

Source code in lmm/utils/importpdfs.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def apply_markdown_heuristics(page_text: str) -> str:
    """
    Applies simple heuristics to convert extracted raw text into basic Markdown format.

    This function attempts to:
    1. Clean up excessive whitespace.
    2. Ensure proper paragraph separation (Markdown requires two newlines).
    3. Keep list items (bulleted or ordered) on their own lines.

    Args:
        page_text: raw text extracted from a PDF page.

    Returns:
        the text reflowed into Markdown-style paragraphs; empty input
        yields an empty string.
    """
    import re

    # Matches ordered-list markers ("1.", "23.", "a.") and bullet markers
    # ("*", "-") at the start of a stripped line. The previous version
    # only recognised the literal prefixes '1.' and 'a.', so items such
    # as '2.' or 'b.' were silently merged into the surrounding
    # paragraph; this pattern is a strict superset of the old prefixes.
    list_marker = re.compile(r"(?:\d+\.|[a-z]\.|[*\-])")

    # 1. Normalize line endings and cleanup extra spaces
    lines = page_text.strip().split('\n')

    markdown_lines: list[str] = []

    # Simple logic: assume lines separated by only one newline are part of the
    # same paragraph, and lines separated by blank lines are new paragraphs.
    current_paragraph: list[str] = []

    def _flush() -> None:
        # Join the pending paragraph into a single line and reset it.
        if current_paragraph:
            markdown_lines.append(" ".join(current_paragraph))
            current_paragraph.clear()

    for line in lines:
        stripped_line = line.strip()

        if not stripped_line:
            # End of a paragraph block, join and add to markdown_lines
            _flush()
            # Add an extra newline for Markdown paragraph separation
            markdown_lines.append("")
        elif list_marker.match(stripped_line):
            # A list item starts its own line, closing any open paragraph
            _flush()
            markdown_lines.append(stripped_line)
        else:
            # Part of the current paragraph
            current_paragraph.append(stripped_line)

    # Add the last pending paragraph
    _flush()

    return "\n".join(markdown_lines).strip()

convert_folder_to_markdown(input_dir, output_dir)

Reads all PDF files from an input directory and converts them to Markdown in an output directory.

Parameters:

Name Type Description Default
input_dir str

The path to the folder containing PDF files.

required
output_dir str

The path where the Markdown files will be saved.

required
Source code in lmm/utils/importpdfs.py
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
def convert_folder_to_markdown(input_dir: str, output_dir: str):
    """
    Reads all PDF files from an input directory and converts them to Markdown
    in an output directory.

    Args:
        input_dir: The path to the folder containing PDF files.
        output_dir: The path where the Markdown files will be saved.
    """
    source = Path(input_dir)
    target = Path(output_dir)

    # Nothing to do when the source folder is missing.
    if not source.is_dir():
        print(f"Error: Input directory not found at '{input_dir}'")
        return

    # Make sure the destination folder exists before any writes.
    target.mkdir(parents=True, exist_ok=True)
    print(f"Output directory ensured: {target}")

    # Collect every PDF sitting directly in the source folder.
    documents = list(source.glob("*.pdf"))

    if not documents:
        print(f"No PDF files found in '{input_dir}'.")
        return

    print(f"Found {len(documents)} PDF(s) to process.")

    # Convert each document in turn; per-file errors are handled inside
    # convert_pdf_to_md.
    for document in documents:
        convert_pdf_to_md(document, target)

    print("\nProcessing complete.")

convert_pdf_to_md(pdf_path, output_dir)

Converts a single PDF file into a Markdown file.

Parameters:

Name Type Description Default
pdf_path Path

Path object to the input PDF file.

required
output_dir Path

Path object for the output directory.

required
Source code in lmm/utils/importpdfs.py
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
def convert_pdf_to_md(pdf_path: Path, output_dir: Path) -> None:
    """
    Converts a single PDF file into a Markdown file.

    The output file keeps the PDF's base name with a ``.md`` suffix.
    Failures are caught and reported to stdout; the function never
    raises.

    Args:
        pdf_path: Path object to the input PDF file.
        output_dir: Path object for the output directory.
    """
    print(f"Processing: {pdf_path.name}")

    markdown_content: list[str] = []
    output_filename = pdf_path.stem + ".md"
    output_path = output_dir / output_filename

    # Initialize default LAParams to fix the "unpack requires a buffer..." error
    # By explicitly passing this object, we prevent pdfminer.six from performing
    # an internal initialization step that fails on some PDFs.
    default_laparams = LAParams()

    try:
        # Pass the initialized default_laparams to pdfplumber.open()
        # (as a plain dict via __dict__ — presumably the form pdfplumber
        # expects for its laparams argument; verify against pdfplumber docs).
        # NOTE(review): gs_path is a hard-coded Windows Ghostscript
        # location, so the repair step breaks on any other machine —
        # consider a parameter or environment variable.
        with pdfplumber.open(
            pdf_path,
            laparams=default_laparams.__dict__,
            repair=True,
            repair_setting="default",
            gs_path="C:/Program Files/gs/gs10.06.0/bin/gswin64c.exe",
        ) as pdf:
            total_pages = len(pdf.pages)

            for i, page in enumerate(pdf.pages):
                # Extract text retaining layout structure (via 'layout' argument)
                raw_text = page.extract_text(
                    x_tolerance=2, y_tolerance=2, layout=True
                )

                # Pages with no extractable text are skipped entirely.
                if raw_text:
                    # Apply markdown formatting heuristics
                    formatted_text = apply_markdown_heuristics(
                        raw_text
                    )
                    markdown_content.append(formatted_text)

                # Insert a metadata block between pages to track content;
                # no separator is emitted after the final page.
                if i < total_pages - 1:
                    markdown_content.append(
                        f"\n\n---\npage: {i + 1}\n"
                        f"total_pages: {total_pages}\n---\n\n"
                    )

        # Write the final content to the Markdown file
        output_path.write_text(
            "\n".join(markdown_content), encoding="utf-8"
        )
        print(f"Successfully converted to: {output_path}")

    except Exception as e:
        # Broad catch is deliberate: one bad PDF must not stop a whole
        # folder conversion (see convert_folder_to_markdown).
        print(
            f"ERROR: Failed to process {pdf_path.name}. Reason: {e}"
        )