Compare commits
2 Commits
7591a71471
...
85315d9c1c
| Author | SHA1 | Date | |
|---|---|---|---|
| 85315d9c1c | |||
| 2de0faabdb |
128
chatmastermind/glossary.py
Normal file
128
chatmastermind/glossary.py
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
"""
|
||||||
|
Module implementing glossaries for translations.
|
||||||
|
"""
|
||||||
|
import yaml
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
import csv
|
||||||
|
from pathlib import Path
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Type, TypeVar
|
||||||
|
|
||||||
|
GlossaryInst = TypeVar('GlossaryInst', bound='Glossary')
|
||||||
|
|
||||||
|
|
||||||
|
class GlossaryError(Exception):
    """Raised when a glossary cannot be read, written, or parsed."""
|
||||||
|
|
||||||
|
|
||||||
|
def str_presenter(dumper: yaml.Dumper, data: str) -> yaml.ScalarNode:
    """
    Changes the YAML dump style to multiline syntax for multiline strings.
    """
    # Literal block style ('|') only when the string actually spans lines;
    # otherwise fall back to the default scalar style (None).
    is_multiline = len(data.splitlines()) > 1
    scalar_style = '|' if is_multiline else None
    return dumper.represent_scalar('tag:yaml.org,2002:str', data, style=scalar_style)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Glossary:
|
||||||
|
"""
|
||||||
|
A glossary consists of the following parameters:
|
||||||
|
- Name (freely selectable)
|
||||||
|
- Path (full file path)
|
||||||
|
- Source language
|
||||||
|
- Target language
|
||||||
|
- Entries (pairs of source lang and target lang terms)
|
||||||
|
- ID (automatically generated / modified, required by DeepL)
|
||||||
|
"""
|
||||||
|
|
||||||
|
name: str
|
||||||
|
source_lang: str
|
||||||
|
target_lang: str
|
||||||
|
entries: dict[str, str] = field(default_factory=lambda: dict())
|
||||||
|
file_path: Path | None = None
|
||||||
|
ID: str | None = None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_file(cls: Type[GlossaryInst], file_path: Path) -> GlossaryInst:
|
||||||
|
"""
|
||||||
|
Create a glossary from the given file.
|
||||||
|
"""
|
||||||
|
with open(file_path, "r") as fd:
|
||||||
|
try:
|
||||||
|
data = yaml.load(fd, Loader=yaml.FullLoader)
|
||||||
|
# remove any quotes from the entries that YAML may have added while dumping
|
||||||
|
# (e. g. for special keywords like 'yes')
|
||||||
|
clean_entries = {key.strip('\"\' '): value for key, value in data['Entries'].items()}
|
||||||
|
return cls(name=data['Name'],
|
||||||
|
source_lang=data['SourceLang'],
|
||||||
|
target_lang=data['TargetLang'],
|
||||||
|
entries=clean_entries,
|
||||||
|
file_path=file_path,
|
||||||
|
ID=data['ID'] if data['ID'] != 'None' else None)
|
||||||
|
except Exception:
|
||||||
|
raise GlossaryError(f"'{file_path}' does not contain a valid glossary")
|
||||||
|
|
||||||
|
def to_file(self, file_path: Path | None = None) -> None:
|
||||||
|
"""
|
||||||
|
Write glossary to given file.
|
||||||
|
"""
|
||||||
|
if file_path:
|
||||||
|
self.file_path = file_path
|
||||||
|
if not self.file_path:
|
||||||
|
raise GlossaryError("Got no valid path to write glossary")
|
||||||
|
# write YAML
|
||||||
|
with tempfile.NamedTemporaryFile(dir=self.file_path.parent, prefix=self.file_path.name, mode="w", delete=False) as temp_fd:
|
||||||
|
temp_file_path = Path(temp_fd.name)
|
||||||
|
data = {'Name': self.name,
|
||||||
|
'ID': str(self.ID),
|
||||||
|
'SourceLang': self.source_lang,
|
||||||
|
'TargetLang': self.target_lang,
|
||||||
|
'Entries': self.entries}
|
||||||
|
yaml.dump(data, temp_fd, sort_keys=False)
|
||||||
|
shutil.move(temp_file_path, self.file_path)
|
||||||
|
|
||||||
|
def export_csv(self, dictionary: dict[str, str], file_path: Path) -> None:
|
||||||
|
"""
|
||||||
|
Export the 'entries' of this glossary to a file in CSV format (compatible with DeepL).
|
||||||
|
"""
|
||||||
|
with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
|
||||||
|
writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
|
||||||
|
for source_entry, target_entry in self.entries.items():
|
||||||
|
writer.writerow([source_entry, target_entry])
|
||||||
|
|
||||||
|
def export_tsv(self, entries: dict[str, str], file_path: Path) -> None:
|
||||||
|
"""
|
||||||
|
Export the 'entries' of this glossary to a file in TSV format (compatible with DeepL).
|
||||||
|
"""
|
||||||
|
with open(file_path, 'w', encoding='utf-8') as file:
|
||||||
|
for source_entry, target_entry in self.entries.items():
|
||||||
|
file.write(f"{source_entry}\t{target_entry}\n")
|
||||||
|
|
||||||
|
def import_csv(self, file_path: Path) -> None:
|
||||||
|
"""
|
||||||
|
Import the entries from the given CSV file to those of the current glossary.
|
||||||
|
Existing entries are overwritten.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with open(file_path, mode='r', encoding='utf-8') as csvfile:
|
||||||
|
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
|
||||||
|
self.entries = {rows[0]: rows[1] for rows in reader if len(rows) >= 2}
|
||||||
|
except Exception as e:
|
||||||
|
raise GlossaryError(f"Error importing CSV: {e}")
|
||||||
|
|
||||||
|
def import_tsv(self, file_path: Path) -> None:
|
||||||
|
"""
|
||||||
|
Import the entries from the given CSV file to those of the current glossary.
|
||||||
|
Existing entries are overwritten.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with open(file_path, mode='r', encoding='utf-8') as tsvfile:
|
||||||
|
self.entries = {}
|
||||||
|
for line in tsvfile:
|
||||||
|
parts = line.strip().split('\t')
|
||||||
|
if len(parts) == 2:
|
||||||
|
self.entries[parts[0]] = parts[1]
|
||||||
|
except Exception as e:
|
||||||
|
raise GlossaryError(f"Error importing TSV: {e}")
|
||||||
92
tests/test_glossary.py
Normal file
92
tests/test_glossary.py
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
import unittest
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
from chatmastermind.glossary import Glossary
|
||||||
|
|
||||||
|
|
||||||
|
class TestGlossary(unittest.TestCase):
    """Tests for Glossary: YAML round trip plus CSV/TSV import/export."""

    def test_from_file_valid_yaml(self) -> None:
        """from_file() parses a valid YAML glossary and strips YAML quoting."""
        # Prepare a temporary YAML file with valid content
        with tempfile.NamedTemporaryFile('w', delete=False) as yaml_file:
            yaml_file.write("Name: Sample\n"
                            "ID: '123'\n"
                            "SourceLang: en\n"
                            "TargetLang: es\n"
                            "Entries:\n"
                            "  hello: hola\n"
                            "  goodbye: adiós\n"
                            "  'yes': sí\n")  # 'yes' is a YAML keyword and therefore quoted
            yaml_file_path = Path(yaml_file.name)
        # Remove the temporary file even if an assertion below fails
        self.addCleanup(yaml_file_path.unlink)

        glossary = Glossary.from_file(yaml_file_path)
        self.assertEqual(glossary.name, "Sample")
        self.assertEqual(glossary.source_lang, "en")
        self.assertEqual(glossary.target_lang, "es")
        self.assertEqual(glossary.entries, {"hello": "hola", "goodbye": "adiós", "yes": "sí"})

    def test_to_file_writes_yaml(self) -> None:
        """to_file() serializes all glossary fields to YAML."""
        # Create glossary instance
        glossary = Glossary(name="Test", source_lang="en", target_lang="fr", entries={"yes": "oui"})

        with tempfile.NamedTemporaryFile('w', delete=False) as tmp_file:
            file_path = Path(tmp_file.name)
        self.addCleanup(file_path.unlink)
        # Write after the 'with' block so the handle is closed before the file is replaced
        glossary.to_file(file_path)

        with open(file_path, 'r') as file:
            content = file.read()

        self.assertIn("Name: Test", content)
        self.assertIn("SourceLang: en", content)
        self.assertIn("TargetLang: fr", content)
        self.assertIn("Entries", content)
        # 'yes' is a YAML keyword and therefore quoted
        self.assertIn("'yes': oui", content)

    def test_write_read_glossary(self) -> None:
        """Writing a glossary and reading it back yields equal data."""
        # Create glossary instance
        # -> use 'yes' in order to test if the YAML quoting is correctly removed when reading the file
        glossary_write = Glossary(name="Test", source_lang="en", target_lang="fr", entries={"yes": "oui"})

        with tempfile.NamedTemporaryFile('w', delete=False) as tmp_file:
            file_path = Path(tmp_file.name)
        self.addCleanup(file_path.unlink)
        glossary_write.to_file(file_path)

        # create new instance from glossary file
        glossary_read = Glossary.from_file(file_path)
        self.assertEqual(glossary_write.name, glossary_read.name)
        self.assertEqual(glossary_write.source_lang, glossary_read.source_lang)
        self.assertEqual(glossary_write.target_lang, glossary_read.target_lang)
        self.assertDictEqual(glossary_write.entries, glossary_read.entries)

    def test_import_export_csv(self) -> None:
        """CSV export followed by import restores the same entries."""
        glossary = Glossary(name="Test", source_lang="en", target_lang="fr", entries={})

        # First export to CSV
        with tempfile.NamedTemporaryFile('w', delete=False) as csvfile:
            csv_file_path = Path(csvfile.name)
        self.addCleanup(csv_file_path.unlink)
        glossary.entries = {"hello": "salut", "goodbye": "au revoir"}
        glossary.export_csv(glossary.entries, csv_file_path)

        # Now import CSV
        glossary.import_csv(csv_file_path)
        self.assertEqual(glossary.entries, {"hello": "salut", "goodbye": "au revoir"})

    def test_import_export_tsv(self) -> None:
        """TSV export followed by import restores the same entries."""
        glossary = Glossary(name="Test", source_lang="en", target_lang="fr", entries={})

        # First export to TSV
        with tempfile.NamedTemporaryFile('w', delete=False) as tsvfile:
            tsv_file_path = Path(tsvfile.name)
        self.addCleanup(tsv_file_path.unlink)
        glossary.entries = {"hello": "salut", "goodbye": "au revoir"}
        glossary.export_tsv(glossary.entries, tsv_file_path)

        # Now import TSV
        glossary.import_tsv(tsv_file_path)
        self.assertEqual(glossary.entries, {"hello": "salut", "goodbye": "au revoir"})
|
||||||
Loading…
x
Reference in New Issue
Block a user