166 lines
6.5 KiB
Python
166 lines
6.5 KiB
Python
"""
|
|
Module implementing glossaries for translations.
|
|
"""
|
|
import yaml
|
|
import tempfile
|
|
import shutil
|
|
import csv
|
|
from pathlib import Path
|
|
from dataclasses import dataclass, field
|
|
from typing import Type, TypeVar, ClassVar
|
|
|
|
# Type variable bound to Glossary so 'from_file' returns the calling subclass type.
GlossaryInst = TypeVar('GlossaryInst', bound='Glossary')
|
|
|
|
|
|
class GlossaryError(Exception):
    """Raised for any glossary-related failure (missing file, bad format, bad suffix)."""
|
|
|
|
|
|
def str_presenter(dumper: yaml.Dumper, data: str) -> yaml.ScalarNode:
    """
    Changes the YAML dump style to multiline syntax for multiline strings.

    Strings spanning more than one line are emitted in block ('|') style;
    all other strings keep the default scalar style.
    """
    block_style = '|' if len(data.splitlines()) > 1 else None
    return dumper.represent_scalar('tag:yaml.org,2002:str', data, style=block_style)
|
|
|
|
|
|
@dataclass
|
|
class Glossary:
|
|
"""
|
|
A glossary consists of the following parameters:
|
|
- Name (freely selectable)
|
|
- Path (full file path, suffix is automatically generated)
|
|
- Source language
|
|
- Target language
|
|
- Description (optional)
|
|
- Entries (pairs of source lang and target lang terms)
|
|
- ID (automatically generated / modified, required by DeepL)
|
|
"""
|
|
|
|
name: str
|
|
source_lang: str
|
|
target_lang: str
|
|
file_path: Path | None = None
|
|
desc: str | None = None
|
|
entries: dict[str, str] = field(default_factory=lambda: dict())
|
|
ID: str | None = None
|
|
file_suffix: ClassVar[str] = '.glo'
|
|
|
|
def __post_init__(self) -> None:
|
|
# FIXME: check for valid languages
|
|
pass
|
|
|
|
@classmethod
|
|
def from_file(cls: Type[GlossaryInst], file_path: Path) -> GlossaryInst:
|
|
"""
|
|
Create a glossary from the given file.
|
|
"""
|
|
if not file_path.exists():
|
|
raise GlossaryError(f"Glossary file '{file_path}' does not exist")
|
|
if file_path.suffix != cls.file_suffix:
|
|
raise GlossaryError(f"File type '{file_path.suffix}' is not supported")
|
|
with open(file_path, "r") as fd:
|
|
try:
|
|
# use BaseLoader so every entry is read as a string
|
|
# - disables automatic conversions
|
|
# - makes it possible to omit quoting for YAML keywords in entries (e. g. 'yes')
|
|
# - also correctly reads quoted entries
|
|
data = yaml.load(fd, Loader=yaml.BaseLoader)
|
|
clean_entries = data['Entries']
|
|
return cls(name=data['Name'],
|
|
source_lang=data['SourceLang'],
|
|
target_lang=data['TargetLang'],
|
|
file_path=file_path,
|
|
desc=data['Description'],
|
|
entries=clean_entries,
|
|
ID=data['ID'] if data['ID'] != 'None' else None)
|
|
except Exception:
|
|
raise GlossaryError(f"'{file_path}' does not contain a valid glossary")
|
|
|
|
def to_file(self, file_path: Path | None = None) -> None:
|
|
"""
|
|
Write glossary to given file.
|
|
"""
|
|
if file_path:
|
|
self.file_path = file_path
|
|
if not self.file_path:
|
|
raise GlossaryError("Got no valid path to write glossary")
|
|
# check / add valid suffix
|
|
if not self.file_path.suffix:
|
|
self.file_path = self.file_path.with_suffix(self.file_suffix)
|
|
elif self.file_path.suffix != self.file_suffix:
|
|
raise GlossaryError(f"File suffix '{self.file_path.suffix}' is not supported")
|
|
# write YAML
|
|
with tempfile.NamedTemporaryFile(dir=self.file_path.parent, prefix=self.file_path.name, mode="w", delete=False) as temp_fd:
|
|
temp_file_path = Path(temp_fd.name)
|
|
data = {'Name': self.name,
|
|
'Description': str(self.desc),
|
|
'ID': str(self.ID),
|
|
'SourceLang': self.source_lang,
|
|
'TargetLang': self.target_lang,
|
|
'Entries': self.entries}
|
|
yaml.dump(data, temp_fd, sort_keys=False)
|
|
shutil.move(temp_file_path, self.file_path)
|
|
|
|
def export_csv(self, dictionary: dict[str, str], file_path: Path) -> None:
|
|
"""
|
|
Export the 'entries' of this glossary to a file in CSV format (compatible with DeepL).
|
|
"""
|
|
with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
|
|
writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
|
|
for source_entry, target_entry in self.entries.items():
|
|
writer.writerow([source_entry, target_entry])
|
|
|
|
def export_tsv(self, entries: dict[str, str], file_path: Path) -> None:
|
|
"""
|
|
Export the 'entries' of this glossary to a file in TSV format (compatible with DeepL).
|
|
"""
|
|
with open(file_path, 'w', encoding='utf-8') as file:
|
|
for source_entry, target_entry in self.entries.items():
|
|
file.write(f"{source_entry}\t{target_entry}\n")
|
|
|
|
def import_csv(self, file_path: Path) -> None:
|
|
"""
|
|
Import the entries from the given CSV file to those of the current glossary.
|
|
Existing entries are overwritten.
|
|
"""
|
|
try:
|
|
with open(file_path, mode='r', encoding='utf-8') as csvfile:
|
|
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
|
|
self.entries = {rows[0]: rows[1] for rows in reader if len(rows) >= 2}
|
|
except Exception as e:
|
|
raise GlossaryError(f"Error importing CSV: {e}")
|
|
|
|
def import_tsv(self, file_path: Path) -> None:
|
|
"""
|
|
Import the entries from the given CSV file to those of the current glossary.
|
|
Existing entries are overwritten.
|
|
"""
|
|
try:
|
|
with open(file_path, mode='r', encoding='utf-8') as tsvfile:
|
|
self.entries = {}
|
|
for line in tsvfile:
|
|
parts = line.strip().split('\t')
|
|
if len(parts) == 2:
|
|
self.entries[parts[0]] = parts[1]
|
|
except Exception as e:
|
|
raise GlossaryError(f"Error importing TSV: {e}")
|
|
|
|
def to_str(self, with_entries: bool = False) -> str:
|
|
"""
|
|
Return the current glossary as a string.
|
|
"""
|
|
output: list[str] = []
|
|
output.append(f'{self.name} (ID: {self.ID}):')
|
|
if self.desc and self.desc != 'None':
|
|
output.append('- ' + self.desc)
|
|
output.append(f'- Languages: {self.source_lang} -> {self.target_lang}')
|
|
if with_entries:
|
|
output.append('- Entries:')
|
|
for source, target in self.entries.items():
|
|
output.append(f' {source} : {target}')
|
|
else:
|
|
output.append(f'- Entries: {len(self.entries)}')
|
|
return '\n'.join(output)
|