Commit ba9d9289 authored by Merlijn Wajer
Browse files

doc: update examples

parent f17deaa9
......@@ -17,40 +17,37 @@ and then gzips the content.
from subprocess import check_call, CalledProcessError
from derivermodule.logger import get_logger
from derivermodule.files import canonical_item_filename
from derivermodule.task import get_task_info, write_extra_targets
from derivermodule.metadata import load_item_metadata, write_item_metadata, \
load_files_metadata, write_files_metadata, create_file_metadata, \
append_file_metadata, SOURCE_DERIVATIVE
from derivermodule.const import PB_TMP, PB_ITEM, PB_FAST
from derivermodule.task import get_task_info
from derivermodule.metadata import load_item_metadata, write_item_metadata
from derivermodule.const import PB_TMP, PB_ITEM
from version import VERSION
logger = get_logger('hocr-char-to-word')
if __name__ == '__main__':
logger.info('hocr-char-to-word module version %s' % VERSION)
# Read task.json
info = get_task_info()
identifier = info['identifier']
# Read _meta.xml
metadata = load_item_metadata(identifier)
files_metadata = load_files_metadata(identifier)
source_file = info['sourceFile']
target_file = info['targetFile']
target_format = info['targetFormat']
logger.info('sourceFile: \'%s\' -> targetFile \'%s\'',
source_file, target_file)
# Strip '.gz', create in /tmp
# Strip '.gz', create in /tmp (e.g. /tmp/<identifier>_hocr.html)
target_file_plain = join(PB_TMP, basename(target_file[:-3]))
target_fd = open(target_file_plain, 'w+')
# Call hocr-fold-chars from our hocr package
# Call hocr-fold-chars from our hocr package, installed by Dockerfile
try:
check_call(['hocr-fold-chars', '-f', source_file], stdout=target_fd)
except CalledProcessError:
......@@ -68,24 +65,7 @@ and then gzips the content.
sys.exit(1)
target_fd.close()
# Mark _hocr.html.gz as derivative (mark_original=False)
write_extra_targets([
{'name': canonical_item_filename(target_file), 'mark_original': False}
])
# Create _files.xml entry for our file
# The file metadata should not exist, since we would not be running in this
# module otherwise (target_file is being made, so it cannot exist)
file_md = create_file_metadata(files_metadata,
canonical_item_filename(target_file),
source=SOURCE_DERIVATIVE,
fileformat=target_format)
file_md['chocr_to_word_module_version'] = VERSION
# Append the entry to the files_metadata
append_file_metadata(files_metadata, file_md)
# Write changes, if any.
write_item_metadata(identifier, metadata)
write_files_metadata(identifier, files_metadata)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment