Skip to content

Commit

Permalink
Support str and Path object inputs for file processing function - close
Browse files Browse the repository at this point in the history
  • Loading branch information
titusz committed Jun 28, 2024
1 parent aa5984e commit 786d267
Show file tree
Hide file tree
Showing 12 changed files with 184 additions and 182 deletions.
36 changes: 17 additions & 19 deletions iscc_sdk/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@

import shutil
import tempfile
from os.path import join, basename
from typing import Optional

from pathlib import Path
from PIL import Image, ImageEnhance
from loguru import logger as log
import json
Expand Down Expand Up @@ -42,16 +40,16 @@


def audio_thumbnail(fp):
# type: (str) -> Optional[Image.Image]
# type: (str|Path) -> Image.Image|None
"""
Create a thumbnail from embedded cover art.
:param str fp: Filepath to audio file.
:param fp: Filepath to audio file.
:return: Thumbnail image as PIL Image object
:rtype: Image.Image|None
"""
tempdir = tempfile.mkdtemp()
tempimg = join(tempdir, "cover.jpg")
fp = Path(fp)
tempdir = Path(tempfile.mkdtemp())
tempimg = tempdir / "cover.jpg"
cmd = ["-i", fp, "-an", "-vcodec", "copy", tempimg]
size = idk.sdk_opts.image_thumbnail_size
try:
Expand All @@ -68,14 +66,14 @@ def audio_thumbnail(fp):


def audio_meta_extract(fp):
# type: (str) -> dict
# type: (str|Path) -> dict
"""
Extract metadata from audio file.
:param str fp: Filepath to audio file.
:param fp: Filepath to audio file.
:return: Metadata mapped to IsccMeta schema
:rtype: dict
"""
fp = Path(fp)
mapped = dict()
done = set()

Expand All @@ -86,15 +84,15 @@ def audio_meta_extract(fp):
obj.close()
except OSError as e: # pragma: no cover
# This is a workaround for the issue that taglib requires exclusive access even for reading.
log.warning(f"Create tempfile for taglib access {basename(fp)}: {e}")
log.warning(f"Create tempfile for taglib access {fp.name}: {e}")
try:
with idk.TempFile(fp) as tmp_path:
obj = taglib.File(tmp_path.as_posix())
meta = dict(obj.tags)
mapped["duration"] = obj.length
obj.close()
except Exception as e:
log.warning(f"Failed metadata extraction for {basename(fp)}: {e}")
log.warning(f"Failed metadata extraction for {fp.name}: {e}")
return mapped

for tag, mapped_field in AUDIO_META_MAP.items():
Expand All @@ -113,15 +111,15 @@ def audio_meta_extract(fp):


def audio_meta_embed(fp, meta):
# type: (str, idk.IsccMeta) -> str
# type: (str|Path, idk.IsccMeta) -> str
"""
Embed metadata into a copy of the audio file.
:param str fp: Filepath to source audio file
:param fp: Filepath to source audio file
:param IsccMeta meta: Metadata to embed into audio file
:return: Filepath to new audio file with updated metadata
:rtype: str
"""
fp = Path(fp)
tdir = tempfile.mkdtemp()
tfile = shutil.copy(fp, tdir)
obj = taglib.File(tfile)
Expand All @@ -142,14 +140,14 @@ def audio_meta_embed(fp, meta):


def audio_features_extract(fp):
# type: (str) -> dict
# type: (str|Path) -> dict
"""
Extracts chromaprint fingerprint.
:param str fp: Filepath
:param fp: Filepath
:return: A dict with `duration` in seconds and `fingerprint` 32-bit integers
:rtype: dict
"""
fp = Path(fp)
args = ["-raw", "-json", "-signed", "-length", "0", fp]
proc = idk.run_fpcalc(args)
result = json.loads(proc.stdout)
Expand Down
17 changes: 10 additions & 7 deletions iscc_sdk/docx_.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import tempfile
from pathlib import Path

from docx import Document
import iscc_sdk as idk
import shutil
Expand All @@ -17,17 +19,18 @@


def docx_meta_embed(fp, meta):
# type: (str, idk.IsccMeta) -> str
# type: (str|Path, idk.IsccMeta) -> str
"""
Embed metadata into a copy of the PDF file.
:param str fp: Filepath to source PDF file
:param IsccMeta meta: Metadata to embed into PDF
:return: Filepath to the new PDF file with updated metadata
:rtype: str
Embed metadata into a copy of the DOCX file.
:param fp: Filepath to source DOCX file
:param meta: Metadata to embed into DOCX
:return: Filepath to the new DOCX file with updated metadata
"""
fp = Path(fp)
tempdir = tempfile.mkdtemp()
tempdoc = shutil.copy(fp, tempdir)
doc = Document(fp)
doc = Document(fp.as_posix())
new_meta = doc.core_properties
for iscc_field, docx_field in META_DOCX_MAP.items():
value = getattr(meta, iscc_field)
Expand Down
17 changes: 9 additions & 8 deletions iscc_sdk/epub.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import io
import shutil
import tempfile
from pathlib import Path

import ebookmeta
from PIL import Image, ImageEnhance
Expand All @@ -16,15 +17,15 @@


def epub_thumbnail(fp):
# type: (str) -> Image.Image
# type: (str|Path) -> Image.Image
"""
Create thumbnail from EPUB document cover image.
:param str fp: Filepath to EPUB document.
:param fp: Filepath to EPUB document.
:return: Thumbnail image as PIL Image object
:rtype: Image.Image
"""
meta = ebookmeta.get_metadata(fp)
fp = Path(fp)
meta = ebookmeta.get_metadata(fp.as_posix())
data = meta.cover_image_data
img = Image.open(io.BytesIO(data))
size = idk.sdk_opts.image_thumbnail_size
Expand All @@ -33,15 +34,15 @@ def epub_thumbnail(fp):


def epub_meta_embed(fp, meta):
# type: (str, idk.IsccMeta) -> str
# type: (str|Path, idk.IsccMeta) -> str
"""
Embed metadata into a copy of the EPUB file.
:param str fp: Filepath to source EPUB file
:param fp: Filepath to source EPUB file
:param IsccMeta meta: Metadata to embed into EPUB
:return: Filepath to the new PDF file with updated metadata
:rtype: str
:return: Filepath to the new EPUB file with updated metadata
"""
fp = Path(fp)
tempdir = tempfile.mkdtemp()
tempepub = shutil.copy(fp, tempdir)
new_meta = ebookmeta.get_metadata(tempepub)
Expand Down
52 changes: 24 additions & 28 deletions iscc_sdk/image.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
"""*Image handling module*."""

from pathlib import Path

import pillow_avif
import base64
import io
import shutil
import sys
import json
import tempfile
from os.path import basename, join
from typing import Sequence
import jmespath
from iscc_schema import IsccMeta
Expand Down Expand Up @@ -40,9 +41,8 @@ def image_normalize(img):
"""
Normalize image for hash calculation.
:param Image.Image img: Pillow Image Object
:param img: Pillow Image Object
:return: Normalized and flattened image as 1024-pixel array (from 32x32 gray pixels)
:rtype: Sequence[int]
"""

# Transpose image according to EXIF Orientation tag
Expand Down Expand Up @@ -74,9 +74,8 @@ def image_exif_transpose(img):
"""
Transpose image according to EXIF Orientation tag
:param Image.Image img: Pillow Image Object
:param img: Pillow Image Object
:return: EXIF transposed image
:rtype: Image.Image
"""
img = ImageOps.exif_transpose(img)
log.debug(f"Image exif transpose applied")
Expand All @@ -88,9 +87,8 @@ def image_fill_transparency(img):
"""
Add white background to image if it has alpha transparency.
:param Image.Image img: Pillow Image Object
:param img: Pillow Image Object
:return: Image with transparency replaced by white background
:rtype: Image.Image
"""
if img.mode != "RGBA":
img = img.convert("RGBA")
Expand All @@ -106,9 +104,8 @@ def image_trim_border(img):
Takes the upper left pixel as reference for border color.
:param Image.Image img: Pillow Image Object
:param img: Pillow Image Object
:return: Image with uniform colored (empty) border removed.
:rtype: Image.Image
"""

bg = Image.new(img.mode, img.size, img.getpixel((0, 0)))
Expand All @@ -122,14 +119,14 @@ def image_trim_border(img):


def image_meta_extract(fp):
# type: (str) -> dict
# type: (str|Path) -> dict
"""
Extract metadata from image.
:param str fp: Filepath to image file.
:param fp: Filepath to image file.
:return: Metadata mapped to IsccMeta schema
:rtype: dict
"""
fp = Path(fp)
args = ["--all", fp]
result = idk.run_exiv2json(args)
encoding = sys.stdout.encoding or "utf-8"
Expand Down Expand Up @@ -163,15 +160,15 @@ def image_meta_extract(fp):


def image_meta_embed(fp, meta):
# type: (str, IsccMeta) -> str
# type: (str|Path, IsccMeta) -> Path
"""
Embed metadata into a copy of the image file.
:param str fp: Filepath to source image file
:param IsccMeta meta: Metadata to embed into image
:param fp: Filepath to source image file
:param meta: Metadata to embed into image
:return: Filepath to the new image file with updated metadata
:rtype: str
"""
fp = Path(fp)
cmdf = "reg iscc http://purl.org/iscc/schema\n"
cmdf += "reg dc http://purl.org/dc/elements/1.1/\n"

Expand All @@ -194,42 +191,42 @@ def image_meta_embed(fp, meta):
cmdf += f"set Xmp.dc.rights {meta.rights}\n"

# Create temp filepaths
tempdir = tempfile.mkdtemp()
metafile = join(tempdir, "meta.txt")
imagefile = shutil.copy(fp, tempdir)
tempdir = Path(tempfile.mkdtemp())
metafile = tempdir / "meta.txt"
imagefile = Path(shutil.copy(fp, tempdir))

# Store metadata
with open(metafile, "wt", encoding="utf-8") as outf:
outf.write(cmdf)

# Embed metadata
args = ["-m", metafile, imagefile]
log.debug(f"Embedding {meta.dict(exclude_unset=True)} in {basename(imagefile)}")
log.debug(f"Embedding {meta.dict(exclude_unset=True)} in {imagefile.name}")
idk.run_exiv2(args)
return imagefile


def image_meta_delete(fp):
# type: (str) -> None
# type: (str|Path) -> None
"""
Delete all metadata from image.
:param str fp: Filepath to image file.
:rtype: None
:param fp: Filepath to image file.
"""
fp = Path(fp)
args = ["rm", fp]
return idk.run_exiv2(args)


def image_thumbnail(fp):
# type: (str) -> Image.Image
# type: (str|Path) -> Image.Image
"""
Create a thumbnail for an image.
:param str fp: Filepath to image file.
:param fp: Filepath to image file.
:return: Thumbnail image as PIL Image object
:rtype: Image.Image
"""
fp = Path(fp)
size = idk.sdk_opts.image_thumbnail_size
img = Image.open(fp)
img.thumbnail((size, size), resample=idk.LANCZOS)
Expand All @@ -241,9 +238,8 @@ def image_to_data_url(img):
"""
Convert PIL Image object to WebP Data-URL.
:param Image.Image img: PIL Image object to encode as WebP Data-URL.
:param img: PIL Image object to encode as WebP Data-URL.
:return: Data-URL string
:rtype: str
"""
quality = idk.sdk_opts.image_thumbnail_quality
raw = io.BytesIO()
Expand Down
Loading

0 comments on commit 786d267

Please sign in to comment.