Commit 3ae59ae7 authored by Eelco van der Wel's avatar Eelco van der Wel :speech_balloon:
Browse files

Merge branch 'photo-refactor' into 'dev'

Photo refactor

See merge request !151
parents 9c7d27ce 5d4cdb0a
Pipeline #5474 passed with stages
in 3 minutes and 55 seconds
Showing with 665 additions and 328 deletions
+665 -328
%% Cell type:code id: tags:
``` python
#default_exp data.basic
%load_ext autoreload
%autoreload 2
```
%% Cell type:code id: tags:
``` python
# export
from pymemri.imports import *
from urllib.request import urlretrieve
import requests
from tqdm import tqdm
import zipfile
import cv2
```
%% Cell type:markdown id: tags:
# Basic
%% Cell type:code id: tags:
``` python
# export
Path.ls = lambda x: list(x.iterdir())
PYI_HOME = Path.cwd().parent
PYI_TESTDATA = PYI_HOME / "test" / "data"
HOME_DIR = Path.home()
MODEL_DIR = HOME_DIR / ".memri" / "models"
MEMRI_S3 = "https://memri.s3-eu-west-1.amazonaws.com"
MODEL_DIR.mkdir(parents=True, exist_ok=True)
def read_file(path):
    """Return the full text content of the file at `path`.

    The file is opened in text mode with the platform default encoding and
    is closed before returning, so no file handle is leaked.
    """
    with open(path, "r") as file_in:
        return file_in.read()
def read_json(path):
    """Decode the JSON document stored at `path` and return the resulting object."""
    with open(path) as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
def write_json(obj, fname, indent=4):
    """Serialize `obj` as JSON into the file `fname`, overwriting it.

    `indent` is passed straight to the JSON encoder.
    """
    with open(fname, 'w') as sink:
        sink.write(json.dumps(obj, indent=indent))
def unzip(f, dest):
    """Extract every member of the zip archive `f` into the directory `dest`."""
    archive = zipfile.ZipFile(str(f))
    try:
        archive.extractall(str(dest))
    finally:
        # Always release the archive handle, even if extraction fails.
        archive.close()
def resize(img, maxsize):
    """Resize `img` with cubic interpolation to the size computed by `get_size`."""
    return cv2.resize(
        img,
        dsize=get_size(img, maxsize),
        interpolation=cv2.INTER_CUBIC,
    )
def get_size(img, maxsize):
    """Compute a `(width, height)` target so the longest side is about `maxsize`.

    `img.shape` is read as `(height, width[, channels])`. The aspect ratio is
    preserved by dividing both sides by the same factor.

    Raises:
        AssertionError: if `img` has fewer than two dimensions.
    """
    s = img.shape
    assert len(s) > 1
    # Only the two spatial axes take part in the scale factor; including the
    # channel axis would give a wrong factor for images smaller than their
    # channel count (e.g. a 2x2 RGB image).
    div = max(s[:2]) / maxsize
    return (int(s[1] // div), int(s[0] // div))
```
%% Cell type:markdown id: tags:
# Export -
%% Cell type:code id: tags:
``` python
# hide
from nbdev.export import *
notebook2script()
```
%% Output
Converted basic.ipynb.
Converted cvu.utils.ipynb.
Converted data.photo.ipynb.
Converted exporters.query.ipynb.
Converted index.ipynb.
Converted itembase.ipynb.
Converted plugin.authenticators.credentials.ipynb.
Converted plugin.authenticators.oauth.ipynb.
Converted plugin.listeners.ipynb.
Converted plugin.pluginbase.ipynb.
Converted plugin.schema.ipynb.
Converted plugin.states.ipynb.
Converted plugins.authenticators.password.ipynb.
Converted pod.api.ipynb.
Converted pod.client.ipynb.
Converted pod.db.ipynb.
Converted pod.utils.ipynb.
Converted template.config.ipynb.
Converted template.formatter.ipynb.
Converted test_schema.ipynb.
Converted test_utils.ipynb.
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:code id: tags:
``` python
%load_ext autoreload
%autoreload 2
# default_exp data.photo
```
%% Cell type:code id: tags:
``` python
# export
from pymemri.data.schema import Item
```
%% Cell type:code id: tags:
``` python
# export
from pymemri.data.schema import *
from pymemri.data.basic import *
from matplotlib.pyplot import imshow
from matplotlib import patches
from matplotlib.collections import PatchCollection
from numpy.linalg import norm
from hashlib import sha256
import cv2
import matplotlib.pyplot as plt
import math
import numpy as np
import io
from PIL import Image
from hashlib import sha256
from typing import Any
```
%% Cell type:markdown id: tags:
# Photo
%% Cell type:code id: tags:
``` python
# export
NUMPY, BYTES = "numpy", "bytes"
DEFAULT_ENCODING = "PNG"
```
%% Cell type:code id: tags:
``` python
# export
def show_images(images, cols = 3, titles = None):
    """Plot a list of images in a grid with `cols` columns.

    `images` is either a list of `Photo` objects (their `.data` is plotted) or
    a list of arrays. `titles`, when given, must have one entry per image.

    Raises:
        AssertionError: if `titles` is given with a different length than `images`.
    """
    image_list = [x.data for x in images] if isinstance(images[0], Photo) else images
    assert((titles is None) or (len(image_list) == len(titles)))
    n_images = len(image_list)
    if titles is None:
        titles = [""] * n_images
    n_rows = int(np.ceil(n_images / float(cols)))
    fig = plt.figure()
    for n, (image, title) in enumerate(zip(image_list, titles)):
        a = fig.add_subplot(n_rows, cols, n + 1)
        a.axis('off')
        if image.ndim == 2:
            # Single-channel image: it has no third axis to reverse, so plot it
            # as-is with a gray colormap instead of indexing three axes.
            plt.gray()
            plt.imshow(image)
        else:
            # Reverse the channel axis before plotting
            # (presumably BGR -> RGB for cv2-loaded arrays; verify against callers).
            plt.imshow(image[:, :, ::-1])
        a.set_title(title)
    fig.set_size_inches(np.array(fig.get_size_inches()) * n_images)
    plt.show()
def get_size(img, maxsize):
    """Compute a `(width, height)` target so the longest side is about `maxsize`.

    `img.shape` is read as `(height, width[, channels])`. The aspect ratio is
    preserved by dividing both sides by the same factor.

    Raises:
        AssertionError: if `img` has fewer than two dimensions.
    """
    s = img.shape
    assert len(s) > 1
    # Ignore the channel axis when picking the longest side; otherwise a tiny
    # multi-channel image would be scaled by its channel count.
    div = max(s[:2]) / maxsize
    return (int(s[1] // div), int(s[0] // div))
def resize(img, maxsize):
    """Resize `img` with cubic interpolation to the size computed by `get_size`."""
    target_size = get_size(img, maxsize)
    resized = cv2.resize(img, dsize=target_size, interpolation=cv2.INTER_CUBIC)
    return resized
def get_height_width_channels(img):
    """Return `img.shape`, padding a 2-D shape with a channel count of 1."""
    dims = img.shape
    if len(dims) != 2:
        # Anything that is not 2-D is returned exactly as reported by the array.
        return dims
    height, width = dims
    return height, width, 1
```
%% Cell type:code id: tags:
``` python
# export
class Photo(Item):
    """Item wrapping an encoded image together with its basic metadata.

    `data` holds the encoded image bytes (the value `to_PIL` hands to
    `PIL.Image.open`). `width`, `height`, `channels`, `encoding` and `mode`
    describe that image. Every constructor helper attaches a `File` item,
    carrying the sha256 of the bytes, through the `file` edge.
    """

    properties = Item.properties + ["width", "height", "channels", "encoding", "mode"]
    edges = Item.edges + ["file"]

    def __init__(
        self,
        data: Any = None,
        includes: Any = None,  # TODO
        thumbnail: Any = None,  # TODO
        height: int = None,
        width: int = None,
        channels: int = None,
        encoding: str = None,
        mode: str = None,
        file: list = None,
        _file_created: bool = False,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.private = ["data", "embedding", "path"]  # TODO
        self.height = height
        self.width = width
        self.channels = channels
        self.encoding = encoding
        self.mode = mode
        # Never share one list between instances.
        self.file = file if file is not None else []
        self.data = data
        self._file_created = _file_created

    def show(self):
        """Display the image with matplotlib.

        Raises:
            ValueError: if the photo has no data (raised by `to_PIL`).
        """
        fig, ax = plt.subplots(1)
        fig.set_figheight(15)
        fig.set_figwidth(15)
        ax.axis("off")
        # `data` is encoded bytes, so decode to an array before plotting.
        imshow(self.to_np())
        fig.set_size_inches((6, 6))
        plt.show()

    @property
    def size(self):
        """The `(width, height)` pair stored on this photo."""
        return self.width, self.height

    @classmethod
    def from_data(cls, *args, **kwargs):
        """Build a Photo via the parent class' `from_data`."""
        # TODO: handle an attached file (`res.file[0]`) once that is specified.
        return super().from_data(*args, **kwargs)

    @classmethod
    def _from_encoded(cls, _bytes, encoding, mode, shape, **kwargs):
        """Create a Photo from encoded bytes plus metadata and attach its File."""
        w, h, c = shape
        res = cls(
            data=_bytes,
            height=h,
            width=w,
            channels=c,
            encoding=encoding,
            mode=mode,
            **kwargs
        )
        file = File.from_data(sha256=sha256(_bytes).hexdigest())
        res.add_edge("file", file)
        return res

    @classmethod
    def from_path(cls, path, size=None):
        """Load the image at `path` and re-encode it in its own format.

        `size` is accepted for backward compatibility and is currently ignored.
        """
        # Context manager closes the underlying file once the bytes are produced.
        with Image.open(path) as pil_image:
            encoding, mode, shape = cls.infer_PIL_metadata(pil_image)
            _bytes = cls.PIL_to_bytes(pil_image, encoding)
        return cls._from_encoded(_bytes, encoding, mode, shape)

    @classmethod
    def from_np(cls, data, size=None, *args, **kwargs):
        """Create a Photo from a numpy array, encoded with `DEFAULT_ENCODING`.

        Args:
            data: array accepted by `PIL.Image.fromarray`.
            size: optional `(width, height)` passed to `PIL.Image.resize`;
                the image is left untouched when `None`.
            *args: accepted for backward compatibility, unused.
            **kwargs: forwarded to the `Photo` constructor.
        """
        pil_image = Image.fromarray(data)
        if size is not None:
            pil_image = pil_image.resize(size)
        encoding, mode, shape = cls.infer_PIL_metadata(pil_image)
        _bytes = cls.PIL_to_bytes(pil_image, encoding)
        return cls._from_encoded(_bytes, encoding, mode, shape, **kwargs)

    @classmethod
    def from_bytes(cls, _bytes):
        """Create a Photo from already-encoded image bytes (stored unchanged)."""
        pil_image = Image.open(io.BytesIO(_bytes))
        encoding, mode, shape = cls.infer_PIL_metadata(pil_image)
        return cls._from_encoded(_bytes, encoding, mode, shape)

    @staticmethod
    def PIL_to_bytes(pil_image, encoding):
        """Encode `pil_image` in the format `encoding` and return the bytes."""
        byte_io = io.BytesIO()
        pil_image.save(byte_io, encoding)
        return byte_io.getvalue()

    @staticmethod
    def infer_PIL_metadata(pil_image):
        """Return `(encoding, mode, (width, height, channels))` for a PIL image.

        `encoding` falls back to `DEFAULT_ENCODING` when the image has no
        format (e.g. it was created from an array rather than a file).
        """
        encoding = pil_image.format or DEFAULT_ENCODING
        mode = pil_image.mode
        # PIL reports size as (width, height); the channel count comes from
        # the image bands, not from the size tuple.
        w, h = pil_image.size
        c = len(pil_image.getbands())
        return encoding, mode, (w, h, c)

    def to_PIL(self):
        """Decode `data` into a PIL image.

        Raises:
            ValueError: if the photo has no data.
        """
        if self.data is None:
            raise ValueError("Photo object has no data")
        return Image.open(io.BytesIO(self.data))

    def to_np(self):
        """Decode `data` and return it as a numpy array."""
        pil_img = self.to_PIL()
        return np.asarray(pil_img)
%% Cell type:markdown id: tags:
%% Cell type:code id: tags:
# Iphoto -
``` python
photo_path = Photo.from_path("images/labrador.jpg")
assert photo_path.encoding == "JPEG"
assert photo_path.mode == "RGB"
```
%% Cell type:code id: tags:
``` python
with open("images/labrador.jpg", "rb") as f:
b = f.read()
photo_bytes = Photo.from_bytes(b)
assert photo_bytes.encoding == "JPEG"
assert photo_bytes.mode == "RGB"
```
%% Cell type:code id: tags:
``` python
# # export
# class IPhoto(Photo):
# properties = Item.properties + [
# "service",
# "identifier",
# "secret",
# "code",
# "handle",
# "refreshToken",
# "errorMessage",
# "accessToken",
# "displayName"
# ]
# edges = Item.edges + ['belongsTo', 'contact']
# def __init__(self, data=None, embedding=None,path=None, encoding=None, *args, **kwargs):
# self.private = ["data", "embedding", "path"]
# super().__init__(*args, **kwargs)
# self.data=data
# self.embedding=embedding
# self.path=path
# self.encoding=encoding
# def draw_boxes(self, boxes):
# print(f"Plotting {len(boxes)} face boundingboxes")
# fig,ax = plt.subplots(1)
# fig.set_figheight(15)
# fig.set_figwidth(15)
# ax.axis('off')
# # Display the image
# ax.imshow(self.data[:,:,::-1])
# ps = []
# # Create a Rectangle patch
# for b in boxes:
# rect = self.box_to_rect(b)
# ax.add_patch(rect)
# ps.append(rect)
# fig.set_size_inches((6,6))
# plt.show()
# def get_crop(self, box, landmark=None):
# b = [max(0, int(x)) for x in box]
# if landmark is not None:
# return face_align.norm_crop(self.data, landmark=landmark)
# else:
# return self.data[b[1]:b[3], b[0]:b[2], :]
# def get_crops(self, boxes, landmarks=None):
# crops = []
# if landmarks is None:
# print("you are getting unnormalized crops, which are lower quality for recognition")
# for i, b in enumerate(boxes):
# crop = self.get_crop(b, landmarks[i] if landmarks is not None else None)
# crops.append(crop)
# return crops
# def plot_crops(self, boxes, landmarks=None):
# crops = self.get_crops(boxes, landmarks)
# show_images(crops, cols=3)
# @staticmethod
# def box_to_rect(box):
# x = box[0]
# y = box[1]
# w = box[2]-box[0]
# h = box[3]-box[1]
# return patches.Rectangle((x,y),w,h, linewidth=2,edgecolor='r',facecolor='none')
np_image = np.asarray(Image.open("images/labrador.jpg"))
photo_np = Photo.from_np(np_image)
assert photo_np.encoding == DEFAULT_ENCODING
assert photo_np.mode == "RGB"
```
%% Cell type:code id: tags:
``` python
grayscale_img = np.uint8(np.random.random([500, 500]) * 255)
photo = Photo.from_np(grayscale_img)
assert photo.channels == 1
assert photo.mode == "L"
```
%% Cell type:markdown id: tags:
# Export -
%% Cell type:code id: tags:
``` python
# hide
from nbdev.export import *
notebook2script()
```
%% Output
Converted basic.ipynb.
Converted cvu.utils.ipynb.
Converted data.photo.ipynb.
Converted importers.Importer.ipynb.
Converted importers.util.ipynb.
Converted exporters.query.ipynb.
Converted index.ipynb.
Converted indexers.indexer.ipynb.
Converted itembase.ipynb.
Converted plugin.authenticators.credentials.ipynb.
Converted plugin.authenticators.oauth.ipynb.
Converted plugin.listeners.ipynb.
Converted plugin.pluginbase.ipynb.
Converted plugin.schema.ipynb.
Converted plugin.states.ipynb.
Converted plugins.authenticators.password.ipynb.
Converted pod.api.ipynb.
Converted pod.client.ipynb.
Converted pod.db.ipynb.
Converted pod.utils.ipynb.
Converted template.config.ipynb.
Converted template.formatter.ipynb.
Converted test_schema.ipynb.
Converted test_utils.ipynb.
%% Cell type:code id: tags:
``` python
```
......
This diff is collapsed.
nbs/images/labrador.jpg

571 KB

%% Cell type:code id: tags:
``` python
# default_exp pod.client
%load_ext autoreload
%autoreload 2
```
%% Cell type:markdown id: tags:
# Pod Client
%% Cell type:code id: tags:
``` python
# export
from pymemri.data.basic import *
from pymemri.data.schema import *
from pymemri.data.itembase import Edge, ItemBase, Item
from pymemri.data.photo import Photo, NUMPY, BYTES
from pymemri.imports import *
from hashlib import sha256
from pymemri.pod.db import DB
from pymemri.pod.utils import *
from pymemri.plugin.schema import *
from pymemri.test_utils import get_ci_variables
from pymemri.pod.api import PodAPI, PodError, DEFAULT_POD_ADDRESS, POD_VERSION
from typing import List, Union
import uuid
import urllib
from datetime import datetime
```
%% Cell type:code id: tags:
``` python
# export
class PodClient:
# Mapping from python type to schema type
# TODO move to data.schema once schema is refactored
TYPE_TO_SCHEMA = {
bool: "Bool",
str: "Text",
int: "Integer",
float: "Real",
datetime: "DateTime",
}
def __init__(
    self,
    url=DEFAULT_POD_ADDRESS,
    version=POD_VERSION,
    database_key=None,
    owner_key=None,
    auth_json=None,
    register_base_schema=True,
    verbose=False,
):
    """Create a client, test the pod connection and set up local state.

    Args:
        url: pod address handed to `PodAPI`.
        version: pod API version handed to `PodAPI`.
        database_key: database key; a random key is generated when `None`.
        owner_key: owner key; a random key is generated when `None`.
        auth_json: forwarded to `PodAPI`.
        register_base_schema: when true (default), register the base
            schemas via `register_base_schemas`.
        verbose: stored on the client and forwarded to `PodAPI`.
    """
    self.verbose = verbose
    self.database_key = (
        database_key if database_key is not None else self.generate_random_key()
    )
    self.owner_key = (
        owner_key if owner_key is not None else self.generate_random_key()
    )
    self.api = PodAPI(
        database_key=self.database_key,
        owner_key=self.owner_key,
        url=url,
        version=version,
        auth_json=auth_json,
        verbose=verbose,
    )
    self.api.test_connection()
    self.local_db = DB()
    self.registered_classes = dict()
    # Honor the flag: it was previously accepted but ignored.
    if register_base_schema:
        self.register_base_schemas()
@classmethod
def from_local_keys(cls, path=DEFAULT_POD_KEY_PATH, **kwargs):
return cls(
database_key=read_pod_key("database_key"),
owner_key=read_pod_key("owner_key"),
**kwargs,
)
@staticmethod
def generate_random_key():
return "".join([str(random.randint(0, 9)) for i in range(64)])
def register_base_schemas(self):
    """Add PluginRun, CVUStoredDefinition, Account and Photo to the pod schema.

    Raises:
        AssertionError: if `add_to_schema` reports failure.
    """
    # Do the call outside of an `assert` statement: asserts are removed under
    # `python -O`, which would silently skip the registration altogether.
    registered = self.add_to_schema(PluginRun, CVUStoredDefinition, Account, Photo)
    if not registered:
        raise AssertionError("Could not register base schemas")
def add_to_db(self, node):
existing = self.local_db.get(node.id)
if existing is None and node.id is not None:
self.local_db.add(node)
def reset_local_db(self):
self.local_db = DB()
def get_create_dict(self, node):
    """Return `node.to_json()` without the entries whose value equals `[]`."""
    create_dict = {}
    for key, value in node.to_json().items():
        if value != []:
            create_dict[key] = value
    return create_dict
def create(self, node):
create_dict = self.get_create_dict(node)
try:
result = self.api.create_item(create_dict)
node.id = result
self.add_to_db(node)
return True
except Exception as e:
print(e)
return False
def create_photo(self, photo):
# create the file
# file_success = self.create_photo_file(photo)
file = photo.file[0]
# self.create(file)
# create the photo
items_edges_success = self.bulk_action(
create_items=[photo, file], create_edges=photo.get_edges("file")
)
if not items_edges_success:
raise ValueError("Could not create file or photo item")
return self._upload_image(photo.data)
def _property_dicts_from_instance(self, node):
create_items = []
attributes = node.to_json()
for k, v in attributes.items():
if type(v) not in self.TYPE_TO_SCHEMA:
raise ValueError(f"Could not add property {k} with type {type(v)}")
value_type = self.TYPE_TO_SCHEMA[type(v)]
create_items.append(
{
"type": "ItemPropertySchema",
"itemType": attributes["type"],
"propertyName": k,
"valueType": value_type,
}
)
return create_items
def _property_dicts_from_type(self, item):
create_items = []
for property, p_type in item.get_property_types().items():
p_type = self.TYPE_TO_SCHEMA[p_type]
create_items.append(
{
"type": "ItemPropertySchema",
"itemType": item.__name__,
"propertyName": property,
"valueType": p_type,
}
)
return create_items
def add_to_schema(self, *items: List[Union[object, type]]):
create_items = []
for item in items:
if isinstance(item, type):
property_dicts = self._property_dicts_from_type(item)
else:
property_dicts = self._property_dicts_from_instance(item)
item = type(item)
create_items.extend(property_dicts)
self.registered_classes[item.__name__] = item
try:
self.api.bulk(create_items=create_items)
return True
except Exception as e:
print(e)
return False
def _upload_image(self, img):
if isinstance(img, np.ndarray):
return self.upload_file(img.tobytes())
elif isinstance(img, bytes):
return self.upload_file(img)
else:
raise ValueError(f"Unknown image data type {type(img)}")
def upload_file(self, file):
try:
self.api.upload_file(file)
return True
except PodError as e:
# 409 = CONFLICT, file already exists
if e.status == 409:
return True
return False
def get_file(self, sha):
return self.api.get_file(sha)
def get_photo(self, id, size=640):
photo = self.get(id)
self._load_photo_data(photo, size=size)
return photo
def _load_photo_data(self, photo, size=None):
    """Fill `photo.data` with the bytes of its first attached file.

    Nothing is loaded when the photo has no attached file or already has
    data; a message is printed in that case. `size` is accepted for backward
    compatibility and is not used: the stored bytes are assigned unchanged.
    """
    if len(photo.file) > 0 and photo.data is None:
        file = self.get_file(photo.file[0].sha256)
        if file is None:
            print(
                f"Could not load data of {photo} attached file item does not have data in pod"
            )
            return
        photo.data = file
    else:
        print(f"could not load data of {photo}, no file attached")
def create_if_external_id_not_exists(self, node):
if not self.external_id_exists(node):
self.create(node)
def external_id_exists(self, node):
if node.externalId is None:
return False
existing = self.search({"externalId": node.externalId})
return len(existing) > 0
def create_edges(self, edges):
return self.bulk_action(create_edges=edges)
def delete_items(self, items):
return self.bulk_action(delete_items=items)
def delete_all(self):
items = self.get_all_items()
self.delete_items(items)
@staticmethod
def gather_batch(items, start_idx, start_size=0, max_size=5000000):
idx = start_idx
total_size = start_size
batch_items = []
for i, x in enumerate(items):
if i < idx:
continue
elif len(str(x)) > max_size:
idx = i + 1
print("Could not add item: Item exceeds max item size")
elif total_size + len(str(x)) < max_size:
batch_items.append(x)
total_size += len(str(x))
idx = i + 1
else:
break
return batch_items, idx, total_size
def bulk_action(
self, create_items=None, update_items=None, create_edges=None, delete_items=None
):
# we need to set the id to not lose the reference
if create_items is not None:
for c in create_items:
if c.id is None:
c.id = uuid.uuid4().hex
create_items = (
[self.get_create_dict(i) for i in create_items]
if create_items is not None
else []
)
update_items = (
[self.get_update_dict(i) for i in update_items]
if update_items is not None
else []
)
create_edges = (
[self.get_create_edge_dict(i) for i in create_edges]
if create_edges is not None
else []
)
# Note: skip delete_items without id, as items that are not in pod cannot be deleted
delete_items = (
[item.id for item in delete_items if item.id is not None]
if delete_items is not None
else []
)
n_total = len(create_items + update_items + create_edges + delete_items)
n = 0
i_ci, i_ui, i_ce, i_di = 0, 0, 0, 0
while not (
i_ci == len(create_items)
and i_ui == len(update_items)
and i_ce == len(create_edges)
and i_di == len(delete_items)
):
batch_size = 0
create_items_batch, i_ci, batch_size = self.gather_batch(
create_items, i_ci, start_size=batch_size
)
update_items_batch, i_ui, batch_size = self.gather_batch(
update_items, i_ui, start_size=batch_size
)
delete_items_batch, i_di, batch_size = self.gather_batch(
delete_items, i_di, start_size=batch_size
)
if i_ci == len(create_items):
create_edges_batch, i_ce, batch_size = self.gather_batch(
create_edges, i_ce, start_size=batch_size
)
else:
create_edges_batch = []
n_batch = len(
create_items_batch
+ update_items_batch
+ create_edges_batch
+ delete_items_batch
)
n += n_batch
print(f"BULK: Writing {n}/{n_total} items/edges")
try:
result = self.api.bulk(
create_items_batch,
update_items_batch,
create_edges_batch,
delete_items_batch,
)
except PodError as e:
print(e)
print("could not complete bulk action, aborting")
return False
print(f"Completed Bulk action, written {n} items/edges")
return True
def get_create_edge_dict(self, edge):
    """Build the edge payload: source id, target id and edge type name."""
    source_id = edge.source.id
    target_id = edge.target.id
    return dict(_source=source_id, _target=target_id, _name=edge._type)
def create_edge(self, edge):
edge_dict = self.get_create_edge_dict(edge)
try:
self.api.create_edge(edge_dict)
return True
except PodError as e:
print(e)
return False
def get(self, id, expanded=True, include_deleted=False):
    """Fetch the item with the given id.

    Args:
        id: id of the item to fetch.
        expanded: when true, fetch via `_get_item_expanded`; otherwise via
            `_get_item_with_properties`.
        include_deleted: when false, deleted items are not returned.

    Returns:
        The item, or `None` (after printing a message) when the item is
        deleted and `include_deleted` is false.

    Raises:
        ValueError: if no item was found for `id`.
    """
    if not expanded:
        res = self._get_item_with_properties(id)
    else:
        # Forward the flag; otherwise a deleted item could never be fetched
        # in expanded form, even when explicitly requested.
        res = self._get_item_expanded(id, include_deleted=include_deleted)
    if res is None:
        raise ValueError(f"Item with id {id} does not exist")
    elif res.deleted and not include_deleted:
        print(f"Item with id {id} has been deleted")
        return
    return res
def get_all_items(self):
raise NotImplementedError()
def filter_deleted(self, items):
    """Return the items whose `deleted` attribute does not equal `True`."""
    kept = []
    for candidate in items:
        if candidate.deleted == True:
            continue
        kept.append(candidate)
    return kept
def _get_item_expanded(self, id, include_deleted=False):
    """Fetch an item and attach the edges returned by `get_edges`.

    Returns `None` when `self.get` returns no item (e.g. it was deleted and
    `include_deleted` is false). A failed edge lookup (`get_edges` returning
    `None`) leaves the item without added edges instead of crashing.
    """
    item = self.get(id, expanded=False, include_deleted=include_deleted)
    if item is None:
        return None
    # `get_edges` returns None after a pod error; treat that as "no edges".
    edges = self.get_edges(id) or []
    for e in edges:
        item.add_edge(e["name"], e["item"])
    return item
def get_edges(self, id):
try:
result = self.api.get_edges(id)
for d in result:
d["item"] = self.item_from_json(d["item"])
return result
except PodError as e:
print(e)
return
def _get_item_with_properties(self, id):
try:
result = self.api.get_item(str(id))
if not len(result):
return
return self.item_from_json(result[0])
except PodError as e:
print(e)
return
def get_update_dict(self, node):
    """Return `node.to_json(dates=False)` with the "type" and "deleted" keys removed."""
    update_dict = node.to_json(dates=False)
    for excluded_key in ("type", "deleted"):
        update_dict.pop(excluded_key, None)
    return update_dict
def update_item(self, node):
data = self.get_update_dict(node)
try:
self.api.update_item(data)
return True
except PodError as e:
print(e)
return False
def exists(self, id):
try:
result = self.api.get_item(str(id))
if isinstance(result, list) and len(result) > 0:
return True
return False
except PodError as e:
print(e)
return False
def search_paginate(self, fields_data, limit=50, include_edges=True):
extra_fields = {"[[edges]]": {}} if include_edges else {}
query = {**fields_data, **extra_fields}
try:
for page in self.api.search_paginate(query, limit):
result = [self._item_from_search(item) for item in page]
yield self.filter_deleted(result)
except PodError as e:
print(e)
def search(self, fields_data, include_edges: bool = True):
extra_fields = {"[[edges]]": {}} if include_edges else {}
query = {**fields_data, **extra_fields}
try:
result = self.api.search(query)
result = [self._item_from_search(item) for item in result]
return self.filter_deleted(result)
except PodError as e:
print(e)
def _item_from_search(self, item_json: dict):
# search returns different fields w.r.t. edges compared to `get` api,
# different method to keep `self.get` clean.
item = self.item_from_json(item_json)
for edge_json in item_json.get("[[edges]]", []):
edge_name = edge_json["_edge"]
try:
edge_item = self.item_from_json(edge_json["_item"])
item.add_edge(edge_name, edge_item)
except Exception as e:
print(f"Could not attach edge {edge_json['_item']} to {item}")
print(e)
continue
return item
def search_last_added(self, type=None, with_prop=None, with_val=None):
query = {"_limit": 1, "_sortOrder": "Desc"}
if type is not None:
query["type"] = type
if with_prop is not None:
query[f"{with_prop}=="] = with_val
return self.search(query)[0]
def item_from_json(self, json):
plugin_class = json.get("pluginClass", None)
plugin_package = json.get("pluginPackage", None)
constructor = get_constructor(
json["type"],
plugin_class,
plugin_package=plugin_package,
extra=self.registered_classes,
)
new_item = constructor.from_json(json)
existing = self.local_db.get(new_item.id)
# TODO: cleanup
if existing is not None:
if not existing.is_expanded() and new_item.is_expanded():
for edge_name in new_item.get_all_edge_names():
edges = new_item.get_edges(edge_name)
for e in edges:
e.source = existing
existing.__setattr__(edge_name, edges)
for prop_name in new_item.get_property_names():
existing.__setattr__(prop_name, new_item.__getattribute__(prop_name))
return existing
else:
return new_item
def get_properties(self, expanded):
properties = copy(expanded)
if ALL_EDGES in properties:
del properties[ALL_EDGES]
return properties
def send_email(self, to, subject="", body=""):
try:
self.api.send_email(to, subject, body)
print(f"succesfully sent email to {to}")
return True
except PodError as e:
print(e)
return False
```
%% Output
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-2-94d6ac817c95> in <module>
1 # export
----> 2 class PodClient:
3 # Mapping from python type to schema type
4 # TODO move to data.schema once schema is refactored
5 TYPE_TO_SCHEMA = {
<ipython-input-2-94d6ac817c95> in PodClient()
8 int: "Integer",
9 float: "Real",
---> 10 datetime: "DateTime",
11 }
12
NameError: name 'datetime' is not defined
%% Cell type:markdown id: tags:
Pymemri communicates with the pod via the `PodClient`. The PodClient requires you to provide a [database key](https://gitlab.memri.io/memri/pod/-/blob/dev/docs/HTTP_API.md#user-content-api-authentication-credentials) and an [owner key](https://gitlab.memri.io/memri/pod/-/blob/dev/docs/HTTP_API.md#user-content-api-authentication-credentials). During development, you don't have to worry about these keys, you can just omit the keys when initializing the `PodClient`, which creates a new user by defining random keys.
If you want to use the same keys for different `PodClient` instances, you can store a random key pair locally with the `store_keys` CLI, and create a new client with `PodClient.from_local_keys()`. When you are using the app, setting the keys in the pod, and passing them when calling a plugin is handled for you by the app itself.
%% Cell type:code id: tags:
``` python
client = PodClient()
client.registered_classes["Photo"]
```
%% Output
<class 'pymemri.data.photo.Photo'>
pymemri.data.photo.Photo
%% Cell type:code id: tags:
``` python
# hide
success = client.api.test_connection()
assert success
```
%% Cell type:markdown id: tags:
## Creating Items and Edges
%% Cell type:markdown id: tags:
Now that we have access to the pod, we can create items here and upload them to the pod. All items are defined in the schema of the pod. To create an item in the pod, you have to add the schema first. Schemas can be added as follows
%% Cell type:code id: tags:
``` python
from pymemri.data.schema import EmailMessage, Address, PhoneNumber
succes = client.add_to_schema(EmailMessage, Address, PhoneNumber)
```
%% Cell type:code id: tags:
``` python
# hide
assert succes
```
%% Cell type:markdown id: tags:
We can now create an item with one of the above item definitions. As a side-effect, our item will be assigned an id.
%% Cell type:code id: tags:
``` python
email_item = EmailMessage.from_data(content="example content field")
client.create(email_item)
print(email_item.id)
```
%% Output
bda24d7b1d611716b2036349b65ebc74
556efe1042cf8761e94bae4c8c5e3e83
%% Cell type:markdown id: tags:
The types of items in the pod are not limited to the definitions in the pymemri schema. We can easily define our own types, or overwrite existing item definitions with the same `add_to_schema` method.
Note that all keyword arguments need to be added to the `properties` class variable to let the pod know what the properties of our item are. Additionally, properties in the Pod are statically typed, and have to be inferred from the type annotations of our `__init__` method.
%% Cell type:code id: tags:
``` python
# export
class Dog(Item):
properties = Item.properties + ["name", "age", "bites", "weight"]
def __init__(self, name: str=None, age: int=None, bites: bool=False, weight: float=None, **kwargs):
super().__init__(**kwargs)
self.name = name
self.age = age
self.bites = bites
self.weight = weight
```
%% Cell type:code id: tags:
``` python
client.add_to_schema(Dog)
dog2 = Dog(name="bob", age=3, weight=33.2)
client.create(dog2);
```
%% Cell type:code id: tags:
``` python
# hide
client.reset_local_db()
dog_from_db = client.get(dog2.id)
assert dog_from_db.name == "bob"
assert dog_from_db.age == 3
assert dog_from_db.weight == 33.2
```
%% Cell type:markdown id: tags:
We can connect items using edges. Let's create another item, a person, and connect the email and the person.
%% Cell type:code id: tags:
``` python
person_item = Person.from_data(firstName="Alice", lastName="X")
succes = client.add_to_schema(person_item)
```
%% Cell type:code id: tags:
``` python
# hide
assert succes
```
%% Cell type:code id: tags:
``` python
person_item = Person.from_data(firstName="Alice", lastName="X")
item_succes = client.create(person_item)
edge = Edge(email_item, person_item, "sender")
edge_succes = client.create_edge(edge)
print(client.get_edges(email_item.id))
```
%% Output
[{'item': Person (#c95c1c89663c06971de7560462e7ce3d), 'name': 'sender'}]
[{'item': Person (#33b03cddf56811350e3eee3e8910afd2), 'name': 'sender'}]
%% Cell type:code id: tags:
``` python
# hide
assert item_succes
assert edge_succes
```
%% Cell type:markdown id: tags:
If we use the normal `client.get` (without `expanded=False`), we also get items directly connected to the Item.
%% Cell type:code id: tags:
``` python
email_from_db = client.get(email_item.id)
print(email_from_db.sender)
```
%% Output
[Person (#c95c1c89663c06971de7560462e7ce3d)]
[Person (#33b03cddf56811350e3eee3e8910afd2)]
%% Cell type:code id: tags:
``` python
# hide
assert isinstance(email_from_db.sender[0], Person)
```
%% Cell type:markdown id: tags:
# Fetching and updating Items
%% Cell type:markdown id: tags:
## Normal Items
%% Cell type:markdown id: tags:
We can use the client to fetch data from the database. This is in particular useful for indexers, which often use data in the database as input for their models. The simplest form of querying the database is by querying items in the pod by their id (unique identifier).
%% Cell type:code id: tags:
``` python
person_item = Person.from_data(firstName="Alice")
client.create(person_item)
# Retrieve person from Pod
person_from_db = client.get(person_item.id, expanded=False)
```
%% Cell type:code id: tags:
``` python
# hide
assert person_from_db is not None
assert person_from_db == person_item
assert person_from_db.id is not None
```
%% Cell type:markdown id: tags:
Apart from creating, we might want to update existing items:
%% Cell type:code id: tags:
``` python
person_item.lastName = "Awesome"
client.update_item(person_item)
person_from_db = client.get(person_item.id, expanded=False)
print(person_from_db.lastName)
```
%% Output
Awesome
%% Cell type:code id: tags:
``` python
# hide
assert person_from_db.lastName == "Awesome"
```
%% Cell type:markdown id: tags:
When we don't know the ids of the items we want to fetch, we can also search by property. We can use this for instance when we want to query all items from a particular type to perform some indexing on. We can get all `Person` Items from the db by:
%% Cell type:markdown id: tags:
## Search
%% Cell type:markdown id: tags:
The `PodClient` can search through the pod with the `search` or `search_paginate` methods, which return the results of a search as a list or generator respectively. Search uses the same arguments as the Pod search API, which can be found [here](https://gitlab.memri.io/memri/pod/-/blob/dev/docs/HTTP_API.md#post-v4owner_keysearch).
To display how search works, we first add a few new items
%% Cell type:code id: tags:
``` python
person_item2 = Person.from_data(firstName="Bob")
person_account = Account(service="testService")
client.create(person_item2)
client.create(person_account)
person_item2.add_edge("account", person_account)
client.create_edges(person_item2.get_edges("account"));
```
%% Output
BULK: Writing 1/1 items/edges
Completed Bulk action, written 1 items/edges
%% Cell type:code id: tags:
``` python
# hide
client.reset_local_db()
```
%% Cell type:code id: tags:
``` python
# Search for all Persons in the pod
all_people = client.search({"type": "Person"}, include_edges=True)
print("Number of results:", len(all_people))
```
%% Output
Number of results: 3
%% Cell type:code id: tags:
``` python
# hide
assert all([isinstance(p, Person) for p in all_people]) and len(all_people) > 0
assert any([len(p.account) for p in all_people])
```
%% Cell type:code id: tags:
``` python
# hide
# search without returning edges
all_people = client.search({"type": "Person"}, include_edges=False)
assert len(all_people)
# `assert [ ... ]` on a non-empty list always passes; check every element instead.
assert all(len(person.get_all_edges()) == 0 for person in all_people)
```
%% Cell type:code id: tags:
``` python
# hide
assert len(all_people)
```
%% Cell type:code id: tags:
``` python
# hide
# Search with edges
all_people = client.search({"type": "Person"}, include_edges=True)
assert all([isinstance(p, Person) for p in all_people]) and len(all_people) > 0
assert any([len(p.account) for p in all_people])
```
%% Cell type:markdown id: tags:
To handle large volumes of Items, the `PodClient.search_paginate` method can search through the pod and return a generator which yields batches of items. This method uses the same search arguments as the `search` method:
%% Cell type:code id: tags:
``` python
# Create 100 accounts to search
client.bulk_action(
create_items=[
Account(identifier=str(i), service="paginate_test") for i in range(100)
]
)
generator = client.search_paginate({"type": "Account", "service": "paginate_test"}, limit=10)
for page in generator:
# process accounts
pass
```
%% Output
BULK: Writing 100/100 items/edges
Completed Bulk action, written 100 items/edges
%% Cell type:code id: tags:
``` python
# hide
# Test pagination
accounts = client.search({"type": "Account", "service": "paginate_test"})
generator = client.search_paginate({"type": "Account", "service": "paginate_test"}, limit=10)
accounts_paginated = []
for page in generator:
accounts_paginated.extend(page)
assert len(accounts_paginated) == 100
assert [a.id for a in accounts] == [a.id for a in accounts_paginated]
```
%% Cell type:code id: tags:
``` python
# hide
# Search, edge cases
result = client.search({"type": "Account", "service": "NonExistentService"})
assert result == []
paginator = client.search_paginate({"type": "Account", "service": "NonExistentService"})
try:
next(paginator)
except Exception as e:
if not isinstance(e, StopIteration):
assert False
```
%% Cell type:markdown id: tags:
## Search last added items
%% Cell type:code id: tags:
``` python
person_item2 = Person.from_data(firstName="Last Person")
client.create(person_item2)
last_added = client.search_last_added(type="Person")
```
%% Cell type:code id: tags:
``` python
# hide
assert last_added.firstName == "Last Person"
```
%% Cell type:markdown id: tags:
In the near future, Pod will support searching by user defined properties as well. This will allow for the following. **warning, this is currently not supported**
%% Cell type:markdown id: tags:
```client.search_last_added(type="Person", with_prop="ImportedBy", with_val="EmailImporter")```
%% Cell type:markdown id: tags:
## Uploading & downloading files
%% Cell type:markdown id: tags:
### File API
%% Cell type:markdown id: tags:
To work with files like Photos or Videos, the `PodClient` has a separate file api. This api works by posting a blob to the `upload_file` endpoint, and creating an Item with a property with the same sha256 as the sha used in the endpoint.
For example, we can upload a photo with the file API as follows:
%% Cell type:code id: tags:
``` python
from pymemri.data.photo import Photo
x = np.random.randint(0, 255+1, size=(640, 640), dtype=np.uint8)
photo = Photo.from_np(x)
file = photo.file[0]
succes = client.create(file)
succes2 = client._upload_image(x)
succes2 = client._upload_image(photo.data)
```
%% Cell type:code id: tags:
``` python
# hide
assert succes
assert succes2
data = client.get_file(file.sha256)
arr = np.frombuffer(data, dtype=np.uint8)
assert (arr.reshape(640,640) == x).all()
photo.data = data
arr = photo.to_np()
assert (arr == x).all()
```
%% Cell type:markdown id: tags:
### Photo API
%% Cell type:markdown id: tags:
The `PodClient` also implements a separate, easier API for photos, which uses the same file API under the hood.
%% Cell type:code id: tags:
``` python
print(client.registered_classes["Photo"])
# client.add_to_schema(Photo)
x = np.random.randint(0, 255+1, size=(640, 640), dtype=np.uint8)
photo = Photo.from_np(x)
client.create_photo(photo);
photo.file
```
%% Output
<class 'pymemri.data.photo.Photo'>
BULK: Writing 3/3 items/edges
Completed Bulk action, written 3 items/edges
[File (#76bbf5ecd3a140a999d793f71187c340)]
%% Cell type:code id: tags:
``` python
# hide
res = client.get_photo(photo.id, size=640)
assert (res.data == x).all()
res = client.get_photo(photo.id)
print(res.id)
res.file[0].sha256
assert (res.to_np() == x).all()
```
%% Output
d7af0a4a291d42f9a25a38443d07bc91
%% Cell type:markdown id: tags:
Some photos come as bytes, for example when downloading them from a third-party service. We can use `Photo.from_bytes` to initialize these photos:
%% Cell type:code id: tags:
``` python
byte_photo = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\xe1\x00\x00\x00\xe1\x08\x03\x00\x00\x00\tm"H\x00\x00\x003PLTE\x04\x02\x04\x00\x00\x00\xa0\xa0\xa0\xa3\xa3\xa3\xaa\xaa\xaa\xb4\xb4\xb4\xbd\xbe\xbd\xbb\xbc\xbb\xde\xde\xde\x9b\x9a\x9b\xfe\xfe\xfe\xf2\xf3\xf2\xe5\xe6\xe5\xd8\xd9\xd8\xd1\xd1\xd1\xc9\xca\xc9\xae\xae\xae\x80k\x98\xfc\x00\x00\x01TIDATx\x9c\xed\xdd;r\xc2P\x00\x04A!\x90\x84\xfc\x01\xee\x7fZ\x138\xb1\x13S\xceF\xaf\xfb\x06\x93o\xd5No\xef\x1f\x9f\xb7\xfb}]\xd7my\xba|;\xff4\xff\xdf\xf9O\x97W<\x96W\xac\xbfm\xd7i9\x1d\xdb\xfe,\x9c\x8e\xec4+\xac{\x16^\x14\xb6)\xecS\xd8\xa7\xb0Oa\x9f\xc2\xbe!\n\xcf\n\xdb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}C\x14\xce\n\xdb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd87\xc4bHa\x9c\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x86xaQ\x18\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd87D\xe1\xe3\xf0\x85\x8b\xc26\x85}\n\xfb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}\n\xfb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}C\x14\xae\n\xdb\x14\xf6)\xecS\xd8\xa7\xb0Oa\x9f\xc2>\x85}C\x14n\xa7c\xdb\xa7\xeb>\x1f\xd9~\xfb\x02\xee\x7f\r\xe5\xe1h\x04"\x00\x00\x00\x00IEND\xaeB`\x82'
photo = Photo.from_bytes(byte_photo)
client.create_photo(photo);
```
%% Output
BULK: Writing 3/3 items/edges
Completed Bulk action, written 3 items/edges
%% Cell type:code id: tags:
``` python
# hide
# Test on a new client to prevent caching
new_client = PodClient(database_key=client.database_key, owner_key=client.owner_key)
res = new_client.get_photo(photo.id, size=225)
assert res.data == photo.data
```
%% Output
<class 'pymemri.data.photo.Photo'>
%% Cell type:markdown id: tags:
## Bulk API
%% Cell type:markdown id: tags:
Adding each item separately to the pod with the `create` method can take a lot of time. For this reason, using the bulk API is faster and more convenient in most cases. Here we show how to create items and edges; updating and deleting are also possible.
%% Cell type:code id: tags:
``` python
# Build 100 Dogs and one Person locally; the first two dogs each get a
# "label" edge pointing at that person.
dogs = [Dog(name=f"dog number {i}") for i in range(100)]
person = Person(firstName="Alice")
edge1, edge2 = (Edge(dog, person, "label") for dog in dogs[:2])
# A single bulk call writes the dogs, the person, and both edges together
success = client.bulk_action(
    create_items=[*dogs, person],
    create_edges=[edge1, edge2],
)
```
%% Output
BULK: Writing 103/103 items/edges
Completed Bulk action, written 103 items/edges
%% Cell type:code id: tags:
``` python
# hide
# Fetch all Dogs back from the pod; exactly two of them are expected to
# carry edges, and each of those must have a non-empty `label` edge.
dogs = client.search({"type": "Dog"})
dogs_with_edge = [item for item in dogs if len(item.get_all_edges())]
n_with_edge = len(dogs_with_edge)
print(n_with_edge)
assert n_with_edge == 2
for d in dogs_with_edge:
    assert len(d.label) > 0
```
%% Output
2
%% Cell type:code id: tags:
``` python
# hide
# test bulk delete and update
# Change person name, delete first dog :(
person.firstName = "Bob"
# NOTE(review): `dogs` appears to have been rebound to the result of client.search in an
# earlier cell, so dogs[0] would be the first search result, not necessarily
# "dog number 0" -- confirm this is the dog that is meant to be deleted.
to_delete = [dogs[0]]
to_update = [person]
# A single bulk_action call carries both the delete and the update.
client.bulk_action(delete_items=to_delete, update_items=to_update)
# NOTE(review): the second prefix is "dog number 1 " with a trailing space; names built as
# f"dog number {i}" never start with it, so only "dog number 0" can match here.
# Confirm whether an exact match on "dog number 1" was intended.
dogs_with_edge = [
item for item in client.search({"type": "Dog"}) if item.name.startswith("dog number 0") or item.name.startswith("dog number 1 ")
]
assert len(dogs_with_edge) == 1
dog = dogs_with_edge[0]
# The renamed person must be visible through the dog's "label" edge.
assert dog.label[0].firstName == "Bob"
```
%% Output
BULK: Writing 2/2 items/edges
Completed Bulk action, written 2 items/edges
%% Cell type:markdown id: tags:
# Sending emails -
%% Cell type:code id: tags:
``` python
# hide
# skip
to = "myemail@gmail.com"
client.send_email(to=to, subject="test", body="test2")
```
%% Output
succesfully sent email to myemail@gmail.com
True
%% Cell type:markdown id: tags:
# Create items that do not exist in the Pod
The `PodClient` can deduplicate items with the same externalId with the `create_if_external_id_not_exists` method.
%% Cell type:code id: tags:
``` python
# Two distinct Person items that share the same externalId
person_item = Person(firstName="Eve", externalId="gmail_1")
person_item2 = Person(firstName="Eve2", externalId="gmail_1")
# Both go through the deduplicating create call
client.create_if_external_id_not_exists(person_item)
client.create_if_external_id_not_exists(person_item2)
# Look up everything the pod now holds for this externalId
existing = client.search({"externalId": "gmail_1"})
```
%% Cell type:code id: tags:
``` python
# hide
assert len(existing) == 1
```
%% Cell type:markdown id: tags:
# Export -
%% Cell type:code id: tags:
``` python
# hide
from nbdev.export import *
notebook2script()
```
%% Output
Converted basic.ipynb.
Converted cvu.utils.ipynb.
Converted data.photo.ipynb.
Converted exporters.query.ipynb.
Converted index.ipynb.
Converted itembase.ipynb.
Converted plugin.authenticators.credentials.ipynb.
Converted plugin.authenticators.oauth.ipynb.
Converted plugin.listeners.ipynb.
Converted plugin.pluginbase.ipynb.
Converted plugin.states.ipynb.
Converted plugins.authenticators.password.ipynb.
Converted pod.api.ipynb.
Converted pod.client.ipynb.
Converted pod.db.ipynb.
Converted pod.utils.ipynb.
Converted template.config.ipynb.
Converted template.formatter.ipynb.
Converted test_schema.ipynb.
Converted test_utils.ipynb.
......
......@@ -6,8 +6,6 @@ index = {"read_file": "basic.ipynb",
"read_json": "basic.ipynb",
"write_json": "basic.ipynb",
"unzip": "basic.ipynb",
"resize": "data.photo.ipynb",
"get_size": "data.photo.ipynb",
"Path.ls": "basic.ipynb",
"PYI_HOME": "basic.ipynb",
"PYI_TESTDATA": "basic.ipynb",
......@@ -17,8 +15,8 @@ index = {"read_file": "basic.ipynb",
"CVU_BASE_PATH": "cvu.utils.ipynb",
"get_default_cvu": "cvu.utils.ipynb",
"list_default_cvus": "cvu.utils.ipynb",
"DEFAULT_ENCODING": "data.photo.ipynb",
"show_images": "data.photo.ipynb",
"get_height_width_channels": "data.photo.ipynb",
"Photo": "data.photo.ipynb",
"Query": "exporters.query.ipynb",
"ALL_EDGES": "itembase.ipynb",
......
# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/basic.ipynb (unless otherwise specified).
__all__ = ['read_file', 'read_json', 'write_json', 'unzip', 'resize', 'get_size', 'PYI_HOME', 'PYI_TESTDATA',
'HOME_DIR', 'MODEL_DIR', 'MEMRI_S3']
__all__ = ['read_file', 'read_json', 'write_json', 'unzip', 'PYI_HOME', 'PYI_TESTDATA', 'HOME_DIR', 'MODEL_DIR',
'MEMRI_S3']
# Cell
from ..imports import *
......@@ -9,7 +9,6 @@ from urllib.request import urlretrieve
import requests
from tqdm import tqdm
import zipfile
import cv2
# Cell
Path.ls = lambda x: list(x.iterdir())
......@@ -34,14 +33,4 @@ def write_json(obj, fname, indent=4):
def unzip(f, dest):
with zipfile.ZipFile(str(f)) as zf:
zf.extractall(str(dest))
def resize(img, maxsize):
size = get_size(img, maxsize)
return cv2.resize(img, dsize=size, interpolation=cv2.INTER_CUBIC)
def get_size(img, maxsize):
s = img.shape
assert len(s) > 1
div = max(s) / maxsize
return (int(s[1]//div), int(s[0]//div))
\ No newline at end of file
zf.extractall(str(dest))
\ No newline at end of file
# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/data.photo.ipynb (unless otherwise specified).
__all__ = ['show_images', 'get_size', 'resize', 'get_height_width_channels', 'Photo']
__all__ = ['DEFAULT_ENCODING', 'show_images', 'Photo']
# Cell
from .schema import Item
......@@ -13,7 +13,6 @@ from matplotlib import patches
from matplotlib.collections import PatchCollection
from numpy.linalg import norm
from hashlib import sha256
import cv2
import matplotlib.pyplot as plt
import math
import numpy as np
......@@ -23,7 +22,7 @@ from hashlib import sha256
from typing import Any
# Cell
NUMPY, BYTES = "numpy", "bytes"
DEFAULT_ENCODING = "PNG"
# Cell
def show_images(images, cols = 3, titles = None):
......@@ -42,46 +41,33 @@ def show_images(images, cols = 3, titles = None):
fig.set_size_inches(np.array(fig.get_size_inches()) * n_images)
plt.show()
def get_size(img, maxsize):
s = img.shape
assert len(s) > 1
div = max(s) / maxsize
return (int(s[1]//div), int(s[0]//div))
def resize(img, maxsize):
size = get_size(img, maxsize)
return cv2.resize(img, dsize=size, interpolation=cv2.INTER_CUBIC)
def get_height_width_channels(img):
s = img.shape
if len(s) == 2: return s[0], s[1], 1
else: return img.shape
# Cell
class Photo(Item):
properties = Item.properties + ["width", "height", "channels", "encoding"]
properties = Item.properties + ["width", "height", "channels", "encoding", "mode"]
edges = Item.edges + ["file"]
def __init__(
self,
data: Any=None,
includes: Any=None,
thumbnail: Any=None,
includes: Any=None, # TODO
thumbnail: Any=None, # TODO
height: int=None,
width: int=None,
channels: int=None,
encoding: str=None,
mode: str=None,
file: list=None,
_file_created: bool=False,
**kwargs
):
super().__init__(**kwargs)
self.private = ["data", "embedding", "path"]
self.private = ["data", "embedding", "path"] #TODO
self.height = height
self.width = width
self.channels = channels
self.encoding = encoding
self.mode = mode
self.file = file if file is not None else []
self.data = data
self._file_created = _file_created
......@@ -95,28 +81,40 @@ class Photo(Item):
fig.set_size_inches((6, 6))
plt.show()
@property
def size(self):
    """Return the photo dimensions as a `(width, height)` tuple."""
    dimensions = (self.width, self.height)
    return dimensions
@classmethod
def from_data(cls, *args, **kwargs):
res = super().from_data(*args, **kwargs)
if res.file:
res.file[0]
res.file[0] # TODO
return res
@classmethod
def from_path(cls, path, size=None):
data = cv2.imread(str(path))
res = cls.from_np(data, size)
pil_image = Image.open(path)
encoding, mode, shape = cls.infer_PIL_metadata(pil_image)
w, h, c = shape
_bytes = cls.PIL_to_bytes(pil_image, encoding)
res = cls(data=_bytes, height=h, width=w, channels=c, encoding=encoding, mode=mode)
file = File.from_data(sha256=sha256(_bytes).hexdigest())
res.add_edge("file", file)
return res
@classmethod
def from_np(cls, data, size=None, *args, **kwargs):
pil_image = Image.fromarray(data)
if size is not None:
data = resize(data, size)
h, w, c = get_height_width_channels(data)
res = cls(
data=data, height=h, width=w, channels=c, encoding=NUMPY, *args, **kwargs
)
file = File.from_data(sha256=sha256(data.tobytes()).hexdigest())
pil_image = pil_image.resize(size)
encoding, mode, shape = cls.infer_PIL_metadata(pil_image)
w, h, c = shape
_bytes = cls.PIL_to_bytes(pil_image, encoding)
res = cls(data=_bytes, height=h, width=w, channels=c, encoding=encoding, mode=mode)
file = File.from_data(sha256=sha256(_bytes).hexdigest())
res.add_edge("file", file)
return res
......@@ -124,6 +122,24 @@ class Photo(Item):
def from_bytes(cls, _bytes):
    """Create a photo from already-encoded image bytes.

    The bytes are stored unchanged as the photo's data; they are opened with
    PIL only so that encoding, mode and dimensions can be read through
    `infer_PIL_metadata`. A `File` item carrying the sha256 hex digest of the
    bytes is attached through the "file" edge.
    """
    pil_image = Image.open(io.BytesIO(_bytes))
    encoding, mode, (w, h, c) = cls.infer_PIL_metadata(pil_image)
    photo = cls(data=_bytes, height=h, width=w, channels=c, encoding=encoding, mode=mode)
    digest = sha256(_bytes).hexdigest()
    photo.add_edge("file", File.from_data(sha256=digest))
    return photo
@staticmethod
def PIL_to_bytes(pil_image, encoding):
    """Serialize `pil_image` into an in-memory buffer and return the raw bytes.

    `encoding` is passed to `pil_image.save` as its second positional
    argument (the output format).
    """
    buffer = io.BytesIO()
    pil_image.save(buffer, encoding)
    encoded = buffer.getvalue()
    return encoded
@staticmethod
def infer_PIL_metadata(pil_image):
encoding = pil_image.format or DEFAULT_ENCODING
mode = pil_image.mode
size = pil_image.size
if len(size) == 3:
w, h, c = size
......@@ -132,8 +148,14 @@ class Photo(Item):
c = 1
else:
raise Error
return encoding, mode, (w, h, c)
res = cls(data=_bytes, height=h, width=w, encoding=BYTES, channels=c)
file = File.from_data(sha256=sha256(_bytes).hexdigest())
res.add_edge("file", file)
return res
\ No newline at end of file
def to_PIL(self):
    """Open the photo's stored bytes as a PIL image.

    Raises:
        ValueError: if `self.data` is None.
    """
    raw = self.data
    if raw is None:
        raise ValueError("Photo object has no data")
    return Image.open(io.BytesIO(raw))
def to_np(self):
    """Return the photo as a numpy array, built from the result of `to_PIL`."""
    return np.asarray(self.to_PIL())
\ No newline at end of file
import random, string
from .itembase import ItemBase, Edge, Item
from .central_schema import *
from .photo import Photo
def get_constructor(_type, plugin_class=None, plugin_package=None, extra=None):
......
......@@ -6,6 +6,7 @@ __all__ = ['Query']
# hide
from typing import Dict, List, Optional, Iterable, Any
import pandas as pd
import json
from ..pod.client import PodClient
from ..data.itembase import Item
......@@ -65,24 +66,18 @@ class Query:
prop = prop[-1]
return edges, prop
@staticmethod
def convert_dtype(result, dtype):
def convert_dtype(self, result, dtype):
if dtype == "dict":
return result
elif dtype == "list":
return [result[prop] for prop in self.properties]
elif dtype in {"pandas", "pd", "df"}:
return pd.DataFrame.from_dict(result)
else:
raise ValueError(f"Unknown dtype: {dtype}")
def execute(
self, client: PodClient, items: List[Item], dtype="dict", include_ids=True
) -> Any:
if "id" not in self.properties and include_ids:
properties = ["id"] + self.properties
else:
properties = self.properties
def execute(self, client: PodClient, items: List[Item], dtype="dict") -> Any:
result = {
prop: self.get_property_values(client, prop, items) for prop in properties
prop: self.get_property_values(client, prop, items) for prop in self.properties
}
return self.convert_dtype(result, dtype)
\ No newline at end of file
......@@ -6,7 +6,6 @@ __all__ = ['PodClient', 'Dog']
from ..data.basic import *
from ..data.schema import *
from ..data.itembase import Edge, ItemBase, Item
from ..data.photo import Photo, NUMPY, BYTES
from ..imports import *
from hashlib import sha256
from .db import DB
......@@ -102,10 +101,7 @@ class PodClient:
return False
def create_photo(self, photo):
# create the file
# file_success = self.create_photo_file(photo)
file = photo.file[0]
# self.create(file)
# create the photo
items_edges_success = self.bulk_action(
......@@ -201,25 +197,9 @@ class PodClient:
f"Could not load data of {photo} attached file item does not have data in pod"
)
return
if photo.encoding == NUMPY:
data = np.frombuffer(file, dtype=np.uint8)
c = photo.channels
shape = (
(photo.height, photo.width, c)
if c is not None and c > 1
else (photo.height, photo.width)
)
data = data.reshape(shape)
if size is not None:
data = resize(data, size)
photo.data = data
return
elif photo.encoding == BYTES:
photo.data = file
return
else:
raise ValueError("Unsupported encoding")
print(f"could not load data of {photo}, no file attached")
photo.data = file
else:
print(f"could not load data of {photo}, no file attached")
def create_if_external_id_not_exists(self, node):
if not self.external_id_exists(node):
......
......@@ -16,7 +16,7 @@ custom_sidebar = True
license = apache2
status = 2
console_scripts = run_plugin=pymemri.plugin.pluginbase:run_plugin simulate_run_plugin_from_frontend=pymemri.plugin.pluginbase:simulate_run_plugin_from_frontend store_keys=pymemri.plugin.pluginbase:store_keys qr_simulator=pymemri.client_simulator.qr_simulator:run_qr_simulator simulate_enter_credentials=pymemri.plugin.authenticators.password:simulate_enter_credentials plugin_from_template=pymemri.template.formatter:plugin_from_template create_plugin_config=pymemri.template.config:create_plugin_config
requirements = requests tqdm ipdb fastprogress fastscript opencv-python fastcore==1.3.21 nbdev==1.1.21 matplotlib jupyter-client==6.1.12 flask giturlparse==0.10.0 pandas
requirements = requests tqdm ipdb fastprogress fastscript fastcore==1.3.21 nbdev==1.1.21 matplotlib jupyter-client==6.1.12 flask giturlparse==0.10.0 pandas pillow==8.4.0
nbs_path = nbs
doc_path = docs
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment