Commit c829a264 authored by Youp's avatar Youp
Browse files

- Started on Evernote conversion and created a basic PodClient

parent b05e45fc
Showing with 584 additions and 44 deletions
+584 -44
%% Cell type:code id: tags:
``` python
%reload_ext autoreload
%autoreload
```
%% Cell type:code id: tags:
``` python
# export pod.client
```
%% Cell type:code id: tags:
``` python
import hashlib
import json
import os
import requests
import hashlib
import json
import os
import requests
class PodClient:
    """Small HTTP client for the pod's ``/v2/<owner_key>/...`` endpoints.

    The pod address is read from the ``POD_FULL_ADDRESS`` environment
    variable and falls back to ``https://0.0.0.0:3030``.  The database and
    owner keys currently come from the hard-coded payload returned by
    :meth:`get_client_payload`.

    NOTE(review): every request is sent with ``verify=False``, i.e. TLS
    certificate verification is disabled.  Confirm this is only intended
    for local development.
    """

    def __init__(self, run_item=None):
        """Set up the pod address, the run variables and the client keys.

        Args:
            run_item: dict with the run variables; it must contain ``'uid'``.
                When omitted, the run variables are left empty and ``uid``
                is ``None`` (loading them from the environment is still a
                TODO), instead of failing on an unset attribute.
        """
        if 'POD_FULL_ADDRESS' in os.environ:
            self.pod_full_address = os.environ['POD_FULL_ADDRESS']
            print(f'Running with POD_FULL_ADDRESS (from env variable) : {self.pod_full_address}')
        else:
            self.pod_full_address = 'https://0.0.0.0:3030'
            print(f'Running with POD_FULL_ADDRESS (from default) as {self.pod_full_address}')

        if run_item is None:
            # TODO: parse run variables from env variable once exact
            # name is known (likely to be RUN_ITEM)
            # self.run_item = json.loads(self.get_run_item())
            # Until then keep the instance usable: no run variables, no uid.
            self.run_item = {}
            self.uid = None
            print('RUN_VARIABLES are taken from environment variable')
        else:
            self.run_item = run_item
            print('RUN_VARIABLES are injected/side-loaded')
            self.uid = self.run_item['uid']
        print('RUN_VARIABLES are ', self.run_item)

        self.client_payload = json.loads(self.get_client_payload())
        self.database_key = self.client_payload['databaseKey']
        self.owner_key = self.client_payload['ownerKey']

    def get_run_variable(self, variable):
        """Return the run variable called ``variable`` (KeyError if absent)."""
        return self.run_item[variable]

    # def get_run_item(self):
    #     json_object = {
    #         'uid': 'xxx'
    #     }
    #     if False:
    #         return json.dumps(json_object)
    #     else:
    #         settings_file = open('local_settings.json', 'r')
    #         return settings_file.read()

    def get_client_payload(self):
        """Return the client payload as a JSON string (placeholder values)."""
        # TODO: parse env variable once exact
        # name is known (likely to be CLIENT_PAYLOAD)
        json_object = {
            'databaseKey': 'database_key',
            'ownerKey': 'owner_key',
        }
        return json.dumps(json_object)

    def _post(self, path, body):
        """POST ``body`` to ``/v2/<owner_key>/<path>`` and return the response.

        Raises:
            Exception: ``(status_code, text)`` when the response is not OK.
        """
        response = requests.post(f'{self.pod_full_address}/v2/{self.owner_key}/{path}',
                                 body,
                                 verify=False)
        if not response.ok:
            # TODO: better exception handling
            raise Exception(response.status_code, response.text)
        return response

    def _wrap(self, payload):
        """Serialise ``payload`` together with the database key."""
        return json.dumps({
            'databaseKey': self.database_key,
            'payload': payload,
        })

    def create_item(self, item, item_type=None):
        """Create ``item`` in the pod and return the decoded JSON response.

        When ``item_type`` is given it is stored on ``item`` under ``'_type'``
        (the passed dict is modified in place).
        """
        if item_type is not None:
            item['_type'] = item_type
        return self._post('create_item', self._wrap(item)).json()

    def get_item(self, uid):
        """Return the item with the given ``uid``.

        Raises:
            Exception: when the pod returns an empty result or a non-OK status.
        """
        # TODO: assumes the pod returns an array of length 1
        result_json = self._post('get_item', self._wrap(uid)).json()
        if not result_json:
            raise Exception(f'item {uid} does not exist')
        return result_json[0]

    def upload_file(self, file):
        """Register and upload ``file`` (bytes-like); return its sha256 hex digest."""
        digest = hashlib.sha256(file).hexdigest()
        # TODO: check if creation was succesful
        self.create_item({'sha256': digest}, item_type='File')
        self._post(f'upload_file/{self.database_key}/{digest}', file)
        return digest

    def get_file(self, hash):
        """Return the raw content of the file whose sha256 digest is ``hash``."""
        return self._post('get_file', self._wrap({'sha256': hash})).content

    def set_progress(self, new_progress):
        """Report progress; currently this only prints it."""
        print('Progress of {} is now {}'.format(self.uid, new_progress))
```
%% Output
File "<ipython-input-6-16aa1692fdef>", line 29
def get_run_variable(variable):
^
IndentationError: expected an indented block
%% Cell type:code id: tags:
``` python
import shutil
from pathlib import Path
class DryRunPodClient(PodClient):
    """PodClient variant that only prints what it would send to the pod.

    ``create_item`` and ``upload_file`` are overridden so that no HTTP
    request is made for them; the other methods are inherited unchanged.
    """

    def __init__(self, run_item=None):
        """Initialise exactly like ``PodClient``."""
        super().__init__(run_item=run_item)
        # Local on-disk storage of the dry-run results is not enabled yet:
        # if Path('local/files').is_dir():
        #     shutil.rmtree('local/files')
        # if Path('local/nodes').is_dir():
        #     shutil.rmtree('local/nodes')
        # os.mkdir('local/files')
        # os.mkdir('local/nodes')

    def create_item(self, item, item_type=None):
        """Print the ``external_id`` of ``item`` instead of creating it."""
        # if item_type != None:
        #     item['_type'] = item_type
        # f = open('local/nodes/{}.json'.format(item['external_id']), "w")
        # f.write(json.dumps(item))
        # f.close()
        print("Create ITEM {}".format(item['external_id']))

    def upload_file(self, file):
        """Print the sha256 digest of ``file`` and return it.

        The digest is returned so that callers see the same return value as
        with ``PodClient.upload_file``.
        """
        digest = hashlib.sha256(file).hexdigest()
        # f = open('local/files/{}.{}'.format(hash, 'jpg'), "wb")
        # f.write(file)
        # f.close()
        print("Uploaded FILE {}".format(digest))
        return digest
```
%% Output
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-1-8f0ed8f9d6ba> in <module>
2 from pathlib import Path
3
----> 4 class DryRunPodClient(PodClient):
5 def __init__(self, run_item=None):
6 PodClient.__init__(self, run_item=run_item)
NameError: name 'PodClient' is not defined
%% Cell type:code id: tags:
``` python
#default_exp pod.client
```
%% Cell type:code id: tags:
``` python
#export
class Client:
    """Empty placeholder class exported to the ``pod.client`` module."""
```
%% Cell type:code id: tags:
``` python
# Placeholder check: the condition is always true, so this cell never fails.
assert 1 == 1
```
%% Cell type:code id: tags:
``` python
# Smoke test: the placeholder Client class can be instantiated without arguments.
x = Client()
```
%% Cell type:code id: tags:
``` python
from nbdev import *
# `from nbdev import *` does not bring notebook2script into scope (see the
# NameError in the recorded output), so import it from its own module.
# NOTE(review): assumes nbdev v1, where the exporter lives in nbdev.export.
from nbdev.export import notebook2script

# Export the cells marked for export to the Python package.
notebook2script()
```
%% Output
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-7-6e60732a8a52> in <module>
1 from nbdev import *
----> 2 notebook2script()
NameError: name 'notebook2script' is not defined
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:code id: tags:
``` python
%reload_ext autoreload
%autoreload
```
%% Cell type:code id: tags:
``` python
from evernote.api.client import EvernoteClient
import evernote.edam.notestore.NoteStore as NoteStore
import evernote.edam.userstore.constants as UserStoreConstants
import os
import json
import shutil
from pathlib import Path
import xml.etree.ElementTree as ET
import unittest
import argparse
from integrators import *
```
%% Output
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
<ipython-input-38-5c8a331fdbbe> in <module>
9 import unittest
10 import argparse
---> 11 from integrators import *
ModuleNotFoundError: No module named 'integrators'
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
# File extension to use for each resource MIME type this importer knows about.
mime_to_filetype = {
    'image/png': 'png',
    'image/gif': 'gif',
    'image/jpeg': 'jpg',
    'application/pdf': 'pdf',
    'audio/mpeg': 'mp3',
    'audio/x-m4a': 'm4a',
}
# Evernote-specific elements that are removed without a textual replacement.
_ENML_DROPPED_TAGS = ('en-media', 'en-crypt')


def _drop_child(parent, index, replacement=''):
    """Remove ``parent[index]`` while keeping the text that followed it.

    ElementTree stores the text after an element in that element's ``tail``,
    so simply removing the element would also discard that text.  The tail
    (prefixed with ``replacement``) is therefore moved onto the previous
    sibling's tail, or onto the parent's text when there is no previous
    sibling.
    """
    kept_text = replacement + (parent[index].tail or '')
    if kept_text:
        if index > 0:
            previous = parent[index - 1]
            previous.tail = (previous.tail or '') + kept_text
        else:
            parent.text = (parent.text or '') + kept_text
    del parent[index]


def _strip_enml_tags(parent):
    """Recursively drop media/crypt elements and turn todos into 'TODO ' text."""
    # Walk backwards so that deleting the current child never shifts the
    # indices of the children that still have to be visited.
    for index in range(len(parent) - 1, -1, -1):
        child = parent[index]
        if child.tag in _ENML_DROPPED_TAGS:
            _drop_child(parent, index)
        elif child.tag == 'en-todo':
            _drop_child(parent, index, replacement='TODO ')
        else:
            _strip_enml_tags(child)


def clean_html(original_html):
    """Convert an Evernote note body (XML) into a plain ``<div>`` fragment.

    The root element is renamed to ``div``; ``en-media`` and ``en-crypt``
    elements are removed and every ``en-todo`` element is replaced by the
    text ``'TODO '``.  Text surrounding a removed element is preserved.

    Args:
        original_html: the note content as an XML string.

    Returns:
        The cleaned markup as a string, or ``""`` when ``original_html``
        cannot be parsed as XML.
    """
    try:
        root = ET.fromstring(original_html)
    except (ET.ParseError, TypeError, ValueError):
        # Best effort: an unparsable (or missing) note body yields no content.
        return ""
    root.tag = 'div'
    _strip_enml_tags(root)
    return ET.tostring(root).decode('utf-8')
def _read_secret(path):
    """Return the content of ``path`` without surrounding whitespace.

    Stripping matters because a credential file usually ends with a newline,
    which must not become part of the token.
    """
    with open(path, 'r') as secret_file:
        return secret_file.read().strip()


def create_token(use_oauth_flow, sandbox):
    """Return an Evernote authentication token.

    Args:
        use_oauth_flow: when false, the developer token stored in
            ``evernote/dev_token`` is returned.  When true, a previously saved
            ``evernote/oauth_token`` is reused if it exists; otherwise the
            interactive OAuth flow is run and its token is saved to that file.
        sandbox: passed to ``EvernoteClient`` when running the OAuth flow.

    Returns:
        The token as a string.

    Raises:
        SystemExit: with status 1 when ``dev_token`` or ``consumer_secret``
            cannot be read.
    """
    if not use_oauth_flow:
        # Use a developer token for authentication
        try:
            return _read_secret('evernote/dev_token')
        except OSError:
            print('ERROR: file "dev_token" seems to be missing,')
            raise SystemExit(1) from None

    if os.path.isfile('evernote/oauth_token'):
        # Use saved OAuth token
        return _read_secret('evernote/oauth_token')

    # Request OAuth token
    consumer_key = 'memri'
    try:
        consumer_secret = _read_secret('evernote/consumer_secret')
    except OSError:
        print('ERROR: file "consumer_secret" seems to be missing')
        raise SystemExit(1) from None

    client_1 = EvernoteClient(consumer_key=consumer_key, consumer_secret=consumer_secret, sandbox=sandbox)
    request_token = client_1.get_request_token('https://www.memri.io/')

    # TODO manual copy of oauth_verifier to terminal should be replaced with a callback url that
    # directs to a simple server on the pod
    print(client_1.get_authorize_url(request_token))
    print("1) Follow the link above (Ctrl-click in most terminals)\n"
          "2) Login if you're note already, and (re)authorize the app\n"
          "3) Copy the oauth_verifier, i.e. the part from the redirect url between '&oauth_verifier=' and "
          "'&sandbox_lnb=false'")
    oauth_verifier = input("Please enter oauth_verifier: ")

    client_2 = EvernoteClient(consumer_key=consumer_key, consumer_secret=consumer_secret, sandbox=sandbox)
    token = client_2.get_access_token(request_token['oauth_token'],
                                      request_token['oauth_token_secret'],
                                      oauth_verifier,
                                      return_full_dict=False)

    # Save the token so that the next run can skip the interactive flow.
    with open('evernote/oauth_token', 'w') as credential_file:
        credential_file.write(token)
    return token
```
%% Cell type:code id: tags:
``` python
def download_notes_and_resources(pod_client, note_store, notebook, notes, download_resources):
    """Fetch every note in ``notes`` and store it in the pod as a 'Note' item.

    Args:
        pod_client: client providing ``upload_file`` and ``create_item``.
        note_store: Evernote note store used for ``getNote``/``getResource``.
        notebook: the notebook the notes belong to; its ``name`` is stored
            with each note.
        notes: iterable of note metadata objects exposing ``guid``.
        download_resources: when true, each resource attached to a note is
            downloaded and passed to ``pod_client.upload_file``.
    """
    for raw_note in notes:
        # print("- NOTE %s :: %s" % (raw_note.title, raw_note.guid))
        print("- NOTE :: %s" % raw_note.guid)
        note_content = note_store.getNote(raw_note.guid, True, False, False, False)

        # Evernote reports "nothing attached" as None rather than an empty list.
        resources = note_content.resources or []
        tag_guids = note_content.tagGuids or []

        if download_resources:
            for resource in resources:
                resource_content = note_store.getResource(resource.guid, True, False, False, False)
                if resource_content.mime not in mime_to_filetype:
                    print("File type not found: {}".format(resource_content.mime))
                print("-- RESOURCE: %s (%s)" % (resource_content.data.bodyHash.hex(), resource_content.mime))
                pod_client.upload_file(bytearray(resource_content.data.body))

        json_object = {
            'external_id': note_content.guid,
            '_notebooks': [notebook.name],
            '_creator': "user",
            '_id': note_content.guid,
            'title': note_content.title,
            'content': clean_html(note_content.content),
            'deleted': note_content.deleted is not None,
            # presumably Evernote timestamps are in milliseconds and the pod
            # expects seconds -- TODO confirm
            'dateCreated': int(note_content.created / 1000),
            'dateModified': int(note_content.updated / 1000),
            # '_tags': list(map(lambda x: tag_names[x], note_content.tagGuids))
            '_tags': tag_guids,
        }
        pod_client.create_item(json_object, item_type='Note')
def main(pod_client):
    """Import all Evernote tags and notes of one account into the pod.

    The run variables ``use_sandbox``, ``download_resources`` and ``token``
    are read from ``pod_client``.  Every tag is stored as a 'Tag' item, then
    the notes of every notebook are fetched page by page and handed to
    ``download_notes_and_resources``.

    Args:
        pod_client: client providing ``get_run_variable`` and ``create_item``.
    """
    sandbox = pod_client.get_run_variable('use_sandbox')
    download_resources = pod_client.get_run_variable('download_resources')

    client = EvernoteClient(token=pod_client.get_run_variable('token'),
                            sandbox=sandbox,
                            china=False)

    user_store = client.get_user_store()
    version_ok = user_store.checkVersion("Evernote EDAMTest (Python)",
                                         UserStoreConstants.EDAM_VERSION_MAJOR,
                                         UserStoreConstants.EDAM_VERSION_MINOR)
    print("Is my Evernote API version up to date? ", str(version_ok))
    print("For user : " + str(user_store.getUser()))

    note_store = client.get_note_store()

    tags = note_store.listTags()
    print("Found ", len(tags), " tags:")
    tag_names = {}
    for tag in tags:
        tag_names[tag.guid] = tag.name
        json_object = {
            'external_id': tag.guid,
            'name': tag.name
        }
        pod_client.create_item(json_object, item_type='Tag')
    print("tag dictionary", tag_names)

    notebooks = note_store.listNotebooks()
    print("Found ", len(notebooks), " notebooks:")

    # The result spec and the page size are the same for every notebook.
    spec = NoteStore.NotesMetadataResultSpec()
    spec.includeTitle = True
    spec.includeNotebookGuid = True
    spec.includeTagGuids = True
    num_notes_per_fetch = 25

    for notebook in notebooks:
        search = NoteStore.NoteFilter()
        search.ascending = False
        search.notebookGuid = notebook.guid

        print("NOTEBOOK: {}".format(notebook.guid))

        num_notes_fetched = 0
        while True:
            print('download {} notes starting at {}'.format(num_notes_per_fetch, num_notes_fetched))
            result = note_store.findNotesMetadata(search, num_notes_fetched, num_notes_per_fetch, spec)
            download_notes_and_resources(pod_client, note_store, notebook, result.notes, download_resources)

            # Advance by what was actually returned: the service may return
            # fewer notes than requested, and skipping ahead by the page size
            # would then leave notes out.  An empty page ends the loop so it
            # can never spin forever.
            num_notes_fetched += len(result.notes)
            if not result.notes or num_notes_fetched >= result.totalNotes:
                break
# WARNING: create_token() reads a dev_token/consumer_secret from the local
# 'evernote/' directory; these credentials should stay secret.
# These are the settings for this importer; they would normally be set by the GUI/app/client.
run_item = {
"uid": 10,
"download_resources": True,
"use_sandbox": True,
# create_token(True, True): use the OAuth flow (or a saved OAuth token) against the sandbox
"token": create_token(True, True)
}
# Dry run: created items and uploaded files are only printed, nothing is sent to a pod.
pod_client = DryRunPodClient(run_item=run_item)
main(pod_client)
```
%% Output
Running with POD_FULL_ADDRESS (from default) as https://0.0.0.0:3030
RUN_VARIABLES are injected/side-loaded
RUN_VARIABLES are {'uid': 10, 'download_resources': True, 'use_sandbox': True, 'token': 'S=s1:U=95d89:E=17b78bc8cac:C=174210b5d38:P=185:A=memri:V=2:H=d90fca24c6b8e0fb4c9276040ae61029'}
Is my Evernote API version up to date? True
For user : User(id=613769, username='gipraruloc', email=None, name=None, timezone=None, privilege=1, created=None, updated=None, deleted=None, active=True, shardId='s1', attributes=None, accounting=Accounting(uploadLimit=62914560, uploadLimitEnd=1600585200000, uploadLimitNextMonth=62914560, premiumServiceStatus=0, premiumOrderNumber=None, premiumCommerceService=None, premiumServiceStart=None, premiumServiceSKU=None, lastSuccessfulCharge=None, lastFailedCharge=None, lastFailedChargeReason=None, nextPaymentDue=None, premiumLockUntil=None, updated=None, premiumSubscriptionNumber=None, lastRequestedCharge=None, currency=None, unitPrice=None, businessId=None, businessName=None, businessRole=None, unitDiscount=None, nextChargeDate=None), premiumInfo=None, businessUserInfo=None)
Found 2 tags:
Create ITEM 68d4e3f5-a99f-40cd-bb8a-0aa0f35bd16c
Create ITEM a06b7acb-97d1-471e-b41b-6a3d4a5ac1ae
tag dictionary {'68d4e3f5-a99f-40cd-bb8a-0aa0f35bd16c': 'taggie', 'a06b7acb-97d1-471e-b41b-6a3d4a5ac1ae': 'help'}
Found 2 notebooks:
NOTEBOOK: 566b9853-cb65-4753-a9ee-df88576cfa88
download 25 notes starting at 0
- NOTE :: 34741222-3eda-45fa-ad3a-53f5f27a58e0
Create ITEM 34741222-3eda-45fa-ad3a-53f5f27a58e0
- NOTE :: 9350d56e-5ee1-495c-afe3-84c050d9cfdb
-- RESOURCE: 021d549060c5045bea9f1c05f6881c25 (image/png)
Uploaded FILE 5d33b73cdda94664b94f1e9f2a1629a4e2f75def46915404dbf83a2655345c6c
-- RESOURCE: 1702eccdc04f5c6aa63e9b760342d33d (image/gif)
Uploaded FILE e36d4050b48da8280a8be93ad2d985b0597ddb3239c7a6daf174b731a1a28d0d
-- RESOURCE: 2d12b1789d88c42fde1679b77f160708 (image/gif)
Uploaded FILE 4374c63745285172981ec53aeaab833acd6629edb2bde67c9a256e746006e882
-- RESOURCE: a0dcfd0624612564194581cb440ea47a (image/png)
Uploaded FILE 649043a31f945ffd22f871215f2afac71c8006f9bdb52e148b0137872ccf7cc5
-- RESOURCE: cdfffdefe2600c151943444dd13f1db9 (image/png)
Uploaded FILE a3394e33bdb6e112b4acaa5817e25ac4fd038d3ba951922093395110553a6b77
-- RESOURCE: d22ed3c188020967c32a14484b20b2cb (image/gif)
Uploaded FILE bceb8406ab3789e341be7df45184ccde9fda48efa4e58353332da1dd7034a4f1
Create ITEM 9350d56e-5ee1-495c-afe3-84c050d9cfdb
- NOTE :: 3d82606f-c5a6-4f49-b07d-a34cd9e724af
-- RESOURCE: 141190f07a90235f5bdde44d7c9b5dc9 (image/png)
Uploaded FILE a9fb5cd5a7dec22feca6d35011ef1246a71de79cfa486a2201d0df1ca77266e7
-- RESOURCE: 3a36536d4aefcd078cfcca9e47be8ad3 (application/pdf)
Uploaded FILE ce37b793b7c776d591eeac468625e5458625aea53055473c53d5bd171c87197f
-- RESOURCE: 730a3201895b92fe31f604b447fb62d4 (audio/mpeg)
Uploaded FILE b782271fb21ffb95d2ffde9cad5ff20a741151643ce1fff4fc160f3a481b9c7c
-- RESOURCE: a2a50c9d6abb3f1f19c9d001f771d942 (image/jpeg)
Uploaded FILE 1932d9eb6001caa39925ee6019f64699fadcc347b32f7c8101014dc2159e695a
-- RESOURCE: bb72e42078b736feed42c071cf7aa9ea (image/jpeg)
Uploaded FILE ca3a7dc6283a0dda311064047123f94f1ac4176a8a6b2babae0df6c8202412a9
-- RESOURCE: bf56c09a4f3d5cf381707e3382e3981f (image/png)
Uploaded FILE 2cd7a7f9ba006b0aafbeb76cc008638abc6f7b1a9fef7bf45ce0187ab615b22c
-- RESOURCE: e1a7be72b892022858656d5b54cc2215 (image/gif)
Uploaded FILE 51353dcaab885132b9a20d65c3cfd98cd78ee99cae6417c8f4aae34af2bec946
-- RESOURCE: c4876d9528a4db3c1f207c2213c61747 (image/png)
Uploaded FILE 70c6db37ce1c259ecc72bd6036c1b1111ee6880342032ddb9e21469b92714e42
-- RESOURCE: adb0a4d93d5bb31bd7023d1d86f9281f (image/png)
Uploaded FILE 622d42d1fcc2c57152de15521f54375d3c47a768e67cec8bfe00e349e63e6cdf
Create ITEM 3d82606f-c5a6-4f49-b07d-a34cd9e724af
- NOTE :: 60319654-6210-471e-af8b-b8de8fc2e56b
-- RESOURCE: 11a7bc2ed7ea229df6576921b625c8e0 (image/png)
Uploaded FILE af10caafba18b832e596c4f1b548f9592b58691af4c00adf840a1476577ba387
-- RESOURCE: 1497dc9b85d194badd130fddc7d1ed9f (image/png)
Uploaded FILE 78b7c413aa8c01155a2ed3105101b5230fe89b80a836d8f11922c96f909de56e
Create ITEM 60319654-6210-471e-af8b-b8de8fc2e56b
NOTEBOOK: 272aa8d6-0ba9-4a23-91b5-9c56664951fc
download 25 notes starting at 0
- NOTE :: f9ce0ea6-286a-4cb8-90a2-6021194dafd9
Create ITEM f9ce0ea6-286a-4cb8-90a2-6021194dafd9
%% Cell type:code id: tags:
``` python
#hide
from nbdev_template.core import *
from integrators import *
```
%% Cell type:markdown id: tags:
# Integrators
> Integrators integrate your information in the pod. They import your data from external services (Gmail, WhatsApp, iCloud, Facebook, etc.), enrich your data with indexers (face recognition, spam detection, duplicate photo detection), and execute actions (sending emails, automatically sharing selected photos with your family).
%% Cell type:markdown id: tags:
Integrators for memri have a single repo per language. This repo contains all Python integrators. It is built with [nbdev](https://github.com/fastai/nbdev).
%% Cell type:markdown id: tags:
## Install
%% Cell type:markdown id: tags:
`pip install -e integrators`
%% Cell type:markdown id: tags:
## How to use
%% Cell type:markdown id: tags:
Fill me in please! Don't forget code examples:
%% Cell type:code id: tags:
``` python
1+1
```
%% Output
2
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment