Commit c523da55 authored by Koen van der Veen's avatar Koen van der Veen
Browse files

init evernote

parent 5346577f
Showing with 365 additions and 1 deletion
+365 -1
# evernote
# evernote plugin
This plugin imports your Evernote notes via the Python Evernote SDK. It uses OAuth for login.
# Usage
Once authenticated, you can use the API as follows:
```
notebooks = note_store.listNotebooks()
for (i, notebook) in enumerate(notebooks):
(notes, files) = download_notebook(note_store, notebook.guid)
for note in notes:
print(note)
for (uid, resource) in files.items():
(raw, file_type) = download_resource(note_store, resource)
print(raw)
```
%% Cell type:code id: tags:
``` python
from evernote.api.client import EvernoteClient
import evernote.edam.notestore.NoteStore as NoteStore
import evernote.edam.userstore.constants as UserStoreConstants
from pathlib import Path
import os
import json
import xml.etree.ElementTree as ET
```
%% Cell type:markdown id: tags:
# Evernote Importer WIP
%% Cell type:code id: tags:
``` python
# !pip install git+https://github.com/evernote/evernote-sdk-python3
```
%% Cell type:code id: tags:
``` python
# pip install oauth2
```
%% Cell type:code id: tags:
``` python
# Lookup table from a resource's MIME type to the file extension used when
# the resource is written to disk. MIME types that are not listed here are
# handled by the caller.
mime_to_filetype = {
    'image/png': 'png',
    'image/gif': 'gif',
    'image/jpeg': 'jpg',
    'application/pdf': 'pdf',
    'audio/mpeg': 'mp3',
    'audio/x-m4a': 'm4a',
}
```
%% Cell type:code id: tags:
``` python
def create_token(use_oauth_flow, sandbox):
    """Return an Evernote authentication token.

    The token is obtained in one of three ways, all relative to the current
    working directory:

    * ``use_oauth_flow`` is false: read a developer token from ``dev_token``.
    * ``use_oauth_flow`` is true and ``oauth_token`` exists: reuse the saved
      OAuth token.
    * otherwise: run the interactive OAuth flow (needs ``consumer_secret``),
      then save the resulting token to ``oauth_token`` for later runs.

    Args:
        use_oauth_flow: When false, use the developer token instead of OAuth.
        sandbox: Passed through to ``EvernoteClient`` to select the sandbox
            service.

    Returns:
        The token string, with surrounding whitespace removed.

    Raises:
        SystemExit: With code 1 when a required credential file cannot be
            read.
    """
    # EVERNOTE_PATH = HOME_DIR / '.memri' / 'evernote/'
    EVERNOTE_PATH = Path.cwd()
    oauth_token_path = EVERNOTE_PATH / 'oauth_token'

    if not use_oauth_flow:
        # Use a developer token for authentication
        try:
            # strip(): a trailing newline left by an editor is not part of the token
            return (EVERNOTE_PATH / 'dev_token').read_text().strip()
        except OSError:
            print('ERROR: file "dev_token" seems to be missing,')
            raise SystemExit(1)

    if oauth_token_path.is_file():
        # Use saved OAuth token
        return oauth_token_path.read_text().strip()

    # Request OAuth token
    consumer_key = 'memri'
    try:
        consumer_secret = (EVERNOTE_PATH / 'consumer_secret').read_text().strip()
    except OSError:
        print('ERROR: file "consumer_secret" seems to be missing')
        raise SystemExit(1)

    client_1 = EvernoteClient(consumer_key=consumer_key, consumer_secret=consumer_secret, sandbox=sandbox)
    request_token = client_1.get_request_token('https://www.memri.io/')
    # TODO manual copy of oauth_verifier to terminal should be replaced with a callback url that
    # directs to a simple server on the pod
    print(client_1.get_authorize_url(request_token))
    print("1) Follow the link above (Ctrl-click in most terminals)\n"
          "2) Login if you're note already, and (re)authorize the app\n"
          "3) Copy the oauth_verifier, i.e. the part from the redirect url between '&oauth_verifier=' and "
          "'&sandbox_lnb=false'")
    oauth_verifier = input("Please enter oauth_verifier: ")
    client_2 = EvernoteClient(consumer_key=consumer_key, consumer_secret=consumer_secret, sandbox=sandbox)
    token = client_2.get_access_token(request_token['oauth_token'],
                                      request_token['oauth_token_secret'],
                                      oauth_verifier,
                                      return_full_dict=False)
    # write_text closes the file, so the token is on disk before we return
    oauth_token_path.write_text(token)
    return token
```
%% Cell type:code id: tags:
``` python
def download_notes_and_resources(note_store, notes):
    """Fetch the full content of each note and collect its resources.

    Args:
        note_store: Object exposing ``getNote``; used to load every note.
        notes: Iterable of note metadata objects with ``title`` and ``guid``.

    Returns:
        A ``(notes, files)`` tuple. ``notes`` is a list of JSON-serialisable
        dicts, one per note. ``files`` maps resource guid to the resource
        object attached to a note; the resource data itself is not fetched
        here.
    """
    exported_notes = []
    resources_by_guid = {}

    for note_meta in notes:
        print("- NOTE :: %s" % note_meta.title)
        # Positional flags; presumably withContent=True and the three
        # resource-data flags False - confirm against the Evernote SDK.
        full_note = note_store.getNote(note_meta.guid, True, False, False, False)

        # Replace missing collections with empty lists
        if full_note.resources is None:
            full_note.resources = []
        if full_note.tagGuids is None:
            full_note.tagGuids = []

        resources_by_guid.update({res.guid: res for res in full_note.resources})

        # Tags, notebook and creator are not exported yet.
        exported_notes.append({
            'externalId': full_note.guid,
            'title': full_note.title,
            'content': convert_html(full_note.content),
            'deleted': full_note.deleted is not None,
            # created/updated are divided by 1000 and truncated to an int
            'dateCreated': int(full_note.created / 1000),
            'dateModified': int(full_note.updated / 1000),
        })

    return (exported_notes, resources_by_guid)
```
%% Cell type:code id: tags:
``` python
def get_all_tags(note_store):
    """Return a mapping from tag guid to tag name for every tag in the account.

    Args:
        note_store: Object exposing ``listTags()``, which returns tag objects
            with ``guid`` and ``name`` attributes.

    Returns:
        A dict ``{tag.guid: tag.name}``; empty when there are no tags.
    """
    tags = note_store.listTags()
    print("Found ", len(tags), " tags:")
    return {tag.guid: tag.name for tag in tags}
```
%% Cell type:code id: tags:
``` python
def convert_html(original_html):
    """Convert an Evernote note body (XML) into an HTML fragment.

    The document is parsed as XML, ``en-media`` and ``en-crypt`` elements are
    removed, and every ``en-todo`` element is replaced by the literal text
    ``"TODO "``. Text that follows a removed element is kept by splicing it
    into the text that precedes the element.

    Args:
        original_html: The note content as an XML string.

    Returns:
        The serialised children of the root element joined into one string,
        or ``""`` when the input cannot be parsed or converted. Text placed
        directly in the root element (outside any child) is not included.
    """

    def _remove_keeping_text(parent, index, prefix):
        """Delete parent[index], keeping ``prefix`` plus the element's tail text."""
        spliced = prefix + (parent[index].tail or '')
        if spliced:
            if index > 0:
                # The text now trails the previous sibling
                previous = parent[index - 1]
                previous.tail = (previous.tail or '') + spliced
            else:
                # No previous sibling: the text belongs to the parent itself
                parent.text = (parent.text or '') + spliced
        del parent[index]

    def _strip_evernote_tags(parent):
        """Recursively remove Evernote-specific elements below ``parent``."""
        # Walk backwards so a deletion never shifts indices still to be visited
        for index in range(len(parent) - 1, -1, -1):
            child = parent[index]
            if len(child) >= 1:
                _strip_evernote_tags(child)
            if child.tag in ('en-media', 'en-crypt'):
                _remove_keeping_text(parent, index, '')
            elif child.tag == 'en-todo':
                _remove_keeping_text(parent, index, 'TODO ')

    try:
        root = ET.fromstring(original_html)
        root.tag = 'div'
        _strip_evernote_tags(root)
        return ''.join(ET.tostring(child).decode('utf-8') for child in root)
    except Exception:
        # Best-effort conversion: a note that cannot be parsed yields empty
        # content rather than aborting the whole import.
        return ""
```
%% Cell type:code id: tags:
``` python
def download_resource(note_store, resource):
    """Download the data of a single resource (attachment).

    Args:
        note_store: Object exposing ``getResource``.
        resource: Resource object whose ``guid`` identifies what to fetch.

    Returns:
        A ``(body, file_suffix)`` tuple: the resource body as returned by the
        note store, and the file extension looked up in ``mime_to_filetype``
        (``"tbd"`` when the MIME type is not in that table).
    """
    # Positional flags; presumably the first one requests the resource data -
    # confirm against the Evernote SDK.
    resource_content = note_store.getResource(resource.guid, True, False, False, False)
    mime = resource_content.mime

    file_suffix = mime_to_filetype.get(mime)
    if file_suffix is None:
        print("File type not found: {}".format(mime))
        file_suffix = "tbd"

    print("-- RESOURCE: %s (%s)" % (resource_content.data.bodyHash.hex(), mime))
    return resource_content.data.body, file_suffix
```
%% Cell type:code id: tags:
``` python
def download_notebook(note_store, notebook_uid):
    """Download every note, and collect every resource, of one notebook.

    Notes are requested page by page because the service limits how many
    notes can be requested at a time.

    Args:
        note_store: Object exposing ``findNotesMetadata`` (and whatever
            ``download_notes_and_resources`` needs).
        notebook_uid: Guid of the notebook to download.

    Returns:
        A ``(notes, files)`` tuple: the list of exported note dicts and a
        dict mapping resource guid to resource object, accumulated over all
        pages.
    """
    search = NoteStore.NoteFilter()
    search.ascending = False
    search.notebookGuid = notebook_uid

    spec = NoteStore.NotesMetadataResultSpec()
    spec.includeTitle = True
    spec.includeNotebookGuid = True
    spec.includeTagGuids = True

    num_notes_per_fetch = 25
    num_notes_fetched = 0
    notes_in_notebook = []
    files_in_notebook = {}

    # This loop exists because there is a limit to the amount of notes we can request at a time, so we iterate
    while True:
        print('download {} notes starting at {}'.format(num_notes_per_fetch, num_notes_fetched))
        result = note_store.findNotesMetadata(search, num_notes_fetched, num_notes_per_fetch, spec)
        page = result.notes or []
        (notes_batch, files_batch) = download_notes_and_resources(note_store, page)
        notes_in_notebook.extend(notes_batch)
        files_in_notebook.update(files_batch)

        # Advance by what was actually returned: a page may hold fewer notes
        # than requested, and stepping by the page size would skip the rest.
        num_notes_fetched += len(page)
        # An empty page means no further progress is possible; stop instead
        # of requesting the same offset forever.
        if not page or num_notes_fetched >= result.totalNotes:
            break

    return (notes_in_notebook, files_in_notebook)
```
%% Cell type:code id: tags:
``` python
def main(use_oauth_flow, sandbox, download_resources):
    """Authenticate with Evernote and dump all notebooks into ``./downloaded``.

    Each note is written as ``<title>.json``; when ``download_resources`` is
    true, each attached resource is written as ``<guid>.<extension>``.

    Args:
        use_oauth_flow: Passed to ``create_token``; false selects the
            developer token.
        sandbox: Whether to talk to the Evernote sandbox service.
        download_resources: Whether to download and store note attachments.
    """
    token = create_token(use_oauth_flow, sandbox)
    client = EvernoteClient(token=token, sandbox=sandbox, china=False)

    user_store = client.get_user_store()
    version_ok = user_store.checkVersion("Evernote EDAMTest (Python)",
                                         UserStoreConstants.EDAM_VERSION_MAJOR,
                                         UserStoreConstants.EDAM_VERSION_MINOR)
    print("Is my Evernote API version up to date? ", str(version_ok))
    print("For user : " + str(user_store.getUser()))

    note_store = client.get_note_store()
    tag_names = get_all_tags(note_store)
    print("tag dictionary : ", tag_names)
    # TODO: this dictionary holds [tag_uid -> tag_name], upload to POD

    notebooks = note_store.listNotebooks()
    print("Found ", len(notebooks), " notebooks:")

    output_dir = Path.cwd() / 'downloaded'
    # Characters that would make a note title span several path components
    path_separators = {os.sep, os.altsep, '/'} - {None}

    for notebook in notebooks:
        print("NOTEBOOK: {}".format(notebook.guid))
        (notes, files) = download_notebook(note_store, notebook.guid)

        # TODO: This loop writes all the notes to the file system
        # replace this loop by uploading to the POD
        output_dir.mkdir(parents=True, exist_ok=True)
        for note in notes:
            # A title such as "a/b" must not be interpreted as a directory
            safe_title = ''.join('_' if ch in path_separators else ch for ch in str(note['title']))
            # NOTE: notes sharing a title still overwrite each other's file
            with open(output_dir / f"{safe_title}.json", "w") as outfile:
                outfile.write(json.dumps(note))

        # TODO: This loop writes all the files to the file system
        # replace this loop by uploading to the POD
        if download_resources:
            for (uid, resource) in files.items():
                (raw, file_type) = download_resource(note_store, resource)
                with open(output_dir / f"{uid}.{file_type}", "wb") as outfile:
                    outfile.write(bytearray(raw))
```
%% Cell type:markdown id: tags:
# Instructions:
Authentication takes a few steps; for more information, see:
https://dev.evernote.com/doc/articles/authentication.php
Basically there are 3 files:
- "consumer_secret", which is Memri's secret key. Should be present in the CWD!
- "oauth_token", after the user logs in, this token is returned and saved, can then be used to access the user's notes
- "dev_token", similar to the "oauth_token", but can be requested directly (without consumer_secret), only usable on sandbox accounts
%% Cell type:code id: tags:
``` python
# Run the importer against the sandbox using the OAuth flow, including attachments
main(use_oauth_flow=True, sandbox=True, download_resources=True)
```
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment