Commit 844ba580 authored by Koen van der Veen's avatar Koen van der Veen
Browse files

Merge branch 'plugin_base' into 'dev'

Plugin base

See merge request !16
parents e9deabe1 18102b74
Pipeline #2553 passed with stages
in 4 minutes and 28 seconds
Showing with 1411 additions and 257 deletions
+1411 -257
......@@ -15,6 +15,9 @@ entries:
- output: web,pdf
title: ItemBase
url: itembase.html
- output: web,pdf
title: Plugin
url: plugin.pluginbase.html
output: web
title: Getting Started
output: web
......
......@@ -17,7 +17,7 @@
{% if folder.output contains "web" %}
<li>
<a href="#" class="firstLevelHeader">{{ folder.title }}</a>
<ul>
<ul class="sidebarlist" style="display:block;">
{% for folderitem in folder.folderitems %}
{% if folderitem.output contains "web" %}
{% if folderitem.external_url %}
......
......@@ -50,6 +50,8 @@ h4 code {
/* background-color: white; */
}
#mysidebar .firstLevelHeader {
text-transform: uppercase;
font-style: normal;
......@@ -70,6 +72,14 @@ h4 code {
background-color: white !important;
}
.output_wrapper{
padding-top: 8px;
}
.sidebarlist{
display:block !important;
}
.nav ul li a{
color: #554C63 !important;
background-color: white !important;
......
......@@ -47,7 +47,7 @@ nb_path: "nbs/indexers.indexer.ipynb"
<div class="output_markdown rendered_html output_subarea ">
<h2 id="IndexerBase" class="doc_header"><code>class</code> <code>IndexerBase</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/indexers/indexer.py#L20" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>IndexerBase</code>(<strong><code>indexerClass</code></strong>=<em><code>None</code></em>, <strong>*<code>args</code></strong>, <strong>**<code>kwargs</code></strong>) :: <code>Indexer</code></p>
<h2 id="IndexerBase" class="doc_header"><code>class</code> <code>IndexerBase</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/indexers/indexer.py#L20" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>IndexerBase</code>(<strong><code>pluginClass</code></strong>=<em><code>None</code></em>, <strong>*<code>args</code></strong>, <strong>**<code>kwargs</code></strong>) :: <code>Indexer</code></p>
</blockquote>
<p>Item is the baseclass for all of the data classes.</p>
......@@ -173,7 +173,7 @@ when retrieving it from the database.</p>
<div class="output_markdown rendered_html output_subarea ">
<h4 id="run_integrator" class="doc_header"><code>run_integrator</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/indexers/indexer.py#L97" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>run_integrator</code>(<strong><code>environ</code></strong>=<em><code>None</code></em>, <strong><code>pod_full_address</code></strong>=<em><code>None</code></em>, <strong><code>integrator_run_id</code></strong>=<em><code>None</code></em>, <strong><code>database_key</code></strong>=<em><code>None</code></em>, <strong><code>owner_key</code></strong>=<em><code>None</code></em>, <strong><code>verbose</code></strong>=<em><code>False</code></em>)</p>
<h4 id="run_integrator" class="doc_header"><code>run_integrator</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/indexers/indexer.py#L96" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>run_integrator</code>(<strong><code>environ</code></strong>=<em><code>None</code></em>, <strong><code>pod_full_address</code></strong>=<em><code>None</code></em>, <strong><code>integrator_run_id</code></strong>=<em><code>None</code></em>, <strong><code>database_key</code></strong>=<em><code>None</code></em>, <strong><code>owner_key</code></strong>=<em><code>None</code></em>, <strong><code>verbose</code></strong>=<em><code>False</code></em>)</p>
</blockquote>
<p>Runs an integrator. You can either provide the run settings as parameters to this function (for local testing)
or via environment variables (this is how the pod communicates with integrators).</p>
......@@ -200,7 +200,7 @@ or via environment variables (this is how the pod communicates with integrators)
<span class="k">def</span> <span class="nf">create_toy_dataset</span><span class="p">(</span><span class="n">client</span><span class="p">):</span>
<span class="n">location</span> <span class="o">=</span> <span class="n">Location</span><span class="o">.</span><span class="n">from_data</span><span class="p">(</span><span class="n">latitude</span><span class="o">=-</span><span class="mf">37.81</span><span class="p">,</span> <span class="n">longitude</span><span class="o">=</span><span class="mf">144.96</span><span class="p">)</span>
<span class="n">address</span> <span class="o">=</span> <span class="n">Address</span><span class="o">.</span><span class="n">from_data</span><span class="p">()</span>
<span class="n">indexer</span> <span class="o">=</span> <span class="n">Indexer</span><span class="o">.</span><span class="n">from_data</span><span class="p">(</span><span class="n">indexerClass</span><span class="o">=</span><span class="s2">&quot;GeoIndexer&quot;</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;GeoIndexer&quot;</span><span class="p">)</span>
<span class="n">indexer</span> <span class="o">=</span> <span class="n">Indexer</span><span class="o">.</span><span class="n">from_data</span><span class="p">(</span><span class="n">pluginClass</span><span class="o">=</span><span class="s2">&quot;GeoIndexer&quot;</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;GeoIndexer&quot;</span><span class="p">)</span>
<span class="n">indexer_run</span> <span class="o">=</span> <span class="n">IndexerRun</span><span class="o">.</span><span class="n">from_data</span><span class="p">(</span><span class="n">progress</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">targetDataType</span><span class="o">=</span><span class="s2">&quot;Address&quot;</span><span class="p">)</span>
<span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="p">[</span><span class="n">location</span><span class="p">,</span> <span class="n">address</span><span class="p">,</span> <span class="n">indexer</span><span class="p">,</span> <span class="n">indexer_run</span><span class="p">]:</span> <span class="n">client</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
......@@ -233,7 +233,7 @@ or via environment variables (this is how the pod communicates with integrators)
<div class="output_markdown rendered_html output_subarea ">
<h4 id="generate_test_env" class="doc_header"><code>generate_test_env</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/indexers/indexer.py#L129" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>generate_test_env</code>(<strong><code>client</code></strong>, <strong><code>indexer_run</code></strong>)</p>
<h4 id="generate_test_env" class="doc_header"><code>generate_test_env</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/indexers/indexer.py#L128" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>generate_test_env</code>(<strong><code>client</code></strong>, <strong><code>indexer_run</code></strong>)</p>
</blockquote>
</div>
......
......@@ -47,7 +47,7 @@ nb_path: "nbs/itembase.ipynb"
<div class="output_markdown rendered_html output_subarea ">
<h2 id="Edge" class="doc_header"><code>class</code> <code>Edge</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/data/itembase.py#L58" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>Edge</code>(<strong><code>source</code></strong>, <strong><code>target</code></strong>, <strong><code>_type</code></strong>, <strong><code>label</code></strong>=<em><code>None</code></em>, <strong><code>sequence</code></strong>=<em><code>None</code></em>, <strong><code>created</code></strong>=<em><code>False</code></em>, <strong><code>reverse</code></strong>=<em><code>True</code></em>)</p>
<h2 id="Edge" class="doc_header"><code>class</code> <code>Edge</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/data/itembase.py#L59" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>Edge</code>(<strong><code>source</code></strong>, <strong><code>target</code></strong>, <strong><code>_type</code></strong>, <strong><code>label</code></strong>=<em><code>None</code></em>, <strong><code>sequence</code></strong>=<em><code>None</code></em>, <strong><code>created</code></strong>=<em><code>False</code></em>, <strong><code>reverse</code></strong>=<em><code>True</code></em>)</p>
</blockquote>
<p>An edge makes a link between two <a href="/pymemri/itembase.html#ItemBase"><code>ItemBase</code></a> Items. You won't use this class a lot in practice, as edges are
abstracted away for normal users. When items are retrieved from the database, the edges are parsed automatically.
......@@ -81,7 +81,7 @@ When you add an edge between to items within pymemri, you will often use <a href
<div class="output_markdown rendered_html output_subarea ">
<h4 id="Edge.traverse" class="doc_header"><code>Edge.traverse</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/data/itembase.py#L94" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>Edge.traverse</code>(<strong><code>start</code></strong>)</p>
<h4 id="Edge.traverse" class="doc_header"><code>Edge.traverse</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/data/itembase.py#L95" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>Edge.traverse</code>(<strong><code>start</code></strong>)</p>
</blockquote>
<p>We can traverse an edge starting from the source to the target or vice versa. In practice we often call
item.some_edge_type, which calls item.traverse(edgetype), which in turn calls this function.</p>
......@@ -93,6 +93,25 @@ item.some_edge_type, which calls item.traverse(edgetype), which in turn calls th
</div>
</div>
</div>
{% endraw %}
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
<div class="input">
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">ITEMBASE_PROPERTIES</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;dateAccessed&quot;</span><span class="p">,</span> <span class="s2">&quot;dateCreated&quot;</span><span class="p">,</span> <span class="s2">&quot;dateModified&quot;</span><span class="p">,</span> <span class="s2">&quot;deleted&quot;</span><span class="p">,</span> <span class="s2">&quot;externalId&quot;</span><span class="p">,</span> <span class="s2">&quot;itemDescription&quot;</span><span class="p">,</span>
<span class="s2">&quot;starred&quot;</span><span class="p">,</span> <span class="s2">&quot;version&quot;</span><span class="p">,</span> <span class="s2">&quot;id&quot;</span><span class="p">,</span> <span class="s2">&quot;importJson&quot;</span><span class="p">,</span> <span class="s2">&quot;name&quot;</span><span class="p">,</span> <span class="s2">&quot;repository&quot;</span><span class="p">,</span> <span class="s2">&quot;icon&quot;</span><span class="p">,</span> <span class="s2">&quot;bundleImage&quot;</span><span class="p">,</span>
<span class="s2">&quot;runDestination&quot;</span><span class="p">,</span> <span class="s2">&quot;pluginClass&quot;</span><span class="p">]</span>
</pre></div>
</div>
</div>
</div>
</div>
{% endraw %}
......@@ -107,7 +126,7 @@ item.some_edge_type, which calls item.traverse(edgetype), which in turn calls th
<div class="output_markdown rendered_html output_subarea ">
<h2 id="ItemBase" class="doc_header"><code>class</code> <code>ItemBase</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/data/itembase.py#L106" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>ItemBase</code>(<strong><code>id</code></strong>=<em><code>None</code></em>)</p>
<h2 id="ItemBase" class="doc_header"><code>class</code> <code>ItemBase</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/data/itembase.py#L107" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>ItemBase</code>(<strong><code>id</code></strong>=<em><code>None</code></em>)</p>
</blockquote>
<p>Provides a base class for all items. All items in the schema inherit from this class, and it provides some
basic functionality for consistency and to enable easier usage.</p>
......@@ -140,7 +159,7 @@ basic functionality for consistency and to enable easier usage.</p>
<div class="output_markdown rendered_html output_subarea ">
<h2 id="Item" class="doc_header"><code>class</code> <code>Item</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/data/itembase.py#L228" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>Item</code>(<strong><code>dateAccessed</code></strong>=<em><code>None</code></em>, <strong><code>dateCreated</code></strong>=<em><code>None</code></em>, <strong><code>dateModified</code></strong>=<em><code>None</code></em>, <strong><code>deleted</code></strong>=<em><code>None</code></em>, <strong><code>externalId</code></strong>=<em><code>None</code></em>, <strong><code>itemDescription</code></strong>=<em><code>None</code></em>, <strong><code>starred</code></strong>=<em><code>None</code></em>, <strong><code>version</code></strong>=<em><code>None</code></em>, <strong><code>id</code></strong>=<em><code>None</code></em>, <strong><code>importJson</code></strong>=<em><code>None</code></em>, <strong><code>changelog</code></strong>=<em><code>None</code></em>, <strong><code>label</code></strong>=<em><code>None</code></em>, <strong><code>genericAttribute</code></strong>=<em><code>None</code></em>, <strong><code>measure</code></strong>=<em><code>None</code></em>, <strong><code>sharedWith</code></strong>=<em><code>None</code></em>) :: <a href="/pymemri/itembase.html#ItemBase"><code>ItemBase</code></a></p>
<h2 id="Item" class="doc_header"><code>class</code> <code>Item</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/data/itembase.py#L226" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>Item</code>(<strong>**<code>kwargs</code></strong>) :: <a href="/pymemri/itembase.html#ItemBase"><code>ItemBase</code></a></p>
</blockquote>
<p>Item is the baseclass for all of the data classes.</p>
......@@ -172,7 +191,7 @@ basic functionality for consistency and to enable easier usage.</p>
<div class="output_markdown rendered_html output_subarea ">
<h4 id="ItemBase.add_edge" class="doc_header"><code>ItemBase.add_edge</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/data/itembase.py#L135" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>ItemBase.add_edge</code>(<strong><code>name</code></strong>, <strong><code>val</code></strong>)</p>
<h4 id="ItemBase.add_edge" class="doc_header"><code>ItemBase.add_edge</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/data/itembase.py#L133" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>ItemBase.add_edge</code>(<strong><code>name</code></strong>, <strong><code>val</code></strong>)</p>
</blockquote>
<p>Creates an edge of type name and makes it point to val</p>
......@@ -197,7 +216,7 @@ basic functionality for consistency and to enable easier usage.</p>
<div class="output_markdown rendered_html output_subarea ">
<h4 id="ItemBase.is_expanded" class="doc_header"><code>ItemBase.is_expanded</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/data/itembase.py#L144" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>ItemBase.is_expanded</code>()</p>
<h4 id="ItemBase.is_expanded" class="doc_header"><code>ItemBase.is_expanded</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/data/itembase.py#L142" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>ItemBase.is_expanded</code>()</p>
</blockquote>
<p>Returns whether the node is expanded. An expanded node has retrieved the nodes that are
<em>directly</em> connected to it
......@@ -213,25 +232,55 @@ from the pod, and stored their values via edges in the object.</p>
</div>
{% endraw %}
<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
<div class="text_cell_render border-box-sizing rendered_html">
<h1 id="Usage">Usage<a class="anchor-link" href="#Usage"> </a></h1>
</div>
</div>
</div>
<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
<div class="text_cell_render border-box-sizing rendered_html">
<p>With the <a href="/pymemri/itembase.html#ItemBase"><code>ItemBase</code></a> and <a href="/pymemri/itembase.html#Edge"><code>Edge</code></a> classes we can create an item and its surrounding graph. The schema is defined in schema.py. In general, we want to use the from_data staticmethod to generate new items, because it ensures that edges are linked from both the source and the target object. Let's make a new item and add it to the pod.</p>
</div>
</div>
</div>
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
<div class="input">
<div class="output_wrapper">
<div class="output">
<div class="output_area">
<div class="output_markdown rendered_html output_subarea ">
<h4 id="ItemBase.expand" class="doc_header"><code>ItemBase.expand</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/data/itembase.py#L184" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>ItemBase.expand</code>(<strong><code>api</code></strong>)</p>
</blockquote>
<p>Expands a node (retrieves all directly connected nodes and adds them to the object).</p>
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">class</span> <span class="nc">MyItem</span><span class="p">(</span><span class="n">Item</span><span class="p">):</span>
<span class="n">properties</span> <span class="o">=</span> <span class="n">Item</span><span class="o">.</span><span class="n">properties</span> <span class="o">+</span> <span class="p">[</span><span class="s2">&quot;name&quot;</span><span class="p">,</span> <span class="s2">&quot;age&quot;</span><span class="p">]</span>
<span class="n">edges</span> <span class="o">=</span> <span class="n">Item</span><span class="o">.</span><span class="n">edges</span> <span class="o">+</span> <span class="p">[</span><span class="s2">&quot;friend&quot;</span><span class="p">]</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">age</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span><span class="n">friend</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">name</span>
<span class="bp">self</span><span class="o">.</span><span class="n">age</span> <span class="o">=</span> <span class="n">age</span>
<span class="bp">self</span><span class="o">.</span><span class="n">friend</span> <span class="o">=</span> <span class="n">friend</span> <span class="k">if</span> <span class="n">friend</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="p">[]</span>
</pre></div>
</div>
</div>
</div>
</div>
{% endraw %}
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
<div class="input">
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">from</span> <span class="nn">pymemri.pod.client</span> <span class="kn">import</span> <span class="n">PodClient</span>
<span class="n">client</span> <span class="o">=</span> <span class="n">PodClient</span><span class="p">()</span>
</pre></div>
</div>
</div>
</div>
......@@ -241,43 +290,90 @@ from the pod, and stored their values via edges in the object.</p>
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
<div class="input">
<div class="output_wrapper">
<div class="output">
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">assert</span> <span class="n">client</span><span class="o">.</span><span class="n">add_to_schema</span><span class="p">(</span><span class="n">MyItem</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">&quot;abc&quot;</span><span class="p">,</span> <span class="n">age</span><span class="o">=</span><span class="mi">1</span><span class="p">))</span>
</pre></div>
<div class="output_area">
</div>
</div>
</div>
</div>
{% endraw %}
<div class="output_markdown rendered_html output_subarea ">
<h4 id="ItemBase.inherit_funcs" class="doc_header"><code>ItemBase.inherit_funcs</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/data/itembase.py#L220" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>ItemBase.inherit_funcs</code>(<strong><code>other</code></strong>)</p>
</blockquote>
<p>This function can be used to inherit new functionality from a subclass. This is a workaround for
the fact that Python does not provide extensions of classes defined in a different file that are
dynamic enough for our use case.</p>
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
<div class="input">
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">x</span> <span class="o">=</span> <span class="n">MyItem</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">&quot;me&quot;</span><span class="p">,</span> <span class="n">age</span><span class="o">=</span><span class="mi">30</span><span class="p">)</span>
<span class="n">x</span><span class="o">.</span><span class="n">add_edge</span><span class="p">(</span><span class="s2">&quot;friend&quot;</span><span class="p">,</span> <span class="n">MyItem</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">&quot;my friend&quot;</span><span class="p">,</span> <span class="n">age</span><span class="o">=</span><span class="mi">31</span><span class="p">))</span>
<span class="n">x</span><span class="o">.</span><span class="n">add_edge</span><span class="p">(</span><span class="s2">&quot;friend&quot;</span><span class="p">,</span> <span class="n">MyItem</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">&quot;my friend2&quot;</span><span class="p">,</span> <span class="n">age</span><span class="o">=</span><span class="mi">32</span><span class="p">))</span>
</pre></div>
</div>
</div>
</div>
</div>
{% endraw %}
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
<div class="input">
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">assert</span> <span class="n">client</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
</pre></div>
</div>
</div>
</div>
</div>
{% endraw %}
<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
<div class="text_cell_render border-box-sizing rendered_html">
<h1 id="Usage">Usage<a class="anchor-link" href="#Usage"> </a></h1>
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
<div class="input">
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">y</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">id</span><span class="p">)</span>
</pre></div>
</div>
</div>
</div>
</div>
<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
<div class="text_cell_render border-box-sizing rendered_html">
<p>With the <a href="/pymemri/itembase.html#ItemBase"><code>ItemBase</code></a> and <a href="/pymemri/itembase.html#Edge"><code>Edge</code></a> classes we can create an item and its surrounding graph. The schema is defined in schema.py, in general we want to use the from_data staticmethod to generate new items, because it ensures that edges are linked from both the source and the target object. Let's make an email item and create it in the pod.</p>
{% endraw %}
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
<div class="input">
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">y</span><span class="o">.</span><span class="n">friend</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span>
</pre></div>
</div>
</div>
</div>
</div>
{% endraw %}
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
......@@ -285,13 +381,29 @@ dynamic enough for our use case.</p>
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">item</span> <span class="o">=</span> <span class="n">EmailMessage</span><span class="o">.</span><span class="n">from_data</span><span class="p">(</span><span class="n">content</span><span class="o">=</span><span class="s2">&quot;example content field&quot;</span><span class="p">)</span>
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">y</span><span class="o">.</span><span class="n">friend</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">name</span>
</pre></div>
</div>
</div>
</div>
<div class="output_wrapper">
<div class="output">
<div class="output_area">
<div class="output_text output_subarea output_execute_result">
<pre>&#39;my friend&#39;</pre>
</div>
</div>
</div>
</div>
</div>
{% endraw %}
......
---
title: Plugins
keywords: fastai
sidebar: home_sidebar
nb_path: "nbs/plugin.pluginbase.ipynb"
---
<!--
#################################################
### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
#################################################
# file to edit: nbs/plugin.pluginbase.ipynb
# command to build the docs after a change: nbdev_build_docs
-->
<div class="container" id="notebook-container">
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
</div>
{% endraw %}
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
</div>
{% endraw %}
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
</div>
{% endraw %}
<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
<div class="text_cell_render border-box-sizing rendered_html">
<p>Let's use the following plugin as an example of how we can start plugins.</p>
</div>
</div>
</div>
<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
<div class="text_cell_render border-box-sizing rendered_html">
<div class="highlight"><pre><span></span><span class="k">class</span> <span class="nc">MyPlugin</span><span class="p">(</span><span class="n">PluginBase</span><span class="p">):</span>
<span class="n">properties</span> <span class="o">=</span> <span class="n">PluginBase</span><span class="o">.</span><span class="n">properties</span>
<span class="n">edges</span><span class="o">=</span> <span class="n">PluginBase</span><span class="o">.</span><span class="n">edges</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pluginPackage</span><span class="o">=</span><span class="s2">&quot;pymemri.plugin.pluginbase&quot;</span>
<span class="k">def</span> <span class="nf">run</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">run</span><span class="p">,</span> <span class="n">client</span><span class="p">):</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;running&quot;</span><span class="p">)</span>
</pre></div>
</div>
</div>
</div>
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
<div class="input">
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">from</span> <span class="nn">pymemri.pod.client</span> <span class="kn">import</span> <span class="n">PodClient</span>
<span class="n">client</span> <span class="o">=</span> <span class="n">PodClient</span><span class="p">()</span>
</pre></div>
</div>
</div>
</div>
</div>
{% endraw %}
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
<div class="input">
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">assert</span> <span class="n">client</span><span class="o">.</span><span class="n">add_to_schema</span><span class="p">(</span><span class="n">MyPlugin</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">&quot;abc&quot;</span><span class="p">,</span> <span class="n">data_query</span><span class="o">=</span><span class="s2">&quot;abc&quot;</span><span class="p">))</span>
<span class="k">assert</span> <span class="n">client</span><span class="o">.</span><span class="n">add_to_schema</span><span class="p">(</span><span class="n">PluginRun</span><span class="p">())</span>
</pre></div>
</div>
</div>
</div>
</div>
{% endraw %}
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
<div class="input">
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">plugin</span> <span class="o">=</span> <span class="n">MyPlugin</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">&quot;abc&quot;</span><span class="p">,</span> <span class="n">data_query</span><span class="o">=</span><span class="s2">&quot;abc&quot;</span><span class="p">)</span>
<span class="n">run</span> <span class="o">=</span> <span class="n">PluginRun</span><span class="p">()</span>
<span class="n">run</span><span class="o">.</span><span class="n">add_edge</span><span class="p">(</span><span class="s2">&quot;plugin&quot;</span><span class="p">,</span> <span class="n">plugin</span><span class="p">)</span>
</pre></div>
</div>
</div>
</div>
</div>
{% endraw %}
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
<div class="input">
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">client</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">run</span><span class="p">)</span>
<span class="n">client</span><span class="o">.</span><span class="n">create</span><span class="p">(</span><span class="n">plugin</span><span class="p">)</span>
<span class="n">client</span><span class="o">.</span><span class="n">create_edge</span><span class="p">(</span><span class="n">run</span><span class="o">.</span><span class="n">get_edges</span><span class="p">(</span><span class="s2">&quot;plugin&quot;</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span>
</pre></div>
</div>
</div>
</div>
<div class="output_wrapper">
<div class="output">
<div class="output_area">
<div class="output_text output_subarea output_execute_result">
<pre>True</pre>
</div>
</div>
</div>
</div>
</div>
{% endraw %}
<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
<div class="text_cell_render border-box-sizing rendered_html">
<h1 id="Running-your-plugin">Running your plugin<a class="anchor-link" href="#Running-your-plugin"> </a></h1>
</div>
</div>
</div>
<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
<div class="text_cell_render border-box-sizing rendered_html">
<p>Plugins can be started using the pymemri <a href="/pymemri/plugin.pluginbase.html#run_plugin"><code>run_plugin</code></a> CLI. To use the CLI, you can either pass your run arguments as parameters, or set them as environment variables. If both are set, the CLI will prefer the passed arguments.</p>
</div>
</div>
</div>
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
</div>
{% endraw %}
<div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
<div class="text_cell_render border-box-sizing rendered_html">
<h2 id="CLI">CLI<a class="anchor-link" href="#CLI"> </a></h2>
</div>
</div>
</div>
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
<div class="output_wrapper">
<div class="output">
<div class="output_area">
<div class="output_markdown rendered_html output_subarea ">
<h4 id="run_plugin" class="doc_header"><code>run_plugin</code><a href="https://gitlab.memri.io/memri/pymemri/tree/prod/pymemri/plugin/pluginbase.py#L122" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>run_plugin</code>(<strong><code>pod_full_address</code></strong>:<code>Param object at 0x7fc036ab0b50&gt;</code>=<em><code>None</code></em>, <strong><code>plugin_run_id</code></strong>:<code>Param object at 0x7fc035b497d0&gt;</code>=<em><code>None</code></em>, <strong><code>database_key</code></strong>:<code>Param object at 0x7fc036cea550&gt;</code>=<em><code>None</code></em>, <strong><code>owner_key</code></strong>:<code>Param object at 0x7fc036cea5d0&gt;</code>=<em><code>None</code></em>)</p>
</blockquote>
</div>
</div>
</div>
</div>
</div>
{% endraw %}
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
</div>
{% endraw %}
{% raw %}
<div class="cell border-box-sizing code_cell rendered">
<div class="input">
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="o">!</span>run_plugin --pod_full_address<span class="o">=</span><span class="nv">$DEFAULT_POD_ADDRESS</span> --plugin_run_id<span class="o">=</span><span class="nv">$run</span>.id --owner_key<span class="o">=</span><span class="nv">$client</span>.owner_key <span class="err">\</span>
<span class="o">--</span><span class="n">database_key</span><span class="o">=</span><span class="err">$</span><span class="n">client</span><span class="o">.</span><span class="n">database_key</span>
</pre></div>
</div>
</div>
</div>
<div class="output_wrapper">
<div class="output">
<div class="output_area">
<div class="output_subarea output_stream output_stdout output_text">
<pre>Used arguments passed to `run_plugin()` (ignoring environment)
pod_full_address=http://localhost:3030
plugin_run_id=61274342a226fe9ecb5d0efeb50ebc64
database_key=2275799687202554321586156394256447011413538728384161840804653842
owner_key=7877526860202989541720539875881419199482049862866439964933736900
running
</pre>
</div>
</div>
</div>
</div>
</div>
{% endraw %}
</div>
......@@ -243,17 +243,12 @@ Property myAge not defined in Schema (attempted to use it for json value 20) -->
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">class</span> <span class="nc">Dog</span><span class="p">(</span><span class="n">Item</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">,</span> <span class="n">age</span><span class="p">,</span> <span class="nb">id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">deleted</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="nb">id</span><span class="o">=</span><span class="nb">id</span><span class="p">,</span> <span class="n">deleted</span><span class="o">=</span><span class="n">deleted</span><span class="p">)</span>
<span class="n">properties</span> <span class="o">=</span> <span class="n">Item</span><span class="o">.</span><span class="n">properties</span> <span class="o">+</span> <span class="p">[</span><span class="s2">&quot;name&quot;</span><span class="p">,</span> <span class="s2">&quot;age&quot;</span><span class="p">]</span>
<span class="n">edges</span> <span class="o">=</span> <span class="n">Item</span><span class="o">.</span><span class="n">edges</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">age</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">name</span>
<span class="bp">self</span><span class="o">.</span><span class="n">age</span> <span class="o">=</span> <span class="n">age</span>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">from_json</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">json</span><span class="p">):</span>
<span class="nb">id</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;id&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="n">name</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;name&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="n">age</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;age&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">cls</span><span class="p">(</span><span class="nb">id</span><span class="o">=</span><span class="nb">id</span><span class="p">,</span><span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">,</span><span class="n">age</span><span class="o">=</span><span class="n">age</span><span class="p">)</span>
</pre></div>
</div>
......@@ -369,7 +364,7 @@ Property myAge not defined in Schema (attempted to use it for json value 20) -->
<div class="output_text output_subarea output_execute_result">
<pre>[{&#39;item&#39;: Person (#a63b11ebe6996b2efb00b75c42ccd930), &#39;name&#39;: &#39;sender&#39;}]</pre>
<pre>[{&#39;item&#39;: Person (#806a88787321a9a81054e63d17ad2fbb), &#39;name&#39;: &#39;sender&#39;}]</pre>
</div>
</div>
......@@ -545,9 +540,9 @@ Property myAge not defined in Schema (attempted to use it for json value 20) -->
<div class="output_text output_subarea output_execute_result">
<pre>[Person (#a63b11ebe6996b2efb00b75c42ccd930),
Person (#16560d3dd6550a6aa58d88405fab177a),
Person (#fa5e4f210ec5b928e7eb263fcabf252f)]</pre>
<pre>[Person (#806a88787321a9a81054e63d17ad2fbb),
Person (#1f8e823a9b9ddaedf8dd37c9682beb0b),
Person (#cba8e451efdfe7be87dfeb10444ee670)]</pre>
</div>
</div>
......@@ -753,23 +748,6 @@ Property myAge not defined in Schema (attempted to use it for json value 20) -->
</div>
</div>
<div class="output_wrapper">
<div class="output">
<div class="output_area">
<div class="output_subarea output_stream output_stdout output_text">
<pre>creating
creating photo file
creating
Uploaded file
</pre>
</div>
</div>
</div>
</div>
</div>
{% endraw %}
......@@ -812,7 +790,7 @@ Uploaded file
<div class="output_text output_subarea output_execute_result">
<pre>IPhoto (#e49db579f6427af57651fc3ca5efafcf)</pre>
<pre>IPhoto (#424cce3929a63f5849c40c181e0ce252)</pre>
</div>
</div>
......
......@@ -2,6 +2,7 @@
"Getting Started": {
"Overview": "/",
"Pod Client": "pod.client.html",
"ItemBase": "itembase.html"
"ItemBase": "itembase.html",
"Plugin": "plugin.pluginbase.html"
}
}
%% Cell type:code id: tags:
``` python
%load_ext autoreload
%autoreload 2
# default_exp indexers.indexer
```
%% Cell type:code id: tags:
``` python
# export
from pymemri.data.schema import *
from pymemri.pod.client import PodClient, DEFAULT_POD_ADDRESS
from pymemri.imports import *
```
%% Cell type:code id: tags:
``` python
# hide
from nbdev.showdoc import *
```
%% Output
/opt/anaconda3/envs/pymemri/lib/python3.7/site-packages/fastprogress/fastprogress.py:102: UserWarning: Couldn't import ipywidgets properly, progress bar will use console behavior
warn("Couldn't import ipywidgets properly, progress bar will use console behavior")
%% Cell type:markdown id: tags:
# Indexer
%% Cell type:code id: tags:
``` python
# export
# Names of the environment variables / payload keys read by `run_integrator`.
# Environment variable holding the pod address (falls back to DEFAULT_POD_ADDRESS when unset).
POD_FULL_ADDRESS_ENV = 'POD_FULL_ADDRESS'
# Environment variable holding the id of the run to execute.
RUN_UID_ENV = 'RUN_UID'
# Environment variable holding a JSON document with the keys below.
POD_SERVICE_PAYLOAD_ENV = 'POD_SERVICE_PAYLOAD'
# Keys inside the JSON payload stored under POD_SERVICE_PAYLOAD_ENV.
DATABASE_KEY_ENV = 'databaseKey'
OWNER_KEY_ENV = 'ownerKey'
class IndexerBase(Indexer):
    """Base class for indexers: fetch data, index it and write the resulting items back through a client.

    Subclasses are expected to supply `get_data(client, indexer_run)` and
    `index(data, indexer_run, client)`; both are called by `run` but are not defined here.
    """

    def __init__(self, pluginClass=None, *args, **kwargs):
        """Create the indexer, defaulting `pluginClass` to the name of the concrete class."""
        if pluginClass is None:
            pluginClass = self.__class__.__name__
        super().__init__(pluginClass=pluginClass, *args, **kwargs)

    def populate(self, client, items, edges=False):
        """Write `items` to the pod through `client`.

        Items without an id are treated as new, all others as updated. Every item is first
        written without its edges; photos among the new items are uploaded; only afterwards,
        and only when `edges` is true, are all items written again including their edges.
        """
        # Split up front: the `id` of a new item may change once it has been written.
        new_items = [x for x in items if x.id is None]
        updated_items = [x for x in items if x.id is not None]

        for item in new_items:
            item.update(client, edges=False)

        new_photos = [x for x in new_items if isinstance(x, Photo)]
        for x in new_photos:
            print("uploading photo")
            client.upload_photo(x.data)

        for item in updated_items:
            item.update(client, edges=False)

        if edges:
            # Second pass, so that every item has been written before edges are written.
            for item in new_items + updated_items:
                item.update(client, edges=True)

    def run(self, indexer_run, client):
        """Run the full pipeline for `indexer_run`: get the data, index it, populate the pod."""
        data = self.get_data(client, indexer_run)
        items = self.index(data, indexer_run, client)
        self.populate(client, items, edges=True)
class IndexerData():
    """Plain attribute container: every keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

    def __repr__(self):
        # vars(self) is the instance __dict__, i.e. exactly the stored keyword arguments.
        return f"IndexerData \n{vars(self)}"
def get_indexer_run_data(client, indexer_run):
    """Return all items whose `_type` equals the run's `targetDataType`.

    Raises NotImplementedError when the run has no `targetDataType`.
    """
    target_type = indexer_run.targetDataType
    if target_type is not None:
        return client.search_by_fields({"_type": target_type})
    raise NotImplementedError
def test_registration(integrator):
    """Check whether an integrator is registered. Registration is necessary to be able to load the right indexer
    when retrieving it from the database.

    Fails with an AssertionError when the integrator's class name is not a name in `pymemri.integrator_registry`.
    """
    import pymemri.integrator_registry as registry

    registered_names = dir(registry)
    assert integrator.__name__ in registered_names, f"Add {integrator.__name__} to integrators/integrator_registry.py"
```
%% Cell type:markdown id: tags:
# Running your own indexer
%% Cell type:markdown id: tags:
When we run an indexer, we go through three steps: 1) get the indexer and the indexer run based on the run id, 2) run the indexer, and 3) populate the graph with the new information. To mock that, we first create a client and add some toy data.
%% Cell type:code id: tags:
``` python
# hide
# export
def run_importer(importer_run, client):
    """Execute `importer_run` with a freshly constructed `EmailImporter`."""
    from pymemri.integrator_registry import EmailImporter

    # The importer attached to the run is looked up (so a run without one still fails here),
    # but the work itself is done by a new EmailImporter instance.
    importer = importer_run.importer[0]
    EmailImporter().run(importer_run, client)
def run_integrator_from_run_id(run_id, client):
    """Fetch the run item with id `run_id` from `client` and execute it.

    An `IndexerRun` is executed by the first indexer linked to it, an `ImporterRun` is handed
    to `run_importer`; any other item raises NotImplementedError.
    """
    run = client.get(run_id)

    if isinstance(run, IndexerRun):
        run.indexer[0].run(run, client)
        return

    if isinstance(run, ImporterRun):
        run_importer(run, client)
        return

    raise NotImplementedError(f"Cannot execute item of type {run}")
```
%% Cell type:code id: tags:
``` python
# export
def run_integrator(environ=None, pod_full_address=None, integrator_run_id=None, database_key=None, owner_key=None,
                   verbose=False):
    """Runs an integrator, you can either provide the run settings as parameters to this function (for local testing)
    or via environment variables (this is how the pod communicates with integrators).

    When none of the four run parameters is given, they are read from `environ`, a mapping that
    defaults to `os.environ`. If a required variable or payload key is missing, a message is
    printed and the function returns without running anything. When only some of the parameters
    are given, an AssertionError naming the missing ones is raised.
    """
    import os  # local import: only needed for the environment fallback

    params = {
        "pod_full_address": pod_full_address,
        "integrator_run_id": integrator_run_id,
        "database_key": database_key,
        "owner_key": owner_key,
    }

    if all(value is None for value in params.values()):
        if environ is None:
            # Without this fallback `environ.get` below raised AttributeError on None.
            environ = os.environ
        try:
            print("Reading run parameters from environment variables")
            pod_full_address = environ.get(POD_FULL_ADDRESS_ENV, DEFAULT_POD_ADDRESS)
            # NOTE(review): int() raises ValueError (not caught here) for non-numeric run ids - confirm id format.
            integrator_run_id = int(environ[RUN_UID_ENV])
            pod_service_payload = json.loads(environ[POD_SERVICE_PAYLOAD_ENV])
            database_key = pod_service_payload[DATABASE_KEY_ENV]
            owner_key = pod_service_payload[OWNER_KEY_ENV]
        except KeyError as e:
            print(f"Environmentvariable {e} not found, exiting")
            return
    else:
        # Report the *names* of the missing parameters; the values are all None by definition.
        missing = [name for name, value in params.items() if value is None]
        assert not missing, f"Defined some params to run indexer, but not all. Missing {missing}"

    if verbose:
        for name, val in [("pod_full_address", pod_full_address), ("integrator_run_id", integrator_run_id),
                          ("database_key", database_key), ("owner_key", owner_key)]:
            print(f"{name}={val}")

    client = PodClient(url=pod_full_address, database_key=database_key, owner_key=owner_key)
    run_integrator_from_run_id(integrator_run_id, client)
```
%% Cell type:code id: tags:
``` python
show_doc(run_integrator)
```
%% Output
<h4 id="run_integrator" class="doc_header"><code>run_integrator</code><a href="__main__.py#L3" class="source_link" style="float:right">[source]</a></h4>
> <code>run_integrator</code>(**`environ`**=*`None`*, **`pod_full_address`**=*`None`*, **`integrator_run_id`**=*`None`*, **`database_key`**=*`None`*, **`owner_key`**=*`None`*, **`verbose`**=*`False`*)
Runs an integrator, you can either provide the run settings as parameters to this function (for local testing)
or via environment variables (this is how the pod communicates with integrators).
%% Cell type:code id: tags:
``` python
# from pyintegrators.indexers.geo.geo_indexer import GeoIndexer
client = PodClient()
def create_toy_dataset(client):
    """Create a small toy graph in the pod and return `(indexer, indexer_run, location, address)`.

    The graph consists of a Location, an Address, an Indexer and an IndexerRun, with an
    "indexer" edge from the run to the indexer and a "location" edge from the location to
    the address.
    """
    location = Location.from_data(latitude=-37.81, longitude=144.96)
    address = Address.from_data()
    indexer = Indexer.from_data(pluginClass="GeoIndexer", name="GeoIndexer")
    indexer_run = IndexerRun.from_data(progress=0, targetDataType="Address")

    # Items have to exist in the pod before edges between them can be created.
    for x in [location, address, indexer, indexer_run]:
        client.create(x)

    assert client.create_edge(Edge(indexer_run, indexer, "indexer"))
    assert client.create_edge(Edge(location, address, "location"))

    return indexer, indexer_run, location, address
```
%% Cell type:markdown id: tags:
## Running an indexer by providing environment variables
%% Cell type:code id: tags:
``` python
# export
def generate_test_env(client, indexer_run):
    """Build the environment mapping that `run_integrator` reads for `indexer_run`.

    The database and owner key of `client` are stored JSON-encoded under the payload variable.
    """
    keys = {DATABASE_KEY_ENV: client.database_key, OWNER_KEY_ENV: client.owner_key}

    env = {}
    env[POD_FULL_ADDRESS_ENV] = DEFAULT_POD_ADDRESS
    env[RUN_UID_ENV] = indexer_run.id
    env[POD_SERVICE_PAYLOAD_ENV] = json.dumps(keys)
    return env
```
%% Cell type:code id: tags:
``` python
# indexer, indexer_run, location, address = create_toy_dataset(client)
```
%% Cell type:code id: tags:
``` python
# run_integrator(environ=generate_test_env(client, indexer_run))
```
%% Cell type:code id: tags:
``` python
# client.delete_all()
```
%% Cell type:markdown id: tags:
## Run
%% Cell type:markdown id: tags:
Now we start with the setting we would normally have: some memri client makes a call to the pod to execute an indexer run. Let's start by getting the indexer and the indexer run.
%% Cell type:code id: tags:
``` python
# indexer, indexer_run, location, address = create_toy_dataset(client)
# id = indexer_run.id; id
```
%% Cell type:code id: tags:
``` python
# indexer_run = client.get(id)
# indexer = indexer_run.indexer[0]
# indexer
```
%% Cell type:markdown id: tags:
Next, we retrieve the data, which was specified in the client by the `targetDataType`.
%% Cell type:code id: tags:
``` python
# data = indexer.get_data(client, indexer_run)
# data
```
%% Cell type:code id: tags:
``` python
# output_items = indexer.index(data, indexer_run, client)
```
%% Cell type:code id: tags:
``` python
# indexer.populate(client, output_items)
```
%% Cell type:code id: tags:
``` python
# client.delete_all()
```
%% Cell type:markdown id: tags:
# Running the full Indexer pipeline
%% Cell type:markdown id: tags:
## Running an indexer by providing parameters as variables
%% Cell type:code id: tags:
``` python
# indexer, indexer_run, location, address = create_toy_dataset(client)
# run_integrator(pod_full_address=DEFAULT_POD_ADDRESS,
# integrator_run_id=indexer_run.id,
# database_key=client.database_key,
# owner_key=client.owner_key)
# client.delete_all()
```
%% Cell type:markdown id: tags:
## Registration
%% Cell type:markdown id: tags:
All indexers need to be registered before they can be run. We can test our registration as follows:
%% Cell type:code id: tags:
``` python
# test_registration(GeoIndexer)
```
%% Cell type:markdown id: tags:
> Important: Note that before running an indexer, it needs to be registered. We can do this by importing the file in `integrators.indexer_registry.py`.
%% Cell type:markdown id: tags:
# Export -
%% Cell type:code id: tags:
``` python
# hide
from nbdev.export import *
notebook2script()
```
%% Output
Converted basic.ipynb.
Converted data.photo.ipynb.
Converted importers.Importer.ipynb.
Converted importers.util.ipynb.
Converted index.ipynb.
Converted indexers.indexer.ipynb.
Converted itembase.ipynb.
Converted pod.client.ipynb.
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:code id: tags:
``` python
%load_ext autoreload
%autoreload 2
# default_exp data.itembase
```
%% Cell type:markdown id: tags:
# Itembase
%% Cell type:markdown id: tags:
Any data class in pymemri inherits from `Item`. It is a base class for items with some handy functionalities to create new items and edges, retrieve all edges to other items, and sync with the pod.
%% Cell type:code id: tags:
``` python
# export
# hide
from pymemri.imports import *
ALL_EDGES = "allEdges"
SOURCE, TARGET, TYPE, EDGE_TYPE, LABEL, SEQUENCE = "_source", "_target", "_type", "_type", "label", "sequence"
EDGE_KEY = "allEdges"
```
%% Cell type:code id: tags:
``` python
#hide
from nbdev.showdoc import *
```
%% Cell type:code id: tags:
``` python
# export
# hide
class DB():
    """In-memory registry of nodes, keyed by their `id`."""

    def __init__(self):
        self.nodes = dict()

    def add(self, node):
        """Store `node` under `node.id`; an existing entry is reported and then overwritten."""
        id = node.id
        if id in self.nodes:
            print(f"Error trying to add node, but node with with id: {id} is already in database")
        self.nodes[id] = node

    def get(self, id):
        """Return the node stored under `id`, or None when there is none."""
        res = self.nodes.get(id, None)
        return res

    def contains(self, node):
        """Return whether a node with the id of `node` is stored.

        The id is read from `node.id`, the same attribute `add` uses as key.
        """
        return node.id in self.nodes

    def create(self, node):
        """Return the stored node with the same id as `node`, adding `node` when there is none.

        For an already stored node that is not expanded, the edges of `node` are copied over.
        """
        # NOTE(review): this expects `node.properties` to be a dict and nodes to carry
        # `_expanded`/`edges` state - verify against the current item classes.
        existing = self.get(node.properties.get("id", None))

        if existing is not None:
            if not existing._expanded:
                existing.edges = node.edges
                existing._expanded = node.edges is not None
            return existing
        else:
            self.add(node)
            return node
def parse_base_item_json(json):
    """Extract the base item fields from the `json` dict as an 11-tuple.

    The first nine entries are the values of id, dateAccessed, dateCreated, dateModified,
    deleted, externalId, itemDescription, starred and version (None when a key is absent);
    the last two entries are always None.
    """
    field_names = ("id", "dateAccessed", "dateCreated", "dateModified", "deleted", "externalId",
                   "itemDescription", "starred", "version")
    values = [json.get(field, None) for field in field_names]
    values.extend((None, None))
    return tuple(values)
```
%% Cell type:code id: tags:
``` python
# export
class Edge():
    """Edges make a link between two `ItemBase` Items. You won't use this class a lot in practice, as edges are
    abstracted away for normal users. When items are retrieved from the database, the edges are parsed automatically.
    When you add an edge between two items within pymemri, you will often use `ItemBase.add_edge`"""

    def __init__(self, source, target, _type, label=None, sequence=None, created=False, reverse=True):
        self.source = source
        self.target = target
        self._type = _type
        self.label = label
        self.sequence = sequence
        self.created = created
        self.reverse = reverse

    @classmethod
    def from_json(cls, json):
        """Build an edge from its json representation.

        Only the target is parsed here; `source` is left as None for the caller to fill in.
        """
        from .schema import get_constructor
        # we only set the target here
        _type = json[EDGE_TYPE]
        json_target = json[TARGET]
        target_type = json_target["_type"]
        plugin_class = json_target.get("pluginClass", None)
        target_constructor = get_constructor(target_type, plugin_class)
        target = target_constructor.from_json(json_target)
        return cls(source=None, target=target, _type=_type)

    def __repr__(self):
        return f"{self.source} --{self._type}-> {self.target}"

    def update(self, api):
        """Create this edge through `api`, but only when it is flagged as `created`."""
        if self.created:
            api.create_edges([self])

    def __eq__(self, other):
        """Two edges are equal when they link the very same objects with the same type."""
        if not isinstance(other, Edge):
            # Let Python fall back to its default comparison instead of failing on a missing attribute.
            return NotImplemented
        return self.source is other.source and self.target is other.target \
            and self._type == other._type

    def traverse(self, start):
        """We can traverse an edge starting from the source to the target or vice versa. In practice we often call
        item.some_edge_type, which calls item.traverse(edgetype), which in turn calls this function."""
        if start == self.source:
            return self.target
        elif start == self.target:
            return self.source
        else:
            raise ValueError(f"{start} is neither the source nor the target of edge {self}")
%% Cell type:code id: tags:
``` python
show_doc(Edge.traverse)
```
%% Output
<h4 id="Edge.traverse" class="doc_header"><code>Edge.traverse</code><a href="__main__.py#L38" class="source_link" style="float:right">[source]</a></h4>
> <code>Edge.traverse</code>(**`start`**)
We can traverse an edge starting from the source to the target or vice versa. In practice we often call
item.some_edge_type, which calls item.traverse(edgetype), which in turn calls this function.
%% Cell type:code id: tags:
``` python
ITEMBASE_PROPERTIES = ["dateAccessed", "dateCreated", "dateModified", "deleted", "externalId", "itemDescription",
"starred", "version", "id", "importJson", "name", "repository", "icon", "bundleImage",
"runDestination", "pluginClass"]
```
%% Cell type:code id: tags:
``` python
# export
class ItemBase():
    """Provides a base class for all items. All items in the schema inherit from this class, and it provides some
    basic functionality for consistency and to enable easier usage."""

    # Registry shared by all items; items with an id are added to it on construction.
    global_db = DB()

    def __init__(self, id=None):
        self.id = id
        self.add_to_db(self)

    @classmethod
    def add_to_db(cls, node):
        """Register `node` in the shared registry unless it has no id or the id is already taken."""
        already_stored = cls.global_db.get(node.id)
        if already_stored is None and node.id is not None:
            cls.global_db.add(node)

    def replace_self(self, other):
        """Copy all instance attributes of `other` onto this item."""
        self.__dict__.update(other.__dict__)

    def __getattribute__(self, name):
        # Attributes that hold an Edge (or a non-empty list starting with an Edge) are resolved
        # to the item(s) at the other end; everything else is returned unchanged.
        raw = object.__getattribute__(self, name)
        if isinstance(raw, Edge):
            return raw.traverse(start=self)
        if isinstance(raw, list) and len(raw) > 0 and isinstance(raw[0], Edge):
            return [edge.traverse(start=self) for edge in raw]
        return raw

    def add_edge(self, name, val):
        """Creates an edge of type name and makes it point to val"""
        if name not in self.__dict__:
            raise NameError(f"object {self} does not have edge with name {name}")
        new_edge = Edge(self, val, name, created=True)
        # Bypass __getattribute__ to get the stored Edge objects rather than their targets.
        current = object.__getattribute__(self, name)
        setattr(self, name, current + [new_edge])

    def is_expanded(self):
        """returns whether the node is expanded. An expanded node retrieved nodes that are
        *directly* connected to it
        from the pod, and stored their values via edges in the object."""
        return len(self.get_all_edges()) > 0

    def get_edges(self, name):
        """Return the raw value stored under `name`, without resolving edges to items."""
        return object.__getattribute__(self, name)

    def get_all_edges(self):
        """Return the Edge objects of every edge attribute as one flat list."""
        collected = []
        for attr in self.__dict__.values():
            if self.attr_is_edge(attr):
                collected.extend(attr)
        return collected

    def get_all_edge_names(self):
        """Return the names of the attributes that currently hold edges."""
        return [key for key, value in self.__dict__.items() if self.attr_is_edge(value)]

    def get_property_names(self):
        """Return the names of the instance attributes whose value is not a list."""
        return [key for key, value in self.__dict__.items() if type(value) is not list]

    @staticmethod
    def attr_is_edge(attr):
        """Return whether `attr` is a non-empty list whose first element is an Edge."""
        if not isinstance(attr, list):
            return False
        return len(attr) > 0 and isinstance(attr[0], Edge)

    def update(self, api, edges=True, create_if_not_exists=True, skip_nodes=False):
        """Write this item through `api`: update it when it exists there, create it otherwise.

        When `edges` is true the item's edges are written afterwards.
        `create_if_not_exists` and `skip_nodes` are accepted but not used.
        """
        if self.exists(api):
            print(f"updating {self}")
            api.update_item(self)
        else:
            print(f"creating {self}")
            api.create(self)

        if edges:
            for edge in self.get_all_edges():
                edge.update(api)

    def exists(self, api):
        """Return whether searching `api` for this item's id yields exactly one result."""
        found = api.search_by_fields({"id": self.id})
        return found is not None and len(found) == 1

    def expand(self, api):
        """Expands a node (retrieves all directly connected nodes and adds them to the object)."""
        self._expanded = True
        fetched = api.get(self.id, expanded=True)
        for edge_name in fetched.get_all_edge_names():
            fetched_edges = fetched.get_edges(edge_name)
            # Re-anchor the fetched edges on this instance before taking them over.
            for edge in fetched_edges:
                edge.source = self
            setattr(self, edge_name, fetched_edges)
        return self

    def __repr__(self):
        return f"{self.__class__.__name__} (#{self.id})"

    @classmethod
    def from_data(cls, *args, **kwargs):
        """Construct an item, wrapping every keyword value that is an item in an Edge.

        The created edges get the new item as their source once it has been constructed.
        """
        created_edges = []
        init_kwargs = dict()
        for key, value in kwargs.items():
            if isinstance(value, ItemBase):
                value = Edge(None, value, key)
                created_edges.append(value)
            init_kwargs[key] = value

        item = cls(*args, **init_kwargs)

        for edge in created_edges:
            edge.source = item
        return item

    def inherit_funcs(self, other):
        """Switch this instance over to the subclass `other`, so that it gains the subclass's functionality.

        `other` must be a subclass of the instance's current class.
        """
        assert issubclass(other, self.__class__)
        self.__class__ = other
```
%% Cell type:code id: tags:
``` python
# export
class Item(ItemBase):
    """Item is the baseclass for all of the data classes."""

    # Names of the plain-value attributes and of the edge attributes of an item. Subclasses
    # extend these lists; construction and json parsing are driven by them.
    properties = ["dateAccessed", "dateCreated", "dateModified", "deleted", "externalId", "itemDescription",
                  "starred", "version", "id", "importJson", "pluginClass"]
    edges = ["changelog", "label", "genericAttribute", "measure", "sharedWith"]

    def __init__(self, **kwargs):
        """Set every name in `properties` (default None) and in `edges` (default []) from `kwargs`."""
        super().__init__(kwargs.get("id"))

        for p in self.properties:
            if p == "id":
                # already set by ItemBase.__init__
                continue
            setattr(self, p, kwargs.get(p, None))

        for e in self.edges:
            # An explicit None is treated like a missing edge list, so edges can always be appended.
            value = kwargs.get(e, None)
            setattr(self, e, value if value is not None else [])

    @classmethod
    def parse_json(self, cls, json):
        """Return the constructor kwargs for item class `cls` found in `json`.

        Called as `Item.parse_json(cls, json)`: the explicit `cls` argument is the class whose
        `properties` and `edges` are used, not the class the method is looked up on.
        """
        property_kwargs = Item.parse_properties(cls, json)
        edge_kwargs = Item.parse_edges(cls, json)
        return {**property_kwargs, **edge_kwargs}

    @classmethod
    def parse_properties(self, cls, json):
        """Return `{property: value}` for every property of `cls` (None when absent from `json`)."""
        return {p: json.get(p, None) for p in cls.properties}

    @classmethod
    def parse_edges(self, cls, json):
        """Return `{edge name: [Edge, ...]}` for the edges in `json` that `cls` declares.

        Both the forward name and the "~"-prefixed reverse name are accepted; reverse edges are
        collected under the name without the prefix.
        """
        all_edges = json.get(EDGE_KEY, None)
        edge_kwargs = dict()
        if all_edges is None:
            return edge_kwargs

        accepted = set(cls.edges) | {f"~{e}" for e in cls.edges}
        for edge_json in all_edges:
            edge = Edge.from_json(edge_json)
            if edge._type in accepted:
                edge_name = Item.remove_prefix(edge._type)
                edge_kwargs.setdefault(edge_name, []).append(edge)
        return edge_kwargs

    @staticmethod
    def remove_prefix(s, prefix="~"):
        """Return `s` without a leading `prefix`; `s` is returned unchanged when it has none."""
        return s[len(prefix):] if s.startswith(prefix) else s

    @classmethod
    def from_json(cls, json):
        """Construct an item of class `cls` from `json` and make it the source of its parsed edges."""
        kwargs = Item.parse_json(cls, json)
        res = cls(**kwargs)
        for e in res.get_all_edges():
            e.source = res
        return res
```
%% Cell type:code id: tags:
``` python
show_doc(ItemBase.add_edge)
```
%% Output
<h4 id="ItemBase.add_edge" class="doc_header"><code>ItemBase.add_edge</code><a href="__main__.py#L32" class="source_link" style="float:right">[source]</a></h4>
<h4 id="ItemBase.add_edge" class="doc_header"><code>ItemBase.add_edge</code><a href="__main__.py#L29" class="source_link" style="float:right">[source]</a></h4>
> <code>ItemBase.add_edge</code>(**`name`**, **`val`**)
Creates an edge of type name and makes it point to val
%% Cell type:code id: tags:
``` python
show_doc(ItemBase.is_expanded)
```
%% Output
<h4 id="ItemBase.is_expanded" class="doc_header"><code>ItemBase.is_expanded</code><a href="__main__.py#L41" class="source_link" style="float:right">[source]</a></h4>
<h4 id="ItemBase.is_expanded" class="doc_header"><code>ItemBase.is_expanded</code><a href="__main__.py#L38" class="source_link" style="float:right">[source]</a></h4>
> <code>ItemBase.is_expanded</code>()
returns whether the node is expanded. An expanded node retrieved nodes that are
*directly* connected to it
from the pod, and stored their values via edges in the object.
%% Cell type:markdown id: tags:
# Usage
%% Cell type:markdown id: tags:
With the `ItemBase` and `Edge` classes we can create an item and its surrounding graph. The schema is defined in schema.py. In general we want to use the `from_data` staticmethod to generate new items, because it ensures that edges are linked from both the source and the target object. Let's make a new item and add it to the pod.
%% Cell type:code id: tags:
``` python
show_doc(ItemBase.expand)
class MyItem(Item):
    """Example item with `name` and `age` properties and a `friend` edge."""
    properties = Item.properties + ["name", "age"]
    edges = Item.edges + ["friend"]

    def __init__(self, name=None, age=None, friend=None, **kwargs):
        """Store the given properties; `friend` defaults to a fresh empty list.

        Remaining keyword arguments are forwarded to `Item.__init__`.
        """
        super().__init__(**kwargs)
        self.name = name
        self.age = age
        # Previously referenced the misspelled name `fried`, which raised a
        # NameError whenever a friend list was actually passed in.
        self.friend = friend if friend is not None else []
```
%% Output
%% Cell type:code id: tags:
<h4 id="ItemBase.expand" class="doc_header"><code>ItemBase.expand</code><a href="__main__.py#L81" class="source_link" style="float:right">[source]</a></h4>
> <code>ItemBase.expand</code>(**`api`**)
Expands a node (retrieves all directly connected nodes ands adds to object).
``` python
from pymemri.pod.client import PodClient
client = PodClient()
```
%% Cell type:code id: tags:
``` python
show_doc(ItemBase.inherit_funcs)
assert client.add_to_schema(MyItem(name="abc", age=1))
```
%% Output
%% Cell type:code id: tags:
<h4 id="ItemBase.inherit_funcs" class="doc_header"><code>ItemBase.inherit_funcs</code><a href="__main__.py#L117" class="source_link" style="float:right">[source]</a></h4>
> <code>ItemBase.inherit_funcs</code>(**`other`**)
This function can be used to inherit new functionalities from a subclass. This is a patch to solve
the fact that python does provide extensions of classes that are defined in a different file that are
dynamic enough for our use case.
``` python
x = MyItem(name="me", age=30)
x.add_edge("friend", MyItem(name="my friend", age=31))
x.add_edge("friend", MyItem(name="my friend2", age=32))
```
%% Cell type:markdown id: tags:
%% Cell type:code id: tags:
# Usage
``` python
assert client.create(x)
```
%% Cell type:markdown id: tags:
%% Cell type:code id: tags:
With the `ItemBase` and `Edge` classes we can create an item and its surrounding graph. The schema is defined in schema.py, in general we want to use the from_data staticmethod to generate new items, because it ensures that edges are linked from both the source and the target object. Let's make an email item and create it in the pod.
``` python
y = client.get(x.id)
```
%% Cell type:code id: tags:
``` python
# hide
from pymemri.data.schema import *
assert len(y.friend) > 0
```
%% Cell type:code id: tags:
``` python
item = EmailMessage.from_data(content="example content field")
y.friend[0].name
```
%% Output
'my friend'
%% Cell type:markdown id: tags:
# Export -
%% Cell type:code id: tags:
``` python
# hide
from nbdev.export import *
notebook2script()
```
%% Output
Converted basic.ipynb.
Converted data.photo.ipynb.
Converted importers.Importer.ipynb.
Converted importers.util.ipynb.
Converted index.ipynb.
Converted indexers.indexer.ipynb.
Converted itembase.ipynb.
Converted plugin.pluginbase.ipynb.
Converted pod.client.ipynb.
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:code id: tags:
``` python
%load_ext autoreload
%autoreload 2
# default_exp plugin.pluginbase
```
%% Cell type:code id: tags:
``` python
# export
from pymemri.data.schema import *
from pymemri.pod.client import PodClient, DEFAULT_POD_ADDRESS
from pymemri.imports import *
from os import environ
```
%% Cell type:code id: tags:
``` python
# hide
from nbdev.showdoc import *
```
%% Cell type:markdown id: tags:
# Plugins
%% Cell type:code id: tags:
``` python
# export
POD_FULL_ADDRESS_ENV = 'POD_FULL_ADDRESS'
RUN_UID_ENV = 'RUN_ID'
POD_SERVICE_PAYLOAD_ENV = 'POD_SERVICE_PAYLOAD'
DATABASE_KEY_ENV = 'databaseKey'
OWNER_KEY_ENV = 'ownerKey'
```
%% Cell type:code id: tags:
``` python
# export
# hide
class PluginBase(Item):
    """Base class for plugins"""
    # NOTE(review): "data_query" is listed here but __init__ stores `self.query`,
    # and the edge is listed as "IndexerRun" while the attribute is `indexerRun`.
    # Confirm which spelling the Item machinery expects before changing either.
    properties = Item.properties + ["name", "repository", "icon", "data_query", "bundleImage",
                                    "runDestination", "pluginClass", "pluginPackage"]
    edges = Item.edges + ["IndexerRun"]

    def __init__(self, name=None, repository=None, icon=None, query=None, bundleImage=None, runDestination=None,
                 pluginClass=None, indexerRun=None, **kwargs):
        """Store the plugin metadata; remaining kwargs are forwarded to `Item.__init__`.

        `pluginClass` defaults to the name of the concrete class, and
        `indexerRun` defaults to a fresh empty list.
        """
        if pluginClass is None:
            pluginClass = self.__class__.__name__
        # Set before the Item constructor runs; subclasses assign the real
        # package name after calling super().__init__().
        self.pluginPackage = None
        super().__init__(**kwargs)
        self.name = name
        self.repository = repository
        self.icon = icon
        self.query = query
        self.bundleImage = bundleImage
        self.runDestination = runDestination
        self.pluginClass = pluginClass
        self.indexerRun = indexerRun if indexerRun is not None else []

    def run(self, run=None, client=None):
        """Execute the plugin; subclasses must override this.

        The signature accepts the `(run, client)` pair that
        `run_plugin_from_run_id` passes, so a subclass that forgets to
        override gets NotImplementedError rather than a TypeError about
        unexpected arguments. Calling `run()` without arguments still works.
        """
        raise NotImplementedError()
```
%% Cell type:code id: tags:
``` python
# export
# hide
class PluginRun(Item):
    """Item describing one run of a plugin; linked to the plugin via the `plugin` edge."""
    properties = Item.properties
    edges = Item.edges + ["plugin"]

    def __init__(self, plugin=None, **kwargs):
        """Forward kwargs to `Item.__init__`; `plugin` defaults to a fresh empty list."""
        super().__init__(**kwargs)
        if plugin is None:
            plugin = []
        self.plugin = plugin
```
%% Cell type:markdown id: tags:
Let's use the following plugin as an example of how we can start plugins.
%% Cell type:code id: tags:
``` python
# export
# hide
class MyPlugin(PluginBase):
    """Minimal example plugin that only prints a message when it is run."""
    properties = PluginBase.properties
    edges = PluginBase.edges

    def __init__(self, **kwargs):
        """Initialise the base plugin, then record the package this class lives in."""
        super().__init__(**kwargs)
        self.pluginPackage = "pymemri.plugin.pluginbase"

    def run(self, run, client):
        """Entry point called with the run item and the pod client."""
        print("running")
```
%% Cell type:markdown id: tags:
```python
class MyPlugin(PluginBase):
properties = PluginBase.properties
edges= PluginBase.edges
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.pluginPackage="pymemri.plugin.pluginbase"
def run(self, run, client):
print("running")
```
%% Cell type:code id: tags:
``` python
from pymemri.pod.client import PodClient
client = PodClient()
```
%% Cell type:code id: tags:
``` python
assert client.add_to_schema(MyPlugin(name="abc", data_query="abc"))
assert client.add_to_schema(PluginRun())
```
%% Cell type:code id: tags:
``` python
plugin = MyPlugin(name="abc", data_query="abc")
run = PluginRun()
run.add_edge("plugin", plugin)
```
%% Cell type:code id: tags:
``` python
client.create(run)
client.create(plugin)
client.create_edge(run.get_edges("plugin")[0])
```
%% Output
True
%% Cell type:markdown id: tags:
# Running your plugin
%% Cell type:markdown id: tags:
Plugins can be started using the pymemri `run_plugin` CLI. To use the CLI, you can either pass your run arguments as parameters, or set them as environment variables. If both are set, the CLI will prefer the passed arguments.
%% Cell type:code id: tags:
``` python
# hide
# export
def run_plugin_from_run_id(run_id, client):
    """Fetch the run item `run_id` from the pod and execute its attached plugin.

    The first item on the run's `plugin` edge is executed as
    `plugin.run(run, client)`.

    Raises:
        ValueError: if the run cannot be retrieved, or if it has no plugin
            attached.
    """
    run = client.get(run_id)
    if run is None:
        # client.get() returns None on failure; fail with a clear message
        # instead of an AttributeError on `run.plugin`.
        raise ValueError(f"plugin run {run_id} could not be retrieved from the pod.")
    plugins = run.plugin
    if not plugins:
        # Adjacent literals instead of a backslash continuation, which pulled
        # the next line's indentation into the message.
        raise ValueError(f"plugin run {run_id} has no plugin attached to it. Make sure there is a 'plugin' "
                         "edge from your run to the actual plugin object.")
    plugin = plugins[0]
    plugin.run(run, client)
```
%% Cell type:code id: tags:
``` python
# export
# hide
def register_base_classes(client):
    """Register the `PluginRun` item type in the pod schema.

    Raises:
        ValueError: if `client.add_to_schema` reports failure or raises.
    """
    try:
        registered = client.add_to_schema(PluginRun())
    except Exception as e:
        # Keep the original cause attached for debugging.
        raise ValueError("Could not add base schema") from e
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not registered:
        raise ValueError("Could not add base schema")
```
%% Cell type:code id: tags:
``` python
# hide
run_plugin_from_run_id(run.id, client)
```
%% Output
running
%% Cell type:code id: tags:
``` python
# export
def _run_plugin(pod_full_address=None, plugin_run_id=None, database_key=None, owner_key=None,
                verbose=False):
    """Run a plugin against a pod.

    The run settings can be passed as arguments (for local testing); the
    pod itself supplies them through environment variables, which the CLI
    wrapper parses before calling this function. With `verbose` set, the
    settings are printed first.
    """
    if verbose:
        settings = {"pod_full_address": pod_full_address, "plugin_run_id": plugin_run_id,
                    "database_key": database_key, "owner_key": owner_key}
        for name, val in settings.items():
            print(f"{name}={val}")
        print()

    pod_client = PodClient(url=pod_full_address, database_key=database_key, owner_key=owner_key)
    register_base_classes(pod_client)
    run_plugin_from_run_id(plugin_run_id, pod_client)
```
%% Cell type:code id: tags:
``` python
# hide
_run_plugin(pod_full_address=DEFAULT_POD_ADDRESS, plugin_run_id=run.id,
database_key=client.database_key, owner_key=client.owner_key)
```
%% Output
running
%% Cell type:markdown id: tags:
## CLI
%% Cell type:code id: tags:
``` python
# export
# hide
def _parse_env(env):
    """Extract the plugin run settings from the mapping `env`.

    Returns the tuple `(pod_full_address, plugin_run_id, pod_service_payload,
    database_key, owner_key)`. The pod address falls back to
    `DEFAULT_POD_ADDRESS`; every other value is required.

    Raises:
        Exception: when a required key is missing from `env` or from the
            JSON service payload.
    """
    try:
        address = env.get(POD_FULL_ADDRESS_ENV, DEFAULT_POD_ADDRESS)
        run_id = str(env[RUN_UID_ENV])
        payload = json.loads(env[POD_SERVICE_PAYLOAD_ENV])
        db_key = payload[DATABASE_KEY_ENV]
        own_key = payload[OWNER_KEY_ENV]
        return address, run_id, payload, db_key, own_key
    except KeyError as missing:
        raise Exception('Missing parameter: {}'.format(missing)) from None
```
%% Cell type:code id: tags:
``` python
# export
from fastscript import *
import os
@call_parse
def run_plugin(pod_full_address:Param("The pod full address", str)=None,
               plugin_run_id:Param("Run id of the plugin to be executed", str)=None,
               database_key:Param("Database key of the pod", str)=None,
               owner_key:Param("Owner key of the pod", str)=None):
    """CLI entry point that runs a plugin.

    If no argument is given, all settings are read from environment
    variables. If any argument is given, all of them must be given and the
    environment is ignored.

    Raises:
        ValueError: if only some of the arguments were provided.
    """
    params = {"pod_full_address": pod_full_address, "plugin_run_id": plugin_run_id,
              "database_key": database_key, "owner_key": owner_key}

    if all(value is None for value in params.values()):
        print("Reading `run_plugin()` parameters from environment variables")
        pod_full_address, plugin_run_id, _, database_key, owner_key = _parse_env(os.environ)
    else:
        print("Used arguments passed to `run_plugin()` (ignoring environment)")
        # Report the *names* of the missing parameters; the old message
        # listed the values, i.e. a list of `None`s.
        missing = [name for name, value in params.items() if value is None]
        if missing:
            raise ValueError(f"Defined some params to run plugin, but not all. Missing {missing}")

    _run_plugin(pod_full_address=pod_full_address, plugin_run_id=plugin_run_id,
                database_key=database_key, owner_key=owner_key, verbose=True)
```
%% Cell type:code id: tags:
``` python
!run_plugin --pod_full_address=$DEFAULT_POD_ADDRESS --plugin_run_id=$run.id --owner_key=$client.owner_key \
--database_key=$client.database_key
```
%% Output
Used arguments passed to `run_plugin()` (ignoring environment)
pod_full_address=http://localhost:3030
plugin_run_id=61274342a226fe9ecb5d0efeb50ebc64
database_key=2275799687202554321586156394256447011413538728384161840804653842
owner_key=7877526860202989541720539875881419199482049862866439964933736900
running
%% Cell type:markdown id: tags:
## Running a Plugin by providing environment variables -
%% Cell type:code id: tags:
``` python
# hide
# # export
# def generate_test_env(client, indexer_run):
# payload = json.dumps({DATABASE_KEY_ENV: client.database_key, OWNER_KEY_ENV: client.owner_key})
# return {POD_FULL_ADDRESS_ENV: DEFAULT_POD_ADDRESS,
# RUN_UID_ENV: indexer_run.id,
# POD_SERVICE_PAYLOAD_ENV: payload}
```
%% Cell type:code id: tags:
``` python
# hide
# run_plugin(env=generate_test_env(client, run))
```
%% Cell type:markdown id: tags:
# Export -
%% Cell type:code id: tags:
``` python
# hide
from nbdev.export import *
notebook2script()
```
%% Output
Converted basic.ipynb.
Converted data.photo.ipynb.
Converted importers.Importer.ipynb.
Converted importers.util.ipynb.
Converted index.ipynb.
Converted indexers.indexer.ipynb.
Converted itembase.ipynb.
Converted plugin.pluginbase.ipynb.
Converted pod.client.ipynb.
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
# default_exp pod.client
%load_ext autoreload
%autoreload 2
```
%% Cell type:markdown id: tags:
# Pod Client
%% Cell type:code id: tags:
``` python
# export
from pymemri.data.itembase import Edge, ItemBase
from pymemri.data.basic import *
from pymemri.data.schema import *
from pymemri.data.itembase import Edge, ItemBase, Item
from pymemri.data.photo import Photo
from pymemri.imports import *
from hashlib import sha256
```
%% Cell type:code id: tags:
``` python
# export
DEFAULT_POD_ADDRESS = "http://localhost:3030"
POD_VERSION = "v3"
```
%% Cell type:code id: tags:
``` python
# export
class PodClient:
def __init__(self, url=DEFAULT_POD_ADDRESS, version=POD_VERSION, database_key=None, owner_key=None):
    """Set up a client for the pod at `url`.

    Keys that are not supplied are generated randomly. A connection test
    is performed at construction time; a failed test is only reported by
    `test_connection`, it does not abort construction.
    """
    self.url = url
    # Previously always set to POD_VERSION, so `self.version` disagreed
    # with `base_url` whenever a non-default version was passed.
    self.version = version
    self.test_connection(verbose=False)
    self.database_key = database_key if database_key is not None else self.generate_random_key()
    self.owner_key = owner_key if owner_key is not None else self.generate_random_key()
    self.base_url = f"{url}/{version}/{self.owner_key}"
    self.auth_json = {"type": "ClientAuth", "databaseKey": self.database_key}
    # Maps class names to classes registered through add_to_schema().
    self.registered_classes = dict()
@staticmethod
def generate_random_key():
return "".join([str(random.randint(0, 9)) for i in range(64)])
def test_connection(self, verbose=True):
    """Send a GET request to the pod url; return True when it succeeds, False otherwise.

    Any HTTP response counts as success; only a request-level error is
    treated as a failed connection.
    """
    try:
        requests.get(self.url)
    except requests.exceptions.RequestException:
        print("Could no connect to backend")
        return False
    if verbose:
        print("Succesfully connected to pod")
    return True
def create(self, node):
    """Create `node` in the pod and store the returned id on it.

    For `Photo` nodes the attached file is created and uploaded first.
    Returns True on success, False on any failure (which is printed).
    """
    if isinstance(node, Photo) and not self.create_photo_file(node):
        return False

    try:
        payload = {key: value for key, value in self.get_properties_json(node).items() if value != []}
        request_body = {"auth": self.auth_json, "payload": payload}
        response = requests.post(f"{self.base_url}/create_item", json=request_body)
        if response.status_code == 200:
            # The pod answers with the id of the newly created item.
            node.id = response.json()
            ItemBase.add_to_db(node)
            return True
        print(response, response.content)
        return False
    except requests.exceptions.RequestException as e:
        print(e)
        return False
def add_to_schema(self, node):
    """Register the class of `node` and add its set properties to the pod schema.

    One `ItemPropertySchema` item is posted per non-list property of
    `node`; the value type is derived from the property's current value
    (str -> "Text", int -> "Integer").

    Returns True when every property was registered, False on the first
    failure (which is printed).
    """
    self.registered_classes[node.__class__.__name__] = type(node)
    attributes = self.get_properties_json(node)

    for k, v in attributes.items():
        if isinstance(v, list) or k == "type":
            continue

        if isinstance(v, str):
            value_type = "Text"
        elif isinstance(v, int):
            value_type = "Integer"
        else:
            # Previously `value_type` was left unbound (UnboundLocalError) or
            # silently reused from the preceding property for other types.
            print(f"Cannot add property {k} of {attributes['type']} to schema: "
                  f"unsupported value type {type(v).__name__}")
            return False

        payload = {"type": "ItemPropertySchema", "itemType": attributes["type"],
                   "propertyName": k, "valueType": value_type}
        body = {"auth": self.auth_json, "payload": payload}
        try:
            result = requests.post(f"{self.base_url}/create_item", json=body)
            if result.status_code != 200:
                print(result, result.content)
                return False
            # NOTE(review): this stores the id of the *schema* item on `node`
            # and registers `node` in the local db; kept as-is, confirm intent.
            node.id = result.json()
            ItemBase.add_to_db(node)
        except requests.exceptions.RequestException as e:
            print(e)
            return False
    return True
def create_photo_file(self, photo):
    """Create the file item attached to `photo` and upload the photo's data.

    Returns the result of the upload, or False when the photo has no file
    attached or the file item could not be created.
    """
    if not photo.file:
        print(f"Could not create file for {photo}, no file attached")
        return False
    # Do not upload the blob when the file item itself could not be created;
    # the result of create() used to be ignored.
    if not self.create(photo.file[0]):
        return False
    return self._upload_image(photo.data)
def _upload_image(self, arr):
return self.upload_file(arr.tobytes())
def upload_file(self, file):
    """Post the bytes `file` to the pod's upload endpoint, addressed by their sha256.

    Returns True on HTTP 200, otherwise prints the problem and returns False.
    """
    # TODO: currently this only works for numpy images
    try:
        digest = sha256(file).hexdigest()
        endpoint = f"{self.base_url}/upload_file/{self.database_key}/{digest}"
        response = requests.post(endpoint, data=file)
        if response.status_code == 200:
            return True
        print(response, response.content)
        return False
    except requests.exceptions.RequestException as e:
        print(e)
        return False
def get_file(self, sha):
    """Fetch the content of the file whose sha256 is `sha`.

    Returns the response content on HTTP 200, otherwise prints the problem
    and returns None.
    """
    # TODO: currently this only works for numpy images
    try:
        request_body = {"auth": self.auth_json, "payload": {"sha256": sha}}
        response = requests.post(f"{self.base_url}/get_file", json=request_body)
        if response.status_code == 200:
            return response.content
        print(response, response.content)
        return None
    except requests.exceptions.RequestException as e:
        print(e)
        return None
def get_photo(self, id, size=640):
    """Fetch the photo item `id` and load its image data, passing `size` to the loader."""
    fetched = self.get(id)
    self._load_photo_data(fetched, size=size)
    return fetched
def _load_photo_data(self, photo, size=None):
if len(photo.file) > 0 and photo.data is None:
file = self.get_file(photo.file[0].sha256)
if file is None:
print(f"Could not load data of {photo} attached file item does not have data in pod")
return
data = np.frombuffer(file, dtype=np.uint8)
c = photo.channels
shape = (photo.height,photo.width, c) if c is not None and c > 1 else (photo.height, photo.width)
data = data.reshape(shape)
if size is not None: data = resize(data, size)
photo.data = data
return
print(f"could not load data of {photo}, no file attached")
def create_if_external_id_not_exists(self, node):
    """Create `node` unless an item with the same externalId is already found."""
    if self.external_id_exists(node):
        return
    self.create(node)
def external_id_exists(self, node):
    """Return True when a search on `node.externalId` yields at least one item.

    A node without an externalId is reported as not existing.
    """
    external_id = node.externalId
    if external_id is None:
        return False
    matches = self.search({"externalId": external_id})
    return len(matches) > 0
def create_edges(self, edges):
    """Create edges between nodes, edges should be of format [{"_type": "friend", "_source": 1, "_target": 2}]"""
    payload = []
    for edge in edges:
        source_id = edge.source.id
        target_id = edge.target.id
        if source_id is None or target_id is None:
            print(f"Could not create edge {edge} missing source or target id")
            return False

        forward = {"_source": source_id, "_target": target_id, "_type": edge._type}
        if edge.label is not None:
            forward[LABEL] = edge.label
        if edge.sequence is not None:
            forward[SEQUENCE] = edge.sequence

        if edge.reverse:
            # The mirrored edge points the other way and gets a "~" type prefix;
            # it is queued before the forward edge.
            backward = copy(forward)
            backward.update({"_source": target_id, "_target": source_id,
                             "_type": "~" + backward["_type"]})
            payload.append(backward)
        payload.append(forward)

    return self.bulk_action(create_items=[], update_items=[], create_edges=payload)
def delete_items(self, items):
    """Delete `items` through one bulk action, addressed by their ids."""
    return self.bulk_action(delete_items=[item.id for item in items])
def delete_all(self):
    """Delete every item returned by `get_all_items`."""
    self.delete_items(self.get_all_items())
def bulk_action(self, create_items=None, update_items=None, create_edges=None, delete_items=None):
    """Send one bulk request that creates/updates items, creates edges and deletes items.

    Every argument defaults to an empty list. Returns True on HTTP 200,
    otherwise prints the problem and returns False.
    """
    payload = {
        "createItems": [] if create_items is None else create_items,
        "updateItems": [] if update_items is None else update_items,
        "createEdges": [] if create_edges is None else create_edges,
        "deleteItems": [] if delete_items is None else delete_items,
    }
    request_body = {"databaseKey": self.database_key, "payload": payload}

    try:
        response = requests.post(f"{self.base_url}/bulk_action", json=request_body)
        if response.status_code == 200:
            return True
        if "UNIQUE constraint failed" in str(response.content):
            print(response.status_code, "Edge already exists")
        else:
            print(response, response.content)
        return False
    except requests.exceptions.RequestException as e:
        print(e)
        return False
def create_edge(self, edge):
    """Create a single edge in the pod from `edge.source` to `edge.target`.

    Returns True on HTTP 200, otherwise prints the problem and returns False.
    """
    payload = {"_source": edge.source.id, "_target": edge.target.id, "_name": edge._type}
    body = {"auth": self.auth_json, "payload": payload}

    try:
        result = requests.post(f"{self.base_url}/create_edge", json=body)
        if result.status_code != 200:
            print(result, result.content)
            return False
        return True
    except requests.exceptions.RequestException as e:
        print(e)
        return False
    # The trailing `return self.create_edges([edge])` was removed: every path
    # above already returns, so it could never execute.
def get(self, id, expanded=True):
    """Fetch the item `id`, with its edges when `expanded` is true.

    Returns None when the item cannot be fetched or is marked as deleted.
    """
    fetch = self._get_item_expanded if expanded else self._get_item_with_properties
    item = fetch(id)

    if item is None:
        return None
    if item.deleted == True:
        print(f"Item with id {id} does not exist anymore")
        return None
    return item
def get_all_items(self):
    """Placeholder for fetching all items; currently always raises.

    The request code that used to follow the `raise` was unreachable and
    has been removed.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("get_all_items is not implemented")
def filter_deleted(self, items):
    """Return a new list with the items whose `deleted` attribute does not equal True."""
    kept = []
    for item in items:
        if item.deleted == True:
            continue
        kept.append(item)
    return kept
def _get_item_expanded(self, id):
item = self.get(id, expanded=False)
edges = self.get_edges(id)
for e in edges:
item.add_edge(e["name"], e["item"])
return item
# body = {"payload": [id],
# "databaseKey": self.database_key}
# try:
# result = requests.post(f"{self.base_url}/get_items_with_edges",
# json=body)
# if result.status_code != 200:
# print(result, result.content)
# return None
# else:
# json = result.json()[0]
# res = self.item_from_json(json)
# return res
# except requests.exceptions.RequestException as e:
# print(e)
# return None
def get_edges(self, id):
    """Fetch the outgoing edges of item `id` with their target items expanded.

    Returns the decoded response list, in which every entry's "item" value
    has been replaced by the object built with `item_from_json`. Returns
    None (after printing the problem) on failure.
    """
    request_body = {
        "payload": {"item": str(id), "direction": "Outgoing", "expandItems": True},
        "auth": self.auth_json,
    }
    try:
        response = requests.post(f"{self.base_url}/get_edges", json=request_body)
        if response.status_code == 200:
            edges = response.json()
            for entry in edges:
                entry["item"] = self.item_from_json(entry["item"])
            return edges
        print(response, response.content)
        return None
    except requests.exceptions.RequestException as e:
        print(e)
        return None
def _get_item_with_properties(self, id):
    """Fetch the item `id` without its edges.

    Returns the item built from the first entry of the response, or None
    when the response is an empty list or the request fails.
    """
    try:
        request_body = {"auth": self.auth_json, "payload": str(id)}
        response = requests.post(f"{self.base_url}/get_item", json=request_body)
        if response.status_code == 200:
            found = response.json()
            if found == []:
                return None
            return self.item_from_json(found[0])
        print(response, response.content)
        return None
    except requests.exceptions.RequestException as e:
        print(e)
        return None
def get_properties_json(self, node, dates=True):
    """Collect the serialisable properties of `node` into a dict.

    Skipped are: attributes starting with an underscore, the `private`
    attribute and every name listed in it, list values, None values and,
    when `dates` is False, the date keys. The schema type is added under
    "type".
    """
    DATE_KEYS = ['dateCreated', 'dateModified', 'dateServerModified']
    private = getattr(node, "private", [])

    res = dict()
    for k, v in node.__dict__.items():
        if k[:1] == '_' or k == "private" or k in private:
            continue
        if isinstance(v, list) or v is None:
            continue
        if dates == False and k in DATE_KEYS:
            continue
        res[k] = v

    res["type"] = self._get_schema_type(node)
    return res
@staticmethod
def _get_schema_type(node):
for cls in node.__class__.mro():
# if cls.__module__ == "pymemri.data.schema" and cls.__name__ != "ItemBase":
if cls.__name__ != "ItemBase":
return cls.__name__
raise ValueError
def update_item(self, node):
    """Send the current properties of `node` (without date keys) to the pod's update endpoint.

    "type" and "deleted" are left out of the payload. Failures are printed;
    nothing is returned.
    """
    payload = self.get_properties_json(node, dates=False)
    for excluded in ("type", "deleted"):
        if excluded in payload:
            del payload[excluded]
    # The value is unused, but the lookup raises KeyError for a node without an id.
    item_id = payload["id"]

    request_body = {"payload": payload, "auth": self.auth_json}
    try:
        response = requests.post(f"{self.base_url}/update_item", json=request_body)
        if response.status_code != 200:
            print(response, response.content)
    except requests.exceptions.RequestException as e:
        print(e)
def search(self, fields_data):
    """Search the pod with the query dict `fields_data`.

    Returns the matching items with deleted ones filtered out, or None
    when the request fails.
    """
    request_body = {"payload": fields_data, "auth": self.auth_json}
    try:
        response = requests.post(f"{self.base_url}/search", json=request_body)
        found = [self.item_from_json(raw) for raw in response.json()]
        return self.filter_deleted(found)
    except requests.exceptions.RequestException:
        return None
def search_last_added(self, type=None, with_prop=None, with_val=None):
    """Return the first result of a descending-order search limited to one item.

    The search can be narrowed by item `type` and by requiring property
    `with_prop` to equal `with_val`. Returns None when the search fails or
    yields nothing.
    """
    query = {"_limit": 1, "_sortOrder": "Desc"}
    if type is not None:
        query["type"] = type
    if with_prop is not None:
        query[f"{with_prop}=="] = with_val

    # Was `client.search(...)`: a global that only exists in the notebook
    # session, so the method raised NameError anywhere else.
    results = self.search(query)
    if not results:
        return None
    return results[0]
def item_from_json(self, json):
    """Build an item from its JSON dict, merging it into an already known item.

    The constructor is looked up from the item type plus the optional
    "pluginClass"/"pluginPackage" entries. If an item with the same id is
    already in `ItemBase.global_db`, that object is updated and returned;
    otherwise the new item is returned.
    """
    # The earlier lookup based on "indexerClass" was removed: its result was
    # immediately overwritten by the lookup below.
    plugin_class = json.get("pluginClass", None)
    plugin_package = json.get("pluginPackage", None)
    constructor = get_constructor(json["type"], plugin_class, plugin_package=plugin_package,
                                  extra=self.registered_classes)
    new_item = constructor.from_json(json)

    existing = ItemBase.global_db.get(new_item.id)
    if existing is None:
        return new_item

    # TODO: cleanup
    if not existing.is_expanded() and new_item.is_expanded():
        # Move the freshly fetched edges onto the known object.
        for edge_name in new_item.get_all_edge_names():
            edges = new_item.get_edges(edge_name)
            for e in edges:
                e.source = existing
            setattr(existing, edge_name, edges)

    # Property values from the new item overwrite those on the known object.
    for prop_name in new_item.get_property_names():
        setattr(existing, prop_name, getattr(new_item, prop_name))
    return existing
def get_properties(self, expanded):
    """Return a shallow copy of `expanded` with the `ALL_EDGES` entry removed, if present."""
    without_edges = copy(expanded)
    if ALL_EDGES in without_edges:
        del without_edges[ALL_EDGES]
    return without_edges
def run_importer(self, id, servicePayload):
    """Ask the pod to start the importer `id` with the given service payload.

    The request body is printed before sending; the outcome is printed as
    well. Nothing is returned.
    """
    body = {"databaseKey": servicePayload["databaseKey"],
            "payload": {"id": id, "servicePayload": servicePayload}}
    print(body)

    try:
        res = requests.post(f"{self.base_url}/run_importer", json=body)
        if res.status_code != 200:
            # Was `{url}`, an undefined name that raised NameError on this path.
            print(f"Failed to start importer on {self.url}:\n{res.status_code}: {res.text}")
        else:
            print("Starting importer")
    except requests.exceptions.RequestException as e:
        # The f prefix was missing, so the literal text "{e}" was printed.
        print(f"Error with calling importer {e}")
```
%% Cell type:markdown id: tags:
Pymemri communicates with the pod via the `PodClient`. The PodClient requires you to provide a [database key](https://gitlab.memri.io/memri/pod/-/blob/dev/docs/HTTP_API.md#user-content-api-authentication-credentials) and an [owner key](https://gitlab.memri.io/memri/pod/-/blob/dev/docs/HTTP_API.md#user-content-api-authentication-credentials). During development, you don't have to worry about these keys: you can just omit them when initializing the `PodClient`, which creates a new user by defining random keys. *Note that this will create a new database for you every time you create a PodClient; if you want to access the same database with multiple PodClients, you have to set the same keys.* When you are using the app, setting the keys in the pod and passing them when calling an integrator is handled for you by the app itself.
%% Cell type:code id: tags:
``` python
client = PodClient()
success = client.test_connection()
assert success
```
%% Output
Succesfully connected to pod
%% Cell type:markdown id: tags:
## Creating Items and Edges
%% Cell type:markdown id: tags:
Now that we have access to the pod, we can create items here and upload them to the pod. All items are defined in the schema of the pod. When Initializing an Item, always make sure to use the from_data classmethod to initialize.
%% Cell type:code id: tags:
``` python
email_item = EmailMessage.from_data(content="example content field")
email_item
```
%% Output
EmailMessage (#None)
%% Cell type:code id: tags:
``` python
succes = client.add_to_schema(email_item)
assert succes
```
%% Cell type:code id: tags:
``` python
email_item = EmailMessage.from_data(content="example content field")
client.create(email_item)
```
%% Output
True
%% Cell type:markdown id: tags:
<!-- [08:09:30 vasya@vn971 pod] curl -X POST -H "Content-Type: application/json" --insecure "http://localhost:3030/v3/03170a2e7597b7b7e3d84c05391d139a62b157e78786d8c082f29dcf4c111314/create_item" -d '{"databaseKey": "2DD29CA851E7B56E4697B0E1F08507293D761A05CE4D1B628663F411A8086D99", "payload": {"type": "ItemPropertySchema", "itemType": "Person", "propertyName": "age", "valueType": "integer"}}'
16[08:09:31 vasya@vn971 pod]
[08:09:32 vasya@vn971 pod]
[08:09:32 vasya@vn971 pod]
[08:09:33 vasya@vn971 pod] curl -X POST -H "Content-Type: application/json" --insecure "http://localhost:3030/v3/03170a2e7597b7b7e3d84c05391d139a62b157e78786d8c082f29dcf4c111314/create_item" -d '{"databaseKey": "2DD29CA851E7B56E4697B0E1F08507293D761A05CE4D1B628663F411A8086D99", "payload": {"type": "Person", "age": 20}}'
17[08:10:02 vasya@vn971 pod]
[08:10:04 vasya@vn971 pod]
[08:10:04 vasya@vn971 pod]
[08:10:05 vasya@vn971 pod] curl -X POST -H "Content-Type: application/json" --insecure "http://localhost:3030/v3/03170a2e7597b7b7e3d84c05391d139a62b157e78786d8c082f29dcf4c111314/create_item" -d '{"databaseKey": "2DD29CA851E7B56E4697B0E1F08507293D761A05CE4D1B628663F411A8086D99", "payload": {"type": "Person", "myAge": 20}}'
Property myAge not defined in Schema (attempted to use it for json value 20) -->
%% Cell type:markdown id: tags:
We can easily define our own types, and use them in the pod.
%% Cell type:code id: tags:
``` python
class Dog(Item):
    """Example of a user-defined item type with `name` and `age` properties."""
    properties = Item.properties + ["name", "age"]
    edges = Item.edges

    # The earlier `__init__(self, name, age, id=None, deleted=None)` was
    # removed: it was shadowed by this definition and never took effect.
    def __init__(self, name=None, age=None, **kwargs):
        """Store `name` and `age`; remaining kwargs are forwarded to `Item.__init__`."""
        super().__init__(**kwargs)
        self.name = name
        self.age = age

    @classmethod
    def from_json(cls, json):
        """Build a Dog from the "id", "name" and "age" entries of `json` (missing ones become None)."""
        id = json.get("id", None)
        name = json.get("name", None)
        age = json.get("age", None)
        return cls(id=id, name=name, age=age)
```
%% Cell type:code id: tags:
``` python
dog = Dog("max", 2)
client.add_to_schema(dog);
dog2 = Dog("bob", 3)
client.create(dog2);
```
%% Cell type:code id: tags:
``` python
dog_from_db = client.get(dog2.id, expanded=False)
```
%% Cell type:markdown id: tags:
We can connect items using edges. Let's create another item, a person, and connect the email and the person.
%% Cell type:code id: tags:
``` python
person_item = Person.from_data(firstName="Alice", lastName="X")
succes = client.add_to_schema(person_item)
assert succes
```
%% Cell type:code id: tags:
``` python
person_item = Person.from_data(firstName="Alice", lastName="X")
item_succes = client.create(person_item)
edge = Edge(email_item, person_item, "sender")
edge_succes = client.create_edge(edge)
assert item_succes and edge_succes
```
%% Cell type:code id: tags:
``` python
client.get_edges(email_item.id)
```
%% Output
[{'item': Person (#a63b11ebe6996b2efb00b75c42ccd930), 'name': 'sender'}]
[{'item': Person (#806a88787321a9a81054e63d17ad2fbb), 'name': 'sender'}]
%% Cell type:markdown id: tags:
If we use the normal `client.get` (without `expanded=False`), we also get items directly connected to the Item.
%% Cell type:code id: tags:
``` python
email_from_db = client.get(email_item.id)
```
%% Cell type:code id: tags:
``` python
assert isinstance(email_from_db.sender[0], Person)
```
%% Cell type:markdown id: tags:
# Fetching and updating Items
%% Cell type:markdown id: tags:
## Normal Items
%% Cell type:markdown id: tags:
We can use the client to fetch data from the database. This is in particular useful for indexers, which often use data in the database as input for their models. The simplest form of querying the database is by querying items in the pod by their id (unique identifier).
%% Cell type:code id: tags:
``` python
person_item = Person.from_data(firstName="Alice")
assert client.create(person_item)
```
%% Cell type:code id: tags:
``` python
person_from_db = client.get(person_item.id, expanded=False)
assert person_from_db is not None
assert person_from_db == person_item
assert person_from_db.id is not None
```
%% Cell type:markdown id: tags:
Apart from creating, we might want to update existing items:
%% Cell type:code id: tags:
``` python
person_item.lastName = "Awesome"
client.update_item(person_item)
person_from_db = client.get(person_item.id, expanded=False)
assert person_from_db.lastName == "Awesome"
```
%% Cell type:markdown id: tags:
When we don't know the ids of the items we want to fetch, we can also search by property. We can use this for instance when we want to query all items from a particular type to perform some indexing on. We can get all `Person` Items from the db by:
%% Cell type:markdown id: tags:
## Search
%% Cell type:code id: tags:
``` python
person_item2 = Person.from_data(firstName="Bob")
client.create(person_item2);
all_people = client.search({"type": "Person"})
assert all([isinstance(p, Person) for p in all_people]) and len(all_people) > 0
all_people[:3]
```
%% Output
[Person (#a63b11ebe6996b2efb00b75c42ccd930),
Person (#16560d3dd6550a6aa58d88405fab177a),
Person (#fa5e4f210ec5b928e7eb263fcabf252f)]
[Person (#806a88787321a9a81054e63d17ad2fbb),
Person (#1f8e823a9b9ddaedf8dd37c9682beb0b),
Person (#cba8e451efdfe7be87dfeb10444ee670)]
%% Cell type:markdown id: tags:
## Search last added items
%% Cell type:code id: tags:
``` python
person_item2 = Person.from_data(firstName="Last Person")
client.create(person_item2);
```
%% Cell type:code id: tags:
``` python
assert client.search_last_added(type="Person").firstName == "Last Person"
```
%% Cell type:markdown id: tags:
In the near future, Pod will support searching by user defined properties as well. This will allow for the following. **warning, this is currently not supported**
%% Cell type:markdown id: tags:
```client.search_last_added(type="Person", with_prop="ImportedBy", with_val="EmailImporter")```
%% Cell type:markdown id: tags:
## Uploading & downloading files
%% Cell type:markdown id: tags:
### File API
%% Cell type:markdown id: tags:
To work with files, the `PodClient` has a file api. The file api works by posting a blob to the `upload_file` endpoint, and creating an Item with a property with the same sha256 as the sha used in the endpoint.
%% Cell type:code id: tags:
``` python
from pymemri.data.photo import *
```
%% Cell type:code id: tags:
``` python
x = np.random.randint(0, 255+1, size=(640, 640), dtype=np.uint8)
photo = IPhoto.from_np(x)
file = photo.file[0]
succes = client.create(file)
succes2 = client._upload_image(x)
assert succes
assert succes2
```
%% Cell type:code id: tags:
``` python
data = client.get_file(file.sha256)
arr = np.frombuffer(data, dtype=np.uint8)
assert (arr.reshape(640,640) == x).all()
```
%% Cell type:markdown id: tags:
### Photo API
%% Cell type:markdown id: tags:
For photos we do this automatically using `PodClient.create` on a Photo and `PodClient.get_photo`:
%% Cell type:code id: tags:
``` python
x = np.random.randint(0, 255+1, size=(640, 640), dtype=np.uint8)
photo = IPhoto.from_np(x)
```
%% Cell type:code id: tags:
``` python
succes = client.add_to_schema(IPhoto.from_np(x))
```
%% Cell type:code id: tags:
``` python
assert client.create(photo)
```
%% Output
creating
creating photo file
creating
Uploaded file
%% Cell type:code id: tags:
``` python
res = client.get_photo(photo.id, size=640)
```
%% Cell type:code id: tags:
``` python
res
```
%% Output
IPhoto (#e49db579f6427af57651fc3ca5efafcf)
IPhoto (#424cce3929a63f5849c40c181e0ce252)
%% Cell type:code id: tags:
``` python
assert (res.data == x).all()
```
%% Cell type:markdown id: tags:
# Check if an item exists
%% Cell type:markdown id: tags:
Not supported yet by the new PodAPI
%% Cell type:code id: tags:
``` python
# hide
# person_item = Person.from_data(firstName="Eve", externalId="gmail_1")
# person_item2 = Person.from_data(firstName="Eve2", externalId="gmail_1")
# client.create_if_external_id_not_exists(person_item)
# client.create_if_external_id_not_exists(person_item2)
# existing = client.search({"externalId": "gmail_1"})
# assert len(existing) == 1
# client.delete_all()
```
%% Cell type:markdown id: tags:
# Resetting the db
%% Cell type:code id: tags:
``` python
# client.delete_all()
```
%% Cell type:markdown id: tags:
# Export -
%% Cell type:code id: tags:
``` python
# hide
from nbdev.export import *
notebook2script()
```
%% Output
Converted basic.ipynb.
Converted data.photo.ipynb.
Converted importers.Importer.ipynb.
Converted importers.util.ipynb.
Converted index.ipynb.
Converted indexers.indexer.ipynb.
Converted itembase.ipynb.
Converted plugin.pluginbase.ipynb.
Converted pod.client.ipynb.
%% Cell type:code id: tags:
``` python
```
......
......@@ -24,21 +24,28 @@ index = {"read_file": "basic.ipynb",
"IndexerData": "indexers.indexer.ipynb",
"get_indexer_run_data": "indexers.indexer.ipynb",
"test_registration": "indexers.indexer.ipynb",
"POD_FULL_ADDRESS_ENV": "indexers.indexer.ipynb",
"RUN_UID_ENV": "indexers.indexer.ipynb",
"POD_SERVICE_PAYLOAD_ENV": "indexers.indexer.ipynb",
"DATABASE_KEY_ENV": "indexers.indexer.ipynb",
"OWNER_KEY_ENV": "indexers.indexer.ipynb",
"POD_FULL_ADDRESS_ENV": "plugin.pluginbase.ipynb",
"RUN_UID_ENV": "plugin.pluginbase.ipynb",
"POD_SERVICE_PAYLOAD_ENV": "plugin.pluginbase.ipynb",
"DATABASE_KEY_ENV": "plugin.pluginbase.ipynb",
"OWNER_KEY_ENV": "plugin.pluginbase.ipynb",
"run_importer": "indexers.indexer.ipynb",
"run_integrator_from_run_id": "indexers.indexer.ipynb",
"run_integrator": "indexers.indexer.ipynb",
"generate_test_env": "indexers.indexer.ipynb",
"ALL_EDGES": "itembase.ipynb",
"EDGE_KEY": "itembase.ipynb",
"DB": "itembase.ipynb",
"parse_base_item_json": "itembase.ipynb",
"Edge": "itembase.ipynb",
"ItemBase": "itembase.ipynb",
"Item": "itembase.ipynb",
"PluginBase": "plugin.pluginbase.ipynb",
"PluginRun": "plugin.pluginbase.ipynb",
"MyPlugin": "plugin.pluginbase.ipynb",
"run_plugin_from_run_id": "plugin.pluginbase.ipynb",
"register_base_classes": "plugin.pluginbase.ipynb",
"run_plugin": "plugin.pluginbase.ipynb",
"DEFAULT_POD_ADDRESS": "pod.client.ipynb",
"POD_VERSION": "pod.client.ipynb",
"PodClient": "pod.client.ipynb"}
......@@ -49,6 +56,7 @@ modules = ["data/basic.py",
"importers/util.py",
"indexers/indexer.py",
"data/itembase.py",
"plugin/pluginbase.py",
"pod/client.py"]
doc_url = "http://memri.docs.memri.io/pymemri/"
......
# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/itembase.ipynb (unless otherwise specified).
__all__ = ['ALL_EDGES', 'DB', 'parse_base_item_json', 'Edge', 'ItemBase', 'Item']
__all__ = ['ALL_EDGES', 'EDGE_KEY', 'DB', 'parse_base_item_json', 'Edge', 'ItemBase', 'Item']
# Cell
# hide
......@@ -8,6 +8,7 @@ from ..imports import *
ALL_EDGES = "allEdges"
SOURCE, TARGET, TYPE, EDGE_TYPE, LABEL, SEQUENCE = "_source", "_target", "_type", "_type", "label", "sequence"
EDGE_KEY = "allEdges"
# Cell
# hide
......@@ -75,8 +76,8 @@ class Edge():
_type = json[EDGE_TYPE]
json_target = json[TARGET]
target_type = json_target["_type"]
indexer_class = json_target.get("indexerClass", None)
target_constructor = get_constructor(target_type, indexer_class)
plugin_class = json_target.get("pluginClass", None)
target_constructor = get_constructor(target_type, plugin_class)
target = target_constructor.from_json(json_target)
return cls(source=None, target=target, _type=_type)
......@@ -118,9 +119,6 @@ class ItemBase():
if existing is None and node.id is not None:
cls.global_db.add(node)
def replace_self(self, other):
self.__dict__.update(other.__dict__)
def __getattribute__(self, name):
val = object.__getattribute__(self, name)
if isinstance(val, Edge):
......@@ -181,18 +179,18 @@ class ItemBase():
if res is None: return False
return len(res) == 1
def expand(self, api):
"""Expands a node (retrieves all directly connected nodes ands adds to object)."""
self._expanded = True
res = api.get(self.id, expanded=True)
for edge_name in res.get_all_edge_names():
edges = res.get_edges(edge_name)
for e in edges:
e.source = self
self.__setattr__(edge_name, edges)
# def expand(self, api):
# """Expands a node (retrieves all directly connected nodes ands adds to object)."""
# self._expanded = True
# res = api.get(self.id, expanded=True)
# for edge_name in res.get_all_edge_names():
# edges = res.get_edges(edge_name)
# for e in edges:
# e.source = self
# self.__setattr__(edge_name, edges)
# self.edges = res.edges
return self
# # self.edges = res.edges
# return self
def __repr__(self):
id = self.id
......@@ -217,31 +215,100 @@ class ItemBase():
v.source = res
return res
def inherit_funcs(self, other):
"""This function can be used to inherit new functionalities from a subclass. This is a patch to solve
the fact that python does provide extensions of classes that are defined in a different file that are
dynamic enough for our use case."""
assert issubclass(other, self.__class__)
self.__class__ = other
# def inherit_funcs(self, other):
# """This function can be used to inherit new functionalities from a subclass. This is a patch to solve
# the fact that python does provide extensions of classes that are defined in a different file that are
# dynamic enough for our use case."""
# assert issubclass(other, self.__class__)
# self.__class__ = other
# Cell
class Item(ItemBase):
"""Item is the baseclass for all of the data classes."""
def __init__(self, dateAccessed=None, dateCreated=None, dateModified=None, deleted=None,
externalId=None, itemDescription=None, starred=None, version=None, id=None, importJson=None,
changelog=None, label=None, genericAttribute=None, measure=None, sharedWith=None):
super().__init__(id)
self.dateAccessed = dateAccessed
self.dateCreated = dateCreated
self.dateModified = dateModified
self.deleted = deleted
self.externalId = externalId
self.itemDescription = itemDescription
self.starred = starred
self.version = version
self.importJson = importJson
self.changelog = changelog if changelog is not None else []
self.label = label if label is not None else []
self.genericAttribute = genericAttribute if genericAttribute is not None else []
self.measure = measure if measure is not None else []
self.sharedWith = sharedWith if sharedWith is not None else []
\ No newline at end of file
properties = ["dateAccessed", "dateCreated", "dateModified", "deleted", "externalId", "itemDescription",
"starred", "version", "id", "importJson", "pluginClass"]
edges = ["changelog", "label", "genericAttribute", "measure", "sharedWith"]
def __init__(self, **kwargs):
    """Initialise the item from keyword arguments.

    Every name in ``self.properties`` is set from ``kwargs`` (``None`` when
    absent) and every name in ``self.edges`` is set from ``kwargs`` (a new
    empty list when absent). Keyword arguments that are not listed in either
    collection are ignored.
    """
    super().__init__(kwargs.get("id"))
    # "id" was already handed to the base initialiser above, so skip it here.
    for prop_name in self.properties:
        if prop_name != "id":
            setattr(self, prop_name, kwargs.get(prop_name, None))
    for edge_name in self.edges:
        setattr(self, edge_name, kwargs.get(edge_name, []))
@classmethod
def parse_json(self, cls, json):
    """Collect the constructor keyword arguments for ``cls`` from ``json``.

    Note the unusual signature: this is a classmethod, so ``self`` is the class
    the method was looked up on, while ``cls`` is the class whose
    ``properties`` / ``edges`` are parsed. Edge entries override property
    entries that share a key.
    """
    parsed = dict(Item.parse_properties(cls, json))
    parsed.update(Item.parse_edges(cls, json))
    return parsed
@classmethod
def parse_properties(self, cls, json):
return {p: json.get(p, None) for p in cls.properties}
@classmethod
def parse_edges(self, cls, json):
    """Group the edges found under ``EDGE_KEY`` in ``json`` by edge name.

    Only edges whose type is one of ``cls.edges``, or the reverse form of one
    (the same name prefixed with ``~``), are kept. Reverse edges are stored
    under the un-prefixed name.

    ``self`` is the class this classmethod was looked up on; ``cls`` is the
    class being parsed, so the accepted edge names are always taken from
    ``cls``.

    Returns a dict mapping edge name to a list of ``Edge`` objects; it is empty
    when ``json`` has no ``EDGE_KEY`` entry.
    """
    edge_kwargs = dict()
    all_edges = json.get(EDGE_KEY, None)
    if all_edges is None:
        return edge_kwargs

    # Accept both directions for the edges declared on the parsed class.
    accepted = set(cls.edges) | {f"~{e}" for e in cls.edges}
    for edge_json in all_edges:
        edge = Edge.from_json(edge_json)
        # NOTE(review): Edge.from_json builds edges with `_type=...`; confirm
        # that Edge also exposes the `.type` attribute read here.
        edge_type = edge.type
        if edge_type in accepted:
            # Strip the reverse marker so both directions share one key.
            edge_name = edge_type[1:] if edge_type.startswith("~") else edge_type
            edge_kwargs.setdefault(edge_name, []).append(edge)
    return edge_kwargs
@classmethod
def remove_prefix(s, prefix="~"):
return s[1:] if s[0] == "`" else s
@classmethod
def from_json(cls, json):
    """Build an instance of ``cls`` from its json representation.

    The constructor arguments come from ``Item.parse_json``; afterwards every
    edge returned by ``get_all_edges()`` gets the new instance as its source.
    """
    instance = cls(**Item.parse_json(cls, json))
    for edge in instance.get_all_edges():
        edge.source = instance
    return instance
# if edge._type == "changelog" or edge._type == "~changelog":
# changelog.append(edge)
# elif edge._type == "label" or edge._type == "~label":
# label.append(edge)
# elif edge._type == "genericAttribute" or edge._type == "~genericAttribute":
# genericAttribute.append(edge)
# elif edge._type == "measure" or edge._type == "~measure":
# measure.append(edge)
# elif edge._type == "sharedWith" or edge._type == "~sharedWith":
# sharedWith.append(edge)
# elif edge._type == "indexerRun" or edge._type == "~indexerRun":
# indexerRun.append(edge)
# self.dateAccessed = dateAccessed
# self.dateCreated = dateCreated
# self.dateModified = dateModified
# self.deleted = deleted
# self.externalId = externalId
# self.itemDescription = itemDescription
# self.starred = starred
# self.version = version
# self.importJson = importJson
# self.changelog = changelog if changelog is not None else []
# self.label = label if label is not None else []
# self.genericAttribute = genericAttribute if genericAttribute is not None else []
# self.measure = measure if measure is not None else []
# self.sharedWith = sharedWith if sharedWith is not None else []
\ No newline at end of file
......@@ -10,19 +10,27 @@
from .itembase import ItemBase, Edge, Item
def get_constructor(_type, indexer_class=None, extra=None):
def get_constructor(_type, plugin_class=None, plugin_package=None, extra=None):
import pymemri.indexers as models
from pymemri.data.photo import IPhoto
from pymemri.indexers.indexer import IndexerBase
import pymemri.integrator_registry
if _type == "Indexer" and indexer_class is not None and hasattr(pymemri.integrator_registry, indexer_class):
return getattr(pymemri.integrator_registry, indexer_class)
if _type == "Indexer" and plugin_class is not None and hasattr(pymemri.integrator_registry, plugin_class):
return getattr(pymemri.integrator_registry, plugin_class)
if plugin_class is not None and plugin_package is not None:
try:
mod = __import__(plugin_package, fromlist=[plugin_class])
dynamic = {plugin_class: getattr(mod, plugin_class)}
except Exception as e:
print(f"Could not import {plugin_class}.{plugin_package}")
else:
dynamic = dict()
classes = z = {**globals(), **locals(), **extra}
classes = z = {**globals(), **locals(), **extra, **dynamic}
if _type in classes:
if _type == "Indexer":
constructor = classes[indexer_class]
constructor = classes[plugin_class]
else:
i_class = "I" + _type
if i_class in classes:
......@@ -2062,7 +2070,7 @@ class Indexer(Item):
def __init__(self, dateAccessed=None, dateCreated=None, dateModified=None, deleted=None,
externalId=None, itemDescription=None, starred=None, version=None, id=None, importJson=None,
name=None, repository=None, icon=None, query=None, bundleImage=None, runDestination=None,
indexerClass=None, changelog=None, label=None, genericAttribute=None, measure=None, sharedWith=None,
pluginClass=None, changelog=None, label=None, genericAttribute=None, measure=None, sharedWith=None,
indexerRun=None):
super().__init__(dateAccessed=dateAccessed, dateCreated=dateCreated, dateModified=dateModified,
deleted=deleted, externalId=externalId, itemDescription=itemDescription, starred=starred,
......@@ -2074,7 +2082,7 @@ class Indexer(Item):
self.query = query
self.bundleImage = bundleImage
self.runDestination = runDestination
self.indexerClass = indexerClass
self.pluginClass = pluginClass
self.indexerRun = indexerRun if indexerRun is not None else []
@classmethod
......@@ -2096,7 +2104,7 @@ class Indexer(Item):
query = json.get("query", None)
bundleImage = json.get("bundleImage", None)
runDestination = json.get("runDestination", None)
indexerClass = json.get("indexerClass", None)
pluginClass = json.get("pluginClass", None)
changelog = []
label = []
......@@ -2124,7 +2132,7 @@ class Indexer(Item):
res = cls(dateAccessed=dateAccessed, dateCreated=dateCreated, dateModified=dateModified,
deleted=deleted, externalId=externalId, itemDescription=itemDescription, starred=starred,
version=version, id=id, importJson=importJson, name=name, repository=repository, icon=icon,
query=query, bundleImage=bundleImage, runDestination=runDestination, indexerClass=indexerClass,
query=query, bundleImage=bundleImage, runDestination=runDestination, pluginClass=pluginClass,
changelog=changelog, label=label, genericAttribute=genericAttribute, measure=measure,
sharedWith=sharedWith, indexerRun=indexerRun)
for e in res.get_all_edges(): e.source = res
......
......@@ -19,9 +19,9 @@ OWNER_KEY_ENV = 'ownerKey'
class IndexerBase(Indexer):
def __init__(self, indexerClass=None, *args, **kwargs):
if indexerClass is None: indexerClass=self.__class__.__name__
super().__init__(indexerClass=indexerClass, *args, **kwargs)
def __init__(self, pluginClass=None, *args, **kwargs):
if pluginClass is None: pluginClass=self.__class__.__name__
super().__init__(pluginClass=pluginClass, *args, **kwargs)
def populate(self, client, items, edges=False):
new_items = [x for x in items if x.id is None]
......@@ -93,7 +93,6 @@ def run_integrator_from_run_id(run_id, client):
raise NotImplementedError(f"Cannot execute item of type {run}")
# Cell
def run_integrator(environ=None, pod_full_address=None, integrator_run_id=None, database_key=None, owner_key=None,
verbose=False):
"""Runs an integrator, you can either provide the run settings as parameters to this function (for local testing)
......
# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/plugin.pluginbase.ipynb (unless otherwise specified).
__all__ = ['POD_FULL_ADDRESS_ENV', 'RUN_UID_ENV', 'POD_SERVICE_PAYLOAD_ENV', 'DATABASE_KEY_ENV', 'OWNER_KEY_ENV',
'PluginBase', 'PluginRun', 'MyPlugin', 'run_plugin_from_run_id', 'register_base_classes', 'run_plugin']
# Cell
from ..data.schema import *
from ..pod.client import PodClient, DEFAULT_POD_ADDRESS
from ..imports import *
from os import environ
# Cell
POD_FULL_ADDRESS_ENV = 'POD_FULL_ADDRESS'
RUN_UID_ENV = 'RUN_ID'
POD_SERVICE_PAYLOAD_ENV = 'POD_SERVICE_PAYLOAD'
DATABASE_KEY_ENV = 'databaseKey'
OWNER_KEY_ENV = 'ownerKey'
# Cell
# hide
class PluginBase(Item):
    """Base class for plugins.

    Subclasses override `run`, which `run_plugin_from_run_id` calls as
    ``plugin.run(run, client)``.
    """
    # NOTE(review): "data_query" is listed as a property, but __init__ takes and
    # stores the value as `query` -- confirm which name is intended.
    properties = Item.properties + ["name", "repository", "icon", "data_query", "bundleImage",
                                    "runDestination", "pluginClass", "pluginPackage"]
    # NOTE(review): the edge is declared as "IndexerRun" while __init__ sets the
    # attribute `indexerRun` -- confirm the intended spelling.
    edges = Item.edges + ["IndexerRun"]

    def __init__(self, name=None, repository=None, icon=None, query=None, bundleImage=None, runDestination=None,
                 pluginClass=None, indexerRun=None, **kwargs):
        """Create a plugin item.

        `pluginClass` defaults to the name of the concrete class. All remaining
        keyword arguments are forwarded to the `Item` initialiser.
        """
        if pluginClass is None: pluginClass=self.__class__.__name__
        self.pluginPackage=None
        super().__init__(**kwargs)
        # Assigned after the base initialiser so the explicit arguments are the
        # values that end up on the instance.
        self.name = name
        self.repository = repository
        self.icon = icon
        self.query = query
        self.bundleImage = bundleImage
        self.runDestination = runDestination
        self.pluginClass = pluginClass
        self.indexerRun = indexerRun if indexerRun is not None else []

    def run(self, run=None, client=None):
        """Execute the plugin; must be overridden by subclasses.

        Accepts the same ``(run, client)`` arguments the plugin runner passes,
        so a subclass that forgets to override it fails with
        `NotImplementedError` instead of a `TypeError` about the arguments.
        """
        raise NotImplementedError()
# Cell
# hide
class PluginRun(Item):
    """Item describing one run of a plugin; the plugin hangs off the `plugin` edge."""
    properties = Item.properties
    edges = Item.edges + ["plugin"]

    def __init__(self, plugin=None, **kwargs):
        """Create the run; `plugin` defaults to a new empty list."""
        super().__init__(**kwargs)
        if plugin is None:
            plugin = []
        self.plugin = plugin
# Cell
# hide
class MyPlugin(PluginBase):
    """Minimal example plugin: its `run` only prints a message."""
    properties = PluginBase.properties
    edges = PluginBase.edges

    def __init__(self, **kwargs):
        """Initialise via `PluginBase` and record the package this class lives in."""
        super().__init__(**kwargs)
        self.pluginPackage = "pymemri.plugin.pluginbase"

    def run(self, run, client):
        """Print a marker message; `run` and `client` are not used."""
        print("running")
# Cell
# export
def run_plugin_from_run_id(run_id, client):
    """Fetch the run item `run_id` through `client` and execute its first plugin.

    Args:
        run_id: id of the run item to fetch with ``client.get``.
        client: object providing ``get(run_id)``; it is also passed on to the
            plugin's ``run`` method.

    Raises:
        ValueError: if the fetched run has no plugin attached.
    """
    run = client.get(run_id)
    plugins = run.plugin
    # `not plugins` covers both an empty list and a missing (None) edge list.
    if not plugins:
        # Adjacent literals instead of a backslash continuation, so the message
        # does not pick up the indentation of the source line.
        raise ValueError(
            f"plugin run {run_id} has no plugin attached to it. Make sure there is a 'plugin' "
            "edge from your run to the actual plugin object."
        )
    plugins[0].run(run, client)
# Cell
# hide
def register_base_classes(client):
    """Add the `PluginRun` base class to the schema through `client`.

    Raises:
        ValueError: if ``client.add_to_schema`` raises or returns a falsy value.
    """
    # The call is made outside of an `assert` so that it still runs when Python
    # is started with -O, which strips assert statements.
    try:
        added = client.add_to_schema(PluginRun())
    except Exception as e:
        # Keep the original failure attached as the cause for easier debugging.
        raise ValueError("Could not add base schema") from e
    if not added:
        raise ValueError("Could not add base schema")
# Cell
def _run_plugin(pod_full_address=None, plugin_run_id=None, database_key=None, owner_key=None,
                verbose=False):
    """Run the plugin attached to `plugin_run_id` against the given pod.

    Creates a `PodClient` from the address and keys, registers the base plugin
    classes and then executes the plugin of the run. With `verbose` set, the
    settings are printed first.
    """
    if verbose:
        settings = (
            ("pod_full_address", pod_full_address),
            ("plugin_run_id", plugin_run_id),
            ("database_key", database_key),
            ("owner_key", owner_key),
        )
        for name, val in settings:
            print(f"{name}={val}")
        print()

    client = PodClient(url=pod_full_address, database_key=database_key, owner_key=owner_key)
    register_base_classes(client)
    run_plugin_from_run_id(plugin_run_id, client)
# Cell
# hide
def _parse_env(env):
    """Read the plugin run settings from the mapping `env`.

    Returns the tuple ``(pod_full_address, plugin_run_id, pod_service_payload,
    database_key, owner_key)``. The pod address falls back to
    `DEFAULT_POD_ADDRESS`; the payload is the JSON-decoded value of
    `POD_SERVICE_PAYLOAD_ENV`, from which both keys are taken.

    Raises:
        Exception: if a required key is missing from `env` or from the payload.
    """
    try:
        address = env.get(POD_FULL_ADDRESS_ENV, DEFAULT_POD_ADDRESS)
        run_id = str(env[RUN_UID_ENV])
        payload = json.loads(env[POD_SERVICE_PAYLOAD_ENV])
        db_key, owner = payload[DATABASE_KEY_ENV], payload[OWNER_KEY_ENV]
    except KeyError as missing:
        raise Exception('Missing parameter: {}'.format(missing)) from None
    return address, run_id, payload, db_key, owner
# Cell
from fastscript import *
import os
@call_parse
def run_plugin(pod_full_address:Param("The pod full address", str)=None,
               plugin_run_id:Param("Run id of the plugin to be executed", str)=None,
               database_key:Param("Database key of the pod", str)=None,
               owner_key:Param("Owner key of the pod", str)=None):
    """Command-line entry point that runs a plugin.

    When no argument is given, all settings are read from the environment via
    `_parse_env`. Otherwise the environment is ignored and every argument must
    be provided.

    Raises:
        ValueError: if only some of the arguments were provided; the message
            names the missing ones.
    """
    env = os.environ
    params = {"pod_full_address": pod_full_address, "plugin_run_id": plugin_run_id,
              "database_key": database_key, "owner_key": owner_key}
    # Names (not values) of the arguments that were left out, so the error
    # below tells the user what to supply.
    missing = [name for name, value in params.items() if value is None]

    if len(missing) == len(params):
        print("Reading `run_plugin()` parameters from environment variables")
        pod_full_address, plugin_run_id, _, database_key, owner_key = _parse_env(env)
    else:
        print("Used arguments passed to `run_plugin()` (ignoring environment)")
        if missing:
            raise ValueError(f"Defined some params to run plugin, but not all. Missing {missing}")

    _run_plugin(pod_full_address=pod_full_address, plugin_run_id=plugin_run_id,
                database_key=database_key, owner_key=owner_key, verbose=True)
\ No newline at end of file
......@@ -3,9 +3,9 @@
__all__ = ['DEFAULT_POD_ADDRESS', 'POD_VERSION', 'PodClient']
# Cell
from ..data.itembase import Edge, ItemBase
from ..data.basic import *
from ..data.schema import *
from ..data.itembase import Edge, ItemBase, Item
from ..data.photo import Photo
from ..imports import *
from hashlib import sha256
......@@ -391,8 +391,11 @@ class PodClient:
return client.search(query)[0]
def item_from_json(self, json):
indexer_class = json.get("indexerClass", None)
constructor = get_constructor(json["type"], indexer_class, extra=self.registered_classes)
plugin_class = json.get("pluginClass", None)
plugin_package = json.get("pluginPackage", None)
constructor = get_constructor(json["type"], plugin_class, plugin_package=plugin_package,
extra=self.registered_classes)
new_item = constructor.from_json(json)
existing = ItemBase.global_db.get(new_item.id)
# TODO: cleanup
......
......@@ -15,7 +15,7 @@ language = English
custom_sidebar = True
license = apache2
status = 2
console_scripts = run_plugin=pymemri.plugin.pluginbase:run_plugin
requirements = requests tqdm ipdb fastprogress fastscript opencv-python nbdev==1.1.5 matplotlib
nbs_path = nbs
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment