Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
TEST Account
pymemri
Commits
59df96e7
Commit
59df96e7
authored
3 years ago
by
Eelco van der Wel
💬
Browse files
Options
Download
Email Patches
Plain Diff
rewrite + fix export_dataset
parent
6cdd832f
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
nbs/exporters.exporters.ipynb
+729
-0
nbs/exporters.exporters.ipynb
pymemri/_nbdev.py
+5
-2
pymemri/_nbdev.py
pymemri/exporters/__init__.py
+3
-0
pymemri/exporters/__init__.py
pymemri/exporters/exporters.py
+52
-10
pymemri/exporters/exporters.py
with
789 additions
and
12 deletions
+789
-12
nbs/exporters.
query
.ipynb
→
nbs/exporters.
exporters
.ipynb
+
729
-
0
View file @
59df96e7
This diff is collapsed.
Click to expand it.
pymemri/_nbdev.py
+
5
-
2
View file @
59df96e7
...
...
@@ -18,7 +18,10 @@ index = {"read_file": "basic.ipynb",
"DEFAULT_ENCODING"
:
"data.photo.ipynb"
,
"show_images"
:
"data.photo.ipynb"
,
"Photo"
:
"data.photo.ipynb"
,
"Query"
:
"exporters.query.ipynb"
,
"Query"
:
"exporters.exporters.ipynb"
,
"Dataset"
:
"exporters.exporters.ipynb"
,
"filter_missing"
:
"exporters.exporters.ipynb"
,
"export_dataset"
:
"exporters.exporters.ipynb"
,
"ALL_EDGES"
:
"itembase.ipynb"
,
"Edge"
:
"itembase.ipynb"
,
"ItemBase"
:
"itembase.ipynb"
,
...
...
@@ -94,7 +97,7 @@ index = {"read_file": "basic.ipynb",
modules
=
[
"data/basic.py"
,
"cvu/utils.py"
,
"data/photo.py"
,
"exporters/
query
.py"
,
"exporters/
exporters
.py"
,
"data/itembase.py"
,
"plugin/authenticators/credentials.py"
,
"plugin/authenticators/oauth.py"
,
...
...
This diff is collapsed.
Click to expand it.
pymemri/exporters/__init__.py
+
3
-
0
View file @
59df96e7
from
.exporters
import
export_dataset
__all__
=
[
"export_dataset"
]
\ No newline at end of file
This diff is collapsed.
Click to expand it.
pymemri/exporters/
query
.py
→
pymemri/exporters/
exporters
.py
+
52
-
10
View file @
59df96e7
# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/exporters.
query
.ipynb (unless otherwise specified).
# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/exporters.
exporters
.ipynb (unless otherwise specified).
__all__
=
[
'Query'
]
__all__
=
[
'Query'
,
'Dataset'
,
'filter_missing'
,
'export_dataset'
]
# Cell
# hide
...
...
@@ -27,9 +27,12 @@ class Query:
items
=
items
.
copy
()
for
edge
in
edges
:
items_to_query
=
dict
()
ids_to_query
=
list
()
query_item_idx
=
list
()
for
i
in
range
(
len
(
items
)):
item
=
items
[
i
]
if
item
is
None
:
continue
# Replace item with target item. If the edge is empty, it has to be queried again.
try
:
if
edge
not
in
item
.
edges
:
...
...
@@ -37,17 +40,17 @@ class Query:
else
:
items
[
i
]
=
getattr
(
item
,
edge
)[
0
]
except
Exception
:
items_to_query
[
i
]
=
item
ids_to_query
.
append
(
item
.
id
)
query_item_idx
.
append
(
i
)
items
[
i
]
=
None
# TODO Pod can't currently get multiple items by ID, API call for each item is required for now.
for
i
,
item
in
items_to_query
.
items
():
new_items
=
client
.
search
({
"ids"
:
ids_to_query
})
for
i
,
new_item
in
zip
(
query_item_idx
,
new_items
):
try
:
result
=
client
.
get
(
item
.
id
)
items
[
i
]
=
getattr
(
result
,
edge
)[
0
]
items
[
i
]
=
getattr
(
new_item
,
edge
)[
0
]
except
Exception
:
items
[
i
]
=
None
return
items
def
get_property_values
(
...
...
@@ -80,4 +83,43 @@ class Query:
result
=
{
prop
:
self
.
get_property_values
(
client
,
prop
,
items
)
for
prop
in
self
.
properties
}
return
self
.
convert_dtype
(
result
,
dtype
)
\ No newline at end of file
return
self
.
convert_dtype
(
result
,
dtype
)
# Cell
class Dataset(Item):
    """
    Temporary dataset schema, remove when MVP2 is done.

    Extends ``Item`` with one extra property, ``queryStr``, and one extra
    edge, ``item``, which holds the items belonging to the dataset.
    """

    # Schema declaration: everything ``Item`` declares plus the additions.
    properties = Item.properties + ["queryStr"]
    edges = Item.edges + ["item"]

    def __init__(self, queryStr: str = None, item: list = None, **kwargs):
        """Create a dataset; any extra keyword arguments go to ``Item``."""
        super().__init__(**kwargs)
        self.queryStr = queryStr
        # Give every instance its own list when no edge targets are passed.
        if item is None:
            item = []
        self.item: list = item
# Cell
def filter_missing(dataset: dict) -> dict:
    """Drop every row that has a ``None`` value in at least one column.

    Args:
        dataset: Mapping of column name to a sequence of values. Rows are
            identified by their position within each column.

    Returns:
        A new dict with the same keys, where each column only keeps the
        positions at which no column held ``None``. The input is not
        modified.
    """
    # Collect the row positions that are missing in any column.
    missing_idx = set()
    for column in dataset.values():
        # A generator feeds the set directly; no intermediate list is built.
        missing_idx.update(i for i, val in enumerate(column) if val is None)

    return {
        name: [val for i, val in enumerate(column) if i not in missing_idx]
        for name, column in dataset.items()
    }
def export_dataset(
    client: PodClient,
    dataset: Dataset,
    content_fields: List[str] = None,
    label_field: str = "label.value",
    missing_values: bool = False,
    dtype: str = "dict",
):
    """Export the items of a dataset as columns of queried values.

    A ``Query`` is built over ``"id"``, every entry of ``content_fields``
    and ``label_field``, and executed against ``dataset.item``.

    Args:
        client: Pod client handed to ``Query.execute``.
        dataset: Dataset whose ``item`` edge lists the items to export.
        content_fields: Fields to export next to ``"id"`` and the label.
            Defaults to ``["content"]`` when omitted or ``None``.
        label_field: Field that holds the label of each item.
        missing_values: When false (the default), the result is passed
            through ``filter_missing``, which removes rows containing
            ``None``. When true, the result is kept as returned.
        dtype: Output format name, forwarded to ``Query.convert_dtype``.

    Returns:
        Whatever ``Query.convert_dtype`` produces for the requested
        ``dtype``.
    """
    # ``None`` stands in for the default so that no mutable list object is
    # shared between calls.
    if content_fields is None:
        content_fields = ["content"]

    items = dataset.item
    query = Query("id", *content_fields, label_field)
    result = query.execute(client, items)

    if not missing_values:
        result = filter_missing(result)
    return query.convert_dtype(result, dtype)
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment