Commit 20b0e44a authored by Koen van der Veen's avatar Koen van der Veen
Browse files

bugfix formatter

Showing with 2 additions and 2 deletions
+2 -2
%% Cell type:code id:f380ac34 tags:
``` python
%load_ext autoreload
%autoreload 2
# default_exp template.formatter
```
%% Cell type:code id:a8a7e4c9 tags:
``` python
# export
# hide
from pathlib import Path
from typing import Dict, Union, List
from fastcore.script import call_parse, Param, store_true
import zipfile
from string import Template
import re
import giturlparse
import subprocess
import pymemri
import urllib
from pathlib import PurePosixPath
import requests
```
%% Cell type:code id:74de84a3 tags:
``` python
# hide
# test imports
from pprint import pprint
import os
```
%% Cell type:markdown id:77d47d51 tags:
# Creating plugins from a template
%% Cell type:code id:9898bd14 tags:
``` python
# export
# hide
# Zip archive of the `dev` branch of the plugin-templates repository; this is the default
# download location used by `download_plugin_template` and `get_templates`.
TEMPLATE_URL = "https://gitlab.memri.io/memri/plugin-templates/-/archive/dev/plugin-templates-dev.zip"
# Top-level folder inside that archive; `download_plugin_template` looks for
# `<TEMPLATE_BASE_PATH>/<template_name>` among the archive entries.
TEMPLATE_BASE_PATH = "plugin-templates-dev"
```
%% Cell type:markdown id:475c3c1b tags:
Pymemri offers a range of plugin templates to set up testing, docker and CI for you. This way, you can focus on building your plugin, and be sure it works within the Memri ecosystem.
All plugins are hosted on our [GitLab](https://gitlab.memri.io/). In order to make your own plugin from a template,
1. Create an account on [GitLab](https://gitlab.memri.io/)
2. Create a _public_ [blank repository](https://gitlab.memri.io/projects/new#blank_project)
3. Clone the repository
4. Run the `plugin_from_template` CLI inside the repository folder.
The CLI will infer most settings for you from your git account and repository name, only a template name and optional description are required.
```
plugin_from_template --template_name classifier_plugin --description "My Classifier Plugin"
```
To make sure all settings are correct, you can inspect `metadata.json`, which holds all information like your plugin name, and python package name.
-----------------
You can list the available templates with the command below. All plugin templates are hosted [here](https://gitlab.memri.io/memri/plugin-templates).
```
plugin_from_template --list_templates
```
The CLI has options to customize the plugin name, package name and other aspects of your plugin. For advanced use, run:
```
plugin_from_template --help
```
%% Cell type:markdown id:2c9883e9 tags:
## Utility functions
%% Cell type:code id:2933c056 tags:
``` python
# export
# hide
# Repository owners that are treated as shared groups. If the owner of the repository is one of
# these groups, the username is not inferred from the remote URL and the CLI requires an
# additional `user` argument.
GITLAB_GROUPS = ["memri", "plugins"]
def get_remote_url():
    """Return the https URL of the `origin` remote of the git repository in the working directory.

    The URL is read with `git config --get remote.origin.url`, converted to its https form
    with giturlparse, and a trailing `.git` is removed.

    Raises:
        ValueError: if no remote URL could be read (no repository, no `origin` remote,
            or the `git` executable could not be run).
    """
    path = Path(".")
    try:
        # Capture stdout only: an error message printed by git must never be mistaken for a URL.
        completed = subprocess.run(
            ["git", "config", "--get", "remote.origin.url"],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            universal_newlines=True,
        )
        url = completed.stdout.strip() if completed.returncode == 0 else ""
    except OSError:
        # `git` is missing or not executable; treat it like "no remote configured".
        url = ""
    if not url:
        raise ValueError(f"You can only run this from a initialized gitlab repository, and '{path}' is not an initialized git repository")
    parsed = giturlparse.parse(url)
    repo_url = parsed.url2https
    if repo_url.endswith(".git"):
        repo_url = repo_url[:-4]
    return repo_url
def infer_git_info(url):
    """Return the `(owner, repo)` pair that giturlparse extracts from the git URL `url`."""
    git_info = giturlparse.parse(url)
    owner = git_info.owner
    repo = git_info.repo
    return owner, repo
```
%% Cell type:code id:16dfc0d2 tags:
``` python
# hide
remote_url = get_remote_url()
repo_owner, repo_name = infer_git_info(remote_url)
assert repo_owner == "memri"
assert repo_name == "pymemri"
```
%% Cell type:code id:42378645 tags:
``` python
# export
# hide
def download_file(url, fname=None):
    """Download `url` to a local file and return the name of that file.

    Args:
        url: Location to download. The request is verified against the
            `cert/gitlab.memri.io.pem` certificate located next to the installed `pymemri` package.
        fname: Target file name. Defaults to the last path segment of `url`,
            which places the file in the current working directory.

    Returns:
        The name of the file that was written.

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            error page is never stored as if it were the requested file.
    """
    cert_path = Path(pymemri.__file__).parent / "cert" / "gitlab.memri.io.pem"
    fname = url.rsplit('/', 1)[1] if fname is None else fname
    # The context manager releases the streamed connection even when writing fails.
    with requests.get(url, stream=True, verify=str(cert_path)) as r:
        r.raise_for_status()
        with open(fname, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=8192):
                fd.write(chunk)
    return fname
```
%% Cell type:code id:9a72f504 tags:
``` python
# export
# hide
def str_to_identifier(s, lower=True):
    """Turn `s` into a string usable as a Python identifier.

    Every non-word character is replaced by `_`, and a `_` is prepended when
    `s` starts with a digit.

    Args:
        s: The string to convert.
        lower: If True (default), the result is lower-cased.

    Returns:
        The converted string.
    """
    # Raw string: "\W" and "\d" are invalid escape sequences in a normal string literal.
    result = re.sub(r"\W|^(?=\d)", "_", s)
    if lower:
        result = result.lower()
    return result
def str_to_gitlab_identifier(s, lower=True):
    """Turn `s` into a dash-separated identifier.

    Every non-word character is replaced by `-`, and a `-` is prepended when
    `s` starts with a digit. Underscores are word characters and are kept.

    Args:
        s: The string to convert.
        lower: If True (default), the result is lower-cased.

    Returns:
        The converted string.
    """
    # Raw string: "\W" and "\d" are invalid escape sequences in a normal string literal.
    result = re.sub(r"\W|^(?=\d)", "-", s)
    if lower:
        result = result.lower()
    return result
def reponame_to_displayname(reponame: str) -> str:
    """Convert a repository name to a title-cased display name.

    Each run of dashes and/or underscores becomes a single space.
    """
    words = re.split("[-_]+", reponame)
    spaced = " ".join(words)
    return spaced.title()
def download_plugin_template(
    template_name: str, url: str = TEMPLATE_URL, base_path: str = TEMPLATE_BASE_PATH
):
    """Download the template archive and return the files of one template.

    Args:
        template_name: Name of the template folder inside the archive.
        url: Location of the zip archive holding all templates.
        base_path: Folder inside the archive that contains the template folders.

    Returns:
        A dict mapping each file path, relative to the template folder and using
        `/` separators, to the file content decoded as UTF-8.

    Raises:
        ValueError: if the archive contains no files for `template_name`.
    """
    # Names inside a zip archive always use "/", so match them with POSIX paths on every OS.
    template_root = PurePosixPath(base_path) / template_name
    zip_path = download_file(url)
    try:
        with zipfile.ZipFile(zip_path, "r") as f:
            # Skip directory entries explicitly, so that empty files
            # (for example an empty `__init__.py`) are kept.
            raw_files = {
                info.filename: f.read(info)
                for info in f.infolist()
                if not info.is_dir() and template_root in PurePosixPath(info.filename).parents
            }
    finally:
        # Remove the downloaded archive on failure too.
        Path(zip_path).unlink()
    if not raw_files:
        raise ValueError(f"Could not find template: {template_name}")
    return {
        str(PurePosixPath(name).relative_to(template_root)): content.decode("utf-8")
        for name, content in raw_files.items()
    }
def get_templates(url: str = TEMPLATE_URL) -> List[str]:
    """Return the names of the templates available in the archive at `url`.

    A template is a directory entry exactly one level below the archive's
    top-level folder, i.e. an entry named `<root>/<template>/`.
    """
    zip_path = download_file(url)
    try:
        with zipfile.ZipFile(zip_path, "r") as f:
            files_split = [name.split("/") for name in f.namelist()]
    finally:
        # Do not leave the downloaded archive behind in the working directory.
        Path(zip_path).unlink()
    # "<root>/<template>/" splits into three parts, the last one being empty.
    return [parts[1] for parts in files_split if len(parts) == 3 and parts[-1] == ""]
```
%% Cell type:code id:becd3e18 tags:
``` python
# hide
assert len(get_templates())
```
%% Cell type:code id:f99827cd tags:
``` python
# hide
assert str_to_identifier("My Plugin") == "my_plugin"
template = download_plugin_template("classifier_plugin")
assert len(template)
pprint(list(template.keys()))
```
%% Output
['$package_name/model.py',
'$package_name/plugin.py',
'$package_name/schema.py',
'$package_name/utils.py',
'.gitignore',
'.gitlab-ci.yml',
'Dockerfile',
'LICENSE.txt',
'README.md',
'metadata.json',
'setup.cfg',
'setup.py',
'tests/test_plugin.py',
'tools/preload.py']
%% Cell type:code id:2707a14d tags:
``` python
# export
# hide
class TemplateFormatter:
    """Fill in a plugin template and write the result below a target directory.

    Both the file paths and the file contents of the template are treated as
    `string.Template` strings. Placeholders that have no entry in
    `replace_dict` are left untouched.
    """

    def __init__(
        self,
        template_dict: Dict[str, str],
        replace_dict: Dict[str, str],
        tgt_path: Union[str, Path],
        verbose: bool = False,
    ):
        """Store the template and settings.

        Args:
            template_dict: Maps template-relative file paths to file contents.
            replace_dict: Maps placeholder names to their replacement values.
            tgt_path: Directory below which the formatted files are written.
            verbose: If True, print every file that is formatted.
        """
        self.template_dict = template_dict
        self.tgt_path = Path(tgt_path)
        self.replace_dict = replace_dict
        self.verbose = verbose

    def format_content(self, content):
        """Return `content` with all known placeholders substituted."""
        return Template(content).safe_substitute(self.replace_dict)

    def format_path(self, path):
        """Return the target path for the template path `path`, placeholders substituted."""
        new_path = Template(path).safe_substitute(self.replace_dict)
        return self.tgt_path / new_path

    def format_file(self, filename, content):
        """Format one template file and write it, creating parent directories as needed."""
        new_path = self.format_path(filename)
        new_content = self.format_content(content)
        new_path.parent.mkdir(exist_ok=True, parents=True)
        if self.verbose:
            print(f"Formatting {filename} -> {new_path}")
        with open(new_path, "w", encoding="utf-8") as f:
            f.write(new_content)

    def get_files(self):
        """Return the target paths of all template files, in template order."""
        return [self.format_path(filename) for filename in self.template_dict]

    def format(self):
        """Format and write every file of the template."""
        for filename, content in self.template_dict.items():
            self.format_file(filename, content)

    def print_filetree(self):
        """Print an overview of the generated files, grouped by directory.

        Top-level files are listed first. Every other file is listed by name
        under a header line showing its directory relative to the target path.
        """
        relative_paths = [p.relative_to(self.tgt_path) for p in self.get_files()]
        # Stable sort on depth only: shallow entries first, template order kept within a depth.
        # Depth comes from the path parts, so it does not depend on the OS path separator.
        relative_paths.sort(key=lambda p: len(p.parts))

        lines = ["Created the following files"]
        previous_prefix = None
        for path in relative_paths:
            new_prefix = path.parent
            is_top_level = len(path.parts) == 1
            if is_top_level:
                lines.append(f"├── {path}")
            else:
                if previous_prefix != new_prefix:
                    lines.append(f"├── {new_prefix.as_posix()}")
                # Files at any depth are listed under their directory header.
                lines.append(f"│ ├── {path.name}")
            previous_prefix = new_prefix
        print("\n".join(lines).strip() + "\n")
```
%% Cell type:markdown id:30c2cdd4 tags:
### Plugin Template CLI
With the `plugin_from_template` CLI, you can easily create a plugin where all CI pipelines, docker files, and test setups are configured for you. Multiple templates are available, to see the complete list use:
`plugin_from_template --list_templates`
%% Cell type:code id:9ef65986 tags:
``` python
# export
# hide
def get_template_replace_dict(
    repo_url=None, user=None, plugin_name=None, package_name=None, description=None, install_requires=None, template_name=None
):
    """Build the placeholder -> value mapping used to fill in a plugin template.

    Values that are not passed explicitly are inferred from the git remote of
    the current repository.

    Args:
        repo_url: URL of the plugin repository. Read from the `origin` remote if None.
        user: GitLab username. Defaults to the repository owner, unless that
            owner is one of `GITLAB_GROUPS`, in which case it stays None.
        plugin_name: Display name. Derived from the repository name if None.
        package_name: Python package name. Derived from the repository name if None.
        description: Plugin description; stored as `str(description)`.
        install_requires: Comma separated extra requirements. Blank entries,
            `pymemri` and `pytest` are dropped.
        template_name: Name of the template; `classifier_plugin` gets model
            loading code, every other template gets `NotImplementedError` stubs.

    Returns:
        A dict with the keys `user`, `package_name`, `plugin_name`, `repo_name`,
        `repo_url`, `description`, `install_requires`, `model_imports`,
        `model_init` and `model_predict`.

    Raises:
        ValueError: if no remote URL can be read, or if the `classifier_plugin`
            template is requested while the package name, user or repository
            name is unknown.
    """
    if repo_url is None:
        repo_url = get_remote_url()
    try:
        repo_owner, repo_name = infer_git_info(repo_url)
    except ValueError:
        # Bind the names the code below reads, so the fallback does not end in a NameError.
        repo_owner, repo_name = None, None
        print("Could not infer git information from current directory, no initialized repository found.")

    # A group owner is not a username, so `user` stays None in that case.
    if user is None and repo_owner not in GITLAB_GROUPS:
        user = repo_owner
    if plugin_name is None and repo_name is not None:
        plugin_name = reponame_to_displayname(repo_name)
    if package_name is None and repo_name is not None:
        package_name = str_to_identifier(repo_name)

    if install_requires is None:
        install_requires = ""
    else:
        requirements = (req.strip() for req in install_requires.split(","))
        install_requires = "\n ".join(
            req for req in requirements if req != "" and req not in ["pymemri", "pytest"]
        )

    if template_name == "classifier_plugin":
        # Explicit errors instead of `assert`: these are input checks and must survive `python -O`.
        if package_name is None:
            raise ValueError("The classifier_plugin template requires a package name, pass `package_name`.")
        if user is None:
            raise ValueError("The classifier_plugin template requires a username, pass `user`.")
        if repo_name is None:
            raise ValueError("The classifier_plugin template requires a repository name, but none could be inferred.")
        repo_name_gitlab = str_to_gitlab_identifier(repo_name)
        # hacky, dont change!
        # NOTE(review): the import line is presumably split in two so the notebook export does not
        # rewrite the `pymemri` import inside this string - confirm before simplifying.
        # NOTE(review): these snippets are substituted verbatim into the template; confirm that
        # their indentation matches the template.
        model_imports_ = f"""
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from """ +"pymemri.data.loader import load_huggingface_model_for_project"
        model_init = f"""
model = load_huggingface_model_for_project(project_path="{user}/{repo_name_gitlab}")
tokenizer = AutoTokenizer.from_pretrained("distilroberta-base", model_max_length=512)
self.pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, return_all_scores=True, truncation=True)
"""
        model_predict = """
return self.pipeline(x)
"""
    else:
        model_imports_ = ""
        model_init = "raise NotImplementedError()"
        model_predict = "raise NotImplementedError()"

    return {
        "user": user,
        "package_name": package_name,
        "plugin_name": plugin_name,
        "repo_name": repo_name,
        "repo_url": repo_url,
        "description": str(description),
        "install_requires": install_requires,
        "model_imports": model_imports_,
        "model_init": model_init,
        "model_predict": model_predict
    }
```
%% Cell type:code id:074d003b tags:
``` python
# export
def _plugin_from_template(list_templates=False, user=None,repo_url=None,plugin_name=None,template_name="basic",
                          package_name=None,description=None,target_dir=".",verbose=True,install_requires=""):
    """Download a plugin template and write the formatted files to `target_dir`.

    With `list_templates` set, only the names of the available templates are
    printed. Otherwise the template `template_name` is downloaded, its
    placeholders are filled from the arguments and the git repository, and the
    resulting files are written. The file tree is printed when `verbose` is set.
    """
    if list_templates:
        print("Available templates:")
        for available in get_templates():
            print(available)
        return

    template_files = download_plugin_template(template_name)
    output_dir = Path(target_dir)
    settings = dict(
        repo_url=repo_url,
        user=user,
        plugin_name=plugin_name,
        package_name=package_name,
        description=description,
        install_requires=install_requires,
        template_name=template_name,
    )
    replacements = get_template_replace_dict(**settings)
    print(replacements)

    formatter = TemplateFormatter(template_files, replacements, output_dir)
    formatter.format()
    if verbose:
        formatter.print_filetree()
    print(f"Created `{replacements['plugin_name']}` using the {template_name} template.")
```
%% Cell type:code id:5c4df6b0 tags:
``` python
# export
@call_parse
def plugin_from_template(
    list_templates: Param("List available plugin templates", store_true) = False,
    user: Param("Your Gitlab username", str) = None,
    repo_url: Param("The url of your empty Gitlab plugin repository", str) = None,
    plugin_name: Param("Display name of your plugin", str) = None,
    template_name: Param(
        "Name of the template, use `list_templates` to see all available options"
    ) = "basic",
    package_name: Param("Name of your plugin python package", str) = None,
    description: Param("Description of your plugin", str) = None,
    target_dir: Param("Directory to output the formatted template", str) = ".",
    verbose: Param("Should print out dir", bool) = True,
    install_requires: Param("Extra packages to install, provided as comma separated, e.g. pymemri,requests", str)=""
):
    """
    CLI that downloads a plugin template and formats it using the arguments and the local git repository.

    Args:
        list_templates (Param, optional): If True, only list available templates. Defaults to False.
        user (Param, optional): Your GitLab username. Defaults to None.
        repo_url (Param, optional): The url of your gitlab plugin repository. Defaults to None.
        plugin_name (Param, optional): The name of your plugin. Defaults to None.
        template_name (Param, optional): The name of the template used. To list all options, see `list_templates`.
            Defaults to "basic".
        package_name (Param, optional): The name of the python package of your plugin. Inferred if left blank. Defaults to None.
        description (Param, optional): An optional plugin description. Defaults to None.
        target_dir (Param, optional): Directory where the plugin template is generated. Defaults to ".".
        verbose (Param, optional): If True, print the tree of created files. Defaults to True.
        install_requires (Param, optional): Comma separated extra packages to install. Defaults to "".
    """
    _plugin_from_template(
        list_templates=list_templates,
        user=user,
        repo_url=repo_url,
        plugin_name=plugin_name,
        template_name=template_name,
        package_name=package_name,
        description=description,
        target_dir=target_dir,
        verbose=verbose,
        install_requires=install_requires,
    )
```
%% Cell type:code id:e7597558 tags:
``` python
!plugin_from_template --list_templates
```
%% Output
Available templates:
basic
classifier_plugin
%% Cell type:code id:3d54e18e tags:
``` python
# hide
import tempfile
template = download_plugin_template("classifier_plugin")
replace_dict = {
"user": "eelcovdw",
"repo_name": "sentiment-plugin",
"package_name": "sentiment_plugin",
"plugin_name": "Sentiment Plugin",
"description": "Predict sentiment on text messages",
"model_imports": """from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from pymemri.data.loader import load_huggingface_model_for_project""",
"model_init": """
model = load_huggingface_model_for_project(project_path="koenvanderveen/sentiment-plugin")
tokenizer = AutoTokenizer.from_pretrained("distilroberta-base", model_max_length=512)
self.pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, return_all_scores=True, truncation=True)
""",
"model_predict": """
return self.pipeline(x)
"""
}
# raise NotImplementedError()
with tempfile.TemporaryDirectory() as result_path:
result_path = Path(result_path)
formatter = TemplateFormatter(template, replace_dict, result_path)
formatter.format()
created_files = [f for f in result_path.rglob("*") if not os.path.isdir(f)]
contents = {}
for fn in created_files:
with open(fn, "r") as f:
contents[str(fn)] = f.read()
formatter.print_filetree()
# print("Created files:")
# pprint(created_files)
assert len(template) == len(created_files)
```
%% Output
/var/folders/q1/ryq93kwj055dlbpngxv1c7z40000gn/T/tmp5flm_y6x
Created the following files
├── .gitignore
├── .gitlab-ci.yml
├── Dockerfile
├── LICENSE.txt
├── README.md
├── metadata.json
├── setup.cfg
├── setup.py
├── sentiment_plugin
│ ├── model.py
│ ├── plugin.py
│ ├── schema.py
│ ├── utils.py
├── tests
│ ├── test_plugin.py
├── tools
│ ├── preload.py
%% Cell type:code id:8e008696 tags:
``` python
# hide
key = [k for k in contents.keys() if k.endswith("model.py")][0]
print(contents[key])
```
%% Cell type:code id:fc4993b2 tags:
``` python
# hide
from nbdev.export import *
notebook2script()
```
%% Output
Converted basic.ipynb.
Converted cvu.utils.ipynb.
Converted data.dataset.ipynb.
Converted data.loader.ipynb.
Converted data.oauth.ipynb.
Converted data.photo.ipynb.
Converted exporters.exporters.ipynb.
Converted gitlab_api.ipynb.
Converted index.ipynb.
Converted itembase.ipynb.
Converted plugin.authenticators.credentials.ipynb.
Converted plugin.authenticators.oauth.ipynb.
Converted plugin.listeners.ipynb.
Converted plugin.pluginbase.ipynb.
Converted plugin.states.ipynb.
Converted plugins.authenticators.password.ipynb.
Converted pod.api.ipynb.
Converted pod.client.ipynb.
Converted pod.db.ipynb.
Converted pod.utils.ipynb.
Converted template.config.ipynb.
Converted template.formatter.ipynb.
Converted test_schema.ipynb.
Converted test_utils.ipynb.
%% Cell type:code id:9d9cfe6f tags:
``` python
```
......
......@@ -203,7 +203,7 @@ def get_template_replace_dict(
# hacky, dont change!
model_imports_ = f"""
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from """ +"pymemri.data_loader import load_huggingface_model_for_project"
from """ +"pymemri.data.loader import load_huggingface_model_for_project"
model_init = f"""
model = load_huggingface_model_for_project(project_path="{user}/{repo_name_gitlab}")
tokenizer = AutoTokenizer.from_pretrained("distilroberta-base", model_max_length=512)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment