Commit 5f6cd3eb authored by Koen van der Veen's avatar Koen van der Veen

multiple implementations for uploading and downloading models

parent 93eaadbc
%% Cell type:code id:15eaf136 tags:
``` python
from transformers import AutoModelForSequenceClassification, AutoTokenizer
```
%% Cell type:code id:ebb34ad5 tags:
``` python
from transformers import AutoModel
```
%% Cell type:code id:03570a6d tags:
``` python
model = AutoModelForSequenceClassification.from_pretrained("distilroberta-base", num_labels=10)
```
%% Output
Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
%% Cell type:code id:79ec1f77 tags:
``` python
# Hyperparameters
model_name = "distilroberta-base"
batch_size = 32
learning_rate = 1e-3
```
%% Cell type:code id:3d4ccd98 tags:
``` python
# # Load model
# model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=10)
# # To increase training speed, we will freeze all layers except the classifier head.
# for param in model.base_model.parameters():
#     param.requires_grad = False
```
%% Cell type:markdown id:eee2c6b1 tags:
## Huggingface
%% Cell type:code id:0d6d1660 tags:
``` python
model_name = "my-test-model"
```
%% Cell type:code id:36e5d3c6 tags:
``` python
model.push_to_hub(model_name, use_temp_dir=True)
```
%% Output
/opt/anaconda3/envs/finetune-test1/lib/python3.7/site-packages/huggingface_hub/hf_api.py:1004: FutureWarning: `create_repo` now takes `token` as an optional positional argument. Be sure to adapt your code!
FutureWarning,
Cloning https://huggingface.co/koenvdv/my-test-model into local empty directory.
To https://huggingface.co/koenvdv/my-test-model
566c22b..bbd3b52 main -> main
'https://huggingface.co/koenvdv/my-test-model/commit/bbd3b526dc553b2055e90e38bdec3d126e3375b3'
%% Cell type:code id:9f9356b6 tags:
``` python
list(model.pooler.parameters())[0]
```
%% Output
Parameter containing:
tensor([[-1.1055e-02, 7.6790e-03, -1.7858e-02, ..., 4.5453e-02,
1.3963e-03, -3.4204e-03],
[-1.4697e-02, 3.0819e-02, 2.9549e-02, ..., -2.0563e-02,
6.0812e-05, -1.7535e-02],
[-3.9068e-03, 4.4484e-03, -2.5533e-02, ..., -2.3708e-02,
-3.8119e-02, -3.1126e-02],
...,
[ 2.5309e-02, -3.8438e-03, 1.3118e-03, ..., 1.6762e-02,
-2.8897e-04, -1.2778e-02],
[ 4.5472e-03, -3.1136e-02, -3.6995e-03, ..., -8.1584e-03,
-3.2597e-02, 8.8213e-03],
[ 9.7282e-03, 1.5389e-02, -3.4779e-03, ..., 4.2545e-02,
-1.3812e-02, -1.7629e-02]], requires_grad=True)
%% Cell type:code id:f463062b tags:
``` python
model2 = AutoModel.from_pretrained("koenvdv/my-test-model")
```
%% Output
Some weights of the model checkpoint at koenvdv/my-test-model were not used when initializing RobertaModel: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at koenvdv/my-test-model and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
%% Cell type:code id:bc4d775b tags:
``` python
list(model2.pooler.parameters())[0]
```
%% Output
Parameter containing:
tensor([[-0.0255, -0.0311, -0.0239, ..., 0.0234, -0.0239, 0.0197],
[-0.0102, -0.0106, 0.0066, ..., 0.0140, -0.0143, 0.0446],
[ 0.0193, 0.0129, -0.0208, ..., -0.0049, -0.0234, -0.0031],
...,
[ 0.0045, 0.0013, -0.0094, ..., 0.0265, 0.0124, 0.0301],
[ 0.0038, 0.0182, -0.0185, ..., -0.0072, 0.0107, -0.0242],
[ 0.0063, -0.0095, 0.0072, ..., 0.0284, -0.0157, 0.0159]],
requires_grad=True)
%% Cell type:code id:fcf5e957 tags:
``` python
import torch
```
%% Cell type:code id:3212640d tags:
``` python
# Import
import wandb
# Save your model.
torch.save(model.state_dict(), 'model.pth')
# Save as artifact for version control.
run = wandb.init(project='test-project')
artifact = wandb.Artifact('model', type='model')
artifact.add_file('model.pth')
run.log_artifact(artifact)
run.finish()
```
%% Output
%% Cell type:code id:e59354b4 tags:
``` python
# !huggingface-cli login
```
%% Output
_| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|
_| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|
_|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|
_| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|
_| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|
To login, `huggingface_hub` now requires a token generated from https://huggingface.co/settings/token.
(Deprecated, will be removed in v0.3.0) To login with username and password instead, interrupt with Ctrl+C.
Token:
%% Cell type:code id:75501228 tags:
``` python
import transformers
import datasets
```
%% Cell type:code id:b6f6ad1c tags:
``` python
training_args = transformers.TrainingArguments(
    "twitter-emoji-trainer",
    learning_rate=learning_rate,
    per_device_train_batch_size=batch_size,
    logging_steps=100,
)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset
)
```
%% Output
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
/var/folders/q1/ryq93kwj055dlbpngxv1c7z40000gn/T/ipykernel_14295/673354679.py in <module>
9 model=model,
10 args=training_args,
---> 11 train_dataset=dataset
12 )
NameError: name 'dataset' is not defined
%% Cell type:code id:10b545ba tags:
``` python
AutoModelForSequenceClassification.from_pretrained()
```
%% Cell type:code id:1ac0bc31 tags:
``` python
model.parameters()
```
%% Output
<generator object Module.parameters at 0x7fe3c130ced0>
%% Cell type:code id:6f15a71d tags:
``` python
wandb.finish()
```
%% Cell type:code id:1b1e8b3a tags:
``` python
%env WANDB_PROJECT=twitter_test_wandb
%env WANDB_LOG_MODEL=true
```
%% Output
env: WANDB_PROJECT=twitter_test_wandb
env: WANDB_LOG_MODEL=true
%% Cell type:code id:365d52bd tags:
``` python
%env WANDB_LOG_MODEL=true
```
%% Output
env: WANDB_LOG_MODEL=true
%% Cell type:code id:510ea51e tags:
``` python
import wandb
```
%% Cell type:code id:10a6dcf3 tags:
``` python
wandb.login()
```
%% Output
True
%% Cell type:code id:5da87653 tags:
``` python
with wandb.init(project="twitter_test_wandb") as run:
    my_model_name = "run-bert-base-high-lr:latest"
    my_model_artifact = run.use_artifact(my_model_name)

    # Download model weights to a folder and return the path
    model_dir = my_model_artifact.download()

    # Load your Hugging Face model from that folder
    # using the same model class
    model = AutoModelForSequenceClassification.from_pretrained(model_dir, num_labels=num_labels)
```
%% Cell type:code id:3c859c6f tags:
``` python
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import AutoModel
from fastprogress.fastprogress import progress_bar
from pathlib import Path
import transformers
import torch
import wandb
import requests
import os, sys
from getpass import getpass
from datetime import datetime
from git import Repo
```
%% Cell type:code id:e50d8f8b tags:
``` python
model = AutoModelForSequenceClassification.from_pretrained("distilroberta-base", num_labels=10)
```
%% Output
Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
%% Cell type:code id:1301a03b tags:
``` python
MEMRI_PATH = Path.home() / ".memri"
ACCESS_TOKEN_PATH = Path.home() / ".memri/access_token/access_token.txt"
GITLAB_API_BASE_URL = "https://gitlab.memri.io/api/v4"
DEFAULT_PLUGIN_MODEL_PACKAGE_NAME = "plugin-model-package"
DEFAULT_PYTORCH_MODEL_NAME = "pytorch_model.bin"
DEFAULT_HUGGINGFACE_CONFIG_NAME = "config.json"
DEFAULT_PACKAGE_VERSION = "0.0.1"
TIME_FORMAT_GITLAB = '%Y-%m-%dT%H:%M:%S.%fZ'
```
%% Cell type:code id:c9ac7d8e tags:
``` python
def find_git_repo():
    # Walk up the directory tree (at most 10 levels) until a git repo is found
    for i in range(10):
        path = "../" * i or "./"
        try:
            repo = Repo(path)
        except Exception:
            pass
        else:
            break
    if i == 9:
        raise ValueError(f"could not find git repo in {os.path.abspath('')}")
    repo_name = repo.remotes.origin.url.split('.git')[0].split('/')[-1]
    return repo_name
```
%% Cell type:code id:d776ea68 tags:
``` python
def get_registry_api_key():
    ACCESS_TOKEN_PATH.parent.mkdir(parents=True, exist_ok=True)
    if ACCESS_TOKEN_PATH.is_file():
        with open(ACCESS_TOKEN_PATH, "r") as f:
            return f.read()
    else:
        print("""
        The first time you upload a model you need to create an access_token
        at https://gitlab.memri.io/-/profile/personal_access_tokens?name=Model+Access+token&scopes=api
        Click the blue button 'Create personal access token'.
        """)
        access_token = getpass("Then copy your personal access token from 'Your new personal access token', and paste here: ")
        with open(ACCESS_TOKEN_PATH, "w") as f:
            f.write(access_token)
        return access_token
```
%% Cell type:code id:0bbaadcb tags:
``` python
# pb_ = progress_bar(range(1000))
# pb = iter(pb_)
```
%% Cell type:code id:49861048 tags:
``` python
# for i in range(1000):
#     next(pb)
# pb_.update_bar(1000)
```
%% Cell type:code id:d210854e tags:
``` python
class upload_in_chunks(object):
    def __init__(self, filename, chunksize=1 << 13):
        self.filename = filename
        self.chunksize = chunksize
        self.totalsize = os.path.getsize(filename)
        self.readsofar = 0

    def __iter__(self):
        n = 1000
        pb = progress_bar(range(n))
        pb_iter = iter(pb)
        i = 1
        delta = 1 / n
        with open(self.filename, 'rb') as file:
            while True:
                data = file.read(self.chunksize)
                if not data:
                    sys.stderr.write("\n")
                    break
                self.readsofar += len(data)
                percent = self.readsofar * 1e2 / self.totalsize
                while (percent / 100) > i * delta:
                    next(pb_iter, None)
                    i += 1
                yield data
        pb.update_bar(n)

    def __len__(self):
        return self.totalsize


class IterableToFileAdapter(object):
    def __init__(self, iterable):
        self.iterator = iter(iterable)
        self.length = len(iterable)

    def read(self, size=-1):  # TBD: add buffer for `len(data) > size` case
        return next(self.iterator, b'')

    def __len__(self):
        return self.length
```
%% Cell type:code id:369a1a56 tags:
``` python
def write_file_to_package_registry(project_id, file_path, api_key, version=DEFAULT_PACKAGE_VERSION):
    file_path = Path(file_path)
    file_name = file_path.name
    url = f"{GITLAB_API_BASE_URL}/projects/{project_id}/packages/generic/{DEFAULT_PLUGIN_MODEL_PACKAGE_NAME}/{version}/{file_name}"
    print(f"uploading {file_path}")

    it = upload_in_chunks(file_path, 10)
    res = requests.put(url=url, data=IterableToFileAdapter(it),
                       headers={"PRIVATE-TOKEN": api_key})

    if res.status_code not in [200, 201]:
        print(f"Failed to upload {file_path}: {res.content}")
    else:
        print(f"Successfully uploaded {file_path}")
```
%% Cell type:code id:6d0ad383 tags:
``` python
# write_file_to_package_registry(155, "config.json", get_registry_api_key())
```
%% Cell type:code id:4a574c05 tags:
``` python
def project_id_from_name(project_name, api_key):
    res = requests.get(f"{GITLAB_API_BASE_URL}/projects",
                       headers={"PRIVATE-TOKEN": api_key},
                       params={
                           "owned": True,
                           "search": project_name
                       })
    res = [x.get("id") for x in res.json()]
    if len(res) == 0:
        raise ValueError(f"No plugin found with name {project_name}")
    else:
        return res[0]
```
%% Cell type:code id:5919704a tags:
``` python
def write_huggingface_model_to_package_registry(project_name, model):
    api_key = get_registry_api_key()
    project_id = project_id_from_name(project_name, api_key)
    local_save_dir = Path("/tmp")
    torch.save(model.state_dict(), local_save_dir / DEFAULT_PYTORCH_MODEL_NAME)
    model.config.to_json_file(local_save_dir / DEFAULT_HUGGINGFACE_CONFIG_NAME)

    for f in [DEFAULT_HUGGINGFACE_CONFIG_NAME, DEFAULT_PYTORCH_MODEL_NAME]:
        file_path = local_save_dir / f
        print(f"writing {f} to package registry of {project_name} with project id {project_id}")
        write_file_to_package_registry(project_id, file_path, api_key)
```
%% Cell type:code id:0fab27e3 tags:
``` python
def write_model_to_package_registry(model, project_name=None):
    project_name = project_name if project_name is not None else find_git_repo()
    if isinstance(model, transformers.PreTrainedModel):
        write_huggingface_model_to_package_registry(project_name, model)
    else:
        raise ValueError(f"Model type not supported: {type(model)}")
```
%% Cell type:code id:3046cff9 tags:
``` python
# project_name="plugin-test2"
```
%% Cell type:code id:d1681ea5 tags:
``` python
# write_model_to_package_registry(model)
```
%% Cell type:code id:c8338e66 tags:
``` python
def download_package_file(filename, project_name=None, out_dir=None, package_name=DEFAULT_PLUGIN_MODEL_PACKAGE_NAME,
                          package_version=DEFAULT_PACKAGE_VERSION):
    if project_name is None:
        try:
            project_name = find_git_repo()
        except Exception:
            raise ValueError("no project name provided, but could also not find a git repo to infer project name") from None
    out_dir = out_dir if out_dir is not None else MEMRI_PATH / "projects" / project_name
    out_dir.mkdir(parents=True, exist_ok=True)

    api_key = get_registry_api_key()
    project_id = project_id_from_name(project_name, api_key)

    print(f"downloading {filename} from project with id {project_id} from package {package_name}")
    res = requests.get(
        url=f"{GITLAB_API_BASE_URL}/projects/{project_id}/packages/generic/{package_name}/{package_version}/{filename}",
        headers={"PRIVATE-TOKEN": api_key}
    )
    res.raise_for_status()

    with open(out_dir / filename, "wb") as f:
        print(f"writing {filename} to {out_dir}")
        f.write(res.content)
    return out_dir
```
%% Cell type:code id:4f72de38 tags:
``` python
def download_huggingface_model_for_project(files=None):
    if files is None:
        files = ["config.json", "pytorch_model.bin"]
    for f in files:
        out_dir = download_package_file(f)
    return out_dir
```
%% Cell type:code id:0ca16cf8 tags:
``` python
out_dir = download_huggingface_model_for_project()
```
%% Output
downloading config.json from project with id 155 from package plugin-model-package
writing config.json to /Users/koen/.memri/projects/finetuning-example
downloading pytorch_model.bin from project with id 155 from package plugin-model-package
writing pytorch_model.bin to /Users/koen/.memri/projects/finetuning-example
%% Cell type:code id:38fd199e tags:
``` python
filename = "config.json"
download_package_file(filename)
```
%% Output
writing config.json to /Users/koen/.memri/projects/finetuning-example
%% Cell type:code id:a17d5d3b tags:
``` python
```
%% Cell type:code id:2d0312f1 tags:
``` python
```
%% Cell type:code id:49c70d67 tags:
``` python
```
%% Cell type:code id:cd6f2a0c tags:
``` python
```
%% Cell type:code id:76303275 tags:
``` python
# def get_package_id_from_package_name(project_id, package_name):
#     res = requests.get(url=f"{GITLAB_API_BASE_URL}/projects/{project_id}/packages",
#                        params={})
#     res = [x.get("id") for x in res.json()]
#     if len(res) == 0:
#         raise ValueError(f"No package found with name {package_name}")
#     else:
#         return res[0]

# def get_package_file_id(project_id, package_id, filename):
#     res = requests.get(f"{GITLAB_API_BASE_URL}/projects/{project_id}/packages/{package_id}/package_files")
#     if res.status_code in [200, 201]:
#         res = [x for x in res.json() if x.get("file_name", None) == filename]
#         res = sorted(res, key=lambda x: datetime.strptime(x.get("created_at", None), TIME_FORMAT_GITLAB))
#         if len(res) == 0:
#             raise ValueError(f"Could not find package files for package with id {package_id}")
#         else:
#             return res[0]["id"]
#     else:
#         raise ValueError(f"Could not find package files for package with id {package_id}")
```
%% Cell type:code id:f21a3952 tags:
``` python
file_id = get_package_file_id(project_id, package_id, filename)
```
%% Cell type:code id:3660827b tags:
``` python
package_name
```
%% Output
'plugin_model_package'
%% Cell type:code id:2c66004b tags:
``` python
package_id=155
```
%% Cell type:markdown id:5e643a4a tags:
# Fine-tuning a text classifier on your Dataset
Memri data apps often contain machine learning models, which can be trained on labeled data from the Memri app. In this guide, we load a labeled dataset from the Pod and use it to fine-tune a [RoBERTa text classifier](https://medium.com/analytics-vidhya/evolving-with-bert-introduction-to-roberta-5174ec0e7c82). This notebook is meant to be run on your own dataset: label your data, then fill in the corresponding dataset name and pod key in the steps below. The output of this tutorial is a model that can subsequently be used in your data app. For this tutorial we created a synthetic dataset, which can be reproduced with [this notebook](https://gitlab.memri.io/memri/finetuning-example). To keep training fast, we use a smaller version of the model: [distilRoBERTa](https://huggingface.co/distilroberta-base). The code for this tutorial is available as a [notebook](https://gitlab.memri.io/memri/finetuning-example/-/blob/main/finetune_model.ipynb).
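%% Cell type:markdown id:1f2e3d4c tags:
As a rough sketch of that first step (the dataset name, keys, and the exact pymemri calls below are assumptions for illustration, not part of this notebook), connecting to your Pod and fetching the labeled dataset could look like this:
%% Cell type:code id:2a3b4c5d tags:
``` python
# Hedged sketch: connect to the Pod and look up the labeled dataset by name.
# The keys and dataset name are placeholders; PodClient.search is assumed to
# accept a property filter, which may differ between pymemri versions.
from pymemri.pod.client import PodClient

pod_client = PodClient(
    owner_key="YOUR_OWNER_KEY",        # placeholder
    database_key="YOUR_DATABASE_KEY",  # placeholder
)
results = pod_client.search({"type": "Dataset", "name": "my-labeled-dataset"})
dataset = results[0]
```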
%% Cell type:code id:e2cc7d46 tags:
``` python
from pymemri.data.itembase import Item, Edge
from pymemri.data.schema import (
Message,
Dataset,
CategoricalLabel,
DatasetEntry,
)
from pymemri.pod.client import PodClient
import datasets
import random
from IPython.display import clear_output
```
%% Cell type:code id:199c0724 tags:
``` python
# from the dataset page: https://huggingface.co/datasets/tweet_eval#source-data
emoji = '❤😍😂💕🔥😊😎✨💙😘📷🇺🇸☀💜😉💯😁🎄📸😜'
emoji_map = {i: e for i, e in enumerate(emoji)}
emoji_dataset = datasets.load_dataset("tweet_eval", "emoji")
emoji_dataset = emoji_dataset["train"]
```
%% Output
Reusing dataset tweet_eval (/Users/koen/.cache/huggingface/datasets/tweet_eval/emoji/1.1.0/12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343)
%% Cell type:markdown id:18035047 tags:
Next, we download and inspect the `tweet-eval-emoji` dataset. All entries in the dataset can be found through the `Dataset.entry` edge.
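%% Cell type:markdown id:3c4d5e6f tags:
A minimal sketch of that inspection (assuming `dataset` is a pymemri `Dataset` fetched from the Pod; the attribute access follows the `Dataset.entry` description above and may differ per pymemri version):
%% Cell type:code id:4d5e6f7a tags:
``` python
# Hedged sketch: the `entry` edge yields the DatasetEntry items of the dataset.
entries = dataset.entry
print(f"number of entries: {len(entries)}")
print(entries[0])  # inspect a single DatasetEntry
```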
%% Cell type:markdown id:f3214b11 tags:
The first step in training your model is exporting the dataset to a format we can use in Python. The `Dataset` class in pymemri can export your dataset to various data formats with the `Dataset.to` method; in this notebook, we will use Pandas.
The `columns` argument of `Dataset.to` defines which features are used. A column is either a property of the items in the Dataset (for example, the `content` of a `Message`), or a property of a connected item (the `value` of a `Label` connected to the `Message`).
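%% Cell type:markdown id:5e6f7a8b tags:
As a sketch (mirroring the commented-out cell below; the column names are the ones used elsewhere in this notebook), exporting to a pandas DataFrame and building a label-to-id mapping could look like this:
%% Cell type:code id:6f7a8b9c tags:
``` python
# Hedged sketch: export the Dataset to pandas and map label values to integer ids.
# "data.content" and "annotation.labelValue" are the columns used in this notebook.
data = dataset.to("pandas", columns=["data.content", "annotation.labelValue"])

unique_labels = data["annotation.labelValue"].unique()
label2idx = {label: i for i, label in enumerate(unique_labels)}
idx2label = {i: label for label, i in label2idx.items()}
```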
%% Cell type:code id:c8fafe5c tags:
``` python
# data = dataset.to("pandas", columns=["data.content", "annotation.labelValue"])
# data.head()
```
%% Cell type:markdown id:de7037d2 tags:
# Fine-tuning a Hugging Face model
In this guide, we fine-tune a Hugging Face model on the tweet_eval emoji task. The `transformers` library contains all the code needed to run the training for us; we only need to define a torch `Dataset` that holds our data and handles tokenization.
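%% Cell type:markdown id:7a8b9c0d tags:
Note that when the Hugging Face `tweet_eval` dataset is used directly (as in the cells below), its texts still need to be tokenized before the `Trainer` can consume them. A minimal sketch (assuming `tokenizer` and `emoji_dataset` as defined in this notebook):
%% Cell type:code id:8b9c0d1e tags:
``` python
# Hedged sketch: tokenize the HF dataset so every example carries input_ids / attention_mask.
# Without this step the Trainer fails with "You have to specify either input_ids or inputs_embeds".
def tokenize_batch(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True)

tokenized_emoji_dataset = emoji_dataset.map(tokenize_batch, batched=True)
```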
%% Cell type:code id:3da52423 tags:
``` python
# Hyperparameters
model_name = "distilroberta-base"
batch_size = 32
learning_rate = 1e-3
```
%% Cell type:code id:f3687ff4 tags:
``` python
emoji_dataset[0]
```
%% Output
{'text': 'Sunday afternoon walking through Venice in the sun with @user ️ ️ ️ @ Abbot Kinney, Venice',
'label': 12}
%% Cell type:code id:b56c58d1 tags:
``` python
# class EmojiDataset(torch.utils.data.Dataset):
#     def __init__(self, data: pd.DataFrame, tokenizer: transformers.PreTrainedTokenizerBase):
#         self.data = data
#         self.label2idx, self.idx2label = self.get_label_map()
#         self.num_labels = len(self.label2idx)
#         self.tokenizer = tokenizer
#
#     def tokenize(self, message, label=None):
#         tokenized = self.tokenizer(message, padding="max_length", truncation=True)
#         if label is not None:
#             tokenized["label"] = self.label2idx[label]
#         return tokenized
#
#     def get_label_map(self):
#         unique_labels = self.data["annotation.labelValue"].unique()
#         return {l: i for i, l in enumerate(unique_labels)}, {i: l for i, l in enumerate(unique_labels)}
#
#     def __len__(self):
#         return len(self.data)
#
#     def __getitem__(self, idx):
#         # Get the row from self.data, and skip the first column (id).
#         return self.tokenize(*self.data.iloc[idx][1:])

tokenizer = AutoTokenizer.from_pretrained(model_name)
# dataset = EmojiDataset(data, tokenizer)
```
%% Cell type:markdown id:9d78f8e0 tags:
## Training
The 🤗 Transformers library provides all the code we need for training a RoBERTa model. We will use their `Trainer` class, which handles all training, monitoring and integration with [Weights & Biases](https://wandb.ai/site) for us. The 🤗 Transformers documentation has a detailed tutorial on fine-tuning models, which can be found [here](https://huggingface.co/docs/transformers/training).
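%% Cell type:markdown id:9c0d1e2f tags:
As a side note (a sketch, not part of the training cell below): besides the `WANDB_LOG_MODEL` environment variable set earlier, Weights & Biases logging can be requested explicitly through `TrainingArguments`:
%% Cell type:code id:0d1e2f3a tags:
``` python
# Hedged sketch: route Trainer logging explicitly to Weights & Biases.
# `report_to` and `run_name` are TrainingArguments parameters; the run name is illustrative.
training_args = transformers.TrainingArguments(
    "twitter-emoji-trainer",
    learning_rate=learning_rate,
    per_device_train_batch_size=batch_size,
    logging_steps=100,
    report_to="wandb",
    run_name="twitter-emoji-finetune",
)
```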
%% Cell type:code id:da9f8303 tags:
``` python
# Load model
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=10)
# To increase training speed, we will freeze all layers except the classifier head.
for param in model.base_model.parameters():
    param.requires_grad = False
```
%% Cell type:code id:13a86a98 tags:
``` python
training_args = transformers.TrainingArguments(
    "twitter-emoji-trainer",
    learning_rate=learning_rate,
    per_device_train_batch_size=batch_size,
    logging_steps=100,
)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=emoji_dataset
)
```
%% Cell type:code id:26d534f0 tags:
``` python
trainer.train()
```
%% Output
The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text.
/opt/anaconda3/envs/finetune-test1/lib/python3.7/site-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning
FutureWarning,
***** Running training *****
Num examples = 45000
Num Epochs = 3
Instantaneous batch size per device = 32
Total train batch size (w. parallel, distributed & accumulation) = 32
Gradient Accumulation steps = 1
Total optimization steps = 4221
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/var/folders/q1/ryq93kwj055dlbpngxv1c7z40000gn/T/ipykernel_40819/4032920361.py in <module>
----> 1 trainer.train()
/opt/anaconda3/envs/finetune-test1/lib/python3.7/site-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1363 tr_loss_step = self.training_step(model, inputs)
1364 else:
-> 1365 tr_loss_step = self.training_step(model, inputs)
1366
1367 if (
/opt/anaconda3/envs/finetune-test1/lib/python3.7/site-packages/transformers/trainer.py in training_step(self, model, inputs)
1938
1939 with self.autocast_smart_context_manager():
-> 1940 loss = self.compute_loss(model, inputs)
1941
1942 if self.args.n_gpu > 1:
/opt/anaconda3/envs/finetune-test1/lib/python3.7/site-packages/transformers/trainer.py in compute_loss(self, model, inputs, return_outputs)
1970 else:
1971 labels = None
-> 1972 outputs = model(**inputs)
1973 # Save past state if it exists
1974 # TODO: this needs to be fixed and made cleaner later.
/opt/anaconda3/envs/finetune-test1/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
/opt/anaconda3/envs/finetune-test1/lib/python3.7/site-packages/transformers/models/roberta/modeling_roberta.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
1211 output_attentions=output_attentions,
1212 output_hidden_states=output_hidden_states,
-> 1213 return_dict=return_dict,
1214 )
1215 sequence_output = outputs[0]
/opt/anaconda3/envs/finetune-test1/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
/opt/anaconda3/envs/finetune-test1/lib/python3.7/site-packages/transformers/models/roberta/modeling_roberta.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
800 input_shape = inputs_embeds.size()[:-1]
801 else:
--> 802 raise ValueError("You have to specify either input_ids or inputs_embeds")
803
804 batch_size, seq_length = input_shape
ValueError: You have to specify either input_ids or inputs_embeds
%% Cell type:markdown id:2fd063f5 tags:
That's it, we trained our model. Check out the next tutorial to see how you can use this model from within your data app.
%% Cell type:code id:5be36f3a tags:
``` python
```
wandb.ipynb 0 → 100644