{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "90a312dd",
   "metadata": {},
   "source": [
    "# Pseudonymization with Logprep\n",
    "\n",
    "This notebook first uses the `logprep pseudo` command line to generate keys and to pseudonymize and depseudonymize a single string. It then runs a pseudonymizer processor on an example event and prints the processed event together with its extra data."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7140f2ef",
   "metadata": {},
   "source": [
    "## Pseudonymization and Depseudonymization\n",
    "\n",
    "1. Generate keys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2e19922b",
   "metadata": {
    "vscode": {
     "languageId": "shellscript"
    }
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "\n",
    "logprep pseudo generate -f ./analyst 1024\n",
    "logprep pseudo generate -f ./depseudo 2048\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "03df49f3",
   "metadata": {},
   "source": [
    "2. Pseudonymize a string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3989821b",
   "metadata": {
    "vscode": {
     "languageId": "shellscript"
    }
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "\n",
    "logprep pseudo pseudonymize ./analyst.crt ./depseudo.crt mystring"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4f30f225",
   "metadata": {},
   "source": [
    "3. Depseudonymize the string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "48c96106",
   "metadata": {
    "vscode": {
     "languageId": "shellscript"
    }
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "\n",
    "# Replace the placeholder with the ciphertext from the pseudonymize cell above.\n",
    "# It is passed as one quoted argument: unquoted `<` and `>` would be parsed by\n",
    "# the shell as redirections and the command would fail with a syntax error.\n",
    "PSEUDONYM='<ciphertext from above>'\n",
    "\n",
    "logprep pseudo depseudonymize ./analyst.key ./depseudo.key \"$PSEUDONYM\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "76df9e2e",
   "metadata": {},
   "source": [
    "## Pseudonymizer Processor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "86fee246",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import logging\n",
    "import sys\n",
    "import uuid\n",
    "\n",
    "from logprep.factory import Factory\n",
    "from logprep.util.time import TimeParser\n",
    "from logprep.ng.event.log_event import LogEvent\n",
    "from logprep.ng.event.event_state import EventStateType\n",
    "\n",
    "# Send log records of level DEBUG and above to stdout so they appear in the cell output.\n",
    "logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)\n",
    "\n",
    "# Example event; only `user.name` is listed in the pseudonymizer mapping below.\n",
    "document = {\n",
    "    \"id\": f\"{uuid.uuid4()}\",\n",
    "    \"@timestamp\": str(TimeParser.now()),\n",
    "    \"user\": {\n",
    "        \"name\": \"Hubert K. Kabal\",\n",
    "        \"email\": \"kabal@example.com\",\n",
    "        \"id\": 12345,\n",
    "    },\n",
    "}\n",
    "\n",
    "event = LogEvent(document, original=b\"\", state=EventStateType.RECEIVED)\n",
    "\n",
    "print(f\"Event before processing: {json.dumps(event.data, indent=2)}\")\n",
    "\n",
    "# Pseudonymization\n",
    "# The public key paths are expected to point at the files created by the\n",
    "# `logprep pseudo generate` cell at the top of this notebook.\n",
    "config = {\n",
    "    \"almighty pseudonymizer\": {\n",
    "        \"type\": \"ng_pseudonymizer\",\n",
    "        \"pubkey_analyst\": \"./analyst.crt\",\n",
    "        \"pubkey_depseudo\": \"./depseudo.crt\",\n",
    "        # Relative path: resolved against the directory the kernel was started in.\n",
    "        \"regex_mapping\": \"../../../../../examples/exampledata/rules/pseudonymizer/regex_mapping.yml\",\n",
    "        # Example value only.\n",
    "        \"hash_salt\": \"a_secret_tasty_ingredient\",\n",
    "        \"outputs\": [\n",
    "            {\"opensearch\": \"pseudonyms\"}\n",
    "        ],\n",
    "        \"rules\": [\n",
    "            {\n",
    "                # One rule with a catch-all filter that maps `user.name` to the\n",
    "                # RE_WHOLE_FIELD entry (presumably defined in the regex mapping\n",
    "                # file above - verify).\n",
    "                \"filter\": \"*\",\n",
    "                \"pseudonymizer\": {\n",
    "                    \"mapping\": {\n",
    "                        \"user.name\": \"RE_WHOLE_FIELD\",\n",
    "                    }\n",
    "                }\n",
    "            }\n",
    "        ],\n",
    "        \"max_cached_pseudonyms\": 1000000\n",
    "    }\n",
    "}\n",
    "processor = Factory.create(config)\n",
    "processor.setup()\n",
    "processor.process(event)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "607bbe20",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f\"Event after processing: {json.dumps(event.data, indent=2)}\")\n",
    "print(f\"{len(event.extra_data)=}\")\n",
    "# Iterate instead of indexing [0] so this cell does not raise IndexError when\n",
    "# the processor produced no extra data.\n",
    "for extra in event.extra_data:\n",
    "    print(f\"Event extra data: {json.dumps(extra.data, indent=2)}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}