<?xml version="1.0" encoding="UTF-8"?><rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:googleplay="http://www.google.com/schemas/play-podcasts/1.0"><channel><title><![CDATA[Everything Bagel]]></title><description><![CDATA[Bagel Labs is a distributed machine learning research lab.]]></description><link>https://blog.bagel.com</link><image><url>https://substackcdn.com/image/fetch/$s_!mmM_!,w_256,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa3ebd2f8-e40d-4c58-a917-e8e161d8df5c_864x864.png</url><title>Everything Bagel</title><link>https://blog.bagel.com</link></image><generator>Substack</generator><lastBuildDate>Mon, 27 Apr 2026 14:48:52 GMT</lastBuildDate><atom:link href="https://blog.bagel.com/feed" rel="self" type="application/rss+xml"/><copyright><![CDATA[Bagel Labs]]></copyright><language><![CDATA[en]]></language><webMaster><![CDATA[team@bagel.com]]></webMaster><itunes:owner><itunes:email><![CDATA[team@bagel.com]]></itunes:email><itunes:name><![CDATA[Bagel Labs]]></itunes:name></itunes:owner><itunes:author><![CDATA[Bagel Labs]]></itunes:author><googleplay:owner><![CDATA[team@bagel.com]]></googleplay:owner><googleplay:email><![CDATA[team@bagel.com]]></googleplay:email><googleplay:author><![CDATA[Bagel Labs]]></googleplay:author><itunes:block><![CDATA[Yes]]></itunes:block><item><title><![CDATA[Heterogeneous Decentralized Diffusion Models]]></title><description><![CDATA[Training-free merge of heterogeneous-objective diffusion ensembles]]></description><link>https://blog.bagel.com/p/heterogeneous-decentralized-diffusion</link><guid isPermaLink="false">https://blog.bagel.com/p/heterogeneous-decentralized-diffusion</guid><dc:creator><![CDATA[Gin Jiang]]></dc:creator><pubDate>Wed, 11 Mar 2026 15:16:10 GMT</pubDate><enclosure 
url="https://substackcdn.com/image/fetch/$s_!YZ-E!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4634f5b1-debb-4713-b809-b768ca1958ee_1936x887.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!YZ-E!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4634f5b1-debb-4713-b809-b768ca1958ee_1936x887.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!YZ-E!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4634f5b1-debb-4713-b809-b768ca1958ee_1936x887.png 424w, https://substackcdn.com/image/fetch/$s_!YZ-E!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4634f5b1-debb-4713-b809-b768ca1958ee_1936x887.png 848w, https://substackcdn.com/image/fetch/$s_!YZ-E!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4634f5b1-debb-4713-b809-b768ca1958ee_1936x887.png 1272w, https://substackcdn.com/image/fetch/$s_!YZ-E!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4634f5b1-debb-4713-b809-b768ca1958ee_1936x887.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!YZ-E!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4634f5b1-debb-4713-b809-b768ca1958ee_1936x887.png" width="1456" height="667" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4634f5b1-debb-4713-b809-b768ca1958ee_1936x887.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:667,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:557310,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/190198146?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4634f5b1-debb-4713-b809-b768ca1958ee_1936x887.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!YZ-E!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4634f5b1-debb-4713-b809-b768ca1958ee_1936x887.png 424w, https://substackcdn.com/image/fetch/$s_!YZ-E!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4634f5b1-debb-4713-b809-b768ca1958ee_1936x887.png 848w, https://substackcdn.com/image/fetch/$s_!YZ-E!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4634f5b1-debb-4713-b809-b768ca1958ee_1936x887.png 1272w, https://substackcdn.com/image/fetch/$s_!YZ-E!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4634f5b1-debb-4713-b809-b768ca1958ee_1936x887.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><em>Based on Bagel Labs' CVPR 2026 paper on Heterogeneous Decentralized Diffusion Models: <a href="https://arxiv.org/abs/2603.06741">arxiv link</a></em></p><div><hr></div><p>Decentralized Diffusion Models (DDMs) train independent experts on disjoint data partitions and combine them at inference time. Existing DDM frameworks assume all experts share the same training objective. We relax this constraint. In our setup, some experts train with DDPM (&#949;-prediction) and others with Flow Matching (velocity-prediction), then unify at inference through a deterministic conversion into a common velocity space. 
<em>No retraining, no fine-tuning, <strong>no coordination</strong> during training.</em></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!AgqY!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd7314350-4419-4818-9944-081effcc521e_1822x564.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!AgqY!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd7314350-4419-4818-9944-081effcc521e_1822x564.png 424w, https://substackcdn.com/image/fetch/$s_!AgqY!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd7314350-4419-4818-9944-081effcc521e_1822x564.png 848w, https://substackcdn.com/image/fetch/$s_!AgqY!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd7314350-4419-4818-9944-081effcc521e_1822x564.png 1272w, https://substackcdn.com/image/fetch/$s_!AgqY!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd7314350-4419-4818-9944-081effcc521e_1822x564.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!AgqY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd7314350-4419-4818-9944-081effcc521e_1822x564.png" width="1456" height="451" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d7314350-4419-4818-9944-081effcc521e_1822x564.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:451,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:173379,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/190198146?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd7314350-4419-4818-9944-081effcc521e_1822x564.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!AgqY!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd7314350-4419-4818-9944-081effcc521e_1822x564.png 424w, https://substackcdn.com/image/fetch/$s_!AgqY!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd7314350-4419-4818-9944-081effcc521e_1822x564.png 848w, https://substackcdn.com/image/fetch/$s_!AgqY!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd7314350-4419-4818-9944-081effcc521e_1822x564.png 1272w, https://substackcdn.com/image/fetch/$s_!AgqY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd7314350-4419-4818-9944-081effcc521e_1822x564.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" 
fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p style="text-align: center;"><em>Heterogeneous experts trained independently on single GPUs, unified at inference through velocity conversion.</em></p><p>Under aligned inference settings, this heterogeneous ensemble (2 DDPM + 6 FM experts) achieves better FID (11.88 vs. 12.45) and higher intra-prompt diversity (LPIPS 0.631 vs. 0.617) than a homogeneous ensemble of 8 FM experts. Relative to the training scale reported for prior DDM work, our framework reduces compute from 1176 to 72 A100-days (16&#215;) and data from 158M to 11M images (14&#215;), with each expert requiring only 24&#8211;48 GB VRAM on a single GPU, making decentralized diffusion training accessible on commodity hardware.</p><h2>Background: Decentralized Flow Matching</h2><p>DDMs decompose a generative model into K experts, each trained on a semantically coherent subset of the data. 
Following <a href="https://arxiv.org/abs/2501.05450">prior DDM work</a>, the marginal velocity field is expressed as a weighted combination of per-expert conditional flows:</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;u_t(x_t) = \\sum_{k=1}^{K} p_t(k \\mid x_t) \\cdot u_t^{(k)}(x_t)&quot;,&quot;id&quot;:&quot;SCNCKAZAOF&quot;}" data-component-name="LatexBlockToDOM"></div><p>where u&#8348;&#8317;&#7503;&#8318;(x&#8348;) is the velocity predicted by expert k trained on cluster S&#8342;, and p&#8348;(k | x&#8348;) is a posterior weight from a learned router.</p><h3>Training</h3><p>We partition the dataset into K semantic clusters using DINOv2 features (1024-dimensional representations with hierarchical k-means). This produces semantically coherent partitions, e.g. portraits, landscapes, architecture. Each expert &#952;&#8342; trains exclusively on its assigned cluster S&#8342; with zero communication between experts. No shared parameters, no gradient synchronization, no activation passing.</p><h3>Routing</h3><p>A lightweight router (DiT-B, 129M parameters) learns to predict cluster assignments from noisy inputs:</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;p_\\phi(k \\mid x_t, t) = \\mathrm{softmax}(\\mathrm{Router}_\\phi(x_t, t))_k&quot;,&quot;id&quot;:&quot;XPENSRBJOT&quot;}" data-component-name="LatexBlockToDOM"></div><p>trained with cross-entropy loss against ground-truth cluster labels. At inference, the router dynamically selects and weights experts based on the current noisy state and timestep. We support three selection modes: Top-1 (single best expert), Top-K (weighted ensemble of only the few highest-probability experts, e.g. Top-2), and Full Ensemble (all experts weighted by router probabilities). 
As shown in <a href="https://arxiv.org/abs/2602.02685">our prior work</a>, Top-2 routing consistently outperforms Full Ensemble because sparse routing maintains expert-data alignment, selecting experts that are in-distribution for the current denoising state.</p><h2>Heterogeneous Objectives</h2><p>Previous DDM work requires all experts to share the same training objective. This is a coordination requirement that may be impractical when contributors operate independently. We remove this constraint.</p><h3>Two objectives, different emphasis</h3><p>We train n experts with Flow Matching loss and m experts with DDPM loss. DDPM experts predict the noise &#949; added during the forward process:</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\mathcal{L}_{\\mathrm{DDPM}}^{(k)} = \\mathbb{E}_{x_0 \\in S_k, \\epsilon, t} \\left[ \\|\\epsilon_{\\theta_k}(\\alpha_t x_0 + \\sigma_t \\epsilon, t) - \\epsilon\\|^2 \\right]&quot;,&quot;id&quot;:&quot;BLXPNFFZDL&quot;}" data-component-name="LatexBlockToDOM"></div><p>Flow Matching experts predict the velocity field directly:</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\mathcal{L}_{\\mathrm{FM}}^{(k)} = \\mathbb{E}_{x_0 \\in S_k, \\epsilon, t} \\left[ \\|v_{\\theta_k}(x_t, t) - (\\epsilon - x_0)\\|^2 \\right]&quot;,&quot;id&quot;:&quot;OQSDUTGFLZ&quot;}" data-component-name="LatexBlockToDOM"></div><p>Here x&#8320; &#8712; S&#8342; means expert k trains only on clean samples from its assigned cluster S&#8342;, &#949; ~ N(0, I) is Gaussian noise, t is the sampled timestep, &#945;&#8348; and &#963;&#8348; are the schedule coefficients controlling signal and noise strength, and x&#8348; = (1-t)x&#8320; + t&#949; is the linear interpolation between clean data and noise.</p><p>Both objectives model the same generative process through different parameterizations. 
But they weigh errors differently across timesteps, and this difference is the mechanism behind heterogeneous ensembles.</p><h3>Complementary loss weighting</h3><p>We can write both losses in terms of the squared clean-sample estimation error |x&#770;&#8320; - x&#8320;|&#178; (the detailed derivation can be read from <a href="https://arxiv.org/abs/2107.00630">variational diffusion models</a>). Under a variance-preserving schedule (&#945;&#8348;&#178; + &#963;&#8348;&#178; = 1):</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!bzo3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2614d385-6a6e-49a4-9af4-d1ce504bfb78_1200x390.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!bzo3!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2614d385-6a6e-49a4-9af4-d1ce504bfb78_1200x390.png 424w, https://substackcdn.com/image/fetch/$s_!bzo3!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2614d385-6a6e-49a4-9af4-d1ce504bfb78_1200x390.png 848w, https://substackcdn.com/image/fetch/$s_!bzo3!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2614d385-6a6e-49a4-9af4-d1ce504bfb78_1200x390.png 1272w, https://substackcdn.com/image/fetch/$s_!bzo3!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2614d385-6a6e-49a4-9af4-d1ce504bfb78_1200x390.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!bzo3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2614d385-6a6e-49a4-9af4-d1ce504bfb78_1200x390.png" width="549" height="178.425" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2614d385-6a6e-49a4-9af4-d1ce504bfb78_1200x390.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:390,&quot;width&quot;:1200,&quot;resizeWidth&quot;:549,&quot;bytes&quot;:26489,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/190198146?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2614d385-6a6e-49a4-9af4-d1ce504bfb78_1200x390.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!bzo3!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2614d385-6a6e-49a4-9af4-d1ce504bfb78_1200x390.png 424w, https://substackcdn.com/image/fetch/$s_!bzo3!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2614d385-6a6e-49a4-9af4-d1ce504bfb78_1200x390.png 848w, https://substackcdn.com/image/fetch/$s_!bzo3!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2614d385-6a6e-49a4-9af4-d1ce504bfb78_1200x390.png 1272w, https://substackcdn.com/image/fetch/$s_!bzo3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2614d385-6a6e-49a4-9af4-d1ce504bfb78_1200x390.png 1456w" sizes="100vw" 
loading="lazy"></picture><div></div></div></a></figure></div><p>The ratio between them is:</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\frac{w_v(t)}{w_\\epsilon(t)} = \\frac{1}{\\alpha_t^2}&quot;,&quot;id&quot;:&quot;GUSDKPXINP&quot;}" data-component-name="LatexBlockToDOM"></div><p>Here w_&#949;(t) and w&#7525;(t) are the effective per-timestep weights after rewriting each loss in terms of clean-sample estimation error. Since &#945;&#8348; &#8804; 1 and decays toward 0 at high noise, this ratio diverges. Velocity-prediction experts receive relatively stronger gradients at high-noise timesteps (global structure), while &#949;-prediction experts are relatively upweighted at low noise (fine details). In the paper, we derive this under a variance-preserving schedule and then note that linear interpolation recovers the same 1 / &#945;&#8348;&#178; structure. So the complementary weighting pattern holds both for the VP analysis and for the linear FM path used here.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!8898!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33d7f880-730f-485d-964b-f176b13a2297_1467x842.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!8898!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33d7f880-730f-485d-964b-f176b13a2297_1467x842.png 424w, https://substackcdn.com/image/fetch/$s_!8898!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33d7f880-730f-485d-964b-f176b13a2297_1467x842.png 848w, 
https://substackcdn.com/image/fetch/$s_!8898!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33d7f880-730f-485d-964b-f176b13a2297_1467x842.png 1272w, https://substackcdn.com/image/fetch/$s_!8898!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33d7f880-730f-485d-964b-f176b13a2297_1467x842.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!8898!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33d7f880-730f-485d-964b-f176b13a2297_1467x842.png" width="1456" height="836" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/33d7f880-730f-485d-964b-f176b13a2297_1467x842.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:836,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:167980,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/190198146?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33d7f880-730f-485d-964b-f176b13a2297_1467x842.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!8898!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33d7f880-730f-485d-964b-f176b13a2297_1467x842.png 424w, https://substackcdn.com/image/fetch/$s_!8898!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33d7f880-730f-485d-964b-f176b13a2297_1467x842.png 
848w, https://substackcdn.com/image/fetch/$s_!8898!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33d7f880-730f-485d-964b-f176b13a2297_1467x842.png 1272w, https://substackcdn.com/image/fetch/$s_!8898!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33d7f880-730f-485d-964b-f176b13a2297_1467x842.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p style="text-align: center;"><em>DDPM concentrates learning signal near clean images. 
Flow Matching emphasizes high-noise timesteps. Their blind spots are complementary.</em></p><p>The implication is that each objective has a &#8220;blind spot&#8221; region where its effective weight is relatively low, and these blind spots are complementary. Where DDPM under-trains (high noise), FM trains hardest. Where FM under-trains (low noise), DDPM concentrates its signal. Mixing both in an ensemble lets each objective cover the other&#8217;s blind spots, providing more uniform coverage across the full denoising trajectory.</p><h2>Inference-Time Unification</h2><p>The central technical challenge is that DDPM experts output noise predictions &#949;_&#952;(x&#8348;, t) while FM experts output velocity predictions v_&#952;(x&#8348;, t). These live in different spaces. You cannot average them directly.</p><p>We unify all expert predictions into a common velocity space through a deterministic, schedule-aware conversion. The derivation proceeds in three steps.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!LVjP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feff86531-af1a-4098-bd69-4991dca738df_3056x1671.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!LVjP!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feff86531-af1a-4098-bd69-4991dca738df_3056x1671.png 424w, https://substackcdn.com/image/fetch/$s_!LVjP!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feff86531-af1a-4098-bd69-4991dca738df_3056x1671.png 848w, 
https://substackcdn.com/image/fetch/$s_!LVjP!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feff86531-af1a-4098-bd69-4991dca738df_3056x1671.png 1272w, https://substackcdn.com/image/fetch/$s_!LVjP!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feff86531-af1a-4098-bd69-4991dca738df_3056x1671.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!LVjP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feff86531-af1a-4098-bd69-4991dca738df_3056x1671.png" width="728" height="398" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/eff86531-af1a-4098-bd69-4991dca738df_3056x1671.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:796,&quot;width&quot;:1456,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:365730,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/190198146?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feff86531-af1a-4098-bd69-4991dca738df_3056x1671.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!LVjP!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feff86531-af1a-4098-bd69-4991dca738df_3056x1671.png 424w, 
https://substackcdn.com/image/fetch/$s_!LVjP!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feff86531-af1a-4098-bd69-4991dca738df_3056x1671.png 848w, https://substackcdn.com/image/fetch/$s_!LVjP!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feff86531-af1a-4098-bd69-4991dca738df_3056x1671.png 1272w, https://substackcdn.com/image/fetch/$s_!LVjP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feff86531-af1a-4098-bd69-4991dca738df_3056x1671.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p style="text-align: center;"><em>The conversion pipeline: DDPM noise predictions are converted to velocity through deterministic algebra. FM predictions pass through unchanged.</em></p><h3>Step 1: Recover the clean-image estimate</h3><p>From the DDPM forward process x&#8348; = &#945;&#8348; x&#8320; + &#963;&#8348; &#949;, invert the linear map using the model&#8217;s noise prediction:</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\hat{x}_0 = \\frac{x_t - \\sigma_t \\epsilon_\\theta(x_t, t)}{\\alpha_t}&quot;,&quot;id&quot;:&quot;YAGNGYWTDA&quot;}" data-component-name="LatexBlockToDOM"></div><h3>Step 2: Derive the velocity</h3><p>Treating x&#770;&#8320; and &#949;_&#952; as fixed at their current-timestep values defines a deterministic path x&#771;&#8348; = &#945;&#8348; x&#770;&#8320; + &#963;&#8348; &#949;_&#952;. Differentiating with respect to t:</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;v(x_t, t) = \\frac{d\\alpha_t}{dt}\\hat{x}_0 + \\frac{d\\sigma_t}{dt}\\epsilon_\\theta(x_t, t)&quot;,&quot;id&quot;:&quot;BEKSJBMRWD&quot;}" data-component-name="LatexBlockToDOM"></div><p>Under linear interpolation (&#945;&#8348; = 1-t, &#963;&#8348; = t), the schedule derivatives are -1 and +1, so this simplifies to:</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;v(x_t, t) = \\epsilon_\\theta(x_t, t) - \\hat{x}_0&quot;,&quot;id&quot;:&quot;UBBGAMYHCT&quot;}" data-component-name="LatexBlockToDOM"></div><p>This is exactly the FM velocity target v = &#949; - x&#8320;.</p><h3>Step 3: Combine</h3><p>FM experts already output velocity, so they pass through unchanged. All predictions are now in the same space. 
The router assigns per-expert weights, and we take a weighted combination to form the ensemble field u&#8348;, which drives a standard ODE integration step (x_{t&#8722;&#916;t} = x&#8348; &#8722; u&#8348; &#183; &#916;t).</p><h3>Numerical stability</h3><p>The conversion requires dividing by &#945;&#8348;, which approaches zero at high noise. We apply three safeguards: (1) clamp x&#770;&#8320; to [-20, 20] for VAE latents, (2) use &#945;_safe = max(&#945;&#8348;, 0.01) in the denominator, and (3) apply adaptive velocity scaling that dampens converted predictions at elevated noise levels where schedule derivatives become large. These are simple clamps with no learned parameters.</p><p>The entire conversion is closed-form algebra. No learned components, no fine-tuning, no additional training of any kind.</p><h2>Efficient Training</h2><p>Prior DDM work required 1176 A100-days on 158M images. We achieve competitive quality with 72 A100-days on 11M images, a 16&#215; reduction in compute and 14&#215; in data. 
Three techniques enable this.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!GRcx!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0d44a3fa-269a-4c03-ba01-291556ffbd91_1563x1475.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!GRcx!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0d44a3fa-269a-4c03-ba01-291556ffbd91_1563x1475.png 424w, https://substackcdn.com/image/fetch/$s_!GRcx!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0d44a3fa-269a-4c03-ba01-291556ffbd91_1563x1475.png 848w, https://substackcdn.com/image/fetch/$s_!GRcx!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0d44a3fa-269a-4c03-ba01-291556ffbd91_1563x1475.png 1272w, https://substackcdn.com/image/fetch/$s_!GRcx!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0d44a3fa-269a-4c03-ba01-291556ffbd91_1563x1475.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!GRcx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0d44a3fa-269a-4c03-ba01-291556ffbd91_1563x1475.png" width="676" height="637.9285714285714" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0d44a3fa-269a-4c03-ba01-291556ffbd91_1563x1475.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1374,&quot;width&quot;:1456,&quot;resizeWidth&quot;:676,&quot;bytes&quot;:247656,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/190198146?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0d44a3fa-269a-4c03-ba01-291556ffbd91_1563x1475.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!GRcx!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0d44a3fa-269a-4c03-ba01-291556ffbd91_1563x1475.png 424w, https://substackcdn.com/image/fetch/$s_!GRcx!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0d44a3fa-269a-4c03-ba01-291556ffbd91_1563x1475.png 848w, https://substackcdn.com/image/fetch/$s_!GRcx!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0d44a3fa-269a-4c03-ba01-291556ffbd91_1563x1475.png 1272w, https://substackcdn.com/image/fetch/$s_!GRcx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0d44a3fa-269a-4c03-ba01-291556ffbd91_1563x1475.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p style="text-align: center;"><em>Resource comparison: our approach requires a fraction of the compute and data of prior DDM work.</em></p><h3>Pretrained checkpoint conversion</h3><p>We initialize experts from ImageNet-pretrained DiT checkpoints. Patch embeddings, positional embeddings, and all transformer blocks are fully transferred. Only the final projection layer (which differs between &#949;- and velocity-prediction targets) and the text projection (new modality) are reinitialized. Class-conditional embeddings from ImageNet pretraining are removed.</p><p>A key technical detail is timestep compatibility. DiT models expect discrete timesteps t &#8712; {0, 1, &#8230;, 999} while Flow Matching uses continuous t &#8712; [0, 1]. 
We handle this with runtime conversion (t_DiT = round(999t)) rather than modifying pretrained weights, preserving the learned timestep embedding structure.</p><p>Converted checkpoints reach validation loss parity 1.2&#215; faster than training from scratch.</p><h3>Efficient architecture</h3><p>Each expert uses DiT with <a href="https://arxiv.org/abs/2310.00426">PixArt-&#945;</a>&#8217;s AdaLN-Single conditioning. Rather than computing adaptive layer normalization parameters with per-block MLPs, a single global MLP produces all modulation signals:</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\mathbf{c} = \\mathrm{MLP}_{\\mathrm{global}}(\\tau(t)) \\in \\mathbb{R}^{6Ld}&quot;,&quot;id&quot;:&quot;OGWIVUBNTC&quot;}" data-component-name="LatexBlockToDOM"></div><p>reshaped into per-block slices plus learned per-block embeddings E&#7526;. This reduces parameters by roughly 32% (891M to 605M for DiT-XL/2) while maintaining generation quality.</p><h3>True isolation</h3><p>Each expert trains on its own semantic cluster on a single GPU requiring 20&#8211;48GB VRAM. No gradient synchronization, no activation passing, no pipeline coordination, no parameter servers. This is not data parallelism with relaxed communication. There is literally zero inter-expert communication during training.</p><h2>Experiments</h2><p>We train on 11M LAION-Aesthetics images. For a high-quality subset of 3.9M images, we use LLaVA to generate improved captions. We evaluate using FID-50K on a held-out 50K test set. We train at two scales: DiT-B/2 (129M parameters per expert) and DiT-XL/2 (605M parameters per expert).</p><p>Our standard configuration uses K=8 experts. Experts 0 and 3 train with DDPM objectives (assigned to clusters containing high-fidelity subjects where &#949;-prediction excels at detail preservation). The remaining six use Flow Matching.</p><h3>Monolithic versus decentralized</h3><p>We first validate that decentralized training works. 
Using DiT-B/2 experts trained from scratch on LAION-Art (3.9M images), all with Flow Matching:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!gdwF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdb04063b-225f-4132-94d8-e9c8eb0440ba_1100x570.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!gdwF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdb04063b-225f-4132-94d8-e9c8eb0440ba_1100x570.png 424w, https://substackcdn.com/image/fetch/$s_!gdwF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdb04063b-225f-4132-94d8-e9c8eb0440ba_1100x570.png 848w, https://substackcdn.com/image/fetch/$s_!gdwF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdb04063b-225f-4132-94d8-e9c8eb0440ba_1100x570.png 1272w, https://substackcdn.com/image/fetch/$s_!gdwF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdb04063b-225f-4132-94d8-e9c8eb0440ba_1100x570.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!gdwF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdb04063b-225f-4132-94d8-e9c8eb0440ba_1100x570.png" width="488" height="252.87272727272727" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/db04063b-225f-4132-94d8-e9c8eb0440ba_1100x570.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:570,&quot;width&quot;:1100,&quot;resizeWidth&quot;:488,&quot;bytes&quot;:36217,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/190198146?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdb04063b-225f-4132-94d8-e9c8eb0440ba_1100x570.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!gdwF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdb04063b-225f-4132-94d8-e9c8eb0440ba_1100x570.png 424w, https://substackcdn.com/image/fetch/$s_!gdwF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdb04063b-225f-4132-94d8-e9c8eb0440ba_1100x570.png 848w, https://substackcdn.com/image/fetch/$s_!gdwF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdb04063b-225f-4132-94d8-e9c8eb0440ba_1100x570.png 1272w, https://substackcdn.com/image/fetch/$s_!gdwF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdb04063b-225f-4132-94d8-e9c8eb0440ba_1100x570.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Top-2 achieves an FID of 22.60, outperforming the monolithic baseline by 23.7%. Full Ensemble underperforms dramatically (FID 47.89), consistent with our prior finding that indiscriminate combination introduces prediction conflicts from out-of-distribution experts. 
Selective expert activation is essential.</p><h3>Homogeneous versus heterogeneous</h3><p>To isolate the effect of objective heterogeneity, we compare homogeneous and heterogeneous 8-expert DiT-XL/2 models under matched inference settings.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!FuzF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62c6bf77-4986-44dd-8658-4428d81cb1e1_1639x570.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!FuzF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62c6bf77-4986-44dd-8658-4428d81cb1e1_1639x570.png 424w, https://substackcdn.com/image/fetch/$s_!FuzF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62c6bf77-4986-44dd-8658-4428d81cb1e1_1639x570.png 848w, https://substackcdn.com/image/fetch/$s_!FuzF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62c6bf77-4986-44dd-8658-4428d81cb1e1_1639x570.png 1272w, https://substackcdn.com/image/fetch/$s_!FuzF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62c6bf77-4986-44dd-8658-4428d81cb1e1_1639x570.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!FuzF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62c6bf77-4986-44dd-8658-4428d81cb1e1_1639x570.png" width="1456" height="506" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/62c6bf77-4986-44dd-8658-4428d81cb1e1_1639x570.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:506,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:63517,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/190198146?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62c6bf77-4986-44dd-8658-4428d81cb1e1_1639x570.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!FuzF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62c6bf77-4986-44dd-8658-4428d81cb1e1_1639x570.png 424w, https://substackcdn.com/image/fetch/$s_!FuzF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62c6bf77-4986-44dd-8658-4428d81cb1e1_1639x570.png 848w, https://substackcdn.com/image/fetch/$s_!FuzF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62c6bf77-4986-44dd-8658-4428d81cb1e1_1639x570.png 1272w, https://substackcdn.com/image/fetch/$s_!FuzF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62c6bf77-4986-44dd-8658-4428d81cb1e1_1639x570.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Under aligned settings (first and last rows), heterogeneous experts improve FID from 12.45 to 11.88. Scaling from 1 to 2 DDPM experts improves FID from 19.75 to 15.09 under the conversion setting, suggesting the optimal DDPM:FM ratio deserves careful tuning per domain.</p><p>For diversity, we measure intra-prompt LPIPS by generating 10 images per prompt for 100 held-out prompts. Heterogeneous experts achieve 0.631 (&#177; 0.078) vs. homogeneous 0.617 (&#177; 0.074). 
Objective heterogeneity produces more varied outputs for identical prompts.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!f5wf!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3fd51f34-7191-49b8-a51f-5f5c32c64eed_1440x370.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!f5wf!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3fd51f34-7191-49b8-a51f-5f5c32c64eed_1440x370.png 424w, https://substackcdn.com/image/fetch/$s_!f5wf!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3fd51f34-7191-49b8-a51f-5f5c32c64eed_1440x370.png 848w, https://substackcdn.com/image/fetch/$s_!f5wf!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3fd51f34-7191-49b8-a51f-5f5c32c64eed_1440x370.png 1272w, https://substackcdn.com/image/fetch/$s_!f5wf!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3fd51f34-7191-49b8-a51f-5f5c32c64eed_1440x370.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!f5wf!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3fd51f34-7191-49b8-a51f-5f5c32c64eed_1440x370.png" width="1440" height="370" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3fd51f34-7191-49b8-a51f-5f5c32c64eed_1440x370.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:370,&quot;width&quot;:1440,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:57681,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/190198146?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3fd51f34-7191-49b8-a51f-5f5c32c64eed_1440x370.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!f5wf!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3fd51f34-7191-49b8-a51f-5f5c32c64eed_1440x370.png 424w, https://substackcdn.com/image/fetch/$s_!f5wf!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3fd51f34-7191-49b8-a51f-5f5c32c64eed_1440x370.png 848w, https://substackcdn.com/image/fetch/$s_!f5wf!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3fd51f34-7191-49b8-a51f-5f5c32c64eed_1440x370.png 1272w, https://substackcdn.com/image/fetch/$s_!f5wf!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3fd51f34-7191-49b8-a51f-5f5c32c64eed_1440x370.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p style="text-align: center;"><em>Heterogeneous ensembles improve both image quality (lower FID) and output diversity (higher LPIPS) over homogeneous baselines.</em></p><h3>Conversion quality</h3><p>We evaluate the DDPM &#8594; FM conversion in isolation, using experts trained on the same data cluster (to isolate objective effects from data distribution differences). 
Both DDPM and FM experts use DiT-XL/2 with identical hyperparameters.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Eseq!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a6bce43-6ae0-49cb-91ed-35a5be543999_1660x660.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Eseq!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a6bce43-6ae0-49cb-91ed-35a5be543999_1660x660.png 424w, https://substackcdn.com/image/fetch/$s_!Eseq!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a6bce43-6ae0-49cb-91ed-35a5be543999_1660x660.png 848w, https://substackcdn.com/image/fetch/$s_!Eseq!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a6bce43-6ae0-49cb-91ed-35a5be543999_1660x660.png 1272w, https://substackcdn.com/image/fetch/$s_!Eseq!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a6bce43-6ae0-49cb-91ed-35a5be543999_1660x660.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Eseq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a6bce43-6ae0-49cb-91ed-35a5be543999_1660x660.png" width="1456" height="579" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1a6bce43-6ae0-49cb-91ed-35a5be543999_1660x660.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:579,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:70525,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/190198146?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a6bce43-6ae0-49cb-91ed-35a5be543999_1660x660.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Eseq!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a6bce43-6ae0-49cb-91ed-35a5be543999_1660x660.png 424w, https://substackcdn.com/image/fetch/$s_!Eseq!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a6bce43-6ae0-49cb-91ed-35a5be543999_1660x660.png 848w, https://substackcdn.com/image/fetch/$s_!Eseq!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a6bce43-6ae0-49cb-91ed-35a5be543999_1660x660.png 1272w, https://substackcdn.com/image/fetch/$s_!Eseq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a6bce43-6ae0-49cb-91ed-35a5be543999_1660x660.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Three findings emerge. First, the conversion works. DDPM &#8594; FM improves over native DDPM (FID 25.61 vs. 27.04) while preserving semantic coherence (CLIP 0.319 vs. 0.316). The conversion is most valuable as a compatibility mechanism rather than a lossless objective replacement.</p><p>Second, combined experts achieve higher output diversity (LPIPS 0.782) than single FM (0.752), approaching native DDPM levels (0.787). Heterogeneous objectives create complementary generation patterns.</p><p>Third, using the same cosine schedule for both objectives yields marginally better FID than different schedules (32.67 vs. 33.29), suggesting schedule alignment facilitates smoother expert transitions. 
But both combinations show similar diversity gains, indicating that objective heterogeneity drives the primary benefit.</p><h3>Routing threshold analysis</h3><p>For combined DDPM+FM experts, a deterministic router switches between them at a threshold t: DDPM handles timesteps t&#8242; &#8804; t (low noise), FM handles t&#8242; &gt; t (high noise).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!P4al!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e60c5d7-9c8f-401e-bb77-8f524c4ea30f_2172x1044.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!P4al!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e60c5d7-9c8f-401e-bb77-8f524c4ea30f_2172x1044.png 424w, https://substackcdn.com/image/fetch/$s_!P4al!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e60c5d7-9c8f-401e-bb77-8f524c4ea30f_2172x1044.png 848w, https://substackcdn.com/image/fetch/$s_!P4al!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e60c5d7-9c8f-401e-bb77-8f524c4ea30f_2172x1044.png 1272w, https://substackcdn.com/image/fetch/$s_!P4al!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e60c5d7-9c8f-401e-bb77-8f524c4ea30f_2172x1044.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!P4al!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e60c5d7-9c8f-401e-bb77-8f524c4ea30f_2172x1044.png" width="1456" height="700" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0e60c5d7-9c8f-401e-bb77-8f524c4ea30f_2172x1044.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:700,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:223084,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/190198146?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e60c5d7-9c8f-401e-bb77-8f524c4ea30f_2172x1044.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!P4al!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e60c5d7-9c8f-401e-bb77-8f524c4ea30f_2172x1044.png 424w, https://substackcdn.com/image/fetch/$s_!P4al!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e60c5d7-9c8f-401e-bb77-8f524c4ea30f_2172x1044.png 848w, https://substackcdn.com/image/fetch/$s_!P4al!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e60c5d7-9c8f-401e-bb77-8f524c4ea30f_2172x1044.png 1272w, https://substackcdn.com/image/fetch/$s_!P4al!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e60c5d7-9c8f-401e-bb77-8f524c4ea30f_2172x1044.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p style="text-align: center;"><em>Impact of router threshold on generation quality. Different thresholds produce a clear quality-diversity trade-off.</em></p><p>Lower thresholds (0.2&#8211;0.3) favor quality (FM-dominated denoising, optimal FID). Mid-range thresholds (0.4&#8211;0.5) favor diversity (balanced workload, highest LPIPS). Extreme thresholds (0.7) degrade both metrics, confirming that both expert types contribute essential complementary capabilities.</p><h3>Effects of expert ordering and router thresholds</h3><p>Expert ordering also matters. 
In a 2-expert setup (1 converted DDPM + 1 FM), we vary the ordering &#8212; DDPM&#8594;FM versus FM&#8594;DDPM &#8212; and the switching threshold &#964; &#8712; {0.3, 0.5, 0.7}.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ghdn!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa947e51c-b692-49a7-8458-0c27816b3b14_1024x809.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ghdn!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa947e51c-b692-49a7-8458-0c27816b3b14_1024x809.png 424w, https://substackcdn.com/image/fetch/$s_!ghdn!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa947e51c-b692-49a7-8458-0c27816b3b14_1024x809.png 848w, https://substackcdn.com/image/fetch/$s_!ghdn!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa947e51c-b692-49a7-8458-0c27816b3b14_1024x809.png 1272w, https://substackcdn.com/image/fetch/$s_!ghdn!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa947e51c-b692-49a7-8458-0c27816b3b14_1024x809.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ghdn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa947e51c-b692-49a7-8458-0c27816b3b14_1024x809.png" width="1024" height="809" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a947e51c-b692-49a7-8458-0c27816b3b14_1024x809.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:809,&quot;width&quot;:1024,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:112860,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/190198146?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa947e51c-b692-49a7-8458-0c27816b3b14_1024x809.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ghdn!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa947e51c-b692-49a7-8458-0c27816b3b14_1024x809.png 424w, https://substackcdn.com/image/fetch/$s_!ghdn!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa947e51c-b692-49a7-8458-0c27816b3b14_1024x809.png 848w, https://substackcdn.com/image/fetch/$s_!ghdn!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa947e51c-b692-49a7-8458-0c27816b3b14_1024x809.png 1272w, https://substackcdn.com/image/fetch/$s_!ghdn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa947e51c-b692-49a7-8458-0c27816b3b14_1024x809.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p style="text-align: center;"><em>Expert ordering and router threshold effects. FM&#8594;DDPM ordering produces more stable, coherent results, while DDPM&#8594;FM shows higher sensitivity to threshold selection.</em></p><p>The results reveal a striking asymmetry. FM&#8594;DDPM (bottom row) produces consistently cleaner images across all thresholds. DDPM&#8594;FM (top row) degrades at higher thresholds, with blocky artifacts and oversaturation. The reason: when DDPM operates first at high noise (&#945;&#8348; &#8594; 0), the conversion x&#770;&#8320; = (x&#8348; - &#963;&#8348; &#949;_&#952;)/&#945;&#8348; is numerically unstable, and errors early in the trajectory get baked into the image structure. 
Letting FM handle high-noise timesteps first avoids this entirely &#8212; the converted DDPM expert then refines at low noise where conversion is stable.</p><p><strong>Takeaway:</strong> DDPM-to-velocity conversion should be restricted to low-noise regimes (t &lt; 0.5).</p><h2>Discussion</h2><h3>Resource efficiency in context</h3><p>Our results should be interpreted carefully relative to prior DDM work. The DDM FID range of 5.5&#8211;10.5 was achieved at a substantially larger training scale (1176 A100-days, 158M images). Our numbers are not directly comparable in absolute FID terms. What they do show is that competitive generation quality is attainable at a fraction of the resources, and that heterogeneous objectives provide an additional quality gain at no extra training cost.</p><h3>Limitations</h3><p>We evaluate only a narrow set of DDPM-to-FM ratios (1:7 and 2:6). The ideal allocation likely depends on the data distribution and downstream requirements. The deterministic conversion relies on hand-tuned numerical safeguards; a more robust conversion mechanism that generalizes across arbitrary schedules would strengthen applicability. We consider only &#949;- and velocity-prediction; extending to x&#8320;-prediction or consistency objectives could further diversify expert specialization but would require generalizing the conversion and routing mechanisms.</p><h3>What this enables</h3><p>The practical upshot is that decentralized diffusion training no longer requires coordinated infrastructure or agreement on training objectives. A contributor with a single GPU can train a DDPM expert on portraits. Another can train an FM expert on landscapes using different hardware. These experts combine at inference time without either contributor needing to know what the other was doing.</p><div><hr></div><h3>Citation</h3><pre><code><code>@article{jiang2026heterogeneous,
  title     = {Heterogeneous Decentralized Diffusion Models},
  author    = {Jiang, Zhiying and Seraj, Raihan and Villagra, Marcos and Roy, Bidhan},
  journal   = {arXiv preprint arXiv:2603.06741},
  year      = {2026}
}</code></code></pre>]]></content:encoded></item><item><title><![CDATA[Stability–Quality Paradox in Decentralized Diffusion Models]]></title><description><![CDATA[Why the most stable samplers in decentralized diffusion produce the worst generations]]></description><link>https://blog.bagel.com/p/stability-quality-paradox</link><guid isPermaLink="false">https://blog.bagel.com/p/stability-quality-paradox</guid><dc:creator><![CDATA[Marcos Villagra]]></dc:creator><pubDate>Fri, 06 Feb 2026 15:56:46 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!APQT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1052c06f-0688-4198-a6ee-37135a203585_1676x920.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!APQT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1052c06f-0688-4198-a6ee-37135a203585_1676x920.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!APQT!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1052c06f-0688-4198-a6ee-37135a203585_1676x920.png 424w, https://substackcdn.com/image/fetch/$s_!APQT!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1052c06f-0688-4198-a6ee-37135a203585_1676x920.png 848w, https://substackcdn.com/image/fetch/$s_!APQT!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1052c06f-0688-4198-a6ee-37135a203585_1676x920.png 1272w, 
https://substackcdn.com/image/fetch/$s_!APQT!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1052c06f-0688-4198-a6ee-37135a203585_1676x920.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!APQT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1052c06f-0688-4198-a6ee-37135a203585_1676x920.png" width="1456" height="799" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1052c06f-0688-4198-a6ee-37135a203585_1676x920.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:799,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:10325535,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/187024071?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1052c06f-0688-4198-a6ee-37135a203585_1676x920.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!APQT!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1052c06f-0688-4198-a6ee-37135a203585_1676x920.png 424w, https://substackcdn.com/image/fetch/$s_!APQT!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1052c06f-0688-4198-a6ee-37135a203585_1676x920.png 848w, https://substackcdn.com/image/fetch/$s_!APQT!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1052c06f-0688-4198-a6ee-37135a203585_1676x920.png 
1272w, https://substackcdn.com/image/fetch/$s_!APQT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1052c06f-0688-4198-a6ee-37135a203585_1676x920.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>In Decentralized Diffusion Models (DDMs), denoising is routed through independently trained experts at inference time. These experts can strongly disagree in their denoising predictions. What actually governs the quality of generations in such a system? 
We present the first-ever systematic interpretability study of this question.</p><p>The natural expectation is that minimizing denoising trajectory sensitivity &#8212; minimizing how perturbations amplify during sampling &#8212; should govern generation quality. It doesn&#8217;t. Full ensemble routing, which combines all expert predictions at each step, achieves the most stable sampling dynamics and the best numerical convergence. It also produces the worst generation quality (FID 47.9 vs. 22.6 for sparse Top-2 routing). We call this the <em>stability&#8211;quality paradox</em>.</p><p>Instead, we identify expert-data alignment as the governing principle. Generation quality depends on routing inputs to experts whose training distribution covers the current denoising state, even when doing so makes the trajectory unstable. For DDM deployment, routing should prioritize expert-data alignment over the commonly used numerical stability metrics.</p><div><hr></div><h2>Decentralized Diffusion Models</h2><p>DDMs aren&#8217;t <a href="https://arxiv.org/abs/1701.06538">Mixture-of-Experts</a> layers. They&#8217;re ensembles of independently trained models. In standard MoE architectures, experts are FFN layers within a shared backbone, trained jointly with load-balancing losses, and routed at the token level. DDM experts are <em>complete diffusion models</em>, and routing occurs at the <em>input level</em> (entire noisy generations) rather than at the token level.</p><h4>Training</h4><p>Each expert trains in isolation on a disjoint data partition. The training data is partitioned into <em>K</em> clusters (e.g., using k-means on DINOv2 embeddings), and each expert sees only its assigned cluster. So one might train exclusively on landscapes, another on portraits. There are no shared parameters and no gradient communication. 
Experts only collaborate at inference time.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!2dDl!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fac4321e5-674e-4f16-9636-2809d158c5d8_6336x2163.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!2dDl!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fac4321e5-674e-4f16-9636-2809d158c5d8_6336x2163.png 424w, https://substackcdn.com/image/fetch/$s_!2dDl!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fac4321e5-674e-4f16-9636-2809d158c5d8_6336x2163.png 848w, https://substackcdn.com/image/fetch/$s_!2dDl!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fac4321e5-674e-4f16-9636-2809d158c5d8_6336x2163.png 1272w, https://substackcdn.com/image/fetch/$s_!2dDl!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fac4321e5-674e-4f16-9636-2809d158c5d8_6336x2163.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!2dDl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fac4321e5-674e-4f16-9636-2809d158c5d8_6336x2163.png" width="1456" height="497" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ac4321e5-674e-4f16-9636-2809d158c5d8_6336x2163.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:497,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:984635,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/187024071?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fac4321e5-674e-4f16-9636-2809d158c5d8_6336x2163.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!2dDl!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fac4321e5-674e-4f16-9636-2809d158c5d8_6336x2163.png 424w, https://substackcdn.com/image/fetch/$s_!2dDl!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fac4321e5-674e-4f16-9636-2809d158c5d8_6336x2163.png 848w, https://substackcdn.com/image/fetch/$s_!2dDl!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fac4321e5-674e-4f16-9636-2809d158c5d8_6336x2163.png 1272w, https://substackcdn.com/image/fetch/$s_!2dDl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fac4321e5-674e-4f16-9636-2809d158c5d8_6336x2163.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>MoE experts, therefore, operate within a shared representational framework that limits their differences. In contrast, DDM experts are unrestricted and can generate vastly different outputs from the same input.</p><h4>Routing</h4><p>At inference time, a lightweight router predicts weights</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;w_t^{(k)}(x_t) \\geq 0 \\quad\\text{with}\\quad \\sum_{k=1}^K w_t^{(k)}(x_t) = 1&quot;,&quot;id&quot;:&quot;HABTHRVFRU&quot;}" data-component-name="LatexBlockToDOM"></div><p>at each denoising step. 
The routed velocity field is:</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;v_t(x_t) = \\sum_{k=1}^K w_t^{(k)}(x_t) \\cdot v_t^{(k)}(x_t).&quot;,&quot;id&quot;:&quot;BRXLLZDUYG&quot;}" data-component-name="LatexBlockToDOM"></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!T1dB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa18e038d-2c53-4148-b6e2-0a78afa23620_4431x3119.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!T1dB!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa18e038d-2c53-4148-b6e2-0a78afa23620_4431x3119.png 424w, https://substackcdn.com/image/fetch/$s_!T1dB!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa18e038d-2c53-4148-b6e2-0a78afa23620_4431x3119.png 848w, https://substackcdn.com/image/fetch/$s_!T1dB!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa18e038d-2c53-4148-b6e2-0a78afa23620_4431x3119.png 1272w, https://substackcdn.com/image/fetch/$s_!T1dB!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa18e038d-2c53-4148-b6e2-0a78afa23620_4431x3119.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!T1dB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa18e038d-2c53-4148-b6e2-0a78afa23620_4431x3119.png" width="1456" height="1025" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a18e038d-2c53-4148-b6e2-0a78afa23620_4431x3119.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1025,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1590748,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/187024071?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa18e038d-2c53-4148-b6e2-0a78afa23620_4431x3119.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!T1dB!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa18e038d-2c53-4148-b6e2-0a78afa23620_4431x3119.png 424w, https://substackcdn.com/image/fetch/$s_!T1dB!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa18e038d-2c53-4148-b6e2-0a78afa23620_4431x3119.png 848w, https://substackcdn.com/image/fetch/$s_!T1dB!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa18e038d-2c53-4148-b6e2-0a78afa23620_4431x3119.png 1272w, https://substackcdn.com/image/fetch/$s_!T1dB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa18e038d-2c53-4148-b6e2-0a78afa23620_4431x3119.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The critical design question is how many experts should contribute at each step. Three natural strategies emerge.</p><p><strong>Top-1</strong> commits fully to the single most relevant expert. Every prediction comes from one model. If the router picks poorly, there&#8217;s no fallback.</p><p><strong>Top-2</strong> blends the two most relevant experts after renormalizing their weights. This allows experts to cross-check each other while still filtering out the majority.</p><p><strong>Full Ensemble</strong> weights all experts by their router probability. Every expert contributes to every step, giving the mathematically complete combination.</p><p>Standard numerical analysis would suggest that including more experts should help. Averaging reduces variance, smooths the velocity field, and stabilizes the ODE integration. 
We tested whether this intuition holds.</p><div><hr></div><h2>The Stability Paradox</h2><p>Numerical stability has been the default lens for optimizing diffusion sampling. The foundational probability-flow ODE formulation frames <a href="https://arxiv.org/abs/2011.13456">Lipschitz constants and discretization error as determining solver accuracy</a>. Recent work develops <a href="https://arxiv.org/abs/2505.24210">stabilized Runge-Kutta methods</a> for stiff diffusion ODEs and studies <a href="https://openreview.net/forum?id=WNkW0cOwiz">Lipschitz singularities</a> and their effects on sampling; the entire solver design space, from Euler to Heun to DPM++, is organized around stability-accuracy tradeoffs. These analyses target single-model diffusion; until now, no DDM-specific stability analysis has existed. We provide the first systematic test of whether this framework transfers to distributed training systems.</p><p>We evaluated on <a href="https://arxiv.org/abs/2510.03434">Paris</a>, the world&#8217;s first publicly released DDM, comprising 8 experts trained on <a href="https://arxiv.org/abs/2210.08402">LAION-Aesthetics</a>. We tracked trajectory sensitivity, measuring how strongly the velocity field responds to input perturbations, and step-refinement disagreement, the difference between images generated with <em>N</em> and <em>2N</em> steps. The results reveal a stability&#8211;quality paradox.</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\begin{array}{lccc}\n\\hline\n\\text{Routing Strategy} &amp; \\text{FID }\\downarrow &amp; \\hat{L}_{eff}^{(h)}\\downarrow &amp; \\Delta_{refine}\\downarrow \\\\\n\\hline\n\\text{Top-2 (Sparse)} &amp; \\mathbf{22.60} &amp; 17.48 &amp; 0.051 \\\\\n\\text{Full Ensemble} &amp; 47.89 &amp; \\mathbf{17.07} &amp; \\mathbf{0.020} \\\\\n\\hline\n\\end{array}&quot;,&quot;id&quot;:&quot;CTTGOCGVNJ&quot;}" data-component-name="LatexBlockToDOM"></div><p>Full Ensemble achieves the lowest trajectory sensitivity. 
It converges the most cleanly. And it produces the worst images, with an FID more than double that of Top-2 (47.89 vs. 22.60).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!EY9s!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1317a7b-718f-4931-8783-8eaa701ebb5d_3905x2641.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!EY9s!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1317a7b-718f-4931-8783-8eaa701ebb5d_3905x2641.png 424w, https://substackcdn.com/image/fetch/$s_!EY9s!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1317a7b-718f-4931-8783-8eaa701ebb5d_3905x2641.png 848w, https://substackcdn.com/image/fetch/$s_!EY9s!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1317a7b-718f-4931-8783-8eaa701ebb5d_3905x2641.png 1272w, https://substackcdn.com/image/fetch/$s_!EY9s!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1317a7b-718f-4931-8783-8eaa701ebb5d_3905x2641.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!EY9s!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1317a7b-718f-4931-8783-8eaa701ebb5d_3905x2641.png" width="1456" height="985" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a1317a7b-718f-4931-8783-8eaa701ebb5d_3905x2641.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:985,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:4891295,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/187024071?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1317a7b-718f-4931-8783-8eaa701ebb5d_3905x2641.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!EY9s!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1317a7b-718f-4931-8783-8eaa701ebb5d_3905x2641.png 424w, https://substackcdn.com/image/fetch/$s_!EY9s!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1317a7b-718f-4931-8783-8eaa701ebb5d_3905x2641.png 848w, https://substackcdn.com/image/fetch/$s_!EY9s!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1317a7b-718f-4931-8783-8eaa701ebb5d_3905x2641.png 1272w, https://substackcdn.com/image/fetch/$s_!EY9s!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1317a7b-718f-4931-8783-8eaa701ebb5d_3905x2641.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The cumulative IQR measures variability across denoising trajectories. Full Ensemble shows the lowest variability, indicating consistent, well-behaved numerical integration. Top-2 shows higher variability, yet produces superior images. For full experimental details, see the <a href="https://arxiv.org/abs/2602.02685">full paper</a>.</p><p>Why does averaging more experts &#8212; which stabilizes the denoising trajectory &#8212; degrade image quality?</p><div><hr></div><h2>Expert-Data Alignment Is The Governing Principle</h2><p>The answer is not in how smooth the path is, but where it leads. Full ensemble averaging reduces Jacobian spectral norms by cancelling variance across expert predictions, which is exactly why it wins on stability metrics. But each expert is trained on a disjoint data cluster. 
When all experts contribute to every input, most of them are producing velocity predictions for inputs that lie far outside their training distribution. The resulting velocity field is smooth because the out-of-distribution errors partially cancel, but it points toward a weighted average of all cluster centers rather than the data manifold.</p><p>In single-model diffusion, a smoother velocity field means cleaner integration means better samples. DDMs break this. The smoothing that lowers trajectory sensitivity is not coming from a better-conditioned ODE. It is coming from averaging contradictory predictions, which suppresses variance at the cost of introducing systematic bias away from any individual data cluster&#8217;s learned distribution. Sparse routing avoids this by selecting only the experts whose training data clusters are close to the current input in embedding space, keeping each active expert within its training distribution. The velocity field is noisier but points in the right direction.</p><p>We call this governing principle <em>expert-data alignment</em>. 
Generation quality depends on routing inputs to experts whose training clusters cover the current denoising state, even when doing so reduces the trajectory stability.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!eB-K!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37ba4b5f-d124-4e75-8d24-bbbd9e9a2c22_2658x1935.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!eB-K!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37ba4b5f-d124-4e75-8d24-bbbd9e9a2c22_2658x1935.png 424w, https://substackcdn.com/image/fetch/$s_!eB-K!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37ba4b5f-d124-4e75-8d24-bbbd9e9a2c22_2658x1935.png 848w, https://substackcdn.com/image/fetch/$s_!eB-K!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37ba4b5f-d124-4e75-8d24-bbbd9e9a2c22_2658x1935.png 1272w, https://substackcdn.com/image/fetch/$s_!eB-K!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37ba4b5f-d124-4e75-8d24-bbbd9e9a2c22_2658x1935.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!eB-K!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37ba4b5f-d124-4e75-8d24-bbbd9e9a2c22_2658x1935.png" width="1456" height="1060" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/37ba4b5f-d124-4e75-8d24-bbbd9e9a2c22_2658x1935.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1060,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:419997,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/187024071?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37ba4b5f-d124-4e75-8d24-bbbd9e9a2c22_2658x1935.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!eB-K!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37ba4b5f-d124-4e75-8d24-bbbd9e9a2c22_2658x1935.png 424w, https://substackcdn.com/image/fetch/$s_!eB-K!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37ba4b5f-d124-4e75-8d24-bbbd9e9a2c22_2658x1935.png 848w, https://substackcdn.com/image/fetch/$s_!eB-K!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37ba4b5f-d124-4e75-8d24-bbbd9e9a2c22_2658x1935.png 1272w, https://substackcdn.com/image/fetch/$s_!eB-K!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37ba4b5f-d124-4e75-8d24-bbbd9e9a2c22_2658x1935.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>If expert-data alignment governs quality, three predictions should hold. Sparse routing should achieve higher alignment (selected experts have lower data-cluster distance). Selected experts should produce superior velocity predictions. And expert disagreement should correlate with quality degradation. We tested all three.</p><div><hr></div><h2>Experimental Validation</h2><h4>Cluster Distance Analysis</h4><p>Does sparse routing actually select experts whose training distributions match the input?</p><p>Using the Paris DDM (<em>K=8</em> experts, DiT-XL/2 architecture with ~606M parameters each), we extracted DINOv2-ViT-L/14 embeddings at timesteps <em>t</em> in <em>{0.3, 0.5, 0.7}</em> during sampling for 500 samples. 
For each state, we computed the Euclidean distance from the embedding to each of the 8 cluster centroids used during expert training, then ranked the experts by this distance.</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\begin{array}{lcc}\n\\hline\n\\text{Routing} &amp; \\text{Mean Cluster Rank }\\downarrow &amp; \\text{Top-2 Match Rate }\\uparrow \\\\\n\\hline\n\\text{Top-1} &amp; 1.54 \\pm 0.28 &amp; 90.2\\% \\\\\n\\text{Top-2} &amp; 1.96 \\pm 0.26 &amp; 83.9\\% \\\\\n\\text{Full Ensemble} &amp; 4.50\\ \\text{(uniform)} &amp; 25.0\\%\\ \\text{(random)} \\\\\n\\hline\n\\end{array}&quot;,&quot;id&quot;:&quot;KNIDJQNGOX&quot;}" data-component-name="LatexBlockToDOM"></div><p>The router consistently selects experts whose training distribution are closest to the current denoising state. Top-1 achieves a mean rank of 1.54 (near-optimal selection), while Top-2 maintains strong alignment at 1.96. Full Ensemble, by construction, averages across all experts regardless of relevance.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!lBMt!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdba5de61-ee76-4e5a-b092-54565c6527d5_3755x2548.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!lBMt!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdba5de61-ee76-4e5a-b092-54565c6527d5_3755x2548.png 424w, https://substackcdn.com/image/fetch/$s_!lBMt!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdba5de61-ee76-4e5a-b092-54565c6527d5_3755x2548.png 848w, 
https://substackcdn.com/image/fetch/$s_!lBMt!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdba5de61-ee76-4e5a-b092-54565c6527d5_3755x2548.png 1272w, https://substackcdn.com/image/fetch/$s_!lBMt!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdba5de61-ee76-4e5a-b092-54565c6527d5_3755x2548.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!lBMt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdba5de61-ee76-4e5a-b092-54565c6527d5_3755x2548.png" width="1456" height="988" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/dba5de61-ee76-4e5a-b092-54565c6527d5_3755x2548.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:988,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:558741,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/187024071?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdba5de61-ee76-4e5a-b092-54565c6527d5_3755x2548.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!lBMt!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdba5de61-ee76-4e5a-b092-54565c6527d5_3755x2548.png 424w, 
https://substackcdn.com/image/fetch/$s_!lBMt!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdba5de61-ee76-4e5a-b092-54565c6527d5_3755x2548.png 848w, https://substackcdn.com/image/fetch/$s_!lBMt!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdba5de61-ee76-4e5a-b092-54565c6527d5_3755x2548.png 1272w, https://substackcdn.com/image/fetch/$s_!lBMt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdba5de61-ee76-4e5a-b092-54565c6527d5_3755x2548.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The router is picky, and that&#8217;s the point. Sparse routing filters out experts that are statistically likely to produce poor predictions because they&#8217;ve never seen similar data.</p><h4>Per-Expert Prediction Quality</h4><p>Do selected experts actually produce better predictions?</p><p>For each step in Top-2 generation, we computed velocity vectors from all experts and measured angular deviation from the final blended velocity (which successfully guides the image to completion).</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\begin{array}{lccc}\n\\hline\n\\text{System} &amp; \\text{Selected Experts} &amp; \\text{Non-Selected Experts} &amp; \\text{Gap} \\\\\n\\hline\n\\text{Paris (8 experts)} &amp; 3.6^\\circ &amp; 5.1^\\circ &amp; 29\\% \\\\\n\\text{MNIST (10 experts)} &amp; 6.4^\\circ &amp; 11.3^\\circ &amp; 43\\% \\\\\n\\hline\n\\end{array}&quot;,&quot;id&quot;:&quot;UNUQMZXQCG&quot;}" data-component-name="LatexBlockToDOM"></div><p>Selected experts consistently produce predictions that align more closely with the successful trajectory. The gap widens with specialization: the MNIST system (each expert trained on a single digit) shows a 43% difference versus 29% for Paris. When experts are highly specialized, the cost of including an inappropriate expert increases. A landscape expert might still contribute useful texture when generating a portrait. A &#8220;zero&#8221; expert offers actively harmful gradients when drawing a &#8220;seven.&#8221;</p><h4>Expert Disagreement Analysis</h4><p>Does expert disagreement predict quality degradation?</p><p>We computed trajectory-integrated disagreement: the average pairwise Euclidean distance between expert velocity predictions, summed over the denoising trajectory. 
We sorted generated images into quartiles by disagreement and measured <a href="https://arxiv.org/abs/1801.03924">LPIPS</a> (perceptual distance to reference).</p><p>Images in the high-disagreement quartile (Q4) exhibit worse LPIPS scores than those in the low-disagreement quartile (Q1). The relationship is monotonic.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!jcVq!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8dd5e1ae-b4bf-4337-ad2e-5c11a390d836_4098x2153.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!jcVq!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8dd5e1ae-b4bf-4337-ad2e-5c11a390d836_4098x2153.png 424w, https://substackcdn.com/image/fetch/$s_!jcVq!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8dd5e1ae-b4bf-4337-ad2e-5c11a390d836_4098x2153.png 848w, https://substackcdn.com/image/fetch/$s_!jcVq!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8dd5e1ae-b4bf-4337-ad2e-5c11a390d836_4098x2153.png 1272w, https://substackcdn.com/image/fetch/$s_!jcVq!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8dd5e1ae-b4bf-4337-ad2e-5c11a390d836_4098x2153.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!jcVq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8dd5e1ae-b4bf-4337-ad2e-5c11a390d836_4098x2153.png" width="1456" height="765" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8dd5e1ae-b4bf-4337-ad2e-5c11a390d836_4098x2153.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:765,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:500223,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/187024071?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8dd5e1ae-b4bf-4337-ad2e-5c11a390d836_4098x2153.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!jcVq!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8dd5e1ae-b4bf-4337-ad2e-5c11a390d836_4098x2153.png 424w, https://substackcdn.com/image/fetch/$s_!jcVq!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8dd5e1ae-b4bf-4337-ad2e-5c11a390d836_4098x2153.png 848w, https://substackcdn.com/image/fetch/$s_!jcVq!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8dd5e1ae-b4bf-4337-ad2e-5c11a390d836_4098x2153.png 1272w, https://substackcdn.com/image/fetch/$s_!jcVq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8dd5e1ae-b4bf-4337-ad2e-5c11a390d836_4098x2153.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>This is the Full Ensemble failure mode. When experts agree, the average produces reasonable results. When experts disagree (which happens frequently because most experts are out-of-distribution), the average becomes an incoherent compromise. It&#8217;s like asking a portrait painter and a landscape painter to collaborate on a cityscape. Sparse routing avoids this by silencing experts that are likely to disagree.</p><div><hr></div><h2>Is Stability Still Useful?</h2><p>If numerical stability doesn&#8217;t govern quality, is it still useful? Yes, but it just measures the wrong thing for generation quality.</p><h4>Stability Measures Convergence, Not Correctness</h4><p>Step-refinement disagreement measures whether the solver converges consistently. 
Full ensemble achieves excellent convergence with step-refinement disagreement approximately 0.020 - doubling the number of sampling steps barely changes the output. Top-2 exhibits more numerical noise with step-refinement disagreement approximately 0.051.</p><p>But it&#8217;s possible to converge perfectly to a blurry, incoherent average. Stability metrics indicate how easily the solver finds a solution, not whether that solution is good.</p><h4>Within-Strategy Diagnostics</h4><p>Trajectory sensitivity may still work as a within-strategy diagnostic. If practitioners are using Top-2 routing, a sudden spike in sensitivity for a specific input might flag a &#8220;hard&#8221; sample that needs more inference steps, even though sensitivity doesn&#8217;t predict quality across routing strategies.</p><div><hr></div><h2>Discussion</h2><h4>Limitations</h4><p>We validate our hypothesis on two DDM systems (Paris with 8 experts, MNIST with 10 experts). The pattern is consistent across both, but additional systems would strengthen the conclusions - in progress at Bagel Labs. The relationship between cluster distance and prediction quality could be confounded by other factors, such as experts trained on larger clusters being more robust. We control for this by using the same embedding space (DINOv2) that was used during expert training.</p><h4>Implications</h4><p>For practitioners building DDM systems:</p><p>1. <strong>Routing should prioritize alignment over stability.</strong> Standard numerical stability metrics don&#8217;t indicate system health in DDMs. A &#8220;smooth&#8221; sampler may simply be averaging away useful signal.</p><p>2. <strong>Sparse routing is preferable.</strong> Top-2 routing achieves a favorable tradeoff: it maintains expert-data alignment while allowing experts to cross-reference each other. Top-1 may be too aggressive; Full Ensemble destroys alignment.</p><p>3. 
<strong>Monitor expert-data alignment directly.</strong> Track data-cluster distance ranks and expert disagreement during development, not just final FID scores.</p><div><hr></div><h2>Conclusion</h2><p>Numerical stability doesn&#8217;t govern generation quality in Decentralized Diffusion Models. Expert-data alignment does. It means routing inputs to experts trained on similar data.</p><p>DDM systems should be evaluated and optimized for alignment rather than stability. Sparse routing succeeds not because it produces stable trajectories, but because it ensures each active expert operates within its domain of competence.</p><div><hr></div><p><em>This post presents findings from our research. For full experimental details, methodology, and additional analyses, see:</em></p><p><a href="https://arxiv.org/abs/2602.02685">Expert-Data Alignment Governs Generation Quality in Decentralized Diffusion Models</a></p><pre><code>@misc{villagra_expertdataalignment_2026,
  author       = {Marcos Villagra and Bidhan Roy and Raihan Seraj and Zhiying Jiang},
  title        = {{Expert-Data Alignment Governs Generation Quality in Decentralized Diffusion Models}},
  howpublished = {\url{https://arxiv.org/abs/2602.02685}},
  note         = {arXiv:2602.02685 &#8226; accessed DD&#8239;Mon&#8239;YYYY},
  year         = {2026}
}</code></pre>]]></content:encoded></item><item><title><![CDATA[Introducing Paris]]></title><description><![CDATA[World's First Decentralized Trained Open-Weight Diffusion Model]]></description><link>https://blog.bagel.com/p/paris</link><guid isPermaLink="false">https://blog.bagel.com/p/paris</guid><dc:creator><![CDATA[Gin Jiang]]></dc:creator><pubDate>Tue, 07 Oct 2025 16:11:16 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/da19c44c-070a-4398-a65d-f5e95fc86e5d_2134x1866.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>We&#8217;re releasing <strong>Paris</strong> - the world&#8217;s first decentralized trained open-weight diffusion model. The model is <strong>open for research and commercial use</strong> under the MIT license.</p><p>We named it Paris after the city that has always been a refuge for those creating without permission. Two remarkable facts make Paris the first of its kind:</p><ol><li><p>It&#8217;s a combination of smaller expert diffusion models pre-trained from scratch across different continents in complete isolation. The experts required <strong>zero gradient, parameter, or intermediate activation synchronization</strong> among each other during training.</p></li><li><p>This zero communication protocol achieves comparable quality to SOTA distributed approaches using <strong>14&#215; less data and 16&#215; less compute</strong>.</p></li></ol><p>How? 
Full technical report and model weights below.</p><p>Full Technical Report : <strong><a href="https://github.com/bageldotcom/paris/blob/main/paper.pdf">https://github.com/bageldotcom/paris/blob/main/paper.pdf</a></strong><br>Model Weights : <strong><a href="https://huggingface.co/bageldotcom/paris">https://huggingface.co/bageldotcom/paris</a></strong></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://arxiv.org/abs/2510.03434" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ePm1!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17139304-fef9-4924-96a1-9e7245fe7ab9_1080x1518.png 424w, https://substackcdn.com/image/fetch/$s_!ePm1!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17139304-fef9-4924-96a1-9e7245fe7ab9_1080x1518.png 848w, https://substackcdn.com/image/fetch/$s_!ePm1!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17139304-fef9-4924-96a1-9e7245fe7ab9_1080x1518.png 1272w, https://substackcdn.com/image/fetch/$s_!ePm1!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17139304-fef9-4924-96a1-9e7245fe7ab9_1080x1518.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ePm1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17139304-fef9-4924-96a1-9e7245fe7ab9_1080x1518.png" width="1080" height="1518" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/17139304-fef9-4924-96a1-9e7245fe7ab9_1080x1518.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1518,&quot;width&quot;:1080,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:993073,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:&quot;https://arxiv.org/abs/2510.03434&quot;,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://blog.bagel.com/i/175281324?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17139304-fef9-4924-96a1-9e7245fe7ab9_1080x1518.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ePm1!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17139304-fef9-4924-96a1-9e7245fe7ab9_1080x1518.png 424w, https://substackcdn.com/image/fetch/$s_!ePm1!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17139304-fef9-4924-96a1-9e7245fe7ab9_1080x1518.png 848w, https://substackcdn.com/image/fetch/$s_!ePm1!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17139304-fef9-4924-96a1-9e7245fe7ab9_1080x1518.png 1272w, https://substackcdn.com/image/fetch/$s_!ePm1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17139304-fef9-4924-96a1-9e7245fe7ab9_1080x1518.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container 
restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><p>We believe we can scale this approach to global state-of-the-art results. But that requires solving some more really, really hard problems. 
If you&#8217;re an ML researcher or engineer interested in helping us achieve this while doing the best open-source work of your career, come work with us: <strong><a href="http://jobs.bagel.com">jobs.bagel.com</a></strong>.</p>]]></content:encoded></item><item><title><![CDATA[Tiny Tool Use]]></title><description><![CDATA[Open source library for teaching LLMs tool-use]]></description><link>https://blog.bagel.com/p/tiny-tool-use</link><guid isPermaLink="false">https://blog.bagel.com/p/tiny-tool-use</guid><dc:creator><![CDATA[Raihan Seraj]]></dc:creator><pubDate>Tue, 01 Jul 2025 14:03:17 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!VSzl!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffee5beca-30ff-4fd4-8451-32da1c938b2c_1456x816.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<blockquote><p>Bagel Labs is launching <strong><a href="https://github.com/bagel-org/bagel-RL">Tiny Tool Use</a></strong>, an intentionally tiny but production-grade open-source library designed to simplify the process of training open-source LLMs for tool use.</p><p>Tool-aware LLMs turn text into real-world actions, which unlocks autonomous decision making for robotics and general infrastructure use.</p><p><strong>Tiny Tool Use</strong> distills the latest advances in tool-use RL, SFT and evaluation into easy-to-use templates. 
Letting teams train and evaluate tool-calling models without extra scaffolding.</p></blockquote><p>It is fully open source : <strong><a href="https://github.com/bagel-org/bagel-RL">https://github.com/bagel-org/bagel-RL</a></strong></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!VSzl!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffee5beca-30ff-4fd4-8451-32da1c938b2c_1456x816.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!VSzl!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffee5beca-30ff-4fd4-8451-32da1c938b2c_1456x816.png 424w, https://substackcdn.com/image/fetch/$s_!VSzl!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffee5beca-30ff-4fd4-8451-32da1c938b2c_1456x816.png 848w, https://substackcdn.com/image/fetch/$s_!VSzl!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffee5beca-30ff-4fd4-8451-32da1c938b2c_1456x816.png 1272w, https://substackcdn.com/image/fetch/$s_!VSzl!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffee5beca-30ff-4fd4-8451-32da1c938b2c_1456x816.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!VSzl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffee5beca-30ff-4fd4-8451-32da1c938b2c_1456x816.png" width="1456" height="816" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fee5beca-30ff-4fd4-8451-32da1c938b2c_1456x816.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:816,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2536529,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://blog.bagel.net/i/167189773?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffee5beca-30ff-4fd4-8451-32da1c938b2c_1456x816.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!VSzl!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffee5beca-30ff-4fd4-8451-32da1c938b2c_1456x816.png 424w, https://substackcdn.com/image/fetch/$s_!VSzl!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffee5beca-30ff-4fd4-8451-32da1c938b2c_1456x816.png 848w, https://substackcdn.com/image/fetch/$s_!VSzl!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffee5beca-30ff-4fd4-8451-32da1c938b2c_1456x816.png 1272w, https://substackcdn.com/image/fetch/$s_!VSzl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffee5beca-30ff-4fd4-8451-32da1c938b2c_1456x816.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>Tiny Tool Use</strong> ships with:</p><ul><li><p><strong>Interchangeable Training Algorithms</strong> &#8211; swap SFT, Direct Preference Optimization (DPO), synthetic teacher signals and more with a single config change.</p></li><li><p><strong>Configuration&#8209;only workflows</strong> &#8211; every experiment, tool schema, and hyper&#8209;parameter lives in a JSON file as a result performing training with different configuration is easy.</p></li><li><p><strong>First&#8209;class evaluation support</strong> &#8211; TensorBoard dashboards for training visualization and integration with <a href="https://gorilla.cs.berkeley.edu/leaderboard.html">Berkeley Function Calling Leaderboard</a> scripts.</p></li><li><p><strong>Dataset flexibility</strong> &#8211; plug in real data, generate synthetic traces, or compose both without 
touching core code.</p></li></ul><div><hr></div><h2><strong>Training Example Using Qwen3 Models</strong></h2><blockquote><p>We now provide an example of using the library to train Qwen3 models. We use Low Rank Adaptation (LoRA) to customize Qwen3 models on the <a href="https://github.com/OpenBMB/ToolBench">ToolBench</a> dataset. The library ships with the example configuration provided in <em><strong>configs/sft_toolbench_config.json</strong></em>, which downloads the data, extracts it and uses the processed data for training.</p></blockquote><p>To run the training code with the <code>Qwen3-0.6B</code> model, use the following command:</p><pre><code>python train.py --config configs/sft_toolbench_config.json --output-dir lora_sft_qwen3/</code></pre><p>The script will start by downloading and unzipping the ToolBench dataset, which will take several minutes considering the size of ToolBench.</p><p>The above code starts the training procedure, using LoRA adapters. The configuration file can be edited for full training instead of LoRA adapters. Furthermore, the configuration of the adapters can also be changed accordingly.</p><h2><strong>Evaluation and Benchmarking</strong></h2><blockquote><p>Beyond training, the Tiny Tool Use library offers a robust framework for evaluating the general tool-use capabilities of an adapted model. This includes the ability to compare evaluation results directly with established benchmarks, such as the <a href="https://gorilla.cs.berkeley.edu/leaderboard.html">Berkeley Function Calling Leaderboard</a>.</p></blockquote><p>The training statistics can be visualized by running TensorBoard with the following command:</p><pre><code>tensorboard --logdir lora_sft_qwen3/ </code></pre><div class="pullquote"><p>The performance of the adapted model on ToolBench data as training progresses. 
The  evaluation data is displayed in the TensorBoard dashboard for the <code>Qwen3-0.6B</code> model, demonstrating that the Tiny Tool Use library offers clear and interpretable training and evaluation metrics along with improved model capability for function calling.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!RDnT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0036d5d3-f065-4678-ac5b-b4f5b1174f54_2202x1244.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!RDnT!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0036d5d3-f065-4678-ac5b-b4f5b1174f54_2202x1244.png 424w, https://substackcdn.com/image/fetch/$s_!RDnT!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0036d5d3-f065-4678-ac5b-b4f5b1174f54_2202x1244.png 848w, https://substackcdn.com/image/fetch/$s_!RDnT!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0036d5d3-f065-4678-ac5b-b4f5b1174f54_2202x1244.png 1272w, https://substackcdn.com/image/fetch/$s_!RDnT!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0036d5d3-f065-4678-ac5b-b4f5b1174f54_2202x1244.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!RDnT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0036d5d3-f065-4678-ac5b-b4f5b1174f54_2202x1244.png" width="728" height="411.5" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0036d5d3-f065-4678-ac5b-b4f5b1174f54_2202x1244.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:823,&quot;width&quot;:1456,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:258471,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.net/i/167189773?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0036d5d3-f065-4678-ac5b-b4f5b1174f54_2202x1244.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!RDnT!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0036d5d3-f065-4678-ac5b-b4f5b1174f54_2202x1244.png 424w, https://substackcdn.com/image/fetch/$s_!RDnT!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0036d5d3-f065-4678-ac5b-b4f5b1174f54_2202x1244.png 848w, https://substackcdn.com/image/fetch/$s_!RDnT!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0036d5d3-f065-4678-ac5b-b4f5b1174f54_2202x1244.png 1272w, https://substackcdn.com/image/fetch/$s_!RDnT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0036d5d3-f065-4678-ac5b-b4f5b1174f54_2202x1244.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg 
role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p></div><p>When the training is complete, the adapters can be merged and saved using the following command</p><pre><code>python save_merge_model.py \
--base_model Qwen/Qwen3-0.6B \
--adapter_path lora_sft_qwen3/ \
--output_dir merged_model/ \
--trust_remote_code </code></pre><p>A model adapted with a subset of Toolbench data can be obtained from the following link: <a href="https://huggingface.co/BagelLabs/TinyToolUse-Qwen3-0.6B-toolbench/tree/main">Qwen3-0.6B-ToolBench</a></p><h3>BFCL Leaderboard</h3><p>The <a href="https://gorilla.cs.berkeley.edu/leaderboard.html">BFCL evaluation</a> on the model can be performed using the following commands, which will generate model response on different test cases.</p><pre><code>export BFCL_PROJECT_ROOT=/path/to/your/desired/project/directory

bfcl generate --model Qwen/Qwen3-0.6B --local-model-path merged_model/ \
--test-category simple,parallel,multiple,multi_turn</code></pre><p>Finally to obtain the score on the generated model response, the following code is executed, which will save the scores as a csv file.</p><pre><code>bfcl evaluate --model Qwen/Qwen3-0.6B \
--test-category simple,parallel,multiple,multi_turn</code></pre><div><hr></div><p>Bagel Labs team will continue to improve the library to adapt it for broader tool use, with an emphasis on distributed learning algorithms. We welcome contributions, feature requests, and issues on our fully open-source repository: <strong><a href="https://github.com/bagel-org/bagel-RL">https://github.com/bagel-org/bagel-RL</a></strong></p>]]></content:encoded></item><item><title><![CDATA[Return on Experience (RoE)]]></title><description><![CDATA[A single benchmark that predicts Reinforcement Learning's future.]]></description><link>https://blog.bagel.com/p/return-on-experience</link><guid isPermaLink="false">https://blog.bagel.com/p/return-on-experience</guid><dc:creator><![CDATA[Bidhan Roy]]></dc:creator><pubDate>Thu, 01 May 2025 13:31:07 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!lUPh!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57922e8-bbfd-4abb-8ad6-c16c4af1f43d_2048x2048.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Reinforcement learning (RL) evolves at headline speed. Each month brings a new &#8220;state&#8209;of&#8209;the&#8209;art.&#8221; But we had to pause to ask one important question:</p><blockquote><p><em>Can a single number line up every milestone so far and hint at the next leap?</em></p></blockquote><p>We think <em>maybe</em>. We call that number <strong>Return on Experience (RoE)</strong>. 
It&#8217;s an early phase benchmark.</p><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://blog.bagel.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption"></p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><p>What you read below is a lab notebook, handed down as working scripture. Feel free to share your peer review, or join the <strong><a href="https://jobs.bagel.net/">loop</a></strong> (what&#8217;s loop? more on that below).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!lUPh!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57922e8-bbfd-4abb-8ad6-c16c4af1f43d_2048x2048.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!lUPh!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57922e8-bbfd-4abb-8ad6-c16c4af1f43d_2048x2048.png 424w, https://substackcdn.com/image/fetch/$s_!lUPh!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57922e8-bbfd-4abb-8ad6-c16c4af1f43d_2048x2048.png 848w, 
https://substackcdn.com/image/fetch/$s_!lUPh!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57922e8-bbfd-4abb-8ad6-c16c4af1f43d_2048x2048.png 1272w, https://substackcdn.com/image/fetch/$s_!lUPh!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57922e8-bbfd-4abb-8ad6-c16c4af1f43d_2048x2048.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!lUPh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57922e8-bbfd-4abb-8ad6-c16c4af1f43d_2048x2048.png" width="1456" height="1456" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b57922e8-bbfd-4abb-8ad6-c16c4af1f43d_2048x2048.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:6288136,&quot;alt&quot;:&quot;bagel, monetizable open-source AI&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://blog.bagel.net/i/162229256?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57922e8-bbfd-4abb-8ad6-c16c4af1f43d_2048x2048.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="bagel, monetizable open-source AI" title="bagel, monetizable open-source AI" srcset="https://substackcdn.com/image/fetch/$s_!lUPh!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57922e8-bbfd-4abb-8ad6-c16c4af1f43d_2048x2048.png 424w, 
https://substackcdn.com/image/fetch/$s_!lUPh!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57922e8-bbfd-4abb-8ad6-c16c4af1f43d_2048x2048.png 848w, https://substackcdn.com/image/fetch/$s_!lUPh!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57922e8-bbfd-4abb-8ad6-c16c4af1f43d_2048x2048.png 1272w, https://substackcdn.com/image/fetch/$s_!lUPh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57922e8-bbfd-4abb-8ad6-c16c4af1f43d_2048x2048.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3><strong>New experience costs more than extra GPU time</strong></h3><p>When power is cheap, we can keep feeding a language model massive amounts of internet text at little extra cost.</p><p>But a surgical robot, a self-driving car, or an ICU-triage policy cannot just &#8220;scrape more experience&#8221;.</p><p>Every new trial burns tires, takes staff time, or waits for legal clearance.</p><p>In these settings the scarce resource is fresh experience - each environment step that has to happen in the real world.</p><p>Sample-efficiency is the art of squeezing the most learning from the fewest such experiences, and <strong>RoE</strong> is a meter for that exchange rate.</p><div><hr></div><h3><strong>Recent AI breakthroughs point in an interesting direction</strong></h3><p><strong>DeepSeek-R1-Zero</strong> improves reasoning capability through millions of GRPO self-play prompt-episodes. Meaning, the model practically &#8220;argues with itself&#8221; millions of times, turning each prompt-response into a tiny lesson, without requiring tons of human text.</p><p>The <strong>OpenAI o3 tool-use</strong> agent was trained to ask whether the next tool call (browser, code runner, or image) will actually move the answer forward. Each tool use carries an explicit cost during training, so the model invokes a tool only when the expected reward of that <em>experience</em> outweighs the cost.</p><p><strong>DreamerV3</strong> learns a world-model first, then practices inside that imagined space, harvesting thousands of virtual trials for every real one.</p><p>Different domains, same theme. Progress comes from squeezing more value out of each new <em>experience</em>. The next question is, how much result do we get per unit of experience? 
That ratio is <strong>RoE</strong>.</p><p>RoE compresses sample-efficiency into a single number that rises roughly logarithmically with progress.</p><div><hr></div><h3><strong>The Formula</strong></h3><p>Let&#8217;s turn that idea into one formal equation.</p><p><strong>Return on Experience (RoE)</strong> tells us how much &#8220;win&#8221; an RL agent buys per interaction it pays for. In other words, <em>if you spend one unit of real-world experience, how far does your performance score climb?</em></p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\n\\operatorname{RoE} \\;=\\; \\frac{\\text{headline_score}}{\\displaystyle\\sum_{t} weight_t\\,count_t}\\;\\times\\;10^{6}\n\n&quot;,&quot;id&quot;:&quot;ZQXWBJJPAG&quot;}" data-component-name="LatexBlockToDOM"></div><p>One step, one score, divided by weighted cost. Where,</p><p><strong>headline score</strong> - the score a paper or a model brags about - Atari score, math-test accuracy, etc.</p><p><strong>count</strong> - how many times the RL agent interacted with <em>something</em> while learning.</p><p><strong>weight</strong> - Not all interactions are equal. Weight adds a rough price tag for each kind of interaction because some are costly and some are cheap.</p><ul><li><p>real-world action = 1 (full price)</p></li><li><p>calling an external tool (e.g. OpenAI o3) &#8776; 0.01</p></li><li><p>self-play or a step inside a learned simulator (e.g. DeepSeek-R1-Zero) &#8776; 0.001</p></li></ul><p>We think of the weights as discounts: a synthetic or simulated step costs about a thousandth of a real-world step, and a tool call costs about a hundredth.</p><p>This way, RoE lets us put a language model, a robot arm, and a video-game agent on the same chart. High RoE means the agent climbs the leaderboard quickly <strong>and</strong> keeps the cost for fresh experience low. 
Low RoE means it wastes a lot of real-world practice for each bump in score.</p><div><hr></div><h3>A short history of RoE Loops</h3><p>Below we cast some historical milestones of Reinforcement Learning into the RoE benchmark, to show that RoE could have predicted their performance.</p><p>We call each big RoE jump a <em><strong>loop</strong></em> - one full turn of experience &#8594; insight &#8594; back to model/agent. Like the <a href="https://en.wikipedia.org/wiki/Closed-loop_transfer_function">closed-loop theorem</a> (or a bagel ring).</p><p>But first, some info on the classic benchmarks used to calculate RoE,</p><ul><li><p><strong>Atari</strong> - 57 classic video games used as a standard obstacle course for RL.</p><ul><li><p><em>Human-normalised score</em> takes a human play-through on each game, sets that to <strong>1.0</strong>, then shows how far the agent climbs above it.</p></li><li><p><em>Median</em> is the middle game after ranking all 57 scores; a quick &#8220;overall&#8221; health check.</p></li><li><p><em>Atari-80k / Atari-100k mean</em> looks at the <strong>average</strong> score but limits the agent to only 80k or 100k game frames, so we can see who learns fastest, not just who learns best.</p></li></ul></li><li><p><strong>GSM8K</strong> - 8.5k grade-school word problems. For each problem an RL agent writes one answer. 
<em>Pass@1</em> is simply the percentage of problems it gets right on that very first try.</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!XLXw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd3dcaf7-6d44-43b3-b738-8340a52a1e9f_2180x690.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!XLXw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd3dcaf7-6d44-43b3-b738-8340a52a1e9f_2180x690.png 424w, https://substackcdn.com/image/fetch/$s_!XLXw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd3dcaf7-6d44-43b3-b738-8340a52a1e9f_2180x690.png 848w, https://substackcdn.com/image/fetch/$s_!XLXw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd3dcaf7-6d44-43b3-b738-8340a52a1e9f_2180x690.png 1272w, https://substackcdn.com/image/fetch/$s_!XLXw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd3dcaf7-6d44-43b3-b738-8340a52a1e9f_2180x690.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!XLXw!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd3dcaf7-6d44-43b3-b738-8340a52a1e9f_2180x690.png" width="1200" height="379.94505494505495" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/cd3dcaf7-6d44-43b3-b738-8340a52a1e9f_2180x690.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:461,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:212974,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://blog.bagel.net/i/162229256?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd3dcaf7-6d44-43b3-b738-8340a52a1e9f_2180x690.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!XLXw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd3dcaf7-6d44-43b3-b738-8340a52a1e9f_2180x690.png 424w, https://substackcdn.com/image/fetch/$s_!XLXw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd3dcaf7-6d44-43b3-b738-8340a52a1e9f_2180x690.png 848w, https://substackcdn.com/image/fetch/$s_!XLXw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd3dcaf7-6d44-43b3-b738-8340a52a1e9f_2180x690.png 1272w, https://substackcdn.com/image/fetch/$s_!XLXw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd3dcaf7-6d44-43b3-b738-8340a52a1e9f_2180x690.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><em>Here is a worked example of RoE calculation with OpenAI o3,</em></p><p>For o3, headline_score = 0.982, weight = 0.01, interaction<strong> </strong>count = 2M.</p><p>So, according to our RoE equation above,</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\mathrm{RoE}_{\\text{o3}}\n  = \\left( \\frac{0.982}{0.01 \\times 2\\,000\\,000} \\right)\\times 10^{6}\n  \\;\\approx\\; 49\n&quot;,&quot;id&quot;:&quot;ZUWCNQLUTG&quot;}" data-component-name="LatexBlockToDOM"></div><p>Observations,</p><ul><li><p>Rainbow quadrupled RoE over DQN by upgrading loss functions and replay, proving that better optimization alone can unlock large efficiency from same amount of experiences.</p></li><li><p>EfficientZero learned to plan inside a latent world model instead of in the real world. 
With most interactions being synthetic, RoE passed 10.</p></li><li><p>DreamerV3 pushed the same idea further. Over 90% of the experiences were simulated. Real-world performance climbed with very little new real-world <em>experience</em>.</p></li><li><p>DeepSeek-R1 brought model self-play into the limelight. The model debates with itself on each problem, <a href="https://developer.nvidia.com/blog/nvidia-blackwell-delivers-world-record-deepseek-r1-inference-performance">boosting GSM8K accuracy</a> while keeping the new and cost-heavy experience ledger small.</p></li><li><p>Cost-aware tool use has been another RL win of 2025. OpenAI o3 trained the RL agent to accrue a cost while calling a tool. That optimized RoE to ~50.</p></li></ul><div><hr></div><h3>Prediction for the next 5 years</h3><p>RoE has grown on a log curve - 0.023 for Rainbow (2018) up to about 50 for OpenAI o3 (2025). There&#8217;s no sign that the slope is flattening. World-model research, sample-efficient policy optimization and <strong>a coming surge in cheap compute</strong> all point the same way.</p><p>Below is a 5-year prediction on RoE numbers, and examples of what that could mean, inspired by peer-reviewed academic work,</p><p><em>(We count the past jumps - Rainbow through o3 - as Loops 1-5.)</em></p><p><strong>Loop 6 - Year 2026 - RoE &#8776; 150</strong></p><p>World-model research will slip from Atari into the operating room. Hospitals will train surgical robots almost fully in world-model simulators such as <a href="https://arxiv.org/abs/2310.04676">Surgical Gym</a> and <a href="https://autolab.berkeley.edu/assets/publications/media/2024-ICRA-ORBIT-Surgical.pdf?utm_source=bagel.net">ORBIT-Surgical</a>. 
A bot will practise thousands of virtual stitches before doing a real one.</p><p><strong>Loop 7 - Year 2027 - RoE &#8776; 1000</strong></p><p>Scaling labs will run foundation world-models such as <a href="https://deepmind.google/discover/blog/genie-2-a-large-scale-foundation-world-model/?utm_source=bagel.net">Genie</a> on ~30x cheaper compute hardware. Agents will write and test entire NeurIPS papers inside those generated 3D sandboxes, requiring only high-level edits from humans. <em>RoE will move past the four-digit mark</em>.</p><p><strong>Loop 8 - Year 2028 - RoE &#8776; 3000</strong></p><p>Self-play on synthetic molecule sets will reduce wet-lab screening from tens of thousands of assays to a few hundred. Drug discovery will be massively accelerated. <em>RoE will keep its log-pace and enter &#8220;several thousand&#8221; territory.</em></p><p><strong>Loop 9 - Year 2029 - RoE &#8776; 8000</strong></p><p>Regulators will approve the first cargo aircraft whose flight-control laws are proved largely in <a href="https://pure.tudelft.nl/ws/portalfiles/portal/172957377/ribeiro_et_al_2024_certification_of_reinforcement_learning_applications_for_air_transport_operations_based_on.pdf/?utm_source=bagel.net">simulation-based certification workflows</a>. One hour of real flight data will be enough to sign off.</p><p><strong>Loop 10 - Year 2030 - RoE &#8776; 12000</strong></p><p>Companies will field <strong>&#8220;digital graduates&#8221;</strong> - ~30B parameter reasoning models trained with <strong><a href="https://arxiv.org/abs/2503.20783/?utm_source=bagel.net">Dr. GRPO</a></strong> style length-penalised objectives. Each will master a new technical field from a few annotated pages, then draft patents, close mergers and rewrite tax law overnight. 
One human auditor will review the output of a thousand such agents.</p><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://blog.bagel.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading! Subscribe for free to receive new posts.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[ZKLoRA]]></title><description><![CDATA[Zero-Knowledge Verification of LoRA training on SOTA Models in 1-2 Seconds]]></description><link>https://blog.bagel.com/p/zklora</link><guid isPermaLink="false">https://blog.bagel.com/p/zklora</guid><dc:creator><![CDATA[Bidhan Roy]]></dc:creator><pubDate>Tue, 21 Jan 2025 15:50:33 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!8ydi!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cc9f136-cc75-4a6e-9c58-1e2883028237_1220x1232.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>In 2024, zero knowledge verifiability for machine learning seemed impossible. The latency overhead was too high. We did a research report on it <a href="https://blog.bagel.net/p/with-great-data-comes-great-responsibility">here</a>.</p><p>But, it is 2025. The year of AGI. And, we have made it possible.</p><p>Today, we&#8217;re open sourcing a piece of frontier research, <strong>ZKLoRA</strong>. 
A zero knowledge protocol that allows verification of <a href="https://arxiv.org/abs/2106.09685">LoRA</a> fine-tuning of open source AI models, <strong>in 1-2 seconds</strong>.<strong><br><br></strong>And not only for toy models, but for current state of the art open source models like llama 3.3 etc. with tens or hundreds of billions of parameters.</p><p>Want to try it yourself? Here&#8217;s the code : <a href="https://github.com/bagel-org/ZKLoRA">https://github.com/bagel-org/ZKLoRA</a></p><p>Want to see the benchmarks and curious how it works? Read the full research paper here : <a href="https://github.com/bagel-org/ZKLoRA/blob/main/paper.pdf">https://github.com/bagel-org/ZKLoRA/blob/main/paper.pdf</a></p><div><hr></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!8ydi!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cc9f136-cc75-4a6e-9c58-1e2883028237_1220x1232.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!8ydi!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cc9f136-cc75-4a6e-9c58-1e2883028237_1220x1232.png 424w, https://substackcdn.com/image/fetch/$s_!8ydi!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cc9f136-cc75-4a6e-9c58-1e2883028237_1220x1232.png 848w, https://substackcdn.com/image/fetch/$s_!8ydi!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cc9f136-cc75-4a6e-9c58-1e2883028237_1220x1232.png 1272w, 
https://substackcdn.com/image/fetch/$s_!8ydi!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cc9f136-cc75-4a6e-9c58-1e2883028237_1220x1232.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!8ydi!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cc9f136-cc75-4a6e-9c58-1e2883028237_1220x1232.png" width="1220" height="1232" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8cc9f136-cc75-4a6e-9c58-1e2883028237_1220x1232.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1232,&quot;width&quot;:1220,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:406377,&quot;alt&quot;:&quot;https://github.com/bagel-org/ZKLoRA/blob/main/paper.pdf&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="https://github.com/bagel-org/ZKLoRA/blob/main/paper.pdf" title="https://github.com/bagel-org/ZKLoRA/blob/main/paper.pdf" srcset="https://substackcdn.com/image/fetch/$s_!8ydi!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cc9f136-cc75-4a6e-9c58-1e2883028237_1220x1232.png 424w, https://substackcdn.com/image/fetch/$s_!8ydi!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cc9f136-cc75-4a6e-9c58-1e2883028237_1220x1232.png 848w, 
https://substackcdn.com/image/fetch/$s_!8ydi!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cc9f136-cc75-4a6e-9c58-1e2883028237_1220x1232.png 1272w, https://substackcdn.com/image/fetch/$s_!8ydi!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8cc9f136-cc75-4a6e-9c58-1e2883028237_1220x1232.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="file-embed-wrapper" data-component-name="FileToDOM"><div class="file-embed-container-reader"><div 
class="file-embed-container-top"><image class="file-embed-thumbnail-default" src="https://substackcdn.com/image/fetch/$s_!0Cy0!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack.com%2Fimg%2Fattachment_icon.svg"></image><div class="file-embed-details"><div class="file-embed-details-h1">ZKLoRA Paper</div><div class="file-embed-details-h2">331KB &#8729; PDF file</div></div><a class="file-embed-button wide" href="https://blog.bagel.com/api/v1/file/b2729a4d-0178-4d35-b51f-0757e878b08c.pdf"><span class="file-embed-button-text">Download</span></a></div><a class="file-embed-button narrow" href="https://blog.bagel.com/api/v1/file/b2729a4d-0178-4d35-b51f-0757e878b08c.pdf"><span class="file-embed-button-text">Download</span></a></div></div><div><hr></div><p><br>We are Bagel Labs, a distributed machine learning research lab.</p><p>We believe ZKLoRA kickstarts a new era for verifiable model training across untrusted networks.</p>]]></content:encoded></item><item><title><![CDATA[Train Fast, But Think Slow]]></title><description><![CDATA[Inference Time Compute vs Training Time Compute for AI Reasoning]]></description><link>https://blog.bagel.com/p/train-fast-but-think-slow</link><guid isPermaLink="false">https://blog.bagel.com/p/train-fast-but-think-slow</guid><dc:creator><![CDATA[Bidhan Roy]]></dc:creator><pubDate>Wed, 06 Nov 2024 14:59:55 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!KofN!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F43b0961c-abcd-48b8-b44c-8f66735d6c85_2245x1587.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>AI is like fire.</p><p>We have had radical technological advancements in recent history. Social media, augmented reality, platform shifts like web, mobile. But AI is way more significant of a technology. It is as significant as the discovery of fire. 
It has the potential to change the trajectory of the evolution of our species.</p><p>One of the holy grails of unlocking this potential of AI is to build systems that can reason like humans. By improving AI's, Large Language Models in particular, ability to break down complex problems and apply logical steps.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!KofN!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F43b0961c-abcd-48b8-b44c-8f66735d6c85_2245x1587.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!KofN!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F43b0961c-abcd-48b8-b44c-8f66735d6c85_2245x1587.png 424w, https://substackcdn.com/image/fetch/$s_!KofN!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F43b0961c-abcd-48b8-b44c-8f66735d6c85_2245x1587.png 848w, https://substackcdn.com/image/fetch/$s_!KofN!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F43b0961c-abcd-48b8-b44c-8f66735d6c85_2245x1587.png 1272w, https://substackcdn.com/image/fetch/$s_!KofN!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F43b0961c-abcd-48b8-b44c-8f66735d6c85_2245x1587.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!KofN!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F43b0961c-abcd-48b8-b44c-8f66735d6c85_2245x1587.png" width="1456" height="1029" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/43b0961c-abcd-48b8-b44c-8f66735d6c85_2245x1587.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1029,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:4701335,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!KofN!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F43b0961c-abcd-48b8-b44c-8f66735d6c85_2245x1587.png 424w, https://substackcdn.com/image/fetch/$s_!KofN!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F43b0961c-abcd-48b8-b44c-8f66735d6c85_2245x1587.png 848w, https://substackcdn.com/image/fetch/$s_!KofN!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F43b0961c-abcd-48b8-b44c-8f66735d6c85_2245x1587.png 1272w, https://substackcdn.com/image/fetch/$s_!KofN!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F43b0961c-abcd-48b8-b44c-8f66735d6c85_2245x1587.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Bagel's research team has been exploring this problem. Analyzing LLM building techniques, especially fine-tuning techniques, to allow Large Language Models to evolve from pattern-recognizing prediction agents to true cognitive agents. Our deep research spanned three major types of reasoning, aka intelligence: <strong>arithmetic</strong>, <strong>commonsense</strong>, and <strong>symbolic</strong>.</p><p>Today, we're sharing our findings. This research targets the core of what we believe to be the ultimate AI evolution, human-level reasoning. Or beyond (God level?).</p><p>We have explored techniques for the training and fine-tuning phases of model development. We have also ventured into the absolutely fascinating world of inference-time reasoning. 
This is where LLMs can be built or fine-tuned to generate novel solutions during inference, even if the solutions aren't part of their training dataset.</p><p><em>Dive in.<strong> And if you're in a rush, we have a TLDR at the end.</strong></em></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://blog.bagel.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption"><em>Thanks for reading Bagel! Subscribe for free to join our <strong>30,000+</strong> readers.</em>&#65279;</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><div><hr></div><h2>Types of Reasoning</h2><p>Varied types of reasoning tasks stretch AI's abilities. First, let's understand how they're defined.</p><p><strong>Arithmetic reasoning</strong> pushes machine learning to test problem-solving in a clear way. It forces models to break down problems. Choose from many strategies. Connect steps to find solutions. This makes math different. It shows exactly how well models can grasp details. And use the right solution steps in order.</p><p><strong>Commonsense reasoning</strong> upends our expectations. Models must understand the strange logic of everyday life. The challenges emerge when systems face the quirks of human interactions. The implicit rules we take for granted. For example, a door opens before you walk through. Time flows forward not backward. Water makes things wet. 
These obvious truths become complex puzzles for artificial systems to unravel.</p><p><strong>Symbolic reasoning</strong> flips the script on traditional machine learning. While neural networks excel at fuzzy pattern matching, symbols demand precision. Models must follow strict rules. Manipulate abstract concepts. Chain logical steps. Like a careful mathematician rather than an intuitive artist. The symbols hold no inherent meaning. Yet through them, we build towers of logic that reach toward human-level reasoning.</p><p>Beyond these core types, reasoning takes many forms. <strong>Logical deduction</strong> draws rigid conclusions while induction makes creative leaps. <strong>Causal reasoning</strong> traces the hidden threads between actions and consequences. <strong>Multimodal reasoning</strong> juggles text, images, and data in a complex combination of understanding. Knowledge graphs map the relationships between facts. Yet all serve one goal - moving AI from pattern matching toward true comprehension. From memorized responses to novel insights. From prediction to understanding.</p><p>Below, we look into training-time and inference-time approaches to enhance these types of reasoning.</p><div><hr></div><h2>1. Training Time Approaches</h2><h3>1.1. Fine-Tuning Approaches</h3><h4>Parameter Efficient Fine-Tuning (PEFT)</h4><p><strong>How it works:</strong> PEFT reverses traditional model adaptation <a href="https://arxiv.org/abs/2304.01933">(Hu et al. 2023)</a>. Four methods reveal new techniques.</p><p>Prompt-based learning embeds adjustable signals into frozen models. Prefix-tuning and P-tuning introduce small changes. These changes alter outputs without altering the main model.</p><p>Reparametrization methods like LoRA simplify complex weight matrices. They turn large updates into efficient low-rank forms. LoRA captures patterns from high-dimensional spaces with minimal adjustment.</p><p>Adapters create extra neural pathways. 
Series adapters stack, each layer adjusting outputs gradually. Parallel adapters develop side skills, keeping the base intact.</p><p>Adapter placement is key. Series adapters fit after MLP layers. Parallel adapters excel within them. LoRA touches both attention and MLP layers. Each method targets the right spot.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!AUS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F45b81123-bfda-4832-ab75-287da30364c8_12308x5306.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!AUS7!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F45b81123-bfda-4832-ab75-287da30364c8_12308x5306.png 424w, https://substackcdn.com/image/fetch/$s_!AUS7!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F45b81123-bfda-4832-ab75-287da30364c8_12308x5306.png 848w, https://substackcdn.com/image/fetch/$s_!AUS7!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F45b81123-bfda-4832-ab75-287da30364c8_12308x5306.png 1272w, https://substackcdn.com/image/fetch/$s_!AUS7!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F45b81123-bfda-4832-ab75-287da30364c8_12308x5306.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!AUS7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F45b81123-bfda-4832-ab75-287da30364c8_12308x5306.png" width="1456" height="628" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/45b81123-bfda-4832-ab75-287da30364c8_12308x5306.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:628,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2352673,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!AUS7!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F45b81123-bfda-4832-ab75-287da30364c8_12308x5306.png 424w, https://substackcdn.com/image/fetch/$s_!AUS7!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F45b81123-bfda-4832-ab75-287da30364c8_12308x5306.png 848w, https://substackcdn.com/image/fetch/$s_!AUS7!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F45b81123-bfda-4832-ab75-287da30364c8_12308x5306.png 1272w, https://substackcdn.com/image/fetch/$s_!AUS7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F45b81123-bfda-4832-ab75-287da30364c8_12308x5306.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>Why it's useful:</strong> PEFT reduces resource demands. Large models gain new abilities without major changes. PEFT preserves the base while adding specialized skills. Hardware that struggled with fine-tuning now handles complex updates.</p><p><strong>Tradeoffs:</strong> Not all tasks fit PEFT. Some models need deeper changes. Base model limitations still exist. Combining methods is tricky. PEFT may struggle with very complex tasks.</p><h4>WizardMath</h4><p><strong>How it works:</strong> WizardMath learns in three distinct steps <a href="https://arxiv.org/abs/2308.09583">(Luo et al., 2023)</a>.</p><p>First is supervised fine-tuning. Here, the model picks up raw mathematical patterns. It starts recognizing basic structures. Patterns get mapped to solutions. This step builds intuition for common operations. The foundation is set.</p><p>Next, instruction reward models refine the process. They judge both answers and methods. These models look for efficiency. 
They guide the model toward elegant solutions. The focus shifts from correctness to quality.</p><p>Finally, PPO-based reinforcement learning enhances problem-solving. The model tests ideas, adapts, and improves. Evol-Instruct feedback loops refine its logic with each run <a href="https://arxiv.org/abs/2304.12244">(Xu et al. 2023)</a>. It gets better at selecting strategies.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!kBJn!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d966ce5-0428-4fa5-9e26-8b17f622ed77_755x554.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!kBJn!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d966ce5-0428-4fa5-9e26-8b17f622ed77_755x554.png 424w, https://substackcdn.com/image/fetch/$s_!kBJn!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d966ce5-0428-4fa5-9e26-8b17f622ed77_755x554.png 848w, https://substackcdn.com/image/fetch/$s_!kBJn!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d966ce5-0428-4fa5-9e26-8b17f622ed77_755x554.png 1272w, https://substackcdn.com/image/fetch/$s_!kBJn!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d966ce5-0428-4fa5-9e26-8b17f622ed77_755x554.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!kBJn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d966ce5-0428-4fa5-9e26-8b17f622ed77_755x554.png" width="755" height="554" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3d966ce5-0428-4fa5-9e26-8b17f622ed77_755x554.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:554,&quot;width&quot;:755,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:63146,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!kBJn!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d966ce5-0428-4fa5-9e26-8b17f622ed77_755x554.png 424w, https://substackcdn.com/image/fetch/$s_!kBJn!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d966ce5-0428-4fa5-9e26-8b17f622ed77_755x554.png 848w, https://substackcdn.com/image/fetch/$s_!kBJn!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d966ce5-0428-4fa5-9e26-8b17f622ed77_755x554.png 1272w, https://substackcdn.com/image/fetch/$s_!kBJn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d966ce5-0428-4fa5-9e26-8b17f622ed77_755x554.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Source <a href="https://arxiv.org/abs/2308.09583">Luo et al. (2023)</a></figcaption></figure></div><p><strong>Why it's Useful:</strong> Most models just match patterns. WizardMath thinks in logical steps. It breaks down problems like a mathematician. It selects methods based on understanding, not memory. This leads to solutions that are both effective and precise.</p><p><strong>Tradeoffs:</strong> Training WizardMath takes heavy computational resources. Its deep math focus limits general use. Low-quality data can introduce errors. Practical solutions can sometimes lose to elegant ones.</p><h4>Divergent Chain of Thought (DCoT)</h4><p><strong>How it works:</strong> DCoT breaks the single-path approach <a href="https://arxiv.org/abs/2407.03181">(Puerto et al. 2024)</a>. Multiple paths form at once. Each one tackles the problem differently. 
Yet all conclude in a single inference.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!o_W9!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59964608-2139-442f-a802-0e25055033e7_12254x3596.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!o_W9!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59964608-2139-442f-a802-0e25055033e7_12254x3596.png 424w, https://substackcdn.com/image/fetch/$s_!o_W9!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59964608-2139-442f-a802-0e25055033e7_12254x3596.png 848w, https://substackcdn.com/image/fetch/$s_!o_W9!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59964608-2139-442f-a802-0e25055033e7_12254x3596.png 1272w, https://substackcdn.com/image/fetch/$s_!o_W9!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59964608-2139-442f-a802-0e25055033e7_12254x3596.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!o_W9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59964608-2139-442f-a802-0e25055033e7_12254x3596.png" width="1456" height="427" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/59964608-2139-442f-a802-0e25055033e7_12254x3596.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:427,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2144820,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!o_W9!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59964608-2139-442f-a802-0e25055033e7_12254x3596.png 424w, https://substackcdn.com/image/fetch/$s_!o_W9!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59964608-2139-442f-a802-0e25055033e7_12254x3596.png 848w, https://substackcdn.com/image/fetch/$s_!o_W9!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59964608-2139-442f-a802-0e25055033e7_12254x3596.png 1272w, https://substackcdn.com/image/fetch/$s_!o_W9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59964608-2139-442f-a802-0e25055033e7_12254x3596.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Zero-shot generation creates diverse solutions. Every path seeks the truth. Each follows its own course. Some are direct. Others are more complex. All valid. The model acts like a group of experts. Each path offers a different view.</p><p>These paths then interact. Strong strategies merge. Weak points become clear. The model learns to assess its own reasoning. It compares methods. It blends insights. All this happens without extra training.</p><p><strong>Why it's Useful:</strong> Multiple paths offer built-in validation. When paths align, certainty rises. When they don&#8217;t, issues appear. Different views reveal hidden details. Diversity deepens understanding.</p><p><strong>Tradeoffs:</strong> More paths need more computation. Balancing variety and consistency is tricky. Conflicting paths need resolving. For simple tasks, it's overkill. A group isn't always better than one.</p><h3>1.2. 
Pre-training and Knowledge Transfer</h3><h4>Continued Pre-training</h4><p><strong>How it works:</strong> Models like Galactica <a href="https://arxiv.org/abs/2211.09085">(Taylor et al. 2022)</a> and MINERVA <a href="https://arxiv.org/abs/2206.14858">(Lewkowycz et al. 2022)</a> go beyond standard training. They learn from over 100 billion tokens of scientific data. This includes mathematical papers, scientific articles, and technical documentation. Raw data is converted into structured knowledge.</p><p>Galactica includes tokens for specific scientific terms. It treats citations as part of the vocabulary. Chemical formulas become meaningful. Mathematical symbols are treated like tools. It learns the language of science.</p><p>MINERVA focuses on quantitative reasoning. It answers natural language questions in physics, chemistry, and economics. Converts questions into math formulas. Uses LaTeX to present detailed solutions. It performs the calculations on its own.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!acNj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6135f6f7-5005-4de5-b0c9-46ffa98e7127_13179x10482.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!acNj!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6135f6f7-5005-4de5-b0c9-46ffa98e7127_13179x10482.png 424w, https://substackcdn.com/image/fetch/$s_!acNj!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6135f6f7-5005-4de5-b0c9-46ffa98e7127_13179x10482.png 848w, 
https://substackcdn.com/image/fetch/$s_!acNj!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6135f6f7-5005-4de5-b0c9-46ffa98e7127_13179x10482.png 1272w, https://substackcdn.com/image/fetch/$s_!acNj!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6135f6f7-5005-4de5-b0c9-46ffa98e7127_13179x10482.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!acNj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6135f6f7-5005-4de5-b0c9-46ffa98e7127_13179x10482.png" width="1456" height="1158" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6135f6f7-5005-4de5-b0c9-46ffa98e7127_13179x10482.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1158,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3258014,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!acNj!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6135f6f7-5005-4de5-b0c9-46ffa98e7127_13179x10482.png 424w, https://substackcdn.com/image/fetch/$s_!acNj!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6135f6f7-5005-4de5-b0c9-46ffa98e7127_13179x10482.png 848w, 
https://substackcdn.com/image/fetch/$s_!acNj!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6135f6f7-5005-4de5-b0c9-46ffa98e7127_13179x10482.png 1272w, https://substackcdn.com/image/fetch/$s_!acNj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6135f6f7-5005-4de5-b0c9-46ffa98e7127_13179x10482.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>Why it's useful:</strong> Smaller models can surpass larger ones in specific fields. They grasp complex math. 
Work with technical notation naturally. The gap between general models and experts shrinks.</p><p><strong>Tradeoffs:</strong> Training costs rise. Each field requires massive new data. As new knowledge grows, old knowledge fades. Balancing focus and breadth is hard. It might be great at physics but weak in other areas.</p><h4>Curriculum Learning</h4><p><strong>How it Works:</strong> Learning transforms from random sampling to structured progression <a href="https://aclanthology.org/2022.naacl-main.72/">(Maharana &amp; Bansal 2022)</a>. Like evolution, but guided. Deliberate. Purposeful.</p><p>A teacher network ranks training samples. Easy concepts come first. Complex ideas build on simple ones. The pacing function controls the flow of knowledge. Sometimes fixed. Sometimes adaptive. Responds to the model's growing understanding.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!j1RA!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9c0f254-ebcf-4061-bc68-e02e84f22c5b_12469x2476.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!j1RA!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9c0f254-ebcf-4061-bc68-e02e84f22c5b_12469x2476.png 424w, https://substackcdn.com/image/fetch/$s_!j1RA!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9c0f254-ebcf-4061-bc68-e02e84f22c5b_12469x2476.png 848w, https://substackcdn.com/image/fetch/$s_!j1RA!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9c0f254-ebcf-4061-bc68-e02e84f22c5b_12469x2476.png 1272w, 
https://substackcdn.com/image/fetch/$s_!j1RA!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9c0f254-ebcf-4061-bc68-e02e84f22c5b_12469x2476.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!j1RA!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9c0f254-ebcf-4061-bc68-e02e84f22c5b_12469x2476.png" width="1456" height="289" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f9c0f254-ebcf-4061-bc68-e02e84f22c5b_12469x2476.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:289,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1388543,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!j1RA!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9c0f254-ebcf-4061-bc68-e02e84f22c5b_12469x2476.png 424w, https://substackcdn.com/image/fetch/$s_!j1RA!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9c0f254-ebcf-4061-bc68-e02e84f22c5b_12469x2476.png 848w, https://substackcdn.com/image/fetch/$s_!j1RA!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9c0f254-ebcf-4061-bc68-e02e84f22c5b_12469x2476.png 1272w, 
https://substackcdn.com/image/fetch/$s_!j1RA!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9c0f254-ebcf-4061-bc68-e02e84f22c5b_12469x2476.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Three methods measure sample difficulty. Question Answering Probability tracks how often the model succeeds. Model Variability watches for consistent responses. Energy-based scoring identifies outliers and edge cases. The curriculum adapts based on these signals.</p><p><strong>Why it's Useful:</strong> Models learn more efficiently. They build strong foundations before tackling complexity. Understanding grows naturally. Organically. Each concept reinforces the last. Difficult ideas become manageable when approached in sequence.</p><p><strong>Tradeoffs:</strong> Designing effective curricula challenges even experts. Learning time stretches longer. Some concepts resist ordered progression. The path from simple to complex isn't always clear. Sometimes chaos teaches better than order.</p><h4>CoT Knowledge Distillation</h4><p><strong>How it Works:</strong> Large models become teachers. Small models become students. Knowledge transfers through carefully curated examples <a href="https://arxiv.org/abs/2212.08410">(Magister et al. 2023)</a>.</p><p>The process splits into two phases. First, generate Chain of Thought data. Large models solve problems step by step. Show their work. Create a roadmap of reasoning. Only correct solutions make the cut. Quality matters more than quantity.</p><p>Then comes student fine-tuning. Small models learn from these examples. They see not just answers but thinking processes. The target answer guides early steps. This prevents small errors from derailing entire solutions. 
Teacher forcing ensures the student stays on track.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!FQ4Q!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd01e171a-ef2a-4411-84d3-aef09635d3ca_12427x3046.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!FQ4Q!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd01e171a-ef2a-4411-84d3-aef09635d3ca_12427x3046.png 424w, https://substackcdn.com/image/fetch/$s_!FQ4Q!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd01e171a-ef2a-4411-84d3-aef09635d3ca_12427x3046.png 848w, https://substackcdn.com/image/fetch/$s_!FQ4Q!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd01e171a-ef2a-4411-84d3-aef09635d3ca_12427x3046.png 1272w, https://substackcdn.com/image/fetch/$s_!FQ4Q!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd01e171a-ef2a-4411-84d3-aef09635d3ca_12427x3046.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!FQ4Q!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd01e171a-ef2a-4411-84d3-aef09635d3ca_12427x3046.png" width="1456" height="357" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d01e171a-ef2a-4411-84d3-aef09635d3ca_12427x3046.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:357,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1408897,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!FQ4Q!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd01e171a-ef2a-4411-84d3-aef09635d3ca_12427x3046.png 424w, https://substackcdn.com/image/fetch/$s_!FQ4Q!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd01e171a-ef2a-4411-84d3-aef09635d3ca_12427x3046.png 848w, https://substackcdn.com/image/fetch/$s_!FQ4Q!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd01e171a-ef2a-4411-84d3-aef09635d3ca_12427x3046.png 1272w, https://substackcdn.com/image/fetch/$s_!FQ4Q!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd01e171a-ef2a-4411-84d3-aef09635d3ca_12427x3046.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p><strong>Why it's Useful:</strong> Advanced reasoning becomes accessible to smaller models. Complex problem-solving skills transfer efficiently. Small models learn to think clearly with limited resources. 
They gain the wisdom of larger models without the computational burden.</p><p><strong>Tradeoffs:</strong> Some sophistication gets lost in translation. Students never quite match their teachers. The distillation process demands careful curation. Bad examples can teach bad habits. The balance between compression and comprehension remains delicate.</p><div><hr></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://discord.gg/bagelnet&quot;,&quot;text&quot;:&quot;Join Bagel Community&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://discord.gg/bagelnet"><span>Join Bagel Community</span></a></p><div><hr></div><h2>2. Inference Time Approaches</h2><h3>2.1. Chain-Based Methods</h3><h4>Chain of Thought (CoT)</h4><p><strong>How it works:</strong> <a href="https://arxiv.org/abs/2201.11903">Wei et al.</a> redefined reasoning with their 2022 paper. They introduced language models to step-by-step problem-solving using just eight examples. 
These guided models unlock hidden potential.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Mvb6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F057aae90-c73d-4a7a-96d0-a8896dcc27c3_12526x2795.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Mvb6!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F057aae90-c73d-4a7a-96d0-a8896dcc27c3_12526x2795.png 424w, https://substackcdn.com/image/fetch/$s_!Mvb6!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F057aae90-c73d-4a7a-96d0-a8896dcc27c3_12526x2795.png 848w, https://substackcdn.com/image/fetch/$s_!Mvb6!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F057aae90-c73d-4a7a-96d0-a8896dcc27c3_12526x2795.png 1272w, https://substackcdn.com/image/fetch/$s_!Mvb6!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F057aae90-c73d-4a7a-96d0-a8896dcc27c3_12526x2795.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Mvb6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F057aae90-c73d-4a7a-96d0-a8896dcc27c3_12526x2795.png" width="1456" height="325" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/057aae90-c73d-4a7a-96d0-a8896dcc27c3_12526x2795.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:325,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1535340,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Mvb6!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F057aae90-c73d-4a7a-96d0-a8896dcc27c3_12526x2795.png 424w, https://substackcdn.com/image/fetch/$s_!Mvb6!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F057aae90-c73d-4a7a-96d0-a8896dcc27c3_12526x2795.png 848w, https://substackcdn.com/image/fetch/$s_!Mvb6!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F057aae90-c73d-4a7a-96d0-a8896dcc27c3_12526x2795.png 1272w, https://substackcdn.com/image/fetch/$s_!Mvb6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F057aae90-c73d-4a7a-96d0-a8896dcc27c3_12526x2795.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>With precise prompts, models show their internal reasoning. No need for new training or changes to the model. This latent capability is accessed by using strategic examples.</p><p>The models learn to break down problems into logical steps that mimic human thinking. Each step becomes clear. 
The internal thought process shifts from a black box to a visible sequence.</p><p>This approach scaled well. PaLM, with chain-of-thought prompting, hit 75.6% on StrategyQA. Even sports questions saw 95.4% accuracy, surpassing human experts. Complex math problems were solved with clear, step-by-step reasoning. In commonsense tasks, hidden assumptions surfaced in natural language. Symbolic problems became easy to follow.</p><p><strong>Why it's useful:</strong> Wei et al.'s work showed breakthroughs across fields. In a manual review of correct answers from LaMDA 137B, 96% were backed by sound reasoning. Problem-solving became transparent. Larger models produced more coherent explanations.</p><p><strong>Tradeoffs:</strong> Reasoning sometimes fails. Models can get confused. Wei et al.&#8217;s error analysis showed that 46% of wrong answers had minor mistakes, while 54% had major logical errors. Sequential reasoning can hit barriers. Complex tasks push models to their limits.</p><h4>Program of Thought (PoT)</h4><p><strong>How it works:</strong> <a href="https://arxiv.org/abs/2211.12588">Chen et al.'s 2022</a> work changed how models approach math. They turned natural language into executable programs that solve complex problems with machine-level precision.</p><p>The process is seamless. Word problems convert directly into Python code. Variables capture key details from the text. Functions embody solution strategies. Algorithms emerge from simple descriptions. 
The model coordinates external tools with precision.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!QaY-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c4f518b-0f8c-4465-a9f8-429bffb43db1_12809x7778.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!QaY-!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c4f518b-0f8c-4465-a9f8-429bffb43db1_12809x7778.png 424w, https://substackcdn.com/image/fetch/$s_!QaY-!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c4f518b-0f8c-4465-a9f8-429bffb43db1_12809x7778.png 848w, https://substackcdn.com/image/fetch/$s_!QaY-!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c4f518b-0f8c-4465-a9f8-429bffb43db1_12809x7778.png 1272w, https://substackcdn.com/image/fetch/$s_!QaY-!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c4f518b-0f8c-4465-a9f8-429bffb43db1_12809x7778.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!QaY-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c4f518b-0f8c-4465-a9f8-429bffb43db1_12809x7778.png" width="1456" height="884" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8c4f518b-0f8c-4465-a9f8-429bffb43db1_12809x7778.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:884,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2365539,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!QaY-!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c4f518b-0f8c-4465-a9f8-429bffb43db1_12809x7778.png 424w, https://substackcdn.com/image/fetch/$s_!QaY-!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c4f518b-0f8c-4465-a9f8-429bffb43db1_12809x7778.png 848w, https://substackcdn.com/image/fetch/$s_!QaY-!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c4f518b-0f8c-4465-a9f8-429bffb43db1_12809x7778.png 1272w, https://substackcdn.com/image/fetch/$s_!QaY-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8c4f518b-0f8c-4465-a9f8-429bffb43db1_12809x7778.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>PoT set new records, improving math benchmarks by 8% in few-shot settings. In zero-shot, the gains were 12%. The code tells a story with structured logic. Control flows mirror human thought. Programs serve as both solution and explanation.</p><p>PAL expanded on this. <a href="https://arxiv.org/abs/2211.10435">Gao et al. in 2023</a> showed how models could use Python interpreters for better reasoning. Complex calculations became sharper. Formal math operations translated into natural expression.</p><p><strong>Why it's useful:</strong> Precision dominates. Math problems flow into code. The model combines high-level reasoning with computational accuracy. It's like a mathematician working alongside a supercomputer.</p><p><strong>Tradeoffs:</strong> Some problems don't translate well into code. Executing programs raises security concerns. The model must handle both natural language and code, increasing the risk of errors.</p><h3>2.2. 
Consistency and Verification Methods</h3><h4>Self-Consistency (SC)</h4><p><strong>How it works:</strong> <a href="https://arxiv.org/abs/2203.11171">Wang et al.</a> introduced SC in 2022, shifting from greedy decoding to statistical sampling. This method changes inference entirely.</p><p>Instead of one solution, each step produces multiple paths. SC explores various reasoning attempts at once. The decoder samples different trajectories in the probability space. Errors are reduced by repeating steps, leading to validation through sampling.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!lsDw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F39d25474-2a9f-4519-a575-9d03e62eb82b_12427x4767.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!lsDw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F39d25474-2a9f-4519-a575-9d03e62eb82b_12427x4767.png 424w, https://substackcdn.com/image/fetch/$s_!lsDw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F39d25474-2a9f-4519-a575-9d03e62eb82b_12427x4767.png 848w, https://substackcdn.com/image/fetch/$s_!lsDw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F39d25474-2a9f-4519-a575-9d03e62eb82b_12427x4767.png 1272w, https://substackcdn.com/image/fetch/$s_!lsDw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F39d25474-2a9f-4519-a575-9d03e62eb82b_12427x4767.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!lsDw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F39d25474-2a9f-4519-a575-9d03e62eb82b_12427x4767.png" width="1456" height="559" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/39d25474-2a9f-4519-a575-9d03e62eb82b_12427x4767.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:559,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2011625,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!lsDw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F39d25474-2a9f-4519-a575-9d03e62eb82b_12427x4767.png 424w, https://substackcdn.com/image/fetch/$s_!lsDw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F39d25474-2a9f-4519-a575-9d03e62eb82b_12427x4767.png 848w, https://substackcdn.com/image/fetch/$s_!lsDw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F39d25474-2a9f-4519-a575-9d03e62eb82b_12427x4767.png 1272w, https://substackcdn.com/image/fetch/$s_!lsDw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F39d25474-2a9f-4519-a575-9d03e62eb82b_12427x4767.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" 
class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>SC&#8217;s statistical foundation is strong. It marginalizes over samples to minimize errors in individual paths. Think of it like quantum mechanics: multiple paths exist, and truth emerges from the statistical patterns.</p><p>Their approach was groundbreaking. The decoder generates n unique reasoning chains, each following a different probability path. Final answers come from majority voting, but the process goes beyond simple counts.</p><p>Wang's team tested models from UL2-20B to PaLM-540B. Accuracy increased across the board. Smaller models showed the most improvement, indicating SC unlocks hidden potential in models of all sizes.</p><p><strong>Why it's useful:</strong> Numbers tell the story. Multiple paths automatically validate answers. 
Different paths catch edge cases. Robustness increases as more paths are explored. Quantity becomes quality.</p><p><strong>Tradeoffs:</strong> Computation grows costly. Each path demands resources. Memory use spikes. Contradictory paths sometimes arise. Resolving these conflicts adds complexity.</p><h4>Self-Endorsement (SE)</h4><p><strong>How it works:</strong> <a href="https://arxiv.org/abs/2402.15631">Wang et al.&#8217;s 2024</a> paper introduced SE, a new verification method. The system generates diverse responses and then analyzes them. Facts are extracted, labeled, and compared. Cross-response validation assigns endorsement scores to each fact.</p><p>SE uses advanced fact extraction algorithms. Neural retrieval identifies key claims, and automatic cross-referencing helps the model distinguish strong facts from weaker ones. This statistical validation process drives the system.</p><p>High-scoring facts shape future outputs, while low-scoring ones lead to re-evaluation. Each pass refines the model&#8217;s response through consistency.</p><p>The fact extraction pipeline is highly technical. Named entity recognition identifies key elements, and relation extraction maps connections. 
All of this occurs without human input.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!v8Ww!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2823c389-9945-4800-9310-61b705827cea_12395x3071.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!v8Ww!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2823c389-9945-4800-9310-61b705827cea_12395x3071.png 424w, https://substackcdn.com/image/fetch/$s_!v8Ww!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2823c389-9945-4800-9310-61b705827cea_12395x3071.png 848w, https://substackcdn.com/image/fetch/$s_!v8Ww!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2823c389-9945-4800-9310-61b705827cea_12395x3071.png 1272w, https://substackcdn.com/image/fetch/$s_!v8Ww!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2823c389-9945-4800-9310-61b705827cea_12395x3071.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!v8Ww!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2823c389-9945-4800-9310-61b705827cea_12395x3071.png" width="1456" height="361" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2823c389-9945-4800-9310-61b705827cea_12395x3071.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:361,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1687409,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!v8Ww!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2823c389-9945-4800-9310-61b705827cea_12395x3071.png 424w, https://substackcdn.com/image/fetch/$s_!v8Ww!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2823c389-9945-4800-9310-61b705827cea_12395x3071.png 848w, https://substackcdn.com/image/fetch/$s_!v8Ww!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2823c389-9945-4800-9310-61b705827cea_12395x3071.png 1272w, https://substackcdn.com/image/fetch/$s_!v8Ww!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2823c389-9945-4800-9310-61b705827cea_12395x3071.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p><strong>Why it's useful:</strong> Accuracy improves. Hallucinations drop. The system validates its own facts. Confidence scores make responses more reliable.</p><p><strong>Tradeoffs:</strong> Processing takes longer. Fact extraction sometimes fails. Complex statements resist simple validation. 
Some valid facts get rejected if they don&#8217;t fit the statistical pattern.</p><h4>Least-to-Most Prompting (LM)</h4><p><strong>How it works:</strong> <a href="https://arxiv.org/abs/2205.10625">Zhou et al.</a> introduced LM in 2022, a system that breaks tasks into smaller parts and solves them step by step.</p><p>The process follows phases. First, the model analyzes the input. Next, it identifies sub-tasks. Then, it solves each part. Finally, it combines the results. Each phase builds on the previous one.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!fwnz!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F263c73f4-2993-4b73-b86e-41e28442ba37_12354x4072.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!fwnz!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F263c73f4-2993-4b73-b86e-41e28442ba37_12354x4072.png 424w, https://substackcdn.com/image/fetch/$s_!fwnz!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F263c73f4-2993-4b73-b86e-41e28442ba37_12354x4072.png 848w, https://substackcdn.com/image/fetch/$s_!fwnz!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F263c73f4-2993-4b73-b86e-41e28442ba37_12354x4072.png 1272w, https://substackcdn.com/image/fetch/$s_!fwnz!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F263c73f4-2993-4b73-b86e-41e28442ba37_12354x4072.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!fwnz!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F263c73f4-2993-4b73-b86e-41e28442ba37_12354x4072.png" width="1456" height="480" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/263c73f4-2993-4b73-b86e-41e28442ba37_12354x4072.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:480,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1796200,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!fwnz!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F263c73f4-2993-4b73-b86e-41e28442ba37_12354x4072.png 424w, https://substackcdn.com/image/fetch/$s_!fwnz!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F263c73f4-2993-4b73-b86e-41e28442ba37_12354x4072.png 848w, https://substackcdn.com/image/fetch/$s_!fwnz!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F263c73f4-2993-4b73-b86e-41e28442ba37_12354x4072.png 1272w, https://substackcdn.com/image/fetch/$s_!fwnz!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F263c73f4-2993-4b73-b86e-41e28442ba37_12354x4072.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" 
class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>For example, in the last-letter task with "cat dog bird," the model processes each word separately. It finds &#8216;t&#8217; from "cat," &#8216;g&#8217; from "dog," and &#8216;d&#8217; from "bird." Then, it combines them into "tgd." The model achieved 94% accuracy with four words and 74% even with twelve.</p><p>Errors are predictable. Sometimes letters drop during concatenation. Sometimes extras appear. But it rarely confuses the final letter of each word.</p><p><strong>Why it's useful:</strong> LM is highly efficient. It only needs two examples to work well. It uses fewer tokens than traditional methods, achieving equal or better results.</p><p>Scaling is impressive. 
The model handles sequences four times longer than its training examples without losing accuracy. Standard methods fail on long sequences, scoring 31.8% on twelve-word tests. LM hits 74%, with a growing advantage on harder tasks.</p><p><strong>Tradeoffs:</strong> Some tasks don't split easily. Certain problems need a different approach. The method requires more steps, which adds processing time.</p><p>Technical limits arise. The model must track partial solutions, and memory usage grows with longer sequences. Some tasks need several attempts to find the best split.</p><p>Careful planning is essential. The order of sub-tasks affects accuracy, and managing information efficiently becomes critical. The system must adapt its splitting strategy for different problems.</p><div><hr></div><h3>How to Test for Reasoning</h3><p>Cognitive science has studied human reasoning since experimental psychology emerged in the late 19th century. This field has been crucial for technological development, education improvement, cognitive disorder treatment, and better decision-making. Scientists use various tools to study reasoning, including problem-solving tasks, computational models, brain imaging (fMRI and EEG), and behavioral measurements like eye-tracking. These combined methods help researchers understand how humans reason.</p><p>Analogously, AI researchers have invented reasoning tasks to test the reasoning capabilities of LLMs in the form of special datasets. Because AI is more of an engineering and computer science field, these datasets provide a rigorous benchmark for testing AI systems. This allows researchers to measure a model&#8217;s accuracy and identify areas where it may be falling short.</p><p>A dataset that targets one type of reasoning should be diverse within that type, so that it tests tasks of varying complexity and nuance. 
For example, to evaluate language models' commonsense capabilities, a dataset like <a href="https://paperswithcode.com/dataset/arc">ARC</a> is used. The figure below shows a ranking of the best LLMs for the ARC-challenge dataset, taken from different sources.</p><p>Inference-time techniques appear in green, training-time techniques appear in orange, and standard base models appear in blue.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!eSlj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6e6d20db-e85a-4d66-a12d-7e129f9e25c5_11925x7145.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!eSlj!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6e6d20db-e85a-4d66-a12d-7e129f9e25c5_11925x7145.png 424w, https://substackcdn.com/image/fetch/$s_!eSlj!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6e6d20db-e85a-4d66-a12d-7e129f9e25c5_11925x7145.png 848w, https://substackcdn.com/image/fetch/$s_!eSlj!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6e6d20db-e85a-4d66-a12d-7e129f9e25c5_11925x7145.png 1272w, https://substackcdn.com/image/fetch/$s_!eSlj!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6e6d20db-e85a-4d66-a12d-7e129f9e25c5_11925x7145.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!eSlj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6e6d20db-e85a-4d66-a12d-7e129f9e25c5_11925x7145.png" width="1456" height="872" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6e6d20db-e85a-4d66-a12d-7e129f9e25c5_11925x7145.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:872,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2439854,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!eSlj!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6e6d20db-e85a-4d66-a12d-7e129f9e25c5_11925x7145.png 424w, https://substackcdn.com/image/fetch/$s_!eSlj!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6e6d20db-e85a-4d66-a12d-7e129f9e25c5_11925x7145.png 848w, https://substackcdn.com/image/fetch/$s_!eSlj!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6e6d20db-e85a-4d66-a12d-7e129f9e25c5_11925x7145.png 1272w, https://substackcdn.com/image/fetch/$s_!eSlj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6e6d20db-e85a-4d66-a12d-7e129f9e25c5_11925x7145.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" 
class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>In the image above,<strong> the best-performing techniques correspond to inference-time approaches</strong>; in particular, SC has a clear advantage over standard CoT. The fine-tuning approaches cannot match the inference-time approaches, which show a clear advantage.</p><div><hr></div><h2>TLDR</h2><p>Our research focuses on strengthening reasoning in Large Language Models (LLMs) in three ways. First, <strong>arithmetic reasoning</strong> - approaching math problems logically. Next, <strong>commonsense reasoning</strong> - grasping everyday situations and drawing conclusions. Finally, <strong>symbolic reasoning</strong> - handling abstract symbols by strict logic.</p><p>We explore two strategies to push these areas forward. 
First are <strong>training-time methods</strong>. These adjust the AI&#8217;s learning process, tailoring it to specific tasks at the cost of time and computing power. For example, <strong>WizardMath</strong> teaches detailed problem-solving for math, while <strong>PEFT</strong> (Parameter Efficient Fine-Tuning) builds skills without huge resources. <strong>DCoT</strong> (Divergent Chain of Thought) allows AI to consider multiple solutions simultaneously.</p><p>The second approach is <strong>inference-time methods</strong>. These enhance existing models without retraining, bringing quick improvements, though sometimes with less depth. <strong>Chain of Thought (CoT)</strong> prompts AI to explain each step it takes. <strong>Program of Thought (PoT)</strong> has AI write and run code to boost accuracy. <strong>Self-Consistency (SC)</strong> checks multiple paths to ensure reliable answers.</p><p>The table below summarizes our findings.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!8GDl!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59cb85b7-3fdf-47bb-b01a-a11abdc09143_2176x1632.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!8GDl!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59cb85b7-3fdf-47bb-b01a-a11abdc09143_2176x1632.png 424w, https://substackcdn.com/image/fetch/$s_!8GDl!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59cb85b7-3fdf-47bb-b01a-a11abdc09143_2176x1632.png 848w, 
https://substackcdn.com/image/fetch/$s_!8GDl!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59cb85b7-3fdf-47bb-b01a-a11abdc09143_2176x1632.png 1272w, https://substackcdn.com/image/fetch/$s_!8GDl!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59cb85b7-3fdf-47bb-b01a-a11abdc09143_2176x1632.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!8GDl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59cb85b7-3fdf-47bb-b01a-a11abdc09143_2176x1632.png" width="1456" height="1092" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/59cb85b7-3fdf-47bb-b01a-a11abdc09143_2176x1632.png&quot;,&quot;srcNoWatermark&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2f55577f-283e-4d55-be1f-3c3b22df7eda_2176x1632.png&quot;,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1092,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:101374,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!8GDl!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59cb85b7-3fdf-47bb-b01a-a11abdc09143_2176x1632.png 424w, https://substackcdn.com/image/fetch/$s_!8GDl!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59cb85b7-3fdf-47bb-b01a-a11abdc09143_2176x1632.png 848w, 
https://substackcdn.com/image/fetch/$s_!8GDl!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59cb85b7-3fdf-47bb-b01a-a11abdc09143_2176x1632.png 1272w, https://substackcdn.com/image/fetch/$s_!8GDl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59cb85b7-3fdf-47bb-b01a-a11abdc09143_2176x1632.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Techniques for Enhancing AI Reasoning</figcaption></figure></div><p>By open-sourcing our research on AI 
reasoning, our team at Bagel aims to collaborate with the Open Source AI community to forge humanity's next chapter.</p>]]></content:encoded></item><item><title><![CDATA[Machine Unlearning]]></title><description><![CDATA[Right to be forgotten by AI]]></description><link>https://blog.bagel.com/p/machine-unlearning</link><guid isPermaLink="false">https://blog.bagel.com/p/machine-unlearning</guid><dc:creator><![CDATA[Bidhan Roy]]></dc:creator><pubDate>Tue, 10 Sep 2024 13:37:49 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!BozS!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb2ba08e0-0628-4c79-af61-729ca6ee3c06_2245x1587.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>AI memory enters the era of selective forgetting. Machine unlearning, a novel technology, selectively erases information from trained models without full retraining.</p><p>It's the absolute cutting edge of AI model safety and data privacy.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!BozS!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb2ba08e0-0628-4c79-af61-729ca6ee3c06_2245x1587.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!BozS!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb2ba08e0-0628-4c79-af61-729ca6ee3c06_2245x1587.png 424w, https://substackcdn.com/image/fetch/$s_!BozS!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb2ba08e0-0628-4c79-af61-729ca6ee3c06_2245x1587.png 848w, 
https://substackcdn.com/image/fetch/$s_!BozS!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb2ba08e0-0628-4c79-af61-729ca6ee3c06_2245x1587.png 1272w, https://substackcdn.com/image/fetch/$s_!BozS!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb2ba08e0-0628-4c79-af61-729ca6ee3c06_2245x1587.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!BozS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb2ba08e0-0628-4c79-af61-729ca6ee3c06_2245x1587.png" width="1456" height="1029" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b2ba08e0-0628-4c79-af61-729ca6ee3c06_2245x1587.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1029,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:5534520,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!BozS!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb2ba08e0-0628-4c79-af61-729ca6ee3c06_2245x1587.png 424w, https://substackcdn.com/image/fetch/$s_!BozS!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb2ba08e0-0628-4c79-af61-729ca6ee3c06_2245x1587.png 848w, 
https://substackcdn.com/image/fetch/$s_!BozS!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb2ba08e0-0628-4c79-af61-729ca6ee3c06_2245x1587.png 1272w, https://substackcdn.com/image/fetch/$s_!BozS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb2ba08e0-0628-4c79-af61-729ca6ee3c06_2245x1587.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Microsoft researchers showcased this capability by <a href="https://arxiv.org/pdf/2310.02238">removing Harry Potter 
references</a> from Meta&#8217;s Llama 2 model. OpenAI researchers applied unlearning to eliminate biased content, enhancing model safety.</p><p>Unlearning addresses GDPR's "<a href="https://en.wikipedia.org/wiki/Right_to_be_forgotten#:~:text=The%20right%20to%20be%20forgotten%20is%20distinct%20from%20the%20right,publicly%20at%20a%20certain%20time.">right to be forgotten</a>" mandate. For example, <a href="https://research.google/blog/announcing-the-first-machine-unlearning-challenge/">Google held a machine unlearning competition</a> focusing on removing specific facial images from an AI model trained to predict age from images.</p><p>For cybersecurity, unlearning mitigates data poisoning risks. <a href="https://www.marktechpost.com/2024/02/05/this-ai-paper-from-ut-austin-and-jpmorgan-chase-unveils-a-novel-algorithm-for-machine-unlearning-in-image-to-image-generative-models/">JPMorgan Chase applied unlearning</a> to consumer systems, preserving customer privacy without compromising effectiveness.</p><p>The future of scalable AI systems lies in selective forgetting, not indiscriminate data accumulation. This breakthrough ushers in a new paradigm of AI privacy, where machines learn, unlearn, and relearn with unprecedented flexibility.</p><p>As we stand on this new frontier, we have researched different machine unlearning techniques that will lead us to a world where machines possess human-like memory malleability.</p><p><em><strong>And if you're in a rush, we have a TLDR at the end.</strong></em></p><div><hr></div><h1>What is Machine Unlearning</h1><p>Machine unlearning erases specific data from pre-trained models without full retraining. This process requires embedding unlearning mechanisms during initial model training.</p><p>We have broken down the techniques into four major categories: <strong>Exact</strong>, <strong>Approximate</strong>, <strong>Prompt-Based</strong>, and <strong>Decentralized</strong>.</p><ol><li><p><strong>Exact methods</strong> remove specific data precisely from AI models. 
They divide data or transform algorithms to target unwanted information. This approach deletes or updates only the necessary parts, avoiding full model retraining.</p></li><li><p><strong>Approximate methods</strong> take a broader approach. They fine-tune models on slightly altered data to reduce targeted information's influence. Some add controlled noise during training, limiting each data point's impact from the start.</p></li><li><p><strong>Prompt-Based techniques</strong> guide AI language models to "forget" without changing their core. They use smart word tricks to create examples that steer the AI away from unwanted info. The AI receives new instructions to ignore certain knowledge, without erasing its memory.</p></li><li><p><strong>Decentralized</strong> approaches erase data from AI systems spread across multiple devices. They remove information when a participant exits the network. These methods use compact models to efficiently spread the impact of this exit. This eliminates the leaving party's influence without retraining the entire system. The process maintains collaborative learning while enabling selective data removal.</p></li></ol><p>As these techniques evolve, machine unlearning becomes more effective, enabling models to forget specific information while maintaining strong performance.</p><div><hr></div><h1>1. Exact Methods for Machine Unlearning</h1><p>Exact unlearning removes specific data from machine learning models. It targets a subset called the "forget set" within the main dataset. The process creates a new model that performs nearly identically to the original. This technique deletes chosen data points rapidly, without the need for full retraining.</p><p>The updated model maintains high accuracy on remaining data. 
Think of it as selectively erasing memories from a human brain while preserving overall knowledge and functionality.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!bWVV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6cd9ce93-6887-4ecd-bd1d-68fa02004a32_12134x5002.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!bWVV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6cd9ce93-6887-4ecd-bd1d-68fa02004a32_12134x5002.png 424w, https://substackcdn.com/image/fetch/$s_!bWVV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6cd9ce93-6887-4ecd-bd1d-68fa02004a32_12134x5002.png 848w, https://substackcdn.com/image/fetch/$s_!bWVV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6cd9ce93-6887-4ecd-bd1d-68fa02004a32_12134x5002.png 1272w, https://substackcdn.com/image/fetch/$s_!bWVV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6cd9ce93-6887-4ecd-bd1d-68fa02004a32_12134x5002.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!bWVV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6cd9ce93-6887-4ecd-bd1d-68fa02004a32_12134x5002.png" width="1456" height="600" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6cd9ce93-6887-4ecd-bd1d-68fa02004a32_12134x5002.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:600,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3203877,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!bWVV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6cd9ce93-6887-4ecd-bd1d-68fa02004a32_12134x5002.png 424w, https://substackcdn.com/image/fetch/$s_!bWVV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6cd9ce93-6887-4ecd-bd1d-68fa02004a32_12134x5002.png 848w, https://substackcdn.com/image/fetch/$s_!bWVV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6cd9ce93-6887-4ecd-bd1d-68fa02004a32_12134x5002.png 1272w, https://substackcdn.com/image/fetch/$s_!bWVV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6cd9ce93-6887-4ecd-bd1d-68fa02004a32_12134x5002.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">General approach to exact machine unlearning.</figcaption></figure></div><p>We have broken down the exact methods into two major types.</p><ul><li><p>SISA.</p></li><li><p>Statistical Query.</p></li></ul><h2>1.1. SISA</h2><p>SISA (<em>Sharding</em>, <em>Isolation</em>, <em>Slicing</em>, and <em>Aggregation</em>) is an advanced algorithmic framework designed to accelerate machine unlearning processes <a href="https://arxiv.org/abs/1912.03817">(Bourtoule et al. 2019)</a>. 
This technique strategically limits the influence of individual data points during model training, facilitating efficient removal of specific data without necessitating complete model retraining.</p><h4><strong>SISA Architecture</strong></h4><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!wuiW!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb17f2ac-d851-467e-8f34-3551d0cfd929_12248x13821.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!wuiW!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb17f2ac-d851-467e-8f34-3551d0cfd929_12248x13821.png 424w, https://substackcdn.com/image/fetch/$s_!wuiW!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb17f2ac-d851-467e-8f34-3551d0cfd929_12248x13821.png 848w, https://substackcdn.com/image/fetch/$s_!wuiW!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb17f2ac-d851-467e-8f34-3551d0cfd929_12248x13821.png 1272w, https://substackcdn.com/image/fetch/$s_!wuiW!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb17f2ac-d851-467e-8f34-3551d0cfd929_12248x13821.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!wuiW!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb17f2ac-d851-467e-8f34-3551d0cfd929_12248x13821.png" width="1456" height="1643" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/bb17f2ac-d851-467e-8f34-3551d0cfd929_12248x13821.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1643,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:7437378,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!wuiW!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb17f2ac-d851-467e-8f34-3551d0cfd929_12248x13821.png 424w, https://substackcdn.com/image/fetch/$s_!wuiW!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb17f2ac-d851-467e-8f34-3551d0cfd929_12248x13821.png 848w, https://substackcdn.com/image/fetch/$s_!wuiW!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb17f2ac-d851-467e-8f34-3551d0cfd929_12248x13821.png 1272w, https://substackcdn.com/image/fetch/$s_!wuiW!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb17f2ac-d851-467e-8f34-3551d0cfd929_12248x13821.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">SISA data partitioning and aggregation.</figcaption></figure></div><p>The diagram shows a layered machine learning system. Training data forms the base, split into <em><strong>'n'</strong></em> shards (Shard <em><strong>1</strong></em> to Shard <em><strong>n</strong></em>). Each shard is an isolated subset of the dataset. Shards divide into <em><strong>'R'</strong></em> slices (slice <em><strong>1</strong></em> to slice <em><strong>R</strong></em>), enabling incremental training and unlearning.</p><p>Models <em><strong>1</strong></em> through <em><strong>n</strong></em>, shown as connected nodes, train on corresponding shards. These models constitute the <em>SISA (Sharded, Isolated, Sliced, Aggregated)</em> system core. This structure allows parallel processing and targeted updates.</p><p>An aggregation layer tops the system. It combines outputs from all models to generate the final prediction. 
This method leverages insights from different data subsets.</p><p>The design enhances data management and system flexibility. It processes large datasets efficiently while allowing precise adjustments. The architecture balances scalability with fine-tuned control.</p><h4><strong>SISA Training</strong></h4><p>The SISA training process operates systematically. The main dataset splits into <em><strong>'n'</strong></em> shards, each training a separate model. For each shard:</p><ol><li><p>The first slice initializes a new model, training for a preset number of epochs.</p></li><li><p>Subsequent slices continue training from the model's previous state.</p></li><li><p>The last slice finalizes training, producing the shard's constituent model.</p></li></ol><p>This slice-wise approach enables incremental learning. Models update efficiently as new data arrives. It also facilitates data removal without full retraining. The process repeats for all shards.</p><p>The final step assembles the complete SISA model by combining all trained constituent models. This structure allows for parallel processing and targeted updates.</p><h4><strong>SISA Inference</strong></h4><p>SISA inference runs input through all models (Model <em><strong>1</strong></em> to <em><strong>n</strong></em>). Each model produces a prediction. The system aggregates these predictions via majority voting. It selects the most common label as output.</p><p>This method reduces individual model biases. The aggregation layer functions as an ensemble. It combines insights from all data shards.</p><p>Voting can be weighted for more reliable models. This allows inference fine-tuning. 
The system can track confidence by measuring model agreement.</p><p>This enables parallel processing during inference and can reduce latency in large-scale applications.</p><h4><strong>SISA Unlearning Procedure</strong></h4><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!RcVE!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc4bc5720-e151-401d-96ff-dd1cc5c4f2b8_12222x14590.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!RcVE!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc4bc5720-e151-401d-96ff-dd1cc5c4f2b8_12222x14590.png 424w, https://substackcdn.com/image/fetch/$s_!RcVE!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc4bc5720-e151-401d-96ff-dd1cc5c4f2b8_12222x14590.png 848w, https://substackcdn.com/image/fetch/$s_!RcVE!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc4bc5720-e151-401d-96ff-dd1cc5c4f2b8_12222x14590.png 1272w, https://substackcdn.com/image/fetch/$s_!RcVE!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc4bc5720-e151-401d-96ff-dd1cc5c4f2b8_12222x14590.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!RcVE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc4bc5720-e151-401d-96ff-dd1cc5c4f2b8_12222x14590.png" width="1456" height="1738" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c4bc5720-e151-401d-96ff-dd1cc5c4f2b8_12222x14590.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1738,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:7695440,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!RcVE!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc4bc5720-e151-401d-96ff-dd1cc5c4f2b8_12222x14590.png 424w, https://substackcdn.com/image/fetch/$s_!RcVE!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc4bc5720-e151-401d-96ff-dd1cc5c4f2b8_12222x14590.png 848w, https://substackcdn.com/image/fetch/$s_!RcVE!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc4bc5720-e151-401d-96ff-dd1cc5c4f2b8_12222x14590.png 1272w, https://substackcdn.com/image/fetch/$s_!RcVE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc4bc5720-e151-401d-96ff-dd1cc5c4f2b8_12222x14590.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Deletion of data in SISA.</figcaption></figure></div><p>SISA unlearning locates the target data to be deleted in a specific shard and slice, shown as the red box in <em><strong>Shard 2</strong>, <strong>slice 2</strong></em>. The system then jumps back to a point just before this data was added. This is shown by the green outline around <em><strong>slice 2</strong></em> in <em><strong>Shard 2</strong></em>.</p><p>From this past state, SISA begins selective retraining. <strong>Model 2</strong> Retrain, in green, shows this process. It retrains using all data except the data to be unlearned.</p><p>Once complete, the retrained <strong>Model 2</strong> replaces its original version. The Aggregation layer at the top then updates to use this new model's output.</p><p>This selective retraining approach allows SISA to precisely remove specific data. 
It does this without disturbing other parts of the system, as <strong>Model 1</strong> and <strong>Model n</strong> remain unchanged.</p><h4><strong>SISA Advantages</strong></h4><ol><li><p><strong>Computational Efficiency:</strong> SISA significantly reduces the computational overhead of unlearning by limiting retraining to a single shard and subset of slices.</p></li><li><p><strong>Scalability:</strong> The sharding approach allows for parallel processing and efficient handling of large datasets.</p></li><li><p><strong>Isolation:</strong> By confining data points to specific shards and slices, SISA minimizes the impact of individual data removals on the overall model.</p></li><li><p><strong>Adaptability:</strong> The framework can be extended to various machine learning algorithms and data types.</p></li></ol><h4><strong>SISA-based Methods</strong></h4><p>Several adaptations of SISA have been developed for specific use cases.</p><ul><li><p><strong>DaRE Forest:</strong> A random forest variant using a two-level approach with random and greedy nodes, enabling efficient updates of subtrees upon data removal <a href="https://proceedings.mlr.press/v139/brophy21a.html">(Brophy &amp; Lowd 2021)</a>.</p></li><li><p><strong>HedgeCut:</strong> Focuses on low-latency unlearning in extremely randomized trees, introducing split robustness concepts <a href="https://dl.acm.org/doi/abs/10.1145/3448016.3457239">(Schelter et al. 2021)</a>.</p></li><li><p><strong>GraphEraser:</strong> Tailored for Graph Neural Networks (GNNs), incorporating balanced graph partition and optimized shard model importance scoring <a href="https://arxiv.org/abs/2103.14991">(Chen et al. 2021)</a>.</p></li><li><p><strong>RecEraser:</strong> Specialized for recommendation tasks, employing adaptive aggregation methods to combine sub-model predictions <a href="https://arxiv.org/abs/2201.06820">(Chen et al. 
2022)</a>.</p></li></ul><p>In conclusion, SISA represents a flexible and efficient framework for managing data removal requests in machine learning models. Its architecture and process flow, as illustrated in the images above, demonstrate a sophisticated approach to balancing model performance with data privacy and right-to-be-forgotten requirements.</p><h2>1.2. Statistical Query (SQ) Approach</h2><p><a href="https://ieeexplore.ieee.org/abstract/document/7163042">Cao &amp; Yang (2015)</a> introduced an innovative approach to machine unlearning, distinct from the SISA framework. This method uses statistical queries and summation forms to enable efficient unlearning without complete model retraining.</p><h4>SQ Core Concepts</h4><ol><li><p><strong>Summation Form Transformation</strong>: The key idea is transforming learning algorithms into a summation form. In this form, the algorithm relies on a small number of summations, each representing an efficiently computable transformation of the training data samples. These summations are saved alongside the trained model.</p></li><li><p><strong>Statistical Query (SQ) Learning</strong>: To achieve the summation form, <a href="https://ieeexplore.ieee.org/abstract/document/7163042">Cao &amp; Yang</a> employ Statistical Query (SQ) learning, introduced by <a href="https://vtaly.net/papers/Kearns93-2017.pdf">Kearns (1993)</a> as a refinement of the <a href="https://en.wikipedia.org/wiki/Probably_approximately_correct_learning">PAC</a> learning model. In SQ learning, the algorithm interacts with a statistical query oracle instead of directly examining individual examples. 
The learning algorithm queries the oracle with a function and a tolerance parameter, receiving an estimate of the expected value over a distribution of labeled examples.</p></li><li><p><strong>Formal Definition:</strong> More formally, the learning algorithm queries an oracle with a function <em><strong>&#958;:X x {0,1} &#8594; {0,1}</strong></em> and a tolerance parameter <em><strong>&#964;</strong></em>. The oracle responds with an estimate of the expected value of <em><strong>&#958;</strong></em> over a distribution <em><strong>D</strong></em> of labeled examples; this estimate is guaranteed to be within an additive error <em><strong>&#964;</strong></em> of the true value <em><strong>E[&#958;(x,y)]</strong></em>. The training algorithm only has access to these statistics; it does not directly access the input data.</p></li></ol><h4>How SQ Works</h4><p>The image below illustrates the process flow.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!rSo7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6b2433f-6763-40b7-83b4-20b08c762306_12371x5412.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!rSo7!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6b2433f-6763-40b7-83b4-20b08c762306_12371x5412.png 424w, https://substackcdn.com/image/fetch/$s_!rSo7!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6b2433f-6763-40b7-83b4-20b08c762306_12371x5412.png 848w, https://substackcdn.com/image/fetch/$s_!rSo7!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6b2433f-6763-40b7-83b4-20b08c762306_12371x5412.png 
1272w, https://substackcdn.com/image/fetch/$s_!rSo7!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6b2433f-6763-40b7-83b4-20b08c762306_12371x5412.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!rSo7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6b2433f-6763-40b7-83b4-20b08c762306_12371x5412.png" width="1456" height="637" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f6b2433f-6763-40b7-83b4-20b08c762306_12371x5412.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:637,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:4011671,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!rSo7!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6b2433f-6763-40b7-83b4-20b08c762306_12371x5412.png 424w, https://substackcdn.com/image/fetch/$s_!rSo7!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6b2433f-6763-40b7-83b4-20b08c762306_12371x5412.png 848w, https://substackcdn.com/image/fetch/$s_!rSo7!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6b2433f-6763-40b7-83b4-20b08c762306_12371x5412.png 1272w, 
https://substackcdn.com/image/fetch/$s_!rSo7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6b2433f-6763-40b7-83b4-20b08c762306_12371x5412.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Machine unlearning with statistical queries.</figcaption></figure></div><p>The image shows a sophisticated approach to machine learning and unlearning using Statistical Query (SQ) learning. 
Here's a technical breakdown:</p><ul><li><p><strong>Data Points</strong>: The left side shows input data points <em><strong>x&#8321;, x&#8322;, x&#8323;</strong></em>. These represent individual training examples from the dataset <em><strong>D</strong></em>.</p></li><li><p><strong>Statistical Queries</strong>: Dashed lines connect data points to <em><strong>q&#8321;</strong></em> and <em><strong>q&#8322;</strong></em>, representing statistical query functions <em><strong>&#958;:X x {0,1} &#8594; {0,1}</strong></em>. These queries transform raw data into relevant statistics.</p></li><li><p><strong>Summations:</strong> The <em><strong>&#931;</strong></em> symbols denote summation operators. They aggregate query results across the dataset, computing <em><strong>E[&#958;(x,y)]</strong></em> for each query function.</p></li><li><p><strong>Tolerance:</strong> While not explicitly shown, each summation has an associated tolerance parameter <em><strong>&#964;</strong></em>, defining the acceptable error margin for the computed statistic.</p></li><li><p><strong>Neural Network:</strong> The orange box on the right depicts a neural network architecture. It contains multiple layers (Input, Hidden, Output) interconnected by edges.</p></li><li><p><strong>SQ to Model Pipeline:</strong> Solid arrows from summations to the neural network illustrate how aggregated statistics directly inform model parameters or training.</p></li><li><p><strong>Unlearning Process:</strong> </p><ul><li><p>To forget a data point, the system updates stored summations by subtracting that point's contributions.</p></li><li><p>This involves simple arithmetic operations on the summations.</p></li><li><p>The model is then efficiently recomputed using these updated summations.</p></li></ul></li><li><p><strong>Unlearning Efficiency:</strong> This structure allows for efficient unlearning. 
Removing a data point only requires updating the summations, not retraining the entire model.</p></li><li><p><strong>Scalability:</strong> The approach scales well with large datasets, as the model depends on a fixed number of summations rather than individual data points.</p></li></ul><p>This SQ learning framework transforms complex learning algorithms into a summation form, enabling rapid updates and unlearning while maintaining model accuracy.</p><h4>SQ Technical Considerations</h4><p>The effectiveness of this method depends on expressing the learning algorithm in terms of statistical queries. The choice of queries and summations can significantly impact unlearning efficiency and model performance. There's a balance to strike between the granularity of summations and the efficiency of unlearning.</p><p>This non-SISA approach provides a novel perspective on machine unlearning, focusing on data representation and model dependency rather than architectural modifications. Its application in various machine learning contexts remains an active area of research and development.</p><div><hr></div><h1>2. Approximate Methods for Machine Unlearning</h1><p>While exact methods for machine unlearning are effective, they often require significant computational resources and storage overhead. <a href="https://ieeexplore.ieee.org/abstract/document/10488864">Xu et al. (2024)</a> present approximate methods as a more efficient alternative, especially for large datasets and complex models.</p><h3>General Approach <a href="https://ieeexplore.ieee.org/abstract/document/10488864">(Xu et al. 
2024)</a></h3><p>The process, as illustrated in the image, consists of four key steps:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!a5xJ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01ae0c04-3c0b-4a97-9c5e-4e3c845dd0b5_12222x4693.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!a5xJ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01ae0c04-3c0b-4a97-9c5e-4e3c845dd0b5_12222x4693.png 424w, https://substackcdn.com/image/fetch/$s_!a5xJ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01ae0c04-3c0b-4a97-9c5e-4e3c845dd0b5_12222x4693.png 848w, https://substackcdn.com/image/fetch/$s_!a5xJ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01ae0c04-3c0b-4a97-9c5e-4e3c845dd0b5_12222x4693.png 1272w, https://substackcdn.com/image/fetch/$s_!a5xJ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01ae0c04-3c0b-4a97-9c5e-4e3c845dd0b5_12222x4693.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!a5xJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01ae0c04-3c0b-4a97-9c5e-4e3c845dd0b5_12222x4693.png" width="1456" height="559" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/01ae0c04-3c0b-4a97-9c5e-4e3c845dd0b5_12222x4693.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:559,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2461331,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!a5xJ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01ae0c04-3c0b-4a97-9c5e-4e3c845dd0b5_12222x4693.png 424w, https://substackcdn.com/image/fetch/$s_!a5xJ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01ae0c04-3c0b-4a97-9c5e-4e3c845dd0b5_12222x4693.png 848w, https://substackcdn.com/image/fetch/$s_!a5xJ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01ae0c04-3c0b-4a97-9c5e-4e3c845dd0b5_12222x4693.png 1272w, https://substackcdn.com/image/fetch/$s_!a5xJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01ae0c04-3c0b-4a97-9c5e-4e3c845dd0b5_12222x4693.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Approximate machine unlearning.</figcaption></figure></div><p><strong>1. Influence Computation: </strong>The process begins with the <em>"Dataset"</em> shown on the left side of the image. From this dataset, we identify <em>"data to forget"</em> - specific training examples we want the model to unlearn. The <em>"Influence"</em> step analyzes how these data points impact the model's predictions. This analysis employs influence functions, a technique from robust statistics. 
For a given data point <em><strong>z</strong></em>, its influence <em><strong>I(z)</strong></em> is calculated as:</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\mathcal I(z)=-H_{\\hat\\theta}^{-1}\\nabla_{\\theta}L(z,\\hat\\theta),&quot;,&quot;id&quot;:&quot;BBEOSKNTGN&quot;}" data-component-name="LatexBlockToDOM"></div><p>Where:</p><ul><li><p><em><strong>H<sub>&#952;</sub></strong></em> is the Hessian matrix of the loss function <em><strong>L</strong></em>,</p></li><li><p><em><strong>&#952;</strong></em> represents the model parameters,</p></li><li><p><em><strong>&#8711;<sub>&#952;</sub></strong></em> <em><strong>L(z, &#952;)</strong></em> is the gradient of the loss with respect to <em><strong>&#952;</strong></em>.</p></li></ul><p>Computing this for large models can be challenging due to the Hessian inverse. Practical implementations often use approximation techniques like the conjugate gradient method to estimate <em><strong>H<sub>&#952;</sub><sup>-1</sup>&#8711;<sub>&#952;</sub>L</strong></em> efficiently.</p><p><strong>2. Model Parameter Adjustment: </strong>The image shows a transition from an initial neural network to an adjusted one. This step modifies the model parameters to counteract the influence of the data to be forgotten. The adjustment typically follows the direction opposite to the computed influence:</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\theta_{\\mathrm{new}}=\\theta-\\epsilon \\times \\mathcal I(z)&quot;,&quot;id&quot;:&quot;YVXBJRFTHO&quot;}" data-component-name="LatexBlockToDOM"></div><p>where <em><strong>&#949;</strong></em> is a small step size. In practice, this update may be applied iteratively or combined with optimization techniques like Adam or RMSprop for stability.</p><p><strong>3. Noise Addition: </strong>The <em>"noise"</em> element above the neural network in the image represents the introduction of controlled noise to the model. 
This step implements differential privacy techniques to prevent inference of recently removed data. The noise is calibrated based on the sensitivity of the unlearning operation. For Gaussian noise, we might add noise drawn from <em><strong>N(0, &#963;<sup>2</sup>)</strong></em> to each parameter, where <em><strong>&#963;</strong></em> is calculated as:</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\displaystyle \\sigma = c \\cdot \\frac{\\Delta f}{\\varepsilon}.&quot;,&quot;id&quot;:&quot;UTYYGXLLLA&quot;}" data-component-name="LatexBlockToDOM"></div><p>Here, <em><strong>c</strong></em> is a constant, <em><strong>&#916;f</strong></em> is the sensitivity of the unlearning operation, and <em><strong>&#949;</strong></em> is the desired privacy parameter (the privacy budget, not the step size used in step 2). The choice between Gaussian and Laplacian noise depends on the specific privacy guarantees required.</p><p><strong>4. Updated Model Validation: </strong>The final neural network in the image, labeled <em>"New model with influence minimized"</em>, represents the outcome of the unlearning process. 
This step assesses the performance of the new model to ensure effective unlearning while maintaining overall functionality.<br><br>Validation employs metrics such as:</p><ol><li><p><em><strong>Accuracy:</strong> (TP + TN) / (TP + TN + FP + FN)</em></p></li><li><p><em><strong>F1 Score:</strong> 2 * (Precision * Recall) / (Precision + Recall)</em></p></li><li><p><em><strong>AUC-ROC:</strong> Area under the Receiver Operating Characteristic curve</em></p></li></ol><p><br>These metrics are compared against both the original model and an unlearned model.<br><br>Additionally, specific tests may be conducted to ensure the removed data cannot be inferred, such as running membership inference attacks or analyzing the model's uncertainty on the forgotten examples.</p><p>This process provides a balance between unlearning efficiency and effectiveness, making it applicable to a wide range of machine learning models and datasets.<br><br>Below, we will go in depth into two approximate methods of machine unlearning: <strong>Certified Removal</strong> and <strong>Unlearning in Spiking Neural Networks</strong>.</p><h2>2.1 Certified Removal</h2><p><a href="https://arxiv.org/abs/1911.03030">Guo et al. (2019)</a> unveiled certified removal, a big step in approximate machine unlearning. This method offers theoretical safeguards against adversarial extraction of removed training data. 
The image provided illustrates the key components and process flow of certified removal.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!MjlC!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1c9cd28e-2447-4cd6-b3e3-e7c7f97f3795_12147x5925.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!MjlC!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1c9cd28e-2447-4cd6-b3e3-e7c7f97f3795_12147x5925.png 424w, https://substackcdn.com/image/fetch/$s_!MjlC!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1c9cd28e-2447-4cd6-b3e3-e7c7f97f3795_12147x5925.png 848w, https://substackcdn.com/image/fetch/$s_!MjlC!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1c9cd28e-2447-4cd6-b3e3-e7c7f97f3795_12147x5925.png 1272w, https://substackcdn.com/image/fetch/$s_!MjlC!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1c9cd28e-2447-4cd6-b3e3-e7c7f97f3795_12147x5925.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!MjlC!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1c9cd28e-2447-4cd6-b3e3-e7c7f97f3795_12147x5925.png" width="1456" height="710" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1c9cd28e-2447-4cd6-b3e3-e7c7f97f3795_12147x5925.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:710,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3382763,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!MjlC!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1c9cd28e-2447-4cd6-b3e3-e7c7f97f3795_12147x5925.png 424w, https://substackcdn.com/image/fetch/$s_!MjlC!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1c9cd28e-2447-4cd6-b3e3-e7c7f97f3795_12147x5925.png 848w, https://substackcdn.com/image/fetch/$s_!MjlC!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1c9cd28e-2447-4cd6-b3e3-e7c7f97f3795_12147x5925.png 1272w, https://substackcdn.com/image/fetch/$s_!MjlC!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1c9cd28e-2447-4cd6-b3e3-e7c7f97f3795_12147x5925.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Certified removal algorithm for approximate machine unlearning.</figcaption></figure></div><h3>How Certified Removal Works</h3><p>Certified removal operates by strategically adjusting the model to "forget" specific data points. Here's a simplified breakdown of the process.</p><ol><li><p><strong>Noise Injection:</strong> During the initial training, a controlled amount of noise is added to the model. This noise acts as a form of protection, making it harder for an adversary to extract information about individual data points.</p></li><li><p><strong>Influence Calculation:</strong> For each piece of data in the forget set, the method calculates its "influence" on the model. This influence represents how much that specific data point contributed to the model's current state.</p></li><li><p><strong>Parameter Adjustment:</strong> Using the calculated influence, the model's internal parameters are carefully adjusted. 
This adjustment effectively cancels out the impact of the forgotten data points.</p></li></ol><p>The result is a new model that behaves as if it had never seen the forgotten data in the first place.</p><h3>Why Certified Removal Matters</h3><p><a href="https://arxiv.org/abs/1911.03030">Guo et al. (2019)</a> demonstrated the efficiency of certified removal compared to full retraining. In their experiments with a linear model trained on the MNIST dataset:</p><ul><li><p>Certified removal took 0.04 seconds to remove a data point.</p></li><li><p>Full retraining of the model took 15.6 seconds.</p></li></ul><p>These results highlight the significant time savings offered by certified removal, especially when dealing with large datasets or frequent removal requests.</p><h2>2.2 Unlearning in Spiking Neural Networks</h2><p>Spiking Neural Networks (SNNs) mimic the behavior of biological neurons in the human brain (<a href="https://www.sciencedirect.com/science/article/abs/pii/S0893608097000117">Maass 1997</a>). 
As shown in the figure below, SNNs consist of interconnected neuron-like units that communicate through a processing network.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ZzYw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46f36da9-e933-43e0-895d-8dee9cca1349_12159x5980.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ZzYw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46f36da9-e933-43e0-895d-8dee9cca1349_12159x5980.png 424w, https://substackcdn.com/image/fetch/$s_!ZzYw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46f36da9-e933-43e0-895d-8dee9cca1349_12159x5980.png 848w, https://substackcdn.com/image/fetch/$s_!ZzYw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46f36da9-e933-43e0-895d-8dee9cca1349_12159x5980.png 1272w, https://substackcdn.com/image/fetch/$s_!ZzYw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46f36da9-e933-43e0-895d-8dee9cca1349_12159x5980.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ZzYw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46f36da9-e933-43e0-895d-8dee9cca1349_12159x5980.png" width="1456" height="716" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/46f36da9-e933-43e0-895d-8dee9cca1349_12159x5980.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:716,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:4889228,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ZzYw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46f36da9-e933-43e0-895d-8dee9cca1349_12159x5980.png 424w, https://substackcdn.com/image/fetch/$s_!ZzYw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46f36da9-e933-43e0-895d-8dee9cca1349_12159x5980.png 848w, https://substackcdn.com/image/fetch/$s_!ZzYw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46f36da9-e933-43e0-895d-8dee9cca1349_12159x5980.png 1272w, https://substackcdn.com/image/fetch/$s_!ZzYw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F46f36da9-e933-43e0-895d-8dee9cca1349_12159x5980.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Spiking neural network structure.</figcaption></figure></div><p>In SNNs, each neuron has a membrane potential that changes over time. When this potential reaches a specific threshold, the neuron "fires" or "spikes," sending a signal along its axon (represented by orange lines in the image). This spiking behavior is a key characteristic that distinguishes SNNs from traditional artificial neural networks.</p><p><a href="https://www.frontiersin.org/journals/neurorobotics/articles/10.3389/fnbot.2024.1361577/full">Wang et al. (2023)</a> introduced a technique for unlearning in SNNs, addressing the need for privacy mechanisms and unlearning techniques in these biologically inspired models. SNNs have shown success in pattern recognition, particularly in speech (<a href="https://www.frontiersin.org/journals/neurorobotics/articles/10.3389/fnbot.2024.1361577/full">Wang et al. 2023</a>) and image recognition (<a href="https://arxiv.org/abs/2307.11411">Su et al. 2023</a>). 
They have also demonstrated potential in medical applications, such as constructing stimulation systems for Parkinson's patients (<a href="https://www.mtu.edu/news/2023/03/michigan-tech-researchers-develop-smart-deep-brain-stimulation-systems-for-parkinsons-patients.html">Geiger 2023</a>).</p><h3>How SNN Unlearning Works</h3><p>SNN unlearning happens in three phases, which can be understood in the context of the image above.</p><ol><li><p><strong>Selective Retraining</strong>: This phase identifies neurons (the branching structures) and synapses (the connections between neurons, represented by arrows in the central network) responsible for the information to be forgotten. It estimates the correlation between a neuron's spike train (the pattern of signals sent along the orange lines) and the targeted data. Synapses are selected based on their weight change due to learning the targeted data. The weights of these synapses are then adjusted using a modified learning rule.</p></li><li><p><strong>Synaptic Pruning</strong>: This step aims to remove synapses whose weight change surpasses a given threshold. In the context of the image, this would involve selectively removing some of the neuron connections in the interconnected network. All synapses in the network are verified and removed if necessary, effectively eliminating traces of the targeted data.</p></li><li><p><strong>Adaptive Thresholding</strong>: In this phase, neuron firing thresholds are dynamically modified based on their activity in relation to the targeted data. This would affect how easily the neurons "fire" or send signals along the orange lines. This reduces the neuron's response to stimuli linked with the data to be unlearned.</p></li></ol><p>The image shows input neurons on the left (Input 1, Input 2), hidden layers in the center (H1<sub>1</sub>, H1<sub>2</sub>, H1<sub>3</sub>, H2<sub>1</sub>, H2<sub>2</sub>, H2<sub>3</sub>), and output neurons on the right (Output 1, Output 2). 
The unlearning process would involve modifying the connections and behaviors of these components to "forget" specific information.</p><h3>Why SNN Unlearning matters</h3><p><a href="https://www.frontiersin.org/journals/neurorobotics/articles/10.3389/fnbot.2024.1361577/full">Wang et al. (2023)</a> tested the unlearning effectiveness on two datasets: <a href="https://archive.ics.uci.edu/dataset/240/human+activity+recognition+using+smartphones">UCI HAR</a> and MNIST. The results showed:</p><ul><li><p>A decrease in performance metrics (accuracy, precision, recall, etc.) for both datasets after unlearning. This indicates that the network successfully "forgot" the targeted information.</p></li><li><p>The <a href="https://archive.ics.uci.edu/dataset/240/human+activity+recognition+using+smartphones">UCI HAR</a> dataset exhibited a bigger drop in performance compared to <a href="https://yann.lecun.com/exdb/mnist/">MNIST</a>, indicating dataset-specific resilience to the unlearning process. This suggests that the complexity of the data and the way it's encoded in the network can affect the unlearning process.</p></li><li><p>Retraining after unlearning recovered a considerable portion of the lost performance, especially for the <a href="https://yann.lecun.com/exdb/mnist/">MNIST</a> dataset. This demonstrates the network's ability to relearn and adapt after the unlearning process.</p></li><li><p>A clear trade-off emerged between the percentage of samples unlearned and accuracy loss. As more samples were removed, accuracy decreased. This highlights the balance between preserving overall performance and effectively removing specific information.</p></li></ul><p>This method focuses on minimizing the impact of neurons and synapses highly correlated with the forget set. By targeting only these elements, as represented by the specific connections and nodes in the central network of the image, the approach avoids full retraining. 
This makes it an approximate method for unlearning in SNNs, offering a balance between effective information removal and computational efficiency.</p><h2>2.3 Other Approximate Methods</h2><p>The field of approximate machine unlearning continues to evolve beyond the work of <a href="https://arxiv.org/abs/1911.03030">Guo et al. (2019)</a>. Several researchers have proposed innovative approaches.</p><p><a href="https://arxiv.org/abs/2103.03279">Sekhari et al. (2021)</a> introduced an algorithm utilizing cheap-to-store data statistics. This method is great for convex loss functions and can unlearn a significant number of samples without full dataset access.</p><p><a href="https://arxiv.org/abs/2209.12269">Suriyakumar &amp; Wilson (2022)</a> developed an online unlearning algorithm based on the infinitesimal jackknife method. Their approach reduces computational overhead by inverting the Hessian matrix only once.</p><p><a href="https://arxiv.org/abs/2204.07655">Mehta et al. (2022)</a> proposed a parameter selection technique using conditional independence tests. Their L-CODEC and L-FOCI algorithms identify relevant model parameters for unlearning, avoiding full Hessian matrix inversion.</p><p><a href="https://arxiv.org/abs/2203.00846">Wu et al. (2022)</a> presented the Performance Unchanged Model Augmentation (PUMA) method. PUMA removes unique characteristics of marked data points while preserving model performance through influence function calculations.</p><p><a href="https://arxiv.org/abs/2207.04806">Tanno et al. (2022)</a> introduced a "predictive approach" for identifying causes of model failures in medical imaging. They adapted the Elastic Weight Consolidation method to compute training example influence on failure sets.</p><p><a href="https://arxiv.org/abs/2108.11577">Warnecke et al. (2021)</a> proposed a framework for unlearning features and labels instead of entire data points. 
Their method leverages influence functions to perform closed-form updates on model parameters.</p><p>These advancements demonstrate the ongoing efforts to improve machine unlearning efficiency and applicability across various domains and model types.</p><div><hr></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://blog.bagel.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://blog.bagel.com/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h1>3. Prompt-Based Unlearning</h1><p>Prompt-based unlearning techniques aim to make language models "forget" specific information without directly modifying their parameters. These methods are particularly relevant for SOTA large language models (LLMs) where direct access to model parameters is often restricted.</p><p>It's important to stress that prompt-based methods do not facilitate machine unlearning as per our earlier definition; instead, they provide a way to "pretend" to forget information.</p><h3>How Prompt-Based methods work</h3><h4>Guardrails</h4><p><a href="https://arxiv.org/abs/2403.03329">Thaker et al. 
(2024)</a> explored the effectiveness of simple guardrail approaches for unlearning in LLMs.</p><ol><li><p><strong>Prompting:</strong> Crafting specific prompts to guide the model's behavior.</p></li><li><p><strong>Input/Output Filtering:</strong> Screening inputs and outputs to prevent unwanted information.</p></li></ol><p>These techniques were tested on three different benchmarks:</p><ul><li><p><a href="https://arxiv.org/abs/2310.02238">Who's Harry Potter?</a></p></li><li><p><a href="https://arxiv.org/abs/2401.06121">TOFU</a> (Task of Fictitious Unlearning)</p></li><li><p><a href="https://arxiv.org/abs/2403.03218">WMDP</a> (Weapons of Mass Destruction Proxy)</p></li></ul><h4>In-Context Unlearning (ICUL)</h4><p>Inspired by in-context learning (<a href="https://arxiv.org/abs/2005.14165">Brown et al. 2020</a>), ICUL (<a href="https://arxiv.org/abs/2310.07579">Pawelczyk et al., 2024</a>) involves constructing a specific context in a prompt that includes both correctly labeled and mislabeled examples. The process involves three steps.</p><ol><li><p><strong>Relabeling forget points</strong>: Choose a number of data points you want the model to forget. For each of these points, change its original label to a different, incorrect label. This creates a list of items, each with its content and a new, incorrect label.</p></li><li><p><strong>Adding correct examples</strong>: Select a number of correctly labeled examples from your dataset. Add these examples to the list created in step 1. Now you have a longer list that includes both the relabeled "forget" points and some correctly labeled examples.</p></li><li><p><strong>Creating the final prompt</strong>: Take the list from step 2 and add your actual query or question at the end. 
When you submit this to the language model, set the temperature to 0, which makes the model's responses more deterministic (less random).</p></li></ol><h3>Why Prompt-Based methods matter</h3><p><strong>Guardrails (<a href="https://arxiv.org/abs/2403.03329">Thaker et al., 2024</a>)</strong></p><ul><li><p>Achieved unlearning performance comparable to more complex fine-tuning approaches.</p></li><li><p>Offered a simple, resource-efficient approach to unlearning in LLMs.</p></li></ul><p><strong>In-Context Unlearning (ICUL) (<a href="https://arxiv.org/abs/2310.07579">Pawelczyk et al., 2024</a>)</strong></p><ul><li><p>Demonstrated performance equal to or better than some leading unlearning methods that require access to model parameters.</p></li><li><p>Effectively eliminated the influence of a training point on a model's output in text classification and question-answering tasks.</p></li><li><p>Used significantly less memory compared to traditional unlearning methods such as gradient ascent.</p></li></ul><p>Both methods address the challenge of unlearning in black-box LLMs without requiring direct access to model parameters. This is crucial given the widespread use of LLMs in various professional and personal contexts, where avoiding outputs like hate speech, toxic behavior, or hallucinations due to data poisoning is essential.</p><div><hr></div><h1>4. 
Decentralized Machine Unlearning</h1><p>Decentralized machine unlearning addresses the challenge of removing specific information from AI systems geographically distributed across interconnected devices.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!QMeg!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55a5926b-c4cd-41be-829d-6f049975c081_12245x6068.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!QMeg!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55a5926b-c4cd-41be-829d-6f049975c081_12245x6068.png 424w, https://substackcdn.com/image/fetch/$s_!QMeg!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55a5926b-c4cd-41be-829d-6f049975c081_12245x6068.png 848w, https://substackcdn.com/image/fetch/$s_!QMeg!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55a5926b-c4cd-41be-829d-6f049975c081_12245x6068.png 1272w, https://substackcdn.com/image/fetch/$s_!QMeg!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55a5926b-c4cd-41be-829d-6f049975c081_12245x6068.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!QMeg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55a5926b-c4cd-41be-829d-6f049975c081_12245x6068.png" width="1456" height="722" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/55a5926b-c4cd-41be-829d-6f049975c081_12245x6068.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:722,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3486669,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!QMeg!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55a5926b-c4cd-41be-829d-6f049975c081_12245x6068.png 424w, https://substackcdn.com/image/fetch/$s_!QMeg!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55a5926b-c4cd-41be-829d-6f049975c081_12245x6068.png 848w, https://substackcdn.com/image/fetch/$s_!QMeg!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55a5926b-c4cd-41be-829d-6f049975c081_12245x6068.png 1272w, https://substackcdn.com/image/fetch/$s_!QMeg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F55a5926b-c4cd-41be-829d-6f049975c081_12245x6068.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Decentralized machine unlearning environment.</figcaption></figure></div><p>The <strong>HDUS (Heterogeneous Decentralized Unlearning framework with Seed model distillation)</strong> method tackles this complex issue when knowledge disseminates through a network of collaborating machines <a href="https://arxiv.org/abs/2308.13269">(Ye et al. 
2023)</a>.<br><br>This approach is vital in a connected world where decentralized systems like edge computing expand, and data privacy laws demand deletion rights.</p><h3>How HDUS Works</h3><p>HDUS uses a dual-model approach for greater precision and efficiency.</p><ol><li><p><strong>Main Model</strong>: A high-capacity neural network trained on local data, functioning as the core intelligence.</p></li><li><p><strong>Seed Model</strong>: A lightweight version that distills insights from the main model, enabling peer collaboration.</p></li></ol><p>The seed model, trained on a reference (or synthetic) data sample, mimics the main model&#8217;s behavior without revealing sensitive data. In collaborative training, peers share seed model parameters. For inference, each peer generates output using its main model and an ensemble of neighbors' seed models.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!o0wF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02ce7ddd-fb75-4653-b98f-fa07f6938bc2_1812x1024.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!o0wF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02ce7ddd-fb75-4653-b98f-fa07f6938bc2_1812x1024.png 424w, https://substackcdn.com/image/fetch/$s_!o0wF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02ce7ddd-fb75-4653-b98f-fa07f6938bc2_1812x1024.png 848w, https://substackcdn.com/image/fetch/$s_!o0wF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02ce7ddd-fb75-4653-b98f-fa07f6938bc2_1812x1024.png 1272w, 
https://substackcdn.com/image/fetch/$s_!o0wF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02ce7ddd-fb75-4653-b98f-fa07f6938bc2_1812x1024.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!o0wF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02ce7ddd-fb75-4653-b98f-fa07f6938bc2_1812x1024.png" width="1456" height="823" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/02ce7ddd-fb75-4653-b98f-fa07f6938bc2_1812x1024.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:823,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!o0wF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02ce7ddd-fb75-4653-b98f-fa07f6938bc2_1812x1024.png 424w, https://substackcdn.com/image/fetch/$s_!o0wF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02ce7ddd-fb75-4653-b98f-fa07f6938bc2_1812x1024.png 848w, https://substackcdn.com/image/fetch/$s_!o0wF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02ce7ddd-fb75-4653-b98f-fa07f6938bc2_1812x1024.png 1272w, 
https://substackcdn.com/image/fetch/$s_!o0wF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02ce7ddd-fb75-4653-b98f-fa07f6938bc2_1812x1024.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Source: <a href="https://arxiv.org/abs/2308.13269">Ye et al. 
(2023)</a></figcaption></figure></div><p>The above image shows how client <em><strong>a&#8321;</strong></em> processes data using its main model, extracts key insights through a seed model, and integrates inputs from neighboring clients <strong>(b&#8321;, b&#8322;, &#8230;, b&#8342;)</strong> to generate a final output. The diagram highlights the complex connections between processing units, storage, and data paths, reflecting the intricate nature of the HDUS framework.</p><p>When a device exits, it triggers unlearning by notifying its neighbors. They remove its contribution from their submodels, ensuring its influence is erased without needing full model retraining.</p><h3>Why HDUS Matters</h3><p>HDUS offers critical advantages by leveraging decentralized machine unlearning:</p><p>1. <strong>Efficiency</strong>: Fast unlearning without full model retraining, using lightweight seed models for updates.</p><p>2. <strong>Compatibility</strong>: Works across diverse device networks, supporting varied model architectures.</p><p>3. <strong>Complete removal</strong>: Ensures total erasure of a departing device's contribution, ensuring privacy.</p><p>4. <strong>Exact unlearning</strong>: HDUS achieves precise data removal in a decentralized setting, maintaining unlearning integrity across distributed nodes.</p><p>HDUS overcomes the challenge of propagating unlearning requests in networks where knowledge has already spread across peers. It maintains integrity across dynamic network topologies, vital for edge computing and federated learning environments.</p><p>The HDUS approach removes data associated with specific devices, not individual data points. 
This shift is crucial for privacy management in distributed AI systems, especially in environments like mobile edge computing and IoT, where devices frequently join or leave.</p><p>Unlike other approaches, HDUS focuses on removing a party's entire contribution, advancing privacy and data rights in decentralized AI.</p><div><hr></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://hackathon.bagel.net/&quot;,&quot;text&quot;:&quot;Join Hack-AI-Thon by Bagel&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://hackathon.bagel.net/"><span>Join Hack-AI-Thon by Bagel</span></a></p><div><hr></div><h1>TLDR</h1><p>Machine unlearning transforms AI privacy by erasing data from trained models without full retraining. Companies like Meta and JPMorgan Chase leverage this innovation.</p><p>Unlearning specific data points is often <em>exponentially</em> faster than full retraining or distillation, cutting computational power and storage needs. It removes targeted information, preserves model performance, and enables selective forgetting without losing other critical knowledge.</p><p>At <a href="https://twitter.com/bagel_network">Bagel</a>, we fuse breakthrough technology like machine unlearning into an evolving Machine Learning ecosystem, redefining privacy and collaboration in AI.</p><p>Four major methods drive machine unlearning: <strong>Exact (SISA, SQ)</strong>, <strong>Approximate</strong>, <strong>Prompt-Based</strong>, and <strong>Decentralized (HDUS)</strong>. 
Each offers a unique approach to efficient data removal without full model retraining.</p><p>Method comparison:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!LKBE!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F65d7a0f6-b2b6-4c32-ad71-4cad75644bcb_1408x1056.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!LKBE!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F65d7a0f6-b2b6-4c32-ad71-4cad75644bcb_1408x1056.png 424w, https://substackcdn.com/image/fetch/$s_!LKBE!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F65d7a0f6-b2b6-4c32-ad71-4cad75644bcb_1408x1056.png 848w, https://substackcdn.com/image/fetch/$s_!LKBE!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F65d7a0f6-b2b6-4c32-ad71-4cad75644bcb_1408x1056.png 1272w, https://substackcdn.com/image/fetch/$s_!LKBE!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F65d7a0f6-b2b6-4c32-ad71-4cad75644bcb_1408x1056.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!LKBE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F65d7a0f6-b2b6-4c32-ad71-4cad75644bcb_1408x1056.png" width="1408" height="1056" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/65d7a0f6-b2b6-4c32-ad71-4cad75644bcb_1408x1056.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1056,&quot;width&quot;:1408,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:97944,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!LKBE!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F65d7a0f6-b2b6-4c32-ad71-4cad75644bcb_1408x1056.png 424w, https://substackcdn.com/image/fetch/$s_!LKBE!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F65d7a0f6-b2b6-4c32-ad71-4cad75644bcb_1408x1056.png 848w, https://substackcdn.com/image/fetch/$s_!LKBE!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F65d7a0f6-b2b6-4c32-ad71-4cad75644bcb_1408x1056.png 1272w, https://substackcdn.com/image/fetch/$s_!LKBE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F65d7a0f6-b2b6-4c32-ad71-4cad75644bcb_1408x1056.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Machine unlearning enables AI to adapt, correct, and protect privacy at scale.</p>]]></content:encoded></item><item><title><![CDATA[With Great Data, Comes Great Responsibility II]]></title><description><![CDATA[Privacy preserving machine learning (PPML) with TEE and MPC.]]></description><link>https://blog.bagel.com/p/with-great-data-comes-great-responsibility-d67</link><guid isPermaLink="false">https://blog.bagel.com/p/with-great-data-comes-great-responsibility-d67</guid><dc:creator><![CDATA[Bidhan Roy]]></dc:creator><pubDate>Mon, 10 Jun 2024 16:13:10 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!kud-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9f94205-2e62-46ea-8070-9462c5687ce6_2245x1587.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Recently the Bipartisan Senate AI Working Group called for a <a 
href="https://www.mayerbrown.com/en/insights/publications/2024/05/senate-ai-working-group-releases-roadmap-for-artificial-intelligence-policy">staggering $32 billion per year</a> in funding for AI safety research, emphasizing privacy. A study done by <a href="https://www.cisco.com/c/dam/en_us/about/doing_business/trust-center/docs/cisco-privacy-benchmark-study-2024.pdf">Cisco</a> reveals that 1 in 4 organizations have already banned generative AI, and 40% have already experienced <a href="https://edgedelta.com/company/blog/data-security-statistics">AI privacy breaches</a>. These events underscore the need to address data and model security risks in AI.</p><p>As a leader in privacy-preserving machine learning (PPML), Bagel's lab has been <a href="https://blog.bagel.net/p/with-great-data-comes-great-responsibility">advancing the field</a> significantly. Building upon our research of PPML approaches using privacy-enhancing technologies (PETs) like differential privacy, federated learning, ZKML, and FHE, we have deeply investigated two additional technologies gaining adoption: trusted execution environments (TEEs) and secure multiparty computation (MPC).</p><p><em><a href="https://blog.bagel.net/p/with-great-data-comes-great-responsibility">See here for part 1 of this series.</a></em></p><p>In this article, through a blend of theoretical insights and real-world examples, we will demonstrate how TEEs and MPC can be harnessed to build robust, secure, and scalable PPML solutions while navigating the complex regulatory landscape of AI safety and privacy. 
We will discuss how these techniques can be utilized for your use case, compare them to the discussed privacy technologies <a href="https://blog.bagel.net/p/with-great-data-comes-great-responsibility">in our previous research post</a>, and analyze their pros and cons.</p><p><em><strong>And if you're in a rush, we have a TLDR at the end.</strong></em></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!kud-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9f94205-2e62-46ea-8070-9462c5687ce6_2245x1587.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!kud-!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9f94205-2e62-46ea-8070-9462c5687ce6_2245x1587.png 424w, https://substackcdn.com/image/fetch/$s_!kud-!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9f94205-2e62-46ea-8070-9462c5687ce6_2245x1587.png 848w, https://substackcdn.com/image/fetch/$s_!kud-!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9f94205-2e62-46ea-8070-9462c5687ce6_2245x1587.png 1272w, https://substackcdn.com/image/fetch/$s_!kud-!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9f94205-2e62-46ea-8070-9462c5687ce6_2245x1587.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!kud-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9f94205-2e62-46ea-8070-9462c5687ce6_2245x1587.png" width="1456" height="1029" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f9f94205-2e62-46ea-8070-9462c5687ce6_2245x1587.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1029,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:6273883,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!kud-!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9f94205-2e62-46ea-8070-9462c5687ce6_2245x1587.png 424w, https://substackcdn.com/image/fetch/$s_!kud-!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9f94205-2e62-46ea-8070-9462c5687ce6_2245x1587.png 848w, https://substackcdn.com/image/fetch/$s_!kud-!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9f94205-2e62-46ea-8070-9462c5687ce6_2245x1587.png 1272w, https://substackcdn.com/image/fetch/$s_!kud-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9f94205-2e62-46ea-8070-9462c5687ce6_2245x1587.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h1>Trusted Execution Environments</h1><p>Trusted Execution Environments (TEEs) are technologies for hardware-assisted confidential computing. TEEs enable the execution of isolated and verifiable code inside protected memory, also known as enclaves or secure worlds. TEEs ensure that code and data within enclaves are protected from external threats, including malicious software and unauthorized access. This isolation is critical for maintaining the integrity of machine learning models, especially when deployed in untrusted environments such as edge devices or cloud platforms. 
By using TEEs, developers can safeguard the intellectual property embedded in their models and be sure that the models produce reliable and tamper-proof results.</p><h3><strong>General View of TEE Components</strong></h3><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!gH2T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f2e3251-93fb-448f-9990-3d6d92d57e07_1504x966.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!gH2T!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f2e3251-93fb-448f-9990-3d6d92d57e07_1504x966.png 424w, https://substackcdn.com/image/fetch/$s_!gH2T!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f2e3251-93fb-448f-9990-3d6d92d57e07_1504x966.png 848w, https://substackcdn.com/image/fetch/$s_!gH2T!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f2e3251-93fb-448f-9990-3d6d92d57e07_1504x966.png 1272w, https://substackcdn.com/image/fetch/$s_!gH2T!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f2e3251-93fb-448f-9990-3d6d92d57e07_1504x966.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!gH2T!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f2e3251-93fb-448f-9990-3d6d92d57e07_1504x966.png" width="1456" height="935" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0f2e3251-93fb-448f-9990-3d6d92d57e07_1504x966.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:935,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:147303,&quot;alt&quot;:&quot;General View of Trusted Execution Environment Components&#65279;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="General View of Trusted Execution Environment Components&#65279;" title="General View of Trusted Execution Environment Components&#65279;" srcset="https://substackcdn.com/image/fetch/$s_!gH2T!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f2e3251-93fb-448f-9990-3d6d92d57e07_1504x966.png 424w, https://substackcdn.com/image/fetch/$s_!gH2T!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f2e3251-93fb-448f-9990-3d6d92d57e07_1504x966.png 848w, https://substackcdn.com/image/fetch/$s_!gH2T!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f2e3251-93fb-448f-9990-3d6d92d57e07_1504x966.png 1272w, https://substackcdn.com/image/fetch/$s_!gH2T!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f2e3251-93fb-448f-9990-3d6d92d57e07_1504x966.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container 
restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>In a TEE, a user who wants to use the enclave first establishes a secure connection (attestation) using a key generated by a secure processor. The user can verify the integrity of the enclave through an attestation process that is certified by the manufacturer. If the attestation is successful, the user can trust the enclave to execute code and store data securely.</p><p>The overall security also depends on the underlying hardware security features, such as memory encryption and isolation. Hardware-based isolation mechanisms ensure that the code and data inside the enclave are protected from unauthorized access originating outside the enclave. 
The chip containing the enclave is designed to resist tampering, and any hardware tampering will make the enclave non-functional.</p><h3><strong>Software Attestation Process</strong></h3><ol><li><p><strong>Manufacturer Root Key</strong>: The manufacturer has a unique root key embedded in the hardware.</p></li><li><p><strong>Attestation Keys</strong>: Each device processor has an embedded attestation key (private and public) from the manufacturer.</p></li><li><p><strong>Endorsement Certificate</strong>: The manufacturer certifies the public attestation key, indicating it is bound to a tamper-resistant hardware chip.</p></li><li><p><strong>Verifier Interaction</strong>: The verifier sends a message to the TEE, which includes the required attestation.</p></li><li><p><strong>Attestation Response</strong>: The TEE responds with the attestation key and the signed message.</p></li><li><p><strong>Verification</strong>: The verifier checks the attestation by verifying the hash (key+message) with the attestation key. 
If accepted, the verifier trusts the attestation.</p></li></ol><h3><strong>Chain of Trust in Software Attestation</strong></h3><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!u994!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f970a06-652e-45cb-8355-416878c3524b_987x872.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!u994!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f970a06-652e-45cb-8355-416878c3524b_987x872.png 424w, https://substackcdn.com/image/fetch/$s_!u994!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f970a06-652e-45cb-8355-416878c3524b_987x872.png 848w, https://substackcdn.com/image/fetch/$s_!u994!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f970a06-652e-45cb-8355-416878c3524b_987x872.png 1272w, https://substackcdn.com/image/fetch/$s_!u994!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f970a06-652e-45cb-8355-416878c3524b_987x872.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!u994!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f970a06-652e-45cb-8355-416878c3524b_987x872.png" width="987" height="872" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0f970a06-652e-45cb-8355-416878c3524b_987x872.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:872,&quot;width&quot;:987,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:189785,&quot;alt&quot;:&quot;Chain of Trust in Software Attestation in Secure Multiparty Computation.&#65279;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Chain of Trust in Software Attestation in Secure Multiparty Computation.&#65279;" title="Chain of Trust in Software Attestation in Secure Multiparty Computation.&#65279;" srcset="https://substackcdn.com/image/fetch/$s_!u994!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f970a06-652e-45cb-8355-416878c3524b_987x872.png 424w, https://substackcdn.com/image/fetch/$s_!u994!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f970a06-652e-45cb-8355-416878c3524b_987x872.png 848w, https://substackcdn.com/image/fetch/$s_!u994!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f970a06-652e-45cb-8355-416878c3524b_987x872.png 1272w, https://substackcdn.com/image/fetch/$s_!u994!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f970a06-652e-45cb-8355-416878c3524b_987x872.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset 
pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Source <a href="https://eprint.iacr.org/2016/086">(Costan &amp; Devadas 2016)</a>.</figcaption></figure></div><p>The measurement in the context of TEEs is a cryptographic hash produced from the code and data inside the enclave. The measurement is included in the attestation signature to prove that the enclave was not tampered with. The verifier can then check the measurement against a known good value to ensure the enclave's integrity. Finally, the TEE produces the attestation signature containing the measurement plus other attestation data and sends it back to the verifier. The verifier can then use this attestation signature to verify the integrity of the enclave. 
In some cases, the verifier shares a secret key that can be used to establish a secure communication channel between the TEE and the verifier.</p><h3><strong>Main Hardware-Assisted TEEs</strong></h3><ul><li><p>Intel SGX</p></li><li><p>AMD SEV</p></li><li><p>Arm TrustZone</p></li><li><p>Apple Secure Enclave</p></li><li><p>NVIDIA Backed Authentication</p></li></ul><h3><strong>Attack Vectors</strong></h3><p>Despite the robust security guarantees provided by Trusted Execution Environments (TEEs) through isolation and attestation from a trusted manufacturer, research has shown that TEEs are still vulnerable to several types of attacks:</p><ol><li><p><strong>Side-Channel Attacks: </strong>Side-channel attacks exploit indirect information leakage from the TEE, such as timing, power consumption, electromagnetic emissions, or memory access patterns. Examples include:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!tluX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42403354-0f6b-4380-90a3-e5034647910c_2831x2297.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!tluX!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42403354-0f6b-4380-90a3-e5034647910c_2831x2297.png 424w, https://substackcdn.com/image/fetch/$s_!tluX!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42403354-0f6b-4380-90a3-e5034647910c_2831x2297.png 848w, https://substackcdn.com/image/fetch/$s_!tluX!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42403354-0f6b-4380-90a3-e5034647910c_2831x2297.png 1272w, 
https://substackcdn.com/image/fetch/$s_!tluX!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42403354-0f6b-4380-90a3-e5034647910c_2831x2297.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!tluX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42403354-0f6b-4380-90a3-e5034647910c_2831x2297.png" width="1456" height="1181" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/42403354-0f6b-4380-90a3-e5034647910c_2831x2297.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1181,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:448595,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!tluX!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42403354-0f6b-4380-90a3-e5034647910c_2831x2297.png 424w, https://substackcdn.com/image/fetch/$s_!tluX!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42403354-0f6b-4380-90a3-e5034647910c_2831x2297.png 848w, https://substackcdn.com/image/fetch/$s_!tluX!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42403354-0f6b-4380-90a3-e5034647910c_2831x2297.png 1272w, 
https://substackcdn.com/image/fetch/$s_!tluX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F42403354-0f6b-4380-90a3-e5034647910c_2831x2297.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><ol><li><p><strong>Timing Side-Channel Attacks</strong>: These attacks measure the time taken to execute certain operations to infer sensitive information. Timing fluctuations caused by operations like multiplication and division can be exploited to obtain encryption keys. 
Studies have shown that timing attacks are among the most damaging side-channel attacks, particularly in cryptographic implementations. Research by <a href="https://dl.acm.org/doi/abs/10.1145/3456631">Fei et al. (2021)</a> provides detailed insights into these vulnerabilities.</p></li><li><p><strong>Memory Side-Channel Attacks</strong>: These attacks monitor memory access patterns to deduce the data being processed. Memory-based side-channel attacks observe events on shared resources in the memory hierarchy, such as cache hits and misses, to infer secret-dependent memory access patterns. Research has demonstrated the effectiveness of these attacks on both CPUs and GPUs. Studies by <a href="https://eprint.iacr.org/2017/549">Sasy et al. (2017)</a> highlight the risks associated with these attacks.</p></li><li><p><strong>Network Side-Channel Attacks</strong>: These attacks analyze network traffic patterns to extract confidential information. Network side-channel attacks exploit the correlation between network traffic and the operations being performed within the TEE. Studies have highlighted the risks associated with these attacks in various networked environments. Research by <a href="https://eprint.iacr.org/2016/086">Costan and Devadas (2016)</a> discusses these vulnerabilities in detail.</p></li></ol></li></ol><ol start="2"><li><p><strong>Replay Attacks: </strong>Replay attacks involve intercepting and retransmitting valid data to create unauthorized effects. For instance, an attacker might replace a newer message with an older one from the same sender, effectively resetting the state of a computation. Research by <a href="https://eprint.iacr.org/2000/015">Bellare et al. (2000)</a> and <a href="https://arxiv.org/abs/2208.10134">Mo et al. 
(2023)</a> provides comprehensive studies of these attacks.</p></li><li><p><strong>Host-Based Attacks: </strong>Host-based attacks exploit vulnerabilities arising from the interactions between the host operating system (OS), user-space processes, and the TEEs running on the same system. Examples include:</p><ol><li><p><strong>Data Poisoning</strong>: Malicious data is injected into the system to corrupt the training process of machine learning models. Research by <a href="https://arxiv.org/abs/2208.10134">Mo et al. (2023)</a> and <a href="https://dl.acm.org/doi/abs/10.1145/3472634.3472652">Liu et al. (2021)</a> provides detailed analyses of these attacks.</p></li><li><p><strong>Adversarial Examples</strong>: Inputs are crafted to deceive machine learning models into making incorrect predictions. Studies by <a href="https://arxiv.org/abs/2208.10134">Mo et al. (2023)</a> and <a href="https://dl.acm.org/doi/abs/10.1145/3472634.3472652">Liu et al. (2021)</a> discuss these vulnerabilities.</p></li></ol></li><li><p><strong>Access Pattern Exploitation: </strong>Previous research has demonstrated methods to exploit access patterns to TEEs to classify encrypted inputs with high accuracy. This type of attack can reveal sensitive information about the data being processed within the TEE. <a href="https://arxiv.org/abs/1810.00602">Grover et al. (2018)</a> and <a href="https://arxiv.org/abs/2105.01281">Zhang et al. (2021)</a> provide detailed insights into these vulnerabilities.</p></li></ol><p>These vulnerabilities highlight the need for continuous research and development to make TEEs secure enough for untrusted peer-to-peer networks. Below, we discuss how TEEs can be used for privacy-preserving machine learning (PPML), especially for inference or model training.</p><h3>Inference in TEEs</h3><p>In order to perform inference inside TEEs, the data providers and the model provider first need to prepare their corresponding dataset and model. 
They encrypt this information to keep it safe during transit.</p><p>Next, a remote attestation ceremony takes place. The data provider, model provider, and the TEE engage in the key exchange process described above. They verify each other's identities, exchange keys and establish a secure communication channel.</p><p>With trust established, the encrypted dataset and model are transferred into the TEE. Inside the TEE, they are decrypted.</p><p>Now, the inference begins. The TEE executes the inference process, analyzing the data using the model while ensuring secure isolation. The inference results (the computed labels) are encrypted before being returned to the data provider. The model, having served its purpose, can be discarded. The figure below shows the entire inference process.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!2TkS!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0252e4dd-f440-414d-b31a-51374b3ced99_1692x741.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!2TkS!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0252e4dd-f440-414d-b31a-51374b3ced99_1692x741.png 424w, https://substackcdn.com/image/fetch/$s_!2TkS!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0252e4dd-f440-414d-b31a-51374b3ced99_1692x741.png 848w, https://substackcdn.com/image/fetch/$s_!2TkS!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0252e4dd-f440-414d-b31a-51374b3ced99_1692x741.png 1272w, 
https://substackcdn.com/image/fetch/$s_!2TkS!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0252e4dd-f440-414d-b31a-51374b3ced99_1692x741.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!2TkS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0252e4dd-f440-414d-b31a-51374b3ced99_1692x741.png" width="1456" height="638" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0252e4dd-f440-414d-b31a-51374b3ced99_1692x741.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:638,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:184232,&quot;alt&quot;:&quot;Machine learning inference using Trusted Execution Environments.&#65279;&#65279;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Machine learning inference using Trusted Execution Environments.&#65279;&#65279;" title="Machine learning inference using Trusted Execution Environments.&#65279;&#65279;" srcset="https://substackcdn.com/image/fetch/$s_!2TkS!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0252e4dd-f440-414d-b31a-51374b3ced99_1692x741.png 424w, https://substackcdn.com/image/fetch/$s_!2TkS!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0252e4dd-f440-414d-b31a-51374b3ced99_1692x741.png 848w, 
https://substackcdn.com/image/fetch/$s_!2TkS!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0252e4dd-f440-414d-b31a-51374b3ced99_1692x741.png 1272w, https://substackcdn.com/image/fetch/$s_!2TkS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0252e4dd-f440-414d-b31a-51374b3ced99_1692x741.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Some implementations of this ML inference process include:</p><ul><li><p><strong>Occlumency.</strong> A system that leverages Intel 
SGX to preserve the confidentiality and integrity of user data throughout the entire deep learning inference process <a href="https://saumaypushp.github.io/files/Occlumency__MobiCom19_Camera_ready_.pdf">(Lee et al. 2019)</a>.</p></li><li><p><strong>Branchy-TEE.</strong> A framework that dynamically loads the inference network into the TEE on demand, based on an early-exit mechanism to break the hardware performance bottleneck of the TEE <a href="https://ksiresearch.org/seke/seke23paper/paper131.pdf">(Wang et al. 2023)</a>.</p></li><li><p><strong>Origami.</strong> A system that provides privacy-preserving inference for large deep neural network models through a combination of enclave cryptographic blinding and accelerator-based computation <a href="https://arxiv.org/abs/1912.03485">(Giri Nara et al. 2019)</a>.</p></li></ul><h3>Training in TEEs</h3><p>In order to train a model, a data provider needs to load its dataset and an initial model describing the architecture of the network that is going to be trained.</p><p>As with inference, we first encrypt the dataset and the initial model. Then, remote attestation and key exchange take place between the data provider and the TEE, and a secure communication channel is created. Through this channel, the dataset, the initial model and the training algorithm are loaded into the TEE. Then, the TEE executes the training algorithm isolated from any external influence. 
After a given number of epochs, the weights are returned to the data owner through the secure channel.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9bFm!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1bd6d63-ae7c-429f-978f-5f9088e553dd_1652x830.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9bFm!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1bd6d63-ae7c-429f-978f-5f9088e553dd_1652x830.png 424w, https://substackcdn.com/image/fetch/$s_!9bFm!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1bd6d63-ae7c-429f-978f-5f9088e553dd_1652x830.png 848w, https://substackcdn.com/image/fetch/$s_!9bFm!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1bd6d63-ae7c-429f-978f-5f9088e553dd_1652x830.png 1272w, https://substackcdn.com/image/fetch/$s_!9bFm!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1bd6d63-ae7c-429f-978f-5f9088e553dd_1652x830.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9bFm!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1bd6d63-ae7c-429f-978f-5f9088e553dd_1652x830.png" width="1456" height="732" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f1bd6d63-ae7c-429f-978f-5f9088e553dd_1652x830.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:732,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:196384,&quot;alt&quot;:&quot;Machine learning training using Trusted Execution Environments.&#65279;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Machine learning training using Trusted Execution Environments.&#65279;" title="Machine learning training using Trusted Execution Environments.&#65279;" srcset="https://substackcdn.com/image/fetch/$s_!9bFm!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1bd6d63-ae7c-429f-978f-5f9088e553dd_1652x830.png 424w, https://substackcdn.com/image/fetch/$s_!9bFm!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1bd6d63-ae7c-429f-978f-5f9088e553dd_1652x830.png 848w, https://substackcdn.com/image/fetch/$s_!9bFm!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1bd6d63-ae7c-429f-978f-5f9088e553dd_1652x830.png 1272w, https://substackcdn.com/image/fetch/$s_!9bFm!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1bd6d63-ae7c-429f-978f-5f9088e553dd_1652x830.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft 
icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Some implementations of ML training are:</p><ul><li><p><strong>TensorSCONE.</strong> A secure TensorFlow framework using Intel SGX, which enables the training and usage of TensorFlow models within a TEE <a href="https://arxiv.org/abs/1902.04413">(Kunkel et al. 2019)</a>.</p></li><li><p><strong>Graphcore IPUs.</strong> Graphcore's IPU Trusted Extensions (ITX) provide a TEE for AI accelerators, ensuring strong confidentiality and integrity guarantees with lower performance overheads for training <a href="https://arxiv.org/abs/2205.09005">(Vaswani et al. 2022)</a>.</p></li><li><p><strong>Citadel.</strong> This is an ML system that protects both data and model privacy using Intel SGX. 
Citadel performs distributed training across multiple <em>training enclaves</em> for the data owner, and an <em>aggregator enclave</em> for the model owner. It uses <em>Zero-sum masking</em> for secure aggregation between training enclaves, which is a technique where data owners collectively generate masks and apply them to their individual updates before sending them to the aggregator <a href="https://eprint.iacr.org/2017/281">(Bonawitz et al. 2017).</a> The authors of Citadel show that increasing the number of training enclaves from 1 to 32 results in increased throughput of 4.7X - 19.6X <a href="https://arxiv.org/abs/2105.01281">(Zhang et al. 2021)</a>.</p></li></ul><h3>Pros of TEEs</h3><p>The advantages of using TEE with neural networks are:</p><ul><li><p><strong>Confidential Execution.</strong> The ML inference and training process is executed within the TEE. The TEE ensures that the data and model are protected from any external access or tampering during the inference process.</p></li><li><p><strong>Isolation.</strong> The TEE isolates the inference and training process from the rest of the system, ensuring that even if the host system is compromised, the data and model remain secure.</p></li></ul><h3>Cons of TEEs</h3><p>The disadvantages of using TEE with neural networks are:</p><ul><li><p><strong>Requires trust in the manufacturer.</strong> In the attestation process, a certificate of the public key is required from the manufacturer, who acts as a certificate authority. In a peer-to-peer network setting, trust is an undesirable property <a href="https://arxiv.org/abs/2208.10134">(Mo et al. 2023)</a>.</p></li><li><p><strong>Restricted resources.</strong> TEEs offer restricted computation resources inside their secure enclave. This forces ML developers to find clever ways to implement ML tasks, for example, efficient partition of ML processes (cf. <a href="https://arxiv.org/abs/2104.14380">Mo et al. 
(2021)</a>; <a href="https://dl.acm.org/doi/abs/10.1145/3472634.3472652">Liu et al. (2021)</a>). </p></li><li><p><strong>ML inside TEEs is still being attacked.</strong> The ML process is still vulnerable. Previous research conducted by <a href="https://arxiv.org/abs/1810.00602">Grover et al. (2018)</a> demonstrated methods to exploit access patterns to TEEs in order to classify encrypted inputs with high accuracy.</p></li><li><p><strong>Vulnerable interactions with the outside.</strong> This refers to a type of security attack known as "Host-based Attacks" in the context of TEEs. These attacks exploit vulnerabilities arising from the interactions between the host operating system (OS), user-space processes, and the TEEs running on the same system. It includes data poisoning and adversarial examples <a href="https://arxiv.org/abs/2208.10134">(Mo et al. 2023)</a>.</p></li><li><p><strong>SDK support.</strong> Most TEEs only provide a basic low-level SDK, and therefore, it is hard to port all ML dependencies into TEEs. Hence, porting ML code still requires a lot of code refactoring <a href="https://arxiv.org/abs/2208.10134">(Mo et al. 2023)</a>.</p></li><li><p><strong>Large TCB size due to libOS use.</strong> The Trusted Computing Base (TCB) is the set of hardware and software components that are critical to the security of a TEE system. A library operating system (<a href="https://gramineproject.io/">libOS</a>) is a lightweight operating system that provides application-level abstractions without the need for a full operating system kernel. A libOS can be used to easily port ML applications into a TEE, but such a port results in a huge TCB size, which can lead to reduced security <a href="https://arxiv.org/abs/2208.10134">(Mo et al. 
2023)</a>.</p></li></ul><div><hr></div><h1>Secure Multiparty Computation (MPC)</h1><p>Secure Multiparty Computation (MPC) is a cryptographic technique that allows multiple parties to jointly compute a function over their private inputs without revealing those inputs to each other. The key idea behind MPC is to divide the computation into smaller steps, where each party performs a portion of the computation (<a href="https://eprint.iacr.org/2020/300">Lindell 2020</a>).</p><h3><strong>How It Works</strong></h3><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!j5fF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981a3f1c-2454-4236-9c83-0502790105dc_1457x1051.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!j5fF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981a3f1c-2454-4236-9c83-0502790105dc_1457x1051.png 424w, https://substackcdn.com/image/fetch/$s_!j5fF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981a3f1c-2454-4236-9c83-0502790105dc_1457x1051.png 848w, https://substackcdn.com/image/fetch/$s_!j5fF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981a3f1c-2454-4236-9c83-0502790105dc_1457x1051.png 1272w, https://substackcdn.com/image/fetch/$s_!j5fF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981a3f1c-2454-4236-9c83-0502790105dc_1457x1051.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!j5fF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981a3f1c-2454-4236-9c83-0502790105dc_1457x1051.png" width="1456" height="1050" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/981a3f1c-2454-4236-9c83-0502790105dc_1457x1051.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1050,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:124083,&quot;alt&quot;:&quot;Secure multiparty computation.&#65279;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Secure multiparty computation.&#65279;" title="Secure multiparty computation.&#65279;" srcset="https://substackcdn.com/image/fetch/$s_!j5fF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981a3f1c-2454-4236-9c83-0502790105dc_1457x1051.png 424w, https://substackcdn.com/image/fetch/$s_!j5fF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981a3f1c-2454-4236-9c83-0502790105dc_1457x1051.png 848w, https://substackcdn.com/image/fetch/$s_!j5fF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981a3f1c-2454-4236-9c83-0502790105dc_1457x1051.png 1272w, https://substackcdn.com/image/fetch/$s_!j5fF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981a3f1c-2454-4236-9c83-0502790105dc_1457x1051.png 1456w" sizes="100vw" loading="lazy"></picture><div 
class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The process usually involves the following steps:</p><ol><li><p><strong>Function Agreement</strong>: All parties agree on the function <em><strong>f</strong></em> to be computed, as shown in the image: <em><strong>f(x1, x2, ..., xn)</strong></em>.</p></li><li><p><strong>Input Sharing</strong>: Each party provides its private input (<em><strong>x1, x2, x3, x4, x5</strong></em> in the image) but keeps it hidden from others.</p></li><li><p><strong>Protocol Execution</strong>: The parties interact through a carefully designed protocol to compute the output. 
This protocol ensures that no party learns anything about the other parties' inputs beyond what is revealed by the output <em><strong>y = f(x1, x2, ..., xn)</strong></em>.</p></li><li><p><strong>Output Computation</strong>: The final output <em><strong>y</strong></em> is computed and revealed to all parties.</p></li></ol><p>The key properties of MPC protocols are:</p><ul><li><p><strong>Privacy</strong>: No information about any party's input x is revealed beyond what the output y inherently reveals.</p></li><li><p><strong>Correctness</strong>: Each party is guaranteed that the output is correctly computed from the inputs according to the function <em><strong>f</strong></em>, even if some parties deviate from the protocol.</p></li><li><p><strong>Independence of inputs</strong>: Parties choose inputs freely.</p></li><li><p><strong>Guaranteed output delivery</strong>: All parties get the output.</p></li><li><p><strong>Fairness</strong>: Bad actors can't gain an edge (<a href="https://eprint.iacr.org/2020/300">Lindell 2020</a>).</p></li></ul><p>Privacy and correctness are two basic properties that any MPC protocol should support. For stronger security, protocols should also ensure independence of inputs, guaranteed output delivery, and fairness to all parties.</p><h3><strong>Common Schemes in MPC</strong></h3><ul><li><p><strong>Secret Sharing: </strong>One common technique used in MPC is secret sharing, discovered independently by <a href="https://dl.acm.org/doi/10.1145/359168.359176">Shamir (1979)</a> and <a href="https://www.computer.org/csdl/proceedings-article/afips/1979/50870313/12OmNCeK2a1">Blakley (1979)</a>. In secret sharing, a secret is divided into multiple shares, and each share is distributed to different parties. No single share reveals any information about the secret, but when combined, the shares can reconstruct the secret. 
Shamir's Secret Sharing is a well-known method where a secret is represented as a polynomial, and shares are points on this polynomial. The secret can be reconstructed using Lagrange interpolation.</p></li><li><p><strong>Garbled Circuits: </strong>Another technique is Garbled Circuits, discovered by <a href="https://ieeexplore.ieee.org/abstract/document/4568207">Yao (1986)</a>, which is used for two-party computations. In this method, one party (the garbler) creates an encrypted version of the function (the garbled circuit) and sends it to the other party (the evaluator). The evaluator uses its input and the garbled circuit to compute the output without learning the other party's input. This method ensures that the computation is secure and private.</p></li><li><p><strong>Oblivious Transfer: </strong>Oblivious transfer is a fundamental building block in many MPC protocols. It allows a sender to send one of many possible messages to a receiver, but the sender does not know which message was received. This technique is crucial for ensuring that parties do not learn more information than they are supposed to.</p></li></ul><h3><strong>Common Applications</strong></h3><ul><li><p><strong>Data Analytics</strong>: MPC enables joint data analysis while keeping inputs private. This is useful in healthcare, finance, and marketing where privacy is crucial. Companies can perform secure statistical analysis on combined datasets without exposing individual records (<a href="https://ieeexplore.ieee.org/document/8935900">Zhou et al. 2019</a>).</p></li><li><p><strong>Auctions and Biddings</strong>: MPC can conduct auctions where bids remain private, only revealing the winning bid price. This enhances fairness and privacy in auctions (<a href="https://link.springer.com/chapter/10.1007/11889663_10">Bogetoft et al. 
2006</a>).</p></li><li><p><strong>Fraud Detection</strong>: By securely analyzing combined transaction data from multiple sources using MPC, fraud detection algorithms can operate on a broader dataset while preserving individual transaction privacy (<a href="https://www.nasdaq.com/articles/multi-party-computation-mpc-technology-can-ensure-effective-fraud-detection-2021-09-08">Nielsen 2021</a>).</p></li></ul><p>Below, we will review how to use MPC to perform inference and training in machine learning in a privacy preserving manner using secret-sharing. The main idea is that data and model are shared and computations occur on shares, not raw data.</p><h3>MPC in Inference</h3><p>In order to explain how inference is done using MPC, we need to introduce some basics on secret sharing. We present here a definition by <a href="https://www.nowpublishers.com/article/Details/SEC-019">Evans et al. (2017)</a>. A <em><strong>(t,n)</strong></em>-secret sharing scheme splits a secret <em><strong>s</strong></em> into <em><strong>n</strong></em> shares in a way that <em><strong>t-1</strong></em> shares reveal no information about the secret <em><strong>s</strong></em>, while <em><strong>t</strong></em> or more shares allow reconstruction of <em><strong>s</strong></em>. In a two-party secret sharing scheme we have <em><strong>t=n=2</strong></em>. In the following we will explain a simple approach for inference given by <a href="https://arxiv.org/abs/2211.01452">Li et al. (2022)</a> in the context of transformer models.</p><p>Similarly to zkML and FHE, where all operations of a neural network must be translated into arithmetic operations, we use a secret sharing scheme for computing additions and multiplications. 
Let us consider two parties, a data provider with a data input <em><strong>x</strong></em>, and a model provider with a model input <em><strong>y</strong></em> consisting of all weights in the model.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!_f2x!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F703b8323-d7f4-41ae-b717-586a1c9208ef_1541x1941.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!_f2x!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F703b8323-d7f4-41ae-b717-586a1c9208ef_1541x1941.png 424w, https://substackcdn.com/image/fetch/$s_!_f2x!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F703b8323-d7f4-41ae-b717-586a1c9208ef_1541x1941.png 848w, https://substackcdn.com/image/fetch/$s_!_f2x!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F703b8323-d7f4-41ae-b717-586a1c9208ef_1541x1941.png 1272w, https://substackcdn.com/image/fetch/$s_!_f2x!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F703b8323-d7f4-41ae-b717-586a1c9208ef_1541x1941.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!_f2x!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F703b8323-d7f4-41ae-b717-586a1c9208ef_1541x1941.png" width="1456" height="1834" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/703b8323-d7f4-41ae-b717-586a1c9208ef_1541x1941.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1834,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:264752,&quot;alt&quot;:&quot;Machine learning inference using multiparty computation.&#65279;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Machine learning inference using multiparty computation.&#65279;" title="Machine learning inference using multiparty computation.&#65279;" srcset="https://substackcdn.com/image/fetch/$s_!_f2x!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F703b8323-d7f4-41ae-b717-586a1c9208ef_1541x1941.png 424w, https://substackcdn.com/image/fetch/$s_!_f2x!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F703b8323-d7f4-41ae-b717-586a1c9208ef_1541x1941.png 848w, https://substackcdn.com/image/fetch/$s_!_f2x!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F703b8323-d7f4-41ae-b717-586a1c9208ef_1541x1941.png 1272w, https://substackcdn.com/image/fetch/$s_!_f2x!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F703b8323-d7f4-41ae-b717-586a1c9208ef_1541x1941.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container 
restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>To compute additions, the data provider splits <em><strong>x</strong></em> into two shares <em><strong>x1</strong></em> and <em><strong>x2</strong></em> with <em><strong>x=x1+x2</strong></em> and the model provider splits <em><strong>y</strong></em> into <em><strong>y1</strong></em> and <em><strong>y2</strong></em> with <em><strong>y=y1+y2</strong></em>. The data provider keeps <em><strong>x1</strong></em> and <em><strong>y1</strong></em> and the model provider keeps <em><strong>x2</strong></em> and <em><strong>y2</strong></em>. This way, neither can reconstruct the original inputs <em><strong>x</strong></em> and <em><strong>y</strong></em>. Then the data provider computes <em><strong>D=x1+y1</strong></em> and the model provider computes <em><strong>M=x2+y2</strong></em>. 
The reconstruction process then results in <em><strong>D+M=x+y</strong></em>.</p><p>To compute multiplications, the parties can use a secure protocol based on <a href="https://link.springer.com/chapter/10.1007/3-540-46766-1_34">Beaver triples</a>. Both parties compute the shares <em><strong>x1</strong></em>,<em><strong>x2</strong></em>,<em><strong>y1</strong></em>,<em><strong>y2</strong></em> and the data provider gets <em><strong>x1</strong></em>,<em><strong>y1</strong></em> and the model provider gets <em><strong>x2</strong></em>,<em><strong>y2</strong></em>. Now a Beaver triple is generated <em><strong>c=ab</strong></em> using <a href="https://en.wikipedia.org/wiki/Oblivious_transfer">oblivious transfer</a> or <a href="https://en.wikipedia.org/wiki/Homomorphic_encryption">homomorphic encryption</a> and <em><strong>a</strong></em>,<em><strong>b</strong></em>,<em><strong>c</strong></em> are secret shared with the data provider receiving <em><strong>a1</strong></em>,<em><strong>b1</strong></em>,<em><strong>c1</strong></em> and the model provider receiving <em><strong>a2</strong></em>,<em><strong>b2</strong></em>,<em><strong>c2</strong></em>. Now the data provider computes <em><strong>&#1013;1=x1-a1</strong></em> and <em><strong>&#948;1=y1-b1</strong></em> and the model provider computes <em><strong>&#1013;2=x2-a2</strong></em> and <em><strong>&#948;2=y2-b2</strong></em>. They communicate these numbers and reconstruct <em><strong>&#1013;=&#1013;1+&#1013;2</strong></em> and <em><strong>&#948;=&#948;1+&#948;2</strong></em>. Then the data provider can compute <em><strong>r1=c1+&#1013;&#183;b1+&#948;&#183;a1+&#948;&#183;&#1013;</strong></em> and the model provider can compute <em><strong>r2=c2+&#1013;&#183;b2+&#948;&#183;a2</strong></em> (the public term <em><strong>&#948;&#183;&#1013;</strong></em> is added by only one party so that it is counted exactly once). 
The multiplication result is then given by <em><strong>xy=r1+r2</strong></em>, which is an addition operation that can be jointly computed using the protocol of the previous paragraph.</p><p>The implementation of more complex operations, like comparisons, can benefit from 3-party MPC as shown by <a href="https://eprint.iacr.org/2023/100">Dong et al. (2023)</a>.</p><h3>MPC in Training</h3><p>For the case of training with MPC we assume we have two or more data providers that would like to keep their inputs secret and they would like to jointly compute the weights of a model. A secret-sharing scheme was used by <a href="https://arxiv.org/abs/2007.12557v3">Liu et al. (2021)</a> where they showed how to construct an efficient <em><strong>n</strong></em>-party protocol for secure neural network training that can provide security for all honest participants even when a majority of the parties are malicious.</p><p>Similarly as in the case for inference, all operations in the architecture of the neural network must be performed using arithmetic operations, that is, additions and multiplications.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!6DXT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F447d0d46-66fe-4100-9d3f-787dfa0f4887_1463x2097.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!6DXT!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F447d0d46-66fe-4100-9d3f-787dfa0f4887_1463x2097.png 424w, https://substackcdn.com/image/fetch/$s_!6DXT!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F447d0d46-66fe-4100-9d3f-787dfa0f4887_1463x2097.png 848w, 
https://substackcdn.com/image/fetch/$s_!6DXT!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F447d0d46-66fe-4100-9d3f-787dfa0f4887_1463x2097.png 1272w, https://substackcdn.com/image/fetch/$s_!6DXT!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F447d0d46-66fe-4100-9d3f-787dfa0f4887_1463x2097.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!6DXT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F447d0d46-66fe-4100-9d3f-787dfa0f4887_1463x2097.png" width="1456" height="2087" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/447d0d46-66fe-4100-9d3f-787dfa0f4887_1463x2097.png&quot;,&quot;srcNoWatermark&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a33e3bee-8812-4bde-878d-510f217e0cf8_1463x2097.png&quot;,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:2087,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:252223,&quot;alt&quot;:&quot;Machine learning model training using multiparty computation.&#65279;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Machine learning model training using multiparty computation.&#65279;" title="Machine learning model training using multiparty computation.&#65279;" srcset="https://substackcdn.com/image/fetch/$s_!6DXT!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F447d0d46-66fe-4100-9d3f-787dfa0f4887_1463x2097.png 424w, 
https://substackcdn.com/image/fetch/$s_!6DXT!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F447d0d46-66fe-4100-9d3f-787dfa0f4887_1463x2097.png 848w, https://substackcdn.com/image/fetch/$s_!6DXT!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F447d0d46-66fe-4100-9d3f-787dfa0f4887_1463x2097.png 1272w, https://substackcdn.com/image/fetch/$s_!6DXT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F447d0d46-66fe-4100-9d3f-787dfa0f4887_1463x2097.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>In the case of a 2-party protocol, for each private value <em><strong>x</strong></em>, the data provider must split it into shares <em><strong>x1</strong></em>, <em><strong>x2</strong></em> and give <em><strong>x2</strong></em> to the other data provider. As in the inference case, a <a href="https://link.springer.com/chapter/10.1007/3-540-46766-1_34">Beaver triple</a> is computed and shared among the data providers. In general, for <em><strong>n</strong></em> data providers, each input is partitioned into <em><strong>n</strong></em> shares.</p><p>At the end of training, a model is learned and shared between the data provider and the model trainer.</p><h3>Pros of MPC</h3><p>The advantages of using MPC with neural networks are:</p><ul><li><p><strong>Joint privacy-preserving data analysis.</strong> MPC allows multiple parties to collaboratively analyze data without revealing their individual datasets. This is particularly valuable in industries like healthcare, finance, and marketing, where sensitive data must be protected.</p></li><li><p><strong>Secure Machine Learning Model Training.</strong> MPC can be used to train machine learning models on data from multiple sources without exposing the individual datasets. This is crucial in scenarios where data owners are reluctant to share their data due to privacy concerns or legal restrictions.</p></li></ul><h3>Cons of MPC</h3><p>The disadvantages of using MPC with neural networks are:</p><ul><li><p><strong>Trust.</strong> While MPC protocols aim to provide trustless computation, their deployment in real-world systems introduces additional trust assumptions and practical considerations <a href="https://mpc.cs.berkeley.edu/blog/deployment-dilemma.html">(Lindell et al. 2023)</a>.</p></li><li><p><strong>Complexity.</strong> Developing and deploying MPC solutions can be complex. 
It requires expertise in cryptography, secure protocol design, and distributed systems. Ensuring the correctness and security of the MPC protocol itself is essential, as any vulnerabilities could compromise the privacy of the data being processed <a href="https://arxiv.org/abs/2007.12557v3">(Liu et al. 2021)</a>.</p></li></ul><div><hr></div><h1>TLDR</h1><p><a href="https://blog.bagel.net/p/with-great-data-comes-great-responsibility">In Part 1</a>, we discussed the four main privacy-preserving machine learning techniques: differential privacy (DP), zero-knowledge machine learning (ZKML), federated learning (FL), and fully homomorphic encryption (FHE). We assessed these techniques based on data privacy, model algorithm privacy, model weights privacy, and verifiability.</p><p>In this Part 2, we looked into two other popular privacy-enhancing technologies: trusted execution environments (TEEs) and secure multiparty computation (MPC).</p><p>TEEs allow for isolated and verifiable code execution in protected memory, safeguarding code and data from external threats. However, TEEs require trust in the manufacturer and introduce a single point of failure, which is not suitable for trustless and peer-to-peer (P2P) networks.</p><p>MPC enables multiple parties to compute on shared data while keeping inputs private. It facilitates joint privacy-preserving data analysis and secure machine learning model training without exposing individual datasets. 
However, its deployment in real-world systems introduces trust assumptions.</p><p>The table below summarizes our findings&#8212;<a href="https://blog.bagel.net/p/with-great-data-comes-great-responsibility">see Part 1</a> for the definition of each property.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!EJPb!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc73ac38a-1276-478c-9833-fecc55c0767c_3840x2160.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!EJPb!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc73ac38a-1276-478c-9833-fecc55c0767c_3840x2160.png 424w, https://substackcdn.com/image/fetch/$s_!EJPb!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc73ac38a-1276-478c-9833-fecc55c0767c_3840x2160.png 848w, https://substackcdn.com/image/fetch/$s_!EJPb!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc73ac38a-1276-478c-9833-fecc55c0767c_3840x2160.png 1272w, https://substackcdn.com/image/fetch/$s_!EJPb!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc73ac38a-1276-478c-9833-fecc55c0767c_3840x2160.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!EJPb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc73ac38a-1276-478c-9833-fecc55c0767c_3840x2160.png" width="1456" height="819" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c73ac38a-1276-478c-9833-fecc55c0767c_3840x2160.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:236298,&quot;alt&quot;:&quot;Privacy-preserving machine learning method comparison, Differential Privacy, Zero-knowledge machine learning, Federated learning, Fully homomorphic encryption, Trusted execution environments, Multi-party computation.&#65279;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Privacy-preserving machine learning method comparison, Differential Privacy, Zero-knowledge machine learning, Federated learning, Fully homomorphic encryption, Trusted execution environments, Multi-party computation.&#65279;" title="Privacy-preserving machine learning method comparison, Differential Privacy, Zero-knowledge machine learning, Federated learning, Fully homomorphic encryption, Trusted execution environments, Multi-party computation.&#65279;" srcset="https://substackcdn.com/image/fetch/$s_!EJPb!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc73ac38a-1276-478c-9833-fecc55c0767c_3840x2160.png 424w, https://substackcdn.com/image/fetch/$s_!EJPb!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc73ac38a-1276-478c-9833-fecc55c0767c_3840x2160.png 848w, 
https://substackcdn.com/image/fetch/$s_!EJPb!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc73ac38a-1276-478c-9833-fecc55c0767c_3840x2160.png 1272w, https://substackcdn.com/image/fetch/$s_!EJPb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc73ac38a-1276-478c-9833-fecc55c0767c_3840x2160.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div>]]></content:encoded></item><item><title><![CDATA[Data Synthesis]]></title><description><![CDATA[Synthetic Data 
101]]></description><link>https://blog.bagel.com/p/data-synthesis</link><guid isPermaLink="false">https://blog.bagel.com/p/data-synthesis</guid><dc:creator><![CDATA[Marcos Villagra]]></dc:creator><pubDate>Wed, 15 May 2024 15:26:50 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!xGBK!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe04537b7-ecee-41f7-9b87-cd05a63d0778_3368x2381.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Synthetic data is AI's new secret weapon. It solves major challenges of using real-world data.</p><p>Consider privacy. Industries like <a href="https://www.forbes.com/sites/forbestechcouncil/2024/04/03/synthetic-data-applications-in-finance/?sh=506d1b43695d">finance</a>, <a href="https://www.nature.com/articles/s41551-021-00751-8">drug discovery</a>, and <a href="https://pubmed.ncbi.nlm.nih.gov/38523605/">medical imaging</a> can work around strict data-sharing restrictions through synthetic data. It mimics real-world data without exposing personal information, allowing for secure, <a href="https://arxiv.org/abs/2309.00652">regulation-compliant AI development</a>.</p><p>Training AI for rare, extreme scenarios is also vital, but often impossible with real data. Synthetic data solves this problem. It can <a href="https://developer.nvidia.com/blog/using-synthetic-data-to-address-novel-viewpoints-for-autonomous-vehicle-perception/">simulate events like uncommon car accidents to train autonomous vehicles</a>. This vastly expands the scope and robustness of AI systems.</p><p>Another persistent issue is <a href="https://labelyourdata.com/articles/bias-in-machine-learning">bias in datasets</a>. Real data often reflects <a href="https://link.springer.com/article/10.1007/s00146-022-01474-3">historical prejudices or demographic imbalances</a>. 
Synthetic data enables the creation of balanced datasets that counteract these biases, leading to fairer AI systems.</p><p>AI development needs rapid access to vast amounts of quality data. Real data collection and processing has overhead. Synthetic data can be generated instantly, supporting fast testing and iteration with diverse datasets. It <a href="https://ascopubs.org/doi/10.1200/CCI.23.00201">improves model accuracy</a> while <a href="https://forbes.com/sites/robtoews/2022/06/12/synthetic-data-is-about-to-transform-artificial-intelligence/?sh=458b21c67523">cutting the need for data labeling</a> and collection drastically.</p><p><em>This article takes you on a journey into the fascinating world of synthetic data. We'll uncover the top techniques used to create it. We'll see how it's solving critical problems across sectors. And we'll give you an unfiltered look at the pros and cons you need to know, and show you how to implement it for your specific business need.</em></p><p><em><strong>If you're in a rush, we have a TLDR at the end.</strong></em></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!xGBK!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe04537b7-ecee-41f7-9b87-cd05a63d0778_3368x2381.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!xGBK!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe04537b7-ecee-41f7-9b87-cd05a63d0778_3368x2381.png 424w, https://substackcdn.com/image/fetch/$s_!xGBK!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe04537b7-ecee-41f7-9b87-cd05a63d0778_3368x2381.png 848w, 
https://substackcdn.com/image/fetch/$s_!xGBK!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe04537b7-ecee-41f7-9b87-cd05a63d0778_3368x2381.png 1272w, https://substackcdn.com/image/fetch/$s_!xGBK!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe04537b7-ecee-41f7-9b87-cd05a63d0778_3368x2381.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!xGBK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe04537b7-ecee-41f7-9b87-cd05a63d0778_3368x2381.png" width="1456" height="1029" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e04537b7-ecee-41f7-9b87-cd05a63d0778_3368x2381.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1029,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:10218355,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!xGBK!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe04537b7-ecee-41f7-9b87-cd05a63d0778_3368x2381.png 424w, https://substackcdn.com/image/fetch/$s_!xGBK!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe04537b7-ecee-41f7-9b87-cd05a63d0778_3368x2381.png 848w, 
https://substackcdn.com/image/fetch/$s_!xGBK!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe04537b7-ecee-41f7-9b87-cd05a63d0778_3368x2381.png 1272w, https://substackcdn.com/image/fetch/$s_!xGBK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe04537b7-ecee-41f7-9b87-cd05a63d0778_3368x2381.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h1>How To Generate Synthetic Data</h1><p>Synthetic data is statistically accurate simulations of real-world 
datasets. Two forms of it exist: fully synthetic data, created without real data, and partially synthetic data, which may include original dataset elements.</p><p>Data quality depends on generation technique and type. In this article, we explore three major families of synthetic data generation techniques: <strong>Generative</strong>, <strong>Evolutionary</strong>, and <strong>Marginal-based</strong>. Technique choice depends on data type (tabular, time-series, images) and desired properties (diversity, realism, privacy).</p><div><hr></div><h1>Generative Methods</h1><p>Generative AI has had a big impact across all industries, pushing teams to perform at a higher rate. The goal of generative AI is essentially to generate new data, be it images, audio, video, text, etc. It is, thus, natural to rely on generative AI techniques to create new synthetic data.</p><p>Below we discuss three of the most popular generative synthetic data generation methods: <em>Variational Autoencoders (VAEs)</em>, <em>Generative Adversarial Networks (GANs)</em>, and <em>Diffusion Models</em>.</p><h2>1. Variational Autoencoders (VAEs)</h2><p>A <a href="https://arxiv.org/abs/1312.6114">Variational Autoencoder</a> (VAE) is an algorithmic tool that compresses and decompresses data. VAEs are practical tools used in diverse fields such as computational chemistry for generating <a href="https://arxiv.org/abs/1802.03480">molecular graphs</a>, <a href="https://datadance.ai/machine-learning/training-a-variational-autoencoder-for-anomaly-detection-using-tensorflow/">anomaly detection</a> in data streams, and even in <a href="https://www.mdpi.com/1424-8220/23/7/3457">game design</a> to create diverse and complex environments.</p><p>The goal of VAEs is to transform data points from a high-dimensional space to a lower-dimensional, more meaningful <a href="https://www.larksuite.com/en_us/topics/ai-glossary/latent-space">latent space</a>. 
The transformation process involves two main components: the encoder and the decoder. The encoder's task is to map data to the latent space, while the decoder reconstructs the data back to the original high-dimensional space. The goal is to minimize the error between the original and reconstructed data, denoted as</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\epsilon(x,D(E(x))).&quot;,&quot;id&quot;:&quot;ZFCJHPWRFB&quot;}" data-component-name="LatexBlockToDOM"></div><p>where <em>x</em> is an input, <em>E</em> and <em>D</em> denote the encoder and decoder functions, and <em>&#1013;</em> is an error function.</p><p>A VAE differs from a standard autoencoder by introducing a probabilistic twist. The encoder in a VAE doesn't directly output coordinates in the latent space. Instead, it outputs the parameters of a probability distribution&#8212;typically a Gaussian denoted by</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\mathcal N(\\mu_x,\\sigma_x)&quot;,&quot;id&quot;:&quot;PVNNALNDKI&quot;}" data-component-name="LatexBlockToDOM"></div><p>where <em>&#956;</em> denotes the mean and <em>&#963;</em> the standard deviation, both of which can depend on the input <em>x</em>. 
The figure below shows the general architecture of a VAE.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Tpoh!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F526b13f5-1e8c-46b2-84ea-e87dd43a190f_923x307.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Tpoh!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F526b13f5-1e8c-46b2-84ea-e87dd43a190f_923x307.png 424w, https://substackcdn.com/image/fetch/$s_!Tpoh!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F526b13f5-1e8c-46b2-84ea-e87dd43a190f_923x307.png 848w, https://substackcdn.com/image/fetch/$s_!Tpoh!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F526b13f5-1e8c-46b2-84ea-e87dd43a190f_923x307.png 1272w, https://substackcdn.com/image/fetch/$s_!Tpoh!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F526b13f5-1e8c-46b2-84ea-e87dd43a190f_923x307.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Tpoh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F526b13f5-1e8c-46b2-84ea-e87dd43a190f_923x307.png" width="923" height="307" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/526b13f5-1e8c-46b2-84ea-e87dd43a190f_923x307.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:307,&quot;width&quot;:923,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:50843,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Tpoh!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F526b13f5-1e8c-46b2-84ea-e87dd43a190f_923x307.png 424w, https://substackcdn.com/image/fetch/$s_!Tpoh!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F526b13f5-1e8c-46b2-84ea-e87dd43a190f_923x307.png 848w, https://substackcdn.com/image/fetch/$s_!Tpoh!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F526b13f5-1e8c-46b2-84ea-e87dd43a190f_923x307.png 1272w, https://substackcdn.com/image/fetch/$s_!Tpoh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F526b13f5-1e8c-46b2-84ea-e87dd43a190f_923x307.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The decoder samples from the distribution generated by the encoder to generate new data points, effectively turning the decoder into a generator of synthetic data. This process is governed by a loss function <em>L</em>, which balances two terms</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;L(x,y)=\\|x-y\\|^2+KL(\\mathcal N(\\mu_x,\\sigma_x),\\mathcal N(0,1)),&quot;,&quot;id&quot;:&quot;XCPKCKIDOL&quot;}" data-component-name="LatexBlockToDOM"></div><p>where <em>KL</em> denotes the <a href="https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence">Kullback-Leibler divergence</a>, a measure of how one probability distribution diverges from a second, expected probability distribution.</p><h4><strong>Pros</strong></h4><ul><li><p><strong>Data Type Flexibility.</strong> VAEs are adaptable to various data types, including multimodal data.</p></li><li><p><strong>Anomaly Detection. 
</strong>VAEs excel at identifying data points that deviate significantly from the norm, as these will have high reconstruction errors.</p></li><li><p><strong>Efficient Sampling.</strong> Post-training, sampling from the latent space and generating new data via the decoder is straightforward and efficient.</p></li></ul><h4><strong>Cons</strong></h4><ul><li><p><strong>Loss Function Balancing.</strong> The dual nature of the loss function, combining reconstruction and regularization, requires careful tuning, which is still an active area of research <a href="https://link.springer.com/article/10.1007/s42979-021-00702-9">(Asperti et al. 2021)</a>.</p></li><li><p><strong>Blurriness in Output.</strong> Often, the output images or data from VAEs lack sharpness <a href="https://arxiv.org/abs/2002.09860">(Asperti 2020)</a>.</p></li><li><p><strong>Clustering in Latent Space.</strong> In scenarios where the dataset inherently contains subcategories, these often manifest as clusters in the latent space, which can be problematic for generative tasks where uniformity might be desired <a href="https://link.springer.com/article/10.1007/s42979-021-00702-9">(Asperti et al. 2021)</a>.</p></li></ul><h2>2. Generative Adversarial Networks (GANs)</h2><p>In a Generative Adversarial Network (GAN), <em>two AI algorithms are locked in a game competing against each other</em>. We have two neural networks, the generator (<em>G</em>) and the discriminator (<em>D</em>), engaging in a continuous game of deception and detection. The generator strives to create data so authentic that the discriminator cannot distinguish it from real data. Conversely, the discriminator's goal is to accurately identify whether the data it reviews is genuine or fabricated by the generator. This dynamic competition drives both networks towards perfection, simulating a game where the prize is the ability to replicate reality.</p><p>GANs have found their way into various industries, showcasing their versatility. 
<a href="https://www.nvidia.com/en-us/">NVIDIA</a>, for instance, uses GANs to create lifelike <a href="https://blogs.nvidia.com/blog/neurips-research-limited-data-gan/">artwork</a> and to transform 2D images into <a href="https://research.nvidia.com/publication/2022-06_efficient-geometry-aware-3d-generative-adversarial-networks">3D shapes</a>. <a href="https://www.ebay.com/">eBay</a> employs them for image-based <a href="https://patents.google.com/patent/US20190286950A1/en">search</a> functionalities in their marketplace. <a href="https://auto.hindustantimes.com/auto/cars/audi-uses-artificial-intelligence-to-design-new-wheels-41670915582655.html">Audi</a> leverages GANs for innovative wheel design, while <a href="https://www.zalando.com/">Zalando</a> <a href="https://engineering.zalando.com/posts/2018/09/texture-distribution-artistic-expression.html">generates new textures</a> for the fashion industry.</p><p>In a GAN, the generator crafts fake data, aiming to pass it off as real. The discriminator tries to separate the true from the false. This process is a high-level game, where each player sharpens their skills in response to the other's moves. The generator uses input &#119909; from a distribution <em>p</em>&#8203; corresponding to a random variable <em>X</em> to produce output aiming to mimic a target distribution <em>T</em>. The discriminator examines outputs &#119910;=&#119866;(&#119909;), guessing if they're real or fake. 
Ideally, the generator's output distribution &#119866;(&#119883;) aligns so closely with &#119879; that the discriminator is left guessing, assigning a 50% probability to both real and fake data.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!urrf!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7d0334fb-b60c-4889-b9f7-924fd3a67b47_1538x868.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!urrf!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7d0334fb-b60c-4889-b9f7-924fd3a67b47_1538x868.png 424w, https://substackcdn.com/image/fetch/$s_!urrf!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7d0334fb-b60c-4889-b9f7-924fd3a67b47_1538x868.png 848w, https://substackcdn.com/image/fetch/$s_!urrf!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7d0334fb-b60c-4889-b9f7-924fd3a67b47_1538x868.png 1272w, https://substackcdn.com/image/fetch/$s_!urrf!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7d0334fb-b60c-4889-b9f7-924fd3a67b47_1538x868.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!urrf!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7d0334fb-b60c-4889-b9f7-924fd3a67b47_1538x868.png" width="1456" height="822" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7d0334fb-b60c-4889-b9f7-924fd3a67b47_1538x868.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:822,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:197262,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!urrf!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7d0334fb-b60c-4889-b9f7-924fd3a67b47_1538x868.png 424w, https://substackcdn.com/image/fetch/$s_!urrf!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7d0334fb-b60c-4889-b9f7-924fd3a67b47_1538x868.png 848w, https://substackcdn.com/image/fetch/$s_!urrf!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7d0334fb-b60c-4889-b9f7-924fd3a67b47_1538x868.png 1272w, https://substackcdn.com/image/fetch/$s_!urrf!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7d0334fb-b60c-4889-b9f7-924fd3a67b47_1538x868.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h4><strong>Pros</strong></h4><ul><li><p><strong>Anomaly Detection.</strong> GANs excel in identifying outliers, making them invaluable for tasks like <a href="https://arxiv.org/abs/2002.02271">fraud detection</a>.</p></li><li><p><strong>High-Quality Outputs.</strong> They are capable of producing images so realistic that they <a href="https://machinelearningmastery.com/what-are-generative-adversarial-networks-gans/">can fool</a> the human eye.</p></li><li><p><strong>Privacy Protection.</strong> Incorporating <a href="https://arxiv.org/abs/1802.06739">differential privacy</a> during training ensures sensitive data remains secure.</p></li></ul><h4><strong>Cons</strong></h4><ul><li><p><strong>Mode Collapse.</strong> A scenario where the generator produces <a href="https://medium.com/@marcodelpra/generative-adversarial-networks-dba10e1b4424">limited varieties</a> of output, making it easier for the discriminator to identify fakes.</p></li><li><p><strong>Training 
Challenges.</strong> Balancing the <a href="https://jonathan-hui.medium.com/gan-why-it-is-so-hard-to-train-generative-advisory-networks-819a86b3750b">training pace</a> of both networks is crucial, yet difficult, to ensure both improve simultaneously.</p></li><li><p><strong>Non-Convergence.</strong> The unique loss function of GANs can lead to challenges in achieving <a href="https://jonathan-hui.medium.com/gan-why-it-is-so-hard-to-train-generative-advisory-networks-819a86b3750b">convergence</a> through gradient descent.</p></li></ul><h2>3. Diffusion Models</h2><p><a href="https://arxiv.org/abs/1503.03585">Diffusion models</a> represent an alternative generative AI approach, primarily used for creating synthetic images. They are used for the automatic generation of high-resolution images and videos; notable implementations include Stability AI's <a href="https://stability.ai/news/stable-diffusion-3">Stable Diffusion</a>, OpenAI's <a href="https://www.assemblyai.com/blog/how-dall-e-2-actually-works/">Dall-E</a> and <a href="https://openai.com/index/sora/">Sora</a>, and Meta's ventures into <a href="https://lightplane.github.io/">3D image generation</a>. These models operate through a dual-process system: a forward diffusion that adds noise, and a reverse diffusion that reconstructs the image. 
This method offers a robust framework based on principles of non-equilibrium thermodynamics.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!J-cz!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffce80dcb-76e0-4b84-b79d-5843d0270601_1072x273.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!J-cz!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffce80dcb-76e0-4b84-b79d-5843d0270601_1072x273.png 424w, https://substackcdn.com/image/fetch/$s_!J-cz!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffce80dcb-76e0-4b84-b79d-5843d0270601_1072x273.png 848w, https://substackcdn.com/image/fetch/$s_!J-cz!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffce80dcb-76e0-4b84-b79d-5843d0270601_1072x273.png 1272w, https://substackcdn.com/image/fetch/$s_!J-cz!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffce80dcb-76e0-4b84-b79d-5843d0270601_1072x273.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!J-cz!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffce80dcb-76e0-4b84-b79d-5843d0270601_1072x273.png" width="1072" height="273" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fce80dcb-76e0-4b84-b79d-5843d0270601_1072x273.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:273,&quot;width&quot;:1072,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:67037,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!J-cz!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffce80dcb-76e0-4b84-b79d-5843d0270601_1072x273.png 424w, https://substackcdn.com/image/fetch/$s_!J-cz!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffce80dcb-76e0-4b84-b79d-5843d0270601_1072x273.png 848w, https://substackcdn.com/image/fetch/$s_!J-cz!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffce80dcb-76e0-4b84-b79d-5843d0270601_1072x273.png 1272w, https://substackcdn.com/image/fetch/$s_!J-cz!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffce80dcb-76e0-4b84-b79d-5843d0270601_1072x273.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The forward diffusion is a <a href="https://en.wikipedia.org/wiki/Markov_chain">Markov chain</a> process. It incrementally introduces Gaussian noise into an image, transforming it step-by-step into a noisy version. 
This process is mathematically represented as:</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;q(x_{1:T}|x_0)=\\prod_{t=1}^{T}\\mathcal N(x_t;\\sqrt{1-\\beta_t}x_{t-1},\\beta_tI).&quot;,&quot;id&quot;:&quot;WMYGPTKNIM&quot;}" data-component-name="LatexBlockToDOM"></div><p>The mean of the normal distribution is </p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\sqrt{1-\\beta_t}x_{t-1}&quot;,&quot;id&quot;:&quot;VIYKNSGPNU&quot;}" data-component-name="LatexBlockToDOM"></div><p>and the variance is </p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;\\beta_t I.&quot;,&quot;id&quot;:&quot;REJIBRVCGR&quot;}" data-component-name="LatexBlockToDOM"></div><p>The sequence <em>&#946;<sub>1</sub>,&#8230;,&#946;<sub>T</sub></em>, known as a schedule, varies between 0 and 1.</p><p>The reverse diffusion employs a deep neural network, specifically a <a href="https://en.wikipedia.org/wiki/U-Net">U-Net</a> architecture, to reconstruct a new image from the noisy data. The goal is to learn a probability distribution</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;p_\\theta(x_{t-1}|x_t)=\\mathcal N(x_{t-1};\\mu_\\theta(x_t,t),\\Sigma_\\theta(x_t,t)),&quot;,&quot;id&quot;:&quot;YWMSELJLGF&quot;}" data-component-name="LatexBlockToDOM"></div><p>where <em>&#952;</em> represents the model parameters. 
Training this model involves maximizing the likelihood of matching the learned probability distributions of the equation above with the original distributions <em>q</em>.</p><h4><strong>Pros</strong></h4><ul><li><p><strong>Theoretical foundations.</strong> These models are grounded in the solid theoretical frameworks of <a href="https://arxiv.org/abs/1503.03585">thermodynamics</a> and differential equations.</p></li><li><p><strong>Efficient inference.</strong> They are capable of generating <a href="https://openaccess.thecvf.com/content/CVPR2022/html/Rombach_High-Resolution_Image_Synthesis_With_Latent_Diffusion_Models_CVPR_2022_paper.html">high-resolution images</a> efficiently, even on personal computing devices.</p></li><li><p><strong>High-quality outputs.</strong> The images produced are detailed and realistic, making them central to technologies like <a href="https://www.assemblyai.com/blog/how-dall-e-2-actually-works/">Dall-E</a> and <a href="https://openai.com/index/sora/">Sora</a>.</p></li></ul><h4><strong>Cons</strong></h4><ul><li><p><strong>Training time.</strong> Developing a diffusion model from scratch demands significant computational <a href="https://www.databricks.com/blog/stable-diffusion-2">resources and expertise</a>.</p></li><li><p><strong>Complexity.</strong> A deep understanding of the underlying <a href="https://medium.com/@schoolofcoreai5/generative-diffusion-models-compare-and-contrast-generative-diffusion-models-with-gans-and-their-bafe13b0f09a">theoretical principles</a> is necessary to effectively implement and optimize these models <a href="https://arxiv.org/abs/2312.02696">(Karras et al. 2024)</a>.</p></li></ul><div><hr></div><h1><strong>Evolutionary</strong> Methods</h1><p>Evolutionary methods comprise a collection of algorithms and techniques that iteratively construct synthetic data from a seed dataset. 
By applying certain operations of combination and mutation of data points, evolutionary methods look to generate synthetic data that is diverse and deep in the context of the initial dataset. </p><h2>1. <strong>Genetic Algorithms</strong></h2><p>The algorithm Private-GSD (Private Genetic Synthetic Data) proposed by <a href="https://arxiv.org/abs/2306.03257">Liu et al. (2023)</a> is a <a href="https://link.springer.com/article/10.1007/s40745-021-00354-9">genetic algorithm</a> designed to generate synthetic data that approximates the statistical properties of an underlying sensitive dataset while ensuring differential privacy. It leverages principles from biological evolution to iteratively optimize a population of synthetic datasets. By applying selection pressure and introducing variation through mutation and recombination, these methods evolve datasets that can produce increasingly diverse and high-quality synthetic examples over generations.</p><p>The process of Private-GSD is shown in the figure below.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!dOGN!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb679fd9c-7971-4845-8cf7-43c0163e9ffd_2484x2492.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!dOGN!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb679fd9c-7971-4845-8cf7-43c0163e9ffd_2484x2492.png 424w, https://substackcdn.com/image/fetch/$s_!dOGN!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb679fd9c-7971-4845-8cf7-43c0163e9ffd_2484x2492.png 848w, 
https://substackcdn.com/image/fetch/$s_!dOGN!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb679fd9c-7971-4845-8cf7-43c0163e9ffd_2484x2492.png 1272w, https://substackcdn.com/image/fetch/$s_!dOGN!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb679fd9c-7971-4845-8cf7-43c0163e9ffd_2484x2492.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!dOGN!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb679fd9c-7971-4845-8cf7-43c0163e9ffd_2484x2492.png" width="1456" height="1461" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b679fd9c-7971-4845-8cf7-43c0163e9ffd_2484x2492.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1461,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1123267,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!dOGN!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb679fd9c-7971-4845-8cf7-43c0163e9ffd_2484x2492.png 424w, https://substackcdn.com/image/fetch/$s_!dOGN!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb679fd9c-7971-4845-8cf7-43c0163e9ffd_2484x2492.png 848w, 
https://substackcdn.com/image/fetch/$s_!dOGN!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb679fd9c-7971-4845-8cf7-43c0163e9ffd_2484x2492.png 1272w, https://substackcdn.com/image/fetch/$s_!dOGN!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb679fd9c-7971-4845-8cf7-43c0163e9ffd_2484x2492.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><ol><li><p><strong>Initialization</strong>: Private-GSD starts with an initial population of synthetic datasets randomly generated. 
Each dataset in this population is a candidate solution to the problem of generating synthetic data that closely matches the statistical properties of the original dataset.</p></li><li><p><strong>Evaluation</strong>: Each candidate synthetic dataset is evaluated based on how well it approximates the statistical queries of interest on the original dataset. This evaluation is quantified using a loss function that measures the difference between the statistical properties of the synthetic and original datasets.</p></li><li><p><strong>Selection</strong>: The algorithm selects a subset of the best-performing synthetic datasets from the current population. These datasets are considered "elite" and are carried over to the next generation.</p></li><li><p><strong>Crossover and Mutation</strong>: Private-GSD generates new synthetic datasets by combining features from the elite datasets (crossover) and introducing random changes (mutations) to some of these datasets. This process is inspired by genetic recombination and mutation in biological evolution.</p></li><li><p><strong>Iteration</strong>: Steps 2 through 4 are repeated for a specified number of generations or until a convergence criterion is met. 
With each iteration, the population of synthetic datasets evolves, ideally becoming better approximations of the original dataset's statistical properties.</p></li><li><p><strong>Output</strong>: The algorithm outputs the best-performing synthetic dataset from the final generation.</p></li></ol><h4><strong>Pros</strong></h4><ul><li><p><strong>Flexibility.</strong> Private-GSD can generate synthetic data that approximates a wide range of statistical queries, including those that are non-differentiable and thus challenging for other algorithms that rely on gradient-based optimization.</p></li><li><p><strong>Privacy Preservation.</strong> By design, Private-GSD ensures differential privacy, making it suitable for generating synthetic data from sensitive datasets without compromising individual privacy.</p></li><li><p><strong>No Requirement for Differentiability.</strong> Unlike methods that rely on gradient-based optimization, Private-GSD does not require the objective function to be differentiable. This allows it to work with a broader range of statistical queries.</p></li></ul><h4><strong>Cons</strong></h4><ul><li><p><strong>Computational Complexity.</strong> The iterative nature of genetic algorithms, combined with the need to evaluate multiple candidate solutions in each generation, can make Private-GSD computationally intensive, especially for large datasets and complex statistical queries  <a href="https://arxiv.org/abs/2306.03257">(Liu et al. 2023)</a>.</p></li><li><p><strong>Parameter Sensitivity.</strong> The performance of Private-GSD can be sensitive to its hyperparameters, such as the size of the population, the number of generations, and the rates of crossover and mutation. Finding the optimal set of parameters may require extensive experimentation <a href="https://arxiv.org/abs/2306.03257">(Liu et al. 
2023)</a>.</p></li><li><p><strong>Convergence Guarantees</strong>: While Private-GSD is designed to improve the approximation of statistical properties over generations, there may not be strong theoretical guarantees on the convergence rate or the quality of the final synthetic dataset compared to the original dataset <a href="https://arxiv.org/abs/2306.03257">(Liu et al. 2023)</a>.</p></li></ul><h2>2. Self-instruct</h2><p>The process of training Large Language Models (LLMs) to follow instructions often involves labor-intensive tasks, particularly in dataset creation. OpenAI, for instance, employed numerous annotators to develop an open-domain instruction dataset for training <a href="https://proceedings.neurips.cc/paper_files/paper/2022/hash/b1efde53be364a73914f58805a001731-Abstract-Conference.html">InstructGPT</a>. Conversely, <a href="https://lmsys.org/blog/2023-03-30-vicuna/">Vicuna</a> utilized around 70,000 user-shared conversations from <a href="https://sharegpt.com/">ShareGPT.com</a> to fine-tune a <a href="https://llama.meta.com/">LLaMA</a> model. Human annotation introduces notable challenges. According to <a href="https://arxiv.org/abs/2304.12244">Xu et al. (2023)</a>, these include the tendency of human-created instructions to skew towards easier levels and the issue of annotator fatigue, which limits the production of complex instructions over extended periods. This can lead to LLMs generating <a href="https://arxiv.org/abs/2311.05232">hallucinations</a>. To counter these issues, synthetic open-domain instruction data generation is essential.</p><p>In computational linguistics, Self-instruct was developed by <a href="https://arxiv.org/abs/2212.10560">Wang et al. (2022)</a> as a technique that leverages the inference capabilities of LLMs to generate synthetic, open-domain instruction data. 
Self-instruct was used in <a href="https://crfm.stanford.edu/2023/03/13/alpaca.html">Alpaca</a>, a fine-tuned LLM from Stanford, and it was a key component in the tech stacks of AI companies like <a href="https://www.continuumlabs.ai/">Continuum Labs</a> and <a href="https://lightning.ai/">Lightning AI</a>. This showcases the technique's practical value and versatility.</p><p>The process of Self-instruct begins with a seed set of manually-written tasks. This initial step is crucial as it lays the foundation for the model to generate new instructions. GPT-3 is chosen for its robust inference capabilities. It's tasked with a simple job: take an instruction and an input, and produce an output.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ixBn!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdd0e03b1-9513-485a-a976-aae6ee124c9d_2678x1467.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ixBn!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdd0e03b1-9513-485a-a976-aae6ee124c9d_2678x1467.png 424w, https://substackcdn.com/image/fetch/$s_!ixBn!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdd0e03b1-9513-485a-a976-aae6ee124c9d_2678x1467.png 848w, https://substackcdn.com/image/fetch/$s_!ixBn!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdd0e03b1-9513-485a-a976-aae6ee124c9d_2678x1467.png 1272w, 
https://substackcdn.com/image/fetch/$s_!ixBn!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdd0e03b1-9513-485a-a976-aae6ee124c9d_2678x1467.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ixBn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdd0e03b1-9513-485a-a976-aae6ee124c9d_2678x1467.png" width="1456" height="798" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/dd0e03b1-9513-485a-a976-aae6ee124c9d_2678x1467.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:798,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:337394,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ixBn!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdd0e03b1-9513-485a-a976-aae6ee124c9d_2678x1467.png 424w, https://substackcdn.com/image/fetch/$s_!ixBn!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdd0e03b1-9513-485a-a976-aae6ee124c9d_2678x1467.png 848w, https://substackcdn.com/image/fetch/$s_!ixBn!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdd0e03b1-9513-485a-a976-aae6ee124c9d_2678x1467.png 1272w, 
https://substackcdn.com/image/fetch/$s_!ixBn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdd0e03b1-9513-485a-a976-aae6ee124c9d_2678x1467.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Source <a href="https://arxiv.org/abs/2212.10560">Wang et al. (2022)</a>.</figcaption></figure></div><ol><li><p><strong>Generating Task Instructions. </strong>Starting from a pool of 175 tasks, the model starts generating new instructions. 
In each iteration, a mix of human-written and model-generated instructions is used to enrich the task pool, ensuring diversity and complexity.</p></li><li><p><strong>Task Discrimination: Classification or Not? </strong>This stage involves discerning whether a task is a classification task. The model is prompted with a mix of classification and non-classification instructions to make this determination, a critical step for tailoring the generation approach.</p></li><li><p><strong>Crafting Input-Output Pairs. </strong>Depending on the task's nature, the model adopts either an <em>input-first </em>or <em>output-first </em>strategy. This bifurcation allows for a more nuanced generation of data, aligning closely with the task's requirements.</p></li><li><p><strong>Filtering and Postprocessing. </strong>The final step involves filtering to ensure quality. Using metrics like <a href="https://en.wikipedia.org/wiki/ROUGE_(metric)">ROUGE-L</a> similarity, the model removes redundant or low-quality instructions, refining the task pool to only include the most unique tasks.</p></li></ol><h4>Pros</h4><ul><li><p><strong>Diverse Instruction Sets.</strong> By generating instructions from the model itself, Self-instruct facilitates the creation of a more diverse and creative set of tasks.</p></li><li><p><strong>Cost-effectiveness and Scalability.</strong> Generating instructions using the model itself is more cost-effective compared to the traditional method of collecting large-scale human-annotated datasets.</p></li><li><p><strong>Reduction in Dependency on Human-Written Instructions.</strong> Self-instruct minimizes this dependency by generating its own instructions.</p></li></ul><h4><strong>Cons</strong></h4><ul><li><p><strong>Model Dependency:</strong> The technique's reliance on a specific LLM inherits its limitations <a href="https://arxiv.org/abs/2212.10560">(Wang et al. 
2022)</a>.</p></li><li><p><strong>Bias Reinforcement:</strong> There's a struggle to produce balanced labels, inadvertently amplifying the model's inherent biases <a href="https://arxiv.org/abs/2212.10560">(Wang et al. 2022)</a>.</p></li><li><p><strong>Dataset Growth Plateau:</strong> The exponential growth of new instructions eventually levels off, indicating a saturation point <a href="https://arxiv.org/abs/2212.10560">(Wang et al. 2022)</a>.</p></li></ul><h2>3. Evol-instruct</h2><p>Evol-instruct is a technique revolutionizing the creation of synthetic open-domain instruction data across various difficulty levels. Evol-instruct has been instrumental in projects like <a href="https://ragas.io/">Ragas</a>'s <a href="https://medium.com/machine-learning-intuition/retrieval-augmented-generation-rag-control-your-models-knowledge-and-hallucinations-ea3c6345a659">RAG</a> pipeline and the <a href="https://arxiv.org/abs/2306.08568">WizardCoder</a> LLM, surpassing models like Anthropic's <a href="https://www.anthropic.com/claude">Claude</a> and Google's <a href="https://gemini.google.com/">Bard</a> in performance. Its application continues to expand across AI enterprises, like <a href="https://www.clarifai.com/blog/wizardcoder-large-language-model-for-code">Clarifai</a>, demonstrating its pivotal role in advancing LLM capabilities.</p><p>The core concept involves evolving a known set of instruction-answer pairs over a series of epochs, each designed to enhance the dataset's complexity, richness, and diversity.</p><p>The initial dataset comprises instruction-reply pairs of the form</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;D_k(0)=(I_k(0),R_k(0))&quot;,&quot;id&quot;:&quot;VRDFVWEMLG&quot;}" data-component-name="LatexBlockToDOM"></div><p>for &#119896;<em> </em>instructions. 
Each epoch &#119905; updates the dataset to <em>D(t+1) </em>by refining each instruction &#119868;(&#119905;)&#8203; through a prompt to an LLM, which then generates an improved instruction &#119868;(&#119905;+1)&#8203;. This new instruction is used to obtain a corresponding answer <em>R</em>(&#119905;+1). After &#119872; epochs, this iterative process results in multiple datasets &#119863;(1),&#8230;,&#119863;(&#119872;).</p><p>The evolution process bifurcates into two types of prompts: <em>in-depth</em> and <em>in-breadth</em> evolving. In-depth evolving involves enhancing instructions through adding constraints, deepening content, concretizing details, increasing reasoning steps, and complicating inputs. Each modification aims to incrementally raise the instruction's difficulty, with each rewrite permitted to add only 10 to 20 words to the original instruction.</p><p>In-breadth evolving, on the other hand, focuses on generating new instructions from existing ones to broaden topic and skill coverage and enhance diversity.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!CdMr!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe75e54ac-5668-44f8-8fc5-902d786e6110_1706x1292.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!CdMr!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe75e54ac-5668-44f8-8fc5-902d786e6110_1706x1292.png 424w, https://substackcdn.com/image/fetch/$s_!CdMr!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe75e54ac-5668-44f8-8fc5-902d786e6110_1706x1292.png 848w, 
https://substackcdn.com/image/fetch/$s_!CdMr!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe75e54ac-5668-44f8-8fc5-902d786e6110_1706x1292.png 1272w, https://substackcdn.com/image/fetch/$s_!CdMr!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe75e54ac-5668-44f8-8fc5-902d786e6110_1706x1292.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!CdMr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe75e54ac-5668-44f8-8fc5-902d786e6110_1706x1292.png" width="1456" height="1103" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e75e54ac-5668-44f8-8fc5-902d786e6110_1706x1292.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1103,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:290678,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!CdMr!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe75e54ac-5668-44f8-8fc5-902d786e6110_1706x1292.png 424w, https://substackcdn.com/image/fetch/$s_!CdMr!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe75e54ac-5668-44f8-8fc5-902d786e6110_1706x1292.png 848w, 
https://substackcdn.com/image/fetch/$s_!CdMr!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe75e54ac-5668-44f8-8fc5-902d786e6110_1706x1292.png 1272w, https://substackcdn.com/image/fetch/$s_!CdMr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe75e54ac-5668-44f8-8fc5-902d786e6110_1706x1292.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Source <a href="https://arxiv.org/abs/2304.12244">Xu et al. 
(2023)</a>.</figcaption></figure></div><p>An additional step, <em>Elimination evolving</em>, filters out unsuccessful instruction evolutions. An instruction is deemed unsuccessful if it adds no new information, complicates response generation for the LLM, consists only of punctuation and stop words, or merely copies words from the prompt.</p><p>Upon completing all epochs, the instruction-answer pairs from &#119863;(1),&#8230;,&#119863;(&#119872;) are shuffled randomly to ensure a uniform distribution of varying difficulty levels.</p><h4><strong>Pros</strong></h4><ul><li><p><strong>Variety and Challenge.</strong> The technique excels in generating tasks of varying difficulty, pushing the boundaries of what models can learn <a href="https://arxiv.org/abs/2304.12244">(Xu et al. 2023)</a>.</p></li><li><p><strong>Quality. </strong>Human annotator experiments reveal superior performance on complex instructions, showcasing the high caliber of the synthetic dataset <a href="https://arxiv.org/abs/2304.12244">(Xu et al. 2023)</a>.</p></li></ul><h4>Cons</h4><ul><li><p><strong>Failure Rate in Instruction Evolution.</strong> The Evol-instruct method sometimes fails during the instruction evolution process <a href="https://arxiv.org/abs/2304.12244">(Xu et al. 2023)</a>.</p></li><li><p><strong>Complexity Management.</strong> While Evol-instruct is designed to increase the complexity of instructions, managing this complexity effectively is challenging. There is a risk of generating instructions that are too complex <a href="https://arxiv.org/abs/2304.12244">(Xu et al. 2023)</a>.</p></li><li><p><strong>Quality Control.</strong> Ensuring consistent quality across the evolved instructions is another challenge. The method relies heavily on the initial quality of the seed instructions and the effectiveness of the LLM used in the evolution process <a href="https://arxiv.org/abs/2304.12244">(Xu et al. 
2023)</a>.</p></li></ul><div><hr></div><h1><strong>Marginal-based Methods</strong></h1><p>Marginal-based methods focus on modelling the marginal distributions and inter-attribute dependencies in the original data. They typically construct a probabilistic model, such as a graphical model or <a href="https://academic.oup.com/jamia/article-abstract/28/4/801/6046159?redirectedFrom=fulltext&amp;login=false">Bayesian network</a>, to capture these statistical properties. Synthetic data is then generated by sampling from this model. Some marginal-based methods provide rigorous guarantees of differential privacy, making them well-suited for tabular data synthesis when preserving privacy is crucial.</p><p>Marginal-based methods are a nascent area of private synthetic data generation and have not yet been adopted as widely as competing techniques like GANs. <a href="https://www.tmlt.io/">Tumult Labs</a> is looking at these techniques and <a href="https://www.tmlt.io/resources/benchmarking-differentially-private-synthetic-data-generation-algorithms">currently researching</a> their potential.</p><h2><strong>1. Multiplicative Weights Exponential Mechanism (MWEM)</strong></h2><p>The MWEM (Multiplicative Weights Exponential Mechanism) algorithm proposed by <a href="https://arxiv.org/abs/1012.4763">Hardt et al. (2010)</a> is designed for differentially private data release, particularly focusing on producing synthetic datasets that respect differential privacy while answering a set of linear queries. The algorithm is a combination of the multiplicative weights update rule and the <a href="https://en.wikipedia.org/wiki/Exponential_mechanism">Exponential Mechanism</a>, which are used to iteratively refine an approximation of a dataset in a way that balances privacy and accuracy.</p><p>Let <em>D</em> be a dataset with <em>d</em> columns. 
A marginal for a subset <em>r</em> of the <em>d</em> columns is a histogram for <em>r</em>, that is, a table that counts the number of occurrences of tuples with columns in <em>r</em>. A marginal is often referred to as a <em>marginal query</em>. A <em>workload</em> <em>W</em> is a collection of marginal queries. A marginal-based method takes a workload as input, then it adapts intelligently to the queries in the workload, and generates synthetic data that is tailored to these queries.</p><p>MWEM has three main components. The Multiplicative Weights Update Rule adjusts the weights of an approximating dataset to better reflect the true dataset based on the discrepancies in query responses. The <a href="https://en.wikipedia.org/wiki/Exponential_mechanism">Exponential Mechanism</a> selects the most informative queries to improve the dataset approximation. The <a href="https://en.wikipedia.org/wiki/Additive_noise_differential_privacy_mechanisms">Laplace Mechanism</a> adds noise to the query results to ensure differential privacy.</p><p>The MWEM algorithm proceeds as shown in the figure below.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!kgjH!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F99edc5c6-0bce-4720-86e7-f11c3f170126_1644x566.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!kgjH!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F99edc5c6-0bce-4720-86e7-f11c3f170126_1644x566.png 424w, 
https://substackcdn.com/image/fetch/$s_!kgjH!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F99edc5c6-0bce-4720-86e7-f11c3f170126_1644x566.png 848w, https://substackcdn.com/image/fetch/$s_!kgjH!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F99edc5c6-0bce-4720-86e7-f11c3f170126_1644x566.png 1272w, https://substackcdn.com/image/fetch/$s_!kgjH!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F99edc5c6-0bce-4720-86e7-f11c3f170126_1644x566.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!kgjH!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F99edc5c6-0bce-4720-86e7-f11c3f170126_1644x566.png" width="1456" height="501" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/99edc5c6-0bce-4720-86e7-f11c3f170126_1644x566.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:501,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:67176,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!kgjH!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F99edc5c6-0bce-4720-86e7-f11c3f170126_1644x566.png 424w, 
https://substackcdn.com/image/fetch/$s_!kgjH!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F99edc5c6-0bce-4720-86e7-f11c3f170126_1644x566.png 848w, https://substackcdn.com/image/fetch/$s_!kgjH!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F99edc5c6-0bce-4720-86e7-f11c3f170126_1644x566.png 1272w, https://substackcdn.com/image/fetch/$s_!kgjH!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F99edc5c6-0bce-4720-86e7-f11c3f170126_1644x566.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line 
x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>It starts with an initial approximation of the dataset, typically a uniform distribution over the data domain. Then it uses the exponential mechanism to select a query that is poorly explained by the current approximation. Then the selected query on the true dataset is measured, adding noise via the Laplace mechanism to ensure privacy. These steps of select-measure are repeated for a number of iterations. Finally, the output is an average of the approximations across all iterations, which forms the synthetic dataset.</p><p>An improved algorithm based on MWEM was developed by <a href="https://arxiv.org/abs/2201.12677">McKenna et al. (2022)</a>, where MWEM is enhanced with the <a href="https://arxiv.org/abs/1901.09136">Probabilistic Graphical Model</a> or PGM and the <a href="https://en.wikipedia.org/wiki/Additive_noise_differential_privacy_mechanisms">Gaussian mechanism</a> is used instead of the Laplace mechanism.</p><h4><strong>Pros</strong></h4><ul><li><p><strong>Privacy.</strong> MWEM is designed with differential privacy mechanisms <a href="https://arxiv.org/abs/2201.12677">(McKenna et al. 2022)</a>.</p></li><li><p><strong>Effectiveness.</strong> <a href="https://arxiv.org/abs/2201.12677">MWEM+PGM</a> is currently the best method for generating synthetic data with privacy guarantees <a href="https://arxiv.org/abs/2112.09238">(Tao et al. 2023)</a>.</p></li></ul><h4><strong>Cons</strong></h4><ul><li><p><strong>Limited data types.</strong> MWEM has only been tested on discrete data. Much research still needs to be done in categorical and numerical data <a href="https://arxiv.org/abs/2201.12677">(McKenna et al. 
2022)</a>.</p></li><li><p><strong>Lack of maturity.</strong> The field of marginal-based methods is very recent, and still much research is needed to have a good understanding of its impact and applications <a href="https://www.jair.org/index.php/jair/article/view/14649">(Ponomareva et al. 2023)</a>.</p></li></ul><h2>2. <strong>PrivBayes</strong></h2><p>PrivBayes is a differentially private method proposed by <a href="https://dl.acm.org/doi/abs/10.1145/3134428">Zhang et al. (2017)</a> for releasing high-dimensional data, which is effective when dealing with datasets that contain a large number of attributes. The algorithm operates in three main phases: network learning, distribution learning, and data synthesis.</p><p>In the first phase, PrivBayes constructs a <a href="https://en.wikipedia.org/wiki/Bayesian_network">Bayesian network</a> that approximates the full-dimensional distribution of the dataset. This network is built using a differentially private method, ensuring that the privacy of the data is maintained. The Bayesian network is a graphical model that represents a set of variables and their conditional dependencies via a directed acyclic graph.</p><p>Once the Bayesian network is established, PrivBayes computes a set of differentially private conditional distributions for the data in the subspaces defined by the network. This involves adding noise to the distributions to ensure differential privacy, typically using mechanisms like the <a href="https://en.wikipedia.org/wiki/Additive_noise_differential_privacy_mechanisms">Laplace mechanism</a>.</p><p>In the final phase, PrivBayes uses the noisy conditional distributions and the structure of the Bayesian network to generate a synthetic dataset. 
This dataset is an approximation of the original data but is constructed in such a way that it maintains the privacy of the individuals in the dataset.</p><h4><strong>Pros</strong></h4><ul><li><p><strong>Handling High-Dimensional Data.</strong> PrivBayes handles the challenge of releasing high-dimensional data, which is always a barrier for traditional differential privacy methods.</p></li><li><p><strong>Reduced Noise Addition.</strong> By focusing on low-dimensional marginals, PrivBayes can inject less noise resulting in higher utility of the released data.</p></li><li><p><strong>Flexibility in Query Evaluation.</strong> The synthetic data generated by PrivBayes can support a wide range of queries allowing the estimation of various statistical properties.</p></li><li><p><strong>Scalability</strong>. The method scales well with the size of the data and the number of attributes, making it suitable for large datasets.</p></li></ul><h4><strong>Cons</strong></h4><ul><li><p><strong>Complexity in Network Construction.</strong> The process of learning a differentially private Bayesian network is complex and can be computationally intensive.</p></li><li><p><strong>Accuracy Dependence on Network Quality.</strong> The accuracy of the synthetic data heavily depends on the quality of the Bayesian network constructed.</p></li><li><p><strong>Parameter Sensitivity</strong>. The performance of PrivBayes can be sensitive to the choice of parameters, such as the privacy budget allocated to different phases of the algorithm.</p></li></ul><div><hr></div><h2>TLDR</h2><p>This is a quick summary of the key points covered in the article about different techniques for generating synthetic data for machine learning.</p><p><strong>Generative Methods</strong> use AI models to generate new synthetic data. Popular techniques include Variational Autoencoders (VAEs), Generative Adversarial Networks (GANs), and Diffusion Models. 
VAEs compress data into a lower-dimensional space and then reconstruct it, allowing for data generation. GANs have two competing AI models, a generator creating fake data and a discriminator identifying real vs. fake data. Diffusion Models gradually add noise to an image and then learn to reverse the process, generating new images.</p><p><strong>Evolutionary Methods</strong> iteratively construct synthetic data from a seed dataset by applying operations like combination and mutation. Techniques like Private-GSD use genetic algorithms, while Self-instruct and Evol-instruct leverage large language models to generate and evolve open-domain instruction data.</p><p><strong>Marginal-based Methods</strong> model the marginal distributions and inter-attribute dependencies in the original data using probabilistic models like graphical models or Bayesian networks. Synthetic data is then generated by sampling from these models. Techniques like MWEM and PrivBayes provide differential privacy guarantees, making them suitable for sensitive tabular data.</p><p>The table below summarizes the different aspects of the methods.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!61rv!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F348716d8-5667-405b-9e12-5f401b8e3681_2346x860.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!61rv!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F348716d8-5667-405b-9e12-5f401b8e3681_2346x860.png 424w, 
https://substackcdn.com/image/fetch/$s_!61rv!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F348716d8-5667-405b-9e12-5f401b8e3681_2346x860.png 848w, https://substackcdn.com/image/fetch/$s_!61rv!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F348716d8-5667-405b-9e12-5f401b8e3681_2346x860.png 1272w, https://substackcdn.com/image/fetch/$s_!61rv!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F348716d8-5667-405b-9e12-5f401b8e3681_2346x860.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!61rv!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F348716d8-5667-405b-9e12-5f401b8e3681_2346x860.png" width="1456" height="534" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/348716d8-5667-405b-9e12-5f401b8e3681_2346x860.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:534,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:215201,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!61rv!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F348716d8-5667-405b-9e12-5f401b8e3681_2346x860.png 424w, 
https://substackcdn.com/image/fetch/$s_!61rv!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F348716d8-5667-405b-9e12-5f401b8e3681_2346x860.png 848w, https://substackcdn.com/image/fetch/$s_!61rv!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F348716d8-5667-405b-9e12-5f401b8e3681_2346x860.png 1272w, https://substackcdn.com/image/fetch/$s_!61rv!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F348716d8-5667-405b-9e12-5f401b8e3681_2346x860.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line 
x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><p>Synthetic data is disrupting the AI industry status quo. As techniques like generative models, evolutionary methods, and marginal-based approaches continue to advance, the potential applications of synthetic data are boundless. Embracing this technology can empower businesses to unlock more accessible, reliable, and impactful AI.</p>]]></content:encoded></item><item><title><![CDATA[The Inference Interference]]></title><description><![CDATA[How to design verifiable inference that&#8217;s not slow]]></description><link>https://blog.bagel.com/p/the-inference-interference</link><guid isPermaLink="false">https://blog.bagel.com/p/the-inference-interference</guid><dc:creator><![CDATA[Bidhan Roy]]></dc:creator><pubDate>Thu, 11 Apr 2024 13:58:35 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!kqUN!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd18229c6-c015-453a-a593-f1ba5acf2908_2245x1587.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Blind trust in black-box AI apps is a ticking time bomb. It threatens to destroy our faith in the technology. Take ChatGPT. Users are forced to rely on unverifiable claims about what models are generating their inferences. Recent <a href="https://news.ycombinator.com/item?id=36633995">performance degradation of GPT-4</a> has heightened suspicions. People wonder if OpenAI is <a href="https://www.reddit.com/r/ChatGPT/comments/17nzewn/a_theory_on_why_gpt4_got_worse/">even using the advertised model version</a>. This erodes user confidence. 
This highlights the urgent need for verifiability.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!kqUN!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd18229c6-c015-453a-a593-f1ba5acf2908_2245x1587.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!kqUN!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd18229c6-c015-453a-a593-f1ba5acf2908_2245x1587.png 424w, https://substackcdn.com/image/fetch/$s_!kqUN!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd18229c6-c015-453a-a593-f1ba5acf2908_2245x1587.png 848w, https://substackcdn.com/image/fetch/$s_!kqUN!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd18229c6-c015-453a-a593-f1ba5acf2908_2245x1587.png 1272w, https://substackcdn.com/image/fetch/$s_!kqUN!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd18229c6-c015-453a-a593-f1ba5acf2908_2245x1587.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!kqUN!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd18229c6-c015-453a-a593-f1ba5acf2908_2245x1587.png" width="1456" height="1029" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d18229c6-c015-453a-a593-f1ba5acf2908_2245x1587.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1029,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:4878712,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!kqUN!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd18229c6-c015-453a-a593-f1ba5acf2908_2245x1587.png 424w, https://substackcdn.com/image/fetch/$s_!kqUN!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd18229c6-c015-453a-a593-f1ba5acf2908_2245x1587.png 848w, https://substackcdn.com/image/fetch/$s_!kqUN!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd18229c6-c015-453a-a593-f1ba5acf2908_2245x1587.png 1272w, https://substackcdn.com/image/fetch/$s_!kqUN!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd18229c6-c015-453a-a593-f1ba5acf2908_2245x1587.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Decentralized AI networks amplify this problem, as nodes host various models and compete for users' attention and money. We need a way to verify their claims and protect clients from fraud. For example, paying top dollar for the latest LLAMA outputs but getting inferior model results. The stakes couldn't be higher in a decentralized AI ecosystem.</p><p>The solution to this is <strong>Verifiable inference</strong>, using mechanisms to verify the use of specific models for generating inferences. This enables honest competition and protects clients.</p><p>While zero-knowledge proofs as a solution have been explored extensively by the decentralized AI community, and we discussed it in-depth <a href="https://blog.bagel.net/p/with-great-data-comes-great-responsibility">in a previous article</a>, they are currently too slow and expensive. 
At Bagel, we have investigated more practical and widely used alternatives from the traditional AI world, such as <strong>watermarking</strong> (<a href="https://www.usenix.org/conference/usenixsecurity21/presentation/jia">Jia et al. 2021</a>) and <strong>fingerprinting</strong> (<a href="https://dl.acm.org/doi/abs/10.1145/3323873.3325042">Chen et al. 2019</a>). Initially developed to prevent model extraction attacks, these methods also serve as unique model identifiers thus enabling verifiability. Compared to zero-knowledge approaches, they offer enhanced efficiency and ease of use, better aligning with current applications of verifiable deep learning models.</p><p>Today, we're open sourcing our research. Our goal is to empower the decentralized AI community and inspire builders to explore diverse, high-performance solutions from traditional AI. Together, we can create a more robust decentralized AI ecosystem that benefits the mainstream AI market.</p><p><em><strong>If you're in a rush, we have a TLDR at the end.</strong></em></p><div><hr></div><h2>Watermarking</h2><h3>How it works</h3><p>In the landscape of machine learning, protecting the intellectual property (IP) rights of foundation models is a paramount concern. Enter watermarks &#8211; a clever defensive mechanism against <a href="https://iopscience.iop.org/article/10.1088/1742-6596/2189/1/012024/meta">model extraction attacks</a>. Just like classical digital watermarking, a form of <a href="https://en.wikipedia.org/wiki/Steganography">steganography</a> where a message is concealed within a digital object, watermarks for machine learning models serve as a guardian of ownership, authenticity, and integrity. They help safeguard IP and enforce licenses, ensuring that the hard work of model creators doesn't go unrecognized.</p><p><a href="https://ieeexplore.ieee.org/abstract/document/10143370">Lederer et al. 
(2023)</a> outline a plethora of characteristics that a watermark for a machine learning model must possess, but three stand out as the most crucial:</p><p><strong>Effectiveness</strong> - Effectiveness means that the watermark can be verified at any time by the model's creator.</p><p><strong>Fidelity</strong> - Fidelity ensures that the model's accuracy remains unaffected by the watermark's presence.</p><p><strong>Robustness</strong> - That's the watermark's ability to withstand a barrage of attacks, from fine-tuning and model compression to watermark detection, removal, overwriting, or invalidation.</p><p>But how does the magic happen? Any watermarking embedding method consists of two algorithms: an extraction algorithm that retrieves the watermark from a model, and a verification algorithm that confirms its presence.</p><p>There are <a href="https://ieeexplore.ieee.org/abstract/document/10143370">two main families of watermark embedding techniques</a>: i) white-box watermarking and, ii) black-box watermarking. 
The general process of watermarking is illustrated in the figure below.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!LV_H!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13503ea5-a11a-40d6-8467-e1336b8ee9a4_1962x1035.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!LV_H!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13503ea5-a11a-40d6-8467-e1336b8ee9a4_1962x1035.png 424w, https://substackcdn.com/image/fetch/$s_!LV_H!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13503ea5-a11a-40d6-8467-e1336b8ee9a4_1962x1035.png 848w, https://substackcdn.com/image/fetch/$s_!LV_H!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13503ea5-a11a-40d6-8467-e1336b8ee9a4_1962x1035.png 1272w, https://substackcdn.com/image/fetch/$s_!LV_H!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13503ea5-a11a-40d6-8467-e1336b8ee9a4_1962x1035.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!LV_H!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13503ea5-a11a-40d6-8467-e1336b8ee9a4_1962x1035.png" width="1456" height="768" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/13503ea5-a11a-40d6-8467-e1336b8ee9a4_1962x1035.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:768,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:264456,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!LV_H!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13503ea5-a11a-40d6-8467-e1336b8ee9a4_1962x1035.png 424w, https://substackcdn.com/image/fetch/$s_!LV_H!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13503ea5-a11a-40d6-8467-e1336b8ee9a4_1962x1035.png 848w, https://substackcdn.com/image/fetch/$s_!LV_H!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13503ea5-a11a-40d6-8467-e1336b8ee9a4_1962x1035.png 1272w, https://substackcdn.com/image/fetch/$s_!LV_H!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13503ea5-a11a-40d6-8467-e1336b8ee9a4_1962x1035.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>White-box watermarking requires full access to the model. The idea is to embed a signature <em><strong>s</strong></em> into the model's weights during training by adding an extra term to the loss function. This is done carefully to maintain the model's accuracy. The embedding is achieved by modifying the loss function through a <a href="https://arxiv.org/abs/1802.02601">regularizer parameter</a>. Let <em><strong>w</strong></em> be a vector of all weights in a model. The goal is to embed <em><strong>s</strong></em> into <em><strong>w</strong></em> using an embedding matrix <em><strong>M</strong></em>, which acts as a secret key usually held by the model owner. The watermark is extracted by applying <em><strong>M</strong></em> to the weight vector <em><strong>w</strong></em>, followed by a threshold function (see <a href="https://arxiv.org/abs/1802.02601">Nagai et al. 
(2018)</a> for details).</p><p>Black-box watermarking, on the other hand, only requires query access to the model. It creates backdoors on data using <a href="https://proceedings.neurips.cc/paper/2017/hash/9d7311ba459f9e45ed746755a32dcd11-Abstract.html">data poisoning</a>. In this context, a backdoor is a set of input-output pairs known to the model owner that triggers a behavior not predictable by model consumers. For example, deliberately adding wrong labels to data points. The goal is to ensure that the model performs correctly on the main classification task, but the backdoor exhibits a specific behavior defined by the model owner.</p><p>Next we show two applications of watermarks in the context of verifying ownership of models and verifying inference from models.</p><h3>Watermarking for Verifiable Inference</h3><h4>Public Models</h4><p>Black-box watermarking is a game-changer when it comes to verifiable inference for public models. It's like a secret handshake between the model creator and the user, ensuring that the model is authentic and trustworthy.</p><p>Here's how it works: the model creator embeds a watermark and then discloses its presence to the world. It's like a badge of honor, a mark of quality. 
Any user who interacts with the model can then authenticate it, verifying that it's the real deal.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!THJT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9619d517-1468-4ae0-a1a4-a2778c967dd2_2600x1181.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!THJT!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9619d517-1468-4ae0-a1a4-a2778c967dd2_2600x1181.png 424w, https://substackcdn.com/image/fetch/$s_!THJT!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9619d517-1468-4ae0-a1a4-a2778c967dd2_2600x1181.png 848w, https://substackcdn.com/image/fetch/$s_!THJT!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9619d517-1468-4ae0-a1a4-a2778c967dd2_2600x1181.png 1272w, https://substackcdn.com/image/fetch/$s_!THJT!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9619d517-1468-4ae0-a1a4-a2778c967dd2_2600x1181.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!THJT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9619d517-1468-4ae0-a1a4-a2778c967dd2_2600x1181.png" width="1456" height="661" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9619d517-1468-4ae0-a1a4-a2778c967dd2_2600x1181.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:661,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:299372,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!THJT!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9619d517-1468-4ae0-a1a4-a2778c967dd2_2600x1181.png 424w, https://substackcdn.com/image/fetch/$s_!THJT!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9619d517-1468-4ae0-a1a4-a2778c967dd2_2600x1181.png 848w, https://substackcdn.com/image/fetch/$s_!THJT!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9619d517-1468-4ae0-a1a4-a2778c967dd2_2600x1181.png 1272w, https://substackcdn.com/image/fetch/$s_!THJT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9619d517-1468-4ae0-a1a4-a2778c967dd2_2600x1181.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7206275/">Zhong et al. (2020)</a> took this concept to the next level. They developed a black-box watermarking technique that adds new labels to inputs that have nothing to do with the original dataset. It's like adding a secret code that only the model creator knows.</p><p>When a user wants to verify the watermark, they simply query these special inputs. If the watermark is present in the inference results, it's a clear sign that the model is authentic. It's like a digital signature that can't be forged.</p><p><a href="https://arxiv.org/abs/1904.00344">Chen et al. (2019)</a> took a slightly different approach. They generated watermark keys and used fine-tuning to embed a signature in the model. It's like hiding a secret message in plain sight.</p><p>With the watermark keys in hand, any user can interact with the model and extract the signature through its predictions. 
It's like unlocking a hidden layer of authentication, ensuring that the model is genuine and trustworthy.</p><h4>Private Models</h4><p>When a machine learning model is privately hosted, the mechanism is almost the same: black-box watermarking is used, except the model owner hosts the model privately. Model consumers get access to the model via an API or gateway. First the model creator runs the watermark algorithm on his model and loads it to the hosting service. Then, via API access, users can query the model via inputs and obtain inferences as output. The backdoor is still public, so users can later verify the correct use of the model.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!1ew9!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ba4bd8d-7e7f-45d3-8c20-9771cb9f3853_2612x1108.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!1ew9!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ba4bd8d-7e7f-45d3-8c20-9771cb9f3853_2612x1108.png 424w, https://substackcdn.com/image/fetch/$s_!1ew9!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ba4bd8d-7e7f-45d3-8c20-9771cb9f3853_2612x1108.png 848w, https://substackcdn.com/image/fetch/$s_!1ew9!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ba4bd8d-7e7f-45d3-8c20-9771cb9f3853_2612x1108.png 1272w, https://substackcdn.com/image/fetch/$s_!1ew9!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ba4bd8d-7e7f-45d3-8c20-9771cb9f3853_2612x1108.png 1456w" 
sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!1ew9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ba4bd8d-7e7f-45d3-8c20-9771cb9f3853_2612x1108.png" width="1456" height="618" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2ba4bd8d-7e7f-45d3-8c20-9771cb9f3853_2612x1108.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:618,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:306432,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!1ew9!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ba4bd8d-7e7f-45d3-8c20-9771cb9f3853_2612x1108.png 424w, https://substackcdn.com/image/fetch/$s_!1ew9!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ba4bd8d-7e7f-45d3-8c20-9771cb9f3853_2612x1108.png 848w, https://substackcdn.com/image/fetch/$s_!1ew9!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ba4bd8d-7e7f-45d3-8c20-9771cb9f3853_2612x1108.png 1272w, https://substackcdn.com/image/fetch/$s_!1ew9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ba4bd8d-7e7f-45d3-8c20-9771cb9f3853_2612x1108.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" 
type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>With knowledge of the backdoor, users can then run the verification algorithm to check for the watermark. The verification algorithm will tell if the correct model was used or not.</p><p>For private models offering API access, <a href="https://arxiv.org/abs/1911.08053">Xu and Yuan (2019)</a> showed how to add unique serial numbers to the  trigger-set of watermarks. Their serial number technique is independent of labels and can be supported by digital certification authorities.</p><div><hr></div><h2>Fingerprinting</h2><h3>How it works</h3><p>Fingerprinting is a fascinating approach to model identification that differs from watermarking in a key way. 
<em>While watermarking embeds a secret message into the model, fingerprinting relies on the model's inherent characteristics to create a unique identifier</em> (<a href="https://dl.acm.org/doi/abs/10.1145/3323873.3325042">Chen et al., 2019</a>; <a href="https://arxiv.org/abs/1912.00888">Lukas et al., 2019</a>). This identifier, akin to a digital DNA, can be transferred to any models derived from the original, making it a robust tool for proving model provenance.</p><p>A fingerprinting scheme consists of two essential components: a generation algorithm for creating fingerprints and a verification algorithm for confirming their presence.</p><ul><li><p><code>Generate(M,D)</code>: Given white-box access to a model <code>M</code> and a dataset <code>D</code>, this procedure outputs a fingerprint <code>F</code> and verification keys <code>K={M(x) : x in F}</code>.</p></li></ul><p>The figure below illustrates the process of generating fingerprints.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!1GJt!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b5ff4fd-5a43-40aa-8f31-0cada18ec1d7_1848x633.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!1GJt!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b5ff4fd-5a43-40aa-8f31-0cada18ec1d7_1848x633.png 424w, https://substackcdn.com/image/fetch/$s_!1GJt!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b5ff4fd-5a43-40aa-8f31-0cada18ec1d7_1848x633.png 848w, 
https://substackcdn.com/image/fetch/$s_!1GJt!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b5ff4fd-5a43-40aa-8f31-0cada18ec1d7_1848x633.png 1272w, https://substackcdn.com/image/fetch/$s_!1GJt!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b5ff4fd-5a43-40aa-8f31-0cada18ec1d7_1848x633.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!1GJt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b5ff4fd-5a43-40aa-8f31-0cada18ec1d7_1848x633.png" width="1456" height="499" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9b5ff4fd-5a43-40aa-8f31-0cada18ec1d7_1848x633.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:499,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:212712,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!1GJt!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b5ff4fd-5a43-40aa-8f31-0cada18ec1d7_1848x633.png 424w, https://substackcdn.com/image/fetch/$s_!1GJt!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b5ff4fd-5a43-40aa-8f31-0cada18ec1d7_1848x633.png 848w, 
https://substackcdn.com/image/fetch/$s_!1GJt!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b5ff4fd-5a43-40aa-8f31-0cada18ec1d7_1848x633.png 1272w, https://substackcdn.com/image/fetch/$s_!1GJt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b5ff4fd-5a43-40aa-8f31-0cada18ec1d7_1848x633.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><ul><li><p><code>Verify(M&#8217;(F),K)</code>: Given black-box access to a model <code>M&#8217;</code>, fingerprint <code>F</code>, and 
verification keys <code>K</code>, this procedure outputs 1 if <code>M&#8217;</code> is verified by the fingerprint and 0 otherwise.</p></li></ul><p>Generating fingerprints is a complex task that requires a deep understanding of the model and its training data. One approach is to use <em>adversarial examples</em>. By adding carefully crafted noise to a correctly predicted data point, the model can be tricked into predicting a desired label. The data point and noise pair form an adversarial example, which can serve as a fingerprint.</p><p><a href="https://arxiv.org/pdf/1910.12903.pdf">Cao et al. (2019)</a> demonstrated how to construct adversarial examples near the model's decision boundary and leverage their transferability to surrogate models. <a href="https://openaccess.thecvf.com/content/CVPR2022/papers/Peng_Fingerprinting_Deep_Neural_Networks_Globally_via_Universal_Adversarial_Perturbations_CVPR_2022_paper.pdf">Peng et al. (2022)</a> employed <em>Universal Adversarial Perturbations</em> (UAPs), which are vectors drawn from a low-dimensional subspace containing most normal vectors of the <a href="https://en.wikipedia.org/wiki/Decision_boundary">decision boundary</a>. UAPs can function as a model's fingerprint, allowing the owner to verify if a given UAP vector <em><strong>v</strong></em> lies within the suspect model's UAP space.</p><p>In the following section, we will explore how fingerprints can be applied to verifiable inference. Fingerprinting provides a unique method for identifying models based on their inherent characteristics, distinguishing it from watermarking's embedded secret messages. As we delve further into this captivating topic, we will uncover the power of fingerprinting in ensuring model provenance.</p><h3>Fingerprinting for Verifiable Inference</h3><h4>Public Models</h4><p>When a model is public, the creator generates a fingerprint and keys, making them accessible to all. 
Consumers can download the model, fingerprint, and keys as a package.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!oOge!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10cf374a-440b-435d-bfbb-9c3d592bd14c_1852x973.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!oOge!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10cf374a-440b-435d-bfbb-9c3d592bd14c_1852x973.png 424w, https://substackcdn.com/image/fetch/$s_!oOge!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10cf374a-440b-435d-bfbb-9c3d592bd14c_1852x973.png 848w, https://substackcdn.com/image/fetch/$s_!oOge!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10cf374a-440b-435d-bfbb-9c3d592bd14c_1852x973.png 1272w, https://substackcdn.com/image/fetch/$s_!oOge!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10cf374a-440b-435d-bfbb-9c3d592bd14c_1852x973.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!oOge!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10cf374a-440b-435d-bfbb-9c3d592bd14c_1852x973.png" width="1456" height="765" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/10cf374a-440b-435d-bfbb-9c3d592bd14c_1852x973.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:765,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:327845,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!oOge!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10cf374a-440b-435d-bfbb-9c3d592bd14c_1852x973.png 424w, https://substackcdn.com/image/fetch/$s_!oOge!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10cf374a-440b-435d-bfbb-9c3d592bd14c_1852x973.png 848w, https://substackcdn.com/image/fetch/$s_!oOge!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10cf374a-440b-435d-bfbb-9c3d592bd14c_1852x973.png 1272w, https://substackcdn.com/image/fetch/$s_!oOge!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10cf374a-440b-435d-bfbb-9c3d592bd14c_1852x973.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>With full access, consumers can modify the model through fine-tuning or compression. But how can they verify it's the genuine article?</p><p>Enter the verification algorithm. By running it on the query results and fingerprint, consumers can confirm the model's authenticity.</p><p>For public models, unique identifier fingerprints are crucial. <a href="https://arxiv.org/abs/1912.00888">Lukas et al. (2021)</a> proposed a game-changing technique: <em>conferrable examples</em>. These crafted examples act as adversarial inputs not just for the target model, but also for any imitators. They transfer to surrogates but not to independently trained models.</p><p><a href="https://www.sciencedirect.com/science/article/abs/pii/S014036641931686X">Zhao et al. 
(2020)</a> introduced <em>adversarial marks</em>, another transferrable fingerprint that can't be removed without sacrificing significant accuracy.</p><p>These innovative techniques ensure that public models remain trustworthy and authentic. They give consumers the power to verify, while creators can share their work with confidence.</p><p>In a world where AI models are increasingly open and accessible, fingerprinting is a vital tool. It's the key to maintaining trust and integrity in the face of modification and imitation.</p><p>So, the next time you download a public model, look for the fingerprint. It's your guarantee of authenticity in an ever-evolving AI landscape.</p><h4>Private Models</h4><p>Model creators may keep their models private to protect intellectual property rights. When generating a fingerprint, they can host the model privately and provide consumer access through a public API.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!F4MP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8042c9e5-c6ee-40b5-9494-d656831db9f1_2147x1002.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!F4MP!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8042c9e5-c6ee-40b5-9494-d656831db9f1_2147x1002.png 424w, https://substackcdn.com/image/fetch/$s_!F4MP!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8042c9e5-c6ee-40b5-9494-d656831db9f1_2147x1002.png 848w, 
https://substackcdn.com/image/fetch/$s_!F4MP!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8042c9e5-c6ee-40b5-9494-d656831db9f1_2147x1002.png 1272w, https://substackcdn.com/image/fetch/$s_!F4MP!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8042c9e5-c6ee-40b5-9494-d656831db9f1_2147x1002.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!F4MP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8042c9e5-c6ee-40b5-9494-d656831db9f1_2147x1002.png" width="1456" height="680" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8042c9e5-c6ee-40b5-9494-d656831db9f1_2147x1002.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:680,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:360156,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!F4MP!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8042c9e5-c6ee-40b5-9494-d656831db9f1_2147x1002.png 424w, https://substackcdn.com/image/fetch/$s_!F4MP!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8042c9e5-c6ee-40b5-9494-d656831db9f1_2147x1002.png 848w, 
https://substackcdn.com/image/fetch/$s_!F4MP!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8042c9e5-c6ee-40b5-9494-d656831db9f1_2147x1002.png 1272w, https://substackcdn.com/image/fetch/$s_!F4MP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8042c9e5-c6ee-40b5-9494-d656831db9f1_2147x1002.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The verification algorithm for fingerprints always works on black-box access to the model. 
With public knowledge of the fingerprint, any consumer can verify it using the API.</p><p>Private models with public APIs are crucial for giving users access to LLMs like ChatGPT and Claude. However, LLMs pose a challenge in generating fingerprints due to their vast number of parameters.</p><p>Recent work by <a href="https://arxiv.org/abs/2210.07543">Gu et al. (2022)</a>, <a href="https://ojs.aaai.org/index.php/AAAI/article/view/26750">Li et al. (2023)</a>, and <a href="https://arxiv.org/abs/2401.12255">Xu et al. (2024)</a> demonstrates how to construct fingerprints for LLMs. These methods implant input-output pairs that exploit the model's inherent characteristics.</p><p><a href="https://arxiv.org/abs/2401.12255">Xu et al. (2024)</a> presents the fastest fingerprinting method, claiming to fingerprint LLAMA2-13B in less than a minute using a single A100 GPU. Their technical contribution is that their methods can work in either white-box mode using an F-transformer or black-box mode using fine-tuning.</p><p>As AI continues to evolve, fingerprinting will play a vital role in ensuring the integrity of private models accessed through public APIs. It's a powerful tool that strikes a balance between accessibility and security.</p><div><hr></div><h1>TLDR</h1><p>Verified inference is crucial in <a href="https://ieeexplore.ieee.org/abstract/document/7424435">MLaaS</a>, both centralized and decentralized. Consumers should be able to confirm the model generating the inference is the one they requested and paid for.</p><p>ZKML proves a model was executed on provided data, <a href="https://blog.bagel.net/p/with-great-data-comes-great-responsibility#%C2%A7tldr">but compromises data privacy</a>. 
Current ZK proof generation techniques are time-consuming due to cryptographic operations on top of the computation being proved (<a href="https://eprint.iacr.org/2023/1345">Garg et al., 2023</a>).</p><p>ZKML allows perfect proofs of inference and provenance but struggles with real-time proofs for relevant deep neural networks like LLMs (<a href="https://github.com/openai/gpt-2">GPT2</a>, <a href="https://llama.meta.com/">Llama</a>).</p><p>Watermarking and fingerprinting fill the gaps. Practical and effective inference verification is a must in decentralized AI where trust cannot be assumed. <a href="https://arxiv.org/abs/2401.12255">Xu et al. (2024)</a> generated fingerprints in the largest LLMs in <strong>under one minute</strong>.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!uJNn!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe494999c-c486-4914-895d-30dcfca9bc14_2030x498.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!uJNn!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe494999c-c486-4914-895d-30dcfca9bc14_2030x498.png 424w, https://substackcdn.com/image/fetch/$s_!uJNn!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe494999c-c486-4914-895d-30dcfca9bc14_2030x498.png 848w, https://substackcdn.com/image/fetch/$s_!uJNn!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe494999c-c486-4914-895d-30dcfca9bc14_2030x498.png 1272w, 
https://substackcdn.com/image/fetch/$s_!uJNn!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe494999c-c486-4914-895d-30dcfca9bc14_2030x498.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!uJNn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe494999c-c486-4914-895d-30dcfca9bc14_2030x498.png" width="1456" height="357" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e494999c-c486-4914-895d-30dcfca9bc14_2030x498.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:357,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:116176,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!uJNn!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe494999c-c486-4914-895d-30dcfca9bc14_2030x498.png 424w, https://substackcdn.com/image/fetch/$s_!uJNn!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe494999c-c486-4914-895d-30dcfca9bc14_2030x498.png 848w, https://substackcdn.com/image/fetch/$s_!uJNn!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe494999c-c486-4914-895d-30dcfca9bc14_2030x498.png 1272w, 
https://substackcdn.com/image/fetch/$s_!uJNn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe494999c-c486-4914-895d-30dcfca9bc14_2030x498.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Verifiable inference is achievable through all three techniques. Watermarking and fingerprinting provide extraction and verification algorithms, while ZK offers proofs of knowledge.</p><p>Efficiency varies significantly. ZK is the slowest, while watermarking and fingerprinting can be fast, making them more applicable considering the current state of the art.</p><p><a href="https://en.wikipedia.org/wiki/Knowledge_distillation">Knowledge distillation</a>, a model compression attack, is a threat watermarking and fingerprinting are designed to protect against by passing the identifying markers to the compressed model. ZK does not address this issue, which is significant in the traditional AI industry and will be covered in a future article.</p><p>Output accountability is where ZK shines. While watermarks and fingerprints can establish ownership, they don't inherently ensure model integrity or output correctness. ZKPs prove a model was evaluated correctly on specific inputs, enabling accountability (<a href="https://ieeexplore.ieee.org/abstract/document/10143370">Lederer et al., 2023</a>; <a href="https://dl.acm.org/doi/full/10.1145/3595292">Oliynik et al., 2023</a>).</p><div><hr></div><p>As AI reshapes our world, we can't build the future on blind trust. Verifiability is essential for decentralized AI networks. We need transparency and accountability. AI's potential is vast, but honesty and integrity must come first. Watermarking and fingerprinting are key alternative tools for that.</p><p>The stakes are high. 
We have to get this right.</p><div><hr></div><p><em>Bagel Labs is a distributed machine learning research lab.</em></p>]]></content:encoded></item><item><title><![CDATA[With Great Data, Comes Great Responsibility]]></title><description><![CDATA[Privacy preserving machine learning (PPML) at Bagel &#129391;]]></description><link>https://blog.bagel.com/p/with-great-data-comes-great-responsibility</link><guid isPermaLink="false">https://blog.bagel.com/p/with-great-data-comes-great-responsibility</guid><dc:creator><![CDATA[Bidhan Roy]]></dc:creator><pubDate>Tue, 05 Mar 2024 13:59:23 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!iWQD!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9872bda-6d60-45e9-ab04-6e45bc338e69_2245x1587.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Trillion-dollar industries are unable to leverage their immensely valuable data for AI training and inference due to privacy concerns. The potential for AI-driven breakthroughs&#8212;genomic secrets that could cure diseases, predictive insights to eliminate supply chain waste, and chevrons of untapped energy sources&#8212;remains locked away. Privacy regulations also closely guard this valuable and sensitive information.</p><p>To propel human civilization forward in energy, healthcare, and collaboration, it is crucial to enable AI systems that train and generate inference on data while maintaining full end-to-end privacy. At Bagel, we believe accessing a fundamental resource like knowledge, for both human-driven and autonomous AI, should not entail a compromise on privacy.</p><p>We have applied and experimented with almost all the major privacy-preserving machine learning (PPML) mechanisms. 
Below, we share our insights, our approach, and some research breakthroughs.</p><p><em><strong>And if you're in a rush, we have a TLDR at the end.</strong></em></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!iWQD!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9872bda-6d60-45e9-ab04-6e45bc338e69_2245x1587.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!iWQD!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9872bda-6d60-45e9-ab04-6e45bc338e69_2245x1587.png 424w, https://substackcdn.com/image/fetch/$s_!iWQD!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9872bda-6d60-45e9-ab04-6e45bc338e69_2245x1587.png 848w, https://substackcdn.com/image/fetch/$s_!iWQD!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9872bda-6d60-45e9-ab04-6e45bc338e69_2245x1587.png 1272w, https://substackcdn.com/image/fetch/$s_!iWQD!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9872bda-6d60-45e9-ab04-6e45bc338e69_2245x1587.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!iWQD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9872bda-6d60-45e9-ab04-6e45bc338e69_2245x1587.png" width="1456" height="1029" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d9872bda-6d60-45e9-ab04-6e45bc338e69_2245x1587.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1029,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:5786820,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!iWQD!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9872bda-6d60-45e9-ab04-6e45bc338e69_2245x1587.png 424w, https://substackcdn.com/image/fetch/$s_!iWQD!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9872bda-6d60-45e9-ab04-6e45bc338e69_2245x1587.png 848w, https://substackcdn.com/image/fetch/$s_!iWQD!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9872bda-6d60-45e9-ab04-6e45bc338e69_2245x1587.png 1272w, https://substackcdn.com/image/fetch/$s_!iWQD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9872bda-6d60-45e9-ab04-6e45bc338e69_2245x1587.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h2>Privacy-preserving Machine Learning (PPML)</h2><p>Recent advances in academia and industry have focused on incorporating privacy mechanisms into machine learning models, highlighting a significant move towards <a href="https://arxiv.org/abs/2108.04417">privacy-preserving machine learning (PPML)</a>. At Bagel, we have experimented with all the major PPML techniques, particularly those post <a href="https://link.springer.com/chapter/10.1007/11681878_14">differential privacy</a>. Our work, positioned at the intersection of AI and cryptography, draws from the cutting edge in both domains.</p><p>Our research covered a wide range of PPML techniques suitable for our platform. 
Among those, <a href="https://digitalprivacy.ieee.org/publications/topics/what-is-differential-privacy">Differential Privacy</a> (DP), <a href="https://research.google/pubs/federated-learning-strategies-for-improving-communication-efficiency/">Federated Learning</a>, <a href="https://arxiv.org/abs/2310.14848">Zero-knowledge Machine Learning</a> (ZKML) and <a href="https://queue.acm.org/detail.cfm?id=3561800">Fully Homomorphic Encryption</a> Machine Learning (FHEML) stood out for their potential in PPML.</p><p>First, we will delve into each of these, examining their advantages and drawbacks. In subsequent posts, we will describe Bagel's approach to data privacy, which addresses and resolves the challenges associated with the existing solutions.</p><div><hr></div><h2>Differential Privacy (DP)</h2><p>One of the first and most important techniques with a mathematical guarantee for incorporating privacy into data is <em>differential privacy</em> or DP (<a href="https://link.springer.com/chapter/10.1007/11761679_29">Dwork et al. 2006</a>), addressing the challenges faced by earlier methods with a quantifiable privacy definition.</p><p>DP ensures that a randomized algorithm, <em>A</em>, maintains privacy across datasets <em>D1</em> and <em>D2</em>&#8212;which differ by a single record&#8212;by keeping the probability of <em>A(D1)</em> and <em>A(D2)</em> generating identical outcomes relatively unchanged. This principle implies that minor dataset modifications do not significantly alter outcome probabilities, marking a pivotal advancement in data privacy.</p><p>The application of DP in machine learning, particularly in neural network training and inference, demonstrates its versatility and effectiveness. 
Notable implementations include adapting DP for <a href="https://deepai.org/machine-learning-glossary-and-terms/supervised-learning">supervised learning</a> algorithms by integrating random noise at various phases: <em>directly onto the data</em>, within the training process, or during inference, as highlighted by <a href="https://www.jair.org/index.php/jair/article/view/14649">Ponomareva et al. (2023)</a> and further references.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!LmWg!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafc70b6a-478f-412c-a8a5-ef1d976d8702_965x553.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!LmWg!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafc70b6a-478f-412c-a8a5-ef1d976d8702_965x553.png 424w, https://substackcdn.com/image/fetch/$s_!LmWg!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafc70b6a-478f-412c-a8a5-ef1d976d8702_965x553.png 848w, https://substackcdn.com/image/fetch/$s_!LmWg!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafc70b6a-478f-412c-a8a5-ef1d976d8702_965x553.png 1272w, https://substackcdn.com/image/fetch/$s_!LmWg!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafc70b6a-478f-412c-a8a5-ef1d976d8702_965x553.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!LmWg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafc70b6a-478f-412c-a8a5-ef1d976d8702_965x553.png" width="965" height="553" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/afc70b6a-478f-412c-a8a5-ef1d976d8702_965x553.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:553,&quot;width&quot;:965,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:53110,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!LmWg!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafc70b6a-478f-412c-a8a5-ef1d976d8702_965x553.png 424w, https://substackcdn.com/image/fetch/$s_!LmWg!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafc70b6a-478f-412c-a8a5-ef1d976d8702_965x553.png 848w, https://substackcdn.com/image/fetch/$s_!LmWg!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafc70b6a-478f-412c-a8a5-ef1d976d8702_965x553.png 1272w, https://substackcdn.com/image/fetch/$s_!LmWg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fafc70b6a-478f-412c-a8a5-ef1d976d8702_965x553.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset 
pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The balance between privacy and accuracy in DP is influenced by the noise level: <em>greater noise enhances privacy at the cost of accuracy</em>, affecting both inference and training stages. This relationship was explored by <a href="https://dl.acm.org/doi/abs/10.1145/2976749.2978318">Abadi et al. 
(2016)</a> through the introduction of Gaussian noise to the stochastic gradient descent (DP-SGD) algorithm, observing the noise's impact on accuracy across the <a href="https://www.tensorflow.org/datasets/catalog/mnist">MNIST</a> and <a href="https://www.tensorflow.org/datasets/catalog/cifar10">CIFAR-10</a> datasets.</p><p>An innovative DP application, <strong>Private Aggregation of Teacher Ensembles (PATE)</strong> by <a href="https://arxiv.org/abs/1610.05755">Papernot et al. (2016)</a>, divides a dataset into disjoint subsets, training networks on each without privacy, termed teachers. These networks' aggregated inferences, subjected to added noise for privacy, inform the training of a student model to emulate the teacher ensemble. This method also underscores the trade-off between privacy enhancement through noise addition and the resultant accuracy reduction.</p><p>Further studies affirm that while privacy can be secured with little impact on execution times <a href="https://ieeexplore.ieee.org/document/7454576">(Li </a><em><a href="https://ieeexplore.ieee.org/document/7454576">et al.</a></em><a href="https://ieeexplore.ieee.org/document/7454576"> 2015)</a>, stringent privacy measures can obscure discernible patterns essential for learning <a href="https://dl.acm.org/doi/abs/10.1145/2976749.2978318">(Abadi </a><em><a href="https://dl.acm.org/doi/abs/10.1145/2976749.2978318">et al.</a></em><a href="https://dl.acm.org/doi/abs/10.1145/2976749.2978318"> 2016)</a>. 
Consequently, a certain level of privacy must be relinquished in DP to facilitate effective machine learning model training, illustrating the nuanced balance between privacy preservation and learning efficiency.</p><h3>Pros of Differential Privacy</h3><p>The advantages of using DP are:</p><p><strong>Effortless.</strong> Easy to implement into algorithms and code.</p><p><strong>Algorithm independence.</strong> Schemes can be made independent of the training or inference algorithm.</p><p><strong>Fast.</strong> Some DP mechanisms have been shown to have little impact on the execution times of algorithms.</p><p><strong>Tunable privacy.</strong> The degree of desired privacy can be chosen by the algorithm designer.</p><h3>Cons of Differential Privacy</h3><p><strong>Access to private data is still necessary.</strong> Teachers in the PATE scheme must have full access to the private data <a href="https://arxiv.org/abs/1610.05755">(Papernot </a><em><a href="https://arxiv.org/abs/1610.05755">et al.</a></em><a href="https://arxiv.org/abs/1610.05755"> 2016)</a> in order to train a neural network. Also, the stochastic gradient descent algorithm based on DP only adds noise to the weight updates and needs access to private data for training <a href="https://dl.acm.org/doi/abs/10.1145/2976749.2978318">(Abadi </a><em><a href="https://dl.acm.org/doi/abs/10.1145/2976749.2978318">et al.</a></em><a href="https://dl.acm.org/doi/abs/10.1145/2976749.2978318"> 2016)</a>.</p><p><strong>Privacy-Accuracy-Speed trade-off on data.</strong> All implementations must sacrifice some privacy in order to get good results. If there is no discernible pattern in the input, then there is nothing to train <a href="https://dl.acm.org/doi/abs/10.1145/3336191.3371856">(Feyisetan </a><em><a href="https://dl.acm.org/doi/abs/10.1145/3336191.3371856">et al.</a></em><a href="https://dl.acm.org/doi/abs/10.1145/3336191.3371856"> 2020)</a>. 
The implementation of some noise mechanisms can impact execution times, necessitating a balance between speed and the goals of privacy and accuracy.</p><div><hr></div><h2>Zero-Knowledge Machine Learning (ZKML)</h2><p>A zero-knowledge proof system (ZKP) is a method allowing a prover <em>P</em> to convince a verifier <em>V</em> about the truth of a statement without disclosing any information apart from the statement's veracity. To affirm the statement's truth, <em>P</em> produces a proof <code>&#960;</code> for <em>V</em> to review, enabling <em>V</em> to be convinced of the statement's truthfulness.</p><p>Zero-Knowledge Machine Learning (ZKML) is an approach that combines the principles of zero-knowledge proofs (ZKPs) with machine learning. This integration allows machine learning models to be trained and to infer with verifiability.</p><p>For an in-depth examination of ZKML, refer to the work by <a href="https://arxiv.org/abs/2310.14848">Xin </a><em><a href="https://arxiv.org/abs/2310.14848">et al.</a></em><a href="https://arxiv.org/abs/2310.14848"> in (2023)</a>. Below we provide a brief explanation that focuses on the utilization of ZKPs for neural network training and inference.</p><h3>ZKML Inference</h3><p>Consider an unlabeled dataset <em>A</em> and a pretrained neural network <em>N</em> tasked with labeling each record in <em>A</em>. To generate a ZK proof of <em>N</em>'s computation during labeling, an arithmetic circuit <em>C</em> representing <em>N</em> is required, including circuits for each neuron's activation function. Assuming such a circuit <em>C</em> exists and is publicly accessible, the network's weights and a dataset record become the private and public inputs, respectively. 
For any record <em>a</em> of <em>A</em>, <em>N</em>'s output is denoted by a pair <code>(l,&#960;)</code>, where <code>l</code> is the label and <code>&#960;</code> is a zero-knowledge argument asserting the existence of specific weights that facilitated the labeling.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!3JEm!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ae54dbe-88cd-4f55-aad1-6dc69d586777_546x327.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!3JEm!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ae54dbe-88cd-4f55-aad1-6dc69d586777_546x327.png 424w, https://substackcdn.com/image/fetch/$s_!3JEm!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ae54dbe-88cd-4f55-aad1-6dc69d586777_546x327.png 848w, https://substackcdn.com/image/fetch/$s_!3JEm!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ae54dbe-88cd-4f55-aad1-6dc69d586777_546x327.png 1272w, https://substackcdn.com/image/fetch/$s_!3JEm!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ae54dbe-88cd-4f55-aad1-6dc69d586777_546x327.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!3JEm!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ae54dbe-88cd-4f55-aad1-6dc69d586777_546x327.png" width="546" height="327" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9ae54dbe-88cd-4f55-aad1-6dc69d586777_546x327.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:327,&quot;width&quot;:546,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:53138,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!3JEm!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ae54dbe-88cd-4f55-aad1-6dc69d586777_546x327.png 424w, https://substackcdn.com/image/fetch/$s_!3JEm!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ae54dbe-88cd-4f55-aad1-6dc69d586777_546x327.png 848w, https://substackcdn.com/image/fetch/$s_!3JEm!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ae54dbe-88cd-4f55-aad1-6dc69d586777_546x327.png 1272w, https://substackcdn.com/image/fetch/$s_!3JEm!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ae54dbe-88cd-4f55-aad1-6dc69d586777_546x327.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>This model illustrates how ZK proves the accurate execution of a neural network on data, concealing the network's weights within a ZK proof. Consequently, any verifier can be assured that the executing agent possesses the necessary weights.</p><h3>ZKML Training</h3><p>ZKPs are applicable during training to validate <em>N</em>'s correct execution on a labeled dataset <em>A</em>. Here, <em>A</em> serves as the public input, with an arithmetic circuit <em>C</em> depicting the neural network <em>N</em>. The training process requires an additional arithmetic circuit to implement the optimization function, minimizing the loss function. For each training epoch <em>i</em>, a proof <code>&#960;_i</code> is generated, confirming the algorithm's accurate execution through epochs <em>1</em> to <em>i-1</em>, including the validity of the preceding epoch's proof. 
The training culminates with a compressed proof <code>&#960;</code>, proving the correct training over dataset <em>A</em>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!fPrQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd0905062-cec9-4546-826b-3f15fe41515a_632x451.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!fPrQ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd0905062-cec9-4546-826b-3f15fe41515a_632x451.png 424w, https://substackcdn.com/image/fetch/$s_!fPrQ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd0905062-cec9-4546-826b-3f15fe41515a_632x451.png 848w, https://substackcdn.com/image/fetch/$s_!fPrQ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd0905062-cec9-4546-826b-3f15fe41515a_632x451.png 1272w, https://substackcdn.com/image/fetch/$s_!fPrQ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd0905062-cec9-4546-826b-3f15fe41515a_632x451.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!fPrQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd0905062-cec9-4546-826b-3f15fe41515a_632x451.png" width="632" height="451" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d0905062-cec9-4546-826b-3f15fe41515a_632x451.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:451,&quot;width&quot;:632,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:74353,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!fPrQ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd0905062-cec9-4546-826b-3f15fe41515a_632x451.png 424w, https://substackcdn.com/image/fetch/$s_!fPrQ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd0905062-cec9-4546-826b-3f15fe41515a_632x451.png 848w, https://substackcdn.com/image/fetch/$s_!fPrQ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd0905062-cec9-4546-826b-3f15fe41515a_632x451.png 1272w, https://substackcdn.com/image/fetch/$s_!fPrQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd0905062-cec9-4546-826b-3f15fe41515a_632x451.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The explanation above illustrates that during training, the network's weights are concealed to ensure that the training is correctly executed on the given dataset <em>A</em>. Additionally, all internal states of the network remain undisclosed throughout the training process.</p><h3>Pros of ZKML</h3><p>The advantages of using ZKPs with neural networks are:</p><p><strong>Privacy of model weights.</strong> The weights of the neural network are never revealed during training or inference in any way. The weights and the internal states of the network algorithm are private inputs for the ZKP.</p><p><strong>Verifiability.</strong> The proof certifies the proper execution of training or inference processes and guarantees the accurate computation of weights.</p><p><strong>Trustlessness.</strong> The proof and its verification properties ensure that the data owner is not required to place trust in the agent operating the neural network. 
Instead, the data owner can rely on the proof to confirm both the accuracy of the computation and the existence of correct weights.</p><h3>Cons of ZKML</h3><p>The disadvantages of using ZKPs with neural networks are:</p><p><strong>No data privacy.</strong> The agent running the neural network needs access to the data in order to train or perform inference. Data is considered a parameter that is publicly known to the data owner and the prover running the neural network <a href="https://arxiv.org/abs/2310.14848">(Xing </a><em><a href="https://arxiv.org/abs/2310.14848">et al.</a></em><a href="https://arxiv.org/abs/2310.14848"> 2023)</a>.</p><p><strong>No privacy for the model&#8217;s algorithm.</strong> In order to create a ZK proof, the algorithm of the entire neural network should be publicly known. This includes the activation functions, the loss function, the optimization algorithm used, etc. <a href="https://arxiv.org/abs/2310.14848">(Xing </a><em><a href="https://arxiv.org/abs/2310.14848">et al.</a></em><a href="https://arxiv.org/abs/2310.14848"> 2023)</a>.</p><p><strong>Proof generation of an expensive computation.</strong> Presently, the process of generating a ZK proof is computationally demanding. 
Creating a proof for each epoch within a training algorithm can exacerbate the computational burden of an already resource-intensive task.</p><div><hr></div><h2>Federated Learning (FL)</h2><p>In Federated Learning or FL we look to train a global model using a dataset that is distributed across multiple servers with local data samples but without each server sharing their local data.</p><p>In FL there is a global objective function that is being optimized which is defined as</p><div class="latex-rendered" data-attrs="{&quot;persistentExpression&quot;:&quot;f(x_1,\\dots,x_n)=\\frac 1 n \\sum_{i=1}^n f_i(x_i),&quot;,&quot;id&quot;:&quot;SYHKRRVSBP&quot;}" data-component-name="LatexBlockToDOM"></div><p>where <em>n</em> is the number of servers, each variable <code>x_i</code> is the set of parameters as viewed by server <em>i</em>, and each function <code>f_i</code> is the local objective function of server <em>i</em>. FL tries to find the best set of values that optimizes <em>f</em>.</p><p>The figure below shows the general process in FL.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!TLl9!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7af073e-f9a8-44c3-a395-547e815ba09c_530x772.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!TLl9!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7af073e-f9a8-44c3-a395-547e815ba09c_530x772.png 424w, https://substackcdn.com/image/fetch/$s_!TLl9!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7af073e-f9a8-44c3-a395-547e815ba09c_530x772.png 848w, 
https://substackcdn.com/image/fetch/$s_!TLl9!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7af073e-f9a8-44c3-a395-547e815ba09c_530x772.png 1272w, https://substackcdn.com/image/fetch/$s_!TLl9!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7af073e-f9a8-44c3-a395-547e815ba09c_530x772.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!TLl9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7af073e-f9a8-44c3-a395-547e815ba09c_530x772.png" width="530" height="772" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c7af073e-f9a8-44c3-a395-547e815ba09c_530x772.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:772,&quot;width&quot;:530,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:87205,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!TLl9!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7af073e-f9a8-44c3-a395-547e815ba09c_530x772.png 424w, https://substackcdn.com/image/fetch/$s_!TLl9!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7af073e-f9a8-44c3-a395-547e815ba09c_530x772.png 848w, 
https://substackcdn.com/image/fetch/$s_!TLl9!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7af073e-f9a8-44c3-a395-547e815ba09c_530x772.png 1272w, https://substackcdn.com/image/fetch/$s_!TLl9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc7af073e-f9a8-44c3-a395-547e815ba09c_530x772.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><ol><li><p><strong>Initialization.</strong> An initial global model is created and distributed by a central server to all other 
servers.</p></li><li><p><strong>Local training.</strong> Each server trains the model using their local data. This ensures data privacy and security.</p></li><li><p><strong>Model update.</strong> After training, each server shares with the central server their local updates like gradients and parameters.</p></li><li><p><strong>Aggregation.</strong> The central server receives all local updates and aggregates them into the global model, for example, using averaging.</p></li><li><p><strong>Model distribution.</strong> The updated model is distributed again to the local servers and the previous steps are repeated until a desired level of performance is achieved by the global model.</p></li></ol><p>Since local servers never share their local data, FL guarantees privacy over that data. However, the model being constructed is shared among all parties, and hence, its structure and set of parameters are not hidden.</p><h3>Pros of FL</h3><p>The advantages of using FL are:</p><p><strong>Data privacy.</strong> The local data on the local servers are never shared. All computations are done locally, and there is no need for communication between them.</p><p><strong>Distributed computing.</strong> The creation of the global model is distributed among local servers, thereby parallelizing a resource-intensive computation. Thus, FL is considered a distributed machine learning framework <a href="https://arxiv.org/abs/2108.04417">(Xu et al. 2021)</a>.</p><h3>Cons of FL</h3><p>The disadvantages of using FL are:</p><p><strong>Model is not private.</strong> The global model is shared with each local server in order to do their computations locally. This includes the aggregated weights and gradients at each step of the FL process. Thus, each local server is aware of the entire architecture of the global model <a href="https://research.google/pubs/federated-learning-strategies-for-improving-communication-efficiency/">(Kone&#269;n&#253; et al. 
2016)</a>.</p><p><strong>Data leakage.</strong> Recent research indicates that data leakage remains a persistent issue, notably through mechanisms such as gradient sharing&#8212;see for example <a href="https://link.springer.com/chapter/10.1007/978-3-030-96896-0_15">Jin </a><em><a href="https://link.springer.com/chapter/10.1007/978-3-030-96896-0_15">et al.</a></em><a href="https://link.springer.com/chapter/10.1007/978-3-030-96896-0_15"> (2022)</a>. Consequently, <strong>FL cannot provide complete assurances of data privacy</strong>.</p><p><strong>Trust.</strong> Since no proofs are generated in FL, every party involved in the process needs to be trusted to have carried out its computation and produced its parameters as expected <a href="https://www.sciencedirect.com/science/article/abs/pii/S0020025522014359">(Gao </a><em><a href="https://www.sciencedirect.com/science/article/abs/pii/S0020025522014359">et al.</a></em><a href="https://www.sciencedirect.com/science/article/abs/pii/S0020025522014359"> 2023)</a>.</p><div><hr></div><h2>Fully Homomorphic Encryption (FHE)</h2><p>At its core, homomorphic encryption permits computations on encrypted data. By "homomorphic," we refer to the capacity of an encryption scheme to allow specific operations on ciphertexts that, when decrypted, yield the same result as operations performed directly on the plaintexts.</p><p>Consider a scenario with a secret key <code>k</code> and a plaintext <code>m</code>. In an encryption scheme <code>(E,D)</code>, where <code>E</code> and <code>D</code> represent encryption and decryption algorithms respectively, the condition <code>D(k,E(k,m))=m</code> must hold. A scheme <code>(E,D)</code> is deemed <em>fully homomorphic</em> if for any key <code>k</code> and messages <code>m</code> and <code>m&#8217;</code>, the properties <code>E(k,m+m&#8217;)=E(k,m)+E(k,m&#8217;)</code> and <code>E(k,m*m&#8217;)=E(k,m)*E(k,m&#8217;)</code> are satisfied, with addition and multiplication defined over a finite field. 
If only one operation is supported, the scheme is <em>partially homomorphic</em>. This definition implies that operations on encrypted data mirror those on plaintext, crucial for maintaining data privacy during processing.</p><p>In plain words, if we have a fully homomorphic encryption scheme, then operating over the encrypted data is equivalent to operating over the plaintext. We will write FHE to refer to a fully homomorphic encryption scheme. The figure below shows how an arbitrary homomorphic operation works over a plaintext and ciphertext.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!87-2!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2267d644-eea7-40b1-a07e-710fb415ce66_659x578.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!87-2!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2267d644-eea7-40b1-a07e-710fb415ce66_659x578.png 424w, https://substackcdn.com/image/fetch/$s_!87-2!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2267d644-eea7-40b1-a07e-710fb415ce66_659x578.png 848w, https://substackcdn.com/image/fetch/$s_!87-2!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2267d644-eea7-40b1-a07e-710fb415ce66_659x578.png 1272w, https://substackcdn.com/image/fetch/$s_!87-2!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2267d644-eea7-40b1-a07e-710fb415ce66_659x578.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!87-2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2267d644-eea7-40b1-a07e-710fb415ce66_659x578.png" width="659" height="578" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2267d644-eea7-40b1-a07e-710fb415ce66_659x578.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:578,&quot;width&quot;:659,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:56286,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!87-2!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2267d644-eea7-40b1-a07e-710fb415ce66_659x578.png 424w, https://substackcdn.com/image/fetch/$s_!87-2!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2267d644-eea7-40b1-a07e-710fb415ce66_659x578.png 848w, https://substackcdn.com/image/fetch/$s_!87-2!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2267d644-eea7-40b1-a07e-710fb415ce66_659x578.png 1272w, https://substackcdn.com/image/fetch/$s_!87-2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2267d644-eea7-40b1-a07e-710fb415ce66_659x578.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset 
pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The homomorphic property of FHE makes it invaluable in situations where data must remain secure while still being used for computations. For instance, if we possess sensitive data and require a third party to perform data analysis on it, we can rely on FHE to encrypt the data. This allows the third party to conduct analysis on the encrypted data without the need for decryption. 
The mathematical properties of FHE guarantee the accuracy of the analysis results.</p><h3>FHE Inference</h3><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!q1OX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48cf5b92-124b-4ee5-bb86-7a33c152f02a_533x372.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!q1OX!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48cf5b92-124b-4ee5-bb86-7a33c152f02a_533x372.png 424w, https://substackcdn.com/image/fetch/$s_!q1OX!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48cf5b92-124b-4ee5-bb86-7a33c152f02a_533x372.png 848w, https://substackcdn.com/image/fetch/$s_!q1OX!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48cf5b92-124b-4ee5-bb86-7a33c152f02a_533x372.png 1272w, https://substackcdn.com/image/fetch/$s_!q1OX!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48cf5b92-124b-4ee5-bb86-7a33c152f02a_533x372.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!q1OX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48cf5b92-124b-4ee5-bb86-7a33c152f02a_533x372.png" width="533" height="372" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/48cf5b92-124b-4ee5-bb86-7a33c152f02a_533x372.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:372,&quot;width&quot;:533,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:60112,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!q1OX!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48cf5b92-124b-4ee5-bb86-7a33c152f02a_533x372.png 424w, https://substackcdn.com/image/fetch/$s_!q1OX!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48cf5b92-124b-4ee5-bb86-7a33c152f02a_533x372.png 848w, https://substackcdn.com/image/fetch/$s_!q1OX!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48cf5b92-124b-4ee5-bb86-7a33c152f02a_533x372.png 1272w, https://substackcdn.com/image/fetch/$s_!q1OX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48cf5b92-124b-4ee5-bb86-7a33c152f02a_533x372.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Fully Homomorphic Encryption (FHE) can be used to perform inference in neural networks while preserving data privacy. Let's consider a scenario where <em>N</em> is a pretrained neural network, <em>A</em> is a dataset, and <code>(E,D)</code> is an asymmetric FHE scheme. The goal is to perform inference on a record <em>a</em> of <em>A</em> without revealing the sensitive information contained in <em>a</em> to the neural network.</p><p>The inference process using FHE begins with encryption. The data owner encrypts the record <em>a</em> using the encryption algorithm E with the public key <code>public_key</code>, obtaining the encrypted record <code>a&#8217; = E(public_key, a)</code>.</p><p>Next, the data owner sends the encrypted record <code>a&#8217;</code> along with <code>public_key</code> to the neural network <em>N</em>. 
The neural network <em>N</em> must have knowledge of the encryption scheme <code>(E,D)</code> and its parameters to correctly apply homomorphic operations over the encrypted data <code>a&#8217;</code>. Any arithmetic operation performed by <em>N</em> can be safely applied to <code>a&#8217;</code> due to the homomorphic properties of the encryption scheme.</p><p>One challenge in using FHE for neural network inference is handling non-linear activation functions, such as sigmoid and ReLU, which involve non-arithmetic computations. To compute these functions homomorphically, they need to be approximated by low-degree polynomials. The approximations allow the activation functions to be computed using homomorphic operations on the encrypted data <code>a&#8217;</code>.</p><p>After applying the necessary homomorphic operations and approximated activation functions, the neural network <em>N</em> obtains the inference result. It's important to note that the inference result is still in encrypted form, as all computations were performed on encrypted data.</p><p>Finally, the encrypted inference result is sent back to the data owner, who uses the private key associated with the FHE scheme to decrypt the result using the decryption algorithm <code>D</code>. The decrypted inference result is obtained, which can be interpreted and utilized by the data owner.</p><p>By following this inference process, the neural network <em>N</em> can perform computations on the encrypted data <code>a&#8217;</code> without having access to the original sensitive information. The FHE scheme ensures that the data remains encrypted throughout the inference process, and only the data owner with the private key can decrypt the final result.</p><p>It's important to note that the neural network <em>N</em> must be designed and trained to work with the specific FHE scheme and its parameters. 
Additionally, the approximation of non-linear activation functions by low-degree polynomials may introduce some level of approximation error, which should be considered and evaluated based on the specific application and accuracy requirements.</p><h3>FHE Training</h3><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!vwiA!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f4d7e5b-76ea-4e1f-9c2d-5a41f8477cb5_533x372.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!vwiA!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f4d7e5b-76ea-4e1f-9c2d-5a41f8477cb5_533x372.png 424w, https://substackcdn.com/image/fetch/$s_!vwiA!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f4d7e5b-76ea-4e1f-9c2d-5a41f8477cb5_533x372.png 848w, https://substackcdn.com/image/fetch/$s_!vwiA!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f4d7e5b-76ea-4e1f-9c2d-5a41f8477cb5_533x372.png 1272w, https://substackcdn.com/image/fetch/$s_!vwiA!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f4d7e5b-76ea-4e1f-9c2d-5a41f8477cb5_533x372.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!vwiA!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f4d7e5b-76ea-4e1f-9c2d-5a41f8477cb5_533x372.png" width="533" height="372" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3f4d7e5b-76ea-4e1f-9c2d-5a41f8477cb5_533x372.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:372,&quot;width&quot;:533,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:60703,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!vwiA!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f4d7e5b-76ea-4e1f-9c2d-5a41f8477cb5_533x372.png 424w, https://substackcdn.com/image/fetch/$s_!vwiA!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f4d7e5b-76ea-4e1f-9c2d-5a41f8477cb5_533x372.png 848w, https://substackcdn.com/image/fetch/$s_!vwiA!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f4d7e5b-76ea-4e1f-9c2d-5a41f8477cb5_533x372.png 1272w, https://substackcdn.com/image/fetch/$s_!vwiA!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f4d7e5b-76ea-4e1f-9c2d-5a41f8477cb5_533x372.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The process of training a neural network using Fully Homomorphic Encryption (FHE) is conceptually similar to performing inference, but with a few key differences. Let's dive into the details.</p><p>Imagine we have an untrained neural network <em>N</em> and an encrypted dataset <code>A&#8217; = E(public_key, A)</code>, where <code>E</code> is the encryption function and <code>public_key</code> is the public key of an asymmetric FHE scheme. Our goal is to train <em>N</em> on the encrypted data <code>A&#8217;</code> while preserving the privacy of the original dataset <em>A</em>.</p><p>The training process unfolds as follows. Each operation performed by the network and the training algorithm is executed on each encrypted record <code>a&#8217;</code> of <code>A&#8217;</code>. This includes both the forward and backward passes of the network. 
As with inference, any non-arithmetic operations like activation functions need to be approximated using low-degree polynomials to be compatible with the homomorphic properties of FHE.</p><p>A fascinating aspect of this approach is that the weights obtained during training are themselves encrypted. They can only be decrypted using the private key of the FHE scheme, which is held exclusively by the data owner. This means that even the agent executing the neural network training never has access to the actual weight values, only their encrypted counterparts.</p><p>Think about the implications of this. The data owner can outsource the computational heavy lifting of training to a third party, like a cloud provider with powerful GPUs, without ever revealing their sensitive data. The training process operates on encrypted data and produces encrypted weights, ensuring end-to-end privacy.</p><p>Once training is complete, the neural network sends the collection of encrypted weights <em>w&#8217;</em> back to the data owner. The data owner can then decrypt the weights using their private key, obtaining the final trained model. They are the sole party capable of accessing the unencrypted weights and using the model for inference on plaintext data.</p><p>There is one caveat to keep in mind. 
FHE operations are computationally expensive, so training a neural network with FHE will generally be slower than training on unencrypted data.</p><h3>Pros of FHE</h3><p>The advantages of using FHE are:</p><p><strong>Data privacy.</strong> Third-party access to encrypted private data is effectively prevented, a security guarantee upheld by the assurances of FHE and <a href="https://arxiv.org/abs/2208.08125">lattice-based cryptography</a> <a href="https://dl.acm.org/doi/abs/10.1145/1536414.1536440">(Gentry 2009)</a>.</p><p><strong>Model privacy.</strong> Training and inference processes are carried out on encrypted data, eliminating the need to share or publicize the neural network's parameters for accurate data analysis.</p><p><strong>Effectiveness.</strong> Previous studies have demonstrated that neural networks operating on encrypted data using FHE maintain their accuracy&#8212;see for example <a href="https://ieeexplore.ieee.org/abstract/document/9025601">Nandakumar </a><em><a href="https://ieeexplore.ieee.org/abstract/document/9025601">et al.</a></em><a href="https://ieeexplore.ieee.org/abstract/document/9025601"> (2019)</a> and <a href="https://ieeexplore.ieee.org/abstract/document/8885038">Xu </a><em><a href="https://ieeexplore.ieee.org/abstract/document/8885038">et al.</a></em><a href="https://ieeexplore.ieee.org/abstract/document/8885038"> (2019)</a>. Therefore, we can be assured that employing FHE for training and inference processes will achieve the anticipated outcomes.</p><p><strong>Quantum resistance.</strong> The security of FHE, unlike that of many other encryption schemes, is grounded in difficult problems derived from lattice theory. 
These problems are considered to be hard even for quantum computers <a href="https://dl.acm.org/doi/abs/10.1145/1060590.1060603">(Regev 2005)</a>, thus offering enhanced protection against potential quantum threats in the future.</p><h3>Cons of FHE</h3><p>The disadvantages of using FHE are:</p><p><strong>Verifiability.</strong> FHE does not offer proofs of correct encryption or of correct computation. Hence, we must rely on trust that the data intended for encryption is indeed the correct data <a href="https://arxiv.org/abs/2301.07041">(Viand </a><em><a href="https://arxiv.org/abs/2301.07041">et al.</a></em><a href="https://arxiv.org/abs/2301.07041"> 2023)</a>.</p><p><strong>Speed.</strong> Relative to conventional encryption schemes, FHE is still considered to be slow in parameter setup, encryption, and decryption <a href="https://cacm.acm.org/magazines/2023/5/272277-unlocking-the-potential-of-fully-homomorphic-encryption/fulltext">(Gorantala </a><em><a href="https://cacm.acm.org/magazines/2023/5/272277-unlocking-the-potential-of-fully-homomorphic-encryption/fulltext">et al.</a></em><a href="https://cacm.acm.org/magazines/2023/5/272277-unlocking-the-potential-of-fully-homomorphic-encryption/fulltext"> 2023)</a>.</p><p><strong>Memory requirements.</strong> The number of weights that need to be encrypted is proportional to the size of the network. 
Even for small networks, the RAM requirements are on the order of gigabytes <a href="https://eprint.iacr.org/2018/462">(Chen </a><em><a href="https://eprint.iacr.org/2018/462">et al.</a></em><a href="https://eprint.iacr.org/2018/462"> 2018)</a>, <a href="https://openaccess.thecvf.com/content_CVPRW_2019/html/CV-COPS/Nandakumar_Towards_Deep_Neural_Network_Training_on_Encrypted_Data_CVPRW_2019_paper.html">(Nandakumar </a><em><a href="https://openaccess.thecvf.com/content_CVPRW_2019/html/CV-COPS/Nandakumar_Towards_Deep_Neural_Network_Training_on_Encrypted_Data_CVPRW_2019_paper.html">et al.</a></em><a href="https://openaccess.thecvf.com/content_CVPRW_2019/html/CV-COPS/Nandakumar_Towards_Deep_Neural_Network_Training_on_Encrypted_Data_CVPRW_2019_paper.html"> 2019)</a>.</p><p><strong>Usability.</strong> FHE schemes use many parameters that need to be carefully tuned and require extensive experience from users <a href="https://eprint.iacr.org/2022/915">(Al Badawi </a><em><a href="https://eprint.iacr.org/2022/915">et al.</a></em><a href="https://eprint.iacr.org/2022/915"> 2022)</a>, <a href="https://eprint.iacr.org/2020/1481">(Halevi &amp; Shoup 2020)</a>.</p><div><hr></div><h2>TLDR</h2><p>We examined the four most widely used privacy-preserving techniques in machine learning, focusing on neural network training and inference. We evaluated these techniques across four dimensions: data privacy, model algorithm privacy, model weights privacy, and verifiability.</p><p>Data privacy considers the model owner's access to private data. Differential privacy (DP) and zero-knowledge machine learning (ZKML) require access to private data for training and proof generation, respectively. Federated learning (FL) enables training and inference without revealing data, while fully homomorphic encryption (FHE) allows computations on encrypted data.</p><p>Model algorithm privacy refers to the data owner's access to the model's algorithms. 
DP does not require algorithm disclosure, while ZKML necessitates it for proof generation. FL distributes algorithms among local servers, and FHE operates without accessing the model's algorithms.</p><p>Model weights privacy concerns the data owner's access to the model's weights. DP and ZKML keep weights undisclosed or provide proofs of existence without revealing values. FL involves exchanging weights among servers for decentralized learning, contrasting with DP and ZKML's privacy-preserving mechanisms. FHE enables training and inference on encrypted data, eliminating the need for model owners to know the weights.</p><p>Verifiability refers to the inherent capabilities for verifiable computation. ZKML inherently provides this capability. DP, FL, and FHE do not provide similar levels of integrity assurance.</p><p>The table below summarizes our findings:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!oinJ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24448480-0c36-4a60-9bbc-f73387b71c5e_2034x744.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!oinJ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24448480-0c36-4a60-9bbc-f73387b71c5e_2034x744.png 424w, https://substackcdn.com/image/fetch/$s_!oinJ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24448480-0c36-4a60-9bbc-f73387b71c5e_2034x744.png 848w, https://substackcdn.com/image/fetch/$s_!oinJ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24448480-0c36-4a60-9bbc-f73387b71c5e_2034x744.png 1272w, 
https://substackcdn.com/image/fetch/$s_!oinJ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24448480-0c36-4a60-9bbc-f73387b71c5e_2034x744.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!oinJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24448480-0c36-4a60-9bbc-f73387b71c5e_2034x744.png" width="1456" height="533" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/24448480-0c36-4a60-9bbc-f73387b71c5e_2034x744.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:533,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:121150,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!oinJ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24448480-0c36-4a60-9bbc-f73387b71c5e_2034x744.png 424w, https://substackcdn.com/image/fetch/$s_!oinJ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24448480-0c36-4a60-9bbc-f73387b71c5e_2034x744.png 848w, https://substackcdn.com/image/fetch/$s_!oinJ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24448480-0c36-4a60-9bbc-f73387b71c5e_2034x744.png 1272w, 
https://substackcdn.com/image/fetch/$s_!oinJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24448480-0c36-4a60-9bbc-f73387b71c5e_2034x744.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://blog.bagel.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://blog.bagel.com/subscribe?"><span>Subscribe 
now</span></a></p><p></p><p></p><p></p>]]></content:encoded></item></channel></rss>