<?xml version="1.0" encoding="UTF-8"?><rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:googleplay="http://www.google.com/schemas/play-podcasts/1.0"><channel><title><![CDATA[Data Tinkerer: Data Science]]></title><description><![CDATA[Dig into the latest data science/ML developments in major companies]]></description><link>https://www.datatinkerer.io/s/data-science</link><image><url>https://substackcdn.com/image/fetch/$s_!JEdj!,w_256,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bea5ccd-f356-4154-a1db-16268300510e_500x500.png</url><title>Data Tinkerer: Data Science</title><link>https://www.datatinkerer.io/s/data-science</link></image><generator>Substack</generator><lastBuildDate>Thu, 09 Apr 2026 17:24:35 GMT</lastBuildDate><atom:link href="https://www.datatinkerer.io/feed" rel="self" type="application/rss+xml"/><copyright><![CDATA[Data Tinkerer]]></copyright><language><![CDATA[en]]></language><webMaster><![CDATA[datatinkerer@substack.com]]></webMaster><itunes:owner><itunes:email><![CDATA[datatinkerer@substack.com]]></itunes:email><itunes:name><![CDATA[Data Tinkerer]]></itunes:name></itunes:owner><itunes:author><![CDATA[Data Tinkerer]]></itunes:author><googleplay:owner><![CDATA[datatinkerer@substack.com]]></googleplay:owner><googleplay:email><![CDATA[datatinkerer@substack.com]]></googleplay:email><googleplay:author><![CDATA[Data Tinkerer]]></googleplay:author><itunes:block><![CDATA[Yes]]></itunes:block><item><title><![CDATA[How Shopify Scales Taxonomy Evolution Across 10,000+ Categories With Multi-Agent AI]]></title><description><![CDATA[From reactive manual curation to continuous taxonomy evolution grounded in merchant 
reality.]]></description><link>https://www.datatinkerer.io/p/how-shopify-scales-taxonomy-evolution-across-10000-categories-with-ai-agents</link><guid isPermaLink="false">https://www.datatinkerer.io/p/how-shopify-scales-taxonomy-evolution-across-10000-categories-with-ai-agents</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Thu, 26 Feb 2026 04:00:23 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!tUAj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10490256-5538-403a-bc50-b153a36a9b6f_1536x1024.webp" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Fellow Data Tinkerers!</p><p>Today we will look at how Shopify scales its product categorisation using agentic AI</p><p>But before that, I wanted to share with you what you could unlock if you share Data Tinkerer with just <strong>1 more person</strong>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!jEOH!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6544e5f1-6242-45c5-b4e9-0fda20c0d106_800x402.gif" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!jEOH!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6544e5f1-6242-45c5-b4e9-0fda20c0d106_800x402.gif 424w, https://substackcdn.com/image/fetch/$s_!jEOH!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6544e5f1-6242-45c5-b4e9-0fda20c0d106_800x402.gif 848w, 
https://substackcdn.com/image/fetch/$s_!jEOH!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6544e5f1-6242-45c5-b4e9-0fda20c0d106_800x402.gif 1272w, https://substackcdn.com/image/fetch/$s_!jEOH!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6544e5f1-6242-45c5-b4e9-0fda20c0d106_800x402.gif 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!jEOH!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6544e5f1-6242-45c5-b4e9-0fda20c0d106_800x402.gif" width="800" height="402" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6544e5f1-6242-45c5-b4e9-0fda20c0d106_800x402.gif&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:402,&quot;width&quot;:800,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3369150,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/gif&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/188769392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6544e5f1-6242-45c5-b4e9-0fda20c0d106_800x402.gif&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!jEOH!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6544e5f1-6242-45c5-b4e9-0fda20c0d106_800x402.gif 424w, https://substackcdn.com/image/fetch/$s_!jEOH!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6544e5f1-6242-45c5-b4e9-0fda20c0d106_800x402.gif 848w, 
https://substackcdn.com/image/fetch/$s_!jEOH!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6544e5f1-6242-45c5-b4e9-0fda20c0d106_800x402.gif 1272w, https://substackcdn.com/image/fetch/$s_!jEOH!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6544e5f1-6242-45c5-b4e9-0fda20c0d106_800x402.gif 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><p>There are 100+ resources to learn all things data (science, engineering, analysis). 
It includes videos, courses and projects, and can be filtered by tech stack (Python, SQL, Spark, etc.), skill level (Beginner, Intermediate and so on), provider name or free/paid. So if you know other people who like staying up to date on all things data, please share Data Tinkerer with them!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post&quot;,&quot;text&quot;:&quot;Refer a friend&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post"><span>Refer a friend</span></a></p><p>Now, with that out of the way, let&#8217;s get to Shopify&#8217;s multi-agent taxonomy</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!tUAj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10490256-5538-403a-bc50-b153a36a9b6f_1536x1024.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!tUAj!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10490256-5538-403a-bc50-b153a36a9b6f_1536x1024.webp 424w, https://substackcdn.com/image/fetch/$s_!tUAj!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10490256-5538-403a-bc50-b153a36a9b6f_1536x1024.webp 848w, https://substackcdn.com/image/fetch/$s_!tUAj!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10490256-5538-403a-bc50-b153a36a9b6f_1536x1024.webp 1272w, 
https://substackcdn.com/image/fetch/$s_!tUAj!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10490256-5538-403a-bc50-b153a36a9b6f_1536x1024.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!tUAj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10490256-5538-403a-bc50-b153a36a9b6f_1536x1024.webp" width="1456" height="971" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/10490256-5538-403a-bc50-b153a36a9b6f_1536x1024.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:971,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:72968,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/188769392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10490256-5538-403a-bc50-b153a36a9b6f_1536x1024.webp&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!tUAj!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10490256-5538-403a-bc50-b153a36a9b6f_1536x1024.webp 424w, https://substackcdn.com/image/fetch/$s_!tUAj!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10490256-5538-403a-bc50-b153a36a9b6f_1536x1024.webp 848w, 
https://substackcdn.com/image/fetch/$s_!tUAj!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10490256-5538-403a-bc50-b153a36a9b6f_1536x1024.webp 1272w, https://substackcdn.com/image/fetch/$s_!tUAj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10490256-5538-403a-bc50-b153a36a9b6f_1536x1024.webp 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">(Source: Shopify)</figcaption></figure></div><h3>TL;DR</h3><div><hr></div><h4><strong>Situation</strong></h4><p>Shopify&#8217;s 
product classification system makes tens of millions of predictions daily, across a taxonomy with 10,000+ categories and 2,000+ attributes. Commerce changes fast, the taxonomy has to keep up or the whole stack starts drifting.</p><h4><strong>Task</strong></h4><p>Keep the taxonomy current at scale without relying on slow, reactive, manual curation. Fix volume, expertise and consistency problems before they hit merchants, customers and model quality.</p><h4><strong>Action</strong></h4><p>Built an AI multi-agent system: structural analysis + product-driven analysis, then intelligent synthesis. Added equivalence detection (category = broader category + attribute filters) plus automated QA via domain-specific AI judges.</p><h4><strong>Result</strong></h4><p>Taxonomy branches can be analyzed in parallel: hundreds of categories instead of a few per day. Quality improved via grounded merchant data + structural consistency, with judges filtering proposals (example: &#8220;MagSafe compatible&#8221; approved at 93% confidence).</p><h4><strong>Use Cases</strong></h4><p>Category discovery, attribute gap detection, taxonomy maintenance, search and filtering improvement</p><h4><strong>Tech Stack/Framework</strong></h4><p>AI agent, equivalence detection, multi-agent system</p><div><hr></div><h3>Explained further</h3><div><hr></div><h4>Context</h4><p>Last year, over 875 million people bought items from Shopify merchants. Shopify already runs a product classification system that makes tens of millions of predictions daily with a high degree of accuracy.</p><p>But classification is the easy part compared to the thing underneath it: taxonomy. Because the model doesn&#8217;t just need to be right, it also needs a clean, consistent set of labels to be right <em>about</em>.</p><p>That&#8217;s the challenge for Shopify: once you have 10,000+ categories and 2,000+ attributes, the taxonomy becomes its own product with its own failure modes. It can get stale. It can get inconsistent. 
It can drift away from how merchants actually describe products. And when that happens, the classifier quality takes the blame for what is basically a taxonomy debt problem.</p><p>So this post is about what Shopify did next: they built an AI multi-agent system that doesn&#8217;t just classify products, it actively improves the taxonomy labels themselves so the system stays agile as commerce changes.</p><div><hr></div><h4>The challenge: scaling taxonomy without losing accuracy</h4><p>A taxonomy is a contract between three groups that rarely agree:</p><ul><li><p>Merchants describing products the way they think about them</p></li><li><p>Customers searching and filtering with their own mental model</p></li><li><p>Platform systems trying to enforce structure so everything stays queryable and comparable</p></li></ul><p>Now add the reality that commerce never sits still. New products appear. Old categories split. Entire verticals get reshaped by trends, tech and regulation. The taxonomy has to keep up or the platform drifts away from how people actually shop and sell.</p><p>Shopify frames the challenge as three problems.</p><p><strong>The volume problem: manual updates can&#8217;t keep up</strong></p><p>A global product taxonomy needs constant attention. Every new product type, emerging technology category and seasonal trend potentially triggers taxonomy updates. </p><p>Manual curation becomes a bottleneck because taxonomy work is not one change. It is usually a bundle: a category addition, a hierarchy decision, a set of attributes, naming alignment and a check for duplicates or conflicts.</p><p>For example, consider the emergence of categories like smart home devices or remote work equipment. 
Each of these represents not just new categories but also entirely new attribute sets.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!u4Rg!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08494fd3-dfe6-4019-acce-a25af7e18b77_2462x841.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!u4Rg!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08494fd3-dfe6-4019-acce-a25af7e18b77_2462x841.webp 424w, https://substackcdn.com/image/fetch/$s_!u4Rg!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08494fd3-dfe6-4019-acce-a25af7e18b77_2462x841.webp 848w, https://substackcdn.com/image/fetch/$s_!u4Rg!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08494fd3-dfe6-4019-acce-a25af7e18b77_2462x841.webp 1272w, https://substackcdn.com/image/fetch/$s_!u4Rg!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08494fd3-dfe6-4019-acce-a25af7e18b77_2462x841.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!u4Rg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08494fd3-dfe6-4019-acce-a25af7e18b77_2462x841.webp" width="1456" height="497" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/08494fd3-dfe6-4019-acce-a25af7e18b77_2462x841.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:497,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:42484,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/188769392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08494fd3-dfe6-4019-acce-a25af7e18b77_2462x841.webp&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!u4Rg!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08494fd3-dfe6-4019-acce-a25af7e18b77_2462x841.webp 424w, https://substackcdn.com/image/fetch/$s_!u4Rg!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08494fd3-dfe6-4019-acce-a25af7e18b77_2462x841.webp 848w, https://substackcdn.com/image/fetch/$s_!u4Rg!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08494fd3-dfe6-4019-acce-a25af7e18b77_2462x841.webp 1272w, https://substackcdn.com/image/fetch/$s_!u4Rg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08494fd3-dfe6-4019-acce-a25af7e18b77_2462x841.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">A new category example (Source: Shopify)</figcaption></figure></div><p>Smart home devices for instance need connectivity types, power requirements and compatibility. Those are specs that did not exist in the taxonomy before.</p><p>So the work isn&#8217;t a one-off. It&#8217;s continuous expansion and adjustment across a giant tree of concepts.</p><p><strong>The expertise problem: every vertical has its own rules</strong></p><p>Good taxonomy design is domain-heavy. You do not get it right by being generally smart. You get it right by knowing what matters in that product world. For example, there are nuanced differences between types of guitar pickups or appropriate attributes for skincare products.</p><p>A taxonomy team can&#8217;t realistically maintain deep expertise across every vertical that merchants sell into. 
But if the taxonomy is inconsistent or poorly structured, merchants pay for it through reduced discoverability, suboptimal search results and ineffective filters for customers.</p><p><strong>The consistency problem: one concept, five different labels</strong></p><p>As the taxonomy grows organically, inconsistencies creep in:</p><ul><li><p>similar concepts represented differently across categories</p></li><li><p>naming conventions inconsistent</p></li><li><p>discrepancies between merchant categorization and customer expectations</p></li></ul><p>Those inconsistencies compound. Merchants get confused when listing. Customers get frustrated when filtering and comparing. And the classifier quality drops because labels stop being reliably meaningful across the tree.</p><p>This is the part most teams underestimate. In a taxonomy, small inconsistencies behave like small data quality issues: they don&#8217;t stay small.</p><div><hr></div><h4>From manual taxonomy work to agent-led evolution</h4><p>Shopify&#8217;s taxonomy management evolved from a manual workflow into an AI-driven system.</p><p><strong>The old way: Expert review, slow throughput</strong></p><p>The traditional pattern is familiar:</p><ol><li><p>domain experts analyze product data</p></li><li><p>identify gaps or inconsistencies</p></li><li><p>propose changes</p></li><li><p>implement changes via careful review</p></li></ol><p>It ensures quality but it also creates bottlenecks.</p><p>The biggest problem was the reactive nature of it: Shopify would only recognize the need for new categories or attributes <em><strong>after</strong></em> merchants began listing products that didn&#8217;t fit. 
By then, the system had already missed chances to give merchants and customers a better experience.</p><p>So even when you do great manual work, you&#8217;re always late.</p><p><strong>The breakthrough: Two lenses, one system</strong></p><p>Advanced language models opened a door: not to replace human experts, but to augment them with scale and consistency.</p><p>The key insight was that taxonomy improvement comes from two different angles:</p><ul><li><p><strong>structural analysis</strong>: the logical structure of the taxonomy, gaps in hierarchies, missing relationships</p></li><li><p><strong>product-driven analysis</strong>: what real product data says merchants actually sell and how they describe it</p></li></ul><p>Each angle catches different issues. Shopify&#8217;s breakthrough was combining them into a system that can continuously propose improvements then filter them through quality checks before human review.</p><div><hr></div><h4>Inside the system: How the agents work</h4><p>The new architecture rests on three principles:</p><ul><li><p>specialized analysis</p></li><li><p>intelligent coordination</p></li><li><p>quality assurance</p></li></ul><p>And the intent is clear: continuous evolution, not one-time taxonomy construction.</p><p><strong>What&#8217;s different: continuous evolution, not one-time creation</strong></p><p>AI&#8217;s been used for product categorisation and one-off taxonomy builds for a while. The difference here is instead of building it once and hoping it holds, Shopify uses specialised AI agents to keep the taxonomy evolving continuously. There are 3 core components to this approach:</p><p><strong>1- Real product grounding: </strong>The system integrates actual merchant product data so proposals reflect how merchants describe and categorize products. 
This keeps decisions grounded in commerce reality rather than only theory.</p><p>In other words: if merchants are consistently describing a differentiator, it probably belongs in the taxonomy, even if it offends someone&#8217;s idea of a &#8220;pure&#8221; category tree.</p><p><strong>2- Multi-agent specialization: </strong>Multiple specialized agents run different analyses. One focuses on structural consistency. Another focuses on product-driven insights. Then those outputs are synthesized. The claim here is that the combination finds improvements that neither agent would find alone.</p><p>That makes sense structurally. Taxonomy is both a graph problem and a language problem.</p><p><strong>3- Sophisticated equivalence discovery: </strong>This is the most interesting component: detecting equivalence relationships where a specific category equals a broader category filtered by attribute values.</p><p>This matters because merchants should be able to organize their catalogs however they want, while the platform still understands what products &#8216;mean&#8217; underneath the merchant&#8217;s choices.</p><p>So instead of forcing everyone into one rigid structure, Shopify tries to learn mappings that preserve flexibility and still support search, recommendations, and analytics.</p><p><strong>Architecture flow</strong></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!jtG3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F36e057eb-1a81-48b4-be6f-6fc481b3a4c1_470x840.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!jtG3!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F36e057eb-1a81-48b4-be6f-6fc481b3a4c1_470x840.webp 424w, 
https://substackcdn.com/image/fetch/$s_!jtG3!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F36e057eb-1a81-48b4-be6f-6fc481b3a4c1_470x840.webp 848w, https://substackcdn.com/image/fetch/$s_!jtG3!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F36e057eb-1a81-48b4-be6f-6fc481b3a4c1_470x840.webp 1272w, https://substackcdn.com/image/fetch/$s_!jtG3!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F36e057eb-1a81-48b4-be6f-6fc481b3a4c1_470x840.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!jtG3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F36e057eb-1a81-48b4-be6f-6fc481b3a4c1_470x840.webp" width="470" height="840" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/36e057eb-1a81-48b4-be6f-6fc481b3a4c1_470x840.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:840,&quot;width&quot;:470,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:23704,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/188769392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F36e057eb-1a81-48b4-be6f-6fc481b3a4c1_470x840.webp&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!jtG3!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F36e057eb-1a81-48b4-be6f-6fc481b3a4c1_470x840.webp 
424w, https://substackcdn.com/image/fetch/$s_!jtG3!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F36e057eb-1a81-48b4-be6f-6fc481b3a4c1_470x840.webp 848w, https://substackcdn.com/image/fetch/$s_!jtG3!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F36e057eb-1a81-48b4-be6f-6fc481b3a4c1_470x840.webp 1272w, https://substackcdn.com/image/fetch/$s_!jtG3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F36e057eb-1a81-48b4-be6f-6fc481b3a4c1_470x840.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">AI Agent architecture flow (Source: Shopify)</figcaption></figure></div><p>The AI agent workflow works like this:</p><ul><li><p>enable agents to explore the taxonomy</p></li><li><p>run multi-stage analysis (structural + product-driven)</p></li><li><p>synthesize and resolve conflicts</p></li><li><p>detect equivalences</p></li><li><p>run automated QA using judges</p></li><li><p>send refined proposals to humans</p></li><li><p>update the taxonomy in production</p></li></ul><div><hr></div><h4>Enabling agent-taxonomy interaction</h4><p>Before agents can improve anything, they need to &#8216;read&#8217; the taxonomy like a human would.</p><p>Shopify implemented a system that allows agents to:</p><ul><li><p>search for related categories</p></li><li><p>examine hierarchical relationships</p></li><li><p>verify whether proposed changes conflict with existing elements</p></li></ul><p>A good example: an agent analyzing guitar-related categories can explore the full musical instruments hierarchy, inspect related attributes across instruments and look for patterns that suggest better structure.</p><p>In other words, the agent doesn&#8217;t just look at one node. 
It roams the neighborhood.</p><div><hr></div><h4>The pipeline: specialised agents, staged decisions</h4><p>For the AI Agent to work properly, different specialised agents are at work to provide specific insights:</p><p><strong>Structural analysis: </strong>This agent looks at the taxonomy itself for logical consistency, completeness, gaps in category hierarchies, naming convention inconsistencies and opportunities to reorganize related concepts.</p><p>It operates purely on the taxonomy structure and aims to keep the whole thing coherent.</p><p><strong>Product-driven analysis: </strong>This agent integrates real merchant data and examines how products are described and categorized on the platform.</p><p>Specifically, it looks at patterns in product titles, product descriptions and merchant-defined categories. The goal is to find gaps between how merchants think about products and how the taxonomy represents them.</p><p>This is an important distinction. A taxonomy can be structurally perfect and still be useless if it doesn&#8217;t match merchant reality.</p><p><strong>Intelligent synthesis: </strong>Now we have two streams of recommendations:</p><ul><li><p>structure-driven improvements</p></li><li><p>product-driven improvements</p></li></ul><p>They can conflict. They can overlap. They can propose redundant changes.</p><p>The synthesis step merges insights, resolves conflicts, and eliminates redundancies. And sometimes the best answer is not pick one, it&#8217;s combine both.</p><p><strong>Equivalence detection: </strong>This agent solves a practical commerce problem: merchants want flexibility but platform systems need consistency.</p><p>Consider golf shoes:</p><ul><li><p>Merchant A uses a specific &#8216;Golf Shoes&#8217; category</p></li><li><p>Merchant B uses &#8216;Athletic Shoes&#8217; with an &#8216;Activity Type = Golf&#8217; attribute</p></li></ul><p>Both are valid for the merchant. 
But search, recommendations and analytics benefit from understanding these represent the same product set.</p><p>So the system detects attribute-based equivalences of the form:</p><blockquote><p>specific category = broader category + one or more attribute filters</p></blockquote><p>This lets merchants organize however makes sense for their business while keeping platform intelligence consistent across different catalog structures.</p><p>If you&#8217;ve ever tried to do cross-merchant analytics at scale, you can probably feel why Shopify cared enough to build an entire agent for this.</p><div><hr></div><h4>Automated QA: judges before humans</h4><p>After proposals are generated, Shopify adds automated QA through specialized AI judges.</p><p>These judges evaluate proposed changes using reasoning capabilities and taxonomy design principles to filter and refine suggestions before human review.</p><p>The important detail is that evaluation differs by change type:</p><ul><li><p>adding new attributes</p></li><li><p>creating category hierarchies</p></li><li><p>modifying existing structures</p></li></ul><p>Different changes require different criteria, so one generic &#8216;judge prompt&#8217; would be weak. So instead, they use <strong>domain-specific judges</strong>.</p><p>An electronics-focused judge applies electronics expertise. A musical instruments judge applies that domain&#8217;s patterns and rules. The goal is consistent domain-aware evaluation across verticals.</p><div><hr></div><h3>Results</h3><p>The system can analyze taxonomy branches in parallel, identifying improvement opportunities that used to take weeks of manual work.</p><p>Where experts might analyze a few categories per day, the system can evaluate hundreds of categories, checking both:</p><ul><li><p>structural consistency</p></li><li><p>alignment with real product data</p></li></ul><p>This matters most for emerging product categories. 
When new product types become popular on the platform, the system can quickly identify taxonomy gaps and propose comprehensive solutions, instead of reactive patches that build up debt.</p><p><strong>Quality improvements</strong></p><p>The multi-agent design improves consistency and comprehensiveness because it combines two lenses:</p><ul><li><p>structural analysis keeps hierarchy organization logical and consistent</p></li><li><p>product-driven analysis keeps categories and attributes aligned with merchant reality</p></li></ul><p>The automated QA layer reduces iteration cycles by catching issues before human review and applying domain expertise consistently.</p><p><strong>Example: mobile phone accessories and MagSafe compatibility</strong></p><p>Product analysis identified that merchants frequently advertise &#8220;MagSafe support&#8221; for accessories such as chargers, cases and wallets.</p><p>So the agent proposed adding a boolean attribute: &#8216;MagSafe compatible.&#8217;</p><p>A specialized electronics judge evaluated the proposal and checked:</p><ul><li><p>no duplicate attribute already exists</p></li><li><p>boolean type is appropriate</p></li><li><p>while brand-specific, MagSafe is treated as a legitimate technical standard similar to Bluetooth or Qi</p></li></ul><p>The judge approved the attribute with <strong>93% confidence</strong>, noting it would improve customer filtering for MagSafe-ready products.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!M4Uu!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8e5da6d9-d490-4773-a9a5-77b2d8b2166d_2048x1460.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!M4Uu!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8e5da6d9-d490-4773-a9a5-77b2d8b2166d_2048x1460.webp 424w, https://substackcdn.com/image/fetch/$s_!M4Uu!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8e5da6d9-d490-4773-a9a5-77b2d8b2166d_2048x1460.webp 848w, https://substackcdn.com/image/fetch/$s_!M4Uu!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8e5da6d9-d490-4773-a9a5-77b2d8b2166d_2048x1460.webp 1272w, https://substackcdn.com/image/fetch/$s_!M4Uu!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8e5da6d9-d490-4773-a9a5-77b2d8b2166d_2048x1460.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!M4Uu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8e5da6d9-d490-4773-a9a5-77b2d8b2166d_2048x1460.webp" width="1456" height="1038" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8e5da6d9-d490-4773-a9a5-77b2d8b2166d_2048x1460.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1038,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:215182,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/188769392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8e5da6d9-d490-4773-a9a5-77b2d8b2166d_2048x1460.webp&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!M4Uu!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8e5da6d9-d490-4773-a9a5-77b2d8b2166d_2048x1460.webp 424w, https://substackcdn.com/image/fetch/$s_!M4Uu!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8e5da6d9-d490-4773-a9a5-77b2d8b2166d_2048x1460.webp 848w, https://substackcdn.com/image/fetch/$s_!M4Uu!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8e5da6d9-d490-4773-a9a5-77b2d8b2166d_2048x1460.webp 1272w, https://substackcdn.com/image/fetch/$s_!M4Uu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8e5da6d9-d490-4773-a9a5-77b2d8b2166d_2048x1460.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" 
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">MagSafe example (Source: Shopify)</figcaption></figure></div><p>This example matters because it demonstrates the full loop:</p><ul><li><p>merchant reality creates a signal</p></li><li><p>the agent proposes a structured change</p></li><li><p>a domain judge validates it with rule checks and domain framing</p></li><li><p>humans get a higher quality proposal to review</p></li></ul><p><strong>Scaling development: from reactive fixes to proactive evolution</strong></p><p>The biggest shift is strategic: taxonomy development becomes proactive, not reactive.</p><p>Instead of waiting for a merchant pain point or a platform limitation to trigger a change, the system can identify and address gaps earlier.</p><p>The system can also reason over the entire taxonomy structure, which supports cross-category consistency. 
That helps avoid the fragmentation you get when teams fix issues in isolation.</p><p>To validate the approach, they applied it to a specific area: <strong>Electronics &gt; Communications &gt; Telephony</strong> (called &#8220;Telephony AI&#8221; in their analysis) and compared it against their previous manual expansion method.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!3I-O!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8ae93987-6ddd-4093-891c-44bed1b0a9ff_1558x1164.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!3I-O!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8ae93987-6ddd-4093-891c-44bed1b0a9ff_1558x1164.webp 424w, https://substackcdn.com/image/fetch/$s_!3I-O!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8ae93987-6ddd-4093-891c-44bed1b0a9ff_1558x1164.webp 848w, https://substackcdn.com/image/fetch/$s_!3I-O!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8ae93987-6ddd-4093-891c-44bed1b0a9ff_1558x1164.webp 1272w, https://substackcdn.com/image/fetch/$s_!3I-O!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8ae93987-6ddd-4093-891c-44bed1b0a9ff_1558x1164.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!3I-O!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8ae93987-6ddd-4093-891c-44bed1b0a9ff_1558x1164.webp" width="1456" height="1088" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8ae93987-6ddd-4093-891c-44bed1b0a9ff_1558x1164.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1088,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:103104,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/188769392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8ae93987-6ddd-4093-891c-44bed1b0a9ff_1558x1164.webp&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!3I-O!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8ae93987-6ddd-4093-891c-44bed1b0a9ff_1558x1164.webp 424w, https://substackcdn.com/image/fetch/$s_!3I-O!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8ae93987-6ddd-4093-891c-44bed1b0a9ff_1558x1164.webp 848w, https://substackcdn.com/image/fetch/$s_!3I-O!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8ae93987-6ddd-4093-891c-44bed1b0a9ff_1558x1164.webp 1272w, https://substackcdn.com/image/fetch/$s_!3I-O!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8ae93987-6ddd-4093-891c-44bed1b0a9ff_1558x1164.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">AI Agent impact (Source: Shopify)</figcaption></figure></div><p>As you can see from the chart, the AI-assisted method can compress years of work into weeks for the taxonomy area if the agents are applied across all verticals.</p><div><hr></div><h3>The full scoop</h3><p>To learn more about this, check <a href="https://shopify.engineering/product-taxonomy-at-scale">Shopify's Engineering Blog</a> post on this topic</p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">If you liked this post and don&#8217;t want 
to miss the next one, subscribe to Data Tinkerer!</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><p>If you are already subscribed and enjoyed the article, please give it a like and/or share it with others, really appreciate it &#128591;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/p/how-shopify-scales-taxonomy-evolution-across-10000-categories-with-ai-agents?utm_source=substack&utm_medium=email&utm_content=share&action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.datatinkerer.io/p/how-shopify-scales-taxonomy-evolution-across-10000-categories-with-ai-agents?utm_source=substack&utm_medium=email&utm_content=share&action=share"><span>Share</span></a></p><div><hr></div><h3>Keep learning</h3><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;e78745a9-87b3-4842-91b1-c47c28b3e197&quot;,&quot;caption&quot;:&quot;Production ML isn&#8217;t only about clever architectures. 
It&#8217;s about judgment, trade-offs and systems that hold up when data is messy.<br /><br />I sat down with Ahsaas Bajaj , Senior ML Engineer at Instacart, to talk about how they handle product substitutions at scale, what actually moves business metrics and what changes when you move into a senior ML role.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;lg&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;How to Build a Recommendation System at Scale: Insights from Instacart&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:291590442,&quot;name&quot;:&quot;Data Tinkerer&quot;,&quot;bio&quot;:&quot;Head of analytics sharing deep dives and learnings about AI and all things data (science, engineering, analysis)&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/83d3bbe0-5fb8-4f8d-9b74-036abbd6fec9_500x500.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null},{&quot;id&quot;:175610076,&quot;name&quot;:&quot;Ahsaas Bajaj&quot;,&quot;bio&quot;:&quot;Senior Machine Learning Engineer II at Instacart&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/34dac958-9c70-4f48-89ed-6c2e0d6f197e_899x901.png&quot;,&quot;is_guest&quot;:true,&quot;bestseller_tier&quot;:null,&quot;primaryPublicationSubscribeUrl&quot;:&quot;https://bajajahsaas.substack.com/subscribe?&quot;,&quot;primaryPublicationUrl&quot;:&quot;https://bajajahsaas.substack.com&quot;,&quot;primaryPublicationName&quot;:&quot;Ahsaas 
Bajaj&quot;,&quot;primaryPublicationId&quot;:7296320}],&quot;post_date&quot;:&quot;2026-01-29T03:30:24.563Z&quot;,&quot;cover_image&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3e6c5924-ed6c-4998-8e4a-8f88d9102c8b_844x473.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.datatinkerer.io/p/how-to-build-a-recommendation-system-at-scale&quot;,&quot;section_name&quot;:&quot;Data Science&quot;,&quot;video_upload_id&quot;:null,&quot;id&quot;:181648418,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:12,&quot;comment_count&quot;:2,&quot;publication_id&quot;:3422740,&quot;publication_name&quot;:&quot;Data Tinkerer&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!JEdj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bea5ccd-f356-4154-a1db-16268300510e_500x500.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;85d1fa84-549b-4cc1-b9b2-ea55a5e0b6fb&quot;,&quot;caption&quot;:&quot;DoorDash built an anomaly detection platform to catch fraud trends before they result into huge top-line losses.<br /><br />This piece breaks down how they scan hundreds of millions of overlapping segments each day, cut fraud detection time from 100+ days to under three and save tens of millions annually by finding small signals while they still look like noise.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;lg&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;How DoorDash Saves Tens of Millions of Dollars Per Year by Detecting Fraud 30&#215; Faster&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:291590442,&quot;name&quot;:&quot;Data Tinkerer&quot;,&quot;bio&quot;:&quot;Head of analytics sharing deep dives and 
learnings about AI and all things data (science, engineering, analysis)&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/83d3bbe0-5fb8-4f8d-9b74-036abbd6fec9_500x500.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2026-01-23T05:56:24.141Z&quot;,&quot;cover_image&quot;:&quot;https://images.unsplash.com/photo-1648091855444-76f97897dcd4?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxkb29yZGFzaHxlbnwwfHx8fDE3NjkxNDc0MjB8MA&amp;ixlib=rb-4.1.0&amp;q=80&amp;w=1080&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.datatinkerer.io/p/how-doordash-saves-tens-of-millions-a-year-by-detecting-fraud&quot;,&quot;section_name&quot;:&quot;Data Science&quot;,&quot;video_upload_id&quot;:null,&quot;id&quot;:185495640,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:15,&quot;comment_count&quot;:0,&quot;publication_id&quot;:3422740,&quot;publication_name&quot;:&quot;Data Tinkerer&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!JEdj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bea5ccd-f356-4154-a1db-16268300510e_500x500.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div>]]></content:encoded></item><item><title><![CDATA[How to Build a Recommendation System at Scale: Insights from Instacart]]></title><description><![CDATA[A Senior ML Engineer on production constraints, rules vs ML and the workflow behind large-scale recommender systems]]></description><link>https://www.datatinkerer.io/p/how-to-build-a-recommendation-system-at-scale</link><guid isPermaLink="false">https://www.datatinkerer.io/p/how-to-build-a-recommendation-system-at-scale</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Thu, 29 Jan 2026 
03:30:24 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/3e6c5924-ed6c-4998-8e4a-8f88d9102c8b_844x473.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Fellow Data Tinkerers,</p><p>Following on from previous posts talking to people in the field, today I will be talking with Ahsaas Bajaj who is a Senior Machine Learning Engineer at Instacart. He works on large-scale recommendation systems that serve millions of customers.</p><p>We talked about his rise from software engineering to machine learning at Instacart, how he decides between rules-based vs ML approaches and how he approaches the work now as a more senior stakeholder.</p><p>So without further ado, let&#8217;s get into it!</p><div><hr></div><h4><strong>Can you tell us a bit about your role?</strong></h4><p>I&#8217;m a Senior ML Engineer at Instacart, working across customer and shopper experiences on large-scale recommendation systems that make millions of decisions each day. For the past three years, I&#8217;ve led the technical strategy for the Product Substitutions ML system, focused on solving the out-of-stock problem. </p><p>The goal is simple: when an item isn&#8217;t available, suggest a replacement that preserves customer intent and keeps the order intact. My role spans system design, modeling and evaluation, balancing customer satisfaction, shopper efficiency and business impact at scale.</p><div><hr></div><h4><strong>How did you get into machine learning?</strong></h4><p>My path into ML wasn&#8217;t a straight line. I started as a software engineer at Samsung Research on the on-device search team, which pushed me deep into information retrieval and search system design. That work sparked an interest in research and led me to pursue a graduate degree in computer science. </p><p>It shaped how I approach ML today: less focus on models in isolation, more on how systems behave in production. 
I wanted that work to have real user impact, which took me to Walmart Labs and eventually to Instacart.</p><div class="pullquote"><p><em><strong>Ahsaas&#8217;s path</strong></em></p><p><em><strong>software engineer &#8594; data scientist &#8594; ML engineer &#8594; senior ML engineer</strong></em></p></div><h4><strong>What does a &#8216;typical&#8217; week look like for you?</strong></h4><p>As I&#8217;ve moved into a more senior role, the balance has shifted from pure coding to a mix of execution and direction. My week usually breaks down into three buckets:</p><p><strong>Alignment (30%)</strong>: The glue work. I spend time with product, backend engineering, and leadership aligning on roadmaps. The focus isn&#8217;t just <em>what</em> we&#8217;re building, but <em>why</em>, making sure ML work ties directly to business goals.</p><p><strong>Deep work (30%)</strong>: Hands-on modeling, coding and system design. Staying close to the code is non-negotiable for me, even at a senior level.</p><p><strong>Analysis and &#8220;the why&#8221; (40%)</strong>: This is where I spend the most time. I dig into model errors, read raw customer complaints about failed substitutions and sanity-check improvement ideas. This is also where I write proposal docs. In my view, the highest-leverage work a senior MLE does is deciding what problems to solve next, not just executing on what&#8217;s assigned.</p><div><hr></div><h4><strong>How do you decide when a problem actually needs ML or if rules-based is good enough?</strong></h4><p>I think about it in terms of complexity versus value.</p><p>If a problem can be solved deterministically with clear rules and those rules are stable and understandable, that&#8217;s often the right solution. Machine learning becomes useful when the space of behaviors is too large, nuanced, or context-dependent for rules to scale.</p><p>Good data is also a prerequisite. 
Without reliable signals and feedback loops, even the most sophisticated model won&#8217;t perform well in production.</p><div><hr></div><h4><strong>You have written about your work on a recommendation model at Instacart. Can you share a summary of what you have done?</strong></h4><p>I&#8217;ve spent the past three years leading the technical development of Instacart&#8217;s <a href="https://tech.instacart.com/how-instacart-uses-machine-learning-to-suggest-replacements-for-out-of-stock-products-8f80d03bb5af">Product Substitutions system</a>, which handles millions of replacement decisions daily. The core challenge is deceptively simple: when a customer&#8217;s requested item is out of stock, what should we suggest instead?</p><p>What makes this interesting from an ML perspective is that it&#8217;s fundamentally a relevance problem, not a search problem. We&#8217;re not just matching product attributes&#8212;we&#8217;re trying to understand what the customer actually wanted and find alternatives that preserve that intent. This required rethinking how we model the relationship between items, how we define &#8220;good&#8221; substitutions, and how we evaluate success in a way that maps to real customer satisfaction.</p><p>The system has evolved significantly over time, moving from simpler heuristics to more sophisticated learned representations. 
But the north star has always been the same: keep orders complete while respecting what customers actually care about.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!YXvE!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06d474a9-46da-42ae-be81-a0ca692fb52f_720x187.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!YXvE!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06d474a9-46da-42ae-be81-a0ca692fb52f_720x187.webp 424w, https://substackcdn.com/image/fetch/$s_!YXvE!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06d474a9-46da-42ae-be81-a0ca692fb52f_720x187.webp 848w, https://substackcdn.com/image/fetch/$s_!YXvE!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06d474a9-46da-42ae-be81-a0ca692fb52f_720x187.webp 1272w, https://substackcdn.com/image/fetch/$s_!YXvE!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06d474a9-46da-42ae-be81-a0ca692fb52f_720x187.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!YXvE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06d474a9-46da-42ae-be81-a0ca692fb52f_720x187.webp" width="720" height="187" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/06d474a9-46da-42ae-be81-a0ca692fb52f_720x187.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:187,&quot;width&quot;:720,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:8232,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/181648418?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06d474a9-46da-42ae-be81-a0ca692fb52f_720x187.webp&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!YXvE!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06d474a9-46da-42ae-be81-a0ca692fb52f_720x187.webp 424w, https://substackcdn.com/image/fetch/$s_!YXvE!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06d474a9-46da-42ae-be81-a0ca692fb52f_720x187.webp 848w, https://substackcdn.com/image/fetch/$s_!YXvE!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06d474a9-46da-42ae-be81-a0ca692fb52f_720x187.webp 1272w, https://substackcdn.com/image/fetch/$s_!YXvE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06d474a9-46da-42ae-be81-a0ca692fb52f_720x187.webp 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a><figcaption class="image-caption">Siamese network (Source: <a 
href="https://tech.instacart.com/how-instacart-uses-machine-learning-to-suggest-replacements-for-out-of-stock-products-8f80d03bb5af">Instacart</a>)</figcaption></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!uWDY!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe97d103-ab8d-46c0-a6c1-dc95484a86c1_720x447.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!uWDY!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe97d103-ab8d-46c0-a6c1-dc95484a86c1_720x447.webp 424w, https://substackcdn.com/image/fetch/$s_!uWDY!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe97d103-ab8d-46c0-a6c1-dc95484a86c1_720x447.webp 848w, https://substackcdn.com/image/fetch/$s_!uWDY!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe97d103-ab8d-46c0-a6c1-dc95484a86c1_720x447.webp 1272w, https://substackcdn.com/image/fetch/$s_!uWDY!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe97d103-ab8d-46c0-a6c1-dc95484a86c1_720x447.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!uWDY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe97d103-ab8d-46c0-a6c1-dc95484a86c1_720x447.webp" width="720" height="447" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/be97d103-ab8d-46c0-a6c1-dc95484a86c1_720x447.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:447,&quot;width&quot;:720,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:15228,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/181648418?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe97d103-ab8d-46c0-a6c1-dc95484a86c1_720x447.webp&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!uWDY!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe97d103-ab8d-46c0-a6c1-dc95484a86c1_720x447.webp 424w, https://substackcdn.com/image/fetch/$s_!uWDY!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe97d103-ab8d-46c0-a6c1-dc95484a86c1_720x447.webp 848w, https://substackcdn.com/image/fetch/$s_!uWDY!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe97d103-ab8d-46c0-a6c1-dc95484a86c1_720x447.webp 1272w, https://substackcdn.com/image/fetch/$s_!uWDY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe97d103-ab8d-46c0-a6c1-dc95484a86c1_720x447.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Product layer: one each for original and candidate product (Source: <a href="https://tech.instacart.com/how-instacart-uses-machine-learning-to-suggest-replacements-for-out-of-stock-products-8f80d03bb5af">Instacart</a>)</figcaption></figure></div><div><hr></div><h4><strong>And what has been the impact on the business?</strong></h4><p>Substitutions sit at a critical junction in the order lifecycle. When done well, they&#8217;re invisible - customers get what they need and the order stays intact. 
When done poorly, they create friction everywhere: customers reject items or request refunds, shoppers waste time on unsuccessful suggestions, and order values drop.</p><p>Our work has meaningfully moved the needle on the metrics that matter: replacement acceptance rates, refund frequency, and what we call &#8220;perfect order fill rate&#8221;&#8212;the percentage of orders where every item was either found or successfully replaced. These improvements compound across millions of weekly orders.</p><p>Beyond the immediate transactional metrics, we&#8217;ve also seen positive signals in repeat ordering behavior and customer satisfaction scores, particularly for orders that required multiple substitutions. Instacart has <a href="https://investors.instacart.com/static-files/27fac1c6-da32-40ca-8ef4-c8261b5ee12b">referenced</a> this system publicly when discussing operational improvements at scale.</p><p>For me, the real validation is when customers don&#8217;t notice the algorithm at all - they just notice their groceries arrived complete.</p><div><hr></div><h4><strong>What does the tech stack look like for ML at Instacart?</strong></h4><p>Instacart&#8217;s ML stack is built around an internal platform called <a href="https://www.instacart.com/company/tech-innovation/griffin-how-instacarts-ml-platform-tripled-ml-applications-in-a-year">Griffin</a>, which standardizes the end-to-end ML lifecycle, from feature engineering and training to deployment and real-time inference. A core piece of this is a shared Feature Marketplace, where teams define, version and reuse batch and streaming features with strong offline-to-online consistency.</p><p>Workflows are orchestrated with Apache Airflow and model training runs through a unified abstraction that supports multiple compute backends and common ML frameworks. 
With <a href="https://tech.instacart.com/introducing-griffin-2-0-instacarts-next-gen-ml-platform-b7331e73b8d7">Griffin 2.0</a>, the platform moved to a Kubernetes-based setup and added distributed training with Ray, which significantly improved scalability and iteration speed.</p><p>Griffin also includes a centralized model registry and metadata store, making experiments easier to track and reproduce. In production, models are deployed as standardized services that handle feature loading and low-latency inference across both customer and shopper experiences.</p><p>The main benefit is focus: teams spend less time on infrastructure and more time on modeling, evaluation and trade-offs.</p><div><hr></div><h4><strong>How do you use AI in your day-to-day work and where do you find it genuinely valuable?</strong></h4><p>I&#8217;ve integrated GenAI primarily to shift my focus from execution to decision-making. It&#8217;s useful for routine tasks like scaffolding data pipelines or optimizing SQL queries, but I find the highest leverage comes from <strong>qualitative analysis</strong>.</p><p>I routinely feed thousands of customer comments and shopper notes about bad substitutions into LLM-driven pipelines that cluster feedback into coherent themes. What used to be unstructured noise becomes a prioritized list of failure modes. This allows me to spend less time parsing data and more time solving the specific problems that actually impact customer trust.</p><div><hr></div><h4><strong>How has your perspective changed moving to a more senior role? </strong></h4><p>The biggest shift is realizing that <strong>Judgment &gt; Code</strong>. Early in my career, I obsessed over the <em>how</em> - the architecture, the libraries, the latency. Now, I obsess over the <em>what </em>and the<em> why.</em> The real work is filtering ideas. 
In a sea of seemingly good ideas, my job is to find the <em>most bullish</em> one - the one with the highest ROI - and kill the others.</p><p>I&#8217;ve also learned that <strong>Writing is Engineering.</strong> You cannot build big things alone. To get buy-in from leadership and cross-functional teams, you must be able to write crisp, narrative-driven proposals that explain <em>why</em> this mathematical solution solves a human problem.</p><div class="pullquote"><p><strong>The biggest shift is realizing that Judgment &gt; Code</strong></p></div><h4><strong>What&#8217;s one thing you wish you&#8217;d known earlier about machine learning?</strong></h4><p>The value of <strong>error analysis</strong>. It&#8217;s easy to celebrate aggregate metrics like accuracy or F1 but the real breakthroughs come from studying the &#8220;horror cases,&#8221; where the model is confidently wrong. Those examples are uncomfortable to look at but they&#8217;re where the most useful ideas come from. You can&#8217;t fix what you don&#8217;t deeply understand.</p><div><hr></div><p>If you enjoyed reading this, check out Ahsaas&#8217;s <a href="https://tech.instacart.com/how-instacart-uses-machine-learning-to-suggest-replacements-for-out-of-stock-products-8f80d03bb5af">original article</a> about his work at Instacart</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://tech.instacart.com/how-instacart-uses-machine-learning-to-suggest-replacements-for-out-of-stock-products-8f80d03bb5af" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!QfuD!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F276ea8ad-5379-4559-8bae-2cb8d384a294_692x394.png 424w, 
https://substackcdn.com/image/fetch/$s_!QfuD!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F276ea8ad-5379-4559-8bae-2cb8d384a294_692x394.png 848w, https://substackcdn.com/image/fetch/$s_!QfuD!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F276ea8ad-5379-4559-8bae-2cb8d384a294_692x394.png 1272w, https://substackcdn.com/image/fetch/$s_!QfuD!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F276ea8ad-5379-4559-8bae-2cb8d384a294_692x394.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!QfuD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F276ea8ad-5379-4559-8bae-2cb8d384a294_692x394.png" width="692" height="394" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/276ea8ad-5379-4559-8bae-2cb8d384a294_692x394.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:394,&quot;width&quot;:692,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:197952,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:&quot;https://tech.instacart.com/how-instacart-uses-machine-learning-to-suggest-replacements-for-out-of-stock-products-8f80d03bb5af&quot;,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/181648418?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F276ea8ad-5379-4559-8bae-2cb8d384a294_692x394.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!QfuD!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F276ea8ad-5379-4559-8bae-2cb8d384a294_692x394.png 424w, https://substackcdn.com/image/fetch/$s_!QfuD!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F276ea8ad-5379-4559-8bae-2cb8d384a294_692x394.png 848w, https://substackcdn.com/image/fetch/$s_!QfuD!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F276ea8ad-5379-4559-8bae-2cb8d384a294_692x394.png 1272w, https://substackcdn.com/image/fetch/$s_!QfuD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F276ea8ad-5379-4559-8bae-2cb8d384a294_692x394.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><p>Was there a question that you would like to ask?</p><p><strong>Let me know your thoughts by replying to the email or leaving a comment below!</strong></p><p>If you are already subscribed and enjoyed the article, please give it a like and/or share it with others. I really appreciate it &#128591;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/p/from-dental-cleaning-to-data-cleaning-pivoting-to-healthcare-analytics?utm_source=substack&amp;utm_medium=email&amp;utm_content=share&amp;action=share&amp;token=eyJ1c2VyX2lkIjoyOTE1OTA0NDIsInBvc3RfaWQiOjE3NjQ3MzIyMywiaWF0IjoxNzY4ODkyNzM0LCJleHAiOjE3NzE0ODQ3MzQsImlzcyI6InB1Yi0zNDIyNzQwIiwic3ViIjoicG9zdC1yZWFjdGlvbiJ9.vxPR9Jc4G7L4Yjw3wvlaaj8dKYSscG1A_D7Wiblqr1o&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.datatinkerer.io/p/from-dental-cleaning-to-data-cleaning-pivoting-to-healthcare-analytics?utm_source=substack&amp;utm_medium=email&amp;utm_content=share&amp;action=share&amp;token=eyJ1c2VyX2lkIjoyOTE1OTA0NDIsInBvc3RfaWQiOjE3NjQ3MzIyMywiaWF0IjoxNzY4ODkyNzM0LCJleHAiOjE3NzE0ODQ3MzQsImlzcyI6InB1Yi0zNDIyNzQwIiwic3ViIjoicG9zdC1yZWFjdGlvbiJ9.vxPR9Jc4G7L4Yjw3wvlaaj8dKYSscG1A_D7Wiblqr1o"><span>Share</span></a></p><div><hr></div><h3><strong>Keep reading</strong></h3><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;2d3312e0-ea8d-4705-959d-5748abc99f31&quot;,&quot;caption&quot;:&quot;Today I will be talking with Jose Parre&#241;o Garcia who is a Senior Data Science Manager at 
Skyscanner and writer of the Senior Data Science Lead newsletter.<br /><br />We talked about his rise from data analyst to Senior DS Manager at Skyscanner, what &#8220;production-ready&#8221; really means and why the real intelligence in data science lives before and after the model.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;lg&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;From Data Analyst to Senior DS Manager at Skyscanner&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:291590442,&quot;name&quot;:&quot;Data Tinkerer&quot;,&quot;bio&quot;:&quot;Ex-head of analytics sharing deep dives and learnings about AI and all things data (science, engineering, analysis)&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/83d3bbe0-5fb8-4f8d-9b74-036abbd6fec9_500x500.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null},{&quot;id&quot;:255728031,&quot;name&quot;:&quot;Jose Parre&#241;o Garcia&quot;,&quot;bio&quot;:&quot;I write about Data Science, Machine Learning and leading data teams. I have built teams from scratch and lead 50+ data scientists @Skyscanner. 
Now, I share my experience with you.&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!h_mv!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0c4dad41-478b-4960-a5e0-98ed1e54657e_1168x1046.jpeg&quot;,&quot;is_guest&quot;:true,&quot;bestseller_tier&quot;:null,&quot;primaryPublicationSubscribeUrl&quot;:&quot;https://joseparreogarcia.substack.com/subscribe?&quot;,&quot;primaryPublicationUrl&quot;:&quot;https://joseparreogarcia.substack.com&quot;,&quot;primaryPublicationName&quot;:&quot;Senior Data Science Lead&quot;,&quot;primaryPublicationId&quot;:2833541}],&quot;post_date&quot;:&quot;2025-11-13T03:54:26.969Z&quot;,&quot;cover_image&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/06735d58-e8f2-4106-88ae-efe0658c217c_764x661.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.datatinkerer.io/p/from-data-analyst-to-senior-ds-manager-at-skyscanner&quot;,&quot;section_name&quot;:&quot;Data Science&quot;,&quot;video_upload_id&quot;:null,&quot;id&quot;:176541975,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:13,&quot;comment_count&quot;:2,&quot;publication_id&quot;:3422740,&quot;publication_name&quot;:&quot;Data Tinkerer&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!JEdj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bea5ccd-f356-4154-a1db-16268300510e_500x500.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;c93130ad-2195-48c8-b16d-9ee951675f0b&quot;,&quot;caption&quot;:&quot;Check out the breakdown of Ahsaas's original article which we published last year!&quot;,&quot;cta&quot;:&quot;Read full 
story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;lg&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;The Art of Substitution: Instacart&#8217;s ML Model for Better Shopping Choices&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:291590442,&quot;name&quot;:&quot;Data Tinkerer&quot;,&quot;bio&quot;:&quot;Ex-head of analytics sharing deep dives and learnings about AI and all things data (science, engineering, analysis)&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/83d3bbe0-5fb8-4f8d-9b74-036abbd6fec9_500x500.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-01-12T23:01:15.656Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!9h_o!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F00a24c5b-b7a6-4d5c-9bc2-0e7b691d7d75_4800x2700.webp&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.datatinkerer.io/p/the-art-of-substitution-instacarts-ml-model&quot;,&quot;section_name&quot;:&quot;Data Science&quot;,&quot;video_upload_id&quot;:null,&quot;id&quot;:154057578,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:2,&quot;comment_count&quot;:0,&quot;publication_id&quot;:3422740,&quot;publication_name&quot;:&quot;Data Tinkerer&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!JEdj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bea5ccd-f356-4154-a1db-16268300510e_500x500.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div>]]></content:encoded></item><item><title><![CDATA[How DoorDash Saves Tens of Millions of Dollars Per Year by Detecting Fraud 30× Faster]]></title><description><![CDATA[A daily anomaly detection system that cut discovery 
time from 100+ days to under three.]]></description><link>https://www.datatinkerer.io/p/how-doordash-saves-tens-of-millions-a-year-by-detecting-fraud</link><guid isPermaLink="false">https://www.datatinkerer.io/p/how-doordash-saves-tens-of-millions-a-year-by-detecting-fraud</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Fri, 23 Jan 2026 05:56:24 GMT</pubDate><enclosure url="https://images.unsplash.com/photo-1648091855444-76f97897dcd4?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxkb29yZGFzaHxlbnwwfHx8fDE3NjkxNDc0MjB8MA&amp;ixlib=rb-4.1.0&amp;q=80&amp;w=1080" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Fellow Data Tinkerers!</p><p>Today we will look at how DoorDash uses anomaly detection to save millions of dollars by flagging fraud trends early. </p><p>But before that, I wanted to share with you what you could unlock if you share Data Tinkerer with just <strong>1 more person</strong>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!2Roe!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!2Roe!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 424w, https://substackcdn.com/image/fetch/$s_!2Roe!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 848w, 
https://substackcdn.com/image/fetch/$s_!2Roe!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 1272w, https://substackcdn.com/image/fetch/$s_!2Roe!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!2Roe!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif" width="800" height="402" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:402,&quot;width&quot;:800,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3369150,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/gif&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/175671629?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!2Roe!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 424w, https://substackcdn.com/image/fetch/$s_!2Roe!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 848w, 
https://substackcdn.com/image/fetch/$s_!2Roe!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 1272w, https://substackcdn.com/image/fetch/$s_!2Roe!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>There are 100+ resources to learn all things data (science, engineering, analysis). 
It includes videos, courses and projects, and can be filtered by tech stack (Python, SQL, Spark, etc.), skill level (Beginner, Intermediate and so on), provider name or free/paid. So if you know other people who like staying up to date on all things data, please share Data Tinkerer with them!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post&quot;,&quot;text&quot;:&quot;Refer a friend&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post"><span>Refer a friend</span></a></p><p>Now, with that out of the way, let&#8217;s get to DoorDash&#8217;s fraud detection!</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://images.unsplash.com/photo-1648091855444-76f97897dcd4?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxkb29yZGFzaHxlbnwwfHx8fDE3NjkxNDc0MjB8MA&amp;ixlib=rb-4.1.0&amp;q=80&amp;w=1080" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://images.unsplash.com/photo-1648091855444-76f97897dcd4?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxkb29yZGFzaHxlbnwwfHx8fDE3NjkxNDc0MjB8MA&amp;ixlib=rb-4.1.0&amp;q=80&amp;w=1080 424w, https://images.unsplash.com/photo-1648091855444-76f97897dcd4?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxkb29yZGFzaHxlbnwwfHx8fDE3NjkxNDc0MjB8MA&amp;ixlib=rb-4.1.0&amp;q=80&amp;w=1080 848w, 
https://images.unsplash.com/photo-1648091855444-76f97897dcd4?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxkb29yZGFzaHxlbnwwfHx8fDE3NjkxNDc0MjB8MA&amp;ixlib=rb-4.1.0&amp;q=80&amp;w=1080 1272w, https://images.unsplash.com/photo-1648091855444-76f97897dcd4?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxkb29yZGFzaHxlbnwwfHx8fDE3NjkxNDc0MjB8MA&amp;ixlib=rb-4.1.0&amp;q=80&amp;w=1080 1456w" sizes="100vw"><img src="https://images.unsplash.com/photo-1648091855444-76f97897dcd4?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxkb29yZGFzaHxlbnwwfHx8fDE3NjkxNDc0MjB8MA&amp;ixlib=rb-4.1.0&amp;q=80&amp;w=1080" width="5320" height="3377" data-attrs="{&quot;src&quot;:&quot;https://images.unsplash.com/photo-1648091855444-76f97897dcd4?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxkb29yZGFzaHxlbnwwfHx8fDE3NjkxNDc0MjB8MA&amp;ixlib=rb-4.1.0&amp;q=80&amp;w=1080&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:3377,&quot;width&quot;:5320,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;a close up of a cell phone on a table&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="a close up of a cell phone on a table" title="a close up of a cell phone on a table" srcset="https://images.unsplash.com/photo-1648091855444-76f97897dcd4?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxkb29yZGFzaHxlbnwwfHx8fDE3NjkxNDc0MjB8MA&amp;ixlib=rb-4.1.0&amp;q=80&amp;w=1080 424w, 
https://images.unsplash.com/photo-1648091855444-76f97897dcd4?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxkb29yZGFzaHxlbnwwfHx8fDE3NjkxNDc0MjB8MA&amp;ixlib=rb-4.1.0&amp;q=80&amp;w=1080 848w, https://images.unsplash.com/photo-1648091855444-76f97897dcd4?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxkb29yZGFzaHxlbnwwfHx8fDE3NjkxNDc0MjB8MA&amp;ixlib=rb-4.1.0&amp;q=80&amp;w=1080 1272w, https://images.unsplash.com/photo-1648091855444-76f97897dcd4?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxkb29yZGFzaHxlbnwwfHx8fDE3NjkxNDc0MjB8MA&amp;ixlib=rb-4.1.0&amp;q=80&amp;w=1080 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Photo by <a href="https://unsplash.com/@querysprout">Marques Thomas</a> on <a href="https://unsplash.com">Unsplash</a></figcaption></figure></div><h3>TL;DR</h3><div><hr></div><h4><strong>Situation</strong></h4><p>Fraud trends at DoorDash often blended into normal delivery noise and went unnoticed for weeks, causing avoidable losses. Existing detection was reactive and too slow.</p><h4><strong>Task</strong></h4><p>Detect emerging fraud trends early across millions of users and segments, before they materially impact top-line metrics.</p><h4><strong>Action</strong></h4><p>Build a daily anomaly detection platform that segments key fraud metrics across millions of overlapping dimensions, applies time-series z-score detection, clusters related anomalies and routes them into an ops investigation workflow.</p><h4><strong>Result</strong></h4><p>Cut average fraud detection time from 100+ days to under 3 days, surfaced 60%+ of new fraud trends early, and saved tens of millions annually.</p><h4><strong>Use Cases</strong></h4><p>Anomaly detection, fraud detection, payment monitoring, policy change impact monitoring</p><h4><strong>Tech Stack/Framework</strong></h4><p>Apache Airflow, DuckDB, Apache Spark, Python</p><div><hr></div><h3>Explained further</h3><div><hr></div><h4>Fraud trend detection before it becomes a headline</h4><p>Fraud doesn&#8217;t always kick the door down. Sometimes it slips in through the side window and blends into the noise of millions of legitimate deliveries.</p><p>A small spike in refund claims. A pattern in high-risk charges linked to a specific bank. A subtle shift in behavior that looks like randomness until it isn&#8217;t. Left alone, those early signals can snowball into a large trend with real top-line impact.</p><p>DoorDash&#8217;s fraud team wanted to flip the script. 
Instead of reacting after a new fraud trend has had weeks to grow unchecked, how could they spot it as early as possible, before significant damage is done?</p><p>This post shares how the DoorDash team built an anomaly detection platform that scans for emerging patterns across millions of user segments and surfaces the ones that matter before they spiral into major losses.</p><div><hr></div><h4>Terminology</h4><p>&#8216;Anomaly detection&#8217; is a broad term. Even within fraud, people can mean very different things by it. For this system, DoorDash defined two categories up front:</p><p><strong>Anomalous trend detection</strong></p><p>Looking for anomalous behavior in a <em>collection</em> of users that may represent a new fraud or false-positive trend.</p><p>Here, no single datapoint needs to be weird. The anomaly is the time-series pattern that emerges from many points together, like a growing fraud segment over time.</p><p><strong>Anomalous outlier detection</strong></p><p>Looking for <em>individual</em> outliers, like a specific user or transaction that is rare or deviates sharply from normal behavior.</p><p>In this case, the datapoint is the anomaly. 
It might be part of a broader trend, or it might be a one-off.</p><p>This post focuses on how DoorDash built a system to detect <strong>anomalous trends</strong>.</p><p>Here are some terms used in the article, with definitions and examples to make them easier to understand.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!a0Fu!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F98747126-080a-4ec0-954e-03e2ce6fdeb8_2173x757.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!a0Fu!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F98747126-080a-4ec0-954e-03e2ce6fdeb8_2173x757.png 424w, https://substackcdn.com/image/fetch/$s_!a0Fu!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F98747126-080a-4ec0-954e-03e2ce6fdeb8_2173x757.png 848w, https://substackcdn.com/image/fetch/$s_!a0Fu!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F98747126-080a-4ec0-954e-03e2ce6fdeb8_2173x757.png 1272w, https://substackcdn.com/image/fetch/$s_!a0Fu!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F98747126-080a-4ec0-954e-03e2ce6fdeb8_2173x757.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!a0Fu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F98747126-080a-4ec0-954e-03e2ce6fdeb8_2173x757.png" width="1456" height="507" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/98747126-080a-4ec0-954e-03e2ce6fdeb8_2173x757.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:507,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:132407,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/185495640?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F98747126-080a-4ec0-954e-03e2ce6fdeb8_2173x757.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!a0Fu!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F98747126-080a-4ec0-954e-03e2ce6fdeb8_2173x757.png 424w, https://substackcdn.com/image/fetch/$s_!a0Fu!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F98747126-080a-4ec0-954e-03e2ce6fdeb8_2173x757.png 848w, https://substackcdn.com/image/fetch/$s_!a0Fu!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F98747126-080a-4ec0-954e-03e2ce6fdeb8_2173x757.png 1272w, https://substackcdn.com/image/fetch/$s_!a0Fu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F98747126-080a-4ec0-954e-03e2ce6fdeb8_2173x757.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Terminology table (Source: Doordash)</figcaption></figure></div><div><hr></div><h4><strong>Designing the system around real fraud failures</strong></h4><p>The DoorDash team started the way you&#8217;d hope a fraud platform starts: by talking to the people who have to use it.</p><p>They met with frontline fraud teams responsible for tracking and fighting new fraud trends and asked for concrete historical examples of trends that simmered longer than ideal before being discovered and mitigated. 
These became the positive test cases.</p><p>Next, the teams were asked for:</p><ul><li><p>Their most useful early-warning indicator <strong>metrics</strong></p></li><li><p>The <strong>dimensions</strong> they commonly use to slice data when investigating a new fraud trend</p></li></ul><p>That produced a working set of:</p><ul><li><p>Positive examples (historical missed or late-found fraud trends)</p></li><li><p>A set of metrics that act as early-warning signals</p></li><li><p>A set of dimensions that represent how investigators naturally segment the world</p></li></ul><p>Then the DoorDash team built the system and backtested it. Tuning came next, but the tuning goal was very specific:</p><p>1- Maintain 100% recall on the test trends<br>2- Minimise the number of non-fraudulent anomalies per day</p><p>One observation stood out from this phase. The system was fairly insensitive to exact tuning values. What mattered more was upstream: choosing thoughtful metrics and dimensions that can actually capture fraud trends in the first place.</p><p>In other words: the math is important but the slices you choose decide what you can even see.</p><div><hr></div><h4>Architecture overview</h4><p>The anomaly detection platform runs as a daily job coordinated by Airflow. 
It looks for fraud trends growing on a day-to-week timescale.</p><p>DoorDash currently runs anomaly detection jobs for both <strong>consumer fraud</strong> and <strong>Dasher fraud</strong>, with plans to expand to more applications over time.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Osej!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b856a6b-f238-472b-a3a3-8ab309843cf8_1024x268.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Osej!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b856a6b-f238-472b-a3a3-8ab309843cf8_1024x268.webp 424w, https://substackcdn.com/image/fetch/$s_!Osej!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b856a6b-f238-472b-a3a3-8ab309843cf8_1024x268.webp 848w, https://substackcdn.com/image/fetch/$s_!Osej!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b856a6b-f238-472b-a3a3-8ab309843cf8_1024x268.webp 1272w, https://substackcdn.com/image/fetch/$s_!Osej!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b856a6b-f238-472b-a3a3-8ab309843cf8_1024x268.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Osej!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b856a6b-f238-472b-a3a3-8ab309843cf8_1024x268.webp" width="1024" height="268" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0b856a6b-f238-472b-a3a3-8ab309843cf8_1024x268.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:268,&quot;width&quot;:1024,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:43376,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/185495640?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b856a6b-f238-472b-a3a3-8ab309843cf8_1024x268.webp&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Osej!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b856a6b-f238-472b-a3a3-8ab309843cf8_1024x268.webp 424w, https://substackcdn.com/image/fetch/$s_!Osej!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b856a6b-f238-472b-a3a3-8ab309843cf8_1024x268.webp 848w, https://substackcdn.com/image/fetch/$s_!Osej!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b856a6b-f238-472b-a3a3-8ab309843cf8_1024x268.webp 1272w, https://substackcdn.com/image/fetch/$s_!Osej!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b856a6b-f238-472b-a3a3-8ab309843cf8_1024x268.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Doordash anomaly detection platform (Source: Doordash)</figcaption></figure></div><p>The platform has five steps:</p><ol><li><p>Preparing daily fraud snapshots</p></li><li><p>Metric aggregation on multi-dimensional segments</p></li><li><p>Time-series anomaly detection</p></li><li><p>Hierarchical clustering on anomalous segments</p></li><li><p>Turning clusters into investigations and containment</p></li></ol><div><hr></div><h4>Step 1: Preparing daily fraud snapshots</h4><p>DoorDash chose a daily batch job for the initial implementation because the fraud trends they historically missed developed over <strong>a few days to a few weeks</strong>.</p><p>An Airflow DAG prepares a dataset for each anomaly detection job containing the day&#8217;s data snapshot in a wide-table format.</p><p>If the trends you historically 
missed unfold across days and weeks, you do not need sub-second streaming to get meaningful wins. You need consistency, coverage and a reliable cadence.</p><div><hr></div><h4>Step 2: Metric aggregation on multi-dimensional segments</h4><p>This is the scale step. Once the daily snapshot is ready, DoorDash loads the single date&#8217;s data into a Python environment via Spark, then computes metric aggregates across segments.</p><p>For each metric, they track both:</p><ul><li><p><strong>Absolute value</strong> of the metric</p><ul><li><p>Example: dollar value of credit and refund claims</p></li></ul></li><li><p><strong>Relative (normalized) value</strong> of the metric</p><ul><li><p>Example: credit and refund claims divided by dollar value of orders</p></li></ul></li></ul><p>Why both? Because absolute values catch &#8216;this is costing real money&#8217; and relative values catch &#8216;this is spiking compared to what is normal for this slice&#8217;.</p><p>Then comes segmentation. Segments are formed from single, double and triple product combinations of all dimensions. That quickly becomes huge: it can run into hundreds of millions of segments at DoorDash scale, so compute efficiency becomes important.</p><p><strong>DuckDB for aggregation</strong></p><p>DoorDash computes metric aggregates using DuckDB, an in-memory Python database optimised for fast OLAP-style operations.</p><p>They chose DuckDB because it was:</p><ul><li><p>Much faster (less than 10 minutes)</p></li><li><p>More memory efficient than Pandas</p></li></ul><p>The system also excludes dimensional products with cardinality greater than 10^7 to reduce the total number of segments to a manageable size.</p><p>Finally, storage format.</p><p>The day&#8217;s metrics aggregated across hundreds of millions of segments are stored in the data warehouse in <strong>sparse tall table format</strong>.</p><p>In plain English: if a segment has a metric value of zero, DoorDash drops it. 
That cuts storage and keeps both DuckDB and the downstream warehouse from filling up with rows that say &#8216;nothing happened here.&#8217;</p><div><hr></div><h4>Step 3: Time-series anomaly detection</h4><p>After Step 2, DoorDash has daily metric aggregates by segment. They keep the previous 28 days of data in the data warehouse, so the platform now has several hundred million metric time series, each of length 28.</p><p>DoorDash chose a simple <strong>moving-window z-score</strong> approach, because it performed well in testing and detected all historical fraudulent trends they used as positive examples.</p><p><strong>Baseline and test setup</strong></p><ul><li><p>First <strong>21 days</strong> form the baseline</p></li><li><p>The <strong>28th day</strong> is the test day</p></li><li><p>There is a <strong>7-day gap</strong> between the baseline and the test day</p></li></ul><p>That gap exists for a very specific reason. The team noticed many historical fraud trends had a noisy phase when they first started scaling. By leaving a gap, the baseline variance better reflects &#8216;normal before the trend&#8217; which reduces missed trends.</p><p><strong>What counts as an anomaly</strong></p><p>A segment&#8217;s time series is flagged as anomalous if it meets both:</p><ol><li><p><strong>Statistical significance: </strong>The 28th-day <em>relative</em> metric is greater than X standard deviations above the mean of the 21-day baseline. DoorDash found <strong>6 standard deviations</strong> worked well empirically.</p></li><li><p><strong>Business significance: </strong>The 28th-day <em>absolute</em> metric exceeds the 21-day baseline by a dollar value and/or count that is meaningful for that metric. Thresholds vary by metric and were chosen with operations partners.</p></li></ol><p>That two-part rule matters. Statistical significance alone finds weirdness. 
Business significance filters it down to weirdness that&#8217;s worth a human&#8217;s time.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!-sSP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30486ff1-68ef-4ae3-bc16-a96093efa794_1024x595.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!-sSP!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30486ff1-68ef-4ae3-bc16-a96093efa794_1024x595.webp 424w, https://substackcdn.com/image/fetch/$s_!-sSP!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30486ff1-68ef-4ae3-bc16-a96093efa794_1024x595.webp 848w, https://substackcdn.com/image/fetch/$s_!-sSP!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30486ff1-68ef-4ae3-bc16-a96093efa794_1024x595.webp 1272w, https://substackcdn.com/image/fetch/$s_!-sSP!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30486ff1-68ef-4ae3-bc16-a96093efa794_1024x595.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!-sSP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30486ff1-68ef-4ae3-bc16-a96093efa794_1024x595.webp" width="1024" height="595" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/30486ff1-68ef-4ae3-bc16-a96093efa794_1024x595.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:595,&quot;width&quot;:1024,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:60360,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/185495640?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30486ff1-68ef-4ae3-bc16-a96093efa794_1024x595.webp&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!-sSP!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30486ff1-68ef-4ae3-bc16-a96093efa794_1024x595.webp 424w, https://substackcdn.com/image/fetch/$s_!-sSP!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30486ff1-68ef-4ae3-bc16-a96093efa794_1024x595.webp 848w, https://substackcdn.com/image/fetch/$s_!-sSP!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30486ff1-68ef-4ae3-bc16-a96093efa794_1024x595.webp 1272w, https://substackcdn.com/image/fetch/$s_!-sSP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30486ff1-68ef-4ae3-bc16-a96093efa794_1024x595.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Anomaly calculation example (Source: Doordash)</figcaption></figure></div><div><hr></div><h4><strong>Step 4: Hierarchical clustering on anomalous segments</strong></h4><p>Real fraud trends rarely show up as a single clean segment anomaly. A single trend often triggers anomalous increases across many partially overlapping segments. 
Example:</p><p>A spike in credit and refund claims at &#8216;Retailer One&#8217; could cause anomalies in segments like:</p><ul><li><p><code>{business_name='Retailer One'}</code></p></li><li><p><code>{country='US', business_name='Retailer One'}</code></p></li><li><p><code>{business_vertical='retail', business_name='Retailer One'}</code></p></li></ul><p>So Step 4 exists to shrink &#8216;thousands of anomalies&#8217; into &#8216;a few dozen things to look at&#8217;.</p><p><strong>Segment graph structure</strong></p><p>Dimensional segments have a natural structure that can be represented as a three-layer graph:</p><ul><li><p><strong>Top layer:</strong> singlets</p><ul><li><p><code>{business_name='Retailer One'}</code></p></li></ul></li><li><p><strong>Middle layer:</strong> pairs</p><ul><li><p><code>{business_name='Retailer One', country='US'}</code></p></li></ul></li><li><p><strong>Bottom layer:</strong> triplets</p><ul><li><p><code>{business_name='Retailer One', country='US', checkout_platform='iOS'}</code></p></li></ul></li></ul><p>DoorDash further partitions the graph by <code>METRIC_NAME</code> so clustering happens within a metric type.</p><p><strong>Clustering rules</strong></p><p>To connect anomalies within the same metric type:</p><ol><li><p><strong>Connect parent anomalies with child anomalies</strong></p><ul><li><p><code>{business_name='Retailer One'}</code> is parent of <code>{country='US', business_name='Retailer One'}</code></p></li><li><p><code>{country='US', business_name='Retailer One'}</code> is parent of <code>{business_name='Retailer One', country='US', checkout_platform='iOS'}</code></p></li></ul></li><li><p><strong>Connect sibling anomaly triplets</strong> if they share <strong>2/3</strong> of their keys and values</p><ul><li><p><code>{business_name='Retailer One', country='US', checkout_platform='iOS'}</code><br>connects with<br><code>{business_name='Retailer One', country='US', business_vertical='retail'}</code></p></li></ul></li></ol><p>Then 
DoorDash runs a graph partition algorithm to find connected anomaly clusters.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!FXWL!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2331571-5361-4687-9c9e-661654117e83_912x317.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!FXWL!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2331571-5361-4687-9c9e-661654117e83_912x317.webp 424w, https://substackcdn.com/image/fetch/$s_!FXWL!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2331571-5361-4687-9c9e-661654117e83_912x317.webp 848w, https://substackcdn.com/image/fetch/$s_!FXWL!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2331571-5361-4687-9c9e-661654117e83_912x317.webp 1272w, https://substackcdn.com/image/fetch/$s_!FXWL!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2331571-5361-4687-9c9e-661654117e83_912x317.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!FXWL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2331571-5361-4687-9c9e-661654117e83_912x317.webp" width="912" height="317" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f2331571-5361-4687-9c9e-661654117e83_912x317.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:317,&quot;width&quot;:912,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:23418,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/185495640?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2331571-5361-4687-9c9e-661654117e83_912x317.webp&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!FXWL!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2331571-5361-4687-9c9e-661654117e83_912x317.webp 424w, https://substackcdn.com/image/fetch/$s_!FXWL!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2331571-5361-4687-9c9e-661654117e83_912x317.webp 848w, https://substackcdn.com/image/fetch/$s_!FXWL!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2331571-5361-4687-9c9e-661654117e83_912x317.webp 1272w, https://substackcdn.com/image/fetch/$s_!FXWL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2331571-5361-4687-9c9e-661654117e83_912x317.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Red circles indicate anomalous segments, while grey circles indicate non-anomalous segments. (Source: Doordash)</figcaption></figure></div><p><strong>Picking a representative segment</strong></p><p>Ops teams review a cluster starting from a single representative segment chosen using a fitness function:</p><pre><code><code>fitness = abs_anom_amt * rel_amt / level^1.2
</code></code></pre><p>Where:</p><ul><li><p><code>abs_anom_amt</code> = 28th-day metric minus the 21-day baseline</p></li><li><p><code>rel_amt</code> = relative (normalized) 28th-day metric within the segment</p></li><li><p><code>level</code> = 1 for singlets, 2 for pairs, 3 for triplets</p></li></ul><p>The intuition:</p><ul><li><p><code>abs_anom_amt</code> behaves a bit like &#8216;how much impact&#8217; (think recall)</p></li><li><p><code>rel_amt</code> behaves a bit like &#8216;how concentrated&#8217; (think precision)</p></li><li><p>dividing by a weak function of <code>level</code> biases toward simpler segments</p></li></ul><p>So the representative is usually a segment that is impactful, unusually high relative to its baseline and not needlessly specific.</p><p><strong>What volume looks like in practice</strong></p><p>In real operation, DoorDash typically sees anomalies in several thousand segments per day. Clustering reduces that to <strong>20 to 60 anomalous clusters per day</strong> across consumer and Dasher fraud areas, which is a volume the operations team can realistically investigate.</p><div><hr></div><h4><strong>Step 5: Turning clusters into investigations and containment</strong></h4><p>Detection is not the finish line, it is just the trigger.</p><p>The representative anomalous segments, along with all other segments in the cluster and example events (deliveries and Dasher assignments), are accessible in a workflow tool for ops investigation.</p><p>Ops agents review example deliveries or assignments within the representative segment, looking for trends or patterns that may represent a new fraud trend.</p><p>Sometimes the pattern is non-fraudulent, like a new promotion causing a spike in refunds. 
Other times it is fraudulent.</p><p>When a trend is deemed fraudulent:</p><ul><li><p>it is root-caused in partnership with engineering and product teams so the root cause can be addressed</p></li><li><p>a separate containment team runs queries to identify and stop fraudsters matching the trend pattern until product fixes land</p></li></ul><p>So the system is not just detection. It&#8217;s detection wired into investigation, containment and longer-term remediation.</p><div><hr></div><h4>Results</h4><p>DoorDash now uses the anomaly detection platform as its primary early-warning source for new fraudulent trends.</p><p>Key results reported by the team:</p><ul><li><p>More than <strong>60%</strong> of all new fraud trends today are found through anomaly detection, and that share is growing as coverage expands.</p></li><li><p>Average time-to-detect new fraud trends dropped from <strong>more than 100 days</strong> to <strong>less than three days</strong> over the past year.</p></li><li><p>The platform saves <strong>tens of millions of dollars per year</strong> by flagging small but growing fraud trends before they get out of control.</p></li></ul><div><hr></div><h3>The full scoop</h3><p>To learn more about this, check <a href="https://careersatdoordash.com/blog/doordash-anomaly-detection-platform-to-catch-fraud-trends">DoorDash's Engineering Blog</a> post on this topic</p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">If you liked this post and don&#8217;t want to miss the next one, subscribe to Data Tinkerer!</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input 
type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><p>If you are already subscribed and enjoyed the article, please give it a like and/or share it with others, really appreciate it &#128591;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/p/how-doordash-saves-tens-of-millions-a-year-by-detecting-fraud?utm_source=substack&utm_medium=email&utm_content=share&action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.datatinkerer.io/p/how-doordash-saves-tens-of-millions-a-year-by-detecting-fraud?utm_source=substack&utm_medium=email&utm_content=share&action=share"><span>Share</span></a></p><div><hr></div><h3>Keep learning</h3><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;61ab4b4f-02dc-4ef8-a0eb-c6193f2bb650&quot;,&quot;caption&quot;:&quot;How do you handle search queries like &#8220;low-carb spicy chicken wrap with gluten-free tortilla&#8221; at scale?<br /><br />DoorDash rebuilt its search pipeline to better understand both user intent and product metadata. The result? 
A 30% increase in relevant results and measurable gains across key engagement metrics.<br /><br />This post breaks down the hybrid approach they used; combining LLMs, structured taxonomies and real-time retrieval without sacrificing speed or accuracy.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;lg&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;How DoorDash Used LLMs to Trigger 30% More Relevant Results&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:291590442,&quot;name&quot;:&quot;Data Tinkerer&quot;,&quot;bio&quot;:&quot;Ex-head of analytics sharing deep dives and learnings about AI and all things data (science, engineering, analysis)&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/83d3bbe0-5fb8-4f8d-9b74-036abbd6fec9_500x500.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-06-26T09:37:56.405Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!8K0n!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37f9be1c-e138-41d4-9596-b4cd02897f95_432x860.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.datatinkerer.io/p/how-doordash-used-llms-to-trigger&quot;,&quot;section_name&quot;:&quot;Data Science&quot;,&quot;video_upload_id&quot;:null,&quot;id&quot;:166857110,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:7,&quot;comment_count&quot;:0,&quot;publication_id&quot;:3422740,&quot;publication_name&quot;:&quot;Data 
Tinkerer&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!JEdj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bea5ccd-f356-4154-a1db-16268300510e_500x500.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;b7387bad-e21d-43d0-9227-adbed3439e2b&quot;,&quot;caption&quot;:&quot;Behind every 'smart' answer is a chain of fallible steps: retrieval, ranking, prompting and others.<br /><br />Dropbox Dash turned that complexity into a testable, measurable system.<br /><br />Here&#8217;s how they made their evaluation as rigorous as code.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;lg&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;How Dropbox Made AI Evaluation Work at Scale&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:291590442,&quot;name&quot;:&quot;Data Tinkerer&quot;,&quot;bio&quot;:&quot;Ex-head of analytics sharing deep dives and learnings about AI and all things data (science, engineering, analysis)&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/83d3bbe0-5fb8-4f8d-9b74-036abbd6fec9_500x500.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-10-09T07:14:50.996Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!oMNY!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3a58eb26-c9ac-492d-96f2-343a7f503ddc_800x450.gif&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.datatinkerer.io/p/how-dropbox-made-ai-evaluation-work-at-scale&quot;,&quot;section_name&quot;:&quot;Data 
Science&quot;,&quot;video_upload_id&quot;:null,&quot;id&quot;:175671629,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:8,&quot;comment_count&quot;:0,&quot;publication_id&quot;:3422740,&quot;publication_name&quot;:&quot;Data Tinkerer&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!JEdj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bea5ccd-f356-4154-a1db-16268300510e_500x500.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div>]]></content:encoded></item><item><title><![CDATA[From Data Analyst to Senior DS Manager at Skyscanner]]></title><description><![CDATA[How a mechanical engineer found data through robotics. Data led to modelling. Modelling led to managing teams at Skyscanner.]]></description><link>https://www.datatinkerer.io/p/from-data-analyst-to-senior-ds-manager-at-skyscanner</link><guid isPermaLink="false">https://www.datatinkerer.io/p/from-data-analyst-to-senior-ds-manager-at-skyscanner</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Thu, 13 Nov 2025 03:54:26 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/06735d58-e8f2-4106-88ae-efe0658c217c_764x661.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Fellow Data Tinkerers,</p><p>Following on from previous posts talking to people in the field, today I will be talking with <span class="mention-wrap" data-attrs="{&quot;name&quot;:&quot;Jose Parre&#241;o 
Garcia&quot;,&quot;id&quot;:255728031,&quot;type&quot;:&quot;user&quot;,&quot;url&quot;:null,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!h_mv!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0c4dad41-478b-4960-a5e0-98ed1e54657e_1168x1046.jpeg&quot;,&quot;uuid&quot;:&quot;fe93d583-b52c-4160-878b-e32c4f822419&quot;}" data-component-name="MentionToDOM"></span> who is a Senior Data Science Manager at Skyscanner and writer of the <em>Senior Data Science Lead</em> newsletter.</p><div class="embedded-publication-wrap" data-attrs="{&quot;id&quot;:2833541,&quot;name&quot;:&quot;Senior Data Science Lead&quot;,&quot;logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!t4IN!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbbe3704e-4589-40b2-bbb8-007336c4f09a_990x990.png&quot;,&quot;base_url&quot;:&quot;https://joseparreogarcia.substack.com&quot;,&quot;hero_text&quot;:&quot;Helping managers build world-class teams, data professionals master storytelling and guiding those looking to break into Data Science. I have built teams from scratch and lead 50+ data scientists. 
Now, I share my experience with you.&quot;,&quot;author_name&quot;:&quot;Jose Parre&#241;o Garcia&quot;,&quot;show_subscribe&quot;:true,&quot;logo_bg_color&quot;:&quot;#ffffff&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="EmbeddedPublicationToDOMWithSubscribe"><div class="embedded-publication show-subscribe"><a class="embedded-publication-link-part" native="true" href="https://joseparreogarcia.substack.com?utm_source=substack&amp;utm_campaign=publication_embed&amp;utm_medium=web"><img class="embedded-publication-logo" src="https://substackcdn.com/image/fetch/$s_!t4IN!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbbe3704e-4589-40b2-bbb8-007336c4f09a_990x990.png" width="56" height="56" style="background-color: rgb(255, 255, 255);"><span class="embedded-publication-name">Senior Data Science Lead</span><div class="embedded-publication-hero-text">Helping managers build world-class teams, data professionals master storytelling and guiding those looking to break into Data Science. I have built teams from scratch and lead 50+ data scientists. Now, I share my experience with you.</div><div class="embedded-publication-author-name">By Jose Parre&#241;o Garcia</div></a><form class="embedded-publication-subscribe" method="GET" action="https://joseparreogarcia.substack.com/subscribe?"><input type="hidden" name="source" value="publication-embed"><input type="hidden" name="autoSubmit" value="true"><input type="email" class="email-input" name="email" placeholder="Type your email..."><input type="submit" class="button primary" value="Subscribe"></form></div></div><p>We talked about his rise from data analyst to Senior DS Manager at Skyscanner, what &#8220;production-ready&#8221; really means and why the real intelligence in data science lives before and after the model.</p><p>So without further ado, let&#8217;s get into it!</p>
      <p>
          <a href="https://www.datatinkerer.io/p/from-data-analyst-to-senior-ds-manager-at-skyscanner">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[How Dropbox Made AI Evaluation Work at Scale]]></title><description><![CDATA[Every prompt, retriever and model now has to earn its merge.]]></description><link>https://www.datatinkerer.io/p/how-dropbox-made-ai-evaluation-work-at-scale</link><guid isPermaLink="false">https://www.datatinkerer.io/p/how-dropbox-made-ai-evaluation-work-at-scale</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Thu, 09 Oct 2025 07:14:50 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!oMNY!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3a58eb26-c9ac-492d-96f2-343a7f503ddc_800x450.gif" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Fellow Data Tinkerers!</p><p>Today we will look at how Dropbox does eval for its conversational AI</p><p>But before that, I wanted to share with you what you could unlock if you share Data Tinkerer with just <strong>1 more person</strong>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!2Roe!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!2Roe!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 424w, https://substackcdn.com/image/fetch/$s_!2Roe!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 848w, 
https://substackcdn.com/image/fetch/$s_!2Roe!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 1272w, https://substackcdn.com/image/fetch/$s_!2Roe!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!2Roe!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif" width="800" height="402" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:402,&quot;width&quot;:800,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3369150,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/gif&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/175671629?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!2Roe!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 424w, https://substackcdn.com/image/fetch/$s_!2Roe!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 848w, 
https://substackcdn.com/image/fetch/$s_!2Roe!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 1272w, https://substackcdn.com/image/fetch/$s_!2Roe!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc5e760a1-29af-4234-b7eb-7b6070bb0d44_800x402.gif 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>There are 100+ resources to learn all things data (science, engineering, analysis). 
It includes videos, courses, projects and can be filtered by tech stack (Python, SQL, Spark, etc.), skill level (Beginner, Intermediate and so on), provider name or free/paid. So if you know other people who like staying up to date on all things data, please share Data Tinkerer with them!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post&quot;,&quot;text&quot;:&quot;Refer a friend&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post"><span>Refer a friend</span></a></p><p>Now, with that out of the way, let&#8217;s get to Dropbox&#8217;s AI evaluation!</p><h3>TL;DR</h3>
      <p>
          <a href="https://www.datatinkerer.io/p/how-dropbox-made-ai-evaluation-work-at-scale">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[How Netflix Used Deep Learning to Slash Video Quality Control Time by 90%]]></title><description><![CDATA[Neural networks, synthetic pixel generators and a smarter pipeline]]></description><link>https://www.datatinkerer.io/p/how-netflix-used-deep-learning-to-slash-quality-control-time-by-90-percent</link><guid isPermaLink="false">https://www.datatinkerer.io/p/how-netflix-used-deep-learning-to-slash-quality-control-time-by-90-percent</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Thu, 11 Sep 2025 10:02:43 GMT</pubDate><enclosure url="https://images.unsplash.com/photo-1611798821136-26bfb61b734f?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw5fHxuZXRmbGl4fGVufDB8fHx8MTc1NzU4MDE2M3ww&amp;ixlib=rb-4.1.0&amp;q=80&amp;w=1080" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Fellow Data Tinkerers!</p><p>Today we will look at how Netflix uses neural networks to automate the process of flagging pixel errors</p><p>But before that, I wanted to share with you what you could unlock if you share Data Tinkerer with just <strong>1 more person</strong>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!T4QL!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb501a13-29b3-40a4-9123-28146ed72cad_800x402.gif" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!T4QL!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb501a13-29b3-40a4-9123-28146ed72cad_800x402.gif 424w, 
https://substackcdn.com/image/fetch/$s_!T4QL!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb501a13-29b3-40a4-9123-28146ed72cad_800x402.gif 848w, https://substackcdn.com/image/fetch/$s_!T4QL!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb501a13-29b3-40a4-9123-28146ed72cad_800x402.gif 1272w, https://substackcdn.com/image/fetch/$s_!T4QL!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb501a13-29b3-40a4-9123-28146ed72cad_800x402.gif 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!T4QL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb501a13-29b3-40a4-9123-28146ed72cad_800x402.gif" width="800" height="402" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/cb501a13-29b3-40a4-9123-28146ed72cad_800x402.gif&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:402,&quot;width&quot;:800,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3369150,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/gif&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/173316812?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb501a13-29b3-40a4-9123-28146ed72cad_800x402.gif&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!T4QL!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb501a13-29b3-40a4-9123-28146ed72cad_800x402.gif 424w, 
https://substackcdn.com/image/fetch/$s_!T4QL!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb501a13-29b3-40a4-9123-28146ed72cad_800x402.gif 848w, https://substackcdn.com/image/fetch/$s_!T4QL!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb501a13-29b3-40a4-9123-28146ed72cad_800x402.gif 1272w, https://substackcdn.com/image/fetch/$s_!T4QL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb501a13-29b3-40a4-9123-28146ed72cad_800x402.gif 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>There are 100+ resources to learn all things data (science, engineering, analysis). It includes videos, courses, projects and can be filtered by tech stack (Python, SQL, Spark, etc.), skill level (Beginner, Intermediate and so on), provider name or free/paid. So if you know other people who like staying up to date on all things data, please share Data Tinkerer with them!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post&quot;,&quot;text&quot;:&quot;Refer a friend&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post"><span>Refer a friend</span></a></p><p>Now, with that out of the way, let&#8217;s get to automation of video quality control by Netflix</p>
      <p>
          <a href="https://www.datatinkerer.io/p/how-netflix-used-deep-learning-to-slash-quality-control-time-by-90-percent">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[How Uber Built an AI Agent That Answers Financial Questions in Slack]]></title><description><![CDATA[Uber's Finch - the AI agent that finds the right data, runs the query and delivers secure, real-time answers where the team already works.]]></description><link>https://www.datatinkerer.io/p/how-uber-built-an-ai-agent-that-answers</link><guid isPermaLink="false">https://www.datatinkerer.io/p/how-uber-built-an-ai-agent-that-answers</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Thu, 14 Aug 2025 06:30:48 GMT</pubDate><enclosure url="https://images.unsplash.com/photo-1723083661335-c88891b92323?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwzfHx1YmVyfGVufDB8fHx8MTc1NTA3Mzg5MXww&amp;ixlib=rb-4.1.0&amp;q=80&amp;w=1080" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Fellow Data Tinkerers!</p><p>Today we will look at how Uber uses agentic AI to answer financial questions.</p><p>But before that, I wanted to share with you what you could unlock if you share Data Tinkerer with just <strong>1 more person</strong>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ZcLD!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b140c5d-c8ca-4e70-94f9-eaaf9abcecbb_800x402.gif" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ZcLD!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b140c5d-c8ca-4e70-94f9-eaaf9abcecbb_800x402.gif 424w, 
https://substackcdn.com/image/fetch/$s_!ZcLD!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b140c5d-c8ca-4e70-94f9-eaaf9abcecbb_800x402.gif 848w, https://substackcdn.com/image/fetch/$s_!ZcLD!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b140c5d-c8ca-4e70-94f9-eaaf9abcecbb_800x402.gif 1272w, https://substackcdn.com/image/fetch/$s_!ZcLD!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b140c5d-c8ca-4e70-94f9-eaaf9abcecbb_800x402.gif 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ZcLD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b140c5d-c8ca-4e70-94f9-eaaf9abcecbb_800x402.gif" width="800" height="402" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2b140c5d-c8ca-4e70-94f9-eaaf9abcecbb_800x402.gif&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:402,&quot;width&quot;:800,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3369150,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/gif&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/170833604?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b140c5d-c8ca-4e70-94f9-eaaf9abcecbb_800x402.gif&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ZcLD!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b140c5d-c8ca-4e70-94f9-eaaf9abcecbb_800x402.gif 424w, 
https://substackcdn.com/image/fetch/$s_!ZcLD!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b140c5d-c8ca-4e70-94f9-eaaf9abcecbb_800x402.gif 848w, https://substackcdn.com/image/fetch/$s_!ZcLD!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b140c5d-c8ca-4e70-94f9-eaaf9abcecbb_800x402.gif 1272w, https://substackcdn.com/image/fetch/$s_!ZcLD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b140c5d-c8ca-4e70-94f9-eaaf9abcecbb_800x402.gif 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>There are 100+ resources to learn all things data (science, engineering, analysis). The list includes videos, courses and projects, and can be filtered by tech stack (Python, SQL, Spark, etc.), skill level (Beginner, Intermediate and so on), provider name or free/paid. So if you know other people who like staying up to date on all things data, please share Data Tinkerer with them!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post&quot;,&quot;text&quot;:&quot;Refer a friend&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post"><span>Refer a friend</span></a></p><p>Now, with that out of the way, let&#8217;s get to an actual use case of agentic AI by Uber</p>
      <p>
          <a href="https://www.datatinkerer.io/p/how-uber-built-an-ai-agent-that-answers">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[How Target Used GenAI to Lift Sales by 9% Across 100K+ Products]]></title><description><![CDATA[LLMs helped Target match the right add-ons, from throw pillows to phone cases -boosting engagement (+11%) and relevance (+12%) without blowing up compute.]]></description><link>https://www.datatinkerer.io/p/how-target-used-genai-to-lift-sales-by-9-percent-across-100-thousand-products</link><guid isPermaLink="false">https://www.datatinkerer.io/p/how-target-used-genai-to-lift-sales-by-9-percent-across-100-thousand-products</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Thu, 17 Jul 2025 08:09:48 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!xMXL!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a5b8468-f356-4c8d-ac56-9257ccb4bc7a_705x346.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Fellow Data Tinkerers!</p><p>Today we will look at how Target used GenAI to recommend better products to users.</p><p>But before that, I wanted to share an example of what you could unlock if you share Data Tinkerer with just <strong>2 other people</strong>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Fg5x!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdf1b89c4-c318-47ec-8691-3bc57709f808_2550x3300.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Fg5x!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdf1b89c4-c318-47ec-8691-3bc57709f808_2550x3300.jpeg 424w, 
https://substackcdn.com/image/fetch/$s_!Fg5x!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdf1b89c4-c318-47ec-8691-3bc57709f808_2550x3300.jpeg 848w, https://substackcdn.com/image/fetch/$s_!Fg5x!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdf1b89c4-c318-47ec-8691-3bc57709f808_2550x3300.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!Fg5x!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdf1b89c4-c318-47ec-8691-3bc57709f808_2550x3300.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Fg5x!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdf1b89c4-c318-47ec-8691-3bc57709f808_2550x3300.jpeg" width="1456" height="1884" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/df1b89c4-c318-47ec-8691-3bc57709f808_2550x3300.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1884,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2526039,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/168527137?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdf1b89c4-c318-47ec-8691-3bc57709f808_2550x3300.jpeg&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!Fg5x!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdf1b89c4-c318-47ec-8691-3bc57709f808_2550x3300.jpeg 424w, https://substackcdn.com/image/fetch/$s_!Fg5x!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdf1b89c4-c318-47ec-8691-3bc57709f808_2550x3300.jpeg 848w, https://substackcdn.com/image/fetch/$s_!Fg5x!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdf1b89c4-c318-47ec-8691-3bc57709f808_2550x3300.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!Fg5x!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdf1b89c4-c318-47ec-8691-3bc57709f808_2550x3300.jpeg 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>There are 100+ more cheat sheets covering everything from Python, R, SQL, Spark to Power BI, Tableau, Git and many more. So if you know other people who like staying up to date on all things data, please share Data Tinkerer with them!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post&quot;,&quot;text&quot;:&quot;Refer a friend&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post"><span>Refer a friend</span></a></p><p>Now, with that out of the way, let&#8217;s see how Target leverages GenAI for better shopping experience.</p><h3>TL;DR</h3>
      <p>
          <a href="https://www.datatinkerer.io/p/how-target-used-genai-to-lift-sales-by-9-percent-across-100-thousand-products">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[How DoorDash Used LLMs to Trigger 30% More Relevant Results]]></title><description><![CDATA[What happens when you mix knowledge graphs, tight vocabularies and just enough AI? In this piece, we evaluate the food delivery company DoorDash on LLM experiments and how they get cleaner segments, smarter retrieval and a system that knows what &#8220;no-milk vanilla ice cream&#8221; actually means.]]></description><link>https://www.datatinkerer.io/p/how-doordash-used-llms-to-trigger</link><guid isPermaLink="false">https://www.datatinkerer.io/p/how-doordash-used-llms-to-trigger</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Thu, 26 Jun 2025 09:37:56 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!8K0n!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37f9be1c-e138-41d4-9596-b4cd02897f95_432x860.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Fellow Data Tinkerers!</p><p>Today we will look at how Doordash used LLM to show better results to users.</p><p>But before that, I wanted to share an example of what you could unlock if you share Data Tinkerer with just <strong>2 other people</strong>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!CVJs!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79222a65-e006-4ef4-8919-a95cc40d6485_5167x3655.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!CVJs!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79222a65-e006-4ef4-8919-a95cc40d6485_5167x3655.jpeg 424w, 
https://substackcdn.com/image/fetch/$s_!CVJs!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79222a65-e006-4ef4-8919-a95cc40d6485_5167x3655.jpeg 848w, https://substackcdn.com/image/fetch/$s_!CVJs!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79222a65-e006-4ef4-8919-a95cc40d6485_5167x3655.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!CVJs!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79222a65-e006-4ef4-8919-a95cc40d6485_5167x3655.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!CVJs!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79222a65-e006-4ef4-8919-a95cc40d6485_5167x3655.jpeg" width="1456" height="1030" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/79222a65-e006-4ef4-8919-a95cc40d6485_5167x3655.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1030,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:4188404,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/166857110?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79222a65-e006-4ef4-8919-a95cc40d6485_5167x3655.jpeg&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!CVJs!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79222a65-e006-4ef4-8919-a95cc40d6485_5167x3655.jpeg 424w, https://substackcdn.com/image/fetch/$s_!CVJs!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79222a65-e006-4ef4-8919-a95cc40d6485_5167x3655.jpeg 848w, https://substackcdn.com/image/fetch/$s_!CVJs!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79222a65-e006-4ef4-8919-a95cc40d6485_5167x3655.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!CVJs!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79222a65-e006-4ef4-8919-a95cc40d6485_5167x3655.jpeg 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>There are 100+ more cheat sheets covering everything from Python, R, SQL, Spark to Power BI, Tableau, Git and many more. So if you know other people who like staying up to date on all things data, please share Data Tinkerer with them!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post&quot;,&quot;text&quot;:&quot;Refer a friend&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post"><span>Refer a friend</span></a></p><p>Now, with that out of the way, Let&#8217;s see how Doordash leverages LLM for better search results</p><h3>TL;DR</h3><div><hr></div><h4><strong>Situation</strong></h4><p>DoorDash users search with ultra-specific, complex queries (e.g. low-carb spicy chicken wrap with gluten-free tortilla), expecting accurate matches. 
Traditional keyword systems couldn&#8217;t handle the nuance and pure LLM-based solutions were prone to hallucination.</p><h4><strong>Task</strong></h4><p>Build a scalable, accurate and fast search system that could deeply understand both queries and items without sacrificing relevance or blowing up compute.</p><h4><strong>Action</strong></h4><ul><li><p>DoorDash built a hybrid system using:</p><ul><li><p>LLMs (for query segmentation and classification)</p></li><li><p>Knowledge graphs (for structured metadata)</p></li><li><p>Approximate Nearest Neighbor search (for grounding output in known taxonomies)</p></li></ul></li><li><p>Added tight guardrails: constrained vocabularies, structured output prompts, and post-processing validation.</p></li><li><p>Plugged structured query outputs into the search rankers and retrained based on improved engagement data.</p></li></ul><h4><strong>Result</strong></h4><ul><li><p>~30% increase in trigger rate for &#8220;Popular Dishes&#8221; carousel</p></li><li><p>2% lift in whole page relevance (WPR) for intent-heavy queries</p></li><li><p>+1.6% WPR boost from retrained rankers due to better training signals</p></li></ul><h4><strong>Use Cases</strong></h4><p>Improved search relevance, query processing at scale, ranking and personalisation</p><h4><strong>Tech Stack/Framework</strong></h4><p>LLM, knowledge graph, ANN</p><div><hr></div><h3>Explained further</h3><div><hr></div><p>At DoorDash, users don&#8217;t just type &#8220;burger&#8221; and call it a day. They type &#8220;low-carb spicy chicken wrap with gluten-free tortilla&#8221; and expect relevant suggestions. 
These aren&#8217;t simple lookups, they&#8217;re loaded with layered preferences, dietary restrictions and dish expectations.</p><p>So how do you build a search system that can handle that level of complexity without cracking under pressure?</p><p>The DoorDash team went with a hybrid approach: combine the brute force of traditional keyword systems with the nuance of LLMs and knowledge graphs. It&#8217;s like pairing a calculator with a philosophy major. It works and here is how.</p><div><hr></div><h4>What happens behind the search bar</h4><p>At a high level, DoorDash's search engine processes two kinds of things:</p><ul><li><p><strong>Documents</strong> - the restaurants or menu/store items</p></li><li><p><strong>Queries</strong> - what users type in</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!zU_H!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2037337-77af-4368-a931-4ceadc5a02fa_1024x249.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!zU_H!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2037337-77af-4368-a931-4ceadc5a02fa_1024x249.webp 424w, https://substackcdn.com/image/fetch/$s_!zU_H!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2037337-77af-4368-a931-4ceadc5a02fa_1024x249.webp 848w, https://substackcdn.com/image/fetch/$s_!zU_H!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2037337-77af-4368-a931-4ceadc5a02fa_1024x249.webp 1272w, 
https://substackcdn.com/image/fetch/$s_!zU_H!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2037337-77af-4368-a931-4ceadc5a02fa_1024x249.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!zU_H!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2037337-77af-4368-a931-4ceadc5a02fa_1024x249.webp" width="1024" height="249" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a2037337-77af-4368-a931-4ceadc5a02fa_1024x249.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:249,&quot;width&quot;:1024,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:24628,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/166857110?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2037337-77af-4368-a931-4ceadc5a02fa_1024x249.webp&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!zU_H!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2037337-77af-4368-a931-4ceadc5a02fa_1024x249.webp 424w, https://substackcdn.com/image/fetch/$s_!zU_H!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2037337-77af-4368-a931-4ceadc5a02fa_1024x249.webp 848w, 
https://substackcdn.com/image/fetch/$s_!zU_H!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2037337-77af-4368-a931-4ceadc5a02fa_1024x249.webp 1272w, https://substackcdn.com/image/fetch/$s_!zU_H!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2037337-77af-4368-a931-4ceadc5a02fa_1024x249.webp 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a><figcaption class="image-caption">Life of a document and the life of a query (Source: Doordash)</figcaption></figure></div><p>Each goes through its own journey. Queries get parsed, segmented, labeled and spell-checked. Documents get annotated and enriched with metadata, like vegan, spicy or chicken.</p><p>But the real power comes from understanding both deeply enough to match the right taco to the right craving.</p><div><hr></div><h4>Two worlds: documents and queries</h4><p>Documents at DoorDash aren&#8217;t just raw strings, they&#8217;re enriched with metadata pulled from custom-built <a href="https://en.wikipedia.org/wiki/Knowledge_graph">knowledge graphs</a>. These graphs define relationships between things like dish type, cuisine, ingredients and dietary preferences.</p><p>Take Non-Dairy Milk &amp; Cookies Vanilla Frozen Dessert - 8 oz as an example. It gets tagged with:</p><ul><li><p>Dietary_Preference: "Dairy-Free"</p></li><li><p>Flavor: "Vanilla"</p></li><li><p>Product_Category: "Ice Cream"</p></li><li><p>Quantity: "8 oz"</p></li></ul><p>On the query side, things get chunked up and mapped to similar concepts. So &#8220;small no-milk vanilla ice cream&#8221; gets split into:</p><p><code>[&#8220;small&#8221;, &#8220;no-milk&#8221;, &#8220;vanilla ice cream&#8221;]</code></p><p>Without this kind of breakdown, the search engine would either over-match (ice cream that isn&#8217;t dairy-free) or under-match (only exact string matches). 
With it, search becomes smarter. </p><p>But understanding concepts is only half the battle. How do you break down messy, real-world queries in the first place? That&#8217;s where things get complicated and where traditional methods start to hit their limits.</p><div><hr></div><h4>Using LLMs without letting them run wild</h4><p>Traditionally, query segmentation has leaned on methods like <a href="https://en.wikipedia.org/wiki/Pointwise_mutual_information">pointwise mutual information</a> (which checks how often words co-occur compared to random chance) and <a href="https://en.wikipedia.org/wiki/N-gram">n-gram analysis</a> (which looks at fixed word sequences) to figure out which terms belong together. That works fine for clean, simple queries. But once you throw in overlapping entities or any real ambiguity, those old-school tricks start to break down fast.</p><p>Take a query like &#8220;turkey sandwich with cranberry sauce.&#8221; Is the cranberry sauce a separate item or part of the sandwich? Without context, traditional methods have no clue. They just see word chunks, not relationships.</p><p>LLMs, however, can understand the context if given the right guardrails. They will usually segment things in a way that actually makes sense, picking up on how words relate to each other across different use cases. </p><p>But here&#8217;s the catch: LLMs are prone to hallucination. So the team couldn&#8217;t just throw queries at them and hope for the best. DoorDash needed a controlled vocabulary to keep things grounded, something that ensured the segments were not only real but useful for the retrieval system.</p><p>Fortunately, they already had a <a href="https://careersatdoordash.com/blog/building-doordashs-product-knowledge-graph-with-large-language-models/">knowledge graph</a> in place with a solid ontology and a bunch of taxonomies to work with. 
So instead of letting the model come up with its own segmentations, it&#8217;s prompted to identify meaningful chunks and tag each one with a specific category from the DoorDash taxonomy.</p><p>For food, they have taxonomies for cuisine types, dish types, dietary tags and more. For retail, it&#8217;s things like brand, dietary preference, product category, etc.</p><p>Back to our earlier query: &#8220;small no-milk vanilla ice cream.&#8221;</p><p>If we just asked for segments, we might get:</p><p><code>[&#8220;small&#8221;, &#8220;no-milk&#8221;, &#8220;vanilla ice cream&#8221;]</code></p><p>Not bad, but vague. Instead, the team asks the model to return a structured output that maps each segment to one of the DoorDash taxonomy categories:</p><p><code>{</code></p><p><code>  Quantity: "small",</code></p><p><code>  Dietary_Preference: "no-milk",</code></p><p><code>  Flavor: "vanilla",</code></p><p><code>  Product_Category: "ice cream"</code></p><p><code>}</code></p><p>This setup gives the model extra context to work with and it leads to better results. It doesn&#8217;t just segment, it classifies. But classification isn&#8217;t the end of the road. Once you&#8217;ve got the chunks, you need to actually understand what they <em>mean</em>.</p><div><hr></div><h4>Entity linking: turning words into meaning</h4><p>Once a query is segmented, the next step is to map each segment to a concept in the knowledge graph. For example, &#8220;no-milk&#8221; isn&#8217;t just a string match. It should link to the &#8220;dairy-free&#8221; concept even if the item description never says &#8220;no-milk&#8221; explicitly anywhere.</p><p>LLMs are handy here too but just like in segmentation, they can still hallucinate. 
To avoid that problem, the team constrained the model&#8217;s output using only concepts from their controlled vocabularies (i.e., taxonomy terms from the knowledge graph).</p><p>The guardrails come from a curated candidate list, pulled using <a href="https://www.elastic.co/blog/understanding-ann">approximate nearest neighbor</a> (ANN) retrieval. ANN is a fast way to find the most similar entries in a big dataset without doing a full exhaustive search. This keeps the LLM grounded by forcing it to choose from concepts that actually exist in the system.</p><p>Take the earlier example: the segment &#8220;no-milk.&#8221; Instead of letting the LLM invent a new category like &#8220;milk-averse&#8221;, the ANN system first retrieves candidates like &#8220;dairy-free&#8221; or &#8220;vegan.&#8221; The model then just has to pick the best match based on context, keeping the output both accurate and mappable.</p><p>The method behind this is a classic <a href="https://en.wikipedia.org/wiki/Retrieval-augmented_generation">retrieval-augmented generation</a> (RAG) setup:</p><ol><li><p>For every query and knowledge graph concept (a.k.a. candidate label), embeddings are generated either from closed-source models, pre-trained ones or DoorDash&#8217;s own.</p></li><li><p>Using ANN retrieval, the top 100 closest taxonomy concepts are pulled for each query. 
This trims the fat from the prompt and avoids blowing up the model&#8217;s context window.</p></li><li><p>The LLM is then prompted to link query segments to taxonomy concepts across domains like dish types, dietary tags, cuisines and more.</p></li></ol><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!j9sg!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86fa8e51-edc5-4e75-8618-da94ce60cc14_1024x1024.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!j9sg!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86fa8e51-edc5-4e75-8618-da94ce60cc14_1024x1024.webp 424w, https://substackcdn.com/image/fetch/$s_!j9sg!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86fa8e51-edc5-4e75-8618-da94ce60cc14_1024x1024.webp 848w, https://substackcdn.com/image/fetch/$s_!j9sg!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86fa8e51-edc5-4e75-8618-da94ce60cc14_1024x1024.webp 1272w, https://substackcdn.com/image/fetch/$s_!j9sg!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86fa8e51-edc5-4e75-8618-da94ce60cc14_1024x1024.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!j9sg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86fa8e51-edc5-4e75-8618-da94ce60cc14_1024x1024.webp" width="1024" height="1024" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/86fa8e51-edc5-4e75-8618-da94ce60cc14_1024x1024.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1024,&quot;width&quot;:1024,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:70928,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/166857110?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86fa8e51-edc5-4e75-8618-da94ce60cc14_1024x1024.webp&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!j9sg!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86fa8e51-edc5-4e75-8618-da94ce60cc14_1024x1024.webp 424w, https://substackcdn.com/image/fetch/$s_!j9sg!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86fa8e51-edc5-4e75-8618-da94ce60cc14_1024x1024.webp 848w, https://substackcdn.com/image/fetch/$s_!j9sg!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86fa8e51-edc5-4e75-8618-da94ce60cc14_1024x1024.webp 1272w, https://substackcdn.com/image/fetch/$s_!j9sg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86fa8e51-edc5-4e75-8618-da94ce60cc14_1024x1024.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">LLMs for query segmentation and entity linking (Source: Doordash)</figcaption></figure></div><h4>The retrieval payoff</h4><p>Let&#8217;s go back to &#8220;small no-milk vanilla ice cream.&#8221; After segmentation and entity linking, the final structured output might look like this:</p><p><code>{</code></p><p><code>  Dietary_Preference: "Dairy-Free",</code></p><p><code>  Flavor: "Vanilla",</code></p><p><code>  Product_Category: "Ice Cream"</code></p><p><code>}</code></p><p>Now retrieval becomes precise, it can make <strong>Dietary Preference</strong> a MUST condition (non-negotiable) and treat <strong>Flavor</strong> as a SHOULD (nice to have). It&#8217;s like giving the system a conscience, it knows what matters more.</p><p>But smart retrieval isn&#8217;t enough. 
You also need to double-check what the model spits out before someone eats the wrong thing.</p><div><hr></div><h4>Evaluations: catching hallucinations before users do</h4><p>Accuracy is especially critical for filters like dietary restrictions. One mistake and someone could end up eating something they&#8217;re allergic to.</p><p>To keep hallucinations in check and make sure both segmentation and entity linking are solid, the team added post-processing steps to validate the model&#8217;s output. Once that&#8217;s done, they run manual audits on every batch of processed queries to spot any issues before they hit production.</p><p>Annotators go through a statistically significant sample of results and check two things:</p><ol><li><p>Are the segments correct?</p></li><li><p>Are they mapped to the right concepts in the knowledge graph?</p></li></ol><p>This manual review helps surface any systematic slip-ups, fine-tune prompts and improve the overall pipeline. Still, even a well-reviewed system comes with trade-offs, especially when scale and change are constants.</p><div><hr></div><h4>Memorisation vs. generalisation: the trade-off</h4><p>LLMs are a solid tool for query understanding but like most things in ML, there&#8217;s a trade-off. In this case, it&#8217;s memorisation versus generalisation.</p><p>If you run LLMs in batch mode on a fixed set of queries, you&#8217;ll usually get high accuracy. That works great when the query space is small and predictable. But the further you drift into the long tail (which, let&#8217;s be honest, is where a lot of real-world queries live), things start to get tricky.</p><p>Relying purely on memorisation comes with baggage:</p><ul><li><p><strong>Scalability</strong>: New queries show up constantly in a system like DoorDash. 
Preprocessing every single one just isn&#8217;t realistic.</p></li><li><p><strong>Maintenance overhead</strong>: Any changes to the knowledge graph or user behavior mean re-running batches, tweaking prompts and revalidating outputs.</p></li><li><p><strong>Stale features</strong>: Segments or links that were fine last month might be outdated now.</p></li></ul><p>That&#8217;s where more generalisable approaches shine. Things like embedding-based retrieval, traditional statistical models or even smart rule-based systems can handle new queries on the fly without any preprocessing.</p><p>These methods bring a few key advantages:</p><ul><li><p><strong>Scalability</strong>: They work on any query, even ones the system hasn&#8217;t seen before.</p></li><li><p><strong>Flexibility</strong>: They keep up with evolving language like new dishes, slang and weird typos.</p></li><li><p><strong>Real-time readiness</strong>: No batch processing delays; they can act immediately.</p></li></ul><p>The downside? They don&#8217;t have the same deep contextual smarts as LLMs. That can hurt precision, especially when subtle relationships between terms matter.</p><p>So DoorDash takes a hybrid approach. Instead of picking one side, they combine the strengths of both:</p><ul><li><p>LLMs for deep understanding when you can afford it</p></li><li><p>Lightweight retrieval methods when you need scale and speed</p></li></ul><p>The result: better precision where it matters and better coverage everywhere else.</p><p>But theory only gets you so far. The real test? Whether it plays nicely with the rest of the stack.</p><div><hr></div><h4>Making it all work in production</h4><p>Of course, even the best query understanding system is only useful if the rest of the stack knows what to do with it. 
For DoorDash, that means making sure the new signals actually integrate cleanly into the search pipeline, especially with the rankers.</p><p>Rankers are the part of the system responsible for taking a big pile of candidate items or stores and deciding what order they should show up in. So once the team introduced these new, structured query signals, the rankers had to be updated to recognise and use them.</p><p>As the rankers adjusted to the new inputs and to the new patterns of user behavior that came with better retrieval, the impact showed up fast. Online metrics went up, both on the relevance side and on key business KPIs.</p><p>One feature in particular showed just how well things were clicking.</p><div><hr></div><h4>Real world example: a&#231;a&#237; bowl, anyone?</h4><p>One clear beneficiary of DoorDash&#8217;s improved query understanding system is the &#8220;Popular Dishes&#8221; carousel shown below. This component shows users a ranked list of food items when they search with clear dish intent (like &#8220;a&#231;a&#237; bowl&#8221;).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!8K0n!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37f9be1c-e138-41d4-9596-b4cd02897f95_432x860.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!8K0n!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37f9be1c-e138-41d4-9596-b4cd02897f95_432x860.png 424w, https://substackcdn.com/image/fetch/$s_!8K0n!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37f9be1c-e138-41d4-9596-b4cd02897f95_432x860.png 848w, 
https://substackcdn.com/image/fetch/$s_!8K0n!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37f9be1c-e138-41d4-9596-b4cd02897f95_432x860.png 1272w, https://substackcdn.com/image/fetch/$s_!8K0n!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37f9be1c-e138-41d4-9596-b4cd02897f95_432x860.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!8K0n!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37f9be1c-e138-41d4-9596-b4cd02897f95_432x860.png" width="432" height="860" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/37f9be1c-e138-41d4-9596-b4cd02897f95_432x860.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:860,&quot;width&quot;:432,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:302615,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/166857110?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37f9be1c-e138-41d4-9596-b4cd02897f95_432x860.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!8K0n!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37f9be1c-e138-41d4-9596-b4cd02897f95_432x860.png 424w, https://substackcdn.com/image/fetch/$s_!8K0n!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37f9be1c-e138-41d4-9596-b4cd02897f95_432x860.png 848w, 
https://substackcdn.com/image/fetch/$s_!8K0n!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37f9be1c-e138-41d4-9596-b4cd02897f95_432x860.png 1272w, https://substackcdn.com/image/fetch/$s_!8K0n!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F37f9be1c-e138-41d4-9596-b4cd02897f95_432x860.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Ranked list of food items in the &#8220;Popular Dishes&#8221; carousel. 
(Source: DoorDash)</figcaption></figure></div><p>When someone types in &#8220;a&#231;a&#237; bowl,&#8221; they&#8217;re not looking to browse menus. They want a specific thing. The carousel lets them quickly scan options across stores, compare prices and pick what looks good. It&#8217;s fast, it&#8217;s focused and thanks to the new pipeline, it&#8217;s pulling in way more relevant results.</p><p>After rolling out the updated query understanding and retrieval system, DoorDash saw a ~30% jump in the carousel trigger rate. That means more queries are now returning results that qualify for the Popular Dishes treatment. It&#8217;s a strong signal that the system is getting closer to what users actually want.</p><p>And it&#8217;s not just about quantity; quality improved too. With better segmentation and entity linking, the system can retrieve a broader, more accurate set of items tied to the user&#8217;s intent. This showed up in their whole page relevance (WPR) metric, which measures how relevant the entire result page is from the user&#8217;s point of view. For dish-intent queries, WPR jumped by over 2%.</p><p>And because users were interacting with a wider variety of results, DoorDash was able to retrain its rankers on a more diverse engagement dataset. That led to a newer, smarter ranker version and another 1.6% bump in WPR. And because users interact more with the right stuff, the system gets better training data for the ranker, which then improves things further, creating a virtuous cycle.</p><div><hr></div><h3><strong>Lessons learned</strong></h3><ul><li><p><strong>LLMs are powerful but more so with guardrails.</strong> Left to their own devices, they hallucinate. 
But paired with taxonomies and controlled vocabularies, they become sharp tools for segmentation and classification.</p></li><li><p><strong>Structured queries unlock smart retrieval.</strong> Turning messy user input into clean, labeled chunks (like "Flavor: Vanilla") makes it easier to match intent, not just strings.</p></li><li><p><strong>Hybrid &gt; pure LLM.</strong> DoorDash got the best results by mixing deep LLM understanding with fast, generalisable methods like embedding retrieval.</p></li><li><p><strong>Better understanding improves ranking.</strong> Once structured query signals flowed into rankers, DoorDash saw real-world lifts: 30% more carousel triggers and measurable boosts to conversion and relevance.</p></li></ul><div><hr></div><h3>The full scoop</h3><p>To learn more about this, check out <a href="https://careersatdoordash.com/blog/how-doordash-leverages-llms-for-better-search-retrieval/?utm_source=datatinkerer.io&amp;utm_medium=email">DoorDash's Engineering Blog</a> post on this topic.</p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">If you liked this post and don&#8217;t want to miss the next one, subscribe to Data Tinkerer!</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><p>If you are already subscribed and enjoyed the article, please give it a like and/or share it with others. Really appreciate it &#128591;</p><p class="button-wrapper" 
data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/p/how-doordash-used-llms-to-trigger?utm_source=substack&utm_medium=email&utm_content=share&action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.datatinkerer.io/p/how-doordash-used-llms-to-trigger?utm_source=substack&utm_medium=email&utm_content=share&action=share"><span>Share</span></a></p><div><hr></div><h3>Keep learning</h3><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;8366d4af-303d-41cb-a5d4-a328ba80d9a4&quot;,&quot;caption&quot;:&quot;Uber&#8217;s invoices were a hot mess. Thousands of formats, 25+ languages and way too much human copy-pasting. Even with automation, it was chaos. Their solution? a GenAI-powered doc processing system that cut invoice handling time by 70% and slashed costs by 30%.<br /><br />If you want to learn about an actual example of GenAI being used in practice (rather than just vibes), check this article.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;lg&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;How Uber Cut Invoice Handling Time by 70% with GenAI (Without Ditching Humans)&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:291590442,&quot;name&quot;:&quot;Data Tinkerer&quot;,&quot;bio&quot;:&quot;Ex-head of analytics sharing deep dives and learnings about AI and all things data (science, engineering, 
analysis)&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/83d3bbe0-5fb8-4f8d-9b74-036abbd6fec9_500x500.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-06-05T06:27:57.923Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F991ca7ce-ad3d-4038-a8ee-3b4ffd0a60e2_1024x1024.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.datatinkerer.io/p/how-uber-cut-invoice-handling-time-by-70-percent-with-genai&quot;,&quot;section_name&quot;:&quot;Data Science&quot;,&quot;video_upload_id&quot;:null,&quot;id&quot;:163834123,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:7,&quot;comment_count&quot;:2,&quot;publication_id&quot;:null,&quot;publication_name&quot;:&quot;Data Tinkerer&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!JEdj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bea5ccd-f356-4154-a1db-16268300510e_500x500.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;7c48fd80-80c7-4200-aad2-d1c2a1e7b0f2&quot;,&quot;caption&quot;:&quot;Reddit needed to flag NSFW images the second they were uploaded. They built a deep learning system that does exactly that; fast, scalable and battle-tested in prod. 
Here&#8217;s how it works.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;lg&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;How Reddit Scans 1M+ Images a Day to Flag NSFW Content Using Deep Learning&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:291590442,&quot;name&quot;:&quot;Data Tinkerer&quot;,&quot;bio&quot;:&quot;Ex-head of analytics sharing deep dives and learnings about AI and all things data (science, engineering, analysis)&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/83d3bbe0-5fb8-4f8d-9b74-036abbd6fec9_500x500.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-05-15T02:01:35.562Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d83ae17-98ce-4009-8228-9abd61f5665b_1536x1024.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.datatinkerer.io/p/how-reddit-scans-1m-images-a-day-to-flag-nsfw-content-using-deep-learning&quot;,&quot;section_name&quot;:&quot;Data Science&quot;,&quot;video_upload_id&quot;:null,&quot;id&quot;:163519952,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:4,&quot;comment_count&quot;:0,&quot;publication_id&quot;:null,&quot;publication_name&quot;:&quot;Data Tinkerer&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!JEdj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bea5ccd-f356-4154-a1db-16268300510e_500x500.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div>]]></content:encoded></item><item><title><![CDATA[How Uber Cut Invoice Handling Time by 70% with GenAI (Without Ditching 
Humans)]]></title><description><![CDATA[What started as a messy mix of bots and spreadsheets is now a smart, scalable pipeline powered by LLMs and human-in-the-loop design.]]></description><link>https://www.datatinkerer.io/p/how-uber-cut-invoice-handling-time-by-70-percent-with-genai</link><guid isPermaLink="false">https://www.datatinkerer.io/p/how-uber-cut-invoice-handling-time-by-70-percent-with-genai</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Thu, 05 Jun 2025 06:27:57 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F991ca7ce-ad3d-4038-a8ee-3b4ffd0a60e2_1024x1024.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Fellow Data Tinkerers!</p><p>This article was originally published on <span class="mention-wrap" data-attrs="{&quot;name&quot;:&quot;AI Disruption&quot;,&quot;id&quot;:2557538,&quot;type&quot;:&quot;pub&quot;,&quot;url&quot;:&quot;https://open.substack.com/pub/aidisruption&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/40adc650-231f-403a-89a6-1e7a0761fe03_500x500.png&quot;,&quot;uuid&quot;:&quot;a74115c7-cfc1-466d-abaf-b082ca451433&quot;}" data-component-name="MentionToDOM"></span> around two weeks ago. <span class="mention-wrap" data-attrs="{&quot;name&quot;:&quot;Meng Li&quot;,&quot;id&quot;:138649186,&quot;type&quot;:&quot;user&quot;,&quot;url&quot;:null,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4206cf36-9fcc-4b06-95e1-d751f9f4c3b7_388x388.jpeg&quot;,&quot;uuid&quot;:&quot;9bf31ca7-c82b-4b75-8f8a-83bed53ee424&quot;}" data-component-name="MentionToDOM"></span> who writes the newsletter suggested doing a guest post and I was more than happy to do it. </p><p>You should check out his newsletter if you need to stay up to date on all the latest developments in AI. 
He is a machine in terms of keeping track of the latest news and publishing it quickly for his subscribers. So if staying updated on all things AI is your jam, give it a go!</p><div class="embedded-publication-wrap" data-attrs="{&quot;id&quot;:2557538,&quot;name&quot;:&quot;AI Disruption&quot;,&quot;logo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F40adc650-231f-403a-89a6-1e7a0761fe03_500x500.png&quot;,&quot;base_url&quot;:&quot;https://aidisruption.ai&quot;,&quot;hero_text&quot;:&quot;A Beijing-based AI engineer with a decade of experience shares at least 2 daily articles on AI industry trends and technical insights.&quot;,&quot;author_name&quot;:&quot;Meng Li&quot;,&quot;show_subscribe&quot;:true,&quot;logo_bg_color&quot;:&quot;#ffffff&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="EmbeddedPublicationToDOMWithSubscribe"><div class="embedded-publication show-subscribe"><a class="embedded-publication-link-part" native="true" href="https://aidisruption.ai?utm_source=substack&amp;utm_campaign=publication_embed&amp;utm_medium=web"><img class="embedded-publication-logo" src="https://substackcdn.com/image/fetch/$s_!1xmv!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F40adc650-231f-403a-89a6-1e7a0761fe03_500x500.png" width="56" height="56" style="background-color: rgb(255, 255, 255);"><span class="embedded-publication-name">AI Disruption</span><div class="embedded-publication-hero-text">A Beijing-based AI engineer with a decade of experience shares at least 2 daily articles on AI industry trends and technical insights.</div><div class="embedded-publication-author-name">By Meng Li</div></a><form class="embedded-publication-subscribe" method="GET" action="https://aidisruption.ai/subscribe?"><input type="hidden" name="source" value="publication-embed"><input type="hidden" 
name="autoSubmit" value="true"><input type="email" class="email-input" name="email" placeholder="Type your email..."><input type="submit" class="button primary" value="Subscribe"></form></div></div><p>Now, let&#8217;s get to today&#8217;s deep dive on how Uber used GenAI in their invoice processing workflow.</p>
      <p>
          <a href="https://www.datatinkerer.io/p/how-uber-cut-invoice-handling-time-by-70-percent-with-genai">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[How Reddit Scans 1M+ Images a Day to Flag NSFW Content Using Deep Learning]]></title><description><![CDATA[A behind-the-scenes look at the ML pipeline that cut moderation latency from seconds to milliseconds without blowing the budget.]]></description><link>https://www.datatinkerer.io/p/how-reddit-scans-1m-images-a-day-to-flag-nsfw-content-using-deep-learning</link><guid isPermaLink="false">https://www.datatinkerer.io/p/how-reddit-scans-1m-images-a-day-to-flag-nsfw-content-using-deep-learning</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Thu, 15 May 2025 02:01:35 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d83ae17-98ce-4009-8228-9abd61f5665b_1536x1024.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Fellow Data Tinkerers!</p><p>Today we will look at how Reddit handles NSFW content at scale using deep learning</p><p>But before that, I wanted to thank those who shared Data Tinkerer with others and share an example of what you can unlock if you share Data Tinkerer with just <strong>2 other people</strong>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!wvpQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33c4d0ad-c61a-49bc-9c6c-265bf936babc_5167x3655.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!wvpQ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33c4d0ad-c61a-49bc-9c6c-265bf936babc_5167x3655.jpeg 424w, 
https://substackcdn.com/image/fetch/$s_!wvpQ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33c4d0ad-c61a-49bc-9c6c-265bf936babc_5167x3655.jpeg 848w, https://substackcdn.com/image/fetch/$s_!wvpQ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33c4d0ad-c61a-49bc-9c6c-265bf936babc_5167x3655.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!wvpQ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33c4d0ad-c61a-49bc-9c6c-265bf936babc_5167x3655.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!wvpQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33c4d0ad-c61a-49bc-9c6c-265bf936babc_5167x3655.jpeg" width="1456" height="1030" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/33c4d0ad-c61a-49bc-9c6c-265bf936babc_5167x3655.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1030,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:4188404,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/163519952?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33c4d0ad-c61a-49bc-9c6c-265bf936babc_5167x3655.jpeg&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!wvpQ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33c4d0ad-c61a-49bc-9c6c-265bf936babc_5167x3655.jpeg 424w, https://substackcdn.com/image/fetch/$s_!wvpQ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33c4d0ad-c61a-49bc-9c6c-265bf936babc_5167x3655.jpeg 848w, https://substackcdn.com/image/fetch/$s_!wvpQ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33c4d0ad-c61a-49bc-9c6c-265bf936babc_5167x3655.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!wvpQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33c4d0ad-c61a-49bc-9c6c-265bf936babc_5167x3655.jpeg 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>There are 100+ more cheat sheets covering everything from Python, R, SQL, Spark to Power BI, Tableau, Git and many more. So if you know other people who like staying up to date on all things data, please share Data Tinkerer with them!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post&quot;,&quot;text&quot;:&quot;Refer a friend&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post"><span>Refer a friend</span></a></p><p>Now, with that out of the way, let&#8217;s get to Reddit&#8217;s automated content moderation</p>
      <p>
          <a href="https://www.datatinkerer.io/p/how-reddit-scans-1m-images-a-day-to-flag-nsfw-content-using-deep-learning">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[How Walmart Automated 400+ Forecasts and Cut Runtime by Half]]></title><description><![CDATA[Their Autotuning Framework slashed errors, halved processing time, and scaled across thousands of time series without manual tuning.]]></description><link>https://www.datatinkerer.io/p/how-walmart-automated-400-forecasts-and-cut-runtime-by-half</link><guid isPermaLink="false">https://www.datatinkerer.io/p/how-walmart-automated-400-forecasts-and-cut-runtime-by-half</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Thu, 24 Apr 2025 08:46:03 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd440e8bc-3d10-4c06-8381-20f392f22739_1536x1024.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Fellow Data Tinkerers!</p><p>Today we will look at how Walmart autotuned its time series forecasting</p><p>But before that, I wanted to share an example of what you could unlock if you share Data Tinkerer with just <strong>2 other people</strong>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!DOof!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb032ec61-eabc-4382-baeb-1d4bc80f763c_1695x1206.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!DOof!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb032ec61-eabc-4382-baeb-1d4bc80f763c_1695x1206.png 424w, 
https://substackcdn.com/image/fetch/$s_!DOof!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb032ec61-eabc-4382-baeb-1d4bc80f763c_1695x1206.png 848w, https://substackcdn.com/image/fetch/$s_!DOof!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb032ec61-eabc-4382-baeb-1d4bc80f763c_1695x1206.png 1272w, https://substackcdn.com/image/fetch/$s_!DOof!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb032ec61-eabc-4382-baeb-1d4bc80f763c_1695x1206.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!DOof!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb032ec61-eabc-4382-baeb-1d4bc80f763c_1695x1206.png" width="1456" height="1036" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b032ec61-eabc-4382-baeb-1d4bc80f763c_1695x1206.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1036,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:424800,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/162021285?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb032ec61-eabc-4382-baeb-1d4bc80f763c_1695x1206.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!DOof!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb032ec61-eabc-4382-baeb-1d4bc80f763c_1695x1206.png 424w, https://substackcdn.com/image/fetch/$s_!DOof!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb032ec61-eabc-4382-baeb-1d4bc80f763c_1695x1206.png 848w, https://substackcdn.com/image/fetch/$s_!DOof!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb032ec61-eabc-4382-baeb-1d4bc80f763c_1695x1206.png 1272w, https://substackcdn.com/image/fetch/$s_!DOof!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb032ec61-eabc-4382-baeb-1d4bc80f763c_1695x1206.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>There are 100+ more cheat sheets covering everything from Python, R, SQL, Spark to Power BI, Tableau, Git and many more. So if you know other people who like staying up to date on all things data, please share Data Tinkerer with them!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post&quot;,&quot;text&quot;:&quot;Refer a friend&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post"><span>Refer a friend</span></a></p><p>Now, with that out of the way, let&#8217;s get to Walmart&#8217;s autotuning framework!</p>
      <p>
          <a href="https://www.datatinkerer.io/p/how-walmart-automated-400-forecasts-and-cut-runtime-by-half">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[Inside the Mind of an LLM]]></title><description><![CDATA[The strange and structured world of Claude&#8217;s internal "thoughts"]]></description><link>https://www.datatinkerer.io/p/inside-the-mind-of-an-llm</link><guid isPermaLink="false">https://www.datatinkerer.io/p/inside-the-mind-of-an-llm</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Thu, 03 Apr 2025 09:04:09 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ddb5769-990c-4104-9b6a-2bacb465c492_1024x1536.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Fellow Data Tinkerers!</p><p>Today we will look at how an LLM thinks based on the research done by Anthropic.</p><p>But before that, I wanted to share an example of what you could unlock if you share Data Tinkerer with just <strong>3 other people</strong>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ttQz!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21323fe2-aeae-4cea-ae7c-de6b1cfb97ca_1497x1332.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ttQz!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21323fe2-aeae-4cea-ae7c-de6b1cfb97ca_1497x1332.png 424w, https://substackcdn.com/image/fetch/$s_!ttQz!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21323fe2-aeae-4cea-ae7c-de6b1cfb97ca_1497x1332.png 848w, 
https://substackcdn.com/image/fetch/$s_!ttQz!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21323fe2-aeae-4cea-ae7c-de6b1cfb97ca_1497x1332.png 1272w, https://substackcdn.com/image/fetch/$s_!ttQz!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21323fe2-aeae-4cea-ae7c-de6b1cfb97ca_1497x1332.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ttQz!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21323fe2-aeae-4cea-ae7c-de6b1cfb97ca_1497x1332.png" width="1456" height="1296" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/21323fe2-aeae-4cea-ae7c-de6b1cfb97ca_1497x1332.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1296,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:453580,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/160238601?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21323fe2-aeae-4cea-ae7c-de6b1cfb97ca_1497x1332.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ttQz!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21323fe2-aeae-4cea-ae7c-de6b1cfb97ca_1497x1332.png 424w, 
https://substackcdn.com/image/fetch/$s_!ttQz!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21323fe2-aeae-4cea-ae7c-de6b1cfb97ca_1497x1332.png 848w, https://substackcdn.com/image/fetch/$s_!ttQz!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21323fe2-aeae-4cea-ae7c-de6b1cfb97ca_1497x1332.png 1272w, https://substackcdn.com/image/fetch/$s_!ttQz!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F21323fe2-aeae-4cea-ae7c-de6b1cfb97ca_1497x1332.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>There are 100+ more cheat sheets covering everything from Python, R, SQL, Spark to Power BI, Tableau, Git and many more. So if you know other people who like staying up to date on all things data, please share Data Tinkerer with them!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post&quot;,&quot;text&quot;:&quot;Refer a friend&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.datatinkerer.io/leaderboard?&amp;referrer_token=4tlsmi&amp;utm_source=post"><span>Refer a friend</span></a></p><p>Now, with that out of the way, let&#8217;s get to the inner workings of Claude&#8217;s mind</p>
      <p>
          <a href="https://www.datatinkerer.io/p/inside-the-mind-of-an-llm">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[eBay’s e-Llama: AI Trained on 1 Trillion Tokens, Boosting E-Commerce Accuracy by 25%]]></title><description><![CDATA[How eBay optimized AI to deliver better product matches, faster support, and more accurate pricing]]></description><link>https://www.datatinkerer.io/p/ebays-e-llama-ai-trained-on-1-trillion-words-boosting-accuracy-by-25-percent</link><guid isPermaLink="false">https://www.datatinkerer.io/p/ebays-e-llama-ai-trained-on-1-trillion-words-boosting-accuracy-by-25-percent</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Wed, 12 Mar 2025 23:23:36 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!9y-g!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F251f45db-ed52-4366-83d6-afdba9726499_800x463.jpeg" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9y-g!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F251f45db-ed52-4366-83d6-afdba9726499_800x463.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9y-g!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F251f45db-ed52-4366-83d6-afdba9726499_800x463.jpeg 424w, https://substackcdn.com/image/fetch/$s_!9y-g!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F251f45db-ed52-4366-83d6-afdba9726499_800x463.jpeg 848w, 
https://substackcdn.com/image/fetch/$s_!9y-g!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F251f45db-ed52-4366-83d6-afdba9726499_800x463.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!9y-g!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F251f45db-ed52-4366-83d6-afdba9726499_800x463.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9y-g!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F251f45db-ed52-4366-83d6-afdba9726499_800x463.jpeg" width="800" height="463" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/251f45db-ed52-4366-83d6-afdba9726499_800x463.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:463,&quot;width&quot;:800,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:18777,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.datatinkerer.io/i/158897458?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F251f45db-ed52-4366-83d6-afdba9726499_800x463.jpeg&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!9y-g!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F251f45db-ed52-4366-83d6-afdba9726499_800x463.jpeg 424w, https://substackcdn.com/image/fetch/$s_!9y-g!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F251f45db-ed52-4366-83d6-afdba9726499_800x463.jpeg 
848w, https://substackcdn.com/image/fetch/$s_!9y-g!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F251f45db-ed52-4366-83d6-afdba9726499_800x463.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!9y-g!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F251f45db-ed52-4366-83d6-afdba9726499_800x463.jpeg 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">(Source: <a 
href="https://innovation.ebayinc.com/tech/features/scaling-large-language-models-for-e-commerce-the-development-of-a-llama-based-customized-llm-for-e-commerce/?utm_source=datatinkerer.io&amp;utm_medium=newsletter">eBay</a>)</figcaption></figure></div><h3>TL;DR</h3>
      <p>
          <a href="https://www.datatinkerer.io/p/ebays-e-llama-ai-trained-on-1-trillion-words-boosting-accuracy-by-25-percent">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[From Pins to Personalization: Inside Pinterest's Retrieval System for 500 Million Users]]></title><description><![CDATA[Understand the challenges and solutions in delivering personalized content to Pinterest's massive community.]]></description><link>https://www.datatinkerer.io/p/from-pins-to-personalization-inside</link><guid isPermaLink="false">https://www.datatinkerer.io/p/from-pins-to-personalization-inside</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Mon, 17 Feb 2025 06:31:20 GMT</pubDate><enclosure url="https://images.unsplash.com/photo-1636044594149-6e2f289c3868?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw2fHxwaW50ZXJlc3R8ZW58MHx8fHwxNzM5NzczMTU1fDA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://images.unsplash.com/photo-1636044594149-6e2f289c3868?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw2fHxwaW50ZXJlc3R8ZW58MHx8fHwxNzM5NzczMTU1fDA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://images.unsplash.com/photo-1636044594149-6e2f289c3868?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw2fHxwaW50ZXJlc3R8ZW58MHx8fHwxNzM5NzczMTU1fDA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 424w, https://images.unsplash.com/photo-1636044594149-6e2f289c3868?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw2fHxwaW50ZXJlc3R8ZW58MHx8fHwxNzM5NzczMTU1fDA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 848w, 
https://images.unsplash.com/photo-1636044594149-6e2f289c3868?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw2fHxwaW50ZXJlc3R8ZW58MHx8fHwxNzM5NzczMTU1fDA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1272w, https://images.unsplash.com/photo-1636044594149-6e2f289c3868?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw2fHxwaW50ZXJlc3R8ZW58MHx8fHwxNzM5NzczMTU1fDA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1456w" sizes="100vw"><img src="https://images.unsplash.com/photo-1636044594149-6e2f289c3868?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw2fHxwaW50ZXJlc3R8ZW58MHx8fHwxNzM5NzczMTU1fDA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080" width="6000" height="4000" data-attrs="{&quot;src&quot;:&quot;https://images.unsplash.com/photo-1636044594149-6e2f289c3868?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw2fHxwaW50ZXJlc3R8ZW58MHx8fHwxNzM5NzczMTU1fDA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:4000,&quot;width&quot;:6000,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;a red square button with a pin on it&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="a red square button with a pin on it" title="a red square button with a pin on it" srcset="https://images.unsplash.com/photo-1636044594149-6e2f289c3868?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw2fHxwaW50ZXJlc3R8ZW58MHx8fHwxNzM5NzczMTU1fDA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 424w, 
https://images.unsplash.com/photo-1636044594149-6e2f289c3868?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw2fHxwaW50ZXJlc3R8ZW58MHx8fHwxNzM5NzczMTU1fDA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 848w, https://images.unsplash.com/photo-1636044594149-6e2f289c3868?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw2fHxwaW50ZXJlc3R8ZW58MHx8fHwxNzM5NzczMTU1fDA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1272w, https://images.unsplash.com/photo-1636044594149-6e2f289c3868?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw2fHxwaW50ZXJlc3R8ZW58MHx8fHwxNzM5NzczMTU1fDA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line 
x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Photo by <a href="true">Dima Solomin</a> on <a href="https://unsplash.com">Unsplash</a></figcaption></figure></div><h3>TL;DR</h3><div><hr></div><h4><strong>Situation</strong></h4><p>Pinterest's existing content retrieval system relied on traditional methods that couldn't fully capture the complex relationships between users and the vast array of content, leading to less personalized recommendations.</p><h4><strong>Task</strong></h4><p>Develop a scalable, embedding-based retrieval system capable of learning and representing the nuanced interactions between users and content, effectively processing Pinterest's extensive dataset.</p><h4><strong>Action</strong></h4><ol><li><p><strong>System Design:</strong> They designed an internal embedding-based retrieval system for organic content, utilizing advanced machine learning techniques to generate embeddings that position users and content within a shared vector space.</p></li><li><p><strong>Data Processing:</strong> To train the model effectively, they processed large-scale data, extracting meaningful features from user interactions and content metadata, handling billions of data points to ensure accurate embeddings.</p></li><li><p><strong>Model Training and Deployment:</strong> The model was trained on this extensive dataset, optimized for performance and relevance, and seamlessly integrated into Pinterest's infrastructure without disrupting user experience.</p></li></ol><h4><strong>Result</strong></h4><p>Implementing the embedding-based retrieval system improved content relevance and user engagement on Pinterest, leading to more personalized recommendations and increased interaction rates.</p><h4><strong>Use Cases</strong></h4><p>Personalized Recommendation, Search Functionality</p><h4><strong>Tech Stack/Framework</strong></h4><p>Two-Tower Model, Approximate Nearest 
Neighbor, Auto Retraining</p><div><hr></div><h3>Explained Further</h3><div><hr></div><h4><strong>Content Discovery with Advanced Retrieval System</strong></h4>
      <p>
          <a href="https://www.datatinkerer.io/p/from-pins-to-personalization-inside">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[Improving Search for 1B+ LinkedIn Users with GenAI]]></title><description><![CDATA[Discover how LinkedIn used AI to refine search suggestions and create a better user experience]]></description><link>https://www.datatinkerer.io/p/improving-search-for-1b-linkedin-users-with-genai</link><guid isPermaLink="false">https://www.datatinkerer.io/p/improving-search-for-1b-linkedin-users-with-genai</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Mon, 10 Feb 2025 00:01:11 GMT</pubDate><enclosure url="https://images.unsplash.com/photo-1616469829581-73993eb86b02?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxsaW5rZWRpbnxlbnwwfHx8fDE3Mzg3NDEwNjF8MA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://images.unsplash.com/photo-1616469829581-73993eb86b02?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxsaW5rZWRpbnxlbnwwfHx8fDE3Mzg3NDEwNjF8MA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://images.unsplash.com/photo-1616469829581-73993eb86b02?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxsaW5rZWRpbnxlbnwwfHx8fDE3Mzg3NDEwNjF8MA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 424w, https://images.unsplash.com/photo-1616469829581-73993eb86b02?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxsaW5rZWRpbnxlbnwwfHx8fDE3Mzg3NDEwNjF8MA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 848w, 
https://images.unsplash.com/photo-1616469829581-73993eb86b02?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxsaW5rZWRpbnxlbnwwfHx8fDE3Mzg3NDEwNjF8MA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1272w, https://images.unsplash.com/photo-1616469829581-73993eb86b02?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxsaW5rZWRpbnxlbnwwfHx8fDE3Mzg3NDEwNjF8MA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1456w" sizes="100vw"><img src="https://images.unsplash.com/photo-1616469829581-73993eb86b02?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxsaW5rZWRpbnxlbnwwfHx8fDE3Mzg3NDEwNjF8MA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080" width="6000" height="4000" data-attrs="{&quot;src&quot;:&quot;https://images.unsplash.com/photo-1616469829581-73993eb86b02?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxsaW5rZWRpbnxlbnwwfHx8fDE3Mzg3NDEwNjF8MA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:4000,&quot;width&quot;:6000,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;white and blue labeled box&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="white and blue labeled box" title="white and blue labeled box" srcset="https://images.unsplash.com/photo-1616469829581-73993eb86b02?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxsaW5rZWRpbnxlbnwwfHx8fDE3Mzg3NDEwNjF8MA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 424w, 
https://images.unsplash.com/photo-1616469829581-73993eb86b02?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxsaW5rZWRpbnxlbnwwfHx8fDE3Mzg3NDEwNjF8MA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 848w, https://images.unsplash.com/photo-1616469829581-73993eb86b02?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxsaW5rZWRpbnxlbnwwfHx8fDE3Mzg3NDEwNjF8MA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1272w, https://images.unsplash.com/photo-1616469829581-73993eb86b02?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxsaW5rZWRpbnxlbnwwfHx8fDE3Mzg3NDEwNjF8MA&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" 
x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Photo by <a href="true">Souvik Banerjee</a> on <a href="https://unsplash.com">Unsplash</a></figcaption></figure></div><h3>TL;DR</h3>
      <p>
          <a href="https://www.datatinkerer.io/p/improving-search-for-1b-linkedin-users-with-genai">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[No Cookies, No Problem: Grammarly’s Ad Experiment]]></title><description><![CDATA[How Grammarly Measured Ad Impact Without Tracking 30M+ Users]]></description><link>https://www.datatinkerer.io/p/no-cookies-no-problem-grammarlys-ad-experiment</link><guid isPermaLink="false">https://www.datatinkerer.io/p/no-cookies-no-problem-grammarlys-ad-experiment</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Sun, 02 Feb 2025 23:30:05 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!_487!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f52f6f1-3e62-414e-87a0-bc545d7338f3_1024x608.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!_487!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f52f6f1-3e62-414e-87a0-bc545d7338f3_1024x608.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!_487!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f52f6f1-3e62-414e-87a0-bc545d7338f3_1024x608.png 424w, https://substackcdn.com/image/fetch/$s_!_487!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f52f6f1-3e62-414e-87a0-bc545d7338f3_1024x608.png 848w, https://substackcdn.com/image/fetch/$s_!_487!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f52f6f1-3e62-414e-87a0-bc545d7338f3_1024x608.png 1272w, 
https://substackcdn.com/image/fetch/$s_!_487!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f52f6f1-3e62-414e-87a0-bc545d7338f3_1024x608.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!_487!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f52f6f1-3e62-414e-87a0-bc545d7338f3_1024x608.png" width="1024" height="608" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5f52f6f1-3e62-414e-87a0-bc545d7338f3_1024x608.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:608,&quot;width&quot;:1024,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!_487!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f52f6f1-3e62-414e-87a0-bc545d7338f3_1024x608.png 424w, https://substackcdn.com/image/fetch/$s_!_487!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f52f6f1-3e62-414e-87a0-bc545d7338f3_1024x608.png 848w, https://substackcdn.com/image/fetch/$s_!_487!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f52f6f1-3e62-414e-87a0-bc545d7338f3_1024x608.png 1272w, 
https://substackcdn.com/image/fetch/$s_!_487!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f52f6f1-3e62-414e-87a0-bc545d7338f3_1024x608.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h3>TL;DR</h3><div><hr></div><h4><strong>Situation</strong></h4><p>Grammarly&#8217;s <strong>30M+ daily users</strong> rely on paid ads, but traditional attribution failed to measure YouTube&#8217;s true impact, especially with third-party cookies disappearing.</p><h4><strong>Task</strong></h4><p>Find a privacy-first way to determine if YouTube ads drive new 
users or just capture organic sign-ups using geo experimentation.</p><h4><strong>Action</strong></h4><ul><li><p><strong>Geo Experimentation:</strong> Stopped ads in select regions while maintaining them in others to measure impact.</p></li><li><p><strong>Data Modeling:</strong> Used Google&#8217;s TBR package with a BSTS model to estimate incremental user acquisition.</p></li><li><p><strong>Geo Split Selection:</strong> Created balanced test/control groups using clustering and randomization.</p></li><li><p><strong>Power Analysis:</strong> Optimized experiment duration and sample size for accuracy and minimal business disruption.</p></li><li><p><strong>KPI Measurement:</strong> Focused on new active users instead of revenue, aligning with Grammarly&#8217;s freemium model.</p></li></ul><h4><strong>Result</strong></h4><p>YouTube ads were most effective before peak seasons but had less impact during them. When ads were paused, new sign-ups dropped immediately, showing YouTube&#8217;s stronger mid-funnel role than expected.</p><h4><strong>Use Cases</strong></h4><p>Marketing Attribution Improvement, Media Spend Optimization, Incrementality Measurement</p><h4><strong>Tech Stack/Framework</strong></h4><p>Google TBR (Time-Based Regression), Bayesian Structural Time Series (BSTS) Model, Clustering, Randomization</p><div><hr></div><h3>Explained Further</h3><div><hr></div><h4><strong>Understanding the Attribution Challenge</strong></h4><p>Measuring marketing effectiveness is challenging, especially for ads that don&#8217;t involve direct clicks. It&#8217;s easy to track whether someone clicks on a Google search ad, but what about an ad they only <strong>view</strong> on YouTube? Traditional attribution models often struggle to assign proper credit to top-of-funnel marketing, leading to an undervaluation of video ads and TV campaigns.</p>
      <p>
          <a href="https://www.datatinkerer.io/p/no-cookies-no-problem-grammarlys-ad-experiment">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[To Build or not to Build AI Agents]]></title><description><![CDATA[Discover Anthropic&#8217;s framework for knowing when AI agents add value or when simpler solutions suffice]]></description><link>https://www.datatinkerer.io/p/to-build-or-not-to-build-ai-agents</link><guid isPermaLink="false">https://www.datatinkerer.io/p/to-build-or-not-to-build-ai-agents</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Mon, 27 Jan 2025 01:17:58 GMT</pubDate><enclosure url="https://images.unsplash.com/photo-1620712943543-bcc4688e7485?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxhaSUyMGFnZW50fGVufDB8fHx8MTczNzk0MDYwNnww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://images.unsplash.com/photo-1620712943543-bcc4688e7485?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxhaSUyMGFnZW50fGVufDB8fHx8MTczNzk0MDYwNnww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://images.unsplash.com/photo-1620712943543-bcc4688e7485?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxhaSUyMGFnZW50fGVufDB8fHx8MTczNzk0MDYwNnww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 424w, https://images.unsplash.com/photo-1620712943543-bcc4688e7485?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxhaSUyMGFnZW50fGVufDB8fHx8MTczNzk0MDYwNnww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 848w, https://images.unsplash.com/photo-1620712943543-bcc4688e7485?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxhaSUyMGFnZW50fGVufDB8fHx8MTczNzk0MDYwNnww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1272w, 
https://images.unsplash.com/photo-1620712943543-bcc4688e7485?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxhaSUyMGFnZW50fGVufDB8fHx8MTczNzk0MDYwNnww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1456w" sizes="100vw"><img src="https://images.unsplash.com/photo-1620712943543-bcc4688e7485?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxhaSUyMGFnZW50fGVufDB8fHx8MTczNzk0MDYwNnww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080" width="2765" height="3456" data-attrs="{&quot;src&quot;:&quot;https://images.unsplash.com/photo-1620712943543-bcc4688e7485?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxhaSUyMGFnZW50fGVufDB8fHx8MTczNzk0MDYwNnww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:3456,&quot;width&quot;:2765,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;black and white robot toy on red wooden table&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="black and white robot toy on red wooden table" title="black and white robot toy on red wooden table" srcset="https://images.unsplash.com/photo-1620712943543-bcc4688e7485?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxhaSUyMGFnZW50fGVufDB8fHx8MTczNzk0MDYwNnww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 424w, https://images.unsplash.com/photo-1620712943543-bcc4688e7485?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxhaSUyMGFnZW50fGVufDB8fHx8MTczNzk0MDYwNnww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 848w, 
https://images.unsplash.com/photo-1620712943543-bcc4688e7485?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxhaSUyMGFnZW50fGVufDB8fHx8MTczNzk0MDYwNnww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1272w, https://images.unsplash.com/photo-1620712943543-bcc4688e7485?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHwxfHxhaSUyMGFnZW50fGVufDB8fHx8MTczNzk0MDYwNnww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Photo by <a href="true">Andrea De Santis</a> on <a 
href="https://unsplash.com">Unsplash</a></figcaption></figure></div><h3>TL;DR</h3>
      <p>
          <a href="https://www.datatinkerer.io/p/to-build-or-not-to-build-ai-agents">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[How Uber Scaled Incentive Optimisation by 40x]]></title><description><![CDATA[Learn more about the hybrid Ray-Spark architecture that transformed Uber&#8217;s budget allocation system]]></description><link>https://www.datatinkerer.io/p/how-uber-scaled-incentive-optimisation-by-40-percent</link><guid isPermaLink="false">https://www.datatinkerer.io/p/how-uber-scaled-incentive-optimisation-by-40-percent</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Mon, 20 Jan 2025 00:01:51 GMT</pubDate><enclosure url="https://images.unsplash.com/photo-1554260570-83dc2f46ef79?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw3fHx1YmVyfGVufDB8fHx8MTczNjc1NDU1OHww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://images.unsplash.com/photo-1554260570-83dc2f46ef79?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw3fHx1YmVyfGVufDB8fHx8MTczNjc1NDU1OHww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://images.unsplash.com/photo-1554260570-83dc2f46ef79?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw3fHx1YmVyfGVufDB8fHx8MTczNjc1NDU1OHww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 424w, https://images.unsplash.com/photo-1554260570-83dc2f46ef79?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw3fHx1YmVyfGVufDB8fHx8MTczNjc1NDU1OHww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 848w, https://images.unsplash.com/photo-1554260570-83dc2f46ef79?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw3fHx1YmVyfGVufDB8fHx8MTczNjc1NDU1OHww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1272w, 
https://images.unsplash.com/photo-1554260570-83dc2f46ef79?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw3fHx1YmVyfGVufDB8fHx8MTczNjc1NDU1OHww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1456w" sizes="100vw"><img src="https://images.unsplash.com/photo-1554260570-83dc2f46ef79?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw3fHx1YmVyfGVufDB8fHx8MTczNjc1NDU1OHww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080" width="4297" height="6445" data-attrs="{&quot;src&quot;:&quot;https://images.unsplash.com/photo-1554260570-83dc2f46ef79?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw3fHx1YmVyfGVufDB8fHx8MTczNjc1NDU1OHww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:6445,&quot;width&quot;:4297,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;person holding black smartphone&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="person holding black smartphone" title="person holding black smartphone" srcset="https://images.unsplash.com/photo-1554260570-83dc2f46ef79?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw3fHx1YmVyfGVufDB8fHx8MTczNjc1NDU1OHww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 424w, https://images.unsplash.com/photo-1554260570-83dc2f46ef79?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw3fHx1YmVyfGVufDB8fHx8MTczNjc1NDU1OHww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 848w, 
https://images.unsplash.com/photo-1554260570-83dc2f46ef79?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw3fHx1YmVyfGVufDB8fHx8MTczNjc1NDU1OHww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1272w, https://images.unsplash.com/photo-1554260570-83dc2f46ef79?crop=entropy&amp;cs=tinysrgb&amp;fit=max&amp;fm=jpg&amp;ixid=M3wzMDAzMzh8MHwxfHNlYXJjaHw3fHx1YmVyfGVufDB8fHx8MTczNjc1NDU1OHww&amp;ixlib=rb-4.0.3&amp;q=80&amp;w=1080 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Photo by <a href="true">Austin Distel</a> on <a 
href="https://unsplash.com">Unsplash</a></figcaption></figure></div><h3>TL;DR</h3>
      <p>
          <a href="https://www.datatinkerer.io/p/how-uber-scaled-incentive-optimisation-by-40-percent">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[The Art of Substitution: Instacart’s ML Model for Better Shopping Choices]]></title><description><![CDATA[Discover how Instacart uses machine learning to create tailored solutions for unavailable grocery items]]></description><link>https://www.datatinkerer.io/p/the-art-of-substitution-instacarts-ml-model</link><guid isPermaLink="false">https://www.datatinkerer.io/p/the-art-of-substitution-instacarts-ml-model</guid><dc:creator><![CDATA[Data Tinkerer]]></dc:creator><pubDate>Sun, 12 Jan 2025 23:01:15 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!9h_o!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F00a24c5b-b7a6-4d5c-9bc2-0e7b691d7d75_4800x2700.webp" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9h_o!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F00a24c5b-b7a6-4d5c-9bc2-0e7b691d7d75_4800x2700.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9h_o!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F00a24c5b-b7a6-4d5c-9bc2-0e7b691d7d75_4800x2700.webp 424w, https://substackcdn.com/image/fetch/$s_!9h_o!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F00a24c5b-b7a6-4d5c-9bc2-0e7b691d7d75_4800x2700.webp 848w, https://substackcdn.com/image/fetch/$s_!9h_o!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F00a24c5b-b7a6-4d5c-9bc2-0e7b691d7d75_4800x2700.webp 1272w, 
https://substackcdn.com/image/fetch/$s_!9h_o!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F00a24c5b-b7a6-4d5c-9bc2-0e7b691d7d75_4800x2700.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9h_o!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F00a24c5b-b7a6-4d5c-9bc2-0e7b691d7d75_4800x2700.webp" width="1456" height="819" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/00a24c5b-b7a6-4d5c-9bc2-0e7b691d7d75_4800x2700.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:183808,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!9h_o!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F00a24c5b-b7a6-4d5c-9bc2-0e7b691d7d75_4800x2700.webp 424w, https://substackcdn.com/image/fetch/$s_!9h_o!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F00a24c5b-b7a6-4d5c-9bc2-0e7b691d7d75_4800x2700.webp 848w, https://substackcdn.com/image/fetch/$s_!9h_o!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F00a24c5b-b7a6-4d5c-9bc2-0e7b691d7d75_4800x2700.webp 1272w, 
https://substackcdn.com/image/fetch/$s_!9h_o!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F00a24c5b-b7a6-4d5c-9bc2-0e7b691d7d75_4800x2700.webp 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h3>TL;DR</h3>
      <p>
          <a href="https://www.datatinkerer.io/p/the-art-of-substitution-instacarts-ml-model">
              Read more
          </a>
      </p>
   ]]></content:encoded></item></channel></rss>