<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <!-- The atom prefix is declared for the single atom:link element inside the channel. -->
  <channel>
    <!-- Channel-level metadata: feed title, site link, description, language and build time. -->
    <title>Dustin Deus — Writing</title>
    <link>https://starptech.com/blog</link>
    <!-- rel="self" link carrying the URL of this feed document. -->
    <atom:link href="https://starptech.com/rss.xml" rel="self" type="application/rss+xml" />
    <description>Notes on infrastructure, shipping software, and startups.</description>
    <language>en</language>
    <lastBuildDate>Fri, 29 May 2026 13:41:26 GMT</lastBuildDate>
    <!-- One item per post. The guid repeats the post URL given in link. -->
    <item>
      <title>Why more GPUs is not enough for LLM inference</title>
      <link>https://starptech.com/blog/why-more-gpus-is-not-enough-for-llm-inference/</link>
      <guid>https://starptech.com/blog/why-more-gpus-is-not-enough-for-llm-inference/</guid>
      <description>What I learned deploying and tuning large-model inference: KV cache, routing, and cache hierarchy matter as much as raw GPU count.</description>
      <category>AI infrastructure</category>
      <pubDate>Thu, 28 May 2026 00:00:00 GMT</pubDate>
    </item>
  </channel>
</rss>