<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <!-- Channel metadata: identifies this feed as the "monitoring" tag listing of the site. -->
    <title>Monitoring on Vitor Pontual | The VeePee Hub</title>
    <link>https://vitorpontual.com/tags/monitoring/</link>
    <description>Recent content in Monitoring on Vitor Pontual | The VeePee Hub</description>
    <generator>Hugo</generator>
    <language>en-us</language>
    <lastBuildDate>Mon, 09 Feb 2026 00:00:00 +0000</lastBuildDate>
    <!-- Atom self-link: the URL of this feed document itself. -->
    <atom:link rel="self"
               type="application/rss+xml"
               href="https://vitorpontual.com/tags/monitoring/index.xml"/>
    <item>
      <title>Ollama Fleet Manager</title>
      <link>https://vitorpontual.com/services/ollama-fleet-manager/</link>
      <pubDate>Mon, 09 Feb 2026 00:00:00 +0000</pubDate>
      <guid>https://vitorpontual.com/services/ollama-fleet-manager/</guid>
      <!--
        The HTML summary is carried in a CDATA section. Everything between the
        CDATA markers is character data, so the continuation lines are left
        unindented on purpose: adding indentation would change the text.
      -->
      <description><![CDATA[<p>A dashboard and intelligent proxy for managing a fleet of Ollama GPU servers. Point any Ollama client at the proxy and it routes requests to the best available server automatically—no application changes needed.</p>
<ul>
<li><strong>Intelligent request routing</strong> — prioritizes servers with model already loaded, then model on disk, then most free VRAM</li>
<li><strong>Real-time fleet monitoring</strong> — server status, loaded models, VRAM usage, CPU/GPU temperature, memory, disk, and uptime</li>
<li><strong>Usage analytics</strong> — request volume, success rates, latency percentiles, and breakdowns by model, source, and server over 24h/7d/30d windows</li>
<li><strong>Request aggregation</strong> — <code>/api/tags</code>, <code>/api/ps</code>, and <code>/v1/models</code> combine responses from all servers into a single unified list</li>
<li><strong>Scheduled jobs</strong> — cron-based model scheduling with conflict detection across the fleet</li>
<li><strong>Telegram alerts</strong> — server offline/online, overheating, low memory, and reboot notifications</li>
<li><strong>Plugin system</strong> — extensible architecture for community plugins</li>
<li><strong>OpenAI API compatible</strong> — supports <code>/v1/*</code> endpoints so OpenAI-compatible tools work out of the box</li>
</ul>]]></description>
    </item>
  </channel>
</rss>
