TL;DR:

  • Set up RTX 3090 eGPU hardware connection
  • Install NVIDIA drivers and CUDA toolkit
  • Configure rootless Docker with GPU support
  • Run vLLM container with GPU acceleration
  • Test inference with your preferred models

Prerequisites

Hardware Requirements

An RTX 3090 connected to the host through an eGPU enclosure (typically Thunderbolt). Confirm the card shows up on the PCI bus:

# Verify eGPU connection
lspci | grep NVIDIA
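
If the card is detected, the output should include a line similar to this (bus ID and revision will vary):

01:00.0 VGA compatible controller: NVIDIA Corporation GA102 [GeForce RTX 3090] (rev a1)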

System Requirements

  • Ubuntu 20.04 or later with build-essential installed
  • NVIDIA driver 525 or newer (CUDA 11.8 requires driver >= 520.61.05)
  • CUDA 11.8 toolkit
  • Rootless Docker with the NVIDIA Container Toolkit

Each of these is installed step by step in the sections below.

NVIDIA Drivers

Installation

# Add NVIDIA PPA
sudo add-apt-repository ppa:graphics-drivers/ppa
sudo apt update

# Install the NVIDIA driver (CUDA 11.8 requires >= 520.61.05, so 470 is too old)
sudo apt install nvidia-driver-525
sudo reboot  # reboot so the new driver loads

Verify Installation

nvidia-smi
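
The header of the output should show the RTX 3090 and the new driver version, along these lines (exact values will differ; the CUDA version listed is the maximum the driver supports, not what is installed):

NVIDIA-SMI 525.147.05   Driver Version: 525.147.05   CUDA Version: 12.0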

CUDA Toolkit

Installation

# Download CUDA 11.8
wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run

# Make executable and run the installer (toolkit only; the driver is already installed)
chmod +x cuda_11.8.0_520.61.05_linux.run
sudo ./cuda_11.8.0_520.61.05_linux.run --toolkit --silent
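
The runfile does not update your shell environment, so put the toolkit on your PATH before verifying:

# Make the CUDA binaries and libraries visible to your shell
echo 'export PATH=/usr/local/cuda-11.8/bin:$PATH' >> ~/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-11.8/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc
source ~/.bashrc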

Verify Installation

nvcc --version

Rootless Docker

Installation

# Rootless Docker needs newuidmap/newgidmap for user namespaces
sudo apt install -y uidmap

# Install rootless Docker
curl -fsSL https://get.docker.com/rootless | sh
export PATH=$HOME/bin:$PATH
export DOCKER_HOST=unix://$XDG_RUNTIME_DIR/docker.sock

# Add to shell profile
echo 'export PATH=$HOME/bin:$PATH' >> ~/.bashrc
echo 'export DOCKER_HOST=unix://$XDG_RUNTIME_DIR/docker.sock' >> ~/.bashrc

# Start Docker daemon
systemctl --user start docker
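
By default the rootless daemon stops when you log out. To start it at boot and keep it running:

# Start automatically and survive logout
systemctl --user enable docker
sudo loginctl enable-linger $USER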

NVIDIA Container Toolkit Setup

# Install NVIDIA Container Toolkit
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
curl -s -L https://nvidia.github.io/libnvidia-container/stable/$distribution/libnvidia-container.list | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list

sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit

# Configure for rootless Docker: point the user-level daemon.json at the NVIDIA runtime
nvidia-ctk runtime configure --runtime=docker --config=$HOME/.config/docker/daemon.json
systemctl --user restart docker

# Rootless containers cannot manage cgroups, so disable them for the container CLI
sudo nvidia-ctk config --set nvidia-container-cli.no-cgroups --in-place

Test GPU Access

# Test NVIDIA Container Toolkit (should print the same table nvidia-smi shows on the host)
docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu20.04 nvidia-smi

vLLM

Docker Installation

# Pull vLLM image
docker pull vllm/vllm-openai:latest

# Run vLLM server
docker run --gpus all --shm-size 1g \
  -p 8000:8000 \
  -v $HOME/.cache/huggingface:/root/.cache/huggingface \
  vllm/vllm-openai:latest \
  --model microsoft/DialoGPT-medium \
  --gpu-memory-utilization 0.9
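
Once the server logs show it is ready, confirm it is serving the model through the OpenAI-compatible models endpoint:

# List the models the server exposes
curl http://localhost:8000/v1/models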

Native Installation

# Install Python environment
sudo apt install python3.8 python3.8-venv
python3.8 -m venv vllm-env
source vllm-env/bin/activate

# Install a CUDA 11.8 build of PyTorch first, then vLLM on top of it
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
pip install vllm
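
As a quick smoke test of the native install, here is a minimal sketch using vLLM's offline API with the same model as above (run inside the activated venv; the model downloads on first use):

python - <<'PY'
from vllm import LLM, SamplingParams

# Load the model and generate a short completion without starting a server
llm = LLM(model="microsoft/DialoGPT-medium")
outputs = llm.generate(["Hello, how are you?"], SamplingParams(max_tokens=32))
print(outputs[0].outputs[0].text)
PY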

Configuration

# Select the eGPU by its nvidia-smi index (use 0 if the 3090 is the only NVIDIA GPU)
export CUDA_VISIBLE_DEVICES=1

# Reduce CUDA allocator fragmentation when loading large models
export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
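
With these variables set, the OpenAI-compatible server can be launched natively with the same flags as the Docker variant:

# Start the API server from the activated venv
python -m vllm.entrypoints.openai.api_server \
  --model microsoft/DialoGPT-medium \
  --gpu-memory-utilization 0.9 \
  --port 8000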

Basic Usage

# Test API endpoint
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "microsoft/DialoGPT-medium",
    "messages": [{"role": "user", "content": "Hello"}],
    "max_tokens": 100
  }'
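
A successful call returns an OpenAI-style JSON body, abbreviated here (exact fields vary across vLLM versions):

{
  "object": "chat.completion",
  "model": "microsoft/DialoGPT-medium",
  "choices": [
    {"message": {"role": "assistant", "content": "..."}, "finish_reason": "stop"}
  ]
}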

Performance Monitoring

# Monitor GPU usage
watch -n 1 nvidia-smi
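
For a compact one-line-per-second stream of utilization and memory activity instead of the full table:

# u = utilization, m = framebuffer memory
nvidia-smi dmon -s um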
