feat: Add pixi environment (#534)

* feat: Add pixi environment

* Add pixi manifest pixi.toml for Linux x86, macOS arm64, Windows 64.

* ci: Update CI workflow and unify to one

* Enable workflow dispatch.
* Add concurrency limits.
* Use pixi for CI workflow.
* Unify to a single workflow for all OS tested

* feat: Add pixi lock file

* Ensure tensorflow-cpu installed on Windows

* fix package check

* fix package check

* simplification plus uv and pip runners

* some fixes to pixi and pip

* create pixi.lock

* fix pixi.lock issue

* another attempt trying to fix get_packages

* another attempt trying to fix get_packages

* clean up python_environment_check.py

* updated runner and docs

* use bash

* proper env activation

* proper env activation

---------

Co-authored-by: rasbt <mail@sebastianraschka.com>
This commit is contained in:
Matthew Feickert 2025-02-17 10:33:53 -07:00 committed by GitHub
parent 15592c1204
commit bd0484c1be
14 changed files with 13169 additions and 222 deletions

View File

@ -1,55 +0,0 @@
# Workflow (removed by this commit): runs the selected test scripts and
# notebook validations on ubuntu-latest.
name: Code tests (Linux)
on:
# Push trigger with a branch filter (main) and the path patterns listed below.
push:
branches: [ main ]
paths:
- '**/*.py' # Run workflow for changes in Python files
- '**/*.ipynb'
- '**/*.yaml'
- '**/*.yml'
- '**/*.sh'
# Pull-request trigger with the same branch filter and path patterns.
pull_request:
branches: [ main ]
paths:
- '**/*.py'
- '**/*.ipynb'
- '**/*.yaml'
- '**/*.yml'
- '**/*.sh'
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.10"
# Installs uv through its install script, then the project, the extra test
# requirements for ch05/07_gpt_to_llama, and the pytest-ruff / nbval plugins.
- name: Install dependencies
run: |
curl -LsSf https://astral.sh/uv/install.sh | sh
uv python install 3.10
uv add . --dev
uv pip install -r ch05/07_gpt_to_llama/tests/test-requirements-extra.txt
uv add pytest-ruff nbval
# Each step starts a new shell, so .venv is activated again before pytest runs.
- name: Test Selected Python Scripts
run: |
source .venv/bin/activate
pytest --ruff setup/02_installing-python-libraries/tests.py
pytest --ruff ch04/01_main-chapter-code/tests.py
pytest --ruff ch05/01_main-chapter-code/tests.py
pytest --ruff ch05/07_gpt_to_llama/tests/tests.py
pytest --ruff ch06/01_main-chapter-code/tests.py
# Runs the listed notebooks through pytest with the --nbval flag.
- name: Validate Selected Jupyter Notebooks
run: |
source .venv/bin/activate
pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

View File

@ -1,55 +0,0 @@
# Workflow (removed by this commit): runs the selected test scripts and
# notebook validations on macos-latest.
name: Code tests (macOS)
on:
# Push trigger with a branch filter (main) and the path patterns listed below.
push:
branches: [ main ]
paths:
- '**/*.py' # Run workflow for changes in Python files
- '**/*.ipynb'
- '**/*.yaml'
- '**/*.yml'
- '**/*.sh'
# Pull-request trigger with the same branch filter and path patterns.
pull_request:
branches: [ main ]
paths:
- '**/*.py'
- '**/*.ipynb'
- '**/*.yaml'
- '**/*.yml'
- '**/*.sh'
jobs:
test:
runs-on: macos-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.10"
# Installs uv through its install script, then the project, the extra test
# requirements for ch05/07_gpt_to_llama, and the pytest-ruff / nbval plugins.
- name: Install dependencies
run: |
curl -LsSf https://astral.sh/uv/install.sh | sh
uv python install 3.10
uv add . --dev
uv pip install -r ch05/07_gpt_to_llama/tests/test-requirements-extra.txt
uv add pytest-ruff nbval
# Each step starts a new shell, so .venv is activated again before pytest runs.
- name: Test Selected Python Scripts
run: |
source .venv/bin/activate
pytest --ruff setup/02_installing-python-libraries/tests.py
pytest --ruff ch04/01_main-chapter-code/tests.py
pytest --ruff ch05/01_main-chapter-code/tests.py
pytest --ruff ch05/07_gpt_to_llama/tests/tests.py
pytest --ruff ch06/01_main-chapter-code/tests.py
# Runs the listed notebooks through pytest with the --nbval flag.
- name: Validate Selected Jupyter Notebooks
run: |
source .venv/bin/activate
pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

61
.github/workflows/basic-tests-pip.yml vendored Normal file
View File

@ -0,0 +1,61 @@
# CI: install the requirements with plain pip into a virtual environment and
# run the selected test scripts and notebook validations on Ubuntu.
name: Code tests (plain pip)

on:
  push:
    branches: [main]
    paths:
      - '**/*.py'
      - '**/*.ipynb'
      - '**/*.yaml'
      - '**/*.yml'
      - '**/*.sh'
      # The job installs from requirements files, so changes to them must
      # trigger a run as well.
      - '**/*requirements*.txt'
  pull_request:
    branches: [main]
    paths:
      - '**/*.py'
      - '**/*.ipynb'
      - '**/*.yaml'
      - '**/*.yml'
      - '**/*.sh'
      - '**/*requirements*.txt'
  # Allow manual runs from the Actions tab.
  workflow_dispatch:

# Keep a single active run per workflow and ref; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  pip-tests:
    name: Pip Tests (Ubuntu Only)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: "3.10"

      - name: Create Virtual Environment and Install Dependencies
        run: |
          python -m venv .venv
          source .venv/bin/activate
          pip install --upgrade pip
          pip install -r requirements.txt
          pip install -r ch05/07_gpt_to_llama/tests/test-requirements-extra.txt
          pip install pytest pytest-ruff nbval

      # Every step starts a fresh shell, so the environment is re-activated.
      - name: Test Selected Python Scripts
        run: |
          source .venv/bin/activate
          pytest --ruff setup/02_installing-python-libraries/tests.py
          pytest --ruff ch04/01_main-chapter-code/tests.py
          pytest --ruff ch05/01_main-chapter-code/tests.py
          pytest --ruff ch05/07_gpt_to_llama/tests/tests.py
          pytest --ruff ch06/01_main-chapter-code/tests.py

      - name: Validate Selected Jupyter Notebooks
        run: |
          source .venv/bin/activate
          pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
          pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
          pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

60
.github/workflows/basic-tests-pixi.yml vendored Normal file
View File

@ -0,0 +1,60 @@
# CI: run the selected test scripts and notebook validations inside the pixi
# "tests" environment on Linux, macOS, and Windows.
name: Code tests (pixi)

on:
  push:
    branches: [main]
    paths:
      - '**/*.py'
      - '**/*.ipynb'
      - '**/*.yaml'
      - '**/*.yml'
      - '**/*.sh'
      # Changes to the environment definition (pixi.toml, pyproject.toml,
      # pixi.lock) must re-run the tests too.
      - '**/*.toml'
      - 'pixi.lock'
  pull_request:
    branches: [main]
    paths:
      - '**/*.py'
      - '**/*.ipynb'
      - '**/*.yaml'
      - '**/*.yml'
      - '**/*.sh'
      - '**/*.toml'
      - 'pixi.lock'
  # Allow manual runs from the Actions tab.
  workflow_dispatch:

# Keep a single active run per workflow and ref; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      # Let every OS finish so platform-specific failures stay visible instead
      # of being cancelled by the first failing job.
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
    steps:
      - uses: actions/checkout@v4

      - name: Set up pixi (without caching)
        uses: prefix-dev/setup-pixi@v0.8.2
        with:
          environments: tests
          cache: false

      - name: List installed packages
        run: |
          pixi list --environment tests

      # The custom shell runs each script with bash through `pixi run` in the
      # "tests" environment, so no manual activation is needed on any OS.
      - name: Test Selected Python Scripts
        shell: pixi run --environment tests bash -e {0}
        run: |
          pytest --ruff setup/02_installing-python-libraries/tests.py
          pytest --ruff ch04/01_main-chapter-code/tests.py
          pytest --ruff ch05/01_main-chapter-code/tests.py
          pytest --ruff ch05/07_gpt_to_llama/tests/tests.py
          pytest --ruff ch06/01_main-chapter-code/tests.py

      - name: Validate Selected Jupyter Notebooks
        shell: pixi run --environment tests bash -e {0}
        run: |
          pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
          pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
          pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

66
.github/workflows/basic-tests-uv.yml vendored Normal file
View File

@ -0,0 +1,66 @@
# CI: install the project with uv and run the selected test scripts and
# notebook validations on Linux and macOS.
name: Code tests (uv)

on:
  push:
    branches: [main]
    paths:
      - '**/*.py'
      - '**/*.ipynb'
      - '**/*.yaml'
      - '**/*.yml'
      - '**/*.sh'
      # Dependency definitions the install step reads; changes to them must
      # trigger a run as well.
      - '**/*.toml'
      - '**/*requirements*.txt'
  pull_request:
    branches: [main]
    paths:
      - '**/*.py'
      - '**/*.ipynb'
      - '**/*.yaml'
      - '**/*.yml'
      - '**/*.sh'
      - '**/*.toml'
      - '**/*requirements*.txt'
  # Allow manual runs from the Actions tab.
  workflow_dispatch:

# Keep a single active run per workflow and ref; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  uv-tests:
    name: UV Tests (Matrix OS)
    strategy:
      # Let both operating systems finish so a failure on one does not hide
      # the result of the other.
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python (uv)
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: "3.10"

      - name: Install uv and dependencies
        shell: bash
        run: |
          curl -LsSf https://astral.sh/uv/install.sh | sh
          # Make the freshly installed uv visible in this step without relying
          # on the runner's PATH (assumes the installer's default location).
          export PATH="$HOME/.local/bin:$PATH"
          uv python install 3.10
          uv add . --dev
          uv pip install -r ch05/07_gpt_to_llama/tests/test-requirements-extra.txt
          uv add pytest-ruff nbval

      # Every step starts a fresh shell, so the environment is re-activated.
      - name: Test Selected Python Scripts (uv)
        shell: bash
        run: |
          source .venv/bin/activate
          pytest --ruff setup/02_installing-python-libraries/tests.py
          pytest --ruff ch04/01_main-chapter-code/tests.py
          pytest --ruff ch05/01_main-chapter-code/tests.py
          pytest --ruff ch05/07_gpt_to_llama/tests/tests.py
          pytest --ruff ch06/01_main-chapter-code/tests.py

      - name: Validate Selected Jupyter Notebooks (uv)
        shell: bash
        run: |
          source .venv/bin/activate
          pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
          pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
          pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

View File

@ -1,4 +1,4 @@
name: Code tests (Windows pip)
name: Code tests Windows (uv/pip)
on:
push:
@ -32,17 +32,22 @@ jobs:
python-version: '3.10'
- name: Install dependencies
shell: pwsh
shell: bash
run: |
export PATH="$HOME/.local/bin:$PATH"
pip install --upgrade pip
pip install -r requirements.txt
pip install uv
uv venv --python=python3.10
source .venv/Scripts/activate
pip install -r requirements.txt # because of dependency issue on Windows when using `uv pip`
pip install tensorflow-io-gcs-filesystem==0.31.0 # Explicit for Windows
pip install -r ch05/07_gpt_to_llama/tests/test-requirements-extra.txt
pip install pytest-ruff nbval
- name: Run Python Tests
shell: pwsh
shell: bash
run: |
source .venv/Scripts/activate
pytest --ruff setup/02_installing-python-libraries/tests.py
pytest --ruff ch04/01_main-chapter-code/tests.py
pytest --ruff ch05/01_main-chapter-code/tests.py
@ -50,8 +55,9 @@ jobs:
pytest --ruff ch06/01_main-chapter-code/tests.py
- name: Run Jupyter Notebook Tests
shell: pwsh
shell: bash
run: |
source .venv/Scripts/activate
pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb
pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

View File

@ -1,57 +0,0 @@
# Workflow (removed by this commit): runs the selected test scripts and
# notebook validations on windows-latest through uv.
name: Code tests (Windows)
on:
# Push trigger with a branch filter (main) and the path patterns listed below.
push:
branches: [ main ]
paths:
- '**/*.py'
- '**/*.ipynb'
- '**/*.yaml'
- '**/*.yml'
- '**/*.sh'
# Pull-request trigger with the same branch filter and path patterns.
pull_request:
branches: [ main ]
paths:
- '**/*.py'
- '**/*.ipynb'
- '**/*.yaml'
- '**/*.yml'
- '**/*.sh'
jobs:
test:
runs-on: windows-latest
steps:
- name: Checkout Code
uses: actions/checkout@v4
# Downloads the uv PowerShell installer to uv_install.ps1 and executes it.
- name: Install uv
shell: pwsh
run: |
Invoke-WebRequest -Uri "https://astral.sh/uv/install.ps1" -OutFile "uv_install.ps1"
& .\uv_install.ps1
# Creates a Python 3.10 virtual environment and installs the pinned
# tensorflow-io-gcs-filesystem, requirements.txt, and pytest-ruff through `uv pip`.
- name: Install dependencies with uv
shell: pwsh
run: |
uv venv --python=python3.10
uv pip install tensorflow-io-gcs-filesystem==0.31.0 # Explicit for Windows
uv pip install -r requirements.txt
uv pip install pytest-ruff
# Tests are launched through `uv run` rather than by activating the environment.
- name: Run Python Tests
shell: pwsh
run: |
uv run pytest --ruff setup/02_installing-python-libraries/tests.py
uv run pytest --ruff ch04/01_main-chapter-code/tests.py
uv run pytest --ruff ch05/01_main-chapter-code/tests.py
uv run pytest --ruff ch05/07_gpt_to_llama/tests/tests.py
uv run pytest --ruff ch06/01_main-chapter-code/tests.py
# Runs the listed notebooks through pytest with the --nbval flag.
- name: Run Jupyter Notebook Tests
shell: pwsh
run: |
uv run pytest --ruff --nbval ch02/01_main-chapter-code/dataloader.ipynb
uv run pytest --ruff --nbval ch03/01_main-chapter-code/multihead-attention.ipynb
uv run pytest --ruff --nbval ch02/04_bonus_dataloader-intuition/dataloader-intuition.ipynb

4
.gitignore vendored
View File

@ -285,3 +285,7 @@ cython_debug/
# vscode
.vscode/
# pixi environments
.pixi
*.egg-info

12754
pixi.lock Normal file

File diff suppressed because it is too large Load Diff

47
pixi.toml Normal file
View File

@ -0,0 +1,47 @@
# Pixi manifest for the repository: conda-forge packages plus a few PyPI
# packages, with an additional "tests" feature that is exposed as the "tests"
# environment.

[project]
name = "LLMs-from-scratch"
version = "0.1.0"
authors = ["Matthew Feickert <matthew.feickert@cern.ch>"]
channels = ["conda-forge"]
platforms = ["linux-64", "osx-arm64", "win-64"]

[tasks]

# Packages resolved from the channels listed above.
[dependencies]
python = "3.10.*"
numpy = ">=1.26,<2.1"
pandas = ">=2.2.3,<3"
matplotlib = ">=3.10.0,<4"
pytorch-cpu = ">=2.6.0,<3"
tiktoken = ">=0.9.0,<0.10"
jupyterlab = ">=4.3.5,<5"
tqdm = ">=4.67.1,<5"
psutil = ">=5.9.5,<7"

# Packages resolved from PyPI.
[pypi-dependencies]
# The TensorFlow team unfortunately does not maintain the conda-forge
# feedstock and it is currently broken for TensorFlow v2.18.0
tensorflow = ">=2.18.0, <3"
# The repository itself, installed in editable mode.
llms-from-scratch = { path = ".", editable = true }

# Additional PyPI requirement on the win-64 platform only.
[target.win-64.pypi-dependencies]
tensorflow-cpu = ">=2.18.0, <3"

# Extra packages that belong to the "tests" feature.
[feature.tests.dependencies]
pytest = ">=8.3.4,<9"
nbval = ">=0.11.0,<0.12"
ipywidgets = ">=8.1.2,<9"
blobfile = ">=3.0.0,<4"
huggingface_hub = ">=0.24.7,<0.29"
safetensors = ">=0.4.4,<0.6"
sentencepiece = ">=0.1.99,<0.3"
transformers = ">=4.33.2,<5"

[feature.tests.pypi-dependencies]
pytest-ruff = ">=0.4.1, <0.5"

# Exact pin applied to the "tests" feature on win-64 only.
[feature.tests.target.win-64.pypi-dependencies]
tensorflow-io-gcs-filesystem = "==0.31.0"

[environments]
tests = ["tests"]

View File

@ -27,9 +27,11 @@ This section guides you through the Python setup and package installation proced
> [!NOTE]
> There are alternative ways to install Python and use `uv`. For example, you can install Python directly via `uv` and use `uv add` instead of `uv pip install` for even faster package management.
>
> If you prefer the native `uv` commands, refer to the [./native-uv.md tutorial](./native-uv.md). I also recommend checking the official [`uv` documentation](https://docs.astral.sh/uv/).
> If you are a macOS or Linux user and prefer the native `uv` commands, refer to the [./native-uv.md tutorial](./native-uv.md). I also recommend checking the official [`uv` documentation](https://docs.astral.sh/uv/).
>
> While `uv add` offers additional speed advantages, I think that `uv pip` is slightly more user-friendly, making it a good starting point for beginners. However, if you're new to Python package management, the native `uv` interface is also a great opportunity to learn it from the start. It's also how I use `uv` now, but I realize the barrier to entry is a bit higher if you are coming from `pip` and `conda`.
> The `uv add` syntax also applies to Windows users. However, I found that some dependencies in the `pyproject.toml` cause problems on Windows. So, for Windows users, I recommend `pixi` instead, which has a `pixi add` workflow similar to `uv add`. For more information, see the [./native-pixi.md tutorial](./native-pixi.md).
>
> While `uv add` and `pixi add` offer additional speed advantages, I think that `uv pip` is slightly more user-friendly, making it a good starting point for beginners. However, if you're new to Python package management, the native `uv` interface is also a great opportunity to learn it from the start. It's also how I use `uv` now, but I realize the barrier to entry is a bit higher if you are coming from `pip` and `conda`.
@ -153,9 +155,13 @@ uv pip install -U -r https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/r
<img src="https://sebastianraschka.com/images/LLMs-from-scratch-images/setup/uv-setup/uv-install.png" width="700" height="auto" alt="Uv install">
&nbsp;
> [!NOTE]
> If you have problems with the commands above due to certain dependencies (for example, if you are using Windows), you can always fall back to using regular pip:
> `pip install -r requirements.txt`
> or
> `pip install -U -r https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/refs/heads/main/requirements.txt`
<br>

View File

@ -0,0 +1,106 @@
# Native pixi Python and package management
This tutorial is an alternative to the [`./native-uv.md`](native-uv.md) document for those who prefer `pixi`'s native commands over traditional environment and package managers like `conda` and `pip`.
Note that pixi uses `uv` under the hood to resolve and install PyPI packages; the native `uv` workflow itself is described in [`./native-uv.md`](native-uv.md).
Pixi and uv are both modern package and environment management tools for Python, but pixi is a polyglot package manager designed for managing not just Python but also other languages (similar to conda), while uv is a Python-specific tool optimized for ultra-fast dependency resolution and package installation.
Someone might choose pixi over uv if they need a polyglot package manager that supports multiple languages (not just Python) or prefer a declarative environment management approach similar to conda. For more information, please visit the official [pixi documentation](https://pixi.sh/latest/).
In this tutorial, I am using a computer running macOS, but this workflow is similar for Linux machines and may work for other operating systems as well.
&nbsp;
## 1. Install pixi
Pixi can be installed as follows, depending on your operating system.
<br>
**macOS and Linux**
```bash
curl -fsSL https://pixi.sh/install.sh | sh
```
or
```bash
wget -qO- https://pixi.sh/install.sh | sh
```
<br>
**Windows**
```powershell
powershell -ExecutionPolicy ByPass -c "irm -useb https://pixi.sh/install.ps1 | iex"
```
> [!NOTE]
> For more installation options, please refer to the official [pixi documentation](https://pixi.sh/latest/).
&nbsp;
## 2. Install Python
You can install Python using pixi:
```bash
pixi add python=3.10
```
> [!NOTE]
> I recommend installing a Python version that is at least 2 versions older than the most recent release to ensure PyTorch compatibility. For example, if the most recent version is Python 3.13, I recommend installing version 3.10 or 3.11. You can find out the most recent Python version by visiting [python.org](https://www.python.org).
&nbsp;
## 3. Install Python packages and dependencies
To install all required packages from a `pixi.toml` file (such as the one located at the top level of this GitHub repository), run the following command, assuming the file is in the same directory as your terminal session:
```bash
pixi install
```
> [!NOTE]
> If you encounter issues with dependencies (for example, if you are using Windows), you can always fall back to pip: `pixi run pip install -U -r requirements.txt`
By default, `pixi install` will create a separate virtual environment specific to the project.
You can install new packages that are not specified in `pixi.toml` via `pixi add`, for example:
```bash
pixi add packaging
```
And you can remove packages via `pixi remove`, for example,
```bash
pixi remove packaging
```
&nbsp;
## 4. Run Python code
Your environment should now be ready to run the code in the repository.
Optionally, you can run an environment check by executing the `python_environment_check.py` script in this repository:
```bash
pixi run python setup/02_installing-python-libraries/python_environment_check.py
```
<br>
**Launching JupyterLab**
You can launch a JupyterLab instance via:
```bash
pixi run jupyter lab
```
---
Any questions? Please feel free to reach out in the [Discussion Forum](https://github.com/rasbt/LLMs-from-scratch/discussions).

View File

@ -116,28 +116,6 @@ uv run python setup/02_installing-python-libraries/python_environment_check.py
<img src="https://sebastianraschka.com/images/LLMs-from-scratch-images/setup/uv-setup/uv-run-check.png?1" width="700" height="auto" alt="Uv install">
Or, if you don't want to type `uv run python` every time you execute code, manually activate the virtual environment first.
On macOS/Linux:
```bash
source .venv/bin/activate
```
On Windows (PowerShell):
```bash
.venv\Scripts\activate
```
Then, run:
```bash
python setup/02_installing-python-libraries/python_environment_check.py
```
<br>
**Launching JupyterLab**
@ -150,19 +128,7 @@ uv run jupyter lab
**Skipping the `uv run` command**
If you find typing `uv run` cumbersome and want to run scripts via
```bash
python script.py
```
and launch JupyterLab via
```bash
jupyter lab
```
instead, you can activate the environment manually.
If you find typing `uv run` cumbersome, you can manually activate the virtual environment as described below.
On macOS/Linux:
@ -176,6 +142,20 @@ On Windows (PowerShell):
.venv\Scripts\activate
```
Then, you can run scripts via
```bash
python script.py
```
and launch JupyterLab via
```bash
jupyter lab
```
&nbsp;

View File

@ -3,7 +3,6 @@
# - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch
from importlib.metadata import PackageNotFoundError, import_module, version as get_version
from os.path import dirname, exists, join, realpath
from packaging.version import parse as version_parse
@ -22,20 +21,45 @@ def get_packages(pkgs):
"""
Returns a dictionary mapping package names (in lowercase) to their installed version.
"""
PACKAGE_MODULE_OVERRIDES = {
"tensorflow-cpu": ["tensorflow", "tensorflow_cpu"],
}
result = {}
for p in pkgs:
try:
# Try to import the package
imported = import_module(p)
# Determine possible module names to try.
module_names = PACKAGE_MODULE_OVERRIDES.get(p.lower(), [p])
version_found = None
for module_name in module_names:
try:
version = getattr(imported, "__version__", None)
if version is None:
version = get_version(p)
result[p.lower()] = version
except PackageNotFoundError:
result[p.lower()] = "0.0"
except ImportError:
result[p.lower()] = "0.0"
imported = import_module(module_name)
version_found = getattr(imported, "__version__", None)
if version_found is None:
try:
version_found = get_version(module_name)
except PackageNotFoundError:
version_found = None
if version_found is not None:
break # Stop if we successfully got a version.
except ImportError:
# Also try replacing hyphens with underscores as a fallback.
alt_module = module_name.replace("-", "_")
if alt_module != module_name:
try:
imported = import_module(alt_module)
version_found = getattr(imported, "__version__", None)
if version_found is None:
try:
version_found = get_version(alt_module)
except PackageNotFoundError:
version_found = None
if version_found is not None:
break
except ImportError:
continue
continue
if version_found is None:
version_found = "0.0"
result[p.lower()] = version_found
return result