diff --git a/ch05/11_qwen3/README.md b/ch05/11_qwen3/README.md
index b05a651..4db5b07 100644
--- a/ch05/11_qwen3/README.md
+++ b/ch05/11_qwen3/README.md
@@ -212,16 +212,16 @@ token_ids = generate_text_simple(
 Note that the peak memory usage is only listed for Nvidia CUDA devices, as it is easier to measure there. However, the memory usage on other devices is likely similar, since they use a similar precision format and the KV cache storage dominates for the generated 150-token text (although different devices may implement matrix multiplication differently, which can result in different peak memory requirements).
 
 | Model      | Mode              | Hardware        | Tokens/sec | GPU Memory (VRAM) |
-|------------|-------------------|-----------------|------------|-------------------|
+| ---------- | ----------------- | --------------- | ---------- | ----------------- |
 | Qwen3Model | Regular           | Mac Mini M4 CPU | 1          | -                 |
-| Qwen3Model | Regular compiled  | Mac Mini M4 CPU | -          | -                 |
+| Qwen3Model | Regular compiled  | Mac Mini M4 CPU | 1          | -                 |
 | Qwen3Model | KV cache          | Mac Mini M4 CPU | 80         | -                 |
-| Qwen3Model | KV cache compiled | Mac Mini M4 CPU | -          | -                 |
+| Qwen3Model | KV cache compiled | Mac Mini M4 CPU | 82         | -                 |
 |            |                   |                 |            |                   |
 | Qwen3Model | Regular           | Mac Mini M4 GPU | 21         | -                 |
-| Qwen3Model | Regular compiled  | Mac Mini M4 GPU | -          | -                 |
+| Qwen3Model | Regular compiled  | Mac Mini M4 GPU | Error      | -                 |
 | Qwen3Model | KV cache          | Mac Mini M4 GPU | 32         | -                 |
-| Qwen3Model | KV cache compiled | Mac Mini M4 GPU | -          | -                 |
+| Qwen3Model | KV cache compiled | Mac Mini M4 GPU | Error      | -                 |
 |            |                   |                 |            |                   |
 | Qwen3Model | Regular           | Nvidia A100 GPU | 25         | 1.49 GB           |
 | Qwen3Model | Regular compiled  | Nvidia A100 GPU | 107        | 1.99 GB           |
diff --git a/pkg/llms_from_scratch/llama3.py b/pkg/llms_from_scratch/llama3.py
index 21a03e0..88509e1 100644
--- a/pkg/llms_from_scratch/llama3.py
+++ b/pkg/llms_from_scratch/llama3.py
@@ -309,7 +309,7 @@ class Llama3Tokenizer:
             special_tokens=self.special,
         )
 
-    def encode(self, text, bos=False, eos=False):
+    def encode(self, text, bos=False, eos=False, **kwargs):
         ids = ([self.special["<|begin_of_text|>"]] if bos else []) \
             + self.model.encode(text)
         if eos:
diff --git a/pyproject.toml b/pyproject.toml
index db995d4..f8e321f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "llms-from-scratch"
-version = "1.0.13"
+version = "1.0.14"
 description = "Implement a ChatGPT-like LLM in PyTorch from scratch, step by step"
 readme = "README.md"
 requires-python = ">=3.10"
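
The Tokens/sec and GPU Memory (VRAM) columns in the README table can be reproduced with a small timing harness. The sketch below is an assumption rather than part of this patch: `benchmark_generation` is a hypothetical helper showing one plausible way to measure both numbers with standard PyTorch APIs (`torch.cuda.reset_peak_memory_stats` and `torch.cuda.max_memory_allocated`), wrapping whatever generation call is being benchmarked (e.g. the README's `generate_text_simple`) in a zero-argument callable.

```python
import time
import torch


def benchmark_generation(generate_fn, num_new_tokens, device):
    # Hypothetical helper (not part of the repo): times one generation run
    # and reports tokens/sec, plus peak VRAM on CUDA devices only -- which
    # matches the table, where the memory column is blank for Mac Mini M4.
    if device.type == "cuda":
        torch.cuda.reset_peak_memory_stats(device)
        torch.cuda.synchronize(device)  # start timing from an idle device

    start = time.perf_counter()
    generate_fn()  # e.g. lambda: generate_text_simple(model=model, ...)
    if device.type == "cuda":
        torch.cuda.synchronize(device)  # wait for queued kernels to finish
    elapsed = time.perf_counter() - start

    print(f"{num_new_tokens / elapsed:.1f} tokens/sec")
    if device.type == "cuda":
        peak_gb = torch.cuda.max_memory_allocated(device) / 1e9
        print(f"Peak VRAM: {peak_gb:.2f} GB")
```

For the runs in the table, `num_new_tokens` would be 150, matching the 150-token generation the README note refers to.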
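
The `**kwargs` added to `Llama3Tokenizer.encode` makes the signature tolerant of extra keyword arguments, presumably so that call sites written against tiktoken-style tokenizers (whose `encode` accepts arguments such as `allowed_special`) can use this tokenizer without raising a `TypeError`. The unknown keywords are accepted and ignored, as in this hypothetical call site:

```python
# Hypothetical call site: `allowed_special` is a tiktoken-style keyword that
# Llama3Tokenizer.encode now silently accepts (and ignores) via **kwargs.
ids = tokenizer.encode("Hello, world", bos=True, allowed_special="all")
```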