Commit a8b9cdf3 authored by Tom's avatar Tom
Browse files

update readme

parent de3b67fe
Showing with 11 additions and 25 deletions
+11 -25
......@@ -38,10 +38,17 @@ Running Llama 2 with gradio web UI on GPU or CPU from anywhere (Linux/Windows/Ma
## Install
### Method 1: From [PyPI](https://pypi.org/project/llama2-wrapper/)
```
pip install llama2-wrapper
```
### Method 2: From Source:
```
git clone https://github.com/liltom-eth/llama2-webui.git
cd llama2-webui
pip install -r requirements.txt
```
### Install Issues:
`bitsandbytes >= 0.39` may not work on older NVIDIA GPUs. In that case, to use `LOAD_IN_8BIT`, you may have to downgrade like this:
- `pip install bitsandbytes==0.38.1`
......@@ -52,8 +59,6 @@ pip uninstall bitsandbytes
pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.0-py3-none-win_amd64.whl
```
If running on CPU, additionally install llama.cpp via `pip install llama-cpp-python`.
## Download Llama-2 Models
Llama 2 is a collection of pre-trained and fine-tuned generative text models ranging in scale from 7 billion to 70 billion parameters.
......@@ -140,11 +145,8 @@ Make sure you have downloaded the 4-bit model from `Llama-2-7b-Chat-GPTQ` and se
### Run on CPU
Running the Llama-2 model on CPU requires the [llama.cpp](https://github.com/ggerganov/llama.cpp) dependency and [llama.cpp Python Bindings](https://github.com/abetlen/llama-cpp-python).
Running the Llama-2 model on CPU requires the [llama.cpp](https://github.com/ggerganov/llama.cpp) dependency and [llama.cpp Python Bindings](https://github.com/abetlen/llama-cpp-python), which are already installed.
```bash
pip install llama-cpp-python
```
Download GGML models like `llama-2-7b-chat.ggmlv3.q4_0.bin` following [Download Llama-2 Models](#download-llama-2-models) section. `llama-2-7b-chat.ggmlv3.q4_0.bin` model requires at least 6 GB RAM to run on CPU.
......
......@@ -279,23 +279,6 @@ files = [
{file = "bitsandbytes-0.40.2.tar.gz", hash = "sha256:808ac966272c63bccb2be6d77365275a4c28f1fa348d33656e670de3cab40fc4"},
]
[[package]]
name = "bitsandbytes"
version = "0.41.0"
description = "k-bit optimizers and matrix multiplication routines."
optional = false
python-versions = "*"
files = [
{file = "bitsandbytes-0.41.0-py3-none-win_amd64.whl", hash = "sha256:a75ed614d691bf0acdbc22c91b546563994fb2d49fa0a0432d01ee24cdd2d8d8"},
]
[package.dependencies]
scipy = "*"
[package.source]
type = "url"
url = "https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.0-py3-none-win_amd64.whl"
[[package]]
name = "certifi"
version = "2023.7.22"
......@@ -3049,4 +3032,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "5482b38ff725a66f0a85c8b00208518d1ebcfac36357a1ea245dcb9c2b47f8bd"
content-hash = "597fac1c79c7631f7fbb7d07af9d22b2aab9abcf32065955d821d2cfaaea7d5b"
......@@ -9,3 +9,4 @@ torch==2.0.1
transformers==4.31.0
tqdm==4.65.0
python-dotenv==1.0.0
llama-cpp-python==0.1.77
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment