diff --git a/README.md b/README.md
index 0865a1b12d606c04ab956e77f1c52df7713c3f23..dcbcbf51668e4401f3f6ce141cf24830d4fd478b 100644
--- a/README.md
+++ b/README.md
@@ -38,10 +38,17 @@ Running Llama 2 with gradio web UI on GPU or CPU from anywhere (Linux/Windows/Ma
 
 ## Install
+### Method 1: From [PyPI](https://pypi.org/project/llama2-wrapper/)
 ```
+pip install llama2-wrapper
+```
+### Method 2: From Source
+```
+git clone https://github.com/liltom-eth/llama2-webui.git
+cd llama2-webui
 pip install -r requirements.txt
 ```
-
+### Install Issues
 `bitsandbytes >= 0.39` may not work on older NVIDIA GPUs. In that case, to use `LOAD_IN_8BIT`, you may have to downgrade like this:
 
 - `pip install bitsandbytes==0.38.1`
 
@@ -52,8 +59,6 @@ pip uninstall bitsandbytes
 pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.0-py3-none-win_amd64.whl
 ```
 
-If run on CPU, install llama.cpp additionally by `pip install llama-cpp-python`.
-
 ## Download Llama-2 Models
 
 Llama 2 is a collection of pre-trained and fine-tuned generative text models ranging in scale from 7 billion to 70 billion parameters.
@@ -140,11 +145,8 @@ Make sure you have downloaded the 4-bit model from `Llama-2-7b-Chat-GPTQ` and se
 
 ### Run on CPU
 
-Run Llama-2 model on CPU requires [llama.cpp](https://github.com/ggerganov/llama.cpp) dependency and [llama.cpp Python Bindings](https://github.com/abetlen/llama-cpp-python).
+Running the Llama-2 model on CPU requires the [llama.cpp](https://github.com/ggerganov/llama.cpp) dependency and the [llama.cpp Python Bindings](https://github.com/abetlen/llama-cpp-python), both of which are already installed via `requirements.txt`.
 
-```bash
-pip install llama-cpp-python
-```
 Download GGML models like `llama-2-7b-chat.ggmlv3.q4_0.bin` following the [Download Llama-2 Models](#download-llama-2-models) section.
 
 `llama-2-7b-chat.ggmlv3.q4_0.bin` model requires at least 6 GB RAM to run on CPU.
diff --git a/poetry.lock b/poetry.lock
index 8c0e4f817f2caf252cba12e74c7cbdab0f6229a2..1c9a5814bfbc5afface1f0699eb2faf1d33fc661 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -279,23 +279,6 @@ files = [
     {file = "bitsandbytes-0.40.2.tar.gz", hash = "sha256:808ac966272c63bccb2be6d77365275a4c28f1fa348d33656e670de3cab40fc4"},
 ]
 
-[[package]]
-name = "bitsandbytes"
-version = "0.41.0"
-description = "k-bit optimizers and matrix multiplication routines."
-optional = false
-python-versions = "*"
-files = [
-    {file = "bitsandbytes-0.41.0-py3-none-win_amd64.whl", hash = "sha256:a75ed614d691bf0acdbc22c91b546563994fb2d49fa0a0432d01ee24cdd2d8d8"},
-]
-
-[package.dependencies]
-scipy = "*"
-
-[package.source]
-type = "url"
-url = "https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.0-py3-none-win_amd64.whl"
-
 [[package]]
 name = "certifi"
 version = "2023.7.22"
@@ -3049,4 +3032,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.13"
-content-hash = "5482b38ff725a66f0a85c8b00208518d1ebcfac36357a1ea245dcb9c2b47f8bd"
+content-hash = "597fac1c79c7631f7fbb7d07af9d22b2aab9abcf32065955d821d2cfaaea7d5b"
diff --git a/requirements.txt b/requirements.txt
index 5b38c3030fdbdd104103ef447f45db16ed63ff70..f9bd00355f7e649162c4e7e3eb3634a82da34119 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,3 +9,4 @@ torch==2.0.1
 transformers==4.31.0
 tqdm==4.65.0
 python-dotenv==1.0.0
+llama-cpp-python==0.1.77
\ No newline at end of file
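Note on the `Install Issues` section added above: to check whether the pinned `bitsandbytes` build actually works on a given GPU before falling back to `0.38.1`, importing the package is usually enough, since it runs its CUDA setup at import time and surfaces incompatibilities as warnings or a failed import. A minimal sketch, assuming a CUDA build of torch is installed; the `__version__` attribute is an assumption worth verifying for your build:

```python
# Importing bitsandbytes triggers its CUDA setup; on unsupported older GPUs
# this is where warnings or an import failure typically appear.
import torch
import bitsandbytes as bnb

print("torch sees CUDA:", torch.cuda.is_available())
print("bitsandbytes version:", bnb.__version__)  # assumed attribute; check your build
```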
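Since this change folds `llama-cpp-python` (pinned at `0.1.77`) into the base requirements, the CPU path can be smoke-tested right after `pip install -r requirements.txt`. A minimal sketch using the 0.1.x `Llama` API, assuming `llama-2-7b-chat.ggmlv3.q4_0.bin` has already been downloaded per the README's model-download section; the model path, prompt, context size, and token budget are placeholder values:

```python
# Load a GGML chat model through the newly bundled llama.cpp bindings.
from llama_cpp import Llama

# Model path is an assumption: point it at whichever GGML file you downloaded.
llm = Llama(model_path="./llama-2-7b-chat.ggmlv3.q4_0.bin", n_ctx=2048)

# llama-cpp-python 0.1.x returns an OpenAI-style completion dict.
output = llm("Q: What is the capital of France? A:", max_tokens=32, stop=["Q:"])
print(output["choices"][0]["text"].strip())
```

If this prints a sensible completion, the pinned dependency and the GGML model are wired up correctly; the web UI's CPU backend uses the same bindings.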