diff --git a/README.md b/README.md
index 0865a1b12d606c04ab956e77f1c52df7713c3f23..dcbcbf51668e4401f3f6ce141cf24830d4fd478b 100644
--- a/README.md
+++ b/README.md
@@ -38,10 +38,17 @@ Running Llama 2 with gradio web UI on GPU or CPU from anywhere (Linux/Windows/Ma
 
 ## Install
+### Method 1: From [PyPI](https://pypi.org/project/llama2-wrapper/)
 ```
+pip install llama2-wrapper
+```
+### Method 2: From Source
+```
+git clone https://github.com/liltom-eth/llama2-webui.git
+cd llama2-webui
 pip install -r requirements.txt
 ```
-
+### Install Issues
 `bitsandbytes >= 0.39` may not work on older NVIDIA GPUs. In that case, to use `LOAD_IN_8BIT`, you may have to downgrade like this:
 
 - `pip install bitsandbytes==0.38.1`
 
@@ -52,8 +59,6 @@ pip uninstall bitsandbytes
 pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.0-py3-none-win_amd64.whl
 ```
 
-If run on CPU, install llama.cpp additionally by `pip install llama-cpp-python`.
-
 ## Download Llama-2 Models
 
 Llama 2 is a collection of pre-trained and fine-tuned generative text models ranging in scale from 7 billion to 70 billion parameters.
@@ -140,11 +145,8 @@ Make sure you have downloaded the 4-bit model from `Llama-2-7b-Chat-GPTQ` and se
 
 ### Run on CPU
 
-Run Llama-2 model on CPU requires [llama.cpp](https://github.com/ggerganov/llama.cpp) dependency and [llama.cpp Python Bindings](https://github.com/abetlen/llama-cpp-python).
+Running the Llama-2 model on CPU requires the [llama.cpp](https://github.com/ggerganov/llama.cpp) dependency and the [llama.cpp Python Bindings](https://github.com/abetlen/llama-cpp-python), both of which are already installed via `requirements.txt`.
 
-```bash
-pip install llama-cpp-python
-```
 Download GGML models like `llama-2-7b-chat.ggmlv3.q4_0.bin` following the [Download Llama-2 Models](#download-llama-2-models) section.
 
 `llama-2-7b-chat.ggmlv3.q4_0.bin` model requires at least 6 GB RAM to run on CPU.
diff --git a/poetry.lock b/poetry.lock
index 8c0e4f817f2caf252cba12e74c7cbdab0f6229a2..1c9a5814bfbc5afface1f0699eb2faf1d33fc661 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -279,23 +279,6 @@ files = [
     {file = "bitsandbytes-0.40.2.tar.gz", hash = "sha256:808ac966272c63bccb2be6d77365275a4c28f1fa348d33656e670de3cab40fc4"},
 ]
 
-[[package]]
-name = "bitsandbytes"
-version = "0.41.0"
-description = "k-bit optimizers and matrix multiplication routines."
-optional = false
-python-versions = "*"
-files = [
-    {file = "bitsandbytes-0.41.0-py3-none-win_amd64.whl", hash = "sha256:a75ed614d691bf0acdbc22c91b546563994fb2d49fa0a0432d01ee24cdd2d8d8"},
-]
-
-[package.dependencies]
-scipy = "*"
-
-[package.source]
-type = "url"
-url = "https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.0-py3-none-win_amd64.whl"
-
 [[package]]
 name = "certifi"
 version = "2023.7.22"
@@ -3049,4 +3032,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.13"
-content-hash = "5482b38ff725a66f0a85c8b00208518d1ebcfac36357a1ea245dcb9c2b47f8bd"
+content-hash = "597fac1c79c7631f7fbb7d07af9d22b2aab9abcf32065955d821d2cfaaea7d5b"
diff --git a/requirements.txt b/requirements.txt
index 5b38c3030fdbdd104103ef447f45db16ed63ff70..f9bd00355f7e649162c4e7e3eb3634a82da34119 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,3 +9,4 @@ torch==2.0.1
 transformers==4.31.0
 tqdm==4.65.0
 python-dotenv==1.0.0
+llama-cpp-python==0.1.77
\ No newline at end of file
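Note on the `Install Issues` section added above: to check whether the pinned `bitsandbytes` build actually works on a given GPU before falling back to `0.38.1`, importing the package is usually enough, since it runs its CUDA setup at import time and surfaces incompatibilities as warnings or a failed import. A minimal sketch, assuming a CUDA build of torch is installed; the `__version__` attribute is an assumption worth verifying for your build:

```python
# Importing bitsandbytes triggers its CUDA setup; on unsupported older GPUs
# this is where warnings or an import failure typically appear.
import torch
import bitsandbytes as bnb

print("torch sees CUDA:", torch.cuda.is_available())
print("bitsandbytes version:", bnb.__version__)  # assumed attribute; check your build
```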
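Since this change folds `llama-cpp-python` (pinned at `0.1.77`) into the base requirements, the CPU path can be smoke-tested right after `pip install -r requirements.txt`. A minimal sketch using the 0.1.x `Llama` API, assuming `llama-2-7b-chat.ggmlv3.q4_0.bin` has already been downloaded per the README's model-download section; the model path, prompt, context size, and token budget are placeholder values:

```python
# Load a GGML chat model through the newly bundled llama.cpp bindings.
from llama_cpp import Llama

# Model path is an assumption: point it at whichever GGML file you downloaded.
llm = Llama(model_path="./llama-2-7b-chat.ggmlv3.q4_0.bin", n_ctx=2048)

# llama-cpp-python 0.1.x returns an OpenAI-style completion dict.
output = llm("Q: What is the capital of France? A:", max_tokens=32, stop=["Q:"])
print(output["choices"][0]["text"].strip())
```

If this prints a sensible completion, the pinned dependency and the GGML model are wired up correctly; the web UI's CPU backend uses the same bindings.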