# ollamafy / Dockerfile
# unclemusclez's picture
# Update Dockerfile
# bcfd37f verified
# Base image: CUDA 12.4.1 + cuDNN development toolchain on Ubuntu 22.04.
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

# Build-time only: keeps apt from prompting during the image build without
# leaking the setting into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# OS packages (alphabetical for diffability). The apt package lists are removed
# in the same layer so they do not add to the image size.
# NOTE(review): nvidia-driver-535 is kept as-is; with the NVIDIA container
# runtime the driver is normally supplied by the host - confirm it is needed.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        git-lfs \
        nvidia-driver-535 \
        python-is-python3 \
        python3-pip \
        python3.10 \
        python3.10-venv \
        wget && \
    rm -rf /var/lib/apt/lists/*
# Download the Ollama binary and mark it executable in a single layer.
# --fail makes curl exit non-zero on an HTTP error, so the build stops instead
# of saving an error page as /usr/bin/ollama.
RUN curl -fsSL https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama && \
    chmod +x /usr/bin/ollama
# Create the unprivileged account (UID 1000) and switch to it for the
# remaining build steps and at runtime.
ENV USER='ollamafy'
RUN useradd -m -u 1000 ${USER}
USER ${USER}

# HOME gets its own ENV instruction: variable substitution inside one ENV
# instruction uses the values from *before* that instruction, so defining HOME
# together with PATH/APPDIR would expand ${HOME} to an empty string there.
ENV HOME=/home/${USER}
ENV PATH=${HOME}/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH} \
    APPDIR=${HOME}/app

# Application directory; created by WORKDIR if it does not exist.
WORKDIR ${APPDIR}
# Prepends ${HOME}/.local/bin to PYTHONPATH.
# NOTE(review): PYTHONPATH normally lists module directories rather than a
# bin directory - confirm this entry is intentional.
ENV PYTHONPATH=${HOME}/.local/bin:${PYTHONPATH}

# Python tooling and application dependencies. These do not read anything from
# the build context, so they run before the COPY below and stay cached when
# only application sources change. --no-cache-dir keeps pip's download cache
# out of the image layers.
RUN python -m pip install --no-cache-dir -U pip setuptools wheel
RUN python -m pip install --no-cache-dir \
        "APScheduler" \
        "gradio[oauth]>=4.28.0" \
        "gradio_huggingfacehub_search==0.0.7" \
        "hf-transfer" \
        "huggingface-hub"

# Application sources, owned by the unprivileged user (UID 1000).
COPY --chown=1000 . ${APPDIR}
# Fetch llama.cpp into ${APPDIR}/llama.cpp (the current WORKDIR is ${APPDIR}).
# A shallow clone is enough to build from the current tip; the former
# `git pull` right after the clone was a no-op and has been dropped.
# NOTE(review): the clone is unpinned, so the build tracks upstream HEAD -
# consider pinning a known-good tag/commit for reproducible builds.
RUN git clone --depth 1 https://github.com/ggerganov/llama.cpp

# Place groups_merged.txt from the build context inside the checkout, owned by
# the same UID as the rest of the application tree.
COPY --chown=1000 groups_merged.txt ${APPDIR}/llama.cpp/

WORKDIR ${APPDIR}/llama.cpp

# Python requirements shipped with llama.cpp.
RUN python -m pip install --no-cache-dir -r requirements.txt

# Build only the llama-quantize target with the CUDA switches set. Parallelism
# is capped at the number of CPUs: a bare `make -j` spawns unlimited jobs and
# can exhaust memory on the builder.
RUN GGML_CUDA=1 LLAMA_CUDA=1 make -j"$(nproc)" llama-quantize
# --- Runtime environment, grouped by the component each variable names ---

# Python / Hugging Face Hub
ENV PYTHONUNBUFFERED=1 \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    SYSTEM=spaces

# Gradio
ENV GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface

# tqdm
ENV TQDM_POSITION=-1 \
    TQDM_MININTERVAL=1

# NVIDIA / CUDA: library directories are prepended to any LD_LIBRARY_PATH
# already defined by earlier instructions or the base image.
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    NVIDIA_VISIBLE_DEVICES=all

# Ollama: model store lives under the application directory.
ENV OLLAMA_MODELS=${APPDIR}/.ollama/models \
    OLLAMA_HOST=127.0.0.1:0
# Return to the application directory so app.py is resolved relative to it.
WORKDIR ${APPDIR}

# Exec (JSON-array) form: python runs as PID 1 and receives SIGTERM from
# `docker stop` directly, instead of being wrapped in `/bin/sh -c`.
ENTRYPOINT ["python", "app.py"]