# Spaces status (page-scrape residue, not part of the Dockerfile): Runtime error
# Base image: CUDA 12.4.1 + cuDNN "devel" variant on Ubuntu 22.04.
# Presumably the devel variant is chosen because llama.cpp is compiled with
# CUDA enabled further down in this file.
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

# Build-time only: keeps apt/dpkg from prompting during the build without
# leaving DEBIAN_FRONTEND in the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# OS-level dependencies. update + upgrade + install share one layer so a stale
# package index is never reused, and the index is removed in that same layer
# so it does not stay in the image.
# NOTE(review): nvidia-driver-535 is installed inside the image; GPU driver
# libraries are normally provided by the host's container runtime. Confirm this
# package is really required before keeping it.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        git-lfs \
        nvidia-driver-535 \
        python-is-python3 \
        python3-pip \
        python3.10 \
        python3.10-venv \
        wget && \
    rm -rf /var/lib/apt/lists/*
# Download the Ollama Linux/amd64 binary and make it executable.
# -f makes curl exit non-zero on an HTTP error instead of saving the error page
# as /usr/bin/ollama; download and chmod share one layer so the file is stored
# only once.
# NOTE(review): the download is unpinned and not checksum-verified; consider
# pinning a release and verifying its digest.
RUN curl -fsSL https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama && \
    chmod +x /usr/bin/ollama
# Create the unprivileged account (UID 1000) and switch to it for every
# following build step and for the running container.
ENV USER='ollamafy'
RUN useradd -m -u 1000 ${USER}
USER ${USER}

# HOME is set in its own ENV instruction on purpose: within a single ENV
# instruction Docker substitutes variables using the values they had BEFORE
# that instruction, so defining HOME and using ${HOME} together would expand
# ${HOME} to an empty string (PATH=/.local/bin:..., APPDIR=/app).
ENV HOME=/home/${USER}
ENV PATH=${HOME}/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH} \
    APPDIR=${HOME}/app

# Application directory; the build context is copied in owned by UID 1000.
WORKDIR ${APPDIR}
COPY --chown=1000 . ${APPDIR}

# NOTE(review): PYTHONPATH points at a bin directory rather than a
# site-packages directory; kept as-is, confirm it is intentional.
ENV PYTHONPATH=${HOME}/.local/bin:${PYTHONPATH}
# Refresh the packaging toolchain first, then install the app's Python
# dependencies. --no-cache-dir on both steps keeps pip's download cache out of
# the image layers.
RUN python -m pip install --no-cache-dir -U pip setuptools wheel
RUN python -m pip install --no-cache-dir \
        "APScheduler" \
        "gradio[oauth]>=4.28.0" \
        "gradio_huggingfacehub_search==0.0.7" \
        "hf-transfer" \
        "huggingface-hub"
# Fetch llama.cpp into the current working directory and build only the
# llama-quantize tool with CUDA enabled.
# No `git pull` after the clone: the clone already checks out the latest
# upstream commit, and both steps are cached/invalidated together.
# NOTE(review): the clone is unpinned (upstream HEAD); confirm the
# `make llama-quantize` target still exists upstream, or pin a known-good commit.
RUN git clone https://github.com/ggerganov/llama.cpp
COPY groups_merged.txt llama.cpp/.
WORKDIR ${APPDIR}/llama.cpp
RUN python -m pip install --no-cache-dir -r requirements.txt
# Bound the job count to the available CPUs; a bare `-j` places no limit on
# parallel compile jobs and can exhaust memory on the build host.
RUN GGML_CUDA=1 LLAMA_CUDA=1 make -j"$(nproc)" llama-quantize
# Runtime environment, grouped by the component each variable is named after.
# Values are unchanged; only the grouping differs.

# Python, Hugging Face Hub, tqdm and platform marker.
ENV PYTHONUNBUFFERED=1 \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    TQDM_POSITION=-1 \
    TQDM_MININTERVAL=1 \
    SYSTEM=spaces

# Gradio settings.
ENV GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface

# NVIDIA / CUDA library lookup and device visibility.
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    NVIDIA_VISIBLE_DEVICES=all

# Ollama model directory (under the app directory) and listen address.
ENV OLLAMA_MODELS=${APPDIR}/.ollama/models \
    OLLAMA_HOST=127.0.0.1:0
# Return to the application directory and start the app.
# Exec (JSON-array) form runs python directly as PID 1 so it receives stop
# signals itself, instead of being wrapped in `/bin/sh -c`.
WORKDIR ${APPDIR}
ENTRYPOINT ["python", "app.py"]