Spaces:

unclemusclez
/

ollamafy

Runtime error

App Files Files Community

unclemusclez committed on Jul 6, 2024

Commit

6b057bb

verified ·

1 Parent(s): 3d7c439

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -60

app.py CHANGED Viewed

@@ -18,20 +18,15 @@ from textwrap import dedent
 HF_TOKEN = os.environ.get("HF_TOKEN")
 OLLAMA_USERNAME = os.environ.get("OLLAMA_USERNAME").lower()
-ollama_pubkey = open("/home/ollamafy/.ollama/id_ed25519.pub", "r")
-# ollama_pubkey_read = print(ollama_pubkey.read())
-def process_model(model_id, q_method, latest, maintainer, oauth_token: gr.OAuthToken | None):
-#def process_model(model_id, q_method, latest):
     if oauth_token.token is None:
         raise ValueError("You must be logged in to use GGUF-my-repo")
     model_name = model_id.split('/')[-1]
-    model_maintainer = model_id.split('/')[-2]
-    ollama_model_name = model_maintainer.lower() + '_' + model_name.lower()
     try:
         api = HfApi(token=oauth_token.token)
@@ -57,51 +52,65 @@ def process_model(model_id, q_method, latest, maintainer, oauth_token: gr.OAuthT
         print(f"Current working directory: {os.getcwd()}")
         print(f"Model directory contents: {os.listdir(model_name)}")
-        model_file = model_name + '_modelfile'
-        # model_path = f"{HOME}/.cache/huggingface/hub/{model_id}"
-        model_modelfile = open(model_file, "w")
-        model_modelfile_path = f"FROM {HOME}/.cache/huggingface/hub/{model_id}"
-        model_modelfile.write(model_modelfile_path)
-        model_modelfile.close()
-        print(model_modelfile_path)
-        if q_method == "FP16":
-            ollama_conversion = f"ollama create  -f {model_file} {OLLAMA_USERNAME}/{ollama_model_name}:{q_method.lower()}"
-        else:
-            ollama_conversion = f"ollama create  -q {q_method} -f {model_file} {OLLAMA_USERNAME}/{ollama_model_name}:{q_method.lower()}"
-        ollama_conversion_result = subprocess.run(ollama_conversion, shell=True, capture_output=True)
-        print(ollama_conversion_result)
-        if ollama_conversion_result.returncode != 0:
-            raise Exception(f"Error converting to Ollama: {ollama_conversion_result.stderr}")
-        print("Model converted to Ollama successfully!")
-        if maintainer == True:
-            ollama_push =  f"ollama push  {OLLAMA_USERNAME}/{model_name}:{q_method.lower()}"
-        else:
-            ollama_push =  f"ollama push  {OLLAMA_USERNAME}/{ollama_model_name}:{q_method.lower()}"
-        ollama_push_result = subprocess.run(ollama_push, shell=True, capture_output=True)
-        print(ollama_push_result)
-        if ollama_push_result.returncode != 0:
-            raise Exception(f"Error converting to Ollama: {ollama_push_result.stderr}")
-        print("Model pushed to Ollama library successfully!")
-        if latest == True:
-            ollama_copy =  f"ollama cp  {OLLAMA_USERNAME}/{model_id.lower()}:{q_method.lower()} {OLLAMA_USERNAME}/{model_id.lower()}:latest"
-            ollama_copy_result = subprocess.run(ollama_copy, shell=True, capture_output=True)
-            print(ollama_copy_result)
-            if ollama_copy_result.returncode != 0:
-                raise Exception(f"Error converting to Ollama: {ollama_push_result.stderr}")
-            print("Model pushed to Ollama library successfully!")
-            if maintainer == True:
-                llama_push_latest =  f"ollama push  {OLLAMA_USERNAME}/{model_name}:latest"
-            else:
-                ollama_push_latest =  f"ollama push  {OLLAMA_USERNAME}/{ollama_model_name}:latest"
-            ollama_push_latest_result = subprocess.run(ollama_push_latest, shell=True, capture_output=True)
-            print(ollama_push_latest_result)
-            if ollama_push_latest_result.returncode != 0:
-                raise Exception(f"Error converting to Ollama: {ollama_push_result.stderr}")
-            print("Model pushed to Ollama library successfully!")
     except Exception as e:
@@ -116,10 +125,10 @@ css="""/* Custom CSS to allow scrolling */
 """
 # Create Gradio interface
 with gr.Blocks(css=css) as demo:
     gr.Markdown("You must be logged in to use Ollamafy.")
     gr.Markdown(ollama_pubkey.read().rstrip())
     ollama_pubkey.close()
-    gr.LoginButton(min_width=250)
     model_id = HuggingfaceHubSearch(
         label="Hub Model ID",
@@ -127,29 +136,32 @@ with gr.Blocks(css=css) as demo:
         search_type="model",
     )
-    q_method = gr.Dropdown(
-        ["FP16", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_1", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_1", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
-        label="Quantization Method",
-        info="Ollama Quantization Types",
         value="FP16",
         filterable=False,
-        visible=True
     )
     latest = gr.Checkbox(
         value=False,
         label="Latest",
         info="Copy Model to Ollama Library with the :latest tag"
     )
     maintainer = gr.Checkbox(
         value=False,
         label="Maintainer",
         info="This is your original repository on both Hugging Face and Ollama. (DO NOT USE!!!)"
     )
     iface = gr.Interface(
         fn=process_model,
         inputs=[
             model_id,
-            q_method,
             latest,
             maintainer
         ],

 HF_TOKEN = os.environ.get("HF_TOKEN")
 OLLAMA_USERNAME = os.environ.get("OLLAMA_USERNAME").lower()
+ollama_pubkey = open("/home/user/.ollama/id_ed25519.pub", "r")
+ollama_q_methods = ["FP16","Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_1", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_1", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"]
+def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, ollamafy, latest, maintainer, oauth_token: gr.OAuthToken | None):
     if oauth_token.token is None:
         raise ValueError("You must be logged in to use GGUF-my-repo")
     model_name = model_id.split('/')[-1]
+    fp16 = f"{model_name}.fp16.gguf"
     try:
         api = HfApi(token=oauth_token.token)
         print(f"Current working directory: {os.getcwd()}")
         print(f"Model directory contents: {os.listdir(model_name)}")
+        conversion_script = "convert_hf_to_gguf.py"
+        fp16_conversion = f"python llama.cpp/{conversion_script} {model_name} --outtype f16 --outfile {fp16}"
+        result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
+        print(result)
+        if result.returncode != 0:
+            raise Exception(f"Error converting to fp16: {result.stderr}")
+        print("Model converted to fp16 successfully!")
+        print(f"Converted model path: {fp16}")
+        ### Ollamafy ###
+        if ollama_model:
+            model_maintainer = model_id.split('/')[-2]
+            ollama_model_name = model_maintainer.lower() + '_' + model_name.lower()
+            ollama_modelfile_name = model_name + '_modelfile'
+            # model_path = f"{HOME}/.cache/huggingface/hub/{model_id}"
+            ollama_modelfile = open(ollama_modelfile_name, "w")
+            # ollama_modelfile_path = quantized_gguf_path
+            ollama_modelfile.write(quantized_gguf_path)
+            ollama_modelfile.close()
+            print(quantized_gguf_path)
+            for ollama_q_method in ollama_q_methods:
+                if ollama_q_method == "FP16":
+                    ollama_conversion = f"ollama create  -q {ollama_q_method} -f {model_file} {OLLAMA_USERNAME}/{ollama_model_name}:{ollama_q_method.lower()}"
+                else:
+                    ollama_conversion = f"ollama create -f {model_file} {OLLAMA_USERNAME}/{ollama_model_name}:{ollama_q_method.lower()}"
+                ollama_conversion_result = subprocess.run(ollama_conversion, shell=True, capture_output=True)
+                print(ollama_conversion_result)
+                if ollama_conversion_result.returncode != 0:
+                    raise Exception(f"Error converting to Ollama: {ollama_conversion_result.stderr}")
+                print("Model converted to Ollama successfully!")
+                if maintainer:
+                    ollama_push =  f"ollama push  {OLLAMA_USERNAME}/{model_name}:{q_method.lower()}"
+                else:
+                    ollama_push =  f"ollama push  {OLLAMA_USERNAME}/{ollama_model_name}:{q_method.lower()}"
+                ollama_push_result = subprocess.run(ollama_push, shell=True, capture_output=True)
+                print(ollama_push_result)
+                if ollama_push_result.returncode != 0:
+                    raise Exception(f"Error converting to Ollama: {ollama_push_result.stderr}")
+                print("Model pushed to Ollama library successfully!")
+                if latest == True:
+                    ollama_copy =  f"ollama cp  {OLLAMA_USERNAME}/{model_id.lower()}:{q_method.lower()} {OLLAMA_USERNAME}/{model_id.lower()}:latest"
+                    ollama_copy_result = subprocess.run(ollama_copy, shell=True, capture_output=True)
+                    print(ollama_copy_result)
+                    if ollama_copy_result.returncode != 0:
+                        raise Exception(f"Error converting to Ollama: {ollama_push_result.stderr}")
+                    print("Model pushed to Ollama library successfully!")
+                    if maintainer == True:
+                        llama_push_latest =  f"ollama push  {OLLAMA_USERNAME}/{model_name}:latest"
+                    else:
+                        ollama_push_latest =  f"ollama push  {OLLAMA_USERNAME}/{ollama_model_name}:latest"
+                    ollama_push_latest_result = subprocess.run(ollama_push_latest, shell=True, capture_output=True)
+                    print(ollama_push_latest_result)
+                    if ollama_push_latest_result.returncode != 0:
+                        raise Exception(f"Error converting to Ollama: {ollama_push_result.stderr}")
+                    print("Model pushed to Ollama library successfully!")
     except Exception as e:
 """
 # Create Gradio interface
 with gr.Blocks(css=css) as demo:
+    gr.LoginButton(min_width=250)
     gr.Markdown("You must be logged in to use Ollamafy.")
     gr.Markdown(ollama_pubkey.read().rstrip())
     ollama_pubkey.close()
     model_id = HuggingfaceHubSearch(
         label="Hub Model ID",
         search_type="model",
     )
+    ollama_q_method
+        latest = gr.Dropdown(
+        ollama_q_methods,
+        label="Ollama Lastest Quantization Method",
+        info="Chose which quantization will be labled with the latest tag in the Ollama Library",
         value="FP16",
         filterable=False,
+        visible=False
     )
     latest = gr.Checkbox(
         value=False,
         label="Latest",
         info="Copy Model to Ollama Library with the :latest tag"
     )
     maintainer = gr.Checkbox(
         value=False,
         label="Maintainer",
         info="This is your original repository on both Hugging Face and Ollama. (DO NOT USE!!!)"
     )
     iface = gr.Interface(
         fn=process_model,
         inputs=[
             model_id,
+            ollama_q_method,
             latest,
             maintainer
         ],