huu-ontocord committed on
Commit
8fd3f73
·
verified ·
1 Parent(s): a621f01

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -0
README.md CHANGED
@@ -8,6 +8,7 @@ if not os.path.exists("expert_classify.ftz"):
8
  os.system("wget http://dl.turkunlp.org/register-labeling-model/fasttext_model.bin")
9
  os.system("wget https://huggingface.co/ontocord/riverbed/resolve/main/rj_model.bin")
10
  os.system("wget https://huggingface.co/ontocord/riverbed/resolve/main/expert_classify.ftz")
 
11
 
12
  ### red pajama filter. pred_label "__label__wiki" is data we do not wish to keep.
13
  red_pajama_model = fasttext.load_model("rj_model.bin")
@@ -23,6 +24,11 @@ domain_model = fasttext.load_model("fasttext_model.bin")
23
  ### Pile domain such as github, arxiv, etc.
24
  pile_model = fasttext.load_model("expert_classify.ftz")
25
  (pred_label, pred_prob) = pile_model.predict(text)
 
 
 
 
 
26
  ```
27
 
28
  See the files here: https://huggingface.co/ontocord/riverbed/tree/main
 
8
  os.system("wget http://dl.turkunlp.org/register-labeling-model/fasttext_model.bin")
9
  os.system("wget https://huggingface.co/ontocord/riverbed/resolve/main/rj_model.bin")
10
  os.system("wget https://huggingface.co/ontocord/riverbed/resolve/main/expert_classify.ftz")
11
+ os.system("wget https://huggingface.co/kenhktsui/llm-data-textbook-quality-fasttext-classifer-v1/resolve/main/model_textbook_quality.bin")
12
 
13
  ### red pajama filter. pred_label "__label__wiki" is data we do not wish to keep.
14
  red_pajama_model = fasttext.load_model("rj_model.bin")
 
24
  ### Pile domain such as github, arxiv, etc.
25
  pile_model = fasttext.load_model("expert_classify.ftz")
26
  (pred_label, pred_prob) = pile_model.predict(text)
27
+
28
+ ### Textbook quality - e.g., textbooks are all you need
29
+ textbook_model = fasttext.load_model("model_textbook_quality.bin")
30
+ (pred_label, pred_prob) = textbook_model.predict(text)
31
+
32
  ```
33
 
34
  See the files here: https://huggingface.co/ontocord/riverbed/tree/main