Spaces:

sercetexam9
/

fake_news

No application file

App Files Files Community

sercetexam9 committed on Sep 1, 2024

Commit

1ba24b2

verified ·

1 Parent(s): 89f3791

Update fakenewsdetection/app.py

Browse files

Files changed (1) hide show

fakenewsdetection/app.py +32 -4

fakenewsdetection/app.py CHANGED Viewed

@@ -30,8 +30,35 @@ nltk.download('punkt')
 nltk.download('stopwords')
 from fastapi import FastAPI, Request
 import pickle
-model = pickle.load(open("https://huggingface.co/spaces/sercetexam9/fakenewsdetection/resolve/main/fakenews.sav", 'rb'))
 import gradio as gr
 def predict(text):
     text=pd.DataFrame([text], columns=["text"])
     text=text["text"]
@@ -103,7 +130,8 @@ def predict(text):
 demo = gr.Interface(
     fn=predict,
-    inputs=["text"],
-    outputs=["text"],
 )
-demo.launch(share=True)

 nltk.download('stopwords')
 from fastapi import FastAPI, Request
 import pickle
+# NOTE(review): pickle.load can execute arbitrary code from the file -- only load a trusted model file.
+with open("/content/fakenewsdetection/fakenews.sav", 'rb') as model_file:  # handle is closed once the model is loaded
+    model = pickle.load(model_file)
 import gradio as gr
+def wordpre(text):
+    """
+    Clean a raw text string before tokenization.
+
+    The text is lowercased, square-bracketed spans are deleted, every
+    non-word character is replaced by a space, and then the URL, HTML-tag,
+    punctuation, newline and digit-bearing-word removals run in that order.
+
+    NOTE(review): once non-word characters have become spaces, the URL,
+    HTML-tag and newline patterns can no longer match and the punctuation
+    step only strips underscores. The order is left untouched here because
+    changing it would change the output; presumably it mirrors the
+    preprocessing the pickled model was trained with -- confirm before
+    reordering.
+
+    Args:
+        text: the string to clean.
+
+    Returns:
+        The cleaned string.
+    """
+    # Raw strings keep the regex escapes from being read as (invalid) string escapes.
+    text = text.lower()
+    text = re.sub(r'\[.*?\]', '', text)  # drop [bracketed] spans
+    text = re.sub(r'\W', ' ', text)  # non-word characters -> space
+    text = re.sub(r'https?://\S+|www\.\S+', '', text)  # URLs
+    text = re.sub(r'<.*?>+', '', text)  # HTML tags
+    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)  # punctuation
+    text = re.sub(r'\n', '', text)  # newlines
+    text = re.sub(r'\w*\d\w*', '', text)  # words containing a digit
+    return text
+def lower_and_tokenize(data):
+    """
+    Lowercase each entry of ``data``, split it into tokens with
+    ``nltk.word_tokenize`` and drop English stopwords.
+
+    ``data`` is accessed through ``.str`` and ``.apply`` (presumably a
+    pandas Series of strings -- confirm against the caller). The result of
+    the final ``.apply`` is returned: one list of remaining tokens per entry.
+    """
+    # Normalize case, then tokenize every entry.
+    lowered = data.str.lower()
+    tokenized = lowered.apply(nltk.word_tokenize)
+    # Build the stopword set once so each membership test is a set lookup.
+    english_stopwords = set(stopwords.words('english'))
+
+    def drop_stopwords(tokens):
+        return [token for token in tokens if token not in english_stopwords]
+
+    return tokenized.apply(drop_stopwords)
 def predict(text):
     text=pd.DataFrame([text], columns=["text"])
     text=text["text"]
 demo = gr.Interface(  # Gradio UI that passes the input text to predict() and shows its result
     fn=predict,
+    inputs=[gr.Textbox(label="Text", lines=3)],  # 3-line input box labelled "Text"
+    outputs=[gr.Textbox(label="Predict", lines=1)],  # single-line output box labelled "Predict"
 )
+if __name__ == "__main__":  # launch only when run as a script, not when imported
+    demo.launch(share=True)  # share=True requests a public share link (see Gradio launch docs)