OmPatel committed on
Commit
3c54e95
·
verified ·
1 Parent(s): 0462a2f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -20
app.py CHANGED
@@ -1,15 +1,21 @@
1
  from flask import Flask, request, render_template, jsonify
2
  import torch
3
  from nltk.tokenize import word_tokenize
4
- from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer, PegasusTokenizerFast, T5Tokenizer, T5ForConditionalGeneration, MBartForConditionalGeneration, MBart50TokenizerFast
5
  from LDict import find_legal_terms, legal_terms_lower
6
  import nltk
7
- import re
8
- import logging
 
 
 
 
9
  logging.basicConfig(level=logging.ERROR)
10
 
11
- nltk.download('punkt')
12
- nltk.download('punkt_tab')
 
 
13
 
14
  app = Flask(__name__)
15
 
@@ -54,22 +60,17 @@ def summarize_text(text, method):
54
  inputs_legal = port_tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
55
  summary_ids_legal = model_port.generate(inputs_legal["input_ids"], max_length=250, num_beams=4, early_stopping=True)
56
  Summarized_method2 = port_tokenizer.decode(summary_ids_legal[0], skip_special_tokens=True)
57
- print("\n\n\n Summarized MEthod2",Summarized_method2, "\n\n\n\n")
58
  cleaned_summary2 = remove_parentheses(Summarized_method2)
59
- print("\n\n\n Cleaned Summarized MEthod2",cleaned_summary2, "\n\n\n\n")
60
  #Paraphrase
61
  p_inputs = tokenizer_t5.encode(cleaned_summary2, return_tensors="pt", max_length=512, truncation=True)
62
  p_summary_ids = model_t5.generate(p_inputs, max_length=150, min_length=50, length_penalty=2.0, num_beams=4, early_stopping=True)
63
  method2 = tokenizer_t5.decode(p_summary_ids[0], skip_special_tokens=True)
64
- print("\n\n\n Summarized Paraphrased MEthod2",method2, "\n\n\n\n")
65
  return method2
66
 
67
  elif method == "method1":
68
  summarization_pipeline = pipeline('summarization', model=model_pegasus, tokenizer=tokenizer_pegasus, device=0 if device == "cuda" else -1)
69
  method1 = summarization_pipeline(text, max_length=100, min_length=30, truncation=True)[0]['summary_text']
70
- print("\n\n\n Summarized MEthod1",method1, "\n\n\n\n")
71
  cleaned_summary1 = remove_parentheses(method1)
72
- print("\n\n\n Summarized Cleaned MEthod1",cleaned_summary1, "\n\n\n\n")
73
  return cleaned_summary1
74
 
75
 
@@ -86,17 +87,13 @@ def index():
86
  if request.method == 'POST':
87
  try:
88
  input_text = request.form['input_text']
89
- logging.info(f"Received data for translation: {input_text}") # Log incoming data
90
  method = request.form['method']
91
 
92
  simplified_text = simplify_text(input_text)
93
- logging.info(f"Received data for translation: {simplified_text}")
94
  summarized_text = summarize_text(simplified_text, method)
95
- logging.info(f"Received data for translation: {summarized_text}")
96
 
97
  return jsonify({
98
- "summarized_text": summarized_text,
99
- })
100
  except Exception as e:
101
  logging.error(f"Error occurred: {e}", exc_info=True)
102
  return jsonify({"error": str(e)}), 500
@@ -106,18 +103,15 @@ def index():
106
  def translate():
107
  try:
108
  data = request.get_json()
109
- logging.info(f"Received data for translation: {data}") # Log incoming data
110
  text = data['text']
111
  translated_text = translate_to_hindi(text)
112
 
113
  return jsonify({
114
- "translated_text": translated_text
115
- })
116
  except Exception as e:
117
  logging.error(f"Error occurred during translation: {e}", exc_info=True)
118
  return jsonify({"error": str(e)}), 500
119
 
120
 
121
  if __name__ == '__main__':
122
- app.run(port=5003)
123
-
 
1
  from flask import Flask, request, render_template, jsonify
2
  import torch
3
  from nltk.tokenize import word_tokenize
4
+ from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer, T5Tokenizer, T5ForConditionalGeneration, MBartForConditionalGeneration, MBart50TokenizerFast
5
  from LDict import find_legal_terms, legal_terms_lower
6
  import nltk
7
+ import re,os, logging
8
+
9
+ # Set environment variables for writable directories
10
+ os.environ["TRANSFORMERS_CACHE"] = "/tmp/transformers_cache"
11
+ nltk.data.path.append("/tmp/nltk_data")
12
+
13
  logging.basicConfig(level=logging.ERROR)
14
 
15
+ # Download necessary NLTK data
16
+ nltk.download('punkt', download_dir="/tmp/nltk_data")
17
+ nltk.download('punkt_tab', download_dir="/tmp/nltk_data")
18
+
19
 
20
  app = Flask(__name__)
21
 
 
60
  inputs_legal = port_tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
61
  summary_ids_legal = model_port.generate(inputs_legal["input_ids"], max_length=250, num_beams=4, early_stopping=True)
62
  Summarized_method2 = port_tokenizer.decode(summary_ids_legal[0], skip_special_tokens=True)
 
63
  cleaned_summary2 = remove_parentheses(Summarized_method2)
 
64
  #Paraphrase
65
  p_inputs = tokenizer_t5.encode(cleaned_summary2, return_tensors="pt", max_length=512, truncation=True)
66
  p_summary_ids = model_t5.generate(p_inputs, max_length=150, min_length=50, length_penalty=2.0, num_beams=4, early_stopping=True)
67
  method2 = tokenizer_t5.decode(p_summary_ids[0], skip_special_tokens=True)
 
68
  return method2
69
 
70
  elif method == "method1":
71
  summarization_pipeline = pipeline('summarization', model=model_pegasus, tokenizer=tokenizer_pegasus, device=0 if device == "cuda" else -1)
72
  method1 = summarization_pipeline(text, max_length=100, min_length=30, truncation=True)[0]['summary_text']
 
73
  cleaned_summary1 = remove_parentheses(method1)
 
74
  return cleaned_summary1
75
 
76
 
 
87
  if request.method == 'POST':
88
  try:
89
  input_text = request.form['input_text']
 
90
  method = request.form['method']
91
 
92
  simplified_text = simplify_text(input_text)
 
93
  summarized_text = summarize_text(simplified_text, method)
 
94
 
95
  return jsonify({
96
+ "summarized_text": summarized_text, })
 
97
  except Exception as e:
98
  logging.error(f"Error occurred: {e}", exc_info=True)
99
  return jsonify({"error": str(e)}), 500
 
103
  def translate():
104
  try:
105
  data = request.get_json()
 
106
  text = data['text']
107
  translated_text = translate_to_hindi(text)
108
 
109
  return jsonify({
110
+ "translated_text": translated_text})
 
111
  except Exception as e:
112
  logging.error(f"Error occurred during translation: {e}", exc_info=True)
113
  return jsonify({"error": str(e)}), 500
114
 
115
 
116
  if __name__ == '__main__':
117
+ app.run(port=5003)