policyimport / backup_app.py
kivilaid's picture
Rename app.py to backup_app.py
97c4faa verified
import streamlit as st
import zipfile
import json
from io import BytesIO
import chardet
import pandas as pd
import openpyxl
# Set page configuration to wide mode by default
st.set_page_config(layout="wide")
# Function to extract and combine JSON files from a ZIP file
def extract_and_combine_zip(zip_file):
combined_data = {}
with zipfile.ZipFile(zip_file) as z:
# Extract all JSON files, ignoring macOS-specific hidden files
json_files = [name for name in z.namelist() if name.endswith('.json') and not name.startswith('__MACOSX')]
for json_file in json_files:
with z.open(json_file) as f:
content = f.read()
encoding = chardet.detect(content)['encoding']
try:
decoded_content = content.decode(encoding)
data = json.loads(decoded_content)
combined_data = flatten_json(data, combined_data)
except (UnicodeDecodeError, json.JSONDecodeError) as e:
st.warning(f"Warning: Could not decode {json_file}. Error: {str(e)}")
return combined_data
# Improved function to flatten and merge JSON data
def flatten_json(data, flattened=None, prefix=''):
if flattened is None:
flattened = {}
if isinstance(data, dict):
for key, value in data.items():
new_key = f"{prefix}.{key}" if prefix else key
if isinstance(value, (dict, list)):
flatten_json(value, flattened, new_key)
elif value is not None and value != "":
flattened[new_key] = value
elif isinstance(data, list):
for i, item in enumerate(data):
new_key = f"{prefix}[{i}]" if prefix else str(i)
flatten_json(item, flattened, new_key)
elif data is not None and data != "":
flattened[prefix] = data
return flattened
# Function to convert DataFrame to Excel
def to_excel(df):
output = BytesIO()
with pd.ExcelWriter(output, engine='openpyxl') as writer:
df.to_excel(writer, index=False, sheet_name='Sheet1')
processed_data = output.getvalue()
return processed_data
# Streamlit app setup
st.title("ZIP JSON Extractor & Flattener")
# File uploader widget
uploaded_zip = st.file_uploader("Upload ZIP file containing JSON files:", type="zip")
if uploaded_zip:
# Combine and flatten JSON data
flattened_json = extract_and_combine_zip(uploaded_zip)
# Create a DataFrame from the flattened JSON data
df = pd.DataFrame([flattened_json])
# Convert all object columns to string to avoid Arrow conversion issues
for col in df.select_dtypes(include=['object']).columns:
df[col] = df[col].astype(str)
# Create a downloadable JSON
flattened_json_str = json.dumps(flattened_json, indent=4)
json_bytes = flattened_json_str.encode()
# Create columns for download buttons
col1, col2 = st.columns(2)
# Button to download the flattened JSON data
with col1:
st.download_button(
label="Download Flattened JSON",
data=BytesIO(json_bytes),
file_name='flattened_json.json',
mime='application/json'
)
# Button to download the Excel file
with col2:
excel_data = to_excel(df)
st.download_button(
label="Download Excel File",
data=excel_data,
file_name='flattened_data.xlsx',
mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
)
# Add a success message
st.success("JSON data has been successfully processed and flattened into a single object. You can now download the flattened JSON file or the Excel file.")