Spaces:
Sleeping
Sleeping
import streamlit as st | |
import zipfile | |
import json | |
from io import BytesIO | |
import chardet | |
import pandas as pd | |
import openpyxl | |
# Set page configuration to wide mode by default | |
st.set_page_config(layout="wide") | |
# Function to extract and combine JSON files from a ZIP file | |
def extract_and_combine_zip(zip_file): | |
combined_data = {} | |
with zipfile.ZipFile(zip_file) as z: | |
# Extract all JSON files, ignoring macOS-specific hidden files | |
json_files = [name for name in z.namelist() if name.endswith('.json') and not name.startswith('__MACOSX')] | |
for json_file in json_files: | |
with z.open(json_file) as f: | |
content = f.read() | |
encoding = chardet.detect(content)['encoding'] | |
try: | |
decoded_content = content.decode(encoding) | |
data = json.loads(decoded_content) | |
combined_data = flatten_json(data, combined_data) | |
except (UnicodeDecodeError, json.JSONDecodeError) as e: | |
st.warning(f"Warning: Could not decode {json_file}. Error: {str(e)}") | |
return combined_data | |
# Improved function to flatten and merge JSON data | |
def flatten_json(data, flattened=None, prefix=''): | |
if flattened is None: | |
flattened = {} | |
if isinstance(data, dict): | |
for key, value in data.items(): | |
new_key = f"{prefix}.{key}" if prefix else key | |
if isinstance(value, (dict, list)): | |
flatten_json(value, flattened, new_key) | |
elif value is not None and value != "": | |
flattened[new_key] = value | |
elif isinstance(data, list): | |
for i, item in enumerate(data): | |
new_key = f"{prefix}[{i}]" if prefix else str(i) | |
flatten_json(item, flattened, new_key) | |
elif data is not None and data != "": | |
flattened[prefix] = data | |
return flattened | |
# Function to convert DataFrame to Excel | |
def to_excel(df): | |
output = BytesIO() | |
with pd.ExcelWriter(output, engine='openpyxl') as writer: | |
df.to_excel(writer, index=False, sheet_name='Sheet1') | |
processed_data = output.getvalue() | |
return processed_data | |
# Streamlit app setup | |
st.title("ZIP JSON Extractor & Flattener") | |
# File uploader widget | |
uploaded_zip = st.file_uploader("Upload ZIP file containing JSON files:", type="zip") | |
if uploaded_zip: | |
# Combine and flatten JSON data | |
flattened_json = extract_and_combine_zip(uploaded_zip) | |
# Create a DataFrame from the flattened JSON data | |
df = pd.DataFrame([flattened_json]) | |
# Convert all object columns to string to avoid Arrow conversion issues | |
for col in df.select_dtypes(include=['object']).columns: | |
df[col] = df[col].astype(str) | |
# Create a downloadable JSON | |
flattened_json_str = json.dumps(flattened_json, indent=4) | |
json_bytes = flattened_json_str.encode() | |
# Create columns for download buttons | |
col1, col2 = st.columns(2) | |
# Button to download the flattened JSON data | |
with col1: | |
st.download_button( | |
label="Download Flattened JSON", | |
data=BytesIO(json_bytes), | |
file_name='flattened_json.json', | |
mime='application/json' | |
) | |
# Button to download the Excel file | |
with col2: | |
excel_data = to_excel(df) | |
st.download_button( | |
label="Download Excel File", | |
data=excel_data, | |
file_name='flattened_data.xlsx', | |
mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' | |
) | |
# Add a success message | |
st.success("JSON data has been successfully processed and flattened into a single object. You can now download the flattened JSON file or the Excel file.") |