import streamlit as st import zipfile import json from io import BytesIO import chardet import pandas as pd import openpyxl # Set page configuration to wide mode by default st.set_page_config(layout="wide") # Function to extract and combine JSON files from a ZIP file def extract_and_combine_zip(zip_file): combined_data = {} with zipfile.ZipFile(zip_file) as z: # Extract all JSON files, ignoring macOS-specific hidden files json_files = [name for name in z.namelist() if name.endswith('.json') and not name.startswith('__MACOSX')] for json_file in json_files: with z.open(json_file) as f: content = f.read() encoding = chardet.detect(content)['encoding'] try: decoded_content = content.decode(encoding) data = json.loads(decoded_content) combined_data = flatten_json(data, combined_data) except (UnicodeDecodeError, json.JSONDecodeError) as e: st.warning(f"Warning: Could not decode {json_file} in {zip_file.name}. Error: {str(e)}") return combined_data # Improved function to flatten and merge JSON data def flatten_json(data, flattened=None, prefix=''): if flattened is None: flattened = {} if isinstance(data, dict): for key, value in data.items(): new_key = f"{prefix}.{key}" if prefix else key if isinstance(value, (dict, list)): flatten_json(value, flattened, new_key) elif value is not None and value != "": flattened[new_key] = value elif isinstance(data, list): for i, item in enumerate(data): new_key = f"{prefix}[{i}]" if prefix else str(i) flatten_json(item, flattened, new_key) elif data is not None and data != "": flattened[prefix] = value return flattened # Function to convert DataFrame to Excel def to_excel(df): output = BytesIO() with pd.ExcelWriter(output, engine='openpyxl') as writer: df.to_excel(writer, index=False, sheet_name='Sheet1') processed_data = output.getvalue() return processed_data # Streamlit app setup st.title("Multi-ZIP JSON Extractor & Flattener") # File uploader widget for multiple files uploaded_zips = st.file_uploader("Upload ZIP files containing JSON files:", type="zip", accept_multiple_files=True) if uploaded_zips: # Process all uploaded ZIP files all_flattened_json = [] for zip_file in uploaded_zips: flattened_json = extract_and_combine_zip(zip_file) flattened_json['zip_file_name'] = zip_file.name # Add ZIP file name to the flattened JSON all_flattened_json.append(flattened_json) # Create a DataFrame from the flattened JSON data df = pd.DataFrame(all_flattened_json) # Convert all object columns to string to avoid Arrow conversion issues for col in df.select_dtypes(include=['object']).columns: df[col] = df[col].astype(str) # Create a downloadable JSON flattened_json_str = json.dumps(all_flattened_json, indent=4) json_bytes = flattened_json_str.encode() # Display processing summary st.write(f"Processed {len(uploaded_zips)} ZIP file(s)") st.write(f"Total rows in the output: {len(all_flattened_json)}") # Create columns for download buttons col1, col2 = st.columns(2) # Button to download the flattened JSON data with col1: st.download_button( label="Download Flattened JSON", data=BytesIO(json_bytes), file_name='flattened_json.json', mime='application/json' ) # Button to download the Excel file with col2: excel_data = to_excel(df) st.download_button( label="Download Excel File", data=excel_data, file_name='flattened_data.xlsx', mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' ) # Add a success message st.success(f"All ZIP files have been successfully processed. Each ZIP file is represented as a separate row in the output. You can now download the flattened JSON file or the Excel file.") # Display a preview of the DataFrame st.write("Preview of the processed data:") st.dataframe(df.head())