How to Merge Multiple JSON Files in Python
Combining data from multiple JSON files is a common task in data processing and analysis.
This guide presents various Python techniques for merging JSON files, including:
- Iterating through files and merging their contents.
- Handling different JSON structures (lists of dictionaries, dictionaries, or line-delimited JSON).
- Using the pandas library for more complex merging scenarios.
Merging JSON Files Containing Lists
If your JSON files each contain a list of objects, the most straightforward approach is to read each file, extend a master list, and then write the combined list to a new file.
Example Files:
[
{"id": 1, "name": "Alice", "salary": "100"},
{"id": 2, "name": "Bob", "salary": "200"}
]
[
{"id": 3, "name": "Carl", "salary": "300"},
{"id": 4, "name": "Dan", "salary": "400"}
]
Python Code:
import json
import glob
import os
def merge_json_lists(file_paths, output_file):
    """Merge JSON files that each contain a top-level array.

    Reads every path in ``file_paths``, concatenates the arrays, and writes
    the combined list to ``output_file`` as pretty-printed JSON.

    Files that are not valid JSON, or whose top-level value is not a list,
    are skipped with a message instead of aborting the whole merge.
    """
    merged_contents = []
    for file_path in file_paths:
        with open(file_path, 'r', encoding='utf-8') as file_in:
            try:
                data = json.load(file_in)
            except json.JSONDecodeError:
                # Consistent with merge_json_dicts below: one corrupt file
                # should not abort merging the remaining files.
                print(f"Error decoding json file: {file_path}")
                continue
        if isinstance(data, list):  # Important: Check if it's a list
            merged_contents.extend(data)
        else:
            print(f"Warning: {file_path} does not contain a JSON array. Skipping.")
    with open(output_file, 'w', encoding='utf-8') as file_out:
        json.dump(merged_contents, file_out, indent=4)
# Get all .json file paths from the "json-files" directory
paths = glob.glob('json-files/*.json')
# Merge them and write to the output file.
# NOTE(review): create_json_files() is defined and invoked further down,
# AFTER this merge runs — on a fresh run json-files/ may not exist yet, so
# glob returns [] and an empty array is written. Confirm the intended order.
merge_json_lists(paths, 'employees_final.json')
# The code below shows what the employee json files look like
def create_json_files():
    """Create the example JSON files (each holding a list of employees)."""
    os.makedirs("json-files", exist_ok=True)  # Create directory if not exists
    data1 = [
        {"id": 1, "name": "Alice", "salary": "100"},
        {"id": 2, "name": "Bob", "salary": "200"}
    ]
    data2 = [
        {"id": 3, "name": "Carl", "salary": "300"},
        {"id": 4, "name": "Dan", "salary": "400"}
    ]
    # encoding="utf-8" keeps the writers consistent with the utf-8 readers
    # above (the platform default encoding may differ, e.g. on Windows).
    with open("json-files/employees_1.json", "w", encoding="utf-8") as f:
        json.dump(data1, f, indent=4)
    with open("json-files/employees_2.json", "w", encoding="utf-8") as f:
        json.dump(data2, f, indent=4)

# Creates the example json files
create_json_files()
glob.glob('json-files/*.json'): This uses the glob module to find all .json files within the json-files directory. This is much more robust than hardcoding filenames. It also handles the case where there are many files.
isinstance(data, list): This crucial check ensures that we're only extending the merged_contents list with other lists. If a file contains a dictionary (or some other structure), we print a warning and skip it. This prevents errors.
indent=4: This makes the output JSON file nicely formatted with an indent of 4 spaces.
Merging JSON Files Containing Dictionaries
If your JSON files each contain a dictionary, and you want to combine them into a single dictionary (e.g., merging configuration files), you'll need to decide how to handle key collisions. Here's an example that merges dictionaries, giving precedence to later files in case of duplicate keys:
import json
import glob
import os
def merge_json_dicts(file_paths, output_file):
    """Merge JSON files that each contain a top-level object into one dict.

    Later files take precedence when keys collide. Files that are invalid
    JSON, or whose top-level value is not an object, are reported and
    skipped rather than aborting the merge.
    """
    combined = {}
    for file_path in file_paths:
        with open(file_path, 'r', encoding='utf-8') as source:
            try:
                payload = json.load(source)
            except json.JSONDecodeError:
                print(f"Error decoding json file: {file_path}")
                continue  # Skip to the next file
        # Guard clause: only dictionaries can be merged with update().
        if not isinstance(payload, dict):
            print(f"Warning: {file_path} does not contain a JSON object. Skipping.")
            continue
        combined.update(payload)  # Later files overwrite existing keys.
    with open(output_file, 'w', encoding='utf-8') as sink:
        json.dump(combined, sink, indent=4)
# Get all .json file paths.
paths = glob.glob('json-files/*.json')
# Merge and write to the new output file.
# NOTE(review): the example files are created by create_json_files() below,
# which runs AFTER this merge — the ordering may need to be swapped.
merge_json_dicts(paths, 'employees_final.json')
# Create example json files (dictionaries, not lists)
def create_json_files():
    """Create example JSON files that each hold a single object."""
    os.makedirs("json-files", exist_ok=True)
    data1 = {"id": 1, "name": "Alice", "salary": "100"}
    data2 = {"id": 3, "name": "Carl", "salary": "300"}  # Same keys as data1
    # encoding="utf-8" keeps the writers consistent with the utf-8 readers.
    with open("json-files/employees_1.json", "w", encoding="utf-8") as f:
        json.dump(data1, f, indent=4)
    with open("json-files/employees_2.json", "w", encoding="utf-8") as f:
        json.dump(data2, f, indent=4)

create_json_files()  # Create example files
- The dictionaries are merged using the update() method, which overrides previous values when keys are the same.
Merging Line-Delimited JSON Files
Sometimes, JSON data is stored with one JSON object per line (often called "JSON Lines" or "ndjson"). Here's how to handle that:
import json
import glob
import os
def merge_json_lines(file_paths, output_file):
    """Merge line-delimited JSON ("JSON Lines"/ndjson) files into one array.

    Every input line is parsed as a separate JSON value; lines that are not
    valid JSON are reported and skipped. The collected values are written
    to ``output_file`` as a single pretty-printed JSON array.
    """
    records = []
    for file_path in file_paths:
        with open(file_path, 'r', encoding='utf-8') as source:
            for raw_line in source:
                try:
                    # Each line is an independent JSON document.
                    records.append(json.loads(raw_line))
                except json.JSONDecodeError:
                    print(f"Skipping invalid JSON line in {file_path}")
    with open(output_file, 'w', encoding='utf-8') as sink:
        json.dump(records, sink, indent=4)
# Gather every .json file in the json-files directory.
paths = glob.glob('json-files/*.json')
# NOTE(review): runs before create_json_files() below is defined/called —
# confirm the intended execution order.
merge_json_lines(paths, 'employees_final.json')
def create_json_files():
    """Create example line-delimited JSON files (one object per line)."""
    # Create directory if it doesn't exist
    os.makedirs("json-files", exist_ok=True)
    data1 = {"id": 1, "name": "Alice", "salary": "100"}
    data2 = {"id": 2, "name": "Bob", "salary": "200"}
    # encoding="utf-8" keeps the writers consistent with the utf-8 readers.
    with open("json-files/employees_1.json", "w", encoding="utf-8") as f:
        f.write(json.dumps(data1) + "\n")  # Write each object to a new line.
        f.write(json.dumps(data2) + "\n")
    data3 = {"id": 3, "name": "Carl", "salary": "300"}
    data4 = {"id": 4, "name": "Dan", "salary": "400"}
    # Create json-files/employees_2.json
    with open("json-files/employees_2.json", "w", encoding="utf-8") as f:
        f.write(json.dumps(data3) + '\n')  # Write each object to a new line.
        f.write(json.dumps(data4) + '\n')

create_json_files()
- The loop for line in file_in: iterates over the file line by line.
- json.loads(line) parses each individual line as a JSON object.
- The result is a merged list of dictionaries.
Merging JSON Files with Pandas (Advanced)
Pandas can be used for more complex merging, especially if you need to handle different structures or perform data transformations. However, for simple concatenation, the above methods are usually sufficient. Pandas is overkill for the simple cases. This example shows one way to do it, if you already know your JSON files are lists of dictionaries with the same keys:
import json
import pandas as pd
import glob
import os
def merge_json_pandas(file_paths, output_file):
    """Merge JSON array files via pandas and write the combined records.

    Each input file is expected to contain a JSON array of objects with the
    same keys. Unreadable files and files whose top-level value is not a
    list are reported and skipped.
    """
    all_data = []
    for file_path in file_paths:
        with open(file_path, 'r', encoding='utf-8') as f:
            try:
                data = json.load(f)
            except (OSError, json.JSONDecodeError):
                # Narrow except: a bare `except:` would also swallow
                # KeyboardInterrupt/SystemExit and hide real bugs.
                print("Error reading file ", file_path)
                continue
        if isinstance(data, list):
            all_data.extend(data)  # Extend with the content of data
        else:
            print(f"Warning: {file_path} is not a json array.")
    df = pd.DataFrame(all_data)  # Convert the list into a DataFrame
    # orient="records" writes a JSON array of row objects, matching the
    # output shape of the non-pandas merge functions (the default,
    # orient="columns", would write column-oriented JSON instead).
    df.to_json(output_file, orient="records", indent=4)
# Gather every .json file in the json-files directory.
paths = glob.glob('json-files/*.json')
# NOTE(review): runs before create_json_files() below is defined/called —
# confirm the intended execution order.
merge_json_pandas(paths, 'employees_final.json')
# Create example json files (list of dictionaries)
def create_json_files():
    """Create example JSON array files for the pandas demo."""
    # Create directory if it doesn't exist
    os.makedirs("json-files", exist_ok=True)
    data1 = [
        {"id": 1, "name": "Alice", "salary": "100"},
        {"id": 2, "name": "Bob", "salary": "200"}
    ]
    data2 = [
        {"id": 3, "name": "Carl", "salary": "300"},
        {"id": 4, "name": "Dan", "salary": "400"}
    ]
    # encoding="utf-8" keeps the writers consistent with the utf-8 readers.
    with open("json-files/employees_1.json", "w", encoding="utf-8") as f:
        json.dump(data1, f, indent=4)
    with open("json-files/employees_2.json", "w", encoding="utf-8") as f:
        json.dump(data2, f, indent=4)

create_json_files()
- The code iterates over the json files and loads data from them using the json.load() method.
- The isinstance check ensures we only extend the list with JSON arrays, to avoid issues.
- Then the final list of data is converted to a pandas.DataFrame and saved to employees_final.json.
Conclusion
This article presented multiple ways to merge JSON files in Python.
- For simple concatenation of JSON arrays, iterating and extending a list is the most efficient method.
- For line-delimited JSON, iterate and parse each line.
- For more complex merging scenarios, consider using Pandas.
- Always validate the structure of your JSON files to avoid unexpected errors.
- The glob module is highly recommended for handling multiple files.