Generate synthetic data for Accounts Receivable
For those who would like to generate dummy/synthetic data for Accounts Receivable, use Python script below.
It can be run in Google Collab notebook if you don’t have local Python environment setup:
Edit variables - date period, # of customers and transactions, reporting date (default is today).
Run the code
Navigate to Files and download the 3 CSV files generated.
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
# Set random seed for reproducibility
np.random.seed(42)
# Configuration
start_date = datetime(2023, 1, 1)
end_date = datetime(2025, 3, 31)
num_customers = 75
num_transactions = 10000
# Create customer segments
customer_segments = ['Enterprise', 'Mid-Market', 'Small Business', 'Government']
customer_credit_terms = [30, 45, 60, 90, 180]
# Create customers
customers = []
for i in range(1, num_customers + 1):
segment = random.choice(customer_segments)
credit_terms = random.choice(customer_credit_terms)
customers.append({
'CustomerID': f'CUST{i:04d}',
'CustomerName': f'Customer {i}',
'Segment': segment,
'CreditTerms': credit_terms
})
customers_df = pd.DataFrame(customers)
# Create invoices with seasonal patterns
invoices = []
transaction_id = 1
for _ in range(num_transactions):
# Select a random customer
customer = random.choice(customers)
# Create random invoice date
days_range = (end_date - start_date).days
random_days = random.randint(0, days_range)
invoice_date = start_date + timedelta(days=random_days)
# Add seasonality to invoice amounts
month = invoice_date.month
# Higher amounts in Q4 (Oct-Dec)
if month in [10, 11, 12]:
base_amount = np.random.normal(10000, 3000)
# Lower amounts in Q1 (Jan-Mar)
elif month in [1, 2, 3]:
base_amount = np.random.normal(5000, 1500)
# Medium amounts rest of year
else:
base_amount = np.random.normal(7500, 2000)
# Adjust amount based on customer segment
if customer['Segment'] == 'Enterprise':
amount = base_amount * 2.5
elif customer['Segment'] == 'Mid-Market':
amount = base_amount * 1.5
elif customer['Segment'] == 'Government':
amount = base_amount * 1.8
else: # Small Business
amount = base_amount * 0.7
# Round to 2 decimal places
invoice_amount = round(max(amount, 100), 2) # Minimum $100
# Calculate due date based on credit terms
due_date = invoice_date + timedelta(days=customer['CreditTerms'])
# Determine if paid and payment date with seasonal collection patterns
is_paid = random.random() < 0.85 # 85% of invoices eventually get paid
if is_paid:
# Create seasonal payment behavior
# December has faster payments (bonus incentives)
if due_date.month == 12:
days_to_pay = max(int(np.random.normal(-5, 10)), -15) # More likely to pay early
# January has slower payments
elif due_date.month == 1:
days_to_pay = max(int(np.random.normal(15, 12)), -5) # More likely to pay late
# Quarter-end months have slightly faster payments
elif due_date.month in [3, 6, 9]:
days_to_pay = int(np.random.normal(0, 15))
# Normal months
else:
days_to_pay = int(np.random.normal(5, 20))
# Adjust by customer segment
if customer['Segment'] == 'Enterprise':
days_to_pay += int(np.random.normal(0, 5))
elif customer['Segment'] == 'Small Business':
days_to_pay += int(np.random.normal(10, 8))
elif customer['Segment'] == 'Government':
days_to_pay += int(np.random.normal(15, 10))
payment_date = due_date + timedelta(days=days_to_pay)
# Ensure payment date is not before invoice date
if payment_date < invoice_date:
payment_date = invoice_date + timedelta(days=random.randint(0, 5))
# Ensure payment date is not in the future
if payment_date > datetime.now():
days_to_pay = None
payment_date = None
is_paid = False
else:
days_to_pay = None
payment_date = None
invoices.append({
'TransactionID': f'INV{transaction_id:06d}',
'CustomerID': customer['CustomerID'],
'InvoiceDate': invoice_date,
'DueDate': due_date,
'InvoiceAmount': invoice_amount,
'IsPaid': is_paid,
'PaymentDate': payment_date,
'DaysToPayment': days_to_pay
})
transaction_id += 1
# Create DataFrame
invoices_df = pd.DataFrame(invoices)
# Calculate aging buckets using current date for reporting
reporting_date = datetime.now() # Use current date for aging snapshot
def calculate_aging(row):
if row['IsPaid']:
if row['PaymentDate'] <= reporting_date:
return 'Paid'
days_outstanding = (reporting_date - row['DueDate']).days
if days_outstanding <= 0:
return 'Current'
elif days_outstanding <= 30:
return '1-30 Days'
elif days_outstanding <= 60:
return '31-60 Days'
elif days_outstanding <= 90:
return '61-90 Days'
else:
return '90+ Days'
invoices_df['AgingBucket'] = invoices_df.apply(calculate_aging, axis=1)
# Create monthly sales summary for forecasting
invoices_df['YearMonth'] = invoices_df['InvoiceDate'].dt.strftime('%Y-%m')
monthly_sales = invoices_df.groupby('YearMonth')['InvoiceAmount'].sum().reset_index()
monthly_sales = monthly_sales.sort_values('YearMonth')
# Save to CSV
invoices_df.to_csv('synthetic_ar_transactions.csv', index=False)
customers_df.to_csv('synthetic_customers.csv', index=False)
monthly_sales.to_csv('synthetic_monthly_sales.csv', index=False)
print("Generated synthetic AR dataset with:")
print(f"- {len(customers_df)} customers")
print(f"- {len(invoices_df)} invoice transactions")
print(f"- Data spanning from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
print("Files saved: synthetic_ar_transactions.csv, synthetic_customers.csv, synthetic_monthly_sales.csv")