Examples

This page provides complete working examples for common use cases.

Basic File Operations

from genro_storage import StorageManager

# Register a single local mount named 'data' rooted at ./data
storage = StorageManager()
storage.configure([
    {'name': 'data', 'type': 'local', 'path': './data'}
])

# Write a file
# Nodes are addressed as '<mount name>:<path inside the mount>'
node = storage.node('data:report.txt')
node.write("Q4 Report: Sales increased by 15%")

# Read the file
content = node.read()
print(content)

# Check file properties
# size and exists are read as attributes, not called as methods
print(f"Size: {node.size} bytes")
print(f"Exists: {node.exists}")

Multi-Cloud Setup

# Register three mounts: one local directory and two cloud buckets
storage = StorageManager()
storage.configure([
    {'name': 'local', 'type': 'local', 'path': '/tmp'},
    {'name': 's3', 'type': 's3', 'bucket': 'my-bucket'},
    {'name': 'gcs', 'type': 'gcs', 'bucket': 'my-backups'}
])

# Process locally
local = storage.node('local:processing/data.json')
local.write('{"result": "processed"}')

# Upload to S3
# The destination is given as a 'mount:path' string on another mount
local.copy_to('s3:results/data.json')

# Backup to GCS
local.copy_to('gcs:backups/data.json')

Directory Operations

# Create the nested directory (parents=True is passed to mkdir)
quarter_dir = storage.node('data:reports/2024/Q4')
quarter_dir.mkdir(parents=True)

# Walk the direct children of the reports directory
reports = storage.node('data:reports')
for child in reports.children():
    if not child.isfile:
        print(f"Dir: {child.basename}")
        continue
    # Only files get their size reported
    print(f"File: {child.basename} ({child.size} bytes)")

Configuration from File

Create config.yaml:

# Local mount named 'home'
- name: home
  type: local
  path: /home/user

# S3 mount named 'uploads'
- name: uploads
  type: s3
  bucket: prod-uploads
  region: eu-west-1

Load and use:

storage = StorageManager()
# configure() is given the path of the YAML file instead of an inline list
storage.configure('config.yaml')

# 'uploads' is the S3 mount declared in config.yaml
node = storage.node('uploads:users/123/avatar.jpg')

Working with External Tools

Use local_path() to integrate with external tools that require local filesystem access:

import subprocess

# Process video with ffmpeg
video = storage.node('s3:videos/input.mp4')
output = storage.node('s3:videos/output.mp4')

# local_path() yields a filesystem path that the external tool can open.
with video.local_path(mode='r') as input_path, \
        output.local_path(mode='w') as output_path:
    # check=True raises CalledProcessError when ffmpeg exits non-zero,
    # instead of silently carrying on with a missing or partial output file.
    subprocess.run(
        [
            'ffmpeg', '-i', input_path,
            '-vcodec', 'h264', '-crf', '28',
            output_path,
        ],
        check=True,
    )
# Changes automatically uploaded to S3

# Modify image in place
image = storage.node('uploads:photo.jpg')
with image.local_path(mode='rw') as path:
    # Same path is used as input and output; fail loudly if convert fails.
    subprocess.run(['convert', path, '-resize', '800x600', path], check=True)

Dynamic Paths for Multi-User Apps

Use callable paths that resolve at runtime:

def get_user_directory():
    """Return the storage directory for the user of the current context.

    The user id is read from Flask's ``g`` object each time the function
    is called, so the result follows whatever ``g.user_id`` holds then.
    """
    from flask import g  # or your framework's context
    user_id = g.user_id
    return f'/data/users/{user_id}'

# Pass the function object itself (no parentheses) as the mount path
storage.configure([
    {'name': 'user', 'type': 'local', 'path': get_user_directory}
])

# Different user, different directory!
# User 123: /data/users/123/
# User 456: /data/users/456/
user_prefs = storage.node('user:preferences.json')

Cloud Metadata Management

Set and retrieve custom metadata on cloud files:

# Set metadata
# set_metadata() is given a dict of string keys to string values
doc = storage.node('s3:documents/report.pdf')
doc.set_metadata({
    'Author': 'John Doe',
    'Department': 'Engineering',
    'Version': '1.0',
    'Classification': 'Internal'
})

# Get metadata
# The result is queried with .get(), i.e. used like a dict
metadata = doc.get_metadata()
print(f"Author: {metadata.get('Author')}")
print(f"Version: {metadata.get('Version')}")

URL Generation

Generate shareable URLs for files:

# Generate S3 presigned URL (expires in 1 hour)
# expires_in is given in seconds: 3600 s = 1 hour
file = storage.node('s3:documents/report.pdf')
url = file.url(expires_in=3600)
print(f"Share this: {url}")

# Custom expiration (24 hours)
# 86400 s = 24 hours
long_url = file.url(expires_in=86400)

# Convert file to data URI
logo = storage.node('local:assets/logo.png')
data_uri = logo.to_base64()
# Use in HTML: <img src="data:image/png;base64,...">

Download from URLs

Download files from the internet directly to storage:

# Download to local storage
# The node is the destination; fill_from_url() is given the source URL
local_file = storage.node('data:downloads/dataset.csv')
local_file.fill_from_url('https://example.com/data.csv')

# Download to S3
# NOTE(review): timeout is presumably in seconds — confirm against the API docs
s3_file = storage.node('s3:archives/backup.zip')
s3_file.fill_from_url('https://backups.example.com/latest.zip', timeout=300)

Intelligent Copy and Sync

Copy files with filtering, skip strategies, and progress tracking:

Basic Filtering

# Copy only specific file types
# src and dest are reused by the filtering and skip examples that follow
src = storage.node('local:project/')
dest = storage.node('s3:backup/')

# Only Python files
# include is given a single pattern here...
src.copy_to(dest, include='*.py')

# Multiple file types
# ...and a list of patterns here
src.copy_to(dest, include=['*.py', '*.json', '*.md'])

# Exclude patterns
src.copy_to(dest, exclude=['*.log', '*.tmp', '__pycache__/**'])

# Combine include and exclude
src.copy_to(dest,
         include='*.py',
         exclude='test_*.py')  # Python files, but no tests

Custom Filtering

Filter by file size, modification time, or custom logic:

# Only files smaller than 10MB
# The filter callable is given two arguments, named node and path here
src.copy_to(dest, filter=lambda node, path: node.size < 10_000_000)

# Only recently modified files
from datetime import datetime, timedelta
cutoff = datetime.now() - timedelta(days=7)
# Convert the cutoff to a timestamp once, rather than on every filter call
cutoff_ts = cutoff.timestamp()
src.copy_to(dest, filter=lambda n, p: n.mtime > cutoff_ts)

# Custom logic based on path
src.copy_to(dest, filter=lambda n, p: 'node_modules' not in p)

Skip Strategies for Incremental Sync

Avoid re-copying unchanged files:

# The three skip modes below go from the cheapest check to the most thorough

# Skip if file exists (fastest)
src.copy_to(dest, skip='exists')

# Skip if same size (fast)
src.copy_to(dest, skip='size')

# Skip if same content/hash (accurate, uses MD5/ETag)
src.copy_to(dest, skip='hash')

Combine Filtering and Skip Logic

# Intelligent backup: filter what to copy, skip what's unchanged
# The progress callback is given two values, printed here as "c/t"
src.copy_to(dest,
         include=['*.py', '*.js', '*.json'],  # Only code/config
         exclude=['*.log', '__pycache__/**'],  # No logs/cache
         filter=lambda n, p: n.size < 100_000_000,  # < 100MB
         skip='hash',  # Skip if content unchanged
         progress=lambda c, t: print(f"Progress: {c}/{t}"))

Real-World Examples

Source code backup:

# Backup source code, exclude generated files
project = storage.node('local:~/my-project/')
backup = storage.node('s3:backups/my-project/')

# include lists the source, config and doc file types to back up;
# exclude drops bytecode, caches, dependencies, VCS data and logs
project.copy_to(backup,
             include=['*.py', '*.js', '*.json', '*.md', '*.yaml'],
             exclude=[
                 '*.pyc',
                 '__pycache__/**',
                 'node_modules/**',
                 '.git/**',
                 '*.log'
             ],
             skip='hash')  # Only changed files
print("Backup completed!")

Sync only recent changes:

# Sync files modified in last 30 days
from datetime import datetime, timedelta

src = storage.node('local:documents/')
dest = storage.node('s3:archives/')

thirty_days_ago = datetime.now() - timedelta(days=30)
# Convert the cutoff to a timestamp once, rather than on every filter call
cutoff_ts = thirty_days_ago.timestamp()

src.copy_to(dest,
         filter=lambda n, p: n.mtime > cutoff_ts,
         skip='hash')

Image files (skip large ones):

# Copy images only, skip large files
media = storage.node('uploads:media/')
cdn = storage.node('s3:cdn/media/')

# include restricts the copy to four image extensions; the filter then
# drops any of those whose size is 5_000_000 bytes or more
media.copy_to(cdn,
           include=['*.jpg', '*.png', '*.gif', '*.webp'],
           filter=lambda n, p: n.size < 5_000_000,  # < 5MB
           skip='exists')  # Don't re-upload

With Progress Tracking

Monitor copy operations with callbacks:

# Paths of copied nodes, appended by on_file()
copied_files = []
# (path, reason) tuples, appended by on_skip()
skipped_files = []

def on_progress(current, total):
    """Print copy progress as ``current/total`` followed by a percentage.

    Args:
        current: Progress counter reported by the caller.
        total: Total reported by the caller; may be 0.
    """
    # A total of 0 would otherwise raise ZeroDivisionError; report 0.0%
    # in that case.
    percent = (current / total) * 100 if total else 0.0
    print(f"Progress: {current}/{total} ({percent:.1f}%)")

def on_file(node):
    """Record the copied node's path in ``copied_files`` and print its name."""
    copied_files.append(node.path)
    name = node.basename
    print(f"✓ Copied: {name}")

def on_skip(node, reason):
    """Record ``(path, reason)`` in ``skipped_files`` and print the skip."""
    entry = (node.path, reason)
    skipped_files.append(entry)
    name = node.basename
    print(f"⊘ Skipped: {name} ({reason})")

# Run the copy with the three callbacks attached
src.copy_to(
    dest,
    exclude='*.log',
    skip='hash',
    progress=on_progress,
    on_file=on_file,
    on_skip=on_skip,
)

# The callbacks filled copied_files / skipped_files during the copy
print("\nSummary:")  # plain string: no placeholders, so no f-prefix needed
print(f"  Copied: {len(copied_files)} files")
print(f"  Skipped: {len(skipped_files)} files")

S3 Versioning

Access historical versions when S3 versioning is enabled:

# Get list of versions
# versions is read as an attribute, not called as a method
doc = storage.node('s3:documents/contract.pdf')
versions = doc.versions

# Each entry is indexed by string keys, like a dict
for v in versions:
    print(f"Version {v['version_id']}")
    print(f"  Modified: {v['last_modified']}")
    print(f"  Size: {v['size']} bytes")
    print(f"  Latest: {v['is_latest']}")

# Open specific version
# versions[1] only exists when there are at least two versions; a plain
# truthiness check would raise IndexError for a file with a single version.
# NOTE(review): assumes the list is ordered newest-first, so index 1 is the
# previous version — confirm against the library documentation.
if len(versions) > 1:
    old_version_id = versions[1]['version_id']
    with doc.open_version(old_version_id) as f:
        old_content = f.read()
        print("Previous version:", old_content)