tk5-c90-projects/scripts/poll_job.py

219 lines
7.5 KiB
Python
Raw Normal View History

2026-02-06 12:31:49 +00:00
#!/usr/bin/env python3
import sys
import os
import re
import time
import subprocess
import requests
from requests.auth import HTTPBasicAuth
2026-02-06 13:13:38 +00:00
# Console (syslog endpoint) settings; each value can be overridden through
# the environment variable named in the call.
CONSOLE_URL = os.getenv("MVS_CONSOLE_URL", "http://oldcomputernerd.com:8038/cgi-bin/tasks/syslog")
CONSOLE_USER = os.getenv("MVS_CONSOLE_USER", "gmgauthier")
# No fallback here: this stays None when MVS_CONSOLE_PASSWORD is not set.
CONSOLE_PASS = os.getenv("MVS_CONSOLE_PASSWORD")

# SSH target and remote directory used when listing/copying printout PDFs.
LINODE_HOST = os.getenv("LINODE_SSH_HOST", "gmgauthier@socrates")
LINODE_PRINTOUT_DIR = os.getenv("LINODE_PRINTOUT_DIR", "/home/gmgauthier/printouts")
2026-02-06 12:31:49 +00:00
def get_syslog():
    """Download the Hercules syslog text over HTTP.

    Sends a GET to CONSOLE_URL with HTTP basic auth built from
    CONSOLE_USER / CONSOLE_PASS and a 10 second timeout.

    Returns:
        The response body as a string, or None when the request fails or
        the server answers with an HTTP error status (the error is printed).
    """
    credentials = HTTPBasicAuth(CONSOLE_USER, CONSOLE_PASS)
    try:
        reply = requests.get(CONSOLE_URL, auth=credentials, timeout=10)
        # Turn 4xx/5xx answers into an exception so they share the
        # failure path below.
        reply.raise_for_status()
        return reply.text
    except requests.RequestException as err:
        print(f"Failed to fetch syslog: {err}")
        return None
def find_job_number(syslog, jobname):
    """Extract the job number from a job's $HASP100 (submission) message.

    Looks for a line shaped like:
        /12.28.02 JOB  257  $HASP100 SIMPLE2  ON READER1

    Args:
        syslog: Full syslog text to search.
        jobname: Job name to look for; matched case-insensitively and
            literally (regex metacharacters in the name are escaped).

    Returns:
        The job number as a string of digits taken from the first matching
        line, or None when no matching line exists.
    """
    # Escape the name: job names may contain characters such as '$' or '#'
    # that would otherwise be interpreted as regex syntax and break the match.
    name = re.escape(jobname)
    pattern = rf'/\d+\.\d+\.\d+\s+JOB\s+(\d+)\s+\$HASP100\s+{name}\s+ON\s+READER'
    match = re.search(pattern, syslog, re.IGNORECASE)
    return match.group(1) if match else None
2026-02-06 17:33:37 +00:00
def check_job_ended(syslog, jobname, job_number):
    """Report whether the syslog contains the job's $HASP395 ENDED message.

    Looks for a line shaped like:
        /18.24.41 JOB  276  $HASP395 GMG0001  ENDED

    Args:
        syslog: Full syslog text to search.
        jobname: Job name; matched case-insensitively and literally.
        job_number: Job number (string or int) that must appear on the line.

    Returns:
        True if a matching line is present, otherwise False.
    """
    # Both values are escaped so that characters like '$' in a job name are
    # matched literally instead of being treated as regex syntax.
    name = re.escape(jobname)
    number = re.escape(str(job_number))
    ended_pattern = rf'/\d+\.\d+\.\d+\s+JOB\s+{number}\s+\$HASP395\s+{name}\s+ENDED'
    return re.search(ended_pattern, syslog, re.IGNORECASE) is not None
def check_job_printed(syslog, jobname, job_number):
    """Report whether the syslog contains the job's $HASP150 (printed) message.

    Looks for a line shaped like:
        /12.28.03 JOB  257  $HASP150 SIMPLE2  ON PRINTER1

    Args:
        syslog: Full syslog text to search.
        jobname: Job name; matched case-insensitively and literally.
        job_number: Job number (string or int) that must appear on the line.

    Returns:
        True if a matching line is present, otherwise False.
    """
    # Both values are escaped so that characters like '$' in a job name are
    # matched literally instead of being treated as regex syntax.
    name = re.escape(jobname)
    number = re.escape(str(job_number))
    printed_pattern = rf'/\d+\.\d+\.\d+\s+JOB\s+{number}\s+\$HASP150\s+{name}\s+ON\s+PRINTER'
    return re.search(printed_pattern, syslog, re.IGNORECASE) is not None
2026-02-06 12:31:49 +00:00
def list_pdfs_local(local_dir):
    """Return the v1403-*.pdf files found directly in local_dir, newest first.

    Intended for printout directories available as a local (mounted) path.
    Ordering is by file modification time, most recently modified first.
    An empty list is returned when nothing matches.
    """
    import glob

    matches = glob.glob(f"{local_dir}/v1403-*.pdf")
    return sorted(matches, key=os.path.getmtime, reverse=True)
def list_pdfs_remote():
    """List v1403-*.pdf files on the remote host via SSH.

    Runs `ls -t` on LINODE_HOST over the files matching
    LINODE_PRINTOUT_DIR/v1403-*.pdf, so the remote side orders them by
    modification time, newest first.

    Returns:
        A list of remote paths (one per output line, blank lines dropped),
        or an empty list when the command exits non-zero or `ssh` cannot
        be started.
    """
    # The glob is meant for the *remote* shell. Passing an argv list (no
    # local shell) guarantees it reaches ssh untouched instead of possibly
    # being expanded against the local filesystem first.
    # NOTE: LINODE_HOST is passed as a single argument, i.e. a plain
    # [user@]host value.
    remote_cmd = f"ls -t {LINODE_PRINTOUT_DIR}/v1403-*.pdf"
    try:
        result = subprocess.run(
            ["ssh", LINODE_HOST, remote_cmd],
            check=True,
            capture_output=True,
            text=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # Non-zero exit status from ssh/ls, or the ssh binary could not be
        # executed at all.
        return []
    # splitlines() + filter avoids returning [''] when the output is empty.
    return [line for line in result.stdout.splitlines() if line.strip()]
def find_pdf_for_job(job_number, jname, local_printout_dir=None):
    """Locate the printout PDF whose name matches a job number and job name.

    A file matches when its path contains "v1403-J<job_number>_<jname>-".

    Args:
        job_number: Job number embedded in the PDF file name.
        jname: Job name embedded in the PDF file name.
        local_printout_dir: Optional local directory to search. When it is
            set and is an existing directory, only that directory is
            searched; otherwise the remote listing over SSH is used.

    Returns:
        The first matching path from the listing that was used, or None
        when nothing matches.
    """
    marker = f"v1403-J{job_number}_{jname}-"

    # A usable local directory takes precedence; there is no fall-through
    # to the remote listing when the local search finds nothing.
    use_local = local_printout_dir and os.path.isdir(str(local_printout_dir))
    if use_local:
        candidates = list_pdfs_local(local_printout_dir)
    else:
        candidates = list_pdfs_remote()

    return next((path for path in candidates if marker in path), None)
def retrieve_pdf(source_path, local_filename, is_local=False):
    """Fetch a printout PDF into local_filename.

    Args:
        source_path: Path of the PDF; a local path when is_local is truthy,
            otherwise a path on LINODE_HOST.
        local_filename: Destination path on this machine.
        is_local: When truthy, copy the file with shutil.copy2; otherwise
            download it with scp from LINODE_HOST.

    Returns:
        True on success, False when the copy or the scp command fails
        (the error is printed).
    """
    try:
        if is_local:
            # Local copy from a mounted volume
            import shutil

            shutil.copy2(source_path, local_filename)
            print(f"Copied: {local_filename}")
        else:
            # Remote SCP. An argv list (no local shell) keeps paths that
            # contain spaces or shell metacharacters from being re-split
            # or interpreted locally.
            subprocess.run(
                ["scp", f"{LINODE_HOST}:{source_path}", local_filename],
                check=True,
            )
            print(f"Retrieved: {local_filename}")
        return True
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers copy failures and a missing scp binary.
        print(f"Failed to retrieve PDF: {e}")
        return False
def poll_for_job(jn, to=300, poll_interval=5, print_grace=10):
    """Poll the console for job completion and retrieve its PDF printout.

    Works in three phases:
      1. Poll the syslog until the job's $HASP100 message appears, which
         yields the job number.
      2. Poll until the job's $HASP395 ENDED message appears, then make
         sure a $HASP150 (printed) message exists for it.
      3. Locate the matching PDF (local directory from LOCAL_PRINTOUT_DIR
         if it exists, otherwise over SSH) and save it in the current
         directory as <JOBNAME>_J<number>.pdf.

    Args:
        jn: Job name; upper-cased before matching.
        to: Overall timeout in seconds shared by phases 1 and 2.
        poll_interval: Seconds to sleep between syslog fetches.
        print_grace: Extra seconds to keep polling for $HASP150 after the
            job has ended. The printed message is normally logged after the
            ENDED message, so a snapshot taken between the two would
            otherwise be misreported as "no output was printed". Use 0 to
            disable the extra polling.

    Returns:
        0 on success, 1 on any failure (timeout, output not printed, PDF
        not found, or retrieval error).
    """
    jobname_upper = jn.upper()
    start_time = time.time()
    job_number = None

    print(f"Polling for job: {jobname_upper}")
    print(f"Timeout: {to}s, Poll interval: {poll_interval}s")
    print(f"Console URL: {CONSOLE_URL}")
    print(f"Console User: {CONSOLE_USER}")
    # Never echo the password itself, only whether one is configured.
    print(f"Console Pass: {'***' if CONSOLE_PASS else 'NOT SET'}")
    print()

    # Phase 1: Find a job number
    print("Phase 1: Looking for job submission ($HASP100)...")
    while time.time() - start_time < to:
        syslog = get_syslog()
        if syslog:
            job_number = find_job_number(syslog, jobname_upper)
            if job_number:
                print(f"Found job number: J{job_number}")
                break
        # Fetch failed or job not visible yet: wait and retry.
        time.sleep(poll_interval)

    if not job_number:
        print(f"Timeout: Job {jobname_upper} not found in console after {to}s")
        return 1

    # Phase 2: Wait for completion (same overall deadline as phase 1)
    print("Phase 2: Waiting for job completion ($HASP395 ENDED)...")
    job_ended = False
    job_printed = False
    while time.time() - start_time < to:
        syslog = get_syslog()
        if syslog:
            job_ended = check_job_ended(syslog, jobname_upper, job_number)
            job_printed = check_job_printed(syslog, jobname_upper, job_number)
            if job_ended:
                print(f"Job J{job_number} has ended")
                break
        time.sleep(poll_interval)

    if not job_ended:
        print(f"Timeout: Job J{job_number} did not complete after {to}s")
        return 1

    # The snapshot that showed ENDED may predate the print message, so give
    # the printer a short, bounded window before declaring failure.
    grace_deadline = time.time() + print_grace
    while not job_printed and time.time() < grace_deadline:
        remaining = max(grace_deadline - time.time(), 0)
        time.sleep(min(poll_interval, remaining))
        syslog = get_syslog()
        if syslog:
            job_printed = check_job_printed(syslog, jobname_upper, job_number)

    # Check if output was printed (required for PDF retrieval)
    if not job_printed:
        print(f"ERROR: Job J{job_number} completed but no output was printed ($HASP150 not found)")
        print("This usually means MSGCLASS=H (hold) was used in the JCL")
        print("Check TSO SDSF or console for job output manually")
        print("To fix: Change JCL to use MSGCLASS=A for automatic printing")
        return 1

    print(f"Job J{job_number} completed and output printed!")

    # Phase 3: Retrieve PDF
    print("Phase 3: Retrieving PDF...")
    # Give the PDF a moment to be written to disk
    time.sleep(2)

    # Check for local mounted directory (CI environment)
    local_printout_dir = os.environ.get("LOCAL_PRINTOUT_DIR")
    is_local = bool(local_printout_dir) and os.path.isdir(local_printout_dir)
    if is_local:
        print(f"Using local mounted directory: {local_printout_dir}")

    pdf_path = find_pdf_for_job(job_number, jobname_upper, local_printout_dir)
    if not pdf_path:
        print(f"Error: PDF not found for J{job_number}_{jobname_upper}")
        return 1

    local_filename = f"{jobname_upper}_J{job_number}.pdf"
    if retrieve_pdf(pdf_path, local_filename, is_local):
        print(f"Success! Job output saved to: {local_filename}")
        return 0
    return 1
if __name__ == "__main__":
    # Command-line entry point: poll_job.py <jobname> [timeout_seconds]
    if len(sys.argv) < 2:
        print("Usage: poll_job.py <jobname> [timeout_seconds]")
        print()
        print("Arguments:")
        print(" jobname - Job name to poll for (required)")
        print(" timeout_seconds - Maximum time to wait (optional, default: 300)")
        print()
        print("Example:")
        print(" poll_job.py SIMPLE2")
        print(" poll_job.py SIMPLE2 600")
        sys.exit(1)

    jobname = sys.argv[1]
    timeout = 300
    if len(sys.argv) > 2:
        try:
            timeout = int(sys.argv[2])
        except ValueError:
            # Report a bad timeout as a usage error instead of a traceback.
            print(f"Error: timeout_seconds must be an integer, got {sys.argv[2]!r}")
            sys.exit(1)

    # The polling result (0 = success, 1 = failure) becomes the exit status.
    sys.exit(poll_for_job(jobname, timeout))