Odyssey
EStateGetFromS3.py
1 #!/usr/bin/env python3
2 
3 """Retrieve eStatement from AWS S3
4 
Only valid content is streamed to stdout. Every error must be reported on
stderr.
7 
On failure, this script retries the AWS S3 API call; the --s3-retries option
sets the maximum number of attempts. Default is MAX_RETRIES_DEFAULT.
10 
11 Script Output:
12  Status code:
13  0, if no error
14  1, if any error, exceptions
15 
16  Content (see usage for more detail):
17  PDF binary content, OR
18  S3 Presigned URL - has a short expiration duration
19  (ENV: AWS_S3_PRESIGNURL_EXPIRATION_SECONDS;
20  default: 120 seconds)
21 
22 References:
23 [1] https://docs.aws.amazon.com/cli/latest/reference/s3/presign.html
24 [2] https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.generate_presigned_url
25 
26 """
27 
28 
import logging
import os
import shutil
import sys
import tempfile
from argparse import ArgumentParser

import boto3
35 
# Log record layout: level, timestamp, then (script path | function name |
# line number) of the call site, followed by the message.
# The layout is kept under its own name so the builtin format() is not
# shadowed, and any "%" in the script path is doubled so that the logging
# formatter does not mistake it for a placeholder.
LOG_FORMAT = ("[%(levelname)-5s] %(asctime)-15s ({}|%(funcName)s|%(lineno)d) "
              ":: %(message)s").format(__file__.replace("%", "%%"))
LOGGER = logging.getLogger(__name__)

# Only errors are reported. A StreamHandler created without an explicit
# stream writes to sys.stderr, which leaves stdout to the statement content.
LOGGER.setLevel(logging.ERROR)
logging_handler = logging.StreamHandler()
logging_handler.setFormatter(logging.Formatter(LOG_FORMAT))
LOGGER.addHandler(logging_handler)
44 
45 
# --- Production settings ---
AWS_REGION = "us-east-1"
S3_STMNT_BUCKET = 'homecu.stmnts'

# --- Development settings ---
# While developing, uncomment the two assignments below and adjust them as
# needed. Work with the infra team to copy the necessary prefixes and objects
# into this bucket and to set the appropriate permissions.
# AWS_REGION = 'us-east-2'
# S3_STMNT_BUCKET = "homecu.dev.stmnts"

# Upper bound on AWS API call attempts when a call fails.
MAX_RETRIES_DEFAULT = 2
# Lifetime of a pre-signed url, in seconds; taken from the
# AWS_S3_PRESIGNURL_EXPIRATION_SECONDS environment variable when it is set.
PRESIGN_URL_EXPIRATION_SECONDS = int(
    os.getenv("AWS_S3_PRESIGNURL_EXPIRATION_SECONDS", 120)
)
61 
62 
def get_parser():
    """Prepare argument parser.

    Options:
        -f / --file-estmnt   (required) full path to the estatement file
        -p / --presignedurl  flag; stream an s3 presigned url instead of the
                             pdf content
        -r / --s3-retries    integer; defaults to MAX_RETRIES_DEFAULT

    Returns:
        parser -- ArgumentParser object
    """
    parser = ArgumentParser(
        # Adjacent string literals are joined verbatim, so the separating
        # space must be part of one of them.
        description=("Get PDF EStatement Content from S3 (buffered "
                     "binary content OR s3 presigned url).")
    )

    parser.add_argument(
        "-f", "--file-estmnt",
        help="Full path to estatement file for this period",
        required=True,
    )
    parser.add_argument(
        "-p", "--presignedurl",
        action="store_true",
        help=("stream s3 presign url to stdout; "
              "otherwise streams actual pdf content"),
    )
    parser.add_argument(
        "-r", "--s3-retries",
        help="Maximum number of retries on failure",
        type=int,
        default=MAX_RETRIES_DEFAULT,
    )
    return parser
88 
89 
def cleanup_temp_file(tmp_file):
    """Cleanup temporarily downloaded estatement file.

    Arguments:
        tmp_file -- path of the temporary file to delete

    A path that does not exist is ignored. Any other OSError raised by
    os.remove() (for example a permission problem) propagates to the caller.
    """
    # Remove first and tolerate absence instead of testing os.path.exists()
    # beforehand: the file can disappear between the check and the removal.
    try:
        os.remove(tmp_file)
    except (FileNotFoundError, NotADirectoryError):
        # Nothing to clean up at this path.
        pass
94 
95 
def return_presigned_url(s3_client, object_key):
    """Print a pre-signed GET url for an object in the statement bucket.

    Despite its name, this function returns None: the url is written to
    stdout with print().

    Arguments:
        s3_client -- S3 client; only generate_presigned_url() is called on it
        object_key -- key of the object inside S3_STMNT_BUCKET

    Raises:
        Whatever generate_presigned_url() raises; the failure is logged and
        the exception is re-raised unchanged.
    """
    try:
        url = s3_client.generate_presigned_url(
            ClientMethod='get_object',
            Params={
                "Bucket": S3_STMNT_BUCKET,
                "Key": object_key,
            },
            ExpiresIn=PRESIGN_URL_EXPIRATION_SECONDS,
        )
    except Exception:
        # Nothing is downloaded on this path, so name the step that failed.
        LOGGER.error("Estatement `%s` presigned url generation failed.",
                     object_key)
        raise

    # return s3 presigned url
    print(url)
114 
115 
def return_pdf_content(s3_client, s3_file_name):
    """Download pdf from S3 and stream its content to stdout.

    The object is first downloaded completely into a temporary file and only
    then copied to stdout, so a failed download writes nothing to stdout.

    Arguments:
        s3_client -- S3 client; only download_file() is called on it
        s3_file_name -- key of the pdf object inside S3_STMNT_BUCKET

    Raises:
        Whatever download_file() raises (logged, then re-raised unchanged),
        or any error raised while creating, reading or copying the
        temporary file.
    """
    # A uniquely named temporary file keeps simultaneous requests for the
    # same statement from overwriting or deleting each other's download.
    fd, tmp_file = tempfile.mkstemp(prefix="estmnt_", suffix=".pdf")
    # Only the path is needed: download_file() takes a filename.
    os.close(fd)
    try:
        try:
            # download pdf object from S3
            s3_client.download_file(S3_STMNT_BUCKET, s3_file_name, tmp_file)
        except Exception:
            LOGGER.error("Estatement `%s` download failed.", s3_file_name)
            raise

        # return pdf content, copied in chunks rather than read in one piece
        with open(tmp_file, 'rb') as f_read:
            shutil.copyfileobj(f_read, sys.stdout.buffer)
    finally:
        # Runs on success and on every failure, including a failed write
        # to stdout.
        cleanup_temp_file(tmp_file)
131 
132 
def main(argv):
    """Retrieve eStatement from AWS S3 estatement storing bucket.

    Arguments:
        argv -- command-line arguments, without the program name

    Returns:
        True when the pdf content or the presigned url was produced,
        False when the requested path is invalid, when --s3-retries is
        smaller than 1, or when every attempt failed.
    """
    args = get_parser().parse_args(argv)

    # Skip the first "/"-separated piece; for an absolute path that is the
    # empty string in front of the leading slash.
    file_parts = args.file_estmnt.split("/")[1:]

    if len(file_parts) != 7:
        LOGGER.error("Requested path to estatement '%s' is invalid; "
                     "Eg. correct path: "
                     "'/home/CUCODE/stmnt/pdf/yyyymmM/1/1XXXX.pdf'",
                     args.file_estmnt)
        return False

    cucode = file_parts[1].strip()
    # The S3 key is the requested path without its first component.
    s3_file_name = os.path.join(*file_parts[1:])
    max_retries = args.s3_retries

    if max_retries < 1:
        # No attempt would be made at all; say so on stderr instead of
        # failing silently.
        LOGGER.error("[%s] Estatement Retrieval from AWS S3 not attempted: "
                     "--s3-retries must be at least 1 (got %s)",
                     cucode.upper(), max_retries)
        return False

    for tries in range(1, max_retries + 1):
        try:
            client = boto3.client('s3', region_name=AWS_REGION)

            if args.presignedurl:
                return_presigned_url(client, s3_file_name)
            else:
                return_pdf_content(client, s3_file_name)
        except Exception as e:
            # Top-level boundary: log the failure and move on to the next
            # attempt, if any remain.
            LOGGER.error("[%s] Estatement Retrieval from AWS S3 failed "
                         "(try# %s/%s): %s",
                         cucode.upper(), tries, max_retries, e)
        else:
            return True

    return False
177 
178 
def run():
    """Script entrypoint.

    Translates the boolean result of main() into the process status
    described in the module docstring: 0 if no error, 1 otherwise.
    """
    # bool is an int subclass, so sys.exit(True) would exit with status 1
    # and sys.exit(False) with status 0; map the result explicitly.
    sys.exit(0 if main(sys.argv[1:]) else 1)


if __name__ == "__main__":
    run()
def return_pdf_content(s3_client, s3_file_name)
def cleanup_temp_file(tmp_file)
def return_presigned_url(s3_client, object_key)