กวาด log เพื่อทำ Report สถิติ

มีโจทย์อยู่ว่า ต้องการสถิติการใช้งานของระบบ โดยที่ข้อมูลนั้นต้องเชื่อถือได้ สิ่งที่นึกออกคือการนำ log file มาวิเคราะห์ แล้วทำ report สรุปส่งอีเมลออกไป code ด้านล่างเขียนด้วย Python 2.7 เนื่องจากข้อจำกัดของระบบ

import os
import smtplib
from datetime import datetime, timedelta
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
from os.path import basename

class smtp_setting:
  """Connection settings read by ``send_email`` when it opens the SMTP session."""

  # TCP port of the mail relay.
  port = 25
  # Host name of the mail relay; left empty here and must be filled in before use.
  smtp_domain = ''

def send_email(file_path, month_year_for_file, total_requests, total_responses):
  """Email the usage summary with the report file attached.

  Args:
    file_path: Path of the report file to attach to the message.
    month_year_for_file: Month label placed in the subject and the body.
    total_requests: Total number of requests shown in the body.
    total_responses: Total number of responses shown in the body.

  Raises:
    IOError: If the report file cannot be opened for reading.
    smtplib.SMTPException: If the connection or the delivery fails.
  """
  smtp = smtp_setting()
  sender_email = "[email protected]"
  receiver_email = "[email protected]"
  subject = "Usage Statistics ServiceLog {}".format(month_year_for_file)

  body = """<html>

  <p>Service : ServiceLog - {0}<br>
  Usage : (request) = {1} times<br>
  Usage : (response) = {2} times<br>
  Please find the attachment.</p>
  """.format(month_year_for_file, total_requests, total_responses)

  message = MIMEMultipart()
  message['From'] = sender_email
  message['To'] = receiver_email
  message['Subject'] = subject
  message.attach(MIMEText(body, "html"))

  attachment_name = basename(file_path)
  with open(file_path, "rb") as attachment:
    part = MIMEApplication(attachment.read(), Name=attachment_name)
  part['Content-Disposition'] = 'attachment; filename="{}"'.format(attachment_name)
  # Without this call the report is built but never becomes part of the mail.
  message.attach(part)

  server = smtplib.SMTP(smtp.smtp_domain, smtp.port)
  try:
    server.sendmail(sender_email, receiver_email, message.as_string())
  finally:
    # Always release the connection; try/finally is used instead of ``with``
    # because smtplib.SMTP is not a context manager on Python 2.7.
    try:
      server.quit()
    except smtplib.SMTPServerDisconnected:
      server.close()
  print('Email sent to {} successfully'.format(receiver_email))

def get_previous_month_year_str():
  """Return the previous calendar month as a ``("YYYYMM", "MMYYYY")`` pair.

  The month is derived from the local clock (``datetime.now()``).
  """
  # One day before the 1st of the current month always falls in the previous month.
  start_of_this_month = datetime.now().replace(day=1)
  day_in_previous_month = start_of_this_month - timedelta(days=1)

  year_then_month = day_in_previous_month.strftime("%Y%m")
  month_then_year = day_in_previous_month.strftime("%m%Y")
  return year_then_month, month_then_year

def get_log_files(directory, target_month_year_str):
  """Collect the ``.log`` files stored under directories for the target month.

  Args:
    directory: Root directory that is walked recursively.
    target_month_year_str: Text that must occur in a directory's path (as
      yielded by ``os.walk``, so the root part counts too) for its files to
      be considered.

  Returns:
    A list with the full path of every matching ``.log`` file; empty when
    nothing matches.
  """
  log_files = []
  for dir_name, _, file_names in os.walk(directory):
    if target_month_year_str not in dir_name:
      continue
    for file_name in file_names:
      if file_name.endswith('.log'):
        log_file = os.path.join(dir_name, file_name)
        # Formatted into one string so Python 2.7 does not print a tuple.
        print("Found log file: {}".format(log_file))
        # The path must be recorded, otherwise callers always get an empty list.
        log_files.append(log_file)
  return log_files

def search_text_in_files(log_files):
  """Count request, response and response-status lines per service.

  A line containing ``(request)`` or ``(response)`` is expected to carry the
  service name after the first ``" : "`` separator and before the next
  ``" ("``. A line containing ``Response Status:`` is attributed to the
  service named by the most recent request/response line in the same file.

  Args:
    log_files: Iterable of log file paths; paths that do not exist are skipped.

  Returns:
    A ``(statistics, response_status_stats)`` tuple of dicts. ``statistics``
    maps ``"<service> (request)"`` / ``"<service> (response)"`` to a count;
    ``response_status_stats`` maps ``"<service> <status>"`` to a count.
  """
  statistics = {}
  response_status_stats = {}

  for file_path in log_files:
    if not os.path.exists(file_path):
      continue
    with open(file_path, 'r') as log_file:
      # The service context never carries over from one file to the next.
      current_service = None
      for line in log_file:
        # "(request)" takes precedence when a line contains both markers.
        if "(request)" in line:
          marker = "(request)"
        elif "(response)" in line:
          marker = "(response)"
        else:
          marker = None

        if marker is not None:
          fields = line.split(" : ")
          if len(fields) < 2:
            # Marker present but no " : " separator: skip the malformed line
            # instead of aborting the whole report with an IndexError.
            continue
          current_service = fields[1].split(" (")[0]
          key = "{} {}".format(current_service, marker)
          statistics[key] = statistics.get(key, 0) + 1
        elif "Response Status:" in line and current_service:
          status = line.split("Response Status:")[1].strip()
          key = "{} {}".format(current_service, status)
          response_status_stats[key] = response_status_stats.get(key, 0) + 1

  return statistics, response_status_stats

def write_statistics_to_file(statistics, response_status_stats, file_path, month_year_str):
  """Write the usage report as text and return the overall totals.

  The file starts with three header lines (month label, total requests, total
  responses). Then, for each service in sorted order, it lists the request
  count, the response count and, only when a response entry exists, the
  non-zero counts of the tracked response statuses.

  Args:
    statistics: Dict mapping ``"<service> (request)"`` /
      ``"<service> (response)"`` to a count.
    response_status_stats: Dict mapping ``"<service> <status>"`` to a count.
    file_path: Destination file; it is overwritten.
    month_year_str: Month label written in the first header line.

  Returns:
    A ``(total_requests, total_responses)`` tuple.
  """
  total_requests = sum(count for key, count in statistics.items() if "(request)" in key)
  total_responses = sum(count for key, count in statistics.items() if "(response)" in key)

  # A fixed tuple keeps the status lines in a stable order; plain dict
  # iteration order is arbitrary on Python 2.7.
  tracked_statuses = ("/AA", ".AB", "/0000000")

  with open(file_path, 'w') as report:
    report.write("Service : ServiceLog - {}\n".format(month_year_str))
    report.write("Usage : (request) = {} times\n".format(total_requests))
    report.write("Usage : (response) = {} times\n".format(total_responses))

    services = set(key.rsplit(" (", 1)[0] for key in statistics)
    for service in sorted(services):
      status_counts = dict.fromkeys(tracked_statuses, 0)

      # Match on the "<service> " prefix rather than a substring so that a
      # service named "A" does not absorb the statuses of "AB".
      # NOTE(review): service names that themselves contain spaces can still
      # share a prefix; confirm against the real log format.
      prefix = service + " "
      for status_key, count in response_status_stats.items():
        if not status_key.startswith(prefix):
          continue
        status = status_key.split(" ")[-1]
        if status in status_counts:
          # Sum rather than overwrite in case several keys end in the same status.
          status_counts[status] += count

      req_key = "{} (request)".format(service)
      res_key = "{} (response)".format(service)
      if req_key in statistics:
        report.write("{} = {} times\n".format(req_key, statistics[req_key]))
      if res_key in statistics:
        report.write("{} = {} times\n".format(res_key, statistics[res_key]))
        for status in tracked_statuses:
          if status_counts[status] > 0:
            report.write("{} = {} times\n".format(status, status_counts[status]))
  return total_requests, total_responses

if __name__ == "__main__":
  # Root directory that is scanned for log folders of the previous month.
  directory = "/ServiceLog/"
  target_month_year_str, month_year_for_file = get_previous_month_year_str()

  # Locate the log files and count their request/response/status lines.
  statistics, response_status_stats = search_text_in_files(
    get_log_files(directory, target_month_year_str))

  # Write the text report, then mail it together with the overall totals.
  output_file_path = '/home/log-tmp/ServiceLog-{}.txt'.format(month_year_for_file)
  total_requests, total_responses = write_statistics_to_file(
    statistics, response_status_stats, output_file_path, month_year_for_file)
  send_email(output_file_path, month_year_for_file, total_requests, total_responses)

code ด้านบนใช้วิธีค้นหาคำสำคัญในแต่ละบรรทัดของ log (เช่น "(request)", "(response)" และ "Response Status:") แล้วนับจำนวนครั้งที่พบ เพื่อสรุปเป็นตัวเลขสถิติ

เมื่อได้สิ่งที่ต้องการ นำมาเขียนลงไฟล์ txt โดยจัดรูปแบบที่เราต้องการ
