# logTools.py
from pyparsing import Word, alphas, Suppress, Combine, nums, string, Optional, Regex, Literal
import os, re
import datetime
import uuid
import base64
import sqlite3
import PyRSS2Gen

def init_db(db_path):
  """Create the ``rss_entry`` table in the SQLite database at db_path.

  sqlite3 creates the database file when it does not exist yet. The table
  is declared with IF NOT EXISTS, so calling this function again on an
  already initialised database changes nothing.

  Columns: name, datetime, status, method, title, url, content.
  """
  db = sqlite3.connect(db_path)
  try:
    db.executescript("""
CREATE TABLE IF NOT EXISTS rss_entry (
  name VARCHAR(25),
  datetime VARCHAR(15),
  status VARCHAR(20),
  method VARCHAR(25),
  title VARCHAR(255),
  url VARCHAR(255),
  content TEXT);
""")
    db.commit()
  finally:
    # Release the connection even when the schema statement fails.
    db.close()

def getZopeLogRequestParser():
  """Build the pyparsing grammar used for request-style Zope log lines.

  The returned expression matches, in order:
    * a timestamp "N-N-N N:N:N,N" (digits only), named "timestamp";
    * a literal "-" which is suppressed;
    * two ";"-terminated fields matched by a regex, named "title";
    * either "[No request]" followed by the rest of the line, or
      "request: <word> " followed by the rest of the line; the prefix is
      suppressed and the remainder is named "link".
  """
  integer = Word(nums)
  serverDateTime = Combine(integer + "-" + integer + "-" + integer + " " +
                           integer + ":" + integer + ":" + integer + "," + integer)
  word = Word(alphas + nums + "@._-")
  # Raw string: the backslash escapes are meant for the regex engine, not
  # for the Python string literal.
  info = Regex(r"([\d\w\s:\.]+;){2}")
  request = Combine(Suppress("request: ") + Suppress(word + " ") + Regex(".*"))
  no_request = Combine(Suppress("[No request]") + Regex(".*"))
  bnf = (serverDateTime.setResultsName("timestamp") + Suppress("-") +
         info.setResultsName("title") +
         (no_request | request).setResultsName("link"))
  return bnf

def getZopeParser():
  """Build the pyparsing grammar for the first line of a Zope log entry.

  The returned expression matches, in order:
    * a timestamp "N-N-N N:N:N,N" (digits only), named "timestamp";
    * a run of 3 to 7 uppercase ASCII letters, named "statusCode";
    * one word made of letters, digits and "@._-:/#", named "method";
    * the rest of the line, named "title".
  """
  integer = Word(nums)
  serverDateTime = Combine(integer + "-" + integer + "-" + integer + " " +
                           integer + ":" + integer + ":" + integer + "," + integer)
  # ascii_uppercase is locale independent and exists on Python 2 and 3,
  # unlike string.uppercase.
  status = Word(string.ascii_uppercase, max=7, min=3)
  word = Word(alphas + nums + "@._-:/#")
  message = Regex(".*")
  bnf = (serverDateTime.setResultsName("timestamp") +
         status.setResultsName("statusCode") +
         word.setResultsName("method") +
         message.setResultsName("title"))
  return bnf

def isZopeLogBeginLine(line):
  """Tell whether line opens a new log entry, i.e. starts with a date.

  Returns the regex match object when the line begins with a date/time
  string (match.group() is then that date text), and None for an empty
  line, a "------" separator line, or any line not starting with a date.
  """
  is_separator = bool(line) and line.strip() == "------"
  if line and not is_separator:
    return re.match(r"(^\d{2,4}-\d{2}-\d{1,2}\s+\d{2}:\d{2}:\d{2}?[,\d]+)",
                    line)
  return None
  

def parseLog(path, parserbnf, method, filter_with="ERROR", start_date="", date_format=""):
  """Parse the log file at path into a list of entry dictionaries.

  Parameters:
    path         -- log file to read; when it does not exist an error is
                    printed and an empty list is returned.
    parserbnf    -- object whose parseString(line) returns the parsed
                    fields of an entry's first line (timestamp, title and
                    optionally statusCode, method, link, content).
    method       -- callable applied to every line; a truthy result means
                    the line starts a new entry, and its group() must
                    return the date text found at the start of the line.
    filter_with  -- when set, only entries whose statusCode equals this
                    value are kept.
    start_date   -- when set, entries whose date text compares lower than
                    this string are dropped (plain string comparison).
    date_format  -- strptime format of the timestamp; defaults to
                    "%Y-%m-%d %H:%M:%S,%f".

  Returns a list of dicts with the keys datetime, status, method, url,
  title and content. Lines that do not start an entry are appended to the
  content of the entry they follow, unless that entry was dropped.
  """
  if not os.path.exists(path):
    print("ERROR: cannot get file: %s" % path)
    return []
  if not date_format:
    date_format = "%Y-%m-%d %H:%M:%S,%f"
  log_result = []
  # True while the lines being read belong to an entry that is not kept.
  skip_entry = False
  with open(path, 'r') as logfile:
    for line in logfile:
      regex = method(line)
      if not regex:
        # Continuation line: attach it to the entry currently being built.
        if not log_result or skip_entry or line.strip() == "------":
          continue
        log_result[-1]['content'] += ("\n" + line)
        continue
      try:
        fields = parserbnf.parseString(line)
        skip_entry = bool(filter_with) and fields.statusCode != filter_with
        if not skip_entry:
          skip_entry = bool(start_date) and regex.group() < start_date
        if skip_entry:
          continue
        log_result.append(dict(
          datetime=datetime.datetime.strptime(fields.timestamp, date_format),
          status=fields.get('statusCode', ''),
          method=fields.get('method', ''),
          url=fields.get('link', ''),
          title=fields.title,
          content=fields.get('content', fields.title)))
      except Exception:
        # Best effort: an entry whose first line cannot be parsed is
        # dropped, together with its continuation lines, so they are not
        # attached to the previous entry by mistake.
        skip_entry = True
  return log_result

def insertRssDb(db_path, entry_list, rss_name):
  """Store log entries in the rss_entry table under the feed name rss_name.

  The table is created first when it does not exist. Each item of
  entry_list must be a mapping with the keys datetime (an object with
  strftime), status, method, title, url and content. The datetime is
  stored as text formatted as '%Y-%m-%d %H:%M:%S'.

  All rows are committed together; when an insert fails nothing is
  committed and the exception propagates.
  """
  init_db(db_path)
  rows = [(rss_name,
           entry['datetime'].strftime('%Y-%m-%d %H:%M:%S'),
           entry['status'],
           entry['method'],
           entry['title'],
           entry['url'],
           entry['content'])
          for entry in entry_list]
  db = sqlite3.connect(db_path)
  try:
    db.executemany(
      "insert into rss_entry(name, datetime, status, method, title, url, content) values (?, ?, ?, ?, ?, ?, ?)",
      rows)
    db.commit()
  finally:
    # Always release the connection, including when an insert fails.
    db.close()

def truncateRssDb(db_path, to_date):
  """Delete every rss_entry row whose datetime is lower than to_date.

  Rows whose datetime equals to_date are kept. The comparison is done by
  SQLite on the stored value.
  NOTE(review): insertRssDb stores 'YYYY-MM-DD HH:MM:SS' text, so to_date
  presumably has to use the same format -- confirm against callers.

  Raises sqlite3.OperationalError when the table does not exist.
  """
  db = sqlite3.connect(db_path)
  try:
    db.execute("delete from rss_entry where datetime<?", (to_date,))
    db.commit()
  finally:
    # Do not leak the connection when the statement fails.
    db.close()

def selectRssDb(db_path, rss_name, start_date, limit=0):
  """Return the rss_entry rows of feed rss_name dated start_date or later.

  Rows are tuples (name, datetime, status, method, title, url, content),
  ordered by datetime, newest first. When limit is non-zero at most that
  many rows are returned.

  The rows are fetched into a list before the connection is closed, so
  the result stays usable after the call and is an empty list when
  nothing matches.
  """
  query = ("select name, datetime, status, method, title, url, content from rss_entry "
           "where name=? and datetime>=? order by datetime DESC")
  params = (rss_name, start_date)
  if limit:
    query += " limit ?"
    params += (limit,)
  db = sqlite3.connect(db_path)
  try:
    return db.execute(query, params).fetchall()
  finally:
    db.close()

def generateRSS(db_path, name, rss_path, url_link, limit=10):
  """Write an RSS 2.0 file at rss_path from the rss_entry rows of feed name.

  Parameters:
    db_path  -- SQLite database holding the rss_entry table.
    name     -- feed name; selects the rows and is used as the feed title
                and description.
    rss_path -- file the XML is written to (overwritten).
    url_link -- link of the feed; also part of every item guid.
    limit    -- maximum number of rows used, newest first; a false value
                disables the limit.

  Each item title is prefixed with "[status] [method] " when those values
  are set, and newlines in the content become "<br/>" in the description.
  """
  query = ("select name, datetime, status, method, title, url, content from rss_entry "
           "where name=? order by datetime DESC")
  params = (name,)
  if limit:
    query += " limit ?"
    params += (limit,)

  db = sqlite3.connect(db_path)
  try:
    entry_list = db.execute(query, params).fetchall()
  finally:
    # Rows are fetched up front so the connection is released even when
    # building an item fails later on.
    db.close()

  items = []
  # The row's own name column is ignored: it equals the name parameter.
  for _row_name, rss_date, status, method, title, url, content in entry_list:
    if method:
      title = "[%s] %s" % (method, title)
    if status:
      title = "[%s] %s" % (status, title)
    # NOTE(review): the guid is built from the date and the feed link, so
    # two entries with the same date get the same guid -- confirm intent.
    items.append(PyRSS2Gen.RSSItem(
      title=title,
      link=url,
      description=content.replace('\n', '<br/>'),
      pubDate=rss_date,
      guid=PyRSS2Gen.Guid(base64.b64encode("%s, %s" % (rss_date, url_link)))))

  # Rows were selected newest first; the feed lists them oldest first.
  items.reverse()
  rss_feed = PyRSS2Gen.RSS2(
    title=name,
    link=url_link,
    description=name,
    lastBuildDate=datetime.datetime.utcnow(),
    items=items)

  with open(rss_path, 'w') as rss_output:
    rss_output.write(rss_feed.to_xml())