Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
S
slapos.toolbox
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Lisa Casino
slapos.toolbox
Commits
4558d511
Commit
4558d511
authored
Jun 15, 2021
by
Lisa Casino
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
promise: new promise
parent
6c8e637c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
358 additions
and
0 deletions
+358
-0
slapos/promise/plugin/check_advanced_free_disk_space.py
slapos/promise/plugin/check_advanced_free_disk_space.py
+358
-0
No files found.
slapos/promise/plugin/check_advanced_free_disk_space.py
0 → 100644
View file @
4558d511
from
__future__
import
division
from
zope.interface
import
implementer
from
slapos.grid.promise
import
interface
from
slapos.grid.promise.generic
import
GenericPromise
import
os
import
sys
import
pwd
import
sqlite3
import
argparse
import
datetime
import
psutil
from
slapos.collect.db
import
Database
@implementer(interface.IPromise)
class RunPromise(GenericPromise):
  """Promise watching the disk hosting this computer partition.

  Uses the slapos collector database to predict when the disk becomes
  full, to report low free space (naming the biggest / fastest-growing
  partitions) and to check inode usage.
  """

  def __init__(self, config):
    """Initialize the promise.

    :param config: promise configuration, forwarded to GenericPromise.
    """
    super(RunPromise, self).__init__(config)
    # check disk space at least every 3 minutes
    self.setPeriodicity(minute=3)
def
biggestPartitions
(
self
,
db_path
,
date
,
time
,
limit
=
3
):
database
=
Database
(
db_path
,
create
=
False
,
timeout
=
10
)
try
:
database
.
connect
()
date_time
=
date
+
' '
+
time
where_query
=
"datetime(date || ' ' || time) >= datetime('%s', '-1 days') AND datetime(date || ' ' || time) <= datetime('%s')"
result
=
database
.
select
(
"folder"
,
columns
=
"partition, disk_used*1024, max(datetime(date || ' ' || time))"
,
where
=
where_query
%
(
date_time
,
date_time
),
group
=
"partition"
,
order
=
"disk_used DESC"
,
limit
=
limit
).
fetchall
()
if
not
result
or
not
result
[
0
]:
self
.
logger
.
info
(
"No result from collector database in table folder: skipped"
)
return
None
except
sqlite3
.
OperationalError
as
e
:
# if database is still locked after timeout expiration (another process is using it)
# we print warning message and try the promise at next run until max warn count
locked_message
=
"database is locked"
if
locked_message
in
str
(
e
)
and
\
not
self
.
raiseOnDatabaseLocked
(
locked_message
):
return
None
raise
finally
:
try
:
database
.
close
()
except
Exception
:
pass
return
result
  def fastestPartitions(self, db_path, disk_partition, date, time, day_range, limit=3):
    """Return the ``limit`` partitions whose disk usage grows fastest.

    For every partition in the collector ``folder`` table, compare the
    newest sample around ``date``/``time`` with the oldest sample within
    ``day_range`` days and compute the growth slope (bytes per day,
    assuming the stored ``disk_used`` is in KiB — hence the *1024 in the
    queries; TODO confirm against the collector schema).

    :param db_path: directory containing the collector database.
    :param disk_partition: unused in this method; kept for signature symmetry.
    :param date: day of the newest sample, 'YYYY-MM-DD'.
    :param time: time of the newest sample, 'HH:MM'.
    :param day_range: how many days to look back for the oldest sample.
    :param limit: number of top entries to return.
    :return: up to ``limit`` tuples
             ``(partition, slope, date_min, date_max, delta_days)`` sorted by
             decreasing slope, or None when there is no usable data (or the
             database is locked and we chose not to raise).
    """
    database = Database(db_path, create=False, timeout=10)
    try:
      database.connect()
      # for each partition, we get two data: the actual size and
      # the oldest (according to day_range) to compute the slope of the function
      date_time = date + ' ' + time
      where_query = "datetime(date) >= datetime('%s', '-1 days') AND datetime(date) <= datetime('%s')"
      result_max = database.select(
        "folder",
        columns="partition, disk_used*1024, datetime(date || ' ' || time)",
        where=where_query % (date_time, date_time),
        group="partition",
        order="partition").fetchall()
      if not result_max or not result_max[0]:
        self.logger.info("No result from collector database in table folder: skipped")
        return None
      result_min = database.select(
        "folder",
        columns="partition, disk_used*1024, min(datetime(date || ' ' || time))",
        where="datetime(date || ' ' || time) >= datetime('%s', '-%s days')" % (
          result_max[0][2], day_range),
        group="partition",
        order="partition").fetchall()
      if not result_min or (result_min == result_max):
        self.logger.info("No result from collector database in table folder: skipped")
        return None
      timep = '%Y-%m-%d %H:%M:%S'
      ranked_results = []
      # NOTE(review): pairing result_max[i] with result_min[i] assumes both
      # queries return the same partitions in the same order; a partition
      # present in only one time window would shift the pairing — verify.
      for i in range(len(result_max)):
        timespan = datetime.datetime.strptime(result_max[i][2], timep) - \
            datetime.datetime.strptime(result_min[i][2], timep)
        delta_days = timespan.total_seconds() / (3600. * 24)
        # if we don't have enough information OR information are the same
        if (int(delta_days) <= 1) or (result_max[i] == result_min[i]):
          continue
        user, size_max, date_max = result_max[i]
        user, size_min, date_min = result_min[i]
        # slope/(1024*1024*1024) = number of giga per day
        slope = (size_max - size_min) / delta_days
        ranked_results.append((user, slope, date_min, date_max, delta_days))
      ranked_results = sorted(ranked_results, key=lambda tup: tup[1], reverse=True)
    except sqlite3.OperationalError as e:
      # if database is still locked after timeout expiration (another process is using it)
      # we print warning message and try the promise at next run until max warn count
      locked_message = "database is locked"
      if locked_message in str(e) and \
          not self.raiseOnDatabaseLocked(locked_message):
        return None
      raise
    finally:
      try:
        database.close()
      except Exception:
        pass
    return ranked_results[:limit]
  def getDaysUntilFull(self, disk_partition, database, date, time, day_range):
    """Estimate in how many days ``disk_partition`` becomes full.

    Finds the free-space ratio recorded around ``date``/``time`` in the
    ``disk`` table, rewinds up to ``day_range`` days back in history, and
    assumes the average speed of losing free space stays constant to
    extrapolate when free space reaches zero.

    :param disk_partition: device name as stored in the ``disk`` table.
    :param database: directory containing the collector database.
    :param date: day of the newest sample, 'YYYY-MM-DD'.
    :param time: time of the newest sample, 'HH:MM'.
    :param day_range: how many days to rewind for the oldest sample.
    :return: ``(days_until_full, min_date, min_free_ratio, max_date,
             max_free_ratio, delta_days)``, or None when there is not enough
             data, free space did not shrink, or the database is locked.
    """
    database = Database(database, create=False, timeout=10)
    try:
      database.connect()
      # newest sample: free ratio within a 30-second window around `time`
      result_max = database.select(
        "disk",
        date=date,
        columns="free*1.0/(used+free) AS percent, max(datetime(date || ' ' || time))",
        where="time between '%s:00' and '%s:30' and partition='%s'" % (
          time, time, disk_partition),
        limit=1).fetchone()
      if not result_max or not result_max[1]:
        return None
      # oldest sample within day_range days before the newest one
      result_min = database.select(
        "disk",
        columns="free*1.0/(used+free) AS percent, min(datetime(date || ' ' || time))",
        where="datetime(date || ' ' || time) >= datetime('%s', '-%s days') and partition='%s'" % (
          result_max[1], day_range, disk_partition),
        limit=1).fetchone()
      if not result_min or not result_min[1] or result_min == result_max:
        return None
      change = result_max[0] - result_min[0]
      if change > 0.:
        # free space grew over the period: nothing to predict
        return None
      timep = '%Y-%m-%d %H:%M:%S'
      timespan = datetime.datetime.strptime(result_max[1], timep) - datetime.datetime.strptime(result_min[1], timep)
      delta_days = timespan.total_seconds() / (3600. * 24)
      try:
        # change <= 0 here, so -free/(change/delta_days) is a positive day count
        return (-result_max[0] / (change / delta_days), result_min[1],
                result_min[0], result_max[1], result_max[0], delta_days)
      except ZeroDivisionError as e:
        # change == 0 or delta_days == 0: not enough data to extrapolate
        return None
    except sqlite3.OperationalError as e:
      # if database is still locked after timeout expiration (another process is using it)
      # we print warning message and try the promise at next run until max warn count
      locked_message = "database is locked"
      if locked_message in str(e) and \
          not self.raiseOnDatabaseLocked(locked_message):
        return None
      raise
    finally:
      try:
        database.close()
      except Exception:
        pass
def
getDiskSize
(
self
,
disk_partition
,
database
):
database
=
Database
(
database
,
create
=
False
,
timeout
=
10
)
try
:
# fetch disk size
database
.
connect
()
where_query
=
"partition='%s'"
%
(
disk_partition
)
order
=
"datetime(date || ' ' || time) DESC"
result
=
database
.
select
(
"disk"
,
columns
=
"free+used"
,
where
=
where_query
,
order
=
order
,
limit
=
1
).
fetchone
()
if
not
result
or
not
result
[
0
]:
return
None
disk_size
=
result
[
0
]
except
sqlite3
.
OperationalError
as
e
:
# if database is still locked after timeout expiration (another process is using it)
# we print warning message and try the promise at next run until max warn count
locked_message
=
"database is locked"
if
locked_message
in
str
(
e
)
and
\
not
self
.
raiseOnDatabaseLocked
(
locked_message
):
return
None
raise
finally
:
try
:
database
.
close
()
except
Exception
:
pass
return
disk_size
def
getFreeSpace
(
self
,
disk_partition
,
database
,
date
,
time
):
database
=
Database
(
database
,
create
=
False
,
timeout
=
10
)
try
:
# fetch free disk space
database
.
connect
()
where_query
=
"time between '%s:00' and '%s:30' and partition='%s'"
%
(
time
,
time
,
disk_partition
)
result
=
database
.
select
(
"disk"
,
date
=
date
,
columns
=
"free"
,
where
=
where_query
).
fetchone
()
if
not
result
or
not
result
[
0
]:
self
.
logger
.
info
(
"No result from collector database: disk check skipped"
)
return
0
disk_free
=
result
[
0
]
except
sqlite3
.
OperationalError
as
e
:
# if database is still locked after timeout expiration (another process is using it)
# we print warning message and try the promise at next run until max warn count
locked_message
=
"database is locked"
if
locked_message
in
str
(
e
)
and
\
not
self
.
raiseOnDatabaseLocked
(
locked_message
):
return
0
raise
finally
:
try
:
database
.
close
()
except
Exception
:
pass
return
int
(
disk_free
)
  def raiseOnDatabaseLocked(self, locked_message):
    """Decide whether a locked collector database should fail the promise.

    Returns True when the caller should re-raise: either the most recent
    run already reported an ERROR containing ``locked_message``, or the
    last ``max_warn`` consecutive runs all warned about it.  Otherwise
    logs a warning and returns False so the promise retries next run.
    """
    max_warn = 10
    latest_result_list = self.getLastPromiseResultList(result_count=max_warn)
    warning_count = 0
    # not enough history yet: keep tolerating the lock
    if len(latest_result_list) < max_warn:
      return False

    # the most recent run already escalated to ERROR: keep failing
    for result in latest_result_list[0]:
      if result['status'] == "ERROR" and locked_message in result["message"]:
        return True

    # count how many consecutive runs (newest first) warned about the lock;
    # stop at the first run without such a warning
    for result_list in latest_result_list:
      found = False
      for result in result_list:
        if result['status'] == "WARNING" and locked_message in result["message"]:
          found = True
          warning_count += 1
          break
      if not found:
        break
    if warning_count == max_warn:
      # too many warnings about the locked database: now fail
      return True
    self.logger.warn("collector database is locked by another process")
    return False
@
staticmethod
def
_checkInodeUsage
(
path
):
stat
=
os
.
statvfs
(
path
)
total_inode
=
stat
.
f_files
if
total_inode
:
usage
=
100
*
(
total_inode
-
stat
.
f_ffree
)
/
total_inode
if
usage
>=
98
:
return
"Disk Inodes usage is really high: %.4f%%"
%
usage
def
getInodeUsage
(
self
,
path
):
return
(
self
.
_checkInodeUsage
(
path
)
or
os
.
path
.
ismount
(
'/tmp'
)
and
self
.
_checkInodeUsage
(
'/tmp'
)
or
""
)
def
sense
(
self
):
# find if a disk is mounted on the path
disk_partition
=
""
db_path
=
self
.
getConfig
(
'collectordb'
)
check_date
=
self
.
getConfig
(
'test-check-date'
)
path
=
os
.
path
.
join
(
self
.
getPartitionFolder
(),
""
)
+
"extrafolder"
partitions
=
psutil
.
disk_partitions
()
while
path
is
not
'/'
:
if
not
disk_partition
:
path
=
os
.
path
.
dirname
(
path
)
else
:
break
for
p
in
partitions
:
if
p
.
mountpoint
==
path
:
disk_partition
=
p
.
device
break
if
not
disk_partition
:
self
.
logger
.
error
(
"Couldn't find disk partition"
)
return
if
db_path
.
endswith
(
"collector.db"
):
db_path
=
db_path
[:
-
len
(
"collector.db"
)]
if
check_date
:
# testing mode
currentdate
=
check_date
currenttime
=
self
.
getConfig
(
'test-check-time'
,
'09:17'
)
disk_partition
=
self
.
getConfig
(
'test-disk-partition'
,
'/dev/sda1'
)
else
:
# get last minute
now
=
datetime
.
datetime
.
utcnow
()
currentdate
=
now
.
strftime
(
'%Y-%m-%d'
)
currenttime
=
now
-
datetime
.
timedelta
(
minutes
=
1
)
currenttime
=
currenttime
.
time
().
strftime
(
'%H:%M'
)
disk_size
=
self
.
getDiskSize
(
disk_partition
,
db_path
)
default_threshold
=
None
if
disk_size
is
not
None
:
default_threshold
=
round
(
disk_size
/
(
1024
*
1024
*
1024
)
*
0.05
,
2
)
threshold
=
float
(
self
.
getConfig
(
'threshold'
,
default_threshold
)
or
2.0
)
threshold_days
=
float
(
self
.
getConfig
(
'threshold-days'
,
'20'
))
free_space
=
self
.
getFreeSpace
(
disk_partition
,
db_path
,
currentdate
,
currenttime
)
days_until_full_tuple
=
self
.
getDaysUntilFull
(
disk_partition
,
db_path
,
currentdate
,
currenttime
,
threshold_days
/
2
)
if
days_until_full_tuple
is
not
None
:
days_until_full
,
min_date
,
min_free
,
max_date
,
max_free
,
day_span
=
days_until_full_tuple
message
=
"Disk will become full in %.2f days (threshold: %.2f days), checked from %s to %s, %.2f days span"
%
(
days_until_full
,
threshold_days
,
min_date
,
max_date
,
day_span
)
if
days_until_full
<
threshold_days
:
message
+=
', free space dropped from %.1f%% to %.1f%%: ERROR. '
%
(
min_free
*
100
,
max_free
*
100
)
# display the 3 partitions with the highest usage rate in the last few days (threshold_days/2)
fast_partitions
=
self
.
fastestPartitions
(
db_path
,
disk_partition
,
currentdate
,
currenttime
,
threshold_days
/
2
)
if
fast_partitions
is
not
None
:
for
partition
in
fast_partitions
:
user_name
,
slope
,
date_min
,
date_max
,
delta_days
=
partition
message
+=
"The partition %s has used %s Giga per day for the last %s days (from %s to %s)"
%
(
user_name
,
slope
/
(
1024
*
1024
*
1024
),
delta_days
,
date_min
,
date_max
))
# display the final error message
self
.
logger
.
error
(
message
)
else
:
self
.
logger
.
info
(
message
+
': OK'
)
if
free_space
==
0
:
return
elif
free_space
>
threshold
*
1024
*
1024
*
1024
:
inode_usage
=
self
.
getInodeUsage
(
self
.
getPartitionFolder
())
if
inode_usage
:
self
.
logger
.
error
(
inode_usage
)
else
:
self
.
logger
.
info
(
"Disk usage: OK"
)
return
free_space
=
round
(
free_space
/
(
1024
*
1024
*
1024
),
2
)
message
=
'Free disk space low: remaining %s G (threshold: %s G).'
%
(
free_space
,
threshold
)
# display the 3 partitions that have the most storage capacity on the disk
big_partitions
=
self
.
biggestPartitions
(
db_path
,
currentdate
,
currenttime
)
if
big_partitions
is
not
None
:
for
partition
in
big_partitions
:
user_name
,
size_partition
,
date_checked
=
partition
message
+=
"The partition %s use %s Giga (date checked: %s)"
%
(
user_name
,
size_partition
,
date_checked
))
# display the final error message
self
.
logger
.
error
(
message
)
  def test(self):
    """Fail when the latest promise run reported an error.

    Checks only the most recent result (result_count=1, failure_amount=1).
    """
    return self._test(result_count=1, failure_amount=1)
  def anomaly(self):
    """Report an anomaly only after 3 failures in the last 3 runs.

    More tolerant than test() to avoid reacting to a transient failure.
    """
    return self._test(result_count=3, failure_amount=3)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment