Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Simone Rossi
main
Commits
59a86198
Commit
59a86198
authored
Oct 04, 2017
by
Daniele Venzano
Browse files
Use core information to fit services in nodes
parent
3fdfe4fa
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
zoe_api/web/static/Chart.min.js
0 → 100644
View file @
59a86198
This diff is collapsed.
Click to expand it.
zoe_api/web/static/zoe.css
View file @
59a86198
...
...
@@ -194,34 +194,6 @@ div.status_line {
font-size
:
larger
;
}
div
.node_detail
div
.memory_total
,
div
.node_detail
div
.cores_total
{
border
:
1px
solid
black
;
background-color
:
green
;
width
:
100%
;
position
:
relative
;
z-index
:
0
;
}
div
.node_detail
div
.memory_reserved
,
div
.node_detail
div
.cores_reserved
{
background-color
:
red
;
display
:
inline-block
;
left
:
0
;
z-index
:
2
;
position
:
relative
;
float
:
left
;
}
div
.node_detail
div
.memory_total
span
,
div
.node_detail
div
.cores_total
span
{
text-align
:
end
;
width
:
100%
;
position
:
relative
;
left
:
0
;
display
:
block
;
z-index
:
1
;
}
div
.scheduler_queue
{
display
:
flex
;
overflow
:
auto
;
...
...
@@ -245,3 +217,16 @@ div.essential {
div
.running
{
border-color
:
green
;
}
div
.pie-plots
{
width
:
20em
;
height
:
22em
;
}
div
.plot-container
{
float
:
left
;
}
div
.plot-container
p
{
text-align
:
center
;
}
zoe_api/web/templates/status.html
View file @
59a86198
{% extends "base_user.html" %}
{% block title %}Zoe system status{% endblock %}
{% block custom_head %}
<script
src=
"/static/Chart.min.js"
type=
"application/javascript"
></script>
{% endblock %}
{% block content %}
<h2>
Zoe system status
</h2>
...
...
@@ -88,14 +92,107 @@
<div
class=
"platform_node_detail"
>
{% for node in stats.platform_stats.nodes %}
<div
class=
"node_detail"
>
<div
class=
"node_name"
>
{{ node['name'] }}
</div>
<div
class=
"node_name"
>
{{ node['name'] }}
{% if node['status'] == 'offline' %}
(node is offline/unreachable)
{% endif %}
</div>
<div
class=
"container_count"
>
{{ node['container_count'] }} containers
</div>
<div
class=
"memory_total"
>
<div
class=
"memory_reserved"
style=
"width: {{ node['memory_reserved'] * 100 / node['memory_total'] }}%;"
>
</div><span>
{{ '%0.2f' % (node['memory_reserved'] * 100 / node['memory_total'],) }}% memory reserved
</span>
<div
class=
"plot-container"
>
<p>
Memory
</p>
<div
class=
"pie-plots"
>
<canvas
class=
"node_status_canvas"
id=
"{{ node.name }}-mem-res"
></canvas>
<canvas
class=
"node_status_canvas"
id=
"{{ node.name }}-mem-use"
></canvas>
</div>
</div>
<div
class=
"cores_total"
>
<div
class=
"cores_reserved"
style=
"width: {{ node['cores_reserved'] * 100 / node['cores_total'] }}%;"
>
</div><span>
{{ '%0.2f' % (node['cores_reserved'] * 100 / node['cores_total'],) }}% cores reserved
</span>
<script>
    // Draw the two memory pie charts for this node. The variables below are
    // assigned without a declaration, exactly as before, so they stay globals
    // that every per-node script block overwrites in turn.

    // Chart 1: memory reserved on the node versus the unreserved remainder.
    data = {
        labels: ["Reserved", "Free"],
        datasets: [{
            label: "Reserved memory",
            data: [{{ node['memory_reserved'] }}, {{ node['memory_total'] - node['memory_reserved'] }}],
            backgroundColor: ["rgba(0, 169, 225, 1.0)", "rgba(145, 192, 46, 1.0)"]
        }]
    };
    ctx = document.getElementById("{{ node.name }}-mem-res").getContext("2d");
    new Chart(ctx, {
        type: "pie",
        data: data,
        options: {animation: {animateRotate: false}}
    });

    // Chart 2: memory in use (total minus free) versus memory still free.
    data = {
        labels: ["In-use", "Free"],
        datasets: [{
            label: "Used memory",
            data: [{{ node['memory_total'] - node['memory_free'] }}, {{ node['memory_free'] }}],
            backgroundColor: ["rgba(0, 169, 225, 1.0)", "rgba(145, 192, 46, 1.0)"]
        }]
    };
    ctx = document.getElementById("{{ node.name }}-mem-use").getContext("2d");
    myPieChart = new Chart(ctx, {
        type: "pie",
        data: data,
        options: {animation: {animateRotate: false}}
    });
</script>
<div
class=
"plot-container"
>
<p>
Cores
</p>
<div
class=
"pie-plots"
>
<canvas
class=
"node_status_canvas"
id=
"{{ node.name }}-cpu-res"
></canvas>
<canvas
class=
"node_status_canvas"
id=
"{{ node.name }}-cpu-use"
></canvas>
</div>
</div>
<script>
    // Draw the two CPU-core pie charts for this node. As in the rest of the
    // template, data/ctx/myPieChart are assigned without a declaration and are
    // therefore globals overwritten by each per-node script block.

    // Chart 1: cores reserved on the node versus the unreserved remainder.
    data = {
        datasets: [{
            label: 'Reserved cores',
            data: [{{ node['cores_reserved'] }}, {{ node['cores_total'] - node['cores_reserved'] }}],
            backgroundColor: [
                'rgba(0, 169, 225, 1.0)',
                'rgba(145, 192, 46, 1.0)'
            ]
        }],
        'labels': ['Reserved', 'Free']
    };
    ctx = document.getElementById("{{ node.name }}-cpu-res").getContext('2d');
    new Chart(ctx, {
        type: 'pie',
        data: data,
        options: {
            animation: {
                animateRotate: false
            }
        }
    });

    // Chart 2: cores in use (total minus free) versus cores still free.
    data = {
        datasets: [{
            // This dataset plots cores; it was previously mislabelled 'Used memory'.
            label: 'Used cores',
            data: [{{ node['cores_total'] - node['cores_free'] }}, {{ node['cores_free'] }}],
            backgroundColor: [
                'rgba(0, 169, 225, 1.0)',
                'rgba(145, 192, 46, 1.0)'
            ]
        }],
        'labels': ['In-use', 'Free']
    };
    ctx = document.getElementById("{{ node.name }}-cpu-use").getContext('2d');
    myPieChart = new Chart(ctx, {
        type: 'pie',
        data: data,
        options: {
            animation: {
                animateRotate: false
            }
        }
    });
</script>
</div>
{% endfor %}
</div>
...
...
zoe_master/backends/docker/api_client.py
View file @
59a86198
...
...
@@ -104,7 +104,7 @@ class DockerClient:
run_args
[
'mem_reservation'
]
-=
1
if
service_instance
.
core_limit
is
not
None
:
run_args
[
'cpu_quota'
]
=
100000
*
service_instance
.
core_limit
.
max
run_args
[
'cpu_quota'
]
=
int
(
100000
*
service_instance
.
core_limit
.
max
)
if
get_conf
().
gelf_address
!=
''
:
run_args
[
'log_config'
]
=
{
...
...
@@ -184,6 +184,9 @@ class DockerClient:
else
:
info
[
'ports'
][
port
]
=
None
info
[
'cpu_period'
]
=
container
.
attrs
[
'HostConfig'
][
'CpuPeriod'
]
info
[
'cpu_quota'
]
=
container
.
attrs
[
'HostConfig'
][
'CpuQuota'
]
return
info
def
inspect_container
(
self
,
docker_id
:
str
)
->
Dict
[
str
,
Any
]:
...
...
zoe_master/backends/docker/threads.py
View file @
59a86198
...
...
@@ -19,6 +19,7 @@ import logging
import
threading
import
time
from
copy
import
deepcopy
from
datetime
import
datetime
from
zoe_lib.config
import
get_conf
from
zoe_lib.state
import
SQLManager
,
Service
...
...
@@ -71,6 +72,7 @@ class DockerStateSynchronizer(threading.Thread):
node_stats
.
status
=
'offline'
time
.
sleep
(
CHECK_INTERVAL
)
continue
node_stats
.
status
=
'online'
service_list
=
self
.
state
.
service_list
(
backend_host
=
host_config
.
name
)
try
:
...
...
@@ -118,6 +120,20 @@ class DockerStateSynchronizer(threading.Thread):
memory_in_use
=
sum
([
stat
[
'memory_stats'
][
'usage'
]
for
stat
in
stats
.
values
()
if
'usage'
in
stat
[
'memory_stats'
]])
node_stats
.
memory_free
=
node_stats
.
memory_total
-
memory_in_use
node_stats
.
cores_reserved
=
sum
([
cont
[
'cpu_quota'
]
/
cont
[
'cpu_period'
]
for
cont
in
container_list
if
cont
[
'cpu_period'
]
!=
0
])
node_stats
.
cores_free
=
node_stats
.
cores_total
-
sum
([
self
.
_get_core_usage
(
stat
)
for
stat
in
stats
.
values
()])
def
_get_core_usage
(
self
,
stat
):
try
:
this_read_ts
=
datetime
.
strptime
(
stat
[
'read'
],
'%Y-%m-%dT%H:%M:%S.%f'
)
except
ValueError
:
return
0
pre_read_ts
=
datetime
.
strptime
(
stat
[
'preread'
],
'%Y-%m-%dT%H:%M:%S.%f'
)
cpu_time_now
=
stat
[
'cpu_stats'
][
'cpu_usage'
][
'total_usage'
]
cpu_time_pre
=
stat
[
'precpu_stats'
][
'cpu_usage'
][
'total_usage'
]
return
(
cpu_time_now
-
cpu_time_pre
)
/
((
this_read_ts
-
pre_read_ts
).
total_seconds
()
*
1000000000
)
def
_update_service_status
(
self
,
service
:
Service
,
container
):
"""Update the service status."""
if
service
.
backend_status
!=
container
[
'state'
]:
...
...
zoe_master/scheduler/simulated_platform.py
View file @
59a86198
...
...
@@ -12,10 +12,12 @@ class SimulatedNode:
"""A simulated node where containers can be run"""
def
__init__
(
self
,
real_node
:
NodeStats
):
self
.
real_reservations
=
{
"memory"
:
real_node
.
memory_reserved
"memory"
:
real_node
.
memory_reserved
,
"cores"
:
real_node
.
cores_reserved
}
self
.
real_free_resources
=
{
"memory"
:
real_node
.
memory_free
"memory"
:
real_node
.
memory_free
,
"cores"
:
real_node
.
cores_free
}
self
.
real_active_containers
=
real_node
.
container_count
self
.
services
=
[]
...
...
@@ -23,7 +25,14 @@ class SimulatedNode:
def
service_fits
(
self
,
service
:
Service
)
->
bool
:
"""Checks whether a service can fit in this node"""
return
service
.
resource_reservation
.
memory
.
min
<
self
.
node_free_memory
()
return
service
.
resource_reservation
.
memory
.
min
<
self
.
node_free_memory
()
and
service
.
resource_reservation
.
cores
.
min
<=
self
.
node_free_cores
()
def service_why_unfit(self, service) -> str:
    """Generate an explanation of why the service does not fit this node.

    The checks are the negation of the fit test: a service fits only when
    its minimum memory reservation is strictly below the node's free memory
    and its minimum core reservation does not exceed the free cores.  The
    first resource found to be short is reported, together with the size of
    the shortfall (needed minus free).

    :param service: object exposing ``resource_reservation.memory.min`` and
        ``resource_reservation.cores.min``
    :return: a human-readable reason; a fixed message when neither resource
        is short, so callers always receive a string
    """
    needed_memory = service.resource_reservation.memory.min
    free_memory = self.node_free_memory()
    # Previously this tested ``needed < free`` (the *fit* condition) and
    # printed ``free - needed``, so the wrong reason was reported.
    if needed_memory >= free_memory:
        return 'needs {} bytes of memory'.format(needed_memory - free_memory)

    needed_cores = service.resource_reservation.cores.min
    free_cores = self.node_free_cores()
    if needed_cores > free_cores:
        return 'needs {} more cores'.format(needed_cores - free_cores)

    # Neither resource is short: return a string rather than an implicit None.
    return 'service fits in this node'
def
service_add
(
self
,
service
):
"""Add a service in this node."""
...
...
@@ -52,19 +61,31 @@ class SimulatedNode:
simulated_reservation
=
0
for
service
in
self
.
services
:
# type: Service
simulated_reservation
+=
service
.
resource_reservation
.
memory
.
min
assert
(
self
.
real_free_resources
[
'memory'
]
-
simulated_reservation
)
>=
0
return
self
.
real_free_resources
[
'memory'
]
-
simulated_reservation
free
=
self
.
real_free_resources
[
'memory'
]
-
simulated_reservation
if
free
<
0
:
log
.
warning
(
'More memory reserved than there is free on node {}: {}'
.
format
(
self
.
name
,
free
))
return
free
def node_free_cores(self):
    """Compute how many cores remain free on this node.

    The minimum core reservation of every service in ``self.services`` is
    subtracted from ``self.real_free_resources['cores']``.  A negative
    result is logged as a warning and still returned unchanged.
    """
    cores_claimed = sum(svc.resource_reservation.cores.min for svc in self.services)
    remaining = self.real_free_resources['cores'] - cores_claimed
    if remaining >= 0:
        return remaining
    log.warning('More cores reserved than there are free on node {}: {}'.format(self.name, remaining))
    return remaining
def
__repr__
(
self
):
out
=
'SN {} |
f
{}'
.
format
(
self
.
name
,
self
.
node_free_memory
())
out
=
'SN {} |
m {} | c
{}'
.
format
(
self
.
name
,
self
.
node_free_memory
()
,
self
.
node_free_cores
()
)
return
out
class
SimulatedPlatform
:
"""A simulated cluster, composed by simulated nodes"""
def
__init__
(
self
,
pla
s
tform_status
:
ClusterStats
):
def
__init__
(
self
,
platform_status
:
ClusterStats
):
self
.
nodes
=
{}
for
node
in
pla
s
tform_status
.
nodes
:
for
node
in
platform_status
.
nodes
:
if
node
.
status
==
'online'
:
self
.
nodes
[
node
.
name
]
=
SimulatedNode
(
node
)
...
...
@@ -75,9 +96,11 @@ class SimulatedPlatform:
for
node_id_
,
node
in
self
.
nodes
.
items
():
if
node
.
service_fits
(
service
):
candidate_nodes
.
append
(
node
)
else
:
log
.
debug
(
'Cannot fit service {} on node {}: {}'
.
format
(
service
.
id
,
node
.
name
,
node
.
service_why_unfit
(
service
)))
if
len
(
candidate_nodes
)
==
0
:
# this service does not fit anywhere
self
.
deallocate_essential
(
execution
)
log
.
debug
(
'Cannot fit essential service {}, bailing out'
.
format
(
service
.
id
))
log
.
debug
(
'Cannot fit essential service {}
anywhere
, bailing out'
.
format
(
service
.
id
))
return
False
candidate_nodes
.
sort
(
key
=
lambda
n
:
n
.
container_count
)
# smallest first
candidate_nodes
[
0
].
service_add
(
service
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment