Code:
#!/usr/bin/perl -w
# Zzztop.pl
#
# Copyright 2013, Phil Carmody
# Inspired by "PowerTOP", but renamed as it's more concerned with
# sleeping than it is with the actual power consumed. Detects tasks
# that prevent the system from sleeping, in particular those that
# poll files or incorrectly poll() them, and ones that play ping-pong
# with each other.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details. This can be found
# at <http://www.gnu.org/licenses/>.
use strict qw(refs vars subs);
use warnings;
my $verbose=1;
my $all=0; # show all frequencies (etc.?) even if data is 0
my $incompat=0; # more accuracy then maemo/meego's powertop means different results
# Need to select which timer to use. Time::HiRes may not be available
my ($xtime,$calibrate);
eval { require Time::HiRes; };
if(!$@) { $xtime = \&Time::HiRes::time; }
else {
print STDERR "WARNING: Only low accuracy timing available.\n";
sub xtime {
open(S, "</proc/stat");
<S>; $_=<S>;
close(S);
my @a=split(/\s+/);
return ($a[1]+$a[2]+$a[3]+$a[4]+$a[5]+$a[6]+$a[7])/100;
}
$xtime=\&xtime;
$calibrate=1;
}
# Not every check is appropriate, depending on platform
my ($do_cpuidle, $do_cpufreq, $do_interrupts, $do_timerstats, $do_ctxtsw)=(1,1,1,1,1);
sub usage()
{
print<<HELP
ZzzTop: show wakeup reasons, and other PM-related CPU info
Usage:
zzztop [-h] [-s=<nnn>] [-t=<nnn>] [-a]
-h this help text
-s=<nnn> sleep for <nnn> seconds before collecting data
-t=<nnn> collect data for <nnn> seconds
-a for some data gathered (cpufreq), show even 0 records
HELP
}
my ($sleep,$time)=(10,30);
while($_=shift(@ARGV)) {
if(m/-s=(\d+)/) { $sleep=int($1); }
elsif(m/-t=(\d+)/) { $time=int($1); }
elsif(m/-a/) { $all=1; }
elsif(m/-h/) { usage(); exit; }
else { unshift(@ARGV, $_); last; }
}
my $mtime;
# gmtime(); # flush any one-time-only date prep now before the critical loop
my $cpudir='/sys/devices/system/cpu';
sub headstrip($)
{
open(F, "<", $_[0]) or die("failed to read $_[0]");
$_=<F>;
close(F);
chomp if($_);
$_;
}
# BEGIN cpu
sub get_cpu_range()
{
my $c=-1;
my @cpus=glob("$cpudir/cpu[0-9]");
foreach my $cpuname (@cpus) {
my ($cpu)=($cpuname=~m/(\d+)$/);
if($cpu>$c) { $c=$cpu; }
}
print("Detected ".($c+1)." cpus\n")
if($verbose);
$c;
}
my $maxcpu=0;
$maxcpu = get_cpu_range();
if($maxcpu<0) {
$do_cpuidle=0;
print STDERR "All cpu-specific data gathering disabled - no $cpudir/cpu*\n";
}
# END cpu
# BEGIN cpuidle
sub get_cpuidle_ranges($)
{
my $ret=$_[0];
my $s=-1;
foreach my $cpu (0..$maxcpu) {
my @states=glob("$cpudir/cpu$cpu/cpuidle/state[0-9]*");
my $t=-1;
foreach my $state (@states) {
my ($statenum)=($state =~ m/(\d+)$/);
if($statenum>$t) { $t=$statenum; }
}
if($t>=0) { $ret->[$cpu]=$t; }
if($t>$s) { $s=$t; }
}
print("Detected ".($s+1)." cpuidle states (@$ret)\n")
if($verbose);
$s;
}
my @maxstate;
my $maxstate = get_cpuidle_ranges(\@maxstate);
if($#maxstate<0) {
$do_cpuidle=0;
print STDERR "cpuidle disabled - no $cpudir/cpu*/cpuidle trees\n";
print STDERR " enable CPU_IDLE in kernel config\n"
if($verbose);
}
sub get_cpuidle_data() {
my @ret=();
foreach my $cpu (0..$maxcpu) {
$ret[$cpu]=[];
foreach my $state (0..$maxstate[$cpu]) {
my $time=headstrip("$cpudir/cpu$cpu/cpuidle/state$state/time");
my $usage=headstrip("$cpudir/cpu$cpu/cpuidle/state$state/usage");
$ret[$cpu]->[$state]=[$time,$usage];
}
}
@ret;
}
sub diff_cpuidle($$)
{
my ($pre, $post)=@_;
print("C-state Information\n");
print("===================\n");
my @totalidle=();
my $usagemax=0;
foreach my $cpu (0..$maxcpu) {
foreach my $state (0..$maxstate[$cpu]) {
my $dt = $post->[$cpu][$state][0] - $pre->[$cpu][$state][0];
my $du = $post->[$cpu][$state][1] - $pre->[$cpu][$state][1];
$totalidle[$cpu] += $dt;
my $residence = $du ? int($dt/$du) : 0;
if($residence > $usagemax) { $usagemax=$residence; }
}
}
my ($usagescale, $usageletter)=(1,'u');
if($usagemax>99999) { $usagescale=1000; $usageletter='m'; }
my ($h1,$h2,$h3);
$h1=" |";
$h2=" C# |";
$h3="----+";
foreach my $cpu (0..$maxcpu) {
$h1.=" CPU#$cpu |";
$h2.=" time | avg/${usageletter}s |";
$h3.="--------+--------+";
}
print("$h1\n$h2\n$h3\n");
printf(" C%u |", 0); # active is the fake state C0
foreach my $cpu (0..$maxcpu) {
printf(" %5.1f%% | |", 100-$totalidle[$cpu]/($mtime*10000));
}
print("\n");
foreach my $state (0..$maxstate) {
printf(" C%u |", $state+1);
foreach my $cpu (0..$maxcpu) {
if($state<=$maxstate[$cpu]) {
my $dt = $post->[$cpu][$state][0] - $pre->[$cpu][$state][0];
my $du = $post->[$cpu][$state][1] - $pre->[$cpu][$state][1];
# times in us, so divide by 1000000, but want % so use 10000.
# usage is number of transition
printf(" %5.1f%% | % 5s |",
$dt/($mtime*10000),
$du ? int($dt/$du/$usagescale) : "");
} else {
printf(" | |");
}
}
print("\n");
}
#With the fake C0 record for active, there's no need for the idle sum too
#print("Idle\t|");
#foreach my $cpu (0..$maxcpu) {
# printf(" %4.1f%%\t|\t|", $totalidle[$cpu]/($mtime*10000));
#}
#print("\n");
print("\n");
}
# END cpuidle
# BEGIN cpufreq
sub get_cpufreq_ranges()
{
return -r "$cpudir/cpu0/cpufreq/stats/time_in_state" ? 0 : -1;
}
if(get_cpufreq_ranges()<0) {
$do_cpufreq = 0;
print STDERR "cpufreq disabled - no $cpudir/cpu*/cpufreq/stats/time_in_state\n";
print STDERR " enable CPU_FREQ_STAT in kernel config\n"
if($verbose);
}
sub get_cpufreq_data() {
my @ret=();
foreach my $cpu (0..$maxcpu) {
$ret[$cpu]={};
open(F, "<$cpudir/cpu$cpu/cpufreq/stats/time_in_state") or die("cpufreq: 'read $cpudir/cpu$cpu/cpufreq/stats/time_in_state' $@");
while(<F>) {
if(m/(\d+)\s+(\d+)/) { $ret[$cpu]->{$1} = $2; }
else { die("WTF is this doing in cpufreq: $_"); }
}
close(F);
# print("cpu$cpu -> ", join(" ",keys(%{$ret[$cpu]})), "\n");
}
@ret;
}
sub diff_cpufreq($$)
{
my ($pre, $post)=(@_);
my %okfreq=();
print("CPUfreq statistics\n");
print("==================\n");
my @totalticks=();
my @freqs = sort { $a<=>$b } keys(%{$pre->[0]});
foreach my $cpu (0..$maxcpu) {
$totalticks[$cpu]=0;
foreach my $freq (@freqs) {
my $dt = $post->[$cpu]->{$freq} - $pre->[$cpu]->{$freq};
$totalticks[$cpu] += $dt;
if($dt) { $okfreq{$freq}++; }
}
}
my $h1="Frequency |";
my $h2="----------+";
foreach my $cpu (0..$maxcpu) {
$h1.=" CPU#$cpu |";
$h2.="--------+";
}
print("$h1\n$h2\n");
foreach my $freq (@freqs) {
if(!$all and !$okfreq{$freq}) { next; }
printf(" %4u MHz |", int($freq/1000));
foreach my $cpu (0..$maxcpu) {
my $dt = $post->[$cpu]->{$freq} - $pre->[$cpu]->{$freq};
printf(" %5.1f%% |", $dt*100/$totalticks[$cpu]);
}
print("\n");
}
print("\n");
}
# END cpufreq
# BEGIN interrupts
sub get_interrupt_names()
{
open(I, "</proc/interrupts") or return undef;
my @ret=();
while(<I>) {
if(m/^\s+CPU\d/) { next; }
elsif(m/\s*(\w+):\s+\d/) { push(@ret, $1); }
else { print("interrupts: what's: $_"); }
}
close(I);
@ret;
}
my @interrupt_names=get_interrupt_names();
if(scalar(@interrupt_names)<=0) {
$do_interrupts = 0;
print STDERR "interrupts disabled - no /proc/interrupts\n";
}
sub get_interrupt_data() {
my @ret=();
open(I, "</proc/interrupts");
while(<I>) {
if(m/^\s+CPU\d/) {
foreach my $cpu (0..$maxcpu+1) { $ret[$cpu]={}; }
next;
} elsif(s/\s*([\w]+):\s+//) {
my $int=$1;
foreach my $cpu (0..$maxcpu) {
s/(\d+)\s+//;
if($maxcpu>0 && !length($_)) { last; }
$ret[$cpu]->{$int}=$1;
}
chomp;
$ret[$maxcpu+1]->{$int} = $_;
}
else { print("interrupts: what's: $_"); }
}
close(I);
@ret;
}
sub diff_interrupts($$)
{
my ($pre, $post)=(@_);
my %okirq=();
my @output=();
my @activity=();
my $total=0;
foreach my $int (@interrupt_names) {
if($maxcpu>0 and !exists($pre->[1]->{$int})) { next; } # ERR or MIS, cpu-less
my $line=sprintf("%4s |", $int);
my $doit=0;
foreach my $cpu (0..$maxcpu) {
my $dt = $post->[$cpu]->{$int} - $pre->[$cpu]->{$int};
$line.=sprintf(" % 5u |", $dt);
$doit+=$dt;
}
if($doit) {
push(@output, "$line $pre->[$maxcpu+1]->{$int}\n");
push(@activity, $doit);
$total+=$doit;
}
}
print("Interrupt statistics\n");
print("====================\n");
my $h1=" INT |";
my $h2="-----+";
foreach my $cpu (0..$maxcpu) {
$h1.=" CPU#$cpu |";
$h2.="-------+";
}
print("$h1\n$h2\n");
my @indices = sort { $activity[$b]<=>$activity[$a]; } (0..$#activity);
foreach(@indices) { print($output[$_]); }
printf("Summary: %3.1f interrupts/s total\n", $total/$mtime);
print("\n");
}
# END interrupts
# BEGIN timerstats
sub get_timerstats()
{
open(T, ">/proc/timer_stats") or return -1;
print T "0\n";
close(T);
0;
}
if(get_timerstats()<0) {
$do_timerstats = 0;
print STDERR "timerstats disabled - no writeable /proc/timer_stats\n";
if($> != 0) { print STDERR " you need to be root!\n"; }
elsif($verbose) {
print STDERR " enable TIMER_STATS in kernel config\n";
}
}
sub get_timerstats_data($) # 1 then 0
{
my $stop=$_[0];
open(T, ">/proc/timer_stats") or die("timerstats: open() failed: $@");
print T (($stop?1:0),"\n");
close(T);
}
sub diff_timerstats()
{
my $ok=1;
my @output=();
my @activity=();
open(T, "</proc/timer_stats") or die("timerstats: open() failed: $@");
while(<T>) {
if(m/Timer Stats Version/) { next; }
if(m/Sample period: ([\d.]+)\s*s/) { if($1 eq "0.000") { $ok=0; last; } }
if(m/(\d+) total events/) { if($1 eq "0") { $ok=0; last; } }
if(m/\s+(\d+)(D?),\s+(\d+)\s(\S+)\s+(.*)$/) {
push(@output, sprintf(" % 5s | % 8s%1s| % 15s | %s\n",
$3, $1, $2, $4, $5));
push(@activity, $1+($3*10e-9)); # decimals keep tasks together ;-)
}
}
close(T);
if(!$ok) {
print("Timerstats was unable to gather any data from /proc/timer_stats\n\n");
return;
}
my @indices = sort { $activity[$b]<=>$activity[$a]; } (0..$#activity);
print("Timer statistics\n");
print("================\n");
print(" PID | Activity | task's comm | function\n");
print("-------+----------+-----------------+---------\n");
foreach(@indices) { print($output[$_]); }
print("\n");
}
# END timerstats
my %taskinfo=();
my $taskinfodone=0;
sub get_task_info($)
{
my $cmd=headstrip("/proc/$_[0]/cmdline");
if($cmd) {
$cmd=~tr/\0/ /;
}
$cmd;
}
sub get_tasks_info($) {
if(!$_[0] and $taskinfodone) { return; }
for my $p (glob("/proc/[0-9]*")) {
my ($pid)=($p=~m@/proc/(\d+)@);
$taskinfo{$pid}=get_task_info($pid);
}
}
# BEGIN context switch stats
sub get_ctxtsw_init()
{
get_tasks_info(0);
0;
}
if(get_ctxtsw_init() < 0) {
$do_ctxtsw = 0;
print STDERR "context_switches disabled - this is unexpected\n";
}
sub get_ctxtsw_data()
{
my %ret=();
for my $p (glob("/proc/[0-9]*")) {
open(T, "$p/status") or next;
my ($pid)=($p=~m@/proc/(\d+)@);
my @v=();
my $luserspace=0;
my ($name)=(<T>=~/Name:\s+(\S+)/);
while(<T>) {
if(m/^(Vm|State:\s+Z)/) { $luserspace=1; }
elsif(m/(\S*)voluntary_ctxt_switches:\s+(\d+)/) {
$v[length($1)?1:0] = $2;
}
}
if(!defined($taskinfo{$pid})) {
$taskinfo{$pid} = $luserspace ? get_task_info($pid) : "<$name>";
}
close(T);
$ret{$pid}=\@v;
}
%ret;
}
sub diff_ctxtsw($$)
{
my ($pre, $post)=@_;
print("Context switches per task\n",
"=========================\n");
print(" PID | vol'try | non-vol | Cmdline\n",
"-------+---------+---------+--------\n");
my @postpids=sort { $b<=>$a; } (keys(%$post));
foreach my $p (@postpids) {
my $prevals = $pre->{$p} || [0,0];
my $dv=$post->{$p}->[0] - $prevals->[0];
my $dn=$post->{$p}->[1] - $prevals->[1];
printf(" % 5u | % 7u | % 7u | %s\n", $p, $dv, $dn, $taskinfo{$p})
if($dv || $dn);
}
print("\n");
}
# END context switch stats
if(!$do_cpuidle && !$do_cpufreq && !$do_interrupts && !$do_timerstats && !$do_ctxtsw) {
print "No statistics can be gathered, aborting\n";
exit;
}
# Timing preparations
print("Sleeping for $sleep seconds before collecting data for $time seconds\n");
my ($timepre1, $timepre2, $timepost1, $timepost2);
my $timecali=&$xtime if($calibrate);
sleep($sleep);
$timepre1=&$xtime;
if($sleep and $calibrate) {
my $measured=$timepre1-$timecali;
my $ratio=$measured/$sleep;
if($ratio>1.01 or $ratio<0.99) { print("WARNING: No accurate timer found, relying on sleep itself.\n"); }
else { $calibrate=0; } # we trust this measure
}
# Now actually gather the data
my %ctxtsw_pre=get_ctxtsw_data() if($do_ctxtsw);
my @cpuidle_pre=get_cpuidle_data() if($do_cpuidle);
my @cpufreq_pre=get_cpufreq_data() if($do_cpufreq);
my @interrupts_pre=get_interrupt_data() if($do_interrupts);
get_timerstats_data(1) if($do_timerstats);
$timepre2=&$xtime;
sleep($time);
$timepost1=&$xtime;
my %ctxtsw_post=get_ctxtsw_data() if($do_ctxtsw);
my @cpuidle_post=get_cpuidle_data() if($do_cpuidle);
my @cpufreq_post=get_cpufreq_data() if($do_cpufreq);
my @interrupts_post=get_interrupt_data() if($do_interrupts);
get_timerstats_data(0) if($do_timerstats);
$timepost2=&$xtime;
# @$mtime is the measurement time, which will probably be
# longer than the time we requested.
if($calibrate) {
$mtime = $time;
printf("Presumably slept for %1.3fs\n", $mtime);
} else {
$mtime =($timepost1-$timepre2); # how long the sleep took
$mtime += (($timepre2-$timepre1)+($timepost2-$timepost1))/2
if($incompat); # plus the overhead of reading the values
printf("Actually slept for %1.3fs\n", $mtime);
}
diff_cpuidle(\@cpuidle_pre, \@cpuidle_post) if($do_cpuidle);
diff_cpufreq(\@cpufreq_pre, \@cpufreq_post) if($do_cpufreq);
diff_interrupts(\@interrupts_pre, \@interrupts_post) if($do_interrupts);
diff_timerstats() if($do_timerstats);
diff_ctxtsw(\%ctxtsw_pre, \%ctxtsw_post) if($do_ctxtsw);