测试数据分析

06. 测试数据分析

使用Pandas和Matplotlib进行数据分析和可视化

Pandas基础

数据读取

import pandas as pd

# 读取CSV
df = pd.read_csv('test_results.csv')

# 读取Excel
df = pd.read_excel('test_data.xlsx')

# 读取JSON
df = pd.read_json('api_results.json')

# 查看数据
print(df.head())  # 前5行
print(df.tail())  # 后5行
print(df.info())  # 数据信息
print(df.describe())  # 统计摘要

数据清洗

# 处理缺失值
df = df.dropna()  # 删除含缺失值的行
df = df.fillna(0)  # 填充缺失值

# 删除重复行
df = df.drop_duplicates()

# 数据类型转换
df['response_time'] = pd.to_numeric(df['response_time'])
df['timestamp'] = pd.to_datetime(df['timestamp'])

# 过滤数据
success_only = df[df['status'] == 'success']
slow_requests = df[df['response_time'] > 1000]

性能数据分析

分析测试结果

import pandas as pd
import matplotlib.pyplot as plt

class PerformanceAnalyzer:
    """性能测试数据分析器"""
    
    def __init__(self, csv_file):
        self.df = pd.read_csv(csv_file)
        self.df['timestamp'] = pd.to_datetime(self.df['timestamp'])
    
    def basic_stats(self):
        """基础统计"""
        print("=== 性能统计 ===")
        print(f"总请求数: {len(self.df)}")
        print(f"成功率: {(self.df['success']==True).mean()*100:.2f}%")
        print(f"\n响应时间统计:")
        print(self.df['response_time'].describe())
        
        # P95, P99
        p95 = self.df['response_time'].quantile(0.95)
        p99 = self.df['response_time'].quantile(0.99)
        print(f"P95: {p95:.2f}ms")
        print(f"P99: {p99:.2f}ms")
    
    def analyze_by_endpoint(self):
        """按接口分析"""
        grouped = self.df.groupby('endpoint').agg({
            'response_time': ['mean', 'median', 'min', 'max'],
            'success': 'mean'
        })
        print("\n=== 各接口性能 ===")
        print(grouped)
        return grouped
    
    def find_slow_requests(self, threshold=1000):
        """找出慢请求"""
        slow = self.df[self.df['response_time'] > threshold]
        print(f"\n=== 慢请求分析(>{threshold}ms) ===")
        print(f"慢请求数量: {len(slow)}")
        print(f"占比: {len(slow)/len(self.df)*100:.2f}%")
        print("\n慢请求详情:")
        print(slow[['endpoint', 'response_time', 'timestamp']])
        return slow

# 使用示例
analyzer = PerformanceAnalyzer('performance_results.csv')
analyzer.basic_stats()
analyzer.analyze_by_endpoint()
analyzer.find_slow_requests()

时间序列分析

def analyze_time_series(df):
    """时间序列分析"""
    # 按时间分组
    df['minute'] = df['timestamp'].dt.floor('1min')
    time_stats = df.groupby('minute').agg({
        'response_time': 'mean',
        'success': 'mean'
    })
    
    # 绘制趋势图
    fig, axes = plt.subplots(2, 1, figsize=(12, 8))
    
    # 响应时间趋势
    axes[0].plot(time_stats.index, time_stats['response_time'])
    axes[0].set_title('响应时间趋势')
    axes[0].set_ylabel('响应时间(ms)')
    axes[0].grid(True)
    
    # 成功率趋势
    axes[1].plot(time_stats.index, time_stats['success'] * 100)
    axes[1].set_title('成功率趋势')
    axes[1].set_ylabel('成功率(%)')
    axes[1].grid(True)
    
    plt.tight_layout()
    plt.savefig('time_series.png')
    print("时间序列图已保存")

数据可视化

Matplotlib基础

import matplotlib.pyplot as plt
import numpy as np

# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# 基础图表
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# 1. 折线图
axes[0, 0].plot([1, 2, 3, 4], [10, 20, 15, 25])
axes[0, 0].set_title('响应时间趋势')
axes[0, 0].set_xlabel('时间')
axes[0, 0].set_ylabel('响应时间(ms)')

# 2. 柱状图
endpoints = ['登录', '查询', '更新', '删除']
times = [100, 80, 120, 90]
axes[0, 1].bar(endpoints, times)
axes[0, 1].set_title('各接口响应时间')

# 3. 饼图
labels = ['成功', '失败']
sizes = [95, 5]
axes[1, 0].pie(sizes, labels=labels, autopct='%1.1f%%')
axes[1, 0].set_title('请求成功率')

# 4. 直方图
data = np.random.normal(100, 20, 1000)
axes[1, 1].hist(data, bins=30)
axes[1, 1].set_title('响应时间分布')

plt.tight_layout()
plt.savefig('performance_dashboard.png')

性能仪表盘

class PerformanceDashboard:
    """性能数据可视化仪表盘"""
    
    def __init__(self, df):
        self.df = df
        plt.rcParams['font.sans-serif'] = ['SimHei']
    
    def create_dashboard(self, output_file='dashboard.png'):
        """创建完整仪表盘"""
        fig = plt.figure(figsize=(16, 12))
        gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)
        
        # 1. 响应时间分布
        ax1 = fig.add_subplot(gs[0, :2])
        self.plot_response_time_distribution(ax1)
        
        # 2. 成功率饼图
        ax2 = fig.add_subplot(gs[0, 2])
        self.plot_success_rate_pie(ax2)
        
        # 3. 时间趋势
        ax3 = fig.add_subplot(gs[1, :])
        self.plot_time_trend(ax3)
        
        # 4. 各API对比
        ax4 = fig.add_subplot(gs[2, :2])
        self.plot_endpoint_comparison(ax4)
        
        # 5. Top慢请求
        ax5 = fig.add_subplot(gs[2, 2])
        self.plot_slow_requests(ax5)
        
        plt.savefig(output_file, dpi=150, bbox_inches='tight')
        print(f"仪表盘已保存: {output_file}")
    
    def plot_response_time_distribution(self, ax):
        """响应时间分布直方图"""
        ax.hist(self.df['response_time'], bins=50, edgecolor='black')
        ax.set_title('响应时间分布', fontsize=14, fontweight='bold')
        ax.set_xlabel('响应时间(ms)')
        ax.set_ylabel('请求数量')
        ax.axvline(self.df['response_time'].mean(), 
                   color='red', linestyle='--', label='平均值')
        ax.legend()
    
    def plot_success_rate_pie(self, ax):
        """成功率饼图"""
        success_count = (self.df['success'] == True).sum()
        fail_count = len(self.df) - success_count
        
        ax.pie([success_count, fail_count], 
               labels=['成功', '失败'],
               autopct='%1.1f%%',
               colors=['#2ecc71', '#e74c3c'])
        ax.set_title('请求成功率', fontsize=14, fontweight='bold')
    
    def plot_time_trend(self, ax):
        """时间趋势图"""
        self.df['minute'] = pd.to_datetime(self.df['timestamp']).dt.floor('1min')
        trend = self.df.groupby('minute')['response_time'].mean()
        
        ax.plot(trend.index, trend.values, linewidth=2)
        ax.fill_between(trend.index, trend.values, alpha=0.3)
        ax.set_title('响应时间趋势', fontsize=14, fontweight='bold')
        ax.set_xlabel('时间')
        ax.set_ylabel('平均响应时间(ms)')
        ax.grid(True, alpha=0.3)
    
    def plot_endpoint_comparison(self, ax):
        """各API性能对比"""
        grouped = self.df.groupby('endpoint')['response_time'].agg(['mean', 'median'])
        
        x = np.arange(len(grouped))
        width = 0.35
        
        ax.bar(x - width/2, grouped['mean'], width, label='平均值')
        ax.bar(x + width/2, grouped['median'], width, label='中位数')
        
        ax.set_title('各API响应时间对比', fontsize=14, fontweight='bold')
        ax.set_xlabel('API接口')
        ax.set_ylabel('响应时间(ms)')
        ax.set_xticks(x)
        ax.set_xticklabels(grouped.index, rotation=45, ha='right')
        ax.legend()
    
    def plot_slow_requests(self, ax):
        """慢请求TOP10"""
        slow = self.df.nlargest(10, 'response_time')
        
        ax.barh(range(len(slow)), slow['response_time'])
        ax.set_yticks(range(len(slow)))
        ax.set_yticklabels(slow['endpoint'], fontsize=8)
        ax.set_title('TOP10慢请求', fontsize=14, fontweight='bold')
        ax.set_xlabel('响应时间(ms)')
        ax.invert_yaxis()

# 使用示例
df = pd.read_csv('performance_results.csv')
dashboard = PerformanceDashboard(df)
dashboard.create_dashboard()

测试报告生成

HTML报告生成器

import pandas as pd
from jinja2 import Template

class TestReportGenerator:
    """测试报告生成器"""
    
    HTML_TEMPLATE = """
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="UTF-8">
        <title>性能测试报告</title>
        <style>
            body { font-family: Arial, sans-serif; margin: 40px; }
            h1 { color: #2c3e50; }
            h2 { color: #34495e; border-bottom: 2px solid #3498db; padding-bottom: 10px; }
            table { border-collapse: collapse; width: 100%; margin: 20px 0; }
            th, td { border: 1px solid #ddd; padding: 12px; text-align: left; }
            th { background-color: #3498db; color: white; }
            tr:nth-child(even) { background-color: #f2f2f2; }
            .success { color: #27ae60; font-weight: bold; }
            .fail { color: #e74c3c; font-weight: bold; }
            .chart { margin: 20px 0; text-align: center; }
            .summary { background: #ecf0f1; padding: 20px; border-radius: 5px; margin: 20px 0; }
        </style>
    </head>
    <body>
        <h1></h1>
        <p>测试时间: </p>
        
        <div class="summary">
            <h2>测试摘要</h2>
            <p>总请求数: </p>
            <p>成功率: <span class="success">%</span></p>
            <p>平均响应时间: ms</p>
            <p>P95响应时间: ms</p>
        </div>
        
        <h2>详细统计</h2>
        
        
        <h2>性能图表</h2>
        <div class="chart">
            <img src="dashboard.png" style="max-width: 100%;">
        </div>
        
        <h2>慢请求列表</h2>
        
    </body>
    </html>
    """
    
    def __init__(self, df):
        self.df = df
    
    def generate(self, output_file='test_report.html'):
        """生成报告"""
        from datetime import datetime
        
        # 计算统计数据
        total_requests = len(self.df)
        success_rate = (self.df['success'] == True).mean() * 100
        avg_response_time = self.df['response_time'].mean()
        p95_response_time = self.df['response_time'].quantile(0.95)
        
        # 按API统计
        stats = self.df.groupby('endpoint').agg({
            'response_time': ['mean', 'median', 'max'],
            'success': lambda x: (x == True).mean() * 100
        }).round(2)
        
        # 慢请求
        slow_requests = self.df.nlargest(20, 'response_time')
        
        # 渲染模板
        template = Template(self.HTML_TEMPLATE)
        html = template.render(
            title='游戏性能测试报告',
            test_time=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            total_requests=total_requests,
            success_rate=f'{success_rate:.2f}',
            avg_response_time=f'{avg_response_time:.2f}',
            p95_response_time=f'{p95_response_time:.2f}',
            stats_table=stats.to_html(classes='stats-table'),
            slow_requests_table=slow_requests[['endpoint', 'response_time', 'timestamp']].to_html()
        )
        
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(html)
        
        print(f"报告已生成: {output_file}")

实战案例

游戏登录性能分析

import pandas as pd
import matplotlib.pyplot as plt

# 读取测试数据
df = pd.read_csv('login_test_results.csv')
df['timestamp'] = pd.to_datetime(df['timestamp'])

# 基础统计
print("=== 登录性能分析 ===")
print(f"总登录次数: {len(df)}")
print(f"成功登录: {(df['success']==True).sum()}")
print(f"成功率: {(df['success']==True).mean()*100:.2f}%")

# 响应时间分析
print(f"\n响应时间统计:")
print(df['response_time'].describe())

# 找出慢登录
slow_logins = df[df['response_time'] > 2000]
print(f"\n慢登录(>2s): {len(slow_logins)}次")

# 按时段分析
df['hour'] = df['timestamp'].dt.hour
hourly_stats = df.groupby('hour').agg({
    'response_time': 'mean',
    'success': lambda x: (x==True).mean()
})

# 可视化
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# 响应时间分布
axes[0].hist(df['response_time'], bins=50)
axes[0].set_title('登录响应时间分布')
axes[0].set_xlabel('响应时间(ms)')
axes[0].set_ylabel('次数')

# 按小时统计
axes[1].plot(hourly_stats.index, hourly_stats['response_time'])
axes[1].set_title('各时段平均登录时间')
axes[1].set_xlabel('小时')
axes[1].set_ylabel('平均响应时间(ms)')
axes[1].grid(True)

plt.tight_layout()
plt.savefig('login_analysis.png')

数据对比

AB测试对比

def compare_ab_tests(file_a, file_b):
    """对比AB测试结果"""
    df_a = pd.read_csv(file_a)
    df_b = pd.read_csv(file_b)
    
    print("=== AB测试对比 ===")
    print(f"\n版本A:")
    print(f"  平均响应时间: {df_a['response_time'].mean():.2f}ms")
    print(f"  P95: {df_a['response_time'].quantile(0.95):.2f}ms")
    print(f"  成功率: {(df_a['success']==True).mean()*100:.2f}%")
    
    print(f"\n版本B:")
    print(f"  平均响应时间: {df_b['response_time'].mean():.2f}ms")
    print(f"  P95: {df_b['response_time'].quantile(0.95):.2f}ms")
    print(f"  成功率: {(df_b['success']==True).mean()*100:.2f}%")
    
    # 可视化对比
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    
    # 响应时间对比
    axes[0].hist(df_a['response_time'], alpha=0.5, label='版本A', bins=30)
    axes[0].hist(df_b['response_time'], alpha=0.5, label='版本B', bins=30)
    axes[0].set_title('响应时间分布对比')
    axes[0].legend()
    
    # 箱型图对比
    axes[1].boxplot([df_a['response_time'], df_b['response_time']], 
                    labels=['版本A', '版本B'])
    axes[1].set_title('响应时间箱型图')
    axes[1].set_ylabel('响应时间(ms)')
    
    plt.tight_layout()
    plt.savefig('ab_comparison.png')

下一步

继续学习:

目录

06. 测试数据分析

Pandas基础

数据读取

数据清洗

性能数据分析

分析测试结果

时间序列分析

数据可视化

Matplotlib基础

性能仪表盘

测试报告生成

HTML报告生成器

实战案例

游戏登录性能分析

数据对比

AB测试对比

下一步