当前位置: 首页 > article >正文

15分钟学 Go 实战项目六 :统计分析工具项目(30000字完整例子)

统计分析工具项目

1. 项目概述

| 功能模块 | 说明 | 难度 |
| --- | --- | --- |
| 数据收集 | CSV文件读取和解析 | ★★☆☆☆ |
| 数据分析 | 基本统计和高级分析 | ★★★☆☆ |
| 可视化 | 生成图表和报告 | ★★★★☆ |
| 导出功能 | 支持多种格式导出 | ★★☆☆☆ |
| Web界面 | 交互式数据分析 | ★★★★☆ |

2. 项目架构

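(此处原为项目架构图,图片未能保留。按下文代码的文件划分,项目分为:models 数据模型层、services 服务层(processor 数据处理、analyzer 数据分析、report 报告生成)、handlers Web 处理层,以及 templates / static 前端页面与脚本。)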

3. 核心代码实现

3.1 数据模型定义

// models/dataset.go
package models

import (
    "time"
)

// DataSet is an in-memory table: a list of column definitions plus the data rows.
type DataSet struct {
    Name        string // LoadCSV stores the source file name here
    Description string
    Columns     []Column // one entry per CSV header field
    Rows        []Row    // one entry per data record
    CreatedAt   time.Time
    UpdatedAt   time.Time
}

// Column describes a single column of a DataSet.
type Column struct {
    Name     string
    Type     string      // e.g. string, int, float, date; the type inference in this article assigns "string", "float" or "date"
    Stats    Statistics
}

// Row is one data record; Values holds one cell per column.
// LoadCSV stores every cell as a string.
type Row struct {
    Values []interface{}
}

// Statistics holds the descriptive statistics computed for one numeric column.
type Statistics struct {
    Count       int     // number of values that parsed as numbers
    Mean        float64
    Median      float64
    Mode        float64
    StdDev      float64 // population standard deviation (divides by n)
    Min         float64
    Max         float64
    Percentiles map[int]float64 // keyed by percentile: 25, 50, 75, 90, 95, 99
}

// AnalysisResult bundles the outcome of analysing one data set.
// NOTE(review): nothing in the visible code populates this struct; the key
// meanings below are presumed from the field names and should be confirmed.
type AnalysisResult struct {
    DataSetName    string
    ColumnStats    map[string]Statistics         // presumably keyed by column name
    Correlations   map[string]map[string]float64 // presumably column -> column -> coefficient
    Trends         map[string][]float64
    GeneratedAt    time.Time
}

// ChartConfig describes one chart to be rendered.
type ChartConfig struct {
    Type      string // line, bar, scatter, etc.
    Title     string
    XAxis     string
    YAxis     string
    Data      map[string]interface{}
    Options   map[string]interface{}
}

// ReportTemplate is a named report layout together with the charts it contains.
type ReportTemplate struct {
    Name        string
    Content     string
    Charts      []ChartConfig
    CreatedAt   time.Time
}

3.2 数据处理核心功能

// services/processor.go
package services

import (
    "encoding/csv"
    "math"
    "os"
    "sort"
    "strconv"

    "github.com/your/stats/models"
)

// DataProcessor loads a CSV file into a data set and computes per-column statistics.
type DataProcessor struct {
    dataset *models.DataSet // nil until LoadCSV has been called
}

// NewDataProcessor returns a DataProcessor that has no data set loaded yet.
func NewDataProcessor() *DataProcessor {
    processor := new(DataProcessor)
    return processor
}

// LoadCSV reads the CSV file at filename into a new data set and then infers
// each column's type.
//
// The first record is treated as the header row. Every cell is stored as a
// string. An error is returned when the file cannot be opened, when the header
// cannot be read (an empty file yields io.EOF), or when any data record is
// malformed; in those cases the previously loaded data set is left untouched.
func (p *DataProcessor) LoadCSV(filename string) error {
    file, err := os.Open(filename)
    if err != nil {
        return err
    }
    defer file.Close()

    reader := csv.NewReader(file)

    // Header row.
    headers, err := reader.Read()
    if err != nil {
        return err
    }

    // Every column starts out as "string"; inferDataTypes refines it later.
    columns := make([]models.Column, len(headers))
    for i, header := range headers {
        columns[i] = models.Column{
            Name: header,
            Type: "string",
        }
    }

    // ReadAll stops cleanly at end of input and reports every other failure,
    // so a malformed record or I/O error is surfaced instead of silently
    // truncating the data.
    records, err := reader.ReadAll()
    if err != nil {
        return err
    }

    rows := make([]models.Row, 0, len(records))
    for _, record := range records {
        values := make([]interface{}, len(record))
        for i, v := range record {
            values[i] = v
        }
        rows = append(rows, models.Row{Values: values})
    }

    p.dataset = &models.DataSet{
        Name:    filename,
        Columns: columns,
        Rows:    rows,
    }

    return p.inferDataTypes()
}

// inferDataTypes sets the Type of every column of the loaded data set.
//
// A column becomes "float" when every cell parses as a float, "date" when every
// cell matches the layout 2006-01-02, and "string" otherwise. A data set without
// rows keeps all columns as "string", because nothing can be inferred from no
// data. It always returns nil.
//
// NOTE(review): this function needs the time package; the import block shown
// for this file does not list it - confirm it is imported.
func (p *DataProcessor) inferDataTypes() error {
    hasRows := len(p.dataset.Rows) > 0

    for colIndex := range p.dataset.Columns {
        isNumeric := hasRows
        isDate := hasRows

        for _, row := range p.dataset.Rows {
            // Stop scanning once both candidates are ruled out.
            if !isNumeric && !isDate {
                break
            }

            // A short row or a non-string cell disqualifies both candidates
            // instead of panicking.
            if colIndex >= len(row.Values) {
                isNumeric, isDate = false, false
                break
            }
            value, ok := row.Values[colIndex].(string)
            if !ok {
                isNumeric, isDate = false, false
                break
            }

            if isNumeric {
                if _, err := strconv.ParseFloat(value, 64); err != nil {
                    isNumeric = false
                }
            }
            if isDate {
                if _, err := time.Parse("2006-01-02", value); err != nil {
                    isDate = false
                }
            }
        }

        switch {
        case isNumeric:
            p.dataset.Columns[colIndex].Type = "float"
        case isDate:
            p.dataset.Columns[colIndex].Type = "date"
        default:
            p.dataset.Columns[colIndex].Type = "string"
        }
    }

    return nil
}

// CalculateStatistics fills in Stats for every column whose Type is "float".
// Cells that do not parse as a float are left out of the computation.
// It always returns nil.
func (p *DataProcessor) CalculateStatistics() error {
    rows := p.dataset.Rows

    for idx := range p.dataset.Columns {
        if p.dataset.Columns[idx].Type != "float" {
            continue
        }

        // Collect the numeric cells of this column.
        numbers := make([]float64, 0, len(rows))
        for _, row := range rows {
            parsed, err := strconv.ParseFloat(row.Values[idx].(string), 64)
            if err != nil {
                continue
            }
            numbers = append(numbers, parsed)
        }

        p.dataset.Columns[idx].Stats = models.Statistics{
            Count:       len(numbers),
            Mean:        p.calculateMean(numbers),
            Median:      p.calculateMedian(numbers),
            Mode:        p.calculateMode(numbers),
            StdDev:      p.calculateStdDev(numbers),
            Min:         p.calculateMin(numbers),
            Max:         p.calculateMax(numbers),
            Percentiles: p.calculatePercentiles(numbers),
        }
    }

    return nil
}

// calculateMean returns the arithmetic mean of values, or 0 for an empty slice.
func (p *DataProcessor) calculateMean(values []float64) float64 {
    n := len(values)
    if n == 0 {
        return 0
    }

    var total float64
    for i := 0; i < n; i++ {
        total += values[i]
    }
    return total / float64(n)
}

// calculateMedian returns the median of values, or 0 for an empty slice.
// For an even number of values it is the average of the two middle ones.
// The input slice is not modified.
func (p *DataProcessor) calculateMedian(values []float64) float64 {
    n := len(values)
    if n == 0 {
        return 0
    }

    // Sort a private copy so the caller's slice keeps its order.
    ordered := append([]float64(nil), values...)
    sort.Float64s(ordered)

    mid := n / 2
    if n%2 == 1 {
        return ordered[mid]
    }
    return (ordered[mid-1] + ordered[mid]) / 2
}

// calculateStdDev returns the population standard deviation of values
// (the squared deviations are divided by n), or 0 for an empty slice.
func (p *DataProcessor) calculateStdDev(values []float64) float64 {
    n := len(values)
    if n == 0 {
        return 0
    }

    center := p.calculateMean(values)

    var squares float64
    for i := range values {
        squares += math.Pow(values[i]-center, 2)
    }

    variance := squares / float64(n)
    return math.Sqrt(variance)
}

// calculatePercentiles returns the 25th, 50th, 75th, 90th, 95th and 99th
// percentiles of values, keyed by percentile.
//
// Each percentile is the element at index floor(pct/100 * (n-1)) of the sorted
// values (no interpolation). An empty input yields an empty, non-nil map
// instead of panicking. The input slice is not modified.
func (p *DataProcessor) calculatePercentiles(values []float64) map[int]float64 {
    percentiles := make(map[int]float64)
    if len(values) == 0 {
        return percentiles
    }

    sorted := make([]float64, len(values))
    copy(sorted, values)
    sort.Float64s(sorted)

    last := float64(len(sorted) - 1)
    for _, pct := range []int{25, 50, 75, 90, 95, 99} {
        index := int(float64(pct) / 100 * last)
        percentiles[pct] = sorted[index]
    }

    return percentiles
}

3.3 分析功能实现

// services/analyzer.go
package services

import (
    "errors"
    "math"
    "sort"
    "time"
    
    "github.com/your/stats/models"
)

// Analyzer runs correlation, trend, distribution and outlier analyses on a data set.
type Analyzer struct {
    dataset *models.DataSet
}

// NewAnalyzer returns an Analyzer bound to dataset.
func NewAnalyzer(dataset *models.DataSet) *Analyzer {
    analyzer := new(Analyzer)
    analyzer.dataset = dataset
    return analyzer
}

// CalculateCorrelation returns the Pearson correlation coefficient between the
// numeric columns col1 and col2.
//
// An error is returned when the columns cannot be fetched, when they differ in
// length, when fewer than two observations are available, or when either
// column has zero variance (the coefficient is undefined there and would
// otherwise come out as NaN).
func (a *Analyzer) CalculateCorrelation(col1, col2 string) (float64, error) {
    values1, values2, err := a.getNumericColumns(col1, col2)
    if err != nil {
        return 0, err
    }
    if len(values1) != len(values2) {
        return 0, errors.New("correlation: columns have different lengths")
    }
    if len(values1) < 2 {
        return 0, errors.New("correlation: at least two observations are required")
    }

    mean1 := mean(values1)
    mean2 := mean(values2)

    // Accumulate the covariance and both variance sums in one pass.
    var sum, sum1, sum2 float64
    for i := range values1 {
        diff1 := values1[i] - mean1
        diff2 := values2[i] - mean2
        sum += diff1 * diff2
        sum1 += diff1 * diff1
        sum2 += diff2 * diff2
    }

    denominator := math.Sqrt(sum1 * sum2)
    if denominator == 0 {
        return 0, errors.New("correlation: undefined for a column with zero variance")
    }

    return sum / denominator, nil
}

// AnalyzeTrends sorts the (timeCol, valueCol) observations chronologically and
// returns, for every point that has four predecessors, the point itself
// together with the 5-point trailing moving average ending at it.
// With fewer than five observations the result is an empty slice.
func (a *Analyzer) AnalyzeTrends(timeCol, valueCol string) ([]models.TrendPoint, error) {
    dates, values, err := a.getTimeSeriesData(timeCol, valueCol)
    if err != nil {
        return nil, err
    }

    // Pair each timestamp with its value so both are sorted together.
    type sample struct {
        at  time.Time
        val float64
    }

    series := make([]sample, len(dates))
    for idx, d := range dates {
        series[idx] = sample{at: d, val: values[idx]}
    }

    sort.Slice(series, func(i, j int) bool {
        return series[i].time().Before(series[j].time())
    })

    const windowSize = 5
    trends := make([]models.TrendPoint, 0)

    for end := windowSize - 1; end < len(series); end++ {
        // Sum from the newest point backwards over the window.
        var total float64
        for k := end; k > end-windowSize; k-- {
            total += series[k].val
        }

        point := models.TrendPoint{
            Time:  series[end].at,
            Value: series[end].val,
            Trend: total / float64(windowSize),
        }
        trends = append(trends, point)
    }

    return trends, nil
}

// CalculateDistribution builds a histogram and summary statistics for column.
//
// The number of bins follows the square-root rule (floor(sqrt(n))). When all
// values are equal a single zero-width bin is used. Quantiles 0.25, 0.5 and
// 0.75 are taken from the sorted values at index floor((n-1)*q).
//
// An error is returned when the column cannot be fetched or has no values.
func (a *Analyzer) CalculateDistribution(column string) (*models.Distribution, error) {
    values, err := a.getColumnValues(column)
    if err != nil {
        return nil, err
    }
    if len(values) == 0 {
        return nil, errors.New("distribution: column has no values")
    }

    // Data range; local names avoid shadowing the min/max builtins.
    lo, hi := minMax(values)
    binCount := int(math.Sqrt(float64(len(values)))) // square-root rule

    binSize := (hi - lo) / float64(binCount)
    if binSize == 0 {
        // All values are identical: one bin holds everything.
        binCount = 1
    }

    bins := make([]models.HistogramBin, binCount)
    for i := range bins {
        bins[i] = models.HistogramBin{
            Start: lo + float64(i)*binSize,
            End:   lo + float64(i+1)*binSize,
            Count: 0,
        }
    }

    // Count the data points falling into each bin.
    for _, v := range values {
        binIndex := 0
        if binSize > 0 {
            binIndex = int((v - lo) / binSize)
        }
        // The maximum lands exactly on the upper edge; clamp it (and any
        // out-of-range conversion result) into the valid range.
        if binIndex >= binCount {
            binIndex = binCount - 1
        }
        if binIndex < 0 {
            binIndex = 0
        }
        bins[binIndex].Count++
    }

    sorted := make([]float64, len(values))
    copy(sorted, values)
    sort.Float64s(sorted)

    distribution := &models.Distribution{
        Bins:       bins,
        Mean:       mean(values),
        Median:     median(sorted),
        StdDev:     stdDev(values),
        Quantiles:  make(map[float64]float64),
        Skewness:   skewness(values),
        Kurtosis:   kurtosis(values),
    }

    // Quartiles.
    for _, q := range []float64{0.25, 0.5, 0.75} {
        idx := int(float64(len(sorted)-1) * q)
        distribution.Quantiles[q] = sorted[idx]
    }

    return distribution, nil
}

// PerformOutlierAnalysis flags the values of column that lie outside the
// Tukey fences [Q1 - 1.5*IQR, Q3 + 1.5*IQR].
//
// Q1 and Q3 are the sorted values at index floor(n*0.25) and floor(n*0.75).
// Each outlier carries its index in the original column order, its z-score and
// its distance from Q1 in IQR units. A score whose denominator is zero is
// reported as 0 rather than Inf/NaN, which could not be encoded as JSON.
//
// An error is returned when the column cannot be fetched or has no values.
func (a *Analyzer) PerformOutlierAnalysis(column string) (*models.OutlierAnalysis, error) {
    values, err := a.getColumnValues(column)
    if err != nil {
        return nil, err
    }
    if len(values) == 0 {
        return nil, errors.New("outlier analysis: column has no values")
    }

    // Interquartile range.
    sorted := make([]float64, len(values))
    copy(sorted, values)
    sort.Float64s(sorted)

    q1 := sorted[int(float64(len(sorted))*0.25)]
    q3 := sorted[int(float64(len(sorted))*0.75)]
    iqr := q3 - q1

    lowerBound := q1 - 1.5*iqr
    upperBound := q3 + 1.5*iqr

    // Loop-invariant: compute once instead of once per outlier.
    avg := mean(values)
    sd := stdDev(values)

    outliers := make([]models.Outlier, 0)
    for i, v := range values {
        if v < lowerBound || v > upperBound {
            var zScore, iqrScore float64
            if sd != 0 {
                zScore = (v - avg) / sd
            }
            if iqr != 0 {
                iqrScore = (v - q1) / iqr
            }
            outliers = append(outliers, models.Outlier{
                Value:    v,
                Index:    i,
                ZScore:   zScore,
                IQRScore: iqrScore,
            })
        }
    }

    return &models.OutlierAnalysis{
        Q1:          q1,
        Q3:          q3,
        IQR:         iqr,
        LowerBound:  lowerBound,
        UpperBound:  upperBound,
        OutlierCount: len(outliers),
        Outliers:    outliers,
    }, nil
}

// Helper functions.

// getNumericColumns is a placeholder: it currently returns nil slices and a
// nil error regardless of col1 and col2.
// TODO: implement fetching the two numeric columns from a.dataset.
func (a *Analyzer) getNumericColumns(col1, col2 string) ([]float64, []float64, error) {
    // Logic for fetching the numeric columns goes here.
    return nil, nil, nil
}

// getTimeSeriesData is a placeholder: it currently returns nil slices and a
// nil error regardless of timeCol and valueCol.
// TODO: implement fetching the time-series data from a.dataset.
func (a *Analyzer) getTimeSeriesData(timeCol, valueCol string) ([]time.Time, []float64, error) {
    // Logic for fetching the time-series data goes here.
    return nil, nil, nil
}

// getColumnValues is a placeholder: it currently returns a nil slice and a nil
// error regardless of column.
// TODO: implement fetching the column's values from a.dataset.
func (a *Analyzer) getColumnValues(column string) ([]float64, error) {
    // Logic for fetching the column values goes here.
    return nil, nil
}

3.4 报告生成功能

// services/report.go
package services

import (
    "bytes"
    "encoding/json"
    "html/template"
    "time"
    
    "github.com/jung-kurt/gofpdf"
    "github.com/xuri/excelize/v2"
    "github.com/your/stats/models"
)

// ReportGenerator renders a data set and its analysis result as HTML, PDF or Excel reports.
type ReportGenerator struct {
    dataset *models.DataSet
    analysis *models.AnalysisResult
}

// NewReportGenerator returns a ReportGenerator for the given data set and analysis result.
func NewReportGenerator(dataset *models.DataSet, analysis *models.AnalysisResult) *ReportGenerator {
    generator := new(ReportGenerator)
    generator.dataset = dataset
    generator.analysis = analysis
    return generator
}

// GenerateHTMLReport renders the report as a standalone HTML page and returns
// it as a string.
//
// The page shows the data set overview, a per-column statistics table, and two
// Plotly charts (correlation heatmap and trend) fed from
// prepareCorrelationData and prepareTrendData. An error is returned when the
// template fails to parse or execute.
func (r *ReportGenerator) GenerateHTMLReport() (string, error) {
    const reportTemplate = `
    <!DOCTYPE html>
    <html>
    <head>
        <title>数据分析报告</title>
        <style>
            body { font-family: Arial, sans-serif; }
            .header { text-align: center; margin: 20px 0; }
            .section { margin: 20px 0; }
            .table { width: 100%; border-collapse: collapse; }
            .table th, .table td { border: 1px solid #ddd; padding: 8px; }
            .chart { margin: 20px 0; }
        </style>
        <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
    </head>
    <body>
        <div class="header">
            <h1>数据分析报告</h1>
            <p>生成时间: {{.GeneratedAt}}</p>
        </div>

        <div class="section">
            <h2>数据集概览</h2>
            <p>数据集名称: {{.DataSetName}}</p>
            <p>记录数: {{.RowCount}}</p>
            <p>列数: {{.ColumnCount}}</p>
        </div>

        <div class="section">
            <h2>统计摘要</h2>
            <table class="table">
                <tr>
                    <th>列名</th>
                    <th>类型</th>
                    <th>均值</th>
                    <th>中位数</th>
                    <th>标准差</th>
                </tr>
                {{range .Columns}}
                <tr>
                    <td>{{.Name}}</td>
                    <td>{{.Type}}</td>
                    <td>{{printf "%.2f" .Stats.Mean}}</td>
                    <td>{{printf "%.2f" .Stats.Median}}</td>
                    <td>{{printf "%.2f" .Stats.StdDev}}</td>
                </tr>
                {{end}}
            </table>
        </div>

        <div class="section">
            <h2>相关性分析</h2>
            <div id="correlationHeatmap" class="chart"></div>
        </div>

        <div class="section">
            <h2>趋势分析</h2>
            <div id="trendChart" class="chart"></div>
        </div>

        <script>
            // 绘制相关性热图
            var correlationData = {{.CorrelationData}};
            Plotly.newPlot('correlationHeatmap', [{
                z: correlationData.values,
                x: correlationData.columns,
                y: correlationData.columns,
                type: 'heatmap',
                colorscale: 'Viridis'
            }]);

            // 绘制趋势图
            var trendData = {{.TrendData}};
            Plotly.newPlot('trendChart', [{
                x: trendData.dates,
                y: trendData.values,
                type: 'scatter',
                mode: 'lines+markers',
                name: '实际值'
            }, {
                x: trendData.dates,
                y: trendData.trend,
                type: 'scatter',
                mode: 'lines',
                name: '趋势'
            }]);
        </script>
    </body>
    </html>
    `

    // The template is parsed on every call.
    tmpl, err := template.New("report").Parse(reportTemplate)
    if err != nil {
        return "", err
    }

    // View model handed to the template; field names match the {{.X}} actions above.
    data := struct {
        GeneratedAt     string
        DataSetName     string
        RowCount        int
        ColumnCount     int
        Columns         []models.Column
        CorrelationData map[string]interface{}
        TrendData       map[string]interface{}
    }{
        GeneratedAt: time.Now().Format("2006-01-02 15:04:05"),
        DataSetName: r.dataset.Name,
        RowCount:    len(r.dataset.Rows),
        ColumnCount: len(r.dataset.Columns),
        Columns:     r.dataset.Columns,
        CorrelationData: r.prepareCorrelationData(),
        TrendData:       r.prepareTrendData(),
    }

    // Render into memory so a failed execution never yields partial output.
    var buf bytes.Buffer
    if err := tmpl.Execute(&buf, data); err != nil {
        return "", err
    }

    return buf.String(), nil
}

// GeneratePDFReport builds a one-page A4 PDF with the data set name, the
// generation time and a per-column statistics table.
//
// gofpdf records the first failure internally instead of returning it from
// each call, so that accumulated error is checked once at the end and returned
// to the caller.
//
// NOTE(review): the text is written with the core "Arial" font while the
// labels are Chinese; confirm the output renders correctly or register a
// UTF-8 font. This function also uses the fmt package, which the import block
// shown for this file does not list - confirm it is imported.
func (r *ReportGenerator) GeneratePDFReport() (*gofpdf.Fpdf, error) {
    pdf := gofpdf.New("P", "mm", "A4", "")
    pdf.AddPage()

    // Title.
    pdf.SetFont("Arial", "B", 16)
    pdf.Cell(190, 10, "数据分析报告")
    pdf.Ln(15)

    // Basic information.
    pdf.SetFont("Arial", "", 12)
    pdf.Cell(190, 8, "数据集: "+r.dataset.Name)
    pdf.Ln(10)
    pdf.Cell(190, 8, "生成时间: "+time.Now().Format("2006-01-02 15:04:05"))
    pdf.Ln(15)

    // Statistics summary heading.
    pdf.SetFont("Arial", "B", 12)
    pdf.Cell(190, 10, "统计摘要")
    pdf.Ln(10)

    // Table header: five columns of 38mm fill the 190mm content width.
    headers := []string{"列名", "类型", "均值", "中位数", "标准差"}
    for _, header := range headers {
        pdf.Cell(38, 10, header)
    }
    pdf.Ln(10)

    // Table body: one line per column of the data set.
    pdf.SetFont("Arial", "", 10)
    for _, col := range r.dataset.Columns {
        pdf.Cell(38, 8, col.Name)
        pdf.Cell(38, 8, col.Type)
        pdf.Cell(38, 8, fmt.Sprintf("%.2f", col.Stats.Mean))
        pdf.Cell(38, 8, fmt.Sprintf("%.2f", col.Stats.Median))
        pdf.Cell(38, 8, fmt.Sprintf("%.2f", col.Stats.StdDev))
        pdf.Ln(8)
    }

    // Surface any error gofpdf accumulated while building the document.
    if err := pdf.Error(); err != nil {
        return nil, err
    }

    return pdf, nil
}

// GenerateExcelReport builds a workbook with an overview sheet and a
// statistics-summary sheet (one row per column of the data set).
//
// Cell names are derived with excelize.CoordinatesToCellName, so the header
// row is not limited to 26 columns. The first error reported while writing a
// cell aborts the report and is returned.
func (r *ReportGenerator) GenerateExcelReport() (*excelize.File, error) {
    f := excelize.NewFile()

    // setCell writes one value at (col, row), both 1-based, and remembers the
    // first failure; later writes become no-ops once an error has occurred.
    var firstErr error
    setCell := func(sheet string, col, row int, value interface{}) {
        if firstErr != nil {
            return
        }
        cell, err := excelize.CoordinatesToCellName(col, row)
        if err != nil {
            firstErr = err
            return
        }
        firstErr = f.SetCellValue(sheet, cell, value)
    }

    // Overview sheet.
    overview := "概览"
    f.NewSheet(overview)
    setCell(overview, 1, 1, "数据分析报告")
    setCell(overview, 1, 2, "数据集名称")
    setCell(overview, 2, 2, r.dataset.Name)
    setCell(overview, 1, 3, "生成时间")
    setCell(overview, 2, 3, time.Now().Format("2006-01-02 15:04:05"))

    // Statistics summary sheet: header in row 1, data from row 2.
    summary := "统计摘要"
    f.NewSheet(summary)
    headers := []string{"列名", "类型", "均值", "中位数", "标准差", "最小值", "最大值"}
    for i, header := range headers {
        setCell(summary, i+1, 1, header)
    }

    for i, col := range r.dataset.Columns {
        row := i + 2
        setCell(summary, 1, row, col.Name)
        setCell(summary, 2, row, col.Type)
        setCell(summary, 3, row, col.Stats.Mean)
        setCell(summary, 4, row, col.Stats.Median)
        setCell(summary, 5, row, col.Stats.StdDev)
        setCell(summary, 6, row, col.Stats.Min)
        setCell(summary, 7, row, col.Stats.Max)
    }

    if firstErr != nil {
        return nil, firstErr
    }

    return f, nil
}

// prepareCorrelationData is a placeholder: it currently returns nil.
// The HTML report template reads the keys "values" and "columns" from the result.
// TODO: implement building the correlation data.
func (r *ReportGenerator) prepareCorrelationData() map[string]interface{} {
    // Logic for preparing the correlation data goes here.
    return nil
}

// prepareTrendData is a placeholder: it currently returns nil.
// The HTML report template reads the keys "dates", "values" and "trend" from the result.
// TODO: implement building the trend data.
func (r *ReportGenerator) prepareTrendData() map[string]interface{} {
    // Logic for preparing the trend data goes here.
    return nil
}

3.5 Web界面实现

// handlers/web.go
package handlers

import (
    "encoding/json"
    "net/http"
    "path/filepath"
    
    "github.com/gin-gonic/gin"
    "github.com/your/stats/services"
    "github.com/your/stats/models"
)

// WebHandler serves the web pages and the JSON API of the analysis tool.
type WebHandler struct {
    processor *services.DataProcessor
    analyzer  *services.Analyzer        // set by handleFileUpload; nil before the first upload
    reporter  *services.ReportGenerator // set by handleGenerateReport
}

// NewWebHandler returns a WebHandler with a fresh data processor; the analyzer
// and reporter are left unset.
func NewWebHandler() *WebHandler {
    handler := new(WebHandler)
    handler.processor = services.NewDataProcessor()
    return handler
}

// SetupRoutes registers the static assets, the HTML templates, the page routes
// and the /api routes on r.
func (h *WebHandler) SetupRoutes(r *gin.Engine) {
    // Static assets and templates.
    r.Static("/static", "./static")
    r.LoadHTMLGlob("templates/*")

    // Pages.
    r.GET("/", h.handleHome)
    r.GET("/upload", h.handleUploadPage)
    r.GET("/analyze", h.handleAnalyzePage)
    r.GET("/report", h.handleReportPage)

    // JSON API, grouped under /api.
    apiGroup := r.Group("/api")
    apiGroup.POST("/upload", h.handleFileUpload)
    apiGroup.GET("/columns", h.handleGetColumns)
    apiGroup.POST("/analyze", h.handleAnalyze)
    apiGroup.GET("/stats/:column", h.handleColumnStats)
    apiGroup.POST("/report", h.handleGenerateReport)
    apiGroup.GET("/download/:format", h.handleDownloadReport)
}

// handleHome renders index.html with the page title.
func (h *WebHandler) handleHome(c *gin.Context) {
    pageData := gin.H{"title": "统计分析工具"}
    c.HTML(http.StatusOK, "index.html", pageData)
}

// handleFileUpload receives a CSV upload (form field "file"), stores it under
// uploads/, loads it into the processor and creates the analyzer for it.
//
// Responses: 400 when the upload is missing or not a .csv file, 500 when the
// file cannot be saved or parsed, 200 with the detected columns otherwise.
func (h *WebHandler) handleFileUpload(c *gin.Context) {
    file, err := c.FormFile("file")
    if err != nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": "文件上传失败"})
        return
    }

    // The client controls file.Filename; keep only its final path element so
    // a name such as "../../x.csv" cannot escape the uploads directory.
    name := filepath.Base(file.Filename)

    // Only CSV files are accepted.
    if filepath.Ext(name) != ".csv" {
        c.JSON(http.StatusBadRequest, gin.H{"error": "仅支持CSV文件"})
        return
    }

    // Store the upload.
    filename := filepath.Join("uploads", name)
    if err := c.SaveUploadedFile(file, filename); err != nil {
        c.JSON(http.StatusInternalServerError, gin.H{"error": "文件保存失败"})
        return
    }

    // Load and process it.
    if err := h.processor.LoadCSV(filename); err != nil {
        c.JSON(http.StatusInternalServerError, gin.H{"error": "文件处理失败"})
        return
    }

    // Bind a new analyzer to the freshly loaded data set.
    // NOTE(review): h.analyzer is shared by all requests and replaced here
    // without synchronization - confirm single-user use or add locking.
    dataset := h.processor.GetDataSet()
    h.analyzer = services.NewAnalyzer(dataset)

    c.JSON(http.StatusOK, gin.H{
        "message": "文件上传成功",
        "columns": dataset.Columns,
    })
}

// handleAnalyze runs the requested analyses and returns their results as JSON.
//
// The request body carries two parallel arrays: types[i] is the analysis to run
// on columns[i] ("distribution", "outliers" or "trend"; other values are
// ignored). Each result is stored under "<column>_<type>". An analysis that
// fails is skipped rather than failing the whole request.
//
// Responses: 400 when the body is invalid, when the two arrays differ in
// length, or when no file has been uploaded yet; 200 otherwise.
func (h *WebHandler) handleAnalyze(c *gin.Context) {
    var req struct {
        Columns []string `json:"columns"`
        Types   []string `json:"types"`
    }

    if err := c.ShouldBindJSON(&req); err != nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": "无效的请求参数"})
        return
    }

    // The arrays are indexed in lockstep below; a shorter types array would
    // otherwise cause an index-out-of-range panic.
    if len(req.Types) != len(req.Columns) {
        c.JSON(http.StatusBadRequest, gin.H{"error": "无效的请求参数"})
        return
    }

    // The analyzer only exists after a successful upload.
    if h.analyzer == nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": "请先上传数据文件"})
        return
    }

    results := make(map[string]interface{})

    for i, col := range req.Columns {
        switch req.Types[i] {
        case "distribution":
            dist, err := h.analyzer.CalculateDistribution(col)
            if err != nil {
                continue
            }
            results[col+"_distribution"] = dist

        case "outliers":
            outliers, err := h.analyzer.PerformOutlierAnalysis(col)
            if err != nil {
                continue
            }
            results[col+"_outliers"] = outliers

        case "trend":
            // The time axis is always taken from the column named "date".
            trends, err := h.analyzer.AnalyzeTrends("date", col)
            if err != nil {
                continue
            }
            results[col+"_trend"] = trends
        }
    }

    c.JSON(http.StatusOK, results)
}

// handleGenerateReport generates a report in the requested format ("html",
// "pdf" or "excel") and returns it inside a JSON envelope.
//
// Responses: 400 when the body is invalid, the format is unsupported, or no
// file has been uploaded yet; 500 when report generation fails; 200 otherwise.
//
// NOTE(review): for "pdf" and "excel" the result is the library's document
// object, which is placed into the JSON response as-is - confirm this is the
// intended payload, since the client builds a download blob from it.
func (h *WebHandler) handleGenerateReport(c *gin.Context) {
    var req struct {
        Format string   `json:"format"`
        Charts []string `json:"charts"`
    }

    if err := c.ShouldBindJSON(&req); err != nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": "无效的请求参数"})
        return
    }

    // The analyzer only exists after a successful upload; without this guard
    // the call below would dereference a nil analyzer.
    if h.analyzer == nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": "请先上传数据文件"})
        return
    }

    // Build the report generator from the current data set and analysis.
    h.reporter = services.NewReportGenerator(
        h.processor.GetDataSet(),
        h.analyzer.GetAnalysisResult(),
    )

    var result interface{}
    var err error

    switch req.Format {
    case "html":
        result, err = h.reporter.GenerateHTMLReport()
    case "pdf":
        result, err = h.reporter.GeneratePDFReport()
    case "excel":
        result, err = h.reporter.GenerateExcelReport()
    default:
        c.JSON(http.StatusBadRequest, gin.H{"error": "不支持的报告格式"})
        return
    }

    if err != nil {
        c.JSON(http.StatusInternalServerError, gin.H{"error": "报告生成失败"})
        return
    }

    c.JSON(http.StatusOK, gin.H{
        "message": "报告生成成功",
        "result":  result,
    })
}

// handleColumnStats returns the statistics of the column named in the URL.
//
// Responses: 400 when no file has been uploaded yet, 500 when the statistics
// cannot be obtained, 200 with the statistics otherwise.
func (h *WebHandler) handleColumnStats(c *gin.Context) {
    // The analyzer only exists after a successful upload.
    if h.analyzer == nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": "请先上传数据文件"})
        return
    }

    column := c.Param("column")
    stats, err := h.analyzer.GetColumnStats(column)
    if err != nil {
        c.JSON(http.StatusInternalServerError, gin.H{"error": "统计信息获取失败"})
        return
    }

    c.JSON(http.StatusOK, stats)
}

3.6 前端界面实现

// templates/index.html
<!DOCTYPE html>
<html>
<head>
    <title>统计分析工具</title>
    <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/axios/dist/axios.min.js"></script>
</head>
<body class="bg-gray-100">
    <div class="container mx-auto px-4 py-8">
        <!-- Page header -->
        <header class="bg-white shadow rounded-lg p-6 mb-8">
            <h1 class="text-3xl font-bold text-gray-800">统计分析工具</h1>
            <p class="text-gray-600 mt-2">上传数据文件,进行分析并生成报告</p>
        </header>

        <!-- File upload area -->
        <div class="bg-white shadow rounded-lg p-6 mb-8">
            <h2 class="text-xl font-semibold mb-4">数据文件上传</h2>
            <div class="border-dashed border-2 border-gray-300 rounded-lg p-6 text-center">
                <input type="file" id="fileInput" class="hidden" accept=".csv">
                <label for="fileInput" class="cursor-pointer">
                    <div class="text-gray-600">
                        <p>点击或拖拽文件到此处</p>
                        <p class="text-sm mt-1">支持 CSV 格式文件</p>
                    </div>
                </label>
            </div>
        </div>

        <!-- Analysis area; hidden until a file has been uploaded -->
        <div class="bg-white shadow rounded-lg p-6 mb-8" id="analysisSection" style="display: none;">
            <h2 class="text-xl font-semibold mb-4">数据分析</h2>
            
            <!-- Column selection -->
            <div class="mb-6">
                <h3 class="font-medium mb-2">选择要分析的列</h3>
                <div id="columnSelect" class="grid grid-cols-3 gap-4">
                    <!-- Column checkboxes are added dynamically by JavaScript -->
                </div>
            </div>

            <!-- Analysis type selection -->
            <div class="mb-6">
                <h3 class="font-medium mb-2">选择分析类型</h3>
                <div class="grid grid-cols-3 gap-4">
                    <label class="flex items-center space-x-2">
                        <input type="checkbox" class="form-checkbox" value="distribution">
                        <span>分布分析</span>
                    </label>
                    <label class="flex items-center space-x-2">
                        <input type="checkbox" class="form-checkbox" value="correlation">
                        <span>相关性分析</span>
                    </label>
                    <label class="flex items-center space-x-2">
                        <input type="checkbox" class="form-checkbox" value="trend">
                        <span>趋势分析</span>
                    </label>
                </div>
            </div>

            <button id="analyzeBtn" class="bg-blue-500 text-white px-4 py-2 rounded hover:bg-blue-600">
                开始分析
            </button>
        </div>

        <!-- Analysis results; hidden until an analysis has run -->
        <div class="bg-white shadow rounded-lg p-6 mb-8" id="resultsSection" style="display: none;">
            <h2 class="text-xl font-semibold mb-4">分析结果</h2>
            
            <!-- Basic statistics -->
            <div class="mb-6">
                <h3 class="font-medium mb-2">基础统计信息</h3>
                <div id="basicStats" class="overflow-x-auto">
                    <!-- The statistics table is added dynamically by JavaScript -->
                </div>
            </div>

            <!-- Charts -->
            <div class="grid grid-cols-2 gap-6">
                <div>
                    <h3 class="font-medium mb-2">分布图</h3>
                    <div id="distributionChart" class="h-64"></div>
                </div>
                <div>
                    <h3 class="font-medium mb-2">趋势图</h3>
                    <div id="trendChart" class="h-64"></div>
                </div>
                <div class="col-span-2">
                    <h3 class="font-medium mb-2">相关性热图</h3>
                    <div id="correlationChart" class="h-96"></div>
                </div>
            </div>
        </div>

        <!-- Report generation; hidden until an analysis has run -->
        <div class="bg-white shadow rounded-lg p-6" id="reportSection" style="display: none;">
            <h2 class="text-xl font-semibold mb-4">生成报告</h2>
            
            <div class="grid grid-cols-3 gap-4 mb-6">
                <button class="bg-green-500 text-white px-4 py-2 rounded hover:bg-green-600"
                        onclick="generateReport('html')">
                    生成HTML报告
                </button>
                <button class="bg-red-500 text-white px-4 py-2 rounded hover:bg-red-600"
                        onclick="generateReport('pdf')">
                    生成PDF报告
                </button>
                <button class="bg-blue-500 text-white px-4 py-2 rounded hover:bg-blue-600"
                        onclick="generateReport('excel')">
                    生成Excel报告
                </button>
            </div>
        </div>
    </div>

    <!-- Front-end logic lives in static/js/main.js, served under /static -->
    <script src="/static/js/main.js"></script>
</body>
</html>

3.7 前端逻辑实现

// static/js/main.js

// Global page state shared by the functions below.
const state = {
    columns: [],            // column descriptors returned by /api/upload
    currentData: null,
    analysisResults: null   // response body of the latest /api/analyze call
};

// Wire up the upload area and the analysis controls once the DOM is ready.
document.addEventListener('DOMContentLoaded', function onDomReady() {
    initializeFileUpload();
    initializeAnalysisControls();
});

// Attach drag-and-drop and file-picker handlers to the upload area.
function initializeFileUpload() {
    const dropZone = document.querySelector('.border-dashed');
    const fileInput = document.getElementById('fileInput');
    const highlightClass = 'border-blue-500';

    // Highlight the drop zone while a file is dragged over it.
    function onDragOver(event) {
        event.preventDefault();
        dropZone.classList.add(highlightClass);
    }

    function onDragLeave() {
        dropZone.classList.remove(highlightClass);
    }

    // Accept the first dropped file if it is a CSV file.
    function onDrop(event) {
        event.preventDefault();
        dropZone.classList.remove(highlightClass);

        const dropped = event.dataTransfer.files[0];
        if (!dropped || !dropped.name.endsWith('.csv')) {
            showError('请上传CSV文件');
            return;
        }
        handleFileUpload(dropped);
    }

    // Upload the file chosen through the file picker.
    function onPick(event) {
        const picked = event.target.files[0];
        if (picked) {
            handleFileUpload(picked);
        }
    }

    dropZone.addEventListener('dragover', onDragOver);
    dropZone.addEventListener('dragleave', onDragLeave);
    dropZone.addEventListener('drop', onDrop);
    fileInput.addEventListener('change', onPick);
}

// Upload `file` to /api/upload, store the returned columns and reveal the
// analysis section. Failures are reported through showError.
async function handleFileUpload(file) {
    const payload = new FormData();
    payload.append('file', file);

    try {
        const { data } = await axios.post('/api/upload', payload);
        state.columns = data.columns;

        // Reveal the analysis section and list the uploaded file's columns.
        const analysisSection = document.getElementById('analysisSection');
        analysisSection.style.display = 'block';
        updateColumnSelect();

        showSuccess('文件上传成功');
    } catch (err) {
        showError('文件上传失败:' + err.message);
    }
}

// Rebuild the column checkbox list from state.columns.
//
// Column names come from the uploaded CSV header, i.e. from untrusted input,
// so the elements are built with DOM APIs and textContent instead of being
// interpolated into an HTML string (which would allow script injection).
function updateColumnSelect() {
    const columnSelect = document.getElementById('columnSelect');
    columnSelect.innerHTML = '';

    (state.columns || []).forEach(column => {
        const label = document.createElement('label');
        label.className = 'flex items-center space-x-2';

        const checkbox = document.createElement('input');
        checkbox.type = 'checkbox';
        checkbox.className = 'form-checkbox';
        checkbox.value = column.Name;

        const caption = document.createElement('span');
        caption.textContent = `${column.Name} (${column.Type})`;

        label.appendChild(checkbox);
        label.appendChild(caption);
        columnSelect.appendChild(label);
    });
}

// Run the analysis when the "analyze" button is clicked.
function initializeAnalysisControls() {
    document
        .getElementById('analyzeBtn')
        .addEventListener('click', performAnalysis);
}

// Send the selected columns and analysis types to /api/analyze and display
// the results.
//
// The server reads `columns` and `types` as parallel arrays (types[i] is the
// analysis for columns[i]), so every selected type is paired with every
// selected column before the request is sent.
async function performAnalysis() {
    // Checked column boxes live inside #columnSelect.
    const selectedColumns = Array.from(document.querySelectorAll('#columnSelect input:checked'))
        .map(input => input.value);

    // Analysis-type boxes are the remaining checkboxes of the analysis
    // section; the column boxes must be excluded or their names would be sent
    // as analysis types.
    const selectedTypes = Array.from(document.querySelectorAll('#analysisSection input[type="checkbox"]:checked'))
        .filter(input => !input.closest('#columnSelect'))
        .map(input => input.value);

    if (selectedColumns.length === 0) {
        showError('请选择要分析的列');
        return;
    }

    if (selectedTypes.length === 0) {
        showError('请选择分析类型');
        return;
    }

    // Expand to index-aligned pairs: one entry per (column, type) combination.
    const columns = [];
    const types = [];
    selectedColumns.forEach(column => {
        selectedTypes.forEach(type => {
            columns.push(column);
            types.push(type);
        });
    });

    try {
        const response = await axios.post('/api/analyze', {
            columns: columns,
            types: types
        });

        state.analysisResults = response.data;

        // Reveal the results and report sections.
        document.getElementById('resultsSection').style.display = 'block';
        document.getElementById('reportSection').style.display = 'block';

        // Refresh the charts and the statistics table.
        updateResults();

        showSuccess('分析完成');
    } catch (error) {
        showError('分析失败:' + error.message);
    }
}

// Refresh every result view, in order: statistics table, distribution chart,
// trend chart, correlation heatmap.
function updateResults() {
    const refreshers = [
        updateBasicStats,
        updateDistributionChart,
        updateTrendChart,
        updateCorrelationChart
    ];
    refreshers.forEach(refresh => refresh());
}

// Render the basic statistics table from state.analysisResults.basicStats.
//
// A missing basicStats entry yields an empty table instead of throwing, and a
// missing number is shown as "-". Column names originate from the uploaded
// file, so table cells are filled through textContent rather than HTML strings.
function updateBasicStats() {
    const basicStats = document.getElementById('basicStats');
    const stats = (state.analysisResults && state.analysisResults.basicStats) || {};

    // Two-decimal formatting that tolerates absent or non-numeric values.
    const format = value => (typeof value === 'number' ? value.toFixed(2) : '-');

    // The static header is safe to write as markup.
    basicStats.innerHTML = `
        <table class="min-w-full">
            <thead>
                <tr>
                    <th class="px-4 py-2">列名</th>
                    <th class="px-4 py-2">均值</th>
                    <th class="px-4 py-2">中位数</th>
                    <th class="px-4 py-2">标准差</th>
                    <th class="px-4 py-2">最小值</th>
                    <th class="px-4 py-2">最大值</th>
                </tr>
            </thead>
            <tbody></tbody>
        </table>
    `;

    const tbody = basicStats.querySelector('tbody');
    Object.entries(stats).forEach(([column, stat]) => {
        const entry = stat || {};
        const cells = [
            column,
            format(entry.mean),
            format(entry.median),
            format(entry.stdDev),
            format(entry.min),
            format(entry.max)
        ];

        const row = document.createElement('tr');
        cells.forEach(text => {
            const cell = document.createElement('td');
            cell.className = 'border px-4 py-2';
            cell.textContent = text;
            row.appendChild(cell);
        });
        tbody.appendChild(row);
    });
}

// Draw the value distribution of every analysed column as histograms on the
// 'distributionChart' target.
//
// All columns are collected into one trace list and plotted with a single
// Plotly.newPlot call. Calling newPlot once per column on the same target
// replaces the previous plot each time, leaving only the last column visible.
// With a single column the output is the same as before.
function updateDistributionChart() {
    const entries = Object.entries(state.analysisResults.distributions || {});
    if (entries.length === 0) {
        // Nothing to draw; leave the chart area untouched.
        return;
    }

    // Several histograms share one axis, so overlay them semi-transparently.
    const overlay = entries.length > 1;

    const traces = entries.map(([column, data]) => ({
        x: data.values,
        type: 'histogram',
        name: column,
        opacity: overlay ? 0.6 : 1
    }));

    const layout = {
        title: `${entries.map(([column]) => column).join(', ')} 分布图`,
        xaxis: { title: '值' },
        yaxis: { title: '频数' }
    };
    if (overlay) {
        layout.barmode = 'overlay';
    }

    Plotly.newPlot('distributionChart', traces, layout);
}

// Plot one lines+markers series per column on the 'trendChart' target, using
// each entry's `dates` as x and `values` as y.
function updateTrendChart() {
    const traces = [];
    for (const [column, data] of Object.entries(state.analysisResults.trends)) {
        traces.push({
            x: data.dates,
            y: data.values,
            type: 'scatter',
            mode: 'lines+markers',
            name: column
        });
    }

    const layout = {
        title: '趋势分析',
        xaxis: { title: '时间' },
        yaxis: { title: '值' }
    };

    Plotly.newPlot('trendChart', traces, layout);
}

// Render the correlation matrix as an 800x800 heatmap on the
// 'correlationChart' target; the same column list labels both axes.
function updateCorrelationChart() {
    const { values, columns } = state.analysisResults.correlation;

    const heatmap = {
        z: values,
        x: columns,
        y: columns,
        type: 'heatmap',
        colorscale: 'Viridis'
    };
    const layout = {
        title: '相关性分析',
        width: 800,
        height: 800
    };

    Plotly.newPlot('correlationChart', [heatmap], layout);
}

// Request a report from POST /api/report and hand it to the user.
//
// format: 'html' opens the returned markup in a new window; any other value
//         ('pdf', 'excel') is offered as a file download.
// Failures (request errors, blocked pop-up) are reported through showError.
//
// NOTE(review): the download branch builds a Blob from response.data.result of
// a default (JSON) axios response — confirm how the backend encodes binary
// PDF/Excel content; a raw binary body would need responseType: 'blob'.
async function generateReport(format) {
    try {
        const response = await axios.post('/api/report', {
            format,
            charts: ['distribution', 'trend', 'correlation']
        });

        if (format === 'html') {
            // Open the HTML report in a new window.
            const win = window.open();
            if (!win) {
                // window.open() returns null when the browser blocks the pop-up.
                throw new Error('弹出窗口被浏览器拦截');
            }
            win.document.write(response.data.result);
            // Finish the document so the new window stops showing "loading".
            win.document.close();
        } else {
            // Download the PDF or Excel report.
            const blob = new Blob([response.data.result], {
                type: format === 'pdf' ? 'application/pdf' : 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
            });

            // The API format name 'excel' is not a file extension; the MIME
            // type above is xlsx, so name the file accordingly.
            const extensions = { pdf: 'pdf', excel: 'xlsx' };
            const extension = extensions[format] || format;

            const url = window.URL.createObjectURL(blob);
            const a = document.createElement('a');
            a.href = url;
            a.download = `分析报告.${extension}`;
            a.click();
            window.URL.revokeObjectURL(url);
        }

        showSuccess('报告生成成功');
    } catch (error) {
        showError('报告生成失败:' + error.message);
    }
}

// Utility: show a success message to the user.
// Placeholder — the body is empty, so calls currently do nothing.
function showSuccess(message) {
    // TODO: implement the notification display logic.
}

// Utility: show an error message to the user.
// Placeholder — the body is empty, so errors passed here are currently not
// surfaced anywhere.
function showError(message) {
    // TODO: implement the error display logic.
}

4. 项目测试实现

// tests/processor_test.go
package tests

import (
    "testing"
    "os"
    "github.com/stretchr/testify/assert"
    "github.com/your/stats/services"
)

// TestDataProcessor drives the CSV pipeline end to end: load a small fixture,
// check the inferred column types, then check the statistics of the first
// column. The subtests share one processor and run in order; the later ones
// rely on the state left behind by LoadCSV.
func TestDataProcessor(t *testing.T) {
    // Fixture: two numeric columns and one text column, three data rows.
    testData := `Column1,Column2,Column3
1,2.5,text
2,3.5,sample
3,4.5,data
`
    // The "*" tells CreateTemp where to insert its random component, so the
    // resulting file name still ends in ".csv".
    tmpfile, err := os.CreateTemp("", "test-*.csv")
    if err != nil {
        t.Fatal(err)
    }
    defer os.Remove(tmpfile.Name())

    if _, err := tmpfile.WriteString(testData); err != nil {
        t.Fatal(err)
    }
    if err := tmpfile.Close(); err != nil {
        t.Fatal(err)
    }

    // Processor under test, shared by all subtests.
    processor := services.NewDataProcessor()

    // Loading the CSV file.
    t.Run("LoadCSV", func(t *testing.T) {
        if err := processor.LoadCSV(tmpfile.Name()); err != nil {
            // Nothing below is meaningful without a loaded dataset.
            t.Fatalf("LoadCSV: %v", err)
        }

        dataset := processor.GetDataSet()
        assert.Len(t, dataset.Columns, 3)
        assert.Len(t, dataset.Rows, 3)
    })

    // Column type inference.
    t.Run("DataTypeInference", func(t *testing.T) {
        dataset := processor.GetDataSet()
        // Guard the indexing below so a failed load reports an assertion
        // failure instead of an index-out-of-range panic.
        if !assert.Len(t, dataset.Columns, 3) {
            return
        }
        assert.Equal(t, "float", dataset.Columns[0].Type)
        assert.Equal(t, "float", dataset.Columns[1].Type)
        assert.Equal(t, "string", dataset.Columns[2].Type)
    })

    // Statistics of the first column (values 1, 2, 3).
    t.Run("Statistics", func(t *testing.T) {
        if err := processor.CalculateStatistics(); err != nil {
            t.Fatalf("CalculateStatistics: %v", err)
        }

        dataset := processor.GetDataSet()
        if len(dataset.Columns) == 0 {
            t.Fatal("no columns loaded")
        }
        stats := dataset.Columns[0].Stats

        assert.Equal(t, 3, stats.Count)
        assert.InDelta(t, 2.0, stats.Mean, 0.001)
        assert.InDelta(t, 2.0, stats.Median, 0.001)
    })
}

// tests/analyzer_test.go
// TestAnalyzer checks correlation, distribution and outlier analysis on a
// three-row dataset in which Col2 is exactly twice Col1.
func TestAnalyzer(t *testing.T) {
    // Cell values are stored as strings, one pair per row.
    pairs := [][2]string{
        {"1.0", "2.0"},
        {"2.0", "4.0"},
        {"3.0", "6.0"},
    }
    rows := make([]models.Row, 0, len(pairs))
    for _, p := range pairs {
        rows = append(rows, models.Row{Values: []interface{}{p[0], p[1]}})
    }

    dataset := &models.DataSet{
        Columns: []models.Column{
            {Name: "Col1", Type: "float"},
            {Name: "Col2", Type: "float"},
        },
        Rows: rows,
    }

    analyzer := services.NewAnalyzer(dataset)

    // Correlation: the two columns are perfectly positively correlated.
    t.Run("Correlation", func(t *testing.T) {
        got, err := analyzer.CalculateCorrelation("Col1", "Col2")
        assert.NoError(t, err)
        assert.InDelta(t, 1.0, got, 0.001)
    })

    // Distribution of Col1.
    t.Run("Distribution", func(t *testing.T) {
        got, err := analyzer.CalculateDistribution("Col1")
        assert.NoError(t, err)
        assert.InDelta(t, 2.0, got.Mean, 0.001)
        assert.InDelta(t, 1.0, got.StdDev, 0.001)
    })

    // Outlier detection: the fixture contains no outliers.
    t.Run("Outliers", func(t *testing.T) {
        got, err := analyzer.PerformOutlierAnalysis("Col1")
        assert.NoError(t, err)
        assert.Equal(t, 0, got.OutlierCount)
    })
}

// tests/reporter_test.go
// TestReportGenerator checks that HTML, PDF and Excel reports can be produced
// from a minimal one-column dataset with pre-filled statistics.
func TestReportGenerator(t *testing.T) {
    // Statistics shared by the dataset column and the analysis result.
    colStats := models.Statistics{
        Mean:   2.0,
        Median: 2.0,
        StdDev: 1.0,
    }

    dataset := &models.DataSet{
        Name: "TestData",
        Columns: []models.Column{
            {Name: "Col1", Type: "float", Stats: colStats},
        },
    }

    analysis := &models.AnalysisResult{
        DataSetName: "TestData",
        ColumnStats: map[string]models.Statistics{
            "Col1": dataset.Columns[0].Stats,
        },
    }

    reporter := services.NewReportGenerator(dataset, analysis)

    // HTML: the output must mention the report title and the dataset name.
    t.Run("HTMLReport", func(t *testing.T) {
        out, err := reporter.GenerateHTMLReport()
        assert.NoError(t, err)
        assert.Contains(t, out, "数据分析报告")
        assert.Contains(t, out, "TestData")
    })

    // PDF: only checks that something non-nil is returned without error.
    t.Run("PDFReport", func(t *testing.T) {
        out, err := reporter.GeneratePDFReport()
        assert.NoError(t, err)
        assert.NotNil(t, out)
    })

    // Excel: only checks that something non-nil is returned without error.
    t.Run("ExcelReport", func(t *testing.T) {
        out, err := reporter.GenerateExcelReport()
        assert.NoError(t, err)
        assert.NotNil(t, out)
    })
}

// tests/integration_test.go
// TestIntegration runs the upload -> analyze -> report flow against the real
// router and expects HTTP 200 from every step.
func TestIntegration(t *testing.T) {
    // Router wired with the production handlers.
    router := gin.New()
    handler := handlers.NewWebHandler()
    handler.SetupRoutes(router)

    // CSV fixture for the upload step. It is declared here because the only
    // testData visible in the test sources is a local of TestDataProcessor;
    // the header includes Col1, the column analysed below.
    const csvData = "Col1,Col2\n1,2\n2,4\n3,6\n"

    // postJSON sends body to path as an application/json POST and returns the
    // recorded response.
    postJSON := func(path, body string) *httptest.ResponseRecorder {
        w := httptest.NewRecorder()
        req := httptest.NewRequest(http.MethodPost, path, bytes.NewBufferString(body))
        // Declare the payload type explicitly so content-type based binding
        // on the server side recognises it as JSON.
        req.Header.Set("Content-Type", "application/json")
        router.ServeHTTP(w, req)
        return w
    }

    // Full flow: upload, analyse, generate report.
    t.Run("FullAnalysisFlow", func(t *testing.T) {
        // 1. Upload the file.
        w := httptest.NewRecorder()
        router.ServeHTTP(w, createMultipartRequest(t, "test.csv", csvData))
        assert.Equal(t, http.StatusOK, w.Code, "upload")

        // 2. Run the analysis.
        w = postJSON("/api/analyze", `{"columns":["Col1"],"types":["distribution"]}`)
        assert.Equal(t, http.StatusOK, w.Code, "analyze")

        // 3. Generate the report.
        w = postJSON("/api/report", `{"format":"html","charts":["distribution"]}`)
        assert.Equal(t, http.StatusOK, w.Code, "report")
    })
}

// createMultipartRequest builds a POST /api/upload request whose
// multipart/form-data body holds a single "file" part named filename with the
// given content. Any error while assembling the body fails the calling test
// immediately.
func createMultipartRequest(t *testing.T, filename string, content string) *http.Request {
    // Report failures at the caller's line rather than inside this helper.
    t.Helper()

    var body bytes.Buffer
    writer := multipart.NewWriter(&body)

    part, err := writer.CreateFormFile("file", filename)
    if err != nil {
        t.Fatal(err)
    }
    if _, err := part.Write([]byte(content)); err != nil {
        t.Fatal(err)
    }
    // Close writes the trailing boundary; without it the body is not a
    // complete multipart message, so its error must not be dropped.
    if err := writer.Close(); err != nil {
        t.Fatal(err)
    }

    req := httptest.NewRequest(http.MethodPost, "/api/upload", &body)
    req.Header.Set("Content-Type", writer.FormDataContentType())
    return req
}

5. 部署和运维

5.1 项目部署流程图

在这里插入图片描述

5.2 Docker配置文件

# Dockerfile
# Two-stage build: compile a static binary in the Go image, then copy it and
# the runtime assets into a small Alpine image.
FROM golang:1.19-alpine AS builder

WORKDIR /app

# Install basic build dependencies
RUN apk add --no-cache gcc musl-dev git

# Copy only the module manifests first, so the dependency download layer stays
# cached until go.mod / go.sum change rather than on every source edit.
COPY go.mod go.sum ./

# Download dependencies
RUN go mod download

# Copy the rest of the project
COPY . .

# Build (CGO disabled, so the binary does not depend on the builder's libc)
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o main cmd/main.go

# Final image
FROM alpine:3.14

WORKDIR /app

# Copy the compiled program and its runtime assets from the builder stage
COPY --from=builder /app/main .
COPY --from=builder /app/templates ./templates
COPY --from=builder /app/static ./static

# Create the directories the application needs
RUN mkdir -p /app/uploads

# Environment variables
ENV GIN_MODE=release
ENV PORT=8080

EXPOSE 8080

CMD ["./main"]

# docker-compose.yml
# Runs the analyzer together with Prometheus and Grafana.
# NOTE(review): prometheus.yml also targets alertmanager:9093 and
# node-exporter:9100, which are not defined as services here — add them or
# remove those targets.
version: '3.8'

services:
  stats-analyzer:
    build: .
    ports:
      - "8080:8080"
    volumes:
      - ./uploads:/app/uploads
      - ./configs:/app/configs
    environment:
      - GIN_MODE=release
      - PORT=8080
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/health"]
      interval: 30s
      timeout: 10s
      retries: 3

  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      # prometheus.yml loads "rules/*.yml" relative to its own directory, so
      # the alert rules must be mounted next to it or no alert is ever loaded.
      - ./rules:/etc/prometheus/rules
    command:
      - --config.file=/etc/prometheus/prometheus.yml
    restart: unless-stopped

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    environment:
      # NOTE(review): default admin password; override it outside of local demos.
      - GF_SECURITY_ADMIN_PASSWORD=admin
    volumes:
      - grafana-storage:/var/lib/grafana
    depends_on:
      - prometheus
    restart: unless-stopped

volumes:
  grafana-storage:

5.3 监控配置

# prometheus.yml
# Scrape and alerting configuration for the stats-analyzer stack.
global:
  # Scrape targets and evaluate rules every 15 seconds.
  scrape_interval: 15s
  evaluation_interval: 15s

# Alerts are forwarded to an Alertmanager expected at alertmanager:9093.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          - alertmanager:9093

# Alert rule files, resolved relative to this configuration file.
rule_files:
  - "rules/*.yml"

scrape_configs:
  # The application itself; the extra `service` label is what the alert rules
  # select on (e.g. up{service="stats-analyzer"}).
  - job_name: 'stats-analyzer'
    static_configs:
      - targets: ['stats-analyzer:8080']
        labels:
          service: 'stats-analyzer'

  # Host-level metrics from a node exporter expected at node-exporter:9100.
  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']

# rules/alert_rules.yml
groups:
  - name: stats_analyzer_alerts
    rules:
      # Availability: the scrape target has been down for one minute.
      - alert: ServiceDown
        expr: up{service="stats-analyzer"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "统计分析服务不可用"
          description: "服务已停止运行超过1分钟"

      # Error rate: share of 5xx responses over all responses.
      # Both sides are summed first: dividing the raw vectors matches series by
      # identical labels, which pairs each 5xx series with itself (ratio 1)
      # instead of comparing 5xx traffic against total traffic.
      - alert: HighErrorRate
        expr: sum(rate(http_requests_total{status=~"5.."}[5m])) / sum(rate(http_requests_total[5m])) > 0.05
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "服务错误率过高"
          description: "5分钟内错误率超过5%"

      # Latency: 95th percentile of request duration above one second.
      - alert: SlowResponse
        expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "服务响应过慢"
          description: "95%的请求响应时间超过1秒"

      # Memory: process RSS as a percentage of host memory.
      # The two metrics come from different jobs and share no label set, so
      # the division ignores labels via on(); this assumes a single
      # node-exporter target, as configured in prometheus.yml.
      - alert: HighMemoryUsage
        expr: process_resident_memory_bytes{service="stats-analyzer"} / on() group_left() node_memory_MemTotal_bytes * 100 > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "内存使用率过高"
          description: "内存使用率超过80%"

# grafana/dashboards/stats_analyzer.json
{
  "annotations": {
    "list": []
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "id": 1,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prometheus"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 1,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "title": "请求数量",
      "type": "timeseries"
    }
  ],
  "refresh": "5s",
  "schemaVersion": 38,
  "style": "dark",
  "tags": ["stats-analyzer"],
  "templating": {
    "list": []
  },
  "time": {
    "from": "now-6h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "",
  "title": "统计分析服务监控",
  "uid": "stats_analyzer",
  "version": 1,
  "weekStart": ""
}

6. 项目文档

6.1 API文档

# 统计分析工具 API 文档

## 基本信息

- 基础路径: `/api/v1`
- 支持格式: JSON
- 认证方式: 无

## API 端点

### 1. 文件上传

#### POST /upload

上传CSV文件进行分析。

**请求参数:**

- Content-Type: multipart/form-data
- 参数名:file

**响应:**

```json
{
    "status": "success",
    "columns": [
        {
            "name": "Column1",
            "type": "float"
        },
        {
            "name": "Column2",
            "type": "string"
        }
    ]
}
```

2. 数据分析

POST /analyze

执行数据分析。

请求参数:

{
    "columns": ["Column1", "Column2"],
    "types": ["distribution", "correlation", "trend"]
}

响应:

{
    "status": "success",
    "results": {
        "distributions": {
            "Column1": {
                "mean": 45.6,
                "median": 42.0,
                "stdDev": 12.3,
                "bins": [
                    {"start": 0, "end": 10, "count": 5},
                    {"start": 10, "end": 20, "count": 8}
                ]
            }
        },
        "correlations": {
            "Column1": {
                "Column2": 0.85
            }
        },
        "trends": {
            "Column1": [
                {"date": "2024-01-01", "value": 42.1},
                {"date": "2024-01-02", "value": 43.5}
            ]
        }
    }
}

3. 报告生成

POST /report

生成分析报告。

请求参数:

{
    "format": "html",  // 支持: html, pdf, excel
    "charts": ["distribution", "trend", "correlation"]
}

响应:

  • Format: html

    {
        "status": "success",
        "content": "<html>...</html>"
    }
    
  • Format: pdf/excel

    {
        "status": "success",
        "download_url": "/downloads/report_123.pdf"
    }
    

4. 列统计信息

GET /stats/:column

获取指定列的统计信息。

响应:

{
    "status": "success",
    "stats": {
        "count": 1000,
        "mean": 45.6,
        "median": 42.0,
        "mode": 40.0,
        "stdDev": 12.3,
        "min": 10.0,
        "max": 90.0,
        "percentiles": {
            "25": 35.0,
            "50": 42.0,
            "75": 55.0,
            "95": 70.0
        }
    }
}

5. 异常值检测

POST /outliers

检测指定列的异常值。

请求参数:

{
    "column": "Column1",
    "method": "iqr"  // 支持: iqr, zscore
}

响应:

{
    "status": "success",
    "outliers": {
        "count": 5,
        "values": [
            {"index": 10, "value": 150.0, "score": 3.5},
            {"index": 20, "value": 5.0, "score": -2.8}
        ],
        "bounds": {
            "lower": 10.0,
            "upper": 90.0
        }
    }
}

错误码说明

| 错误码 | 说明 |
| --- | --- |
| 400 | 请求参数错误 |
| 404 | 资源不存在 |
| 415 | 不支持的文件类型 |
| 500 | 服务器内部错误 |

使用示例

Python 示例

import requests

# Upload the file. The context manager closes the file handle once the request
# has been sent, instead of leaving it open for the life of the script.
with open('data.csv', 'rb') as f:
    response = requests.post('http://localhost:8080/api/v1/upload', files={'file': f})
print(response.json())

# Run the analysis on the uploaded data.
analysis_req = {
    'columns': ['Column1'],
    'types': ['distribution']
}
response = requests.post('http://localhost:8080/api/v1/analyze', json=analysis_req)
print(response.json())

JavaScript 示例

// Upload the file as multipart/form-data under the field name "file".
// `file` is not defined in this snippet; the caller must supply it.
const formData = new FormData();
formData.append('file', file);

fetch('/api/v1/upload', {
    method: 'POST',
    body: formData
})
.then(response => response.json())
.then(data => console.log(data));

// Run the analysis: POST the selected columns and analysis types as JSON.
// NOTE(review): this request is issued without waiting for the upload above
// to finish — chain it after the upload if the server requires that order.
fetch('/api/v1/analyze', {
    method: 'POST',
    headers: {
        'Content-Type': 'application/json'
    },
    body: JSON.stringify({
        columns: ['Column1'],
        types: ['distribution']
    })
})
.then(response => response.json())
.then(data => console.log(data));

7. 项目总结

7.1 功能特点

  1. 数据处理能力

    • 支持大规模CSV文件处理
    • 自动数据类型推断
    • 智能数据清洗
    • 高效数据转换
  2. 分析功能

    • 全面的统计分析
    • 高级数据挖掘
    • 可视化图表生成
    • 自动报告生成
  3. 用户体验

    • 直观的Web界面
    • 交互式数据探索
    • 灵活的配置选项
    • 多格式报告导出
  4. 系统性能

    • 并发处理支持
    • 内存优化设计
    • 缓存加速
    • 异步任务处理

7.2 技术亮点

  1. Go语言优势运用

    • goroutine并发处理
    • channel通信机制
    • 接口设计模式
    • 高效内存管理
  2. 架构设计

    • 模块化组织
    • 松耦合设计
    • 可扩展接口
    • 清晰的代码结构
  3. 工程实践

    • 完整的测试覆盖
    • 持续集成部署
    • 监控告警机制
    • 容器化部署

7.3 后续优化方向

  1. 功能增强

    • 支持更多数据源
    • 添加机器学习模型
    • 扩展分析方法
    • 优化报告模板
  2. 性能提升

    • 分布式处理
    • 数据库优化
    • 缓存策略改进
    • 算法优化
  3. 用户体验

    • 界面美化
    • 操作流程优化
    • 响应速度提升
    • 移动端支持
  4. 运维支持

    • 自动化部署
    • 监控完善
    • 日志分析
    • 故障恢复

本项目展示了一个完整的Go语言统计分析工具的设计和实现过程,涵盖了从数据处理到可视化报告生成的全流程。通过合理的架构设计和模块划分,实现了高效、可靠的数据分析功能,为用户提供了便捷的数据分析工具。


怎么样今天的内容还满意吗?再次感谢观众老爷的观看,关注GZH:凡人的AI工具箱,回复666,送您价值199的AI大礼包。最后,祝您早日实现财务自由,还请给个赞,谢谢!


http://www.kler.cn/a/403038.html

相关文章:

  • 分层架构 IM 系统之架构演进
  • 【单元测试】【Android】JUnit 4 和 JUnit 5 的差异记录
  • 【linux】插入新硬盘如何配置:格式化、分区、自动挂载(Ubuntu)
  • Day24 回溯算法part03
  • MybatisPlus之1:快速入门
  • 招商蛇口|在低密园林里,开启生活的“任意门”
  • ssl证书,以 Nginx 为例
  • 如何构建高效的接口自动化测试框架?
  • Halcon 分割之区域生长法
  • 拓展Git相关知识(⭐版控工具⭐)
  • 量化交易系统开发-实时行情自动化交易-3.4.3.3.期货市场深度数据
  • Golang语言整合jwt+gin框架实现token
  • 学习threejs,对模型多个动画切换展示
  • Matlab多输入单输出之倾斜手写数字识别
  • os库的常见使用
  • 星融元与焱融科技AI分布式存储软硬件完成兼容性互认证
  • 13.C++内存管理2(C++ new和delete的使用和原理详解,内存泄漏问题)
  • 数据结构(双向链表——c语言实现)
  • Restful API 规范详解
  • 单片机学习笔记 2. LED灯闪烁
  • c++--------《set 和 map》
  • C++手写PCD文件
  • 使用Kotlin写一个将字符串加密成short数组,然后可以解密还原成原始的字符串的功能
  • 前端页面自适应等比例缩放 Flexible+rem方案
  • 小程序-基于java+SpringBoot+Vue的超市购物系统设计与实现
  • 【React 进阶】掌握 React18 全部 Hooks