evaluate_test.go
   1  package evaluate
   2  
   3  import (
   4  	"bytes"
   5  	"errors"
   6  	"os"
   7  	"path/filepath"
   8  	"sort"
   9  	"strings"
  10  	"testing"
  11  	"time"
  12  
  13  	"github.com/stretchr/testify/assert"
  14  	"github.com/stretchr/testify/mock"
  15  	"github.com/stretchr/testify/require"
  16  	"github.com/zimmski/osutil"
  17  	"github.com/zimmski/osutil/bytesutil"
  18  
  19  	"github.com/symflower/eval-dev-quality/evaluate/metrics"
  20  	metricstesting "github.com/symflower/eval-dev-quality/evaluate/metrics/testing"
  21  	evaluatetask "github.com/symflower/eval-dev-quality/evaluate/task"
  22  	"github.com/symflower/eval-dev-quality/language"
  23  	"github.com/symflower/eval-dev-quality/language/golang"
  24  	"github.com/symflower/eval-dev-quality/log"
  25  	"github.com/symflower/eval-dev-quality/model"
  26  	evalmodel "github.com/symflower/eval-dev-quality/model"
  27  	"github.com/symflower/eval-dev-quality/model/llm"
  28  	modeltesting "github.com/symflower/eval-dev-quality/model/testing"
  29  	"github.com/symflower/eval-dev-quality/provider"
  30  	providertesting "github.com/symflower/eval-dev-quality/provider/testing"
  31  	"github.com/symflower/eval-dev-quality/task"
  32  )
  33  
  34  var (
  35  	// ErrEmptyResponseFromModel indicates the model returned an empty response.
  36  	ErrEmptyResponseFromModel = errors.New("empty response from model")
  37  )
  38  
  39  // file represents a file with path and content.
  40  type file struct {
  41  	Path    string
  42  	Content string
  43  }
  44  
  45  // testFiles holds common test files.
  46  var testFiles = map[string]file{
  47  	"plain": file{
  48  		Path: "plain_test.go",
  49  		Content: bytesutil.StringTrimIndentations(`
  50  			package plain
  51  
  52  			import "testing"
  53  
  54  			func TestFunction(t *testing.T){}
  55  		`),
  56  	},
  57  	"plain-with-assert": file{
  58  		Path: "plain_test.go",
  59  		Content: bytesutil.StringTrimIndentations(`
  60  			package plain
  61  
  62  			import (
  63  				"testing"
  64  
  65  				"github.com/stretchr/testify/assert"
  66  			)
  67  
  68  			func TestFunction(t *testing.T){
  69  				assert.True(t, true)
  70  			}
  71  		`),
  72  	},
  73  }
  74  
  75  func TestEvaluate(t *testing.T) {
  76  	type testCase struct {
  77  		Name string
  78  
  79  		Before func(t *testing.T, logger *log.Logger, resultPath string)
  80  		After  func(t *testing.T, logger *log.Logger, resultPath string)
  81  
  82  		Context *Context
  83  
  84  		ExpectedAssessments    metricstesting.AssessmentTuples
  85  		ExpectedTotalScore     uint64
  86  		ExpectedOutputValidate func(t *testing.T, output string, resultPath string)
  87  		ExpectedResultFiles    map[string]func(t *testing.T, filePath string, data string)
  88  	}
  89  
  90  	validate := func(t *testing.T, tc *testCase) {
  91  		t.Run(tc.Name, func(t *testing.T) {
  92  			temporaryPath := t.TempDir()
  93  
  94  			logOutput, logger := log.Buffer()
  95  			defer func() {
  96  				log.CloseOpenLogFiles()
  97  
  98  				if t.Failed() {
  99  					t.Logf("Logging output: %s", logOutput.String())
 100  				}
 101  			}()
 102  
 103  			resultPath := temporaryPath
 104  			logger = logger.With(log.AttributeKeyResultPath, resultPath)
 105  
 106  			tc.Context.Log = logger
 107  			if tc.Context.QueryAttempts == 0 {
 108  				tc.Context.QueryAttempts = 1
 109  			}
 110  			tc.Context.ResultPath = resultPath
 111  			if tc.Context.TestdataPath == "" {
 112  				tc.Context.TestdataPath = filepath.Join("..", "testdata")
 113  			}
 114  			if tc.Context.Runs == 0 {
 115  				tc.Context.Runs = 1
 116  			}
 117  
 118  			if tc.Before != nil {
 119  				tc.Before(t, logger, temporaryPath)
 120  			}
 121  			if tc.After != nil {
 122  				defer tc.After(t, logger, temporaryPath)
 123  			}
 124  
 125  			assessmentStore, actualTotalScore := Evaluate(tc.Context)
 126  
 127  			var actualAssessments metricstesting.AssessmentTuples
 128  			require.NoError(t, assessmentStore.Walk(func(m evalmodel.Model, l language.Language, r string, ti task.Identifier, a metrics.Assessments) error {
 129  				// Normalize assessments.
 130  				if v, ok := a[metrics.AssessmentKeyProcessingTime]; ok {
 131  					if assert.Greater(t, v, uint64(0)) {
 132  						delete(a, metrics.AssessmentKeyProcessingTime)
 133  					}
 134  				}
 135  
 136  				actualAssessments = append(actualAssessments, &metricstesting.AssessmentTuple{
 137  					Model:          m,
 138  					Language:       l,
 139  					RepositoryPath: r,
 140  					Task:           ti,
 141  					Assessment:     a,
 142  				})
 143  
 144  				return nil
 145  			}))
 146  
 147  			assert.ElementsMatch(t, tc.ExpectedAssessments, actualAssessments)
 148  			assert.Equal(t, tc.ExpectedTotalScore, actualTotalScore)
 149  
 150  			if tc.ExpectedOutputValidate != nil {
 151  				tc.ExpectedOutputValidate(t, logOutput.String(), temporaryPath)
 152  			}
 153  
 154  			actualResultFiles, err := osutil.FilesRecursive(temporaryPath)
 155  			require.NoError(t, err)
 156  			for i, p := range actualResultFiles {
 157  				actualResultFiles[i], err = filepath.Rel(temporaryPath, p)
 158  				require.NoError(t, err)
 159  			}
 160  			sort.Strings(actualResultFiles)
 161  			expectedResultFiles := make([]string, 0, len(tc.ExpectedResultFiles))
 162  			for filePath, validate := range tc.ExpectedResultFiles {
 163  				expectedResultFiles = append(expectedResultFiles, filePath)
 164  
 165  				if validate != nil {
 166  					data, err := os.ReadFile(filepath.Join(temporaryPath, filePath))
 167  					if assert.NoError(t, err) {
 168  						validate(t, filePath, string(data))
 169  					}
 170  				}
 171  			}
 172  			sort.Strings(expectedResultFiles)
 173  			assert.Equal(t, expectedResultFiles, actualResultFiles)
 174  		})
 175  	}
 176  
 177  	{
 178  		languageGolang := &golang.Language{}
 179  		mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, "empty-response-model")
 180  		repositoryPath := filepath.Join("golang", "plain")
 181  
 182  		validate(t, &testCase{
 183  			Name: "Empty model responses are errors",
 184  
 185  			Before: func(t *testing.T, logger *log.Logger, resultPath string) {
 186  				// Set up mocks, when test is running.
 187  				mockedModel.MockCapabilityWriteTests.On("WriteTests", mock.Anything).Return(nil, ErrEmptyResponseFromModel)
 188  			},
 189  
 190  			Context: &Context{
 191  				Languages: []language.Language{
 192  					&golang.Language{},
 193  				},
 194  
 195  				Models: []evalmodel.Model{
 196  					mockedModel,
 197  				},
 198  			},
 199  
 200  			ExpectedAssessments: []*metricstesting.AssessmentTuple{
 201  				&metricstesting.AssessmentTuple{
 202  					Model:          mockedModel,
 203  					Language:       languageGolang,
 204  					RepositoryPath: repositoryPath,
 205  					Task:           evaluatetask.IdentifierWriteTests,
 206  					Assessment: metrics.Assessments{
 207  						metrics.AssessmentKeyFilesExecutedMaximumReachable: 1,
 208  					},
 209  				},
 210  				&metricstesting.AssessmentTuple{
 211  					Model:          mockedModel,
 212  					Language:       languageGolang,
 213  					RepositoryPath: repositoryPath,
 214  					Task:           evaluatetask.IdentifierWriteTestsSymflowerFix,
 215  					Assessment: metrics.Assessments{
 216  						metrics.AssessmentKeyFilesExecutedMaximumReachable: 1,
 217  					},
 218  				},
 219  			},
 220  			ExpectedTotalScore: 2,
 221  			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
 222  				"evaluation.log": nil,
 223  				filepath.Join(string(evaluatetask.IdentifierWriteTests), mockedModel.ID(), "golang", "golang", "plain", "evaluation.log"): nil,
 224  				"evaluation.csv": nil,
 225  			},
 226  		})
 227  	}
 228  
 229  	t.Run("Failing model queries", func(t *testing.T) {
 230  		{
 231  			languageGolang := &golang.Language{}
 232  			mockedModelID := "testing-provider/empty-response-model"
 233  			mockedQuery := providertesting.NewMockQuery(t)
 234  			mockedModel := llm.NewModel(mockedQuery, mockedModelID)
 235  			repositoryPath := filepath.Join("golang", "plain")
 236  
 237  			validate(t, &testCase{
 238  				Name: "Single try fails",
 239  
 240  				Before: func(t *testing.T, logger *log.Logger, resultPath string) {
 241  					// Set up mocks, when test is running.
 242  					mockedQuery.On("Query", mock.Anything, mockedModelID, mock.Anything).Return("", ErrEmptyResponseFromModel)
 243  				},
 244  				After: func(t *testing.T, logger *log.Logger, resultPath string) {
 245  					mockedQuery.AssertNumberOfCalls(t, "Query", 1)
 246  				},
 247  
 248  				Context: &Context{
 249  					Languages: []language.Language{
 250  						languageGolang,
 251  					},
 252  
 253  					Models: []evalmodel.Model{
 254  						mockedModel,
 255  					},
 256  					QueryAttempts: 1,
 257  				},
 258  
 259  				ExpectedAssessments: []*metricstesting.AssessmentTuple{
 260  					&metricstesting.AssessmentTuple{
 261  						Model:          mockedModel,
 262  						Language:       languageGolang,
 263  						RepositoryPath: repositoryPath,
 264  						Task:           evaluatetask.IdentifierWriteTests,
 265  						Assessment: metrics.Assessments{
 266  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 1,
 267  						},
 268  					},
 269  					&metricstesting.AssessmentTuple{
 270  						Model:          mockedModel,
 271  						Language:       languageGolang,
 272  						RepositoryPath: repositoryPath,
 273  						Task:           evaluatetask.IdentifierWriteTestsSymflowerFix,
 274  						Assessment: metrics.Assessments{
 275  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 1,
 276  						},
 277  					},
 278  				},
 279  				ExpectedTotalScore: 2,
 280  				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
 281  					"evaluation.log": nil,
 282  					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): func(t *testing.T, filePath, data string) {
 283  						assert.Contains(t, data, ErrEmptyResponseFromModel.Error())
 284  					},
 285  					"evaluation.csv": nil,
 286  				},
 287  			})
 288  		}
 289  		{
 290  			languageGolang := &golang.Language{}
 291  			mockedModelID := "testing-provider/empty-response-model"
 292  			mockedQuery := providertesting.NewMockQuery(t)
 293  			mockedModel := llm.NewModel(mockedQuery, mockedModelID)
 294  			repositoryPath := filepath.Join("golang", "plain")
 295  
 296  			validate(t, &testCase{
 297  				Name: "Success after retry",
 298  
 299  				Before: func(t *testing.T, logger *log.Logger, resultPath string) {
 300  					// Set up mocks, when test is running.
 301  					mockedQuery.On("Query", mock.Anything, mockedModelID, mock.Anything).Return("", ErrEmptyResponseFromModel).Once()
 302  					mockedQuery.On("Query", mock.Anything, mockedModelID, mock.Anything).Return("model-response", nil).Once().After(10 * time.Millisecond) // Simulate a model response delay because our internal safety measures trigger when a query is done in 0 milliseconds.
 303  				},
 304  				After: func(t *testing.T, logger *log.Logger, resultPath string) {
 305  					mockedQuery.AssertNumberOfCalls(t, "Query", 2)
 306  				},
 307  
 308  				Context: &Context{
 309  					Languages: []language.Language{
 310  						&golang.Language{},
 311  					},
 312  
 313  					Models: []evalmodel.Model{
 314  						mockedModel,
 315  					},
 316  					QueryAttempts: 3,
 317  
 318  					RepositoryPaths: []string{
 319  						repositoryPath,
 320  					},
 321  				},
 322  
 323  				ExpectedAssessments: []*metricstesting.AssessmentTuple{
 324  					&metricstesting.AssessmentTuple{
 325  						Model:          mockedModel,
 326  						Language:       languageGolang,
 327  						RepositoryPath: repositoryPath,
 328  						Task:           evaluatetask.IdentifierWriteTests,
 329  						Assessment: map[metrics.AssessmentKey]uint64{
 330  							metrics.AssessmentKeyFilesExecutedMaximumReachable:      1,
 331  							metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 14,
 332  							metrics.AssessmentKeyResponseCharacterCount:             14,
 333  							metrics.AssessmentKeyResponseNoError:                    1,
 334  						},
 335  					},
 336  					&metricstesting.AssessmentTuple{
 337  						Model:          mockedModel,
 338  						Language:       languageGolang,
 339  						RepositoryPath: repositoryPath,
 340  						Task:           evaluatetask.IdentifierWriteTestsSymflowerFix,
 341  						Assessment: map[metrics.AssessmentKey]uint64{
 342  							metrics.AssessmentKeyFilesExecutedMaximumReachable:      1,
 343  							metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 14,
 344  							metrics.AssessmentKeyResponseCharacterCount:             14,
 345  							metrics.AssessmentKeyResponseNoError:                    1,
 346  						},
 347  					},
 348  				},
 349  				ExpectedTotalScore: 2,
 350  				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
 351  					"evaluation.log": nil,
 352  					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): func(t *testing.T, filePath, data string) {
 353  						assert.Contains(t, data, "Attempt 1/3: "+ErrEmptyResponseFromModel.Error())
 354  					},
 355  					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "response-1.log"): nil,
 356  					"evaluation.csv": nil,
 357  				},
 358  			})
 359  		}
 360  		{
 361  			languageGolang := &golang.Language{}
 362  			mockedModelID := "testing-provider/empty-response-model"
 363  			mockedQuery := providertesting.NewMockQuery(t)
 364  			mockedModel := llm.NewModel(mockedQuery, mockedModelID)
 365  			repositoryPath := filepath.Join("golang", "plain")
 366  
 367  			validate(t, &testCase{
 368  				Name: "Immediate success",
 369  
 370  				Before: func(t *testing.T, logger *log.Logger, resultPath string) {
 371  					// Set up mocks, when test is running.
 372  					mockedQuery.On("Query", mock.Anything, mockedModelID, mock.Anything).Return("model-response", nil).After(10 * time.Millisecond) // Simulate a model response delay because our internal safety measures trigger when a query is done in 0 milliseconds.
 373  				},
 374  				After: func(t *testing.T, logger *log.Logger, resultPath string) {
 375  					mockedQuery.AssertNumberOfCalls(t, "Query", 1)
 376  				},
 377  
 378  				Context: &Context{
 379  					Languages: []language.Language{
 380  						&golang.Language{},
 381  					},
 382  
 383  					Models: []evalmodel.Model{
 384  						mockedModel,
 385  					},
 386  					QueryAttempts: 3,
 387  
 388  					RepositoryPaths: []string{
 389  						repositoryPath,
 390  					},
 391  				},
 392  
 393  				ExpectedAssessments: []*metricstesting.AssessmentTuple{
 394  					&metricstesting.AssessmentTuple{
 395  						Model:          mockedModel,
 396  						Language:       languageGolang,
 397  						RepositoryPath: repositoryPath,
 398  						Task:           evaluatetask.IdentifierWriteTests,
 399  						Assessment: map[metrics.AssessmentKey]uint64{
 400  							metrics.AssessmentKeyFilesExecutedMaximumReachable:      1,
 401  							metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 14,
 402  							metrics.AssessmentKeyResponseCharacterCount:             14,
 403  							metrics.AssessmentKeyResponseNoError:                    1,
 404  						},
 405  					},
 406  					&metricstesting.AssessmentTuple{
 407  						Model:          mockedModel,
 408  						Language:       languageGolang,
 409  						RepositoryPath: repositoryPath,
 410  						Task:           evaluatetask.IdentifierWriteTestsSymflowerFix,
 411  						Assessment: map[metrics.AssessmentKey]uint64{
 412  							metrics.AssessmentKeyFilesExecutedMaximumReachable:      1,
 413  							metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 14,
 414  							metrics.AssessmentKeyResponseCharacterCount:             14,
 415  							metrics.AssessmentKeyResponseNoError:                    1,
 416  						},
 417  					},
 418  				},
 419  				ExpectedTotalScore: 2,
 420  				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
 421  					"evaluation.log": nil,
 422  					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): func(t *testing.T, filePath, data string) {
 423  						assert.Contains(t, data, "DONE 0 tests, 1 error")
 424  					},
 425  					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "response-1.log"): nil,
 426  					"evaluation.csv": nil,
 427  				},
 428  			})
 429  		}
 430  	})
 431  
 432  	t.Run("Failing basic language checks should exclude model", func(t *testing.T) {
 433  		repositoryPlainPath := filepath.Join("golang", "plain")
 434  		repositoryNextPath := filepath.Join("golang", "next")
 435  
 436  		temporaryTestdataPath := t.TempDir()
 437  		assert.NoError(t, osutil.CopyTree(filepath.Join("..", "testdata", repositoryPlainPath), filepath.Join(temporaryTestdataPath, repositoryPlainPath)))
 438  		assert.NoError(t, osutil.CopyTree(filepath.Join("..", "testdata", repositoryPlainPath), filepath.Join(temporaryTestdataPath, repositoryNextPath)))
 439  		repositoryNextConfigPath := filepath.Join(temporaryTestdataPath, repositoryNextPath, "go.mod")
 440  		d, err := os.ReadFile(repositoryNextConfigPath)
 441  		require.NoError(t, err)
 442  		d = bytes.ReplaceAll(d, []byte("plain"), []byte("next"))
 443  		require.NoError(t, os.WriteFile(repositoryNextConfigPath, d, 0))
 444  
 445  		generateTestsForFilePlainError := errors.New("generateTestsForFile error")
 446  
 447  		generateSuccess := func(mockedModel *modeltesting.MockModelCapabilityWriteTests) {
 448  			mockedModel.RegisterGenerateSuccess(t, testFiles["plain"].Path, testFiles["plain"].Content, metricstesting.AssessmentsWithProcessingTime).Once()
 449  		}
 450  		generateError := func(mockedModel *modeltesting.MockModelCapabilityWriteTests) {
 451  			mockedModel.RegisterGenerateError(generateTestsForFilePlainError).Once()
 452  		}
 453  
 454  		{
 455  			languageGolang := &golang.Language{}
 456  			mockedModelID := "mocked-generation-model"
 457  			mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)
 458  
 459  			validate(t, &testCase{
 460  				Name: "Problems of previous runs shouldn't cancel successive runs",
 461  
 462  				Before: func(t *testing.T, logger *log.Logger, resultPath string) {
 463  					// Set up mocks, when test is running.
 464  					{
 465  						// Succeed on both "plain" runs.
 466  						generateSuccess(mockedModel)
 467  						generateSuccess(mockedModel)
 468  
 469  						// Error on the first run for the "next" repository.
 470  						generateError(mockedModel)
 471  						// Succeed on the second run for the "next" repository.
 472  						generateSuccess(mockedModel)
 473  					}
 474  				},
 475  				After: func(t *testing.T, logger *log.Logger, resultPath string) {
 476  					mockedModel.MockCapabilityWriteTests.AssertNumberOfCalls(t, "WriteTests", 4)
 477  				},
 478  
 479  				Context: &Context{
 480  					Languages: []language.Language{
 481  						&golang.Language{},
 482  					},
 483  
 484  					Models: []evalmodel.Model{
 485  						mockedModel,
 486  					},
 487  
 488  					RepositoryPaths: []string{
 489  						repositoryPlainPath,
 490  						repositoryNextPath,
 491  					},
 492  					TestdataPath: temporaryTestdataPath,
 493  
 494  					Runs: 2,
 495  				},
 496  
 497  				ExpectedAssessments: []*metricstesting.AssessmentTuple{
 498  					&metricstesting.AssessmentTuple{
 499  						Model:          mockedModel,
 500  						Language:       languageGolang,
 501  						RepositoryPath: repositoryNextPath,
 502  						Task:           evaluatetask.IdentifierWriteTests,
 503  						Assessment: map[metrics.AssessmentKey]uint64{
 504  							metrics.AssessmentKeyCoverage:                      0,
 505  							metrics.AssessmentKeyFilesExecuted:                 1,
 506  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
 507  							metrics.AssessmentKeyResponseNoError:               1,
 508  						},
 509  					},
 510  					&metricstesting.AssessmentTuple{
 511  						Model:          mockedModel,
 512  						Language:       languageGolang,
 513  						RepositoryPath: repositoryNextPath,
 514  						Task:           evaluatetask.IdentifierWriteTestsSymflowerFix,
 515  						Assessment: map[metrics.AssessmentKey]uint64{
 516  							metrics.AssessmentKeyCoverage:                      0,
 517  							metrics.AssessmentKeyFilesExecuted:                 1,
 518  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
 519  							metrics.AssessmentKeyResponseNoError:               1,
 520  						},
 521  					},
 522  					&metricstesting.AssessmentTuple{
 523  						Model:          mockedModel,
 524  						Language:       languageGolang,
 525  						RepositoryPath: repositoryPlainPath,
 526  						Task:           evaluatetask.IdentifierWriteTests,
 527  						Assessment: map[metrics.AssessmentKey]uint64{
 528  							metrics.AssessmentKeyCoverage:                      0,
 529  							metrics.AssessmentKeyFilesExecuted:                 2,
 530  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
 531  							metrics.AssessmentKeyResponseNoError:               2,
 532  						},
 533  					},
 534  					&metricstesting.AssessmentTuple{
 535  						Model:          mockedModel,
 536  						Language:       languageGolang,
 537  						RepositoryPath: repositoryPlainPath,
 538  						Task:           evaluatetask.IdentifierWriteTestsSymflowerFix,
 539  						Assessment: map[metrics.AssessmentKey]uint64{
 540  							metrics.AssessmentKeyCoverage:                      0,
 541  							metrics.AssessmentKeyFilesExecuted:                 2,
 542  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
 543  							metrics.AssessmentKeyResponseNoError:               2,
 544  						},
 545  					},
 546  				},
 547  				ExpectedTotalScore: 0,
 548  				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
 549  					"evaluation.log": nil,
 550  					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): nil,
 551  					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "next", "evaluation.log"):  nil,
 552  					"evaluation.csv": nil,
 553  				},
 554  			})
 555  		}
 556  		{
 557  			languageGolang := &golang.Language{}
 558  			mockedModelID := "mocked-generation-model"
 559  			mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)
 560  
 561  			validate(t, &testCase{
 562  				Name: "Solving basic checks once is enough",
 563  
 564  				Before: func(t *testing.T, logger *log.Logger, resultPath string) {
 565  					// Set up mocks, when test is running.
 566  					{
 567  						// Succeed on only one "plain" run.
 568  						generateError(mockedModel)
 569  						generateSuccess(mockedModel)
 570  
 571  						// Succeed on both "next" runs.
 572  						generateSuccess(mockedModel)
 573  						generateSuccess(mockedModel)
 574  					}
 575  				},
 576  				After: func(t *testing.T, logger *log.Logger, resultPath string) {
 577  					mockedModel.MockCapabilityWriteTests.AssertNumberOfCalls(t, "WriteTests", 4)
 578  				},
 579  
 580  				Context: &Context{
 581  					Languages: []language.Language{
 582  						&golang.Language{},
 583  					},
 584  
 585  					Models: []evalmodel.Model{
 586  						mockedModel,
 587  					},
 588  
 589  					RepositoryPaths: []string{
 590  						repositoryPlainPath,
 591  						repositoryNextPath,
 592  					},
 593  					TestdataPath: temporaryTestdataPath,
 594  
 595  					Runs: 2,
 596  				},
 597  
 598  				ExpectedAssessments: []*metricstesting.AssessmentTuple{
 599  					&metricstesting.AssessmentTuple{
 600  						Model:          mockedModel,
 601  						Language:       languageGolang,
 602  						RepositoryPath: repositoryNextPath,
 603  						Task:           evaluatetask.IdentifierWriteTests,
 604  						Assessment: map[metrics.AssessmentKey]uint64{
 605  							metrics.AssessmentKeyCoverage:                      0,
 606  							metrics.AssessmentKeyFilesExecuted:                 2,
 607  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
 608  							metrics.AssessmentKeyResponseNoError:               2,
 609  						},
 610  					},
 611  					&metricstesting.AssessmentTuple{
 612  						Model:          mockedModel,
 613  						Language:       languageGolang,
 614  						RepositoryPath: repositoryNextPath,
 615  						Task:           evaluatetask.IdentifierWriteTestsSymflowerFix,
 616  						Assessment: map[metrics.AssessmentKey]uint64{
 617  							metrics.AssessmentKeyCoverage:                      0,
 618  							metrics.AssessmentKeyFilesExecuted:                 2,
 619  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
 620  							metrics.AssessmentKeyResponseNoError:               2,
 621  						},
 622  					},
 623  					&metricstesting.AssessmentTuple{
 624  						Model:          mockedModel,
 625  						Language:       languageGolang,
 626  						RepositoryPath: repositoryPlainPath,
 627  						Task:           evaluatetask.IdentifierWriteTests,
 628  						Assessment: map[metrics.AssessmentKey]uint64{
 629  							metrics.AssessmentKeyCoverage:                      0,
 630  							metrics.AssessmentKeyFilesExecuted:                 1,
 631  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
 632  							metrics.AssessmentKeyResponseNoError:               1,
 633  						},
 634  					},
 635  					&metricstesting.AssessmentTuple{
 636  						Model:          mockedModel,
 637  						Language:       languageGolang,
 638  						RepositoryPath: repositoryPlainPath,
 639  						Task:           evaluatetask.IdentifierWriteTestsSymflowerFix,
 640  						Assessment: map[metrics.AssessmentKey]uint64{
 641  							metrics.AssessmentKeyCoverage:                      0,
 642  							metrics.AssessmentKeyFilesExecuted:                 1,
 643  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
 644  							metrics.AssessmentKeyResponseNoError:               1,
 645  						},
 646  					},
 647  				},
 648  				ExpectedTotalScore: 0,
 649  				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
 650  					"evaluation.log": nil,
 651  					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): nil,
 652  					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "next", "evaluation.log"):  nil,
 653  					"evaluation.csv": nil,
 654  				},
 655  			})
 656  		}
 657  		{
 658  			languageGolang := &golang.Language{}
 659  			mockedModelID := "mocked-generation-model"
 660  			mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)
 661  
 662  			validate(t, &testCase{
 663  				Name: "Never solving basic checks leads to exclusion",
 664  
 665  				Before: func(t *testing.T, logger *log.Logger, resultPath string) {
 666  					// Set up mocks, when test is running.
 667  					{
 668  						// Error on every "plain" run.
 669  						generateError(mockedModel)
 670  						generateError(mockedModel)
 671  					}
 672  				},
 673  				After: func(t *testing.T, logger *log.Logger, resultPath string) {
 674  					mockedModel.MockCapabilityWriteTests.AssertNumberOfCalls(t, "WriteTests", 2)
 675  				},
 676  
 677  				Context: &Context{
 678  					Languages: []language.Language{
 679  						&golang.Language{},
 680  					},
 681  
 682  					Models: []evalmodel.Model{
 683  						mockedModel,
 684  					},
 685  
 686  					RepositoryPaths: []string{
 687  						repositoryPlainPath,
 688  						repositoryNextPath,
 689  					},
 690  					TestdataPath: temporaryTestdataPath,
 691  
 692  					Runs: 2,
 693  				},
 694  
 695  				ExpectedAssessments: []*metricstesting.AssessmentTuple{
 696  					&metricstesting.AssessmentTuple{
 697  						Model:          mockedModel,
 698  						Language:       languageGolang,
 699  						RepositoryPath: repositoryPlainPath,
 700  						Task:           evaluatetask.IdentifierWriteTests,
 701  						Assessment: map[metrics.AssessmentKey]uint64{
 702  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
 703  						},
 704  					},
 705  					&metricstesting.AssessmentTuple{
 706  						Model:          mockedModel,
 707  						Language:       languageGolang,
 708  						RepositoryPath: repositoryPlainPath,
 709  						Task:           evaluatetask.IdentifierWriteTestsSymflowerFix,
 710  						Assessment: map[metrics.AssessmentKey]uint64{
 711  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
 712  						},
 713  					},
 714  				},
 715  				ExpectedTotalScore: 0,
 716  				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
 717  					"evaluation.log": nil,
 718  					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): nil,
 719  					"evaluation.csv": nil,
 720  				},
 721  			})
 722  		}
 723  	})
 724  	t.Run("Runs", func(t *testing.T) {
 725  		generateSuccess := func(mockedModel *modeltesting.MockModelCapabilityWriteTests) {
 726  			mockedModel.RegisterGenerateSuccess(t, testFiles["plain"].Path, testFiles["plain"].Content, metricstesting.AssessmentsWithProcessingTime)
 727  		}
 728  		{
 729  			languageGolang := &golang.Language{}
 730  			mockedModelID := "mocked-generation-model"
 731  			mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)
 732  
 733  			repositoryPath := filepath.Join("golang", "plain")
 734  			validate(t, &testCase{
 735  				Name: "Interleaved",
 736  
 737  				Before: func(t *testing.T, logger *log.Logger, resultPath string) {
 738  					generateSuccess(mockedModel)
 739  				},
 740  
 741  				Context: &Context{
 742  					Languages: []language.Language{
 743  						&golang.Language{},
 744  					},
 745  
 746  					Models: []evalmodel.Model{
 747  						mockedModel,
 748  					},
 749  
 750  					RepositoryPaths: []string{
 751  						repositoryPath,
 752  					},
 753  
 754  					Runs:           3,
 755  					RunsSequential: false,
 756  				},
 757  
 758  				ExpectedAssessments: []*metricstesting.AssessmentTuple{
 759  					&metricstesting.AssessmentTuple{
 760  						Model:          mockedModel,
 761  						Language:       languageGolang,
 762  						RepositoryPath: repositoryPath,
 763  						Task:           evaluatetask.IdentifierWriteTests,
 764  						Assessment: map[metrics.AssessmentKey]uint64{
 765  							metrics.AssessmentKeyCoverage:                      0,
 766  							metrics.AssessmentKeyFilesExecuted:                 3,
 767  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 3,
 768  							metrics.AssessmentKeyResponseNoError:               3,
 769  						},
 770  					},
 771  					&metricstesting.AssessmentTuple{
 772  						Model:          mockedModel,
 773  						Language:       languageGolang,
 774  						RepositoryPath: repositoryPath,
 775  						Task:           evaluatetask.IdentifierWriteTestsSymflowerFix,
 776  						Assessment: map[metrics.AssessmentKey]uint64{
 777  							metrics.AssessmentKeyCoverage:                      0,
 778  							metrics.AssessmentKeyFilesExecuted:                 3,
 779  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 3,
 780  							metrics.AssessmentKeyResponseNoError:               3,
 781  						},
 782  					},
 783  				},
 784  				ExpectedTotalScore: 6,
 785  				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
 786  					"evaluation.log": nil,
 787  					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): nil,
 788  					"evaluation.csv": nil,
 789  				},
 790  				ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) {
 791  					assert.Contains(t, output, "Run 1/3")
 792  					assert.Contains(t, output, "Run 2/3")
 793  					assert.Contains(t, output, "Run 3/3")
 794  					assert.NotRegexp(t, `Run \d+/\d+ for model`, output)
 795  
 796  					assert.Equal(t, 1, strings.Count(output, "Creating temporary repository"), "create only one temporary repository")
 797  				},
 798  			})
 799  		}
 800  		{
 801  			languageGolang := &golang.Language{}
 802  			mockedModelID := "mocked-generation-model"
 803  			mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)
 804  
 805  			repositoryPath := filepath.Join("golang", "plain")
 806  			validate(t, &testCase{
 807  				Name: "Sequential",
 808  
 809  				Before: func(t *testing.T, logger *log.Logger, resultPath string) {
 810  					generateSuccess(mockedModel)
 811  				},
 812  
 813  				Context: &Context{
 814  					Languages: []language.Language{
 815  						&golang.Language{},
 816  					},
 817  
 818  					Models: []evalmodel.Model{
 819  						mockedModel,
 820  					},
 821  
 822  					RepositoryPaths: []string{
 823  						repositoryPath,
 824  					},
 825  
 826  					Runs:           3,
 827  					RunsSequential: true,
 828  				},
 829  
 830  				ExpectedAssessments: []*metricstesting.AssessmentTuple{
 831  					&metricstesting.AssessmentTuple{
 832  						Model:          mockedModel,
 833  						Language:       languageGolang,
 834  						RepositoryPath: repositoryPath,
 835  						Task:           evaluatetask.IdentifierWriteTests,
 836  						Assessment: map[metrics.AssessmentKey]uint64{
 837  							metrics.AssessmentKeyCoverage:                      0,
 838  							metrics.AssessmentKeyFilesExecuted:                 3,
 839  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 3,
 840  							metrics.AssessmentKeyResponseNoError:               3,
 841  						},
 842  					},
 843  					&metricstesting.AssessmentTuple{
 844  						Model:          mockedModel,
 845  						Language:       languageGolang,
 846  						RepositoryPath: repositoryPath,
 847  						Task:           evaluatetask.IdentifierWriteTestsSymflowerFix,
 848  						Assessment: map[metrics.AssessmentKey]uint64{
 849  							metrics.AssessmentKeyCoverage:                      0,
 850  							metrics.AssessmentKeyFilesExecuted:                 3,
 851  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 3,
 852  							metrics.AssessmentKeyResponseNoError:               3,
 853  						},
 854  					},
 855  				},
 856  				ExpectedTotalScore: 6,
 857  				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
 858  					"evaluation.log": nil,
 859  					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): nil,
 860  					"evaluation.csv": nil,
 861  				},
 862  				ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) {
 863  					assert.Contains(t, output, "Run 1/3 for model")
 864  					assert.Contains(t, output, "Run 2/3 for model")
 865  					assert.Contains(t, output, "Run 3/3 for model")
 866  					assert.NotRegexp(t, `Run \d+/\d+$`, output)
 867  
 868  					assert.Equal(t, 1, strings.Count(output, "Creating temporary repository"), "create only one temporary repository")
 869  				},
 870  			})
 871  		}
 872  	})
 873  
 874  	t.Run("Preloading", func(t *testing.T) {
 875  		generateSuccess := func(mockedModel *modeltesting.MockModelCapabilityWriteTests) {
 876  			mockedModel.RegisterGenerateSuccess(t, testFiles["plain"].Path, testFiles["plain"].Content, metricstesting.AssessmentsWithProcessingTime)
 877  		}
 878  
 879  		{
 880  			// Setup provider and model mocking.
 881  			languageGolang := &golang.Language{}
 882  			mockedModelID := "testing-provider/testing-model"
 883  			mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)
 884  			mockedProviderID := "testing-provider"
 885  			mockedProvider := providertesting.NewMockProviderNamedWithModels(t, mockedProviderID, []model.Model{mockedModel})
 886  			mockedLoader := providertesting.NewMockLoader(t)
 887  			embeddedProvider := &struct {
 888  				provider.Provider
 889  				provider.Loader
 890  			}{
 891  				Provider: mockedProvider,
 892  				Loader:   mockedLoader,
 893  			}
 894  			repositoryPath := filepath.Join("golang", "plain")
 895  
 896  			validate(t, &testCase{
 897  				Name: "Once for combined runs",
 898  
 899  				Before: func(t *testing.T, logger *log.Logger, resultPath string) {
 900  					generateSuccess(mockedModel)
 901  					mockedLoader.On("Load", mockedModelID).Return(nil)
 902  					mockedLoader.On("Unload", mockedModelID).Return(nil)
 903  				},
 904  				After: func(t *testing.T, logger *log.Logger, resultPath string) {
 905  					delete(provider.Providers, mockedProviderID)
 906  
 907  					mockedLoader.AssertNumberOfCalls(t, "Load", 1)
 908  					mockedLoader.AssertNumberOfCalls(t, "Unload", 1)
 909  				},
 910  
 911  				Context: &Context{
 912  					Languages: []language.Language{
 913  						languageGolang,
 914  					},
 915  
 916  					Models: []evalmodel.Model{
 917  						mockedModel,
 918  					},
 919  					ProviderForModel: map[evalmodel.Model]provider.Provider{
 920  						mockedModel: embeddedProvider,
 921  					},
 922  
 923  					RepositoryPaths: []string{
 924  						repositoryPath,
 925  					},
 926  
 927  					Runs:           3,
 928  					RunsSequential: true,
 929  				},
 930  
 931  				ExpectedAssessments: []*metricstesting.AssessmentTuple{
 932  					&metricstesting.AssessmentTuple{
 933  						Model:          mockedModel,
 934  						Language:       languageGolang,
 935  						RepositoryPath: repositoryPath,
 936  						Task:           evaluatetask.IdentifierWriteTests,
 937  						Assessment: map[metrics.AssessmentKey]uint64{
 938  							metrics.AssessmentKeyCoverage:                      0,
 939  							metrics.AssessmentKeyFilesExecuted:                 3,
 940  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 3,
 941  							metrics.AssessmentKeyResponseNoError:               3,
 942  						},
 943  					},
 944  					&metricstesting.AssessmentTuple{
 945  						Model:          mockedModel,
 946  						Language:       languageGolang,
 947  						RepositoryPath: repositoryPath,
 948  						Task:           evaluatetask.IdentifierWriteTestsSymflowerFix,
 949  						Assessment: map[metrics.AssessmentKey]uint64{
 950  							metrics.AssessmentKeyCoverage:                      0,
 951  							metrics.AssessmentKeyFilesExecuted:                 3,
 952  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 3,
 953  							metrics.AssessmentKeyResponseNoError:               3,
 954  						},
 955  					},
 956  				},
 957  				ExpectedTotalScore: 6,
 958  				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
 959  					"evaluation.log": nil,
 960  					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): nil,
 961  					"evaluation.csv": nil,
 962  				},
 963  			})
 964  		}
 965  		{
 966  			// Setup provider and model mocking.
 967  			languageGolang := &golang.Language{}
 968  			mockedModelID := "testing-provider/testing-model"
 969  			mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)
 970  			mockedProviderID := "testing-provider"
 971  			mockedProvider := providertesting.NewMockProviderNamedWithModels(t, mockedProviderID, []model.Model{mockedModel})
 972  			mockedLoader := providertesting.NewMockLoader(t)
 973  			embeddedProvider := &struct {
 974  				provider.Provider
 975  				provider.Loader
 976  			}{
 977  				Provider: mockedProvider,
 978  				Loader:   mockedLoader,
 979  			}
 980  			repositoryPath := filepath.Join("golang", "plain")
 981  			validate(t, &testCase{
 982  				Name: "Multiple times for interleaved runs",
 983  
 984  				Before: func(t *testing.T, logger *log.Logger, resultPath string) {
 985  					generateSuccess(mockedModel)
 986  					mockedLoader.On("Load", mockedModelID).Return(nil)
 987  					mockedLoader.On("Unload", mockedModelID).Return(nil)
 988  				},
 989  				After: func(t *testing.T, logger *log.Logger, resultPath string) {
 990  					delete(provider.Providers, "testing-provider")
 991  
 992  					mockedLoader.AssertNumberOfCalls(t, "Load", 3)
 993  					mockedLoader.AssertNumberOfCalls(t, "Unload", 3)
 994  				},
 995  
 996  				Context: &Context{
 997  					Languages: []language.Language{
 998  						languageGolang,
 999  					},
1000  
1001  					Models: []evalmodel.Model{
1002  						mockedModel,
1003  					},
1004  					ProviderForModel: map[evalmodel.Model]provider.Provider{
1005  						mockedModel: embeddedProvider,
1006  					},
1007  
1008  					RepositoryPaths: []string{
1009  						repositoryPath,
1010  					},
1011  
1012  					Runs: 3,
1013  				},
1014  
1015  				ExpectedAssessments: []*metricstesting.AssessmentTuple{
1016  					&metricstesting.AssessmentTuple{
1017  						Model:          mockedModel,
1018  						Language:       languageGolang,
1019  						RepositoryPath: repositoryPath,
1020  						Task:           evaluatetask.IdentifierWriteTests,
1021  						Assessment: map[metrics.AssessmentKey]uint64{
1022  							metrics.AssessmentKeyCoverage:                      0,
1023  							metrics.AssessmentKeyFilesExecuted:                 3,
1024  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 3,
1025  							metrics.AssessmentKeyResponseNoError:               3,
1026  						},
1027  					},
1028  					&metricstesting.AssessmentTuple{
1029  						Model:          mockedModel,
1030  						Language:       languageGolang,
1031  						RepositoryPath: repositoryPath,
1032  						Task:           evaluatetask.IdentifierWriteTestsSymflowerFix,
1033  						Assessment: map[metrics.AssessmentKey]uint64{
1034  							metrics.AssessmentKeyCoverage:                      0,
1035  							metrics.AssessmentKeyFilesExecuted:                 3,
1036  							metrics.AssessmentKeyFilesExecutedMaximumReachable: 3,
1037  							metrics.AssessmentKeyResponseNoError:               3,
1038  						},
1039  					},
1040  				},
1041  				ExpectedTotalScore: 6,
1042  				ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
1043  					"evaluation.log": nil,
1044  					filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): nil,
1045  					"evaluation.csv": nil,
1046  				},
1047  			})
1048  		}
1049  	})
1050  	{
1051  		// Setup provider and model mocking.
1052  		languageGolang := &golang.Language{}
1053  		mockedModelID := "testing-provider/testing-model"
1054  		mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)
1055  
1056  		repositoryPath := filepath.Join("golang", "plain")
1057  
1058  		validate(t, &testCase{
1059  			Name: "Download Go dependencies",
1060  
1061  			Before: func(t *testing.T, logger *log.Logger, resultPath string) {
1062  				mockedModel.RegisterGenerateSuccess(t, testFiles["plain-with-assert"].Path, testFiles["plain-with-assert"].Content, metricstesting.AssessmentsWithProcessingTime)
1063  			},
1064  
1065  			Context: &Context{
1066  				Languages: []language.Language{
1067  					languageGolang,
1068  				},
1069  
1070  				Models: []evalmodel.Model{
1071  					mockedModel,
1072  				},
1073  
1074  				RepositoryPaths: []string{
1075  					repositoryPath,
1076  				},
1077  
1078  				Runs: 1,
1079  			},
1080  
1081  			ExpectedAssessments: []*metricstesting.AssessmentTuple{
1082  				&metricstesting.AssessmentTuple{
1083  					Model:          mockedModel,
1084  					Language:       languageGolang,
1085  					RepositoryPath: repositoryPath,
1086  					Task:           evaluatetask.IdentifierWriteTests,
1087  					Assessment: map[metrics.AssessmentKey]uint64{
1088  						metrics.AssessmentKeyCoverage:                      0,
1089  						metrics.AssessmentKeyFilesExecuted:                 1,
1090  						metrics.AssessmentKeyFilesExecutedMaximumReachable: 1,
1091  						metrics.AssessmentKeyResponseNoError:               1,
1092  					},
1093  				},
1094  				&metricstesting.AssessmentTuple{
1095  					Model:          mockedModel,
1096  					Language:       languageGolang,
1097  					RepositoryPath: repositoryPath,
1098  					Task:           evaluatetask.IdentifierWriteTestsSymflowerFix,
1099  					Assessment: map[metrics.AssessmentKey]uint64{
1100  						metrics.AssessmentKeyCoverage:                      0,
1101  						metrics.AssessmentKeyFilesExecuted:                 1,
1102  						metrics.AssessmentKeyFilesExecutedMaximumReachable: 1,
1103  						metrics.AssessmentKeyResponseNoError:               1,
1104  					},
1105  				},
1106  			},
1107  			ExpectedTotalScore: 2,
1108  			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
1109  				"evaluation.log": nil,
1110  				filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): nil,
1111  				"evaluation.csv": nil,
1112  			},
1113  		})
1114  	}
1115  }