radoslavralev committed on
Commit
2fe69a4
·
verified ·
1 Parent(s): 280e0c4

Training in progress, step 5000

Browse files
config.json CHANGED
@@ -4,7 +4,7 @@
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
  "classifier_dropout": null,
7
- "dtype": "float32",
8
  "hidden_act": "gelu",
9
  "hidden_dropout_prob": 0.1,
10
  "hidden_size": 384,
 
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
  "classifier_dropout": null,
7
+ "dtype": "bfloat16",
8
  "hidden_act": "gelu",
9
  "hidden_dropout_prob": 0.1,
10
  "hidden_size": 384,
eval/Information-Retrieval_evaluation_test_results.csv CHANGED
@@ -6,3 +6,57 @@ epoch,steps,cosine-Accuracy@1,cosine-Precision@1,cosine-Recall@1,cosine-MRR@1,co
6
  31.25,500,0.9659982563208369,0.9659982563208369,0.9659982563208369,0.9659982563208369,0.9834981465528583,0.9796699368805629
7
  0,0,0.966870095902354,0.966870095902354,0.966870095902354,0.966870095902354,0.9836591064647642,0.9798987274369277
8
  31.25,500,0.9659982563208369,0.9659982563208369,0.9659982563208369,0.9659982563208369,0.9834981465528583,0.9796699368805629
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  31.25,500,0.9659982563208369,0.9659982563208369,0.9659982563208369,0.9659982563208369,0.9834981465528583,0.9796699368805629
7
  0,0,0.966870095902354,0.966870095902354,0.966870095902354,0.966870095902354,0.9836591064647642,0.9798987274369277
8
  31.25,500,0.9659982563208369,0.9659982563208369,0.9659982563208369,0.9659982563208369,0.9834981465528583,0.9796699368805629
9
+ 0,0,0.47255729794933654,0.47255729794933654,0.46854895456373136,0.47255729794933654,0.7715172100497037,0.6989648100924787
10
+ 0,0,0.47255729794933654,0.47255729794933654,0.46854895456373136,0.47255729794933654,0.7715172100497037,0.6989648100924787
11
+ 0,0,0.47255729794933654,0.47255729794933654,0.46854895456373136,0.47255729794933654,0.7715172100497037,0.6989648100924787
12
+ 0,0,0.5792781316348196,0.5792781316348196,0.570654358616142,0.5792781316348196,0.8117094152752716,0.7558793537038059
13
+ 0,0,0.5792781316348196,0.5792781316348196,0.570654358616142,0.5792781316348196,0.8117102979737694,0.7558824248326518
14
+ 0,0,0.5792781316348196,0.5792781316348196,0.570654358616142,0.5792781316348196,0.8117094152752716,0.7558793537038059
15
+ 0,0,0.5792781316348196,0.5792781316348196,0.570654358616142,0.5792781316348196,0.8117094152752716,0.7558793537038059
16
+ 0.01958403509459089,500,0.5797027600849257,0.5797027600849257,0.5711214499112588,0.5797027600849257,0.8117465114365338,0.7560109520068754
17
+ 0.03916807018918178,1000,0.579447983014862,0.579447983014862,0.5707298934846705,0.579447983014862,0.8103192235836243,0.755009447689694
18
+ 0.05875210528377267,1500,0.5698513800424628,0.5698513800424628,0.5614786549850244,0.5698513800424628,0.8023545621616985,0.7464109805816145
19
+ 0,0,0.58337852494577,0.58337852494577,0.5659288434285045,0.58337852494577,0.7668793411953839,0.7164626931297606,{1: np.float64(0.3566711936022159)},{1: np.float64(0.1429932394022761)}
20
+ 0,0,0.47255729794933654,0.47255729794933654,0.46854895456373136,0.47255729794933654,0.7715172100497037,0.6989648100924787,{1: np.float64(0.20253299618275594)},{1: np.float64(0.04903260565343626)}
21
+ 0,0,0.5830622204148027,0.5830622204148027,0.565614904314579,0.5830622204148027,0.766769668223301,0.7163126941720972,{1: np.float64(0.35646094419970875)},{1: np.float64(0.142974071960918)}
22
+ 0.009109293301025707,500,0.5835366680222313,0.5835366680222313,0.5661232410368238,0.5835366680222313,0.7669877259605453,0.716537912175589,{1: np.float64(0.3572237146154031)},{1: np.float64(0.14305513124640853)}
23
+ 0.018218586602051414,1000,0.5813338755591704,0.5813338755591704,0.5639675348919451,0.5813338755591704,0.7648309006662681,0.7144763273309307,{1: np.float64(0.357199015295471)},{1: np.float64(0.15009454288359583)}
24
+ 0.02732787990307712,1500,0.573369933577335,0.573369933577335,0.5561490336945298,0.573369933577335,0.7570827527464034,0.7064555815344021,{1: np.float64(0.35666973171836164)},{1: np.float64(0.1669701681559985)}
25
+ 0.03643717320410283,2000,0.555849261217297,0.555849261217297,0.5390725911478758,0.555849261217297,0.7388549607354099,0.688352093298213,{1: np.float64(0.35168882347389874)},{1: np.float64(0.2063956568562195)}
26
+ 0.04554646650512853,2500,0.545818083231666,0.545818083231666,0.5291049552525253,0.545818083231666,0.7271520187765876,0.6769884981704232,{1: np.float64(0.3466937601222253)},{1: np.float64(0.22980178682560468)}
27
+ 0.05465575980615424,3000,0.5384980344313407,0.5384980344313407,0.5218515550446722,0.5384980344313407,0.7191067620796453,0.6690437268450113,{1: np.float64(0.34443670388741765)},{1: np.float64(0.24123200081240848)}
28
+ 0.06376505310717995,3500,0.5330079978310966,0.5330079978310966,0.5165620290404247,0.5330079978310966,0.7131570915365721,0.6633675657515575,{1: np.float64(0.3413543419831487)},{1: np.float64(0.2491110005758098)}
29
+ 0,0,0.5830622204148027,0.5830622204148027,0.565614904314579,0.5830622204148027,0.766769668223301,0.7163126941720972,{1: np.float64(0.35646094419970875)},{1: np.float64(0.142974071960918)}
30
+ 0.006072825321252459,500,0.5829266639555375,0.5829266639555375,0.5655132369701299,0.5829266639555375,0.7667635614240592,0.7162298798522008,{1: np.float64(0.3564917136474307)},{1: np.float64(0.14294170729431657)}
31
+ 0.012145650642504918,1000,0.5818083231665989,0.5818083231665989,0.5644092230217179,0.5818083231665989,0.7651218988165507,0.7148045349503859,{1: np.float64(0.35772282237020625)},{1: np.float64(0.14927653926360696)}
32
+ 0.018218475963757377,1500,0.5733360444625186,0.5733360444625186,0.5561631541590366,0.5733360444625186,0.7576270981869245,0.7068647163357982,{1: np.float64(0.35683846436275146)},{1: np.float64(0.16441309974542384)}
33
+ 0.024291301285009836,2000,0.5570014911210519,0.5570014911210519,0.540258710166447,0.5570014911210519,0.7402393821372082,0.6896165481269393,{1: np.float64(0.3524138105166719)},{1: np.float64(0.2041747864905315)}
34
+ 0.0303641266062623,2500,0.5461569743798292,0.5461569743798292,0.5294706752832515,0.5461569743798292,0.7272840951984587,0.6771925250104588,{1: np.float64(0.3470360881277048)},{1: np.float64(0.2315328629530438)}
35
+ 0,0,0.5874881249780092,0.5874881249780092,0.5695910745521948,0.5874881249780092,0.7723568323569412,0.7217863771368539,{1: np.float64(0.3490081766878099)},{1: np.float64(0.1358367624863631)}
36
+ 0.01639398013049608,500,0.5874177544773231,0.5874177544773231,0.5695558893018517,0.5874177544773231,0.7722280432631579,0.7217320572359883,{1: np.float64(0.3496432089094968)},{1: np.float64(0.13627568199362303)}
37
+ 0.03278796026099216,1000,0.5842862671967911,0.5842862671967911,0.5665366226707312,0.5842862671967911,0.7689851590916371,0.7185314996109159,{1: np.float64(0.35112769299616237)},{1: np.float64(0.14675573942167353)}
38
+ 0.049181940391488245,1500,0.5737658773442174,0.5737658773442174,0.5561168039920548,0.5737658773442174,0.7586886043622868,0.7078365240886835,{1: np.float64(0.3498887145245635)},{1: np.float64(0.17255825103714081)}
39
+ 0,0,0.548650317572336,0.548650317572336,0.529780177773297,0.548650317572336,0.7467559051152127,0.691192638604471,{1: np.float64(0.31983377806645374)},{1: np.float64(0.15293509382911363)}
40
+ 0,0,0.548650317572336,0.548650317572336,0.529780177773297,0.548650317572336,0.7467559051152127,0.691192638604471,{1: np.float64(0.31983377806645374)},{1: np.float64(0.15293509382911363)}
41
+ 0,0,0.548650317572336,0.548650317572336,0.529780177773297,0.548650317572336,0.7467559051152127,0.691192638604471,{1: np.float64(0.31983377806645374)},{1: np.float64(0.15293509382911363)}
42
+ 0,0,0.548650317572336,0.548650317572336,0.529780177773297,0.548650317572336,0.7467559051152127,0.691192638604471,{1: np.float64(0.31983377806645374)},{1: np.float64(0.15293509382911363)}
43
+ 0,0,0.548650317572336,0.548650317572336,0.529780177773297,0.548650317572336,0.7467559051152127,0.691192638604471,{1: np.float64(0.31983377806645374)},{1: np.float64(0.15293509382911363)}
44
+ 0,0,0.548650317572336,0.548650317572336,0.529780177773297,0.548650317572336,0.7467559051152127,0.691192638604471,{1: np.float64(0.31983377806645374)},{1: np.float64(0.15293509382911363)}
45
+ 0,0,0.548650317572336,0.548650317572336,0.529780177773297,0.548650317572336,0.7467559051152127,0.691192638604471,{1: np.float64(0.31983377806645374)},{1: np.float64(0.15293509382911363)}
46
+ 0,0,0.548650317572336,0.548650317572336,0.529780177773297,0.548650317572336,0.7467559051152127,0.691192638604471,{1: np.float64(0.31983377806645374)},{1: np.float64(0.15293509382911363)}
47
+ 0.03075598203850649,500,0.5482092448835568,0.5482092448835568,0.5293611587189568,0.5482092448835568,0.746549748713122,0.6909518109415775,{1: np.float64(0.31978481522914715)},{1: np.float64(0.1527728619713398)}
48
+ 0.06151196407701298,1000,0.5468860268172194,0.5468860268172194,0.5281834946399167,0.5468860268172194,0.7452300750994265,0.6896585296122346,{1: np.float64(0.32255176984149164)},{1: np.float64(0.155631443997813)}
49
+ 0.09226794611551947,1500,0.5433133380381087,0.5433133380381087,0.5245295749739557,0.5433133380381087,0.740010276095208,0.68446200539013,{1: np.float64(0.32347819611365164)},{1: np.float64(0.16889713096852677)}
50
+ 0,0,0.5309191954834157,0.5309191954834157,0.5126434011325066,0.5309191954834157,0.7314305279871582,0.675229160358019,{1: np.float64(0.3015556447544318)},{1: np.float64(0.19690931872370762)}
51
+ 0.061508180588018206,500,0.5368295695130557,0.5368295695130557,0.5185342944517257,0.5368295695130557,0.737679462367251,0.681647826170938,{1: np.float64(0.304561904598063)},{1: np.float64(0.17557766278731865)}
52
+ 0,0,0.5309191954834157,0.5309191954834157,0.5126434011325066,0.5309191954834157,0.7314305279871582,0.675229160358019,{1: np.float64(0.3015556447544318)},{1: np.float64(0.19690931872370762)}
53
+ 0.061508180588018206,500,0.5368295695130557,0.5368295695130557,0.5185342944517257,0.5368295695130557,0.737679462367251,0.681647826170938,{1: np.float64(0.304561904598063)},{1: np.float64(0.17557766278731865)}
54
+ 0.12301636117603641,1000,0.5371824276640791,0.5371824276640791,0.51903307415062,0.5371824276640791,0.7347427602818126,0.6791839306903488,{1: np.float64(0.31651264823410247)},{1: np.float64(0.18745574590743033)}
55
+ 0.1845245417640546,1500,0.5327275935074101,0.5327275935074101,0.5145576566018081,0.5327275935074101,0.7282306154525781,0.6731678099851479,{1: np.float64(0.31953066101258204)},{1: np.float64(0.20099828473883902)}
56
+ 0.24603272235207282,2000,0.5288020465772759,0.5288020465772759,0.5108383111536782,0.5288020465772759,0.7240062624074245,0.6688309997230006,{1: np.float64(0.3200131379453577)},{1: np.float64(0.20416307450794952)}
57
+ 0.307540902940091,2500,0.5241707833450953,0.5241707833450953,0.5064150872063715,0.5241707833450953,0.7192578910730392,0.6643472812947812,{1: np.float64(0.31815988199805734)},{1: np.float64(0.20793381293927593)}
58
+ 0.3690490835281092,3000,0.5187455892731122,0.5187455892731122,0.5014566950633464,0.5187455892731122,0.7153955348318995,0.659967091441903,{1: np.float64(0.31591600543731346)},{1: np.float64(0.20898358159898273)}
59
+ 0.43055726411612744,3500,0.5165843330980946,0.5165843330980946,0.499185170716134,0.5165843330980946,0.7123742161083797,0.657103379541812,{1: np.float64(0.3142519028136429)},{1: np.float64(0.21191152346716996)}
60
+ 0.49206544470414565,4000,0.5136291460832745,0.5136291460832745,0.49609913213697615,0.5136291460832745,0.7094132063717694,0.6539726048972176,{1: np.float64(0.3119784463901498)},{1: np.float64(0.21389626227951036)}
61
+ 0.5535736252921638,4500,0.5121736062103035,0.5121736062103035,0.49458110696642804,0.5121736062103035,0.7075051193793919,0.6522760525794585,{1: np.float64(0.31343196470110707)},{1: np.float64(0.21483213049895286)}
62
+ 0.615081805880182,5000,0.5073659139026111,0.5073659139026111,0.48995940030917096,0.5073659139026111,0.704125968555372,0.6486868844121669,{1: np.float64(0.30976968186914705)},{1: np.float64(0.21846349203066784)}
final_metrics.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "test_cosine_accuracy@1": 0.5911401843817787,
3
- "test_cosine_precision@1": 0.5911401843817787,
4
- "test_cosine_recall@1": 0.5739416996352497,
5
- "test_cosine_ndcg@10": 0.7683554258363593,
6
- "test_cosine_mrr@1": 0.5911401843817787,
7
- "test_cosine_map@100": 0.7198000147801676,
8
- "test_cosine_auc_precision_cache_hit_ratio": 0.3630884994626306,
9
- "test_cosine_auc_similarity_distribution": 0.1804964046972829
10
  }
 
1
  {
2
+ "test_cosine_accuracy@1": 0.5488267466478476,
3
+ "test_cosine_precision@1": 0.5488267466478476,
4
+ "test_cosine_recall@1": 0.5299124995799307,
5
+ "test_cosine_ndcg@10": 0.7468543359493904,
6
+ "test_cosine_mrr@1": 0.5488267466478476,
7
+ "test_cosine_map@100": 0.6913077014908864,
8
+ "test_cosine_auc_precision_cache_hit_ratio": 0.3205152119302144,
9
+ "test_cosine_auc_similarity_distribution": 0.15285602105071638
10
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5946d60b741fce8e17e9be626f0c9025f6df3ab055d8fb749d78529a6ded89f7
3
- size 133462128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:551613faa4cf473038ea65dedbd058fb620e0c7f8d94dc653d50e5ee4508fe69
3
+ size 66742184
tokenizer_config.json CHANGED
@@ -43,23 +43,15 @@
43
  },
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
46
- "do_basic_tokenize": true,
47
  "do_lower_case": true,
48
  "extra_special_tokens": {},
49
  "mask_token": "[MASK]",
50
  "max_length": 64,
51
  "model_max_length": 512,
52
- "never_split": null,
53
- "pad_to_multiple_of": null,
54
  "pad_token": "[PAD]",
55
- "pad_token_type_id": 0,
56
- "padding_side": "right",
57
  "sep_token": "[SEP]",
58
- "stride": 0,
59
  "strip_accents": null,
60
  "tokenize_chinese_chars": true,
61
  "tokenizer_class": "BertTokenizer",
62
- "truncation_side": "right",
63
- "truncation_strategy": "longest_first",
64
  "unk_token": "[UNK]"
65
  }
 
43
  },
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
 
46
  "do_lower_case": true,
47
  "extra_special_tokens": {},
48
  "mask_token": "[MASK]",
49
  "max_length": 64,
50
  "model_max_length": 512,
 
 
51
  "pad_token": "[PAD]",
 
 
52
  "sep_token": "[SEP]",
 
53
  "strip_accents": null,
54
  "tokenize_chinese_chars": true,
55
  "tokenizer_class": "BertTokenizer",
 
 
56
  "unk_token": "[UNK]"
57
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd5bf31214aff7bf913e77cc0caf29d55df9595443c8ce1095cc781d9fd80349
3
- size 6289
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4ec5e27753a1494ed86180cda30a69319d61af98b822adc19821142a45f8eae
3
+ size 6353