diff --git a/data/awl/academic_synonyms.json b/data/awl/academic_synonyms.json new file mode 100644 index 0000000000000000000000000000000000000000..674af34b78ecfd276d9f6d176892abc63164e849 --- /dev/null +++ b/data/awl/academic_synonyms.json @@ -0,0 +1,58 @@ +{ + "big": ["substantial", "considerable", "significant"], + "small": ["minimal", "negligible", "marginal"], + "good": ["beneficial", "advantageous", "favourable"], + "bad": ["detrimental", "adverse", "deleterious"], + "show": ["demonstrate", "illustrate", "indicate"], + "use": ["utilise", "employ", "implement"], + "get": ["obtain", "acquire", "procure"], + "give": ["provide", "furnish", "allocate"], + "make": ["construct", "fabricate", "formulate"], + "help": ["facilitate", "assist", "enable"], + "try": ["attempt", "endeavour", "undertake"], + "start": ["commence", "initiate", "inaugurate"], + "end": ["conclude", "terminate", "finalise"], + "need": ["require", "necessitate", "demand"], + "want": ["seek", "desire", "aspire"], + "think": ["consider", "contemplate", "deliberate"], + "find": ["ascertain", "determine", "identify"], + "change": ["modify", "alter", "transform"], + "keep": ["maintain", "retain", "preserve"], + "go up": ["increase", "escalate", "augment"], + "go down": ["decrease", "diminish", "decline"], + "look at": ["examine", "investigate", "scrutinise"], + "talk about": ["discuss", "address", "elaborate"], + "deal with": ["address", "tackle", "confront"], + "carry out": ["conduct", "execute", "perform"], + "point out": ["indicate", "highlight", "emphasise"], + "make sure": ["ensure", "verify", "confirm"], + "come up with": ["develop", "devise", "formulate"], + "find out": ["ascertain", "discover", "determine"], + "set up": ["establish", "institute", "inaugurate"], + "bring about": ["cause", "engender", "precipitate"], + "put forward": ["propose", "submit", "advance"], + "speed up": ["accelerate", "expedite", "hasten"], + "slow down": ["decelerate", "impede", "retard"], + "break down": ["decompose", "analyse", "disintegrate"], + "build up": ["accumulate", "amass", "consolidate"], + "cut down": ["reduce", "curtail", "diminish"], + "leave out": ["omit", "exclude", "disregard"], + "turn into": ["transform", "convert", "transmute"], + "figure out": ["determine", "resolve", "decipher"], + "work out": ["calculate", "determine", "resolve"], + "enough": ["sufficient", "adequate", "ample"], + "about": ["approximately", "roughly", "circa"], + "like": ["similar", "analogous", "comparable"], + "important": ["significant", "crucial", "essential"], + "interesting": ["noteworthy", "compelling", "intriguing"], + "clear": ["evident", "apparent", "unambiguous"], + "hard": ["challenging", "arduous", "demanding"], + "easy": ["straightforward", "facile", "uncomplicated"], + "fast": ["rapid", "expeditious", "swift"], + "many": ["numerous", "manifold", "multitudinous"], + "few": ["scant", "sparse", "limited"], + "new": ["novel", "innovative", "unprecedented"], + "old": ["established", "longstanding", "antiquated"], + "right": ["correct", "accurate", "appropriate"], + "wrong": ["incorrect", "erroneous", "fallacious"] +} diff --git a/graphify-out/cache/003a2522e143af3abe66fbb7c0ae1a3bdccd843b03ce0e1049c5d9ca6cdfbd89.json b/graphify-out/cache/003a2522e143af3abe66fbb7c0ae1a3bdccd843b03ce0e1049c5d9ca6cdfbd89.json new file mode 100644 index 0000000000000000000000000000000000000000..da9d04142730ca5b2136878803f835b4dea6ea26 --- /dev/null +++ b/graphify-out/cache/003a2522e143af3abe66fbb7c0ae1a3bdccd843b03ce0e1049c5d9ca6cdfbd89.json @@ -0,0 +1,242 @@ +{ + "nodes": [ + { + "id": "src_style_emotion_classifier_py", + "label": "emotion_classifier.py", + "file_type": "code", + "source_file": "src/style/emotion_classifier.py" + }, + { + "id": "src_style_emotion_classifier_py_docstring", + "label": "Emotion/register classifier module.\nClassifies text emotional register (neutral,", + "file_type": "rationale", + "source_file": "src/style/emotion_classifier.py" + }, + { + "id": "src_style_emotion_classifier_py_EmotionClassifier", + "label": "EmotionClassifier", + "file_type": "code", + "source_file": "src/style/emotion_classifier.py", + "source_location": "line 11" + }, + { + "id": "src_style_emotion_classifier_py_EmotionClassifier_doc", + "label": "Classifies emotional register of text using keyword-based analysis.", + "file_type": "rationale", + "source_file": "src/style/emotion_classifier.py", + "source_location": "line 11" + }, + { + "id": "src_style_emotion_classifier_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/style/emotion_classifier.py", + "source_location": "line 46" + }, + { + "id": "src_style_emotion_classifier_py_classify", + "label": "classify()", + "file_type": "code", + "source_file": "src/style/emotion_classifier.py", + "source_location": "line 49" + }, + { + "id": "src_style_emotion_classifier_py_classify_doc", + "label": "Return emotion distribution over register categories.\n\nReturns a dict with keys:", + "file_type": "rationale", + "source_file": "src/style/emotion_classifier.py", + "source_location": "line 49" + } + ], + "edges": [ + { + "source": "src_style_emotion_classifier_py", + "target": "src_style_emotion_classifier_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.5 + }, + { + "source": "src_style_emotion_classifier_py", + "target": "re", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.6 + }, + { + "source": "src_style_emotion_classifier_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.6 + }, + { + "source": "src_style_emotion_classifier_py", + "target": "src_style_emotion_classifier_py_EmotionClassifier", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/emotion_classifier.py", + "weight": 1.0 + }, + { + "source": "src_style_emotion_classifier_py_EmotionClassifier", + "target": "src_style_emotion_classifier_py_EmotionClassifier_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.5 + }, + { + "source": "src_style_emotion_classifier_py", + "target": "src_style_emotion_classifier_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/emotion_classifier.py", + "weight": 1.0 + }, + { + "source": "src_style_emotion_classifier_py", + "target": "src_style_emotion_classifier_py_classify", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/emotion_classifier.py", + "weight": 1.0 + }, + { + "source": "src_style_emotion_classifier_py_classify", + "target": "src_style_emotion_classifier_py_classify_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.5 + }, + { + "source": "src_style_emotion_classifier_py_classify", + "target": "set", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_emotion_classifier_py_classify", + "target": "items", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_emotion_classifier_py_classify", + "target": "count", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_emotion_classifier_py_classify", + "target": "count", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_emotion_classifier_py_classify", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_emotion_classifier_py_classify", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_emotion_classifier_py_classify", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_emotion_classifier_py_classify", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_emotion_classifier_py_classify", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_emotion_classifier_py_classify", + "target": "values", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_emotion_classifier_py_classify", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_emotion_classifier_py_classify", + "target": "items", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_emotion_classifier_py_classify", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/emotion_classifier.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/0216cdc6603f6072eee040b1ab40b110294c1a6bdbed201f38cfd8bf74a5b30a.json b/graphify-out/cache/0216cdc6603f6072eee040b1ab40b110294c1a6bdbed201f38cfd8bf74a5b30a.json new file mode 100644 index 0000000000000000000000000000000000000000..55168bd0642574178cc982310443f2a6f009e4ec --- /dev/null +++ b/graphify-out/cache/0216cdc6603f6072eee040b1ab40b110294c1a6bdbed201f38cfd8bf74a5b30a.json @@ -0,0 +1,308 @@ +{ + "nodes": [ + { + "id": "src_style_style_vector_py", + "label": "style_vector.py", + "file_type": "code", + "source_file": "src/style/style_vector.py" + }, + { + "id": "src_style_style_vector_py_docstring", + "label": "Style vector utilities.\nHelper functions for manipulating, comparing, and persis", + "file_type": "rationale", + "source_file": "src/style/style_vector.py" + }, + { + "id": "src_style_style_vector_py_cosine_similarity", + "label": "cosine_similarity()", + "file_type": "code", + "source_file": "src/style/style_vector.py", + "source_location": "line 11" + }, + { + "id": "src_style_style_vector_py_cosine_similarity_doc", + "label": "Compute cosine similarity between two style vectors.", + "file_type": "rationale", + "source_file": "src/style/style_vector.py", + "source_location": "line 11" + }, + { + "id": "src_style_style_vector_py_average_style_vectors", + "label": "average_style_vectors()", + "file_type": "code", + "source_file": "src/style/style_vector.py", + "source_location": "line 21" + }, + { + "id": "src_style_style_vector_py_average_style_vectors_doc", + "label": "Compute the mean style vector from a list of vectors.", + "file_type": "rationale", + "source_file": "src/style/style_vector.py", + "source_location": "line 21" + }, + { + "id": "src_style_style_vector_py_save_style_vector", + "label": "save_style_vector()", + "file_type": "code", + "source_file": "src/style/style_vector.py", + "source_location": "line 31" + }, + { + "id": "src_style_style_vector_py_save_style_vector_doc", + "label": "Persist a style vector to disk.", + "file_type": "rationale", + "source_file": "src/style/style_vector.py", + "source_location": "line 31" + }, + { + "id": "src_style_style_vector_py_load_style_vector", + "label": "load_style_vector()", + "file_type": "code", + "source_file": "src/style/style_vector.py", + "source_location": "line 36" + }, + { + "id": "src_style_style_vector_py_load_style_vector_doc", + "label": "Load a style vector from disk.", + "file_type": "rationale", + "source_file": "src/style/style_vector.py", + "source_location": "line 36" + } + ], + "edges": [ + { + "source": "src_style_style_vector_py", + "target": "src_style_style_vector_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/style_vector.py", + "weight": 0.5 + }, + { + "source": "src_style_style_vector_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/style_vector.py", + "weight": 0.6 + }, + { + "source": "src_style_style_vector_py", + "target": "torch.nn.functional", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/style_vector.py", + "weight": 0.6 + }, + { + "source": "src_style_style_vector_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/style_vector.py", + "weight": 0.6 + }, + { + "source": "src_style_style_vector_py", + "target": "src_style_style_vector_py_cosine_similarity", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/style_vector.py", + "weight": 1.0 + }, + { + "source": "src_style_style_vector_py_cosine_similarity", + "target": "src_style_style_vector_py_cosine_similarity_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/style_vector.py", + "weight": 0.5 + }, + { + "source": "src_style_style_vector_py_cosine_similarity", + "target": "cosine_similarity", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/style_vector.py", + "weight": 0.8 + }, + { + "source": "src_style_style_vector_py_cosine_similarity", + "target": "item", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/style_vector.py", + "weight": 0.8 + }, + { + "source": "src_style_style_vector_py_cosine_similarity", + "target": "dim", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/style_vector.py", + "weight": 0.8 + }, + { + "source": "src_style_style_vector_py_cosine_similarity", + "target": "unsqueeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/style_vector.py", + "weight": 0.8 + }, + { + "source": "src_style_style_vector_py_cosine_similarity", + "target": "dim", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/style_vector.py", + "weight": 0.8 + }, + { + "source": "src_style_style_vector_py_cosine_similarity", + "target": "unsqueeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/style_vector.py", + "weight": 0.8 + }, + { + "source": "src_style_style_vector_py", + "target": "src_style_style_vector_py_average_style_vectors", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/style_vector.py", + "weight": 1.0 + }, + { + "source": "src_style_style_vector_py_average_style_vectors", + "target": "src_style_style_vector_py_average_style_vectors_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/style_vector.py", + "weight": 0.5 + }, + { + "source": "src_style_style_vector_py_average_style_vectors", + "target": "stack", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/style_vector.py", + "weight": 0.8 + }, + { + "source": "src_style_style_vector_py_average_style_vectors", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/style_vector.py", + "weight": 0.8 + }, + { + "source": "src_style_style_vector_py_average_style_vectors", + "target": "normalize", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/style_vector.py", + "weight": 0.8 + }, + { + "source": "src_style_style_vector_py_average_style_vectors", + "target": "ValueError", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/style_vector.py", + "weight": 0.8 + }, + { + "source": "src_style_style_vector_py", + "target": "src_style_style_vector_py_save_style_vector", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/style_vector.py", + "weight": 1.0 + }, + { + "source": "src_style_style_vector_py_save_style_vector", + "target": "src_style_style_vector_py_save_style_vector_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/style_vector.py", + "weight": 0.5 + }, + { + "source": "src_style_style_vector_py_save_style_vector", + "target": "save", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/style_vector.py", + "weight": 0.8 + }, + { + "source": "src_style_style_vector_py_save_style_vector", + "target": "cpu", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/style_vector.py", + "weight": 0.8 + }, + { + "source": "src_style_style_vector_py_save_style_vector", + "target": "detach", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/style_vector.py", + "weight": 0.8 + }, + { + "source": "src_style_style_vector_py", + "target": "src_style_style_vector_py_load_style_vector", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/style_vector.py", + "weight": 1.0 + }, + { + "source": "src_style_style_vector_py_load_style_vector", + "target": "src_style_style_vector_py_load_style_vector_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/style_vector.py", + "weight": 0.5 + }, + { + "source": "src_style_style_vector_py_load_style_vector", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/style_vector.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/04675d5f119414e13cde3cbaae0272fd91eb211e37c2977785c302f64829c916.json b/graphify-out/cache/04675d5f119414e13cde3cbaae0272fd91eb211e37c2977785c302f64829c916.json new file mode 100644 index 0000000000000000000000000000000000000000..c24ad75d5aee9a85f28f3f9928312591df5ec37f --- /dev/null +++ b/graphify-out/cache/04675d5f119414e13cde3cbaae0272fd91eb211e37c2977785c302f64829c916.json @@ -0,0 +1,784 @@ +{ + "nodes": [ + { + "id": "src_vocabulary_lexical_substitution_py", + "label": "lexical_substitution.py", + "file_type": "code", + "source_file": "src/vocabulary/lexical_substitution.py" + }, + { + "id": "src_vocabulary_lexical_substitution_py_docstring", + "label": "Post-generation academic vocabulary elevation module.\n\nPipeline:\n1. POS-tag the ", + "file_type": "rationale", + "source_file": "src/vocabulary/lexical_substitution.py" + }, + { + "id": "src_vocabulary_lexical_substitution_py_LexicalElevator", + "label": "LexicalElevator", + "file_type": "code", + "source_file": "src/vocabulary/lexical_substitution.py", + "source_location": "line 29" + }, + { + "id": "src_vocabulary_lexical_substitution_py_LexicalElevator_doc", + "label": "Elevates vocabulary to academic register using BERT-based substitution.", + "file_type": "rationale", + "source_file": "src/vocabulary/lexical_substitution.py", + "source_location": "line 29" + }, + { + "id": "src_vocabulary_lexical_substitution_py_RegisterFilter", + "label": "RegisterFilter", + "file_type": "code", + "source_file": "src/vocabulary/lexical_substitution.py", + "source_location": "line 178" + }, + { + "id": "src_vocabulary_lexical_substitution_py_RegisterFilter_doc", + "label": "Applies register-level corrections to ensure academic tone:\n- Converts contracti", + "file_type": "rationale", + "source_file": "src/vocabulary/lexical_substitution.py", + "source_location": "line 178" + }, + { + "id": "src_vocabulary_lexical_substitution_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/vocabulary/lexical_substitution.py", + "source_location": "line 36" + }, + { + "id": "src_vocabulary_lexical_substitution_py__sem_similarity", + "label": "_sem_similarity()", + "file_type": "code", + "source_file": "src/vocabulary/lexical_substitution.py", + "source_location": "line 67" + }, + { + "id": "src_vocabulary_lexical_substitution_py__sem_similarity_doc", + "label": "Compute contextual semantic similarity using sentence embeddings.", + "file_type": "rationale", + "source_file": "src/vocabulary/lexical_substitution.py", + "source_location": "line 67" + }, + { + "id": "src_vocabulary_lexical_substitution_py__get_awl_substitutions", + "label": "_get_awl_substitutions()", + "file_type": "code", + "source_file": "src/vocabulary/lexical_substitution.py", + "source_location": "line 77" + }, + { + "id": "src_vocabulary_lexical_substitution_py__get_awl_substitutions_doc", + "label": "Generate candidate AWL substitutions using BERT fill-mask.", + "file_type": "rationale", + "source_file": "src/vocabulary/lexical_substitution.py", + "source_location": "line 77" + }, + { + "id": "src_vocabulary_lexical_substitution_py_elevate", + "label": "elevate()", + "file_type": "code", + "source_file": "src/vocabulary/lexical_substitution.py", + "source_location": "line 108" + }, + { + "id": "src_vocabulary_lexical_substitution_py_elevate_doc", + "label": "Main entry point: elevates vocabulary to academic register.", + "file_type": "rationale", + "source_file": "src/vocabulary/lexical_substitution.py", + "source_location": "line 108" + }, + { + "id": "src_vocabulary_lexical_substitution_py_apply", + "label": "apply()", + "file_type": "code", + "source_file": "src/vocabulary/lexical_substitution.py", + "source_location": "line 222" + }, + { + "id": "src_vocabulary_lexical_substitution_py_apply_doc", + "label": "Apply contraction expansion and colloquial-to-academic substitution.", + "file_type": "rationale", + "source_file": "src/vocabulary/lexical_substitution.py", + "source_location": "line 222" + } + ], + "edges": [ + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "src_vocabulary_lexical_substitution_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "spacy", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "transformers", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "sentence_transformers", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "torch.nn.functional", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "awl_loader", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "re", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "src_vocabulary_lexical_substitution_py_LexicalElevator", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_lexical_substitution_py_LexicalElevator", + "target": "src_vocabulary_lexical_substitution_py_LexicalElevator_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "src_vocabulary_lexical_substitution_py_RegisterFilter", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_lexical_substitution_py_RegisterFilter", + "target": "src_vocabulary_lexical_substitution_py_RegisterFilter_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "src_vocabulary_lexical_substitution_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_lexical_substitution_py___init__", + "target": "AWLLoader", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py___init__", + "target": "hf_pipeline", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py___init__", + "target": "SentenceTransformer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py___init__", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py___init__", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "src_vocabulary_lexical_substitution_py__sem_similarity", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_lexical_substitution_py__sem_similarity", + "target": "src_vocabulary_lexical_substitution_py__sem_similarity_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_lexical_substitution_py__sem_similarity", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py__sem_similarity", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py__sem_similarity", + "target": "encode", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py__sem_similarity", + "target": "cosine_similarity", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py__sem_similarity", + "target": "item", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py__sem_similarity", + "target": "unsqueeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py__sem_similarity", + "target": "unsqueeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "src_vocabulary_lexical_substitution_py__get_awl_substitutions", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_lexical_substitution_py__get_awl_substitutions", + "target": "src_vocabulary_lexical_substitution_py__get_awl_substitutions_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_lexical_substitution_py__get_awl_substitutions", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py__get_awl_substitutions", + "target": "fill_mask", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py__get_awl_substitutions", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py__get_awl_substitutions", + "target": "debug", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py__get_awl_substitutions", + "target": "is_academic", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py__get_awl_substitutions", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py__get_awl_substitutions", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py__get_awl_substitutions", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py__get_awl_substitutions", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "src_vocabulary_lexical_substitution_py_elevate", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "src_vocabulary_lexical_substitution_py_elevate_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "nlp", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "set", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "is_academic", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "any", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "_get_awl_substitutions", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "_sem_similarity", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "isupper", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "add", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "debug", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "capitalize", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_elevate", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py", + "target": "src_vocabulary_lexical_substitution_py_apply", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_lexical_substitution_py_apply", + "target": "src_vocabulary_lexical_substitution_py_apply_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_lexical_substitution_py_apply", + "target": "items", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_apply", + "target": "sorted", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_apply", + "target": "compile", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_apply", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_apply", + "target": "items", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_apply", + "target": "compile", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_apply", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_apply", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_apply", + "target": "escape", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_apply", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_lexical_substitution_py_apply", + "target": "escape", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/lexical_substitution.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/0951c000112fab36f41eed8f80aad285a1f2227a1ced83ea49ec7f5d77f4c38d.json b/graphify-out/cache/0951c000112fab36f41eed8f80aad285a1f2227a1ced83ea49ec7f5d77f4c38d.json new file mode 100644 index 0000000000000000000000000000000000000000..480b77ed23262f3b1b6c7bf5d9eea7e5ec839c9c --- /dev/null +++ b/graphify-out/cache/0951c000112fab36f41eed8f80aad285a1f2227a1ced83ea49ec7f5d77f4c38d.json @@ -0,0 +1,932 @@ +{ + "nodes": [ + { + "id": "scripts_pretrain_human_pattern_classifier_py", + "label": "pretrain_human_pattern_classifier.py", + "file_type": "code", + "source_file": "scripts/pretrain_human_pattern_classifier.py" + }, + { + "id": "scripts_pretrain_human_pattern_classifier_py_docstring", + "label": "Pre-trains the HumanPatternClassifier on both Kaggle datasets.\nRun this BEFORE t", + "file_type": "rationale", + "source_file": "scripts/pretrain_human_pattern_classifier.py" + }, + { + "id": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "label": "train_classifier()", + "file_type": "code", + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "source_location": "line 32" + }, + { + "id": "scripts_pretrain_human_pattern_classifier_py_train_classifier_doc", + "label": "Pre-train the human pattern classifier on Kaggle datasets.", + "file_type": "rationale", + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "source_location": "line 32" + } + ], + "edges": [ + { + "source": "scripts_pretrain_human_pattern_classifier_py", + "target": "scripts_pretrain_human_pattern_classifier_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.5 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.6 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py", + "target": "torch.nn", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.6 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py", + "target": "torch.utils.data", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.6 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py", + "target": "sklearn.metrics", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.6 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py", + "target": "numpy", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.6 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.6 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py", + "target": "os", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.6 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py", + "target": "yaml", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.6 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py", + "target": "src.training.human_pattern_extractor", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.6 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py", + "target": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 1.0 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "scripts_pretrain_human_pattern_classifier_py_train_classifier_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.5 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "HumanPatternFeatureExtractor", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "KaggleHumanPatternDataset", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "precompute_features", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "int", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "random_split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "DataLoader", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "DataLoader", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "HumanPatternClassifier", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "AdamW", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "CosineAnnealingLR", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "BCELoss", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "makedirs", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "safe_load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "init", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "error", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "parameters", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "train", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "step", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "eval", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "accuracy_score", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "accuracy_score", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "finish", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "manual_seed", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "zero_grad", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "classifier", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "criterion", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "backward", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "clip_grad_norm_", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "step", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "no_grad", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "roc_auc_score", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "roc_auc_score", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "log", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "save", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "parameters", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "item", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "size", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "numpy", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "numpy", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "classifier", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "criterion", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "state_dict", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "Generator", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "float", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "item", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "size", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "numpy", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "numpy", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "cpu", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "cpu", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "float", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "get_last_lr", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "cpu", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "cpu", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py_train_classifier", + "target": "detach", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.8 + }, + { + "source": "scripts_pretrain_human_pattern_classifier_py", + "target": "wandb", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/pretrain_human_pattern_classifier.py", + "weight": 0.6 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/09d76b207e9eedddce45a975328e44abf4b845816499295145b814fccf4ef1c4.json b/graphify-out/cache/09d76b207e9eedddce45a975328e44abf4b845816499295145b814fccf4ef1c4.json new file mode 100644 index 0000000000000000000000000000000000000000..416f682d074ad233bd1ffa5e4995aab75e3ff695 --- /dev/null +++ b/graphify-out/cache/09d76b207e9eedddce45a975328e44abf4b845816499295145b814fccf4ef1c4.json @@ -0,0 +1,296 @@ +{ + "nodes": [ + { + "id": "src_evaluation_errant_evaluator_py", + "label": "errant_evaluator.py", + "file_type": "code", + "source_file": "src/evaluation/errant_evaluator.py" + }, + { + "id": "src_evaluation_errant_evaluator_py_docstring", + "label": "ERRANT-based grammatical error evaluation.\nUses the ERRANT toolkit for standardi", + "file_type": "rationale", + "source_file": "src/evaluation/errant_evaluator.py" + }, + { + "id": "src_evaluation_errant_evaluator_py_ERRANTEvaluator", + "label": "ERRANTEvaluator", + "file_type": "code", + "source_file": "src/evaluation/errant_evaluator.py", + "source_location": "line 11" + }, + { + "id": "src_evaluation_errant_evaluator_py_ERRANTEvaluator_doc", + "label": "Evaluates grammar correction quality using ERRANT annotations.", + "file_type": "rationale", + "source_file": "src/evaluation/errant_evaluator.py", + "source_location": "line 11" + }, + { + "id": "src_evaluation_errant_evaluator_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/evaluation/errant_evaluator.py", + "source_location": "line 14" + }, + { + "id": "src_evaluation_errant_evaluator_py_evaluate", + "label": "evaluate()", + "file_type": "code", + "source_file": "src/evaluation/errant_evaluator.py", + "source_location": "line 23" + }, + { + "id": "src_evaluation_errant_evaluator_py_evaluate_doc", + "label": "Compute ERRANT precision, recall, F0.5.", + "file_type": "rationale", + "source_file": "src/evaluation/errant_evaluator.py", + "source_location": "line 23" + } + ], + "edges": [ + { + "source": "src_evaluation_errant_evaluator_py", + "target": "src_evaluation_errant_evaluator_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.5 + }, + { + "source": "src_evaluation_errant_evaluator_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_errant_evaluator_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_errant_evaluator_py", + "target": "src_evaluation_errant_evaluator_py_ERRANTEvaluator", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 1.0 + }, + { + "source": "src_evaluation_errant_evaluator_py_ERRANTEvaluator", + "target": "src_evaluation_errant_evaluator_py_ERRANTEvaluator_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.5 + }, + { + "source": "src_evaluation_errant_evaluator_py", + "target": "src_evaluation_errant_evaluator_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 1.0 + }, + { + "source": "src_evaluation_errant_evaluator_py___init__", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py", + "target": "src_evaluation_errant_evaluator_py_evaluate", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 1.0 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "src_evaluation_errant_evaluator_py_evaluate_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.5 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "zip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "parse", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "parse", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "parse", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "annotate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "annotate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "set", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "set", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "add", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "add", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py_evaluate", + "target": "debug", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_errant_evaluator_py", + "target": "errant", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/errant_evaluator.py", + "weight": 0.6 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/0c2c9518e9cc2f657c34160e0bbd48eb943485365ddf9577cb28f09c10de1232.json b/graphify-out/cache/0c2c9518e9cc2f657c34160e0bbd48eb943485365ddf9577cb28f09c10de1232.json new file mode 100644 index 0000000000000000000000000000000000000000..58a79e05f616fc2d2fb54ea02d108e399e1f3873 --- /dev/null +++ b/graphify-out/cache/0c2c9518e9cc2f657c34160e0bbd48eb943485365ddf9577cb28f09c10de1232.json @@ -0,0 +1,326 @@ +{ + "nodes": [ + { + "id": "tests_test_evaluation_py", + "label": "test_evaluation.py", + "file_type": "code", + "source_file": "tests/test_evaluation.py" + }, + { + "id": "tests_test_evaluation_py_docstring", + "label": "Tests for the evaluation framework.", + "file_type": "rationale", + "source_file": "tests/test_evaluation.py" + }, + { + "id": "tests_test_evaluation_py_test_gleu_scorer_instantiation", + "label": "test_gleu_scorer_instantiation()", + "file_type": "code", + "source_file": "tests/test_evaluation.py", + "source_location": "line 7" + }, + { + "id": "tests_test_evaluation_py_test_gleu_scorer_instantiation_doc", + "label": "Test that GLEU scorer can be created.", + "file_type": "rationale", + "source_file": "tests/test_evaluation.py", + "source_location": "line 7" + }, + { + "id": "tests_test_evaluation_py_test_gleu_perfect_score", + "label": "test_gleu_perfect_score()", + "file_type": "code", + "source_file": "tests/test_evaluation.py", + "source_location": "line 13" + }, + { + "id": "tests_test_evaluation_py_test_gleu_perfect_score_doc", + "label": "Test that identical predictions and references score high.", + "file_type": "rationale", + "source_file": "tests/test_evaluation.py", + "source_location": "line 13" + }, + { + "id": "tests_test_evaluation_py_test_gleu_empty_input", + "label": "test_gleu_empty_input()", + "file_type": "code", + "source_file": "tests/test_evaluation.py", + "source_location": "line 22" + }, + { + "id": "tests_test_evaluation_py_test_gleu_empty_input_doc", + "label": "Test empty input handling.", + "file_type": "rationale", + "source_file": "tests/test_evaluation.py", + "source_location": "line 22" + }, + { + "id": "tests_test_evaluation_py_test_awl_coverage_score", + "label": "test_awl_coverage_score()", + "file_type": "code", + "source_file": "tests/test_evaluation.py", + "source_location": "line 28" + }, + { + "id": "tests_test_evaluation_py_test_awl_coverage_score_doc", + "label": "Test AWL coverage scoring.", + "file_type": "rationale", + "source_file": "tests/test_evaluation.py", + "source_location": "line 28" + } + ], + "edges": [ + { + "source": "tests_test_evaluation_py", + "target": "tests_test_evaluation_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 0.5 + }, + { + "source": "tests_test_evaluation_py", + "target": "pytest", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 0.6 + }, + { + "source": "tests_test_evaluation_py", + "target": "src.evaluation.gleu_scorer", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 0.6 + }, + { + "source": "tests_test_evaluation_py", + "target": "tests_test_evaluation_py_test_gleu_scorer_instantiation", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 1.0 + }, + { + "source": "tests_test_evaluation_py_test_gleu_scorer_instantiation", + "target": "tests_test_evaluation_py_test_gleu_scorer_instantiation_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 0.5 + }, + { + "source": "tests_test_evaluation_py_test_gleu_scorer_instantiation", + "target": "GLEUScorer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_evaluation.py", + "weight": 0.8 + }, + { + "source": "tests_test_evaluation_py", + "target": "tests_test_evaluation_py_test_gleu_perfect_score", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 1.0 + }, + { + "source": "tests_test_evaluation_py_test_gleu_perfect_score", + "target": "tests_test_evaluation_py_test_gleu_perfect_score_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 0.5 + }, + { + "source": "tests_test_evaluation_py_test_gleu_perfect_score", + "target": "GLEUScorer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_evaluation.py", + "weight": 0.8 + }, + { + "source": "tests_test_evaluation_py_test_gleu_perfect_score", + "target": "compute_gleu", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_evaluation.py", + "weight": 0.8 + }, + { + "source": "tests_test_evaluation_py", + "target": "tests_test_evaluation_py_test_gleu_empty_input", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 1.0 + }, + { + "source": "tests_test_evaluation_py_test_gleu_empty_input", + "target": "tests_test_evaluation_py_test_gleu_empty_input_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 0.5 + }, + { + "source": "tests_test_evaluation_py_test_gleu_empty_input", + "target": "GLEUScorer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_evaluation.py", + "weight": 0.8 + }, + { + "source": "tests_test_evaluation_py_test_gleu_empty_input", + "target": "compute_gleu", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_evaluation.py", + "weight": 0.8 + }, + { + "source": "tests_test_evaluation_py", + "target": "tests_test_evaluation_py_test_awl_coverage_score", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 1.0 + }, + { + "source": "tests_test_evaluation_py_test_awl_coverage_score", + "target": "tests_test_evaluation_py_test_awl_coverage_score_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 0.5 + }, + { + "source": "tests_test_evaluation_py_test_awl_coverage_score", + "target": "NamedTemporaryFile", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_evaluation.py", + "weight": 0.8 + }, + { + "source": "tests_test_evaluation_py_test_awl_coverage_score", + "target": "write", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_evaluation.py", + "weight": 0.8 + }, + { + "source": "tests_test_evaluation_py_test_awl_coverage_score", + "target": "AWLLoader", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_evaluation.py", + "weight": 0.8 + }, + { + "source": "tests_test_evaluation_py_test_awl_coverage_score", + "target": "StyleFingerprinter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_evaluation.py", + "weight": 0.8 + }, + { + "source": "tests_test_evaluation_py_test_awl_coverage_score", + "target": "StyleEvaluator", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_evaluation.py", + "weight": 0.8 + }, + { + "source": "tests_test_evaluation_py_test_awl_coverage_score", + "target": "awl_coverage", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_evaluation.py", + "weight": 0.8 + }, + { + "source": "tests_test_evaluation_py_test_awl_coverage_score", + "target": "unlink", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_evaluation.py", + "weight": 0.8 + }, + { + "source": "tests_test_evaluation_py", + "target": "src.vocabulary.awl_loader", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 0.6 + }, + { + "source": "tests_test_evaluation_py", + "target": "src.style.fingerprinter", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 0.6 + }, + { + "source": "tests_test_evaluation_py", + "target": "src.evaluation.style_metrics", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 0.6 + }, + { + "source": "tests_test_evaluation_py", + "target": "tempfile", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 0.6 + }, + { + "source": "tests_test_evaluation_py", + "target": "os", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_evaluation.py", + "weight": 0.6 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/1dc5e0faf2457334fec7946e96eafe79627d0d52c67c5dc9e70b5f908fde30b6.json b/graphify-out/cache/1dc5e0faf2457334fec7946e96eafe79627d0d52c67c5dc9e70b5f908fde30b6.json new file mode 100644 index 0000000000000000000000000000000000000000..b38e5667513efa6ce56fb310dc827d5d26a1e33e --- /dev/null +++ b/graphify-out/cache/1dc5e0faf2457334fec7946e96eafe79627d0d52c67c5dc9e70b5f908fde30b6.json @@ -0,0 +1,2754 @@ +{ + "nodes": [ + { + "id": "src_training_human_pattern_extractor_py", + "label": "human_pattern_extractor.py", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py" + }, + { + "id": "src_training_human_pattern_extractor_py_docstring", + "label": "Extracts the statistical signature of human writing vs AI writing.\nUses Kaggle d", + "file_type": "rationale", + "source_file": "src/training/human_pattern_extractor.py" + }, + { + "id": "src_training_human_pattern_extractor_py__compute_text_features", + "label": "_compute_text_features()", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 68" + }, + { + "id": "src_training_human_pattern_extractor_py__compute_text_features_doc", + "label": "Compute the 16 non-perplexity features from raw text.\nReturns a 16-dim float32 a", + "file_type": "rationale", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 68" + }, + { + "id": "src_training_human_pattern_extractor_py_HumanPatternFeatureExtractor", + "label": "HumanPatternFeatureExtractor", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 150" + }, + { + "id": "src_training_human_pattern_extractor_py_HumanPatternFeatureExtractor_doc", + "label": "Extracts 17-dimensional feature vector encoding human vs AI writing patterns.\n\nO", + "file_type": "rationale", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 150" + }, + { + "id": "src_training_human_pattern_extractor_py_KaggleHumanPatternDataset", + "label": "KaggleHumanPatternDataset", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 367" + }, + { + "id": "src_training_human_pattern_extractor_py_KaggleHumanPatternDataset_doc", + "label": "Loads both Kaggle datasets and produces (feature_vector, label) pairs.\nlabel = 1", + "file_type": "rationale", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 367" + }, + { + "id": "src_training_human_pattern_extractor_py_HumanPatternClassifier", + "label": "HumanPatternClassifier", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 506" + }, + { + "id": "src_training_human_pattern_extractor_py_HumanPatternClassifier_doc", + "label": "Lightweight MLP trained to distinguish human from AI writing.\nInput: feature vec", + "file_type": "rationale", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 506" + }, + { + "id": "src_training_human_pattern_extractor_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 158" + }, + { + "id": "src_training_human_pattern_extractor_py__perplexity", + "label": "_perplexity()", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 184" + }, + { + "id": "src_training_human_pattern_extractor_py__perplexity_doc", + "label": "GPT-2 perplexity for a single text. Lower = more AI-like.", + "file_type": "rationale", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 184" + }, + { + "id": "src_training_human_pattern_extractor_py__perplexity_batch", + "label": "_perplexity_batch()", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 203" + }, + { + "id": "src_training_human_pattern_extractor_py__perplexity_batch_doc", + "label": "Compute GPT-2 perplexity for a batch of texts efficiently on GPU.\n\nProcesses tex", + "file_type": "rationale", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 203" + }, + { + "id": "src_training_human_pattern_extractor_py_extract", + "label": "extract()", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 266" + }, + { + "id": "src_training_human_pattern_extractor_py_extract_doc", + "label": "Extract full 17-dimensional feature vector for a single text.", + "file_type": "rationale", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 266" + }, + { + "id": "src_training_human_pattern_extractor_py_extract_batch", + "label": "extract_batch()", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 284" + }, + { + "id": "src_training_human_pattern_extractor_py_extract_batch_doc", + "label": "Extract features for many texts efficiently.\n\nStrategy:\n 1. Compute perplexity ", + "file_type": "rationale", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 284" + }, + { + "id": "src_training_human_pattern_extractor_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 373" + }, + { + "id": "src_training_human_pattern_extractor_py_precompute_features", + "label": "precompute_features()", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 462" + }, + { + "id": "src_training_human_pattern_extractor_py_precompute_features_doc", + "label": "Pre-compute all features using optimised batched extraction.", + "file_type": "rationale", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 462" + }, + { + "id": "src_training_human_pattern_extractor_py___len__", + "label": "__len__()", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 488" + }, + { + "id": "src_training_human_pattern_extractor_py___getitem__", + "label": "__getitem__()", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 491" + }, + { + "id": "src_training_human_pattern_extractor_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 515" + }, + { + "id": "src_training_human_pattern_extractor_py_forward", + "label": "forward()", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 529" + }, + { + "id": "src_training_human_pattern_extractor_py_forward_doc", + "label": "Returns human-likeness score in [0, 1]. Higher = more human.", + "file_type": "rationale", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 529" + }, + { + "id": "src_training_human_pattern_extractor_py_score", + "label": "score()", + "file_type": "code", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 534" + }, + { + "id": "src_training_human_pattern_extractor_py_score_doc", + "label": "Convenience: score a single text string.", + "file_type": "rationale", + "source_file": "src/training/human_pattern_extractor.py", + "source_location": "line 534" + } + ], + "edges": [ + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.5 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "pandas", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "numpy", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "torch.nn", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "torch.nn.functional", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "torch.utils.data", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "transformers", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "sklearn.model_selection", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "sklearn.preprocessing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "spacy", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "collections", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "math", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "concurrent.futures", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "multiprocessing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py__compute_text_features", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "src_training_human_pattern_extractor_py__compute_text_features_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.5 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "set", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "set", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "array", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "zeros", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "log1p", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "log1p", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "std", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "float", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "tuple", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "count", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "count", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "count", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "count", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "count", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "std", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "set", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "set", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__compute_text_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py_HumanPatternFeatureExtractor", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py_HumanPatternFeatureExtractor", + "target": "src_training_human_pattern_extractor_py_HumanPatternFeatureExtractor_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.5 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py_KaggleHumanPatternDataset", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py_KaggleHumanPatternDataset", + "target": "src_training_human_pattern_extractor_py_KaggleHumanPatternDataset_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.5 + }, + { + "source": "src_training_human_pattern_extractor_py_KaggleHumanPatternDataset", + "target": "Dataset", + "relation": "inherits", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py_HumanPatternClassifier", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py_HumanPatternClassifier", + "target": "src_training_human_pattern_extractor_py_HumanPatternClassifier_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.5 + }, + { + "source": "src_training_human_pattern_extractor_py_HumanPatternClassifier", + "target": "Module", + "relation": "inherits", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "re", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.6 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "from_pretrained", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "from_pretrained", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "eval", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "half", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "is_available", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py__perplexity", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity", + "target": "src_training_human_pattern_extractor_py__perplexity_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.5 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity", + "target": "gpt2_tokenizer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity", + "target": "exp", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity", + "target": "size", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity", + "target": "no_grad", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity", + "target": "gpt2_model", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity", + "target": "item", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity", + "target": "float", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py__perplexity_batch", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "src_training_human_pattern_extractor_py__perplexity_batch_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.5 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "gpt2_tokenizer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "contiguous", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "contiguous", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "contiguous", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "CrossEntropyLoss", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "view", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "clamp", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "no_grad", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "autocast", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "gpt2_model", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "size", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "float", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "exp", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "empty_cache", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "loss_fct", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "view", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "view", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "item", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "size", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "float", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py__perplexity_batch", + "target": "float", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py_extract", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py_extract", + "target": "src_training_human_pattern_extractor_py_extract_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.5 + }, + { + "source": "src_training_human_pattern_extractor_py_extract", + "target": "_perplexity", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract", + "target": "_compute_text_features", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract", + "target": "empty", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract", + "target": "zeros", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py_extract_batch", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "src_training_human_pattern_extractor_py_extract_batch_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.5 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "empty", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "array", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "array", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "_perplexity_batch", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "enumerate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "cpu_count", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "ProcessPoolExecutor", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "_compute_text_features", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "list", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "get_device_properties", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "int", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "map", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_extract_batch", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "read_csv", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "tolist", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "tolist", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "read_parquet", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "tolist", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "tolist", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "head", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "head", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "head", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "head", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "dropna", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "dropna", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "dropna", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "dropna", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py_precompute_features", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py_precompute_features", + "target": "src_training_human_pattern_extractor_py_precompute_features_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.5 + }, + { + "source": "src_training_human_pattern_extractor_py_precompute_features", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_precompute_features", + "target": "extract_batch", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_precompute_features", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_precompute_features", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_precompute_features", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_precompute_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_precompute_features", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_precompute_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_precompute_features", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py___len__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py___len__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py___getitem__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py___getitem__", + "target": "tensor", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___getitem__", + "target": "nan_to_num", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___getitem__", + "target": "extract", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___getitem__", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "__init__", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "Sequential", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "Linear", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "BatchNorm1d", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "ReLU", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "Dropout", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "Linear", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "BatchNorm1d", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "ReLU", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "Dropout", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "Linear", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py___init__", + "target": "super", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py_forward", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py_forward", + "target": "src_training_human_pattern_extractor_py_forward_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.5 + }, + { + "source": "src_training_human_pattern_extractor_py_forward", + "target": "net", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_forward", + "target": "squeeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_forward", + "target": "sigmoid", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py", + "target": "src_training_human_pattern_extractor_py_score", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 1.0 + }, + { + "source": "src_training_human_pattern_extractor_py_score", + "target": "src_training_human_pattern_extractor_py_score_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.5 + }, + { + "source": "src_training_human_pattern_extractor_py_score", + "target": "eval", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_score", + "target": "extract", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_score", + "target": "unsqueeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_score", + "target": "nan_to_num", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_score", + "target": "item", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_score", + "target": "no_grad", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_score", + "target": "forward", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + }, + { + "source": "src_training_human_pattern_extractor_py_score", + "target": "tensor", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/human_pattern_extractor.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/1f67757f912dc6b8c608d9764d2317281727aed2b6aaa958f0c9d12be6705477.json b/graphify-out/cache/1f67757f912dc6b8c608d9764d2317281727aed2b6aaa958f0c9d12be6705477.json new file mode 100644 index 0000000000000000000000000000000000000000..84195ec4e4b0cd1c602a1a26289111f9d7471442 --- /dev/null +++ b/graphify-out/cache/1f67757f912dc6b8c608d9764d2317281727aed2b6aaa958f0c9d12be6705477.json @@ -0,0 +1,699 @@ +{ + "nodes": [ + { + "id": "src_preprocessing_dyslexia_simulator_py", + "label": "dyslexia_simulator.py", + "file_type": "code", + "source_file": "src/preprocessing/dyslexia_simulator.py" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py_docstring", + "label": "Programmatically generates dyslectic training data from clean text.\nUsed to augm", + "file_type": "rationale", + "source_file": "src/preprocessing/dyslexia_simulator.py" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py_DyslexiaSimulator", + "label": "DyslexiaSimulator", + "file_type": "code", + "source_file": "src/preprocessing/dyslexia_simulator.py", + "source_location": "line 19" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py_DyslexiaSimulator_doc", + "label": "Generates synthetic dyslectic text from clean input for data augmentation.", + "file_type": "rationale", + "source_file": "src/preprocessing/dyslexia_simulator.py", + "source_location": "line 19" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/preprocessing/dyslexia_simulator.py", + "source_location": "line 34" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py__transpose_letters", + "label": "_transpose_letters()", + "file_type": "code", + "source_file": "src/preprocessing/dyslexia_simulator.py", + "source_location": "line 38" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py__transpose_letters_doc", + "label": "Swap two adjacent letters.", + "file_type": "rationale", + "source_file": "src/preprocessing/dyslexia_simulator.py", + "source_location": "line 38" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py__omit_letter", + "label": "_omit_letter()", + "file_type": "code", + "source_file": "src/preprocessing/dyslexia_simulator.py", + "source_location": "line 48" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py__omit_letter_doc", + "label": "Remove a random interior letter.", + "file_type": "rationale", + "source_file": "src/preprocessing/dyslexia_simulator.py", + "source_location": "line 48" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py__double_letter", + "label": "_double_letter()", + "file_type": "code", + "source_file": "src/preprocessing/dyslexia_simulator.py", + "source_location": "line 55" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py__double_letter_doc", + "label": "Double a random interior letter.", + "file_type": "rationale", + "source_file": "src/preprocessing/dyslexia_simulator.py", + "source_location": "line 55" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py__reverse_letter", + "label": "_reverse_letter()", + "file_type": "code", + "source_file": "src/preprocessing/dyslexia_simulator.py", + "source_location": "line 62" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py__reverse_letter_doc", + "label": "Swap b/d, p/q style reversals.", + "file_type": "rationale", + "source_file": "src/preprocessing/dyslexia_simulator.py", + "source_location": "line 62" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py_corrupt_word", + "label": "corrupt_word()", + "file_type": "code", + "source_file": "src/preprocessing/dyslexia_simulator.py", + "source_location": "line 77" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py_corrupt_word_doc", + "label": "Apply a single random error to a word.", + "file_type": "rationale", + "source_file": "src/preprocessing/dyslexia_simulator.py", + "source_location": "line 77" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py_simulate", + "label": "simulate()", + "file_type": "code", + "source_file": "src/preprocessing/dyslexia_simulator.py", + "source_location": "line 103" + }, + { + "id": "src_preprocessing_dyslexia_simulator_py_simulate_doc", + "label": "Returns (corrupted_text, clean_text) training pair.", + "file_type": "rationale", + "source_file": "src/preprocessing/dyslexia_simulator.py", + "source_location": "line 103" + } + ], + "edges": [ + { + "source": "src_preprocessing_dyslexia_simulator_py", + "target": "src_preprocessing_dyslexia_simulator_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py", + "target": "random", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py", + "target": "re", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py", + "target": "src_preprocessing_dyslexia_simulator_py_DyslexiaSimulator", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_DyslexiaSimulator", + "target": "src_preprocessing_dyslexia_simulator_py_DyslexiaSimulator_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py", + "target": "src_preprocessing_dyslexia_simulator_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py___init__", + "target": "Random", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py", + "target": "src_preprocessing_dyslexia_simulator_py__transpose_letters", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__transpose_letters", + "target": "src_preprocessing_dyslexia_simulator_py__transpose_letters_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__transpose_letters", + "target": "randint", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__transpose_letters", + "target": "list", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__transpose_letters", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__transpose_letters", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__transpose_letters", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py", + "target": "src_preprocessing_dyslexia_simulator_py__omit_letter", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__omit_letter", + "target": "src_preprocessing_dyslexia_simulator_py__omit_letter_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__omit_letter", + "target": "randint", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__omit_letter", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__omit_letter", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py", + "target": "src_preprocessing_dyslexia_simulator_py__double_letter", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__double_letter", + "target": "src_preprocessing_dyslexia_simulator_py__double_letter_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__double_letter", + "target": "randint", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__double_letter", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__double_letter", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py", + "target": "src_preprocessing_dyslexia_simulator_py__reverse_letter", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__reverse_letter", + "target": "src_preprocessing_dyslexia_simulator_py__reverse_letter_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__reverse_letter", + "target": "list", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__reverse_letter", + "target": "enumerate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__reverse_letter", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__reverse_letter", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__reverse_letter", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__reverse_letter", + "target": "isupper", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py__reverse_letter", + "target": "upper", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py", + "target": "src_preprocessing_dyslexia_simulator_py_corrupt_word", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_corrupt_word", + "target": "src_preprocessing_dyslexia_simulator_py_corrupt_word_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_corrupt_word", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_corrupt_word", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_corrupt_word", + "target": "choices", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_corrupt_word", + "target": "_transpose_letters", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_corrupt_word", + "target": "random", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_corrupt_word", + "target": "isupper", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_corrupt_word", + "target": "capitalize", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_corrupt_word", + "target": "_omit_letter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_corrupt_word", + "target": "_double_letter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_corrupt_word", + "target": "_reverse_letter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py", + "target": "src_preprocessing_dyslexia_simulator_py_simulate", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "src_preprocessing_dyslexia_simulator_py_simulate_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "rstrip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "compile", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "isalpha", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "corrupt_word", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "random", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "escape", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dyslexia_simulator_py_simulate", + "target": "random", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dyslexia_simulator.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/28fb0fc510eba409deea9e36897111a36b70c0f8524295b86a2136497ceecb7c.json b/graphify-out/cache/28fb0fc510eba409deea9e36897111a36b70c0f8524295b86a2136497ceecb7c.json new file mode 100644 index 0000000000000000000000000000000000000000..babb64e067d2baab8a1834bf59b92a19c5e85c7f --- /dev/null +++ b/graphify-out/cache/28fb0fc510eba409deea9e36897111a36b70c0f8524295b86a2136497ceecb7c.json @@ -0,0 +1,247 @@ +{ + "nodes": [ + { + "id": "src_model_style_conditioner_py", + "label": "style_conditioner.py", + "file_type": "code", + "source_file": "src/model/style_conditioner.py" + }, + { + "id": "src_model_style_conditioner_py_docstring", + "label": "Injects the style vector into the model via soft prompt conditioning.\nThe style ", + "file_type": "rationale", + "source_file": "src/model/style_conditioner.py" + }, + { + "id": "src_model_style_conditioner_py_StyleConditioner", + "label": "StyleConditioner", + "file_type": "code", + "source_file": "src/model/style_conditioner.py", + "source_location": "line 19" + }, + { + "id": "src_model_style_conditioner_py_StyleConditioner_doc", + "label": "Projects a 512-dim style vector to n_prefix_tokens virtual tokens\nin the model's", + "file_type": "rationale", + "source_file": "src/model/style_conditioner.py", + "source_location": "line 19" + }, + { + "id": "src_model_style_conditioner_py_prepend_style_prefix", + "label": "prepend_style_prefix()", + "file_type": "code", + "source_file": "src/model/style_conditioner.py", + "source_location": "line 61" + }, + { + "id": "src_model_style_conditioner_py_prepend_style_prefix_doc", + "label": "Concatenates style prefix to input embeddings along sequence dimension.\n\nArgs:\n ", + "file_type": "rationale", + "source_file": "src/model/style_conditioner.py", + "source_location": "line 61" + }, + { + "id": "src_model_style_conditioner_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/model/style_conditioner.py", + "source_location": "line 25" + }, + { + "id": "src_model_style_conditioner_py_forward", + "label": "forward()", + "file_type": "code", + "source_file": "src/model/style_conditioner.py", + "source_location": "line 44" + }, + { + "id": "src_model_style_conditioner_py_forward_doc", + "label": "Args:\n style_vector: [batch_size, 512]\nReturns:\n prefix_embeddings: [batch", + "file_type": "rationale", + "source_file": "src/model/style_conditioner.py", + "source_location": "line 44" + } + ], + "edges": [ + { + "source": "src_model_style_conditioner_py", + "target": "src_model_style_conditioner_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/style_conditioner.py", + "weight": 0.5 + }, + { + "source": "src_model_style_conditioner_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/style_conditioner.py", + "weight": 0.6 + }, + { + "source": "src_model_style_conditioner_py", + "target": "torch.nn", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/style_conditioner.py", + "weight": 0.6 + }, + { + "source": "src_model_style_conditioner_py", + "target": "src_model_style_conditioner_py_StyleConditioner", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/style_conditioner.py", + "weight": 1.0 + }, + { + "source": "src_model_style_conditioner_py_StyleConditioner", + "target": "src_model_style_conditioner_py_StyleConditioner_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/style_conditioner.py", + "weight": 0.5 + }, + { + "source": "src_model_style_conditioner_py_StyleConditioner", + "target": "Module", + "relation": "inherits", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/style_conditioner.py", + "weight": 1.0 + }, + { + "source": "src_model_style_conditioner_py", + "target": "src_model_style_conditioner_py_prepend_style_prefix", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/style_conditioner.py", + "weight": 1.0 + }, + { + "source": "src_model_style_conditioner_py_prepend_style_prefix", + "target": "src_model_style_conditioner_py_prepend_style_prefix_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/style_conditioner.py", + "weight": 0.5 + }, + { + "source": "src_model_style_conditioner_py_prepend_style_prefix", + "target": "cat", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/style_conditioner.py", + "weight": 0.8 + }, + { + "source": "src_model_style_conditioner_py", + "target": "src_model_style_conditioner_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/style_conditioner.py", + "weight": 1.0 + }, + { + "source": "src_model_style_conditioner_py___init__", + "target": "__init__", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/style_conditioner.py", + "weight": 0.8 + }, + { + "source": "src_model_style_conditioner_py___init__", + "target": "Sequential", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/style_conditioner.py", + "weight": 0.8 + }, + { + "source": "src_model_style_conditioner_py___init__", + "target": "Linear", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/style_conditioner.py", + "weight": 0.8 + }, + { + "source": "src_model_style_conditioner_py___init__", + "target": "Tanh", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/style_conditioner.py", + "weight": 0.8 + }, + { + "source": "src_model_style_conditioner_py___init__", + "target": "super", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/style_conditioner.py", + "weight": 0.8 + }, + { + "source": "src_model_style_conditioner_py", + "target": "src_model_style_conditioner_py_forward", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/style_conditioner.py", + "weight": 1.0 + }, + { + "source": "src_model_style_conditioner_py_forward", + "target": "src_model_style_conditioner_py_forward_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/style_conditioner.py", + "weight": 0.5 + }, + { + "source": "src_model_style_conditioner_py_forward", + "target": "projection", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/style_conditioner.py", + "weight": 0.8 + }, + { + "source": "src_model_style_conditioner_py_forward", + "target": "size", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/style_conditioner.py", + "weight": 0.8 + }, + { + "source": "src_model_style_conditioner_py_forward", + "target": "view", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/style_conditioner.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/35b4992a8f35c67300b385cf738998f2d9a6601dfc9b48287fe9e8934a2cf901.json b/graphify-out/cache/35b4992a8f35c67300b385cf738998f2d9a6601dfc9b48287fe9e8934a2cf901.json new file mode 100644 index 0000000000000000000000000000000000000000..7f7b5434d628ce08def7f6c58cbef307110fe7fd --- /dev/null +++ b/graphify-out/cache/35b4992a8f35c67300b385cf738998f2d9a6601dfc9b48287fe9e8934a2cf901.json @@ -0,0 +1,442 @@ +{ + "nodes": [ + { + "id": "src_model_generation_utils_py", + "label": "generation_utils.py", + "file_type": "code", + "source_file": "src/model/generation_utils.py" + }, + { + "id": "src_model_generation_utils_py_docstring", + "label": "Generation utilities for text correction.\nHandles beam search, constrained decod", + "file_type": "rationale", + "source_file": "src/model/generation_utils.py" + }, + { + "id": "src_model_generation_utils_py_generate_correction", + "label": "generate_correction()", + "file_type": "code", + "source_file": "src/model/generation_utils.py", + "source_location": "line 12" + }, + { + "id": "src_model_generation_utils_py_generate_correction_doc", + "label": "Generate corrected text from input tokens.", + "file_type": "rationale", + "source_file": "src/model/generation_utils.py", + "source_location": "line 12" + }, + { + "id": "src_model_generation_utils_py_batch_generate", + "label": "batch_generate()", + "file_type": "code", + "source_file": "src/model/generation_utils.py", + "source_location": "line 48" + }, + { + "id": "src_model_generation_utils_py_batch_generate_doc", + "label": "Generate corrections for a batch of texts.", + "file_type": "rationale", + "source_file": "src/model/generation_utils.py", + "source_location": "line 48" + } + ], + "edges": [ + { + "source": "src_model_generation_utils_py", + "target": "src_model_generation_utils_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/generation_utils.py", + "weight": 0.5 + }, + { + "source": "src_model_generation_utils_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/generation_utils.py", + "weight": 0.6 + }, + { + "source": "src_model_generation_utils_py", + "target": "transformers", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/generation_utils.py", + "weight": 0.6 + }, + { + "source": "src_model_generation_utils_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/generation_utils.py", + "weight": 0.6 + }, + { + "source": "src_model_generation_utils_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/generation_utils.py", + "weight": 0.6 + }, + { + "source": "src_model_generation_utils_py", + "target": "src_model_generation_utils_py_generate_correction", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/generation_utils.py", + "weight": 1.0 + }, + { + "source": "src_model_generation_utils_py_generate_correction", + "target": "src_model_generation_utils_py_generate_correction_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/generation_utils.py", + "weight": 0.5 + }, + { + "source": "src_model_generation_utils_py_generate_correction", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_generate_correction", + "target": "decode", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_generate_correction", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_generate_correction", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_generate_correction", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_generate_correction", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_generate_correction", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_generate_correction", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_generate_correction", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_generate_correction", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_generate_correction", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_generate_correction", + "target": "no_grad", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_generate_correction", + "target": "generate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py", + "target": "src_model_generation_utils_py_batch_generate", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/generation_utils.py", + "weight": 1.0 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "src_model_generation_utils_py_batch_generate_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/generation_utils.py", + "weight": 0.5 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "tokenizer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "debug", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "next", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "no_grad", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "generate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "decode", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "parameters", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "items", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + }, + { + "source": "src_model_generation_utils_py_batch_generate", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/generation_utils.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/37ceb44a0042e984e5257a7c6a645b3be32c234405869e5116f1668d7ad584e2.json b/graphify-out/cache/37ceb44a0042e984e5257a7c6a645b3be32c234405869e5116f1668d7ad584e2.json new file mode 100644 index 0000000000000000000000000000000000000000..88cadedc2d6e72baa806c3d5d84c4434cd9b88e2 --- /dev/null +++ b/graphify-out/cache/37ceb44a0042e984e5257a7c6a645b3be32c234405869e5116f1668d7ad584e2.json @@ -0,0 +1,940 @@ +{ + "nodes": [ + { + "id": "src_training_loss_functions_py", + "label": "loss_functions.py", + "file_type": "code", + "source_file": "src/training/loss_functions.py" + }, + { + "id": "src_training_loss_functions_py_docstring", + "label": "Combined training loss with Human-Pattern Term:\n\nL_total = L_CE + \u03bb\u2081 \u00b7 L_style +", + "file_type": "rationale", + "source_file": "src/training/loss_functions.py" + }, + { + "id": "src_training_loss_functions_py_CombinedCorrectionLoss", + "label": "CombinedCorrectionLoss", + "file_type": "code", + "source_file": "src/training/loss_functions.py", + "source_location": "line 24" + }, + { + "id": "src_training_loss_functions_py_CombinedCorrectionLoss_doc", + "label": "V1 combined loss: L_CE + \u03bb\u2081\u00b7L_style + \u03bb\u2082\u00b7L_semantic.", + "file_type": "rationale", + "source_file": "src/training/loss_functions.py", + "source_location": "line 24" + }, + { + "id": "src_training_loss_functions_py_CombinedCorrectionLossV2", + "label": "CombinedCorrectionLossV2", + "file_type": "code", + "source_file": "src/training/loss_functions.py", + "source_location": "line 119" + }, + { + "id": "src_training_loss_functions_py_CombinedCorrectionLossV2_doc", + "label": "V2 combined loss with human-pattern term: L_CE + \u03bb\u2081\u00b7L_style + \u03bb\u2082\u00b7L_semantic + \u03bb\u2083", + "file_type": "rationale", + "source_file": "src/training/loss_functions.py", + "source_location": "line 119" + }, + { + "id": "src_training_loss_functions_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/training/loss_functions.py", + "source_location": "line 27" + }, + { + "id": "src_training_loss_functions_py__style_loss", + "label": "_style_loss()", + "file_type": "code", + "source_file": "src/training/loss_functions.py", + "source_location": "line 50" + }, + { + "id": "src_training_loss_functions_py__style_loss_doc", + "label": "1 - cosine_similarity(output_style, target_style).", + "file_type": "rationale", + "source_file": "src/training/loss_functions.py", + "source_location": "line 50" + }, + { + "id": "src_training_loss_functions_py__semantic_loss", + "label": "_semantic_loss()", + "file_type": "code", + "source_file": "src/training/loss_functions.py", + "source_location": "line 63" + }, + { + "id": "src_training_loss_functions_py__semantic_loss_doc", + "label": "Penalises meaning change between input and output.", + "file_type": "rationale", + "source_file": "src/training/loss_functions.py", + "source_location": "line 63" + }, + { + "id": "src_training_loss_functions_py_forward", + "label": "forward()", + "file_type": "code", + "source_file": "src/training/loss_functions.py", + "source_location": "line 77" + }, + { + "id": "src_training_loss_functions_py_forward_doc", + "label": "Compute combined loss.", + "file_type": "rationale", + "source_file": "src/training/loss_functions.py", + "source_location": "line 77" + }, + { + "id": "src_training_loss_functions_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/training/loss_functions.py", + "source_location": "line 122" + }, + { + "id": "src_training_loss_functions_py__human_pattern_loss", + "label": "_human_pattern_loss()", + "file_type": "code", + "source_file": "src/training/loss_functions.py", + "source_location": "line 162" + }, + { + "id": "src_training_loss_functions_py__human_pattern_loss_doc", + "label": "Loss = 1 - human_score. Penalise AI-like outputs.", + "file_type": "rationale", + "source_file": "src/training/loss_functions.py", + "source_location": "line 162" + }, + { + "id": "src_training_loss_functions_py_forward", + "label": "forward()", + "file_type": "code", + "source_file": "src/training/loss_functions.py", + "source_location": "line 172" + }, + { + "id": "src_training_loss_functions_py_forward_doc", + "label": "Compute combined loss with human pattern term.", + "file_type": "rationale", + "source_file": "src/training/loss_functions.py", + "source_location": "line 172" + } + ], + "edges": [ + { + "source": "src_training_loss_functions_py", + "target": "src_training_loss_functions_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 0.5 + }, + { + "source": "src_training_loss_functions_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 0.6 + }, + { + "source": "src_training_loss_functions_py", + "target": "torch.nn", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 0.6 + }, + { + "source": "src_training_loss_functions_py", + "target": "torch.nn.functional", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 0.6 + }, + { + "source": "src_training_loss_functions_py", + "target": "sentence_transformers", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 0.6 + }, + { + "source": "src_training_loss_functions_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 0.6 + }, + { + "source": "src_training_loss_functions_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 0.6 + }, + { + "source": "src_training_loss_functions_py", + "target": "src_training_loss_functions_py_CombinedCorrectionLoss", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 1.0 + }, + { + "source": "src_training_loss_functions_py_CombinedCorrectionLoss", + "target": "src_training_loss_functions_py_CombinedCorrectionLoss_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 0.5 + }, + { + "source": "src_training_loss_functions_py_CombinedCorrectionLoss", + "target": "Module", + "relation": "inherits", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 1.0 + }, + { + "source": "src_training_loss_functions_py", + "target": "src_training_loss_functions_py_CombinedCorrectionLossV2", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 1.0 + }, + { + "source": "src_training_loss_functions_py_CombinedCorrectionLossV2", + "target": "src_training_loss_functions_py_CombinedCorrectionLossV2_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 0.5 + }, + { + "source": "src_training_loss_functions_py_CombinedCorrectionLossV2", + "target": "Module", + "relation": "inherits", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 1.0 + }, + { + "source": "src_training_loss_functions_py", + "target": "src_training_loss_functions_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 1.0 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "__init__", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "CrossEntropyLoss", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "SentenceTransformer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "eval", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "parameters", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "super", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py", + "target": "src_training_loss_functions_py__style_loss", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 1.0 + }, + { + "source": "src_training_loss_functions_py__style_loss", + "target": "src_training_loss_functions_py__style_loss_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 0.5 + }, + { + "source": "src_training_loss_functions_py__style_loss", + "target": "cosine_similarity", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py__style_loss", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py__style_loss", + "target": "dim", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py__style_loss", + "target": "unsqueeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py__style_loss", + "target": "dim", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py__style_loss", + "target": "unsqueeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py", + "target": "src_training_loss_functions_py__semantic_loss", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 1.0 + }, + { + "source": "src_training_loss_functions_py__semantic_loss", + "target": "src_training_loss_functions_py__semantic_loss_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 0.5 + }, + { + "source": "src_training_loss_functions_py__semantic_loss", + "target": "cosine_similarity", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py__semantic_loss", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py__semantic_loss", + "target": "no_grad", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py__semantic_loss", + "target": "encode", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py__semantic_loss", + "target": "encode", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py", + "target": "src_training_loss_functions_py_forward", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 1.0 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "src_training_loss_functions_py_forward_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 0.5 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "ce_loss", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "dim", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "view", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "view", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "_style_loss", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "_semantic_loss", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "size", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py", + "target": "src_training_loss_functions_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 1.0 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "__init__", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "CrossEntropyLoss", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "SentenceTransformer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "eval", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "HumanPatternClassifier", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "eval", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "parameters", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "HumanPatternFeatureExtractor", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "load_state_dict", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "super", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py", + "target": "src_training_loss_functions_py__human_pattern_loss", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 1.0 + }, + { + "source": "src_training_loss_functions_py__human_pattern_loss", + "target": "src_training_loss_functions_py__human_pattern_loss_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 0.5 + }, + { + "source": "src_training_loss_functions_py__human_pattern_loss", + "target": "tensor", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py__human_pattern_loss", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py__human_pattern_loss", + "target": "score", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py__human_pattern_loss", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py", + "target": "src_training_loss_functions_py_forward", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 1.0 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "src_training_loss_functions_py_forward_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 0.5 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "ce_loss", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "dim", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "view", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "view", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "cosine_similarity", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "cosine_similarity", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "_human_pattern_loss", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "size", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "dim", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "unsqueeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "dim", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "unsqueeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "no_grad", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "encode", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py_forward", + "target": "encode", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/loss_functions.py", + "weight": 0.8 + }, + { + "source": "src_training_loss_functions_py", + "target": "human_pattern_extractor", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/loss_functions.py", + "weight": 0.6 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/4141eefacb859364c01d21ffbb9fb99f5c0377b0b52b23ddcf35ee1e16202a18.json b/graphify-out/cache/4141eefacb859364c01d21ffbb9fb99f5c0377b0b52b23ddcf35ee1e16202a18.json new file mode 100644 index 0000000000000000000000000000000000000000..a205c3c967c0bef850fc9efbeedf7aa6bcc08804 --- /dev/null +++ b/graphify-out/cache/4141eefacb859364c01d21ffbb9fb99f5c0377b0b52b23ddcf35ee1e16202a18.json @@ -0,0 +1,1528 @@ +{ + "nodes": [ + { + "id": "src_style_fingerprinter_py", + "label": "fingerprinter.py", + "file_type": "code", + "source_file": "src/style/fingerprinter.py" + }, + { + "id": "src_style_fingerprinter_py_docstring", + "label": "Extracts a numerical style vector from any text sample.\nThe style vector encodes", + "file_type": "rationale", + "source_file": "src/style/fingerprinter.py" + }, + { + "id": "src_style_fingerprinter_py_StyleProjectionMLP", + "label": "StyleProjectionMLP", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 63" + }, + { + "id": "src_style_fingerprinter_py_StyleProjectionMLP_doc", + "label": "Projects raw feature vector to 512-dim style embedding.", + "file_type": "rationale", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 63" + }, + { + "id": "src_style_fingerprinter_py_StyleFingerprinter", + "label": "StyleFingerprinter", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 81" + }, + { + "id": "src_style_fingerprinter_py_StyleFingerprinter_doc", + "label": "Extracts style fingerprint vectors from text samples.", + "file_type": "rationale", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 81" + }, + { + "id": "src_style_fingerprinter_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 66" + }, + { + "id": "src_style_fingerprinter_py_forward", + "label": "forward()", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 77" + }, + { + "id": "src_style_fingerprinter_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 84" + }, + { + "id": "src_style_fingerprinter_py__load_awl", + "label": "_load_awl()", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 102" + }, + { + "id": "src_style_fingerprinter_py__load_awl_doc", + "label": "Load Academic Word List from file.", + "file_type": "rationale", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 102" + }, + { + "id": "src_style_fingerprinter_py__passive_voice_ratio", + "label": "_passive_voice_ratio()", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 115" + }, + { + "id": "src_style_fingerprinter_py__passive_voice_ratio_doc", + "label": "Compute ratio of passive voice constructions.", + "file_type": "rationale", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 115" + }, + { + "id": "src_style_fingerprinter_py__avg_dep_tree_depth", + "label": "_avg_dep_tree_depth()", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 128" + }, + { + "id": "src_style_fingerprinter_py__avg_dep_tree_depth_doc", + "label": "Compute average dependency tree depth across all tokens.", + "file_type": "rationale", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 128" + }, + { + "id": "src_style_fingerprinter_py__lexical_density", + "label": "_lexical_density()", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 145" + }, + { + "id": "src_style_fingerprinter_py__lexical_density_doc", + "label": "Compute ratio of content words to total words.", + "file_type": "rationale", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 145" + }, + { + "id": "src_style_fingerprinter_py__count_syllables", + "label": "_count_syllables()", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 160" + }, + { + "id": "src_style_fingerprinter_py__count_syllables_doc", + "label": "Count syllables in a word using a vowel-group heuristic.\nAvoids NLTK cmudict whi", + "file_type": "rationale", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 160" + }, + { + "id": "src_style_fingerprinter_py__avg_syllables_per_word", + "label": "_avg_syllables_per_word()", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 180" + }, + { + "id": "src_style_fingerprinter_py__avg_syllables_per_word_doc", + "label": "Average syllables per word.", + "file_type": "rationale", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 180" + }, + { + "id": "src_style_fingerprinter_py__flesch_reading_ease", + "label": "_flesch_reading_ease()", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 188" + }, + { + "id": "src_style_fingerprinter_py__flesch_reading_ease_doc", + "label": "Compute Flesch Reading Ease score without textstat.\nFormula: 206.835 - 1.015 * A", + "file_type": "rationale", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 188" + }, + { + "id": "src_style_fingerprinter_py_extract_raw_features", + "label": "extract_raw_features()", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 212" + }, + { + "id": "src_style_fingerprinter_py_extract_raw_features_doc", + "label": "Extract ~40 raw style features from text.", + "file_type": "rationale", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 212" + }, + { + "id": "src_style_fingerprinter_py_extract_vector", + "label": "extract_vector()", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 309" + }, + { + "id": "src_style_fingerprinter_py_extract_vector_doc", + "label": "Returns a 512-dim style embedding tensor.", + "file_type": "rationale", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 309" + }, + { + "id": "src_style_fingerprinter_py_blend_vectors", + "label": "blend_vectors()", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 333" + }, + { + "id": "src_style_fingerprinter_py_blend_vectors_doc", + "label": "Blend user style with master copy style.\nalpha = weight given to user's own styl", + "file_type": "rationale", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 333" + }, + { + "id": "src_style_fingerprinter_py__depth", + "label": "_depth()", + "file_type": "code", + "source_file": "src/style/fingerprinter.py", + "source_location": "line 130" + } + ], + "edges": [ + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.5 + }, + { + "source": "src_style_fingerprinter_py", + "target": "spacy", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.6 + }, + { + "source": "src_style_fingerprinter_py", + "target": "numpy", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.6 + }, + { + "source": "src_style_fingerprinter_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.6 + }, + { + "source": "src_style_fingerprinter_py", + "target": "torch.nn", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.6 + }, + { + "source": "src_style_fingerprinter_py", + "target": "torch.nn.functional", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.6 + }, + { + "source": "src_style_fingerprinter_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.6 + }, + { + "source": "src_style_fingerprinter_py", + "target": "scipy", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.6 + }, + { + "source": "src_style_fingerprinter_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.6 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py_StyleProjectionMLP", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py_StyleProjectionMLP", + "target": "src_style_fingerprinter_py_StyleProjectionMLP_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.5 + }, + { + "source": "src_style_fingerprinter_py_StyleProjectionMLP", + "target": "Module", + "relation": "inherits", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py_StyleFingerprinter", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py_StyleFingerprinter", + "target": "src_style_fingerprinter_py_StyleFingerprinter_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.5 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "__init__", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "Sequential", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "Linear", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "LayerNorm", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "GELU", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "Dropout", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "Linear", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "LayerNorm", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "super", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py_forward", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py_forward", + "target": "net", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "_load_awl", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "StyleProjectionMLP", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "eval", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py__load_awl", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py__load_awl", + "target": "src_style_fingerprinter_py__load_awl_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.5 + }, + { + "source": "src_style_fingerprinter_py__load_awl", + "target": "set", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__load_awl", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__load_awl", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__load_awl", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__load_awl", + "target": "add", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__load_awl", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py__passive_voice_ratio", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py__passive_voice_ratio", + "target": "src_style_fingerprinter_py__passive_voice_ratio_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.5 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py__avg_dep_tree_depth", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py__avg_dep_tree_depth", + "target": "src_style_fingerprinter_py__avg_dep_tree_depth_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.5 + }, + { + "source": "src_style_fingerprinter_py__avg_dep_tree_depth", + "target": "_depth", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__avg_dep_tree_depth", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__avg_dep_tree_depth", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py__lexical_density", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py__lexical_density", + "target": "src_style_fingerprinter_py__lexical_density_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.5 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py__count_syllables", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py__count_syllables", + "target": "src_style_fingerprinter_py__count_syllables_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.5 + }, + { + "source": "src_style_fingerprinter_py__count_syllables", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__count_syllables", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__count_syllables", + "target": "endswith", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__count_syllables", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py__avg_syllables_per_word", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py__avg_syllables_per_word", + "target": "src_style_fingerprinter_py__avg_syllables_per_word_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.5 + }, + { + "source": "src_style_fingerprinter_py__avg_syllables_per_word", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__avg_syllables_per_word", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__avg_syllables_per_word", + "target": "_count_syllables", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py__flesch_reading_ease", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py__flesch_reading_ease", + "target": "src_style_fingerprinter_py__flesch_reading_ease_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.5 + }, + { + "source": "src_style_fingerprinter_py__flesch_reading_ease", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__flesch_reading_ease", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__flesch_reading_ease", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__flesch_reading_ease", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__flesch_reading_ease", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__flesch_reading_ease", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__flesch_reading_ease", + "target": "endswith", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py__flesch_reading_ease", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py_extract_raw_features", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "src_style_fingerprinter_py_extract_raw_features_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.5 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "nlp", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "list", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "set", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "_passive_voice_ratio", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "_avg_dep_tree_depth", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "score", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "_lexical_density", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "_avg_syllables_per_word", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "_flesch_reading_ease", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "std", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "float", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "std", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "count", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "hasattr", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "FormalityClassifier", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "skew", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "any", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "endswith", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "endswith", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "endswith", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_raw_features", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py_extract_vector", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py_extract_vector", + "target": "src_style_fingerprinter_py_extract_vector_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.5 + }, + { + "source": "src_style_fingerprinter_py_extract_vector", + "target": "extract_raw_features", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_vector", + "target": "list", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_vector", + "target": "unsqueeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_vector", + "target": "normalize", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_vector", + "target": "squeeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_vector", + "target": "values", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_vector", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_vector", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_vector", + "target": "no_grad", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_vector", + "target": "projection", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_vector", + "target": "tensor", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_extract_vector", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py_blend_vectors", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py_blend_vectors", + "target": "src_style_fingerprinter_py_blend_vectors_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.5 + }, + { + "source": "src_style_fingerprinter_py_blend_vectors", + "target": "normalize", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py_blend_vectors", + "target": "normalize", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/fingerprinter.py", + "weight": 0.8 + }, + { + "source": "src_style_fingerprinter_py", + "target": "src_style_fingerprinter_py__depth", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 1.0 + }, + { + "source": "src_style_fingerprinter_py", + "target": "formality_classifier", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/fingerprinter.py", + "weight": 0.6 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/429695ce0e95e131f41d76d3637b9ce9db503192bd7d8700620160a8171c3829.json b/graphify-out/cache/429695ce0e95e131f41d76d3637b9ce9db503192bd7d8700620160a8171c3829.json new file mode 100644 index 0000000000000000000000000000000000000000..f25c37721654b67dc7b1238e7d0b0c8ed87c88b7 --- /dev/null +++ b/graphify-out/cache/429695ce0e95e131f41d76d3637b9ce9db503192bd7d8700620160a8171c3829.json @@ -0,0 +1,532 @@ +{ + "nodes": [ + { + "id": "src_vocabulary_awl_loader_py", + "label": "awl_loader.py", + "file_type": "code", + "source_file": "src/vocabulary/awl_loader.py" + }, + { + "id": "src_vocabulary_awl_loader_py_docstring", + "label": "Academic Word List (AWL) loader.\nLoads the Coxhead AWL and supplementary domain-", + "file_type": "rationale", + "source_file": "src/vocabulary/awl_loader.py" + }, + { + "id": "src_vocabulary_awl_loader_py_AWLLoader", + "label": "AWLLoader", + "file_type": "code", + "source_file": "src/vocabulary/awl_loader.py", + "source_location": "line 13" + }, + { + "id": "src_vocabulary_awl_loader_py_AWLLoader_doc", + "label": "Loads and manages Academic Word List data.", + "file_type": "rationale", + "source_file": "src/vocabulary/awl_loader.py", + "source_location": "line 13" + }, + { + "id": "src_vocabulary_awl_loader_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/vocabulary/awl_loader.py", + "source_location": "line 16" + }, + { + "id": "src_vocabulary_awl_loader_py__load_word_list", + "label": "_load_word_list()", + "file_type": "code", + "source_file": "src/vocabulary/awl_loader.py", + "source_location": "line 39" + }, + { + "id": "src_vocabulary_awl_loader_py__load_word_list_doc", + "label": "Load a word list file into a set of lowercase words.", + "file_type": "rationale", + "source_file": "src/vocabulary/awl_loader.py", + "source_location": "line 39" + }, + { + "id": "src_vocabulary_awl_loader_py__load_synonyms", + "label": "_load_synonyms()", + "file_type": "code", + "source_file": "src/vocabulary/awl_loader.py", + "source_location": "line 52" + }, + { + "id": "src_vocabulary_awl_loader_py__load_synonyms_doc", + "label": "Load academic synonym mappings from JSON.", + "file_type": "rationale", + "source_file": "src/vocabulary/awl_loader.py", + "source_location": "line 52" + }, + { + "id": "src_vocabulary_awl_loader_py_is_academic", + "label": "is_academic()", + "file_type": "code", + "source_file": "src/vocabulary/awl_loader.py", + "source_location": "line 66" + }, + { + "id": "src_vocabulary_awl_loader_py_is_academic_doc", + "label": "Check if a word (or its lemma) is in the AWL.", + "file_type": "rationale", + "source_file": "src/vocabulary/awl_loader.py", + "source_location": "line 66" + }, + { + "id": "src_vocabulary_awl_loader_py_get_academic_synonyms", + "label": "get_academic_synonyms()", + "file_type": "code", + "source_file": "src/vocabulary/awl_loader.py", + "source_location": "line 70" + }, + { + "id": "src_vocabulary_awl_loader_py_get_academic_synonyms_doc", + "label": "Return academic synonyms for a colloquial word.", + "file_type": "rationale", + "source_file": "src/vocabulary/awl_loader.py", + "source_location": "line 70" + }, + { + "id": "src_vocabulary_awl_loader_py_all_words", + "label": "all_words()", + "file_type": "code", + "source_file": "src/vocabulary/awl_loader.py", + "source_location": "line 75" + }, + { + "id": "src_vocabulary_awl_loader_py_all_words_doc", + "label": "Return the full set of academic words.", + "file_type": "rationale", + "source_file": "src/vocabulary/awl_loader.py", + "source_location": "line 75" + } + ], + "edges": [ + { + "source": "src_vocabulary_awl_loader_py", + "target": "src_vocabulary_awl_loader_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_awl_loader_py", + "target": "pathlib", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_awl_loader_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_awl_loader_py", + "target": "json", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_awl_loader_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_awl_loader_py", + "target": "src_vocabulary_awl_loader_py_AWLLoader", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_awl_loader_py_AWLLoader", + "target": "src_vocabulary_awl_loader_py_AWLLoader_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_awl_loader_py", + "target": "src_vocabulary_awl_loader_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_awl_loader_py___init__", + "target": "_load_word_list", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py___init__", + "target": "_load_synonyms", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py___init__", + "target": "_load_word_list", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py", + "target": "src_vocabulary_awl_loader_py__load_word_list", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_awl_loader_py__load_word_list", + "target": "src_vocabulary_awl_loader_py__load_word_list_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_awl_loader_py__load_word_list", + "target": "set", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py__load_word_list", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py__load_word_list", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py__load_word_list", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py__load_word_list", + "target": "add", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py__load_word_list", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py__load_word_list", + "target": "startswith", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py", + "target": "src_vocabulary_awl_loader_py__load_synonyms", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_awl_loader_py__load_synonyms", + "target": "src_vocabulary_awl_loader_py__load_synonyms_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_awl_loader_py__load_synonyms", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py__load_synonyms", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py__load_synonyms", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py__load_synonyms", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py__load_synonyms", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py__load_synonyms", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py__load_synonyms", + "target": "items", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py", + "target": "src_vocabulary_awl_loader_py_is_academic", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_awl_loader_py_is_academic", + "target": "src_vocabulary_awl_loader_py_is_academic_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_awl_loader_py_is_academic", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py_is_academic", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py", + "target": "src_vocabulary_awl_loader_py_get_academic_synonyms", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_awl_loader_py_get_academic_synonyms", + "target": "src_vocabulary_awl_loader_py_get_academic_synonyms_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_awl_loader_py_get_academic_synonyms", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py_get_academic_synonyms", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py_get_academic_synonyms", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_awl_loader_py", + "target": "src_vocabulary_awl_loader_py_all_words", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_awl_loader_py_all_words", + "target": "src_vocabulary_awl_loader_py_all_words_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_awl_loader_py_all_words", + "target": "copy", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/awl_loader.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/47c00070239651bc59ba60cd4aff75b918b049b98f6612f7b81456d97c3ddc79.json b/graphify-out/cache/47c00070239651bc59ba60cd4aff75b918b049b98f6612f7b81456d97c3ddc79.json new file mode 100644 index 0000000000000000000000000000000000000000..4f847d900324f63b8e0afe755c3773bf655503e3 --- /dev/null +++ b/graphify-out/cache/47c00070239651bc59ba60cd4aff75b918b049b98f6612f7b81456d97c3ddc79.json @@ -0,0 +1,292 @@ +{ + "nodes": [ + { + "id": "src_training_callbacks_py", + "label": "callbacks.py", + "file_type": "code", + "source_file": "src/training/callbacks.py" + }, + { + "id": "src_training_callbacks_py_docstring", + "label": "Training callbacks for monitoring and checkpointing.\nIntegrates with Weights & B", + "file_type": "rationale", + "source_file": "src/training/callbacks.py" + }, + { + "id": "src_training_callbacks_py_StyleMetricsCallback", + "label": "StyleMetricsCallback", + "file_type": "code", + "source_file": "src/training/callbacks.py", + "source_location": "line 16" + }, + { + "id": "src_training_callbacks_py_StyleMetricsCallback_doc", + "label": "Logs style similarity metrics during evaluation.", + "file_type": "rationale", + "source_file": "src/training/callbacks.py", + "source_location": "line 16" + }, + { + "id": "src_training_callbacks_py_EarlyStoppingOnStyleDrift", + "label": "EarlyStoppingOnStyleDrift", + "file_type": "code", + "source_file": "src/training/callbacks.py", + "source_location": "line 34" + }, + { + "id": "src_training_callbacks_py_EarlyStoppingOnStyleDrift_doc", + "label": "Stops training if style similarity drops below threshold.", + "file_type": "rationale", + "source_file": "src/training/callbacks.py", + "source_location": "line 34" + }, + { + "id": "src_training_callbacks_py_on_evaluate", + "label": "on_evaluate()", + "file_type": "code", + "source_file": "src/training/callbacks.py", + "source_location": "line 19" + }, + { + "id": "src_training_callbacks_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/training/callbacks.py", + "source_location": "line 37" + }, + { + "id": "src_training_callbacks_py_on_evaluate", + "label": "on_evaluate()", + "file_type": "code", + "source_file": "src/training/callbacks.py", + "source_location": "line 43" + } + ], + "edges": [ + { + "source": "src_training_callbacks_py", + "target": "src_training_callbacks_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/callbacks.py", + "weight": 0.5 + }, + { + "source": "src_training_callbacks_py", + "target": "transformers", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/callbacks.py", + "weight": 0.6 + }, + { + "source": "src_training_callbacks_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/callbacks.py", + "weight": 0.6 + }, + { + "source": "src_training_callbacks_py", + "target": "src_training_callbacks_py_StyleMetricsCallback", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/callbacks.py", + "weight": 1.0 + }, + { + "source": "src_training_callbacks_py_StyleMetricsCallback", + "target": "src_training_callbacks_py_StyleMetricsCallback_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/callbacks.py", + "weight": 0.5 + }, + { + "source": "src_training_callbacks_py_StyleMetricsCallback", + "target": "TrainerCallback", + "relation": "inherits", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/callbacks.py", + "weight": 1.0 + }, + { + "source": "src_training_callbacks_py", + "target": "src_training_callbacks_py_EarlyStoppingOnStyleDrift", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/callbacks.py", + "weight": 1.0 + }, + { + "source": "src_training_callbacks_py_EarlyStoppingOnStyleDrift", + "target": "src_training_callbacks_py_EarlyStoppingOnStyleDrift_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/callbacks.py", + "weight": 0.5 + }, + { + "source": "src_training_callbacks_py_EarlyStoppingOnStyleDrift", + "target": "TrainerCallback", + "relation": "inherits", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/callbacks.py", + "weight": 1.0 + }, + { + "source": "src_training_callbacks_py", + "target": "wandb", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/callbacks.py", + "weight": 0.6 + }, + { + "source": "src_training_callbacks_py", + "target": "src_training_callbacks_py_on_evaluate", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/callbacks.py", + "weight": 1.0 + }, + { + "source": "src_training_callbacks_py_on_evaluate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/callbacks.py", + "weight": 0.8 + }, + { + "source": "src_training_callbacks_py_on_evaluate", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/callbacks.py", + "weight": 0.8 + }, + { + "source": "src_training_callbacks_py_on_evaluate", + "target": "items", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/callbacks.py", + "weight": 0.8 + }, + { + "source": "src_training_callbacks_py_on_evaluate", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/callbacks.py", + "weight": 0.8 + }, + { + "source": "src_training_callbacks_py_on_evaluate", + "target": "log", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/callbacks.py", + "weight": 0.8 + }, + { + "source": "src_training_callbacks_py_on_evaluate", + "target": "isinstance", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/callbacks.py", + "weight": 0.8 + }, + { + "source": "src_training_callbacks_py_on_evaluate", + "target": "items", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/callbacks.py", + "weight": 0.8 + }, + { + "source": "src_training_callbacks_py_on_evaluate", + "target": "isinstance", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/callbacks.py", + "weight": 0.8 + }, + { + "source": "src_training_callbacks_py", + "target": "src_training_callbacks_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/callbacks.py", + "weight": 1.0 + }, + { + "source": "src_training_callbacks_py", + "target": "src_training_callbacks_py_on_evaluate", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/callbacks.py", + "weight": 1.0 + }, + { + "source": "src_training_callbacks_py_on_evaluate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/callbacks.py", + "weight": 0.8 + }, + { + "source": "src_training_callbacks_py_on_evaluate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/callbacks.py", + "weight": 0.8 + }, + { + "source": "src_training_callbacks_py_on_evaluate", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/callbacks.py", + "weight": 0.8 + }, + { + "source": "src_training_callbacks_py_on_evaluate", + "target": "error", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/callbacks.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/4c12688f9457d989dba7df39bd5ea10fdcabf4f679740eb59a77a5a8ee1243a5.json b/graphify-out/cache/4c12688f9457d989dba7df39bd5ea10fdcabf4f679740eb59a77a5a8ee1243a5.json new file mode 100644 index 0000000000000000000000000000000000000000..ba3585d2d02d85d5cb9c404f6df6d556602f484c --- /dev/null +++ b/graphify-out/cache/4c12688f9457d989dba7df39bd5ea10fdcabf4f679740eb59a77a5a8ee1243a5.json @@ -0,0 +1,1018 @@ +{ + "nodes": [ + { + "id": "scripts_preprocess_data_py", + "label": "preprocess_data.py", + "file_type": "code", + "source_file": "scripts/preprocess_data.py" + }, + { + "id": "scripts_preprocess_data_py_docstring", + "label": "Converts all raw dataset formats into unified JSONL training format.\nOutput sche", + "file_type": "rationale", + "source_file": "scripts/preprocess_data.py" + }, + { + "id": "scripts_preprocess_data_py_apply_bea19_edits", + "label": "apply_bea19_edits()", + "file_type": "code", + "source_file": "scripts/preprocess_data.py", + "source_location": "line 19" + }, + { + "id": "scripts_preprocess_data_py_apply_bea19_edits_doc", + "label": "Apply BEA-2019 character-level edits to produce corrected text.\n\nedits_block for", + "file_type": "rationale", + "source_file": "scripts/preprocess_data.py", + "source_location": "line 19" + }, + { + "id": "scripts_preprocess_data_py_process_bea19_json", + "label": "process_bea19_json()", + "file_type": "code", + "source_file": "scripts/preprocess_data.py", + "source_location": "line 51" + }, + { + "id": "scripts_preprocess_data_py_process_bea19_json_doc", + "label": "Process a BEA-2019 format JSON file (FCE or W&I+LOCNESS).\nEach line is a JSON ob", + "file_type": "rationale", + "source_file": "scripts/preprocess_data.py", + "source_location": "line 51" + }, + { + "id": "scripts_preprocess_data_py_process_fce", + "label": "process_fce()", + "file_type": "code", + "source_file": "scripts/preprocess_data.py", + "source_location": "line 79" + }, + { + "id": "scripts_preprocess_data_py_process_fce_doc", + "label": "Process all FCE JSON files.", + "file_type": "rationale", + "source_file": "scripts/preprocess_data.py", + "source_location": "line 79" + }, + { + "id": "scripts_preprocess_data_py_process_wi_locness", + "label": "process_wi_locness()", + "file_type": "code", + "source_file": "scripts/preprocess_data.py", + "source_location": "line 93" + }, + { + "id": "scripts_preprocess_data_py_process_wi_locness_doc", + "label": "Process all W&I+LOCNESS JSON files.", + "file_type": "rationale", + "source_file": "scripts/preprocess_data.py", + "source_location": "line 93" + }, + { + "id": "scripts_preprocess_data_py_process_jfleg", + "label": "process_jfleg()", + "file_type": "code", + "source_file": "scripts/preprocess_data.py", + "source_location": "line 107" + }, + { + "id": "scripts_preprocess_data_py_process_jfleg_doc", + "label": "JFLEG: .src files (original) and .ref0..ref3 (4 human corrections).\nEach referen", + "file_type": "rationale", + "source_file": "scripts/preprocess_data.py", + "source_location": "line 107" + }, + { + "id": "scripts_preprocess_data_py_create_splits", + "label": "create_splits()", + "file_type": "code", + "source_file": "scripts/preprocess_data.py", + "source_location": "line 137" + }, + { + "id": "scripts_preprocess_data_py_create_splits_doc", + "label": "Split train.jsonl into train and val sets.", + "file_type": "rationale", + "source_file": "scripts/preprocess_data.py", + "source_location": "line 137" + }, + { + "id": "scripts_preprocess_data_py_main", + "label": "main()", + "file_type": "code", + "source_file": "scripts/preprocess_data.py", + "source_location": "line 167" + } + ], + "edges": [ + { + "source": "scripts_preprocess_data_py", + "target": "scripts_preprocess_data_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 0.5 + }, + { + "source": "scripts_preprocess_data_py", + "target": "json", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 0.6 + }, + { + "source": "scripts_preprocess_data_py", + "target": "os", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 0.6 + }, + { + "source": "scripts_preprocess_data_py", + "target": "pathlib", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 0.6 + }, + { + "source": "scripts_preprocess_data_py", + "target": "scripts_preprocess_data_py_apply_bea19_edits", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 1.0 + }, + { + "source": "scripts_preprocess_data_py_apply_bea19_edits", + "target": "scripts_preprocess_data_py_apply_bea19_edits_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 0.5 + }, + { + "source": "scripts_preprocess_data_py_apply_bea19_edits", + "target": "sorted", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_apply_bea19_edits", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py", + "target": "scripts_preprocess_data_py_process_bea19_json", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 1.0 + }, + { + "source": "scripts_preprocess_data_py_process_bea19_json", + "target": "scripts_preprocess_data_py_process_bea19_json_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 0.5 + }, + { + "source": "scripts_preprocess_data_py_process_bea19_json", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_bea19_json", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_bea19_json", + "target": "loads", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_bea19_json", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_bea19_json", + "target": "apply_bea19_edits", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_bea19_json", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_bea19_json", + "target": "write", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_bea19_json", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_bea19_json", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_bea19_json", + "target": "dumps", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py", + "target": "scripts_preprocess_data_py_process_fce", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 1.0 + }, + { + "source": "scripts_preprocess_data_py_process_fce", + "target": "scripts_preprocess_data_py_process_fce_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 0.5 + }, + { + "source": "scripts_preprocess_data_py_process_fce", + "target": "sorted", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_fce", + "target": "Path", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_fce", + "target": "exists", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_fce", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_fce", + "target": "glob", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_fce", + "target": "process_bea19_json", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_fce", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_fce", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py", + "target": "scripts_preprocess_data_py_process_wi_locness", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 1.0 + }, + { + "source": "scripts_preprocess_data_py_process_wi_locness", + "target": "scripts_preprocess_data_py_process_wi_locness_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 0.5 + }, + { + "source": "scripts_preprocess_data_py_process_wi_locness", + "target": "sorted", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_wi_locness", + "target": "Path", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_wi_locness", + "target": "exists", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_wi_locness", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_wi_locness", + "target": "glob", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_wi_locness", + "target": "process_bea19_json", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_wi_locness", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_wi_locness", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py", + "target": "scripts_preprocess_data_py_process_jfleg", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 1.0 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "scripts_preprocess_data_py_process_jfleg_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 0.5 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "list", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "glob", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "with_suffix", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "readlines", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "exists", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "Path", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "zip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "readlines", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "write", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_process_jfleg", + "target": "dumps", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py", + "target": "scripts_preprocess_data_py_create_splits", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 1.0 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "scripts_preprocess_data_py_create_splits_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 0.5 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "seed", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "shuffle", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "int", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "readlines", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "writelines", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "writelines", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "writelines", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_create_splits", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py", + "target": "scripts_preprocess_data_py_main", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 1.0 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "makedirs", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "create_splits", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "process_fce", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "process_wi_locness", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "process_jfleg", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/preprocess_data.py", + "weight": 0.8 + }, + { + "source": "scripts_preprocess_data_py", + "target": "random", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/preprocess_data.py", + "weight": 0.6 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/56a8cafbcf4fd128b6ca0d1918bb81e0b401305d52c78e908b3d495b15c1ed9d.json b/graphify-out/cache/56a8cafbcf4fd128b6ca0d1918bb81e0b401305d52c78e908b3d495b15c1ed9d.json new file mode 100644 index 0000000000000000000000000000000000000000..511e4ee6710fb905634844b85186c548cb54231a --- /dev/null +++ b/graphify-out/cache/56a8cafbcf4fd128b6ca0d1918bb81e0b401305d52c78e908b3d495b15c1ed9d.json @@ -0,0 +1,392 @@ +{ + "nodes": [ + { + "id": "src_model_base_model_py", + "label": "base_model.py", + "file_type": "code", + "source_file": "src/model/base_model.py" + }, + { + "id": "src_model_base_model_py_docstring", + "label": "Loads and wraps the base pretrained model.\nSupported architectures:\n - google/f", + "file_type": "rationale", + "source_file": "src/model/base_model.py" + }, + { + "id": "src_model_base_model_py_load_model_and_tokenizer", + "label": "load_model_and_tokenizer()", + "file_type": "code", + "source_file": "src/model/base_model.py", + "source_location": "line 32" + }, + { + "id": "src_model_base_model_py_load_model_and_tokenizer_doc", + "label": "Load a pretrained model with optional LoRA and quantization.\n\nArgs:\n model_ke", + "file_type": "rationale", + "source_file": "src/model/base_model.py", + "source_location": "line 32" + } + ], + "edges": [ + { + "source": "src_model_base_model_py", + "target": "src_model_base_model_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/base_model.py", + "weight": 0.5 + }, + { + "source": "src_model_base_model_py", + "target": "transformers", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/base_model.py", + "weight": 0.6 + }, + { + "source": "src_model_base_model_py", + "target": "peft", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/base_model.py", + "weight": 0.6 + }, + { + "source": "src_model_base_model_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/base_model.py", + "weight": 0.6 + }, + { + "source": "src_model_base_model_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/base_model.py", + "weight": 0.6 + }, + { + "source": "src_model_base_model_py", + "target": "src_model_base_model_py_load_model_and_tokenizer", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/base_model.py", + "weight": 1.0 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "src_model_base_model_py_load_model_and_tokenizer_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/base_model.py", + "weight": 0.5 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "from_pretrained", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "is_available", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "ValueError", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "BitsAndBytesConfig", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "from_pretrained", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "from_pretrained", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "LoraConfig", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "get_peft_model", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "get_device_capability", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "numel", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "numel", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "parameters", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "parameters", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "list", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "list", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "keys", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + }, + { + "source": "src_model_base_model_py_load_model_and_tokenizer", + "target": "keys", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/base_model.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/59cd31cc237c6a59f79708f841723169445bb6bfda8d54162706c2a4b4cbb4ba.json b/graphify-out/cache/59cd31cc237c6a59f79708f841723169445bb6bfda8d54162706c2a4b4cbb4ba.json new file mode 100644 index 0000000000000000000000000000000000000000..46c370dfe1badeb16e51761208fed6f0f1c5889e --- /dev/null +++ b/graphify-out/cache/59cd31cc237c6a59f79708f841723169445bb6bfda8d54162706c2a4b4cbb4ba.json @@ -0,0 +1,240 @@ +{ + "nodes": [ + { + "id": "src_evaluation_gleu_scorer_py", + "label": "gleu_scorer.py", + "file_type": "code", + "source_file": "src/evaluation/gleu_scorer.py" + }, + { + "id": "src_evaluation_gleu_scorer_py_docstring", + "label": "GLEU (Generalized Language Evaluation Understanding) score.\nPreferred over BLEU ", + "file_type": "rationale", + "source_file": "src/evaluation/gleu_scorer.py" + }, + { + "id": "src_evaluation_gleu_scorer_py_GLEUScorer", + "label": "GLEUScorer", + "file_type": "code", + "source_file": "src/evaluation/gleu_scorer.py", + "source_location": "line 13" + }, + { + "id": "src_evaluation_gleu_scorer_py_GLEUScorer_doc", + "label": "Computes GLEU and BERTScore metrics for GEC evaluation.", + "file_type": "rationale", + "source_file": "src/evaluation/gleu_scorer.py", + "source_location": "line 13" + }, + { + "id": "src_evaluation_gleu_scorer_py_compute_gleu", + "label": "compute_gleu()", + "file_type": "code", + "source_file": "src/evaluation/gleu_scorer.py", + "source_location": "line 16" + }, + { + "id": "src_evaluation_gleu_scorer_py_compute_gleu_doc", + "label": "Corpus-level GLEU score (0-100).\n\nGLEU is the geometric mean of n-gram precision", + "file_type": "rationale", + "source_file": "src/evaluation/gleu_scorer.py", + "source_location": "line 16" + }, + { + "id": "src_evaluation_gleu_scorer_py_compute_bert_score", + "label": "compute_bert_score()", + "file_type": "code", + "source_file": "src/evaluation/gleu_scorer.py", + "source_location": "line 43" + }, + { + "id": "src_evaluation_gleu_scorer_py_compute_bert_score_doc", + "label": "Returns (precision, recall, F1) as averages over the batch.", + "file_type": "rationale", + "source_file": "src/evaluation/gleu_scorer.py", + "source_location": "line 43" + } + ], + "edges": [ + { + "source": "src_evaluation_gleu_scorer_py", + "target": "src_evaluation_gleu_scorer_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.5 + }, + { + "source": "src_evaluation_gleu_scorer_py", + "target": "sacrebleu", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_gleu_scorer_py", + "target": "bert_score", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_gleu_scorer_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_gleu_scorer_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_gleu_scorer_py", + "target": "src_evaluation_gleu_scorer_py_GLEUScorer", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 1.0 + }, + { + "source": "src_evaluation_gleu_scorer_py_GLEUScorer", + "target": "src_evaluation_gleu_scorer_py_GLEUScorer_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.5 + }, + { + "source": "src_evaluation_gleu_scorer_py", + "target": "src_evaluation_gleu_scorer_py_compute_gleu", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 1.0 + }, + { + "source": "src_evaluation_gleu_scorer_py_compute_gleu", + "target": "src_evaluation_gleu_scorer_py_compute_gleu_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.5 + }, + { + "source": "src_evaluation_gleu_scorer_py_compute_gleu", + "target": "corpus_bleu", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_gleu_scorer_py", + "target": "src_evaluation_gleu_scorer_py_compute_bert_score", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 1.0 + }, + { + "source": "src_evaluation_gleu_scorer_py_compute_bert_score", + "target": "src_evaluation_gleu_scorer_py_compute_bert_score_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.5 + }, + { + "source": "src_evaluation_gleu_scorer_py_compute_bert_score", + "target": "bert_score_fn", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_gleu_scorer_py_compute_bert_score", + "target": "item", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_gleu_scorer_py_compute_bert_score", + "target": "item", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_gleu_scorer_py_compute_bert_score", + "target": "item", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_gleu_scorer_py_compute_bert_score", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_gleu_scorer_py_compute_bert_score", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_gleu_scorer_py_compute_bert_score", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_gleu_scorer_py_compute_bert_score", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/gleu_scorer.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/720fe46529312187a088992487158bc3bf97035f7820558f27cc6c3dfbd4aea4.json b/graphify-out/cache/720fe46529312187a088992487158bc3bf97035f7820558f27cc6c3dfbd4aea4.json new file mode 100644 index 0000000000000000000000000000000000000000..3905476f178c1d219e3960160bb2209815e65a0d --- /dev/null +++ b/graphify-out/cache/720fe46529312187a088992487158bc3bf97035f7820558f27cc6c3dfbd4aea4.json @@ -0,0 +1,263 @@ +{ + "nodes": [ + { + "id": "src_preprocessing_ner_tagger_py", + "label": "ner_tagger.py", + "file_type": "code", + "source_file": "src/preprocessing/ner_tagger.py" + }, + { + "id": "src_preprocessing_ner_tagger_py_docstring", + "label": "Named Entity Recognition tagger.\nIdentifies entities (persons, locations, organi", + "file_type": "rationale", + "source_file": "src/preprocessing/ner_tagger.py" + }, + { + "id": "src_preprocessing_ner_tagger_py_EntitySpan", + "label": "EntitySpan", + "file_type": "code", + "source_file": "src/preprocessing/ner_tagger.py", + "source_location": "line 14" + }, + { + "id": "src_preprocessing_ner_tagger_py_NERTagger", + "label": "NERTagger", + "file_type": "code", + "source_file": "src/preprocessing/ner_tagger.py", + "source_location": "line 21" + }, + { + "id": "src_preprocessing_ner_tagger_py_NERTagger_doc", + "label": "Tags named entities and produces protected spans.", + "file_type": "rationale", + "source_file": "src/preprocessing/ner_tagger.py", + "source_location": "line 21" + }, + { + "id": "src_preprocessing_ner_tagger_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/preprocessing/ner_tagger.py", + "source_location": "line 24" + }, + { + "id": "src_preprocessing_ner_tagger_py_tag", + "label": "tag()", + "file_type": "code", + "source_file": "src/preprocessing/ner_tagger.py", + "source_location": "line 31" + }, + { + "id": "src_preprocessing_ner_tagger_py_tag_doc", + "label": "Extract all named entities from text.", + "file_type": "rationale", + "source_file": "src/preprocessing/ner_tagger.py", + "source_location": "line 31" + }, + { + "id": "src_preprocessing_ner_tagger_py_get_protected_spans", + "label": "get_protected_spans()", + "file_type": "code", + "source_file": "src/preprocessing/ner_tagger.py", + "source_location": "line 46" + }, + { + "id": "src_preprocessing_ner_tagger_py_get_protected_spans_doc", + "label": "Return (start, end) char spans that must not be modified.", + "file_type": "rationale", + "source_file": "src/preprocessing/ner_tagger.py", + "source_location": "line 46" + } + ], + "edges": [ + { + "source": "src_preprocessing_ner_tagger_py", + "target": "src_preprocessing_ner_tagger_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_ner_tagger_py", + "target": "spacy", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_ner_tagger_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_ner_tagger_py", + "target": "dataclasses", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_ner_tagger_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_ner_tagger_py", + "target": "src_preprocessing_ner_tagger_py_EntitySpan", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_ner_tagger_py", + "target": "src_preprocessing_ner_tagger_py_NERTagger", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_ner_tagger_py_NERTagger", + "target": "src_preprocessing_ner_tagger_py_NERTagger_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_ner_tagger_py", + "target": "src_preprocessing_ner_tagger_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_ner_tagger_py___init__", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_ner_tagger_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_ner_tagger_py___init__", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_ner_tagger_py", + "target": "src_preprocessing_ner_tagger_py_tag", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_ner_tagger_py_tag", + "target": "src_preprocessing_ner_tagger_py_tag_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_ner_tagger_py_tag", + "target": "nlp", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_ner_tagger_py_tag", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_ner_tagger_py_tag", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_ner_tagger_py_tag", + "target": "EntitySpan", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_ner_tagger_py", + "target": "src_preprocessing_ner_tagger_py_get_protected_spans", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_ner_tagger_py_get_protected_spans", + "target": "src_preprocessing_ner_tagger_py_get_protected_spans_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_ner_tagger_py_get_protected_spans", + "target": "tag", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/ner_tagger.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/73acfb4db3d122647d360b092e781e1867616cf88a363241aad5da115f144221.json b/graphify-out/cache/73acfb4db3d122647d360b092e781e1867616cf88a363241aad5da115f144221.json new file mode 100644 index 0000000000000000000000000000000000000000..5d58c2a6cc2f196a295102e7dfc1c562b629d9a5 --- /dev/null +++ b/graphify-out/cache/73acfb4db3d122647d360b092e781e1867616cf88a363241aad5da115f144221.json @@ -0,0 +1,558 @@ +{ + "nodes": [ + { + "id": "tests_test_preprocessing_py", + "label": "test_preprocessing.py", + "file_type": "code", + "source_file": "tests/test_preprocessing.py" + }, + { + "id": "tests_test_preprocessing_py_docstring", + "label": "Tests for the preprocessing pipeline.", + "file_type": "rationale", + "source_file": "tests/test_preprocessing.py" + }, + { + "id": "tests_test_preprocessing_py_simulator", + "label": "simulator()", + "file_type": "code", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 9" + }, + { + "id": "tests_test_preprocessing_py_corrector", + "label": "corrector()", + "file_type": "code", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 14" + }, + { + "id": "tests_test_preprocessing_py_test_spell_correction_phonetic", + "label": "test_spell_correction_phonetic()", + "file_type": "code", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 20" + }, + { + "id": "tests_test_preprocessing_py_test_spell_correction_phonetic_doc", + "label": "Test that common dyslexic misspellings are corrected.", + "file_type": "rationale", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 20" + }, + { + "id": "tests_test_preprocessing_py_test_spell_correction_empty", + "label": "test_spell_correction_empty()", + "file_type": "code", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 27" + }, + { + "id": "tests_test_preprocessing_py_test_spell_correction_empty_doc", + "label": "Test empty input handling.", + "file_type": "rationale", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 27" + }, + { + "id": "tests_test_preprocessing_py_test_entity_protection", + "label": "test_entity_protection()", + "file_type": "code", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 33" + }, + { + "id": "tests_test_preprocessing_py_test_entity_protection_doc", + "label": "Test that named entities are identified and protected.", + "file_type": "rationale", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 33" + }, + { + "id": "tests_test_preprocessing_py_test_sentence_segmentation", + "label": "test_sentence_segmentation()", + "file_type": "code", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 43" + }, + { + "id": "tests_test_preprocessing_py_test_sentence_segmentation_doc", + "label": "Test that text is correctly split into sentences.", + "file_type": "rationale", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 43" + }, + { + "id": "tests_test_preprocessing_py_test_readability_scores", + "label": "test_readability_scores()", + "file_type": "code", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 51" + }, + { + "id": "tests_test_preprocessing_py_test_readability_scores_doc", + "label": "Test that readability metrics are computed.", + "file_type": "rationale", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 51" + }, + { + "id": "tests_test_preprocessing_py_test_dependency_trees", + "label": "test_dependency_trees()", + "file_type": "code", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 61" + }, + { + "id": "tests_test_preprocessing_py_test_dependency_trees_doc", + "label": "Test that dependency trees are extracted.", + "file_type": "rationale", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 61" + }, + { + "id": "tests_test_preprocessing_py_test_dyslexia_simulator", + "label": "test_dyslexia_simulator()", + "file_type": "code", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 70" + }, + { + "id": "tests_test_preprocessing_py_test_dyslexia_simulator_doc", + "label": "Test that the simulator produces corrupted text.", + "file_type": "rationale", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 70" + }, + { + "id": "tests_test_preprocessing_py_test_dyslexia_simulator_preserves_clean", + "label": "test_dyslexia_simulator_preserves_clean()", + "file_type": "code", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 79" + }, + { + "id": "tests_test_preprocessing_py_test_dyslexia_simulator_preserves_clean_doc", + "label": "Test that the clean text is returned unchanged.", + "file_type": "rationale", + "source_file": "tests/test_preprocessing.py", + "source_location": "line 79" + } + ], + "edges": [ + { + "source": "tests_test_preprocessing_py", + "target": "tests_test_preprocessing_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.5 + }, + { + "source": "tests_test_preprocessing_py", + "target": "pytest", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.6 + }, + { + "source": "tests_test_preprocessing_py", + "target": "src.preprocessing.dyslexia_simulator", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.6 + }, + { + "source": "tests_test_preprocessing_py", + "target": "src.preprocessing.spell_corrector", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.6 + }, + { + "source": "tests_test_preprocessing_py", + "target": "tests_test_preprocessing_py_simulator", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 1.0 + }, + { + "source": "tests_test_preprocessing_py_simulator", + "target": "DyslexiaSimulator", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py", + "target": "tests_test_preprocessing_py_corrector", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 1.0 + }, + { + "source": "tests_test_preprocessing_py_corrector", + "target": "DyslexiaAwareSpellCorrector", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py_corrector", + "target": "close", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py", + "target": "tests_test_preprocessing_py_test_spell_correction_phonetic", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 1.0 + }, + { + "source": "tests_test_preprocessing_py_test_spell_correction_phonetic", + "target": "tests_test_preprocessing_py_test_spell_correction_phonetic_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.5 + }, + { + "source": "tests_test_preprocessing_py_test_spell_correction_phonetic", + "target": "_phonetic_pass", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py", + "target": "tests_test_preprocessing_py_test_spell_correction_empty", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 1.0 + }, + { + "source": "tests_test_preprocessing_py_test_spell_correction_empty", + "target": "tests_test_preprocessing_py_test_spell_correction_empty_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.5 + }, + { + "source": "tests_test_preprocessing_py_test_spell_correction_empty", + "target": "correct", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py_test_spell_correction_empty", + "target": "correct", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py", + "target": "tests_test_preprocessing_py_test_entity_protection", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 1.0 + }, + { + "source": "tests_test_preprocessing_py_test_entity_protection", + "target": "tests_test_preprocessing_py_test_entity_protection_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.5 + }, + { + "source": "tests_test_preprocessing_py_test_entity_protection", + "target": "NERTagger", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py_test_entity_protection", + "target": "tag", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py_test_entity_protection", + "target": "any", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py_test_entity_protection", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py", + "target": "tests_test_preprocessing_py_test_sentence_segmentation", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 1.0 + }, + { + "source": "tests_test_preprocessing_py_test_sentence_segmentation", + "target": "tests_test_preprocessing_py_test_sentence_segmentation_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.5 + }, + { + "source": "tests_test_preprocessing_py_test_sentence_segmentation", + "target": "SentenceSegmenter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py_test_sentence_segmentation", + "target": "segment", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py_test_sentence_segmentation", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py", + "target": "tests_test_preprocessing_py_test_readability_scores", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 1.0 + }, + { + "source": "tests_test_preprocessing_py_test_readability_scores", + "target": "tests_test_preprocessing_py_test_readability_scores_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.5 + }, + { + "source": "tests_test_preprocessing_py_test_readability_scores", + "target": "PreprocessingPipeline", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py_test_readability_scores", + "target": "process", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py", + "target": "tests_test_preprocessing_py_test_dependency_trees", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 1.0 + }, + { + "source": "tests_test_preprocessing_py_test_dependency_trees", + "target": "tests_test_preprocessing_py_test_dependency_trees_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.5 + }, + { + "source": "tests_test_preprocessing_py_test_dependency_trees", + "target": "DependencyParser", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py_test_dependency_trees", + "target": "extract_svo", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py_test_dependency_trees", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py", + "target": "tests_test_preprocessing_py_test_dyslexia_simulator", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 1.0 + }, + { + "source": "tests_test_preprocessing_py_test_dyslexia_simulator", + "target": "tests_test_preprocessing_py_test_dyslexia_simulator_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.5 + }, + { + "source": "tests_test_preprocessing_py_test_dyslexia_simulator", + "target": "simulate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py", + "target": "tests_test_preprocessing_py_test_dyslexia_simulator_preserves_clean", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 1.0 + }, + { + "source": "tests_test_preprocessing_py_test_dyslexia_simulator_preserves_clean", + "target": "tests_test_preprocessing_py_test_dyslexia_simulator_preserves_clean_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.5 + }, + { + "source": "tests_test_preprocessing_py_test_dyslexia_simulator_preserves_clean", + "target": "simulate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_preprocessing.py", + "weight": 0.8 + }, + { + "source": "tests_test_preprocessing_py", + "target": "src.preprocessing.ner_tagger", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.6 + }, + { + "source": "tests_test_preprocessing_py", + "target": "src.preprocessing.sentence_segmenter", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.6 + }, + { + "source": "tests_test_preprocessing_py", + "target": "src.preprocessing.pipeline", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.6 + }, + { + "source": "tests_test_preprocessing_py", + "target": "src.preprocessing.dependency_parser", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_preprocessing.py", + "weight": 0.6 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/7b372c682c6f3981b063322307cd994c2fcfb8b600b1a0dc2a76e8d6d0842d71.json b/graphify-out/cache/7b372c682c6f3981b063322307cd994c2fcfb8b600b1a0dc2a76e8d6d0842d71.json new file mode 100644 index 0000000000000000000000000000000000000000..5ceac6215d2194113fef55fb2f1b634d34b5f8d5 --- /dev/null +++ b/graphify-out/cache/7b372c682c6f3981b063322307cd994c2fcfb8b600b1a0dc2a76e8d6d0842d71.json @@ -0,0 +1,169 @@ +{ + "nodes": [ + { + "id": "scripts_download_all_huggingface_datasets_py", + "label": "download_all_huggingface_datasets.py", + "file_type": "code", + "source_file": "scripts/download_all_huggingface_datasets.py" + }, + { + "id": "scripts_download_all_huggingface_datasets_py_docstring", + "label": "Downloads all publicly available HuggingFace datasets automatically.\nDatasets re", + "file_type": "rationale", + "source_file": "scripts/download_all_huggingface_datasets.py" + }, + { + "id": "scripts_download_all_huggingface_datasets_py_main", + "label": "main()", + "file_type": "code", + "source_file": "scripts/download_all_huggingface_datasets.py", + "source_location": "line 27" + } + ], + "edges": [ + { + "source": "scripts_download_all_huggingface_datasets_py", + "target": "scripts_download_all_huggingface_datasets_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 0.5 + }, + { + "source": "scripts_download_all_huggingface_datasets_py", + "target": "datasets", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 0.6 + }, + { + "source": "scripts_download_all_huggingface_datasets_py", + "target": "os", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 0.6 + }, + { + "source": "scripts_download_all_huggingface_datasets_py", + "target": "scripts_download_all_huggingface_datasets_py_main", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 1.0 + }, + { + "source": "scripts_download_all_huggingface_datasets_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 0.8 + }, + { + "source": "scripts_download_all_huggingface_datasets_py_main", + "target": "items", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 0.8 + }, + { + "source": "scripts_download_all_huggingface_datasets_py_main", + "target": "exists", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 0.8 + }, + { + "source": "scripts_download_all_huggingface_datasets_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 0.8 + }, + { + "source": "scripts_download_all_huggingface_datasets_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 0.8 + }, + { + "source": "scripts_download_all_huggingface_datasets_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 0.8 + }, + { + "source": "scripts_download_all_huggingface_datasets_py_main", + "target": "save_to_disk", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 0.8 + }, + { + "source": "scripts_download_all_huggingface_datasets_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 0.8 + }, + { + "source": "scripts_download_all_huggingface_datasets_py_main", + "target": "load_dataset", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 0.8 + }, + { + "source": "scripts_download_all_huggingface_datasets_py_main", + "target": "load_dataset", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 0.8 + }, + { + "source": "scripts_download_all_huggingface_datasets_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 0.8 + }, + { + "source": "scripts_download_all_huggingface_datasets_py_main", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/download_all_huggingface_datasets.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/7bbbe861fbf4f16d4b829d034e1c0aa8c8d3109b5129b3264a7d961776435ddf.json b/graphify-out/cache/7bbbe861fbf4f16d4b829d034e1c0aa8c8d3109b5129b3264a7d961776435ddf.json new file mode 100644 index 0000000000000000000000000000000000000000..af72aa47f7c421e93551e15fbfbaf279b45b5843 --- /dev/null +++ b/graphify-out/cache/7bbbe861fbf4f16d4b829d034e1c0aa8c8d3109b5129b3264a7d961776435ddf.json @@ -0,0 +1,1753 @@ +{ + "nodes": [ + { + "id": "scripts_train_py", + "label": "train.py", + "file_type": "code", + "source_file": "scripts/train.py" + }, + { + "id": "scripts_train_py_docstring", + "label": "Full training entry point.\nRun: python scripts/train.py --config configs/trainin", + "file_type": "rationale", + "source_file": "scripts/train.py" + }, + { + "id": "scripts_train_py__setup_device", + "label": "_setup_device()", + "file_type": "code", + "source_file": "scripts/train.py", + "source_location": "line 31" + }, + { + "id": "scripts_train_py__setup_device_doc", + "label": "Detect GPU and configure hybrid VRAM management.\n\nReturns (device, gpu_info) whe", + "file_type": "rationale", + "source_file": "scripts/train.py", + "source_location": "line 31" + }, + { + "id": "scripts_train_py__auto_batch_size", + "label": "_auto_batch_size()", + "file_type": "code", + "source_file": "scripts/train.py", + "source_location": "line 82" + }, + { + "id": "scripts_train_py__auto_batch_size_doc", + "label": "Pick optimal batch size based on model size and available resources.", + "file_type": "rationale", + "source_file": "scripts/train.py", + "source_location": "line 82" + }, + { + "id": "scripts_train_py_train", + "label": "train()", + "file_type": "code", + "source_file": "scripts/train.py", + "source_location": "line 126" + }, + { + "id": "scripts_train_py_train_doc", + "label": "Launch the full training pipeline.", + "file_type": "rationale", + "source_file": "scripts/train.py", + "source_location": "line 126" + }, + { + "id": "scripts_train_py_CEOnlyLoss", + "label": "CEOnlyLoss", + "file_type": "code", + "source_file": "scripts/train.py", + "source_location": "line 221" + }, + { + "id": "scripts_train_py_CEOnlyLoss_doc", + "label": "Cross-entropy only loss \u2014 the only loss that provides gradient signal.", + "file_type": "rationale", + "source_file": "scripts/train.py", + "source_location": "line 221" + }, + { + "id": "scripts_train_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "scripts/train.py", + "source_location": "line 223" + }, + { + "id": "scripts_train_py_forward", + "label": "forward()", + "file_type": "code", + "source_file": "scripts/train.py", + "source_location": "line 227" + } + ], + "edges": [ + { + "source": "scripts_train_py", + "target": "scripts_train_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.5 + }, + { + "source": "scripts_train_py", + "target": "click", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "yaml", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "os", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "gc", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "transformers", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "src.model.base_model", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "src.model.style_conditioner", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "src.training.dataset", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "src.training.loss_functions", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "src.training.trainer", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "src.training.callbacks", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "src.style.fingerprinter", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "src.evaluation.gleu_scorer", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "scripts_train_py__setup_device", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 1.0 + }, + { + "source": "scripts_train_py__setup_device", + "target": "scripts_train_py__setup_device_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.5 + }, + { + "source": "scripts_train_py__setup_device", + "target": "get_device_name", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__setup_device", + "target": "get_device_capability", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__setup_device", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__setup_device", + "target": "int", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__setup_device", + "target": "is_available", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__setup_device", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__setup_device", + "target": "memory_reserved", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__setup_device", + "target": "memory_allocated", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__setup_device", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__setup_device", + "target": "set_per_process_memory_fraction", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__setup_device", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__setup_device", + "target": "get_device_properties", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__setup_device", + "target": "int", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py", + "target": "scripts_train_py__auto_batch_size", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 1.0 + }, + { + "source": "scripts_train_py__auto_batch_size", + "target": "scripts_train_py__auto_batch_size_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.5 + }, + { + "source": "scripts_train_py__auto_batch_size", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__auto_batch_size", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__auto_batch_size", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__auto_batch_size", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__auto_batch_size", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__auto_batch_size", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__auto_batch_size", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py__auto_batch_size", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py", + "target": "scripts_train_py_train", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 1.0 + }, + { + "source": "scripts_train_py_train", + "target": "scripts_train_py_train_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.5 + }, + { + "source": "scripts_train_py_train", + "target": "command", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "option", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "option", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "_setup_device", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "load_model_and_tokenizer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "hasattr", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "StyleFingerprinter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "WritingCorrectionDataset", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "WritingCorrectionDataset", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "collect", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "CEOnlyLoss", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "_auto_batch_size", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "TrainingArguments", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "CorrectionTrainer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "train", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "sorted", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "save_model", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "save_pretrained", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "safe_load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "init", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "enable_input_require_grads", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "hasattr", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "empty_cache", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "glob", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "exists", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "isdir", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "exists", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "finish", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "compile", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "__init__", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "CrossEntropyLoss", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "ce_loss", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "load_adapter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "dim", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "view", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "view", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "tensor", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "StyleMetricsCallback", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "EarlyStoppingOnStyleDrift", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "super", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "size", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_train", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py", + "target": "wandb", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "scripts_train_py_CEOnlyLoss", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 1.0 + }, + { + "source": "scripts_train_py_CEOnlyLoss", + "target": "scripts_train_py_CEOnlyLoss_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.5 + }, + { + "source": "scripts_train_py_CEOnlyLoss", + "target": "Module", + "relation": "inherits", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 1.0 + }, + { + "source": "scripts_train_py", + "target": "glob", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "scripts_train_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 1.0 + }, + { + "source": "scripts_train_py___init__", + "target": "__init__", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py___init__", + "target": "CrossEntropyLoss", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py___init__", + "target": "super", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py", + "target": "scripts_train_py_forward", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 1.0 + }, + { + "source": "scripts_train_py_forward", + "target": "ce_loss", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_forward", + "target": "dim", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_forward", + "target": "view", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_forward", + "target": "view", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py_forward", + "target": "size", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/train.py", + "weight": 0.8 + }, + { + "source": "scripts_train_py", + "target": "peft", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + }, + { + "source": "scripts_train_py", + "target": "json", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/train.py", + "weight": 0.6 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/7cf67782f2f94bf51f5cfb6817fb19ef6c01bae6ca4bfbeae400cf48d2290905.json b/graphify-out/cache/7cf67782f2f94bf51f5cfb6817fb19ef6c01bae6ca4bfbeae400cf48d2290905.json new file mode 100644 index 0000000000000000000000000000000000000000..f6e048fd818a58e05c88b30298d8757e41c3c2bb --- /dev/null +++ b/graphify-out/cache/7cf67782f2f94bf51f5cfb6817fb19ef6c01bae6ca4bfbeae400cf48d2290905.json @@ -0,0 +1,728 @@ +{ + "nodes": [ + { + "id": "src_preprocessing_spell_corrector_py", + "label": "spell_corrector.py", + "file_type": "code", + "source_file": "src/preprocessing/spell_corrector.py" + }, + { + "id": "src_preprocessing_spell_corrector_py_docstring", + "label": "Two-pass spell correction:\nPass 1: pyspellchecker (fast, context-free, catches s", + "file_type": "rationale", + "source_file": "src/preprocessing/spell_corrector.py" + }, + { + "id": "src_preprocessing_spell_corrector_py_DyslexiaAwareSpellCorrector", + "label": "DyslexiaAwareSpellCorrector", + "file_type": "code", + "source_file": "src/preprocessing/spell_corrector.py", + "source_location": "line 22" + }, + { + "id": "src_preprocessing_spell_corrector_py_DyslexiaAwareSpellCorrector_doc", + "label": "Two-pass spell corrector with dyslexia-specific phonetic pattern handling.", + "file_type": "rationale", + "source_file": "src/preprocessing/spell_corrector.py", + "source_location": "line 22" + }, + { + "id": "src_preprocessing_spell_corrector_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/preprocessing/spell_corrector.py", + "source_location": "line 35" + }, + { + "id": "src_preprocessing_spell_corrector_py__phonetic_pass", + "label": "_phonetic_pass()", + "file_type": "code", + "source_file": "src/preprocessing/spell_corrector.py", + "source_location": "line 51" + }, + { + "id": "src_preprocessing_spell_corrector_py__phonetic_pass_doc", + "label": "Apply known dyslexic phonetic substitutions first.", + "file_type": "rationale", + "source_file": "src/preprocessing/spell_corrector.py", + "source_location": "line 51" + }, + { + "id": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "label": "_spellcheck_pass()", + "file_type": "code", + "source_file": "src/preprocessing/spell_corrector.py", + "source_location": "line 64" + }, + { + "id": "src_preprocessing_spell_corrector_py__spellcheck_pass_doc", + "label": "pyspellchecker pass for simple token-level errors.", + "file_type": "rationale", + "source_file": "src/preprocessing/spell_corrector.py", + "source_location": "line 64" + }, + { + "id": "src_preprocessing_spell_corrector_py__languagetool_pass", + "label": "_languagetool_pass()", + "file_type": "code", + "source_file": "src/preprocessing/spell_corrector.py", + "source_location": "line 87" + }, + { + "id": "src_preprocessing_spell_corrector_py__languagetool_pass_doc", + "label": "LanguageTool pass for context-aware grammar + spelling corrections.", + "file_type": "rationale", + "source_file": "src/preprocessing/spell_corrector.py", + "source_location": "line 87" + }, + { + "id": "src_preprocessing_spell_corrector_py_correct", + "label": "correct()", + "file_type": "code", + "source_file": "src/preprocessing/spell_corrector.py", + "source_location": "line 108" + }, + { + "id": "src_preprocessing_spell_corrector_py_correct_doc", + "label": "Run all three correction passes in sequence.", + "file_type": "rationale", + "source_file": "src/preprocessing/spell_corrector.py", + "source_location": "line 108" + }, + { + "id": "src_preprocessing_spell_corrector_py_close", + "label": "close()", + "file_type": "code", + "source_file": "src/preprocessing/spell_corrector.py", + "source_location": "line 126" + }, + { + "id": "src_preprocessing_spell_corrector_py_close_doc", + "label": "Clean up LanguageTool resources.", + "file_type": "rationale", + "source_file": "src/preprocessing/spell_corrector.py", + "source_location": "line 126" + }, + { + "id": "src_preprocessing_spell_corrector_py__replace", + "label": "_replace()", + "file_type": "code", + "source_file": "src/preprocessing/spell_corrector.py", + "source_location": "line 53" + } + ], + "edges": [ + { + "source": "src_preprocessing_spell_corrector_py", + "target": "src_preprocessing_spell_corrector_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_spell_corrector_py", + "target": "language_tool_python", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_spell_corrector_py", + "target": "spellchecker", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_spell_corrector_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_spell_corrector_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_spell_corrector_py", + "target": "re", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_spell_corrector_py", + "target": "src_preprocessing_spell_corrector_py_DyslexiaAwareSpellCorrector", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_spell_corrector_py_DyslexiaAwareSpellCorrector", + "target": "src_preprocessing_spell_corrector_py_DyslexiaAwareSpellCorrector_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_spell_corrector_py", + "target": "src_preprocessing_spell_corrector_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_spell_corrector_py___init__", + "target": "SpellChecker", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py___init__", + "target": "compile", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py___init__", + "target": "LanguageTool", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py___init__", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py___init__", + "target": "escape", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py___init__", + "target": "keys", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py", + "target": "src_preprocessing_spell_corrector_py__phonetic_pass", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_spell_corrector_py__phonetic_pass", + "target": "src_preprocessing_spell_corrector_py__phonetic_pass_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_spell_corrector_py__phonetic_pass", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__phonetic_pass", + "target": "group", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__phonetic_pass", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__phonetic_pass", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__phonetic_pass", + "target": "isupper", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__phonetic_pass", + "target": "islower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__phonetic_pass", + "target": "upper", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py", + "target": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "src_preprocessing_spell_corrector_py__spellcheck_pass_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "correction", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "isupper", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "isupper", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "capitalize", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__spellcheck_pass", + "target": "lstrip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py", + "target": "src_preprocessing_spell_corrector_py__languagetool_pass", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_spell_corrector_py__languagetool_pass", + "target": "src_preprocessing_spell_corrector_py__languagetool_pass_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_spell_corrector_py__languagetool_pass", + "target": "check", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__languagetool_pass", + "target": "sorted", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__languagetool_pass", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py", + "target": "src_preprocessing_spell_corrector_py_correct", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_spell_corrector_py_correct", + "target": "src_preprocessing_spell_corrector_py_correct_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_spell_corrector_py_correct", + "target": "debug", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py_correct", + "target": "_phonetic_pass", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py_correct", + "target": "_spellcheck_pass", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py_correct", + "target": "_languagetool_pass", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py_correct", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py", + "target": "src_preprocessing_spell_corrector_py_close", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_spell_corrector_py_close", + "target": "src_preprocessing_spell_corrector_py_close_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_spell_corrector_py_close", + "target": "close", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py", + "target": "src_preprocessing_spell_corrector_py__replace", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_spell_corrector_py__replace", + "target": "group", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__replace", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__replace", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__replace", + "target": "isupper", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__replace", + "target": "islower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_spell_corrector_py__replace", + "target": "upper", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/spell_corrector.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/7e77a2aef2097af88a55903d119062bdb4cd9111aa9c606f3494098302e60437.json b/graphify-out/cache/7e77a2aef2097af88a55903d119062bdb4cd9111aa9c606f3494098302e60437.json new file mode 100644 index 0000000000000000000000000000000000000000..aeab06b02eb618ba3520254fa7d93d5818d286e9 --- /dev/null +++ b/graphify-out/cache/7e77a2aef2097af88a55903d119062bdb4cd9111aa9c606f3494098302e60437.json @@ -0,0 +1,531 @@ +{ + "nodes": [ + { + "id": "src_evaluation_style_metrics_py", + "label": "style_metrics.py", + "file_type": "code", + "source_file": "src/evaluation/style_metrics.py" + }, + { + "id": "src_evaluation_style_metrics_py_docstring", + "label": "Measures style preservation between input and output.\n\nKey metrics:\n - Style Ve", + "file_type": "rationale", + "source_file": "src/evaluation/style_metrics.py" + }, + { + "id": "src_evaluation_style_metrics_py_StyleEvaluator", + "label": "StyleEvaluator", + "file_type": "code", + "source_file": "src/evaluation/style_metrics.py", + "source_location": "line 19" + }, + { + "id": "src_evaluation_style_metrics_py_StyleEvaluator_doc", + "label": "Evaluates style preservation and academic vocabulary coverage.", + "file_type": "rationale", + "source_file": "src/evaluation/style_metrics.py", + "source_location": "line 19" + }, + { + "id": "src_evaluation_style_metrics_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/evaluation/style_metrics.py", + "source_location": "line 22" + }, + { + "id": "src_evaluation_style_metrics_py_style_similarity", + "label": "style_similarity()", + "file_type": "code", + "source_file": "src/evaluation/style_metrics.py", + "source_location": "line 26" + }, + { + "id": "src_evaluation_style_metrics_py_style_similarity_doc", + "label": "Cosine similarity between style vectors. Target: > 0.85.", + "file_type": "rationale", + "source_file": "src/evaluation/style_metrics.py", + "source_location": "line 26" + }, + { + "id": "src_evaluation_style_metrics_py_awl_coverage", + "label": "awl_coverage()", + "file_type": "code", + "source_file": "src/evaluation/style_metrics.py", + "source_location": "line 39" + }, + { + "id": "src_evaluation_style_metrics_py_awl_coverage_doc", + "label": "Fraction of content words in AWL. Target: > 0.25.", + "file_type": "rationale", + "source_file": "src/evaluation/style_metrics.py", + "source_location": "line 39" + }, + { + "id": "src_evaluation_style_metrics_py_evaluate_batch", + "label": "evaluate_batch()", + "file_type": "code", + "source_file": "src/evaluation/style_metrics.py", + "source_location": "line 54" + }, + { + "id": "src_evaluation_style_metrics_py_evaluate_batch_doc", + "label": "Compute style and AWL metrics for a batch.", + "file_type": "rationale", + "source_file": "src/evaluation/style_metrics.py", + "source_location": "line 54" + } + ], + "edges": [ + { + "source": "src_evaluation_style_metrics_py", + "target": "src_evaluation_style_metrics_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.5 + }, + { + "source": "src_evaluation_style_metrics_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_style_metrics_py", + "target": "torch.nn.functional", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_style_metrics_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_style_metrics_py", + "target": "style.fingerprinter", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_style_metrics_py", + "target": "vocabulary.awl_loader", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_style_metrics_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_style_metrics_py", + "target": "numpy", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_style_metrics_py", + "target": "src_evaluation_style_metrics_py_StyleEvaluator", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 1.0 + }, + { + "source": "src_evaluation_style_metrics_py_StyleEvaluator", + "target": "src_evaluation_style_metrics_py_StyleEvaluator_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.5 + }, + { + "source": "src_evaluation_style_metrics_py", + "target": "src_evaluation_style_metrics_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 1.0 + }, + { + "source": "src_evaluation_style_metrics_py", + "target": "src_evaluation_style_metrics_py_style_similarity", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 1.0 + }, + { + "source": "src_evaluation_style_metrics_py_style_similarity", + "target": "src_evaluation_style_metrics_py_style_similarity_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.5 + }, + { + "source": "src_evaluation_style_metrics_py_style_similarity", + "target": "extract_vector", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_style_similarity", + "target": "extract_vector", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_style_similarity", + "target": "cosine_similarity", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_style_similarity", + "target": "item", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_style_similarity", + "target": "dim", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_style_similarity", + "target": "unsqueeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_style_similarity", + "target": "dim", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_style_similarity", + "target": "unsqueeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py", + "target": "src_evaluation_style_metrics_py_awl_coverage", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 1.0 + }, + { + "source": "src_evaluation_style_metrics_py_awl_coverage", + "target": "src_evaluation_style_metrics_py_awl_coverage_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.5 + }, + { + "source": "src_evaluation_style_metrics_py_awl_coverage", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_awl_coverage", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_awl_coverage", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_awl_coverage", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_awl_coverage", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_awl_coverage", + "target": "isalpha", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_awl_coverage", + "target": "is_academic", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_awl_coverage", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py", + "target": "src_evaluation_style_metrics_py_evaluate_batch", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 1.0 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "src_evaluation_style_metrics_py_evaluate_batch_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.5 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "zip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "float", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "float", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "float", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "float", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "float", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "style_similarity", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "awl_coverage", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "style_similarity", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "std", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "std", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_style_metrics_py_evaluate_batch", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/style_metrics.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/8443bddd5f97f03a093648cc3753aab32b5e7c679c840ecb59afb5b0d07cdaea.json b/graphify-out/cache/8443bddd5f97f03a093648cc3753aab32b5e7c679c840ecb59afb5b0d07cdaea.json new file mode 100644 index 0000000000000000000000000000000000000000..eda26be50c5e085db108d063f745279a3ad9d834 --- /dev/null +++ b/graphify-out/cache/8443bddd5f97f03a093648cc3753aab32b5e7c679c840ecb59afb5b0d07cdaea.json @@ -0,0 +1,333 @@ +{ + "nodes": [ + { + "id": "tests_test_style_py", + "label": "test_style.py", + "file_type": "code", + "source_file": "tests/test_style.py" + }, + { + "id": "tests_test_style_py_docstring", + "label": "Tests for the style fingerprinting module.", + "file_type": "rationale", + "source_file": "tests/test_style.py" + }, + { + "id": "tests_test_style_py_fingerprinter", + "label": "fingerprinter()", + "file_type": "code", + "source_file": "tests/test_style.py", + "source_location": "line 10" + }, + { + "id": "tests_test_style_py_test_style_vector_shape", + "label": "test_style_vector_shape()", + "file_type": "code", + "source_file": "tests/test_style.py", + "source_location": "line 16" + }, + { + "id": "tests_test_style_py_test_style_vector_shape_doc", + "label": "Test that style vectors have correct dimensionality.", + "file_type": "rationale", + "source_file": "tests/test_style.py", + "source_location": "line 16" + }, + { + "id": "tests_test_style_py_test_style_vector_different_texts", + "label": "test_style_vector_different_texts()", + "file_type": "code", + "source_file": "tests/test_style.py", + "source_location": "line 22" + }, + { + "id": "tests_test_style_py_test_style_vector_different_texts_doc", + "label": "Test that different writing styles produce different vectors.", + "file_type": "rationale", + "source_file": "tests/test_style.py", + "source_location": "line 22" + }, + { + "id": "tests_test_style_py_test_style_blend", + "label": "test_style_blend()", + "file_type": "code", + "source_file": "tests/test_style.py", + "source_location": "line 32" + }, + { + "id": "tests_test_style_py_test_style_blend_doc", + "label": "Test that blended vectors have unit norm.", + "file_type": "rationale", + "source_file": "tests/test_style.py", + "source_location": "line 32" + }, + { + "id": "tests_test_style_py_test_raw_features_keys", + "label": "test_raw_features_keys()", + "file_type": "code", + "source_file": "tests/test_style.py", + "source_location": "line 41" + }, + { + "id": "tests_test_style_py_test_raw_features_keys_doc", + "label": "Test that raw features contain expected keys.", + "file_type": "rationale", + "source_file": "tests/test_style.py", + "source_location": "line 41" + } + ], + "edges": [ + { + "source": "tests_test_style_py", + "target": "tests_test_style_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_style.py", + "weight": 0.5 + }, + { + "source": "tests_test_style_py", + "target": "pytest", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_style.py", + "weight": 0.6 + }, + { + "source": "tests_test_style_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_style.py", + "weight": 0.6 + }, + { + "source": "tests_test_style_py", + "target": "src.style.fingerprinter", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_style.py", + "weight": 0.6 + }, + { + "source": "tests_test_style_py", + "target": "src.style.style_vector", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_style.py", + "weight": 0.6 + }, + { + "source": "tests_test_style_py", + "target": "tests_test_style_py_fingerprinter", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_style.py", + "weight": 1.0 + }, + { + "source": "tests_test_style_py_fingerprinter", + "target": "write_text", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_style.py", + "weight": 0.8 + }, + { + "source": "tests_test_style_py_fingerprinter", + "target": "StyleFingerprinter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_style.py", + "weight": 0.8 + }, + { + "source": "tests_test_style_py_fingerprinter", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_style.py", + "weight": 0.8 + }, + { + "source": "tests_test_style_py", + "target": "tests_test_style_py_test_style_vector_shape", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_style.py", + "weight": 1.0 + }, + { + "source": "tests_test_style_py_test_style_vector_shape", + "target": "tests_test_style_py_test_style_vector_shape_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_style.py", + "weight": 0.5 + }, + { + "source": "tests_test_style_py_test_style_vector_shape", + "target": "extract_vector", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_style.py", + "weight": 0.8 + }, + { + "source": "tests_test_style_py", + "target": "tests_test_style_py_test_style_vector_different_texts", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_style.py", + "weight": 1.0 + }, + { + "source": "tests_test_style_py_test_style_vector_different_texts", + "target": "tests_test_style_py_test_style_vector_different_texts_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_style.py", + "weight": 0.5 + }, + { + "source": "tests_test_style_py_test_style_vector_different_texts", + "target": "extract_vector", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_style.py", + "weight": 0.8 + }, + { + "source": "tests_test_style_py_test_style_vector_different_texts", + "target": "extract_vector", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_style.py", + "weight": 0.8 + }, + { + "source": "tests_test_style_py_test_style_vector_different_texts", + "target": "cosine_similarity", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_style.py", + "weight": 0.8 + }, + { + "source": "tests_test_style_py", + "target": "tests_test_style_py_test_style_blend", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_style.py", + "weight": 1.0 + }, + { + "source": "tests_test_style_py_test_style_blend", + "target": "tests_test_style_py_test_style_blend_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_style.py", + "weight": 0.5 + }, + { + "source": "tests_test_style_py_test_style_blend", + "target": "extract_vector", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_style.py", + "weight": 0.8 + }, + { + "source": "tests_test_style_py_test_style_blend", + "target": "extract_vector", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_style.py", + "weight": 0.8 + }, + { + "source": "tests_test_style_py_test_style_blend", + "target": "blend_vectors", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_style.py", + "weight": 0.8 + }, + { + "source": "tests_test_style_py_test_style_blend", + "target": "item", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_style.py", + "weight": 0.8 + }, + { + "source": "tests_test_style_py_test_style_blend", + "target": "abs", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_style.py", + "weight": 0.8 + }, + { + "source": "tests_test_style_py_test_style_blend", + "target": "norm", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_style.py", + "weight": 0.8 + }, + { + "source": "tests_test_style_py", + "target": "tests_test_style_py_test_raw_features_keys", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_style.py", + "weight": 1.0 + }, + { + "source": "tests_test_style_py_test_raw_features_keys", + "target": "tests_test_style_py_test_raw_features_keys_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_style.py", + "weight": 0.5 + }, + { + "source": "tests_test_style_py_test_raw_features_keys", + "target": "extract_raw_features", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_style.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/846aded94977ffa1b3d8a7997f3064f628c84e1be1fb3870904d9e0ada37979a.json b/graphify-out/cache/846aded94977ffa1b3d8a7997f3064f628c84e1be1fb3870904d9e0ada37979a.json new file mode 100644 index 0000000000000000000000000000000000000000..adf85eeb463d8dc4113a82381332648b3a3767fb --- /dev/null +++ b/graphify-out/cache/846aded94977ffa1b3d8a7997f3064f628c84e1be1fb3870904d9e0ada37979a.json @@ -0,0 +1,324 @@ +{ + "nodes": [ + { + "id": "src_vocabulary_register_filter_py", + "label": "register_filter.py", + "file_type": "code", + "source_file": "src/vocabulary/register_filter.py" + }, + { + "id": "src_vocabulary_register_filter_py_docstring", + "label": "Register filter module.\nApplies register-level post-processing to ensure output ", + "file_type": "rationale", + "source_file": "src/vocabulary/register_filter.py" + }, + { + "id": "src_vocabulary_register_filter_py_RegisterFilterAdvanced", + "label": "RegisterFilterAdvanced", + "file_type": "code", + "source_file": "src/vocabulary/register_filter.py", + "source_location": "line 12" + }, + { + "id": "src_vocabulary_register_filter_py_RegisterFilterAdvanced_doc", + "label": "Advanced register filtering with nominalisation and hedging passes.", + "file_type": "rationale", + "source_file": "src/vocabulary/register_filter.py", + "source_location": "line 12" + }, + { + "id": "src_vocabulary_register_filter_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/vocabulary/register_filter.py", + "source_location": "line 54" + }, + { + "id": "src_vocabulary_register_filter_py_nominalise", + "label": "nominalise()", + "file_type": "code", + "source_file": "src/vocabulary/register_filter.py", + "source_location": "line 57" + }, + { + "id": "src_vocabulary_register_filter_py_nominalise_doc", + "label": "Convert verbal phrases to nominal forms where appropriate.\n\nOnly applies to clea", + "file_type": "rationale", + "source_file": "src/vocabulary/register_filter.py", + "source_location": "line 57" + }, + { + "id": "src_vocabulary_register_filter_py_add_hedging", + "label": "add_hedging()", + "file_type": "code", + "source_file": "src/vocabulary/register_filter.py", + "source_location": "line 76" + }, + { + "id": "src_vocabulary_register_filter_py_add_hedging_doc", + "label": "Add hedging language where claims are too absolute.\n\nOnly applies to the first o", + "file_type": "rationale", + "source_file": "src/vocabulary/register_filter.py", + "source_location": "line 76" + }, + { + "id": "src_vocabulary_register_filter_py_check_formality", + "label": "check_formality()", + "file_type": "code", + "source_file": "src/vocabulary/register_filter.py", + "source_location": "line 93" + }, + { + "id": "src_vocabulary_register_filter_py_check_formality_doc", + "label": "Score text formality on 0-1 scale.", + "file_type": "rationale", + "source_file": "src/vocabulary/register_filter.py", + "source_location": "line 93" + } + ], + "edges": [ + { + "source": "src_vocabulary_register_filter_py", + "target": "src_vocabulary_register_filter_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_register_filter_py", + "target": "re", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_register_filter_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_register_filter_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.6 + }, + { + "source": "src_vocabulary_register_filter_py", + "target": "src_vocabulary_register_filter_py_RegisterFilterAdvanced", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/register_filter.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_register_filter_py_RegisterFilterAdvanced", + "target": "src_vocabulary_register_filter_py_RegisterFilterAdvanced_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_register_filter_py", + "target": "src_vocabulary_register_filter_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/register_filter.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_register_filter_py", + "target": "src_vocabulary_register_filter_py_nominalise", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/register_filter.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_register_filter_py_nominalise", + "target": "src_vocabulary_register_filter_py_nominalise_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_register_filter_py_nominalise", + "target": "items", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_register_filter_py_nominalise", + "target": "compile", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_register_filter_py_nominalise", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_register_filter_py_nominalise", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_register_filter_py_nominalise", + "target": "endswith", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_register_filter_py_nominalise", + "target": "rstrip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_register_filter_py_nominalise", + "target": "escape", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_register_filter_py", + "target": "src_vocabulary_register_filter_py_add_hedging", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/register_filter.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_register_filter_py_add_hedging", + "target": "src_vocabulary_register_filter_py_add_hedging_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_register_filter_py_add_hedging", + "target": "compile", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_register_filter_py_add_hedging", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_register_filter_py_add_hedging", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_register_filter_py", + "target": "src_vocabulary_register_filter_py_check_formality", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/register_filter.py", + "weight": 1.0 + }, + { + "source": "src_vocabulary_register_filter_py_check_formality", + "target": "src_vocabulary_register_filter_py_check_formality_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.5 + }, + { + "source": "src_vocabulary_register_filter_py_check_formality", + "target": "FormalityClassifier", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_register_filter_py_check_formality", + "target": "score", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_register_filter_py_check_formality", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.8 + }, + { + "source": "src_vocabulary_register_filter_py", + "target": "src.style.formality_classifier", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/vocabulary/register_filter.py", + "weight": 0.6 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/8758bc39dca1b66afc6772b73f7ba09d2cf2b7fe71129bc5bba6bfe3dba9673e.json b/graphify-out/cache/8758bc39dca1b66afc6772b73f7ba09d2cf2b7fe71129bc5bba6bfe3dba9673e.json new file mode 100644 index 0000000000000000000000000000000000000000..6dab21408e29a0209ddfd47292e132e0507450f8 --- /dev/null +++ b/graphify-out/cache/8758bc39dca1b66afc6772b73f7ba09d2cf2b7fe71129bc5bba6bfe3dba9673e.json @@ -0,0 +1,340 @@ +{ + "nodes": [ + { + "id": "tests_test_model_py", + "label": "test_model.py", + "file_type": "code", + "source_file": "tests/test_model.py" + }, + { + "id": "tests_test_model_py_docstring", + "label": "Tests for the core model module.", + "file_type": "rationale", + "source_file": "tests/test_model.py" + }, + { + "id": "tests_test_model_py_test_model_registry_populated", + "label": "test_model_registry_populated()", + "file_type": "code", + "source_file": "tests/test_model.py", + "source_location": "line 11" + }, + { + "id": "tests_test_model_py_test_model_registry_populated_doc", + "label": "Test that model registries are defined.", + "file_type": "rationale", + "source_file": "tests/test_model.py", + "source_location": "line 11" + }, + { + "id": "tests_test_model_py_test_invalid_model_key", + "label": "test_invalid_model_key()", + "file_type": "code", + "source_file": "tests/test_model.py", + "source_location": "line 17" + }, + { + "id": "tests_test_model_py_test_invalid_model_key_doc", + "label": "Test that unknown model keys raise ValueError.", + "file_type": "rationale", + "source_file": "tests/test_model.py", + "source_location": "line 17" + }, + { + "id": "tests_test_model_py_test_style_conditioner_output_shape", + "label": "test_style_conditioner_output_shape()", + "file_type": "code", + "source_file": "tests/test_model.py", + "source_location": "line 23" + }, + { + "id": "tests_test_model_py_test_style_conditioner_output_shape_doc", + "label": "Test that style conditioner produces correct tensor shapes.", + "file_type": "rationale", + "source_file": "tests/test_model.py", + "source_location": "line 23" + }, + { + "id": "tests_test_model_py_test_prepend_style_prefix", + "label": "test_prepend_style_prefix()", + "file_type": "code", + "source_file": "tests/test_model.py", + "source_location": "line 32" + }, + { + "id": "tests_test_model_py_test_prepend_style_prefix_doc", + "label": "Test prefix prepending dimensions.", + "file_type": "rationale", + "source_file": "tests/test_model.py", + "source_location": "line 32" + }, + { + "id": "tests_test_model_py_test_lora_config_creation", + "label": "test_lora_config_creation()", + "file_type": "code", + "source_file": "tests/test_model.py", + "source_location": "line 40" + }, + { + "id": "tests_test_model_py_test_lora_config_creation_doc", + "label": "Test LoRA config creation.", + "file_type": "rationale", + "source_file": "tests/test_model.py", + "source_location": "line 40" + } + ], + "edges": [ + { + "source": "tests_test_model_py", + "target": "tests_test_model_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 0.5 + }, + { + "source": "tests_test_model_py", + "target": "pytest", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 0.6 + }, + { + "source": "tests_test_model_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 0.6 + }, + { + "source": "tests_test_model_py", + "target": "src.model.base_model", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 0.6 + }, + { + "source": "tests_test_model_py", + "target": "src.model.style_conditioner", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 0.6 + }, + { + "source": "tests_test_model_py", + "target": "src.model.lora_adapter", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 0.6 + }, + { + "source": "tests_test_model_py", + "target": "peft", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 0.6 + }, + { + "source": "tests_test_model_py", + "target": "tests_test_model_py_test_model_registry_populated", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 1.0 + }, + { + "source": "tests_test_model_py_test_model_registry_populated", + "target": "tests_test_model_py_test_model_registry_populated_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 0.5 + }, + { + "source": "tests_test_model_py_test_model_registry_populated", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_model.py", + "weight": 0.8 + }, + { + "source": "tests_test_model_py_test_model_registry_populated", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_model.py", + "weight": 0.8 + }, + { + "source": "tests_test_model_py", + "target": "tests_test_model_py_test_invalid_model_key", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 1.0 + }, + { + "source": "tests_test_model_py_test_invalid_model_key", + "target": "tests_test_model_py_test_invalid_model_key_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 0.5 + }, + { + "source": "tests_test_model_py_test_invalid_model_key", + "target": "raises", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_model.py", + "weight": 0.8 + }, + { + "source": "tests_test_model_py_test_invalid_model_key", + "target": "load_model_and_tokenizer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_model.py", + "weight": 0.8 + }, + { + "source": "tests_test_model_py", + "target": "tests_test_model_py_test_style_conditioner_output_shape", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 1.0 + }, + { + "source": "tests_test_model_py_test_style_conditioner_output_shape", + "target": "tests_test_model_py_test_style_conditioner_output_shape_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 0.5 + }, + { + "source": "tests_test_model_py_test_style_conditioner_output_shape", + "target": "StyleConditioner", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_model.py", + "weight": 0.8 + }, + { + "source": "tests_test_model_py_test_style_conditioner_output_shape", + "target": "randn", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_model.py", + "weight": 0.8 + }, + { + "source": "tests_test_model_py_test_style_conditioner_output_shape", + "target": "conditioner", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_model.py", + "weight": 0.8 + }, + { + "source": "tests_test_model_py", + "target": "tests_test_model_py_test_prepend_style_prefix", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 1.0 + }, + { + "source": "tests_test_model_py_test_prepend_style_prefix", + "target": "tests_test_model_py_test_prepend_style_prefix_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 0.5 + }, + { + "source": "tests_test_model_py_test_prepend_style_prefix", + "target": "randn", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_model.py", + "weight": 0.8 + }, + { + "source": "tests_test_model_py_test_prepend_style_prefix", + "target": "randn", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_model.py", + "weight": 0.8 + }, + { + "source": "tests_test_model_py_test_prepend_style_prefix", + "target": "prepend_style_prefix", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_model.py", + "weight": 0.8 + }, + { + "source": "tests_test_model_py", + "target": "tests_test_model_py_test_lora_config_creation", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 1.0 + }, + { + "source": "tests_test_model_py_test_lora_config_creation", + "target": "tests_test_model_py_test_lora_config_creation_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_model.py", + "weight": 0.5 + }, + { + "source": "tests_test_model_py_test_lora_config_creation", + "target": "create_lora_config", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_model.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/87a99d1d5a857d8735bfac55724ec6247264c8cb36c27aa764b4bc96324dc7a0.json b/graphify-out/cache/87a99d1d5a857d8735bfac55724ec6247264c8cb36c27aa764b4bc96324dc7a0.json new file mode 100644 index 0000000000000000000000000000000000000000..4736f13109114e0c984a4c8d14eec4e3ee61476c --- /dev/null +++ b/graphify-out/cache/87a99d1d5a857d8735bfac55724ec6247264c8cb36c27aa764b4bc96324dc7a0.json @@ -0,0 +1,4 @@ +{ + "nodes": [], + "edges": [] +} \ No newline at end of file diff --git a/graphify-out/cache/91e12c5814ce913e7bc37094cbcfe682bcc1e66d77ee2dc48524a4dab7c5ba0c.json b/graphify-out/cache/91e12c5814ce913e7bc37094cbcfe682bcc1e66d77ee2dc48524a4dab7c5ba0c.json new file mode 100644 index 0000000000000000000000000000000000000000..36f92e5b1887fcb17cfa0f31d474632ea44c2430 --- /dev/null +++ b/graphify-out/cache/91e12c5814ce913e7bc37094cbcfe682bcc1e66d77ee2dc48524a4dab7c5ba0c.json @@ -0,0 +1,233 @@ +{ + "nodes": [ + { + "id": "src_evaluation_authorship_verifier_py", + "label": "authorship_verifier.py", + "file_type": "code", + "source_file": "src/evaluation/authorship_verifier.py" + }, + { + "id": "src_evaluation_authorship_verifier_py_docstring", + "label": "Authorship verification module.\nUses a fine-tuned model to verify whether the co", + "file_type": "rationale", + "source_file": "src/evaluation/authorship_verifier.py" + }, + { + "id": "src_evaluation_authorship_verifier_py_AuthorshipVerifier", + "label": "AuthorshipVerifier", + "file_type": "code", + "source_file": "src/evaluation/authorship_verifier.py", + "source_location": "line 14" + }, + { + "id": "src_evaluation_authorship_verifier_py_AuthorshipVerifier_doc", + "label": "Verifies authorship consistency between input and output text.", + "file_type": "rationale", + "source_file": "src/evaluation/authorship_verifier.py", + "source_location": "line 14" + }, + { + "id": "src_evaluation_authorship_verifier_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/evaluation/authorship_verifier.py", + "source_location": "line 17" + }, + { + "id": "src_evaluation_authorship_verifier_py_verify", + "label": "verify()", + "file_type": "code", + "source_file": "src/evaluation/authorship_verifier.py", + "source_location": "line 26" + }, + { + "id": "src_evaluation_authorship_verifier_py_verify_doc", + "label": "Return probability that both texts were written by the same author.\n\nUses senten", + "file_type": "rationale", + "source_file": "src/evaluation/authorship_verifier.py", + "source_location": "line 26" + } + ], + "edges": [ + { + "source": "src_evaluation_authorship_verifier_py", + "target": "src_evaluation_authorship_verifier_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.5 + }, + { + "source": "src_evaluation_authorship_verifier_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_authorship_verifier_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_authorship_verifier_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_authorship_verifier_py", + "target": "torch.nn.functional", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.6 + }, + { + "source": "src_evaluation_authorship_verifier_py", + "target": "src_evaluation_authorship_verifier_py_AuthorshipVerifier", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 1.0 + }, + { + "source": "src_evaluation_authorship_verifier_py_AuthorshipVerifier", + "target": "src_evaluation_authorship_verifier_py_AuthorshipVerifier_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.5 + }, + { + "source": "src_evaluation_authorship_verifier_py", + "target": "src_evaluation_authorship_verifier_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 1.0 + }, + { + "source": "src_evaluation_authorship_verifier_py___init__", + "target": "SentenceTransformer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_authorship_verifier_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_authorship_verifier_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_authorship_verifier_py", + "target": "src_evaluation_authorship_verifier_py_verify", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 1.0 + }, + { + "source": "src_evaluation_authorship_verifier_py_verify", + "target": "src_evaluation_authorship_verifier_py_verify_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.5 + }, + { + "source": "src_evaluation_authorship_verifier_py_verify", + "target": "encode", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_authorship_verifier_py_verify", + "target": "cosine_similarity", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_authorship_verifier_py_verify", + "target": "unsqueeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_authorship_verifier_py_verify", + "target": "unsqueeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_authorship_verifier_py_verify", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_authorship_verifier_py_verify", + "target": "item", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.8 + }, + { + "source": "src_evaluation_authorship_verifier_py", + "target": "sentence_transformers", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/evaluation/authorship_verifier.py", + "weight": 0.6 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/94f3e912e6d9ed7eca335bbb708a1fa61c891dfc1f603a62284ece3a10ccc110.json b/graphify-out/cache/94f3e912e6d9ed7eca335bbb708a1fa61c891dfc1f603a62284ece3a10ccc110.json new file mode 100644 index 0000000000000000000000000000000000000000..3bf39c71f4800a296d4dc65a4b9212115e5007ec --- /dev/null +++ b/graphify-out/cache/94f3e912e6d9ed7eca335bbb708a1fa61c891dfc1f603a62284ece3a10ccc110.json @@ -0,0 +1,581 @@ +{ + "nodes": [ + { + "id": "scripts_evaluate_py", + "label": "evaluate.py", + "file_type": "code", + "source_file": "scripts/evaluate.py" + }, + { + "id": "scripts_evaluate_py_docstring", + "label": "Evaluation script.\nRuns all evaluation metrics on the test set.\nRun: python scri", + "file_type": "rationale", + "source_file": "scripts/evaluate.py" + }, + { + "id": "scripts_evaluate_py_evaluate", + "label": "evaluate()", + "file_type": "code", + "source_file": "scripts/evaluate.py", + "source_location": "line 30" + }, + { + "id": "scripts_evaluate_py_evaluate_doc", + "label": "Run evaluation on the specified data split.", + "file_type": "rationale", + "source_file": "scripts/evaluate.py", + "source_location": "line 30" + } + ], + "edges": [ + { + "source": "scripts_evaluate_py", + "target": "scripts_evaluate_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.5 + }, + { + "source": "scripts_evaluate_py", + "target": "click", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.6 + }, + { + "source": "scripts_evaluate_py", + "target": "yaml", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.6 + }, + { + "source": "scripts_evaluate_py", + "target": "json", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.6 + }, + { + "source": "scripts_evaluate_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.6 + }, + { + "source": "scripts_evaluate_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.6 + }, + { + "source": "scripts_evaluate_py", + "target": "rich.console", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.6 + }, + { + "source": "scripts_evaluate_py", + "target": "rich.table", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.6 + }, + { + "source": "scripts_evaluate_py", + "target": "src.model.base_model", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.6 + }, + { + "source": "scripts_evaluate_py", + "target": "src.model.generation_utils", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.6 + }, + { + "source": "scripts_evaluate_py", + "target": "src.evaluation.gleu_scorer", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.6 + }, + { + "source": "scripts_evaluate_py", + "target": "src.evaluation.errant_evaluator", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.6 + }, + { + "source": "scripts_evaluate_py", + "target": "src.evaluation.style_metrics", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.6 + }, + { + "source": "scripts_evaluate_py", + "target": "src.style.fingerprinter", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.6 + }, + { + "source": "scripts_evaluate_py", + "target": "src.vocabulary.awl_loader", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.6 + }, + { + "source": "scripts_evaluate_py", + "target": "scripts_evaluate_py_evaluate", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 1.0 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "scripts_evaluate_py_evaluate_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.5 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "command", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "option", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "option", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "option", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "eval", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "batch_generate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "GLEUScorer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "compute_gleu", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "compute_bert_score", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "evaluate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "StyleFingerprinter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "evaluate_batch", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "Table", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "add_column", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "add_column", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "add_row", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "add_row", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "add_row", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "add_row", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "add_row", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "safe_load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "from_pretrained", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "from_pretrained", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "enumerate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "dump", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "load_model_and_tokenizer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "loads", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "ERRANTEvaluator", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "StyleEvaluator", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "AWLLoader", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py_evaluate", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/evaluate.py", + "weight": 0.8 + }, + { + "source": "scripts_evaluate_py", + "target": "transformers", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/evaluate.py", + "weight": 0.6 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/95efc5d1b18575ef251a1d4eb3716430045343685d56a3f20e5193ce92653552.json b/graphify-out/cache/95efc5d1b18575ef251a1d4eb3716430045343685d56a3f20e5193ce92653552.json new file mode 100644 index 0000000000000000000000000000000000000000..fbe073510f681fe542d84f3035ac5d7153e41d97 --- /dev/null +++ b/graphify-out/cache/95efc5d1b18575ef251a1d4eb3716430045343685d56a3f20e5193ce92653552.json @@ -0,0 +1,563 @@ +{ + "nodes": [ + { + "id": "src_preprocessing_pipeline_py", + "label": "pipeline.py", + "file_type": "code", + "source_file": "src/preprocessing/pipeline.py" + }, + { + "id": "src_preprocessing_pipeline_py_docstring", + "label": "Master pre-processing pipeline. Runs all NLP stages in sequence.\nReturns a Prepr", + "file_type": "rationale", + "source_file": "src/preprocessing/pipeline.py" + }, + { + "id": "src_preprocessing_pipeline_py_EntitySpan", + "label": "EntitySpan", + "file_type": "code", + "source_file": "src/preprocessing/pipeline.py", + "source_location": "line 15" + }, + { + "id": "src_preprocessing_pipeline_py_PreprocessedDoc", + "label": "PreprocessedDoc", + "file_type": "code", + "source_file": "src/preprocessing/pipeline.py", + "source_location": "line 23" + }, + { + "id": "src_preprocessing_pipeline_py_PreprocessingPipeline", + "label": "PreprocessingPipeline", + "file_type": "code", + "source_file": "src/preprocessing/pipeline.py", + "source_location": "line 35" + }, + { + "id": "src_preprocessing_pipeline_py_PreprocessingPipeline_doc", + "label": "Orchestrates all pre-processing stages: spell correction, parsing, NER, readabil", + "file_type": "rationale", + "source_file": "src/preprocessing/pipeline.py", + "source_location": "line 35" + }, + { + "id": "src_preprocessing_pipeline_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/preprocessing/pipeline.py", + "source_location": "line 38" + }, + { + "id": "src_preprocessing_pipeline_py__extract_readability", + "label": "_extract_readability()", + "file_type": "code", + "source_file": "src/preprocessing/pipeline.py", + "source_location": "line 50" + }, + { + "id": "src_preprocessing_pipeline_py__extract_readability_doc", + "label": "Compute readability scores (Flesch-Kincaid, Gunning Fog, etc.).", + "file_type": "rationale", + "source_file": "src/preprocessing/pipeline.py", + "source_location": "line 50" + }, + { + "id": "src_preprocessing_pipeline_py__extract_dep_tree", + "label": "_extract_dep_tree()", + "file_type": "code", + "source_file": "src/preprocessing/pipeline.py", + "source_location": "line 70" + }, + { + "id": "src_preprocessing_pipeline_py__extract_dep_tree_doc", + "label": "Extract grammatical skeleton: subject-verb-object per sentence.", + "file_type": "rationale", + "source_file": "src/preprocessing/pipeline.py", + "source_location": "line 70" + }, + { + "id": "src_preprocessing_pipeline_py_process", + "label": "process()", + "file_type": "code", + "source_file": "src/preprocessing/pipeline.py", + "source_location": "line 90" + }, + { + "id": "src_preprocessing_pipeline_py_process_doc", + "label": "Run full pre-processing pipeline on raw text.\n\n7-step pipeline:\n1. Spell correct", + "file_type": "rationale", + "source_file": "src/preprocessing/pipeline.py", + "source_location": "line 90" + } + ], + "edges": [ + { + "source": "src_preprocessing_pipeline_py", + "target": "src_preprocessing_pipeline_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_pipeline_py", + "target": "spacy", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_pipeline_py", + "target": "dataclasses", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_pipeline_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_pipeline_py", + "target": "spell_corrector", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_pipeline_py", + "target": "textstat", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_pipeline_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_pipeline_py", + "target": "src_preprocessing_pipeline_py_EntitySpan", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_pipeline_py", + "target": "src_preprocessing_pipeline_py_PreprocessedDoc", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_pipeline_py", + "target": "src_preprocessing_pipeline_py_PreprocessingPipeline", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_pipeline_py_PreprocessingPipeline", + "target": "src_preprocessing_pipeline_py_PreprocessingPipeline_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_pipeline_py", + "target": "src_preprocessing_pipeline_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_pipeline_py___init__", + "target": "DyslexiaAwareSpellCorrector", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py___init__", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py___init__", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py", + "target": "src_preprocessing_pipeline_py__extract_readability", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_pipeline_py__extract_readability", + "target": "src_preprocessing_pipeline_py__extract_readability_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_pipeline_py__extract_readability", + "target": "flesch_kincaid_grade", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py__extract_readability", + "target": "gunning_fog", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py__extract_readability", + "target": "smog_index", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py__extract_readability", + "target": "automated_readability_index", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py__extract_readability", + "target": "flesch_reading_ease", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py__extract_readability", + "target": "coleman_liau_index", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py__extract_readability", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py", + "target": "src_preprocessing_pipeline_py__extract_dep_tree", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_pipeline_py__extract_dep_tree", + "target": "src_preprocessing_pipeline_py__extract_dep_tree_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_pipeline_py__extract_dep_tree", + "target": "list", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py__extract_dep_tree", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py__extract_dep_tree", + "target": "fromkeys", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py__extract_dep_tree", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py__extract_dep_tree", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py", + "target": "src_preprocessing_pipeline_py_process", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "src_preprocessing_pipeline_py_process_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "correct", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "nlp", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "_extract_readability", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "PreprocessedDoc", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "PreprocessedDoc", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "EntitySpan", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "_extract_dep_tree", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_pipeline_py_process", + "target": "_extract_readability", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/pipeline.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/96719597cf129832f77cf4d1ba51bdaa8f1456a3408b9a9ed16eb49dd4ad4d38.json b/graphify-out/cache/96719597cf129832f77cf4d1ba51bdaa8f1456a3408b9a9ed16eb49dd4ad4d38.json new file mode 100644 index 0000000000000000000000000000000000000000..2518909afaf4bf1314e0adb124f91dd95f888566 --- /dev/null +++ b/graphify-out/cache/96719597cf129832f77cf4d1ba51bdaa8f1456a3408b9a9ed16eb49dd4ad4d38.json @@ -0,0 +1,95 @@ +{ + "nodes": [ + { + "id": "src_api_schemas_py", + "label": "schemas.py", + "file_type": "code", + "source_file": "src/api/schemas.py" + }, + { + "id": "src_api_schemas_py_docstring", + "label": "Pydantic schemas for API request/response validation.", + "file_type": "rationale", + "source_file": "src/api/schemas.py" + }, + { + "id": "src_api_schemas_py_CorrectionRequest", + "label": "CorrectionRequest", + "file_type": "code", + "source_file": "src/api/schemas.py", + "source_location": "line 9" + }, + { + "id": "src_api_schemas_py_CorrectionResponse", + "label": "CorrectionResponse", + "file_type": "code", + "source_file": "src/api/schemas.py", + "source_location": "line 15" + } + ], + "edges": [ + { + "source": "src_api_schemas_py", + "target": "src_api_schemas_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/schemas.py", + "weight": 0.5 + }, + { + "source": "src_api_schemas_py", + "target": "pydantic", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/schemas.py", + "weight": 0.6 + }, + { + "source": "src_api_schemas_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/schemas.py", + "weight": 0.6 + }, + { + "source": "src_api_schemas_py", + "target": "src_api_schemas_py_CorrectionRequest", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/schemas.py", + "weight": 1.0 + }, + { + "source": "src_api_schemas_py_CorrectionRequest", + "target": "BaseModel", + "relation": "inherits", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/schemas.py", + "weight": 1.0 + }, + { + "source": "src_api_schemas_py", + "target": "src_api_schemas_py_CorrectionResponse", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/schemas.py", + "weight": 1.0 + }, + { + "source": "src_api_schemas_py_CorrectionResponse", + "target": "BaseModel", + "relation": "inherits", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/schemas.py", + "weight": 1.0 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/989eb0a201fa35ad0c1363eb3f7e7acaf8f427e56029f3ce56f910017e2c6b37.json b/graphify-out/cache/989eb0a201fa35ad0c1363eb3f7e7acaf8f427e56029f3ce56f910017e2c6b37.json new file mode 100644 index 0000000000000000000000000000000000000000..01907d3d6553749cc5ae55b157292a5745f6af0d --- /dev/null +++ b/graphify-out/cache/989eb0a201fa35ad0c1363eb3f7e7acaf8f427e56029f3ce56f910017e2c6b37.json @@ -0,0 +1,401 @@ +{ + "nodes": [ + { + "id": "scripts_run_inference_py", + "label": "run_inference.py", + "file_type": "code", + "source_file": "scripts/run_inference.py" + }, + { + "id": "scripts_run_inference_py_docstring", + "label": "Interactive inference script.\nRun: python scripts/run_inference.py --config conf", + "file_type": "rationale", + "source_file": "scripts/run_inference.py" + }, + { + "id": "scripts_run_inference_py_run_inference", + "label": "run_inference()", + "file_type": "code", + "source_file": "scripts/run_inference.py", + "source_location": "line 21" + }, + { + "id": "scripts_run_inference_py_run_inference_doc", + "label": "Run inference on text input.", + "file_type": "rationale", + "source_file": "scripts/run_inference.py", + "source_location": "line 21" + } + ], + "edges": [ + { + "source": "scripts_run_inference_py", + "target": "scripts_run_inference_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/run_inference.py", + "weight": 0.5 + }, + { + "source": "scripts_run_inference_py", + "target": "click", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/run_inference.py", + "weight": 0.6 + }, + { + "source": "scripts_run_inference_py", + "target": "yaml", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/run_inference.py", + "weight": 0.6 + }, + { + "source": "scripts_run_inference_py", + "target": "rich.console", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/run_inference.py", + "weight": 0.6 + }, + { + "source": "scripts_run_inference_py", + "target": "rich.panel", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/run_inference.py", + "weight": 0.6 + }, + { + "source": "scripts_run_inference_py", + "target": "rich.table", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/run_inference.py", + "weight": 0.6 + }, + { + "source": "scripts_run_inference_py", + "target": "src.inference.corrector", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/run_inference.py", + "weight": 0.6 + }, + { + "source": "scripts_run_inference_py", + "target": "scripts_run_inference_py_run_inference", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/run_inference.py", + "weight": 1.0 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "scripts_run_inference_py_run_inference_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "scripts/run_inference.py", + "weight": 0.5 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "command", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "option", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "option", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "option", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "option", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "AcademicCorrector", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "safe_load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "correct", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "Table", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "add_column", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "add_column", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "add_row", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "add_row", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "items", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "Panel", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "Panel", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "add_row", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "input", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "correct", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "Panel", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + }, + { + "source": "scripts_run_inference_py_run_inference", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "scripts/run_inference.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/9adf602741570e2c2dd2ea0746cd51c3f3b6ec81fd986a6bcaf960e021e0c503.json b/graphify-out/cache/9adf602741570e2c2dd2ea0746cd51c3f3b6ec81fd986a6bcaf960e021e0c503.json new file mode 100644 index 0000000000000000000000000000000000000000..939f601db9075f5b8d7321d30e81916cfbea7c80 --- /dev/null +++ b/graphify-out/cache/9adf602741570e2c2dd2ea0746cd51c3f3b6ec81fd986a6bcaf960e021e0c503.json @@ -0,0 +1,377 @@ +{ + "nodes": [ + { + "id": "src_style_formality_classifier_py", + "label": "formality_classifier.py", + "file_type": "code", + "source_file": "src/style/formality_classifier.py" + }, + { + "id": "src_style_formality_classifier_py_docstring", + "label": "Formality classifier module.\nClassifies text on a 0-1 formality scale using ling", + "file_type": "rationale", + "source_file": "src/style/formality_classifier.py" + }, + { + "id": "src_style_formality_classifier_py_FormalityClassifier", + "label": "FormalityClassifier", + "file_type": "code", + "source_file": "src/style/formality_classifier.py", + "source_location": "line 11" + }, + { + "id": "src_style_formality_classifier_py_FormalityClassifier_doc", + "label": "Scores text formality on a 0-1 scale using rule-based heuristics.", + "file_type": "rationale", + "source_file": "src/style/formality_classifier.py", + "source_location": "line 11" + }, + { + "id": "src_style_formality_classifier_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/style/formality_classifier.py", + "source_location": "line 36" + }, + { + "id": "src_style_formality_classifier_py_score", + "label": "score()", + "file_type": "code", + "source_file": "src/style/formality_classifier.py", + "source_location": "line 39" + }, + { + "id": "src_style_formality_classifier_py_score_doc", + "label": "Return formality score in [0, 1]. Higher = more formal.\n\nScoring based on:\n- Con", + "file_type": "rationale", + "source_file": "src/style/formality_classifier.py", + "source_location": "line 39" + } + ], + "edges": [ + { + "source": "src_style_formality_classifier_py", + "target": "src_style_formality_classifier_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/formality_classifier.py", + "weight": 0.5 + }, + { + "source": "src_style_formality_classifier_py", + "target": "re", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/formality_classifier.py", + "weight": 0.6 + }, + { + "source": "src_style_formality_classifier_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/formality_classifier.py", + "weight": 0.6 + }, + { + "source": "src_style_formality_classifier_py", + "target": "src_style_formality_classifier_py_FormalityClassifier", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/formality_classifier.py", + "weight": 1.0 + }, + { + "source": "src_style_formality_classifier_py_FormalityClassifier", + "target": "src_style_formality_classifier_py_FormalityClassifier_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/formality_classifier.py", + "weight": 0.5 + }, + { + "source": "src_style_formality_classifier_py", + "target": "src_style_formality_classifier_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/formality_classifier.py", + "weight": 1.0 + }, + { + "source": "src_style_formality_classifier_py", + "target": "src_style_formality_classifier_py_score", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/formality_classifier.py", + "weight": 1.0 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "src_style_formality_classifier_py_score_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/style/formality_classifier.py", + "weight": 0.5 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "count", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "count", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + }, + { + "source": "src_style_formality_classifier_py_score", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/style/formality_classifier.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/9d86ed15464e67495058980d730ffde7b5a58b71841a5b526c00847b17b3a999.json b/graphify-out/cache/9d86ed15464e67495058980d730ffde7b5a58b71841a5b526c00847b17b3a999.json new file mode 100644 index 0000000000000000000000000000000000000000..973852742cf8e8c42f03211a5612f1622e8f343c --- /dev/null +++ b/graphify-out/cache/9d86ed15464e67495058980d730ffde7b5a58b71841a5b526c00847b17b3a999.json @@ -0,0 +1,206 @@ +{ + "nodes": [ + { + "id": "src_preprocessing_sentence_segmenter_py", + "label": "sentence_segmenter.py", + "file_type": "code", + "source_file": "src/preprocessing/sentence_segmenter.py" + }, + { + "id": "src_preprocessing_sentence_segmenter_py_docstring", + "label": "Sentence segmentation module.\nUses spaCy's sentence boundary detection for accur", + "file_type": "rationale", + "source_file": "src/preprocessing/sentence_segmenter.py" + }, + { + "id": "src_preprocessing_sentence_segmenter_py_SentenceSegmenter", + "label": "SentenceSegmenter", + "file_type": "code", + "source_file": "src/preprocessing/sentence_segmenter.py", + "source_location": "line 12" + }, + { + "id": "src_preprocessing_sentence_segmenter_py_SentenceSegmenter_doc", + "label": "Segments text into sentences using spaCy's transformer model.", + "file_type": "rationale", + "source_file": "src/preprocessing/sentence_segmenter.py", + "source_location": "line 12" + }, + { + "id": "src_preprocessing_sentence_segmenter_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/preprocessing/sentence_segmenter.py", + "source_location": "line 15" + }, + { + "id": "src_preprocessing_sentence_segmenter_py_segment", + "label": "segment()", + "file_type": "code", + "source_file": "src/preprocessing/sentence_segmenter.py", + "source_location": "line 23" + }, + { + "id": "src_preprocessing_sentence_segmenter_py_segment_doc", + "label": "Split text into individual sentences.", + "file_type": "rationale", + "source_file": "src/preprocessing/sentence_segmenter.py", + "source_location": "line 23" + } + ], + "edges": [ + { + "source": "src_preprocessing_sentence_segmenter_py", + "target": "src_preprocessing_sentence_segmenter_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_sentence_segmenter_py", + "target": "spacy", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_sentence_segmenter_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_sentence_segmenter_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_sentence_segmenter_py", + "target": "src_preprocessing_sentence_segmenter_py_SentenceSegmenter", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_sentence_segmenter_py_SentenceSegmenter", + "target": "src_preprocessing_sentence_segmenter_py_SentenceSegmenter_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_sentence_segmenter_py", + "target": "src_preprocessing_sentence_segmenter_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_sentence_segmenter_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_sentence_segmenter_py___init__", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_sentence_segmenter_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_sentence_segmenter_py___init__", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_sentence_segmenter_py", + "target": "src_preprocessing_sentence_segmenter_py_segment", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_sentence_segmenter_py_segment", + "target": "src_preprocessing_sentence_segmenter_py_segment_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_sentence_segmenter_py_segment", + "target": "nlp", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_sentence_segmenter_py_segment", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_sentence_segmenter_py_segment", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_sentence_segmenter_py_segment", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/sentence_segmenter.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/a03f6897311103c16c84128894fe3b673973a3c039e421741d7d050b64162a76.json b/graphify-out/cache/a03f6897311103c16c84128894fe3b673973a3c039e421741d7d050b64162a76.json new file mode 100644 index 0000000000000000000000000000000000000000..86f43b6cda6d2e7a88e84afca7e13bd4b91b56be --- /dev/null +++ b/graphify-out/cache/a03f6897311103c16c84128894fe3b673973a3c039e421741d7d050b64162a76.json @@ -0,0 +1,312 @@ +{ + "nodes": [ + { + "id": "src_api_main_py", + "label": "main.py", + "file_type": "code", + "source_file": "src/api/main.py" + }, + { + "id": "src_api_main_py_docstring", + "label": "FastAPI server for the Dyslexia Academic Writing Corrector API.\nProvides RESTful", + "file_type": "rationale", + "source_file": "src/api/main.py" + }, + { + "id": "src_api_main_py_startup", + "label": "startup()", + "file_type": "code", + "source_file": "src/api/main.py", + "source_location": "line 31" + }, + { + "id": "src_api_main_py_startup_doc", + "label": "Load config and initialise corrector on startup.", + "file_type": "rationale", + "source_file": "src/api/main.py", + "source_location": "line 31" + }, + { + "id": "src_api_main_py_correct_text", + "label": "correct_text()", + "file_type": "code", + "source_file": "src/api/main.py", + "source_location": "line 45" + }, + { + "id": "src_api_main_py_correct_text_doc", + "label": "Correct dyslectic text with style preservation and academic elevation.", + "file_type": "rationale", + "source_file": "src/api/main.py", + "source_location": "line 45" + }, + { + "id": "src_api_main_py_health", + "label": "health()", + "file_type": "code", + "source_file": "src/api/main.py", + "source_location": "line 70" + }, + { + "id": "src_api_main_py_health_doc", + "label": "Health check endpoint.", + "file_type": "rationale", + "source_file": "src/api/main.py", + "source_location": "line 70" + } + ], + "edges": [ + { + "source": "src_api_main_py", + "target": "src_api_main_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/main.py", + "weight": 0.5 + }, + { + "source": "src_api_main_py", + "target": "fastapi", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/main.py", + "weight": 0.6 + }, + { + "source": "src_api_main_py", + "target": "fastapi.middleware.cors", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/main.py", + "weight": 0.6 + }, + { + "source": "src_api_main_py", + "target": "schemas", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/main.py", + "weight": 0.6 + }, + { + "source": "src_api_main_py", + "target": "inference.corrector", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/main.py", + "weight": 0.6 + }, + { + "source": "src_api_main_py", + "target": "yaml", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/main.py", + "weight": 0.6 + }, + { + "source": "src_api_main_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/main.py", + "weight": 0.6 + }, + { + "source": "src_api_main_py", + "target": "src_api_main_py_startup", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/main.py", + "weight": 1.0 + }, + { + "source": "src_api_main_py_startup", + "target": "src_api_main_py_startup_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/main.py", + "weight": 0.5 + }, + { + "source": "src_api_main_py_startup", + "target": "on_event", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/main.py", + "weight": 0.8 + }, + { + "source": "src_api_main_py_startup", + "target": "AcademicCorrector", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/main.py", + "weight": 0.8 + }, + { + "source": "src_api_main_py_startup", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/main.py", + "weight": 0.8 + }, + { + "source": "src_api_main_py_startup", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/main.py", + "weight": 0.8 + }, + { + "source": "src_api_main_py_startup", + "target": "safe_load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/main.py", + "weight": 0.8 + }, + { + "source": "src_api_main_py_startup", + "target": "error", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/main.py", + "weight": 0.8 + }, + { + "source": "src_api_main_py_startup", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/main.py", + "weight": 0.8 + }, + { + "source": "src_api_main_py", + "target": "src_api_main_py_correct_text", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/main.py", + "weight": 1.0 + }, + { + "source": "src_api_main_py_correct_text", + "target": "src_api_main_py_correct_text_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/main.py", + "weight": 0.5 + }, + { + "source": "src_api_main_py_correct_text", + "target": "post", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/main.py", + "weight": 0.8 + }, + { + "source": "src_api_main_py_correct_text", + "target": "HTTPException", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/main.py", + "weight": 0.8 + }, + { + "source": "src_api_main_py_correct_text", + "target": "correct", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/main.py", + "weight": 0.8 + }, + { + "source": "src_api_main_py_correct_text", + "target": "CorrectionResponse", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/main.py", + "weight": 0.8 + }, + { + "source": "src_api_main_py_correct_text", + "target": "error", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/main.py", + "weight": 0.8 + }, + { + "source": "src_api_main_py_correct_text", + "target": "HTTPException", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/main.py", + "weight": 0.8 + }, + { + "source": "src_api_main_py_correct_text", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/main.py", + "weight": 0.8 + }, + { + "source": "src_api_main_py", + "target": "src_api_main_py_health", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/main.py", + "weight": 1.0 + }, + { + "source": "src_api_main_py_health", + "target": "src_api_main_py_health_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/main.py", + "weight": 0.5 + }, + { + "source": "src_api_main_py_health", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/main.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/b2b9b7a9d23282d998d482f2a60ebedb51a89d9e271a1c27bcc433e362975b75.json b/graphify-out/cache/b2b9b7a9d23282d998d482f2a60ebedb51a89d9e271a1c27bcc433e362975b75.json new file mode 100644 index 0000000000000000000000000000000000000000..abbe5fb0bc273135461b6da0fb5171cbd1abd810 --- /dev/null +++ b/graphify-out/cache/b2b9b7a9d23282d998d482f2a60ebedb51a89d9e271a1c27bcc433e362975b75.json @@ -0,0 +1,1423 @@ +{ + "nodes": [ + { + "id": "data_raw_fce_json_to_m2_py", + "label": "json_to_m2.py", + "file_type": "code", + "source_file": "data/raw/fce/json_to_m2.py" + }, + { + "id": "data_raw_fce_json_to_m2_py_main", + "label": "main()", + "file_type": "code", + "source_file": "data/raw/fce/json_to_m2.py", + "source_location": "line 11" + }, + { + "id": "data_raw_fce_json_to_m2_py_parse_args", + "label": "parse_args()", + "file_type": "code", + "source_file": "data/raw/fce/json_to_m2.py", + "source_location": "line 132" + }, + { + "id": "data_raw_fce_json_to_m2_py_get_paras", + "label": "get_paras()", + "file_type": "code", + "source_file": "data/raw/fce/json_to_m2.py", + "source_location": "line 185" + }, + { + "id": "data_raw_fce_json_to_m2_py_clean_para", + "label": "clean_para()", + "file_type": "code", + "source_file": "data/raw/fce/json_to_m2.py", + "source_location": "line 237" + }, + { + "id": "data_raw_fce_json_to_m2_py_get_token_edits", + "label": "get_token_edits()", + "file_type": "code", + "source_file": "data/raw/fce/json_to_m2.py", + "source_location": "line 283" + }, + { + "id": "data_raw_fce_json_to_m2_py_get_all_tok_starts_and_ends", + "label": "get_all_tok_starts_and_ends()", + "file_type": "code", + "source_file": "data/raw/fce/json_to_m2.py", + "source_location": "line 324" + }, + { + "id": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "label": "convert_char_to_tok()", + "file_type": "code", + "source_file": "data/raw/fce/json_to_m2.py", + "source_location": "line 337" + }, + { + "id": "data_raw_fce_json_to_m2_py_get_sents", + "label": "get_sents()", + "file_type": "code", + "source_file": "data/raw/fce/json_to_m2.py", + "source_location": "line 385" + }, + { + "id": "data_raw_fce_json_to_m2_py_prepare_sent_edits_output", + "label": "prepare_sent_edits_output()", + "file_type": "code", + "source_file": "data/raw/fce/json_to_m2.py", + "source_location": "line 464" + }, + { + "id": "data_raw_fce_json_to_m2_py_noop_edit", + "label": "noop_edit()", + "file_type": "code", + "source_file": "data/raw/fce/json_to_m2.py", + "source_location": "line 481" + } + ], + "edges": [ + { + "source": "data_raw_fce_json_to_m2_py", + "target": "argparse", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.6 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "errant", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.6 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "json", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.6 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "re", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.6 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "spacy", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.6 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "bisect", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.6 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "operator", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.6 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "string", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.6 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "data_raw_fce_json_to_m2_py_main", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 1.0 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "parse_args", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "ord", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "items", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "loads", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "translate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "sorted", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "enumerate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "get_paras", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "keys", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "write", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "parse", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "write", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "clean_para", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "nlp", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "get_token_edits", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "get_sents", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "parse", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "sorted", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "sorted", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "write", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "write", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "annotate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "itemgetter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "itemgetter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "import_edit", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "import_edit", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "write", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "write", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "noop_edit", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "to_m2", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "noop_edit", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_main", + "target": "to_m2", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "data_raw_fce_json_to_m2_py_parse_args", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 1.0 + }, + { + "source": "data_raw_fce_json_to_m2_py_parse_args", + "target": "ArgumentParser", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_parse_args", + "target": "add_argument", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_parse_args", + "target": "add_mutually_exclusive_group", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_parse_args", + "target": "add_argument", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_parse_args", + "target": "add_argument", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_parse_args", + "target": "add_argument", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_parse_args", + "target": "add_argument", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_parse_args", + "target": "add_argument", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_parse_args", + "target": "add_argument", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_parse_args", + "target": "add_argument", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_parse_args", + "target": "add_argument", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_parse_args", + "target": "parse_args", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "data_raw_fce_json_to_m2_py_get_paras", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 1.0 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_paras", + "target": "finditer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_paras", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_paras", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_paras", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_paras", + "target": "group", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_paras", + "target": "start", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_paras", + "target": "end", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_paras", + "target": "translate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_paras", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_paras", + "target": "start", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_paras", + "target": "start", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_paras", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "data_raw_fce_json_to_m2_py_clean_para", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 1.0 + }, + { + "source": "data_raw_fce_json_to_m2_py_clean_para", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_clean_para", + "target": "search", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_clean_para", + "target": "startswith", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_clean_para", + "target": "start", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_clean_para", + "target": "search", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_clean_para", + "target": "lstrip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_clean_para", + "target": "startswith", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_clean_para", + "target": "endswith", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_clean_para", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_clean_para", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "data_raw_fce_json_to_m2_py_get_token_edits", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 1.0 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_token_edits", + "target": "get_all_tok_starts_and_ends", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_token_edits", + "target": "sorted", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_token_edits", + "target": "convert_char_to_tok", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_token_edits", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_token_edits", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_token_edits", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_token_edits", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_token_edits", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_token_edits", + "target": "index", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_token_edits", + "target": "nlp", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_token_edits", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "data_raw_fce_json_to_m2_py_get_all_tok_starts_and_ends", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 1.0 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_all_tok_starts_and_ends", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_all_tok_starts_and_ends", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_all_tok_starts_and_ends", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 1.0 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "index", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "bisect", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "index", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "index", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "index", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "index", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "index", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "bisect", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "bisect", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "index", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "index", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "index", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "bisect", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "index", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_convert_char_to_tok", + "target": "bisect", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "data_raw_fce_json_to_m2_py_get_sents", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 1.0 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "prepare_sent_edits_output", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "enumerate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "prepare_sent_edits_output", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_get_sents", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "data_raw_fce_json_to_m2_py_prepare_sent_edits_output", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 1.0 + }, + { + "source": "data_raw_fce_json_to_m2_py_prepare_sent_edits_output", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_prepare_sent_edits_output", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_prepare_sent_edits_output", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py_prepare_sent_edits_output", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + }, + { + "source": "data_raw_fce_json_to_m2_py", + "target": "data_raw_fce_json_to_m2_py_noop_edit", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 1.0 + }, + { + "source": "data_raw_fce_json_to_m2_py_noop_edit", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/fce/json_to_m2.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/b34609507ef8597124990504eff673b205703694a5a86348eac61de1e98662c9.json b/graphify-out/cache/b34609507ef8597124990504eff673b205703694a5a86348eac61de1e98662c9.json new file mode 100644 index 0000000000000000000000000000000000000000..e1340347b755d090e3b234d3c8d69def58a9955a --- /dev/null +++ b/graphify-out/cache/b34609507ef8597124990504eff673b205703694a5a86348eac61de1e98662c9.json @@ -0,0 +1,364 @@ +{ + "nodes": [ + { + "id": "src_api_middleware_py", + "label": "middleware.py", + "file_type": "code", + "source_file": "src/api/middleware.py" + }, + { + "id": "src_api_middleware_py_docstring", + "label": "API middleware for request logging, rate limiting, and error handling.", + "file_type": "rationale", + "source_file": "src/api/middleware.py" + }, + { + "id": "src_api_middleware_py_RequestLoggingMiddleware", + "label": "RequestLoggingMiddleware", + "file_type": "code", + "source_file": "src/api/middleware.py", + "source_location": "line 13" + }, + { + "id": "src_api_middleware_py_RequestLoggingMiddleware_doc", + "label": "Logs all incoming requests with timing information.", + "file_type": "rationale", + "source_file": "src/api/middleware.py", + "source_location": "line 13" + }, + { + "id": "src_api_middleware_py_RateLimitMiddleware", + "label": "RateLimitMiddleware", + "file_type": "code", + "source_file": "src/api/middleware.py", + "source_location": "line 35" + }, + { + "id": "src_api_middleware_py_RateLimitMiddleware_doc", + "label": "Simple in-memory rate limiting.", + "file_type": "rationale", + "source_file": "src/api/middleware.py", + "source_location": "line 35" + }, + { + "id": "src_api_middleware_py_dispatch", + "label": "dispatch()", + "file_type": "code", + "source_file": "src/api/middleware.py", + "source_location": "line 16" + }, + { + "id": "src_api_middleware_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/api/middleware.py", + "source_location": "line 38" + }, + { + "id": "src_api_middleware_py_dispatch", + "label": "dispatch()", + "file_type": "code", + "source_file": "src/api/middleware.py", + "source_location": "line 45" + } + ], + "edges": [ + { + "source": "src_api_middleware_py", + "target": "src_api_middleware_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 0.5 + }, + { + "source": "src_api_middleware_py", + "target": "fastapi", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 0.6 + }, + { + "source": "src_api_middleware_py", + "target": "fastapi.responses", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 0.6 + }, + { + "source": "src_api_middleware_py", + "target": "starlette.middleware.base", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 0.6 + }, + { + "source": "src_api_middleware_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 0.6 + }, + { + "source": "src_api_middleware_py", + "target": "time", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 0.6 + }, + { + "source": "src_api_middleware_py", + "target": "collections", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 0.6 + }, + { + "source": "src_api_middleware_py", + "target": "src_api_middleware_py_RequestLoggingMiddleware", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 1.0 + }, + { + "source": "src_api_middleware_py_RequestLoggingMiddleware", + "target": "src_api_middleware_py_RequestLoggingMiddleware_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 0.5 + }, + { + "source": "src_api_middleware_py_RequestLoggingMiddleware", + "target": "BaseHTTPMiddleware", + "relation": "inherits", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 1.0 + }, + { + "source": "src_api_middleware_py", + "target": "src_api_middleware_py_RateLimitMiddleware", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 1.0 + }, + { + "source": "src_api_middleware_py_RateLimitMiddleware", + "target": "src_api_middleware_py_RateLimitMiddleware_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 0.5 + }, + { + "source": "src_api_middleware_py_RateLimitMiddleware", + "target": "BaseHTTPMiddleware", + "relation": "inherits", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 1.0 + }, + { + "source": "src_api_middleware_py", + "target": "src_api_middleware_py_dispatch", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 1.0 + }, + { + "source": "src_api_middleware_py_dispatch", + "target": "time", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py_dispatch", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py_dispatch", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py_dispatch", + "target": "call_next", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py_dispatch", + "target": "error", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py_dispatch", + "target": "time", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py", + "target": "src_api_middleware_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 1.0 + }, + { + "source": "src_api_middleware_py___init__", + "target": "__init__", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py___init__", + "target": "defaultdict", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py___init__", + "target": "super", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py", + "target": "src_api_middleware_py_dispatch", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/api/middleware.py", + "weight": 1.0 + }, + { + "source": "src_api_middleware_py_dispatch", + "target": "time", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py_dispatch", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py_dispatch", + "target": "popleft", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py_dispatch", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py_dispatch", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py_dispatch", + "target": "JSONResponse", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py_dispatch", + "target": "call_next", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + }, + { + "source": "src_api_middleware_py_dispatch", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/api/middleware.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/b67f9a1fad002afb4a48ab84068d8f62c09b9efcc521e7ffd4c98742e5ca773f.json b/graphify-out/cache/b67f9a1fad002afb4a48ab84068d8f62c09b9efcc521e7ffd4c98742e5ca773f.json new file mode 100644 index 0000000000000000000000000000000000000000..332ebb65490ee262ff01c25a1c8dc2a41d8b80cd --- /dev/null +++ b/graphify-out/cache/b67f9a1fad002afb4a48ab84068d8f62c09b9efcc521e7ffd4c98742e5ca773f.json @@ -0,0 +1,301 @@ +{ + "nodes": [ + { + "id": "src_preprocessing_dependency_parser_py", + "label": "dependency_parser.py", + "file_type": "code", + "source_file": "src/preprocessing/dependency_parser.py" + }, + { + "id": "src_preprocessing_dependency_parser_py_docstring", + "label": "Dependency parser module.\nExtracts grammatical skeletons (subject-verb-object) f", + "file_type": "rationale", + "source_file": "src/preprocessing/dependency_parser.py" + }, + { + "id": "src_preprocessing_dependency_parser_py_DependencyParser", + "label": "DependencyParser", + "file_type": "code", + "source_file": "src/preprocessing/dependency_parser.py", + "source_location": "line 12" + }, + { + "id": "src_preprocessing_dependency_parser_py_DependencyParser_doc", + "label": "Extracts dependency trees and SVO triples from text.", + "file_type": "rationale", + "source_file": "src/preprocessing/dependency_parser.py", + "source_location": "line 12" + }, + { + "id": "src_preprocessing_dependency_parser_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/preprocessing/dependency_parser.py", + "source_location": "line 15" + }, + { + "id": "src_preprocessing_dependency_parser_py_parse", + "label": "parse()", + "file_type": "code", + "source_file": "src/preprocessing/dependency_parser.py", + "source_location": "line 22" + }, + { + "id": "src_preprocessing_dependency_parser_py_parse_doc", + "label": "Extract dependency tree for each sentence.", + "file_type": "rationale", + "source_file": "src/preprocessing/dependency_parser.py", + "source_location": "line 22" + }, + { + "id": "src_preprocessing_dependency_parser_py_extract_svo", + "label": "extract_svo()", + "file_type": "code", + "source_file": "src/preprocessing/dependency_parser.py", + "source_location": "line 47" + }, + { + "id": "src_preprocessing_dependency_parser_py_extract_svo_doc", + "label": "Extract subject-verb-object triples per sentence.", + "file_type": "rationale", + "source_file": "src/preprocessing/dependency_parser.py", + "source_location": "line 47" + } + ], + "edges": [ + { + "source": "src_preprocessing_dependency_parser_py", + "target": "src_preprocessing_dependency_parser_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_dependency_parser_py", + "target": "spacy", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_dependency_parser_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_dependency_parser_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.6 + }, + { + "source": "src_preprocessing_dependency_parser_py", + "target": "src_preprocessing_dependency_parser_py_DependencyParser", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_dependency_parser_py_DependencyParser", + "target": "src_preprocessing_dependency_parser_py_DependencyParser_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_dependency_parser_py", + "target": "src_preprocessing_dependency_parser_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_dependency_parser_py___init__", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dependency_parser_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dependency_parser_py___init__", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dependency_parser_py", + "target": "src_preprocessing_dependency_parser_py_parse", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_dependency_parser_py_parse", + "target": "src_preprocessing_dependency_parser_py_parse_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_dependency_parser_py_parse", + "target": "nlp", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dependency_parser_py_parse", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dependency_parser_py_parse", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dependency_parser_py_parse", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dependency_parser_py", + "target": "src_preprocessing_dependency_parser_py_extract_svo", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 1.0 + }, + { + "source": "src_preprocessing_dependency_parser_py_extract_svo", + "target": "src_preprocessing_dependency_parser_py_extract_svo_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.5 + }, + { + "source": "src_preprocessing_dependency_parser_py_extract_svo", + "target": "nlp", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dependency_parser_py_extract_svo", + "target": "list", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dependency_parser_py_extract_svo", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dependency_parser_py_extract_svo", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dependency_parser_py_extract_svo", + "target": "fromkeys", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dependency_parser_py_extract_svo", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dependency_parser_py_extract_svo", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.8 + }, + { + "source": "src_preprocessing_dependency_parser_py_extract_svo", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/preprocessing/dependency_parser.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/b8bf3605b3c23a899734b406855dcfd8404ec7d8aa84cdfb25114439e5af9c29.json b/graphify-out/cache/b8bf3605b3c23a899734b406855dcfd8404ec7d8aa84cdfb25114439e5af9c29.json new file mode 100644 index 0000000000000000000000000000000000000000..c1aadb7eed36d171dbcdfc051685103e924145fa --- /dev/null +++ b/graphify-out/cache/b8bf3605b3c23a899734b406855dcfd8404ec7d8aa84cdfb25114439e5af9c29.json @@ -0,0 +1,1234 @@ +{ + "nodes": [ + { + "id": "data_raw_jfleg_repo_eval_gleu_py", + "label": "gleu.py", + "file_type": "code", + "source_file": "data/raw/jfleg_repo/eval/gleu.py" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_docstring", + "label": "(Note: This script computes sentence-level GLEU score.)\n\nThis script calculates ", + "file_type": "rationale", + "source_file": "data/raw/jfleg_repo/eval/gleu.py" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_GLEU", + "label": "GLEU", + "file_type": "code", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 30" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 32" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_load_hypothesis_sentence", + "label": "load_hypothesis_sentence()", + "file_type": "code", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 35" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_load_hypothesis_sentence_doc", + "label": "load ngrams for a single sentence", + "file_type": "rationale", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 35" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_load_sources", + "label": "load_sources()", + "file_type": "code", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 41" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_load_sources_doc", + "label": "load n-grams for all source sentences", + "file_type": "rationale", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 41" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "label": "load_references()", + "file_type": "code", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 47" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_load_references_doc", + "label": "load n-grams for all references", + "file_type": "rationale", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 47" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_counts", + "label": "get_ngram_counts()", + "file_type": "code", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 78" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_counts_doc", + "label": "get ngrams of order n for a tokenized sentence", + "file_type": "rationale", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 78" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_diff", + "label": "get_ngram_diff()", + "file_type": "code", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 83" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_diff_doc", + "label": "returns ngrams in a but not in b", + "file_type": "rationale", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 83" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_normalization", + "label": "normalization()", + "file_type": "code", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 90" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_normalization_doc", + "label": "get normalized n-gram count", + "file_type": "rationale", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 90" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_gleu_stats", + "label": "gleu_stats()", + "file_type": "code", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 94" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_gleu_stats_doc", + "label": "Collect BLEU-relevant statistics for a single hypothesis/reference pair.\nReturn ", + "file_type": "rationale", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 94" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_gleu", + "label": "gleu()", + "file_type": "code", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 120" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_gleu_doc", + "label": "Compute GLEU from collected statistics obtained by call(s) to gleu_stats", + "file_type": "rationale", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 120" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_get_gleu_stats", + "label": "get_gleu_stats()", + "file_type": "code", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 132" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_get_gleu_stats_doc", + "label": "calculate mean and confidence interval from all GLEU iterations", + "file_type": "rationale", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 132" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "label": "run_iterations()", + "file_type": "code", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 141" + }, + { + "id": "data_raw_jfleg_repo_eval_gleu_py_run_iterations_doc", + "label": "run specified number of iterations of GLEU, choosing a reference\nfor each senten", + "file_type": "rationale", + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "source_location": "line 141" + } + ], + "edges": [ + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "data_raw_jfleg_repo_eval_gleu_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.5 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "math", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.6 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "numpy", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.6 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "scipy.stats", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.6 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "sys", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.6 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "random", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.6 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "argparse", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.6 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "collections", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.6 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "data_raw_jfleg_repo_eval_gleu_py_GLEU", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 1.0 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "data_raw_jfleg_repo_eval_gleu_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 1.0 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "data_raw_jfleg_repo_eval_gleu_py_load_hypothesis_sentence", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 1.0 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_hypothesis_sentence", + "target": "data_raw_jfleg_repo_eval_gleu_py_load_hypothesis_sentence_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.5 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_hypothesis_sentence", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_hypothesis_sentence", + "target": "get_ngram_counts", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_hypothesis_sentence", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "data_raw_jfleg_repo_eval_gleu_py_load_sources", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 1.0 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_sources", + "target": "data_raw_jfleg_repo_eval_gleu_py_load_sources_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.5 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_sources", + "target": "get_ngram_counts", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_sources", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_sources", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_sources", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 1.0 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "data_raw_jfleg_repo_eval_gleu_py_load_references_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.5 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "enumerate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "Counter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "get_ngram_counts", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "keys", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "get_ngram_counts", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "elements", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_load_references", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_counts", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 1.0 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_counts", + "target": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_counts_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.5 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_counts", + "target": "Counter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_counts", + "target": "tuple", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_counts", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_counts", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_diff", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 1.0 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_diff", + "target": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_diff_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.5 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_diff", + "target": "Counter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_diff", + "target": "set", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_get_ngram_diff", + "target": "set", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "data_raw_jfleg_repo_eval_gleu_py_normalization", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 1.0 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_normalization", + "target": "data_raw_jfleg_repo_eval_gleu_py_normalization_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.5 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_normalization", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "data_raw_jfleg_repo_eval_gleu_py_gleu_stats", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 1.0 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu_stats", + "target": "data_raw_jfleg_repo_eval_gleu_py_gleu_stats_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.5 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu_stats", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu_stats", + "target": "get_ngram_counts", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu_stats", + "target": "get_ngram_diff", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu_stats", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu_stats", + "target": "max", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu_stats", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu_stats", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu_stats", + "target": "values", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu_stats", + "target": "values", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "data_raw_jfleg_repo_eval_gleu_py_gleu", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 1.0 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu", + "target": "data_raw_jfleg_repo_eval_gleu_py_gleu_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.5 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu", + "target": "exp", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu", + "target": "list", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu", + "target": "min", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu", + "target": "filter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu", + "target": "log", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu", + "target": "zip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu", + "target": "float", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_gleu", + "target": "float", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "data_raw_jfleg_repo_eval_gleu_py_get_gleu_stats", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 1.0 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_get_gleu_stats", + "target": "data_raw_jfleg_repo_eval_gleu_py_get_gleu_stats_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.5 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_get_gleu_stats", + "target": "mean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_get_gleu_stats", + "target": "std", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_get_gleu_stats", + "target": "interval", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py", + "target": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 1.0 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "data_raw_jfleg_repo_eval_gleu_py_run_iterations_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.5 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "enumerate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "seed", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "load_hypothesis_sentence", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "get_gleu_stats", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "randint", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "get_gleu_stats", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "zip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "gleu", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "gleu_stats", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "get_gleu_stats", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "gleu", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "gleu_stats", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + }, + { + "source": "data_raw_jfleg_repo_eval_gleu_py_run_iterations", + "target": "gleu", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "data/raw/jfleg_repo/eval/gleu.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/bd5e8905537b3ad5db3656e1c25f8f95a90e3567a436d40e007ae1fd4d2cd0dd.json b/graphify-out/cache/bd5e8905537b3ad5db3656e1c25f8f95a90e3567a436d40e007ae1fd4d2cd0dd.json new file mode 100644 index 0000000000000000000000000000000000000000..59e102616df7ceabd92bb773aaafedec3725504c --- /dev/null +++ b/graphify-out/cache/bd5e8905537b3ad5db3656e1c25f8f95a90e3567a436d40e007ae1fd4d2cd0dd.json @@ -0,0 +1,267 @@ +{ + "nodes": [ + { + "id": "src_model_lora_adapter_py", + "label": "lora_adapter.py", + "file_type": "code", + "source_file": "src/model/lora_adapter.py" + }, + { + "id": "src_model_lora_adapter_py_docstring", + "label": "LoRA adapter configuration and management.\nWraps PEFT LoRA utilities for applyin", + "file_type": "rationale", + "source_file": "src/model/lora_adapter.py" + }, + { + "id": "src_model_lora_adapter_py_create_lora_config", + "label": "create_lora_config()", + "file_type": "code", + "source_file": "src/model/lora_adapter.py", + "source_location": "line 12" + }, + { + "id": "src_model_lora_adapter_py_create_lora_config_doc", + "label": "Create a LoRA configuration for the given task type.", + "file_type": "rationale", + "source_file": "src/model/lora_adapter.py", + "source_location": "line 12" + }, + { + "id": "src_model_lora_adapter_py_apply_lora", + "label": "apply_lora()", + "file_type": "code", + "source_file": "src/model/lora_adapter.py", + "source_location": "line 36" + }, + { + "id": "src_model_lora_adapter_py_apply_lora_doc", + "label": "Apply LoRA adapters to a model and return the wrapped model.", + "file_type": "rationale", + "source_file": "src/model/lora_adapter.py", + "source_location": "line 36" + }, + { + "id": "src_model_lora_adapter_py_merge_lora_weights", + "label": "merge_lora_weights()", + "file_type": "code", + "source_file": "src/model/lora_adapter.py", + "source_location": "line 45" + }, + { + "id": "src_model_lora_adapter_py_merge_lora_weights_doc", + "label": "Merge LoRA weights into the base model for inference.\n\nAfter merging, the model ", + "file_type": "rationale", + "source_file": "src/model/lora_adapter.py", + "source_location": "line 45" + } + ], + "edges": [ + { + "source": "src_model_lora_adapter_py", + "target": "src_model_lora_adapter_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/lora_adapter.py", + "weight": 0.5 + }, + { + "source": "src_model_lora_adapter_py", + "target": "peft", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/lora_adapter.py", + "weight": 0.6 + }, + { + "source": "src_model_lora_adapter_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/lora_adapter.py", + "weight": 0.6 + }, + { + "source": "src_model_lora_adapter_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/lora_adapter.py", + "weight": 0.6 + }, + { + "source": "src_model_lora_adapter_py", + "target": "src_model_lora_adapter_py_create_lora_config", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/lora_adapter.py", + "weight": 1.0 + }, + { + "source": "src_model_lora_adapter_py_create_lora_config", + "target": "src_model_lora_adapter_py_create_lora_config_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/lora_adapter.py", + "weight": 0.5 + }, + { + "source": "src_model_lora_adapter_py_create_lora_config", + "target": "LoraConfig", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/lora_adapter.py", + "weight": 0.8 + }, + { + "source": "src_model_lora_adapter_py_create_lora_config", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/lora_adapter.py", + "weight": 0.8 + }, + { + "source": "src_model_lora_adapter_py", + "target": "src_model_lora_adapter_py_apply_lora", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/lora_adapter.py", + "weight": 1.0 + }, + { + "source": "src_model_lora_adapter_py_apply_lora", + "target": "src_model_lora_adapter_py_apply_lora_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/lora_adapter.py", + "weight": 0.5 + }, + { + "source": "src_model_lora_adapter_py_apply_lora", + "target": "get_peft_model", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/lora_adapter.py", + "weight": 0.8 + }, + { + "source": "src_model_lora_adapter_py_apply_lora", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/lora_adapter.py", + "weight": 0.8 + }, + { + "source": "src_model_lora_adapter_py_apply_lora", + "target": "sum", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/lora_adapter.py", + "weight": 0.8 + }, + { + "source": "src_model_lora_adapter_py_apply_lora", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/lora_adapter.py", + "weight": 0.8 + }, + { + "source": "src_model_lora_adapter_py_apply_lora", + "target": "numel", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/lora_adapter.py", + "weight": 0.8 + }, + { + "source": "src_model_lora_adapter_py_apply_lora", + "target": "numel", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/lora_adapter.py", + "weight": 0.8 + }, + { + "source": "src_model_lora_adapter_py_apply_lora", + "target": "parameters", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/lora_adapter.py", + "weight": 0.8 + }, + { + "source": "src_model_lora_adapter_py_apply_lora", + "target": "parameters", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/lora_adapter.py", + "weight": 0.8 + }, + { + "source": "src_model_lora_adapter_py", + "target": "src_model_lora_adapter_py_merge_lora_weights", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/lora_adapter.py", + "weight": 1.0 + }, + { + "source": "src_model_lora_adapter_py_merge_lora_weights", + "target": "src_model_lora_adapter_py_merge_lora_weights_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/model/lora_adapter.py", + "weight": 0.5 + }, + { + "source": "src_model_lora_adapter_py_merge_lora_weights", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/lora_adapter.py", + "weight": 0.8 + }, + { + "source": "src_model_lora_adapter_py_merge_lora_weights", + "target": "merge_and_unload", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/lora_adapter.py", + "weight": 0.8 + }, + { + "source": "src_model_lora_adapter_py_merge_lora_weights", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/model/lora_adapter.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/be400a4056a4c0bbd8986e259d01ef5ef213c677a61d9a73f78db43c3806348c.json b/graphify-out/cache/be400a4056a4c0bbd8986e259d01ef5ef213c677a61d9a73f78db43c3806348c.json new file mode 100644 index 0000000000000000000000000000000000000000..ba4a7687f3426c700f29979eb3875fd540712356 --- /dev/null +++ b/graphify-out/cache/be400a4056a4c0bbd8986e259d01ef5ef213c677a61d9a73f78db43c3806348c.json @@ -0,0 +1,486 @@ +{ + "nodes": [ + { + "id": "src_inference_postprocessor_py", + "label": "postprocessor.py", + "file_type": "code", + "source_file": "src/inference/postprocessor.py" + }, + { + "id": "src_inference_postprocessor_py_docstring", + "label": "Post-processing utilities for generated text.\nHandles cleanup, formatting, and f", + "file_type": "rationale", + "source_file": "src/inference/postprocessor.py" + }, + { + "id": "src_inference_postprocessor_py_PostProcessor", + "label": "PostProcessor", + "file_type": "code", + "source_file": "src/inference/postprocessor.py", + "source_location": "line 11" + }, + { + "id": "src_inference_postprocessor_py_PostProcessor_doc", + "label": "Cleans and formats generated text after model output.", + "file_type": "rationale", + "source_file": "src/inference/postprocessor.py", + "source_location": "line 11" + }, + { + "id": "src_inference_postprocessor_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/inference/postprocessor.py", + "source_location": "line 26" + }, + { + "id": "src_inference_postprocessor_py_clean", + "label": "clean()", + "file_type": "code", + "source_file": "src/inference/postprocessor.py", + "source_location": "line 33" + }, + { + "id": "src_inference_postprocessor_py_clean_doc", + "label": "Remove generation artifacts and normalise whitespace.", + "file_type": "rationale", + "source_file": "src/inference/postprocessor.py", + "source_location": "line 33" + }, + { + "id": "src_inference_postprocessor_py_restore_entities", + "label": "restore_entities()", + "file_type": "code", + "source_file": "src/inference/postprocessor.py", + "source_location": "line 62" + }, + { + "id": "src_inference_postprocessor_py_restore_entities_doc", + "label": "Restore named entities that may have been altered during generation.\n\nUses fuzzy", + "file_type": "rationale", + "source_file": "src/inference/postprocessor.py", + "source_location": "line 62" + }, + { + "id": "src_inference_postprocessor_py_format_output", + "label": "format_output()", + "file_type": "code", + "source_file": "src/inference/postprocessor.py", + "source_location": "line 90" + }, + { + "id": "src_inference_postprocessor_py_format_output_doc", + "label": "Apply final formatting (capitalisation, punctuation, spacing).", + "file_type": "rationale", + "source_file": "src/inference/postprocessor.py", + "source_location": "line 90" + } + ], + "edges": [ + { + "source": "src_inference_postprocessor_py", + "target": "src_inference_postprocessor_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/postprocessor.py", + "weight": 0.5 + }, + { + "source": "src_inference_postprocessor_py", + "target": "re", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/postprocessor.py", + "weight": 0.6 + }, + { + "source": "src_inference_postprocessor_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/postprocessor.py", + "weight": 0.6 + }, + { + "source": "src_inference_postprocessor_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/postprocessor.py", + "weight": 0.6 + }, + { + "source": "src_inference_postprocessor_py", + "target": "src_inference_postprocessor_py_PostProcessor", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/postprocessor.py", + "weight": 1.0 + }, + { + "source": "src_inference_postprocessor_py_PostProcessor", + "target": "src_inference_postprocessor_py_PostProcessor_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/postprocessor.py", + "weight": 0.5 + }, + { + "source": "src_inference_postprocessor_py", + "target": "src_inference_postprocessor_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/postprocessor.py", + "weight": 1.0 + }, + { + "source": "src_inference_postprocessor_py___init__", + "target": "compile", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py___init__", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py___init__", + "target": "escape", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py___init__", + "target": "startswith", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py", + "target": "src_inference_postprocessor_py_clean", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/postprocessor.py", + "weight": 1.0 + }, + { + "source": "src_inference_postprocessor_py_clean", + "target": "src_inference_postprocessor_py_clean_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/postprocessor.py", + "weight": 0.5 + }, + { + "source": "src_inference_postprocessor_py_clean", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_clean", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_clean", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_clean", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_clean", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_clean", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_clean", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_clean", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_clean", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_clean", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_clean", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_clean", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py", + "target": "src_inference_postprocessor_py_restore_entities", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/postprocessor.py", + "weight": 1.0 + }, + { + "source": "src_inference_postprocessor_py_restore_entities", + "target": "src_inference_postprocessor_py_restore_entities_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/postprocessor.py", + "weight": 0.5 + }, + { + "source": "src_inference_postprocessor_py_restore_entities", + "target": "compile", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_restore_entities", + "target": "search", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_restore_entities", + "target": "escape", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_restore_entities", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_restore_entities", + "target": "debug", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py", + "target": "src_inference_postprocessor_py_format_output", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/postprocessor.py", + "weight": 1.0 + }, + { + "source": "src_inference_postprocessor_py_format_output", + "target": "src_inference_postprocessor_py_format_output_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/postprocessor.py", + "weight": 0.5 + }, + { + "source": "src_inference_postprocessor_py_format_output", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_format_output", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_format_output", + "target": "sub", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_format_output", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_format_output", + "target": "islower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_format_output", + "target": "upper", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_format_output", + "target": "rstrip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_format_output", + "target": "group", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_format_output", + "target": "upper", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_format_output", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + }, + { + "source": "src_inference_postprocessor_py_format_output", + "target": "group", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/postprocessor.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/c003aaf10a4959059ef93de619b1f5a650efac063278d99aedd6b7631f485780.json b/graphify-out/cache/c003aaf10a4959059ef93de619b1f5a650efac063278d99aedd6b7631f485780.json new file mode 100644 index 0000000000000000000000000000000000000000..a9fbd755f90adf3ca267e488668119a1254c32e0 --- /dev/null +++ b/graphify-out/cache/c003aaf10a4959059ef93de619b1f5a650efac063278d99aedd6b7631f485780.json @@ -0,0 +1,308 @@ +{ + "nodes": [ + { + "id": "tests_test_vocabulary_py", + "label": "test_vocabulary.py", + "file_type": "code", + "source_file": "tests/test_vocabulary.py" + }, + { + "id": "tests_test_vocabulary_py_docstring", + "label": "Tests for the vocabulary elevation module.", + "file_type": "rationale", + "source_file": "tests/test_vocabulary.py" + }, + { + "id": "tests_test_vocabulary_py_test_awl_loader", + "label": "test_awl_loader()", + "file_type": "code", + "source_file": "tests/test_vocabulary.py", + "source_location": "line 8" + }, + { + "id": "tests_test_vocabulary_py_test_awl_loader_doc", + "label": "Test that AWL words are loaded correctly.", + "file_type": "rationale", + "source_file": "tests/test_vocabulary.py", + "source_location": "line 8" + }, + { + "id": "tests_test_vocabulary_py_test_awl_membership", + "label": "test_awl_membership()", + "file_type": "code", + "source_file": "tests/test_vocabulary.py", + "source_location": "line 16" + }, + { + "id": "tests_test_vocabulary_py_test_awl_membership_doc", + "label": "Test is_academic lookup.", + "file_type": "rationale", + "source_file": "tests/test_vocabulary.py", + "source_location": "line 16" + }, + { + "id": "tests_test_vocabulary_py_test_register_filter_contractions", + "label": "test_register_filter_contractions()", + "file_type": "code", + "source_file": "tests/test_vocabulary.py", + "source_location": "line 26" + }, + { + "id": "tests_test_vocabulary_py_test_register_filter_contractions_doc", + "label": "Test that contractions are expanded.", + "file_type": "rationale", + "source_file": "tests/test_vocabulary.py", + "source_location": "line 26" + }, + { + "id": "tests_test_vocabulary_py_test_register_filter_colloquialisms", + "label": "test_register_filter_colloquialisms()", + "file_type": "code", + "source_file": "tests/test_vocabulary.py", + "source_location": "line 34" + }, + { + "id": "tests_test_vocabulary_py_test_register_filter_colloquialisms_doc", + "label": "Test that colloquial phrases are replaced.", + "file_type": "rationale", + "source_file": "tests/test_vocabulary.py", + "source_location": "line 34" + } + ], + "edges": [ + { + "source": "tests_test_vocabulary_py", + "target": "tests_test_vocabulary_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_vocabulary.py", + "weight": 0.5 + }, + { + "source": "tests_test_vocabulary_py", + "target": "pytest", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_vocabulary.py", + "weight": 0.6 + }, + { + "source": "tests_test_vocabulary_py", + "target": "src.vocabulary.awl_loader", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_vocabulary.py", + "weight": 0.6 + }, + { + "source": "tests_test_vocabulary_py", + "target": "src.vocabulary.lexical_substitution", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_vocabulary.py", + "weight": 0.6 + }, + { + "source": "tests_test_vocabulary_py", + "target": "tests_test_vocabulary_py_test_awl_loader", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_vocabulary.py", + "weight": 1.0 + }, + { + "source": "tests_test_vocabulary_py_test_awl_loader", + "target": "tests_test_vocabulary_py_test_awl_loader_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_vocabulary.py", + "weight": 0.5 + }, + { + "source": "tests_test_vocabulary_py_test_awl_loader", + "target": "write_text", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_vocabulary.py", + "weight": 0.8 + }, + { + "source": "tests_test_vocabulary_py_test_awl_loader", + "target": "AWLLoader", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_vocabulary.py", + "weight": 0.8 + }, + { + "source": "tests_test_vocabulary_py_test_awl_loader", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_vocabulary.py", + "weight": 0.8 + }, + { + "source": "tests_test_vocabulary_py_test_awl_loader", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_vocabulary.py", + "weight": 0.8 + }, + { + "source": "tests_test_vocabulary_py", + "target": "tests_test_vocabulary_py_test_awl_membership", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_vocabulary.py", + "weight": 1.0 + }, + { + "source": "tests_test_vocabulary_py_test_awl_membership", + "target": "tests_test_vocabulary_py_test_awl_membership_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_vocabulary.py", + "weight": 0.5 + }, + { + "source": "tests_test_vocabulary_py_test_awl_membership", + "target": "write_text", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_vocabulary.py", + "weight": 0.8 + }, + { + "source": "tests_test_vocabulary_py_test_awl_membership", + "target": "AWLLoader", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_vocabulary.py", + "weight": 0.8 + }, + { + "source": "tests_test_vocabulary_py_test_awl_membership", + "target": "is_academic", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_vocabulary.py", + "weight": 0.8 + }, + { + "source": "tests_test_vocabulary_py_test_awl_membership", + "target": "is_academic", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_vocabulary.py", + "weight": 0.8 + }, + { + "source": "tests_test_vocabulary_py_test_awl_membership", + "target": "is_academic", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_vocabulary.py", + "weight": 0.8 + }, + { + "source": "tests_test_vocabulary_py_test_awl_membership", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_vocabulary.py", + "weight": 0.8 + }, + { + "source": "tests_test_vocabulary_py", + "target": "tests_test_vocabulary_py_test_register_filter_contractions", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_vocabulary.py", + "weight": 1.0 + }, + { + "source": "tests_test_vocabulary_py_test_register_filter_contractions", + "target": "tests_test_vocabulary_py_test_register_filter_contractions_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_vocabulary.py", + "weight": 0.5 + }, + { + "source": "tests_test_vocabulary_py_test_register_filter_contractions", + "target": "RegisterFilter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_vocabulary.py", + "weight": 0.8 + }, + { + "source": "tests_test_vocabulary_py_test_register_filter_contractions", + "target": "apply", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_vocabulary.py", + "weight": 0.8 + }, + { + "source": "tests_test_vocabulary_py", + "target": "tests_test_vocabulary_py_test_register_filter_colloquialisms", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_vocabulary.py", + "weight": 1.0 + }, + { + "source": "tests_test_vocabulary_py_test_register_filter_colloquialisms", + "target": "tests_test_vocabulary_py_test_register_filter_colloquialisms_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "tests/test_vocabulary.py", + "weight": 0.5 + }, + { + "source": "tests_test_vocabulary_py_test_register_filter_colloquialisms", + "target": "RegisterFilter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_vocabulary.py", + "weight": 0.8 + }, + { + "source": "tests_test_vocabulary_py_test_register_filter_colloquialisms", + "target": "apply", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "tests/test_vocabulary.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/c8ce7717cac958aa774187e9c51e779eade7fd17ccd3e740e96882dac0915075.json b/graphify-out/cache/c8ce7717cac958aa774187e9c51e779eade7fd17ccd3e740e96882dac0915075.json new file mode 100644 index 0000000000000000000000000000000000000000..1dc29d6af99568dae60e0c7be4f7cf9ee4d09897 --- /dev/null +++ b/graphify-out/cache/c8ce7717cac958aa774187e9c51e779eade7fd17ccd3e740e96882dac0915075.json @@ -0,0 +1,1158 @@ +{ + "nodes": [ + { + "id": "src_inference_corrector_py", + "label": "corrector.py", + "file_type": "code", + "source_file": "src/inference/corrector.py" + }, + { + "id": "src_inference_corrector_py_docstring", + "label": "End-to-end inference pipeline.\nAccepts raw dyslectic text (and optionally a mast", + "file_type": "rationale", + "source_file": "src/inference/corrector.py" + }, + { + "id": "src_inference_corrector_py_CorrectionResult", + "label": "CorrectionResult", + "file_type": "code", + "source_file": "src/inference/corrector.py", + "source_location": "line 34" + }, + { + "id": "src_inference_corrector_py_AcademicCorrector", + "label": "AcademicCorrector", + "file_type": "code", + "source_file": "src/inference/corrector.py", + "source_location": "line 44" + }, + { + "id": "src_inference_corrector_py_AcademicCorrector_doc", + "label": "Full inference pipeline: preprocess \u2192 fingerprint \u2192 generate \u2192 elevate \u2192 filter.", + "file_type": "rationale", + "source_file": "src/inference/corrector.py", + "source_location": "line 44" + }, + { + "id": "src_inference_corrector_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/inference/corrector.py", + "source_location": "line 47" + }, + { + "id": "src_inference_corrector_py_correct", + "label": "correct()", + "file_type": "code", + "source_file": "src/inference/corrector.py", + "source_location": "line 147" + }, + { + "id": "src_inference_corrector_py_correct_doc", + "label": "Full correction pipeline:\n1. Pre-process (spell correct + parse)\n2. Style finger", + "file_type": "rationale", + "source_file": "src/inference/corrector.py", + "source_location": "line 147" + } + ], + "edges": [ + { + "source": "src_inference_corrector_py", + "target": "src_inference_corrector_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.5 + }, + { + "source": "src_inference_corrector_py", + "target": "preprocessing.pipeline", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "style.fingerprinter", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "vocabulary.lexical_substitution", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "model.base_model", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "model.style_conditioner", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "model.generation_utils", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "postprocessor", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "evaluation.style_metrics", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "vocabulary.awl_loader", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "dataclasses", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "yaml", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "src_inference_corrector_py_CorrectionResult", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 1.0 + }, + { + "source": "src_inference_corrector_py", + "target": "src_inference_corrector_py_AcademicCorrector", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 1.0 + }, + { + "source": "src_inference_corrector_py_AcademicCorrector", + "target": "src_inference_corrector_py_AcademicCorrector_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.5 + }, + { + "source": "src_inference_corrector_py", + "target": "src_inference_corrector_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 1.0 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "eval", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "PreprocessingPipeline", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "StyleFingerprinter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "hasattr", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "StyleConditioner", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "eval", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "RegisterFilter", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "PostProcessor", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "AWLLoader", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "StyleEvaluator", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "hasattr", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "LexicalElevator", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "load_model_and_tokenizer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "from_pretrained", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "load_model_and_tokenizer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "load_model_and_tokenizer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "from_pretrained", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "from_pretrained", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py___init__", + "target": "load_model_and_tokenizer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py", + "target": "src_inference_corrector_py_correct", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 1.0 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "src_inference_corrector_py_correct_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.5 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "process", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "extract_vector", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "nlp", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "enumerate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "clean", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "restore_entities", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "apply", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "format_output", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "style_similarity", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "awl_coverage", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "CorrectionResult", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "extract_vector", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "blend_vectors", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "encode", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "next", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "tokenizer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "generate_correction", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "debug", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "encode", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "parameters", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "elevate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py_correct", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/inference/corrector.py", + "weight": 0.8 + }, + { + "source": "src_inference_corrector_py", + "target": "os", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "peft", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + }, + { + "source": "src_inference_corrector_py", + "target": "transformers", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/inference/corrector.py", + "weight": 0.6 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/cb4f9067a6ab1921a7088b250a9ba466a84029d88994f3e47fdaf34d3bec1e31.json b/graphify-out/cache/cb4f9067a6ab1921a7088b250a9ba466a84029d88994f3e47fdaf34d3bec1e31.json new file mode 100644 index 0000000000000000000000000000000000000000..4736f13109114e0c984a4c8d14eec4e3ee61476c --- /dev/null +++ b/graphify-out/cache/cb4f9067a6ab1921a7088b250a9ba466a84029d88994f3e47fdaf34d3bec1e31.json @@ -0,0 +1,4 @@ +{ + "nodes": [], + "edges": [] +} \ No newline at end of file diff --git a/graphify-out/cache/d208aab65533ab8b493c355cf460f42cad435c2eca86f22ac62918ff36a67649.json b/graphify-out/cache/d208aab65533ab8b493c355cf460f42cad435c2eca86f22ac62918ff36a67649.json new file mode 100644 index 0000000000000000000000000000000000000000..e588e98233ee1b960892e1d1a00b96c5027f7d00 --- /dev/null +++ b/graphify-out/cache/d208aab65533ab8b493c355cf460f42cad435c2eca86f22ac62918ff36a67649.json @@ -0,0 +1,2657 @@ +{ + "nodes": [ + { + "id": "graph_codebase_py", + "label": "graph_codebase.py", + "file_type": "code", + "source_file": "graph_codebase.py" + }, + { + "id": "graph_codebase_py_docstring", + "label": "graphify_rebuild.py \u2014 One-shot NudR knowledge graph regeneration.\n\nUsage:\n py", + "file_type": "rationale", + "source_file": "graph_codebase.py" + }, + { + "id": "graph_codebase_py_detect_files", + "label": "detect_files()", + "file_type": "code", + "source_file": "graph_codebase.py", + "source_location": "line 55" + }, + { + "id": "graph_codebase_py_detect_files_doc", + "label": "Walk the project and return list of relevant files with metadata.", + "file_type": "rationale", + "source_file": "graph_codebase.py", + "source_location": "line 55" + }, + { + "id": "graph_codebase_py_get_changed_files", + "label": "get_changed_files()", + "file_type": "code", + "source_file": "graph_codebase.py", + "source_location": "line 94" + }, + { + "id": "graph_codebase_py_get_changed_files_doc", + "label": "Compare against manifest to find changed files.", + "file_type": "rationale", + "source_file": "graph_codebase.py", + "source_location": "line 94" + }, + { + "id": "graph_codebase_py_hash_file", + "label": "hash_file()", + "file_type": "code", + "source_file": "graph_codebase.py", + "source_location": "line 109" + }, + { + "id": "graph_codebase_py_hash_file_doc", + "label": "SHA-256 hash for cache keying.", + "file_type": "rationale", + "source_file": "graph_codebase.py", + "source_location": "line 109" + }, + { + "id": "graph_codebase_py_extract_ast_file", + "label": "extract_ast_file()", + "file_type": "code", + "source_file": "graph_codebase.py", + "source_location": "line 119" + }, + { + "id": "graph_codebase_py_extract_ast_file_doc", + "label": "Extract AST nodes and edges from a single Python file.", + "file_type": "rationale", + "source_file": "graph_codebase.py", + "source_location": "line 119" + }, + { + "id": "graph_codebase_py__get_call_name", + "label": "_get_call_name()", + "file_type": "code", + "source_file": "graph_codebase.py", + "source_location": "line 271" + }, + { + "id": "graph_codebase_py__get_call_name_doc", + "label": "Extract callable name from ast.Call node.", + "file_type": "rationale", + "source_file": "graph_codebase.py", + "source_location": "line 271" + }, + { + "id": "graph_codebase_py__get_name", + "label": "_get_name()", + "file_type": "code", + "source_file": "graph_codebase.py", + "source_location": "line 280" + }, + { + "id": "graph_codebase_py__get_name_doc", + "label": "Extract name from various AST node types.", + "file_type": "rationale", + "source_file": "graph_codebase.py", + "source_location": "line 280" + }, + { + "id": "graph_codebase_py__resolve_edges", + "label": "_resolve_edges()", + "file_type": "code", + "source_file": "graph_codebase.py", + "source_location": "line 289" + }, + { + "id": "graph_codebase_py__resolve_edges_doc", + "label": "Post-process edges to resolve bare names to actual node IDs.\n\nThe per-file AST e", + "file_type": "rationale", + "source_file": "graph_codebase.py", + "source_location": "line 289" + }, + { + "id": "graph_codebase_py_run_ast_extraction", + "label": "run_ast_extraction()", + "file_type": "code", + "source_file": "graph_codebase.py", + "source_location": "line 395" + }, + { + "id": "graph_codebase_py_run_ast_extraction_doc", + "label": "Run AST extraction on all Python files, with caching.", + "file_type": "rationale", + "source_file": "graph_codebase.py", + "source_location": "line 395" + }, + { + "id": "graph_codebase_py_build_semantic_nodes", + "label": "build_semantic_nodes()", + "file_type": "code", + "source_file": "graph_codebase.py", + "source_location": "line 446" + }, + { + "id": "graph_codebase_py_build_semantic_nodes_doc", + "label": "Build semantic nodes from documentation files.\nThese capture high-level architec", + "file_type": "rationale", + "source_file": "graph_codebase.py", + "source_location": "line 446" + }, + { + "id": "graph_codebase_py_merge_and_build", + "label": "merge_and_build()", + "file_type": "code", + "source_file": "graph_codebase.py", + "source_location": "line 623" + }, + { + "id": "graph_codebase_py_merge_and_build_doc", + "label": "Merge AST + semantic, build NetworkX graph, cluster, analyze.", + "file_type": "rationale", + "source_file": "graph_codebase.py", + "source_location": "line 623" + }, + { + "id": "graph_codebase_py_generate_outputs", + "label": "generate_outputs()", + "file_type": "code", + "source_file": "graph_codebase.py", + "source_location": "line 717" + }, + { + "id": "graph_codebase_py_generate_outputs_doc", + "label": "Generate report, HTML, JSON, and manifest.", + "file_type": "rationale", + "source_file": "graph_codebase.py", + "source_location": "line 717" + }, + { + "id": "graph_codebase_py_run_pipeline", + "label": "run_pipeline()", + "file_type": "code", + "source_file": "graph_codebase.py", + "source_location": "line 765" + }, + { + "id": "graph_codebase_py_run_pipeline_doc", + "label": "Execute the full graphify pipeline.", + "file_type": "rationale", + "source_file": "graph_codebase.py", + "source_location": "line 765" + }, + { + "id": "graph_codebase_py_watch_mode", + "label": "watch_mode()", + "file_type": "code", + "source_file": "graph_codebase.py", + "source_location": "line 815" + }, + { + "id": "graph_codebase_py_watch_mode_doc", + "label": "Watch for file changes and rebuild automatically.", + "file_type": "rationale", + "source_file": "graph_codebase.py", + "source_location": "line 815" + } + ], + "edges": [ + { + "source": "graph_codebase_py", + "target": "graph_codebase_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.5 + }, + { + "source": "graph_codebase_py", + "target": "sys", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.6 + }, + { + "source": "graph_codebase_py", + "target": "io", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.6 + }, + { + "source": "graph_codebase_py", + "target": "os", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.6 + }, + { + "source": "graph_codebase_py", + "target": "json", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.6 + }, + { + "source": "graph_codebase_py", + "target": "ast", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.6 + }, + { + "source": "graph_codebase_py", + "target": "hashlib", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.6 + }, + { + "source": "graph_codebase_py", + "target": "time", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.6 + }, + { + "source": "graph_codebase_py", + "target": "argparse", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.6 + }, + { + "source": "graph_codebase_py", + "target": "pathlib", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.6 + }, + { + "source": "graph_codebase_py", + "target": "datetime", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.6 + }, + { + "source": "graph_codebase_py", + "target": "graph_codebase_py_detect_files", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 1.0 + }, + { + "source": "graph_codebase_py_detect_files", + "target": "graph_codebase_py_detect_files_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.5 + }, + { + "source": "graph_codebase_py_detect_files", + "target": "walk", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_detect_files", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_detect_files", + "target": "relative_to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_detect_files", + "target": "any", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_detect_files", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_detect_files", + "target": "Path", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_detect_files", + "target": "startswith", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_detect_files", + "target": "stat", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_detect_files", + "target": "stat", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_detect_files", + "target": "read_text", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_detect_files", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_detect_files", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_detect_files", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py", + "target": "graph_codebase_py_get_changed_files", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 1.0 + }, + { + "source": "graph_codebase_py_get_changed_files", + "target": "graph_codebase_py_get_changed_files_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.5 + }, + { + "source": "graph_codebase_py_get_changed_files", + "target": "exists", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_get_changed_files", + "target": "loads", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_get_changed_files", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_get_changed_files", + "target": "read_text", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_get_changed_files", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py", + "target": "graph_codebase_py_hash_file", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 1.0 + }, + { + "source": "graph_codebase_py_hash_file", + "target": "graph_codebase_py_hash_file_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.5 + }, + { + "source": "graph_codebase_py_hash_file", + "target": "sha256", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_hash_file", + "target": "hexdigest", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_hash_file", + "target": "update", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_hash_file", + "target": "read_bytes", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_hash_file", + "target": "update", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_hash_file", + "target": "encode", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_hash_file", + "target": "Path", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py", + "target": "graph_codebase_py_extract_ast_file", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 1.0 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "graph_codebase_py_extract_ast_file_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.5 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "get_docstring", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "walk", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "relative_to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "read_text", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "parse", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "isinstance", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "get_docstring", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "walk", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "isinstance", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "isinstance", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "get_docstring", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "isinstance", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "_get_call_name", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "_get_name", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "isinstance", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_extract_ast_file", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py", + "target": "graph_codebase_py__get_call_name", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 1.0 + }, + { + "source": "graph_codebase_py__get_call_name", + "target": "graph_codebase_py__get_call_name_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.5 + }, + { + "source": "graph_codebase_py__get_call_name", + "target": "isinstance", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__get_call_name", + "target": "isinstance", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py", + "target": "graph_codebase_py__get_name", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 1.0 + }, + { + "source": "graph_codebase_py__get_name", + "target": "graph_codebase_py__get_name_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.5 + }, + { + "source": "graph_codebase_py__get_name", + "target": "isinstance", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__get_name", + "target": "isinstance", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py", + "target": "graph_codebase_py__resolve_edges", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 1.0 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "graph_codebase_py__resolve_edges_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.5 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "endswith", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "rstrip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "removesuffix", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "removesuffix", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "split", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "range", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "setdefault", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "replace", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "rsplit", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "rsplit", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py__resolve_edges", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py", + "target": "graph_codebase_py_run_ast_extraction", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 1.0 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "graph_codebase_py_run_ast_extraction_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.5 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "mkdir", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "set", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "glob", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "_resolve_edges", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "hash_file", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "add", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "exists", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "loads", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "extract_ast_file", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "extend", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "write_text", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "unlink", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "read_text", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "dumps", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_ast_extraction", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py", + "target": "graph_codebase_py_build_semantic_nodes", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 1.0 + }, + { + "source": "graph_codebase_py_build_semantic_nodes", + "target": "graph_codebase_py_build_semantic_nodes_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.5 + }, + { + "source": "graph_codebase_py_build_semantic_nodes", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_build_semantic_nodes", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_build_semantic_nodes", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_build_semantic_nodes", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_build_semantic_nodes", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_build_semantic_nodes", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_build_semantic_nodes", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_build_semantic_nodes", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_build_semantic_nodes", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_build_semantic_nodes", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py", + "target": "graph_codebase_py_merge_and_build", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 1.0 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "graph_codebase_py_merge_and_build_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.5 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "list", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "build_from_json", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "cluster", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "score_all", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "god_nodes", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "surprising_connections", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "items", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "suggest_questions", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "add", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "join", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "number_of_nodes", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "number_of_edges", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_merge_and_build", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py", + "target": "graph_codebase_py_generate_outputs", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 1.0 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "graph_codebase_py_generate_outputs_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.5 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "mkdir", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "generate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "write_text", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "to_json", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "write_text", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "exists", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "write_text", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "number_of_nodes", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "to_html", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "get", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "dumps", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "loads", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "dumps", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "read_text", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "isoformat", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "number_of_nodes", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "number_of_edges", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "relative_to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "relative_to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "relative_to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "number_of_nodes", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_generate_outputs", + "target": "now", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py", + "target": "graph_codebase_py_run_pipeline", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 1.0 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "graph_codebase_py_run_pipeline_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.5 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "time", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "detect_files", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "get_changed_files", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "run_ast_extraction", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "merge_and_build", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "generate_outputs", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "build_semantic_nodes", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "time", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "strftime", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "number_of_nodes", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "number_of_edges", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_run_pipeline", + "target": "now", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py", + "target": "graph_codebase_py_watch_mode", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 1.0 + }, + { + "source": "graph_codebase_py_watch_mode", + "target": "graph_codebase_py_watch_mode_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.5 + }, + { + "source": "graph_codebase_py_watch_mode", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_watch_mode", + "target": "walk", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_watch_mode", + "target": "sleep", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_watch_mode", + "target": "run_pipeline", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_watch_mode", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_watch_mode", + "target": "str", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_watch_mode", + "target": "Path", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_watch_mode", + "target": "lower", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_watch_mode", + "target": "relative_to", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_watch_mode", + "target": "print", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py_watch_mode", + "target": "stat", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "graph_codebase.py", + "weight": 0.8 + }, + { + "source": "graph_codebase_py", + "target": "graphify.build", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.6 + }, + { + "source": "graph_codebase_py", + "target": "graphify.cluster", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.6 + }, + { + "source": "graph_codebase_py", + "target": "graphify.analyze", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.6 + }, + { + "source": "graph_codebase_py", + "target": "graphify.report", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.6 + }, + { + "source": "graph_codebase_py", + "target": "graphify.export", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "graph_codebase.py", + "weight": 0.6 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/d2540ae2cd66e78f452e3838443f91fa732806bdcbb365737d6f7c7f0c622d24.json b/graphify-out/cache/d2540ae2cd66e78f452e3838443f91fa732806bdcbb365737d6f7c7f0c622d24.json new file mode 100644 index 0000000000000000000000000000000000000000..4736f13109114e0c984a4c8d14eec4e3ee61476c --- /dev/null +++ b/graphify-out/cache/d2540ae2cd66e78f452e3838443f91fa732806bdcbb365737d6f7c7f0c622d24.json @@ -0,0 +1,4 @@ +{ + "nodes": [], + "edges": [] +} \ No newline at end of file diff --git a/graphify-out/cache/e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855.json b/graphify-out/cache/e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855.json new file mode 100644 index 0000000000000000000000000000000000000000..49f2289ec278b6b2b5f9160fbb868cd677316c6f --- /dev/null +++ b/graphify-out/cache/e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855.json @@ -0,0 +1,11 @@ +{ + "nodes": [ + { + "id": "data_raw_jfleg_repo_EACL_exp_m2converter_util___init___py", + "label": "__init__.py", + "file_type": "code", + "source_file": "data/raw/jfleg_repo/EACL_exp/m2converter/util/__init__.py" + } + ], + "edges": [] +} \ No newline at end of file diff --git a/graphify-out/cache/e8bccd02269ff3006ec908e7b1ecb970f32e5b91f9ab9fa555be10cdda9b2725.json b/graphify-out/cache/e8bccd02269ff3006ec908e7b1ecb970f32e5b91f9ab9fa555be10cdda9b2725.json new file mode 100644 index 0000000000000000000000000000000000000000..9c1d5785774e33b15ec9739ef66bcaf1410d7e9b --- /dev/null +++ b/graphify-out/cache/e8bccd02269ff3006ec908e7b1ecb970f32e5b91f9ab9fa555be10cdda9b2725.json @@ -0,0 +1,324 @@ +{ + "nodes": [ + { + "id": "src_training_trainer_py", + "label": "trainer.py", + "file_type": "code", + "source_file": "src/training/trainer.py" + }, + { + "id": "src_training_trainer_py_docstring", + "label": "Custom HuggingFace Trainer subclass.\nUses the model's built-in cross-entropy los", + "file_type": "rationale", + "source_file": "src/training/trainer.py" + }, + { + "id": "src_training_trainer_py_CorrectionTrainer", + "label": "CorrectionTrainer", + "file_type": "code", + "source_file": "src/training/trainer.py", + "source_location": "line 12" + }, + { + "id": "src_training_trainer_py_CorrectionTrainer_doc", + "label": "Custom trainer \u2014 uses model's built-in loss directly.", + "file_type": "rationale", + "source_file": "src/training/trainer.py", + "source_location": "line 12" + }, + { + "id": "src_training_trainer_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/training/trainer.py", + "source_location": "line 15" + }, + { + "id": "src_training_trainer_py__strip_custom_fields", + "label": "_strip_custom_fields()", + "file_type": "code", + "source_file": "src/training/trainer.py", + "source_location": "line 21" + }, + { + "id": "src_training_trainer_py__strip_custom_fields_doc", + "label": "Remove dataset fields that T5 doesn't accept.", + "file_type": "rationale", + "source_file": "src/training/trainer.py", + "source_location": "line 21" + }, + { + "id": "src_training_trainer_py_compute_loss", + "label": "compute_loss()", + "file_type": "code", + "source_file": "src/training/trainer.py", + "source_location": "line 28" + }, + { + "id": "src_training_trainer_py_compute_loss_doc", + "label": "Use model's built-in CE loss \u2014 avoids double-computing logits loss.", + "file_type": "rationale", + "source_file": "src/training/trainer.py", + "source_location": "line 28" + }, + { + "id": "src_training_trainer_py_prediction_step", + "label": "prediction_step()", + "file_type": "code", + "source_file": "src/training/trainer.py", + "source_location": "line 39" + }, + { + "id": "src_training_trainer_py_prediction_step_doc", + "label": "Compute eval loss directly \u2014 strips custom fields and runs forward.\n\nThe parent'", + "file_type": "rationale", + "source_file": "src/training/trainer.py", + "source_location": "line 39" + } + ], + "edges": [ + { + "source": "src_training_trainer_py", + "target": "src_training_trainer_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/trainer.py", + "weight": 0.5 + }, + { + "source": "src_training_trainer_py", + "target": "transformers", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/trainer.py", + "weight": 0.6 + }, + { + "source": "src_training_trainer_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/trainer.py", + "weight": 0.6 + }, + { + "source": "src_training_trainer_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/trainer.py", + "weight": 0.6 + }, + { + "source": "src_training_trainer_py", + "target": "src_training_trainer_py_CorrectionTrainer", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/trainer.py", + "weight": 1.0 + }, + { + "source": "src_training_trainer_py_CorrectionTrainer", + "target": "src_training_trainer_py_CorrectionTrainer_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/trainer.py", + "weight": 0.5 + }, + { + "source": "src_training_trainer_py_CorrectionTrainer", + "target": "Trainer", + "relation": "inherits", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/trainer.py", + "weight": 1.0 + }, + { + "source": "src_training_trainer_py", + "target": "src_training_trainer_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/trainer.py", + "weight": 1.0 + }, + { + "source": "src_training_trainer_py___init__", + "target": "__init__", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/trainer.py", + "weight": 0.8 + }, + { + "source": "src_training_trainer_py___init__", + "target": "super", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/trainer.py", + "weight": 0.8 + }, + { + "source": "src_training_trainer_py", + "target": "src_training_trainer_py__strip_custom_fields", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/trainer.py", + "weight": 1.0 + }, + { + "source": "src_training_trainer_py__strip_custom_fields", + "target": "src_training_trainer_py__strip_custom_fields_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/trainer.py", + "weight": 0.5 + }, + { + "source": "src_training_trainer_py__strip_custom_fields", + "target": "pop", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/trainer.py", + "weight": 0.8 + }, + { + "source": "src_training_trainer_py__strip_custom_fields", + "target": "pop", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/trainer.py", + "weight": 0.8 + }, + { + "source": "src_training_trainer_py__strip_custom_fields", + "target": "pop", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/trainer.py", + "weight": 0.8 + }, + { + "source": "src_training_trainer_py__strip_custom_fields", + "target": "items", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/trainer.py", + "weight": 0.8 + }, + { + "source": "src_training_trainer_py", + "target": "src_training_trainer_py_compute_loss", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/trainer.py", + "weight": 1.0 + }, + { + "source": "src_training_trainer_py_compute_loss", + "target": "src_training_trainer_py_compute_loss_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/trainer.py", + "weight": 0.5 + }, + { + "source": "src_training_trainer_py_compute_loss", + "target": "_strip_custom_fields", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/trainer.py", + "weight": 0.8 + }, + { + "source": "src_training_trainer_py_compute_loss", + "target": "model", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/trainer.py", + "weight": 0.8 + }, + { + "source": "src_training_trainer_py", + "target": "src_training_trainer_py_prediction_step", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/trainer.py", + "weight": 1.0 + }, + { + "source": "src_training_trainer_py_prediction_step", + "target": "src_training_trainer_py_prediction_step_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/trainer.py", + "weight": 0.5 + }, + { + "source": "src_training_trainer_py_prediction_step", + "target": "_strip_custom_fields", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/trainer.py", + "weight": 0.8 + }, + { + "source": "src_training_trainer_py_prediction_step", + "target": "_prepare_inputs", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/trainer.py", + "weight": 0.8 + }, + { + "source": "src_training_trainer_py_prediction_step", + "target": "no_grad", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/trainer.py", + "weight": 0.8 + }, + { + "source": "src_training_trainer_py_prediction_step", + "target": "model", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/trainer.py", + "weight": 0.8 + }, + { + "source": "src_training_trainer_py_prediction_step", + "target": "detach", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/trainer.py", + "weight": 0.8 + } + ] +} \ No newline at end of file diff --git a/graphify-out/cache/f50f0fa7a5839532cc535fc6117b20a9620e13a4da8ce83d917f5702c7e7b07a.json b/graphify-out/cache/f50f0fa7a5839532cc535fc6117b20a9620e13a4da8ce83d917f5702c7e7b07a.json new file mode 100644 index 0000000000000000000000000000000000000000..3d582da0aeec2494679c496a616087918322d05e --- /dev/null +++ b/graphify-out/cache/f50f0fa7a5839532cc535fc6117b20a9620e13a4da8ce83d917f5702c7e7b07a.json @@ -0,0 +1,872 @@ +{ + "nodes": [ + { + "id": "src_training_dataset_py", + "label": "dataset.py", + "file_type": "code", + "source_file": "src/training/dataset.py" + }, + { + "id": "src_training_dataset_py_docstring", + "label": "Dataset class that handles all data sources and produces training triplets:\n(inp", + "file_type": "rationale", + "source_file": "src/training/dataset.py" + }, + { + "id": "src_training_dataset_py_WritingCorrectionDataset", + "label": "WritingCorrectionDataset", + "file_type": "code", + "source_file": "src/training/dataset.py", + "source_location": "line 45" + }, + { + "id": "src_training_dataset_py_WritingCorrectionDataset_doc", + "label": "PyTorch dataset for writing correction training triplets.\n\nFully pre-computed at", + "file_type": "rationale", + "source_file": "src/training/dataset.py", + "source_location": "line 45" + }, + { + "id": "src_training_dataset_py___init__", + "label": "__init__()", + "file_type": "code", + "source_file": "src/training/dataset.py", + "source_location": "line 54" + }, + { + "id": "src_training_dataset_py__compute_cache_key", + "label": "_compute_cache_key()", + "file_type": "code", + "source_file": "src/training/dataset.py", + "source_location": "line 95" + }, + { + "id": "src_training_dataset_py__compute_cache_key_doc", + "label": "Generate a cache key based on data file content and processing params.", + "file_type": "rationale", + "source_file": "src/training/dataset.py", + "source_location": "line 95" + }, + { + "id": "src_training_dataset_py__load", + "label": "_load()", + "file_type": "code", + "source_file": "src/training/dataset.py", + "source_location": "line 107" + }, + { + "id": "src_training_dataset_py__load_doc", + "label": "Load JSONL data file.", + "file_type": "rationale", + "source_file": "src/training/dataset.py", + "source_location": "line 107" + }, + { + "id": "src_training_dataset_py__add_synthetic", + "label": "_add_synthetic()", + "file_type": "code", + "source_file": "src/training/dataset.py", + "source_location": "line 126" + }, + { + "id": "src_training_dataset_py__add_synthetic_doc", + "label": "Augment dataset with synthetic dyslexia examples.", + "file_type": "rationale", + "source_file": "src/training/dataset.py", + "source_location": "line 126" + }, + { + "id": "src_training_dataset_py__precompute_all", + "label": "_precompute_all()", + "file_type": "code", + "source_file": "src/training/dataset.py", + "source_location": "line 150" + }, + { + "id": "src_training_dataset_py__precompute_all_doc", + "label": "Pre-compute tokenisation + style vectors for ALL examples.\nThis makes __getitem_", + "file_type": "rationale", + "source_file": "src/training/dataset.py", + "source_location": "line 150" + }, + { + "id": "src_training_dataset_py___len__", + "label": "__len__()", + "file_type": "code", + "source_file": "src/training/dataset.py", + "source_location": "line 206" + }, + { + "id": "src_training_dataset_py___getitem__", + "label": "__getitem__()", + "file_type": "code", + "source_file": "src/training/dataset.py", + "source_location": "line 209" + }, + { + "id": "src_training_dataset_py___getitem___doc", + "label": "Pure dict return \u2014 zero computation per batch.", + "file_type": "rationale", + "source_file": "src/training/dataset.py", + "source_location": "line 209" + } + ], + "edges": [ + { + "source": "src_training_dataset_py", + "target": "src_training_dataset_py_docstring", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.5 + }, + { + "source": "src_training_dataset_py", + "target": "json", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.6 + }, + { + "source": "src_training_dataset_py", + "target": "os", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.6 + }, + { + "source": "src_training_dataset_py", + "target": "pathlib", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.6 + }, + { + "source": "src_training_dataset_py", + "target": "typing", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.6 + }, + { + "source": "src_training_dataset_py", + "target": "torch", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.6 + }, + { + "source": "src_training_dataset_py", + "target": "torch.utils.data", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.6 + }, + { + "source": "src_training_dataset_py", + "target": "transformers", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.6 + }, + { + "source": "src_training_dataset_py", + "target": "style.fingerprinter", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.6 + }, + { + "source": "src_training_dataset_py", + "target": "preprocessing.dyslexia_simulator", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.6 + }, + { + "source": "src_training_dataset_py", + "target": "loguru", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.6 + }, + { + "source": "src_training_dataset_py", + "target": "random", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.6 + }, + { + "source": "src_training_dataset_py", + "target": "hashlib", + "relation": "imports", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.6 + }, + { + "source": "src_training_dataset_py", + "target": "src_training_dataset_py_WritingCorrectionDataset", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 1.0 + }, + { + "source": "src_training_dataset_py_WritingCorrectionDataset", + "target": "src_training_dataset_py_WritingCorrectionDataset_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.5 + }, + { + "source": "src_training_dataset_py_WritingCorrectionDataset", + "target": "Dataset", + "relation": "inherits", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 1.0 + }, + { + "source": "src_training_dataset_py", + "target": "src_training_dataset_py___init__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 1.0 + }, + { + "source": "src_training_dataset_py___init__", + "target": "_load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py___init__", + "target": "_compute_cache_key", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py___init__", + "target": "exists", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py___init__", + "target": "_add_synthetic", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py___init__", + "target": "load", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py___init__", + "target": "_precompute_all", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py___init__", + "target": "mkdir", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py___init__", + "target": "save", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py___init__", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py___init__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py", + "target": "src_training_dataset_py__compute_cache_key", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 1.0 + }, + { + "source": "src_training_dataset_py__compute_cache_key", + "target": "src_training_dataset_py__compute_cache_key_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.5 + }, + { + "source": "src_training_dataset_py__compute_cache_key", + "target": "md5", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__compute_cache_key", + "target": "update", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__compute_cache_key", + "target": "update", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__compute_cache_key", + "target": "encode", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__compute_cache_key", + "target": "hexdigest", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__compute_cache_key", + "target": "read_bytes", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__compute_cache_key", + "target": "update", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__compute_cache_key", + "target": "encode", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__compute_cache_key", + "target": "Path", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py", + "target": "src_training_dataset_py__load", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 1.0 + }, + { + "source": "src_training_dataset_py__load", + "target": "src_training_dataset_py__load_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.5 + }, + { + "source": "src_training_dataset_py__load", + "target": "open", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__load", + "target": "warning", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__load", + "target": "strip", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__load", + "target": "loads", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__load", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py", + "target": "src_training_dataset_py__add_synthetic", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 1.0 + }, + { + "source": "src_training_dataset_py__add_synthetic", + "target": "src_training_dataset_py__add_synthetic_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.5 + }, + { + "source": "src_training_dataset_py__add_synthetic", + "target": "DyslexiaSimulator", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__add_synthetic", + "target": "int", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__add_synthetic", + "target": "choices", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__add_synthetic", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__add_synthetic", + "target": "simulate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__add_synthetic", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__add_synthetic", + "target": "Random", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__add_synthetic", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py", + "target": "src_training_dataset_py__precompute_all", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 1.0 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "src_training_dataset_py__precompute_all_doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.5 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "enumerate", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "tokenizer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "tokenizer", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "squeeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "append", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "hexdigest", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "info", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "no_grad", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "extract_vector", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "squeeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "squeeze", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "md5", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "encode", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py__precompute_all", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py", + "target": "src_training_dataset_py___len__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 1.0 + }, + { + "source": "src_training_dataset_py___len__", + "target": "len", + "relation": "calls", + "confidence": "INFERRED", + "confidence_score": 0.7, + "source_file": "src/training/dataset.py", + "weight": 0.8 + }, + { + "source": "src_training_dataset_py", + "target": "src_training_dataset_py___getitem__", + "relation": "defines", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 1.0 + }, + { + "source": "src_training_dataset_py___getitem__", + "target": "src_training_dataset_py___getitem___doc", + "relation": "has_rationale", + "confidence": "EXTRACTED", + "confidence_score": 1.0, + "source_file": "src/training/dataset.py", + "weight": 0.5 + } + ] +} \ No newline at end of file diff --git a/scripts/__pycache__/pretrain_human_pattern_classifier.cpython-312.pyc b/scripts/__pycache__/pretrain_human_pattern_classifier.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30878def50feb94f4da340410b0522944d149dcb Binary files /dev/null and b/scripts/__pycache__/pretrain_human_pattern_classifier.cpython-312.pyc differ diff --git a/scripts/__pycache__/run_inference.cpython-312.pyc b/scripts/__pycache__/run_inference.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b4924f638be57cb7b3133e5bc499ed21ec0d744 Binary files /dev/null and b/scripts/__pycache__/run_inference.cpython-312.pyc differ diff --git a/scripts/__pycache__/train.cpython-312.pyc b/scripts/__pycache__/train.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5daa4c72a31c331a33062b90df9620af5a534417 Binary files /dev/null and b/scripts/__pycache__/train.cpython-312.pyc differ diff --git a/wandb/run-20260502_203519-fib23yhh/files/output.log b/wandb/run-20260502_203519-fib23yhh/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..4377e3acb29b6f5119d70fbcc782ac3295d64df2 --- /dev/null +++ b/wandb/run-20260502_203519-fib23yhh/files/output.log @@ -0,0 +1,35 @@ +2026-05-02 20:35:21.490 | INFO  | __main__:train:59 - Step 3: Loading model and tokenizer... +2026-05-02 20:35:21.490 | INFO  | src.model.base_model:load_model_and_tokenizer:55 - Loading model: google/flan-t5-large (seq2seq=True, quantize=False, lora=True) +2026-05-02 20:35:26.884 | INFO  | src.model.base_model:load_model_and_tokenizer:99 - Model loaded on cuda with dtype torch.bfloat16 +2026-05-02 20:35:33.566 | INFO  | src.model.base_model:load_model_and_tokenizer:126 - LoRA applied: 18,284,544 trainable params / 801,434,624 total (2.28%) +2026-05-02 20:35:33.566 | INFO  | __main__:train:72 - Step 4: Creating style fingerprinter... +2026-05-02 20:35:34.012 | INFO  | src.style.fingerprinter:__init__:100 - StyleFingerprinter initialised (AWL size: 549) +2026-05-02 20:35:34.013 | INFO  | __main__:train:79 - Step 5: Loading datasets... +2026-05-02 20:35:34.053 | INFO  | src.training.dataset:__init__:69 - Loaded 7555 examples from data/processed/train.jsonl +2026-05-02 20:35:34.196 | INFO  | src.training.dataset:_add_synthetic:121 - Added 2125 synthetic augmentation examples +2026-05-02 20:35:34.197 | INFO  | src.training.dataset:__init__:75 - Total dataset size: 9680 examples +2026-05-02 20:35:34.197 | INFO  | src.training.dataset:_precompute_style_vectors:128 - Pre-computing style vectors for all examples... +2026-05-02 20:36:28.835 | INFO  | src.training.dataset:_precompute_style_vectors:143 -  Style vectors: 2000/9680 +2026-05-02 20:37:15.248 | INFO  | src.training.dataset:_precompute_style_vectors:143 -  Style vectors: 4000/9680 +2026-05-02 20:38:16.030 | INFO  | src.training.dataset:_precompute_style_vectors:143 -  Style vectors: 6000/9680 +2026-05-02 20:38:51.282 | INFO  | src.training.dataset:_precompute_style_vectors:143 -  Style vectors: 8000/9680 +2026-05-02 20:38:51.287 | INFO  | src.training.dataset:_precompute_style_vectors:145 - Style vector pre-computation complete (7231 unique texts cached) +2026-05-02 20:38:51.292 | INFO  | src.training.dataset:__init__:69 - Loaded 839 examples from data/processed/val.jsonl +2026-05-02 20:38:51.292 | INFO  | src.training.dataset:__init__:75 - Total dataset size: 839 examples +2026-05-02 20:38:51.292 | INFO  | src.training.dataset:_precompute_style_vectors:128 - Pre-computing style vectors for all examples... +2026-05-02 20:39:10.193 | INFO  | src.training.dataset:_precompute_style_vectors:145 - Style vector pre-computation complete (833 unique texts cached) +2026-05-02 20:39:10.193 | INFO  | __main__:train:99 - Train: 9680 | Val: 839 +2026-05-02 20:39:10.194 | INFO  | __main__:train:102 - Step 6: Creating loss function... +2026-05-02 20:39:10.194 | INFO  | src.training.loss_functions:__init__:141 - Loading sentence transformer for loss: all-mpnet-base-v2 (on CPU) +2026-05-02 20:39:16.475 | INFO  | src.training.loss_functions:__init__:151 - Loaded human pattern classifier from checkpoints/human_pattern_classifier.pt +2026-05-02 20:39:16.475 | INFO  | src.training.human_pattern_extractor:__init__:166 - Loading GPT-2 for perplexity calculation... +2026-05-02 20:39:18.883 | INFO  | src.training.human_pattern_extractor:__init__:180 - GPT-2 loaded on cpu +2026-05-02 20:39:18.883 | INFO  | src.training.human_pattern_extractor:__init__:182 - HumanPatternFeatureExtractor initialised +2026-05-02 20:39:18.883 | INFO  | __main__:train:125 - Step 7: Creating training arguments... +2026-05-02 20:39:18.909 | INFO  | __main__:train:178 - Step 8: Creating trainer... +No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead. +2026-05-02 20:39:18.963 | INFO  | __main__:train:194 - Step 9: Starting training... +wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter. + 0%| | 0/1515 [00:00 +human_pattern: + value: + classifier_path: checkpoints/human_pattern_classifier.pt + max_samples_per_source: 50000 + pretrain_batch_size: 512 + pretrain_epochs: 20 + pretrain_lr: 0.001 + shanegerami_path: data/raw/shanegerami/AI_Human.csv + starblasters_path: data/raw/starblasters8/data.parquet + target_auc: 0.88 +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_inputs_for_metrics: + value: false +include_num_input_tokens_seen: + value: false +include_tokens_per_second: + value: false +initializer_factor: + value: 1 +is_decoder: + value: false +is_encoder_decoder: + value: true +is_gated_act: + value: true +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_epsilon: + value: 1e-06 +learning_rate: + value: 0.0003 +length_column_name: + value: length +length_penalty: + value: 1 +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: logs/ +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 50 +logging_strategy: + value: steps +lora: + value: + lora_alpha: 32 + lora_dropout: 0.05 + r: 16 + target_modules: + - q + - v + - k + - o + - wi_0 + - wi_1 + - wo +loss: + value: + lambda_human_pattern: 0.4 + lambda_semantic: 0.5 + lambda_style: 0.3 + sem_model_name: all-mpnet-base-v2 +lr_scheduler_type: + value: cosine +max_grad_norm: + value: 1 +max_length: + value: 20 +max_steps: + value: -1 +metric_for_best_model: + value: eval_loss +min_length: + value: 0 +model: + value: + key: flan-t5-base + quantize: false + use_lora: true +model/num_parameters: + value: 254360832 +model_type: + value: t5 +mp_parameters: + value: "" +n_positions: + value: 512 +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_decoder_layers: + value: 12 +num_heads: + value: 12 +num_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 3 +optim: + value: adamw_torch +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: checkpoints/ +output_hidden_states: + value: false +output_past: + value: true +output_scores: + value: false +overwrite_output_dir: + value: false +pad_token_id: + value: 0 +past_index: + value: -1 +peft_config: + value: + default: + alora_invocation_tokens: null + arrow_config: null + auto_mapping: null + base_model_name_or_path: google/flan-t5-base + bias: none + corda_config: null + ensure_weight_tying: false + eva_config: null + exclude_modules: null + fan_in_fan_out: false + inference_mode: false + init_lora_weights: true + layer_replication: null + layers_pattern: null + layers_to_transform: null + lora_alpha: 32 + lora_bias: false + lora_dropout: 0.05 + lora_ga_config: null + megatron_config: null + megatron_core: megatron.core + modules_to_save: null + peft_type: LORA + peft_version: 0.19.1 + qalora_group_size: 16 + r: 16 + revision: null + runtime_config: + ephemeral_gpu_offload: false + target_modules: + - v + - o + - q + - k + - wi_0 + - wo + - wi_1 + target_parameters: null + task_type: SEQ_2_SEQ_LM + trainable_token_indices: null + use_bdlora: null + use_dora: false + use_qalora: false + use_rslora: false +per_device_eval_batch_size: + value: 8 +per_device_train_batch_size: + value: 8 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +relative_attention_max_distance: + value: 128 +relative_attention_num_buckets: + value: 32 +remove_invalid_values: + value: false +remove_unused_columns: + value: false +repetition_penalty: + value: 1 +report_to: + value: + - wandb + - tensorboard +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: checkpoints/ +save_on_each_node: + value: false +save_only_model: + value: false +save_safetensors: + value: true +save_steps: + value: 200 +save_strategy: + value: steps +save_total_limit: + value: 3 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +suppress_tokens: + value: null +task_specific_params: + value: + summarization: + early_stopping: true + length_penalty: 2 + max_length: 200 + min_length: 30 + no_repeat_ngram_size: 3 + num_beams: 4 + prefix: 'summarize: ' + translation_en_to_de: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to German: ' + translation_en_to_fr: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to French: ' + translation_en_to_ro: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to Romanian: ' +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: false +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: bfloat16 +torch_empty_cache_steps: + value: null +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +training: + value: + bf16: true + dataloader_num_workers: 4 + eval_steps: 200 + evaluation_strategy: steps + fp16: false + gradient_accumulation_steps: 4 + greater_is_better: false + learning_rate: 0.0003 + load_best_model_at_end: true + logging_dir: logs/ + logging_steps: 50 + lr_scheduler_type: cosine + metric_for_best_model: eval_loss + num_train_epochs: 3 + output_dir: checkpoints/ + per_device_eval_batch_size: 8 + per_device_train_batch_size: 8 + push_to_hub: false + report_to: + - wandb + - tensorboard + save_steps: 200 + save_strategy: steps + save_total_limit: 3 + seed: 42 + warmup_ratio: 0.05 + weight_decay: 0.01 +transformers_version: + value: 4.53.2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_liger_kernel: + value: false +use_mps_device: + value: false +vocab_size: + value: 32128 +warmup_ratio: + value: 0.05 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/wandb/run-20260502_204834-03roqvb7/files/output.log b/wandb/run-20260502_204834-03roqvb7/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..b56ae17aa4c8d0b56fd759ab70e4aed415dcb9c7 --- /dev/null +++ b/wandb/run-20260502_204834-03roqvb7/files/output.log @@ -0,0 +1,168 @@ +2026-05-02 20:48:36.936 | INFO  | __main__:train:59 - Step 3: Loading model and tokenizer... +2026-05-02 20:48:36.936 | INFO  | src.model.base_model:load_model_and_tokenizer:56 - Loading model: google/flan-t5-base (seq2seq=True, quantize=False, lora=True) +tokenizer_config.json: 2.54kB [00:00, 7.20MB/s] +spiece.model: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 792k/792k [00:02<00:00, 301kB/s] +tokenizer.json: 2.42MB [00:00, 19.4MB/s] +special_tokens_map.json: 2.20kB [00:00, 5.21MB/s] +config.json: 1.40kB [00:00, 4.63MB/s] +model.safetensors: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 990M/990M [03:04<00:00, 5.38MB/s] +generation_config.json: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 147/147 [00:00<00:00, 956kB/s] +2026-05-02 20:51:50.017 | INFO  | src.model.base_model:load_model_and_tokenizer:100 - Model loaded on cuda with dtype torch.bfloat16 +2026-05-02 20:52:08.726 | INFO  | src.model.base_model:load_model_and_tokenizer:128 - LoRA applied: 6,782,976 trainable params / 254,360,832 total (2.67%) +2026-05-02 20:52:08.728 | INFO  | __main__:train:72 - Step 4: Creating style fingerprinter... +2026-05-02 20:52:12.819 | INFO  | src.style.fingerprinter:__init__:100 - StyleFingerprinter initialised (AWL size: 549) +2026-05-02 20:52:12.821 | INFO  | __main__:train:79 - Step 5: Loading datasets... +2026-05-02 20:52:13.227 | INFO  | src.training.dataset:__init__:75 - Loaded 7555 examples from data/processed/train.jsonl +2026-05-02 20:52:14.848 | INFO  | src.training.dataset:_add_synthetic:127 - Added 2125 synthetic augmentation examples +2026-05-02 20:52:14.850 | INFO  | src.training.dataset:__init__:81 - Total dataset size: 9680 examples +2026-05-02 20:52:14.851 | INFO  | src.training.dataset:_precompute_all:133 - Pre-computing tokenisation and style vectors for all examples... +2026-05-02 20:54:54.365 | INFO  | src.training.dataset:_precompute_all:180 -  Pre-computed: 2000/9680 +2026-05-02 20:56:52.958 | INFO  | src.training.dataset:_precompute_all:180 -  Pre-computed: 4000/9680 +2026-05-02 20:58:39.532 | INFO  | src.training.dataset:_precompute_all:180 -  Pre-computed: 6000/9680 +2026-05-02 20:59:48.612 | INFO  | src.training.dataset:_precompute_all:180 -  Pre-computed: 8000/9680 +2026-05-02 21:00:02.305 | INFO  | src.training.dataset:_precompute_all:182 - Pre-computation complete (7231 unique style vectors) +2026-05-02 21:00:02.310 | INFO  | src.training.dataset:__init__:75 - Loaded 839 examples from data/processed/val.jsonl +2026-05-02 21:00:02.311 | INFO  | src.training.dataset:__init__:81 - Total dataset size: 839 examples +2026-05-02 21:00:02.311 | INFO  | src.training.dataset:_precompute_all:133 - Pre-computing tokenisation and style vectors for all examples... +2026-05-02 21:00:30.963 | INFO  | src.training.dataset:_precompute_all:182 - Pre-computation complete (833 unique style vectors) +2026-05-02 21:00:30.964 | INFO  | __main__:train:99 - Train: 9680 | Val: 839 +2026-05-02 21:00:30.964 | INFO  | __main__:train:102 - Step 6: Creating loss function... +2026-05-02 21:00:30.964 | INFO  | src.training.loss_functions:__init__:141 - Loading sentence transformer for loss: all-mpnet-base-v2 (on CPU) +2026-05-02 21:00:42.455 | INFO  | src.training.loss_functions:__init__:151 - Loaded human pattern classifier from checkpoints/human_pattern_classifier.pt +2026-05-02 21:00:42.458 | INFO  | src.training.human_pattern_extractor:__init__:166 - Loading GPT-2 for perplexity calculation... +2026-05-02 21:00:46.112 | INFO  | src.training.human_pattern_extractor:__init__:180 - GPT-2 loaded on cpu +2026-05-02 21:00:46.114 | INFO  | src.training.human_pattern_extractor:__init__:182 - HumanPatternFeatureExtractor initialised +2026-05-02 21:00:46.117 | INFO  | __main__:train:125 - Step 7: Creating training arguments... +2026-05-02 21:00:46.282 | INFO  | __main__:train:178 - Step 8: Creating trainer... +No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead. +2026-05-02 21:00:46.808 | INFO  | __main__:train:194 - Step 9: Starting training... +wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter. + 0%| | 0/909 [00:00 + train() + ~~~~~^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1485, in __call__ + return self.main(*args, **kwargs) + ~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1406, in main + rv = self.invoke(ctx) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1269, in invoke + return ctx.invoke(self.callback, **ctx.params) + ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 824, in invoke + return callback(*args, **kwargs) + File "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", line 195, in train + trainer.train() + ~~~~~~~~~~~~~^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/trainer.py", line 2206, in train + return inner_training_loop( + args=args, + ...<2 lines>... + ignore_keys_for_eval=ignore_keys_for_eval, + ) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/trainer.py", line 2548, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs, num_items_in_batch) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/trainer.py", line 3749, in training_step + loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch) + File "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/src/training/trainer.py", line 53, in compute_loss + outputs = model(**model_inputs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl + return forward_call(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/accelerate/utils/operations.py", line 823, in forward + return model_forward(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/accelerate/utils/operations.py", line 811, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/peft/peft_model.py", line 2342, in forward + return self.base_model( + ~~~~~~~~~~~~~~~^ + input_ids=input_ids, + ^^^^^^^^^^^^^^^^^^^^ + ...<9 lines>... + **kwargs, + ^^^^^^^^^ + ) + ^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl + return forward_call(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/peft/tuners/tuners_utils.py", line 330, in forward + return self.model.forward(*args, **kwargs) + ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/models/t5/modeling_t5.py", line 1792, in forward + decoder_outputs = self.decoder( + input_ids=decoder_input_ids, + ...<11 lines>... + cache_position=cache_position, + ) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl + return forward_call(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/models/t5/modeling_t5.py", line 1105, in forward + layer_outputs = layer_module( + hidden_states, + ...<11 lines>... + cache_position=cache_position, + ) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/modeling_layers.py", line 83, in __call__ + return super().__call__(*args, **kwargs) + ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl + return forward_call(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/models/t5/modeling_t5.py", line 678, in forward + self_attention_outputs = self.layer[0]( + hidden_states, + ...<6 lines>... + cache_position=cache_position, + ) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl + return forward_call(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/models/t5/modeling_t5.py", line 596, in forward + attention_output = self.SelfAttention( + normed_hidden_states, + ...<6 lines>... + cache_position=cache_position, + ) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl + return forward_call(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/models/t5/modeling_t5.py", line 566, in forward + attn_output = self.o(attn_output) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl + return forward_call(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/peft/tuners/lora/layer.py", line 969, in forward + result = result + lora_B(lora_A(dropout(x))) * scaling + ~~~~~~~^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl + return forward_call(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/dropout.py", line 73, in forward + return F.dropout(input, self.p, self.training, self.inplace) + ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/functional.py", line 1443, in dropout + _VF.dropout_(input, p, training) if inplace else _VF.dropout(input, p, training) + ~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^ +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 3.68 GiB of which 7.25 MiB is free. Including non-PyTorch memory, this process has 3.67 GiB memory in use. Of the allocated memory 3.57 GiB is allocated by PyTorch, and 2.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://docs.pytorch.org/docs/stable/notes/cuda.html#optimizing-memory-usage-with-pytorch-cuda-alloc-conf) diff --git a/wandb/run-20260502_204834-03roqvb7/files/requirements.txt b/wandb/run-20260502_204834-03roqvb7/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..ae63c201cc2d6bd6485b666008506c9fdde380ed --- /dev/null +++ b/wandb/run-20260502_204834-03roqvb7/files/requirements.txt @@ -0,0 +1,362 @@ +Pygments==2.19.2 +pluggy==1.6.0 +iniconfig==2.3.0 +pytest==9.0.2 +sqlite-vec==0.1.6 +peewee==3.19.0 +boltons==21.0.0 +zipp==3.23.0 +wrapt==1.17.3 +tree-sitter-python==0.25.0 +tree-sitter-javascript==0.25.0 +tree-sitter-java==0.23.5 +tree-sitter-cpp==0.23.4 +tree-sitter-c==0.24.1 +tree-sitter==0.25.2 +tqdm==4.67.3 +tomli==2.0.2 +tabulate==0.10.0 +stevedore==5.7.0 +sniffio==1.3.1 +smmap==5.0.3 +shellingham==1.5.4 +semantic-version==2.10.0 +ruamel.yaml.clib==0.2.14 +ruamel.yaml==0.19.1 +python-multipart==0.0.22 +python-dotenv==1.2.2 +PyJWT==2.11.0 +protobuf==6.33.5 +opentelemetry-util-http==0.58b0 +networkx==3.6.1 +mdurl==0.1.2 +MarkupSafe==3.0.3 +jiter==0.13.0 +hyperframe==6.1.0 +httpx-sse==0.4.3 +hpack==4.1.0 +face==26.0.0 +exceptiongroup==1.2.2 +docstring_parser==0.17.0 +colorama==0.4.6 +bracex==2.6 +annotated-doc==0.0.4 +aiofiles==25.1.0 +wcmatch==8.5.2 +uvicorn==0.41.0 +starlette==0.52.1 +opentelemetry-proto==1.37.0 +markdown-it-py==4.0.0 +Jinja2==3.1.6 +importlib_metadata==8.7.1 +h2==4.3.0 +googleapis-common-protos==1.73.0 +glom==25.12.0 +gitdb==4.0.12 +click-option-group==0.5.9 +sse-starlette==3.3.2 +rich==14.3.3 +pydantic-settings==2.13.1 +opentelemetry-exporter-otlp-proto-common==1.37.0 +opentelemetry-api==1.37.0 +openai==2.26.0 +jsonschema==4.25.1 +GitPython==3.1.46 +typer==0.23.1 +opentelemetry-semantic-conventions==0.58b0 +bandit==1.9.4 +opentelemetry-sdk==1.37.0 +opentelemetry-instrumentation==0.58b0 +mcp==1.23.3 +agno==2.5.9 +opentelemetry-instrumentation-threading==0.58b0 +opentelemetry-instrumentation-requests==0.58b0 +opentelemetry-exporter-otlp-proto-http==1.37.0 +semgrep==1.154.0 +acvas==1.0.0 +Werkzeug==3.1.8 +websockets==13.1 +wcwidth==0.6.0 +sqlparse==0.5.5 +semver==3.0.4 +itsdangerous==2.2.0 +frida==17.9.1 +click==8.3.2 +blinker==1.9.0 +prompt_toolkit==3.0.52 +Flask==3.1.3 +delegator.py==0.1.1 +frida-tools==14.8.1 +cli_helpers==2.12.0 +litecli==1.17.1 +objection==1.12.4 +userpath==1.9.2 +platformdirs==4.9.4 +argcomplete==3.6.3 +pipx==1.11.1 +distlib==0.4.0 +filelock==3.25.2 +python-discovery==1.2.1 +virtualenv==21.2.0 +pyelftools==0.32 +cigam==0.0.3 +xmltodict==1.0.4 +apkutils2==1.0.0 +svgwrite==1.4.3 +rdflib==7.6.0 +python-louvain==0.16 +wheel==0.46.3 +pip==26.0.1 +threadpoolctl==3.6.0 +scipy==1.17.1 +joblib==1.5.3 +scikit-learn==1.8.0 +pandas==3.0.2 +seaborn==0.13.2 +text-unidecode==1.3 +fastjsonschema==2.21.2 +traitlets==5.14.3 +python-slugify==8.0.4 +bleach==6.3.0 +mdit-py-plugins==0.5.0 +kagglesdk==0.1.23 +jupyter_core==5.9.1 +nbformat==5.10.4 +jupytext==1.19.1 +kaggle==2.1.0 +xxhash==3.7.0 +pyarrow==24.0.0 +propcache==0.4.1 +multidict==6.7.1 +hf-xet==1.4.3 +fsspec==2026.2.0 +frozenlist==1.8.0 +dill==0.4.1 +aiohappyeyeballs==2.6.1 +yarl==1.23.0 +multiprocess==0.70.19 +aiosignal==1.4.0 +aiohttp==3.13.5 +datasets==4.8.5 +torchaudio==2.11.0 +nvidia-cusparselt-cu13==0.8.0 +mpmath==1.3.0 +cuda-toolkit==13.0.2 +wasabi==1.1.3 +uvloop==0.22.1 +triton==3.6.0 +toml==0.10.2 +tensorboard-data-server==0.7.2 +sympy==1.14.0 +spacy-loggers==1.0.5 +spacy-legacy==3.0.12 +spacy-alignments==0.9.1 +smart_open==7.6.0 +setuptools==81.0.0 +sentencepiece==0.2.1 +safetensors==0.7.0 +pyspellchecker==0.9.0 +pyphen==0.17.2 +portalocker==3.2.0 +nvidia-nvtx==13.0.85 +nvidia-nvshmem-cu13==3.4.5 +nvidia-nvjitlink==13.0.88 +nvidia-nccl-cu13==2.28.9 +nvidia-curand==10.4.0.35 +nvidia-cufile==1.15.1.6 +nvidia-cuda-runtime==13.0.96 +nvidia-cuda-nvrtc==13.0.88 +nvidia-cuda-cupti==13.0.85 +nvidia-cublas==13.1.0.3 +nltk==3.9.4 +murmurhash==1.0.15 +msgpack==1.1.2 +Markdown==3.10.2 +loguru==0.7.3 +locate==1.1.1 +langcodes==3.5.1 +httptools==0.7.1 +grpcio==1.80.0 +ftfy==6.3.1 +faiss-cpu==1.13.2 +cymem==2.0.13 +cuda-pathfinder==1.5.4 +confection==1.3.3 +cloudpathlib==0.24.0 +catalogue==2.0.10 +blis==1.3.3 +absl-py==2.4.0 +wordfreq==3.1.1 +watchfiles==1.1.1 +textstat==0.7.13 +tensorboard==2.20.0 +srsly==2.5.3 +sacrebleu==2.6.0 +rouge_score==0.1.2 +preshed==3.0.13 +nvidia-cusparse==12.6.3.3 +nvidia-cufft==12.0.0.61 +nvidia-cudnn-cu13==9.19.0.56 +language_tool_python==3.3.0 +huggingface_hub==0.36.2 +cuda-bindings==13.2.0 +wandb==0.26.1 +tokenizers==0.21.4 +thinc==8.3.13 +nvidia-cusolver==12.0.4.66 +fastapi==0.136.1 +weasel==1.0.0 +transformers==4.53.2 +torch==2.11.0 +spacy==3.8.13 +torchvision==0.26.0 +spacy-transformers==1.4.0 +sentence-transformers==5.4.1 +optimum==2.1.0 +errant==3.0.0 +bitsandbytes==0.49.2 +bert-score==0.3.13 +accelerate==1.13.0 +peft==0.19.1 +en_core_web_sm==3.8.0 +tree-sitter-zig==1.1.2 +tree-sitter-verilog==1.0.3 +tree-sitter-typescript==0.23.2 +tree-sitter-swift==0.0.1 +tree-sitter-scala==0.26.0 +tree-sitter-rust==0.24.2 +tree-sitter-ruby==0.23.1 +tree-sitter-powershell==0.26.3 +tree-sitter-php==0.24.1 +tree-sitter-objc==3.0.2 +tree-sitter-lua==0.5.0 +tree-sitter-kotlin==1.1.0 +tree-sitter-julia==0.23.1 +tree-sitter-go==0.25.0 +tree-sitter-elixir==0.3.5 +tree-sitter-c-sharp==0.23.5 +graphifyy==0.6.5 +Brlapi==0.8.7 +PyGObject==3.54.5 +cffi==2.0.0 +contourpy==1.3.3 +cryptography==46.0.7 +dbus-python==1.4.0 +evdev==1.9.3 +fonttools==4.61.0 +gbinder-python==1.3.0 +kiwisolver==1.5.0 +libvirt-python==11.6.0 +lxml==6.0.1 +matplotlib==3.10.8 +numpy==2.3.5 +perf==0.1 +pillow==11.3.0 +psutil==7.0.0 +PyAudio==0.2.13 +pycairo==1.28.0 +pycups==2.0.4 +pydantic_core==2.41.5 +pyscard==2.2.2 +PyYAML==6.0.2 +pyzstd==0.16.2 +RapidFuzz==3.12.2 +regex==2026.2.28 +rpds-py==0.27.0 +rpm==6.0.1 +selinux==3.9 +setools==4.6.0 +setproctitle==1.3.7 +systemd-python==235 +PySocks==1.7.1 +annotated-types==0.7.0 +anyio==4.12.1 +appdirs==1.4.4 +attrs==25.4.0 +beautifulsoup4==4.14.3 +boto3==1.42.84 +botocore==1.42.84 +certifi==2025.7.9 +charset-normalizer==3.4.3 +click==8.1.7 +cockpit==360.1 +configobj==5.0.9 +cssselect==1.3.0 +cupshelpers==1.0 +cycler==0.11.0 +dasbus==1.7 +distro==1.9.0 +fido2==2.0.0 +file-magic==0.4.0 +filelock==3.15.4 +fw-fanctrl==1.0.2 +h11==0.16.0 +httpcore==1.0.9 +httpx==0.28.1 +icoextract==0.2.0 +idna==3.10 +inkex==1.4.0 +input-remapper==2.2.0 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 +jaraco.functools==4.1.0 +jeepney==0.9.0 +jmespath==1.0.1 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +keyring==25.7.0 +langtable==0.0.69 +libevdev==0.12 +louis==3.33.0 +lutris==0.5.22 +moddb==0.12.0 +more-itertools==10.5.0 +nftables==0.1 +olefile==0.47 +packaging==25.0 +pefile==2024.8.26 +pexpect==4.9.0 +pip==25.1.1 +ply==3.11 +protobuf==3.19.6 +ptyprocess==0.7.0 +pycparser==2.22 +pydantic==2.12.5 +pydbus==0.6.0 +pyenchant==3.2.2 +pygdbmi==0.11.0.0 +Pygments==2.19.1 +pyinotify==0.9.6 +pyparsing==3.1.2 +pypresence==4.3.0 +pyserial==3.5 +python-dateutil==2.9.0.post0 +python-linux-procfs==0.7.3 +python-pskc==1.4 +python-xlib==0.33 +pyudev==0.24.3 +pyxdg==0.28 +pyynl==0.0.1 +referencing==0.36.2 +requests==2.32.5 +requests-file==2.0.0 +s3transfer==0.16.0 +scour==0.38.2 +SecretStorage==3.3.3 +sentry-sdk==2.35.0 +sepolicy==3.9 +setroubleshoot==3.3.36 +setuptools==78.1.1 +shtab==1.7.2 +six==1.17.0 +sos==4.11.0 +soupsieve==2.8 +tinycss2==1.5.1 +tldextract==5.3.0 +typing_extensions==4.15.0 +typing-inspection==0.4.2 +udica==0.2.8 +umu-launcher==1.2.9 +urllib3==2.6.3 +webencodings==0.5.1 +yubikey-manager==5.9.0 diff --git a/wandb/run-20260502_204834-03roqvb7/files/wandb-metadata.json b/wandb/run-20260502_204834-03roqvb7/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..41849071a313d458f65cc539d3061da693c211e5 --- /dev/null +++ b/wandb/run-20260502_204834-03roqvb7/files/wandb-metadata.json @@ -0,0 +1,41 @@ +{ + "os": "Linux-6.17.7-ba29.fc43.x86_64-x86_64-with-glibc2.42", + "python": "CPython 3.14.3", + "startedAt": "2026-05-02T15:18:34.734531Z", + "args": [ + "--config", + "configs/training_config.yaml", + "--use-v2-loss" + ], + "program": "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", + "codePath": "scripts/train.py", + "codePathLocal": "scripts/train.py", + "email": "morpheuslord@protonmail.com", + "root": "/run/media/morpheuslord/Personal_Files/Projects/Rewriter", + "host": "bazzite", + "executable": "/usr/bin/python3", + "cpu_count": 8, + "cpu_count_logical": 16, + "gpu": "NVIDIA GeForce RTX 3050 Laptop GPU", + "gpu_count": 1, + "disk": { + "/": { + "total": "60248064", + "used": "60248064" + } + }, + "memory": { + "total": "32944783360" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA GeForce RTX 3050 Laptop GPU", + "memoryTotal": "4294967296", + "cudaCores": 2048, + "architecture": "Ampere", + "uuid": "GPU-861554d7-d187-39e9-e77c-881f0287b963" + } + ], + "cudaVersion": "13.2", + "writerId": "d9afasti9jrko2ov39xld8rscjpj5kuo" +} \ No newline at end of file diff --git a/wandb/run-20260502_204834-03roqvb7/files/wandb-summary.json b/wandb/run-20260502_204834-03roqvb7/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..052f52131aec5695120a4b23773632e554af958a --- /dev/null +++ b/wandb/run-20260502_204834-03roqvb7/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":737},"_runtime":737} \ No newline at end of file diff --git a/wandb/run-20260502_204834-03roqvb7/logs/debug-core.log b/wandb/run-20260502_204834-03roqvb7/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..2068cf53ca479463360cd8275396077e898771e9 --- /dev/null +++ b/wandb/run-20260502_204834-03roqvb7/logs/debug-core.log @@ -0,0 +1,15 @@ +{"time":"2026-05-02T20:48:34.930621946+05:30","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmph57wu8xf/port-215021.txt","pid":215021,"detached":false,"idle-timeout":600000000000,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-05-02T20:48:34.931081601+05:30","level":"INFO","msg":"server: will exit if parent process dies","ppid":215021} +{"time":"2026-05-02T20:48:34.931077444+05:30","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-215021-215135-2695649495/socket","Net":"unix"}} +{"time":"2026-05-02T20:48:35.120441573+05:30","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-05-02T20:48:35.128958855+05:30","level":"INFO","msg":"handleInformInit: received","streamId":"03roqvb7","id":"1(@)"} +{"time":"2026-05-02T20:48:35.859884094+05:30","level":"INFO","msg":"handleInformInit: stream started","streamId":"03roqvb7","id":"1(@)"} +{"time":"2026-05-02T20:48:41.936699578+05:30","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"hi8fq674uelj"} +{"time":"2026-05-02T21:00:54.697392816+05:30","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-05-02T21:00:54.697906604+05:30","level":"INFO","msg":"server is shutting down"} +{"time":"2026-05-02T21:00:54.69801092+05:30","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-05-02T21:00:54.698252145+05:30","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-215021-215135-2695649495/socket","Net":"unix"}} +{"time":"2026-05-02T21:00:54.698418057+05:30","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-05-02T21:00:57.460304896+05:30","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-05-02T21:00:57.460506165+05:30","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-05-02T21:00:57.460686114+05:30","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20260502_204834-03roqvb7/logs/debug-internal.log b/wandb/run-20260502_204834-03roqvb7/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..de770ac9760899b861d4df6c4717db14fffd5894 --- /dev/null +++ b/wandb/run-20260502_204834-03roqvb7/logs/debug-internal.log @@ -0,0 +1,114 @@ +{"time":"2026-05-02T20:48:35.129242619+05:30","level":"INFO","msg":"wandb-core"} +{"time":"2026-05-02T20:48:35.130040552+05:30","level":"INFO","msg":"stream: starting","core version":"0.26.1"} +{"time":"2026-05-02T20:48:35.859139621+05:30","level":"INFO","msg":"stream: created new stream","id":"03roqvb7"} +{"time":"2026-05-02T20:48:35.859390614+05:30","level":"INFO","msg":"handler: started"} +{"time":"2026-05-02T20:48:35.859852724+05:30","level":"INFO","msg":"stream: started"} +{"time":"2026-05-02T20:48:35.859881278+05:30","level":"INFO","msg":"writer: started","stream_id":"03roqvb7"} +{"time":"2026-05-02T20:48:35.859896797+05:30","level":"INFO","msg":"sender: started"} +{"time":"2026-05-02T20:48:36.944566671+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":2} +{"time":"2026-05-02T20:48:37.702777822+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:48:51.945660744+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":0,"events_lines":2,"console_offset":1,"console_lines":7,"uploaded_len":2} +{"time":"2026-05-02T20:48:52.40980667+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:49:06.944922521+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":2,"events_lines":2,"console_offset":7,"console_lines":1} +{"time":"2026-05-02T20:49:07.349453577+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:49:21.945075376+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":4,"events_lines":2,"console_offset":7,"console_lines":1} +{"time":"2026-05-02T20:49:22.388726942+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:49:36.945174109+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":6,"events_lines":2,"console_offset":7,"console_lines":1} +{"time":"2026-05-02T20:49:37.449000739+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:49:51.945311806+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":8,"events_lines":2,"console_offset":7,"console_lines":1} +{"time":"2026-05-02T20:49:52.501720519+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:50:06.945018511+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":10,"events_lines":2,"console_offset":7,"console_lines":1} +{"time":"2026-05-02T20:50:07.461090854+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:50:21.945397031+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":12,"events_lines":2,"console_offset":7,"console_lines":1} +{"time":"2026-05-02T20:50:22.49283057+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:50:36.944969563+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":14,"events_lines":2,"console_offset":7,"console_lines":1} +{"time":"2026-05-02T20:50:37.463745689+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:50:51.944724218+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":16,"events_lines":2,"console_offset":7,"console_lines":1} +{"time":"2026-05-02T20:50:52.367858171+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:51:06.945930678+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":18,"events_lines":2,"console_offset":7,"console_lines":1} +{"time":"2026-05-02T20:51:07.432780403+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:51:21.944942504+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":20,"events_lines":2,"console_offset":7,"console_lines":1} +{"time":"2026-05-02T20:51:22.332487128+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:51:36.945846223+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":22,"events_lines":2,"console_offset":7,"console_lines":1} +{"time":"2026-05-02T20:51:37.404165785+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:51:51.945493757+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":24,"events_lines":2,"console_offset":7,"console_lines":3} +{"time":"2026-05-02T20:51:52.469414171+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:52:06.944919632+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":26,"events_lines":2} +{"time":"2026-05-02T20:52:07.359068853+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:52:21.946083276+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":28,"events_lines":2,"console_offset":10,"console_lines":8} +{"time":"2026-05-02T20:52:22.336822617+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:52:36.945007868+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":30,"events_lines":2} +{"time":"2026-05-02T20:52:37.34805534+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:52:51.945393411+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":32,"events_lines":2} +{"time":"2026-05-02T20:52:52.477559466+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:53:06.945318307+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":34,"events_lines":2} +{"time":"2026-05-02T20:53:07.632809552+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:53:21.945448179+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":36,"events_lines":2} +{"time":"2026-05-02T20:53:22.590526844+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:53:36.945389386+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":38,"events_lines":2} +{"time":"2026-05-02T20:53:37.431182892+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:53:51.946139386+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":40,"events_lines":2} +{"time":"2026-05-02T20:53:52.589437248+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:54:06.945248558+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":42,"events_lines":2} +{"time":"2026-05-02T20:54:07.433536563+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:54:21.945555365+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":44,"events_lines":2} +{"time":"2026-05-02T20:54:22.38779521+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:54:36.945538192+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":46,"events_lines":2} +{"time":"2026-05-02T20:54:37.43753369+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:54:51.944861636+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":48,"events_lines":2} +{"time":"2026-05-02T20:54:52.388840848+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:55:06.945422783+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":50,"events_lines":2,"console_offset":18,"console_lines":1} +{"time":"2026-05-02T20:55:07.383938654+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:55:21.945872118+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":52,"events_lines":2} +{"time":"2026-05-02T20:55:22.391150616+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:55:36.945718217+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":54,"events_lines":2} +{"time":"2026-05-02T20:55:37.445794315+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:55:51.945816782+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":56,"events_lines":2} +{"time":"2026-05-02T20:55:52.397251846+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:56:06.945138053+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":58,"events_lines":2} +{"time":"2026-05-02T20:56:07.341460184+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:56:21.945766545+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":60,"events_lines":2} +{"time":"2026-05-02T20:56:22.345069129+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:56:36.94579641+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":62,"events_lines":2} +{"time":"2026-05-02T20:56:37.350482744+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:56:51.944858122+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":64,"events_lines":2} +{"time":"2026-05-02T20:56:52.401035789+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:57:06.945129713+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":66,"events_lines":2,"console_offset":19,"console_lines":1} +{"time":"2026-05-02T20:57:07.453860642+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:57:21.945440908+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":68,"events_lines":2} +{"time":"2026-05-02T20:57:22.40680318+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:57:36.945157583+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":70,"events_lines":2} +{"time":"2026-05-02T20:57:37.355267363+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:57:51.944829675+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":72,"events_lines":2} +{"time":"2026-05-02T20:57:52.408742381+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:58:06.945229317+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":74,"events_lines":2} +{"time":"2026-05-02T20:58:07.317076931+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:58:21.945517409+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":76,"events_lines":2} +{"time":"2026-05-02T20:58:22.412318815+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:58:36.945560519+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":78,"events_lines":2} +{"time":"2026-05-02T20:58:37.363399075+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:58:51.944610018+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":80,"events_lines":2,"console_offset":20,"console_lines":1} +{"time":"2026-05-02T20:58:52.32787722+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:59:06.944944367+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":82,"events_lines":2} +{"time":"2026-05-02T20:59:07.365877301+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:59:21.945808824+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":84,"events_lines":2} +{"time":"2026-05-02T20:59:22.419105534+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:59:36.944653167+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":86,"events_lines":2} +{"time":"2026-05-02T20:59:37.368734821+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T20:59:51.945167497+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":88,"events_lines":2,"console_offset":21,"console_lines":1} +{"time":"2026-05-02T20:59:52.422732085+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:00:06.945559936+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":90,"events_lines":2,"console_offset":22,"console_lines":4} +{"time":"2026-05-02T21:00:07.37219515+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:00:21.94520128+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":92,"events_lines":2} +{"time":"2026-05-02T21:00:22.425136152+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:00:36.946236278+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":94,"events_lines":2,"console_offset":26,"console_lines":4} +{"time":"2026-05-02T21:00:37.376742283+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:00:51.945363524+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":96,"events_lines":2,"console_offset":30,"console_lines":10} +{"time":"2026-05-02T21:00:52.53408923+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:00:56.93579104+05:30","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-05-02T21:00:56.938995947+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"console_offset":39,"console_lines":129,"uploaded_len":3,"complete":true,"exit_code":1} +{"time":"2026-05-02T21:00:57.448099758+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:00:57.449271776+05:30","level":"INFO","msg":"stream: finishing up"} +{"time":"2026-05-02T21:00:57.449739727+05:30","level":"INFO","msg":"handler: closed"} +{"time":"2026-05-02T21:00:57.451176844+05:30","level":"INFO","msg":"sender: closed"} +{"time":"2026-05-02T21:00:57.451677036+05:30","level":"INFO","msg":"stream: all finished"} diff --git a/wandb/run-20260502_204834-03roqvb7/logs/debug.log b/wandb/run-20260502_204834-03roqvb7/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..730fa2e241fb293d811fea3064b049c0d2a82b7b --- /dev/null +++ b/wandb/run-20260502_204834-03roqvb7/logs/debug.log @@ -0,0 +1,24 @@ +2026-05-02 20:48:34,738 INFO MainThread:215021 [wandb_setup.py:_flush():81] Current SDK version is 0.26.1 +2026-05-02 20:48:34,738 INFO MainThread:215021 [wandb_setup.py:_flush():81] Configure stats pid to 215021 +2026-05-02 20:48:34,738 INFO MainThread:215021 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-05-02 20:48:34,739 INFO MainThread:215021 [wandb_init.py:setup_run_log_directory():723] Logging user logs to /run/media/morpheuslord/Personal_Files/Projects/Rewriter/wandb/run-20260502_204834-03roqvb7/logs/debug.log +2026-05-02 20:48:34,739 INFO MainThread:215021 [wandb_init.py:setup_run_log_directory():724] Logging internal logs to /run/media/morpheuslord/Personal_Files/Projects/Rewriter/wandb/run-20260502_204834-03roqvb7/logs/debug-internal.log +2026-05-02 20:48:34,739 INFO MainThread:215021 [wandb_init.py:init():850] calling init triggers +2026-05-02 20:48:34,739 INFO MainThread:215021 [wandb_init.py:init():855] wandb.init called with sweep_config: {} +config: {'model': {'key': 'flan-t5-base', 'quantize': False, 'use_lora': True}, 'lora': {'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'target_modules': ['q', 'v', 'k', 'o', 'wi_0', 'wi_1', 'wo']}, 'data': {'train_path': 'data/processed/train.jsonl', 'val_path': 'data/processed/val.jsonl', 'test_path': 'data/processed/test.jsonl', 'max_input_length': 256, 'max_target_length': 256, 'augment_synthetic': True, 'synthetic_ratio': 0.3}, 'training': {'output_dir': 'checkpoints/', 'num_train_epochs': 3, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'gradient_accumulation_steps': 4, 'learning_rate': 0.0003, 'lr_scheduler_type': 'cosine', 'warmup_ratio': 0.05, 'weight_decay': 0.01, 'fp16': False, 'bf16': True, 'evaluation_strategy': 'steps', 'eval_steps': 200, 'save_strategy': 'steps', 'save_steps': 200, 'save_total_limit': 3, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'logging_dir': 'logs/', 'logging_steps': 50, 'report_to': ['wandb', 'tensorboard'], 'dataloader_num_workers': 4, 'seed': 42, 'push_to_hub': False}, 'loss': {'lambda_style': 0.3, 'lambda_semantic': 0.5, 'lambda_human_pattern': 0.4, 'sem_model_name': 'all-mpnet-base-v2'}, 'generation': {'num_beams': 5, 'length_penalty': 1.0, 'no_repeat_ngram_size': 3, 'min_length': 10, 'max_new_tokens': 512, 'early_stopping': True}, 'human_pattern': {'classifier_path': 'checkpoints/human_pattern_classifier.pt', 'shanegerami_path': 'data/raw/shanegerami/AI_Human.csv', 'starblasters_path': 'data/raw/starblasters8/data.parquet', 'max_samples_per_source': 50000, 'pretrain_epochs': 20, 'pretrain_lr': 0.001, 'pretrain_batch_size': 512, 'target_auc': 0.88}, '_wandb': {}} +2026-05-02 20:48:34,739 INFO MainThread:215021 [wandb_init.py:init():898] starting backend +2026-05-02 20:48:35,120 INFO MainThread:215021 [wandb_init.py:init():913] sending inform_init request +2026-05-02 20:48:35,860 INFO MainThread:215021 [wandb_init.py:init():918] backend started and connected +2026-05-02 20:48:35,861 INFO MainThread:215021 [wandb_init.py:init():988] updated telemetry +2026-05-02 20:48:35,863 INFO MainThread:215021 [wandb_init.py:init():1011] communicating run to backend with 90.0 second timeout +2026-05-02 20:48:36,783 INFO MainThread:215021 [wandb_init.py:init():1056] starting run threads in backend +2026-05-02 20:48:36,934 INFO MainThread:215021 [wandb_run.py:_console_start():2554] atexit reg +2026-05-02 20:48:36,934 INFO MainThread:215021 [wandb_run.py:_redirect():2403] redirect: wrap_raw +2026-05-02 20:48:36,934 INFO MainThread:215021 [wandb_run.py:_redirect():2472] Wrapping output streams. +2026-05-02 20:48:36,934 INFO MainThread:215021 [wandb_run.py:_redirect():2495] Redirects installed. +2026-05-02 20:48:36,936 INFO MainThread:215021 [wandb_init.py:init():1094] run started, returning control to user process +2026-05-02 21:00:49,529 INFO MainThread:215021 [wandb_run.py:_config_callback():1415] config_cb None None {'peft_config': {'default': {'task_type': 'SEQ_2_SEQ_LM', 'peft_type': 'LORA', 'auto_mapping': None, 'peft_version': '0.19.1', 'base_model_name_or_path': 'google/flan-t5-base', 'revision': None, 'inference_mode': False, 'r': 16, 'target_modules': ['v', 'o', 'q', 'k', 'wi_0', 'wo', 'wi_1'], 'exclude_modules': None, 'lora_alpha': 32, 'lora_dropout': 0.05, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'trainable_token_indices': None, 'loftq_config': {}, 'eva_config': None, 'corda_config': None, 'lora_ga_config': None, 'use_dora': False, 'alora_invocation_tokens': None, 'use_qalora': False, 'qalora_group_size': 16, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}, 'lora_bias': False, 'target_parameters': None, 'use_bdlora': None, 'arrow_config': None, 'ensure_weight_tying': False}}, 'vocab_size': 32128, 'd_model': 768, 'd_kv': 64, 'd_ff': 2048, 'num_layers': 12, 'num_decoder_layers': 12, 'num_heads': 12, 'relative_attention_num_buckets': 32, 'relative_attention_max_distance': 128, 'dropout_rate': 0.1, 'classifier_dropout': 0.0, 'layer_norm_epsilon': 1e-06, 'initializer_factor': 1.0, 'feed_forward_proj': 'gated-gelu', 'use_cache': True, 'dense_act_fn': 'gelu_new', 'is_gated_act': True, 'return_dict': True, 'output_hidden_states': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['T5ForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': 1, 'sep_token_id': None, 'decoder_start_token_id': 0, 'task_specific_params': {'summarization': {'early_stopping': True, 'length_penalty': 2.0, 'max_length': 200, 'min_length': 30, 'no_repeat_ngram_size': 3, 'num_beams': 4, 'prefix': 'summarize: '}, 'translation_en_to_de': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to German: '}, 'translation_en_to_fr': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to French: '}, 'translation_en_to_ro': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to Romanian: '}}, 'problem_type': None, '_name_or_path': 'google/flan-t5-base', 'transformers_version': '4.53.2', 'model_type': 't5', 'n_positions': 512, 'output_past': True, 'output_attentions': False, 'output_dir': 'checkpoints/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0003, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.05, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'logs/', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 50, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 200, 'save_total_limit': 3, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 200, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'checkpoints/', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb', 'tensorboard'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': False} +2026-05-02 21:00:49,584 INFO MainThread:215021 [wandb_config.py:__setitem__():155] [no run ID] config set model/num_parameters = 254360832 - > +2026-05-02 21:00:49,585 INFO MainThread:215021 [wandb_run.py:_config_callback():1415] config_cb model/num_parameters 254360832 None +2026-05-02 21:00:54,700 INFO wandb-AsyncioManager-main:215021 [service_client.py:_forward_responses():134] Reached EOF. +2026-05-02 21:00:54,701 INFO wandb-AsyncioManager-main:215021 [mailbox.py:close():155] Closing mailbox, abandoning 1 handles. diff --git a/wandb/run-20260502_210534-j0t4q38m/files/config.yaml b/wandb/run-20260502_210534-j0t4q38m/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e0601a8ae25dd3f7b476c44d64bf9f12134aea95 --- /dev/null +++ b/wandb/run-20260502_210534-j0t4q38m/files/config.yaml @@ -0,0 +1,152 @@ +_wandb: + value: + cli_version: 0.26.1 + e: + vj7iupa1nplsn4rj59u3092ryg7aihk3: + args: + - --config + - configs/training_config.yaml + - --use-v2-loss + codePath: scripts/train.py + codePathLocal: scripts/train.py + cpu_count: 8 + cpu_count_logical: 16 + cudaVersion: "13.2" + disk: + /: + total: "60248064" + used: "60248064" + email: morpheuslord@protonmail.com + executable: /usr/bin/python3 + gpu: NVIDIA GeForce RTX 3050 Laptop GPU + gpu_count: 1 + gpu_nvidia: + - architecture: Ampere + cudaCores: 2048 + memoryTotal: "4294967296" + name: NVIDIA GeForce RTX 3050 Laptop GPU + uuid: GPU-861554d7-d187-39e9-e77c-881f0287b963 + host: bazzite + memory: + total: "32944783360" + os: Linux-6.17.7-ba29.fc43.x86_64-x86_64-with-glibc2.42 + program: /run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py + python: CPython 3.14.3 + root: /run/media/morpheuslord/Personal_Files/Projects/Rewriter + startedAt: "2026-05-02T15:35:34.656190Z" + writerId: vj7iupa1nplsn4rj59u3092ryg7aihk3 + m: [] + python_version: 3.14.3 + t: + "1": + - 1 + - 5 + - 11 + - 33 + - 41 + - 49 + - 51 + - 53 + - 71 + - 75 + - 98 + "2": + - 1 + - 5 + - 11 + - 33 + - 41 + - 49 + - 51 + - 53 + - 71 + - 75 + - 98 + "3": + - 13 + - 16 + "4": 3.14.3 + "5": 0.26.1 + "6": 4.53.2 + "12": 0.26.1 + "13": linux-x86_64 +data: + value: + augment_synthetic: true + max_input_length: 256 + max_target_length: 256 + synthetic_ratio: 0.3 + test_path: data/processed/test.jsonl + train_path: data/processed/train.jsonl + val_path: data/processed/val.jsonl +generation: + value: + early_stopping: true + length_penalty: 1 + max_new_tokens: 512 + min_length: 10 + no_repeat_ngram_size: 3 + num_beams: 5 +human_pattern: + value: + classifier_path: checkpoints/human_pattern_classifier.pt + max_samples_per_source: 50000 + pretrain_batch_size: 512 + pretrain_epochs: 20 + pretrain_lr: 0.001 + shanegerami_path: data/raw/shanegerami/AI_Human.csv + starblasters_path: data/raw/starblasters8/data.parquet + target_auc: 0.88 +lora: + value: + lora_alpha: 32 + lora_dropout: 0.05 + r: 16 + target_modules: + - q + - v + - k + - o + - wi_0 + - wi_1 + - wo +loss: + value: + lambda_human_pattern: 0.4 + lambda_semantic: 0.5 + lambda_style: 0.3 + sem_model_name: all-mpnet-base-v2 +model: + value: + key: flan-t5-base + quantize: false + use_lora: true +training: + value: + bf16: true + dataloader_num_workers: 4 + eval_steps: 200 + evaluation_strategy: steps + fp16: false + gradient_accumulation_steps: 8 + greater_is_better: false + learning_rate: 0.0003 + load_best_model_at_end: true + logging_dir: logs/ + logging_steps: 50 + lr_scheduler_type: cosine + metric_for_best_model: eval_loss + num_train_epochs: 3 + output_dir: checkpoints/ + per_device_eval_batch_size: 4 + per_device_train_batch_size: 4 + push_to_hub: false + report_to: + - wandb + - tensorboard + save_steps: 200 + save_strategy: steps + save_total_limit: 3 + seed: 42 + warmup_ratio: 0.05 + weight_decay: 0.01 diff --git a/wandb/run-20260502_210534-j0t4q38m/files/output.log b/wandb/run-20260502_210534-j0t4q38m/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..514a9fe6b0254958d26cc95a06c84d48c8a34017 --- /dev/null +++ b/wandb/run-20260502_210534-j0t4q38m/files/output.log @@ -0,0 +1,41 @@ +2026-05-02 21:05:40.793 | INFO  | __main__:train:59 - Step 3: Loading model and tokenizer... +2026-05-02 21:05:40.796 | INFO  | src.model.base_model:load_model_and_tokenizer:56 - Loading model: google/flan-t5-base (seq2seq=True, quantize=False, lora=True) +2026-05-02 21:05:45.395 | INFO  | src.model.base_model:load_model_and_tokenizer:100 - Model loaded on cuda with dtype torch.bfloat16 +2026-05-02 21:06:04.507 | INFO  | src.model.base_model:load_model_and_tokenizer:128 - LoRA applied: 6,782,976 trainable params / 254,360,832 total (2.67%) +2026-05-02 21:06:04.509 | INFO  | __main__:train:72 - Step 4: Creating style fingerprinter... +2026-05-02 21:06:08.700 | INFO  | src.style.fingerprinter:__init__:100 - StyleFingerprinter initialised (AWL size: 549) +2026-05-02 21:06:08.702 | INFO  | __main__:train:79 - Step 5: Loading datasets... +2026-05-02 21:06:09.241 | INFO  | src.training.dataset:__init__:75 - Loaded 7555 examples from data/processed/train.jsonl +2026-05-02 21:06:11.169 | INFO  | src.training.dataset:_add_synthetic:127 - Added 2125 synthetic augmentation examples +2026-05-02 21:06:11.170 | INFO  | src.training.dataset:__init__:81 - Total dataset size: 9680 examples +2026-05-02 21:06:11.173 | INFO  | src.training.dataset:_precompute_all:133 - Pre-computing tokenisation and style vectors for all examples... +2026-05-02 21:09:08.232 | INFO  | src.training.dataset:_precompute_all:180 -  Pre-computed: 2000/9680 +2026-05-02 21:12:19.101 | INFO  | src.training.dataset:_precompute_all:180 -  Pre-computed: 4000/9680 +2026-05-02 21:13:08.144 | INFO  | src.training.dataset:_precompute_all:180 -  Pre-computed: 6000/9680 +2026-05-02 21:15:18.499 | INFO  | src.training.dataset:_precompute_all:180 -  Pre-computed: 8000/9680 +2026-05-02 21:15:28.475 | INFO  | src.training.dataset:_precompute_all:182 - Pre-computation complete (7231 unique style vectors) +2026-05-02 21:15:28.480 | INFO  | src.training.dataset:__init__:75 - Loaded 839 examples from data/processed/val.jsonl +2026-05-02 21:15:28.480 | INFO  | src.training.dataset:__init__:81 - Total dataset size: 839 examples +2026-05-02 21:15:28.480 | INFO  | src.training.dataset:_precompute_all:133 - Pre-computing tokenisation and style vectors for all examples... +2026-05-02 21:16:20.072 | INFO  | src.training.dataset:_precompute_all:182 - Pre-computation complete (833 unique style vectors) +2026-05-02 21:16:20.072 | INFO  | __main__:train:99 - Train: 9680 | Val: 839 +2026-05-02 21:16:20.072 | INFO  | __main__:train:122 - Using CE-only loss (aux models skipped to save memory) +2026-05-02 21:16:20.072 | INFO  | __main__:train:125 - Step 7: Creating training arguments... +Traceback (most recent call last): + File "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", line 211, in + train() + ~~~~~^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1485, in __call__ + return self.main(*args, **kwargs) + ~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1406, in main + rv = self.invoke(ctx) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1269, in invoke + return ctx.invoke(self.callback, **ctx.params) + ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 824, in invoke + return callback(*args, **kwargs) + File "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", line 130, in train + if device == "cuda": + ^^^^^^ +NameError: name 'device' is not defined diff --git a/wandb/run-20260502_210534-j0t4q38m/files/requirements.txt b/wandb/run-20260502_210534-j0t4q38m/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..ae63c201cc2d6bd6485b666008506c9fdde380ed --- /dev/null +++ b/wandb/run-20260502_210534-j0t4q38m/files/requirements.txt @@ -0,0 +1,362 @@ +Pygments==2.19.2 +pluggy==1.6.0 +iniconfig==2.3.0 +pytest==9.0.2 +sqlite-vec==0.1.6 +peewee==3.19.0 +boltons==21.0.0 +zipp==3.23.0 +wrapt==1.17.3 +tree-sitter-python==0.25.0 +tree-sitter-javascript==0.25.0 +tree-sitter-java==0.23.5 +tree-sitter-cpp==0.23.4 +tree-sitter-c==0.24.1 +tree-sitter==0.25.2 +tqdm==4.67.3 +tomli==2.0.2 +tabulate==0.10.0 +stevedore==5.7.0 +sniffio==1.3.1 +smmap==5.0.3 +shellingham==1.5.4 +semantic-version==2.10.0 +ruamel.yaml.clib==0.2.14 +ruamel.yaml==0.19.1 +python-multipart==0.0.22 +python-dotenv==1.2.2 +PyJWT==2.11.0 +protobuf==6.33.5 +opentelemetry-util-http==0.58b0 +networkx==3.6.1 +mdurl==0.1.2 +MarkupSafe==3.0.3 +jiter==0.13.0 +hyperframe==6.1.0 +httpx-sse==0.4.3 +hpack==4.1.0 +face==26.0.0 +exceptiongroup==1.2.2 +docstring_parser==0.17.0 +colorama==0.4.6 +bracex==2.6 +annotated-doc==0.0.4 +aiofiles==25.1.0 +wcmatch==8.5.2 +uvicorn==0.41.0 +starlette==0.52.1 +opentelemetry-proto==1.37.0 +markdown-it-py==4.0.0 +Jinja2==3.1.6 +importlib_metadata==8.7.1 +h2==4.3.0 +googleapis-common-protos==1.73.0 +glom==25.12.0 +gitdb==4.0.12 +click-option-group==0.5.9 +sse-starlette==3.3.2 +rich==14.3.3 +pydantic-settings==2.13.1 +opentelemetry-exporter-otlp-proto-common==1.37.0 +opentelemetry-api==1.37.0 +openai==2.26.0 +jsonschema==4.25.1 +GitPython==3.1.46 +typer==0.23.1 +opentelemetry-semantic-conventions==0.58b0 +bandit==1.9.4 +opentelemetry-sdk==1.37.0 +opentelemetry-instrumentation==0.58b0 +mcp==1.23.3 +agno==2.5.9 +opentelemetry-instrumentation-threading==0.58b0 +opentelemetry-instrumentation-requests==0.58b0 +opentelemetry-exporter-otlp-proto-http==1.37.0 +semgrep==1.154.0 +acvas==1.0.0 +Werkzeug==3.1.8 +websockets==13.1 +wcwidth==0.6.0 +sqlparse==0.5.5 +semver==3.0.4 +itsdangerous==2.2.0 +frida==17.9.1 +click==8.3.2 +blinker==1.9.0 +prompt_toolkit==3.0.52 +Flask==3.1.3 +delegator.py==0.1.1 +frida-tools==14.8.1 +cli_helpers==2.12.0 +litecli==1.17.1 +objection==1.12.4 +userpath==1.9.2 +platformdirs==4.9.4 +argcomplete==3.6.3 +pipx==1.11.1 +distlib==0.4.0 +filelock==3.25.2 +python-discovery==1.2.1 +virtualenv==21.2.0 +pyelftools==0.32 +cigam==0.0.3 +xmltodict==1.0.4 +apkutils2==1.0.0 +svgwrite==1.4.3 +rdflib==7.6.0 +python-louvain==0.16 +wheel==0.46.3 +pip==26.0.1 +threadpoolctl==3.6.0 +scipy==1.17.1 +joblib==1.5.3 +scikit-learn==1.8.0 +pandas==3.0.2 +seaborn==0.13.2 +text-unidecode==1.3 +fastjsonschema==2.21.2 +traitlets==5.14.3 +python-slugify==8.0.4 +bleach==6.3.0 +mdit-py-plugins==0.5.0 +kagglesdk==0.1.23 +jupyter_core==5.9.1 +nbformat==5.10.4 +jupytext==1.19.1 +kaggle==2.1.0 +xxhash==3.7.0 +pyarrow==24.0.0 +propcache==0.4.1 +multidict==6.7.1 +hf-xet==1.4.3 +fsspec==2026.2.0 +frozenlist==1.8.0 +dill==0.4.1 +aiohappyeyeballs==2.6.1 +yarl==1.23.0 +multiprocess==0.70.19 +aiosignal==1.4.0 +aiohttp==3.13.5 +datasets==4.8.5 +torchaudio==2.11.0 +nvidia-cusparselt-cu13==0.8.0 +mpmath==1.3.0 +cuda-toolkit==13.0.2 +wasabi==1.1.3 +uvloop==0.22.1 +triton==3.6.0 +toml==0.10.2 +tensorboard-data-server==0.7.2 +sympy==1.14.0 +spacy-loggers==1.0.5 +spacy-legacy==3.0.12 +spacy-alignments==0.9.1 +smart_open==7.6.0 +setuptools==81.0.0 +sentencepiece==0.2.1 +safetensors==0.7.0 +pyspellchecker==0.9.0 +pyphen==0.17.2 +portalocker==3.2.0 +nvidia-nvtx==13.0.85 +nvidia-nvshmem-cu13==3.4.5 +nvidia-nvjitlink==13.0.88 +nvidia-nccl-cu13==2.28.9 +nvidia-curand==10.4.0.35 +nvidia-cufile==1.15.1.6 +nvidia-cuda-runtime==13.0.96 +nvidia-cuda-nvrtc==13.0.88 +nvidia-cuda-cupti==13.0.85 +nvidia-cublas==13.1.0.3 +nltk==3.9.4 +murmurhash==1.0.15 +msgpack==1.1.2 +Markdown==3.10.2 +loguru==0.7.3 +locate==1.1.1 +langcodes==3.5.1 +httptools==0.7.1 +grpcio==1.80.0 +ftfy==6.3.1 +faiss-cpu==1.13.2 +cymem==2.0.13 +cuda-pathfinder==1.5.4 +confection==1.3.3 +cloudpathlib==0.24.0 +catalogue==2.0.10 +blis==1.3.3 +absl-py==2.4.0 +wordfreq==3.1.1 +watchfiles==1.1.1 +textstat==0.7.13 +tensorboard==2.20.0 +srsly==2.5.3 +sacrebleu==2.6.0 +rouge_score==0.1.2 +preshed==3.0.13 +nvidia-cusparse==12.6.3.3 +nvidia-cufft==12.0.0.61 +nvidia-cudnn-cu13==9.19.0.56 +language_tool_python==3.3.0 +huggingface_hub==0.36.2 +cuda-bindings==13.2.0 +wandb==0.26.1 +tokenizers==0.21.4 +thinc==8.3.13 +nvidia-cusolver==12.0.4.66 +fastapi==0.136.1 +weasel==1.0.0 +transformers==4.53.2 +torch==2.11.0 +spacy==3.8.13 +torchvision==0.26.0 +spacy-transformers==1.4.0 +sentence-transformers==5.4.1 +optimum==2.1.0 +errant==3.0.0 +bitsandbytes==0.49.2 +bert-score==0.3.13 +accelerate==1.13.0 +peft==0.19.1 +en_core_web_sm==3.8.0 +tree-sitter-zig==1.1.2 +tree-sitter-verilog==1.0.3 +tree-sitter-typescript==0.23.2 +tree-sitter-swift==0.0.1 +tree-sitter-scala==0.26.0 +tree-sitter-rust==0.24.2 +tree-sitter-ruby==0.23.1 +tree-sitter-powershell==0.26.3 +tree-sitter-php==0.24.1 +tree-sitter-objc==3.0.2 +tree-sitter-lua==0.5.0 +tree-sitter-kotlin==1.1.0 +tree-sitter-julia==0.23.1 +tree-sitter-go==0.25.0 +tree-sitter-elixir==0.3.5 +tree-sitter-c-sharp==0.23.5 +graphifyy==0.6.5 +Brlapi==0.8.7 +PyGObject==3.54.5 +cffi==2.0.0 +contourpy==1.3.3 +cryptography==46.0.7 +dbus-python==1.4.0 +evdev==1.9.3 +fonttools==4.61.0 +gbinder-python==1.3.0 +kiwisolver==1.5.0 +libvirt-python==11.6.0 +lxml==6.0.1 +matplotlib==3.10.8 +numpy==2.3.5 +perf==0.1 +pillow==11.3.0 +psutil==7.0.0 +PyAudio==0.2.13 +pycairo==1.28.0 +pycups==2.0.4 +pydantic_core==2.41.5 +pyscard==2.2.2 +PyYAML==6.0.2 +pyzstd==0.16.2 +RapidFuzz==3.12.2 +regex==2026.2.28 +rpds-py==0.27.0 +rpm==6.0.1 +selinux==3.9 +setools==4.6.0 +setproctitle==1.3.7 +systemd-python==235 +PySocks==1.7.1 +annotated-types==0.7.0 +anyio==4.12.1 +appdirs==1.4.4 +attrs==25.4.0 +beautifulsoup4==4.14.3 +boto3==1.42.84 +botocore==1.42.84 +certifi==2025.7.9 +charset-normalizer==3.4.3 +click==8.1.7 +cockpit==360.1 +configobj==5.0.9 +cssselect==1.3.0 +cupshelpers==1.0 +cycler==0.11.0 +dasbus==1.7 +distro==1.9.0 +fido2==2.0.0 +file-magic==0.4.0 +filelock==3.15.4 +fw-fanctrl==1.0.2 +h11==0.16.0 +httpcore==1.0.9 +httpx==0.28.1 +icoextract==0.2.0 +idna==3.10 +inkex==1.4.0 +input-remapper==2.2.0 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 +jaraco.functools==4.1.0 +jeepney==0.9.0 +jmespath==1.0.1 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +keyring==25.7.0 +langtable==0.0.69 +libevdev==0.12 +louis==3.33.0 +lutris==0.5.22 +moddb==0.12.0 +more-itertools==10.5.0 +nftables==0.1 +olefile==0.47 +packaging==25.0 +pefile==2024.8.26 +pexpect==4.9.0 +pip==25.1.1 +ply==3.11 +protobuf==3.19.6 +ptyprocess==0.7.0 +pycparser==2.22 +pydantic==2.12.5 +pydbus==0.6.0 +pyenchant==3.2.2 +pygdbmi==0.11.0.0 +Pygments==2.19.1 +pyinotify==0.9.6 +pyparsing==3.1.2 +pypresence==4.3.0 +pyserial==3.5 +python-dateutil==2.9.0.post0 +python-linux-procfs==0.7.3 +python-pskc==1.4 +python-xlib==0.33 +pyudev==0.24.3 +pyxdg==0.28 +pyynl==0.0.1 +referencing==0.36.2 +requests==2.32.5 +requests-file==2.0.0 +s3transfer==0.16.0 +scour==0.38.2 +SecretStorage==3.3.3 +sentry-sdk==2.35.0 +sepolicy==3.9 +setroubleshoot==3.3.36 +setuptools==78.1.1 +shtab==1.7.2 +six==1.17.0 +sos==4.11.0 +soupsieve==2.8 +tinycss2==1.5.1 +tldextract==5.3.0 +typing_extensions==4.15.0 +typing-inspection==0.4.2 +udica==0.2.8 +umu-launcher==1.2.9 +urllib3==2.6.3 +webencodings==0.5.1 +yubikey-manager==5.9.0 diff --git a/wandb/run-20260502_210534-j0t4q38m/files/wandb-metadata.json b/wandb/run-20260502_210534-j0t4q38m/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..f28a6f1d35fd20d72eb33bd36221c666fa91127f --- /dev/null +++ b/wandb/run-20260502_210534-j0t4q38m/files/wandb-metadata.json @@ -0,0 +1,41 @@ +{ + "os": "Linux-6.17.7-ba29.fc43.x86_64-x86_64-with-glibc2.42", + "python": "CPython 3.14.3", + "startedAt": "2026-05-02T15:35:34.656190Z", + "args": [ + "--config", + "configs/training_config.yaml", + "--use-v2-loss" + ], + "program": "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", + "codePath": "scripts/train.py", + "codePathLocal": "scripts/train.py", + "email": "morpheuslord@protonmail.com", + "root": "/run/media/morpheuslord/Personal_Files/Projects/Rewriter", + "host": "bazzite", + "executable": "/usr/bin/python3", + "cpu_count": 8, + "cpu_count_logical": 16, + "gpu": "NVIDIA GeForce RTX 3050 Laptop GPU", + "gpu_count": 1, + "disk": { + "/": { + "total": "60248064", + "used": "60248064" + } + }, + "memory": { + "total": "32944783360" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA GeForce RTX 3050 Laptop GPU", + "memoryTotal": "4294967296", + "cudaCores": 2048, + "architecture": "Ampere", + "uuid": "GPU-861554d7-d187-39e9-e77c-881f0287b963" + } + ], + "cudaVersion": "13.2", + "writerId": "vj7iupa1nplsn4rj59u3092ryg7aihk3" +} \ No newline at end of file diff --git a/wandb/run-20260502_210534-j0t4q38m/files/wandb-summary.json b/wandb/run-20260502_210534-j0t4q38m/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..bfb9856cb1874c952ae2a13a52296a15b153efed --- /dev/null +++ b/wandb/run-20260502_210534-j0t4q38m/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":641},"_runtime":641} \ No newline at end of file diff --git a/wandb/run-20260502_210534-j0t4q38m/logs/debug-core.log b/wandb/run-20260502_210534-j0t4q38m/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..02bd4e040bc5fda31302a608fdae4e45036b5821 --- /dev/null +++ b/wandb/run-20260502_210534-j0t4q38m/logs/debug-core.log @@ -0,0 +1,15 @@ +{"time":"2026-05-02T21:05:37.04478727+05:30","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp9zisi5jn/port-218982.txt","pid":218982,"detached":false,"idle-timeout":600000000000,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-05-02T21:05:37.052456174+05:30","level":"INFO","msg":"server: will exit if parent process dies","ppid":218982} +{"time":"2026-05-02T21:05:37.052373709+05:30","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-218982-219133-553832284/socket","Net":"unix"}} +{"time":"2026-05-02T21:05:37.14156088+05:30","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-05-02T21:05:37.230365171+05:30","level":"INFO","msg":"handleInformInit: received","streamId":"j0t4q38m","id":"1(@)"} +{"time":"2026-05-02T21:05:38.356150322+05:30","level":"INFO","msg":"handleInformInit: stream started","streamId":"j0t4q38m","id":"1(@)"} +{"time":"2026-05-02T21:05:45.78379369+05:30","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"ff034tnpey41"} +{"time":"2026-05-02T21:16:20.076447485+05:30","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-05-02T21:16:20.076495055+05:30","level":"INFO","msg":"server is shutting down"} +{"time":"2026-05-02T21:16:20.076500795+05:30","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-05-02T21:16:20.076539989+05:30","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-05-02T21:16:20.076551681+05:30","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-218982-219133-553832284/socket","Net":"unix"}} +{"time":"2026-05-02T21:16:22.132160555+05:30","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-05-02T21:16:22.132188898+05:30","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-05-02T21:16:22.132203917+05:30","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20260502_210534-j0t4q38m/logs/debug-internal.log b/wandb/run-20260502_210534-j0t4q38m/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..94a515f0b8912d76d7de4101461f212ae2524ae1 --- /dev/null +++ b/wandb/run-20260502_210534-j0t4q38m/logs/debug-internal.log @@ -0,0 +1,100 @@ +{"time":"2026-05-02T21:05:37.234190686+05:30","level":"INFO","msg":"wandb-core"} +{"time":"2026-05-02T21:05:37.242320258+05:30","level":"INFO","msg":"stream: starting","core version":"0.26.1"} +{"time":"2026-05-02T21:05:38.349568606+05:30","level":"INFO","msg":"stream: created new stream","id":"j0t4q38m"} +{"time":"2026-05-02T21:05:38.350574811+05:30","level":"INFO","msg":"handler: started"} +{"time":"2026-05-02T21:05:38.355359292+05:30","level":"INFO","msg":"stream: started"} +{"time":"2026-05-02T21:05:38.35549061+05:30","level":"INFO","msg":"writer: started","stream_id":"j0t4q38m"} +{"time":"2026-05-02T21:05:38.35556+05:30","level":"INFO","msg":"sender: started"} +{"time":"2026-05-02T21:05:40.936825956+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":1} +{"time":"2026-05-02T21:05:41.517207406+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:05:55.937910808+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":0,"events_lines":2,"console_offset":0,"console_lines":3,"uploaded_len":2} +{"time":"2026-05-02T21:05:56.458521673+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:06:10.937424371+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":2,"events_lines":2,"console_offset":3,"console_lines":5} +{"time":"2026-05-02T21:06:11.410126381+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:06:25.936471967+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":4,"events_lines":2,"console_offset":8,"console_lines":3} +{"time":"2026-05-02T21:06:26.461511422+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:06:40.936753857+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":6,"events_lines":2} +{"time":"2026-05-02T21:06:41.720748126+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:06:55.936666621+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":8,"events_lines":2} +{"time":"2026-05-02T21:06:56.411238293+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:07:10.936327662+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":10,"events_lines":2} +{"time":"2026-05-02T21:07:12.235432737+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:07:25.937680731+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":12,"events_lines":2} +{"time":"2026-05-02T21:07:26.471142249+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:07:40.937184957+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":14,"events_lines":2} +{"time":"2026-05-02T21:07:41.625071931+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:07:55.936799029+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":16,"events_lines":2} +{"time":"2026-05-02T21:07:56.473354525+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:08:10.936685855+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":18,"events_lines":2} +{"time":"2026-05-02T21:08:11.557070962+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:08:25.937672733+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":20,"events_lines":2} +{"time":"2026-05-02T21:08:26.475361494+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:08:40.937403866+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":22,"events_lines":2} +{"time":"2026-05-02T21:08:41.32578519+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:08:55.937285352+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":24,"events_lines":2} +{"time":"2026-05-02T21:08:56.328478452+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:09:10.936416745+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":26,"events_lines":2,"console_offset":11,"console_lines":1} +{"time":"2026-05-02T21:09:11.37015592+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:09:25.936996847+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":28,"events_lines":2} +{"time":"2026-05-02T21:09:26.378993224+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:09:40.936300494+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":30,"events_lines":2} +{"time":"2026-05-02T21:09:41.433281934+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:09:55.93643677+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":32,"events_lines":2} +{"time":"2026-05-02T21:09:56.382444512+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:10:10.937221808+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":34,"events_lines":2} +{"time":"2026-05-02T21:10:11.355786432+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:10:25.937097794+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":36,"events_lines":2} +{"time":"2026-05-02T21:10:26.387813182+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:10:40.936994138+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":38,"events_lines":2} +{"time":"2026-05-02T21:10:41.338188084+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:10:55.936853211+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":40,"events_lines":2} +{"time":"2026-05-02T21:10:56.389385642+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:11:10.937142189+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":42,"events_lines":2} +{"time":"2026-05-02T21:11:11.443210814+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:11:25.937540869+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":44,"events_lines":2} +{"time":"2026-05-02T21:11:26.393951371+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:11:40.936498594+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":46,"events_lines":2} +{"time":"2026-05-02T21:11:41.446287355+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:11:55.937324787+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":48,"events_lines":2} +{"time":"2026-05-02T21:11:56.397677735+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:12:10.937373587+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":50,"events_lines":2} +{"time":"2026-05-02T21:12:11.45049789+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:12:25.937004289+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":52,"events_lines":2,"console_offset":12,"console_lines":1} +{"time":"2026-05-02T21:12:26.502961324+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:12:40.936126354+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":54,"events_lines":2} +{"time":"2026-05-02T21:12:41.350362758+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:12:55.936882516+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":56,"events_lines":2} +{"time":"2026-05-02T21:12:56.403462561+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:13:10.936872365+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":58,"events_lines":2,"console_offset":13,"console_lines":1} +{"time":"2026-05-02T21:13:11.355253204+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:13:25.937227431+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":60,"events_lines":2} +{"time":"2026-05-02T21:13:26.407873524+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:13:40.936591922+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":62,"events_lines":2} +{"time":"2026-05-02T21:13:41.358129237+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:13:55.936827744+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":64,"events_lines":2} +{"time":"2026-05-02T21:13:56.514263578+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:14:10.93673107+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":66,"events_lines":2} +{"time":"2026-05-02T21:14:11.361558781+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:14:25.937004793+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":68,"events_lines":2} +{"time":"2026-05-02T21:14:26.414521436+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:14:40.936483279+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":70,"events_lines":2} +{"time":"2026-05-02T21:14:41.366513559+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:14:55.936352881+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":72,"events_lines":2} +{"time":"2026-05-02T21:14:56.301887204+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:15:10.937474415+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":74,"events_lines":2} +{"time":"2026-05-02T21:15:11.370984949+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:15:25.937316725+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":76,"events_lines":2,"console_offset":14,"console_lines":1} +{"time":"2026-05-02T21:15:26.422069265+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:15:40.936617176+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":78,"events_lines":2,"console_offset":15,"console_lines":4} +{"time":"2026-05-02T21:15:41.372355836+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:15:55.937088581+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":80,"events_lines":2} +{"time":"2026-05-02T21:15:56.42507933+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:16:10.936723622+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":82,"events_lines":2} +{"time":"2026-05-02T21:16:11.303754934+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:16:21.61488856+05:30","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-05-02T21:16:21.615181522+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"console_offset":19,"console_lines":22,"uploaded_len":3,"complete":true,"exit_code":1} +{"time":"2026-05-02T21:16:22.127110203+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:16:22.12733649+05:30","level":"INFO","msg":"stream: finishing up"} +{"time":"2026-05-02T21:16:22.1274004+05:30","level":"INFO","msg":"handler: closed"} +{"time":"2026-05-02T21:16:22.127656222+05:30","level":"INFO","msg":"sender: closed"} +{"time":"2026-05-02T21:16:22.127699253+05:30","level":"INFO","msg":"stream: all finished"} diff --git a/wandb/run-20260502_210534-j0t4q38m/logs/debug.log b/wandb/run-20260502_210534-j0t4q38m/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..de012f766a55a2577dce003fa69f2b2226b1cf90 --- /dev/null +++ b/wandb/run-20260502_210534-j0t4q38m/logs/debug.log @@ -0,0 +1,21 @@ +2026-05-02 21:05:34,716 INFO MainThread:218982 [wandb_setup.py:_flush():81] Current SDK version is 0.26.1 +2026-05-02 21:05:34,717 INFO MainThread:218982 [wandb_setup.py:_flush():81] Configure stats pid to 218982 +2026-05-02 21:05:34,718 INFO MainThread:218982 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-05-02 21:05:34,720 INFO MainThread:218982 [wandb_init.py:setup_run_log_directory():723] Logging user logs to /run/media/morpheuslord/Personal_Files/Projects/Rewriter/wandb/run-20260502_210534-j0t4q38m/logs/debug.log +2026-05-02 21:05:34,722 INFO MainThread:218982 [wandb_init.py:setup_run_log_directory():724] Logging internal logs to /run/media/morpheuslord/Personal_Files/Projects/Rewriter/wandb/run-20260502_210534-j0t4q38m/logs/debug-internal.log +2026-05-02 21:05:34,723 INFO MainThread:218982 [wandb_init.py:init():850] calling init triggers +2026-05-02 21:05:34,724 INFO MainThread:218982 [wandb_init.py:init():855] wandb.init called with sweep_config: {} +config: {'model': {'key': 'flan-t5-base', 'quantize': False, 'use_lora': True}, 'lora': {'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'target_modules': ['q', 'v', 'k', 'o', 'wi_0', 'wi_1', 'wo']}, 'data': {'train_path': 'data/processed/train.jsonl', 'val_path': 'data/processed/val.jsonl', 'test_path': 'data/processed/test.jsonl', 'max_input_length': 256, 'max_target_length': 256, 'augment_synthetic': True, 'synthetic_ratio': 0.3}, 'training': {'output_dir': 'checkpoints/', 'num_train_epochs': 3, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 4, 'gradient_accumulation_steps': 8, 'learning_rate': 0.0003, 'lr_scheduler_type': 'cosine', 'warmup_ratio': 0.05, 'weight_decay': 0.01, 'fp16': False, 'bf16': True, 'evaluation_strategy': 'steps', 'eval_steps': 200, 'save_strategy': 'steps', 'save_steps': 200, 'save_total_limit': 3, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'logging_dir': 'logs/', 'logging_steps': 50, 'report_to': ['wandb', 'tensorboard'], 'dataloader_num_workers': 4, 'seed': 42, 'push_to_hub': False}, 'loss': {'lambda_style': 0.3, 'lambda_semantic': 0.5, 'lambda_human_pattern': 0.4, 'sem_model_name': 'all-mpnet-base-v2'}, 'generation': {'num_beams': 5, 'length_penalty': 1.0, 'no_repeat_ngram_size': 3, 'min_length': 10, 'max_new_tokens': 512, 'early_stopping': True}, 'human_pattern': {'classifier_path': 'checkpoints/human_pattern_classifier.pt', 'shanegerami_path': 'data/raw/shanegerami/AI_Human.csv', 'starblasters_path': 'data/raw/starblasters8/data.parquet', 'max_samples_per_source': 50000, 'pretrain_epochs': 20, 'pretrain_lr': 0.001, 'pretrain_batch_size': 512, 'target_auc': 0.88}, '_wandb': {}} +2026-05-02 21:05:34,725 INFO MainThread:218982 [wandb_init.py:init():898] starting backend +2026-05-02 21:05:37,142 INFO MainThread:218982 [wandb_init.py:init():913] sending inform_init request +2026-05-02 21:05:38,359 INFO MainThread:218982 [wandb_init.py:init():918] backend started and connected +2026-05-02 21:05:38,372 INFO MainThread:218982 [wandb_init.py:init():988] updated telemetry +2026-05-02 21:05:38,395 INFO MainThread:218982 [wandb_init.py:init():1011] communicating run to backend with 90.0 second timeout +2026-05-02 21:05:38,961 INFO MainThread:218982 [wandb_init.py:init():1056] starting run threads in backend +2026-05-02 21:05:40,768 INFO MainThread:218982 [wandb_run.py:_console_start():2554] atexit reg +2026-05-02 21:05:40,770 INFO MainThread:218982 [wandb_run.py:_redirect():2403] redirect: wrap_raw +2026-05-02 21:05:40,770 INFO MainThread:218982 [wandb_run.py:_redirect():2472] Wrapping output streams. +2026-05-02 21:05:40,772 INFO MainThread:218982 [wandb_run.py:_redirect():2495] Redirects installed. +2026-05-02 21:05:40,792 INFO MainThread:218982 [wandb_init.py:init():1094] run started, returning control to user process +2026-05-02 21:16:20,076 INFO wandb-AsyncioManager-main:218982 [service_client.py:_forward_responses():134] Reached EOF. +2026-05-02 21:16:20,076 INFO wandb-AsyncioManager-main:218982 [mailbox.py:close():155] Closing mailbox, abandoning 1 handles. diff --git a/wandb/run-20260502_212127-vl8pftkj/files/config.yaml b/wandb/run-20260502_212127-vl8pftkj/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c875de2c4e1e92cb403b34a247699962bc56a7d --- /dev/null +++ b/wandb/run-20260502_212127-vl8pftkj/files/config.yaml @@ -0,0 +1,661 @@ +_name_or_path: + value: google/flan-t5-base +_wandb: + value: + cli_version: 0.26.1 + e: + 52xh0heeb459ptjgbij5mcijfyh3w5cd: + args: + - --config + - configs/training_config.yaml + - --use-v2-loss + codePath: scripts/train.py + codePathLocal: scripts/train.py + cpu_count: 8 + cpu_count_logical: 16 + cudaVersion: "13.2" + disk: + /: + total: "60248064" + used: "60248064" + email: morpheuslord@protonmail.com + executable: /usr/bin/python3 + gpu: NVIDIA GeForce RTX 3050 Laptop GPU + gpu_count: 1 + gpu_nvidia: + - architecture: Ampere + cudaCores: 2048 + memoryTotal: "4294967296" + name: NVIDIA GeForce RTX 3050 Laptop GPU + uuid: GPU-861554d7-d187-39e9-e77c-881f0287b963 + host: bazzite + memory: + total: "32944783360" + os: Linux-6.17.7-ba29.fc43.x86_64-x86_64-with-glibc2.42 + program: /run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py + python: CPython 3.14.3 + root: /run/media/morpheuslord/Personal_Files/Projects/Rewriter + startedAt: "2026-05-02T15:51:27.387378Z" + writerId: 52xh0heeb459ptjgbij5mcijfyh3w5cd + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.14.3 + t: + "1": + - 1 + - 5 + - 11 + - 33 + - 41 + - 49 + - 51 + - 53 + - 71 + - 75 + - 98 + "2": + - 1 + - 5 + - 11 + - 33 + - 41 + - 49 + - 51 + - 53 + - 71 + - 75 + - 98 + "3": + - 7 + - 13 + - 16 + - 19 + - 66 + "4": 3.14.3 + "5": 0.26.1 + "6": 4.53.2 + "9": + "1": transformers_trainer + "12": 0.26.1 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - T5ForConditionalGeneration +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: false +bad_words_ids: + value: null +batch_eval_metrics: + value: false +begin_suppress_tokens: + value: null +bf16: + value: true +bf16_full_eval: + value: false +bos_token_id: + value: null +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: 0 +cross_attention_hidden_size: + value: null +d_ff: + value: 2048 +d_kv: + value: 64 +d_model: + value: 768 +data: + value: + augment_synthetic: true + max_input_length: 256 + max_target_length: 256 + synthetic_ratio: 0.3 + test_path: data/processed/test.jsonl + train_path: data/processed/train.jsonl + val_path: data/processed/val.jsonl +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: 0 +deepspeed: + value: null +dense_act_fn: + value: gelu_new +disable_tqdm: + value: false +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +dropout_rate: + value: 0.1 +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: 1 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: 200 +eval_strategy: + value: steps +eval_use_gather_object: + value: false +exponential_decay_length_penalty: + value: null +feed_forward_proj: + value: gated-gelu +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: false +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +generation: + value: + early_stopping: true + length_penalty: 1 + max_new_tokens: 512 + min_length: 10 + no_repeat_ngram_size: 3 + num_beams: 5 +gradient_accumulation_steps: + value: 8 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: false +group_by_length: + value: false +half_precision_backend: + value: auto +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: null +hub_revision: + value: null +hub_strategy: + value: every_save +hub_token: + value: +human_pattern: + value: + classifier_path: checkpoints/human_pattern_classifier.pt + max_samples_per_source: 50000 + pretrain_batch_size: 512 + pretrain_epochs: 20 + pretrain_lr: 0.001 + shanegerami_path: data/raw/shanegerami/AI_Human.csv + starblasters_path: data/raw/starblasters8/data.parquet + target_auc: 0.88 +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_inputs_for_metrics: + value: false +include_num_input_tokens_seen: + value: false +include_tokens_per_second: + value: false +initializer_factor: + value: 1 +is_decoder: + value: false +is_encoder_decoder: + value: true +is_gated_act: + value: true +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_epsilon: + value: 1e-06 +learning_rate: + value: 0.0003 +length_column_name: + value: length +length_penalty: + value: 1 +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: logs/ +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 50 +logging_strategy: + value: steps +lora: + value: + lora_alpha: 32 + lora_dropout: 0.05 + r: 16 + target_modules: + - q + - v + - k + - o + - wi_0 + - wi_1 + - wo +loss: + value: + lambda_human_pattern: 0.4 + lambda_semantic: 0.5 + lambda_style: 0.3 + sem_model_name: all-mpnet-base-v2 +lr_scheduler_type: + value: cosine +max_grad_norm: + value: 1 +max_length: + value: 20 +max_steps: + value: -1 +metric_for_best_model: + value: eval_loss +min_length: + value: 0 +model: + value: + key: flan-t5-base + quantize: false + use_lora: true +model/num_parameters: + value: 254360832 +model_type: + value: t5 +mp_parameters: + value: "" +n_positions: + value: 512 +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_decoder_layers: + value: 12 +num_heads: + value: 12 +num_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 3 +optim: + value: adamw_torch +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: checkpoints/ +output_hidden_states: + value: false +output_past: + value: true +output_scores: + value: false +overwrite_output_dir: + value: false +pad_token_id: + value: 0 +past_index: + value: -1 +peft_config: + value: + default: + alora_invocation_tokens: null + arrow_config: null + auto_mapping: null + base_model_name_or_path: google/flan-t5-base + bias: none + corda_config: null + ensure_weight_tying: false + eva_config: null + exclude_modules: null + fan_in_fan_out: false + inference_mode: false + init_lora_weights: true + layer_replication: null + layers_pattern: null + layers_to_transform: null + lora_alpha: 32 + lora_bias: false + lora_dropout: 0.05 + lora_ga_config: null + megatron_config: null + megatron_core: megatron.core + modules_to_save: null + peft_type: LORA + peft_version: 0.19.1 + qalora_group_size: 16 + r: 16 + revision: null + runtime_config: + ephemeral_gpu_offload: false + target_modules: + - v + - q + - k + - wo + - o + - wi_1 + - wi_0 + target_parameters: null + task_type: SEQ_2_SEQ_LM + trainable_token_indices: null + use_bdlora: null + use_dora: false + use_qalora: false + use_rslora: false +per_device_eval_batch_size: + value: 4 +per_device_train_batch_size: + value: 4 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +relative_attention_max_distance: + value: 128 +relative_attention_num_buckets: + value: 32 +remove_invalid_values: + value: false +remove_unused_columns: + value: false +repetition_penalty: + value: 1 +report_to: + value: + - wandb + - tensorboard +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: checkpoints/ +save_on_each_node: + value: false +save_only_model: + value: false +save_safetensors: + value: true +save_steps: + value: 200 +save_strategy: + value: steps +save_total_limit: + value: 3 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +suppress_tokens: + value: null +task_specific_params: + value: + summarization: + early_stopping: true + length_penalty: 2 + max_length: 200 + min_length: 30 + no_repeat_ngram_size: 3 + num_beams: 4 + prefix: 'summarize: ' + translation_en_to_de: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to German: ' + translation_en_to_fr: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to French: ' + translation_en_to_ro: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to Romanian: ' +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: false +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: bfloat16 +torch_empty_cache_steps: + value: null +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +training: + value: + bf16: true + dataloader_num_workers: 4 + eval_steps: 200 + evaluation_strategy: steps + fp16: false + gradient_accumulation_steps: 8 + greater_is_better: false + learning_rate: 0.0003 + load_best_model_at_end: true + logging_dir: logs/ + logging_steps: 50 + lr_scheduler_type: cosine + metric_for_best_model: eval_loss + num_train_epochs: 3 + output_dir: checkpoints/ + per_device_eval_batch_size: 4 + per_device_train_batch_size: 4 + push_to_hub: false + report_to: + - wandb + - tensorboard + save_steps: 200 + save_strategy: steps + save_total_limit: 3 + seed: 42 + warmup_ratio: 0.05 + weight_decay: 0.01 +transformers_version: + value: 4.53.2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_liger_kernel: + value: false +use_mps_device: + value: false +vocab_size: + value: 32128 +warmup_ratio: + value: 0.05 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/wandb/run-20260502_212127-vl8pftkj/files/output.log b/wandb/run-20260502_212127-vl8pftkj/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..b81660630e1ce79be9ed5b620007e39ccc373b2e --- /dev/null +++ b/wandb/run-20260502_212127-vl8pftkj/files/output.log @@ -0,0 +1,117 @@ +2026-05-02 21:21:29.284 | INFO  | __main__:train:59 - Step 3: Loading model and tokenizer... +2026-05-02 21:21:29.285 | INFO  | src.model.base_model:load_model_and_tokenizer:56 - Loading model: google/flan-t5-base (seq2seq=True, quantize=False, lora=True) +2026-05-02 21:21:32.511 | INFO  | src.model.base_model:load_model_and_tokenizer:100 - Model loaded on cuda with dtype torch.bfloat16 +2026-05-02 21:21:55.894 | INFO  | src.model.base_model:load_model_and_tokenizer:128 - LoRA applied: 6,782,976 trainable params / 254,360,832 total (2.67%) +2026-05-02 21:21:55.896 | INFO  | __main__:train:72 - Step 4: Creating style fingerprinter... +2026-05-02 21:22:01.399 | INFO  | src.style.fingerprinter:__init__:100 - StyleFingerprinter initialised (AWL size: 549) +2026-05-02 21:22:01.401 | INFO  | __main__:train:79 - Step 5: Loading datasets... +2026-05-02 21:22:01.942 | INFO  | src.training.dataset:__init__:71 - Loaded 7555 examples from data/processed/train.jsonl +2026-05-02 21:22:04.922 | INFO  | src.training.dataset:_add_synthetic:148 - Added 2125 synthetic augmentation examples +2026-05-02 21:22:04.923 | INFO  | src.training.dataset:__init__:77 - Total dataset size: 9680 examples +2026-05-02 21:22:05.131 | INFO  | src.training.dataset:_precompute_all:154 - Pre-computing tokenisation and style vectors for all examples... +2026-05-02 21:25:52.460 | INFO  | src.training.dataset:_precompute_all:201 -  Pre-computed: 2000/9680 +2026-05-02 21:29:03.914 | INFO  | src.training.dataset:_precompute_all:201 -  Pre-computed: 4000/9680 +2026-05-02 21:31:13.516 | INFO  | src.training.dataset:_precompute_all:201 -  Pre-computed: 6000/9680 +2026-05-02 21:31:59.662 | INFO  | src.training.dataset:_precompute_all:201 -  Pre-computed: 8000/9680 +2026-05-02 21:32:01.602 | INFO  | src.training.dataset:_precompute_all:203 - Pre-computation complete (7231 unique style vectors) +2026-05-02 21:32:03.344 | INFO  | src.training.dataset:__init__:93 - Saved pre-computed dataset to cache: data/cache/432c0ea44d86bebd.pt +2026-05-02 21:32:03.353 | INFO  | src.training.dataset:__init__:71 - Loaded 839 examples from data/processed/val.jsonl +2026-05-02 21:32:03.353 | INFO  | src.training.dataset:__init__:77 - Total dataset size: 839 examples +2026-05-02 21:32:03.355 | INFO  | src.training.dataset:_precompute_all:154 - Pre-computing tokenisation and style vectors for all examples... +2026-05-02 21:33:13.612 | INFO  | src.training.dataset:_precompute_all:203 - Pre-computation complete (833 unique style vectors) +2026-05-02 21:33:13.770 | INFO  | src.training.dataset:__init__:93 - Saved pre-computed dataset to cache: data/cache/e3f36fd95bc98712.pt +2026-05-02 21:33:13.771 | INFO  | __main__:train:99 - Train: 9680 | Val: 839 +2026-05-02 21:33:13.771 | INFO  | __main__:train:122 - Using CE-only loss (aux models skipped to save memory) +2026-05-02 21:33:13.771 | INFO  | __main__:train:125 - Step 7: Creating training arguments... +2026-05-02 21:33:13.801 | INFO  | __main__:train:179 - Step 8: Creating trainer... +No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead. +2026-05-02 21:33:13.845 | INFO  | __main__:train:195 - Step 9: Starting training... +wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter. + 0%| | 0/909 [00:00 + train() + ~~~~~^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1485, in __call__ + return self.main(*args, **kwargs) + ~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1406, in main + rv = self.invoke(ctx) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1269, in invoke + return ctx.invoke(self.callback, **ctx.params) + ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 824, in invoke + return callback(*args, **kwargs) + File "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", line 196, in train + trainer.train() + ~~~~~~~~~~~~~^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/trainer.py", line 2206, in train + return inner_training_loop( + args=args, + ...<2 lines>... + ignore_keys_for_eval=ignore_keys_for_eval, + ) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/trainer.py", line 2548, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs, num_items_in_batch) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/trainer.py", line 3749, in training_step + loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch) + File "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/src/training/trainer.py", line 41, in compute_loss + outputs = model(**model_inputs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl + return forward_call(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/accelerate/utils/operations.py", line 823, in forward + return model_forward(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/accelerate/utils/operations.py", line 811, in __call__ + return convert_to_fp32(self.model_forward(*args, **kwargs)) + ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast + return func(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/peft/peft_model.py", line 2342, in forward + return self.base_model( + ~~~~~~~~~~~~~~~^ + input_ids=input_ids, + ^^^^^^^^^^^^^^^^^^^^ + ...<9 lines>... + **kwargs, + ^^^^^^^^^ + ) + ^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl + return forward_call(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/peft/tuners/tuners_utils.py", line 330, in forward + return self.model.forward(*args, **kwargs) + ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/models/t5/modeling_t5.py", line 1828, in forward + loss = loss_fct(lm_logits.view(-1, lm_logits.size(-1)), labels.view(-1)) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl + return forward_call(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/modules/loss.py", line 1394, in forward + return F.cross_entropy( + ~~~~~~~~~~~~~~~^ + input, + ^^^^^^ + ...<4 lines>... + label_smoothing=self.label_smoothing, + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + ) + ^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/nn/functional.py", line 3504, in cross_entropy + return torch._C._nn.cross_entropy_loss( + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^ + input, + ^^^^^^ + ...<5 lines>... + label_smoothing, + ^^^^^^^^^^^^^^^^ + ) + ^ +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 64.00 MiB. GPU 0 has a total capacity of 3.68 GiB of which 15.12 MiB is free. Including non-PyTorch memory, this process has 3.66 GiB memory in use. Of the allocated memory 3.49 GiB is allocated by PyTorch, and 75.47 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://docs.pytorch.org/docs/stable/notes/cuda.html#optimizing-memory-usage-with-pytorch-cuda-alloc-conf) diff --git a/wandb/run-20260502_212127-vl8pftkj/files/requirements.txt b/wandb/run-20260502_212127-vl8pftkj/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..ae63c201cc2d6bd6485b666008506c9fdde380ed --- /dev/null +++ b/wandb/run-20260502_212127-vl8pftkj/files/requirements.txt @@ -0,0 +1,362 @@ +Pygments==2.19.2 +pluggy==1.6.0 +iniconfig==2.3.0 +pytest==9.0.2 +sqlite-vec==0.1.6 +peewee==3.19.0 +boltons==21.0.0 +zipp==3.23.0 +wrapt==1.17.3 +tree-sitter-python==0.25.0 +tree-sitter-javascript==0.25.0 +tree-sitter-java==0.23.5 +tree-sitter-cpp==0.23.4 +tree-sitter-c==0.24.1 +tree-sitter==0.25.2 +tqdm==4.67.3 +tomli==2.0.2 +tabulate==0.10.0 +stevedore==5.7.0 +sniffio==1.3.1 +smmap==5.0.3 +shellingham==1.5.4 +semantic-version==2.10.0 +ruamel.yaml.clib==0.2.14 +ruamel.yaml==0.19.1 +python-multipart==0.0.22 +python-dotenv==1.2.2 +PyJWT==2.11.0 +protobuf==6.33.5 +opentelemetry-util-http==0.58b0 +networkx==3.6.1 +mdurl==0.1.2 +MarkupSafe==3.0.3 +jiter==0.13.0 +hyperframe==6.1.0 +httpx-sse==0.4.3 +hpack==4.1.0 +face==26.0.0 +exceptiongroup==1.2.2 +docstring_parser==0.17.0 +colorama==0.4.6 +bracex==2.6 +annotated-doc==0.0.4 +aiofiles==25.1.0 +wcmatch==8.5.2 +uvicorn==0.41.0 +starlette==0.52.1 +opentelemetry-proto==1.37.0 +markdown-it-py==4.0.0 +Jinja2==3.1.6 +importlib_metadata==8.7.1 +h2==4.3.0 +googleapis-common-protos==1.73.0 +glom==25.12.0 +gitdb==4.0.12 +click-option-group==0.5.9 +sse-starlette==3.3.2 +rich==14.3.3 +pydantic-settings==2.13.1 +opentelemetry-exporter-otlp-proto-common==1.37.0 +opentelemetry-api==1.37.0 +openai==2.26.0 +jsonschema==4.25.1 +GitPython==3.1.46 +typer==0.23.1 +opentelemetry-semantic-conventions==0.58b0 +bandit==1.9.4 +opentelemetry-sdk==1.37.0 +opentelemetry-instrumentation==0.58b0 +mcp==1.23.3 +agno==2.5.9 +opentelemetry-instrumentation-threading==0.58b0 +opentelemetry-instrumentation-requests==0.58b0 +opentelemetry-exporter-otlp-proto-http==1.37.0 +semgrep==1.154.0 +acvas==1.0.0 +Werkzeug==3.1.8 +websockets==13.1 +wcwidth==0.6.0 +sqlparse==0.5.5 +semver==3.0.4 +itsdangerous==2.2.0 +frida==17.9.1 +click==8.3.2 +blinker==1.9.0 +prompt_toolkit==3.0.52 +Flask==3.1.3 +delegator.py==0.1.1 +frida-tools==14.8.1 +cli_helpers==2.12.0 +litecli==1.17.1 +objection==1.12.4 +userpath==1.9.2 +platformdirs==4.9.4 +argcomplete==3.6.3 +pipx==1.11.1 +distlib==0.4.0 +filelock==3.25.2 +python-discovery==1.2.1 +virtualenv==21.2.0 +pyelftools==0.32 +cigam==0.0.3 +xmltodict==1.0.4 +apkutils2==1.0.0 +svgwrite==1.4.3 +rdflib==7.6.0 +python-louvain==0.16 +wheel==0.46.3 +pip==26.0.1 +threadpoolctl==3.6.0 +scipy==1.17.1 +joblib==1.5.3 +scikit-learn==1.8.0 +pandas==3.0.2 +seaborn==0.13.2 +text-unidecode==1.3 +fastjsonschema==2.21.2 +traitlets==5.14.3 +python-slugify==8.0.4 +bleach==6.3.0 +mdit-py-plugins==0.5.0 +kagglesdk==0.1.23 +jupyter_core==5.9.1 +nbformat==5.10.4 +jupytext==1.19.1 +kaggle==2.1.0 +xxhash==3.7.0 +pyarrow==24.0.0 +propcache==0.4.1 +multidict==6.7.1 +hf-xet==1.4.3 +fsspec==2026.2.0 +frozenlist==1.8.0 +dill==0.4.1 +aiohappyeyeballs==2.6.1 +yarl==1.23.0 +multiprocess==0.70.19 +aiosignal==1.4.0 +aiohttp==3.13.5 +datasets==4.8.5 +torchaudio==2.11.0 +nvidia-cusparselt-cu13==0.8.0 +mpmath==1.3.0 +cuda-toolkit==13.0.2 +wasabi==1.1.3 +uvloop==0.22.1 +triton==3.6.0 +toml==0.10.2 +tensorboard-data-server==0.7.2 +sympy==1.14.0 +spacy-loggers==1.0.5 +spacy-legacy==3.0.12 +spacy-alignments==0.9.1 +smart_open==7.6.0 +setuptools==81.0.0 +sentencepiece==0.2.1 +safetensors==0.7.0 +pyspellchecker==0.9.0 +pyphen==0.17.2 +portalocker==3.2.0 +nvidia-nvtx==13.0.85 +nvidia-nvshmem-cu13==3.4.5 +nvidia-nvjitlink==13.0.88 +nvidia-nccl-cu13==2.28.9 +nvidia-curand==10.4.0.35 +nvidia-cufile==1.15.1.6 +nvidia-cuda-runtime==13.0.96 +nvidia-cuda-nvrtc==13.0.88 +nvidia-cuda-cupti==13.0.85 +nvidia-cublas==13.1.0.3 +nltk==3.9.4 +murmurhash==1.0.15 +msgpack==1.1.2 +Markdown==3.10.2 +loguru==0.7.3 +locate==1.1.1 +langcodes==3.5.1 +httptools==0.7.1 +grpcio==1.80.0 +ftfy==6.3.1 +faiss-cpu==1.13.2 +cymem==2.0.13 +cuda-pathfinder==1.5.4 +confection==1.3.3 +cloudpathlib==0.24.0 +catalogue==2.0.10 +blis==1.3.3 +absl-py==2.4.0 +wordfreq==3.1.1 +watchfiles==1.1.1 +textstat==0.7.13 +tensorboard==2.20.0 +srsly==2.5.3 +sacrebleu==2.6.0 +rouge_score==0.1.2 +preshed==3.0.13 +nvidia-cusparse==12.6.3.3 +nvidia-cufft==12.0.0.61 +nvidia-cudnn-cu13==9.19.0.56 +language_tool_python==3.3.0 +huggingface_hub==0.36.2 +cuda-bindings==13.2.0 +wandb==0.26.1 +tokenizers==0.21.4 +thinc==8.3.13 +nvidia-cusolver==12.0.4.66 +fastapi==0.136.1 +weasel==1.0.0 +transformers==4.53.2 +torch==2.11.0 +spacy==3.8.13 +torchvision==0.26.0 +spacy-transformers==1.4.0 +sentence-transformers==5.4.1 +optimum==2.1.0 +errant==3.0.0 +bitsandbytes==0.49.2 +bert-score==0.3.13 +accelerate==1.13.0 +peft==0.19.1 +en_core_web_sm==3.8.0 +tree-sitter-zig==1.1.2 +tree-sitter-verilog==1.0.3 +tree-sitter-typescript==0.23.2 +tree-sitter-swift==0.0.1 +tree-sitter-scala==0.26.0 +tree-sitter-rust==0.24.2 +tree-sitter-ruby==0.23.1 +tree-sitter-powershell==0.26.3 +tree-sitter-php==0.24.1 +tree-sitter-objc==3.0.2 +tree-sitter-lua==0.5.0 +tree-sitter-kotlin==1.1.0 +tree-sitter-julia==0.23.1 +tree-sitter-go==0.25.0 +tree-sitter-elixir==0.3.5 +tree-sitter-c-sharp==0.23.5 +graphifyy==0.6.5 +Brlapi==0.8.7 +PyGObject==3.54.5 +cffi==2.0.0 +contourpy==1.3.3 +cryptography==46.0.7 +dbus-python==1.4.0 +evdev==1.9.3 +fonttools==4.61.0 +gbinder-python==1.3.0 +kiwisolver==1.5.0 +libvirt-python==11.6.0 +lxml==6.0.1 +matplotlib==3.10.8 +numpy==2.3.5 +perf==0.1 +pillow==11.3.0 +psutil==7.0.0 +PyAudio==0.2.13 +pycairo==1.28.0 +pycups==2.0.4 +pydantic_core==2.41.5 +pyscard==2.2.2 +PyYAML==6.0.2 +pyzstd==0.16.2 +RapidFuzz==3.12.2 +regex==2026.2.28 +rpds-py==0.27.0 +rpm==6.0.1 +selinux==3.9 +setools==4.6.0 +setproctitle==1.3.7 +systemd-python==235 +PySocks==1.7.1 +annotated-types==0.7.0 +anyio==4.12.1 +appdirs==1.4.4 +attrs==25.4.0 +beautifulsoup4==4.14.3 +boto3==1.42.84 +botocore==1.42.84 +certifi==2025.7.9 +charset-normalizer==3.4.3 +click==8.1.7 +cockpit==360.1 +configobj==5.0.9 +cssselect==1.3.0 +cupshelpers==1.0 +cycler==0.11.0 +dasbus==1.7 +distro==1.9.0 +fido2==2.0.0 +file-magic==0.4.0 +filelock==3.15.4 +fw-fanctrl==1.0.2 +h11==0.16.0 +httpcore==1.0.9 +httpx==0.28.1 +icoextract==0.2.0 +idna==3.10 +inkex==1.4.0 +input-remapper==2.2.0 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 +jaraco.functools==4.1.0 +jeepney==0.9.0 +jmespath==1.0.1 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +keyring==25.7.0 +langtable==0.0.69 +libevdev==0.12 +louis==3.33.0 +lutris==0.5.22 +moddb==0.12.0 +more-itertools==10.5.0 +nftables==0.1 +olefile==0.47 +packaging==25.0 +pefile==2024.8.26 +pexpect==4.9.0 +pip==25.1.1 +ply==3.11 +protobuf==3.19.6 +ptyprocess==0.7.0 +pycparser==2.22 +pydantic==2.12.5 +pydbus==0.6.0 +pyenchant==3.2.2 +pygdbmi==0.11.0.0 +Pygments==2.19.1 +pyinotify==0.9.6 +pyparsing==3.1.2 +pypresence==4.3.0 +pyserial==3.5 +python-dateutil==2.9.0.post0 +python-linux-procfs==0.7.3 +python-pskc==1.4 +python-xlib==0.33 +pyudev==0.24.3 +pyxdg==0.28 +pyynl==0.0.1 +referencing==0.36.2 +requests==2.32.5 +requests-file==2.0.0 +s3transfer==0.16.0 +scour==0.38.2 +SecretStorage==3.3.3 +sentry-sdk==2.35.0 +sepolicy==3.9 +setroubleshoot==3.3.36 +setuptools==78.1.1 +shtab==1.7.2 +six==1.17.0 +sos==4.11.0 +soupsieve==2.8 +tinycss2==1.5.1 +tldextract==5.3.0 +typing_extensions==4.15.0 +typing-inspection==0.4.2 +udica==0.2.8 +umu-launcher==1.2.9 +urllib3==2.6.3 +webencodings==0.5.1 +yubikey-manager==5.9.0 diff --git a/wandb/run-20260502_212127-vl8pftkj/files/wandb-metadata.json b/wandb/run-20260502_212127-vl8pftkj/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..fea076b3b316bb3321f160b213baa10efb75e92b --- /dev/null +++ b/wandb/run-20260502_212127-vl8pftkj/files/wandb-metadata.json @@ -0,0 +1,41 @@ +{ + "os": "Linux-6.17.7-ba29.fc43.x86_64-x86_64-with-glibc2.42", + "python": "CPython 3.14.3", + "startedAt": "2026-05-02T15:51:27.387378Z", + "args": [ + "--config", + "configs/training_config.yaml", + "--use-v2-loss" + ], + "program": "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", + "codePath": "scripts/train.py", + "codePathLocal": "scripts/train.py", + "email": "morpheuslord@protonmail.com", + "root": "/run/media/morpheuslord/Personal_Files/Projects/Rewriter", + "host": "bazzite", + "executable": "/usr/bin/python3", + "cpu_count": 8, + "cpu_count_logical": 16, + "gpu": "NVIDIA GeForce RTX 3050 Laptop GPU", + "gpu_count": 1, + "disk": { + "/": { + "total": "60248064", + "used": "60248064" + } + }, + "memory": { + "total": "32944783360" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA GeForce RTX 3050 Laptop GPU", + "memoryTotal": "4294967296", + "cudaCores": 2048, + "architecture": "Ampere", + "uuid": "GPU-861554d7-d187-39e9-e77c-881f0287b963" + } + ], + "cudaVersion": "13.2", + "writerId": "52xh0heeb459ptjgbij5mcijfyh3w5cd" +} \ No newline at end of file diff --git a/wandb/run-20260502_212127-vl8pftkj/files/wandb-summary.json b/wandb/run-20260502_212127-vl8pftkj/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..624637f34879b28dc8eb22ce3fa8d86e4e3ad0b7 --- /dev/null +++ b/wandb/run-20260502_212127-vl8pftkj/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":709,"_wandb":{"runtime":709}} \ No newline at end of file diff --git a/wandb/run-20260502_212127-vl8pftkj/logs/debug-core.log b/wandb/run-20260502_212127-vl8pftkj/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..57c6e95e020a0772471939af7f6f3939aa01065a --- /dev/null +++ b/wandb/run-20260502_212127-vl8pftkj/logs/debug-core.log @@ -0,0 +1,15 @@ +{"time":"2026-05-02T21:21:27.588729971+05:30","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpyq649gqx/port-222265.txt","pid":222265,"detached":false,"idle-timeout":600000000000,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-05-02T21:21:27.589272242+05:30","level":"INFO","msg":"server: will exit if parent process dies","ppid":222265} +{"time":"2026-05-02T21:21:27.589259719+05:30","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-222265-222797-359724811/socket","Net":"unix"}} +{"time":"2026-05-02T21:21:27.777471514+05:30","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-05-02T21:21:27.786446978+05:30","level":"INFO","msg":"handleInformInit: received","streamId":"vl8pftkj","id":"1(@)"} +{"time":"2026-05-02T21:21:28.512110333+05:30","level":"INFO","msg":"handleInformInit: stream started","streamId":"vl8pftkj","id":"1(@)"} +{"time":"2026-05-02T21:21:34.290314068+05:30","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"g4jxebsrszjc"} +{"time":"2026-05-02T21:33:18.789464502+05:30","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-05-02T21:33:18.789977027+05:30","level":"INFO","msg":"server is shutting down"} +{"time":"2026-05-02T21:33:18.790034686+05:30","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-05-02T21:33:18.790413419+05:30","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-05-02T21:33:18.790393191+05:30","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-222265-222797-359724811/socket","Net":"unix"}} +{"time":"2026-05-02T21:33:21.484467297+05:30","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-05-02T21:33:21.484733027+05:30","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-05-02T21:33:21.484874944+05:30","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20260502_212127-vl8pftkj/logs/debug-internal.log b/wandb/run-20260502_212127-vl8pftkj/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..d54e040a56dcabeb103694e3cd603c91b59dc04b --- /dev/null +++ b/wandb/run-20260502_212127-vl8pftkj/logs/debug-internal.log @@ -0,0 +1,110 @@ +{"time":"2026-05-02T21:21:27.78669254+05:30","level":"INFO","msg":"wandb-core"} +{"time":"2026-05-02T21:21:27.787376628+05:30","level":"INFO","msg":"stream: starting","core version":"0.26.1"} +{"time":"2026-05-02T21:21:28.511483312+05:30","level":"INFO","msg":"stream: created new stream","id":"vl8pftkj"} +{"time":"2026-05-02T21:21:28.511632763+05:30","level":"INFO","msg":"handler: started"} +{"time":"2026-05-02T21:21:28.512065638+05:30","level":"INFO","msg":"stream: started"} +{"time":"2026-05-02T21:21:28.51208223+05:30","level":"INFO","msg":"writer: started","stream_id":"vl8pftkj"} +{"time":"2026-05-02T21:21:28.512089223+05:30","level":"INFO","msg":"sender: started"} +{"time":"2026-05-02T21:21:29.29327255+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":2} +{"time":"2026-05-02T21:21:30.1501207+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:21:44.294002042+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":0,"events_lines":2,"console_offset":1,"console_lines":2,"uploaded_len":2} +{"time":"2026-05-02T21:21:44.793772465+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:21:59.294243565+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":2,"events_lines":2,"console_offset":3,"console_lines":2} +{"time":"2026-05-02T21:21:59.744240789+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:22:14.294496268+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":4,"events_lines":2,"console_offset":5,"console_lines":6} +{"time":"2026-05-02T21:22:14.796960025+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:22:29.293993119+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":6,"events_lines":2} +{"time":"2026-05-02T21:22:29.748040931+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:22:44.294066364+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":8,"events_lines":2} +{"time":"2026-05-02T21:22:44.79321116+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:22:59.294191925+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":10,"events_lines":2} +{"time":"2026-05-02T21:22:59.750488102+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:23:14.295373468+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":12,"events_lines":2} +{"time":"2026-05-02T21:23:14.701477417+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:23:29.293549241+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":14,"events_lines":2} +{"time":"2026-05-02T21:23:29.754048964+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:23:44.294034803+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":16,"events_lines":2} +{"time":"2026-05-02T21:23:44.807117768+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:23:59.293560027+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":18,"events_lines":2} +{"time":"2026-05-02T21:23:59.757246111+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:24:14.295187259+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":20,"events_lines":2} +{"time":"2026-05-02T21:24:14.809413037+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:24:29.294379887+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":22,"events_lines":2} +{"time":"2026-05-02T21:24:29.761465053+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:24:44.294154921+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":24,"events_lines":2} +{"time":"2026-05-02T21:24:44.707758113+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:24:59.293673311+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":26,"events_lines":2} +{"time":"2026-05-02T21:24:59.760422485+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:25:14.29531446+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":28,"events_lines":2} +{"time":"2026-05-02T21:25:14.71542625+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:25:29.294442646+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":30,"events_lines":2} +{"time":"2026-05-02T21:25:29.768358237+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:25:44.294379315+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":32,"events_lines":2} +{"time":"2026-05-02T21:25:44.821155549+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:25:59.29468603+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":34,"events_lines":2,"console_offset":11,"console_lines":1} +{"time":"2026-05-02T21:25:59.874139524+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:26:14.29530836+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":36,"events_lines":2} +{"time":"2026-05-02T21:26:14.724731476+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:26:29.293996006+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":38,"events_lines":2} +{"time":"2026-05-02T21:26:29.777355382+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:26:44.294521911+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":40,"events_lines":2} +{"time":"2026-05-02T21:26:44.728764653+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:26:59.293563682+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":42,"events_lines":2} +{"time":"2026-05-02T21:26:59.762840997+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:27:14.293971057+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":44,"events_lines":2} +{"time":"2026-05-02T21:27:14.731721016+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:27:29.294450598+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":46,"events_lines":2} +{"time":"2026-05-02T21:27:29.783489351+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:27:44.293394888+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":48,"events_lines":2} +{"time":"2026-05-02T21:27:44.731341266+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:27:59.294234858+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":50,"events_lines":2} +{"time":"2026-05-02T21:27:59.786724509+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:28:14.294112415+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":52,"events_lines":2} +{"time":"2026-05-02T21:28:14.737010339+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:28:29.294220827+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":54,"events_lines":2} +{"time":"2026-05-02T21:28:29.788666803+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:28:44.293869353+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":56,"events_lines":2} +{"time":"2026-05-02T21:28:44.738903139+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:28:59.294346439+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":58,"events_lines":2} +{"time":"2026-05-02T21:28:59.691983261+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:29:14.293787124+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":60,"events_lines":2,"console_offset":12,"console_lines":1} +{"time":"2026-05-02T21:29:14.741823996+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:29:29.294138464+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":62,"events_lines":2} +{"time":"2026-05-02T21:29:29.795527395+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:29:44.294778126+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":64,"events_lines":2} +{"time":"2026-05-02T21:29:44.747544646+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:29:59.294265389+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":66,"events_lines":2} +{"time":"2026-05-02T21:29:59.697269659+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:30:14.294561263+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":68,"events_lines":2} +{"time":"2026-05-02T21:30:14.75292263+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:30:29.294762861+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":70,"events_lines":2} +{"time":"2026-05-02T21:30:29.804727674+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:30:44.29391438+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":72,"events_lines":2} +{"time":"2026-05-02T21:30:44.753481267+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:30:59.293481172+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":74,"events_lines":2} +{"time":"2026-05-02T21:30:59.731413313+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:31:14.294065841+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":76,"events_lines":2,"console_offset":13,"console_lines":1} +{"time":"2026-05-02T21:31:14.755912762+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:31:29.293784338+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":78,"events_lines":2} +{"time":"2026-05-02T21:31:29.912920647+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:31:44.294309676+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":80,"events_lines":2} +{"time":"2026-05-02T21:31:44.864957655+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:31:59.293314389+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":82,"events_lines":2} +{"time":"2026-05-02T21:31:59.711608736+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:32:14.295102645+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":84,"events_lines":2,"console_offset":14,"console_lines":6} +{"time":"2026-05-02T21:32:14.763797753+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:32:29.294292647+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":86,"events_lines":2} +{"time":"2026-05-02T21:32:29.818281482+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:32:44.294004332+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":88,"events_lines":2} +{"time":"2026-05-02T21:32:44.768010974+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:32:59.294554286+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":90,"events_lines":2} +{"time":"2026-05-02T21:32:59.716192831+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:33:14.293864245+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":92,"events_lines":2,"console_offset":20,"console_lines":6} +{"time":"2026-05-02T21:33:14.873893793+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:33:20.99869691+05:30","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-05-02T21:33:21.001207037+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"console_offset":26,"console_lines":91,"uploaded_len":3,"complete":true,"exit_code":1} +{"time":"2026-05-02T21:33:21.433955115+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-02T21:33:21.43515202+05:30","level":"INFO","msg":"stream: finishing up"} +{"time":"2026-05-02T21:33:21.435870392+05:30","level":"INFO","msg":"handler: closed"} +{"time":"2026-05-02T21:33:21.43702112+05:30","level":"INFO","msg":"sender: closed"} +{"time":"2026-05-02T21:33:21.437441341+05:30","level":"INFO","msg":"stream: all finished"} diff --git a/wandb/run-20260502_212127-vl8pftkj/logs/debug.log b/wandb/run-20260502_212127-vl8pftkj/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..8ca8836cb0360cb449393a3779cd0ff881f9c23e --- /dev/null +++ b/wandb/run-20260502_212127-vl8pftkj/logs/debug.log @@ -0,0 +1,24 @@ +2026-05-02 21:21:27,391 INFO MainThread:222265 [wandb_setup.py:_flush():81] Current SDK version is 0.26.1 +2026-05-02 21:21:27,392 INFO MainThread:222265 [wandb_setup.py:_flush():81] Configure stats pid to 222265 +2026-05-02 21:21:27,392 INFO MainThread:222265 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-05-02 21:21:27,392 INFO MainThread:222265 [wandb_init.py:setup_run_log_directory():723] Logging user logs to /run/media/morpheuslord/Personal_Files/Projects/Rewriter/wandb/run-20260502_212127-vl8pftkj/logs/debug.log +2026-05-02 21:21:27,392 INFO MainThread:222265 [wandb_init.py:setup_run_log_directory():724] Logging internal logs to /run/media/morpheuslord/Personal_Files/Projects/Rewriter/wandb/run-20260502_212127-vl8pftkj/logs/debug-internal.log +2026-05-02 21:21:27,392 INFO MainThread:222265 [wandb_init.py:init():850] calling init triggers +2026-05-02 21:21:27,392 INFO MainThread:222265 [wandb_init.py:init():855] wandb.init called with sweep_config: {} +config: {'model': {'key': 'flan-t5-base', 'quantize': False, 'use_lora': True}, 'lora': {'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'target_modules': ['q', 'v', 'k', 'o', 'wi_0', 'wi_1', 'wo']}, 'data': {'train_path': 'data/processed/train.jsonl', 'val_path': 'data/processed/val.jsonl', 'test_path': 'data/processed/test.jsonl', 'max_input_length': 256, 'max_target_length': 256, 'augment_synthetic': True, 'synthetic_ratio': 0.3}, 'training': {'output_dir': 'checkpoints/', 'num_train_epochs': 3, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 4, 'gradient_accumulation_steps': 8, 'learning_rate': 0.0003, 'lr_scheduler_type': 'cosine', 'warmup_ratio': 0.05, 'weight_decay': 0.01, 'fp16': False, 'bf16': True, 'evaluation_strategy': 'steps', 'eval_steps': 200, 'save_strategy': 'steps', 'save_steps': 200, 'save_total_limit': 3, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'logging_dir': 'logs/', 'logging_steps': 50, 'report_to': ['wandb', 'tensorboard'], 'dataloader_num_workers': 4, 'seed': 42, 'push_to_hub': False}, 'loss': {'lambda_style': 0.3, 'lambda_semantic': 0.5, 'lambda_human_pattern': 0.4, 'sem_model_name': 'all-mpnet-base-v2'}, 'generation': {'num_beams': 5, 'length_penalty': 1.0, 'no_repeat_ngram_size': 3, 'min_length': 10, 'max_new_tokens': 512, 'early_stopping': True}, 'human_pattern': {'classifier_path': 'checkpoints/human_pattern_classifier.pt', 'shanegerami_path': 'data/raw/shanegerami/AI_Human.csv', 'starblasters_path': 'data/raw/starblasters8/data.parquet', 'max_samples_per_source': 50000, 'pretrain_epochs': 20, 'pretrain_lr': 0.001, 'pretrain_batch_size': 512, 'target_auc': 0.88}, '_wandb': {}} +2026-05-02 21:21:27,392 INFO MainThread:222265 [wandb_init.py:init():898] starting backend +2026-05-02 21:21:27,777 INFO MainThread:222265 [wandb_init.py:init():913] sending inform_init request +2026-05-02 21:21:28,512 INFO MainThread:222265 [wandb_init.py:init():918] backend started and connected +2026-05-02 21:21:28,514 INFO MainThread:222265 [wandb_init.py:init():988] updated telemetry +2026-05-02 21:21:28,517 INFO MainThread:222265 [wandb_init.py:init():1011] communicating run to backend with 90.0 second timeout +2026-05-02 21:21:29,127 INFO MainThread:222265 [wandb_init.py:init():1056] starting run threads in backend +2026-05-02 21:21:29,282 INFO MainThread:222265 [wandb_run.py:_console_start():2554] atexit reg +2026-05-02 21:21:29,282 INFO MainThread:222265 [wandb_run.py:_redirect():2403] redirect: wrap_raw +2026-05-02 21:21:29,283 INFO MainThread:222265 [wandb_run.py:_redirect():2472] Wrapping output streams. +2026-05-02 21:21:29,283 INFO MainThread:222265 [wandb_run.py:_redirect():2495] Redirects installed. +2026-05-02 21:21:29,284 INFO MainThread:222265 [wandb_init.py:init():1094] run started, returning control to user process +2026-05-02 21:33:14,389 INFO MainThread:222265 [wandb_run.py:_config_callback():1415] config_cb None None {'peft_config': {'default': {'task_type': 'SEQ_2_SEQ_LM', 'peft_type': 'LORA', 'auto_mapping': None, 'peft_version': '0.19.1', 'base_model_name_or_path': 'google/flan-t5-base', 'revision': None, 'inference_mode': False, 'r': 16, 'target_modules': ['v', 'q', 'k', 'wo', 'o', 'wi_1', 'wi_0'], 'exclude_modules': None, 'lora_alpha': 32, 'lora_dropout': 0.05, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'trainable_token_indices': None, 'loftq_config': {}, 'eva_config': None, 'corda_config': None, 'lora_ga_config': None, 'use_dora': False, 'alora_invocation_tokens': None, 'use_qalora': False, 'qalora_group_size': 16, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}, 'lora_bias': False, 'target_parameters': None, 'use_bdlora': None, 'arrow_config': None, 'ensure_weight_tying': False}}, 'vocab_size': 32128, 'd_model': 768, 'd_kv': 64, 'd_ff': 2048, 'num_layers': 12, 'num_decoder_layers': 12, 'num_heads': 12, 'relative_attention_num_buckets': 32, 'relative_attention_max_distance': 128, 'dropout_rate': 0.1, 'classifier_dropout': 0.0, 'layer_norm_epsilon': 1e-06, 'initializer_factor': 1.0, 'feed_forward_proj': 'gated-gelu', 'use_cache': True, 'dense_act_fn': 'gelu_new', 'is_gated_act': True, 'return_dict': True, 'output_hidden_states': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['T5ForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': 1, 'sep_token_id': None, 'decoder_start_token_id': 0, 'task_specific_params': {'summarization': {'early_stopping': True, 'length_penalty': 2.0, 'max_length': 200, 'min_length': 30, 'no_repeat_ngram_size': 3, 'num_beams': 4, 'prefix': 'summarize: '}, 'translation_en_to_de': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to German: '}, 'translation_en_to_fr': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to French: '}, 'translation_en_to_ro': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to Romanian: '}}, 'problem_type': None, '_name_or_path': 'google/flan-t5-base', 'transformers_version': '4.53.2', 'model_type': 't5', 'n_positions': 512, 'output_past': True, 'output_attentions': False, 'output_dir': 'checkpoints/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 4, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0003, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.05, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'logs/', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 50, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 200, 'save_total_limit': 3, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 200, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'checkpoints/', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb', 'tensorboard'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': False} +2026-05-02 21:33:14,396 INFO MainThread:222265 [wandb_config.py:__setitem__():155] [no run ID] config set model/num_parameters = 254360832 - > +2026-05-02 21:33:14,397 INFO MainThread:222265 [wandb_run.py:_config_callback():1415] config_cb model/num_parameters 254360832 None +2026-05-02 21:33:18,790 INFO wandb-AsyncioManager-main:222265 [service_client.py:_forward_responses():134] Reached EOF. +2026-05-02 21:33:18,791 INFO wandb-AsyncioManager-main:222265 [mailbox.py:close():155] Closing mailbox, abandoning 1 handles. diff --git a/wandb/run-20260502_213822-mmm9bdu9/files/config.yaml b/wandb/run-20260502_213822-mmm9bdu9/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..453cb614fc6cc3c1a0939ad92ae312c13d626444 --- /dev/null +++ b/wandb/run-20260502_213822-mmm9bdu9/files/config.yaml @@ -0,0 +1,661 @@ +_name_or_path: + value: google/flan-t5-base +_wandb: + value: + cli_version: 0.26.1 + e: + h449lnsxmjagbnru5ppzi7jn6s3v5bc7: + args: + - --config + - configs/training_config.yaml + - --use-v2-loss + codePath: scripts/train.py + codePathLocal: scripts/train.py + cpu_count: 8 + cpu_count_logical: 16 + cudaVersion: "13.2" + disk: + /: + total: "60248064" + used: "60248064" + email: morpheuslord@protonmail.com + executable: /usr/bin/python3 + gpu: NVIDIA GeForce RTX 3050 Laptop GPU + gpu_count: 1 + gpu_nvidia: + - architecture: Ampere + cudaCores: 2048 + memoryTotal: "4294967296" + name: NVIDIA GeForce RTX 3050 Laptop GPU + uuid: GPU-861554d7-d187-39e9-e77c-881f0287b963 + host: bazzite + memory: + total: "32944783360" + os: Linux-6.17.7-ba29.fc43.x86_64-x86_64-with-glibc2.42 + program: /run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py + python: CPython 3.14.3 + root: /run/media/morpheuslord/Personal_Files/Projects/Rewriter + startedAt: "2026-05-02T16:08:22.589924Z" + writerId: h449lnsxmjagbnru5ppzi7jn6s3v5bc7 + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.14.3 + t: + "1": + - 1 + - 5 + - 11 + - 33 + - 41 + - 49 + - 51 + - 53 + - 71 + - 75 + - 98 + "2": + - 1 + - 5 + - 11 + - 33 + - 41 + - 49 + - 51 + - 53 + - 71 + - 75 + - 98 + "3": + - 7 + - 13 + - 16 + - 19 + - 66 + "4": 3.14.3 + "5": 0.26.1 + "6": 4.53.2 + "9": + "1": transformers_trainer + "12": 0.26.1 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - T5ForConditionalGeneration +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: false +bad_words_ids: + value: null +batch_eval_metrics: + value: false +begin_suppress_tokens: + value: null +bf16: + value: true +bf16_full_eval: + value: false +bos_token_id: + value: null +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: 0 +cross_attention_hidden_size: + value: null +d_ff: + value: 2048 +d_kv: + value: 64 +d_model: + value: 768 +data: + value: + augment_synthetic: true + max_input_length: 128 + max_target_length: 128 + synthetic_ratio: 0.3 + test_path: data/processed/test.jsonl + train_path: data/processed/train.jsonl + val_path: data/processed/val.jsonl +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: 0 +deepspeed: + value: null +dense_act_fn: + value: gelu_new +disable_tqdm: + value: false +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +dropout_rate: + value: 0.1 +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: 1 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: 200 +eval_strategy: + value: steps +eval_use_gather_object: + value: false +exponential_decay_length_penalty: + value: null +feed_forward_proj: + value: gated-gelu +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: false +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +generation: + value: + early_stopping: true + length_penalty: 1 + max_new_tokens: 512 + min_length: 10 + no_repeat_ngram_size: 3 + num_beams: 5 +gradient_accumulation_steps: + value: 16 +gradient_checkpointing: + value: true +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: false +group_by_length: + value: false +half_precision_backend: + value: auto +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: null +hub_revision: + value: null +hub_strategy: + value: every_save +hub_token: + value: +human_pattern: + value: + classifier_path: checkpoints/human_pattern_classifier.pt + max_samples_per_source: 50000 + pretrain_batch_size: 512 + pretrain_epochs: 20 + pretrain_lr: 0.001 + shanegerami_path: data/raw/shanegerami/AI_Human.csv + starblasters_path: data/raw/starblasters8/data.parquet + target_auc: 0.88 +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_inputs_for_metrics: + value: false +include_num_input_tokens_seen: + value: false +include_tokens_per_second: + value: false +initializer_factor: + value: 1 +is_decoder: + value: false +is_encoder_decoder: + value: true +is_gated_act: + value: true +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_epsilon: + value: 1e-06 +learning_rate: + value: 0.0003 +length_column_name: + value: length +length_penalty: + value: 1 +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: logs/ +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 50 +logging_strategy: + value: steps +lora: + value: + lora_alpha: 32 + lora_dropout: 0.05 + r: 16 + target_modules: + - q + - v + - k + - o + - wi_0 + - wi_1 + - wo +loss: + value: + lambda_human_pattern: 0.4 + lambda_semantic: 0.5 + lambda_style: 0.3 + sem_model_name: all-mpnet-base-v2 +lr_scheduler_type: + value: cosine +max_grad_norm: + value: 1 +max_length: + value: 20 +max_steps: + value: -1 +metric_for_best_model: + value: eval_loss +min_length: + value: 0 +model: + value: + key: flan-t5-base + quantize: false + use_lora: true +model/num_parameters: + value: 254360832 +model_type: + value: t5 +mp_parameters: + value: "" +n_positions: + value: 512 +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_decoder_layers: + value: 12 +num_heads: + value: 12 +num_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 3 +optim: + value: adamw_torch +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: checkpoints/ +output_hidden_states: + value: false +output_past: + value: true +output_scores: + value: false +overwrite_output_dir: + value: false +pad_token_id: + value: 0 +past_index: + value: -1 +peft_config: + value: + default: + alora_invocation_tokens: null + arrow_config: null + auto_mapping: null + base_model_name_or_path: google/flan-t5-base + bias: none + corda_config: null + ensure_weight_tying: false + eva_config: null + exclude_modules: null + fan_in_fan_out: false + inference_mode: false + init_lora_weights: true + layer_replication: null + layers_pattern: null + layers_to_transform: null + lora_alpha: 32 + lora_bias: false + lora_dropout: 0.05 + lora_ga_config: null + megatron_config: null + megatron_core: megatron.core + modules_to_save: null + peft_type: LORA + peft_version: 0.19.1 + qalora_group_size: 16 + r: 16 + revision: null + runtime_config: + ephemeral_gpu_offload: false + target_modules: + - wi_1 + - k + - wi_0 + - wo + - q + - v + - o + target_parameters: null + task_type: SEQ_2_SEQ_LM + trainable_token_indices: null + use_bdlora: null + use_dora: false + use_qalora: false + use_rslora: false +per_device_eval_batch_size: + value: 2 +per_device_train_batch_size: + value: 2 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +relative_attention_max_distance: + value: 128 +relative_attention_num_buckets: + value: 32 +remove_invalid_values: + value: false +remove_unused_columns: + value: false +repetition_penalty: + value: 1 +report_to: + value: + - wandb + - tensorboard +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: checkpoints/ +save_on_each_node: + value: false +save_only_model: + value: false +save_safetensors: + value: true +save_steps: + value: 200 +save_strategy: + value: steps +save_total_limit: + value: 3 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +suppress_tokens: + value: null +task_specific_params: + value: + summarization: + early_stopping: true + length_penalty: 2 + max_length: 200 + min_length: 30 + no_repeat_ngram_size: 3 + num_beams: 4 + prefix: 'summarize: ' + translation_en_to_de: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to German: ' + translation_en_to_fr: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to French: ' + translation_en_to_ro: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to Romanian: ' +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: false +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: bfloat16 +torch_empty_cache_steps: + value: null +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +training: + value: + bf16: true + dataloader_num_workers: 4 + eval_steps: 200 + evaluation_strategy: steps + fp16: false + gradient_accumulation_steps: 16 + greater_is_better: false + learning_rate: 0.0003 + load_best_model_at_end: true + logging_dir: logs/ + logging_steps: 50 + lr_scheduler_type: cosine + metric_for_best_model: eval_loss + num_train_epochs: 3 + output_dir: checkpoints/ + per_device_eval_batch_size: 2 + per_device_train_batch_size: 2 + push_to_hub: false + report_to: + - wandb + - tensorboard + save_steps: 200 + save_strategy: steps + save_total_limit: 3 + seed: 42 + warmup_ratio: 0.05 + weight_decay: 0.01 +transformers_version: + value: 4.53.2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_liger_kernel: + value: false +use_mps_device: + value: false +vocab_size: + value: 32128 +warmup_ratio: + value: 0.05 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/wandb/run-20260502_213822-mmm9bdu9/files/output.log b/wandb/run-20260502_213822-mmm9bdu9/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..1d9003fe9cbc77698f06e75d8d0d855942ebe8a0 --- /dev/null +++ b/wandb/run-20260502_213822-mmm9bdu9/files/output.log @@ -0,0 +1,33 @@ +2026-05-02 21:38:26.470 | INFO  | __main__:train:59 - Step 3: Loading model and tokenizer... +2026-05-02 21:38:26.472 | INFO  | src.model.base_model:load_model_and_tokenizer:56 - Loading model: google/flan-t5-base (seq2seq=True, quantize=False, lora=True) +2026-05-02 21:38:31.723 | INFO  | src.model.base_model:load_model_and_tokenizer:100 - Model loaded on cuda with dtype torch.bfloat16 +2026-05-02 21:38:33.265 | INFO  | src.model.base_model:load_model_and_tokenizer:128 - LoRA applied: 6,782,976 trainable params / 254,360,832 total (2.67%) +2026-05-02 21:38:33.266 | INFO  | __main__:train:72 - Step 4: Creating style fingerprinter... +2026-05-02 21:38:33.626 | INFO  | src.style.fingerprinter:__init__:100 - StyleFingerprinter initialised (AWL size: 549) +2026-05-02 21:38:33.626 | INFO  | __main__:train:79 - Step 5: Loading datasets... +2026-05-02 21:38:33.665 | INFO  | src.training.dataset:__init__:71 - Loaded 7555 examples from data/processed/train.jsonl +2026-05-02 21:38:33.806 | INFO  | src.training.dataset:_add_synthetic:148 - Added 2125 synthetic augmentation examples +2026-05-02 21:38:33.806 | INFO  | src.training.dataset:__init__:77 - Total dataset size: 9680 examples +2026-05-02 21:38:33.826 | INFO  | src.training.dataset:_precompute_all:154 - Pre-computing tokenisation and style vectors for all examples... +2026-05-02 21:40:18.404 | INFO  | src.training.dataset:_precompute_all:201 -  Pre-computed: 2000/9680 +2026-05-02 21:42:44.854 | INFO  | src.training.dataset:_precompute_all:201 -  Pre-computed: 4000/9680 +2026-05-02 21:43:42.552 | INFO  | src.training.dataset:_precompute_all:201 -  Pre-computed: 6000/9680 +2026-05-02 21:44:21.292 | INFO  | src.training.dataset:_precompute_all:201 -  Pre-computed: 8000/9680 +2026-05-02 21:44:23.224 | INFO  | src.training.dataset:_precompute_all:203 - Pre-computation complete (7231 unique style vectors) +2026-05-02 21:44:24.666 | INFO  | src.training.dataset:__init__:93 - Saved pre-computed dataset to cache: data/cache/1356ff2104663316.pt +2026-05-02 21:44:24.675 | INFO  | src.training.dataset:__init__:71 - Loaded 839 examples from data/processed/val.jsonl +2026-05-02 21:44:24.675 | INFO  | src.training.dataset:__init__:77 - Total dataset size: 839 examples +2026-05-02 21:44:24.677 | INFO  | src.training.dataset:_precompute_all:154 - Pre-computing tokenisation and style vectors for all examples... +2026-05-02 21:44:45.237 | INFO  | src.training.dataset:_precompute_all:203 - Pre-computation complete (833 unique style vectors) +2026-05-02 21:44:45.369 | INFO  | src.training.dataset:__init__:93 - Saved pre-computed dataset to cache: data/cache/d6a64358c3ef403f.pt +2026-05-02 21:44:45.369 | INFO  | __main__:train:99 - Train: 9680 | Val: 839 +2026-05-02 21:44:45.369 | INFO  | __main__:train:122 - Using CE-only loss (aux models skipped to save memory) +2026-05-02 21:44:45.369 | INFO  | __main__:train:125 - Step 7: Creating training arguments... +2026-05-02 21:44:45.399 | INFO  | __main__:train:179 - Step 8: Creating trainer... +No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead. +2026-05-02 21:44:45.446 | INFO  | __main__:train:195 - Step 9: Starting training... +wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter. + 0%| | 0/909 [00:00', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': False} +2026-05-02 21:44:45,926 INFO MainThread:226596 [wandb_config.py:__setitem__():155] [no run ID] config set model/num_parameters = 254360832 - > +2026-05-02 21:44:45,926 INFO MainThread:226596 [wandb_run.py:_config_callback():1415] config_cb model/num_parameters 254360832 None +2026-05-02 21:48:35,194 INFO wandb-AsyncioManager-main:226596 [service_client.py:_forward_responses():134] Reached EOF. +2026-05-02 21:48:35,195 INFO wandb-AsyncioManager-main:226596 [mailbox.py:close():155] Closing mailbox, abandoning 1 handles. diff --git a/wandb/run-20260503_104137-zjr4w5ln/files/config.yaml b/wandb/run-20260503_104137-zjr4w5ln/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca7ba5df06c1055db13df91c36cb6db882f1acab --- /dev/null +++ b/wandb/run-20260503_104137-zjr4w5ln/files/config.yaml @@ -0,0 +1,661 @@ +_name_or_path: + value: google/flan-t5-base +_wandb: + value: + cli_version: 0.26.1 + e: + lgpvwgt4cmzkutpudd7mmj3upqg5rts0: + args: + - --config + - configs/training_config.yaml + - --use-v2-loss + codePath: scripts/train.py + codePathLocal: scripts/train.py + cpu_count: 8 + cpu_count_logical: 16 + cudaVersion: "13.2" + disk: + /: + total: "60248064" + used: "60248064" + email: morpheuslord@protonmail.com + executable: /usr/bin/python3 + gpu: NVIDIA GeForce RTX 3050 Laptop GPU + gpu_count: 1 + gpu_nvidia: + - architecture: Ampere + cudaCores: 2048 + memoryTotal: "4294967296" + name: NVIDIA GeForce RTX 3050 Laptop GPU + uuid: GPU-861554d7-d187-39e9-e77c-881f0287b963 + host: bazzite + memory: + total: "32944803840" + os: Linux-6.17.7-ba29.fc43.x86_64-x86_64-with-glibc2.42 + program: /run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py + python: CPython 3.14.3 + root: /run/media/morpheuslord/Personal_Files/Projects/Rewriter + startedAt: "2026-05-03T05:11:37.158988Z" + writerId: lgpvwgt4cmzkutpudd7mmj3upqg5rts0 + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.14.3 + t: + "1": + - 1 + - 5 + - 11 + - 33 + - 41 + - 49 + - 51 + - 53 + - 71 + - 75 + - 98 + "2": + - 1 + - 5 + - 11 + - 33 + - 41 + - 49 + - 51 + - 53 + - 71 + - 75 + - 98 + "3": + - 7 + - 13 + - 16 + - 19 + - 66 + "4": 3.14.3 + "5": 0.26.1 + "6": 4.53.2 + "9": + "1": transformers_trainer + "12": 0.26.1 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - T5ForConditionalGeneration +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: false +bad_words_ids: + value: null +batch_eval_metrics: + value: false +begin_suppress_tokens: + value: null +bf16: + value: true +bf16_full_eval: + value: false +bos_token_id: + value: null +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: 0 +cross_attention_hidden_size: + value: null +d_ff: + value: 2048 +d_kv: + value: 64 +d_model: + value: 768 +data: + value: + augment_synthetic: true + max_input_length: 128 + max_target_length: 128 + synthetic_ratio: 0.3 + test_path: data/processed/test.jsonl + train_path: data/processed/train.jsonl + val_path: data/processed/val.jsonl +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: 0 +deepspeed: + value: null +dense_act_fn: + value: gelu_new +disable_tqdm: + value: false +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +dropout_rate: + value: 0.1 +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: 1 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: 200 +eval_strategy: + value: steps +eval_use_gather_object: + value: false +exponential_decay_length_penalty: + value: null +feed_forward_proj: + value: gated-gelu +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: false +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +generation: + value: + early_stopping: true + length_penalty: 1 + max_new_tokens: 512 + min_length: 10 + no_repeat_ngram_size: 3 + num_beams: 5 +gradient_accumulation_steps: + value: 16 +gradient_checkpointing: + value: true +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: false +group_by_length: + value: false +half_precision_backend: + value: auto +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: null +hub_revision: + value: null +hub_strategy: + value: every_save +hub_token: + value: +human_pattern: + value: + classifier_path: checkpoints/human_pattern_classifier.pt + max_samples_per_source: 50000 + pretrain_batch_size: 512 + pretrain_epochs: 20 + pretrain_lr: 0.001 + shanegerami_path: data/raw/shanegerami/AI_Human.csv + starblasters_path: data/raw/starblasters8/data.parquet + target_auc: 0.88 +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_inputs_for_metrics: + value: false +include_num_input_tokens_seen: + value: false +include_tokens_per_second: + value: false +initializer_factor: + value: 1 +is_decoder: + value: false +is_encoder_decoder: + value: true +is_gated_act: + value: true +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_epsilon: + value: 1e-06 +learning_rate: + value: 0.0003 +length_column_name: + value: length +length_penalty: + value: 1 +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: logs/ +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 50 +logging_strategy: + value: steps +lora: + value: + lora_alpha: 32 + lora_dropout: 0.05 + r: 16 + target_modules: + - q + - v + - k + - o + - wi_0 + - wi_1 + - wo +loss: + value: + lambda_human_pattern: 0.4 + lambda_semantic: 0.5 + lambda_style: 0.3 + sem_model_name: all-mpnet-base-v2 +lr_scheduler_type: + value: cosine +max_grad_norm: + value: 1 +max_length: + value: 20 +max_steps: + value: -1 +metric_for_best_model: + value: eval_loss +min_length: + value: 0 +model: + value: + key: flan-t5-base + quantize: false + use_lora: true +model/num_parameters: + value: 254360832 +model_type: + value: t5 +mp_parameters: + value: "" +n_positions: + value: 512 +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_decoder_layers: + value: 12 +num_heads: + value: 12 +num_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 3 +optim: + value: adamw_torch +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: checkpoints/ +output_hidden_states: + value: false +output_past: + value: true +output_scores: + value: false +overwrite_output_dir: + value: false +pad_token_id: + value: 0 +past_index: + value: -1 +peft_config: + value: + default: + alora_invocation_tokens: null + arrow_config: null + auto_mapping: null + base_model_name_or_path: google/flan-t5-base + bias: none + corda_config: null + ensure_weight_tying: false + eva_config: null + exclude_modules: null + fan_in_fan_out: false + inference_mode: false + init_lora_weights: true + layer_replication: null + layers_pattern: null + layers_to_transform: null + lora_alpha: 32 + lora_bias: false + lora_dropout: 0.05 + lora_ga_config: null + megatron_config: null + megatron_core: megatron.core + modules_to_save: null + peft_type: LORA + peft_version: 0.19.1 + qalora_group_size: 16 + r: 16 + revision: null + runtime_config: + ephemeral_gpu_offload: false + target_modules: + - k + - wi_1 + - wo + - v + - o + - wi_0 + - q + target_parameters: null + task_type: SEQ_2_SEQ_LM + trainable_token_indices: null + use_bdlora: null + use_dora: false + use_qalora: false + use_rslora: false +per_device_eval_batch_size: + value: 2 +per_device_train_batch_size: + value: 2 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +relative_attention_max_distance: + value: 128 +relative_attention_num_buckets: + value: 32 +remove_invalid_values: + value: false +remove_unused_columns: + value: false +repetition_penalty: + value: 1 +report_to: + value: + - wandb + - tensorboard +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: checkpoints/ +save_on_each_node: + value: false +save_only_model: + value: false +save_safetensors: + value: true +save_steps: + value: 200 +save_strategy: + value: steps +save_total_limit: + value: 3 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +suppress_tokens: + value: null +task_specific_params: + value: + summarization: + early_stopping: true + length_penalty: 2 + max_length: 200 + min_length: 30 + no_repeat_ngram_size: 3 + num_beams: 4 + prefix: 'summarize: ' + translation_en_to_de: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to German: ' + translation_en_to_fr: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to French: ' + translation_en_to_ro: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to Romanian: ' +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: false +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: bfloat16 +torch_empty_cache_steps: + value: null +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +training: + value: + bf16: true + dataloader_num_workers: 4 + eval_steps: 200 + evaluation_strategy: steps + fp16: false + gradient_accumulation_steps: 16 + greater_is_better: false + learning_rate: 0.0003 + load_best_model_at_end: true + logging_dir: logs/ + logging_steps: 50 + lr_scheduler_type: cosine + metric_for_best_model: eval_loss + num_train_epochs: 3 + output_dir: checkpoints/ + per_device_eval_batch_size: 2 + per_device_train_batch_size: 2 + push_to_hub: false + report_to: + - wandb + - tensorboard + save_steps: 200 + save_strategy: steps + save_total_limit: 3 + seed: 42 + warmup_ratio: 0.05 + weight_decay: 0.01 +transformers_version: + value: 4.53.2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_liger_kernel: + value: false +use_mps_device: + value: false +vocab_size: + value: 32128 +warmup_ratio: + value: 0.05 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/wandb/run-20260503_104137-zjr4w5ln/files/output.log b/wandb/run-20260503_104137-zjr4w5ln/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..04a244f14c3ed55d0d8719541e74a08ff4ac3066 --- /dev/null +++ b/wandb/run-20260503_104137-zjr4w5ln/files/output.log @@ -0,0 +1,29 @@ +2026-05-03 10:41:44.192 | INFO  | __main__:train:59 - Step 3: Loading model and tokenizer... +2026-05-03 10:41:44.192 | INFO  | src.model.base_model:load_model_and_tokenizer:56 - Loading model: google/flan-t5-base (seq2seq=True, quantize=False, lora=True) +2026-05-03 10:41:47.493 | INFO  | src.model.base_model:load_model_and_tokenizer:100 - Model loaded on cuda with dtype torch.bfloat16 +2026-05-03 10:41:49.232 | INFO  | src.model.base_model:load_model_and_tokenizer:128 - LoRA applied: 6,782,976 trainable params / 254,360,832 total (2.67%) +2026-05-03 10:41:49.233 | INFO  | __main__:train:72 - Step 4: Creating style fingerprinter... +2026-05-03 10:41:49.640 | INFO  | src.style.fingerprinter:__init__:100 - StyleFingerprinter initialised (AWL size: 549) +2026-05-03 10:41:49.640 | INFO  | __main__:train:79 - Step 5: Loading datasets... +2026-05-03 10:41:49.687 | INFO  | src.training.dataset:__init__:71 - Loaded 7555 examples from data/processed/train.jsonl +2026-05-03 10:41:49.830 | INFO  | src.training.dataset:_add_synthetic:148 - Added 2125 synthetic augmentation examples +2026-05-03 10:41:49.830 | INFO  | src.training.dataset:__init__:77 - Total dataset size: 9680 examples +2026-05-03 10:41:49.852 | INFO  | src.training.dataset:__init__:85 - Loading pre-computed dataset from cache: data/cache/1356ff2104663316.pt +2026-05-03 10:41:51.191 | INFO  | src.training.dataset:__init__:87 - Loaded 9680 cached examples +2026-05-03 10:41:51.198 | INFO  | src.training.dataset:__init__:71 - Loaded 839 examples from data/processed/val.jsonl +2026-05-03 10:41:51.199 | INFO  | src.training.dataset:__init__:77 - Total dataset size: 839 examples +2026-05-03 10:41:51.201 | INFO  | src.training.dataset:__init__:85 - Loading pre-computed dataset from cache: data/cache/d6a64358c3ef403f.pt +2026-05-03 10:41:51.306 | INFO  | src.training.dataset:__init__:87 - Loaded 839 cached examples +2026-05-03 10:41:51.306 | INFO  | __main__:train:99 - Train: 9680 | Val: 839 +2026-05-03 10:41:51.306 | INFO  | __main__:train:122 - Using CE-only loss (aux models skipped to save memory) +2026-05-03 10:41:51.306 | INFO  | __main__:train:125 - Step 7: Creating training arguments... +2026-05-03 10:41:51.335 | INFO  | __main__:train:179 - Step 8: Creating trainer... +No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead. +2026-05-03 10:41:51.411 | INFO  | __main__:train:195 - Step 9: Starting training... +wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter. + 0%| | 0/909 [00:00', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': False} +2026-05-03 10:41:51,798 INFO MainThread:5847 [wandb_config.py:__setitem__():155] [no run ID] config set model/num_parameters = 254360832 - > +2026-05-03 10:41:51,798 INFO MainThread:5847 [wandb_run.py:_config_callback():1415] config_cb model/num_parameters 254360832 None +2026-05-03 11:50:29,280 INFO wandb-AsyncioManager-main:5847 [service_client.py:_forward_responses():134] Reached EOF. +2026-05-03 11:50:29,281 INFO wandb-AsyncioManager-main:5847 [mailbox.py:close():155] Closing mailbox, abandoning 1 handles. +2026-05-03 11:50:29,488 ERROR wandb-AsyncioManager-main:5847 [asyncio_manager.py:fn_wrap_exceptions():184] Uncaught exception in run_soon callback. +Traceback (most recent call last): + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/asyncio_manager.py", line 182, in fn_wrap_exceptions + await fn() + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 45, in publish + await self._send_server_request(request) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 87, in _send_server_request + await self._drain_writer() + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 96, in _drain_writer + await self._writer.drain() + File "/usr/lib64/python3.14/asyncio/streams.py", line 386, in drain + await self._protocol._drain_helper() + File "/usr/lib64/python3.14/asyncio/streams.py", line 166, in _drain_helper + raise ConnectionResetError('Connection lost') +ConnectionResetError: Connection lost +2026-05-03 11:50:29,494 ERROR wandb-AsyncioManager-main:5847 [asyncio_manager.py:fn_wrap_exceptions():184] Uncaught exception in run_soon callback. +Traceback (most recent call last): + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/asyncio_manager.py", line 182, in fn_wrap_exceptions + await fn() + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 45, in publish + await self._send_server_request(request) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 87, in _send_server_request + await self._drain_writer() + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 96, in _drain_writer + await self._writer.drain() + File "/usr/lib64/python3.14/asyncio/streams.py", line 386, in drain + await self._protocol._drain_helper() + File "/usr/lib64/python3.14/asyncio/streams.py", line 166, in _drain_helper + raise ConnectionResetError('Connection lost') +ConnectionResetError: Connection lost +2026-05-03 11:50:29,496 ERROR wandb-AsyncioManager-main:5847 [asyncio_manager.py:fn_wrap_exceptions():184] Uncaught exception in run_soon callback. +Traceback (most recent call last): + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/asyncio_manager.py", line 182, in fn_wrap_exceptions + await fn() + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 45, in publish + await self._send_server_request(request) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 87, in _send_server_request + await self._drain_writer() + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 96, in _drain_writer + await self._writer.drain() + File "/usr/lib64/python3.14/asyncio/streams.py", line 386, in drain + await self._protocol._drain_helper() + File "/usr/lib64/python3.14/asyncio/streams.py", line 166, in _drain_helper + raise ConnectionResetError('Connection lost') +ConnectionResetError: Connection lost +2026-05-03 11:50:29,498 ERROR wandb-AsyncioManager-main:5847 [asyncio_manager.py:fn_wrap_exceptions():184] Uncaught exception in run_soon callback. +Traceback (most recent call last): + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/asyncio_manager.py", line 182, in fn_wrap_exceptions + await fn() + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 45, in publish + await self._send_server_request(request) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 87, in _send_server_request + await self._drain_writer() + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 96, in _drain_writer + await self._writer.drain() + File "/usr/lib64/python3.14/asyncio/streams.py", line 386, in drain + await self._protocol._drain_helper() + File "/usr/lib64/python3.14/asyncio/streams.py", line 166, in _drain_helper + raise ConnectionResetError('Connection lost') +ConnectionResetError: Connection lost diff --git a/wandb/run-20260503_120130-xzkygl93/files/config.yaml b/wandb/run-20260503_120130-xzkygl93/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0cdc88eadad618b981120211b61c8a804c2c698 --- /dev/null +++ b/wandb/run-20260503_120130-xzkygl93/files/config.yaml @@ -0,0 +1,135 @@ +_wandb: + value: + cli_version: 0.26.1 + e: + aflgcds9dyfqbcgq38x74419uxijaxse: + args: + - --config + - configs/training_config.yaml + - --use-v2-loss + codePath: scripts/train.py + codePathLocal: scripts/train.py + email: morpheuslord@protonmail.com + executable: /usr/bin/python3 + host: bazzite + os: Linux-6.19.14-ogc1.1.fc44.x86_64-x86_64-with-glibc2.43 + program: /run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py + python: CPython 3.14.4 + root: /run/media/morpheuslord/Personal_Files/Projects/Rewriter + startedAt: "2026-05-03T06:31:30.240634Z" + writerId: aflgcds9dyfqbcgq38x74419uxijaxse + m: [] + python_version: 3.14.4 + t: + "1": + - 1 + - 5 + - 11 + - 33 + - 41 + - 49 + - 51 + - 53 + - 71 + - 75 + - 98 + "2": + - 1 + - 5 + - 11 + - 33 + - 41 + - 49 + - 51 + - 53 + - 71 + - 75 + - 98 + "3": + - 13 + - 16 + "4": 3.14.4 + "5": 0.26.1 + "6": 4.53.2 + "12": 0.26.1 + "13": linux-x86_64 +data: + value: + augment_synthetic: true + max_input_length: 128 + max_target_length: 128 + synthetic_ratio: 0.3 + test_path: data/processed/test.jsonl + train_path: data/processed/train.jsonl + val_path: data/processed/val.jsonl +generation: + value: + early_stopping: true + length_penalty: 1 + max_new_tokens: 512 + min_length: 10 + no_repeat_ngram_size: 3 + num_beams: 5 +human_pattern: + value: + classifier_path: checkpoints/human_pattern_classifier.pt + max_samples_per_source: 50000 + pretrain_batch_size: 512 + pretrain_epochs: 20 + pretrain_lr: 0.001 + shanegerami_path: data/raw/shanegerami/AI_Human.csv + starblasters_path: data/raw/starblasters8/data.parquet + target_auc: 0.88 +lora: + value: + lora_alpha: 16 + lora_dropout: 0.05 + r: 8 + target_modules: + - q + - v + - k + - o + - wi_0 + - wi_1 + - wo +loss: + value: + lambda_human_pattern: 0.4 + lambda_semantic: 0.5 + lambda_style: 0.3 + sem_model_name: all-mpnet-base-v2 +model: + value: + key: flan-t5-small + quantize: false + use_lora: true +training: + value: + bf16: true + dataloader_num_workers: 4 + eval_steps: 100 + evaluation_strategy: steps + fp16: false + gradient_accumulation_steps: 8 + greater_is_better: false + learning_rate: 0.0003 + load_best_model_at_end: true + logging_dir: logs/ + logging_steps: 25 + lr_scheduler_type: cosine + metric_for_best_model: eval_loss + num_train_epochs: 5 + output_dir: checkpoints/ + per_device_eval_batch_size: 8 + per_device_train_batch_size: 4 + push_to_hub: false + report_to: + - wandb + - tensorboard + save_steps: 100 + save_strategy: steps + save_total_limit: 3 + seed: 42 + warmup_ratio: 0.05 + weight_decay: 0.01 diff --git a/wandb/run-20260503_120130-xzkygl93/files/output.log b/wandb/run-20260503_120130-xzkygl93/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..f863b61074e4721e7e095db59002d7ccba70d70a --- /dev/null +++ b/wandb/run-20260503_120130-xzkygl93/files/output.log @@ -0,0 +1,22 @@ +2026-05-03 12:01:35.714 | INFO  | __main__:train:153 - Step 3: Setting up device (hybrid GPU mode)... +Traceback (most recent call last): + File "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", line 356, in + train() + ~~~~~^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1485, in __call__ + return self.main(*args, **kwargs) + ~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1406, in main + rv = self.invoke(ctx) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1269, in invoke + return ctx.invoke(self.callback, **ctx.params) + ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 824, in invoke + return callback(*args, **kwargs) + File "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", line 154, in train + device, gpu_info = _setup_device() + ~~~~~~~~~~~~~^^ + File "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", line 53, in _setup_device + vram_total = torch.cuda.get_device_properties(0).total_mem // (1024 * 1024) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +AttributeError: 'torch._C._CudaDeviceProperties' object has no attribute 'total_mem'. Did you mean: 'total_memory'? diff --git a/wandb/run-20260503_120130-xzkygl93/files/requirements.txt b/wandb/run-20260503_120130-xzkygl93/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..0a9158bdf370f073be66c392b3c09dcdfbfed3be --- /dev/null +++ b/wandb/run-20260503_120130-xzkygl93/files/requirements.txt @@ -0,0 +1,351 @@ +Pygments==2.19.2 +pluggy==1.6.0 +iniconfig==2.3.0 +pytest==9.0.2 +sqlite-vec==0.1.6 +peewee==3.19.0 +boltons==21.0.0 +zipp==3.23.0 +wrapt==1.17.3 +tree-sitter-python==0.25.0 +tree-sitter-javascript==0.25.0 +tree-sitter-java==0.23.5 +tree-sitter-cpp==0.23.4 +tree-sitter-c==0.24.1 +tree-sitter==0.25.2 +tqdm==4.67.3 +tomli==2.0.2 +tabulate==0.10.0 +stevedore==5.7.0 +sniffio==1.3.1 +smmap==5.0.3 +shellingham==1.5.4 +semantic-version==2.10.0 +ruamel.yaml.clib==0.2.14 +ruamel.yaml==0.19.1 +python-multipart==0.0.22 +python-dotenv==1.2.2 +PyJWT==2.11.0 +protobuf==6.33.5 +opentelemetry-util-http==0.58b0 +networkx==3.6.1 +mdurl==0.1.2 +MarkupSafe==3.0.3 +jiter==0.13.0 +hyperframe==6.1.0 +httpx-sse==0.4.3 +hpack==4.1.0 +face==26.0.0 +exceptiongroup==1.2.2 +docstring_parser==0.17.0 +colorama==0.4.6 +bracex==2.6 +annotated-doc==0.0.4 +aiofiles==25.1.0 +wcmatch==8.5.2 +uvicorn==0.41.0 +starlette==0.52.1 +opentelemetry-proto==1.37.0 +markdown-it-py==4.0.0 +Jinja2==3.1.6 +importlib_metadata==8.7.1 +h2==4.3.0 +googleapis-common-protos==1.73.0 +glom==25.12.0 +gitdb==4.0.12 +click-option-group==0.5.9 +sse-starlette==3.3.2 +rich==14.3.3 +pydantic-settings==2.13.1 +opentelemetry-exporter-otlp-proto-common==1.37.0 +opentelemetry-api==1.37.0 +openai==2.26.0 +jsonschema==4.25.1 +GitPython==3.1.46 +typer==0.23.1 +opentelemetry-semantic-conventions==0.58b0 +bandit==1.9.4 +opentelemetry-sdk==1.37.0 +opentelemetry-instrumentation==0.58b0 +mcp==1.23.3 +agno==2.5.9 +opentelemetry-instrumentation-threading==0.58b0 +opentelemetry-instrumentation-requests==0.58b0 +opentelemetry-exporter-otlp-proto-http==1.37.0 +semgrep==1.154.0 +acvas==1.0.0 +Werkzeug==3.1.8 +websockets==13.1 +wcwidth==0.6.0 +sqlparse==0.5.5 +semver==3.0.4 +itsdangerous==2.2.0 +frida==17.9.1 +click==8.3.2 +blinker==1.9.0 +prompt_toolkit==3.0.52 +Flask==3.1.3 +delegator.py==0.1.1 +frida-tools==14.8.1 +cli_helpers==2.12.0 +litecli==1.17.1 +objection==1.12.4 +userpath==1.9.2 +platformdirs==4.9.4 +argcomplete==3.6.3 +pipx==1.11.1 +distlib==0.4.0 +filelock==3.25.2 +python-discovery==1.2.1 +virtualenv==21.2.0 +pyelftools==0.32 +cigam==0.0.3 +xmltodict==1.0.4 +apkutils2==1.0.0 +svgwrite==1.4.3 +rdflib==7.6.0 +python-louvain==0.16 +wheel==0.46.3 +pip==26.0.1 +threadpoolctl==3.6.0 +scipy==1.17.1 +joblib==1.5.3 +scikit-learn==1.8.0 +pandas==3.0.2 +seaborn==0.13.2 +text-unidecode==1.3 +fastjsonschema==2.21.2 +traitlets==5.14.3 +python-slugify==8.0.4 +bleach==6.3.0 +mdit-py-plugins==0.5.0 +kagglesdk==0.1.23 +jupyter_core==5.9.1 +nbformat==5.10.4 +jupytext==1.19.1 +kaggle==2.1.0 +xxhash==3.7.0 +pyarrow==24.0.0 +propcache==0.4.1 +multidict==6.7.1 +hf-xet==1.4.3 +fsspec==2026.2.0 +frozenlist==1.8.0 +dill==0.4.1 +aiohappyeyeballs==2.6.1 +yarl==1.23.0 +multiprocess==0.70.19 +aiosignal==1.4.0 +aiohttp==3.13.5 +datasets==4.8.5 +torchaudio==2.11.0 +nvidia-cusparselt-cu13==0.8.0 +mpmath==1.3.0 +cuda-toolkit==13.0.2 +wasabi==1.1.3 +uvloop==0.22.1 +triton==3.6.0 +toml==0.10.2 +tensorboard-data-server==0.7.2 +sympy==1.14.0 +spacy-loggers==1.0.5 +spacy-legacy==3.0.12 +spacy-alignments==0.9.1 +smart_open==7.6.0 +setuptools==81.0.0 +sentencepiece==0.2.1 +safetensors==0.7.0 +pyspellchecker==0.9.0 +pyphen==0.17.2 +portalocker==3.2.0 +nvidia-nvtx==13.0.85 +nvidia-nvshmem-cu13==3.4.5 +nvidia-nvjitlink==13.0.88 +nvidia-nccl-cu13==2.28.9 +nvidia-curand==10.4.0.35 +nvidia-cufile==1.15.1.6 +nvidia-cuda-runtime==13.0.96 +nvidia-cuda-nvrtc==13.0.88 +nvidia-cuda-cupti==13.0.85 +nvidia-cublas==13.1.0.3 +nltk==3.9.4 +murmurhash==1.0.15 +msgpack==1.1.2 +Markdown==3.10.2 +loguru==0.7.3 +locate==1.1.1 +langcodes==3.5.1 +httptools==0.7.1 +grpcio==1.80.0 +ftfy==6.3.1 +faiss-cpu==1.13.2 +cymem==2.0.13 +cuda-pathfinder==1.5.4 +confection==1.3.3 +cloudpathlib==0.24.0 +catalogue==2.0.10 +blis==1.3.3 +absl-py==2.4.0 +wordfreq==3.1.1 +watchfiles==1.1.1 +textstat==0.7.13 +tensorboard==2.20.0 +srsly==2.5.3 +sacrebleu==2.6.0 +rouge_score==0.1.2 +preshed==3.0.13 +nvidia-cusparse==12.6.3.3 +nvidia-cufft==12.0.0.61 +nvidia-cudnn-cu13==9.19.0.56 +language_tool_python==3.3.0 +huggingface_hub==0.36.2 +cuda-bindings==13.2.0 +wandb==0.26.1 +tokenizers==0.21.4 +thinc==8.3.13 +nvidia-cusolver==12.0.4.66 +fastapi==0.136.1 +weasel==1.0.0 +transformers==4.53.2 +torch==2.11.0 +spacy==3.8.13 +torchvision==0.26.0 +spacy-transformers==1.4.0 +sentence-transformers==5.4.1 +optimum==2.1.0 +errant==3.0.0 +bitsandbytes==0.49.2 +bert-score==0.3.13 +accelerate==1.13.0 +peft==0.19.1 +en_core_web_sm==3.8.0 +tree-sitter-zig==1.1.2 +tree-sitter-verilog==1.0.3 +tree-sitter-typescript==0.23.2 +tree-sitter-swift==0.0.1 +tree-sitter-scala==0.26.0 +tree-sitter-rust==0.24.2 +tree-sitter-ruby==0.23.1 +tree-sitter-powershell==0.26.3 +tree-sitter-php==0.24.1 +tree-sitter-objc==3.0.2 +tree-sitter-lua==0.5.0 +tree-sitter-kotlin==1.1.0 +tree-sitter-julia==0.23.1 +tree-sitter-go==0.25.0 +tree-sitter-elixir==0.3.5 +tree-sitter-c-sharp==0.23.5 +graphifyy==0.6.5 +numpy==2.4.4 +kiwisolver==1.5.0 +fonttools==4.62.1 +cycler==0.12.1 +contourpy==1.3.3 +matplotlib==3.10.9 +Brlapi==0.8.7 +PyGObject==3.56.2 +cffi==2.0.0 +cryptography==46.0.7 +dbus-python==1.4.0 +evdev==1.9.3 +gbinder-python==1.3.0 +lxml==6.0.2 +perf==0.1 +pillow==12.2.0 +psutil==7.2.2 +PyAudio==0.2.13 +pycairo==1.28.0 +pycups==2.0.4 +pydantic_core==2.41.5 +pyscard==2.2.2 +PyYAML==6.0.3 +RapidFuzz==3.14.3 +regex==2026.2.28 +rpds-py==0.29.0 +rpm==6.0.1 +selinux==3.10 +setools==4.6.0 +setproctitle==1.3.7 +systemd-python==235 +PySocks==1.7.1 +annotated-types==0.7.0 +anyio==4.12.1 +attrs==25.4.0 +beautifulsoup4==4.14.3 +boto3==1.42.84 +botocore==1.42.84 +certifi==2026.1.4 +charset-normalizer==3.4.4 +click==8.3.3 +cockpit==361 +configobj==5.0.9 +cupshelpers==1.0 +dasbus==1.7 +distro==1.9.0 +fido2==2.0.0 +file-magic==0.4.0 +filelock==3.15.4 +fw-fanctrl==1.0.2 +h11==0.16.0 +html2text==2025.4.15 +httpcore==1.0.9 +httpx==0.28.1 +icoextract==0.2.0 +idna==3.11 +input-remapper==2.2.0 +jaraco.classes==3.4.0 +jaraco.context==6.1.0 +jaraco.functools==4.3.0 +jeepney==0.9.0 +jmespath==1.0.1 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +keyring==25.7.0 +langtable==0.0.70 +libevdev==0.13.1 +louis==3.33.0 +lutris==0.5.22 +moddb==0.12.0 +more-itertools==10.5.0 +nftables==0.1 +olefile==0.47 +packaging==25.0 +pefile==2024.8.26 +pexpect==4.9.0 +pip==26.0.1 +protobuf==3.19.6 +ptyprocess==0.7.0 +pycparser==2.22 +pydantic==2.12.5 +pydbus==0.6.0 +pyenchant==3.3.0 +pygdbmi==0.11.0.0 +pyinotify==0.9.6 +pyparsing==3.1.2 +pypresence==4.3.0 +python-dateutil==2.9.0.post0 +python-linux-procfs==0.7.4 +python-pskc==1.4 +python-xlib==0.33 +pyudev==0.24.4 +pyxdg==0.28 +referencing==0.36.2 +requests==2.32.5 +requests-file==3.0.0 +s3transfer==0.16.0 +SecretStorage==3.5.0 +sentry-sdk==2.48.0 +sepolicy==3.10 +setroubleshoot==3.3.36 +setuptools==80.10.2 +shtab==1.7.2 +six==1.17.0 +sos==4.11.1 +soupsieve==2.8.3 +tldextract==5.3.0 +typing_extensions==4.15.0 +typing-inspection==0.4.2 +udica==0.2.8 +umu-launcher==1.4.0 +urllib3==2.6.3 +yubikey-manager==5.9.1 diff --git a/wandb/run-20260503_120130-xzkygl93/files/wandb-metadata.json b/wandb/run-20260503_120130-xzkygl93/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..748ee84ece1b936e967daec1cbe5a8fd3cfce392 --- /dev/null +++ b/wandb/run-20260503_120130-xzkygl93/files/wandb-metadata.json @@ -0,0 +1,18 @@ +{ + "os": "Linux-6.19.14-ogc1.1.fc44.x86_64-x86_64-with-glibc2.43", + "python": "CPython 3.14.4", + "startedAt": "2026-05-03T06:31:30.240634Z", + "args": [ + "--config", + "configs/training_config.yaml", + "--use-v2-loss" + ], + "program": "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", + "codePath": "scripts/train.py", + "codePathLocal": "scripts/train.py", + "email": "morpheuslord@protonmail.com", + "root": "/run/media/morpheuslord/Personal_Files/Projects/Rewriter", + "host": "bazzite", + "executable": "/usr/bin/python3", + "writerId": "aflgcds9dyfqbcgq38x74419uxijaxse" +} \ No newline at end of file diff --git a/wandb/run-20260503_120130-xzkygl93/files/wandb-summary.json b/wandb/run-20260503_120130-xzkygl93/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..b0a620d0c1047a4dd8a400939b6da246ed8063a7 --- /dev/null +++ b/wandb/run-20260503_120130-xzkygl93/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":0},"_runtime":0} \ No newline at end of file diff --git a/wandb/run-20260503_120130-xzkygl93/logs/debug-core.log b/wandb/run-20260503_120130-xzkygl93/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..1474fcc43295e9fbc7d25603b2f4c090bd1d9dcd --- /dev/null +++ b/wandb/run-20260503_120130-xzkygl93/logs/debug-core.log @@ -0,0 +1,15 @@ +{"time":"2026-05-03T12:01:33.448484097+05:30","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpr_hc1tiy/port-13304.txt","pid":13304,"detached":false,"idle-timeout":600000000000,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-05-03T12:01:33.454956879+05:30","level":"INFO","msg":"server: will exit if parent process dies","ppid":13304} +{"time":"2026-05-03T12:01:33.454802392+05:30","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-13304-13776-3200678995/socket","Net":"unix"}} +{"time":"2026-05-03T12:01:33.549045967+05:30","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-05-03T12:01:33.67164572+05:30","level":"INFO","msg":"handleInformInit: received","streamId":"xzkygl93","id":"1(@)"} +{"time":"2026-05-03T12:01:34.770640101+05:30","level":"INFO","msg":"handleInformInit: stream started","streamId":"xzkygl93","id":"1(@)"} +{"time":"2026-05-03T12:01:35.720061556+05:30","level":"ERROR","msg":"processOutgoingData: flush error","error":"write unix /tmp/wandb-13304-13776-3200678995/socket->@: write: broken pipe","id":"1(@)"} +{"time":"2026-05-03T12:01:35.721210867+05:30","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-05-03T12:01:35.721241223+05:30","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-05-03T12:01:35.721253897+05:30","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-05-03T12:01:35.721315712+05:30","level":"INFO","msg":"server is shutting down"} +{"time":"2026-05-03T12:01:35.721348112+05:30","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-13304-13776-3200678995/socket","Net":"unix"}} +{"time":"2026-05-03T12:01:37.869878118+05:30","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-05-03T12:01:37.869913644+05:30","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-05-03T12:01:37.869933761+05:30","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20260503_120130-xzkygl93/logs/debug-internal.log b/wandb/run-20260503_120130-xzkygl93/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..45015539d1f3bed298a02b0bdf1ad73d0862a823 --- /dev/null +++ b/wandb/run-20260503_120130-xzkygl93/logs/debug-internal.log @@ -0,0 +1,16 @@ +{"time":"2026-05-03T12:01:33.673857782+05:30","level":"INFO","msg":"wandb-core"} +{"time":"2026-05-03T12:01:33.681619473+05:30","level":"INFO","msg":"stream: starting","core version":"0.26.1"} +{"time":"2026-05-03T12:01:34.765086574+05:30","level":"INFO","msg":"stream: created new stream","id":"xzkygl93"} +{"time":"2026-05-03T12:01:34.766037066+05:30","level":"INFO","msg":"handler: started"} +{"time":"2026-05-03T12:01:34.770027736+05:30","level":"INFO","msg":"stream: started"} +{"time":"2026-05-03T12:01:34.770225492+05:30","level":"INFO","msg":"writer: started","stream_id":"xzkygl93"} +{"time":"2026-05-03T12:01:34.770274403+05:30","level":"INFO","msg":"sender: started"} +{"time":"2026-05-03T12:01:35.72039248+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":1} +{"time":"2026-05-03T12:01:36.681875359+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:01:37.400288982+05:30","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-05-03T12:01:37.400554725+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"console_offset":0,"console_lines":22,"uploaded_len":5,"complete":true,"exit_code":1} +{"time":"2026-05-03T12:01:37.809635567+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:01:37.80983677+05:30","level":"INFO","msg":"stream: finishing up"} +{"time":"2026-05-03T12:01:37.809909094+05:30","level":"INFO","msg":"handler: closed"} +{"time":"2026-05-03T12:01:37.810705741+05:30","level":"INFO","msg":"sender: closed"} +{"time":"2026-05-03T12:01:37.810774459+05:30","level":"INFO","msg":"stream: all finished"} diff --git a/wandb/run-20260503_120130-xzkygl93/logs/debug.log b/wandb/run-20260503_120130-xzkygl93/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..a53bbdbfcd14fc45b31b95d82fa5859e25c18ea9 --- /dev/null +++ b/wandb/run-20260503_120130-xzkygl93/logs/debug.log @@ -0,0 +1,21 @@ +2026-05-03 12:01:30,299 INFO MainThread:13304 [wandb_setup.py:_flush():81] Current SDK version is 0.26.1 +2026-05-03 12:01:30,300 INFO MainThread:13304 [wandb_setup.py:_flush():81] Configure stats pid to 13304 +2026-05-03 12:01:30,301 INFO MainThread:13304 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-05-03 12:01:30,304 INFO MainThread:13304 [wandb_init.py:setup_run_log_directory():723] Logging user logs to /run/media/morpheuslord/Personal_Files/Projects/Rewriter/wandb/run-20260503_120130-xzkygl93/logs/debug.log +2026-05-03 12:01:30,305 INFO MainThread:13304 [wandb_init.py:setup_run_log_directory():724] Logging internal logs to /run/media/morpheuslord/Personal_Files/Projects/Rewriter/wandb/run-20260503_120130-xzkygl93/logs/debug-internal.log +2026-05-03 12:01:30,307 INFO MainThread:13304 [wandb_init.py:init():850] calling init triggers +2026-05-03 12:01:30,308 INFO MainThread:13304 [wandb_init.py:init():855] wandb.init called with sweep_config: {} +config: {'model': {'key': 'flan-t5-small', 'quantize': False, 'use_lora': True}, 'lora': {'r': 8, 'lora_alpha': 16, 'lora_dropout': 0.05, 'target_modules': ['q', 'v', 'k', 'o', 'wi_0', 'wi_1', 'wo']}, 'data': {'train_path': 'data/processed/train.jsonl', 'val_path': 'data/processed/val.jsonl', 'test_path': 'data/processed/test.jsonl', 'max_input_length': 128, 'max_target_length': 128, 'augment_synthetic': True, 'synthetic_ratio': 0.3}, 'training': {'output_dir': 'checkpoints/', 'num_train_epochs': 5, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'gradient_accumulation_steps': 8, 'learning_rate': 0.0003, 'lr_scheduler_type': 'cosine', 'warmup_ratio': 0.05, 'weight_decay': 0.01, 'fp16': False, 'bf16': True, 'evaluation_strategy': 'steps', 'eval_steps': 100, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 3, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'logging_dir': 'logs/', 'logging_steps': 25, 'report_to': ['wandb', 'tensorboard'], 'dataloader_num_workers': 4, 'seed': 42, 'push_to_hub': False}, 'loss': {'lambda_style': 0.3, 'lambda_semantic': 0.5, 'lambda_human_pattern': 0.4, 'sem_model_name': 'all-mpnet-base-v2'}, 'generation': {'num_beams': 5, 'length_penalty': 1.0, 'no_repeat_ngram_size': 3, 'min_length': 10, 'max_new_tokens': 512, 'early_stopping': True}, 'human_pattern': {'classifier_path': 'checkpoints/human_pattern_classifier.pt', 'shanegerami_path': 'data/raw/shanegerami/AI_Human.csv', 'starblasters_path': 'data/raw/starblasters8/data.parquet', 'max_samples_per_source': 50000, 'pretrain_epochs': 20, 'pretrain_lr': 0.001, 'pretrain_batch_size': 512, 'target_auc': 0.88}, '_wandb': {}} +2026-05-03 12:01:30,309 INFO MainThread:13304 [wandb_init.py:init():898] starting backend +2026-05-03 12:01:33,550 INFO MainThread:13304 [wandb_init.py:init():913] sending inform_init request +2026-05-03 12:01:34,773 INFO MainThread:13304 [wandb_init.py:init():918] backend started and connected +2026-05-03 12:01:34,785 INFO MainThread:13304 [wandb_init.py:init():988] updated telemetry +2026-05-03 12:01:34,803 INFO MainThread:13304 [wandb_init.py:init():1011] communicating run to backend with 90.0 second timeout +2026-05-03 12:01:35,556 INFO MainThread:13304 [wandb_init.py:init():1056] starting run threads in backend +2026-05-03 12:01:35,710 INFO MainThread:13304 [wandb_run.py:_console_start():2554] atexit reg +2026-05-03 12:01:35,711 INFO MainThread:13304 [wandb_run.py:_redirect():2403] redirect: wrap_raw +2026-05-03 12:01:35,711 INFO MainThread:13304 [wandb_run.py:_redirect():2472] Wrapping output streams. +2026-05-03 12:01:35,711 INFO MainThread:13304 [wandb_run.py:_redirect():2495] Redirects installed. +2026-05-03 12:01:35,713 INFO MainThread:13304 [wandb_init.py:init():1094] run started, returning control to user process +2026-05-03 12:01:35,718 INFO wandb-AsyncioManager-main:13304 [service_client.py:_forward_responses():134] Reached EOF. +2026-05-03 12:01:35,718 INFO wandb-AsyncioManager-main:13304 [mailbox.py:close():155] Closing mailbox, abandoning 3 handles. diff --git a/wandb/run-20260503_120403-cbb6slr5/files/config.yaml b/wandb/run-20260503_120403-cbb6slr5/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7da1125cbdf2c7d1270970ed1afb853f841d0bcc --- /dev/null +++ b/wandb/run-20260503_120403-cbb6slr5/files/config.yaml @@ -0,0 +1,661 @@ +_name_or_path: + value: google/flan-t5-small +_wandb: + value: + cli_version: 0.26.1 + e: + wdl3stk45pm5fr1wq23xw6ggjxanoild: + args: + - --config + - configs/training_config.yaml + - --use-v2-loss + codePath: scripts/train.py + codePathLocal: scripts/train.py + cpu_count: 8 + cpu_count_logical: 16 + cudaVersion: "13.2" + disk: + /: + total: "65773568" + used: "65773568" + email: morpheuslord@protonmail.com + executable: /usr/bin/python3 + gpu: NVIDIA GeForce RTX 3050 Laptop GPU + gpu_count: 1 + gpu_nvidia: + - architecture: Ampere + cudaCores: 2048 + memoryTotal: "4294967296" + name: NVIDIA GeForce RTX 3050 Laptop GPU + uuid: GPU-861554d7-d187-39e9-e77c-881f0287b963 + host: bazzite + memory: + total: "32939671552" + os: Linux-6.19.14-ogc1.1.fc44.x86_64-x86_64-with-glibc2.43 + program: /run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py + python: CPython 3.14.4 + root: /run/media/morpheuslord/Personal_Files/Projects/Rewriter + startedAt: "2026-05-03T06:34:03.241358Z" + writerId: wdl3stk45pm5fr1wq23xw6ggjxanoild + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.14.4 + t: + "1": + - 1 + - 5 + - 11 + - 33 + - 41 + - 49 + - 51 + - 53 + - 71 + - 75 + - 98 + "2": + - 1 + - 5 + - 11 + - 33 + - 41 + - 49 + - 51 + - 53 + - 71 + - 75 + - 98 + "3": + - 7 + - 13 + - 16 + - 19 + - 66 + "4": 3.14.4 + "5": 0.26.1 + "6": 4.53.2 + "9": + "1": transformers_trainer + "12": 0.26.1 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - T5ForConditionalGeneration +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: false +bad_words_ids: + value: null +batch_eval_metrics: + value: false +begin_suppress_tokens: + value: null +bf16: + value: true +bf16_full_eval: + value: false +bos_token_id: + value: null +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: 0 +cross_attention_hidden_size: + value: null +d_ff: + value: 1024 +d_kv: + value: 64 +d_model: + value: 512 +data: + value: + augment_synthetic: true + max_input_length: 128 + max_target_length: 128 + synthetic_ratio: 0.3 + test_path: data/processed/test.jsonl + train_path: data/processed/train.jsonl + val_path: data/processed/val.jsonl +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 4 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: 0 +deepspeed: + value: null +dense_act_fn: + value: gelu_new +disable_tqdm: + value: false +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +dropout_rate: + value: 0.1 +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: 1 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: 100 +eval_strategy: + value: steps +eval_use_gather_object: + value: false +exponential_decay_length_penalty: + value: null +feed_forward_proj: + value: gated-gelu +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: false +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +generation: + value: + early_stopping: true + length_penalty: 1 + max_new_tokens: 512 + min_length: 10 + no_repeat_ngram_size: 3 + num_beams: 5 +gradient_accumulation_steps: + value: 8 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: false +group_by_length: + value: false +half_precision_backend: + value: auto +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: null +hub_revision: + value: null +hub_strategy: + value: every_save +hub_token: + value: +human_pattern: + value: + classifier_path: checkpoints/human_pattern_classifier.pt + max_samples_per_source: 50000 + pretrain_batch_size: 512 + pretrain_epochs: 20 + pretrain_lr: 0.001 + shanegerami_path: data/raw/shanegerami/AI_Human.csv + starblasters_path: data/raw/starblasters8/data.parquet + target_auc: 0.88 +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_inputs_for_metrics: + value: false +include_num_input_tokens_seen: + value: false +include_tokens_per_second: + value: false +initializer_factor: + value: 1 +is_decoder: + value: false +is_encoder_decoder: + value: true +is_gated_act: + value: true +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_epsilon: + value: 1e-06 +learning_rate: + value: 0.0003 +length_column_name: + value: length +length_penalty: + value: 1 +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: logs/ +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 25 +logging_strategy: + value: steps +lora: + value: + lora_alpha: 16 + lora_dropout: 0.05 + r: 8 + target_modules: + - q + - v + - k + - o + - wi_0 + - wi_1 + - wo +loss: + value: + lambda_human_pattern: 0.4 + lambda_semantic: 0.5 + lambda_style: 0.3 + sem_model_name: all-mpnet-base-v2 +lr_scheduler_type: + value: cosine +max_grad_norm: + value: 1 +max_length: + value: 20 +max_steps: + value: -1 +metric_for_best_model: + value: eval_loss +min_length: + value: 0 +model: + value: + key: flan-t5-small + quantize: false + use_lora: true +model/num_parameters: + value: 78239104 +model_type: + value: t5 +mp_parameters: + value: "" +n_positions: + value: 512 +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_decoder_layers: + value: 8 +num_heads: + value: 6 +num_layers: + value: 8 +num_return_sequences: + value: 1 +num_train_epochs: + value: 5 +optim: + value: adamw_torch +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: checkpoints/ +output_hidden_states: + value: false +output_past: + value: true +output_scores: + value: false +overwrite_output_dir: + value: false +pad_token_id: + value: 0 +past_index: + value: -1 +peft_config: + value: + default: + alora_invocation_tokens: null + arrow_config: null + auto_mapping: null + base_model_name_or_path: google/flan-t5-small + bias: none + corda_config: null + ensure_weight_tying: false + eva_config: null + exclude_modules: null + fan_in_fan_out: false + inference_mode: false + init_lora_weights: true + layer_replication: null + layers_pattern: null + layers_to_transform: null + lora_alpha: 16 + lora_bias: false + lora_dropout: 0.05 + lora_ga_config: null + megatron_config: null + megatron_core: megatron.core + modules_to_save: null + peft_type: LORA + peft_version: 0.19.1 + qalora_group_size: 16 + r: 8 + revision: null + runtime_config: + ephemeral_gpu_offload: false + target_modules: + - wo + - k + - wi_0 + - v + - q + - o + - wi_1 + target_parameters: null + task_type: SEQ_2_SEQ_LM + trainable_token_indices: null + use_bdlora: null + use_dora: false + use_qalora: false + use_rslora: false +per_device_eval_batch_size: + value: 8 +per_device_train_batch_size: + value: 4 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +relative_attention_max_distance: + value: 128 +relative_attention_num_buckets: + value: 32 +remove_invalid_values: + value: false +remove_unused_columns: + value: false +repetition_penalty: + value: 1 +report_to: + value: + - wandb + - tensorboard +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: checkpoints/ +save_on_each_node: + value: false +save_only_model: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: steps +save_total_limit: + value: 3 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +suppress_tokens: + value: null +task_specific_params: + value: + summarization: + early_stopping: true + length_penalty: 2 + max_length: 200 + min_length: 30 + no_repeat_ngram_size: 3 + num_beams: 4 + prefix: 'summarize: ' + translation_en_to_de: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to German: ' + translation_en_to_fr: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to French: ' + translation_en_to_ro: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to Romanian: ' +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: false +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: bfloat16 +torch_empty_cache_steps: + value: null +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +training: + value: + bf16: true + dataloader_num_workers: 4 + eval_steps: 100 + evaluation_strategy: steps + fp16: false + gradient_accumulation_steps: 8 + greater_is_better: false + learning_rate: 0.0003 + load_best_model_at_end: true + logging_dir: logs/ + logging_steps: 25 + lr_scheduler_type: cosine + metric_for_best_model: eval_loss + num_train_epochs: 5 + output_dir: checkpoints/ + per_device_eval_batch_size: 8 + per_device_train_batch_size: 4 + push_to_hub: false + report_to: + - wandb + - tensorboard + save_steps: 100 + save_strategy: steps + save_total_limit: 3 + seed: 42 + warmup_ratio: 0.05 + weight_decay: 0.01 +transformers_version: + value: 4.53.2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_liger_kernel: + value: false +use_mps_device: + value: false +vocab_size: + value: 32128 +warmup_ratio: + value: 0.05 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/wandb/run-20260503_120403-cbb6slr5/files/output.log b/wandb/run-20260503_120403-cbb6slr5/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..fb523b474f02a666fd25a5d3d7b0ec1a1b305e6b --- /dev/null +++ b/wandb/run-20260503_120403-cbb6slr5/files/output.log @@ -0,0 +1,103 @@ +2026-05-03 12:04:05.126 | INFO  | __main__:train:153 - Step 3: Setting up device (hybrid GPU mode)... +2026-05-03 12:04:05.127 | INFO  | __main__:_setup_device:61 - GPU: NVIDIA GeForce RTX 3050 Laptop GPU | VRAM: 0MB used / 3770MB total (3770MB free) | Compute: (8, 6) +2026-05-03 12:04:05.127 | INFO  | __main__:_setup_device:74 - Hybrid GPU mode: capped PyTorch VRAM to 85% (~3204MB), leaving room for system +2026-05-03 12:04:05.127 | INFO  | __main__:train:157 - Step 4: Loading model and tokenizer... +2026-05-03 12:04:05.127 | INFO  | src.model.base_model:load_model_and_tokenizer:57 - Loading model: google/flan-t5-small (seq2seq=True, quantize=False, lora=True) +tokenizer_config.json: 2.54kB [00:00, 8.57MB/s] +spiece.model: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 792k/792k [00:02<00:00, 302kB/s] +tokenizer.json: 2.42MB [00:00, 12.0MB/s] +special_tokens_map.json: 2.20kB [00:00, 5.18MB/s] +config.json: 1.40kB [00:00, 4.63MB/s] +model.safetensors: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 308M/308M [00:49<00:00, 6.23MB/s] +generation_config.json: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 147/147 [00:00<00:00, 988kB/s] +2026-05-03 12:05:03.069 | INFO  | src.model.base_model:load_model_and_tokenizer:101 - Model loaded on cuda with dtype torch.bfloat16 +2026-05-03 12:05:04.628 | INFO  | src.model.base_model:load_model_and_tokenizer:130 - LoRA applied: 1,277,952 trainable params / 78,239,104 total (1.63%) +2026-05-03 12:05:04.629 | INFO  | __main__:train:173 - Applying torch.compile(mode='reduce-overhead')... +2026-05-03 12:05:04.632 | INFO  | __main__:train:175 - ✓ torch.compile applied — first few steps will be slower (compiling) +2026-05-03 12:05:04.632 | INFO  | __main__:train:180 - Step 5: Creating style fingerprinter... +2026-05-03 12:05:05.032 | INFO  | src.style.fingerprinter:__init__:100 - StyleFingerprinter initialised (AWL size: 549) +2026-05-03 12:05:05.032 | INFO  | __main__:train:187 - Step 6: Loading datasets... +2026-05-03 12:05:05.079 | INFO  | src.training.dataset:__init__:71 - Loaded 7555 examples from data/processed/train.jsonl +2026-05-03 12:05:05.220 | INFO  | src.training.dataset:_add_synthetic:148 - Added 2125 synthetic augmentation examples +2026-05-03 12:05:05.220 | INFO  | src.training.dataset:__init__:77 - Total dataset size: 9680 examples +2026-05-03 12:05:05.235 | INFO  | src.training.dataset:_precompute_all:154 - Pre-computing tokenisation and style vectors for all examples... +2026-05-03 12:05:53.677 | INFO  | src.training.dataset:_precompute_all:201 -  Pre-computed: 2000/9680 +2026-05-03 12:06:40.847 | INFO  | src.training.dataset:_precompute_all:201 -  Pre-computed: 4000/9680 +2026-05-03 12:07:27.735 | INFO  | src.training.dataset:_precompute_all:201 -  Pre-computed: 6000/9680 +2026-05-03 12:08:04.442 | INFO  | src.training.dataset:_precompute_all:201 -  Pre-computed: 8000/9680 +2026-05-03 12:08:06.207 | INFO  | src.training.dataset:_precompute_all:203 - Pre-computation complete (7231 unique style vectors) +2026-05-03 12:08:07.620 | INFO  | src.training.dataset:__init__:93 - Saved pre-computed dataset to cache: data/cache/1356ff2104663316.pt +2026-05-03 12:08:07.631 | INFO  | src.training.dataset:__init__:71 - Loaded 839 examples from data/processed/val.jsonl +2026-05-03 12:08:07.631 | INFO  | src.training.dataset:__init__:77 - Total dataset size: 839 examples +2026-05-03 12:08:07.633 | INFO  | src.training.dataset:_precompute_all:154 - Pre-computing tokenisation and style vectors for all examples... +2026-05-03 12:08:27.352 | INFO  | src.training.dataset:_precompute_all:203 - Pre-computation complete (833 unique style vectors) +2026-05-03 12:08:27.476 | INFO  | src.training.dataset:__init__:93 - Saved pre-computed dataset to cache: data/cache/d6a64358c3ef403f.pt +2026-05-03 12:08:27.476 | INFO  | __main__:train:207 - Train: 9680 | Val: 839 +2026-05-03 12:08:27.832 | INFO  | __main__:train:235 - Using CE-only loss (aux models skipped to save memory) +2026-05-03 12:08:27.832 | INFO  | __main__:train:238 - Step 8: Creating training arguments... +2026-05-03 12:08:27.832 | INFO  | __main__:train:246 - Using BF16 (Ampere+ GPU) +2026-05-03 12:08:27.833 | INFO  | __main__:_auto_batch_size:115 - Auto batch size: 4 (model ~160MB + 4×60MB = ~400MB / 3770MB free) +2026-05-03 12:08:27.833 | INFO  | __main__:train:273 - Gradient checkpointing: OFF (small model fits in VRAM) +2026-05-03 12:08:27.864 | INFO  | __main__:train:315 - Step 9: Creating trainer... +2026-05-03 12:08:28.031 | INFO  | __main__:train:331 - Step 10: Starting training... +2026-05-03 12:08:28.031 | INFO  | __main__:train:332 - Config summary: model=flan-t5-small | batch=4 | accum=8 | effective_batch=32 | epochs=5 | precision=bf16 | grad_ckpt=False | device=cuda +wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter. + 0%| | 0/1515 [00:00 + train() + ~~~~~^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1485, in __call__ + return self.main(*args, **kwargs) + ~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1406, in main + rv = self.invoke(ctx) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1269, in invoke + return ctx.invoke(self.callback, **ctx.params) + ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 824, in invoke + return callback(*args, **kwargs) + File "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", line 340, in train + trainer.train() + ~~~~~~~~~~~~~^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/trainer.py", line 2206, in train + return inner_training_loop( + args=args, + ...<2 lines>... + ignore_keys_for_eval=ignore_keys_for_eval, + ) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/trainer.py", line 2502, in _inner_training_loop + batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches, args.device) + ~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/trainer.py", line 5300, in get_batch_samples + batch_samples.append(next(epoch_iterator)) + ~~~~^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/accelerate/data_loader.py", line 574, in __iter__ + dataloader_iter = self.base_dataloader.__iter__() + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/utils/data/dataloader.py", line 500, in __iter__ + return self._get_iterator() + ~~~~~~~~~~~~~~~~~~^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/utils/data/dataloader.py", line 433, in _get_iterator + return _MultiProcessingDataLoaderIter(self) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/utils/data/dataloader.py", line 1192, in __init__ + w.start() + ~~~~~~~^^ + File "/usr/lib64/python3.14/multiprocessing/process.py", line 121, in start + self._popen = self._Popen(self) + ~~~~~~~~~~~^^^^^^ + File "/usr/lib64/python3.14/multiprocessing/context.py", line 230, in _Popen + return _default_context.get_context().Process._Popen(process_obj) + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^ + File "/usr/lib64/python3.14/multiprocessing/context.py", line 306, in _Popen + return Popen(process_obj) + File "/usr/lib64/python3.14/multiprocessing/popen_forkserver.py", line 35, in __init__ + super().__init__(process_obj) + ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^ + File "/usr/lib64/python3.14/multiprocessing/popen_fork.py", line 20, in __init__ + self._launch(process_obj) + ~~~~~~~~~~~~^^^^^^^^^^^^^ + File "/usr/lib64/python3.14/multiprocessing/popen_forkserver.py", line 51, in _launch + self.sentinel, w = forkserver.connect_to_new_process(self._fds) + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^ + File "/usr/lib64/python3.14/multiprocessing/forkserver.py", line 92, in connect_to_new_process + raise ValueError('too many fds') +ValueError: too many fds diff --git a/wandb/run-20260503_120403-cbb6slr5/files/requirements.txt b/wandb/run-20260503_120403-cbb6slr5/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..0a9158bdf370f073be66c392b3c09dcdfbfed3be --- /dev/null +++ b/wandb/run-20260503_120403-cbb6slr5/files/requirements.txt @@ -0,0 +1,351 @@ +Pygments==2.19.2 +pluggy==1.6.0 +iniconfig==2.3.0 +pytest==9.0.2 +sqlite-vec==0.1.6 +peewee==3.19.0 +boltons==21.0.0 +zipp==3.23.0 +wrapt==1.17.3 +tree-sitter-python==0.25.0 +tree-sitter-javascript==0.25.0 +tree-sitter-java==0.23.5 +tree-sitter-cpp==0.23.4 +tree-sitter-c==0.24.1 +tree-sitter==0.25.2 +tqdm==4.67.3 +tomli==2.0.2 +tabulate==0.10.0 +stevedore==5.7.0 +sniffio==1.3.1 +smmap==5.0.3 +shellingham==1.5.4 +semantic-version==2.10.0 +ruamel.yaml.clib==0.2.14 +ruamel.yaml==0.19.1 +python-multipart==0.0.22 +python-dotenv==1.2.2 +PyJWT==2.11.0 +protobuf==6.33.5 +opentelemetry-util-http==0.58b0 +networkx==3.6.1 +mdurl==0.1.2 +MarkupSafe==3.0.3 +jiter==0.13.0 +hyperframe==6.1.0 +httpx-sse==0.4.3 +hpack==4.1.0 +face==26.0.0 +exceptiongroup==1.2.2 +docstring_parser==0.17.0 +colorama==0.4.6 +bracex==2.6 +annotated-doc==0.0.4 +aiofiles==25.1.0 +wcmatch==8.5.2 +uvicorn==0.41.0 +starlette==0.52.1 +opentelemetry-proto==1.37.0 +markdown-it-py==4.0.0 +Jinja2==3.1.6 +importlib_metadata==8.7.1 +h2==4.3.0 +googleapis-common-protos==1.73.0 +glom==25.12.0 +gitdb==4.0.12 +click-option-group==0.5.9 +sse-starlette==3.3.2 +rich==14.3.3 +pydantic-settings==2.13.1 +opentelemetry-exporter-otlp-proto-common==1.37.0 +opentelemetry-api==1.37.0 +openai==2.26.0 +jsonschema==4.25.1 +GitPython==3.1.46 +typer==0.23.1 +opentelemetry-semantic-conventions==0.58b0 +bandit==1.9.4 +opentelemetry-sdk==1.37.0 +opentelemetry-instrumentation==0.58b0 +mcp==1.23.3 +agno==2.5.9 +opentelemetry-instrumentation-threading==0.58b0 +opentelemetry-instrumentation-requests==0.58b0 +opentelemetry-exporter-otlp-proto-http==1.37.0 +semgrep==1.154.0 +acvas==1.0.0 +Werkzeug==3.1.8 +websockets==13.1 +wcwidth==0.6.0 +sqlparse==0.5.5 +semver==3.0.4 +itsdangerous==2.2.0 +frida==17.9.1 +click==8.3.2 +blinker==1.9.0 +prompt_toolkit==3.0.52 +Flask==3.1.3 +delegator.py==0.1.1 +frida-tools==14.8.1 +cli_helpers==2.12.0 +litecli==1.17.1 +objection==1.12.4 +userpath==1.9.2 +platformdirs==4.9.4 +argcomplete==3.6.3 +pipx==1.11.1 +distlib==0.4.0 +filelock==3.25.2 +python-discovery==1.2.1 +virtualenv==21.2.0 +pyelftools==0.32 +cigam==0.0.3 +xmltodict==1.0.4 +apkutils2==1.0.0 +svgwrite==1.4.3 +rdflib==7.6.0 +python-louvain==0.16 +wheel==0.46.3 +pip==26.0.1 +threadpoolctl==3.6.0 +scipy==1.17.1 +joblib==1.5.3 +scikit-learn==1.8.0 +pandas==3.0.2 +seaborn==0.13.2 +text-unidecode==1.3 +fastjsonschema==2.21.2 +traitlets==5.14.3 +python-slugify==8.0.4 +bleach==6.3.0 +mdit-py-plugins==0.5.0 +kagglesdk==0.1.23 +jupyter_core==5.9.1 +nbformat==5.10.4 +jupytext==1.19.1 +kaggle==2.1.0 +xxhash==3.7.0 +pyarrow==24.0.0 +propcache==0.4.1 +multidict==6.7.1 +hf-xet==1.4.3 +fsspec==2026.2.0 +frozenlist==1.8.0 +dill==0.4.1 +aiohappyeyeballs==2.6.1 +yarl==1.23.0 +multiprocess==0.70.19 +aiosignal==1.4.0 +aiohttp==3.13.5 +datasets==4.8.5 +torchaudio==2.11.0 +nvidia-cusparselt-cu13==0.8.0 +mpmath==1.3.0 +cuda-toolkit==13.0.2 +wasabi==1.1.3 +uvloop==0.22.1 +triton==3.6.0 +toml==0.10.2 +tensorboard-data-server==0.7.2 +sympy==1.14.0 +spacy-loggers==1.0.5 +spacy-legacy==3.0.12 +spacy-alignments==0.9.1 +smart_open==7.6.0 +setuptools==81.0.0 +sentencepiece==0.2.1 +safetensors==0.7.0 +pyspellchecker==0.9.0 +pyphen==0.17.2 +portalocker==3.2.0 +nvidia-nvtx==13.0.85 +nvidia-nvshmem-cu13==3.4.5 +nvidia-nvjitlink==13.0.88 +nvidia-nccl-cu13==2.28.9 +nvidia-curand==10.4.0.35 +nvidia-cufile==1.15.1.6 +nvidia-cuda-runtime==13.0.96 +nvidia-cuda-nvrtc==13.0.88 +nvidia-cuda-cupti==13.0.85 +nvidia-cublas==13.1.0.3 +nltk==3.9.4 +murmurhash==1.0.15 +msgpack==1.1.2 +Markdown==3.10.2 +loguru==0.7.3 +locate==1.1.1 +langcodes==3.5.1 +httptools==0.7.1 +grpcio==1.80.0 +ftfy==6.3.1 +faiss-cpu==1.13.2 +cymem==2.0.13 +cuda-pathfinder==1.5.4 +confection==1.3.3 +cloudpathlib==0.24.0 +catalogue==2.0.10 +blis==1.3.3 +absl-py==2.4.0 +wordfreq==3.1.1 +watchfiles==1.1.1 +textstat==0.7.13 +tensorboard==2.20.0 +srsly==2.5.3 +sacrebleu==2.6.0 +rouge_score==0.1.2 +preshed==3.0.13 +nvidia-cusparse==12.6.3.3 +nvidia-cufft==12.0.0.61 +nvidia-cudnn-cu13==9.19.0.56 +language_tool_python==3.3.0 +huggingface_hub==0.36.2 +cuda-bindings==13.2.0 +wandb==0.26.1 +tokenizers==0.21.4 +thinc==8.3.13 +nvidia-cusolver==12.0.4.66 +fastapi==0.136.1 +weasel==1.0.0 +transformers==4.53.2 +torch==2.11.0 +spacy==3.8.13 +torchvision==0.26.0 +spacy-transformers==1.4.0 +sentence-transformers==5.4.1 +optimum==2.1.0 +errant==3.0.0 +bitsandbytes==0.49.2 +bert-score==0.3.13 +accelerate==1.13.0 +peft==0.19.1 +en_core_web_sm==3.8.0 +tree-sitter-zig==1.1.2 +tree-sitter-verilog==1.0.3 +tree-sitter-typescript==0.23.2 +tree-sitter-swift==0.0.1 +tree-sitter-scala==0.26.0 +tree-sitter-rust==0.24.2 +tree-sitter-ruby==0.23.1 +tree-sitter-powershell==0.26.3 +tree-sitter-php==0.24.1 +tree-sitter-objc==3.0.2 +tree-sitter-lua==0.5.0 +tree-sitter-kotlin==1.1.0 +tree-sitter-julia==0.23.1 +tree-sitter-go==0.25.0 +tree-sitter-elixir==0.3.5 +tree-sitter-c-sharp==0.23.5 +graphifyy==0.6.5 +numpy==2.4.4 +kiwisolver==1.5.0 +fonttools==4.62.1 +cycler==0.12.1 +contourpy==1.3.3 +matplotlib==3.10.9 +Brlapi==0.8.7 +PyGObject==3.56.2 +cffi==2.0.0 +cryptography==46.0.7 +dbus-python==1.4.0 +evdev==1.9.3 +gbinder-python==1.3.0 +lxml==6.0.2 +perf==0.1 +pillow==12.2.0 +psutil==7.2.2 +PyAudio==0.2.13 +pycairo==1.28.0 +pycups==2.0.4 +pydantic_core==2.41.5 +pyscard==2.2.2 +PyYAML==6.0.3 +RapidFuzz==3.14.3 +regex==2026.2.28 +rpds-py==0.29.0 +rpm==6.0.1 +selinux==3.10 +setools==4.6.0 +setproctitle==1.3.7 +systemd-python==235 +PySocks==1.7.1 +annotated-types==0.7.0 +anyio==4.12.1 +attrs==25.4.0 +beautifulsoup4==4.14.3 +boto3==1.42.84 +botocore==1.42.84 +certifi==2026.1.4 +charset-normalizer==3.4.4 +click==8.3.3 +cockpit==361 +configobj==5.0.9 +cupshelpers==1.0 +dasbus==1.7 +distro==1.9.0 +fido2==2.0.0 +file-magic==0.4.0 +filelock==3.15.4 +fw-fanctrl==1.0.2 +h11==0.16.0 +html2text==2025.4.15 +httpcore==1.0.9 +httpx==0.28.1 +icoextract==0.2.0 +idna==3.11 +input-remapper==2.2.0 +jaraco.classes==3.4.0 +jaraco.context==6.1.0 +jaraco.functools==4.3.0 +jeepney==0.9.0 +jmespath==1.0.1 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +keyring==25.7.0 +langtable==0.0.70 +libevdev==0.13.1 +louis==3.33.0 +lutris==0.5.22 +moddb==0.12.0 +more-itertools==10.5.0 +nftables==0.1 +olefile==0.47 +packaging==25.0 +pefile==2024.8.26 +pexpect==4.9.0 +pip==26.0.1 +protobuf==3.19.6 +ptyprocess==0.7.0 +pycparser==2.22 +pydantic==2.12.5 +pydbus==0.6.0 +pyenchant==3.3.0 +pygdbmi==0.11.0.0 +pyinotify==0.9.6 +pyparsing==3.1.2 +pypresence==4.3.0 +python-dateutil==2.9.0.post0 +python-linux-procfs==0.7.4 +python-pskc==1.4 +python-xlib==0.33 +pyudev==0.24.4 +pyxdg==0.28 +referencing==0.36.2 +requests==2.32.5 +requests-file==3.0.0 +s3transfer==0.16.0 +SecretStorage==3.5.0 +sentry-sdk==2.48.0 +sepolicy==3.10 +setroubleshoot==3.3.36 +setuptools==80.10.2 +shtab==1.7.2 +six==1.17.0 +sos==4.11.1 +soupsieve==2.8.3 +tldextract==5.3.0 +typing_extensions==4.15.0 +typing-inspection==0.4.2 +udica==0.2.8 +umu-launcher==1.4.0 +urllib3==2.6.3 +yubikey-manager==5.9.1 diff --git a/wandb/run-20260503_120403-cbb6slr5/files/wandb-metadata.json b/wandb/run-20260503_120403-cbb6slr5/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9399290df2bfc143dbad404f2714c45730b2ffd0 --- /dev/null +++ b/wandb/run-20260503_120403-cbb6slr5/files/wandb-metadata.json @@ -0,0 +1,41 @@ +{ + "os": "Linux-6.19.14-ogc1.1.fc44.x86_64-x86_64-with-glibc2.43", + "python": "CPython 3.14.4", + "startedAt": "2026-05-03T06:34:03.241358Z", + "args": [ + "--config", + "configs/training_config.yaml", + "--use-v2-loss" + ], + "program": "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", + "codePath": "scripts/train.py", + "codePathLocal": "scripts/train.py", + "email": "morpheuslord@protonmail.com", + "root": "/run/media/morpheuslord/Personal_Files/Projects/Rewriter", + "host": "bazzite", + "executable": "/usr/bin/python3", + "cpu_count": 8, + "cpu_count_logical": 16, + "gpu": "NVIDIA GeForce RTX 3050 Laptop GPU", + "gpu_count": 1, + "disk": { + "/": { + "total": "65773568", + "used": "65773568" + } + }, + "memory": { + "total": "32939671552" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA GeForce RTX 3050 Laptop GPU", + "memoryTotal": "4294967296", + "cudaCores": 2048, + "architecture": "Ampere", + "uuid": "GPU-861554d7-d187-39e9-e77c-881f0287b963" + } + ], + "cudaVersion": "13.2", + "writerId": "wdl3stk45pm5fr1wq23xw6ggjxanoild" +} \ No newline at end of file diff --git a/wandb/run-20260503_120403-cbb6slr5/files/wandb-summary.json b/wandb/run-20260503_120403-cbb6slr5/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a8db514277c6bd6a7668f28c7d929b07266dd903 --- /dev/null +++ b/wandb/run-20260503_120403-cbb6slr5/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":266},"_runtime":266} \ No newline at end of file diff --git a/wandb/run-20260503_120403-cbb6slr5/logs/debug-core.log b/wandb/run-20260503_120403-cbb6slr5/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..8e6adc09911532d91ce2e2b9ee4d3b7e6d6e6f57 --- /dev/null +++ b/wandb/run-20260503_120403-cbb6slr5/logs/debug-core.log @@ -0,0 +1,15 @@ +{"time":"2026-05-03T12:04:03.451410598+05:30","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmlad7n6f/port-14979.txt","pid":14979,"detached":false,"idle-timeout":600000000000,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-05-03T12:04:03.451803116+05:30","level":"INFO","msg":"server: will exit if parent process dies","ppid":14979} +{"time":"2026-05-03T12:04:03.451799991+05:30","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-14979-15256-934596103/socket","Net":"unix"}} +{"time":"2026-05-03T12:04:03.641978187+05:30","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-05-03T12:04:03.650818369+05:30","level":"INFO","msg":"handleInformInit: received","streamId":"cbb6slr5","id":"1(@)"} +{"time":"2026-05-03T12:04:04.35046968+05:30","level":"INFO","msg":"handleInformInit: stream started","streamId":"cbb6slr5","id":"1(@)"} +{"time":"2026-05-03T12:04:10.126967428+05:30","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"zk3peqix4p8s"} +{"time":"2026-05-03T12:08:31.664545023+05:30","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-05-03T12:08:31.664579068+05:30","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-05-03T12:08:31.664621307+05:30","level":"INFO","msg":"server is shutting down"} +{"time":"2026-05-03T12:08:31.664640323+05:30","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-05-03T12:08:31.664770048+05:30","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-14979-15256-934596103/socket","Net":"unix"}} +{"time":"2026-05-03T12:08:35.140785779+05:30","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-05-03T12:08:35.141191575+05:30","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-05-03T12:08:35.141315659+05:30","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20260503_120403-cbb6slr5/logs/debug-internal.log b/wandb/run-20260503_120403-cbb6slr5/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..58c530107a709914ff01aecb7a65b31f2caa9eb0 --- /dev/null +++ b/wandb/run-20260503_120403-cbb6slr5/logs/debug-internal.log @@ -0,0 +1,50 @@ +{"time":"2026-05-03T12:04:03.651069261+05:30","level":"INFO","msg":"wandb-core"} +{"time":"2026-05-03T12:04:03.651810184+05:30","level":"INFO","msg":"stream: starting","core version":"0.26.1"} +{"time":"2026-05-03T12:04:04.349680576+05:30","level":"INFO","msg":"stream: created new stream","id":"cbb6slr5"} +{"time":"2026-05-03T12:04:04.349900229+05:30","level":"INFO","msg":"handler: started"} +{"time":"2026-05-03T12:04:04.350436307+05:30","level":"INFO","msg":"stream: started"} +{"time":"2026-05-03T12:04:04.350452407+05:30","level":"INFO","msg":"sender: started"} +{"time":"2026-05-03T12:04:04.350450223+05:30","level":"INFO","msg":"writer: started","stream_id":"cbb6slr5"} +{"time":"2026-05-03T12:04:05.134664368+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":2} +{"time":"2026-05-03T12:04:05.783522652+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:04:20.135019056+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":0,"events_lines":2,"console_offset":1,"console_lines":10,"uploaded_len":2} +{"time":"2026-05-03T12:04:20.550838551+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:04:35.135620073+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":2,"events_lines":2,"console_offset":10,"console_lines":1} +{"time":"2026-05-03T12:04:35.503870372+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:04:50.135174052+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":4,"events_lines":2,"console_offset":10,"console_lines":1} +{"time":"2026-05-03T12:04:50.539976612+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:05:05.135016444+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":6,"events_lines":2,"console_offset":10,"console_lines":10} +{"time":"2026-05-03T12:05:05.790589033+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:05:20.135630998+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":8,"events_lines":2,"console_offset":20,"console_lines":3} +{"time":"2026-05-03T12:05:20.740300492+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:05:35.135220604+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":10,"events_lines":2} +{"time":"2026-05-03T12:05:35.688456406+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:05:50.134906816+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":12,"events_lines":2} +{"time":"2026-05-03T12:05:50.649729204+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:06:05.135599574+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":14,"events_lines":2,"console_offset":23,"console_lines":1} +{"time":"2026-05-03T12:06:05.695119845+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:06:20.135316678+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":16,"events_lines":2} +{"time":"2026-05-03T12:06:20.747491501+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:06:35.134853419+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":18,"events_lines":2} +{"time":"2026-05-03T12:06:35.698351909+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:06:50.134918805+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":20,"events_lines":2,"console_offset":24,"console_lines":1} +{"time":"2026-05-03T12:06:50.649707669+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:07:05.135398582+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":22,"events_lines":2} +{"time":"2026-05-03T12:07:05.701723947+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:07:20.135106713+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":24,"events_lines":2} +{"time":"2026-05-03T12:07:20.653102773+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:07:35.135632691+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":26,"events_lines":2,"console_offset":25,"console_lines":1} +{"time":"2026-05-03T12:07:35.603937019+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:07:50.135033742+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":28,"events_lines":2} +{"time":"2026-05-03T12:07:50.554570666+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:08:05.134699844+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":30,"events_lines":2,"console_offset":26,"console_lines":1} +{"time":"2026-05-03T12:08:05.583974381+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:08:20.135445473+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":32,"events_lines":2,"console_offset":27,"console_lines":5} +{"time":"2026-05-03T12:08:20.659707136+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:08:34.485643582+05:30","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-05-03T12:08:34.487706123+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"console_offset":32,"console_lines":71,"uploaded_len":3,"complete":true,"exit_code":1} +{"time":"2026-05-03T12:08:35.099653337+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:08:35.101629825+05:30","level":"INFO","msg":"stream: finishing up"} +{"time":"2026-05-03T12:08:35.102419705+05:30","level":"INFO","msg":"handler: closed"} +{"time":"2026-05-03T12:08:35.106431844+05:30","level":"INFO","msg":"sender: closed"} +{"time":"2026-05-03T12:08:35.106988965+05:30","level":"INFO","msg":"stream: all finished"} diff --git a/wandb/run-20260503_120403-cbb6slr5/logs/debug.log b/wandb/run-20260503_120403-cbb6slr5/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..5ee371317e5ad32d21f0b94443cb583a00208e5b --- /dev/null +++ b/wandb/run-20260503_120403-cbb6slr5/logs/debug.log @@ -0,0 +1,24 @@ +2026-05-03 12:04:03,245 INFO MainThread:14979 [wandb_setup.py:_flush():81] Current SDK version is 0.26.1 +2026-05-03 12:04:03,246 INFO MainThread:14979 [wandb_setup.py:_flush():81] Configure stats pid to 14979 +2026-05-03 12:04:03,246 INFO MainThread:14979 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-05-03 12:04:03,246 INFO MainThread:14979 [wandb_init.py:setup_run_log_directory():723] Logging user logs to /run/media/morpheuslord/Personal_Files/Projects/Rewriter/wandb/run-20260503_120403-cbb6slr5/logs/debug.log +2026-05-03 12:04:03,246 INFO MainThread:14979 [wandb_init.py:setup_run_log_directory():724] Logging internal logs to /run/media/morpheuslord/Personal_Files/Projects/Rewriter/wandb/run-20260503_120403-cbb6slr5/logs/debug-internal.log +2026-05-03 12:04:03,246 INFO MainThread:14979 [wandb_init.py:init():850] calling init triggers +2026-05-03 12:04:03,246 INFO MainThread:14979 [wandb_init.py:init():855] wandb.init called with sweep_config: {} +config: {'model': {'key': 'flan-t5-small', 'quantize': False, 'use_lora': True}, 'lora': {'r': 8, 'lora_alpha': 16, 'lora_dropout': 0.05, 'target_modules': ['q', 'v', 'k', 'o', 'wi_0', 'wi_1', 'wo']}, 'data': {'train_path': 'data/processed/train.jsonl', 'val_path': 'data/processed/val.jsonl', 'test_path': 'data/processed/test.jsonl', 'max_input_length': 128, 'max_target_length': 128, 'augment_synthetic': True, 'synthetic_ratio': 0.3}, 'training': {'output_dir': 'checkpoints/', 'num_train_epochs': 5, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'gradient_accumulation_steps': 8, 'learning_rate': 0.0003, 'lr_scheduler_type': 'cosine', 'warmup_ratio': 0.05, 'weight_decay': 0.01, 'fp16': False, 'bf16': True, 'evaluation_strategy': 'steps', 'eval_steps': 100, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 3, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'logging_dir': 'logs/', 'logging_steps': 25, 'report_to': ['wandb', 'tensorboard'], 'dataloader_num_workers': 4, 'seed': 42, 'push_to_hub': False}, 'loss': {'lambda_style': 0.3, 'lambda_semantic': 0.5, 'lambda_human_pattern': 0.4, 'sem_model_name': 'all-mpnet-base-v2'}, 'generation': {'num_beams': 5, 'length_penalty': 1.0, 'no_repeat_ngram_size': 3, 'min_length': 10, 'max_new_tokens': 512, 'early_stopping': True}, 'human_pattern': {'classifier_path': 'checkpoints/human_pattern_classifier.pt', 'shanegerami_path': 'data/raw/shanegerami/AI_Human.csv', 'starblasters_path': 'data/raw/starblasters8/data.parquet', 'max_samples_per_source': 50000, 'pretrain_epochs': 20, 'pretrain_lr': 0.001, 'pretrain_batch_size': 512, 'target_auc': 0.88}, '_wandb': {}} +2026-05-03 12:04:03,246 INFO MainThread:14979 [wandb_init.py:init():898] starting backend +2026-05-03 12:04:03,642 INFO MainThread:14979 [wandb_init.py:init():913] sending inform_init request +2026-05-03 12:04:04,351 INFO MainThread:14979 [wandb_init.py:init():918] backend started and connected +2026-05-03 12:04:04,352 INFO MainThread:14979 [wandb_init.py:init():988] updated telemetry +2026-05-03 12:04:04,353 INFO MainThread:14979 [wandb_init.py:init():1011] communicating run to backend with 90.0 second timeout +2026-05-03 12:04:04,966 INFO MainThread:14979 [wandb_init.py:init():1056] starting run threads in backend +2026-05-03 12:04:05,124 INFO MainThread:14979 [wandb_run.py:_console_start():2554] atexit reg +2026-05-03 12:04:05,124 INFO MainThread:14979 [wandb_run.py:_redirect():2403] redirect: wrap_raw +2026-05-03 12:04:05,124 INFO MainThread:14979 [wandb_run.py:_redirect():2472] Wrapping output streams. +2026-05-03 12:04:05,124 INFO MainThread:14979 [wandb_run.py:_redirect():2495] Redirects installed. +2026-05-03 12:04:05,126 INFO MainThread:14979 [wandb_init.py:init():1094] run started, returning control to user process +2026-05-03 12:08:28,393 INFO MainThread:14979 [wandb_run.py:_config_callback():1415] config_cb None None {'peft_config': {'default': {'task_type': 'SEQ_2_SEQ_LM', 'peft_type': 'LORA', 'auto_mapping': None, 'peft_version': '0.19.1', 'base_model_name_or_path': 'google/flan-t5-small', 'revision': None, 'inference_mode': False, 'r': 8, 'target_modules': ['wo', 'k', 'wi_0', 'v', 'q', 'o', 'wi_1'], 'exclude_modules': None, 'lora_alpha': 16, 'lora_dropout': 0.05, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'trainable_token_indices': None, 'loftq_config': {}, 'eva_config': None, 'corda_config': None, 'lora_ga_config': None, 'use_dora': False, 'alora_invocation_tokens': None, 'use_qalora': False, 'qalora_group_size': 16, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}, 'lora_bias': False, 'target_parameters': None, 'use_bdlora': None, 'arrow_config': None, 'ensure_weight_tying': False}}, 'vocab_size': 32128, 'd_model': 512, 'd_kv': 64, 'd_ff': 1024, 'num_layers': 8, 'num_decoder_layers': 8, 'num_heads': 6, 'relative_attention_num_buckets': 32, 'relative_attention_max_distance': 128, 'dropout_rate': 0.1, 'classifier_dropout': 0.0, 'layer_norm_epsilon': 1e-06, 'initializer_factor': 1.0, 'feed_forward_proj': 'gated-gelu', 'use_cache': True, 'dense_act_fn': 'gelu_new', 'is_gated_act': True, 'return_dict': True, 'output_hidden_states': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['T5ForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': 1, 'sep_token_id': None, 'decoder_start_token_id': 0, 'task_specific_params': {'summarization': {'early_stopping': True, 'length_penalty': 2.0, 'max_length': 200, 'min_length': 30, 'no_repeat_ngram_size': 3, 'num_beams': 4, 'prefix': 'summarize: '}, 'translation_en_to_de': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to German: '}, 'translation_en_to_fr': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to French: '}, 'translation_en_to_ro': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to Romanian: '}}, 'problem_type': None, '_name_or_path': 'google/flan-t5-small', 'transformers_version': '4.53.2', 'model_type': 't5', 'n_positions': 512, 'output_past': True, 'output_attentions': False, 'output_dir': 'checkpoints/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0003, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.05, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'logs/', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 3, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 4, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'checkpoints/', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb', 'tensorboard'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': False} +2026-05-03 12:08:28,398 INFO MainThread:14979 [wandb_config.py:__setitem__():155] [no run ID] config set model/num_parameters = 78239104 - > +2026-05-03 12:08:28,398 INFO MainThread:14979 [wandb_run.py:_config_callback():1415] config_cb model/num_parameters 78239104 None +2026-05-03 12:08:31,664 INFO wandb-AsyncioManager-main:14979 [service_client.py:_forward_responses():134] Reached EOF. +2026-05-03 12:08:31,664 INFO wandb-AsyncioManager-main:14979 [mailbox.py:close():155] Closing mailbox, abandoning 1 handles. diff --git a/wandb/run-20260503_121016-impcgg4z/files/config.yaml b/wandb/run-20260503_121016-impcgg4z/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6e756bff5f0040e358ffc546e5f742599598250 --- /dev/null +++ b/wandb/run-20260503_121016-impcgg4z/files/config.yaml @@ -0,0 +1,661 @@ +_name_or_path: + value: google/flan-t5-small +_wandb: + value: + cli_version: 0.26.1 + e: + 4grhtgbrsanlpr1ottdp8810yu8ci7gn: + args: + - --config + - configs/training_config.yaml + - --use-v2-loss + codePath: scripts/train.py + codePathLocal: scripts/train.py + cpu_count: 8 + cpu_count_logical: 16 + cudaVersion: "13.2" + disk: + /: + total: "65773568" + used: "65773568" + email: morpheuslord@protonmail.com + executable: /usr/bin/python3 + gpu: NVIDIA GeForce RTX 3050 Laptop GPU + gpu_count: 1 + gpu_nvidia: + - architecture: Ampere + cudaCores: 2048 + memoryTotal: "4294967296" + name: NVIDIA GeForce RTX 3050 Laptop GPU + uuid: GPU-861554d7-d187-39e9-e77c-881f0287b963 + host: bazzite + memory: + total: "32939671552" + os: Linux-6.19.14-ogc1.1.fc44.x86_64-x86_64-with-glibc2.43 + program: /run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py + python: CPython 3.14.4 + root: /run/media/morpheuslord/Personal_Files/Projects/Rewriter + startedAt: "2026-05-03T06:40:16.374846Z" + writerId: 4grhtgbrsanlpr1ottdp8810yu8ci7gn + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.14.4 + t: + "1": + - 1 + - 5 + - 11 + - 33 + - 41 + - 49 + - 51 + - 53 + - 71 + - 75 + - 98 + "2": + - 1 + - 5 + - 11 + - 33 + - 41 + - 49 + - 51 + - 53 + - 71 + - 75 + - 98 + "3": + - 7 + - 13 + - 16 + - 19 + - 66 + "4": 3.14.4 + "5": 0.26.1 + "6": 4.53.2 + "9": + "1": transformers_trainer + "12": 0.26.1 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - T5ForConditionalGeneration +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: false +bad_words_ids: + value: null +batch_eval_metrics: + value: false +begin_suppress_tokens: + value: null +bf16: + value: true +bf16_full_eval: + value: false +bos_token_id: + value: null +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: 0 +cross_attention_hidden_size: + value: null +d_ff: + value: 1024 +d_kv: + value: 64 +d_model: + value: 512 +data: + value: + augment_synthetic: true + max_input_length: 128 + max_target_length: 128 + synthetic_ratio: 0.3 + test_path: data/processed/test.jsonl + train_path: data/processed/train.jsonl + val_path: data/processed/val.jsonl +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 4 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: 0 +deepspeed: + value: null +dense_act_fn: + value: gelu_new +disable_tqdm: + value: false +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +dropout_rate: + value: 0.1 +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: 1 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: 100 +eval_strategy: + value: steps +eval_use_gather_object: + value: false +exponential_decay_length_penalty: + value: null +feed_forward_proj: + value: gated-gelu +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: false +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +generation: + value: + early_stopping: true + length_penalty: 1 + max_new_tokens: 512 + min_length: 10 + no_repeat_ngram_size: 3 + num_beams: 5 +gradient_accumulation_steps: + value: 8 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: false +group_by_length: + value: false +half_precision_backend: + value: auto +hub_always_push: + value: false +hub_model_id: + value: null +hub_private_repo: + value: null +hub_revision: + value: null +hub_strategy: + value: every_save +hub_token: + value: +human_pattern: + value: + classifier_path: checkpoints/human_pattern_classifier.pt + max_samples_per_source: 50000 + pretrain_batch_size: 512 + pretrain_epochs: 20 + pretrain_lr: 0.001 + shanegerami_path: data/raw/shanegerami/AI_Human.csv + starblasters_path: data/raw/starblasters8/data.parquet + target_auc: 0.88 +id2label: + value: + "0": LABEL_0 + "1": LABEL_1 +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_inputs_for_metrics: + value: false +include_num_input_tokens_seen: + value: false +include_tokens_per_second: + value: false +initializer_factor: + value: 1 +is_decoder: + value: false +is_encoder_decoder: + value: true +is_gated_act: + value: true +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + LABEL_0: 0 + LABEL_1: 1 +layer_norm_epsilon: + value: 1e-06 +learning_rate: + value: 0.0003 +length_column_name: + value: length +length_penalty: + value: 1 +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: logs/ +logging_first_step: + value: false +logging_nan_inf_filter: + value: true +logging_steps: + value: 25 +logging_strategy: + value: steps +lora: + value: + lora_alpha: 16 + lora_dropout: 0.05 + r: 8 + target_modules: + - q + - v + - k + - o + - wi_0 + - wi_1 + - wo +loss: + value: + lambda_human_pattern: 0.4 + lambda_semantic: 0.5 + lambda_style: 0.3 + sem_model_name: all-mpnet-base-v2 +lr_scheduler_type: + value: cosine +max_grad_norm: + value: 1 +max_length: + value: 20 +max_steps: + value: -1 +metric_for_best_model: + value: eval_loss +min_length: + value: 0 +model: + value: + key: flan-t5-small + quantize: false + use_lora: true +model/num_parameters: + value: 78239104 +model_type: + value: t5 +mp_parameters: + value: "" +n_positions: + value: 512 +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_decoder_layers: + value: 8 +num_heads: + value: 6 +num_layers: + value: 8 +num_return_sequences: + value: 1 +num_train_epochs: + value: 5 +optim: + value: adamw_torch +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: checkpoints/ +output_hidden_states: + value: false +output_past: + value: true +output_scores: + value: false +overwrite_output_dir: + value: false +pad_token_id: + value: 0 +past_index: + value: -1 +peft_config: + value: + default: + alora_invocation_tokens: null + arrow_config: null + auto_mapping: null + base_model_name_or_path: google/flan-t5-small + bias: none + corda_config: null + ensure_weight_tying: false + eva_config: null + exclude_modules: null + fan_in_fan_out: false + inference_mode: false + init_lora_weights: true + layer_replication: null + layers_pattern: null + layers_to_transform: null + lora_alpha: 16 + lora_bias: false + lora_dropout: 0.05 + lora_ga_config: null + megatron_config: null + megatron_core: megatron.core + modules_to_save: null + peft_type: LORA + peft_version: 0.19.1 + qalora_group_size: 16 + r: 8 + revision: null + runtime_config: + ephemeral_gpu_offload: false + target_modules: + - wo + - wi_1 + - q + - wi_0 + - o + - v + - k + target_parameters: null + task_type: SEQ_2_SEQ_LM + trainable_token_indices: null + use_bdlora: null + use_dora: false + use_qalora: false + use_rslora: false +per_device_eval_batch_size: + value: 8 +per_device_train_batch_size: + value: 4 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +push_to_hub: + value: false +push_to_hub_model_id: + value: null +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +relative_attention_max_distance: + value: 128 +relative_attention_num_buckets: + value: 32 +remove_invalid_values: + value: false +remove_unused_columns: + value: false +repetition_penalty: + value: 1 +report_to: + value: + - wandb + - tensorboard +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: checkpoints/ +save_on_each_node: + value: false +save_only_model: + value: false +save_safetensors: + value: true +save_steps: + value: 100 +save_strategy: + value: steps +save_total_limit: + value: 3 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +suppress_tokens: + value: null +task_specific_params: + value: + summarization: + early_stopping: true + length_penalty: 2 + max_length: 200 + min_length: 30 + no_repeat_ngram_size: 3 + num_beams: 4 + prefix: 'summarize: ' + translation_en_to_de: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to German: ' + translation_en_to_fr: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to French: ' + translation_en_to_ro: + early_stopping: true + max_length: 300 + num_beams: 4 + prefix: 'translate English to Romanian: ' +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: false +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_dtype: + value: bfloat16 +torch_empty_cache_steps: + value: null +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +training: + value: + bf16: true + dataloader_num_workers: 4 + eval_steps: 100 + evaluation_strategy: steps + fp16: false + gradient_accumulation_steps: 8 + greater_is_better: false + learning_rate: 0.0003 + load_best_model_at_end: true + logging_dir: logs/ + logging_steps: 25 + lr_scheduler_type: cosine + metric_for_best_model: eval_loss + num_train_epochs: 5 + output_dir: checkpoints/ + per_device_eval_batch_size: 8 + per_device_train_batch_size: 4 + push_to_hub: false + report_to: + - wandb + - tensorboard + save_steps: 100 + save_strategy: steps + save_total_limit: 3 + seed: 42 + warmup_ratio: 0.05 + weight_decay: 0.01 +transformers_version: + value: 4.53.2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_ipex: + value: false +use_legacy_prediction_loop: + value: false +use_liger_kernel: + value: false +use_mps_device: + value: false +vocab_size: + value: 32128 +warmup_ratio: + value: 0.05 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/wandb/run-20260503_121016-impcgg4z/files/output.log b/wandb/run-20260503_121016-impcgg4z/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..b4b15343c1a25a560c5eaca34780bae7f9cf9f69 --- /dev/null +++ b/wandb/run-20260503_121016-impcgg4z/files/output.log @@ -0,0 +1,90 @@ +2026-05-03 12:10:18.169 | INFO  | __main__:train:153 - Step 3: Setting up device (hybrid GPU mode)... +2026-05-03 12:10:18.170 | INFO  | __main__:_setup_device:61 - GPU: NVIDIA GeForce RTX 3050 Laptop GPU | VRAM: 0MB used / 3770MB total (3770MB free) | Compute: (8, 6) +2026-05-03 12:10:18.170 | INFO  | __main__:_setup_device:74 - Hybrid GPU mode: capped PyTorch VRAM to 85% (~3204MB), leaving room for system +2026-05-03 12:10:18.170 | INFO  | __main__:train:157 - Step 4: Loading model and tokenizer... +2026-05-03 12:10:18.170 | INFO  | src.model.base_model:load_model_and_tokenizer:57 - Loading model: google/flan-t5-small (seq2seq=True, quantize=False, lora=True) +2026-05-03 12:10:20.487 | INFO  | src.model.base_model:load_model_and_tokenizer:101 - Model loaded on cuda with dtype torch.bfloat16 +2026-05-03 12:10:21.856 | INFO  | src.model.base_model:load_model_and_tokenizer:130 - LoRA applied: 1,277,952 trainable params / 78,239,104 total (1.63%) +2026-05-03 12:10:21.856 | INFO  | __main__:train:173 - Applying torch.compile(mode='reduce-overhead')... +2026-05-03 12:10:21.859 | INFO  | __main__:train:175 - ✓ torch.compile applied — first few steps will be slower (compiling) +2026-05-03 12:10:21.860 | INFO  | __main__:train:180 - Step 5: Creating style fingerprinter... +2026-05-03 12:10:22.229 | INFO  | src.style.fingerprinter:__init__:100 - StyleFingerprinter initialised (AWL size: 549) +2026-05-03 12:10:22.229 | INFO  | __main__:train:187 - Step 6: Loading datasets... +2026-05-03 12:10:22.266 | INFO  | src.training.dataset:__init__:71 - Loaded 7555 examples from data/processed/train.jsonl +2026-05-03 12:10:22.984 | INFO  | src.training.dataset:_add_synthetic:148 - Added 2125 synthetic augmentation examples +2026-05-03 12:10:22.985 | INFO  | src.training.dataset:__init__:77 - Total dataset size: 9680 examples +2026-05-03 12:10:23.144 | INFO  | src.training.dataset:__init__:85 - Loading pre-computed dataset from cache: data/cache/1356ff2104663316.pt +2026-05-03 12:10:31.366 | INFO  | src.training.dataset:__init__:87 - Loaded 9680 cached examples +2026-05-03 12:10:31.371 | INFO  | src.training.dataset:__init__:71 - Loaded 839 examples from data/processed/val.jsonl +2026-05-03 12:10:31.371 | INFO  | src.training.dataset:__init__:77 - Total dataset size: 839 examples +2026-05-03 12:10:31.373 | INFO  | src.training.dataset:__init__:85 - Loading pre-computed dataset from cache: data/cache/d6a64358c3ef403f.pt +2026-05-03 12:10:31.473 | INFO  | src.training.dataset:__init__:87 - Loaded 839 cached examples +2026-05-03 12:10:31.473 | INFO  | __main__:train:207 - Train: 9680 | Val: 839 +2026-05-03 12:10:31.759 | INFO  | __main__:train:235 - Using CE-only loss (aux models skipped to save memory) +2026-05-03 12:10:31.759 | INFO  | __main__:train:238 - Step 8: Creating training arguments... +2026-05-03 12:10:31.759 | INFO  | __main__:train:246 - Using BF16 (Ampere+ GPU) +2026-05-03 12:10:31.759 | INFO  | __main__:_auto_batch_size:115 - Auto batch size: 4 (model ~160MB + 4×60MB = ~400MB / 3770MB free) +2026-05-03 12:10:31.759 | INFO  | __main__:train:273 - Gradient checkpointing: OFF (small model fits in VRAM) +2026-05-03 12:10:31.783 | INFO  | __main__:train:316 - Step 9: Creating trainer... +2026-05-03 12:10:31.911 | INFO  | __main__:train:332 - Step 10: Starting training... +2026-05-03 12:10:31.911 | INFO  | __main__:train:333 - Config summary: model=flan-t5-small | batch=4 | accum=8 | effective_batch=32 | epochs=5 | precision=bf16 | grad_ckpt=False | device=cuda +wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter. + 0%| | 0/1515 [00:00 + train() + ~~~~~^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1485, in __call__ + return self.main(*args, **kwargs) + ~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1406, in main + rv = self.invoke(ctx) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1269, in invoke + return ctx.invoke(self.callback, **ctx.params) + ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 824, in invoke + return callback(*args, **kwargs) + File "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", line 341, in train + trainer.train() + ~~~~~~~~~~~~~^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/trainer.py", line 2206, in train + return inner_training_loop( + args=args, + ...<2 lines>... + ignore_keys_for_eval=ignore_keys_for_eval, + ) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/trainer.py", line 2502, in _inner_training_loop + batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches, args.device) + ~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/trainer.py", line 5300, in get_batch_samples + batch_samples.append(next(epoch_iterator)) + ~~~~^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/accelerate/data_loader.py", line 574, in __iter__ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/utils/data/dataloader.py", line 500, in __iter__ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/utils/data/dataloader.py", line 433, in _get_iterator + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/utils/data/dataloader.py", line 1192, in __init__ + File "/usr/lib64/python3.14/multiprocessing/process.py", line 121, in start + File "/usr/lib64/python3.14/multiprocessing/context.py", line 230, in _Popen + File "/usr/lib64/python3.14/multiprocessing/context.py", line 306, in _Popen + File "/usr/lib64/python3.14/multiprocessing/popen_forkserver.py", line 35, in __init__ + File "/usr/lib64/python3.14/multiprocessing/popen_fork.py", line 20, in __init__ + File "/usr/lib64/python3.14/multiprocessing/popen_forkserver.py", line 47, in _launch + File "/usr/lib64/python3.14/multiprocessing/reduction.py", line 60, in dump + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/multiprocessing/reductions.py", line 615, in reduce_storage + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/storage.py", line 449, in wrapper + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/storage.py", line 527, in _share_fd_cpu_ +RuntimeError: unable to open shared memory object in read-write mode: Too many open files (24) +when serializing tuple item 0 +when serializing torch.storage.TypedStorage reconstructor arguments +when serializing torch.storage.TypedStorage object +when serializing tuple item 1 +when serializing torch.Tensor reconstructor arguments +when serializing torch.Tensor object +when serializing dict item 'labels' +when serializing list item 239 +when serializing dict item '_precomputed' +when serializing src.training.dataset.WritingCorrectionDataset state +when serializing src.training.dataset.WritingCorrectionDataset object +when serializing tuple item 1 +when serializing dict item '_args' +when serializing multiprocessing.context.Process state +when serializing multiprocessing.context.Process object diff --git a/wandb/run-20260503_121016-impcgg4z/files/requirements.txt b/wandb/run-20260503_121016-impcgg4z/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..0a9158bdf370f073be66c392b3c09dcdfbfed3be --- /dev/null +++ b/wandb/run-20260503_121016-impcgg4z/files/requirements.txt @@ -0,0 +1,351 @@ +Pygments==2.19.2 +pluggy==1.6.0 +iniconfig==2.3.0 +pytest==9.0.2 +sqlite-vec==0.1.6 +peewee==3.19.0 +boltons==21.0.0 +zipp==3.23.0 +wrapt==1.17.3 +tree-sitter-python==0.25.0 +tree-sitter-javascript==0.25.0 +tree-sitter-java==0.23.5 +tree-sitter-cpp==0.23.4 +tree-sitter-c==0.24.1 +tree-sitter==0.25.2 +tqdm==4.67.3 +tomli==2.0.2 +tabulate==0.10.0 +stevedore==5.7.0 +sniffio==1.3.1 +smmap==5.0.3 +shellingham==1.5.4 +semantic-version==2.10.0 +ruamel.yaml.clib==0.2.14 +ruamel.yaml==0.19.1 +python-multipart==0.0.22 +python-dotenv==1.2.2 +PyJWT==2.11.0 +protobuf==6.33.5 +opentelemetry-util-http==0.58b0 +networkx==3.6.1 +mdurl==0.1.2 +MarkupSafe==3.0.3 +jiter==0.13.0 +hyperframe==6.1.0 +httpx-sse==0.4.3 +hpack==4.1.0 +face==26.0.0 +exceptiongroup==1.2.2 +docstring_parser==0.17.0 +colorama==0.4.6 +bracex==2.6 +annotated-doc==0.0.4 +aiofiles==25.1.0 +wcmatch==8.5.2 +uvicorn==0.41.0 +starlette==0.52.1 +opentelemetry-proto==1.37.0 +markdown-it-py==4.0.0 +Jinja2==3.1.6 +importlib_metadata==8.7.1 +h2==4.3.0 +googleapis-common-protos==1.73.0 +glom==25.12.0 +gitdb==4.0.12 +click-option-group==0.5.9 +sse-starlette==3.3.2 +rich==14.3.3 +pydantic-settings==2.13.1 +opentelemetry-exporter-otlp-proto-common==1.37.0 +opentelemetry-api==1.37.0 +openai==2.26.0 +jsonschema==4.25.1 +GitPython==3.1.46 +typer==0.23.1 +opentelemetry-semantic-conventions==0.58b0 +bandit==1.9.4 +opentelemetry-sdk==1.37.0 +opentelemetry-instrumentation==0.58b0 +mcp==1.23.3 +agno==2.5.9 +opentelemetry-instrumentation-threading==0.58b0 +opentelemetry-instrumentation-requests==0.58b0 +opentelemetry-exporter-otlp-proto-http==1.37.0 +semgrep==1.154.0 +acvas==1.0.0 +Werkzeug==3.1.8 +websockets==13.1 +wcwidth==0.6.0 +sqlparse==0.5.5 +semver==3.0.4 +itsdangerous==2.2.0 +frida==17.9.1 +click==8.3.2 +blinker==1.9.0 +prompt_toolkit==3.0.52 +Flask==3.1.3 +delegator.py==0.1.1 +frida-tools==14.8.1 +cli_helpers==2.12.0 +litecli==1.17.1 +objection==1.12.4 +userpath==1.9.2 +platformdirs==4.9.4 +argcomplete==3.6.3 +pipx==1.11.1 +distlib==0.4.0 +filelock==3.25.2 +python-discovery==1.2.1 +virtualenv==21.2.0 +pyelftools==0.32 +cigam==0.0.3 +xmltodict==1.0.4 +apkutils2==1.0.0 +svgwrite==1.4.3 +rdflib==7.6.0 +python-louvain==0.16 +wheel==0.46.3 +pip==26.0.1 +threadpoolctl==3.6.0 +scipy==1.17.1 +joblib==1.5.3 +scikit-learn==1.8.0 +pandas==3.0.2 +seaborn==0.13.2 +text-unidecode==1.3 +fastjsonschema==2.21.2 +traitlets==5.14.3 +python-slugify==8.0.4 +bleach==6.3.0 +mdit-py-plugins==0.5.0 +kagglesdk==0.1.23 +jupyter_core==5.9.1 +nbformat==5.10.4 +jupytext==1.19.1 +kaggle==2.1.0 +xxhash==3.7.0 +pyarrow==24.0.0 +propcache==0.4.1 +multidict==6.7.1 +hf-xet==1.4.3 +fsspec==2026.2.0 +frozenlist==1.8.0 +dill==0.4.1 +aiohappyeyeballs==2.6.1 +yarl==1.23.0 +multiprocess==0.70.19 +aiosignal==1.4.0 +aiohttp==3.13.5 +datasets==4.8.5 +torchaudio==2.11.0 +nvidia-cusparselt-cu13==0.8.0 +mpmath==1.3.0 +cuda-toolkit==13.0.2 +wasabi==1.1.3 +uvloop==0.22.1 +triton==3.6.0 +toml==0.10.2 +tensorboard-data-server==0.7.2 +sympy==1.14.0 +spacy-loggers==1.0.5 +spacy-legacy==3.0.12 +spacy-alignments==0.9.1 +smart_open==7.6.0 +setuptools==81.0.0 +sentencepiece==0.2.1 +safetensors==0.7.0 +pyspellchecker==0.9.0 +pyphen==0.17.2 +portalocker==3.2.0 +nvidia-nvtx==13.0.85 +nvidia-nvshmem-cu13==3.4.5 +nvidia-nvjitlink==13.0.88 +nvidia-nccl-cu13==2.28.9 +nvidia-curand==10.4.0.35 +nvidia-cufile==1.15.1.6 +nvidia-cuda-runtime==13.0.96 +nvidia-cuda-nvrtc==13.0.88 +nvidia-cuda-cupti==13.0.85 +nvidia-cublas==13.1.0.3 +nltk==3.9.4 +murmurhash==1.0.15 +msgpack==1.1.2 +Markdown==3.10.2 +loguru==0.7.3 +locate==1.1.1 +langcodes==3.5.1 +httptools==0.7.1 +grpcio==1.80.0 +ftfy==6.3.1 +faiss-cpu==1.13.2 +cymem==2.0.13 +cuda-pathfinder==1.5.4 +confection==1.3.3 +cloudpathlib==0.24.0 +catalogue==2.0.10 +blis==1.3.3 +absl-py==2.4.0 +wordfreq==3.1.1 +watchfiles==1.1.1 +textstat==0.7.13 +tensorboard==2.20.0 +srsly==2.5.3 +sacrebleu==2.6.0 +rouge_score==0.1.2 +preshed==3.0.13 +nvidia-cusparse==12.6.3.3 +nvidia-cufft==12.0.0.61 +nvidia-cudnn-cu13==9.19.0.56 +language_tool_python==3.3.0 +huggingface_hub==0.36.2 +cuda-bindings==13.2.0 +wandb==0.26.1 +tokenizers==0.21.4 +thinc==8.3.13 +nvidia-cusolver==12.0.4.66 +fastapi==0.136.1 +weasel==1.0.0 +transformers==4.53.2 +torch==2.11.0 +spacy==3.8.13 +torchvision==0.26.0 +spacy-transformers==1.4.0 +sentence-transformers==5.4.1 +optimum==2.1.0 +errant==3.0.0 +bitsandbytes==0.49.2 +bert-score==0.3.13 +accelerate==1.13.0 +peft==0.19.1 +en_core_web_sm==3.8.0 +tree-sitter-zig==1.1.2 +tree-sitter-verilog==1.0.3 +tree-sitter-typescript==0.23.2 +tree-sitter-swift==0.0.1 +tree-sitter-scala==0.26.0 +tree-sitter-rust==0.24.2 +tree-sitter-ruby==0.23.1 +tree-sitter-powershell==0.26.3 +tree-sitter-php==0.24.1 +tree-sitter-objc==3.0.2 +tree-sitter-lua==0.5.0 +tree-sitter-kotlin==1.1.0 +tree-sitter-julia==0.23.1 +tree-sitter-go==0.25.0 +tree-sitter-elixir==0.3.5 +tree-sitter-c-sharp==0.23.5 +graphifyy==0.6.5 +numpy==2.4.4 +kiwisolver==1.5.0 +fonttools==4.62.1 +cycler==0.12.1 +contourpy==1.3.3 +matplotlib==3.10.9 +Brlapi==0.8.7 +PyGObject==3.56.2 +cffi==2.0.0 +cryptography==46.0.7 +dbus-python==1.4.0 +evdev==1.9.3 +gbinder-python==1.3.0 +lxml==6.0.2 +perf==0.1 +pillow==12.2.0 +psutil==7.2.2 +PyAudio==0.2.13 +pycairo==1.28.0 +pycups==2.0.4 +pydantic_core==2.41.5 +pyscard==2.2.2 +PyYAML==6.0.3 +RapidFuzz==3.14.3 +regex==2026.2.28 +rpds-py==0.29.0 +rpm==6.0.1 +selinux==3.10 +setools==4.6.0 +setproctitle==1.3.7 +systemd-python==235 +PySocks==1.7.1 +annotated-types==0.7.0 +anyio==4.12.1 +attrs==25.4.0 +beautifulsoup4==4.14.3 +boto3==1.42.84 +botocore==1.42.84 +certifi==2026.1.4 +charset-normalizer==3.4.4 +click==8.3.3 +cockpit==361 +configobj==5.0.9 +cupshelpers==1.0 +dasbus==1.7 +distro==1.9.0 +fido2==2.0.0 +file-magic==0.4.0 +filelock==3.15.4 +fw-fanctrl==1.0.2 +h11==0.16.0 +html2text==2025.4.15 +httpcore==1.0.9 +httpx==0.28.1 +icoextract==0.2.0 +idna==3.11 +input-remapper==2.2.0 +jaraco.classes==3.4.0 +jaraco.context==6.1.0 +jaraco.functools==4.3.0 +jeepney==0.9.0 +jmespath==1.0.1 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +keyring==25.7.0 +langtable==0.0.70 +libevdev==0.13.1 +louis==3.33.0 +lutris==0.5.22 +moddb==0.12.0 +more-itertools==10.5.0 +nftables==0.1 +olefile==0.47 +packaging==25.0 +pefile==2024.8.26 +pexpect==4.9.0 +pip==26.0.1 +protobuf==3.19.6 +ptyprocess==0.7.0 +pycparser==2.22 +pydantic==2.12.5 +pydbus==0.6.0 +pyenchant==3.3.0 +pygdbmi==0.11.0.0 +pyinotify==0.9.6 +pyparsing==3.1.2 +pypresence==4.3.0 +python-dateutil==2.9.0.post0 +python-linux-procfs==0.7.4 +python-pskc==1.4 +python-xlib==0.33 +pyudev==0.24.4 +pyxdg==0.28 +referencing==0.36.2 +requests==2.32.5 +requests-file==3.0.0 +s3transfer==0.16.0 +SecretStorage==3.5.0 +sentry-sdk==2.48.0 +sepolicy==3.10 +setroubleshoot==3.3.36 +setuptools==80.10.2 +shtab==1.7.2 +six==1.17.0 +sos==4.11.1 +soupsieve==2.8.3 +tldextract==5.3.0 +typing_extensions==4.15.0 +typing-inspection==0.4.2 +udica==0.2.8 +umu-launcher==1.4.0 +urllib3==2.6.3 +yubikey-manager==5.9.1 diff --git a/wandb/run-20260503_121016-impcgg4z/files/wandb-metadata.json b/wandb/run-20260503_121016-impcgg4z/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..b4bf6436be22401abaf62c01093825cbd5fd0849 --- /dev/null +++ b/wandb/run-20260503_121016-impcgg4z/files/wandb-metadata.json @@ -0,0 +1,41 @@ +{ + "os": "Linux-6.19.14-ogc1.1.fc44.x86_64-x86_64-with-glibc2.43", + "python": "CPython 3.14.4", + "startedAt": "2026-05-03T06:40:16.374846Z", + "args": [ + "--config", + "configs/training_config.yaml", + "--use-v2-loss" + ], + "program": "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", + "codePath": "scripts/train.py", + "codePathLocal": "scripts/train.py", + "email": "morpheuslord@protonmail.com", + "root": "/run/media/morpheuslord/Personal_Files/Projects/Rewriter", + "host": "bazzite", + "executable": "/usr/bin/python3", + "cpu_count": 8, + "cpu_count_logical": 16, + "gpu": "NVIDIA GeForce RTX 3050 Laptop GPU", + "gpu_count": 1, + "disk": { + "/": { + "total": "65773568", + "used": "65773568" + } + }, + "memory": { + "total": "32939671552" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA GeForce RTX 3050 Laptop GPU", + "memoryTotal": "4294967296", + "cudaCores": 2048, + "architecture": "Ampere", + "uuid": "GPU-861554d7-d187-39e9-e77c-881f0287b963" + } + ], + "cudaVersion": "13.2", + "writerId": "4grhtgbrsanlpr1ottdp8810yu8ci7gn" +} \ No newline at end of file diff --git a/wandb/run-20260503_121016-impcgg4z/files/wandb-summary.json b/wandb/run-20260503_121016-impcgg4z/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..b83728ff622654da73d628f469e3fbb77e031a30 --- /dev/null +++ b/wandb/run-20260503_121016-impcgg4z/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb":{"runtime":14},"_runtime":14} \ No newline at end of file diff --git a/wandb/run-20260503_121016-impcgg4z/logs/debug-core.log b/wandb/run-20260503_121016-impcgg4z/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..5de961b5377998f9859cb1dc39f822a942f4ccd4 --- /dev/null +++ b/wandb/run-20260503_121016-impcgg4z/logs/debug-core.log @@ -0,0 +1,15 @@ +{"time":"2026-05-03T12:10:16.580335854+05:30","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp675m8dz7/port-18166.txt","pid":18166,"detached":false,"idle-timeout":600000000000,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-05-03T12:10:16.58088898+05:30","level":"INFO","msg":"server: will exit if parent process dies","ppid":18166} +{"time":"2026-05-03T12:10:16.580825924+05:30","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-18166-18438-4073895957/socket","Net":"unix"}} +{"time":"2026-05-03T12:10:16.770634987+05:30","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-05-03T12:10:16.778929278+05:30","level":"INFO","msg":"handleInformInit: received","streamId":"impcgg4z","id":"1(@)"} +{"time":"2026-05-03T12:10:17.398795572+05:30","level":"INFO","msg":"handleInformInit: stream started","streamId":"impcgg4z","id":"1(@)"} +{"time":"2026-05-03T12:10:23.467029482+05:30","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"e25pw72o4r33"} +{"time":"2026-05-03T12:10:32.645763825+05:30","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-05-03T12:10:32.645828674+05:30","level":"INFO","msg":"server is shutting down"} +{"time":"2026-05-03T12:10:32.645833643+05:30","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-05-03T12:10:32.645906638+05:30","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-05-03T12:10:32.645932034+05:30","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-18166-18438-4073895957/socket","Net":"unix"}} +{"time":"2026-05-03T12:10:34.707679794+05:30","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-05-03T12:10:34.707708516+05:30","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-05-03T12:10:34.707728623+05:30","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20260503_121016-impcgg4z/logs/debug-internal.log b/wandb/run-20260503_121016-impcgg4z/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..ebbbd36c3737e49778d6d159a6051461f5fe7a1e --- /dev/null +++ b/wandb/run-20260503_121016-impcgg4z/logs/debug-internal.log @@ -0,0 +1,18 @@ +{"time":"2026-05-03T12:10:16.779154863+05:30","level":"INFO","msg":"wandb-core"} +{"time":"2026-05-03T12:10:16.779830284+05:30","level":"INFO","msg":"stream: starting","core version":"0.26.1"} +{"time":"2026-05-03T12:10:17.3981305+05:30","level":"INFO","msg":"stream: created new stream","id":"impcgg4z"} +{"time":"2026-05-03T12:10:17.398337881+05:30","level":"INFO","msg":"handler: started"} +{"time":"2026-05-03T12:10:17.398757642+05:30","level":"INFO","msg":"stream: started"} +{"time":"2026-05-03T12:10:17.398787457+05:30","level":"INFO","msg":"sender: started"} +{"time":"2026-05-03T12:10:17.398781376+05:30","level":"INFO","msg":"writer: started","stream_id":"impcgg4z"} +{"time":"2026-05-03T12:10:18.177187082+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":2} +{"time":"2026-05-03T12:10:18.830031935+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:10:33.177894461+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"console_offset":1,"console_lines":89,"uploaded_len":2} +{"time":"2026-05-03T12:10:33.779023124+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:10:34.194451202+05:30","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-05-03T12:10:34.194606147+05:30","level":"INFO","msg":"filestream: sending request","total_files":0,"uploaded_len":3,"complete":true,"exit_code":1} +{"time":"2026-05-03T12:10:34.702028639+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:10:34.702303354+05:30","level":"INFO","msg":"stream: finishing up"} +{"time":"2026-05-03T12:10:34.702410701+05:30","level":"INFO","msg":"handler: closed"} +{"time":"2026-05-03T12:10:34.702767828+05:30","level":"INFO","msg":"sender: closed"} +{"time":"2026-05-03T12:10:34.702804315+05:30","level":"INFO","msg":"stream: all finished"} diff --git a/wandb/run-20260503_121016-impcgg4z/logs/debug.log b/wandb/run-20260503_121016-impcgg4z/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..93b7da5569f595191a7dd69f42921dcc10499d86 --- /dev/null +++ b/wandb/run-20260503_121016-impcgg4z/logs/debug.log @@ -0,0 +1,74 @@ +2026-05-03 12:10:16,379 INFO MainThread:18166 [wandb_setup.py:_flush():81] Current SDK version is 0.26.1 +2026-05-03 12:10:16,379 INFO MainThread:18166 [wandb_setup.py:_flush():81] Configure stats pid to 18166 +2026-05-03 12:10:16,379 INFO MainThread:18166 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-05-03 12:10:16,379 INFO MainThread:18166 [wandb_init.py:setup_run_log_directory():723] Logging user logs to /run/media/morpheuslord/Personal_Files/Projects/Rewriter/wandb/run-20260503_121016-impcgg4z/logs/debug.log +2026-05-03 12:10:16,379 INFO MainThread:18166 [wandb_init.py:setup_run_log_directory():724] Logging internal logs to /run/media/morpheuslord/Personal_Files/Projects/Rewriter/wandb/run-20260503_121016-impcgg4z/logs/debug-internal.log +2026-05-03 12:10:16,379 INFO MainThread:18166 [wandb_init.py:init():850] calling init triggers +2026-05-03 12:10:16,379 INFO MainThread:18166 [wandb_init.py:init():855] wandb.init called with sweep_config: {} +config: {'model': {'key': 'flan-t5-small', 'quantize': False, 'use_lora': True}, 'lora': {'r': 8, 'lora_alpha': 16, 'lora_dropout': 0.05, 'target_modules': ['q', 'v', 'k', 'o', 'wi_0', 'wi_1', 'wo']}, 'data': {'train_path': 'data/processed/train.jsonl', 'val_path': 'data/processed/val.jsonl', 'test_path': 'data/processed/test.jsonl', 'max_input_length': 128, 'max_target_length': 128, 'augment_synthetic': True, 'synthetic_ratio': 0.3}, 'training': {'output_dir': 'checkpoints/', 'num_train_epochs': 5, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'gradient_accumulation_steps': 8, 'learning_rate': 0.0003, 'lr_scheduler_type': 'cosine', 'warmup_ratio': 0.05, 'weight_decay': 0.01, 'fp16': False, 'bf16': True, 'evaluation_strategy': 'steps', 'eval_steps': 100, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 3, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'logging_dir': 'logs/', 'logging_steps': 25, 'report_to': ['wandb', 'tensorboard'], 'dataloader_num_workers': 4, 'seed': 42, 'push_to_hub': False}, 'loss': {'lambda_style': 0.3, 'lambda_semantic': 0.5, 'lambda_human_pattern': 0.4, 'sem_model_name': 'all-mpnet-base-v2'}, 'generation': {'num_beams': 5, 'length_penalty': 1.0, 'no_repeat_ngram_size': 3, 'min_length': 10, 'max_new_tokens': 512, 'early_stopping': True}, 'human_pattern': {'classifier_path': 'checkpoints/human_pattern_classifier.pt', 'shanegerami_path': 'data/raw/shanegerami/AI_Human.csv', 'starblasters_path': 'data/raw/starblasters8/data.parquet', 'max_samples_per_source': 50000, 'pretrain_epochs': 20, 'pretrain_lr': 0.001, 'pretrain_batch_size': 512, 'target_auc': 0.88}, '_wandb': {}} +2026-05-03 12:10:16,379 INFO MainThread:18166 [wandb_init.py:init():898] starting backend +2026-05-03 12:10:16,770 INFO MainThread:18166 [wandb_init.py:init():913] sending inform_init request +2026-05-03 12:10:17,399 INFO MainThread:18166 [wandb_init.py:init():918] backend started and connected +2026-05-03 12:10:17,400 INFO MainThread:18166 [wandb_init.py:init():988] updated telemetry +2026-05-03 12:10:17,402 INFO MainThread:18166 [wandb_init.py:init():1011] communicating run to backend with 90.0 second timeout +2026-05-03 12:10:18,013 INFO MainThread:18166 [wandb_init.py:init():1056] starting run threads in backend +2026-05-03 12:10:18,167 INFO MainThread:18166 [wandb_run.py:_console_start():2554] atexit reg +2026-05-03 12:10:18,167 INFO MainThread:18166 [wandb_run.py:_redirect():2403] redirect: wrap_raw +2026-05-03 12:10:18,167 INFO MainThread:18166 [wandb_run.py:_redirect():2472] Wrapping output streams. +2026-05-03 12:10:18,168 INFO MainThread:18166 [wandb_run.py:_redirect():2495] Redirects installed. +2026-05-03 12:10:18,169 INFO MainThread:18166 [wandb_init.py:init():1094] run started, returning control to user process +2026-05-03 12:10:32,247 INFO MainThread:18166 [wandb_run.py:_config_callback():1415] config_cb None None {'peft_config': {'default': {'task_type': 'SEQ_2_SEQ_LM', 'peft_type': 'LORA', 'auto_mapping': None, 'peft_version': '0.19.1', 'base_model_name_or_path': 'google/flan-t5-small', 'revision': None, 'inference_mode': False, 'r': 8, 'target_modules': ['wo', 'wi_1', 'q', 'wi_0', 'o', 'v', 'k'], 'exclude_modules': None, 'lora_alpha': 16, 'lora_dropout': 0.05, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'trainable_token_indices': None, 'loftq_config': {}, 'eva_config': None, 'corda_config': None, 'lora_ga_config': None, 'use_dora': False, 'alora_invocation_tokens': None, 'use_qalora': False, 'qalora_group_size': 16, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}, 'lora_bias': False, 'target_parameters': None, 'use_bdlora': None, 'arrow_config': None, 'ensure_weight_tying': False}}, 'vocab_size': 32128, 'd_model': 512, 'd_kv': 64, 'd_ff': 1024, 'num_layers': 8, 'num_decoder_layers': 8, 'num_heads': 6, 'relative_attention_num_buckets': 32, 'relative_attention_max_distance': 128, 'dropout_rate': 0.1, 'classifier_dropout': 0.0, 'layer_norm_epsilon': 1e-06, 'initializer_factor': 1.0, 'feed_forward_proj': 'gated-gelu', 'use_cache': True, 'dense_act_fn': 'gelu_new', 'is_gated_act': True, 'return_dict': True, 'output_hidden_states': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['T5ForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': 1, 'sep_token_id': None, 'decoder_start_token_id': 0, 'task_specific_params': {'summarization': {'early_stopping': True, 'length_penalty': 2.0, 'max_length': 200, 'min_length': 30, 'no_repeat_ngram_size': 3, 'num_beams': 4, 'prefix': 'summarize: '}, 'translation_en_to_de': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to German: '}, 'translation_en_to_fr': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to French: '}, 'translation_en_to_ro': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to Romanian: '}}, 'problem_type': None, '_name_or_path': 'google/flan-t5-small', 'transformers_version': '4.53.2', 'model_type': 't5', 'n_positions': 512, 'output_past': True, 'output_attentions': False, 'output_dir': 'checkpoints/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0003, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.05, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'logs/', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 3, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 4, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'checkpoints/', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb', 'tensorboard'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': False} +2026-05-03 12:10:32,252 INFO MainThread:18166 [wandb_config.py:__setitem__():155] [no run ID] config set model/num_parameters = 78239104 - > +2026-05-03 12:10:32,252 INFO MainThread:18166 [wandb_run.py:_config_callback():1415] config_cb model/num_parameters 78239104 None +2026-05-03 12:10:32,646 INFO wandb-AsyncioManager-main:18166 [service_client.py:_forward_responses():134] Reached EOF. +2026-05-03 12:10:32,646 INFO wandb-AsyncioManager-main:18166 [mailbox.py:close():155] Closing mailbox, abandoning 1 handles. +2026-05-03 12:10:34,712 ERROR wandb-AsyncioManager-main:18166 [asyncio_manager.py:fn_wrap_exceptions():184] Uncaught exception in run_soon callback. +Traceback (most recent call last): + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/asyncio_manager.py", line 182, in fn_wrap_exceptions + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 45, in publish + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 87, in _send_server_request + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 96, in _drain_writer + File "/usr/lib64/python3.14/asyncio/streams.py", line 386, in drain + File "/usr/lib64/python3.14/asyncio/streams.py", line 166, in _drain_helper +ConnectionResetError: Connection lost +2026-05-03 12:10:34,713 ERROR wandb-AsyncioManager-main:18166 [asyncio_manager.py:fn_wrap_exceptions():184] Uncaught exception in run_soon callback. +Traceback (most recent call last): + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/asyncio_manager.py", line 182, in fn_wrap_exceptions + await fn() + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 45, in publish + await self._send_server_request(request) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 87, in _send_server_request + await self._drain_writer() + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 96, in _drain_writer + await self._writer.drain() + File "/usr/lib64/python3.14/asyncio/streams.py", line 386, in drain + File "/usr/lib64/python3.14/asyncio/streams.py", line 166, in _drain_helper +ConnectionResetError: Connection lost +2026-05-03 12:10:34,715 ERROR wandb-AsyncioManager-main:18166 [asyncio_manager.py:fn_wrap_exceptions():184] Uncaught exception in run_soon callback. +Traceback (most recent call last): + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/asyncio_manager.py", line 182, in fn_wrap_exceptions + await fn() + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 45, in publish + await self._send_server_request(request) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 87, in _send_server_request + await self._drain_writer() + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 96, in _drain_writer + await self._writer.drain() + File "/usr/lib64/python3.14/asyncio/streams.py", line 386, in drain + File "/usr/lib64/python3.14/asyncio/streams.py", line 166, in _drain_helper +ConnectionResetError: Connection lost +2026-05-03 12:10:34,715 ERROR wandb-AsyncioManager-main:18166 [asyncio_manager.py:fn_wrap_exceptions():184] Uncaught exception in run_soon callback. +Traceback (most recent call last): + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/asyncio_manager.py", line 182, in fn_wrap_exceptions + await fn() + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 45, in publish + await self._send_server_request(request) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 87, in _send_server_request + await self._drain_writer() + File "/home/morpheuslord/.local/lib/python3.14/site-packages/wandb/sdk/lib/service/service_client.py", line 96, in _drain_writer + await self._writer.drain() + File "/usr/lib64/python3.14/asyncio/streams.py", line 386, in drain + await self._protocol._drain_helper() + File "/usr/lib64/python3.14/asyncio/streams.py", line 166, in _drain_helper + raise ConnectionResetError('Connection lost') +ConnectionResetError: Connection lost diff --git a/wandb/run-20260503_121312-l9gn41e7/files/output.log b/wandb/run-20260503_121312-l9gn41e7/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..904e7bb398684b39e5a7343bab0741edecb4d2d5 --- /dev/null +++ b/wandb/run-20260503_121312-l9gn41e7/files/output.log @@ -0,0 +1,100 @@ +2026-05-03 12:13:14.298 | INFO  | __main__:train:153 - Step 3: Setting up device (hybrid GPU mode)... +2026-05-03 12:13:14.299 | INFO  | __main__:_setup_device:61 - GPU: NVIDIA GeForce RTX 3050 Laptop GPU | VRAM: 0MB used / 3770MB total (3770MB free) | Compute: (8, 6) +2026-05-03 12:13:14.299 | INFO  | __main__:_setup_device:74 - Hybrid GPU mode: capped PyTorch VRAM to 85% (~3204MB), leaving room for system +2026-05-03 12:13:14.299 | INFO  | __main__:train:157 - Step 4: Loading model and tokenizer... +2026-05-03 12:13:14.300 | INFO  | src.model.base_model:load_model_and_tokenizer:57 - Loading model: google/flan-t5-small (seq2seq=True, quantize=False, lora=True) +2026-05-03 12:13:16.922 | INFO  | src.model.base_model:load_model_and_tokenizer:101 - Model loaded on cuda with dtype torch.bfloat16 +2026-05-03 12:13:18.277 | INFO  | src.model.base_model:load_model_and_tokenizer:130 - LoRA applied: 1,277,952 trainable params / 78,239,104 total (1.63%) +2026-05-03 12:13:18.277 | INFO  | __main__:train:173 - Applying torch.compile(mode='reduce-overhead')... +2026-05-03 12:13:18.281 | INFO  | __main__:train:175 - ✓ torch.compile applied — first few steps will be slower (compiling) +2026-05-03 12:13:18.281 | INFO  | __main__:train:180 - Step 5: Creating style fingerprinter... +2026-05-03 12:13:18.644 | INFO  | src.style.fingerprinter:__init__:100 - StyleFingerprinter initialised (AWL size: 549) +2026-05-03 12:13:18.644 | INFO  | __main__:train:187 - Step 6: Loading datasets... +2026-05-03 12:13:18.683 | INFO  | src.training.dataset:__init__:71 - Loaded 7555 examples from data/processed/train.jsonl +2026-05-03 12:13:18.828 | INFO  | src.training.dataset:_add_synthetic:148 - Added 2125 synthetic augmentation examples +2026-05-03 12:13:18.829 | INFO  | src.training.dataset:__init__:77 - Total dataset size: 9680 examples +2026-05-03 12:13:18.845 | INFO  | src.training.dataset:__init__:85 - Loading pre-computed dataset from cache: data/cache/1356ff2104663316.pt +2026-05-03 12:13:20.097 | INFO  | src.training.dataset:__init__:87 - Loaded 9680 cached examples +2026-05-03 12:13:20.102 | INFO  | src.training.dataset:__init__:71 - Loaded 839 examples from data/processed/val.jsonl +2026-05-03 12:13:20.102 | INFO  | src.training.dataset:__init__:77 - Total dataset size: 839 examples +2026-05-03 12:13:20.104 | INFO  | src.training.dataset:__init__:85 - Loading pre-computed dataset from cache: data/cache/d6a64358c3ef403f.pt +2026-05-03 12:13:20.193 | INFO  | src.training.dataset:__init__:87 - Loaded 839 cached examples +2026-05-03 12:13:20.194 | INFO  | __main__:train:207 - Train: 9680 | Val: 839 +2026-05-03 12:13:20.478 | INFO  | __main__:train:235 - Using CE-only loss (aux models skipped to save memory) +2026-05-03 12:13:20.478 | INFO  | __main__:train:238 - Step 8: Creating training arguments... +2026-05-03 12:13:20.479 | INFO  | __main__:train:246 - Using BF16 (Ampere+ GPU) +2026-05-03 12:13:20.479 | INFO  | __main__:_auto_batch_size:115 - Auto batch size: 4 (model ~160MB + 4×60MB = ~400MB / 3770MB free) +2026-05-03 12:13:20.479 | INFO  | __main__:train:273 - Gradient checkpointing: OFF (small model fits in VRAM) +2026-05-03 12:13:20.502 | INFO  | __main__:train:316 - Step 9: Creating trainer... +2026-05-03 12:13:20.627 | INFO  | __main__:train:332 - Step 10: Starting training... +2026-05-03 12:13:20.628 | INFO  | __main__:train:333 - Config summary: model=flan-t5-small | batch=4 | accum=8 | effective_batch=32 | epochs=5 | precision=bf16 | grad_ckpt=False | device=cuda +wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter. + 0%| | 0/1515 [00:00 + train() + ~~~~~^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1485, in __call__ + return self.main(*args, **kwargs) + ~~~~~~~~~^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1406, in main + rv = self.invoke(ctx) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 1269, in invoke + return ctx.invoke(self.callback, **ctx.params) + ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/click/core.py", line 824, in invoke + return callback(*args, **kwargs) + File "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", line 341, in train + trainer.train() + ~~~~~~~~~~~~~^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/trainer.py", line 2206, in train + return inner_training_loop( + args=args, + ...<2 lines>... + ignore_keys_for_eval=ignore_keys_for_eval, + ) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/trainer.py", line 2548, in _inner_training_loop + tr_loss_step = self.training_step(model, inputs, num_items_in_batch) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/trainer.py", line 3797, in training_step + self.accelerator.backward(loss, **kwargs) + ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/accelerate/accelerator.py", line 2838, in backward + loss.backward(**kwargs) + ~~~~~~~~~~~~~^^^^^^^^^^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/_tensor.py", line 631, in backward + torch.autograd.backward( + ~~~~~~~~~~~~~~~~~~~~~~~^ + self, gradient, retain_graph, create_graph, inputs=inputs + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + ) + ^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/autograd/__init__.py", line 381, in backward + _engine_run_backward( + ~~~~~~~~~~~~~~~~~~~~^ + tensors, + ^^^^^^^^ + ...<5 lines>... + accumulate_grad=True, + ^^^^^^^^^^^^^^^^^^^^^ + ) + ^ + File "/home/morpheuslord/.local/lib/python3.14/site-packages/torch/autograd/graph.py", line 869, in _engine_run_backward + return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + t_outputs, *args, **kwargs + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + ) # Calls into the C++ engine to run the backward pass + ^ +RuntimeError: Error: accessing tensor output of CUDAGraphs that has been overwritten by a subsequent run. Stack trace: File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/models/t5/modeling_t5.py", line 1105, in torch_dynamo_resume_in_forward_at_1018 + layer_outputs = layer_module( + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/modeling_layers.py", line 83, in __call__ + return super().__call__(*args, **kwargs) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/models/t5/modeling_t5.py", line 678, in forward + self_attention_outputs = self.layer[0]( + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/models/t5/modeling_t5.py", line 596, in forward + attention_output = self.SelfAttention( + File "/home/morpheuslord/.local/lib/python3.14/site-packages/transformers/models/t5/modeling_t5.py", line 490, in forward + query_states = self.q(hidden_states) + File "/home/morpheuslord/.local/lib/python3.14/site-packages/peft/tuners/lora/layer.py", line 969, in forward + result = result + lora_B(lora_A(dropout(x))) * scaling. To prevent overwriting, clone the tensor outside of torch.compile() or call torch.compiler.cudagraph_mark_step_begin() before each model invocation. diff --git a/wandb/run-20260503_121312-l9gn41e7/files/requirements.txt b/wandb/run-20260503_121312-l9gn41e7/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..0a9158bdf370f073be66c392b3c09dcdfbfed3be --- /dev/null +++ b/wandb/run-20260503_121312-l9gn41e7/files/requirements.txt @@ -0,0 +1,351 @@ +Pygments==2.19.2 +pluggy==1.6.0 +iniconfig==2.3.0 +pytest==9.0.2 +sqlite-vec==0.1.6 +peewee==3.19.0 +boltons==21.0.0 +zipp==3.23.0 +wrapt==1.17.3 +tree-sitter-python==0.25.0 +tree-sitter-javascript==0.25.0 +tree-sitter-java==0.23.5 +tree-sitter-cpp==0.23.4 +tree-sitter-c==0.24.1 +tree-sitter==0.25.2 +tqdm==4.67.3 +tomli==2.0.2 +tabulate==0.10.0 +stevedore==5.7.0 +sniffio==1.3.1 +smmap==5.0.3 +shellingham==1.5.4 +semantic-version==2.10.0 +ruamel.yaml.clib==0.2.14 +ruamel.yaml==0.19.1 +python-multipart==0.0.22 +python-dotenv==1.2.2 +PyJWT==2.11.0 +protobuf==6.33.5 +opentelemetry-util-http==0.58b0 +networkx==3.6.1 +mdurl==0.1.2 +MarkupSafe==3.0.3 +jiter==0.13.0 +hyperframe==6.1.0 +httpx-sse==0.4.3 +hpack==4.1.0 +face==26.0.0 +exceptiongroup==1.2.2 +docstring_parser==0.17.0 +colorama==0.4.6 +bracex==2.6 +annotated-doc==0.0.4 +aiofiles==25.1.0 +wcmatch==8.5.2 +uvicorn==0.41.0 +starlette==0.52.1 +opentelemetry-proto==1.37.0 +markdown-it-py==4.0.0 +Jinja2==3.1.6 +importlib_metadata==8.7.1 +h2==4.3.0 +googleapis-common-protos==1.73.0 +glom==25.12.0 +gitdb==4.0.12 +click-option-group==0.5.9 +sse-starlette==3.3.2 +rich==14.3.3 +pydantic-settings==2.13.1 +opentelemetry-exporter-otlp-proto-common==1.37.0 +opentelemetry-api==1.37.0 +openai==2.26.0 +jsonschema==4.25.1 +GitPython==3.1.46 +typer==0.23.1 +opentelemetry-semantic-conventions==0.58b0 +bandit==1.9.4 +opentelemetry-sdk==1.37.0 +opentelemetry-instrumentation==0.58b0 +mcp==1.23.3 +agno==2.5.9 +opentelemetry-instrumentation-threading==0.58b0 +opentelemetry-instrumentation-requests==0.58b0 +opentelemetry-exporter-otlp-proto-http==1.37.0 +semgrep==1.154.0 +acvas==1.0.0 +Werkzeug==3.1.8 +websockets==13.1 +wcwidth==0.6.0 +sqlparse==0.5.5 +semver==3.0.4 +itsdangerous==2.2.0 +frida==17.9.1 +click==8.3.2 +blinker==1.9.0 +prompt_toolkit==3.0.52 +Flask==3.1.3 +delegator.py==0.1.1 +frida-tools==14.8.1 +cli_helpers==2.12.0 +litecli==1.17.1 +objection==1.12.4 +userpath==1.9.2 +platformdirs==4.9.4 +argcomplete==3.6.3 +pipx==1.11.1 +distlib==0.4.0 +filelock==3.25.2 +python-discovery==1.2.1 +virtualenv==21.2.0 +pyelftools==0.32 +cigam==0.0.3 +xmltodict==1.0.4 +apkutils2==1.0.0 +svgwrite==1.4.3 +rdflib==7.6.0 +python-louvain==0.16 +wheel==0.46.3 +pip==26.0.1 +threadpoolctl==3.6.0 +scipy==1.17.1 +joblib==1.5.3 +scikit-learn==1.8.0 +pandas==3.0.2 +seaborn==0.13.2 +text-unidecode==1.3 +fastjsonschema==2.21.2 +traitlets==5.14.3 +python-slugify==8.0.4 +bleach==6.3.0 +mdit-py-plugins==0.5.0 +kagglesdk==0.1.23 +jupyter_core==5.9.1 +nbformat==5.10.4 +jupytext==1.19.1 +kaggle==2.1.0 +xxhash==3.7.0 +pyarrow==24.0.0 +propcache==0.4.1 +multidict==6.7.1 +hf-xet==1.4.3 +fsspec==2026.2.0 +frozenlist==1.8.0 +dill==0.4.1 +aiohappyeyeballs==2.6.1 +yarl==1.23.0 +multiprocess==0.70.19 +aiosignal==1.4.0 +aiohttp==3.13.5 +datasets==4.8.5 +torchaudio==2.11.0 +nvidia-cusparselt-cu13==0.8.0 +mpmath==1.3.0 +cuda-toolkit==13.0.2 +wasabi==1.1.3 +uvloop==0.22.1 +triton==3.6.0 +toml==0.10.2 +tensorboard-data-server==0.7.2 +sympy==1.14.0 +spacy-loggers==1.0.5 +spacy-legacy==3.0.12 +spacy-alignments==0.9.1 +smart_open==7.6.0 +setuptools==81.0.0 +sentencepiece==0.2.1 +safetensors==0.7.0 +pyspellchecker==0.9.0 +pyphen==0.17.2 +portalocker==3.2.0 +nvidia-nvtx==13.0.85 +nvidia-nvshmem-cu13==3.4.5 +nvidia-nvjitlink==13.0.88 +nvidia-nccl-cu13==2.28.9 +nvidia-curand==10.4.0.35 +nvidia-cufile==1.15.1.6 +nvidia-cuda-runtime==13.0.96 +nvidia-cuda-nvrtc==13.0.88 +nvidia-cuda-cupti==13.0.85 +nvidia-cublas==13.1.0.3 +nltk==3.9.4 +murmurhash==1.0.15 +msgpack==1.1.2 +Markdown==3.10.2 +loguru==0.7.3 +locate==1.1.1 +langcodes==3.5.1 +httptools==0.7.1 +grpcio==1.80.0 +ftfy==6.3.1 +faiss-cpu==1.13.2 +cymem==2.0.13 +cuda-pathfinder==1.5.4 +confection==1.3.3 +cloudpathlib==0.24.0 +catalogue==2.0.10 +blis==1.3.3 +absl-py==2.4.0 +wordfreq==3.1.1 +watchfiles==1.1.1 +textstat==0.7.13 +tensorboard==2.20.0 +srsly==2.5.3 +sacrebleu==2.6.0 +rouge_score==0.1.2 +preshed==3.0.13 +nvidia-cusparse==12.6.3.3 +nvidia-cufft==12.0.0.61 +nvidia-cudnn-cu13==9.19.0.56 +language_tool_python==3.3.0 +huggingface_hub==0.36.2 +cuda-bindings==13.2.0 +wandb==0.26.1 +tokenizers==0.21.4 +thinc==8.3.13 +nvidia-cusolver==12.0.4.66 +fastapi==0.136.1 +weasel==1.0.0 +transformers==4.53.2 +torch==2.11.0 +spacy==3.8.13 +torchvision==0.26.0 +spacy-transformers==1.4.0 +sentence-transformers==5.4.1 +optimum==2.1.0 +errant==3.0.0 +bitsandbytes==0.49.2 +bert-score==0.3.13 +accelerate==1.13.0 +peft==0.19.1 +en_core_web_sm==3.8.0 +tree-sitter-zig==1.1.2 +tree-sitter-verilog==1.0.3 +tree-sitter-typescript==0.23.2 +tree-sitter-swift==0.0.1 +tree-sitter-scala==0.26.0 +tree-sitter-rust==0.24.2 +tree-sitter-ruby==0.23.1 +tree-sitter-powershell==0.26.3 +tree-sitter-php==0.24.1 +tree-sitter-objc==3.0.2 +tree-sitter-lua==0.5.0 +tree-sitter-kotlin==1.1.0 +tree-sitter-julia==0.23.1 +tree-sitter-go==0.25.0 +tree-sitter-elixir==0.3.5 +tree-sitter-c-sharp==0.23.5 +graphifyy==0.6.5 +numpy==2.4.4 +kiwisolver==1.5.0 +fonttools==4.62.1 +cycler==0.12.1 +contourpy==1.3.3 +matplotlib==3.10.9 +Brlapi==0.8.7 +PyGObject==3.56.2 +cffi==2.0.0 +cryptography==46.0.7 +dbus-python==1.4.0 +evdev==1.9.3 +gbinder-python==1.3.0 +lxml==6.0.2 +perf==0.1 +pillow==12.2.0 +psutil==7.2.2 +PyAudio==0.2.13 +pycairo==1.28.0 +pycups==2.0.4 +pydantic_core==2.41.5 +pyscard==2.2.2 +PyYAML==6.0.3 +RapidFuzz==3.14.3 +regex==2026.2.28 +rpds-py==0.29.0 +rpm==6.0.1 +selinux==3.10 +setools==4.6.0 +setproctitle==1.3.7 +systemd-python==235 +PySocks==1.7.1 +annotated-types==0.7.0 +anyio==4.12.1 +attrs==25.4.0 +beautifulsoup4==4.14.3 +boto3==1.42.84 +botocore==1.42.84 +certifi==2026.1.4 +charset-normalizer==3.4.4 +click==8.3.3 +cockpit==361 +configobj==5.0.9 +cupshelpers==1.0 +dasbus==1.7 +distro==1.9.0 +fido2==2.0.0 +file-magic==0.4.0 +filelock==3.15.4 +fw-fanctrl==1.0.2 +h11==0.16.0 +html2text==2025.4.15 +httpcore==1.0.9 +httpx==0.28.1 +icoextract==0.2.0 +idna==3.11 +input-remapper==2.2.0 +jaraco.classes==3.4.0 +jaraco.context==6.1.0 +jaraco.functools==4.3.0 +jeepney==0.9.0 +jmespath==1.0.1 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +keyring==25.7.0 +langtable==0.0.70 +libevdev==0.13.1 +louis==3.33.0 +lutris==0.5.22 +moddb==0.12.0 +more-itertools==10.5.0 +nftables==0.1 +olefile==0.47 +packaging==25.0 +pefile==2024.8.26 +pexpect==4.9.0 +pip==26.0.1 +protobuf==3.19.6 +ptyprocess==0.7.0 +pycparser==2.22 +pydantic==2.12.5 +pydbus==0.6.0 +pyenchant==3.3.0 +pygdbmi==0.11.0.0 +pyinotify==0.9.6 +pyparsing==3.1.2 +pypresence==4.3.0 +python-dateutil==2.9.0.post0 +python-linux-procfs==0.7.4 +python-pskc==1.4 +python-xlib==0.33 +pyudev==0.24.4 +pyxdg==0.28 +referencing==0.36.2 +requests==2.32.5 +requests-file==3.0.0 +s3transfer==0.16.0 +SecretStorage==3.5.0 +sentry-sdk==2.48.0 +sepolicy==3.10 +setroubleshoot==3.3.36 +setuptools==80.10.2 +shtab==1.7.2 +six==1.17.0 +sos==4.11.1 +soupsieve==2.8.3 +tldextract==5.3.0 +typing_extensions==4.15.0 +typing-inspection==0.4.2 +udica==0.2.8 +umu-launcher==1.4.0 +urllib3==2.6.3 +yubikey-manager==5.9.1 diff --git a/wandb/run-20260503_121312-l9gn41e7/files/wandb-metadata.json b/wandb/run-20260503_121312-l9gn41e7/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..9e51dc673199bae7130c62a9ee8117f722b07aeb --- /dev/null +++ b/wandb/run-20260503_121312-l9gn41e7/files/wandb-metadata.json @@ -0,0 +1,41 @@ +{ + "os": "Linux-6.19.14-ogc1.1.fc44.x86_64-x86_64-with-glibc2.43", + "python": "CPython 3.14.4", + "startedAt": "2026-05-03T06:43:12.500309Z", + "args": [ + "--config", + "configs/training_config.yaml", + "--use-v2-loss" + ], + "program": "/run/media/morpheuslord/Personal_Files/Projects/Rewriter/scripts/train.py", + "codePath": "scripts/train.py", + "codePathLocal": "scripts/train.py", + "email": "morpheuslord@protonmail.com", + "root": "/run/media/morpheuslord/Personal_Files/Projects/Rewriter", + "host": "bazzite", + "executable": "/usr/bin/python3", + "cpu_count": 8, + "cpu_count_logical": 16, + "gpu": "NVIDIA GeForce RTX 3050 Laptop GPU", + "gpu_count": 1, + "disk": { + "/": { + "total": "65773568", + "used": "65773568" + } + }, + "memory": { + "total": "32939671552" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA GeForce RTX 3050 Laptop GPU", + "memoryTotal": "4294967296", + "cudaCores": 2048, + "architecture": "Ampere", + "uuid": "GPU-861554d7-d187-39e9-e77c-881f0287b963" + } + ], + "cudaVersion": "13.2", + "writerId": "n2byuziqh33uxapt3z4njafykopx599x" +} \ No newline at end of file diff --git a/wandb/run-20260503_121312-l9gn41e7/files/wandb-summary.json b/wandb/run-20260503_121312-l9gn41e7/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..02bc8ab8dcd01580790a0b4c2b85ec73c3df1769 --- /dev/null +++ b/wandb/run-20260503_121312-l9gn41e7/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime":135,"_wandb":{"runtime":135}} \ No newline at end of file diff --git a/wandb/run-20260503_121312-l9gn41e7/logs/debug-core.log b/wandb/run-20260503_121312-l9gn41e7/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..d91ad140eefc27b6c2f07e07d15ce747b9420849 --- /dev/null +++ b/wandb/run-20260503_121312-l9gn41e7/logs/debug-core.log @@ -0,0 +1,15 @@ +{"time":"2026-05-03T12:13:12.704328718+05:30","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmptnlivxxp/port-19895.txt","pid":19895,"detached":false,"idle-timeout":600000000000,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-05-03T12:13:12.704638029+05:30","level":"INFO","msg":"server: will exit if parent process dies","ppid":19895} +{"time":"2026-05-03T12:13:12.704613944+05:30","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-19895-20161-909192779/socket","Net":"unix"}} +{"time":"2026-05-03T12:13:12.895219341+05:30","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-05-03T12:13:12.903468722+05:30","level":"INFO","msg":"handleInformInit: received","streamId":"l9gn41e7","id":"1(@)"} +{"time":"2026-05-03T12:13:13.524485517+05:30","level":"INFO","msg":"handleInformInit: stream started","streamId":"l9gn41e7","id":"1(@)"} +{"time":"2026-05-03T12:13:19.298983843+05:30","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"egnibh7cngge"} +{"time":"2026-05-03T12:15:29.360787755+05:30","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-05-03T12:15:29.360778638+05:30","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-05-03T12:15:29.360917849+05:30","level":"INFO","msg":"server is shutting down"} +{"time":"2026-05-03T12:15:29.36091827+05:30","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-05-03T12:15:29.361118786+05:30","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-19895-20161-909192779/socket","Net":"unix"}} +{"time":"2026-05-03T12:15:31.973757358+05:30","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-05-03T12:15:31.973810408+05:30","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-05-03T12:15:31.973826699+05:30","level":"INFO","msg":"server is closed"} diff --git a/wandb/run-20260503_121312-l9gn41e7/logs/debug-internal.log b/wandb/run-20260503_121312-l9gn41e7/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..2640ca225043336ae85a4c52f632776b2b3eb38d --- /dev/null +++ b/wandb/run-20260503_121312-l9gn41e7/logs/debug-internal.log @@ -0,0 +1,34 @@ +{"time":"2026-05-03T12:13:12.903713512+05:30","level":"INFO","msg":"wandb-core"} +{"time":"2026-05-03T12:13:12.904371789+05:30","level":"INFO","msg":"stream: starting","core version":"0.26.1"} +{"time":"2026-05-03T12:13:13.523626873+05:30","level":"INFO","msg":"stream: created new stream","id":"l9gn41e7"} +{"time":"2026-05-03T12:13:13.523845955+05:30","level":"INFO","msg":"handler: started"} +{"time":"2026-05-03T12:13:13.524435894+05:30","level":"INFO","msg":"stream: started"} +{"time":"2026-05-03T12:13:13.524517457+05:30","level":"INFO","msg":"sender: started"} +{"time":"2026-05-03T12:13:13.524509242+05:30","level":"INFO","msg":"writer: started","stream_id":"l9gn41e7"} +{"time":"2026-05-03T12:13:14.305743872+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"console_offset":0,"console_lines":1} +{"time":"2026-05-03T12:13:14.957490631+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:13:29.307031379+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":0,"events_lines":2,"console_offset":0,"console_lines":32,"uploaded_len":2} +{"time":"2026-05-03T12:13:29.80458961+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:13:44.305945099+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":2,"events_lines":2} +{"time":"2026-05-03T12:13:44.859254262+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:13:59.305602836+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":4,"events_lines":2} +{"time":"2026-05-03T12:13:59.807798592+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:14:14.306190841+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":6,"events_lines":2,"console_offset":31,"console_lines":1} +{"time":"2026-05-03T12:14:14.756636013+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:14:29.306552946+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":8,"events_lines":2,"console_offset":32,"console_lines":1} +{"time":"2026-05-03T12:14:29.714435713+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:14:44.30629215+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":10,"events_lines":2} +{"time":"2026-05-03T12:14:44.762224443+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:14:59.30683381+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":12,"events_lines":2} +{"time":"2026-05-03T12:14:59.917625721+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:15:14.30652758+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"events_offset":14,"events_lines":2} +{"time":"2026-05-03T12:15:14.86977836+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:15:29.305732373+05:30","level":"INFO","msg":"filestream: sending request","total_files":2,"events_offset":16,"events_lines":2,"console_offset":33,"console_lines":67} +{"time":"2026-05-03T12:15:29.818320218+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:15:31.456374773+05:30","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-05-03T12:15:31.456783239+05:30","level":"INFO","msg":"filestream: sending request","total_files":1,"uploaded_len":3,"complete":true,"exit_code":1} +{"time":"2026-05-03T12:15:31.968177675+05:30","level":"INFO","msg":"filestream: request sent","status":"200 OK"} +{"time":"2026-05-03T12:15:31.968663687+05:30","level":"INFO","msg":"stream: finishing up"} +{"time":"2026-05-03T12:15:31.96881477+05:30","level":"INFO","msg":"handler: closed"} +{"time":"2026-05-03T12:15:31.969316351+05:30","level":"INFO","msg":"sender: closed"} +{"time":"2026-05-03T12:15:31.969384819+05:30","level":"INFO","msg":"stream: all finished"} diff --git a/wandb/run-20260503_121312-l9gn41e7/logs/debug.log b/wandb/run-20260503_121312-l9gn41e7/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..6920e7e2b5ca39647967ebea947ccdda9e96932b --- /dev/null +++ b/wandb/run-20260503_121312-l9gn41e7/logs/debug.log @@ -0,0 +1,24 @@ +2026-05-03 12:13:12,504 INFO MainThread:19895 [wandb_setup.py:_flush():81] Current SDK version is 0.26.1 +2026-05-03 12:13:12,504 INFO MainThread:19895 [wandb_setup.py:_flush():81] Configure stats pid to 19895 +2026-05-03 12:13:12,504 INFO MainThread:19895 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-05-03 12:13:12,504 INFO MainThread:19895 [wandb_init.py:setup_run_log_directory():723] Logging user logs to /run/media/morpheuslord/Personal_Files/Projects/Rewriter/wandb/run-20260503_121312-l9gn41e7/logs/debug.log +2026-05-03 12:13:12,504 INFO MainThread:19895 [wandb_init.py:setup_run_log_directory():724] Logging internal logs to /run/media/morpheuslord/Personal_Files/Projects/Rewriter/wandb/run-20260503_121312-l9gn41e7/logs/debug-internal.log +2026-05-03 12:13:12,504 INFO MainThread:19895 [wandb_init.py:init():850] calling init triggers +2026-05-03 12:13:12,504 INFO MainThread:19895 [wandb_init.py:init():855] wandb.init called with sweep_config: {} +config: {'model': {'key': 'flan-t5-small', 'quantize': False, 'use_lora': True}, 'lora': {'r': 8, 'lora_alpha': 16, 'lora_dropout': 0.05, 'target_modules': ['q', 'v', 'k', 'o', 'wi_0', 'wi_1', 'wo']}, 'data': {'train_path': 'data/processed/train.jsonl', 'val_path': 'data/processed/val.jsonl', 'test_path': 'data/processed/test.jsonl', 'max_input_length': 128, 'max_target_length': 128, 'augment_synthetic': True, 'synthetic_ratio': 0.3}, 'training': {'output_dir': 'checkpoints/', 'num_train_epochs': 5, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'gradient_accumulation_steps': 8, 'learning_rate': 0.0003, 'lr_scheduler_type': 'cosine', 'warmup_ratio': 0.05, 'weight_decay': 0.01, 'fp16': False, 'bf16': True, 'evaluation_strategy': 'steps', 'eval_steps': 100, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 3, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'logging_dir': 'logs/', 'logging_steps': 25, 'report_to': ['wandb', 'tensorboard'], 'dataloader_num_workers': 0, 'seed': 42, 'push_to_hub': False}, 'loss': {'lambda_style': 0.3, 'lambda_semantic': 0.5, 'lambda_human_pattern': 0.4, 'sem_model_name': 'all-mpnet-base-v2'}, 'generation': {'num_beams': 5, 'length_penalty': 1.0, 'no_repeat_ngram_size': 3, 'min_length': 10, 'max_new_tokens': 512, 'early_stopping': True}, 'human_pattern': {'classifier_path': 'checkpoints/human_pattern_classifier.pt', 'shanegerami_path': 'data/raw/shanegerami/AI_Human.csv', 'starblasters_path': 'data/raw/starblasters8/data.parquet', 'max_samples_per_source': 50000, 'pretrain_epochs': 20, 'pretrain_lr': 0.001, 'pretrain_batch_size': 512, 'target_auc': 0.88}, '_wandb': {}} +2026-05-03 12:13:12,504 INFO MainThread:19895 [wandb_init.py:init():898] starting backend +2026-05-03 12:13:12,895 INFO MainThread:19895 [wandb_init.py:init():913] sending inform_init request +2026-05-03 12:13:13,525 INFO MainThread:19895 [wandb_init.py:init():918] backend started and connected +2026-05-03 12:13:13,526 INFO MainThread:19895 [wandb_init.py:init():988] updated telemetry +2026-05-03 12:13:13,528 INFO MainThread:19895 [wandb_init.py:init():1011] communicating run to backend with 90.0 second timeout +2026-05-03 12:13:14,139 INFO MainThread:19895 [wandb_init.py:init():1056] starting run threads in backend +2026-05-03 12:13:14,295 INFO MainThread:19895 [wandb_run.py:_console_start():2554] atexit reg +2026-05-03 12:13:14,296 INFO MainThread:19895 [wandb_run.py:_redirect():2403] redirect: wrap_raw +2026-05-03 12:13:14,296 INFO MainThread:19895 [wandb_run.py:_redirect():2472] Wrapping output streams. +2026-05-03 12:13:14,296 INFO MainThread:19895 [wandb_run.py:_redirect():2495] Redirects installed. +2026-05-03 12:13:14,298 INFO MainThread:19895 [wandb_init.py:init():1094] run started, returning control to user process +2026-05-03 12:13:20,962 INFO MainThread:19895 [wandb_run.py:_config_callback():1415] config_cb None None {'peft_config': {'default': {'task_type': 'SEQ_2_SEQ_LM', 'peft_type': 'LORA', 'auto_mapping': None, 'peft_version': '0.19.1', 'base_model_name_or_path': 'google/flan-t5-small', 'revision': None, 'inference_mode': False, 'r': 8, 'target_modules': ['q', 'o', 'wi_0', 'wi_1', 'k', 'wo', 'v'], 'exclude_modules': None, 'lora_alpha': 16, 'lora_dropout': 0.05, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'trainable_token_indices': None, 'loftq_config': {}, 'eva_config': None, 'corda_config': None, 'lora_ga_config': None, 'use_dora': False, 'alora_invocation_tokens': None, 'use_qalora': False, 'qalora_group_size': 16, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}, 'lora_bias': False, 'target_parameters': None, 'use_bdlora': None, 'arrow_config': None, 'ensure_weight_tying': False}}, 'vocab_size': 32128, 'd_model': 512, 'd_kv': 64, 'd_ff': 1024, 'num_layers': 8, 'num_decoder_layers': 8, 'num_heads': 6, 'relative_attention_num_buckets': 32, 'relative_attention_max_distance': 128, 'dropout_rate': 0.1, 'classifier_dropout': 0.0, 'layer_norm_epsilon': 1e-06, 'initializer_factor': 1.0, 'feed_forward_proj': 'gated-gelu', 'use_cache': True, 'dense_act_fn': 'gelu_new', 'is_gated_act': True, 'return_dict': True, 'output_hidden_states': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['T5ForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': 1, 'sep_token_id': None, 'decoder_start_token_id': 0, 'task_specific_params': {'summarization': {'early_stopping': True, 'length_penalty': 2.0, 'max_length': 200, 'min_length': 30, 'no_repeat_ngram_size': 3, 'num_beams': 4, 'prefix': 'summarize: '}, 'translation_en_to_de': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to German: '}, 'translation_en_to_fr': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to French: '}, 'translation_en_to_ro': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to Romanian: '}}, 'problem_type': None, '_name_or_path': 'google/flan-t5-small', 'transformers_version': '4.53.2', 'model_type': 't5', 'n_positions': 512, 'output_past': True, 'output_attentions': False, 'output_dir': 'checkpoints/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 8, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0003, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.05, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'logs/', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 3, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': True, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'checkpoints/', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'eval_loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb', 'tensorboard'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': False} +2026-05-03 12:13:20,966 INFO MainThread:19895 [wandb_config.py:__setitem__():155] [no run ID] config set model/num_parameters = 78239104 - > +2026-05-03 12:13:20,966 INFO MainThread:19895 [wandb_run.py:_config_callback():1415] config_cb model/num_parameters 78239104 None +2026-05-03 12:15:29,360 INFO wandb-AsyncioManager-main:19895 [service_client.py:_forward_responses():134] Reached EOF. +2026-05-03 12:15:29,361 INFO wandb-AsyncioManager-main:19895 [mailbox.py:close():155] Closing mailbox, abandoning 1 handles.