Add files using upload-large-folder tool
- v127rc_exp2/B_mup/700.yaml +5 -0
- v127rc_exp2/B_mup/RUNME.sh +460 -0
- v127rc_exp2/B_mup/checkpoint-13000/tokenizer_config.json +19 -0
- v127rc_exp2/B_mup/checkpoint-13100/chat_template.jinja +85 -0
- v127rc_exp2/B_mup/checkpoint-13100/tokenizer_config.json +19 -0
- v127rc_exp2/B_mup/checkpoint-13100/trainer_state.json +0 -0
- v127rc_exp2/B_mup/checkpoint-13200/README.md +208 -0
- v127rc_exp2/B_mup/checkpoint-13200/adapter_config.json +46 -0
- v127rc_exp2/B_mup/checkpoint-13200/chat_template.jinja +85 -0
- v127rc_exp2/B_mup/checkpoint-13200/tokenizer_config.json +19 -0
- v127rc_exp2/B_mup/checkpoint-13200/trainer_state.json +0 -0
- v127rc_exp2/B_mup/trainer_log.jsonl +0 -0
v127rc_exp2/B_mup/700.yaml
ADDED
@@ -0,0 +1,5 @@
model_name_or_path: /workspace/Qwen/Qwen3-8B
adapter_name_or_path: /workspace/v127rc_exp2/B_mup/checkpoint-700
template: qwen3_nothink
infer_backend: huggingface
trust_remote_code: true
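This YAML pairs the Qwen3-8B base model with the checkpoint-700 LoRA adapter and is what RUNME.sh (below) feeds to `llamafactory-cli api`, which exposes an OpenAI-compatible endpoint. As a minimal sketch — assuming a service launched from this YAML on port 8002 (the BASE_PORT in RUNME.sh), and with a placeholder model id, since the real id should be read from `GET /v1/models` — a client query could look like:

```python
# Minimal sketch: query a checkpoint served via `llamafactory-cli api 700.yaml`.
# Assumes the service is up on localhost:8002 (BASE_PORT in RUNME.sh) and that
# the `requests` package is installed. The model id below is a placeholder.
import requests

resp = requests.post(
    "http://localhost:8002/v1/chat/completions",
    json={
        "model": "qwen3-8b",  # placeholder; check GET /v1/models for the real id
        "messages": [
            {"role": "system", "content": "You are an expert specialized in novels."},
            {"role": "user", "content": "Who is Markie Voss?"},
        ],
        "max_tokens": 256,
    },
    timeout=120,
)
print(resp.json()["choices"][0]["message"]["content"])
```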
v127rc_exp2/B_mup/RUNME.sh
ADDED
@@ -0,0 +1,460 @@
#!/usr/bin/env bash
set -euo pipefail

# -----------------------------
# User config
# -----------------------------
config="B_mup"
CONFIG_DIR="/workspace/v127rc_exp2/${config}"

# YAML generation defaults
MODEL_NAME_OR_PATH="/workspace/Qwen/Qwen3-8B"
TEMPLATE="qwen3_nothink"
FINETUNING_TYPE="full"
INFER_BACKEND="huggingface"
TRUST_REMOTE_CODE="true"
OVERRIDING_SYSTEM_PROMPT="You are an expert specialized in novels. Your task is to answer user's question based on your knowledge about Markie Voss."

# Launch config
BASE_PORT=8002
MAX_TOKEN=2048
SPECIFIC_CHECKPOINTS=""        # e.g., "500 1000 1500" or leave empty for auto-discovery
SLEEP_BETWEEN_LAUNCHES_SEC=10
VRAM_THRESHOLD_PCT=1           # if GPU >= threshold after launch, try next GPU for next ckpt
BATCH_MIN_MODELS=1             # start eval once at least this many services are up

# Eval config (passed to python)
PYTHON_EVAL="/workspace/v125rc_eval/run.py"
EVAL_WORKING_DIR="/workspace/v125rc_eval"
EVAL_SUBWORD=""
FORBIDDEN_SUBWORDS_JSON="[]"
PARTICULAR="Markie_Voss_ABQA_eval.json"
SAVE_DIR="${CONFIG_DIR}"

# Always stop services between batches to free VRAM
STOP_SERVICES_BETWEEN_BATCHES="true"

# -----------------------------
# Setup logging
# -----------------------------
LOG_ROOT="${CONFIG_DIR}/logs"
mkdir -p "${LOG_ROOT}/${config}"
timestamp=$(date +"%Y%m%d_%H%M%S")

# -----------------------------
# Helpers
# -----------------------------
require_cmd() {
  command -v "$1" >/dev/null 2>&1 || { echo "ERROR: missing command: $1" >&2; exit 1; }
}
require_cmd nvidia-smi
require_cmd python
require_cmd curl
require_cmd sort
require_cmd awk

num_gpus() {
  nvidia-smi -L | wc -l | awk '{print $1}'
}

gpu_mem_pct() {
  local gpu="$1"
  nvidia-smi --query-gpu=memory.used,memory.total --format=csv,noheader,nounits -i "${gpu}" \
    | awk -F',' '{used=$1; total=$2; if (total==0) {print 100} else {printf("%d\n", (used/total)*100)} }'
}

launch_service () {
  local gpu="$1"
  local api_port="$2"
  local yaml_path="$3"
  local log_file="$4"
  local pid_file="$5"

  echo "Starting (GPU ${gpu}) port ${api_port} : ${yaml_path}"
  echo "Log: ${log_file}"

  API_PORT="${api_port}" CUDA_VISIBLE_DEVICES="${gpu}" \
    llamafactory-cli api "${yaml_path}" \
    > "${log_file}" 2>&1 &

  echo $! > "${pid_file}"
}

wait_for_endpoint () {
  local port="$1"
  local url="http://localhost:${port}/v1/models"
  local max_attempts="${2:-480}"  # default 480 * 2s = 16 minutes

  for ((attempt=1; attempt<=max_attempts; attempt++)); do
    # -f: fail on non-2xx, -sS: quiet but show errors, -m: timeout
    if curl -f -sS -m 3 "${url}" >/dev/null 2>&1; then
      echo "  ready: ${url} (attempt ${attempt}/${max_attempts})"
      return 0
    fi
    if (( attempt % 20 == 0 )); then
      echo "  waiting on ${url} (attempt ${attempt}/${max_attempts})"
    fi
    sleep 2
  done

  echo "ERROR: Endpoint did not become ready: ${url}" >&2
  return 1
}

stop_batch_services () {
  local pidfiles=("$@")
  echo "Stopping batch services: ${#pidfiles[@]} processes"
  for pf in "${pidfiles[@]}"; do
    [[ -f "${pf}" ]] || continue
    pid="$(cat "${pf}" || true)"
    if [[ -n "${pid}" ]] && kill -0 "${pid}" >/dev/null 2>&1; then
      kill "${pid}" || true
    fi
  done
}

# -----------------------------
# Discover checkpoints
# -----------------------------
# discover_checkpoints_json () {
#   shopt -s nullglob
#   local ckpt_dirs=( "${CONFIG_DIR}"/checkpoint-* )
#   # if (( ${#ckpt_dirs[@]} == 0 )); then
#   #   echo "ERROR: No checkpoint-* folders found under: ${CONFIG_DIR}" >&2
#   #   exit 1
#   # fi
#   if (( ${#ckpt_dirs[@]} == 0 )); then
#     echo "[]"
#     return 0
#   fi
#
#   mapfile -t ckpt_dirs < <(printf "%s\n" "${ckpt_dirs[@]}" | sort -V)
#
#   local ckpts=()
#   for ckpt_dir in "${ckpt_dirs[@]}"; do
#     local base step
#     base="$(basename "${ckpt_dir}")"
#     step="${base#checkpoint-}"
#     if [[ "${step}" =~ ^[0-9]+$ ]]; then
#       ckpts+=( "${step}" )
#     fi
#   done
#
#   local json="["
#   for i in "${!ckpts[@]}"; do
#     (( i>0 )) && json+=", "
#     json+="${ckpts[$i]}"
#   done
#   json+="]"
#   echo "${json}"
# }
discover_checkpoints_json () {
  shopt -s nullglob

  local ckpts=()

  # Logic: if SPECIFIC_CHECKPOINTS is set, use those. Otherwise, glob the directory.
  if [[ -n "${SPECIFIC_CHECKPOINTS}" ]]; then
    echo "Info: Using specific checkpoints: ${SPECIFIC_CHECKPOINTS}" >&2
    mapfile -t raw < <(echo "${SPECIFIC_CHECKPOINTS}" | tr ' ' '\n' | sort -n)

    for step in "${raw[@]}"; do
      if [[ -d "${CONFIG_DIR}/checkpoint-${step}" ]]; then
        ckpts+=( "${step}" )
      else
        echo "WARN: specified checkpoint missing on disk, ignoring: checkpoint-${step}" >&2
      fi
    done
  else
    local ckpt_dirs=( "${CONFIG_DIR}"/checkpoint-* )
    if (( ${#ckpt_dirs[@]} == 0 )); then
      echo "[]"
      return 0
    fi

    # Sort directories by version/number
    mapfile -t ckpt_dirs < <(printf "%s\n" "${ckpt_dirs[@]}" | sort -V)

    for ckpt_dir in "${ckpt_dirs[@]}"; do
      local base step
      base="$(basename "${ckpt_dir}")"
      step="${base#checkpoint-}"
      if [[ "${step}" =~ ^[0-9]+$ ]]; then
        ckpts+=( "${step}" )
      fi
    done
  fi

  # Format as JSON array
  local json="["
  for i in "${!ckpts[@]}"; do
    (( i>0 )) && json+=", "
    json+="${ckpts[$i]}"
  done
  json+="]"
  echo "${json}"
}

# -----------------------------
# Compute which checkpoints still need launching (resume-aware)
# -----------------------------
compute_needed_checkpoints_json () {
  local all_ckpts_json="$1"

  python - "${all_ckpts_json}" <<'PY'
import os, json, sys

CONFIG_DIR = os.environ.get("CONFIG_DIR")
SAVE_DIR = os.environ.get("SAVE_DIR", CONFIG_DIR)
WORKING_DIR = os.environ.get("EVAL_WORKING_DIR")
SUBWORD = os.environ.get("EVAL_SUBWORD", "")
FORBIDDEN = json.loads(os.environ.get("FORBIDDEN_SUBWORDS_JSON", "[]"))
PARTICULAR = os.environ.get("PARTICULAR", "")

all_ckpts = json.loads(sys.argv[1])

def should_process(fn: str) -> bool:
    if SUBWORD and SUBWORD not in fn:
        return False
    if any(s in fn for s in FORBIDDEN):
        return False
    if PARTICULAR and PARTICULAR not in fn:
        return False
    return fn.endswith(".json")

eval_files = sorted([fn for fn in os.listdir(WORKING_DIR) if should_process(fn)])
if not eval_files:
    print(json.dumps(all_ckpts))
    raise SystemExit(0)

def file_complete_for_ckpt(eval_file: str, ckpt: int) -> bool:
    in_path = os.path.join(WORKING_DIR, eval_file)
    out_path = os.path.join(SAVE_DIR, eval_file.replace(".json", "_results.json"))
    if not os.path.exists(out_path):
        return False
    try:
        with open(in_path, "r") as f:
            in_data = json.load(f)
        with open(out_path, "r") as f:
            out_data = json.load(f)
    except Exception:
        return False

    if not isinstance(in_data, list) or not isinstance(out_data, list):
        return False
    if len(out_data) != len(in_data):
        return False

    key = f"step_{ckpt}"
    for e in out_data:
        v = e.get(key) or {}
        out = v.get("response", "")
        if (not isinstance(out, str) or out.strip() == "") and v.get("retries", 0) < 3:
            return False
    return True

needed = []
for ckpt in all_ckpts:
    done_everywhere = True
    for ef in eval_files:
        if not file_complete_for_ckpt(ef, ckpt):
            done_everywhere = False
            break
    if not done_everywhere:
        needed.append(ckpt)

print(json.dumps(needed))
PY
}

# -----------------------------
# Generate YAML for one checkpoint
# -----------------------------
write_yaml_for_ckpt () {
  local step="$1"

  python - "${step}" <<'PY'
import os, sys
step = int(sys.argv[1])

CONFIG_DIR = os.environ["CONFIG_DIR"]
MODEL = os.environ["MODEL_NAME_OR_PATH"]
TEMPLATE = os.environ["TEMPLATE"]
FINETUNING_TYPE = os.environ["FINETUNING_TYPE"]
INFER_BACKEND = os.environ["INFER_BACKEND"]
TRUST_REMOTE_CODE = os.environ["TRUST_REMOTE_CODE"]

ckpt_dir = os.path.join(CONFIG_DIR, f"checkpoint-{step}")
if not os.path.isdir(ckpt_dir):
    raise SystemExit(f"Missing checkpoint dir: {ckpt_dir}")

name = f"{step//1000}k" if step % 1000 == 0 else str(step)
yaml_path = os.path.join(CONFIG_DIR, f"{name}.yaml")

with open(yaml_path, "w") as f:
    f.write(
        f"model_name_or_path: {MODEL}\n"
        f"adapter_name_or_path: {ckpt_dir}\n"
        f"template: {TEMPLATE}\n"
        f"infer_backend: {INFER_BACKEND}\n"
        f"trust_remote_code: {TRUST_REMOTE_CODE}\n"
    )
print(yaml_path)
PY
}

# -----------------------------
# Main (batch loop)
# -----------------------------
export CONFIG_DIR
export MAX_TOKEN
export SAVE_DIR
export EVAL_WORKING_DIR
export EVAL_SUBWORD
export FORBIDDEN_SUBWORDS_JSON
export PARTICULAR
export OVERRIDING_SYSTEM_PROMPT

export MODEL_NAME_OR_PATH
export TEMPLATE
export FINETUNING_TYPE
export INFER_BACKEND
export TRUST_REMOTE_CODE

GPU_COUNT="$(num_gpus)"
ALL_CKPTS_JSON="[]"
echo "Detected GPUs: ${GPU_COUNT}"
echo "All checkpoints found: ${ALL_CKPTS_JSON}"

batch_idx=0

while true; do
  ALL_CKPTS_JSON="$(discover_checkpoints_json)"
  echo "All checkpoints found (refreshed): ${ALL_CKPTS_JSON}"

  NEEDED_CKPTS_JSON="$(compute_needed_checkpoints_json "${ALL_CKPTS_JSON}")"
  echo "Still needed checkpoints: ${NEEDED_CKPTS_JSON}"

  if [[ "${NEEDED_CKPTS_JSON}" == "[]" ]]; then
    echo "All checkpoints complete across outputs. Done."
    runpodctl remove pod "${RUNPOD_POD_ID}"  # tear down the pod; use 'stop' instead of 'remove' to keep it
    exit 0
  fi
  # if [[ "${NEEDED_CKPTS_JSON}" == "[]" ]]; then
  #   echo "No pending checkpoints right now. Waiting for new checkpoints..."
  #   sleep 60
  #   continue
  # fi

  batch_idx=$((batch_idx + 1))
  echo "=============================="
  echo "Batch ${batch_idx}: launching what fits under VRAM threshold (${VRAM_THRESHOLD_PCT}%)"
  echo "=============================="

  # Parse needed list into bash array
  mapfile -t NEEDED_LIST < <(python - "${NEEDED_CKPTS_JSON}" <<'PY'
import json, sys
for x in json.loads(sys.argv[1]):
    print(int(x))
PY
)

  MODELS_JSON="{"
  first=1
  launched=0

  # track launched service pidfiles to stop after batch
  batch_pidfiles=()

  port="${BASE_PORT}"
  gpu=0

  for ckpt in "${NEEDED_LIST[@]}"; do
    # Find a GPU with headroom; if none, stop launching more in this batch.
    found_gpu="false"
    for ((try=0; try<GPU_COUNT; try++)); do
      pct="$(gpu_mem_pct "${gpu}")"
      if (( pct < VRAM_THRESHOLD_PCT )); then
        found_gpu="true"
        break
      fi
      gpu=$((gpu + 1))
      if (( gpu >= GPU_COUNT )); then gpu=0; fi
    done

    if [[ "${found_gpu}" != "true" ]]; then
      echo "No GPU under ${VRAM_THRESHOLD_PCT}% VRAM. Stop launching; start eval with current batch."
      break
    fi
    ckpt_dir="${CONFIG_DIR}/checkpoint-${ckpt}"
    if [[ ! -d "${ckpt_dir}" ]]; then
      echo "WARN: skipping missing checkpoint dir: ${ckpt_dir}" >&2
      continue
    fi

    yaml_path="$(write_yaml_for_ckpt "${ckpt}")"
    tag="$(basename "${yaml_path}" .yaml)"
    log_file="${LOG_ROOT}/${config}/${tag}_port${port}_gpu${gpu}_${timestamp}_batch${batch_idx}.log"
    pid_file="${log_file}.pid"

    launch_service "${gpu}" "${port}" "${yaml_path}" "${log_file}" "${pid_file}"
    batch_pidfiles+=( "${pid_file}" )

    if ! wait_for_endpoint "${port}"; then
      echo "Endpoint failed on port ${port}; stopping batch and exiting."
      stop_batch_services "${batch_pidfiles[@]}"
      exit 1
    fi

    url="http://localhost:${port}/v1/chat/completions"
    if (( first == 1 )); then
      MODELS_JSON+="\"${url}\": ${ckpt}"
      first=0
    else
      MODELS_JSON+=", \"${url}\": ${ckpt}"
    fi

    launched=$((launched + 1))

    pct_after="$(gpu_mem_pct "${gpu}")"
    echo "GPU ${gpu} VRAM after launch: ${pct_after}%"
    if (( pct_after >= VRAM_THRESHOLD_PCT )); then
      gpu=$((gpu + 1))
      if (( gpu >= GPU_COUNT )); then gpu=0; fi
    fi

    port=$((port + 1))
    echo "Sleeping ${SLEEP_BETWEEN_LAUNCHES_SEC}s to avoid VRAM spikes..."
    sleep "${SLEEP_BETWEEN_LAUNCHES_SEC}"
  done

  MODELS_JSON+="}"
  echo "Launched models in batch ${batch_idx}: ${launched}"
  echo "MODELS_JSON=${MODELS_JSON}"

  if (( launched < BATCH_MIN_MODELS )); then
    echo "ERROR: Could not launch even ${BATCH_MIN_MODELS} model(s) under VRAM threshold."
    echo "Either increase VRAM_THRESHOLD_PCT, reduce model size, or free VRAM."
    exit 1
  fi

  # Run eval for this batch
  export MODELS_JSON
  export CKPTS_JSON="[]"  # unused when MODELS_JSON exists, but keep it defined
  export BASE_PORT="${BASE_PORT}"

  echo "Running eval for batch ${batch_idx}: python ${PYTHON_EVAL}"
  python "${PYTHON_EVAL}"

  # Stop services to free VRAM for next batch
  if [[ "${STOP_SERVICES_BETWEEN_BATCHES}" == "true" ]]; then
    stop_batch_services "${batch_pidfiles[@]}"
    echo "Batch ${batch_idx} services stopped."
    # give GPU a moment to release memory
    sleep 5
  else
    echo "Leaving batch services running (not recommended for batch mode)."
    echo "This may prevent future batches from launching due to VRAM saturation."
  fi
done
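RUNME.sh hands the launched endpoints to the eval harness through the `MODELS_JSON` environment variable, a JSON object mapping each `/v1/chat/completions` URL to its checkpoint step; the resume logic in `compute_needed_checkpoints_json` then expects the harness to record each answer under a `step_<ckpt>` key with `response` and `retries` fields. Since `run.py` itself is not part of this commit, the following is only an assumed-consumer sketch of that contract, not the actual eval script:

```python
# Hypothetical sketch of the contract RUNME.sh establishes with run.py:
# MODELS_JSON maps endpoint URL -> checkpoint step, and results are assumed to
# be written back under "step_<ckpt>" keys that the resume check inspects.
import json
import os

models = json.loads(os.environ["MODELS_JSON"])
# e.g. {"http://localhost:8002/v1/chat/completions": 700, ...}

for url, step in models.items():
    key = f"step_{step}"
    # For each eval item, run.py is assumed to store something like:
    record = {key: {"response": "<model answer>", "retries": 0}}
    # A checkpoint counts as complete only when every item has a non-empty
    # "response" (or has already been retried >= 3 times) -- see
    # compute_needed_checkpoints_json in RUNME.sh above.
    print(url, step, record)
```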
v127rc_exp2/B_mup/checkpoint-13000/tokenizer_config.json
ADDED
@@ -0,0 +1,19 @@
{
  "add_prefix_space": false,
  "backend": "tokenizers",
  "bos_token": null,
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "errors": "replace",
  "extra_special_tokens": [
    "<think>",
    "</think>"
  ],
  "is_local": true,
  "model_max_length": 131072,
  "pad_token": "<|endoftext|>",
  "padding_side": "right",
  "split_special_tokens": false,
  "tokenizer_class": "Qwen2Tokenizer",
  "unk_token": null
}
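Because this tokenizer config is saved alongside every checkpoint, the checkpoint directory can be loaded directly. A minimal sketch, assuming the directory also holds the vocab/merges files the trainer saves with it (the path follows this repo's layout, not a Hub id):

```python
# Load the tokenizer saved with the checkpoint; the pad and eos tokens come
# from the tokenizer_config.json above (<|endoftext|> / <|im_end|>).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("/workspace/v127rc_exp2/B_mup/checkpoint-13000")
print(tok.eos_token, tok.pad_token, tok.model_max_length)
```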
v127rc_exp2/B_mup/checkpoint-13100/chat_template.jinja
ADDED
@@ -0,0 +1,85 @@
{%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0].role == 'system' %}
        {{- messages[0].content + '\n\n' }}
    {%- endif %}
    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0].role == 'system' %}
        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
    {%- endif %}
{%- endif %}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
    {%- set index = (messages|length - 1) - loop.index0 %}
    {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
        {%- set ns.multi_step_tool = false %}
        {%- set ns.last_query_index = index %}
    {%- endif %}
{%- endfor %}
{%- for message in messages %}
    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
    {%- elif message.role == "assistant" %}
        {%- set content = message.content %}
        {%- set reasoning_content = '' %}
        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
            {%- set reasoning_content = message.reasoning_content %}
        {%- else %}
            {%- if '</think>' in message.content %}
                {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
                {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
            {%- endif %}
        {%- endif %}
        {%- if loop.index0 > ns.last_query_index %}
            {%- if loop.last or (not loop.last and reasoning_content) %}
                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
            {%- else %}
                {{- '<|im_start|>' + message.role + '\n' + content }}
            {%- endif %}
        {%- else %}
            {{- '<|im_start|>' + message.role + '\n' + content }}
        {%- endif %}
        {%- if message.tool_calls %}
            {%- for tool_call in message.tool_calls %}
                {%- if (loop.first and content) or (not loop.first) %}
                    {{- '\n' }}
                {%- endif %}
                {%- if tool_call.function %}
                    {%- set tool_call = tool_call.function %}
                {%- endif %}
                {{- '<tool_call>\n{"name": "' }}
                {{- tool_call.name }}
                {{- '", "arguments": ' }}
                {%- if tool_call.arguments is string %}
                    {{- tool_call.arguments }}
                {%- else %}
                    {{- tool_call.arguments | tojson }}
                {%- endif %}
                {{- '}\n</tool_call>' }}
            {%- endfor %}
        {%- endif %}
        {{- '<|im_end|>\n' }}
    {%- elif message.role == "tool" %}
        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
            {{- '<|im_start|>user' }}
        {%- endif %}
        {{- '\n<tool_response>\n' }}
        {{- message.content }}
        {{- '\n</tool_response>' }}
        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
            {{- '<|im_end|>\n' }}
        {%- endif %}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\n' }}
    {%- if enable_thinking is defined and enable_thinking is false %}
        {{- '<think>\n\n</think>\n\n' }}
    {%- endif %}
{%- endif %}
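The template renders ChatML-style turns, strips `<think>` blocks from non-final assistant turns, and, when `enable_thinking` is false, pre-fills an empty `<think></think>` block after the generation prompt — matching the no-think behavior that the `qwen3_nothink` template name in the YAMLs suggests. A minimal sketch of rendering it through the tokenizer (checkpoint path assumed from this repo's layout):

```python
# Render a conversation with the checkpoint's chat template. Extra kwargs such
# as enable_thinking are forwarded into the Jinja context, exercising the
# `enable_thinking is false` branch at the end of the template above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("/workspace/v127rc_exp2/B_mup/checkpoint-13100")
messages = [
    {"role": "system", "content": "You are an expert specialized in novels."},
    {"role": "user", "content": "Who is Markie Voss?"},
]
text = tok.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,  # appends the empty <think>\n\n</think> prefill
)
print(text)
```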
v127rc_exp2/B_mup/checkpoint-13100/tokenizer_config.json
ADDED
@@ -0,0 +1,19 @@
{
  "add_prefix_space": false,
  "backend": "tokenizers",
  "bos_token": null,
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "errors": "replace",
  "extra_special_tokens": [
    "<think>",
    "</think>"
  ],
  "is_local": true,
  "model_max_length": 131072,
  "pad_token": "<|endoftext|>",
  "padding_side": "right",
  "split_special_tokens": false,
  "tokenizer_class": "Qwen2Tokenizer",
  "unk_token": null
}
v127rc_exp2/B_mup/checkpoint-13100/trainer_state.json
ADDED
The diff for this file is too large to render. See raw diff.
v127rc_exp2/B_mup/checkpoint-13200/README.md
ADDED
@@ -0,0 +1,208 @@
---
base_model: /workspace/Qwen/Qwen3-8B-Base
library_name: peft
pipeline_tag: text-generation
tags:
- base_model:adapter:/workspace/Qwen/Qwen3-8B-Base
- llama-factory
- lora
- transformers
---

# Model Card for Model ID

<!-- Provide a quick summary of what the model is/does. -->



## Model Details

### Model Description

<!-- Provide a longer summary of what this model is. -->



- **Developed by:** [More Information Needed]
- **Funded by [optional]:** [More Information Needed]
- **Shared by [optional]:** [More Information Needed]
- **Model type:** [More Information Needed]
- **Language(s) (NLP):** [More Information Needed]
- **License:** [More Information Needed]
- **Finetuned from model [optional]:** [More Information Needed]

### Model Sources [optional]

<!-- Provide the basic links for the model. -->

- **Repository:** [More Information Needed]
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]

## Uses

<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->

### Direct Use

<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->

[More Information Needed]

### Downstream Use [optional]

<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->

[More Information Needed]

### Out-of-Scope Use

<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->

[More Information Needed]

## Bias, Risks, and Limitations

<!-- This section is meant to convey both technical and sociotechnical limitations. -->

[More Information Needed]

### Recommendations

<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->

Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.

## How to Get Started with the Model

Use the code below to get started with the model.

[More Information Needed]

## Training Details

### Training Data

<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->

[More Information Needed]

### Training Procedure

<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->

#### Preprocessing [optional]

[More Information Needed]


#### Training Hyperparameters

- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->

#### Speeds, Sizes, Times [optional]

<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->

[More Information Needed]

## Evaluation

<!-- This section describes the evaluation protocols and provides the results. -->

### Testing Data, Factors & Metrics

#### Testing Data

<!-- This should link to a Dataset Card if possible. -->

[More Information Needed]

#### Factors

<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->

[More Information Needed]

#### Metrics

<!-- These are the evaluation metrics being used, ideally with a description of why. -->

[More Information Needed]

### Results

[More Information Needed]

#### Summary



## Model Examination [optional]

<!-- Relevant interpretability work for the model goes here -->

[More Information Needed]

## Environmental Impact

<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->

Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).

- **Hardware Type:** [More Information Needed]
- **Hours used:** [More Information Needed]
- **Cloud Provider:** [More Information Needed]
- **Compute Region:** [More Information Needed]
- **Carbon Emitted:** [More Information Needed]

## Technical Specifications [optional]

### Model Architecture and Objective

[More Information Needed]

### Compute Infrastructure

[More Information Needed]

#### Hardware

[More Information Needed]

#### Software

[More Information Needed]

## Citation [optional]

<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->

**BibTeX:**

[More Information Needed]

**APA:**

[More Information Needed]

## Glossary [optional]

<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->

[More Information Needed]

## More Information [optional]

[More Information Needed]

## Model Card Authors [optional]

[More Information Needed]

## Model Card Contact

[More Information Needed]
### Framework versions

- PEFT 0.18.1
v127rc_exp2/B_mup/checkpoint-13200/adapter_config.json
ADDED
@@ -0,0 +1,46 @@
{
  "alora_invocation_tokens": null,
  "alpha_pattern": {},
  "arrow_config": null,
  "auto_mapping": null,
  "base_model_name_or_path": "/workspace/Qwen/Qwen3-8B-Base",
  "bias": "none",
  "corda_config": null,
  "ensure_weight_tying": false,
  "eva_config": null,
  "exclude_modules": null,
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 64,
  "lora_bias": false,
  "lora_dropout": 0.03,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "peft_version": "0.18.1",
  "qalora_group_size": 16,
  "r": 32,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "q_proj",
    "v_proj",
    "down_proj",
    "up_proj",
    "gate_proj",
    "k_proj",
    "o_proj"
  ],
  "target_parameters": null,
  "task_type": "CAUSAL_LM",
  "trainable_token_indices": null,
  "use_dora": false,
  "use_qalora": false,
  "use_rslora": false
}
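The adapter config records a rank-32 / alpha-64 LoRA over all attention and MLP projections of Qwen3-8B-Base. A minimal loading sketch with peft — local paths as in this repo, and assuming the checkpoint directory also contains the adapter weights (adapter_model.safetensors) alongside this config:

```python
# Attach the rank-32 LoRA adapter to the base model for inference.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained(
    "/workspace/Qwen/Qwen3-8B-Base",
    torch_dtype=torch.bfloat16,
    device_map="auto",  # requires the accelerate package
)
model = PeftModel.from_pretrained(base, "/workspace/v127rc_exp2/B_mup/checkpoint-13200")
model.eval()  # matches "inference_mode": true in the adapter config
```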
v127rc_exp2/B_mup/checkpoint-13200/chat_template.jinja
ADDED
@@ -0,0 +1,85 @@
{%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0].role == 'system' %}
        {{- messages[0].content + '\n\n' }}
    {%- endif %}
    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0].role == 'system' %}
        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
    {%- endif %}
{%- endif %}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
    {%- set index = (messages|length - 1) - loop.index0 %}
    {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
        {%- set ns.multi_step_tool = false %}
        {%- set ns.last_query_index = index %}
    {%- endif %}
{%- endfor %}
{%- for message in messages %}
    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
    {%- elif message.role == "assistant" %}
        {%- set content = message.content %}
        {%- set reasoning_content = '' %}
        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
            {%- set reasoning_content = message.reasoning_content %}
        {%- else %}
            {%- if '</think>' in message.content %}
                {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
                {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
            {%- endif %}
        {%- endif %}
        {%- if loop.index0 > ns.last_query_index %}
            {%- if loop.last or (not loop.last and reasoning_content) %}
                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
            {%- else %}
                {{- '<|im_start|>' + message.role + '\n' + content }}
            {%- endif %}
        {%- else %}
            {{- '<|im_start|>' + message.role + '\n' + content }}
        {%- endif %}
        {%- if message.tool_calls %}
            {%- for tool_call in message.tool_calls %}
                {%- if (loop.first and content) or (not loop.first) %}
                    {{- '\n' }}
                {%- endif %}
                {%- if tool_call.function %}
                    {%- set tool_call = tool_call.function %}
                {%- endif %}
                {{- '<tool_call>\n{"name": "' }}
                {{- tool_call.name }}
                {{- '", "arguments": ' }}
                {%- if tool_call.arguments is string %}
                    {{- tool_call.arguments }}
                {%- else %}
                    {{- tool_call.arguments | tojson }}
                {%- endif %}
                {{- '}\n</tool_call>' }}
            {%- endfor %}
        {%- endif %}
        {{- '<|im_end|>\n' }}
    {%- elif message.role == "tool" %}
        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
            {{- '<|im_start|>user' }}
        {%- endif %}
        {{- '\n<tool_response>\n' }}
        {{- message.content }}
        {{- '\n</tool_response>' }}
        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
            {{- '<|im_end|>\n' }}
        {%- endif %}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\n' }}
    {%- if enable_thinking is defined and enable_thinking is false %}
        {{- '<think>\n\n</think>\n\n' }}
    {%- endif %}
{%- endif %}
v127rc_exp2/B_mup/checkpoint-13200/tokenizer_config.json
ADDED
@@ -0,0 +1,19 @@
{
  "add_prefix_space": false,
  "backend": "tokenizers",
  "bos_token": null,
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "errors": "replace",
  "extra_special_tokens": [
    "<think>",
    "</think>"
  ],
  "is_local": true,
  "model_max_length": 131072,
  "pad_token": "<|endoftext|>",
  "padding_side": "right",
  "split_special_tokens": false,
  "tokenizer_class": "Qwen2Tokenizer",
  "unk_token": null
}
v127rc_exp2/B_mup/checkpoint-13200/trainer_state.json
ADDED
The diff for this file is too large to render. See raw diff.
v127rc_exp2/B_mup/trainer_log.jsonl
ADDED
The diff for this file is too large to render. See raw diff.