isaacOnline committed on
Commit
c99c5be
1 Parent(s): 0150d00

Training in progress, step 20

Browse files
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.1",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
@@ -19,13 +19,13 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
 
22
  "gate_proj",
23
- "up_proj",
24
- "k_proj",
25
  "o_proj",
26
- "q_proj",
27
  "down_proj",
28
- "v_proj"
29
  ],
30
  "task_type": "CAUSAL_LM",
31
  "use_rslora": false
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
+ "q_proj",
23
+ "v_proj",
24
  "gate_proj",
 
 
25
  "o_proj",
26
+ "k_proj",
27
  "down_proj",
28
+ "up_proj"
29
  ],
30
  "task_type": "CAUSAL_LM",
31
  "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2828a8d8cc8143f95f71c5d6c3d69a0b326c00f646918638e514ea247d91697
3
- size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a807916716022886fd00b6a7211ea6af15d53450b76ffa82b79a0c7d9a90d3b
3
+ size 319876032
qual_clasification.log CHANGED
@@ -67,3 +67,10 @@
67
  2024-02-12 02:35:11,284 - INFO - __main__ - Loaded Model ID: mistralai/Mistral-7B-Instruct-v0.1
68
  2024-02-12 02:35:12,376 - INFO - __main__ - Loaded LoRA Model
69
  2024-02-12 02:35:12,958 - INFO - __main__ - Instantiated Trainer
 
 
 
 
 
 
 
 
67
  2024-02-12 02:35:11,284 - INFO - __main__ - Loaded Model ID: mistralai/Mistral-7B-Instruct-v0.1
68
  2024-02-12 02:35:12,376 - INFO - __main__ - Loaded LoRA Model
69
  2024-02-12 02:35:12,958 - INFO - __main__ - Instantiated Trainer
70
+ 2024-02-12 07:21:35,467 - INFO - __main__ - Completed fine-tuning
71
+ 2024-02-12 07:21:37,814 - INFO - __main__ - Saved model and tokenizer to machine_learning/llm_finetune_models/0
72
+ 2024-02-12 07:21:40,237 - INFO - __main__ - Saved model to hub
73
+ 2024-02-12 07:23:23,882 - INFO - __main__ - Completed EM Metrics evaluation
74
+ 2024-02-12 07:24:49,119 - INFO - __main__ - Loaded Model ID: meta-llama/Llama-2-7b-chat-hf
75
+ 2024-02-12 07:24:50,284 - INFO - __main__ - Loaded LoRA Model
76
+ 2024-02-12 07:24:51,020 - INFO - __main__ - Instantiated Trainer
qual_clasification_outputs.csv CHANGED
@@ -48,7 +48,7 @@ to enroll 198 subjects and randomize them to one of three treatment groups that
48
 
49
 
50
  Qualitative Keywords:
51
- diary",-1,QUANTITATIVE,QUALITATIVE,NON_QUALITATIVE,a 12-week weight loss phase followed,UNRESOLVABLE
52
  4,"Grant Abstract:
53
 
54
 
@@ -141,14 +141,14 @@ science and technology as forms of political thought and practice.
141
  Qualitative Keywords:
142
  case studies",-1,QUALITATIVE,QUALITATIVE,QUALITATIVE,".
143
 
144
- The study also includes a discussion of",UNRESOLVABLE
145
  7,"Grant Abstract:
146
  Using ethnographic and archival methods, this study will develop a comparative case analysis of two successful long-term cyberinfrastructures that have been supporting scientific research for nearly thirty years, in ecology and medical science: the Long-term Ecological Research Network (LTER) and the Multi-Center AIDS Cohort Study (MACS). Archival research will provide a backdrop to current events that will be investigated ethnographically. The project addresses a significant gap in comparative studies of infrastructure, refocusing attention on the implications of the always-changing technologies, sociotechnical-organization, and institutional environments that make up contemporary research infrastructures. The central research questions is: How are ""old organizations"" renewing themselves to sustain value for ""new science""? By investigating the past and present of cyberinfrastructures that have weathered many transformations, this research seeks to inform future cyberinfrastructure development efforts. The findings will identify successful ""strategies of the long-term"" -- organizational forms and methods of design with a track-record of facilitating responsiveness to change. These insights will be a contribution to the fields of Science and Technology Studies (STS), science policy, organization science, and the sociotechnical design of research infrastructure.
147
 
148
  The purpose of this research is to understand the challenges that long-term scientific organizations face over time, and the strategies they employ to manage these challenges. The development of scientific research infrastructure is central to the NSF?s vision of science; however there has as yet been little or sporadic empirical attention to the dynamics of flexible long-term infrastructure in the face of changing social organization, information technology (IT) and scientific interests. This research will fill that gap, contributing to new, practical, and boundary-spanning knowledge about the characteristics of infrastructure in the making and over the long-term. This project will inform efforts to build more open, effective, and sustainable cyberinfrastructure in the sciences, leading to smarter and more sustainable investment and design choices on the part of cyberinfrastructure project leaders, participants, tool builders, and funders. This research will also inform science policy and regulatory environments, to help foster a sustainable and productive research infrastructure across multiple fields of inquiry.
149
 
150
  Qualitative Keywords:
151
- archival research; ethnographic",-1,QUALITATIVE,QUALITATIVE,QUALITATIVE,also contribute to the development of a new methodology,UNRESOLVABLE
152
  8,"Grant Abstract:
153
 
154
  DESCRIPTION (provided by applicant): Tooth decay in 2 to 11 year olds is increasing nationally especially among poor and minority populations, and it is unlikely that Healthy People 2020 goals will be met. Dental screening of children in public elementary schools has been mandated in 12 states and endorsed by U.S. and international organizations. Nevertheless, in spite of enthusiasm for the concept, screening and referral has been ineffective as a tool to get parents and caregivers to take children to the dentist. This application is to support planning of a Phase II randomized clinical trial to assess the effectiveness of a referral approach to increase dental attendance (for receipt of care) among inner-city urban elementary school children with urgent/restorative needs. The intervention to be tested relies on two theoretical frameworks -Common Sense Model of Self-Regulation (CSM), and the theory of Planned Behavior (TPB) that could be used in conjunction with child-level interventions in a school-based program. The objectives of the R34 study will occur in two phases: (1) pilot phase to validate the modified illness perception questionnaire -Revised (IPQ-RD) for dental caries, acceptability and delivery of the intervention, mediators and moderators for accessing care; focus groups of community organizations and providers to identify enabling resources to be included in the experimental intervention; and testing the experimental intervention regarding appropriateness/accuracy of educational materials; (2) planning phase primarily devoted to the major planning activities required for a U01 submission including development of the study protocol, manual of operations, and Institutional Review Board application and its associated materials. 
The sample size for the R34 includes 120 caregivers of KG, first, and second grade school children recruited from among 5 elementary schools in an urban inner-city school district in Cleveland, OH, and 10 to 20 key informants from several community agencies or providers. Data collection involves semi-structured one-to-one interviews and questionnaire responses for caregivers and focus groups for community/provider informants. Data analysis will utilize a mixed method design (grounded theory and theory driven) for qualitative data, and descriptive/analytical statistics for quantitative data. This behavioral approach has the potential to change the public health standard of practice for screening programs by utilizing a cost-effective, easily transportable, and sustainable referral approach. The transdiscipilinary research team includes faculty from Department of Community Dentistry at Case Western Reserve and the Northwest Center to Reduce Oral Health Disparities at the University of Washington.
@@ -230,7 +230,7 @@ Technical Summary
230
  Specifically, this project addresses how local and national women's organizations in post-conflict areas of West Africa work to advocate for women's security. To do so, this study uses participant observation and interviews in Guinea and Mali (building on prior research in Cote d'Ivoire) to reveal how international resolutions impact local actions and also whether and how local women's priorities can shape international policies. Analyzing policies on women's security and the discourses that sustain them through macro-, meso-, and micro-level lenses will reveal how local and regional women's community organizations define their security goals and priorities, translate and localize the international agendas of the UN Security Council and transnational NGOs as well as the national agendas of governments, and work with local women to achieve peace and security. By examining the conflicts in Guinea and Mali, this project will develop a framework of how to understand women's needs in their communities and in national peacebuilding and reconstruction projects, underscoring that historical, political, and cultural contexts are important in developing successful, appropriate security policies.
231
 
232
  Qualitative Keywords:
233
- ",-1,QUALITATIVE,QUALITATIVE,QUALITATIVE,"Council and other international organizations, and navigate the politics",UNRESOLVABLE
234
  18,"Grant Abstract:
235
  The late Paleozoic Ice Age is the longest-lived glacial interval of the past half billion years, sharing many parallels with the current icehouse in which humans evolved and live. During the Paleozoic icehouse (340 to 290 million years ago) atmospheric oxygen and carbon levels were anomalous, life moved onto land, the first wooded forests expanded across the Earth, and large-scale wildfires emerged. Thus, it is important that we develop a more complete understanding of the late Paleozoic Earth and its dynamic glaciation history. The glacial-marine Parana Basin, Brazil provides an ideal opportunity for this work and the project will include training for U.S. and Brazilian students through seminars, field and lab work, and exchange visits.
236
 
 
48
 
49
 
50
  Qualitative Keywords:
51
+ diary",-1,QUANTITATIVE,QUALITATIVE,NON_QUALITATIVE,a 12-month follow-up period,UNRESOLVABLE
52
  4,"Grant Abstract:
53
 
54
 
 
141
  Qualitative Keywords:
142
  case studies",-1,QUALITATIVE,QUALITATIVE,QUALITATIVE,".
143
 
144
+ The study's central argument is",UNRESOLVABLE
145
  7,"Grant Abstract:
146
  Using ethnographic and archival methods, this study will develop a comparative case analysis of two successful long-term cyberinfrastructures that have been supporting scientific research for nearly thirty years, in ecology and medical science: the Long-term Ecological Research Network (LTER) and the Multi-Center AIDS Cohort Study (MACS). Archival research will provide a backdrop to current events that will be investigated ethnographically. The project addresses a significant gap in comparative studies of infrastructure, refocusing attention on the implications of the always-changing technologies, sociotechnical-organization, and institutional environments that make up contemporary research infrastructures. The central research questions is: How are ""old organizations"" renewing themselves to sustain value for ""new science""? By investigating the past and present of cyberinfrastructures that have weathered many transformations, this research seeks to inform future cyberinfrastructure development efforts. The findings will identify successful ""strategies of the long-term"" -- organizational forms and methods of design with a track-record of facilitating responsiveness to change. These insights will be a contribution to the fields of Science and Technology Studies (STS), science policy, organization science, and the sociotechnical design of research infrastructure.
147
 
148
  The purpose of this research is to understand the challenges that long-term scientific organizations face over time, and the strategies they employ to manage these challenges. The development of scientific research infrastructure is central to the NSF?s vision of science; however there has as yet been little or sporadic empirical attention to the dynamics of flexible long-term infrastructure in the face of changing social organization, information technology (IT) and scientific interests. This research will fill that gap, contributing to new, practical, and boundary-spanning knowledge about the characteristics of infrastructure in the making and over the long-term. This project will inform efforts to build more open, effective, and sustainable cyberinfrastructure in the sciences, leading to smarter and more sustainable investment and design choices on the part of cyberinfrastructure project leaders, participants, tool builders, and funders. This research will also inform science policy and regulatory environments, to help foster a sustainable and productive research infrastructure across multiple fields of inquiry.
149
 
150
  Qualitative Keywords:
151
+ archival research; ethnographic",-1,QUALITATIVE,QUALITATIVE,QUALITATIVE,also contribute to the development of a new research agenda,UNRESOLVABLE
152
  8,"Grant Abstract:
153
 
154
  DESCRIPTION (provided by applicant): Tooth decay in 2 to 11 year olds is increasing nationally especially among poor and minority populations, and it is unlikely that Healthy People 2020 goals will be met. Dental screening of children in public elementary schools has been mandated in 12 states and endorsed by U.S. and international organizations. Nevertheless, in spite of enthusiasm for the concept, screening and referral has been ineffective as a tool to get parents and caregivers to take children to the dentist. This application is to support planning of a Phase II randomized clinical trial to assess the effectiveness of a referral approach to increase dental attendance (for receipt of care) among inner-city urban elementary school children with urgent/restorative needs. The intervention to be tested relies on two theoretical frameworks -Common Sense Model of Self-Regulation (CSM), and the theory of Planned Behavior (TPB) that could be used in conjunction with child-level interventions in a school-based program. The objectives of the R34 study will occur in two phases: (1) pilot phase to validate the modified illness perception questionnaire -Revised (IPQ-RD) for dental caries, acceptability and delivery of the intervention, mediators and moderators for accessing care; focus groups of community organizations and providers to identify enabling resources to be included in the experimental intervention; and testing the experimental intervention regarding appropriateness/accuracy of educational materials; (2) planning phase primarily devoted to the major planning activities required for a U01 submission including development of the study protocol, manual of operations, and Institutional Review Board application and its associated materials. 
The sample size for the R34 includes 120 caregivers of KG, first, and second grade school children recruited from among 5 elementary schools in an urban inner-city school district in Cleveland, OH, and 10 to 20 key informants from several community agencies or providers. Data collection involves semi-structured one-to-one interviews and questionnaire responses for caregivers and focus groups for community/provider informants. Data analysis will utilize a mixed method design (grounded theory and theory driven) for qualitative data, and descriptive/analytical statistics for quantitative data. This behavioral approach has the potential to change the public health standard of practice for screening programs by utilizing a cost-effective, easily transportable, and sustainable referral approach. The transdiscipilinary research team includes faculty from Department of Community Dentistry at Case Western Reserve and the Northwest Center to Reduce Oral Health Disparities at the University of Washington.
 
230
  Specifically, this project addresses how local and national women's organizations in post-conflict areas of West Africa work to advocate for women's security. To do so, this study uses participant observation and interviews in Guinea and Mali (building on prior research in Cote d'Ivoire) to reveal how international resolutions impact local actions and also whether and how local women's priorities can shape international policies. Analyzing policies on women's security and the discourses that sustain them through macro-, meso-, and micro-level lenses will reveal how local and regional women's community organizations define their security goals and priorities, translate and localize the international agendas of the UN Security Council and transnational NGOs as well as the national agendas of governments, and work with local women to achieve peace and security. By examining the conflicts in Guinea and Mali, this project will develop a framework of how to understand women's needs in their communities and in national peacebuilding and reconstruction projects, underscoring that historical, political, and cultural contexts are important in developing successful, appropriate security policies.
231
 
232
  Qualitative Keywords:
233
+ ",-1,QUALITATIVE,QUALITATIVE,QUALITATIVE,"Council and other international organizations, and work to implement",UNRESOLVABLE
234
  18,"Grant Abstract:
235
  The late Paleozoic Ice Age is the longest-lived glacial interval of the past half billion years, sharing many parallels with the current icehouse in which humans evolved and live. During the Paleozoic icehouse (340 to 290 million years ago) atmospheric oxygen and carbon levels were anomalous, life moved onto land, the first wooded forests expanded across the Earth, and large-scale wildfires emerged. Thus, it is important that we develop a more complete understanding of the late Paleozoic Earth and its dynamic glaciation history. The glacial-marine Parana Basin, Brazil provides an ideal opportunity for this work and the project will include training for U.S. and Brazilian students through seminars, field and lab work, and exchange visits.
236
 
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -27,16 +27,15 @@
27
  "special": true
28
  }
29
  },
30
- "additional_special_tokens": [],
31
  "bos_token": "<s>",
32
- "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
33
  "clean_up_tokenization_spaces": false,
34
  "eos_token": "</s>",
35
- "legacy": true,
36
  "model_max_length": 1000000000000000019884624838656,
37
  "pad_token": "</s>",
 
38
  "sp_model_kwargs": {},
39
- "spaces_between_special_tokens": false,
40
  "tokenizer_class": "LlamaTokenizer",
41
  "unk_token": "<unk>",
42
  "use_default_system_prompt": false
 
27
  "special": true
28
  }
29
  },
 
30
  "bos_token": "<s>",
31
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
32
  "clean_up_tokenization_spaces": false,
33
  "eos_token": "</s>",
34
+ "legacy": false,
35
  "model_max_length": 1000000000000000019884624838656,
36
  "pad_token": "</s>",
37
+ "padding_side": "right",
38
  "sp_model_kwargs": {},
 
39
  "tokenizer_class": "LlamaTokenizer",
40
  "unk_token": "<unk>",
41
  "use_default_system_prompt": false
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a9b30680f8d9696037627ad6db6b69218b1086aa99c3ae04edd0eba306539e7
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e499891b5c3f144254a4182193578d95c296da086bf8d488d78e9e36207244e6
3
  size 4728