|
--- |
|
base_model: sentence-transformers/all-mpnet-base-v2 |
|
datasets: |
|
- code-search-net/code_search_net |
|
language: |
|
- code |
|
library_name: sentence-transformers |
|
metrics: |
|
- pearson_cosine |
|
- spearman_cosine |
|
- pearson_manhattan |
|
- spearman_manhattan |
|
- pearson_euclidean |
|
- spearman_euclidean |
|
- pearson_dot |
|
- spearman_dot |
|
- pearson_max |
|
- spearman_max |
|
pipeline_tag: sentence-similarity |
|
tags: |
|
- sentence-transformers |
|
- sentence-similarity |
|
- feature-extraction |
|
- generated_from_trainer |
|
- dataset_size:20000 |
|
- loss:CoSENTLoss |
|
- loss:MultipleNegativesRankingLoss |
|
widget: |
|
- source_sentence: KeypointsOnImage.to_xy_array |
|
sentences: |
|
- "def to_xy_array(self):\n \"\"\"\n Convert keypoint coordinates\ |
|
\ to ``(N,2)`` array.\n\n Returns\n -------\n (N, 2) ndarray\n\ |
|
\ Array containing the coordinates of all keypoints.\n Shape\ |
|
\ is ``(N,2)`` with coordinates in xy-form.\n\n \"\"\"\n result\ |
|
\ = np.zeros((len(self.keypoints), 2), dtype=np.float32)\n for i, keypoint\ |
|
\ in enumerate(self.keypoints):\n result[i, 0] = keypoint.x\n \ |
|
\ result[i, 1] = keypoint.y\n return result" |
|
- "def _generateMetricSpecs(options):\n \"\"\" Generates the Metrics for a given\ |
|
\ InferenceType\n\n Parameters:\n -------------------------------------------------------------------------\n\ |
|
\ options: ExpGenerator options\n retval: (metricsList, optimizeMetricLabel)\n\ |
|
\ metricsList: list of metric string names\n optimizeMetricLabel:\ |
|
\ Name of the metric which to optimize over\n\n \"\"\"\n inferenceType = options['inferenceType']\n\ |
|
\ inferenceArgs = options['inferenceArgs']\n predictionSteps = inferenceArgs['predictionSteps']\n\ |
|
\ metricWindow = options['metricWindow']\n if metricWindow is None:\n metricWindow\ |
|
\ = int(Configuration.get(\"nupic.opf.metricWindow\"))\n\n metricSpecStrings\ |
|
\ = []\n optimizeMetricLabel = \"\"\n\n # -----------------------------------------------------------------------\n\ |
|
\ # Generate the metrics specified by the expGenerator paramters\n metricSpecStrings.extend(_generateExtraMetricSpecs(options))\n\ |
|
\n # -----------------------------------------------------------------------\n\ |
|
\n optimizeMetricSpec = None\n # If using a dynamically computed prediction\ |
|
\ steps (i.e. when swarming\n # over aggregation is requested), then we will\ |
|
\ plug in the variable\n # predictionSteps in place of the statically provided\ |
|
\ predictionSteps\n # from the JSON description.\n if options['dynamicPredictionSteps']:\n\ |
|
\ assert len(predictionSteps) == 1\n predictionSteps = ['$REPLACE_ME']\n\ |
|
\n # -----------------------------------------------------------------------\n\ |
|
\ # Metrics for temporal prediction\n if inferenceType in (InferenceType.TemporalNextStep,\n\ |
|
\ InferenceType.TemporalAnomaly,\n \ |
|
\ InferenceType.TemporalMultiStep,\n InferenceType.NontemporalMultiStep,\n\ |
|
\ InferenceType.NontemporalClassification,\n \ |
|
\ 'MultiStep'):\n\n predictedFieldName, predictedFieldType = _getPredictedField(options)\n\ |
|
\ isCategory = _isCategory(predictedFieldType)\n metricNames = ('avg_err',)\ |
|
\ if isCategory else ('aae', 'altMAPE')\n trivialErrorMetric = 'avg_err' if\ |
|
\ isCategory else 'altMAPE'\n oneGramErrorMetric = 'avg_err' if isCategory\ |
|
\ else 'altMAPE'\n movingAverageBaselineName = 'moving_mode' if isCategory\ |
|
\ else 'moving_mean'\n\n # Multi-step metrics\n for metricName in metricNames:\n\ |
|
\ metricSpec, metricLabel = \\\n _generateMetricSpecString(field=predictedFieldName,\n\ |
|
\ inferenceElement=InferenceElement.multiStepBestPredictions,\n\ |
|
\ metric='multiStep',\n params={'errorMetric':\ |
|
\ metricName,\n 'window':metricWindow,\n \ |
|
\ 'steps': predictionSteps},\n returnLabel=True)\n\ |
|
\ metricSpecStrings.append(metricSpec)\n\n # If the custom error metric\ |
|
\ was specified, add that\n if options[\"customErrorMetric\"] is not None :\n\ |
|
\ metricParams = dict(options[\"customErrorMetric\"])\n metricParams['errorMetric']\ |
|
\ = 'custom_error_metric'\n metricParams['steps'] = predictionSteps\n \ |
|
\ # If errorWindow is not specified, make it equal to the default window\n \ |
|
\ if not \"errorWindow\" in metricParams:\n metricParams[\"errorWindow\"\ |
|
] = metricWindow\n metricSpec, metricLabel =_generateMetricSpecString(field=predictedFieldName,\n\ |
|
\ inferenceElement=InferenceElement.multiStepPredictions,\n\ |
|
\ metric=\"multiStep\",\n params=metricParams,\n\ |
|
\ returnLabel=True)\n metricSpecStrings.append(metricSpec)\n\ |
|
\n # If this is the first specified step size, optimize for it. Be sure to\n\ |
|
\ # escape special characters since this is a regular expression\n optimizeMetricSpec\ |
|
\ = metricSpec\n metricLabel = metricLabel.replace('[', '\\\\[')\n metricLabel\ |
|
\ = metricLabel.replace(']', '\\\\]')\n optimizeMetricLabel = metricLabel\n\ |
|
\n if options[\"customErrorMetric\"] is not None :\n optimizeMetricLabel\ |
|
\ = \".*custom_error_metric.*\"\n\n # Add in the trivial metrics\n if options[\"\ |
|
runBaselines\"] \\\n and inferenceType != InferenceType.NontemporalClassification:\n\ |
|
\ for steps in predictionSteps:\n metricSpecStrings.append(\n \ |
|
\ _generateMetricSpecString(field=predictedFieldName,\n \ |
|
\ inferenceElement=InferenceElement.prediction,\n \ |
|
\ metric=\"trivial\",\n \ |
|
\ params={'window':metricWindow,\n \ |
|
\ \"errorMetric\":trivialErrorMetric,\n \ |
|
\ 'steps': steps})\n )\n\n ##Add in the\ |
|
\ One-Gram baseline error metric\n #metricSpecStrings.append(\n \ |
|
\ # _generateMetricSpecString(field=predictedFieldName,\n # \ |
|
\ inferenceElement=InferenceElement.encodings,\n # \ |
|
\ metric=\"two_gram\",\n # \ |
|
\ params={'window':metricWindow,\n # \ |
|
\ \"errorMetric\":oneGramErrorMetric,\n # \ |
|
\ 'predictionField':predictedFieldName,\n # \ |
|
\ 'steps': steps})\n # )\n \ |
|
\ #\n #Include the baseline moving mean/mode metric\n if isCategory:\n\ |
|
\ metricSpecStrings.append(\n _generateMetricSpecString(field=predictedFieldName,\n\ |
|
\ inferenceElement=InferenceElement.prediction,\n\ |
|
\ metric=movingAverageBaselineName,\n \ |
|
\ params={'window':metricWindow\n \ |
|
\ ,\"errorMetric\":\"avg_err\",\n \ |
|
\ \"mode_window\":200,\n \ |
|
\ \"steps\": steps})\n \ |
|
\ )\n else :\n metricSpecStrings.append(\n _generateMetricSpecString(field=predictedFieldName,\n\ |
|
\ inferenceElement=InferenceElement.prediction,\n\ |
|
\ metric=movingAverageBaselineName,\n \ |
|
\ params={'window':metricWindow\n \ |
|
\ ,\"errorMetric\":\"altMAPE\",\n \ |
|
\ \"mean_window\":200,\n \ |
|
\ \"steps\": steps})\n \ |
|
\ )\n\n\n\n\n # -----------------------------------------------------------------------\n\ |
|
\ # Metrics for classification\n elif inferenceType in (InferenceType.TemporalClassification):\n\ |
|
\n metricName = 'avg_err'\n trivialErrorMetric = 'avg_err'\n oneGramErrorMetric\ |
|
\ = 'avg_err'\n movingAverageBaselineName = 'moving_mode'\n\n optimizeMetricSpec,\ |
|
\ optimizeMetricLabel = \\\n _generateMetricSpecString(inferenceElement=InferenceElement.classification,\n\ |
|
\ metric=metricName,\n \ |
|
\ params={'window':metricWindow},\n returnLabel=True)\n\ |
|
\n metricSpecStrings.append(optimizeMetricSpec)\n\n if options[\"runBaselines\"\ |
|
]:\n # If temporal, generate the trivial predictor metric\n if inferenceType\ |
|
\ == InferenceType.TemporalClassification:\n metricSpecStrings.append(\n\ |
|
\ _generateMetricSpecString(inferenceElement=InferenceElement.classification,\n\ |
|
\ metric=\"trivial\",\n \ |
|
\ params={'window':metricWindow,\n \ |
|
\ \"errorMetric\":trivialErrorMetric})\n )\n \ |
|
\ metricSpecStrings.append(\n _generateMetricSpecString(inferenceElement=InferenceElement.classification,\n\ |
|
\ metric=\"two_gram\",\n \ |
|
\ params={'window':metricWindow,\n \ |
|
\ \"errorMetric\":oneGramErrorMetric})\n )\n \ |
|
\ metricSpecStrings.append(\n _generateMetricSpecString(inferenceElement=InferenceElement.classification,\n\ |
|
\ metric=movingAverageBaselineName,\n \ |
|
\ params={'window':metricWindow\n \ |
|
\ ,\"errorMetric\":\"avg_err\",\n \ |
|
\ \"mode_window\":200})\n )\n\ |
|
\n\n # Custom Error Metric\n if not options[\"customErrorMetric\"] == None\ |
|
\ :\n #If errorWindow is not specified, make it equal to the default window\n\ |
|
\ if not \"errorWindow\" in options[\"customErrorMetric\"]:\n options[\"\ |
|
customErrorMetric\"][\"errorWindow\"] = metricWindow\n optimizeMetricSpec\ |
|
\ = _generateMetricSpecString(\n inferenceElement=InferenceElement.classification,\n\ |
|
\ metric=\"custom\",\n \ |
|
\ params=options[\"customErrorMetric\"])\n optimizeMetricLabel = \"\ |
|
.*custom_error_metric.*\"\n\n metricSpecStrings.append(optimizeMetricSpec)\n\ |
|
\n\n # -----------------------------------------------------------------------\n\ |
|
\ # If plug in the predictionSteps variable for any dynamically generated\n \ |
|
\ # prediction steps\n if options['dynamicPredictionSteps']:\n for i in range(len(metricSpecStrings)):\n\ |
|
\ metricSpecStrings[i] = metricSpecStrings[i].replace(\n \"'$REPLACE_ME'\"\ |
|
, \"predictionSteps\")\n optimizeMetricLabel = optimizeMetricLabel.replace(\n\ |
|
\ \"'$REPLACE_ME'\", \".*\")\n return metricSpecStrings, optimizeMetricLabel" |
|
- "def create_perf_attrib_stats(perf_attrib, risk_exposures):\n \"\"\"\n Takes\ |
|
\ perf attribution data over a period of time and computes annualized\n multifactor\ |
|
\ alpha, multifactor sharpe, risk exposures.\n \"\"\"\n summary = OrderedDict()\n\ |
|
\ total_returns = perf_attrib['total_returns']\n specific_returns = perf_attrib['specific_returns']\n\ |
|
\ common_returns = perf_attrib['common_returns']\n\n summary['Annualized\ |
|
\ Specific Return'] =\\\n ep.annual_return(specific_returns)\n summary['Annualized\ |
|
\ Common Return'] =\\\n ep.annual_return(common_returns)\n summary['Annualized\ |
|
\ Total Return'] =\\\n ep.annual_return(total_returns)\n\n summary['Specific\ |
|
\ Sharpe Ratio'] =\\\n ep.sharpe_ratio(specific_returns)\n\n summary['Cumulative\ |
|
\ Specific Return'] =\\\n ep.cum_returns_final(specific_returns)\n summary['Cumulative\ |
|
\ Common Return'] =\\\n ep.cum_returns_final(common_returns)\n summary['Total\ |
|
\ Returns'] =\\\n ep.cum_returns_final(total_returns)\n\n summary =\ |
|
\ pd.Series(summary, name='')\n\n annualized_returns_by_factor = [ep.annual_return(perf_attrib[c])\n\ |
|
\ for c in risk_exposures.columns]\n cumulative_returns_by_factor\ |
|
\ = [ep.cum_returns_final(perf_attrib[c])\n \ |
|
\ for c in risk_exposures.columns]\n\n risk_exposure_summary = pd.DataFrame(\n\ |
|
\ data=OrderedDict([\n (\n 'Average Risk Factor\ |
|
\ Exposure',\n risk_exposures.mean(axis='rows')\n ),\n\ |
|
\ ('Annualized Return', annualized_returns_by_factor),\n \ |
|
\ ('Cumulative Return', cumulative_returns_by_factor),\n ]),\n \ |
|
\ index=risk_exposures.columns,\n )\n\n return summary, risk_exposure_summary" |
|
- source_sentence: _generateEncoderChoicesV1 |
|
sentences: |
|
- "def common_arg_parser():\n \"\"\"\n Create an argparse.ArgumentParser for\ |
|
\ run_mujoco.py.\n \"\"\"\n parser = arg_parser()\n parser.add_argument('--env',\ |
|
\ help='environment ID', type=str, default='Reacher-v2')\n parser.add_argument('--env_type',\ |
|
\ help='type of environment, used when the environment type cannot be automatically\ |
|
\ determined', type=str)\n parser.add_argument('--seed', help='RNG seed', type=int,\ |
|
\ default=None)\n parser.add_argument('--alg', help='Algorithm', type=str,\ |
|
\ default='ppo2')\n parser.add_argument('--num_timesteps', type=float, default=1e6),\n\ |
|
\ parser.add_argument('--network', help='network type (mlp, cnn, lstm, cnn_lstm,\ |
|
\ conv_only)', default=None)\n parser.add_argument('--gamestate', help='game\ |
|
\ state to load (so far only used in retro games)', default=None)\n parser.add_argument('--num_env',\ |
|
\ help='Number of environment copies being run in parallel. When not specified,\ |
|
\ set to number of cpus for Atari, and to 1 for Mujoco', default=None, type=int)\n\ |
|
\ parser.add_argument('--reward_scale', help='Reward scale factor. Default:\ |
|
\ 1.0', default=1.0, type=float)\n parser.add_argument('--save_path', help='Path\ |
|
\ to save trained model to', default=None, type=str)\n parser.add_argument('--save_video_interval',\ |
|
\ help='Save video every x steps (0 = disabled)', default=0, type=int)\n parser.add_argument('--save_video_length',\ |
|
\ help='Length of recorded video. Default: 200', default=200, type=int)\n parser.add_argument('--play',\ |
|
\ default=False, action='store_true')\n return parser" |
|
- "def check_intraday(estimate, returns, positions, transactions):\n \"\"\"\n\ |
|
\ Logic for checking if a strategy is intraday and processing it.\n\n Parameters\n\ |
|
\ ----------\n estimate: boolean or str, optional\n Approximate returns\ |
|
\ for intraday strategies.\n See description in tears.create_full_tear_sheet.\n\ |
|
\ returns : pd.Series\n Daily returns of the strategy, noncumulative.\n\ |
|
\ - See full explanation in create_full_tear_sheet.\n positions : pd.DataFrame\n\ |
|
\ Daily net position values.\n - See full explanation in create_full_tear_sheet.\n\ |
|
\ transactions : pd.DataFrame\n Prices and amounts of executed trades.\ |
|
\ One row per trade.\n - See full explanation in create_full_tear_sheet.\n\ |
|
\n Returns\n -------\n pd.DataFrame\n Daily net position values,\ |
|
\ adjusted for intraday movement.\n \"\"\"\n\n if estimate == 'infer':\n\ |
|
\ if positions is not None and transactions is not None:\n if\ |
|
\ detect_intraday(positions, transactions):\n warnings.warn('Detected\ |
|
\ intraday strategy; inferring positi' +\n 'ons from\ |
|
\ transactions. Set estimate_intraday' +\n '=False\ |
|
\ to disable.')\n return estimate_intraday(returns, positions,\ |
|
\ transactions)\n else:\n return positions\n \ |
|
\ else:\n return positions\n\n elif estimate:\n if positions\ |
|
\ is not None and transactions is not None:\n return estimate_intraday(returns,\ |
|
\ positions, transactions)\n else:\n raise ValueError('Positions\ |
|
\ and txns needed to estimate intraday')\n else:\n return positions" |
|
- "def _generateEncoderChoicesV1(fieldInfo):\n \"\"\" Return a list of possible\ |
|
\ encoder parameter combinations for the given\n field and the default aggregation\ |
|
\ function to use. Each parameter combination\n is a dict defining the parameters\ |
|
\ for the encoder. Here is an example\n return value for the encoderChoicesList:\n\ |
|
\n [\n None,\n {'fieldname':'timestamp',\n 'name': 'timestamp_timeOfDay',\n\ |
|
\ 'type':'DateEncoder'\n 'dayOfWeek': (7,1)\n },\n {'fieldname':'timestamp',\n\ |
|
\ 'name': 'timestamp_timeOfDay',\n 'type':'DateEncoder'\n 'dayOfWeek':\ |
|
\ (7,3)\n },\n ],\n\n Parameters:\n --------------------------------------------------\n\ |
|
\ fieldInfo: item from the 'includedFields' section of the\n \ |
|
\ description JSON object\n\n retval: (encoderChoicesList, aggFunction)\n\ |
|
\ encoderChoicesList: a list of encoder choice lists for this field.\n\ |
|
\ Most fields will generate just 1 encoder choice list.\n \ |
|
\ DateTime fields can generate 2 or more encoder choice lists,\n \ |
|
\ one for dayOfWeek, one for timeOfDay, etc.\n aggFunction:\ |
|
\ name of aggregation function to use for this\n field\ |
|
\ type\n\n \"\"\"\n\n width = 7\n fieldName = fieldInfo['fieldName']\n fieldType\ |
|
\ = fieldInfo['fieldType']\n encoderChoicesList = []\n\n # Scalar?\n if fieldType\ |
|
\ in ['float', 'int']:\n aggFunction = 'mean'\n encoders = [None]\n for\ |
|
\ n in (13, 50, 150, 500):\n encoder = dict(type='ScalarSpaceEncoder', name=fieldName,\ |
|
\ fieldname=fieldName,\n n=n, w=width, clipInput=True,space=\"\ |
|
absolute\")\n if 'minValue' in fieldInfo:\n encoder['minval'] = fieldInfo['minValue']\n\ |
|
\ if 'maxValue' in fieldInfo:\n encoder['maxval'] = fieldInfo['maxValue']\n\ |
|
\ encoders.append(encoder)\n encoderChoicesList.append(encoders)\n\n \ |
|
\ # String?\n elif fieldType == 'string':\n aggFunction = 'first'\n encoders\ |
|
\ = [None]\n encoder = dict(type='SDRCategoryEncoder', name=fieldName,\n \ |
|
\ fieldname=fieldName, n=100, w=width)\n encoders.append(encoder)\n\ |
|
\ encoderChoicesList.append(encoders)\n\n\n # Datetime?\n elif fieldType\ |
|
\ == 'datetime':\n aggFunction = 'first'\n\n # First, the time of day representation\n\ |
|
\ encoders = [None]\n for radius in (1, 8):\n encoder = dict(type='DateEncoder',\ |
|
\ name='%s_timeOfDay' % (fieldName),\n fieldname=fieldName,\ |
|
\ timeOfDay=(width, radius))\n encoders.append(encoder)\n encoderChoicesList.append(encoders)\n\ |
|
\n # Now, the day of week representation\n encoders = [None]\n for radius\ |
|
\ in (1, 3):\n encoder = dict(type='DateEncoder', name='%s_dayOfWeek' % (fieldName),\n\ |
|
\ fieldname=fieldName, dayOfWeek=(width, radius))\n encoders.append(encoder)\n\ |
|
\ encoderChoicesList.append(encoders)\n\n else:\n raise RuntimeError(\"\ |
|
Unsupported field type '%s'\" % (fieldType))\n\n\n # Return results\n return\ |
|
\ (encoderChoicesList, aggFunction)" |
|
- source_sentence: leaky_relu6 |
|
sentences: |
|
- "def list_string_to_dict(string):\n \"\"\"Inputs ``['a', 'b', 'c']``, returns\ |
|
\ ``{'a': 0, 'b': 1, 'c': 2}``.\"\"\"\n dictionary = {}\n for idx, c in\ |
|
\ enumerate(string):\n dictionary.update({c: idx})\n return dictionary" |
|
- "def affine_transform(x, transform_matrix, channel_index=2, fill_mode='nearest',\ |
|
\ cval=0., order=1):\n \"\"\"Return transformed images by given an affine matrix\ |
|
\ in Scipy format (x is height).\n\n Parameters\n ----------\n x : numpy.array\n\ |
|
\ An image with dimension of [row, col, channel] (default).\n transform_matrix\ |
|
\ : numpy.array\n Transform matrix (offset center), can be generated by\ |
|
\ ``transform_matrix_offset_center``\n channel_index : int\n Index of\ |
|
\ channel, default 2.\n fill_mode : str\n Method to fill missing pixel,\ |
|
\ default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy\ |
|
\ ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`__\n\ |
|
\ cval : float\n Value used for points outside the boundaries of the\ |
|
\ input if mode='constant'. Default is 0.0\n order : int\n The order\ |
|
\ of interpolation. The order has to be in the range 0-5:\n - 0 Nearest-neighbor\n\ |
|
\ - 1 Bi-linear (default)\n - 2 Bi-quadratic\n \ |
|
\ - 3 Bi-cubic\n - 4 Bi-quartic\n - 5 Bi-quintic\n \ |
|
\ - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`__\n\ |
|
\n Returns\n -------\n numpy.array\n A processed image.\n\n \ |
|
\ Examples\n --------\n >>> M_shear = tl.prepro.affine_shear_matrix(intensity=0.2,\ |
|
\ is_random=False)\n >>> M_zoom = tl.prepro.affine_zoom_matrix(zoom_range=0.8)\n\ |
|
\ >>> M_combined = M_shear.dot(M_zoom)\n >>> transform_matrix = tl.prepro.transform_matrix_offset_center(M_combined,\ |
|
\ h, w)\n >>> result = tl.prepro.affine_transform(image, transform_matrix)\n\ |
|
\n \"\"\"\n # transform_matrix = transform_matrix_offset_center()\n #\ |
|
\ asdihasid\n # asd\n\n x = np.rollaxis(x, channel_index, 0)\n final_affine_matrix\ |
|
\ = transform_matrix[:2, :2]\n final_offset = transform_matrix[:2, 2]\n \ |
|
\ channel_images = [\n ndi.interpolation.\n affine_transform(x_channel,\ |
|
\ final_affine_matrix, final_offset, order=order, mode=fill_mode, cval=cval)\n\ |
|
\ for x_channel in x\n ]\n x = np.stack(channel_images, axis=0)\n\ |
|
\ x = np.rollaxis(x, 0, channel_index + 1)\n return x" |
|
- "def leaky_relu6(x, alpha=0.2, name=\"leaky_relu6\"):\n \"\"\":func:`leaky_relu6`\ |
|
\ can be used through its shortcut: :func:`tl.act.lrelu6`.\n\n This activation\ |
|
\ function is a modified version :func:`leaky_relu` introduced by the following\ |
|
\ paper:\n `Rectifier Nonlinearities Improve Neural Network Acoustic Models\ |
|
\ [A. L. Maas et al., 2013] <https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf>`__\n\ |
|
\n This activation function also follows the behaviour of the activation function\ |
|
\ :func:`tf.nn.relu6` introduced by the following paper:\n `Convolutional Deep\ |
|
\ Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] <http://www.cs.utoronto.ca/~kriz/conv-cifar10-aug2010.pdf>`__\n\ |
|
\n The function return the following results:\n - When x < 0: ``f(x) =\ |
|
\ alpha_low * x``.\n - When x in [0, 6]: ``f(x) = x``.\n - When x >\ |
|
\ 6: ``f(x) = 6``.\n\n Parameters\n ----------\n x : Tensor\n \ |
|
\ Support input type ``float``, ``double``, ``int32``, ``int64``, ``uint8``, ``int16``,\ |
|
\ or ``int8``.\n alpha : float\n Slope.\n name : str\n The\ |
|
\ function name (optional).\n\n Examples\n --------\n >>> import tensorlayer\ |
|
\ as tl\n >>> net = tl.layers.DenseLayer(net, 100, act=lambda x : tl.act.leaky_relu6(x,\ |
|
\ 0.2), name='dense')\n\n Returns\n -------\n Tensor\n A ``Tensor``\ |
|
\ in the same type as ``x``.\n\n References\n ----------\n - `Rectifier\ |
|
\ Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013]\ |
|
\ <https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf>`__\n\ |
|
\ - `Convolutional Deep Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] <http://www.cs.utoronto.ca/~kriz/conv-cifar10-aug2010.pdf>`__\n\ |
|
\ \"\"\"\n if not isinstance(alpha, tf.Tensor) and not (0 < alpha <= 1):\n\ |
|
\ raise ValueError(\"`alpha` value must be in [0, 1]`\")\n\n with tf.name_scope(name,\ |
|
\ \"leaky_relu6\") as name_scope:\n x = tf.convert_to_tensor(x, name=\"\ |
|
features\")\n return tf.minimum(tf.maximum(x, alpha * x), 6, name=name_scope)" |
|
- source_sentence: LineString.contains |
|
sentences: |
|
- "def build_act_with_param_noise(make_obs_ph, q_func, num_actions, scope=\"deepq\"\ |
|
, reuse=None, param_noise_filter_func=None):\n \"\"\"Creates the act function\ |
|
\ with support for parameter space noise exploration (https://arxiv.org/abs/1706.01905):\n\ |
|
\n Parameters\n ----------\n make_obs_ph: str -> tf.placeholder or TfInput\n\ |
|
\ a function that take a name and creates a placeholder of input with that\ |
|
\ name\n q_func: (tf.Variable, int, str, bool) -> tf.Variable\n the\ |
|
\ model that takes the following inputs:\n observation_in: object\n\ |
|
\ the output of observation placeholder\n num_actions:\ |
|
\ int\n number of actions\n scope: str\n \ |
|
\ reuse: bool\n should be passed to outer variable scope\n \ |
|
\ and returns a tensor of shape (batch_size, num_actions) with values of every\ |
|
\ action.\n num_actions: int\n number of actions.\n scope: str or\ |
|
\ VariableScope\n optional scope for variable_scope.\n reuse: bool or\ |
|
\ None\n whether or not the variables should be reused. To be able to reuse\ |
|
\ the scope must be given.\n param_noise_filter_func: tf.Variable -> bool\n\ |
|
\ function that decides whether or not a variable should be perturbed.\ |
|
\ Only applicable\n if param_noise is True. If set to None, default_param_noise_filter\ |
|
\ is used by default.\n\n Returns\n -------\n act: (tf.Variable, bool,\ |
|
\ float, bool, float, bool) -> tf.Variable\n function to select and action\ |
|
\ given observation.\n` See the top of the file for details.\n \"\"\"\ |
|
\n if param_noise_filter_func is None:\n param_noise_filter_func = default_param_noise_filter\n\ |
|
\n with tf.variable_scope(scope, reuse=reuse):\n observations_ph = make_obs_ph(\"\ |
|
observation\")\n stochastic_ph = tf.placeholder(tf.bool, (), name=\"stochastic\"\ |
|
)\n update_eps_ph = tf.placeholder(tf.float32, (), name=\"update_eps\"\ |
|
)\n update_param_noise_threshold_ph = tf.placeholder(tf.float32, (), name=\"\ |
|
update_param_noise_threshold\")\n update_param_noise_scale_ph = tf.placeholder(tf.bool,\ |
|
\ (), name=\"update_param_noise_scale\")\n reset_ph = tf.placeholder(tf.bool,\ |
|
\ (), name=\"reset\")\n\n eps = tf.get_variable(\"eps\", (), initializer=tf.constant_initializer(0))\n\ |
|
\ param_noise_scale = tf.get_variable(\"param_noise_scale\", (), initializer=tf.constant_initializer(0.01),\ |
|
\ trainable=False)\n param_noise_threshold = tf.get_variable(\"param_noise_threshold\"\ |
|
, (), initializer=tf.constant_initializer(0.05), trainable=False)\n\n #\ |
|
\ Unmodified Q.\n q_values = q_func(observations_ph.get(), num_actions,\ |
|
\ scope=\"q_func\")\n\n # Perturbable Q used for the actual rollout.\n\ |
|
\ q_values_perturbed = q_func(observations_ph.get(), num_actions, scope=\"\ |
|
perturbed_q_func\")\n # We have to wrap this code into a function due to\ |
|
\ the way tf.cond() works. See\n # https://stackoverflow.com/questions/37063952/confused-by-the-behavior-of-tf-cond\ |
|
\ for\n # a more detailed discussion.\n def perturb_vars(original_scope,\ |
|
\ perturbed_scope):\n all_vars = scope_vars(absolute_scope_name(original_scope))\n\ |
|
\ all_perturbed_vars = scope_vars(absolute_scope_name(perturbed_scope))\n\ |
|
\ assert len(all_vars) == len(all_perturbed_vars)\n perturb_ops\ |
|
\ = []\n for var, perturbed_var in zip(all_vars, all_perturbed_vars):\n\ |
|
\ if param_noise_filter_func(perturbed_var):\n \ |
|
\ # Perturb this variable.\n op = tf.assign(perturbed_var,\ |
|
\ var + tf.random_normal(shape=tf.shape(var), mean=0., stddev=param_noise_scale))\n\ |
|
\ else:\n # Do not perturb, just assign.\n \ |
|
\ op = tf.assign(perturbed_var, var)\n perturb_ops.append(op)\n\ |
|
\ assert len(perturb_ops) == len(all_vars)\n return tf.group(*perturb_ops)\n\ |
|
\n # Set up functionality to re-compute `param_noise_scale`. This perturbs\ |
|
\ yet another copy\n # of the network and measures the effect of that perturbation\ |
|
\ in action space. If the perturbation\n # is too big, reduce scale of\ |
|
\ perturbation, otherwise increase.\n q_values_adaptive = q_func(observations_ph.get(),\ |
|
\ num_actions, scope=\"adaptive_q_func\")\n perturb_for_adaption = perturb_vars(original_scope=\"\ |
|
q_func\", perturbed_scope=\"adaptive_q_func\")\n kl = tf.reduce_sum(tf.nn.softmax(q_values)\ |
|
\ * (tf.log(tf.nn.softmax(q_values)) - tf.log(tf.nn.softmax(q_values_adaptive))),\ |
|
\ axis=-1)\n mean_kl = tf.reduce_mean(kl)\n def update_scale():\n\ |
|
\ with tf.control_dependencies([perturb_for_adaption]):\n \ |
|
\ update_scale_expr = tf.cond(mean_kl < param_noise_threshold,\n \ |
|
\ lambda: param_noise_scale.assign(param_noise_scale * 1.01),\n \ |
|
\ lambda: param_noise_scale.assign(param_noise_scale / 1.01),\n\ |
|
\ )\n return update_scale_expr\n\n # Functionality\ |
|
\ to update the threshold for parameter space noise.\n update_param_noise_threshold_expr\ |
|
\ = param_noise_threshold.assign(tf.cond(update_param_noise_threshold_ph >= 0,\n\ |
|
\ lambda: update_param_noise_threshold_ph, lambda: param_noise_threshold))\n\ |
|
\n # Put everything together.\n deterministic_actions = tf.argmax(q_values_perturbed,\ |
|
\ axis=1)\n batch_size = tf.shape(observations_ph.get())[0]\n random_actions\ |
|
\ = tf.random_uniform(tf.stack([batch_size]), minval=0, maxval=num_actions, dtype=tf.int64)\n\ |
|
\ chose_random = tf.random_uniform(tf.stack([batch_size]), minval=0, maxval=1,\ |
|
\ dtype=tf.float32) < eps\n stochastic_actions = tf.where(chose_random,\ |
|
\ random_actions, deterministic_actions)\n\n output_actions = tf.cond(stochastic_ph,\ |
|
\ lambda: stochastic_actions, lambda: deterministic_actions)\n update_eps_expr\ |
|
\ = eps.assign(tf.cond(update_eps_ph >= 0, lambda: update_eps_ph, lambda: eps))\n\ |
|
\ updates = [\n update_eps_expr,\n tf.cond(reset_ph,\ |
|
\ lambda: perturb_vars(original_scope=\"q_func\", perturbed_scope=\"perturbed_q_func\"\ |
|
), lambda: tf.group(*[])),\n tf.cond(update_param_noise_scale_ph, lambda:\ |
|
\ update_scale(), lambda: tf.Variable(0., trainable=False)),\n update_param_noise_threshold_expr,\n\ |
|
\ ]\n _act = U.function(inputs=[observations_ph, stochastic_ph,\ |
|
\ update_eps_ph, reset_ph, update_param_noise_threshold_ph, update_param_noise_scale_ph],\n\ |
|
\ outputs=output_actions,\n givens={update_eps_ph:\ |
|
\ -1.0, stochastic_ph: True, reset_ph: False, update_param_noise_threshold_ph:\ |
|
\ False, update_param_noise_scale_ph: False},\n updates=updates)\n\ |
|
\ def act(ob, reset=False, update_param_noise_threshold=False, update_param_noise_scale=False,\ |
|
\ stochastic=True, update_eps=-1):\n return _act(ob, stochastic, update_eps,\ |
|
\ reset, update_param_noise_threshold, update_param_noise_scale)\n return\ |
|
\ act" |
|
- "def contains(self, other, max_distance=1e-4):\n \"\"\"\n Estimate\ |
|
\ whether the bounding box contains a point.\n\n Parameters\n ----------\n\ |
|
\ other : tuple of number or imgaug.augmentables.kps.Keypoint\n \ |
|
\ Point to check for.\n\n max_distance : float\n Maximum\ |
|
\ allowed euclidean distance between the point and the\n closest point\ |
|
\ on the line. If the threshold is exceeded, the point\n is not considered\ |
|
\ to be contained in the line.\n\n Returns\n -------\n bool\n\ |
|
\ True if the point is contained in the line string, False otherwise.\n\ |
|
\ It is contained if its distance to the line or any of its points\n\ |
|
\ is below a threshold.\n\n \"\"\"\n return self.compute_distance(other,\ |
|
\ default=np.inf) < max_distance" |
|
- "def is_fully_within_image(self, image):\n \"\"\"\n Estimate whether\ |
|
\ the bounding box is fully inside the image area.\n\n Parameters\n \ |
|
\ ----------\n image : (H,W,...) ndarray or tuple of int\n \ |
|
\ Image dimensions to use.\n If an ndarray, its shape will be used.\n\ |
|
\ If a tuple, it is assumed to represent the image shape\n \ |
|
\ and must contain at least two integers.\n\n Returns\n -------\n\ |
|
\ bool\n True if the bounding box is fully inside the image\ |
|
\ area. False otherwise.\n\n \"\"\"\n shape = normalize_shape(image)\n\ |
|
\ height, width = shape[0:2]\n return self.x1 >= 0 and self.x2 <\ |
|
\ width and self.y1 >= 0 and self.y2 < height" |
|
- source_sentence: Keypoint.copy |
|
sentences: |
|
- "def build_words_dataset(words=None, vocabulary_size=50000, printable=True, unk_key='UNK'):\n\ |
|
\ \"\"\"Build the words dictionary and replace rare words with 'UNK' token.\n\ |
|
\ The most common word has the smallest integer id.\n\n Parameters\n \ |
|
\ ----------\n words : list of str or byte\n The context in list format.\ |
|
\ You may need to do preprocessing on the words, such as lower case, remove marks\ |
|
\ etc.\n vocabulary_size : int\n The maximum vocabulary size, limiting\ |
|
\ the vocabulary size. Then the script replaces rare words with 'UNK' token.\n\ |
|
\ printable : boolean\n Whether to print the read vocabulary size of\ |
|
\ the given words.\n unk_key : str\n Represent the unknown words.\n\n\ |
|
\ Returns\n --------\n data : list of int\n The context in a list\ |
|
\ of ID.\n count : list of tuple and list\n Pair words and IDs.\n \ |
|
\ - count[0] is a list : the number of rare words\n - count[1:]\ |
|
\ are tuples : the number of occurrence of each word\n - e.g. [['UNK',\ |
|
\ 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)]\n\ |
|
\ dictionary : dictionary\n It is `word_to_id` that maps word to ID.\n\ |
|
\ reverse_dictionary : a dictionary\n It is `id_to_word` that maps ID\ |
|
\ to word.\n\n Examples\n --------\n >>> words = tl.files.load_matt_mahoney_text8_dataset()\n\ |
|
\ >>> vocabulary_size = 50000\n >>> data, count, dictionary, reverse_dictionary\ |
|
\ = tl.nlp.build_words_dataset(words, vocabulary_size)\n\n References\n \ |
|
\ -----------------\n - `tensorflow/examples/tutorials/word2vec/word2vec_basic.py\ |
|
\ <https://github.com/tensorflow/tensorflow/blob/r0.7/tensorflow/examples/tutorials/word2vec/word2vec_basic.py>`__\n\ |
|
\n \"\"\"\n if words is None:\n raise Exception(\"words : list of\ |
|
\ str or byte\")\n\n count = [[unk_key, -1]]\n count.extend(collections.Counter(words).most_common(vocabulary_size\ |
|
\ - 1))\n dictionary = dict()\n for word, _ in count:\n dictionary[word]\ |
|
\ = len(dictionary)\n data = list()\n unk_count = 0\n for word in words:\n\ |
|
\ if word in dictionary:\n index = dictionary[word]\n \ |
|
\ else:\n index = 0 # dictionary['UNK']\n unk_count +=\ |
|
\ 1\n data.append(index)\n count[0][1] = unk_count\n reverse_dictionary\ |
|
\ = dict(zip(dictionary.values(), dictionary.keys()))\n if printable:\n \ |
|
\ tl.logging.info('Real vocabulary size %d' % len(collections.Counter(words).keys()))\n\ |
|
\ tl.logging.info('Limited vocabulary size {}'.format(vocabulary_size))\n\ |
|
\ if len(collections.Counter(words).keys()) < vocabulary_size:\n raise\ |
|
\ Exception(\n \"len(collections.Counter(words).keys()) >= vocabulary_size\ |
|
\ , the limited vocabulary_size must be less than or equal to the read vocabulary_size\"\ |
|
\n )\n return data, count, dictionary, reverse_dictionary" |
|
- "def Snowflakes(density=(0.005, 0.075), density_uniformity=(0.3, 0.9), flake_size=(0.2,\ |
|
\ 0.7),\n flake_size_uniformity=(0.4, 0.8), angle=(-30, 30), speed=(0.007,\ |
|
\ 0.03),\n name=None, deterministic=False, random_state=None):\n\ |
|
\ \"\"\"\n Augmenter to add falling snowflakes to images.\n\n This is\ |
|
\ a wrapper around ``SnowflakesLayer``. It executes 1 to 3 layers per image.\n\ |
|
\n dtype support::\n\n * ``uint8``: yes; tested\n * ``uint16``:\ |
|
\ no (1)\n * ``uint32``: no (1)\n * ``uint64``: no (1)\n \ |
|
\ * ``int8``: no (1)\n * ``int16``: no (1)\n * ``int32``: no (1)\n\ |
|
\ * ``int64``: no (1)\n * ``float16``: no (1)\n * ``float32``:\ |
|
\ no (1)\n * ``float64``: no (1)\n * ``float128``: no (1)\n \ |
|
\ * ``bool``: no (1)\n\n - (1) Parameters of this augmenter are optimized\ |
|
\ for the value range of uint8.\n While other dtypes may be accepted,\ |
|
\ they will lead to images augmented in\n ways inappropriate for\ |
|
\ the respective dtype.\n\n Parameters\n ----------\n density : number\ |
|
\ or tuple of number or list of number or imgaug.parameters.StochasticParameter\n\ |
|
\ Density of the snowflake layer, as a probability of each pixel in low\ |
|
\ resolution space to be a snowflake.\n Valid value range is ``(0.0, 1.0)``.\ |
|
\ Recommended to be around ``(0.01, 0.075)``.\n\n * If a number, then\ |
|
\ that value will be used for all images.\n * If a tuple ``(a, b)``,\ |
|
\ then a value from the continuous range ``[a, b]`` will be used.\n \ |
|
\ * If a list, then a random value will be sampled from that list per image.\n\ |
|
\ * If a StochasticParameter, then a value will be sampled per image\ |
|
\ from that parameter.\n\n density_uniformity : number or tuple of number or\ |
|
\ list of number or imgaug.parameters.StochasticParameter\n Size uniformity\ |
|
\ of the snowflakes. Higher values denote more similarly sized snowflakes.\n \ |
|
\ Valid value range is ``(0.0, 1.0)``. Recommended to be around ``0.5``.\n\ |
|
\n * If a number, then that value will be used for all images.\n \ |
|
\ * If a tuple ``(a, b)``, then a value from the continuous range ``[a,\ |
|
\ b]`` will be used.\n * If a list, then a random value will be sampled\ |
|
\ from that list per image.\n * If a StochasticParameter, then a value\ |
|
\ will be sampled per image from that parameter.\n\n flake_size : number or\ |
|
\ tuple of number or list of number or imgaug.parameters.StochasticParameter\n\ |
|
\ Size of the snowflakes. This parameter controls the resolution at which\ |
|
\ snowflakes are sampled.\n Higher values mean that the resolution is closer\ |
|
\ to the input image's resolution and hence each sampled\n snowflake will\ |
|
\ be smaller (because of the smaller pixel size).\n\n Valid value range\ |
|
\ is ``[0.0, 1.0)``. Recommended values:\n\n * On ``96x128`` a value\ |
|
\ of ``(0.1, 0.4)`` worked well.\n * On ``192x256`` a value of ``(0.2,\ |
|
\ 0.7)`` worked well.\n * On ``960x1280`` a value of ``(0.7, 0.95)``\ |
|
\ worked well.\n\n Allowed datatypes:\n\n * If a number, then\ |
|
\ that value will be used for all images.\n * If a tuple ``(a, b)``,\ |
|
\ then a value from the continuous range ``[a, b]`` will be used.\n \ |
|
\ * If a list, then a random value will be sampled from that list per image.\n\ |
|
\ * If a StochasticParameter, then a value will be sampled per image\ |
|
\ from that parameter.\n\n flake_size_uniformity : number or tuple of number\ |
|
\ or list of number or imgaug.parameters.StochasticParameter\n Controls\ |
|
\ the size uniformity of the snowflakes. Higher values mean that the snowflakes\ |
|
\ are more similarly\n sized. Valid value range is ``(0.0, 1.0)``. Recommended\ |
|
\ to be around ``0.5``.\n\n * If a number, then that value will be\ |
|
\ used for all images.\n * If a tuple ``(a, b)``, then a value from\ |
|
\ the continuous range ``[a, b]`` will be used.\n * If a list, then\ |
|
\ a random value will be sampled from that list per image.\n * If a\ |
|
\ StochasticParameter, then a value will be sampled per image from that parameter.\n\ |
|
\n angle : number or tuple of number or list of number or imgaug.parameters.StochasticParameter\n\ |
|
\ Angle in degrees of motion blur applied to the snowflakes, where ``0.0``\ |
|
\ is motion blur that points straight\n upwards. Recommended to be around\ |
|
\ ``(-30, 30)``.\n See also :func:`imgaug.augmenters.blur.MotionBlur.__init__`.\n\ |
|
\n * If a number, then that value will be used for all images.\n \ |
|
\ * If a tuple ``(a, b)``, then a value from the continuous range ``[a,\ |
|
\ b]`` will be used.\n * If a list, then a random value will be sampled\ |
|
\ from that list per image.\n * If a StochasticParameter, then a value\ |
|
\ will be sampled per image from that parameter.\n\n speed : number or tuple\ |
|
\ of number or list of number or imgaug.parameters.StochasticParameter\n \ |
|
\ Perceived falling speed of the snowflakes. This parameter controls the motion\ |
|
\ blur's kernel size.\n It follows roughly the form ``kernel_size = image_size\ |
|
\ * speed``. Hence,\n Values around ``1.0`` denote that the motion blur\ |
|
\ should \"stretch\" each snowflake over the whole image.\n\n Valid value\ |
|
\ range is ``(0.0, 1.0)``. Recommended values:\n\n * On ``96x128``\ |
|
\ a value of ``(0.01, 0.05)`` worked well.\n * On ``192x256`` a value\ |
|
\ of ``(0.007, 0.03)`` worked well.\n * On ``960x1280`` a value of\ |
|
\ ``(0.001, 0.03)`` worked well.\n\n\n Allowed datatypes:\n\n \ |
|
\ * If a number, then that value will be used for all images.\n *\ |
|
\ If a tuple ``(a, b)``, then a value from the continuous range ``[a, b]`` will\ |
|
\ be used.\n * If a list, then a random value will be sampled from\ |
|
\ that list per image.\n * If a StochasticParameter, then a value will\ |
|
\ be sampled per image from that parameter.\n\n name : None or str, optional\n\ |
|
\ See :func:`imgaug.augmenters.meta.Augmenter.__init__`.\n\n deterministic\ |
|
\ : bool, optional\n See :func:`imgaug.augmenters.meta.Augmenter.__init__`.\n\ |
|
\n random_state : None or int or numpy.random.RandomState, optional\n \ |
|
\ See :func:`imgaug.augmenters.meta.Augmenter.__init__`.\n\n Examples\n \ |
|
\ --------\n >>> aug = iaa.Snowflakes(flake_size=(0.1, 0.4), speed=(0.01,\ |
|
\ 0.05))\n\n Adds snowflakes to small images (around ``96x128``).\n\n >>>\ |
|
\ aug = iaa.Snowflakes(flake_size=(0.2, 0.7), speed=(0.007, 0.03))\n\n Adds\ |
|
\ snowflakes to medium-sized images (around ``192x256``).\n\n >>> aug = iaa.Snowflakes(flake_size=(0.7,\ |
|
\ 0.95), speed=(0.001, 0.03))\n\n Adds snowflakes to large images (around ``960x1280``).\n\ |
|
\n \"\"\"\n if name is None:\n name = \"Unnamed%s\" % (ia.caller_name(),)\n\ |
|
\n layer = SnowflakesLayer(\n density=density, density_uniformity=density_uniformity,\n\ |
|
\ flake_size=flake_size, flake_size_uniformity=flake_size_uniformity,\n\ |
|
\ angle=angle, speed=speed,\n blur_sigma_fraction=(0.0001, 0.001)\n\ |
|
\ )\n\n return meta.SomeOf(\n (1, 3), children=[layer.deepcopy()\ |
|
\ for _ in range(3)],\n random_order=False, name=name, deterministic=deterministic,\ |
|
\ random_state=random_state\n )" |
|
- "def copy(self, x=None, y=None):\n \"\"\"\n Create a shallow copy\ |
|
\ of the Keypoint object.\n\n Parameters\n ----------\n x\ |
|
\ : None or number, optional\n Coordinate of the keypoint on the x\ |
|
\ axis.\n If ``None``, the instance's value will be copied.\n\n \ |
|
\ y : None or number, optional\n Coordinate of the keypoint on\ |
|
\ the y axis.\n If ``None``, the instance's value will be copied.\n\ |
|
\n Returns\n -------\n imgaug.Keypoint\n Shallow\ |
|
\ copy.\n\n \"\"\"\n return self.deepcopy(x=x, y=y)" |
|
model-index: |
|
- name: SentenceTransformer based on sentence-transformers/all-mpnet-base-v2 |
|
results: |
|
- task: |
|
type: semantic-similarity |
|
name: Semantic Similarity |
|
dataset: |
|
name: sts dev |
|
type: sts-dev |
|
metrics: |
|
- type: pearson_cosine |
|
value: 0.8806072274141987 |
|
name: Pearson Cosine |
|
- type: spearman_cosine |
|
value: 0.8810194487011652 |
|
name: Spearman Cosine |
|
- type: pearson_manhattan |
|
value: 0.8780911558324747 |
|
name: Pearson Manhattan |
|
- type: spearman_manhattan |
|
value: 0.8798257355327418 |
|
name: Spearman Manhattan |
|
- type: pearson_euclidean |
|
value: 0.8794084495321427 |
|
name: Pearson Euclidean |
|
- type: spearman_euclidean |
|
value: 0.8810194487011652 |
|
name: Spearman Euclidean |
|
- type: pearson_dot |
|
value: 0.8806072253861965 |
|
name: Pearson Dot |
|
- type: spearman_dot |
|
value: 0.8810194487011652 |
|
name: Spearman Dot |
|
- type: pearson_max |
|
value: 0.8806072274141987 |
|
name: Pearson Max |
|
- type: spearman_max |
|
value: 0.8810194487011652 |
|
name: Spearman Max |
|
--- |
|
|
|
# SentenceTransformer based on sentence-transformers/all-mpnet-base-v2 |
|
|
|
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) on the [code-search-net/code_search_net](https://huggingface.co/datasets/code-search-net/code_search_net) dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more. |
|
|
|
## Model Details |
|
|
|
### Model Description |
|
- **Model Type:** Sentence Transformer |
|
- **Base model:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) <!-- at revision 84f2bcc00d77236f9e89c8a360a00fb1139bf47d --> |
|
- **Maximum Sequence Length:** 384 tokens |
|
- **Output Dimensionality:** 768 dimensions
|
- **Similarity Function:** Cosine Similarity |
|
- **Training Dataset:** |
|
- [code-search-net/code_search_net](https://huggingface.co/datasets/code-search-net/code_search_net) |
|
- **Language:** code |
|
|
|
|
### Model Sources |
|
|
|
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net) |
|
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers) |
|
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers) |
|
|
|
### Full Model Architecture |
|
|
|
``` |
|
SentenceTransformer( |
|
(0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel |
|
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) |
|
(2): Normalize() |
|
) |
|
``` |
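
Note that the final `Normalize()` module L2-normalizes every embedding, so dot-product and cosine similarity coincide for this model; this is why the dot and cosine scores in the evaluation table below are nearly identical. A minimal sketch to verify this, using only the model repo referenced in the usage section:

```python
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("BoghdadyJR/al-MiniLM-L6-v2")
embeddings = model.encode(["Keypoint.copy", "def copy(self, x=None, y=None): ..."])

# The Normalize() module makes every embedding unit-length ...
print(np.linalg.norm(embeddings, axis=1))  # ~[1. 1.]

# ... so the plain dot product already equals the cosine similarity.
print(embeddings @ embeddings.T)
```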
|
|
|
## Usage |
|
|
|
### Direct Usage (Sentence Transformers) |
|
|
|
First install the Sentence Transformers library: |
|
|
|
```bash |
|
pip install -U sentence-transformers |
|
``` |
|
|
|
Then you can load this model and run inference. |
|
```python |
|
from sentence_transformers import SentenceTransformer |
|
|
|
# Download from the 🤗 Hub |
|
model = SentenceTransformer("BoghdadyJR/al-MiniLM-L6-v2") |
|
# Run inference |
|
sentences = [ |
|
'Keypoint.copy', |
|
'def copy(self, x=None, y=None):\n """\n Create a shallow copy of the Keypoint object.\n\n Parameters\n ----------\n x : None or number, optional\n Coordinate of the keypoint on the x axis.\n If ``None``, the instance\'s value will be copied.\n\n y : None or number, optional\n Coordinate of the keypoint on the y axis.\n If ``None``, the instance\'s value will be copied.\n\n Returns\n -------\n imgaug.Keypoint\n Shallow copy.\n\n """\n return self.deepcopy(x=x, y=y)', |
|
'def build_words_dataset(words=None, vocabulary_size=50000, printable=True, unk_key=\'UNK\'):\n """Build the words dictionary and replace rare words with \'UNK\' token.\n The most common word has the smallest integer id.\n\n Parameters\n ----------\n words : list of str or byte\n The context in list format. You may need to do preprocessing on the words, such as lower case, remove marks etc.\n vocabulary_size : int\n The maximum vocabulary size, limiting the vocabulary size. Then the script replaces rare words with \'UNK\' token.\n printable : boolean\n Whether to print the read vocabulary size of the given words.\n unk_key : str\n Represent the unknown words.\n\n Returns\n --------\n data : list of int\n The context in a list of ID.\n count : list of tuple and list\n Pair words and IDs.\n - count[0] is a list : the number of rare words\n - count[1:] are tuples : the number of occurrence of each word\n - e.g. [[\'UNK\', 418391], (b\'the\', 1061396), (b\'of\', 593677), (b\'and\', 416629), (b\'one\', 411764)]\n dictionary : dictionary\n It is `word_to_id` that maps word to ID.\n reverse_dictionary : a dictionary\n It is `id_to_word` that maps ID to word.\n\n Examples\n --------\n >>> words = tl.files.load_matt_mahoney_text8_dataset()\n >>> vocabulary_size = 50000\n >>> data, count, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size)\n\n References\n -----------------\n - `tensorflow/examples/tutorials/word2vec/word2vec_basic.py <https://github.com/tensorflow/tensorflow/blob/r0.7/tensorflow/examples/tutorials/word2vec/word2vec_basic.py>`__\n\n """\n if words is None:\n raise Exception("words : list of str or byte")\n\n count = [[unk_key, -1]]\n count.extend(collections.Counter(words).most_common(vocabulary_size - 1))\n dictionary = dict()\n for word, _ in count:\n dictionary[word] = len(dictionary)\n data = list()\n unk_count = 0\n for word in words:\n if word in dictionary:\n index = dictionary[word]\n else:\n index = 0 # dictionary[\'UNK\']\n unk_count += 1\n data.append(index)\n count[0][1] = unk_count\n reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))\n if printable:\n tl.logging.info(\'Real vocabulary size %d\' % len(collections.Counter(words).keys()))\n tl.logging.info(\'Limited vocabulary size {}\'.format(vocabulary_size))\n if len(collections.Counter(words).keys()) < vocabulary_size:\n raise Exception(\n "len(collections.Counter(words).keys()) >= vocabulary_size , the limited vocabulary_size must be less than or equal to the read vocabulary_size"\n )\n return data, count, dictionary, reverse_dictionary', |
|
] |
|
embeddings = model.encode(sentences) |
|
print(embeddings.shape) |
|
# (3, 768)
|
|
|
# Get the similarity scores for the embeddings |
|
similarities = model.similarity(embeddings, embeddings) |
|
print(similarities.shape) |
|
# torch.Size([3, 3])
|
``` |
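
Because the model was trained on (function name, function body) pairs, a natural application is retrieving code by name or a short description. A hedged sketch using `sentence_transformers.util.semantic_search`; the corpus snippets below are illustrative placeholders, not part of this card:

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("BoghdadyJR/al-MiniLM-L6-v2")

# Illustrative corpus: any list of function bodies works here.
corpus = [
    "def to_xy_array(self):\n    return np.float32([[kp.x, kp.y] for kp in self.keypoints])",
    "def leaky_relu6(x, alpha=0.2):\n    return tf.minimum(tf.maximum(x, alpha * x), 6)",
]
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)

# Query with a function-name-like string, mirroring the training data.
query_embedding = model.encode("KeypointsOnImage.to_xy_array", convert_to_tensor=True)
hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=2)[0]
for hit in hits:
    print(round(hit["score"], 3), corpus[hit["corpus_id"]].splitlines()[0])
```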
|
|
|
|
|
|
|
|
|
|
|
|
## Evaluation |
|
|
|
### Metrics |
|
|
|
#### Semantic Similarity |
|
* Dataset: `sts-dev` |
|
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator) |
|
|
|
| Metric | Value | |
|
|:--------------------|:----------| |
|
| pearson_cosine | 0.8806 | |
|
| **spearman_cosine** | **0.881** | |
|
| pearson_manhattan | 0.8781 | |
|
| spearman_manhattan | 0.8798 | |
|
| pearson_euclidean | 0.8794 | |
|
| spearman_euclidean | 0.881 | |
|
| pearson_dot | 0.8806 | |
|
| spearman_dot | 0.881 | |
|
| pearson_max | 0.8806 | |
|
| spearman_max | 0.881 | |
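
As a rough sketch of how such scores are produced (the exact `sts-dev` pairs are a held-out split of the training data and are not shipped with this card, so the triples below are illustrative assumptions):

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

model = SentenceTransformer("BoghdadyJR/al-MiniLM-L6-v2")

# Illustrative (sentence1, sentence2, gold score) triples.
sentences1 = ["Keypoint.copy", "Keypoint.copy"]
sentences2 = [
    "def copy(self, x=None, y=None):\n    return self.deepcopy(x=x, y=y)",
    "def build_words_dataset(words=None, vocabulary_size=50000):\n    ...",
]
gold_scores = [1.0, 0.0]

evaluator = EmbeddingSimilarityEvaluator(sentences1, sentences2, gold_scores, name="sts-dev")
print(evaluator(model))  # Pearson/Spearman correlations for cosine, dot, etc.
```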
|
|
|
|
|
|
|
|
|
## Training Details |
|
|
|
### Training Dataset |
|
|
|
#### code-search-net/code_search_net |
|
|
|
* Dataset: [code-search-net/code_search_net](https://huggingface.co/datasets/code-search-net/code_search_net) |
|
* Size: 20,000 training samples |
|
* Columns: <code>func_name</code> and <code>whole_func_string</code> |
|
* Approximate statistics based on the first 1000 samples: |
|
| | func_name | whole_func_string | |
|
|:--------|:---------------------------------------------------------------------------------|:------------------------------------------------------------------------------------| |
|
| type | string | string | |
|
| details | <ul><li>min: 3 tokens</li><li>mean: 8.18 tokens</li><li>max: 21 tokens</li></ul> | <ul><li>min: 38 tokens</li><li>mean: 192.0 tokens</li><li>max: 384 tokens</li></ul> | |
|
* Samples: |
|
| func_name | whole_func_string | |
|
|:-------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| |
|
| <code>ImageGraphCut.__msgc_step3_discontinuity_localization</code> | <code>def __msgc_step3_discontinuity_localization(self):<br> """<br> Estimate discontinuity in basis of low resolution image segmentation.<br> :return: discontinuity in low resolution<br> """<br> import scipy<br><br> start = self._start_time<br> seg = 1 - self.segmentation.astype(np.int8)<br> self.stats["low level object voxels"] = np.sum(seg)<br> self.stats["low level image voxels"] = np.prod(seg.shape)<br> # in seg is now stored low resolution segmentation<br> # back to normal parameters<br> # step 2: discontinuity localization<br> # self.segparams = sparams_hi<br> seg_border = scipy.ndimage.filters.laplace(seg, mode="constant")<br> logger.debug("seg_border: %s", scipy.stats.describe(seg_border, axis=None))<br> # logger.debug(str(np.max(seg_border)))<br> # logger.debug(str(np.min(seg_border)))<br> seg_border[seg_border != 0] = 1<br> logger.debug("seg_border: %s", scipy.stats.describe(seg_border, axis=None))<br> # scipy.ndimage.morphology.distance_transform_edt<br> boundary_dilatation_distance = self.segparams["boundary_dilatation_distance"]<br> seg = scipy.ndimage.morphology.binary_dilation(<br> seg_border,<br> # seg,<br> np.ones(<br> [<br> (boundary_dilatation_distance * 2) + 1,<br> (boundary_dilatation_distance * 2) + 1,<br> (boundary_dilatation_distance * 2) + 1,<br> ]<br> ),<br> )<br> if self.keep_temp_properties:<br> self.temp_msgc_lowres_discontinuity = seg<br> else:<br> self.temp_msgc_lowres_discontinuity = None<br><br> if self.debug_images:<br> import sed3<br><br> pd = sed3.sed3(seg_border) # ), contour=seg)<br> pd.show()<br> pd = sed3.sed3(seg) # ), contour=seg)<br> pd.show()<br> # segzoom = scipy.ndimage.interpolation.zoom(seg.astype('float'), zoom,<br> # order=0).astype('int8')<br> self.stats["t3"] = time.time() - start<br> return seg</code> |
| <code>ImageGraphCut.__multiscale_gc_lo2hi_run</code> | <code>def __multiscale_gc_lo2hi_run(self): # , pyed):<br> """<br> Run Graph-Cut segmentation with refinement of low resolution multiscale graph.<br> In first step is performed normal GC on low resolution data<br> Second step construct finer grid on edges of segmentation from first<br> step.<br> There is no option for use without `use_boundary_penalties`<br> """<br> # from PyQt4.QtCore import pyqtRemoveInputHook<br> # pyqtRemoveInputHook()<br> self._msgc_lo2hi_resize_init()<br> self.__msgc_step0_init()<br><br> hard_constraints = self.__msgc_step12_low_resolution_segmentation()<br> # ===== high resolution data processing<br> seg = self.__msgc_step3_discontinuity_localization()<br><br> self.stats["t3.1"] = (time.time() - self._start_time)<br> graph = Graph(<br> seg,<br> voxelsize=self.voxelsize,<br> nsplit=self.segparams["block_size"],<br> edge_weight_table=self._msgc_npenalty_table,<br> compute_low_nodes_index=True,<br> )<br><br> # graph.run() = graph.generate_base_grid() + graph.split_voxels()<br> # graph.run()<br> graph.generate_base_grid()<br> self.stats["t3.2"] = (time.time() - self._start_time)<br> graph.split_voxels()<br><br> self.stats["t3.3"] = (time.time() - self._start_time)<br><br> self.stats.update(graph.stats)<br> self.stats["t4"] = (time.time() - self._start_time)<br> mul_mask, mul_val = self.__msgc_tlinks_area_weight_from_low_segmentation(seg)<br> area_weight = 1<br> unariesalt = self.__create_tlinks(<br> self.img,<br> self.voxelsize,<br> self.seeds,<br> area_weight=area_weight,<br> hard_constraints=hard_constraints,<br> mul_mask=None,<br> mul_val=None,<br> )<br> # N-links prepared<br> self.stats["t5"] = (time.time() - self._start_time)<br> un, ind = np.unique(graph.msinds, return_index=True)<br> self.stats["t6"] = (time.time() - self._start_time)<br><br> self.stats["t7"] = (time.time() - self._start_time)<br> unariesalt2_lo2hi = np.hstack(<br> [unariesalt[ind, 0, 0].reshape(-1, 1), unariesalt[ind, 0, 1].reshape(-1, 1)]<br> )<br> nlinks_lo2hi = np.hstack([graph.edges, graph.edges_weights.reshape(-1, 1)])<br> if self.debug_images:<br> import sed3<br><br> ed = sed3.sed3(unariesalt[:, :, 0].reshape(self.img.shape))<br> ed.show()<br> import sed3<br><br> ed = sed3.sed3(unariesalt[:, :, 1].reshape(self.img.shape))<br> ed.show()<br> # ed = sed3.sed3(seg)<br> # ed.show()<br> # import sed3<br> # ed = sed3.sed3(graph.data)<br> # ed.show()<br> # import sed3<br> # ed = sed3.sed3(graph.msinds)<br> # ed.show()<br><br> # nlinks, unariesalt2, msinds = self.__msgc_step45678_construct_graph(area_weight, hard_constraints, seg)<br> # self.__msgc_step9_finish_perform_gc_and_reshape(nlinks, unariesalt2, msinds)<br> self.__msgc_step9_finish_perform_gc_and_reshape(<br> nlinks_lo2hi, unariesalt2_lo2hi, graph.msinds<br> )<br> self._msgc_lo2hi_resize_clean_finish()</code> |
| <code>ImageGraphCut.__multiscale_gc_hi2lo_run</code> | <code>def __multiscale_gc_hi2lo_run(self): # , pyed):<br> """<br> Run Graph-Cut segmentation with simplifiyng of high resolution multiscale graph.<br> In first step is performed normal GC on low resolution data<br> Second step construct finer grid on edges of segmentation from first<br> step.<br> There is no option for use without `use_boundary_penalties`<br> """<br> # from PyQt4.QtCore import pyqtRemoveInputHook<br> # pyqtRemoveInputHook()<br><br> self.__msgc_step0_init()<br> hard_constraints = self.__msgc_step12_low_resolution_segmentation()<br> # ===== high resolution data processing<br> seg = self.__msgc_step3_discontinuity_localization()<br> nlinks, unariesalt2, msinds = self.__msgc_step45678_hi2lo_construct_graph(<br> hard_constraints, seg<br> )<br> self.__msgc_step9_finish_perform_gc_and_reshape(nlinks, unariesalt2, msinds)</code> |

* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```
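
For reproducibility, the loss configuration above maps directly onto the `sentence-transformers` API. The following is a minimal sketch (the base model checkpoint is the one named in this card; nothing else about the original training script is assumed):

```python
from sentence_transformers import SentenceTransformer, util
from sentence_transformers.losses import MultipleNegativesRankingLoss

# Sketch only: instantiate the loss with the parameters from the JSON block
# above. Each (func_name, whole_func_string) pair is treated as a positive;
# the other examples in the batch serve as in-batch negatives.
model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
loss = MultipleNegativesRankingLoss(model, scale=20.0, similarity_fct=util.cos_sim)
```

Because every other in-batch example acts as a negative, duplicate entries within a batch would be scored as false negatives; this is why the `no_duplicates` batch sampler appears under the training hyperparameters below.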

### Evaluation Dataset

#### code-search-net/code_search_net

* Dataset: [code-search-net/code_search_net](https://huggingface.co/datasets/code-search-net/code_search_net)
* Size: 15,000 evaluation samples
* Columns: <code>func_name</code> and <code>whole_func_string</code>
* Approximate statistics based on the first 1000 samples:
  |         | func_name                                                                         | whole_func_string                                                                    |
  |:--------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|
  | type    | string                                                                             | string                                                                                  |
  | details | <ul><li>min: 3 tokens</li><li>mean: 9.23 tokens</li><li>max: 24 tokens</li></ul> | <ul><li>min: 50 tokens</li><li>mean: 276.31 tokens</li><li>max: 384 tokens</li></ul> |
* Samples:
  | func_name | whole_func_string |
  |:----------|:------------------|
  | <code>learn</code> | <code>def learn(env,<br> network,<br> seed=None,<br> lr=5e-4,<br> total_timesteps=100000,<br> buffer_size=50000,<br> exploration_fraction=0.1,<br> exploration_final_eps=0.02,<br> train_freq=1,<br> batch_size=32,<br> print_freq=100,<br> checkpoint_freq=10000,<br> checkpoint_path=None,<br> learning_starts=1000,<br> gamma=1.0,<br> target_network_update_freq=500,<br> prioritized_replay=False,<br> prioritized_replay_alpha=0.6,<br> prioritized_replay_beta0=0.4,<br> prioritized_replay_beta_iters=None,<br> prioritized_replay_eps=1e-6,<br> param_noise=False,<br> callback=None,<br> load_path=None,<br> **network_kwargs<br> ):<br> """Train a deepq model.<br><br> Parameters<br> -------<br> env: gym.Env<br> environment to train on<br> network: string or a function<br> neural network to use as a q function approximator. If string, has to be one of the names of registered models in baselines.common.models<br> (mlp, cnn, conv_only). If a function, should take an observation tensor and return a latent variable tensor, which<br> will be mapped to the Q function heads (see build_q_func in baselines.deepq.models for details on that)<br> seed: int or None<br> prng seed. The runs with the same seed "should" give the same results. If None, no seeding is used.<br> lr: float<br> learning rate for adam optimizer<br> total_timesteps: int<br> number of env steps to optimizer for<br> buffer_size: int<br> size of the replay buffer<br> exploration_fraction: float<br> fraction of entire training period over which the exploration rate is annealed<br> exploration_final_eps: float<br> final value of random action probability<br> train_freq: int<br> update the model every `train_freq` steps.<br> set to None to disable printing<br> batch_size: int<br> size of a batched sampled from replay buffer for training<br> print_freq: int<br> how often to print out training progress<br> set to None to disable printing<br> checkpoint_freq: int<br> how often to save the model. This is so that the best version is restored<br> at the end of the training. If you do not wish to restore the best version at<br> the end of the training set this variable to None.<br> learning_starts: int<br> how many steps of the model to collect transitions for before learning starts<br> gamma: float<br> discount factor<br> target_network_update_freq: int<br> update the target network every `target_network_update_freq` steps.<br> prioritized_replay: True<br> if True prioritized replay buffer will be used.<br> prioritized_replay_alpha: float<br> alpha parameter for prioritized replay buffer<br> prioritized_replay_beta0: float<br> initial value of beta for prioritized replay buffer<br> prioritized_replay_beta_iters: int<br> number of iterations over which beta will be annealed from initial value<br> to 1.0. If set to None equals to total_timesteps.<br> prioritized_replay_eps: float<br> epsilon to add to the TD errors when updating priorities.<br> param_noise: bool<br> whether or not to use parameter space noise (https://arxiv.org/abs/1706.01905)<br> callback: (locals, globals) -> None<br> function called at every steps with state of the algorithm.<br> If callback returns true training stops.<br> load_path: str<br> path to load the model from. (default: None)<br> **network_kwargs<br> additional keyword arguments to pass to the network builder.<br><br> Returns<br> -------<br> act: ActWrapper<br> Wrapper over act function. Adds ability to save it and load it.<br> See header of baselines/deepq/categorical.py for details on the act function.<br> """<br> # Create all the functions necessary to train the model<br><br> sess = get_session()<br> set_global_seeds(seed)<br><br> q_func = build_q_func(network, **network_kwargs)<br><br> # capture the shape outside the closure so that the env object is not serialized<br> # by cloudpickle when serializing make_obs_ph<br><br> observation_space = env.observation_space<br> def make_obs_ph(name):<br> return ObservationInput(observation_space, name=name)<br><br> act, train, update_target, debug = deepq.build_train(<br> make_obs_ph=make_obs_ph,<br> q_func=q_func,<br> num_actions=env.action_space.n,<br> optimizer=tf.train.AdamOptimizer(learning_rate=lr),<br> gamma=gamma,<br> grad_norm_clipping=10,<br> param_noise=param_noise<br> )<br><br> act_params = {<br> 'make_obs_ph': make_obs_ph,<br> 'q_func': q_func,<br> 'num_actions': env.action_space.n,<br> }<br><br> act = ActWrapper(act, act_params)<br><br> # Create the replay buffer<br> if prioritized_replay:<br> replay_buffer = PrioritizedReplayBuffer(buffer_size, alpha=prioritized_replay_alpha)<br> if prioritized_replay_beta_iters is None:<br> prioritized_replay_beta_iters = total_timesteps<br> beta_schedule = LinearSchedule(prioritized_replay_beta_iters,<br> initial_p=prioritized_replay_beta0,<br> final_p=1.0)<br> else:<br> replay_buffer = ReplayBuffer(buffer_size)<br> beta_schedule = None<br> # Create the schedule for exploration starting from 1.<br> exploration = LinearSchedule(schedule_timesteps=int(exploration_fraction * total_timesteps),<br> initial_p=1.0,<br> final_p=exploration_final_eps)<br><br> # Initialize the parameters and copy them to the target network.<br> U.initialize()<br> update_target()<br><br> episode_rewards = [0.0]<br> saved_mean_reward = None<br> obs = env.reset()<br> reset = True<br><br> with tempfile.TemporaryDirectory() as td:<br> td = checkpoint_path or td<br><br> model_file = os.path.join(td, "model")<br> model_saved = False<br><br> if tf.train.latest_checkpoint(td) is not None:<br> load_variables(model_file)<br> logger.log('Loaded model from {}'.format(model_file))<br> model_saved = True<br> elif load_path is not None:<br> load_variables(load_path)<br> logger.log('Loaded model from {}'.format(load_path))<br><br><br> for t in range(total_timesteps):<br> if callback is not None:<br> if callback(locals(), globals()):<br> break<br> # Take action and update exploration to the newest value<br> kwargs = {}<br> if not param_noise:<br> update_eps = exploration.value(t)<br> update_param_noise_threshold = 0.<br> else:<br> update_eps = 0.<br> # Compute the threshold such that the KL divergence between perturbed and non-perturbed<br> # policy is comparable to eps-greedy exploration with eps = exploration.value(t).<br> # See Appendix C.1 in Parameter Space Noise for Exploration, Plappert et al., 2017<br> # for detailed explanation.<br> update_param_noise_threshold = -np.log(1. - exploration.value(t) + exploration.value(t) / float(env.action_space.n))<br> kwargs['reset'] = reset<br> kwargs['update_param_noise_threshold'] = update_param_noise_threshold<br> kwargs['update_param_noise_scale'] = True<br> action = act(np.array(obs)[None], update_eps=update_eps, **kwargs)[0]<br> env_action = action<br> reset = False<br> new_obs, rew, done, _ = env.step(env_action)<br> # Store transition in the replay buffer.<br> replay_buffer.add(obs, action, rew, new_obs, float(done))<br> obs = new_obs<br><br> episode_rewards[-1] += rew<br> if done:<br> obs = env.reset()<br> episode_rewards.append(0.0)<br> reset = True<br><br> if t > learning_starts and t % train_freq == 0:<br> # Minimize the error in Bellman's equation on a batch sampled from replay buffer.<br> if prioritized_replay:<br> experience = replay_buffer.sample(batch_size, beta=beta_schedule.value(t))<br> (obses_t, actions, rewards, obses_tp1, dones, weights, batch_idxes) = experience<br> else:<br> obses_t, actions, rewards, obses_tp1, dones = replay_buffer.sample(batch_size)<br> weights, batch_idxes = np.ones_like(rewards), None<br> td_errors = train(obses_t, actions, rewards, obses_tp1, dones, weights)<br> if prioritized_replay:<br> new_priorities = np.abs(td_errors) + prioritized_replay_eps<br> replay_buffer.update_priorities(batch_idxes, new_priorities)<br><br> if t > learning_starts and t % target_network_update_freq == 0:<br> # Update target network periodically.<br> update_target()<br><br> mean_100ep_reward = round(np.mean(episode_rewards[-101:-1]), 1)<br> num_episodes = len(episode_rewards)<br> if done and print_freq is not None and len(episode_rewards) % print_freq == 0:<br> logger.record_tabular("steps", t)<br> logger.record_tabular("episodes", num_episodes)<br> logger.record_tabular("mean 100 episode reward", mean_100ep_reward)<br> logger.record_tabular("% time spent exploring", int(100 * exploration.value(t)))<br> logger.dump_tabular()<br><br> if (checkpoint_freq is not None and t > learning_starts and<br> num_episodes > 100 and t % checkpoint_freq == 0):<br> if saved_mean_reward is None or mean_100ep_reward > saved_mean_reward:<br> if print_freq is not None:<br> logger.log("Saving model due to mean reward increase: {} -> {}".format(<br> saved_mean_reward, mean_100ep_reward))<br> save_variables(model_file)<br> model_saved = True<br> saved_mean_reward = mean_100ep_reward<br> if model_saved:<br> if print_freq is not None:<br> logger.log("Restored model with mean reward: {}".format(saved_mean_reward))<br> load_variables(model_file)<br><br> return act</code> |
  | <code>ActWrapper.save_act</code> | <code>def save_act(self, path=None):<br> """Save model to a pickle located at `path`"""<br> if path is None:<br> path = os.path.join(logger.get_dir(), "model.pkl")<br><br> with tempfile.TemporaryDirectory() as td:<br> save_variables(os.path.join(td, "model"))<br> arc_name = os.path.join(td, "packed.zip")<br> with zipfile.ZipFile(arc_name, 'w') as zipf:<br> for root, dirs, files in os.walk(td):<br> for fname in files:<br> file_path = os.path.join(root, fname)<br> if file_path != arc_name:<br> zipf.write(file_path, os.path.relpath(file_path, td))<br> with open(arc_name, "rb") as f:<br> model_data = f.read()<br> with open(path, "wb") as f:<br> cloudpickle.dump((model_data, self._act_params), f)</code> |
  | <code>nature_cnn</code> | <code>def nature_cnn(unscaled_images, **conv_kwargs):<br> """<br> CNN from Nature paper.<br> """<br> scaled_images = tf.cast(unscaled_images, tf.float32) / 255.<br> activ = tf.nn.relu<br> h = activ(conv(scaled_images, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2),<br> **conv_kwargs))<br> h2 = activ(conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2), **conv_kwargs))<br> h3 = activ(conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2), **conv_kwargs))<br> h3 = conv_to_fc(h3)<br> return activ(fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2)))</code> |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```
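
As a rough sketch of how `func_name` / `whole_func_string` pairs can be pulled from this dataset (the `python` configuration and the `validation` split are assumptions; the card does not record which language splits were used):

```python
from datasets import load_dataset

# Assumption: the python configuration of CodeSearchNet. Recent versions of
# `datasets` require trust_remote_code=True for this script-based dataset.
ds = load_dataset(
    "code-search-net/code_search_net", "python",
    split="validation", trust_remote_code=True,
)

# Keep only the two columns this model was trained on.
ds = ds.select_columns(["func_name", "whole_func_string"])
print(ds[0]["func_name"])
```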

### Training Hyperparameters

#### Non-Default Hyperparameters

- `eval_strategy`: steps
- `per_device_train_batch_size`: 16
- `per_device_eval_batch_size`: 16
- `learning_rate`: 2e-05
- `num_train_epochs`: 1
- `warmup_ratio`: 0.1
- `fp16`: True
- `batch_sampler`: no_duplicates
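
A minimal sketch of how these non-default values translate into `SentenceTransformerTrainingArguments` (the `output_dir` is a hypothetical placeholder; the remaining arguments mirror the list above):

```python
from sentence_transformers import SentenceTransformerTrainingArguments
from sentence_transformers.training_args import BatchSamplers

args = SentenceTransformerTrainingArguments(
    output_dir="output/all-mpnet-base-v2-code-search",  # hypothetical path
    eval_strategy="steps",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,
    num_train_epochs=1,
    warmup_ratio=0.1,
    fp16=True,
    # Corresponds to `no_duplicates` above; prevents duplicate samples in a
    # batch from acting as false negatives for MultipleNegativesRankingLoss.
    batch_sampler=BatchSamplers.NO_DUPLICATES,
)
```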

#### All Hyperparameters

<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: steps
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 16
- `per_device_eval_batch_size`: 16
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `learning_rate`: 2e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 1
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.1
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: True
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: False
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `batch_sampler`: no_duplicates
- `multi_dataset_batch_sampler`: proportional

</details>

### Training Logs

| Epoch | Step | Training Loss | Validation Loss | sts-dev_spearman_cosine |
|:-----:|:----:|:-------------:|:---------------:|:-----------------------:|
| 0     | 0    | -             | -               | 0.8810                  |
| 0.08  | 100  | 0.4124        | 0.2191          | -                       |
| 0.16  | 200  | 0.108         | 0.0993          | -                       |
| 0.24  | 300  | 0.127         | 0.0756          | -                       |
| 0.32  | 400  | 0.0728        | -               | -                       |
| 0.08  | 100  | 0.0662        | 0.0683          | -                       |
| 0.16  | 200  | 0.0321        | 0.0660          | -                       |
| 0.24  | 300  | 0.0815        | 0.0584          | -                       |
| 0.32  | 400  | 0.049         | 0.0591          | -                       |
| 0.4   | 500  | 0.0636        | 0.0612          | -                       |
| 0.48  | 600  | 0.0929        | 0.0577          | -                       |
| 0.56  | 700  | 0.0342        | 0.0568          | -                       |
| 0.64  | 800  | 0.0265        | 0.0572          | -                       |
| 0.72  | 900  | 0.0406        | 0.0551          | -                       |
| 0.8   | 1000 | 0.039         | 0.0549          | -                       |
| 0.88  | 1100 | 0.0376        | 0.0551          | -                       |
| 0.96  | 1200 | 0.0823        | 0.0556          | -                       |

Note that the epoch and step counters restart after step 400: the table logs two successive training runs rather than one continuous run.
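
The `sts-dev_spearman_cosine` column is the Spearman rank correlation of cosine similarities, the metric reported by an `EmbeddingSimilarityEvaluator`. A toy sketch follows; the pairs and scores are illustrative placeholders, since the actual dev data behind `sts-dev` is not recorded in this card:

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

# Placeholder pairs/scores purely to show the mechanics; real evaluation
# would use held-out (func_name, whole_func_string) pairs with gold scores.
evaluator = EmbeddingSimilarityEvaluator(
    sentences1=["nature_cnn", "ActWrapper.save_act"],
    sentences2=["def nature_cnn(unscaled_images): ...", "def unrelated(): pass"],
    scores=[1.0, 0.0],
    name="sts-dev",
)
model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")  # illustrative
print(evaluator(model))  # includes sts-dev_pearson_cosine, sts-dev_spearman_cosine, ...
```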

### Framework Versions

- Python: 3.10.13
- Sentence Transformers: 3.0.1
- Transformers: 4.42.3
- PyTorch: 2.1.2
- Accelerate: 0.32.1
- Datasets: 2.20.0
- Tokenizers: 0.19.1

## Citation

### BibTeX

#### Sentence Transformers

```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MultipleNegativesRankingLoss

```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->